assembler_x86.hpp revision 6182:2f459c5235f9
1/*
2 * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25#ifndef CPU_X86_VM_ASSEMBLER_X86_HPP
26#define CPU_X86_VM_ASSEMBLER_X86_HPP
27
28#include "asm/register.hpp"
29
30class BiasedLockingCounters;
31
32// Contains all the definitions needed for x86 assembly code generation.
33
34// Calling convention
35class Argument VALUE_OBJ_CLASS_SPEC {
36 public:
37  enum {
38#ifdef _LP64
39#ifdef _WIN64
40    n_int_register_parameters_c   = 4, // rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
41    n_float_register_parameters_c = 4,  // xmm0 - xmm3 (c_farg0, c_farg1, ... )
42#else
43    n_int_register_parameters_c   = 6, // rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
44    n_float_register_parameters_c = 8,  // xmm0 - xmm7 (c_farg0, c_farg1, ... )
45#endif // _WIN64
46    n_int_register_parameters_j   = 6, // j_rarg0, j_rarg1, ...
47    n_float_register_parameters_j = 8  // j_farg0, j_farg1, ...
48#else
49    n_register_parameters = 0   // 0 registers used to pass arguments
50#endif // _LP64
51  };
52};
53
54
55#ifdef _LP64
56// Symbolically name the register arguments used by the c calling convention.
57// Windows is different from linux/solaris. So much for standards...
58
59#ifdef _WIN64
60
61REGISTER_DECLARATION(Register, c_rarg0, rcx);
62REGISTER_DECLARATION(Register, c_rarg1, rdx);
63REGISTER_DECLARATION(Register, c_rarg2, r8);
64REGISTER_DECLARATION(Register, c_rarg3, r9);
65
66REGISTER_DECLARATION(XMMRegister, c_farg0, xmm0);
67REGISTER_DECLARATION(XMMRegister, c_farg1, xmm1);
68REGISTER_DECLARATION(XMMRegister, c_farg2, xmm2);
69REGISTER_DECLARATION(XMMRegister, c_farg3, xmm3);
70
71#else
72
73REGISTER_DECLARATION(Register, c_rarg0, rdi);
74REGISTER_DECLARATION(Register, c_rarg1, rsi);
75REGISTER_DECLARATION(Register, c_rarg2, rdx);
76REGISTER_DECLARATION(Register, c_rarg3, rcx);
77REGISTER_DECLARATION(Register, c_rarg4, r8);
78REGISTER_DECLARATION(Register, c_rarg5, r9);
79
80REGISTER_DECLARATION(XMMRegister, c_farg0, xmm0);
81REGISTER_DECLARATION(XMMRegister, c_farg1, xmm1);
82REGISTER_DECLARATION(XMMRegister, c_farg2, xmm2);
83REGISTER_DECLARATION(XMMRegister, c_farg3, xmm3);
84REGISTER_DECLARATION(XMMRegister, c_farg4, xmm4);
85REGISTER_DECLARATION(XMMRegister, c_farg5, xmm5);
86REGISTER_DECLARATION(XMMRegister, c_farg6, xmm6);
87REGISTER_DECLARATION(XMMRegister, c_farg7, xmm7);
88
89#endif // _WIN64
90
91// Symbolically name the register arguments used by the Java calling convention.
92// We have control over the convention for java so we can do what we please.
93// What pleases us is to offset the java calling convention so that when
94// we call a suitable jni method the arguments are lined up and we don't
95// have to do little shuffling. A suitable jni method is non-static and a
96// small number of arguments (two fewer args on windows)
97//
98//        |-------------------------------------------------------|
99//        | c_rarg0   c_rarg1  c_rarg2 c_rarg3 c_rarg4 c_rarg5    |
100//        |-------------------------------------------------------|
101//        | rcx       rdx      r8      r9      rdi*    rsi*       | windows (* not a c_rarg)
102//        | rdi       rsi      rdx     rcx     r8      r9         | solaris/linux
103//        |-------------------------------------------------------|
104//        | j_rarg5   j_rarg0  j_rarg1 j_rarg2 j_rarg3 j_rarg4    |
105//        |-------------------------------------------------------|
106
107REGISTER_DECLARATION(Register, j_rarg0, c_rarg1);
108REGISTER_DECLARATION(Register, j_rarg1, c_rarg2);
109REGISTER_DECLARATION(Register, j_rarg2, c_rarg3);
110// Windows runs out of register args here
111#ifdef _WIN64
112REGISTER_DECLARATION(Register, j_rarg3, rdi);
113REGISTER_DECLARATION(Register, j_rarg4, rsi);
114#else
115REGISTER_DECLARATION(Register, j_rarg3, c_rarg4);
116REGISTER_DECLARATION(Register, j_rarg4, c_rarg5);
117#endif /* _WIN64 */
118REGISTER_DECLARATION(Register, j_rarg5, c_rarg0);
119
120REGISTER_DECLARATION(XMMRegister, j_farg0, xmm0);
121REGISTER_DECLARATION(XMMRegister, j_farg1, xmm1);
122REGISTER_DECLARATION(XMMRegister, j_farg2, xmm2);
123REGISTER_DECLARATION(XMMRegister, j_farg3, xmm3);
124REGISTER_DECLARATION(XMMRegister, j_farg4, xmm4);
125REGISTER_DECLARATION(XMMRegister, j_farg5, xmm5);
126REGISTER_DECLARATION(XMMRegister, j_farg6, xmm6);
127REGISTER_DECLARATION(XMMRegister, j_farg7, xmm7);
128
129REGISTER_DECLARATION(Register, rscratch1, r10);  // volatile
130REGISTER_DECLARATION(Register, rscratch2, r11);  // volatile
131
132REGISTER_DECLARATION(Register, r12_heapbase, r12); // callee-saved
133REGISTER_DECLARATION(Register, r15_thread, r15); // callee-saved
134
135#else
// rscratch1 will appear in 32-bit code that is dead but must of course still
// compile. Using noreg ensures that if the dead code is incorrectly live and
// executed, it will cause an assertion failure.
139#define rscratch1 noreg
140#define rscratch2 noreg
141
142#endif // _LP64
143
144// JSR 292 fixed register usages:
145REGISTER_DECLARATION(Register, rbp_mh_SP_save, rbp);
146
147// Address is an abstraction used to represent a memory location
148// using any of the amd64 addressing modes with one object.
149//
150// Note: A register location is represented via a Register, not
151//       via an address for efficiency & simplicity reasons.
152
153class ArrayAddress;
154
155class Address VALUE_OBJ_CLASS_SPEC {
156 public:
157  enum ScaleFactor {
158    no_scale = -1,
159    times_1  =  0,
160    times_2  =  1,
161    times_4  =  2,
162    times_8  =  3,
163    times_ptr = LP64_ONLY(times_8) NOT_LP64(times_4)
164  };
165  static ScaleFactor times(int size) {
166    assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size");
167    if (size == 8)  return times_8;
168    if (size == 4)  return times_4;
169    if (size == 2)  return times_2;
170    return times_1;
171  }
172  static int scale_size(ScaleFactor scale) {
173    assert(scale != no_scale, "");
174    assert(((1 << (int)times_1) == 1 &&
175            (1 << (int)times_2) == 2 &&
176            (1 << (int)times_4) == 4 &&
177            (1 << (int)times_8) == 8), "");
178    return (1 << (int)scale);
179  }
180
181 private:
182  Register         _base;
183  Register         _index;
184  ScaleFactor      _scale;
185  int              _disp;
186  RelocationHolder _rspec;
187
188  // Easily misused constructors make them private
189  // %%% can we make these go away?
190  NOT_LP64(Address(address loc, RelocationHolder spec);)
191  Address(int disp, address loc, relocInfo::relocType rtype);
192  Address(int disp, address loc, RelocationHolder spec);
193
194 public:
195
196 int disp() { return _disp; }
197  // creation
198  Address()
199    : _base(noreg),
200      _index(noreg),
201      _scale(no_scale),
202      _disp(0) {
203  }
204
205  // No default displacement otherwise Register can be implicitly
206  // converted to 0(Register) which is quite a different animal.
207
208  Address(Register base, int disp)
209    : _base(base),
210      _index(noreg),
211      _scale(no_scale),
212      _disp(disp) {
213  }
214
215  Address(Register base, Register index, ScaleFactor scale, int disp = 0)
216    : _base (base),
217      _index(index),
218      _scale(scale),
219      _disp (disp) {
220    assert(!index->is_valid() == (scale == Address::no_scale),
221           "inconsistent address");
222  }
223
224  Address(Register base, RegisterOrConstant index, ScaleFactor scale = times_1, int disp = 0)
225    : _base (base),
226      _index(index.register_or_noreg()),
227      _scale(scale),
228      _disp (disp + (index.constant_or_zero() * scale_size(scale))) {
229    if (!index.is_register())  scale = Address::no_scale;
230    assert(!_index->is_valid() == (scale == Address::no_scale),
231           "inconsistent address");
232  }
233
234  Address plus_disp(int disp) const {
235    Address a = (*this);
236    a._disp += disp;
237    return a;
238  }
239  Address plus_disp(RegisterOrConstant disp, ScaleFactor scale = times_1) const {
240    Address a = (*this);
241    a._disp += disp.constant_or_zero() * scale_size(scale);
242    if (disp.is_register()) {
243      assert(!a.index()->is_valid(), "competing indexes");
244      a._index = disp.as_register();
245      a._scale = scale;
246    }
247    return a;
248  }
249  bool is_same_address(Address a) const {
250    // disregard _rspec
251    return _base == a._base && _disp == a._disp && _index == a._index && _scale == a._scale;
252  }
253
254  // The following two overloads are used in connection with the
255  // ByteSize type (see sizes.hpp).  They simplify the use of
256  // ByteSize'd arguments in assembly code. Note that their equivalent
257  // for the optimized build are the member functions with int disp
258  // argument since ByteSize is mapped to an int type in that case.
259  //
260  // Note: DO NOT introduce similar overloaded functions for WordSize
261  // arguments as in the optimized mode, both ByteSize and WordSize
262  // are mapped to the same type and thus the compiler cannot make a
263  // distinction anymore (=> compiler errors).
264
265#ifdef ASSERT
266  Address(Register base, ByteSize disp)
267    : _base(base),
268      _index(noreg),
269      _scale(no_scale),
270      _disp(in_bytes(disp)) {
271  }
272
273  Address(Register base, Register index, ScaleFactor scale, ByteSize disp)
274    : _base(base),
275      _index(index),
276      _scale(scale),
277      _disp(in_bytes(disp)) {
278    assert(!index->is_valid() == (scale == Address::no_scale),
279           "inconsistent address");
280  }
281
282  Address(Register base, RegisterOrConstant index, ScaleFactor scale, ByteSize disp)
283    : _base (base),
284      _index(index.register_or_noreg()),
285      _scale(scale),
286      _disp (in_bytes(disp) + (index.constant_or_zero() * scale_size(scale))) {
287    if (!index.is_register())  scale = Address::no_scale;
288    assert(!_index->is_valid() == (scale == Address::no_scale),
289           "inconsistent address");
290  }
291
292#endif // ASSERT
293
294  // accessors
295  bool        uses(Register reg) const { return _base == reg || _index == reg; }
296  Register    base()             const { return _base;  }
297  Register    index()            const { return _index; }
298  ScaleFactor scale()            const { return _scale; }
299  int         disp()             const { return _disp;  }
300
301  // Convert the raw encoding form into the form expected by the constructor for
302  // Address.  An index of 4 (rsp) corresponds to having no index, so convert
303  // that to noreg for the Address constructor.
304  static Address make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc);
305
306  static Address make_array(ArrayAddress);
307
308 private:
309  bool base_needs_rex() const {
310    return _base != noreg && _base->encoding() >= 8;
311  }
312
313  bool index_needs_rex() const {
314    return _index != noreg &&_index->encoding() >= 8;
315  }
316
317  relocInfo::relocType reloc() const { return _rspec.type(); }
318
319  friend class Assembler;
320  friend class MacroAssembler;
321  friend class LIR_Assembler; // base/index/scale/disp
322};
323
324//
325// AddressLiteral has been split out from Address because operands of this type
326// need to be treated specially on 32bit vs. 64bit platforms. By splitting it out
327// the few instructions that need to deal with address literals are unique and the
328// MacroAssembler does not have to implement every instruction in the Assembler
329// in order to search for address literals that may need special handling depending
330// on the instruction and the platform. As small step on the way to merging i486/amd64
331// directories.
332//
333class AddressLiteral VALUE_OBJ_CLASS_SPEC {
334  friend class ArrayAddress;
335  RelocationHolder _rspec;
336  // Typically we use AddressLiterals we want to use their rval
337  // However in some situations we want the lval (effect address) of the item.
338  // We provide a special factory for making those lvals.
339  bool _is_lval;
340
341  // If the target is far we'll need to load the ea of this to
342  // a register to reach it. Otherwise if near we can do rip
343  // relative addressing.
344
345  address          _target;
346
347 protected:
348  // creation
349  AddressLiteral()
350    : _is_lval(false),
351      _target(NULL)
352  {}
353
354  public:
355
356
357  AddressLiteral(address target, relocInfo::relocType rtype);
358
359  AddressLiteral(address target, RelocationHolder const& rspec)
360    : _rspec(rspec),
361      _is_lval(false),
362      _target(target)
363  {}
364
365  AddressLiteral addr() {
366    AddressLiteral ret = *this;
367    ret._is_lval = true;
368    return ret;
369  }
370
371
372 private:
373
374  address target() { return _target; }
375  bool is_lval() { return _is_lval; }
376
377  relocInfo::relocType reloc() const { return _rspec.type(); }
378  const RelocationHolder& rspec() const { return _rspec; }
379
380  friend class Assembler;
381  friend class MacroAssembler;
382  friend class Address;
383  friend class LIR_Assembler;
384};
385
// Convenience classes
387class RuntimeAddress: public AddressLiteral {
388
389  public:
390
391  RuntimeAddress(address target) : AddressLiteral(target, relocInfo::runtime_call_type) {}
392
393};
394
395class ExternalAddress: public AddressLiteral {
396 private:
397  static relocInfo::relocType reloc_for_target(address target) {
398    // Sometimes ExternalAddress is used for values which aren't
399    // exactly addresses, like the card table base.
400    // external_word_type can't be used for values in the first page
401    // so just skip the reloc in that case.
402    return external_word_Relocation::can_be_relocated(target) ? relocInfo::external_word_type : relocInfo::none;
403  }
404
405 public:
406
407  ExternalAddress(address target) : AddressLiteral(target, reloc_for_target(target)) {}
408
409};
410
411class InternalAddress: public AddressLiteral {
412
413  public:
414
415  InternalAddress(address target) : AddressLiteral(target, relocInfo::internal_word_type) {}
416
417};
418
// x86 can do array addressing as a single operation since disp can be an
// absolute address; amd64 can't. We create a class that expresses the concept
// but does extra magic on amd64 to get the final result.
422
423class ArrayAddress VALUE_OBJ_CLASS_SPEC {
424  private:
425
426  AddressLiteral _base;
427  Address        _index;
428
429  public:
430
431  ArrayAddress() {};
432  ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {};
433  AddressLiteral base() { return _base; }
434  Address index() { return _index; }
435
436};
437
438const int FPUStateSizeInWords = NOT_LP64(27) LP64_ONLY( 512 / wordSize);
439
440// The Intel x86/Amd64 Assembler: Pure assembler doing NO optimizations on the instruction
441// level (e.g. mov rax, 0 is not translated into xor rax, rax!); i.e., what you write
442// is what you get. The Assembler is generating code into a CodeBuffer.
443
444class Assembler : public AbstractAssembler  {
445  friend class AbstractAssembler; // for the non-virtual hack
446  friend class LIR_Assembler; // as_Address()
447  friend class StubGenerator;
448
449 public:
// The x86 condition codes used for conditional jumps/moves, listed in
// encoding order. Names that share a line are aliases for the same code.
enum Condition {
  overflow      = 0x0,
  noOverflow    = 0x1,
  below         = 0x2,  carrySet   = 0x2,
  aboveEqual    = 0x3,  carryClear = 0x3,
  zero          = 0x4,  equal      = 0x4,
  notZero       = 0x5,  notEqual   = 0x5,
  belowEqual    = 0x6,
  above         = 0x7,
  negative      = 0x8,
  positive      = 0x9,
  parity        = 0xa,
  noParity      = 0xb,
  less          = 0xc,
  greaterEqual  = 0xd,
  lessEqual     = 0xe,
  greater       = 0xf
};
472
// Instruction prefix bytes.
enum Prefix {
  // segment overrides
  ES_segment = 0x26,
  CS_segment = 0x2e,
  SS_segment = 0x36,
  DS_segment = 0x3e,
  FS_segment = 0x64,
  GS_segment = 0x65,

  // REX prefixes: 0x40 plus one bit each for B (0x1), X (0x2), R (0x4)
  // and W (0x8); the remaining names are the combinations of those bits.
  REX        = 0x40,

  REX_B      = REX | 0x1,
  REX_X      = REX | 0x2,
  REX_R      = REX | 0x4,
  REX_W      = REX | 0x8,

  REX_XB     = REX_X | REX_B,
  REX_RB     = REX_R | REX_B,
  REX_RX     = REX_R | REX_X,
  REX_RXB    = REX_R | REX_X | REX_B,

  REX_WB     = REX_W | REX_B,
  REX_WX     = REX_W | REX_X,
  REX_WXB    = REX_W | REX_X | REX_B,
  REX_WR     = REX_W | REX_R,
  REX_WRB    = REX_W | REX_R | REX_B,
  REX_WRX    = REX_W | REX_R | REX_X,
  REX_WRXB   = REX_W | REX_R | REX_X | REX_B,

  // first byte of the two VEX prefix forms
  VEX_3bytes = 0xC4,
  VEX_2bytes = 0xC5
};
505
// Bit masks used when building a VEX prefix. Note that VEX_R and VEX_W
// have the same value.
enum VexPrefix {
  VEX_W = 0x80,
  VEX_R = 0x80,
  VEX_X = 0x40,
  VEX_B = 0x20
};

// Selector for the SIMD prefix of an instruction; the names presumably refer
// to the legacy prefix byte (none, 66, F3, F2) each value stands for.
enum VexSimdPrefix {
  VEX_SIMD_NONE = 0,
  VEX_SIMD_66   = 1,
  VEX_SIMD_F3   = 2,
  VEX_SIMD_F2   = 3
};

// Selector for the opcode escape of an instruction; the names presumably
// refer to the escape bytes (none, 0F, 0F 38, 0F 3A) each value stands for.
enum VexOpcode {
  VEX_OPCODE_NONE  = 0,
  VEX_OPCODE_0F    = 1,
  VEX_OPCODE_0F_38 = 2,
  VEX_OPCODE_0F_3A = 3
};
526
// Input to locate_operand, and format code for relocations.
enum WhichOperand {
  imm_operand    = 0,          // embedded 32-bit|64-bit immediate operand
  disp32_operand = 1,          // embedded 32-bit displacement or address
  call32_operand = 2,          // embedded 32-bit self-relative displacement
#ifdef _LP64
  narrow_oop_operand  = 3,     // embedded 32-bit immediate narrow oop
  _WhichOperand_limit = 4
#else
  _WhichOperand_limit = 3
#endif
};
539
540
541
  // NOTE: The general philosophy of the declarations here is that 64bit versions
  // of instructions are freely declared without the need for wrapping them in an ifdef.
  // (Some dangerous instructions are ifdef'd out of inappropriate jvm's.)
  // In the .cpp file the implementations are wrapped so that they are dropped out
  // of the resulting jvm. This is done mostly to keep the footprint of MINIMAL
  // to the size it was prior to merging up the 32bit and 64bit assemblers.
  //
  // This does mean you'll get a linker/runtime error if you use a 64bit only instruction
  // in a 32bit vm. This is somewhat unfortunate but keeps the ifdef noise down.
551
552private:
553
554
555  // 64bit prefixes
556  int prefix_and_encode(int reg_enc, bool byteinst = false);
557  int prefixq_and_encode(int reg_enc);
558
559  int prefix_and_encode(int dst_enc, int src_enc, bool byteinst = false);
560  int prefixq_and_encode(int dst_enc, int src_enc);
561
562  void prefix(Register reg);
563  void prefix(Address adr);
564  void prefixq(Address adr);
565
566  void prefix(Address adr, Register reg,  bool byteinst = false);
567  void prefix(Address adr, XMMRegister reg);
568  void prefixq(Address adr, Register reg);
569  void prefixq(Address adr, XMMRegister reg);
570
571  void prefetch_prefix(Address src);
572
573  void rex_prefix(Address adr, XMMRegister xreg,
574                  VexSimdPrefix pre, VexOpcode opc, bool rex_w);
575  int  rex_prefix_and_encode(int dst_enc, int src_enc,
576                             VexSimdPrefix pre, VexOpcode opc, bool rex_w);
577
578  void vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w,
579                  int nds_enc, VexSimdPrefix pre, VexOpcode opc,
580                  bool vector256);
581
582  void vex_prefix(Address adr, int nds_enc, int xreg_enc,
583                  VexSimdPrefix pre, VexOpcode opc,
584                  bool vex_w, bool vector256);
585
586  void vex_prefix(XMMRegister dst, XMMRegister nds, Address src,
587                  VexSimdPrefix pre, bool vector256 = false) {
588    int dst_enc = dst->encoding();
589    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
590    vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, false, vector256);
591  }
592
593  void vex_prefix_0F38(Register dst, Register nds, Address src) {
594    bool vex_w = false;
595    bool vector256 = false;
596    vex_prefix(src, nds->encoding(), dst->encoding(),
597               VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256);
598  }
599
600  void vex_prefix_0F38_q(Register dst, Register nds, Address src) {
601    bool vex_w = true;
602    bool vector256 = false;
603    vex_prefix(src, nds->encoding(), dst->encoding(),
604               VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256);
605  }
606  int  vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
607                             VexSimdPrefix pre, VexOpcode opc,
608                             bool vex_w, bool vector256);
609
610  int  vex_prefix_0F38_and_encode(Register dst, Register nds, Register src) {
611    bool vex_w = false;
612    bool vector256 = false;
613    return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
614                                 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256);
615  }
616  int  vex_prefix_0F38_and_encode_q(Register dst, Register nds, Register src) {
617    bool vex_w = true;
618    bool vector256 = false;
619    return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
620                                 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256);
621  }
622  int  vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
623                             VexSimdPrefix pre, bool vector256 = false,
624                             VexOpcode opc = VEX_OPCODE_0F) {
625    int src_enc = src->encoding();
626    int dst_enc = dst->encoding();
627    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
628    return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, false, vector256);
629  }
630
631  void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr,
632                   VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
633                   bool rex_w = false, bool vector256 = false);
634
635  void simd_prefix(XMMRegister dst, Address src,
636                   VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
637    simd_prefix(dst, xnoreg, src, pre, opc);
638  }
639
640  void simd_prefix(Address dst, XMMRegister src, VexSimdPrefix pre) {
641    simd_prefix(src, dst, pre);
642  }
643  void simd_prefix_q(XMMRegister dst, XMMRegister nds, Address src,
644                     VexSimdPrefix pre) {
645    bool rex_w = true;
646    simd_prefix(dst, nds, src, pre, VEX_OPCODE_0F, rex_w);
647  }
648
649  int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
650                             VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
651                             bool rex_w = false, bool vector256 = false);
652
653  // Move/convert 32-bit integer value.
654  int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, Register src,
655                             VexSimdPrefix pre) {
656    // It is OK to cast from Register to XMMRegister to pass argument here
657    // since only encoding is used in simd_prefix_and_encode() and number of
658    // Gen and Xmm registers are the same.
659    return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre);
660  }
661  int simd_prefix_and_encode(XMMRegister dst, Register src, VexSimdPrefix pre) {
662    return simd_prefix_and_encode(dst, xnoreg, src, pre);
663  }
664  int simd_prefix_and_encode(Register dst, XMMRegister src,
665                             VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
666    return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, opc);
667  }
668
669  // Move/convert 64-bit integer value.
670  int simd_prefix_and_encode_q(XMMRegister dst, XMMRegister nds, Register src,
671                               VexSimdPrefix pre) {
672    bool rex_w = true;
673    return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre, VEX_OPCODE_0F, rex_w);
674  }
675  int simd_prefix_and_encode_q(XMMRegister dst, Register src, VexSimdPrefix pre) {
676    return simd_prefix_and_encode_q(dst, xnoreg, src, pre);
677  }
678  int simd_prefix_and_encode_q(Register dst, XMMRegister src,
679                             VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
680    bool rex_w = true;
681    return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, opc, rex_w);
682  }
683
684  // Helper functions for groups of instructions
685  void emit_arith_b(int op1, int op2, Register dst, int imm8);
686
687  void emit_arith(int op1, int op2, Register dst, int32_t imm32);
688  // Force generation of a 4 byte immediate value even if it fits into 8bit
689  void emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32);
690  void emit_arith(int op1, int op2, Register dst, Register src);
691
692  void emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre);
693  void emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre);
694  void emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre);
695  void emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre);
696  void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
697                      Address src, VexSimdPrefix pre, bool vector256);
698  void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
699                      XMMRegister src, VexSimdPrefix pre, bool vector256);
700
701  void emit_operand(Register reg,
702                    Register base, Register index, Address::ScaleFactor scale,
703                    int disp,
704                    RelocationHolder const& rspec,
705                    int rip_relative_correction = 0);
706
707  void emit_operand(Register reg, Address adr, int rip_relative_correction = 0);
708
709  // operands that only take the original 32bit registers
710  void emit_operand32(Register reg, Address adr);
711
712  void emit_operand(XMMRegister reg,
713                    Register base, Register index, Address::ScaleFactor scale,
714                    int disp,
715                    RelocationHolder const& rspec);
716
717  void emit_operand(XMMRegister reg, Address adr);
718
719  void emit_operand(MMXRegister reg, Address adr);
720
721  // workaround gcc (3.2.1-7) bug
722  void emit_operand(Address adr, MMXRegister reg);
723
724
725  // Immediate-to-memory forms
726  void emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32);
727
728  void emit_farith(int b1, int b2, int i);
729
730
731 protected:
732  #ifdef ASSERT
733  void check_relocation(RelocationHolder const& rspec, int format);
734  #endif
735
736  void emit_data(jint data, relocInfo::relocType    rtype, int format);
737  void emit_data(jint data, RelocationHolder const& rspec, int format);
738  void emit_data64(jlong data, relocInfo::relocType rtype, int format = 0);
739  void emit_data64(jlong data, RelocationHolder const& rspec, int format = 0);
740
741  bool reachable(AddressLiteral adr) NOT_LP64({ return true;});
742
743  // These are all easily abused and hence protected
744
745  // 32BIT ONLY SECTION
746#ifndef _LP64
747  // Make these disappear in 64bit mode since they would never be correct
748  void cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec);   // 32BIT ONLY
749  void cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec);    // 32BIT ONLY
750
751  void mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec);    // 32BIT ONLY
752  void mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec);     // 32BIT ONLY
753
754  void push_literal32(int32_t imm32, RelocationHolder const& rspec);                 // 32BIT ONLY
755#else
756  // 64BIT ONLY SECTION
757  void mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec);   // 64BIT ONLY
758
759  void cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec);
760  void cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec);
761
762  void mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec);
763  void mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder const& rspec);
764#endif // _LP64
765
766  // These are unique in that we are ensured by the caller that the 32bit
767  // relative in these instructions will always be able to reach the potentially
768  // 64bit address described by entry. Since they can take a 64bit address they
769  // don't have the 32 suffix like the other instructions in this class.
770
771  void call_literal(address entry, RelocationHolder const& rspec);
772  void jmp_literal(address entry, RelocationHolder const& rspec);
773
  // Avoid using directly section
  // Instructions in this section are actually usable by anyone without danger
  // of failure but have performance issues that are addressed by enhanced
  // instructions which will do the proper thing based on the particular cpu.
  // We protect them because we don't trust you...
779
780  // Don't use next inc() and dec() methods directly. INC & DEC instructions
781  // could cause a partial flag stall since they don't set CF flag.
782  // Use MacroAssembler::decrement() & MacroAssembler::increment() methods
783  // which call inc() & dec() or add() & sub() in accordance with
784  // the product flag UseIncDec value.
785
786  void decl(Register dst);
787  void decl(Address dst);
788  void decq(Register dst);
789  void decq(Address dst);
790
791  void incl(Register dst);
792  void incl(Address dst);
793  void incq(Register dst);
794  void incq(Address dst);
795
796  // New cpus require use of movsd and movss to avoid partial register stall
797  // when loading from memory. But for old Opteron use movlpd instead of movsd.
798  // The selection is done in MacroAssembler::movdbl() and movflt().
799
800  // Move Scalar Single-Precision Floating-Point Values
801  void movss(XMMRegister dst, Address src);
802  void movss(XMMRegister dst, XMMRegister src);
803  void movss(Address dst, XMMRegister src);
804
805  // Move Scalar Double-Precision Floating-Point Values
806  void movsd(XMMRegister dst, Address src);
807  void movsd(XMMRegister dst, XMMRegister src);
808  void movsd(Address dst, XMMRegister src);
809  void movlpd(XMMRegister dst, Address src);
810
811  // New cpus require use of movaps and movapd to avoid partial register stall
812  // when moving between registers.
813  void movaps(XMMRegister dst, XMMRegister src);
814  void movapd(XMMRegister dst, XMMRegister src);
815
816  // End avoid using directly
817
818
819  // Instruction prefixes
820  void prefix(Prefix p);
821
822  public:
823
824  // Creation
825  Assembler(CodeBuffer* code) : AbstractAssembler(code) {}
826
827  // Decoding
828  static address locate_operand(address inst, WhichOperand which);
829  static address locate_next_instruction(address inst);
830
831  // Utilities
832  static bool is_polling_page_far() NOT_LP64({ return false;});
833
834  // Generic instructions
835  // Does 32bit or 64bit as needed for the platform. In some sense these
836  // belong in macro assembler but there is no need for both varieties to exist
837
838  void lea(Register dst, Address src);
839
840  void mov(Register dst, Register src);
841
842  void pusha();
843  void popa();
844
845  void pushf();
846  void popf();
847
848  void push(int32_t imm32);
849
850  void push(Register src);
851
852  void pop(Register dst);
853
854  // These are dummies to prevent surprise implicit conversions to Register
855  void push(void* v);
856  void pop(void* v);
857
858  // These do register sized moves/scans
859  void rep_mov();
860  void rep_stos();
861  void rep_stosb();
862  void repne_scan();
863#ifdef _LP64
864  void repne_scanl();
865#endif
866
867  // Vanilla instructions in lexical order
868
869  void adcl(Address dst, int32_t imm32);
870  void adcl(Address dst, Register src);
871  void adcl(Register dst, int32_t imm32);
872  void adcl(Register dst, Address src);
873  void adcl(Register dst, Register src);
874
875  void adcq(Register dst, int32_t imm32);
876  void adcq(Register dst, Address src);
877  void adcq(Register dst, Register src);
878
879  void addl(Address dst, int32_t imm32);
880  void addl(Address dst, Register src);
881  void addl(Register dst, int32_t imm32);
882  void addl(Register dst, Address src);
883  void addl(Register dst, Register src);
884
885  void addq(Address dst, int32_t imm32);
886  void addq(Address dst, Register src);
887  void addq(Register dst, int32_t imm32);
888  void addq(Register dst, Address src);
889  void addq(Register dst, Register src);
890
891  void addr_nop_4();
892  void addr_nop_5();
893  void addr_nop_7();
894  void addr_nop_8();
895
896  // Add Scalar Double-Precision Floating-Point Values
897  void addsd(XMMRegister dst, Address src);
898  void addsd(XMMRegister dst, XMMRegister src);
899
900  // Add Scalar Single-Precision Floating-Point Values
901  void addss(XMMRegister dst, Address src);
902  void addss(XMMRegister dst, XMMRegister src);
903
904  // AES instructions
905  void aesdec(XMMRegister dst, Address src);
906  void aesdec(XMMRegister dst, XMMRegister src);
907  void aesdeclast(XMMRegister dst, Address src);
908  void aesdeclast(XMMRegister dst, XMMRegister src);
909  void aesenc(XMMRegister dst, Address src);
910  void aesenc(XMMRegister dst, XMMRegister src);
911  void aesenclast(XMMRegister dst, Address src);
912  void aesenclast(XMMRegister dst, XMMRegister src);
913
914
915  void andl(Address  dst, int32_t imm32);
916  void andl(Register dst, int32_t imm32);
917  void andl(Register dst, Address src);
918  void andl(Register dst, Register src);
919
920  void andq(Address  dst, int32_t imm32);
921  void andq(Register dst, int32_t imm32);
922  void andq(Register dst, Address src);
923  void andq(Register dst, Register src);
924
925  // BMI instructions
926  void andnl(Register dst, Register src1, Register src2);
927  void andnl(Register dst, Register src1, Address src2);
928  void andnq(Register dst, Register src1, Register src2);
929  void andnq(Register dst, Register src1, Address src2);
930
931  void blsil(Register dst, Register src);
932  void blsil(Register dst, Address src);
933  void blsiq(Register dst, Register src);
934  void blsiq(Register dst, Address src);
935
936  void blsmskl(Register dst, Register src);
937  void blsmskl(Register dst, Address src);
938  void blsmskq(Register dst, Register src);
939  void blsmskq(Register dst, Address src);
940
941  void blsrl(Register dst, Register src);
942  void blsrl(Register dst, Address src);
943  void blsrq(Register dst, Register src);
944  void blsrq(Register dst, Address src);
945
946  void bsfl(Register dst, Register src);
947  void bsrl(Register dst, Register src);
948
949#ifdef _LP64
950  void bsfq(Register dst, Register src);
951  void bsrq(Register dst, Register src);
952#endif
953
954  void bswapl(Register reg);
955
956  void bswapq(Register reg);
957
958  void call(Label& L, relocInfo::relocType rtype);
959  void call(Register reg);  // push pc; pc <- reg
960  void call(Address adr);   // push pc; pc <- adr
961
962  void cdql();
963
964  void cdqq();
965
966  void cld();
967
968  void clflush(Address adr);
969
970  void cmovl(Condition cc, Register dst, Register src);
971  void cmovl(Condition cc, Register dst, Address src);
972
973  void cmovq(Condition cc, Register dst, Register src);
974  void cmovq(Condition cc, Register dst, Address src);
975
976
977  void cmpb(Address dst, int imm8);
978
979  void cmpl(Address dst, int32_t imm32);
980
981  void cmpl(Register dst, int32_t imm32);
982  void cmpl(Register dst, Register src);
983  void cmpl(Register dst, Address src);
984
985  void cmpq(Address dst, int32_t imm32);
986  void cmpq(Address dst, Register src);
987
988  void cmpq(Register dst, int32_t imm32);
989  void cmpq(Register dst, Register src);
990  void cmpq(Register dst, Address src);
991
992  // these are dummies used to catch attempting to convert NULL to Register
993  void cmpl(Register dst, void* junk); // dummy
994  void cmpq(Register dst, void* junk); // dummy
995
996  void cmpw(Address dst, int imm16);
997
998  void cmpxchg8 (Address adr);
999
1000  void cmpxchgl(Register reg, Address adr);
1001
1002  void cmpxchgq(Register reg, Address adr);
1003
1004  // Ordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS
1005  void comisd(XMMRegister dst, Address src);
1006  void comisd(XMMRegister dst, XMMRegister src);
1007
1008  // Ordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS
1009  void comiss(XMMRegister dst, Address src);
1010  void comiss(XMMRegister dst, XMMRegister src);
1011
1012  // Identify processor type and features
1013  void cpuid();
1014
1015  // Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value
1016  void cvtsd2ss(XMMRegister dst, XMMRegister src);
1017  void cvtsd2ss(XMMRegister dst, Address src);
1018
1019  // Convert Doubleword (l) or Quadword (q) Integer to Scalar Double-Precision Floating-Point Value
1020  void cvtsi2sdl(XMMRegister dst, Register src);
1021  void cvtsi2sdl(XMMRegister dst, Address src);
1022  void cvtsi2sdq(XMMRegister dst, Register src);
1023  void cvtsi2sdq(XMMRegister dst, Address src);
1024
1025  // Convert Doubleword (l) or Quadword (q) Integer to Scalar Single-Precision Floating-Point Value
1026  void cvtsi2ssl(XMMRegister dst, Register src);
1027  void cvtsi2ssl(XMMRegister dst, Address src);
1028  void cvtsi2ssq(XMMRegister dst, Register src);
1029  void cvtsi2ssq(XMMRegister dst, Address src);
1030
1031  // Convert Packed Signed Doubleword Integers to Packed Double-Precision Floating-Point Value
1032  void cvtdq2pd(XMMRegister dst, XMMRegister src);
1033
1034  // Convert Packed Signed Doubleword Integers to Packed Single-Precision Floating-Point Value
1035  void cvtdq2ps(XMMRegister dst, XMMRegister src);
1036
1037  // Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value
1038  void cvtss2sd(XMMRegister dst, XMMRegister src);
1039  void cvtss2sd(XMMRegister dst, Address src);
1040
1041  // Convert with Truncation Scalar Double-Precision Floating-Point Value to Doubleword (l) or Quadword (q) Integer
1042  void cvttsd2sil(Register dst, Address src);
1043  void cvttsd2sil(Register dst, XMMRegister src);
1044  void cvttsd2siq(Register dst, XMMRegister src);
1045
1046  // Convert with Truncation Scalar Single-Precision Floating-Point Value to Doubleword (l) or Quadword (q) Integer
1047  void cvttss2sil(Register dst, XMMRegister src);
1048  void cvttss2siq(Register dst, XMMRegister src);
1049
1050  // Divide Scalar Double-Precision Floating-Point Values
1051  void divsd(XMMRegister dst, Address src);
1052  void divsd(XMMRegister dst, XMMRegister src);
1053
1054  // Divide Scalar Single-Precision Floating-Point Values
1055  void divss(XMMRegister dst, Address src);
1056  void divss(XMMRegister dst, XMMRegister src);
1057
1058  void emms();
1059
1060  void fabs();
1061
1062  void fadd(int i);
1063
1064  void fadd_d(Address src);
1065  void fadd_s(Address src);
1066
1067  // "Alternate" versions of x87 instructions place result down in FPU
1068  // stack instead of on TOS
1069
1070  void fadda(int i); // "alternate" fadd
1071  void faddp(int i = 1);
1072
1073  void fchs();
1074
1075  void fcom(int i);
1076
1077  void fcomp(int i = 1);
1078  void fcomp_d(Address src);
1079  void fcomp_s(Address src);
1080
1081  void fcompp();
1082
1083  void fcos();
1084
1085  void fdecstp();
1086
1087  void fdiv(int i);
1088  void fdiv_d(Address src);
1089  void fdivr_s(Address src);
1090  void fdiva(int i);  // "alternate" fdiv
1091  void fdivp(int i = 1);
1092
1093  void fdivr(int i);
1094  void fdivr_d(Address src);
1095  void fdiv_s(Address src);
1096
1097  void fdivra(int i); // "alternate" reversed fdiv
1098
1099  void fdivrp(int i = 1);
1100
1101  void ffree(int i = 0);
1102
1103  void fild_d(Address adr);
1104  void fild_s(Address adr);
1105
1106  void fincstp();
1107
1108  void finit();
1109
1110  void fist_s (Address adr);
1111  void fistp_d(Address adr);
1112  void fistp_s(Address adr);
1113
1114  void fld1();
1115
1116  void fld_d(Address adr);
1117  void fld_s(Address adr);
1118  void fld_s(int index);
1119  void fld_x(Address adr);  // extended-precision (80-bit) format
1120
1121  void fldcw(Address src);
1122
1123  void fldenv(Address src);
1124
1125  void fldlg2();
1126
1127  void fldln2();
1128
1129  void fldz();
1130
1131  void flog();
1132  void flog10();
1133
1134  void fmul(int i);
1135
1136  void fmul_d(Address src);
1137  void fmul_s(Address src);
1138
1139  void fmula(int i);  // "alternate" fmul
1140
1141  void fmulp(int i = 1);
1142
1143  void fnsave(Address dst);
1144
1145  void fnstcw(Address src);
1146
1147  void fnstsw_ax();
1148
1149  void fprem();
1150  void fprem1();
1151
1152  void frstor(Address src);
1153
1154  void fsin();
1155
1156  void fsqrt();
1157
1158  void fst_d(Address adr);
1159  void fst_s(Address adr);
1160
1161  void fstp_d(Address adr);
1162  void fstp_d(int index);
1163  void fstp_s(Address adr);
1164  void fstp_x(Address adr); // extended-precision (80-bit) format
1165
1166  void fsub(int i);
1167  void fsub_d(Address src);
1168  void fsub_s(Address src);
1169
1170  void fsuba(int i);  // "alternate" fsub
1171
1172  void fsubp(int i = 1);
1173
1174  void fsubr(int i);
1175  void fsubr_d(Address src);
1176  void fsubr_s(Address src);
1177
1178  void fsubra(int i); // "alternate" reversed fsub
1179
1180  void fsubrp(int i = 1);
1181
1182  void ftan();
1183
1184  void ftst();
1185
1186  void fucomi(int i = 1);
1187  void fucomip(int i = 1);
1188
1189  void fwait();
1190
1191  void fxch(int i = 1);
1192
1193  void fxrstor(Address src);
1194
1195  void fxsave(Address dst);
1196
1197  void fyl2x();
1198  void frndint();
1199  void f2xm1();
1200  void fldl2e();
1201
1202  void hlt();
1203
1204  void idivl(Register src);
1205  void divl(Register src); // Unsigned division
1206
1207  void idivq(Register src);
1208
1209  void imull(Register dst, Register src);
1210  void imull(Register dst, Register src, int value);
1211  void imull(Register dst, Address src);
1212
1213  void imulq(Register dst, Register src);
1214  void imulq(Register dst, Register src, int value);
1215#ifdef _LP64
1216  void imulq(Register dst, Address src);
1217#endif
1218
1219
1220  // jcc is the generic conditional branch generator to run-time
1221  // routines; jcc is used for branches to labels. jcc
1222  // takes a branch opcode (cc) and a label (L) and generates
1223  // either a backward branch or a forward branch and links it
1224  // to the label fixup chain. Usage:
1225  //
1226  // Label L;      // unbound label
1227  // jcc(cc, L);   // forward branch to unbound label
1228  // bind(L);      // bind label to the current pc
1229  // jcc(cc, L);   // backward branch to bound label
1230  // bind(L);      // illegal: a label may be bound only once
1231  //
1232  // Note: The same Label can be used for forward and backward branches
1233  // but it may be bound only once.
1234
1235  void jcc(Condition cc, Label& L, bool maybe_short = true);
1236
1237  // Conditional jump to an 8-bit offset to L.
1238  // WARNING: be very careful using this for forward jumps.  If the label is
1239  // not bound within an 8-bit offset of this instruction, a run-time error
1240  // will occur.
1241  void jccb(Condition cc, Label& L);
1242
1243  void jmp(Address entry);    // pc <- entry
1244
1245  // Label operations & relative jumps (PPUM Appendix D)
1246  void jmp(Label& L, bool maybe_short = true);   // unconditional jump to L
1247
1248  void jmp(Register entry); // pc <- entry
1249
1250  // Unconditional 8-bit offset jump to L.
1251  // WARNING: be very careful using this for forward jumps.  If the label is
1252  // not bound within an 8-bit offset of this instruction, a run-time error
1253  // will occur.
1254  void jmpb(Label& L);
1255
1256  void ldmxcsr( Address src );
1257
1258  void leal(Register dst, Address src);
1259
1260  void leaq(Register dst, Address src);
1261
1262  void lfence();
1263
1264  void lock();
1265
1266  void lzcntl(Register dst, Register src);
1267
1268#ifdef _LP64
1269  void lzcntq(Register dst, Register src);
1270#endif
1271
1272  enum Membar_mask_bits {  // single-bit flags; membar() takes one as its order_constraint
1273    StoreStore = 1 << 3,
1274    LoadStore  = 1 << 2,
1275    StoreLoad  = 1 << 1,  // the only bit membar() tests
1276    LoadLoad   = 1 << 0
1277  };
1278
1279  // Serializes memory and blows flags; emits nothing unless os::is_MP() is true and StoreLoad is set
1280  void membar(Membar_mask_bits order_constraint) {
1281    if (os::is_MP()) {
1282      // We only have to handle StoreLoad; no code is emitted for the other bits
1283      if (order_constraint & StoreLoad) {
1284        // All usable chips support "locked" instructions which suffice
1285        // as barriers, and are much faster than the alternative of
1286        // using the cpuid instruction. Here we use a locked add [esp],0.
1287        // This is conveniently otherwise a no-op except for blowing
1288        // flags.
1289        // Any change to this code may need to revisit other places in
1290        // the code where this idiom is used, in particular the
1291        // orderAccess code.
1292        lock();
1293        addl(Address(rsp, 0), 0);// Assert the lock# signal here
1294      }
1295    }
1296  }
1297
1298  void mfence();
1299
1300  // Moves
1301
1302  void mov64(Register dst, int64_t imm64);
1303
1304  void movb(Address dst, Register src);
1305  void movb(Address dst, int imm8);
1306  void movb(Register dst, Address src);
1307
1308  void movdl(XMMRegister dst, Register src);
1309  void movdl(Register dst, XMMRegister src);
1310  void movdl(XMMRegister dst, Address src);
1311  void movdl(Address dst, XMMRegister src);
1312
1313  // Move Double Quadword
1314  void movdq(XMMRegister dst, Register src);
1315  void movdq(Register dst, XMMRegister src);
1316
1317  // Move Aligned Double Quadword
1318  void movdqa(XMMRegister dst, XMMRegister src);
1319  void movdqa(XMMRegister dst, Address src);
1320
1321  // Move Unaligned Double Quadword
1322  void movdqu(Address     dst, XMMRegister src);
1323  void movdqu(XMMRegister dst, Address src);
1324  void movdqu(XMMRegister dst, XMMRegister src);
1325
1326  // Move Unaligned 256bit Vector
1327  void vmovdqu(Address dst, XMMRegister src);
1328  void vmovdqu(XMMRegister dst, Address src);
1329  void vmovdqu(XMMRegister dst, XMMRegister src);
1330
1331  // Move lower 64bit to high 64bit in 128bit register
1332  void movlhps(XMMRegister dst, XMMRegister src);
1333
1334  void movl(Register dst, int32_t imm32);
1335  void movl(Address dst, int32_t imm32);
1336  void movl(Register dst, Register src);
1337  void movl(Register dst, Address src);
1338  void movl(Address dst, Register src);
1339
1340  // These dummies prevent using movl from converting a zero (like NULL) into Register
1341  // by giving the compiler two choices it can't resolve
1342
1343  void movl(Address  dst, void* junk);
1344  void movl(Register dst, void* junk);
1345
1346#ifdef _LP64
1347  void movq(Register dst, Register src);
1348  void movq(Register dst, Address src);
1349  void movq(Address  dst, Register src);
1350#endif
1351
1352  void movq(Address     dst, MMXRegister src );
1353  void movq(MMXRegister dst, Address src );
1354
1355#ifdef _LP64
1356  // These dummies prevent using movq from converting a zero (like NULL) into Register
1357  // by giving the compiler two choices it can't resolve
1358
1359  void movq(Address  dst, void* dummy);
1360  void movq(Register dst, void* dummy);
1361#endif
1362
1363  // Move Quadword
1364  void movq(Address     dst, XMMRegister src);
1365  void movq(XMMRegister dst, Address src);
1366
1367  void movsbl(Register dst, Address src);
1368  void movsbl(Register dst, Register src);
1369
1370#ifdef _LP64
1371  void movsbq(Register dst, Address src);
1372  void movsbq(Register dst, Register src);
1373
1374  // Move signed 32bit immediate to 64bit extending sign
1375  void movslq(Address  dst, int32_t imm64);
1376  void movslq(Register dst, int32_t imm64);
1377
1378  void movslq(Register dst, Address src);
1379  void movslq(Register dst, Register src);
1380  void movslq(Register dst, void* src); // Dummy declaration to cause NULL to be ambiguous
1381#endif
1382
1383  void movswl(Register dst, Address src);
1384  void movswl(Register dst, Register src);
1385
1386#ifdef _LP64
1387  void movswq(Register dst, Address src);
1388  void movswq(Register dst, Register src);
1389#endif
1390
1391  void movw(Address dst, int imm16);
1392  void movw(Register dst, Address src);
1393  void movw(Address dst, Register src);
1394
1395  void movzbl(Register dst, Address src);
1396  void movzbl(Register dst, Register src);
1397
1398#ifdef _LP64
1399  void movzbq(Register dst, Address src);
1400  void movzbq(Register dst, Register src);
1401#endif
1402
1403  void movzwl(Register dst, Address src);
1404  void movzwl(Register dst, Register src);
1405
1406#ifdef _LP64
1407  void movzwq(Register dst, Address src);
1408  void movzwq(Register dst, Register src);
1409#endif
1410
1411  void mull(Address src);
1412  void mull(Register src);
1413
1414  // Multiply Scalar Double-Precision Floating-Point Values
1415  void mulsd(XMMRegister dst, Address src);
1416  void mulsd(XMMRegister dst, XMMRegister src);
1417
1418  // Multiply Scalar Single-Precision Floating-Point Values
1419  void mulss(XMMRegister dst, Address src);
1420  void mulss(XMMRegister dst, XMMRegister src);
1421
1422  void negl(Register dst);
1423
1424#ifdef _LP64
1425  void negq(Register dst);
1426#endif
1427
1428  void nop(int i = 1);
1429
1430  void notl(Register dst);
1431
1432#ifdef _LP64
1433  void notq(Register dst);
1434#endif
1435
1436  void orl(Address dst, int32_t imm32);
1437  void orl(Register dst, int32_t imm32);
1438  void orl(Register dst, Address src);
1439  void orl(Register dst, Register src);
1440
1441  void orq(Address dst, int32_t imm32);
1442  void orq(Register dst, int32_t imm32);
1443  void orq(Register dst, Address src);
1444  void orq(Register dst, Register src);
1445
1446  // Pack with unsigned saturation
1447  void packuswb(XMMRegister dst, XMMRegister src);
1448  void packuswb(XMMRegister dst, Address src);
1449  void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1450
1451  // Permutation of 64bit words
1452  void vpermq(XMMRegister dst, XMMRegister src, int imm8, bool vector256);
1453
1454  void pause();
1455
1456  // SSE4.2 string instructions
1457  void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
1458  void pcmpestri(XMMRegister xmm1, Address src, int imm8);
1459
1460  // SSE 4.1 extract
1461  void pextrd(Register dst, XMMRegister src, int imm8);
1462  void pextrq(Register dst, XMMRegister src, int imm8);
1463
1464  // SSE 4.1 insert
1465  void pinsrd(XMMRegister dst, Register src, int imm8);
1466  void pinsrq(XMMRegister dst, Register src, int imm8);
1467
1468  // SSE4.1 packed move
1469  void pmovzxbw(XMMRegister dst, XMMRegister src);
1470  void pmovzxbw(XMMRegister dst, Address src);
1471
1472#ifndef _LP64 // no 32bit push/pop on amd64
1473  void popl(Address dst);
1474#endif
1475
1476#ifdef _LP64
1477  void popq(Address dst);
1478#endif
1479
1480  void popcntl(Register dst, Address src);
1481  void popcntl(Register dst, Register src);
1482
1483#ifdef _LP64
1484  void popcntq(Register dst, Address src);
1485  void popcntq(Register dst, Register src);
1486#endif
1487
1488  // Prefetches (SSE, SSE2, 3DNOW only)
1489
1490  void prefetchnta(Address src);
1491  void prefetchr(Address src);
1492  void prefetcht0(Address src);
1493  void prefetcht1(Address src);
1494  void prefetcht2(Address src);
1495  void prefetchw(Address src);
1496
1497  // Shuffle Bytes
1498  void pshufb(XMMRegister dst, XMMRegister src);
1499  void pshufb(XMMRegister dst, Address src);
1500
1501  // Shuffle Packed Doublewords
1502  void pshufd(XMMRegister dst, XMMRegister src, int mode);
1503  void pshufd(XMMRegister dst, Address src,     int mode);
1504
1505  // Shuffle Packed Low Words
1506  void pshuflw(XMMRegister dst, XMMRegister src, int mode);
1507  void pshuflw(XMMRegister dst, Address src,     int mode);
1508
1509  // Shift Right by bytes Logical DoubleQuadword Immediate
1510  void psrldq(XMMRegister dst, int shift);
1511
1512  // Logical Compare 128bit
1513  void ptest(XMMRegister dst, XMMRegister src);
1514  void ptest(XMMRegister dst, Address src);
1515  // Logical Compare 256bit
1516  void vptest(XMMRegister dst, XMMRegister src);
1517  void vptest(XMMRegister dst, Address src);
1518
1519  // Interleave Low Bytes
1520  void punpcklbw(XMMRegister dst, XMMRegister src);
1521  void punpcklbw(XMMRegister dst, Address src);
1522
1523  // Interleave Low Doublewords
1524  void punpckldq(XMMRegister dst, XMMRegister src);
1525  void punpckldq(XMMRegister dst, Address src);
1526
1527  // Interleave Low Quadwords
1528  void punpcklqdq(XMMRegister dst, XMMRegister src);
1529
1530#ifndef _LP64 // no 32bit push/pop on amd64
1531  void pushl(Address src);
1532#endif
1533
1534  void pushq(Address src);
1535
1536  void rcll(Register dst, int imm8);
1537
1538  void rclq(Register dst, int imm8);
1539
1540  void rdtsc();
1541
1542  void ret(int imm16);
1543
1544  void sahf();
1545
1546  void sarl(Register dst, int imm8);
1547  void sarl(Register dst);
1548
1549  void sarq(Register dst, int imm8);
1550  void sarq(Register dst);
1551
1552  void sbbl(Address dst, int32_t imm32);
1553  void sbbl(Register dst, int32_t imm32);
1554  void sbbl(Register dst, Address src);
1555  void sbbl(Register dst, Register src);
1556
1557  void sbbq(Address dst, int32_t imm32);
1558  void sbbq(Register dst, int32_t imm32);
1559  void sbbq(Register dst, Address src);
1560  void sbbq(Register dst, Register src);
1561
1562  void setb(Condition cc, Register dst);
1563
1564  void shldl(Register dst, Register src);
1565
1566  void shll(Register dst, int imm8);
1567  void shll(Register dst);
1568
1569  void shlq(Register dst, int imm8);
1570  void shlq(Register dst);
1571
1572  void shrdl(Register dst, Register src);
1573
1574  void shrl(Register dst, int imm8);
1575  void shrl(Register dst);
1576
1577  void shrq(Register dst, int imm8);
1578  void shrq(Register dst);
1579
1580  void smovl(); // QQQ generic?
1581
1582  // Compute Square Root of Scalar Double-Precision Floating-Point Value
1583  void sqrtsd(XMMRegister dst, Address src);
1584  void sqrtsd(XMMRegister dst, XMMRegister src);
1585
1586  // Compute Square Root of Scalar Single-Precision Floating-Point Value
1587  void sqrtss(XMMRegister dst, Address src);
1588  void sqrtss(XMMRegister dst, XMMRegister src);
1589
1590  void std();
1591
1592  void stmxcsr( Address dst );
1593
1594  void subl(Address dst, int32_t imm32);
1595  void subl(Address dst, Register src);
1596  void subl(Register dst, int32_t imm32);
1597  void subl(Register dst, Address src);
1598  void subl(Register dst, Register src);
1599
1600  void subq(Address dst, int32_t imm32);
1601  void subq(Address dst, Register src);
1602  void subq(Register dst, int32_t imm32);
1603  void subq(Register dst, Address src);
1604  void subq(Register dst, Register src);
1605
1606  // Force generation of a 4 byte immediate value even if it fits into 8bit
1607  void subl_imm32(Register dst, int32_t imm32);
1608  void subq_imm32(Register dst, int32_t imm32);
1609
1610  // Subtract Scalar Double-Precision Floating-Point Values
1611  void subsd(XMMRegister dst, Address src);
1612  void subsd(XMMRegister dst, XMMRegister src);
1613
1614  // Subtract Scalar Single-Precision Floating-Point Values
1615  void subss(XMMRegister dst, Address src);
1616  void subss(XMMRegister dst, XMMRegister src);
1617
1618  void testb(Register dst, int imm8);
1619
1620  void testl(Register dst, int32_t imm32);
1621  void testl(Register dst, Register src);
1622  void testl(Register dst, Address src);
1623
1624  void testq(Register dst, int32_t imm32);
1625  void testq(Register dst, Register src);
1626
1627  // BMI - count trailing zeros
1628  void tzcntl(Register dst, Register src);
1629  void tzcntq(Register dst, Register src);
1630
1631  // Unordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS
1632  void ucomisd(XMMRegister dst, Address src);
1633  void ucomisd(XMMRegister dst, XMMRegister src);
1634
1635  // Unordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS
1636  void ucomiss(XMMRegister dst, Address src);
1637  void ucomiss(XMMRegister dst, XMMRegister src);
1638
1639  void xabort(int8_t imm8);
1640
1641  void xaddl(Address dst, Register src);
1642
1643  void xaddq(Address dst, Register src);
1644
1645  void xbegin(Label& abort, relocInfo::relocType rtype = relocInfo::none);
1646
1647  void xchgl(Register reg, Address adr);
1648  void xchgl(Register dst, Register src);
1649
1650  void xchgq(Register reg, Address adr);
1651  void xchgq(Register dst, Register src);
1652
1653  void xend();
1654
1655  // Get Value of Extended Control Register
1656  void xgetbv();
1657
1658  void xorl(Register dst, int32_t imm32);
1659  void xorl(Register dst, Address src);
1660  void xorl(Register dst, Register src);
1661
1662  void xorq(Register dst, Address src);
1663  void xorq(Register dst, Register src);
1664
1665  void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 0
1666
1667  // AVX 3-operands scalar instructions (encoded with VEX prefix)
1668
1669  void vaddsd(XMMRegister dst, XMMRegister nds, Address src);
1670  void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
1671  void vaddss(XMMRegister dst, XMMRegister nds, Address src);
1672  void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src);
1673  void vdivsd(XMMRegister dst, XMMRegister nds, Address src);
1674  void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
1675  void vdivss(XMMRegister dst, XMMRegister nds, Address src);
1676  void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src);
1677  void vmulsd(XMMRegister dst, XMMRegister nds, Address src);
1678  void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
1679  void vmulss(XMMRegister dst, XMMRegister nds, Address src);
1680  void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src);
1681  void vsubsd(XMMRegister dst, XMMRegister nds, Address src);
1682  void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
1683  void vsubss(XMMRegister dst, XMMRegister nds, Address src);
1684  void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src);
1685
1686
1687  //====================VECTOR ARITHMETIC=====================================
1688
1689  // Add Packed Floating-Point Values
1690  void addpd(XMMRegister dst, XMMRegister src);
1691  void addps(XMMRegister dst, XMMRegister src);
1692  void vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1693  void vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1694  void vaddpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1695  void vaddps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1696
1697  // Subtract Packed Floating-Point Values
1698  void subpd(XMMRegister dst, XMMRegister src);
1699  void subps(XMMRegister dst, XMMRegister src);
1700  void vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1701  void vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1702  void vsubpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1703  void vsubps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1704
1705  // Multiply Packed Floating-Point Values
1706  void mulpd(XMMRegister dst, XMMRegister src);
1707  void mulps(XMMRegister dst, XMMRegister src);
1708  void vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1709  void vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1710  void vmulpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1711  void vmulps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1712
1713  // Divide Packed Floating-Point Values
1714  void divpd(XMMRegister dst, XMMRegister src);
1715  void divps(XMMRegister dst, XMMRegister src);
1716  void vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1717  void vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1718  void vdivpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1719  void vdivps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1720
1721  // Bitwise Logical AND of Packed Floating-Point Values
1722  void andpd(XMMRegister dst, XMMRegister src);
1723  void andps(XMMRegister dst, XMMRegister src);
1724  void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1725  void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1726  void vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1727  void vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1728
1729  // Bitwise Logical XOR of Packed Floating-Point Values
1730  void xorpd(XMMRegister dst, XMMRegister src);
1731  void xorps(XMMRegister dst, XMMRegister src);
1732  void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1733  void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1734  void vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1735  void vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1736
1737  // Add packed integers
1738  void paddb(XMMRegister dst, XMMRegister src);
1739  void paddw(XMMRegister dst, XMMRegister src);
1740  void paddd(XMMRegister dst, XMMRegister src);
1741  void paddq(XMMRegister dst, XMMRegister src);
1742  void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1743  void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1744  void vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1745  void vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1746  void vpaddb(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1747  void vpaddw(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1748  void vpaddd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1749  void vpaddq(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1750
1751  // Subtract packed integers. The b/w/d/q suffix follows the x86 mnemonic convention for element width (byte/word/doubleword/quadword).
1752  void psubb(XMMRegister dst, XMMRegister src);  // two-operand forms: register source only
1753  void psubw(XMMRegister dst, XMMRegister src);
1754  void psubd(XMMRegister dst, XMMRegister src);
1755  void psubq(XMMRegister dst, XMMRegister src);
1756  void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);  // v* forms with register src; NOTE(review): nds presumably the first source (minuend), vector256 presumably selects 256-bit operation - confirm in assembler_x86.cpp
1757  void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1758  void vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1759  void vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1760  void vpsubb(XMMRegister dst, XMMRegister nds, Address src, bool vector256);  // v* forms with src given as an Address operand
1761  void vpsubw(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1762  void vpsubd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1763  void vpsubq(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1764
1765  // Multiply packed integers (only shorts and ints): the w variants are the short forms, the d variants the int forms.
1766  void pmullw(XMMRegister dst, XMMRegister src);  // two-operand forms: register source only
1767  void pmulld(XMMRegister dst, XMMRegister src);
1768  void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);  // v* forms with register src; NOTE(review): nds presumably the first source, vector256 presumably selects 256-bit operation - confirm in assembler_x86.cpp
1769  void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1770  void vpmullw(XMMRegister dst, XMMRegister nds, Address src, bool vector256);  // v* forms with src given as an Address operand
1771  void vpmulld(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1772
1773  // Shift left packed integers. The shift count is passed either as an int or in an XMM register.
1774  void psllw(XMMRegister dst, int shift);  // two-operand forms, int shift count (presumably encoded as an immediate - confirm in assembler_x86.cpp)
1775  void pslld(XMMRegister dst, int shift);
1776  void psllq(XMMRegister dst, int shift);
1777  void psllw(XMMRegister dst, XMMRegister shift);  // two-operand forms, shift count held in an XMM register
1778  void pslld(XMMRegister dst, XMMRegister shift);
1779  void psllq(XMMRegister dst, XMMRegister shift);
1780  void vpsllw(XMMRegister dst, XMMRegister src, int shift, bool vector256);  // v* forms with separate dst and src, int shift count; NOTE(review): vector256 presumably selects 256-bit operation - confirm
1781  void vpslld(XMMRegister dst, XMMRegister src, int shift, bool vector256);
1782  void vpsllq(XMMRegister dst, XMMRegister src, int shift, bool vector256);
1783  void vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);  // v* forms with separate dst and src, shift count held in an XMM register
1784  void vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
1785  void vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
1786
1787  // Logical shift right packed integers. The shift count is passed either as an int or in an XMM register.
1788  void psrlw(XMMRegister dst, int shift);  // two-operand forms, int shift count (presumably encoded as an immediate - confirm in assembler_x86.cpp)
1789  void psrld(XMMRegister dst, int shift);
1790  void psrlq(XMMRegister dst, int shift);
1791  void psrlw(XMMRegister dst, XMMRegister shift);  // two-operand forms, shift count held in an XMM register
1792  void psrld(XMMRegister dst, XMMRegister shift);
1793  void psrlq(XMMRegister dst, XMMRegister shift);
1794  void vpsrlw(XMMRegister dst, XMMRegister src, int shift, bool vector256);  // v* forms with separate dst and src, int shift count; NOTE(review): vector256 presumably selects 256-bit operation - confirm
1795  void vpsrld(XMMRegister dst, XMMRegister src, int shift, bool vector256);
1796  void vpsrlq(XMMRegister dst, XMMRegister src, int shift, bool vector256);
1797  void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);  // v* forms with separate dst and src, shift count held in an XMM register
1798  void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
1799  void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
1800
1801  // Arithmetic shift right packed integers (only shorts and ints, no instructions for longs), hence only w and d variants are declared.
1802  void psraw(XMMRegister dst, int shift);  // two-operand forms, int shift count (presumably encoded as an immediate - confirm in assembler_x86.cpp)
1803  void psrad(XMMRegister dst, int shift);
1804  void psraw(XMMRegister dst, XMMRegister shift);  // two-operand forms, shift count held in an XMM register
1805  void psrad(XMMRegister dst, XMMRegister shift);
1806  void vpsraw(XMMRegister dst, XMMRegister src, int shift, bool vector256);  // v* forms with separate dst and src, int shift count; NOTE(review): vector256 presumably selects 256-bit operation - confirm
1807  void vpsrad(XMMRegister dst, XMMRegister src, int shift, bool vector256);
1808  void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);  // v* forms with separate dst and src, shift count held in an XMM register
1809  void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
1810
1811  // Bitwise AND of packed integers.
1812  void pand(XMMRegister dst, XMMRegister src);  // two-operand form, register source only
1813  void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);  // v* form with register src; NOTE(review): nds presumably the first source, vector256 presumably selects 256-bit operation - confirm in assembler_x86.cpp
1814  void vpand(XMMRegister dst, XMMRegister nds, Address src, bool vector256);  // v* form with src given as an Address operand
1815
1816  // Bitwise OR of packed integers.
1817  void por(XMMRegister dst, XMMRegister src);  // two-operand form, register source only
1818  void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);  // v* form with register src; NOTE(review): nds presumably the first source, vector256 presumably selects 256-bit operation - confirm in assembler_x86.cpp
1819  void vpor(XMMRegister dst, XMMRegister nds, Address src, bool vector256);  // v* form with src given as an Address operand
1820
1821  // Bitwise XOR of packed integers.
1822  void pxor(XMMRegister dst, XMMRegister src);  // two-operand form, register source only
1823  void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);  // v* form with register src; NOTE(review): nds presumably the first source, vector256 presumably selects 256-bit operation - confirm in assembler_x86.cpp
1824  void vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256);  // v* form with src given as an Address operand
1825
1826  // Copy the low 128 bits into the high 128 bits of YMM registers (register-to-register forms).
1827  void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src);  // NOTE(review): presumably low half of src goes to high half of dst with the low half taken from nds - confirm in assembler_x86.cpp
1828  void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src);  // same operand shape; f/i presumably distinguish the floating-point and integer encodings - confirm
1829
1830  // Load/store the high 128 bits of a YMM register without destroying the other half.
1831  void vinsertf128h(XMMRegister dst, Address src);  // load: Address source, register destination
1832  void vinserti128h(XMMRegister dst, Address src);  // load: same operand shape as vinsertf128h above
1833  void vextractf128h(Address dst, XMMRegister src);  // store: register source, Address destination
1834  void vextracti128h(Address dst, XMMRegister src);  // store: same operand shape as vextractf128h above
1835
1836  // Duplicate the 4-byte integer data from src into 8 locations in dst.
1837  void vpbroadcastd(XMMRegister dst, XMMRegister src);  // register source only; no Address overload is declared here
1838
1839  // Carry-Less Multiplication Quadword
1840  void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask);  // NOTE(review): mask is presumably the immediate selecting which quadword of each source is multiplied - confirm in assembler_x86.cpp
1841
1842  // AVX instruction which is used to clear the upper 128 bits of YMM registers and
1843  // to avoid the transition penalty between AVX and SSE states. There is no
1844  // penalty if legacy SSE instructions are encoded using the VEX prefix because
1845  // they always clear the upper 128 bits. It should be used before calling
1846  // runtime code and native libraries.
1847  void vzeroupper();
1848
1849 protected:
1850  // The following instructions require a 16-byte-aligned address in SSE mode.
1851  // They should be called only from the corresponding MacroAssembler instructions.
1852  void andpd(XMMRegister dst, Address src);  // Address-source forms, kept protected because of the alignment requirement above
1853  void andps(XMMRegister dst, Address src);
1854  void xorpd(XMMRegister dst, Address src);  // the register-source xorpd/xorps overloads are declared earlier in this class
1855  void xorps(XMMRegister dst, Address src);
1856
1857};
1858
1859#endif // CPU_X86_VM_ASSEMBLER_X86_HPP
1860