x86_64.ad revision 113:ba764ed4b6f2
12322Sdg//
22322Sdg// Copyright 2003-2007 Sun Microsystems, Inc.  All Rights Reserved.
32322Sdg// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
42322Sdg//
52322Sdg// This code is free software; you can redistribute it and/or modify it
62322Sdg// under the terms of the GNU General Public License version 2 only, as
72322Sdg// published by the Free Software Foundation.
82322Sdg//
92322Sdg// This code is distributed in the hope that it will be useful, but WITHOUT
102322Sdg// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
112322Sdg// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
122322Sdg// version 2 for more details (a copy is included in the LICENSE file that
132322Sdg// accompanied this code).
142322Sdg//
152322Sdg// You should have received a copy of the GNU General Public License version
162322Sdg// 2 along with this work; if not, write to the Free Software Foundation,
172322Sdg// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
182322Sdg//
192322Sdg// Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
202322Sdg// CA 95054 USA or visit www.sun.com if you need additional information or
212322Sdg// have any questions.
222322Sdg//
232322Sdg//
242322Sdg
252322Sdg// AMD64 Architecture Description File
262322Sdg
272322Sdg//----------REGISTER DEFINITION BLOCK------------------------------------------
282322Sdg// This information is used by the matcher and the register allocator to
29116176Sobrien// describe individual registers and classes of registers within the target
// architecture.
31116176Sobrien
32116176Sobrienregister %{
332322Sdg//----------Architecture Description Register Definitions----------------------
34158092Sjhb// General Registers
35158032Sjhb// "reg_def"  name ( register save type, C convention save type,
36158032Sjhb//                   ideal register type, encoding );
372322Sdg// Register Save Types:
38158032Sjhb//
39158092Sjhb// NS  = No-Save:       The register allocator assumes that these registers
40228569Skib//                      can be used without saving upon entry to the method, &
41108338Sjulian//                      that they do not need to be saved at call sites.
42108338Sjulian//
43108338Sjulian// SOC = Save-On-Call:  The register allocator assumes that these registers
4412734Sbde//                      can be used without saving upon entry to the method,
4512734Sbde//                      but that they must be saved at call sites.
4612734Sbde//
47158032Sjhb// SOE = Save-On-Entry: The register allocator assumes that these registers
48158032Sjhb//                      must be saved before using them upon entry to the
49177615Ssam//                      method, but they do not need to be saved at call
50177615Ssam//                      sites.
51177615Ssam//
52177615Ssam// AS  = Always-Save:   The register allocator assumes that these registers
53177615Ssam//                      must be saved before using them upon entry to the
54177615Ssam//                      method, & that they must be saved at call sites.
55177615Ssam//
56177615Ssam// Ideal Register Type is used to determine how to save & restore a
57177615Ssam// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
58158032Sjhb// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
59158032Sjhb//
60158032Sjhb// The encoding number is the actual bit-pattern placed into the opcodes.
61158032Sjhb
62158032Sjhb// General Registers
63158032Sjhb// R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
64158032Sjhb// used as byte registers)
65158032Sjhb
66158032Sjhb// Previously set RBX, RSI, and RDI as save-on-entry for java code
67158032Sjhb// Turn off SOE in java-code due to frequent use of uncommon-traps.
68158032Sjhb// Now that allocator is better, turn on RSI and RDI as SOE registers.
69160893Sjhb
70160893Sjhbreg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
71160893Sjhbreg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());
72160893Sjhb
73158032Sjhbreg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
74158032Sjhbreg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());
75158032Sjhb
76158032Sjhbreg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
772322Sdgreg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());
78158032Sjhb
7910349Sbdereg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
8010128Sdgreg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());
8185207Sjhb
82158032Sjhbreg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
83158032Sjhbreg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());
84158032Sjhb
85160312Sjhb// now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
8610128Sdgreg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
872322Sdgreg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());
882322Sdg
8970527Sphk#ifdef _WIN64
9070527Sphk
9110128Sdgreg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
9210128Sdgreg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());
9310128Sdg
94158456Sjhbreg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
95160893Sjhbreg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());
96158032Sjhb
97160893Sjhb#else
98158032Sjhb
99160312Sjhbreg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
10016381Speterreg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());
101158032Sjhb
10216381Speterreg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
10316381Speterreg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());
1042322Sdg
10510128Sdg#endif
1062322Sdg
10710128Sdgreg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
108158032Sjhbreg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());
109158032Sjhb
110160878Sjhbreg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
111160878Sjhbreg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());
112160878Sjhb
11399072Sjulianreg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
114158032Sjhbreg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
115158091Sjhb
11699072Sjulianreg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
11799072Sjulianreg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
118158032Sjhb
119158032Sjhbreg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
120158032Sjhbreg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
121158032Sjhb
122158032Sjhbreg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
123158032Sjhbreg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
124158032Sjhb
125158032Sjhbreg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
126158032Sjhbreg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
127158032Sjhb
128158032Sjhbreg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
129158032Sjhbreg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
130158032Sjhb
131158032Sjhb
132158032Sjhb// Floating Point Registers
133158032Sjhb
134158032Sjhb// XMM registers.  128-bit registers or 4 words each, labeled (a)-d.
135158032Sjhb// Word a in each register holds a Float, words ab hold a Double.  We
136158032Sjhb// currently do not use the SIMD capabilities, so registers cd are
137158032Sjhb// unused at the moment.
138158032Sjhb// XMM8-XMM15 must be encoded with REX.
139201794Strasz// Linux ABI:   No register preserved across function calls
140158032Sjhb//              XMM0-XMM7 might hold parameters
141158032Sjhb// Windows ABI: XMM6-XMM15 preserved across function calls
142158032Sjhb//              XMM0-XMM3 might hold parameters
143158032Sjhb
144158032Sjhbreg_def XMM0   (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg());
145158032Sjhbreg_def XMM0_H (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg()->next());
146158032Sjhb
147158032Sjhbreg_def XMM1   (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg());
148158032Sjhbreg_def XMM1_H (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg()->next());
149158032Sjhb
150158032Sjhbreg_def XMM2   (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg());
151158032Sjhbreg_def XMM2_H (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg()->next());
152158032Sjhb
153158032Sjhbreg_def XMM3   (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg());
154158032Sjhbreg_def XMM3_H (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg()->next());
155158032Sjhb
156158032Sjhbreg_def XMM4   (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg());
157158032Sjhbreg_def XMM4_H (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg()->next());
158158032Sjhb
159158032Sjhbreg_def XMM5   (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg());
16099072Sjulianreg_def XMM5_H (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg()->next());
16199072Sjulian
162158032Sjhb#ifdef _WIN64
16399072Sjulian
16499072Sjulianreg_def XMM6   (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg());
165158032Sjhbreg_def XMM6_H (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg()->next());
16699072Sjulian
16799072Sjulianreg_def XMM7   (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg());
168158032Sjhbreg_def XMM7_H (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg()->next());
16999072Sjulian
17099072Sjulianreg_def XMM8   (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg());
171158032Sjhbreg_def XMM8_H (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg()->next());
172158032Sjhb
173158032Sjhbreg_def XMM9   (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg());
174201794Straszreg_def XMM9_H (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg()->next());
175158032Sjhb
176158032Sjhbreg_def XMM10  (SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
177158032Sjhbreg_def XMM10_H(SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next());
178158032Sjhb
179158032Sjhbreg_def XMM11  (SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
180158032Sjhbreg_def XMM11_H(SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next());
181158032Sjhb
182158032Sjhbreg_def XMM12  (SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
183158032Sjhbreg_def XMM12_H(SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next());
184158032Sjhb
185158032Sjhbreg_def XMM13  (SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
186158032Sjhbreg_def XMM13_H(SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next());
187158032Sjhb
188158032Sjhbreg_def XMM14  (SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
189158032Sjhbreg_def XMM14_H(SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next());
190158032Sjhb
191158032Sjhbreg_def XMM15  (SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
192197684Sjhbreg_def XMM15_H(SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next());
193158456Sjhb
194197684Sjhb#else
195158032Sjhb
196197684Sjhbreg_def XMM6   (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg());
197158032Sjhbreg_def XMM6_H (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg()->next());
198197684Sjhb
199197684Sjhbreg_def XMM7   (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg());
200197684Sjhbreg_def XMM7_H (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg()->next());
201197684Sjhb
202197684Sjhbreg_def XMM8   (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg());
203197684Sjhbreg_def XMM8_H (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg()->next());
204197684Sjhb
205103216Sjulianreg_def XMM9   (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg());
206158032Sjhbreg_def XMM9_H (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg()->next());
207160312Sjhb
208118269Sjhbreg_def XMM10  (SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
2092322Sdgreg_def XMM10_H(SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next());
21094456Sjhb
21170527Sphkreg_def XMM11  (SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
21210128Sdgreg_def XMM11_H(SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next());
21370527Sphk
2142322Sdgreg_def XMM12  (SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
2152322Sdgreg_def XMM12_H(SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next());
216115903Sjhb
217108338Sjulianreg_def XMM13  (SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
218158032Sjhbreg_def XMM13_H(SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next());
219108338Sjulian
220158032Sjhbreg_def XMM14  (SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
221158032Sjhbreg_def XMM14_H(SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next());
222158032Sjhb
223158032Sjhbreg_def XMM15  (SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
224158032Sjhbreg_def XMM15_H(SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next());
225160878Sjhb
226158032Sjhb#endif // _WIN64
227158032Sjhb
228158032Sjhbreg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
229158032Sjhb
230158032Sjhb// Specify priority of register selection within phases of register
231158032Sjhb// allocation.  Highest priority is first.  A useful heuristic is to
232158032Sjhb// give registers a low priority when they are required by machine
233158032Sjhb// instructions, like EAX and EDX on I486, and choose no-save registers
234158032Sjhb// before save-on-call, & save-on-call before save-on-entry.  Registers
235158032Sjhb// which participate in fixed calling sequences should come last.
236158032Sjhb// Registers which are used as pairs must fall on an even boundary.
237158032Sjhb
238158032Sjhballoc_class chunk0(R10,         R10_H,
239158032Sjhb                   R11,         R11_H,
240158032Sjhb                   R8,          R8_H,
241158032Sjhb                   R9,          R9_H,
242158032Sjhb                   R12,         R12_H,
243158032Sjhb                   RCX,         RCX_H,
244158032Sjhb                   RBX,         RBX_H,
245158032Sjhb                   RDI,         RDI_H,
246158032Sjhb                   RDX,         RDX_H,
247158032Sjhb                   RSI,         RSI_H,
248158032Sjhb                   RAX,         RAX_H,
249158032Sjhb                   RBP,         RBP_H,
250158032Sjhb                   R13,         R13_H,
251158032Sjhb                   R14,         R14_H,
252158032Sjhb                   R15,         R15_H,
253158032Sjhb                   RSP,         RSP_H);
254158032Sjhb
255158032Sjhb// XXX probably use 8-15 first on Linux
256158032Sjhballoc_class chunk1(XMM0,  XMM0_H,
257158032Sjhb                   XMM1,  XMM1_H,
258158032Sjhb                   XMM2,  XMM2_H,
259158032Sjhb                   XMM3,  XMM3_H,
260158032Sjhb                   XMM4,  XMM4_H,
261158032Sjhb                   XMM5,  XMM5_H,
262158032Sjhb                   XMM6,  XMM6_H,
263158032Sjhb                   XMM7,  XMM7_H,
264158032Sjhb                   XMM8,  XMM8_H,
265158032Sjhb                   XMM9,  XMM9_H,
266158032Sjhb                   XMM10, XMM10_H,
267158032Sjhb                   XMM11, XMM11_H,
268158032Sjhb                   XMM12, XMM12_H,
269158032Sjhb                   XMM13, XMM13_H,
270158032Sjhb                   XMM14, XMM14_H,
271158032Sjhb                   XMM15, XMM15_H);
272158032Sjhb
273158032Sjhballoc_class chunk2(RFLAGS);
274158032Sjhb
275158032Sjhb
276158032Sjhb//----------Architecture Description Register Classes--------------------------
277158032Sjhb// Several register classes are automatically defined based upon information in
278158032Sjhb// this architecture description.
279158032Sjhb// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
280158456Sjhb// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
283158032Sjhb//
284158032Sjhb
285158032Sjhb// Class for all pointer registers (including RSP)
286158032Sjhbreg_class any_reg(RAX, RAX_H,
287158032Sjhb                  RDX, RDX_H,
288158032Sjhb                  RBP, RBP_H,
289158032Sjhb                  RDI, RDI_H,
290158032Sjhb                  RSI, RSI_H,
291158032Sjhb                  RCX, RCX_H,
292158032Sjhb                  RBX, RBX_H,
293158032Sjhb                  RSP, RSP_H,
294158032Sjhb                  R8,  R8_H,
295158032Sjhb                  R9,  R9_H,
296158032Sjhb                  R10, R10_H,
297118228Sjhb                  R11, R11_H,
298158032Sjhb                  R12, R12_H,
299158032Sjhb                  R13, R13_H,
300158032Sjhb                  R14, R14_H,
301179861Sattilio                  R15, R15_H);
302158032Sjhb
303158032Sjhb// Class for all pointer registers except RSP
304158032Sjhbreg_class ptr_reg(RAX, RAX_H,
305158032Sjhb                  RDX, RDX_H,
306158032Sjhb                  RBP, RBP_H,
307158032Sjhb                  RDI, RDI_H,
308158032Sjhb                  RSI, RSI_H,
309179861Sattilio                  RCX, RCX_H,
310158032Sjhb                  RBX, RBX_H,
311158032Sjhb                  R8,  R8_H,
312163709Sjb                  R9,  R9_H,
313158032Sjhb                  R10, R10_H,
314158032Sjhb                  R11, R11_H,
315172705Smarcel                  R13, R13_H,
316172705Smarcel                  R14, R14_H);
317158032Sjhb
318158032Sjhb// Class for all pointer registers except RAX and RSP
319158032Sjhbreg_class ptr_no_rax_reg(RDX, RDX_H,
320108338Sjulian                         RBP, RBP_H,
321158032Sjhb                         RDI, RDI_H,
322158032Sjhb                         RSI, RSI_H,
323158032Sjhb                         RCX, RCX_H,
324158032Sjhb                         RBX, RBX_H,
325158032Sjhb                         R8,  R8_H,
326158032Sjhb                         R9,  R9_H,
327158032Sjhb                         R10, R10_H,
328158032Sjhb                         R11, R11_H,
329158032Sjhb                         R12, R12_H,
330158032Sjhb                         R13, R13_H,
331158032Sjhb                         R14, R14_H);
332158032Sjhb
333108338Sjulianreg_class ptr_no_rbp_reg(RDX, RDX_H,
334158032Sjhb                         RAX, RAX_H,
335158032Sjhb                         RDI, RDI_H,
336108338Sjulian                         RSI, RSI_H,
337158032Sjhb                         RCX, RCX_H,
338158032Sjhb                         RBX, RBX_H,
339108338Sjulian                         R8,  R8_H,
340108338Sjulian                         R9,  R9_H,
341158032Sjhb                         R10, R10_H,
342158032Sjhb                         R11, R11_H,
343158032Sjhb                         R12, R12_H,
344158032Sjhb                         R13, R13_H,
345108338Sjulian                         R14, R14_H);
346158032Sjhb
347158032Sjhb// Class for all pointer registers except RAX, RBX and RSP
348158032Sjhbreg_class ptr_no_rax_rbx_reg(RDX, RDX_H,
349158032Sjhb                             RBP, RBP_H,
350158032Sjhb                             RDI, RDI_H,
351158032Sjhb                             RSI, RSI_H,
352158032Sjhb                             RCX, RCX_H,
353158032Sjhb                             R8,  R8_H,
354158032Sjhb                             R9,  R9_H,
355158032Sjhb                             R10, R10_H,
356158032Sjhb                             R11, R11_H,
357158032Sjhb                             R12, R12_H,
358108338Sjulian                             R13, R13_H,
359158032Sjhb                             R14, R14_H);
360158032Sjhb
361158032Sjhb// Singleton class for RAX pointer register
362108338Sjulianreg_class ptr_rax_reg(RAX, RAX_H);
363158032Sjhb
364108338Sjulian// Singleton class for RBX pointer register
365158032Sjhbreg_class ptr_rbx_reg(RBX, RBX_H);
366158032Sjhb
367108338Sjulian// Singleton class for RSI pointer register
368158032Sjhbreg_class ptr_rsi_reg(RSI, RSI_H);
369158032Sjhb
370158032Sjhb// Singleton class for RDI pointer register
371158032Sjhbreg_class ptr_rdi_reg(RDI, RDI_H);
372158032Sjhb
373158032Sjhb// Singleton class for RBP pointer register
374158032Sjhbreg_class ptr_rbp_reg(RBP, RBP_H);
375158032Sjhb
376179861Sattilio// Singleton class for stack pointer
377158032Sjhbreg_class ptr_rsp_reg(RSP, RSP_H);
378158032Sjhb
379158032Sjhb// Singleton class for TLS pointer
380158032Sjhbreg_class ptr_r15_reg(R15, R15_H);
381158032Sjhb
382158032Sjhb// Class for all long registers (except RSP)
383160312Sjhbreg_class long_reg(RAX, RAX_H,
384158032Sjhb                   RDX, RDX_H,
385158032Sjhb                   RBP, RBP_H,
386158032Sjhb                   RDI, RDI_H,
387158032Sjhb                   RSI, RSI_H,
388158032Sjhb                   RCX, RCX_H,
389158032Sjhb                   RBX, RBX_H,
390158032Sjhb                   R8,  R8_H,
391158032Sjhb                   R9,  R9_H,
392158032Sjhb                   R10, R10_H,
393158032Sjhb                   R11, R11_H,
394158032Sjhb                   R13, R13_H,
395158032Sjhb                   R14, R14_H);
396108338Sjulian
397158032Sjhb// Class for all long registers except RAX, RDX (and RSP)
398158032Sjhbreg_class long_no_rax_rdx_reg(RBP, RBP_H,
399108338Sjulian                              RDI, RDI_H,
400158032Sjhb                              RSI, RSI_H,
401158032Sjhb                              RCX, RCX_H,
402115904Sjhb                              RBX, RBX_H,
403108338Sjulian                              R8,  R8_H,
404158032Sjhb                              R9,  R9_H,
405108338Sjulian                              R10, R10_H,
406158032Sjhb                              R11, R11_H,
407158032Sjhb                              R13, R13_H,
408158032Sjhb                              R14, R14_H);
409158032Sjhb
410158032Sjhb// Class for all long registers except RCX (and RSP)
411158032Sjhbreg_class long_no_rcx_reg(RBP, RBP_H,
412158032Sjhb                          RDI, RDI_H,
413108338Sjulian                          RSI, RSI_H,
414158032Sjhb                          RAX, RAX_H,
415158032Sjhb                          RDX, RDX_H,
416158032Sjhb                          RBX, RBX_H,
417158032Sjhb                          R8,  R8_H,
418158032Sjhb                          R9,  R9_H,
419158032Sjhb                          R10, R10_H,
420158032Sjhb                          R11, R11_H,
421158032Sjhb                          R13, R13_H,
422158032Sjhb                          R14, R14_H);
423158032Sjhb
424158032Sjhb// Class for all long registers except RAX (and RSP)
425158032Sjhbreg_class long_no_rax_reg(RBP, RBP_H,
426158032Sjhb                          RDX, RDX_H,
427158032Sjhb                          RDI, RDI_H,
428158032Sjhb                          RSI, RSI_H,
429160312Sjhb                          RCX, RCX_H,
430158032Sjhb                          RBX, RBX_H,
431158032Sjhb                          R8,  R8_H,
432108338Sjulian                          R9,  R9_H,
433228569Skib                          R10, R10_H,
434228569Skib                          R11, R11_H,
435228569Skib                          R13, R13_H,
436228569Skib                          R14, R14_H);
437228569Skib
438228569Skib// Singleton class for RAX long register
439228569Skibreg_class long_rax_reg(RAX, RAX_H);
440228569Skib
441228569Skib// Singleton class for RCX long register
442228569Skibreg_class long_rcx_reg(RCX, RCX_H);
443228569Skib
444228569Skib// Singleton class for RDX long register
445228569Skibreg_class long_rdx_reg(RDX, RDX_H);
446228569Skib
447228569Skib// Singleton class for R12 long register
448228569Skibreg_class long_r12_reg(R12, R12_H);
449228569Skib
450228687Spluknet// Class for all int registers (except RSP)
451228569Skibreg_class int_reg(RAX,
452228569Skib                  RDX,
453228569Skib                  RBP,
454228569Skib                  RDI,
455228569Skib                  RSI,
456228569Skib                  RCX,
457228569Skib                  RBX,
458228569Skib                  R8,
459228569Skib                  R9,
460228569Skib                  R10,
461228569Skib                  R11,
462228569Skib                  R13,
463228569Skib                  R14);
464228569Skib
465228569Skib// Class for all int registers except RCX (and RSP)
466228569Skibreg_class int_no_rcx_reg(RAX,
467228569Skib                         RDX,
468228569Skib                         RBP,
469                         RDI,
470                         RSI,
471                         RBX,
472                         R8,
473                         R9,
474                         R10,
475                         R11,
476                         R13,
477                         R14);
478
// Class for all int registers except RAX, RDX (and RSP).
// Holds the same members as int_reg minus RAX and RDX.  Every entry is
// comma-separated; the original list was missing the comma after RDI.
reg_class int_no_rax_rdx_reg(RBP,
                             RDI,
                             RSI,
                             RCX,
                             RBX,
                             R8,
                             R9,
                             R10,
                             R11,
                             R13,
                             R14);
491
492// Singleton class for RAX int register
493reg_class int_rax_reg(RAX);
494
495// Singleton class for RBX int register
496reg_class int_rbx_reg(RBX);
497
498// Singleton class for RCX int register
499reg_class int_rcx_reg(RCX);
500
// Singleton class for RDX int register
502reg_class int_rdx_reg(RDX);
503
// Singleton class for RDI int register
505reg_class int_rdi_reg(RDI);
506
507// Singleton class for instruction pointer
508// reg_class ip_reg(RIP);
509
510// Singleton class for condition codes
511reg_class int_flags(RFLAGS);
512
513// Class for all float registers
514reg_class float_reg(XMM0,
515                    XMM1,
516                    XMM2,
517                    XMM3,
518                    XMM4,
519                    XMM5,
520                    XMM6,
521                    XMM7,
522                    XMM8,
523                    XMM9,
524                    XMM10,
525                    XMM11,
526                    XMM12,
527                    XMM13,
528                    XMM14,
529                    XMM15);
530
531// Class for all double registers
532reg_class double_reg(XMM0,  XMM0_H,
533                     XMM1,  XMM1_H,
534                     XMM2,  XMM2_H,
535                     XMM3,  XMM3_H,
536                     XMM4,  XMM4_H,
537                     XMM5,  XMM5_H,
538                     XMM6,  XMM6_H,
539                     XMM7,  XMM7_H,
540                     XMM8,  XMM8_H,
541                     XMM9,  XMM9_H,
542                     XMM10, XMM10_H,
543                     XMM11, XMM11_H,
544                     XMM12, XMM12_H,
545                     XMM13, XMM13_H,
546                     XMM14, XMM14_H,
547                     XMM15, XMM15_H);
548%}
549
550
551//----------SOURCE BLOCK-------------------------------------------------------
552// This is a block of C++ code which provides values, functions, and
553// definitions necessary in the rest of the architecture description
554source %{
// Short names for the Assembler operand kinds; RELOC_DISP32 is passed as the
// `format` argument of the relocation helpers in this file.
#define RELOC_IMM64  Assembler::imm64_operand
#define RELOC_DISP32 Assembler::disp32_operand

// Assembler shorthand: `__ insn(...)` expands to `_masm.insn(...)`.
#define __ _masm.
559
560// !!!!! Special hack to get all types of calls to specify the byte offset
561//       from the start of the call to the point where the return address
562//       will point.
563int MachCallStaticJavaNode::ret_addr_offset()
564{
565  return 5; // 5 bytes from start of call to where return address points
566}
567
568int MachCallDynamicJavaNode::ret_addr_offset()
569{
570  return 15; // 15 bytes from start of call to where return address points
571}
572
573// In os_cpu .ad file
574// int MachCallRuntimeNode::ret_addr_offset()
575
576// Indicate if the safepoint node needs the polling page as an input.
577// Since amd64 does not have absolute addressing but RIP-relative
578// addressing and the polling page is within 2G, it doesn't.
579bool SafePointNode::needs_polling_address_input()
580{
581  return false;
582}
583
584//
585// Compute padding required for nodes which need alignment
586//
587
588// The address of the call instruction needs to be 4-byte aligned to
589// ensure that it does not span a cache line so that it can be patched.
590int CallStaticJavaDirectNode::compute_padding(int current_offset) const
591{
592  current_offset += 1; // skip call opcode byte
593  return round_to(current_offset, alignment_required()) - current_offset;
594}
595
596// The address of the call instruction needs to be 4-byte aligned to
597// ensure that it does not span a cache line so that it can be patched.
598int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
599{
600  current_offset += 11; // skip movq instruction + call opcode byte
601  return round_to(current_offset, alignment_required()) - current_offset;
602}
603
604#ifndef PRODUCT
605void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const
606{
607  st->print("INT3");
608}
609#endif
610
611// EMIT_RM()
612void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3)
613{
614  unsigned char c = (unsigned char) ((f1 << 6) | (f2 << 3) | f3);
615  *(cbuf.code_end()) = c;
616  cbuf.set_code_end(cbuf.code_end() + 1);
617}
618
619// EMIT_CC()
620void emit_cc(CodeBuffer &cbuf, int f1, int f2)
621{
622  unsigned char c = (unsigned char) (f1 | f2);
623  *(cbuf.code_end()) = c;
624  cbuf.set_code_end(cbuf.code_end() + 1);
625}
626
627// EMIT_OPCODE()
628void emit_opcode(CodeBuffer &cbuf, int code)
629{
630  *(cbuf.code_end()) = (unsigned char) code;
631  cbuf.set_code_end(cbuf.code_end() + 1);
632}
633
634// EMIT_OPCODE() w/ relocation information
635void emit_opcode(CodeBuffer &cbuf,
636                 int code, relocInfo::relocType reloc, int offset, int format)
637{
638  cbuf.relocate(cbuf.inst_mark() + offset, reloc, format);
639  emit_opcode(cbuf, code);
640}
641
642// EMIT_D8()
643void emit_d8(CodeBuffer &cbuf, int d8)
644{
645  *(cbuf.code_end()) = (unsigned char) d8;
646  cbuf.set_code_end(cbuf.code_end() + 1);
647}
648
649// EMIT_D16()
650void emit_d16(CodeBuffer &cbuf, int d16)
651{
652  *((short *)(cbuf.code_end())) = d16;
653  cbuf.set_code_end(cbuf.code_end() + 2);
654}
655
656// EMIT_D32()
657void emit_d32(CodeBuffer &cbuf, int d32)
658{
659  *((int *)(cbuf.code_end())) = d32;
660  cbuf.set_code_end(cbuf.code_end() + 4);
661}
662
663// EMIT_D64()
664void emit_d64(CodeBuffer &cbuf, int64_t d64)
665{
666  *((int64_t*) (cbuf.code_end())) = d64;
667  cbuf.set_code_end(cbuf.code_end() + 8);
668}
669
670// emit 32 bit value and construct relocation entry from relocInfo::relocType
671void emit_d32_reloc(CodeBuffer& cbuf,
672                    int d32,
673                    relocInfo::relocType reloc,
674                    int format)
675{
676  assert(reloc != relocInfo::external_word_type, "use 2-arg emit_d32_reloc");
677  cbuf.relocate(cbuf.inst_mark(), reloc, format);
678
679  *((int*) (cbuf.code_end())) = d32;
680  cbuf.set_code_end(cbuf.code_end() + 4);
681}
682
683// emit 32 bit value and construct relocation entry from RelocationHolder
684void emit_d32_reloc(CodeBuffer& cbuf,
685                    int d32,
686                    RelocationHolder const& rspec,
687                    int format)
688{
689#ifdef ASSERT
690  if (rspec.reloc()->type() == relocInfo::oop_type &&
691      d32 != 0 && d32 != (intptr_t) Universe::non_oop_word()) {
692    assert(oop((intptr_t)d32)->is_oop() && oop((intptr_t)d32)->is_perm(), "cannot embed non-perm oops in code");
693  }
694#endif
695  cbuf.relocate(cbuf.inst_mark(), rspec, format);
696
697  *((int* )(cbuf.code_end())) = d32;
698  cbuf.set_code_end(cbuf.code_end() + 4);
699}
700
701void emit_d32_reloc(CodeBuffer& cbuf, address addr) {
702  address next_ip = cbuf.code_end() + 4;
703  emit_d32_reloc(cbuf, (int) (addr - next_ip),
704                 external_word_Relocation::spec(addr),
705                 RELOC_DISP32);
706}
707
708
709// emit 64 bit value and construct relocation entry from relocInfo::relocType
710void emit_d64_reloc(CodeBuffer& cbuf,
711                    int64_t d64,
712                    relocInfo::relocType reloc,
713                    int format)
714{
715  cbuf.relocate(cbuf.inst_mark(), reloc, format);
716
717  *((int64_t*) (cbuf.code_end())) = d64;
718  cbuf.set_code_end(cbuf.code_end() + 8);
719}
720
721// emit 64 bit value and construct relocation entry from RelocationHolder
722void emit_d64_reloc(CodeBuffer& cbuf,
723                    int64_t d64,
724                    RelocationHolder const& rspec,
725                    int format)
726{
727#ifdef ASSERT
728  if (rspec.reloc()->type() == relocInfo::oop_type &&
729      d64 != 0 && d64 != (int64_t) Universe::non_oop_word()) {
730    assert(oop(d64)->is_oop() && oop(d64)->is_perm(),
731           "cannot embed non-perm oops in code");
732  }
733#endif
734  cbuf.relocate(cbuf.inst_mark(), rspec, format);
735
736  *((int64_t*) (cbuf.code_end())) = d64;
737  cbuf.set_code_end(cbuf.code_end() + 8);
738}
739
740// Access stack slot for load or store
741void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp)
742{
743  emit_opcode(cbuf, opcode);                  // (e.g., FILD   [RSP+src])
744  if (-0x80 <= disp && disp < 0x80) {
745    emit_rm(cbuf, 0x01, rm_field, RSP_enc);   // R/M byte
746    emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
747    emit_d8(cbuf, disp);     // Displacement  // R/M byte
748  } else {
749    emit_rm(cbuf, 0x02, rm_field, RSP_enc);   // R/M byte
750    emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
751    emit_d32(cbuf, disp);     // Displacement // R/M byte
752  }
753}
754
755   // rRegI ereg, memory mem) %{    // emit_reg_mem
756void encode_RegMem(CodeBuffer &cbuf,
757                   int reg,
758                   int base, int index, int scale, int disp, bool disp_is_oop)
759{
760  assert(!disp_is_oop, "cannot have disp");
761  int regenc = reg & 7;
762  int baseenc = base & 7;
763  int indexenc = index & 7;
764
765  // There is no index & no scale, use form without SIB byte
766  if (index == 0x4 && scale == 0 && base != RSP_enc && base != R12_enc) {
767    // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
768    if (disp == 0 && base != RBP_enc && base != R13_enc) {
769      emit_rm(cbuf, 0x0, regenc, baseenc); // *
770    } else if (-0x80 <= disp && disp < 0x80 && !disp_is_oop) {
771      // If 8-bit displacement, mode 0x1
772      emit_rm(cbuf, 0x1, regenc, baseenc); // *
773      emit_d8(cbuf, disp);
774    } else {
775      // If 32-bit displacement
776      if (base == -1) { // Special flag for absolute address
777        emit_rm(cbuf, 0x0, regenc, 0x5); // *
778        if (disp_is_oop) {
779          emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
780        } else {
781          emit_d32(cbuf, disp);
782        }
783      } else {
784        // Normal base + offset
785        emit_rm(cbuf, 0x2, regenc, baseenc); // *
786        if (disp_is_oop) {
787          emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
788        } else {
789          emit_d32(cbuf, disp);
790        }
791      }
792    }
793  } else {
794    // Else, encode with the SIB byte
795    // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
796    if (disp == 0 && base != RBP_enc && base != R13_enc) {
797      // If no displacement
798      emit_rm(cbuf, 0x0, regenc, 0x4); // *
799      emit_rm(cbuf, scale, indexenc, baseenc);
800    } else {
801      if (-0x80 <= disp && disp < 0x80 && !disp_is_oop) {
802        // If 8-bit displacement, mode 0x1
803        emit_rm(cbuf, 0x1, regenc, 0x4); // *
804        emit_rm(cbuf, scale, indexenc, baseenc);
805        emit_d8(cbuf, disp);
806      } else {
807        // If 32-bit displacement
808        if (base == 0x04 ) {
809          emit_rm(cbuf, 0x2, regenc, 0x4);
810          emit_rm(cbuf, scale, indexenc, 0x04); // XXX is this valid???
811        } else {
812          emit_rm(cbuf, 0x2, regenc, 0x4);
813          emit_rm(cbuf, scale, indexenc, baseenc); // *
814        }
815        if (disp_is_oop) {
816          emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
817        } else {
818          emit_d32(cbuf, disp);
819        }
820      }
821    }
822  }
823}
824
825void encode_copy(CodeBuffer &cbuf, int dstenc, int srcenc)
826{
827  if (dstenc != srcenc) {
828    if (dstenc < 8) {
829      if (srcenc >= 8) {
830        emit_opcode(cbuf, Assembler::REX_B);
831        srcenc -= 8;
832      }
833    } else {
834      if (srcenc < 8) {
835        emit_opcode(cbuf, Assembler::REX_R);
836      } else {
837        emit_opcode(cbuf, Assembler::REX_RB);
838        srcenc -= 8;
839      }
840      dstenc -= 8;
841    }
842
843    emit_opcode(cbuf, 0x8B);
844    emit_rm(cbuf, 0x3, dstenc, srcenc);
845  }
846}
847
848void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
849  if( dst_encoding == src_encoding ) {
850    // reg-reg copy, use an empty encoding
851  } else {
852    MacroAssembler _masm(&cbuf);
853
854    __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding));
855  }
856}
857
858
859//=============================================================================
860#ifndef PRODUCT
861void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const
862{
863  Compile* C = ra_->C;
864
865  int framesize = C->frame_slots() << LogBytesPerInt;
866  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
867  // Remove wordSize for return adr already pushed
868  // and another for the RBP we are going to save
869  framesize -= 2*wordSize;
870  bool need_nop = true;
871
872  // Calls to C2R adapters often do not accept exceptional returns.
873  // We require that their callers must bang for them.  But be
874  // careful, because some VM calls (such as call site linkage) can
875  // use several kilobytes of stack.  But the stack safety zone should
876  // account for that.  See bugs 4446381, 4468289, 4497237.
877  if (C->need_stack_bang(framesize)) {
878    st->print_cr("# stack bang"); st->print("\t");
879    need_nop = false;
880  }
881  st->print_cr("pushq   rbp"); st->print("\t");
882
883  if (VerifyStackAtCalls) {
884    // Majik cookie to verify stack depth
885    st->print_cr("pushq   0xffffffffbadb100d"
886                  "\t# Majik cookie for stack depth check");
887    st->print("\t");
888    framesize -= wordSize; // Remove 2 for cookie
889    need_nop = false;
890  }
891
892  if (framesize) {
893    st->print("subq    rsp, #%d\t# Create frame", framesize);
894    if (framesize < 0x80 && need_nop) {
895      st->print("\n\tnop\t# nop for patch_verified_entry");
896    }
897  }
898}
899#endif
900
901void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const
902{
903  Compile* C = ra_->C;
904
905  // WARNING: Initial instruction MUST be 5 bytes or longer so that
906  // NativeJump::patch_verified_entry will be able to patch out the entry
907  // code safely. The fldcw is ok at 6 bytes, the push to verify stack
908  // depth is ok at 5 bytes, the frame allocation can be either 3 or
909  // 6 bytes. So if we don't do the fldcw or the push then we must
910  // use the 6 byte frame allocation even if we have no frame. :-(
911  // If method sets FPU control word do it now
912
913  int framesize = C->frame_slots() << LogBytesPerInt;
914  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
915  // Remove wordSize for return adr already pushed
916  // and another for the RBP we are going to save
917  framesize -= 2*wordSize;
918  bool need_nop = true;
919
920  // Calls to C2R adapters often do not accept exceptional returns.
921  // We require that their callers must bang for them.  But be
922  // careful, because some VM calls (such as call site linkage) can
923  // use several kilobytes of stack.  But the stack safety zone should
924  // account for that.  See bugs 4446381, 4468289, 4497237.
925  if (C->need_stack_bang(framesize)) {
926    MacroAssembler masm(&cbuf);
927    masm.generate_stack_overflow_check(framesize);
928    need_nop = false;
929  }
930
931  // We always push rbp so that on return to interpreter rbp will be
932  // restored correctly and we can correct the stack.
933  emit_opcode(cbuf, 0x50 | RBP_enc);
934
935  if (VerifyStackAtCalls) {
936    // Majik cookie to verify stack depth
937    emit_opcode(cbuf, 0x68); // pushq (sign-extended) 0xbadb100d
938    emit_d32(cbuf, 0xbadb100d);
939    framesize -= wordSize; // Remove 2 for cookie
940    need_nop = false;
941  }
942
943  if (framesize) {
944    emit_opcode(cbuf, Assembler::REX_W);
945    if (framesize < 0x80) {
946      emit_opcode(cbuf, 0x83);   // sub  SP,#framesize
947      emit_rm(cbuf, 0x3, 0x05, RSP_enc);
948      emit_d8(cbuf, framesize);
949      if (need_nop) {
950        emit_opcode(cbuf, 0x90); // nop
951      }
952    } else {
953      emit_opcode(cbuf, 0x81);   // sub  SP,#framesize
954      emit_rm(cbuf, 0x3, 0x05, RSP_enc);
955      emit_d32(cbuf, framesize);
956    }
957  }
958
959  C->set_frame_complete(cbuf.code_end() - cbuf.code_begin());
960
961#ifdef ASSERT
962  if (VerifyStackAtCalls) {
963    Label L;
964    MacroAssembler masm(&cbuf);
965    masm.pushq(rax);
966    masm.movq(rax, rsp);
967    masm.andq(rax, StackAlignmentInBytes-1);
968    masm.cmpq(rax, StackAlignmentInBytes-wordSize);
969    masm.popq(rax);
970    masm.jcc(Assembler::equal, L);
971    masm.stop("Stack is not properly aligned!");
972    masm.bind(L);
973  }
974#endif
975}
976
977uint MachPrologNode::size(PhaseRegAlloc* ra_) const
978{
979  return MachNode::size(ra_); // too many variables; just compute it
980                              // the hard way
981}
982
983int MachPrologNode::reloc() const
984{
985  return 0; // a large enough number
986}
987
988//=============================================================================
989#ifndef PRODUCT
990void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
991{
992  Compile* C = ra_->C;
993  int framesize = C->frame_slots() << LogBytesPerInt;
994  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
995  // Remove word for return adr already pushed
996  // and RBP
997  framesize -= 2*wordSize;
998
999  if (framesize) {
1000    st->print_cr("addq\trsp, %d\t# Destroy frame", framesize);
1001    st->print("\t");
1002  }
1003
1004  st->print_cr("popq\trbp");
1005  if (do_polling() && C->is_method_compilation()) {
1006    st->print_cr("\ttestl\trax, [rip + #offset_to_poll_page]\t"
1007                  "# Safepoint: poll for GC");
1008    st->print("\t");
1009  }
1010}
1011#endif
1012
1013void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1014{
1015  Compile* C = ra_->C;
1016  int framesize = C->frame_slots() << LogBytesPerInt;
1017  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1018  // Remove word for return adr already pushed
1019  // and RBP
1020  framesize -= 2*wordSize;
1021
1022  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
1023
1024  if (framesize) {
1025    emit_opcode(cbuf, Assembler::REX_W);
1026    if (framesize < 0x80) {
1027      emit_opcode(cbuf, 0x83); // addq rsp, #framesize
1028      emit_rm(cbuf, 0x3, 0x00, RSP_enc);
1029      emit_d8(cbuf, framesize);
1030    } else {
1031      emit_opcode(cbuf, 0x81); // addq rsp, #framesize
1032      emit_rm(cbuf, 0x3, 0x00, RSP_enc);
1033      emit_d32(cbuf, framesize);
1034    }
1035  }
1036
1037  // popq rbp
1038  emit_opcode(cbuf, 0x58 | RBP_enc);
1039
1040  if (do_polling() && C->is_method_compilation()) {
1041    // testl %rax, off(%rip) // Opcode + ModRM + Disp32 == 6 bytes
1042    // XXX reg_mem doesn't support RIP-relative addressing yet
1043    cbuf.set_inst_mark();
1044    cbuf.relocate(cbuf.inst_mark(), relocInfo::poll_return_type, 0); // XXX
1045    emit_opcode(cbuf, 0x85); // testl
1046    emit_rm(cbuf, 0x0, RAX_enc, 0x5); // 00 rax 101 == 0x5
1047    // cbuf.inst_mark() is beginning of instruction
1048    emit_d32_reloc(cbuf, os::get_polling_page());
1049//                    relocInfo::poll_return_type,
1050  }
1051}
1052
1053uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
1054{
1055  Compile* C = ra_->C;
1056  int framesize = C->frame_slots() << LogBytesPerInt;
1057  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1058  // Remove word for return adr already pushed
1059  // and RBP
1060  framesize -= 2*wordSize;
1061
1062  uint size = 0;
1063
1064  if (do_polling() && C->is_method_compilation()) {
1065    size += 6;
1066  }
1067
1068  // count popq rbp
1069  size++;
1070
1071  if (framesize) {
1072    if (framesize < 0x80) {
1073      size += 4;
1074    } else if (framesize) {
1075      size += 7;
1076    }
1077  }
1078
1079  return size;
1080}
1081
1082int MachEpilogNode::reloc() const
1083{
1084  return 2; // a large enough number
1085}
1086
1087const Pipeline* MachEpilogNode::pipeline() const
1088{
1089  return MachNode::pipeline_class();
1090}
1091
1092int MachEpilogNode::safepoint_offset() const
1093{
1094  return 0;
1095}
1096
1097//=============================================================================
1098
// Register classes distinguished by rc_class() when implementing spill
// copies.
enum RC {
  rc_bad   = 0,  // not a valid register
  rc_int   = 1,  // general purpose register
  rc_float = 2,  // XMM register
  rc_stack = 3   // stack slot
};
1105
1106static enum RC rc_class(OptoReg::Name reg)
1107{
1108  if( !OptoReg::is_valid(reg)  ) return rc_bad;
1109
1110  if (OptoReg::is_stack(reg)) return rc_stack;
1111
1112  VMReg r = OptoReg::as_VMReg(reg);
1113
1114  if (r->is_Register()) return rc_int;
1115
1116  assert(r->is_XMMRegister(), "must be");
1117  return rc_float;
1118}
1119
1120uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
1121                                       PhaseRegAlloc* ra_,
1122                                       bool do_size,
1123                                       outputStream* st) const
1124{
1125
1126  // Get registers to move
1127  OptoReg::Name src_second = ra_->get_reg_second(in(1));
1128  OptoReg::Name src_first = ra_->get_reg_first(in(1));
1129  OptoReg::Name dst_second = ra_->get_reg_second(this);
1130  OptoReg::Name dst_first = ra_->get_reg_first(this);
1131
1132  enum RC src_second_rc = rc_class(src_second);
1133  enum RC src_first_rc = rc_class(src_first);
1134  enum RC dst_second_rc = rc_class(dst_second);
1135  enum RC dst_first_rc = rc_class(dst_first);
1136
1137  assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
1138         "must move at least 1 register" );
1139
1140  if (src_first == dst_first && src_second == dst_second) {
1141    // Self copy, no move
1142    return 0;
1143  } else if (src_first_rc == rc_stack) {
1144    // mem ->
1145    if (dst_first_rc == rc_stack) {
1146      // mem -> mem
1147      assert(src_second != dst_first, "overlap");
1148      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1149          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1150        // 64-bit
1151        int src_offset = ra_->reg2offset(src_first);
1152        int dst_offset = ra_->reg2offset(dst_first);
1153        if (cbuf) {
1154          emit_opcode(*cbuf, 0xFF);
1155          encode_RegMem(*cbuf, RSI_enc, RSP_enc, 0x4, 0, src_offset, false);
1156
1157          emit_opcode(*cbuf, 0x8F);
1158          encode_RegMem(*cbuf, RAX_enc, RSP_enc, 0x4, 0, dst_offset, false);
1159
1160#ifndef PRODUCT
1161        } else if (!do_size) {
1162          st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1163                     "popq    [rsp + #%d]",
1164                     src_offset,
1165                     dst_offset);
1166#endif
1167        }
1168        return
1169          3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) +
1170          3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4));
1171      } else {
1172        // 32-bit
1173        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1174        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1175        // No pushl/popl, so:
1176        int src_offset = ra_->reg2offset(src_first);
1177        int dst_offset = ra_->reg2offset(dst_first);
1178        if (cbuf) {
1179          emit_opcode(*cbuf, Assembler::REX_W);
1180          emit_opcode(*cbuf, 0x89);
1181          emit_opcode(*cbuf, 0x44);
1182          emit_opcode(*cbuf, 0x24);
1183          emit_opcode(*cbuf, 0xF8);
1184
1185          emit_opcode(*cbuf, 0x8B);
1186          encode_RegMem(*cbuf,
1187                        RAX_enc,
1188                        RSP_enc, 0x4, 0, src_offset,
1189                        false);
1190
1191          emit_opcode(*cbuf, 0x89);
1192          encode_RegMem(*cbuf,
1193                        RAX_enc,
1194                        RSP_enc, 0x4, 0, dst_offset,
1195                        false);
1196
1197          emit_opcode(*cbuf, Assembler::REX_W);
1198          emit_opcode(*cbuf, 0x8B);
1199          emit_opcode(*cbuf, 0x44);
1200          emit_opcode(*cbuf, 0x24);
1201          emit_opcode(*cbuf, 0xF8);
1202
1203#ifndef PRODUCT
1204        } else if (!do_size) {
1205          st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
1206                     "movl    rax, [rsp + #%d]\n\t"
1207                     "movl    [rsp + #%d], rax\n\t"
1208                     "movq    rax, [rsp - #8]",
1209                     src_offset,
1210                     dst_offset);
1211#endif
1212        }
1213        return
1214          5 + // movq
1215          3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) + // movl
1216          3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4)) + // movl
1217          5; // movq
1218      }
1219    } else if (dst_first_rc == rc_int) {
1220      // mem -> gpr
1221      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1222          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1223        // 64-bit
1224        int offset = ra_->reg2offset(src_first);
1225        if (cbuf) {
1226          if (Matcher::_regEncode[dst_first] < 8) {
1227            emit_opcode(*cbuf, Assembler::REX_W);
1228          } else {
1229            emit_opcode(*cbuf, Assembler::REX_WR);
1230          }
1231          emit_opcode(*cbuf, 0x8B);
1232          encode_RegMem(*cbuf,
1233                        Matcher::_regEncode[dst_first],
1234                        RSP_enc, 0x4, 0, offset,
1235                        false);
1236#ifndef PRODUCT
1237        } else if (!do_size) {
1238          st->print("movq    %s, [rsp + #%d]\t# spill",
1239                     Matcher::regName[dst_first],
1240                     offset);
1241#endif
1242        }
1243        return
1244          ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
1245      } else {
1246        // 32-bit
1247        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1248        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1249        int offset = ra_->reg2offset(src_first);
1250        if (cbuf) {
1251          if (Matcher::_regEncode[dst_first] >= 8) {
1252            emit_opcode(*cbuf, Assembler::REX_R);
1253          }
1254          emit_opcode(*cbuf, 0x8B);
1255          encode_RegMem(*cbuf,
1256                        Matcher::_regEncode[dst_first],
1257                        RSP_enc, 0x4, 0, offset,
1258                        false);
1259#ifndef PRODUCT
1260        } else if (!do_size) {
1261          st->print("movl    %s, [rsp + #%d]\t# spill",
1262                     Matcher::regName[dst_first],
1263                     offset);
1264#endif
1265        }
1266        return
1267          ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1268          ((Matcher::_regEncode[dst_first] < 8)
1269           ? 3
1270           : 4); // REX
1271      }
1272    } else if (dst_first_rc == rc_float) {
1273      // mem-> xmm
1274      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1275          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1276        // 64-bit
1277        int offset = ra_->reg2offset(src_first);
1278        if (cbuf) {
1279          emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
1280          if (Matcher::_regEncode[dst_first] >= 8) {
1281            emit_opcode(*cbuf, Assembler::REX_R);
1282          }
1283          emit_opcode(*cbuf, 0x0F);
1284          emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
1285          encode_RegMem(*cbuf,
1286                        Matcher::_regEncode[dst_first],
1287                        RSP_enc, 0x4, 0, offset,
1288                        false);
1289#ifndef PRODUCT
1290        } else if (!do_size) {
1291          st->print("%s  %s, [rsp + #%d]\t# spill",
1292                     UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
1293                     Matcher::regName[dst_first],
1294                     offset);
1295#endif
1296        }
1297        return
1298          ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1299          ((Matcher::_regEncode[dst_first] < 8)
1300           ? 5
1301           : 6); // REX
1302      } else {
1303        // 32-bit
1304        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1305        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1306        int offset = ra_->reg2offset(src_first);
1307        if (cbuf) {
1308          emit_opcode(*cbuf, 0xF3);
1309          if (Matcher::_regEncode[dst_first] >= 8) {
1310            emit_opcode(*cbuf, Assembler::REX_R);
1311          }
1312          emit_opcode(*cbuf, 0x0F);
1313          emit_opcode(*cbuf, 0x10);
1314          encode_RegMem(*cbuf,
1315                        Matcher::_regEncode[dst_first],
1316                        RSP_enc, 0x4, 0, offset,
1317                        false);
1318#ifndef PRODUCT
1319        } else if (!do_size) {
1320          st->print("movss   %s, [rsp + #%d]\t# spill",
1321                     Matcher::regName[dst_first],
1322                     offset);
1323#endif
1324        }
1325        return
1326          ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1327          ((Matcher::_regEncode[dst_first] < 8)
1328           ? 5
1329           : 6); // REX
1330      }
1331    }
1332  } else if (src_first_rc == rc_int) {
1333    // gpr ->
1334    if (dst_first_rc == rc_stack) {
1335      // gpr -> mem
1336      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1337          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1338        // 64-bit
1339        int offset = ra_->reg2offset(dst_first);
1340        if (cbuf) {
1341          if (Matcher::_regEncode[src_first] < 8) {
1342            emit_opcode(*cbuf, Assembler::REX_W);
1343          } else {
1344            emit_opcode(*cbuf, Assembler::REX_WR);
1345          }
1346          emit_opcode(*cbuf, 0x89);
1347          encode_RegMem(*cbuf,
1348                        Matcher::_regEncode[src_first],
1349                        RSP_enc, 0x4, 0, offset,
1350                        false);
1351#ifndef PRODUCT
1352        } else if (!do_size) {
1353          st->print("movq    [rsp + #%d], %s\t# spill",
1354                     offset,
1355                     Matcher::regName[src_first]);
1356#endif
1357        }
1358        return ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
1359      } else {
1360        // 32-bit
1361        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1362        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1363        int offset = ra_->reg2offset(dst_first);
1364        if (cbuf) {
1365          if (Matcher::_regEncode[src_first] >= 8) {
1366            emit_opcode(*cbuf, Assembler::REX_R);
1367          }
1368          emit_opcode(*cbuf, 0x89);
1369          encode_RegMem(*cbuf,
1370                        Matcher::_regEncode[src_first],
1371                        RSP_enc, 0x4, 0, offset,
1372                        false);
1373#ifndef PRODUCT
1374        } else if (!do_size) {
1375          st->print("movl    [rsp + #%d], %s\t# spill",
1376                     offset,
1377                     Matcher::regName[src_first]);
1378#endif
1379        }
1380        return
1381          ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1382          ((Matcher::_regEncode[src_first] < 8)
1383           ? 3
1384           : 4); // REX
1385      }
1386    } else if (dst_first_rc == rc_int) {
1387      // gpr -> gpr
1388      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1389          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1390        // 64-bit
1391        if (cbuf) {
1392          if (Matcher::_regEncode[dst_first] < 8) {
1393            if (Matcher::_regEncode[src_first] < 8) {
1394              emit_opcode(*cbuf, Assembler::REX_W);
1395            } else {
1396              emit_opcode(*cbuf, Assembler::REX_WB);
1397            }
1398          } else {
1399            if (Matcher::_regEncode[src_first] < 8) {
1400              emit_opcode(*cbuf, Assembler::REX_WR);
1401            } else {
1402              emit_opcode(*cbuf, Assembler::REX_WRB);
1403            }
1404          }
1405          emit_opcode(*cbuf, 0x8B);
1406          emit_rm(*cbuf, 0x3,
1407                  Matcher::_regEncode[dst_first] & 7,
1408                  Matcher::_regEncode[src_first] & 7);
1409#ifndef PRODUCT
1410        } else if (!do_size) {
1411          st->print("movq    %s, %s\t# spill",
1412                     Matcher::regName[dst_first],
1413                     Matcher::regName[src_first]);
1414#endif
1415        }
1416        return 3; // REX
1417      } else {
1418        // 32-bit
1419        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1420        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1421        if (cbuf) {
1422          if (Matcher::_regEncode[dst_first] < 8) {
1423            if (Matcher::_regEncode[src_first] >= 8) {
1424              emit_opcode(*cbuf, Assembler::REX_B);
1425            }
1426          } else {
1427            if (Matcher::_regEncode[src_first] < 8) {
1428              emit_opcode(*cbuf, Assembler::REX_R);
1429            } else {
1430              emit_opcode(*cbuf, Assembler::REX_RB);
1431            }
1432          }
1433          emit_opcode(*cbuf, 0x8B);
1434          emit_rm(*cbuf, 0x3,
1435                  Matcher::_regEncode[dst_first] & 7,
1436                  Matcher::_regEncode[src_first] & 7);
1437#ifndef PRODUCT
1438        } else if (!do_size) {
1439          st->print("movl    %s, %s\t# spill",
1440                     Matcher::regName[dst_first],
1441                     Matcher::regName[src_first]);
1442#endif
1443        }
1444        return
1445          (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1446          ? 2
1447          : 3; // REX
1448      }
1449    } else if (dst_first_rc == rc_float) {
1450      // gpr -> xmm
1451      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1452          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1453        // 64-bit
1454        if (cbuf) {
1455          emit_opcode(*cbuf, 0x66);
1456          if (Matcher::_regEncode[dst_first] < 8) {
1457            if (Matcher::_regEncode[src_first] < 8) {
1458              emit_opcode(*cbuf, Assembler::REX_W);
1459            } else {
1460              emit_opcode(*cbuf, Assembler::REX_WB);
1461            }
1462          } else {
1463            if (Matcher::_regEncode[src_first] < 8) {
1464              emit_opcode(*cbuf, Assembler::REX_WR);
1465            } else {
1466              emit_opcode(*cbuf, Assembler::REX_WRB);
1467            }
1468          }
1469          emit_opcode(*cbuf, 0x0F);
1470          emit_opcode(*cbuf, 0x6E);
1471          emit_rm(*cbuf, 0x3,
1472                  Matcher::_regEncode[dst_first] & 7,
1473                  Matcher::_regEncode[src_first] & 7);
1474#ifndef PRODUCT
1475        } else if (!do_size) {
1476          st->print("movdq   %s, %s\t# spill",
1477                     Matcher::regName[dst_first],
1478                     Matcher::regName[src_first]);
1479#endif
1480        }
1481        return 5; // REX
1482      } else {
1483        // 32-bit
1484        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1485        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1486        if (cbuf) {
1487          emit_opcode(*cbuf, 0x66);
1488          if (Matcher::_regEncode[dst_first] < 8) {
1489            if (Matcher::_regEncode[src_first] >= 8) {
1490              emit_opcode(*cbuf, Assembler::REX_B);
1491            }
1492          } else {
1493            if (Matcher::_regEncode[src_first] < 8) {
1494              emit_opcode(*cbuf, Assembler::REX_R);
1495            } else {
1496              emit_opcode(*cbuf, Assembler::REX_RB);
1497            }
1498          }
1499          emit_opcode(*cbuf, 0x0F);
1500          emit_opcode(*cbuf, 0x6E);
1501          emit_rm(*cbuf, 0x3,
1502                  Matcher::_regEncode[dst_first] & 7,
1503                  Matcher::_regEncode[src_first] & 7);
1504#ifndef PRODUCT
1505        } else if (!do_size) {
1506          st->print("movdl   %s, %s\t# spill",
1507                     Matcher::regName[dst_first],
1508                     Matcher::regName[src_first]);
1509#endif
1510        }
1511        return
1512          (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1513          ? 4
1514          : 5; // REX
1515      }
1516    }
1517  } else if (src_first_rc == rc_float) {
1518    // xmm ->
1519    if (dst_first_rc == rc_stack) {
1520      // xmm -> mem
1521      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1522          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1523        // 64-bit
1524        int offset = ra_->reg2offset(dst_first);
1525        if (cbuf) {
1526          emit_opcode(*cbuf, 0xF2);
1527          if (Matcher::_regEncode[src_first] >= 8) {
1528              emit_opcode(*cbuf, Assembler::REX_R);
1529          }
1530          emit_opcode(*cbuf, 0x0F);
1531          emit_opcode(*cbuf, 0x11);
1532          encode_RegMem(*cbuf,
1533                        Matcher::_regEncode[src_first],
1534                        RSP_enc, 0x4, 0, offset,
1535                        false);
1536#ifndef PRODUCT
1537        } else if (!do_size) {
1538          st->print("movsd   [rsp + #%d], %s\t# spill",
1539                     offset,
1540                     Matcher::regName[src_first]);
1541#endif
1542        }
1543        return
1544          ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1545          ((Matcher::_regEncode[src_first] < 8)
1546           ? 5
1547           : 6); // REX
1548      } else {
1549        // 32-bit
1550        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1551        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1552        int offset = ra_->reg2offset(dst_first);
1553        if (cbuf) {
1554          emit_opcode(*cbuf, 0xF3);
1555          if (Matcher::_regEncode[src_first] >= 8) {
1556              emit_opcode(*cbuf, Assembler::REX_R);
1557          }
1558          emit_opcode(*cbuf, 0x0F);
1559          emit_opcode(*cbuf, 0x11);
1560          encode_RegMem(*cbuf,
1561                        Matcher::_regEncode[src_first],
1562                        RSP_enc, 0x4, 0, offset,
1563                        false);
1564#ifndef PRODUCT
1565        } else if (!do_size) {
1566          st->print("movss   [rsp + #%d], %s\t# spill",
1567                     offset,
1568                     Matcher::regName[src_first]);
1569#endif
1570        }
1571        return
1572          ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1573          ((Matcher::_regEncode[src_first] < 8)
1574           ? 5
1575           : 6); // REX
1576      }
1577    } else if (dst_first_rc == rc_int) {
1578      // xmm -> gpr
1579      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1580          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1581        // 64-bit
1582        if (cbuf) {
1583          emit_opcode(*cbuf, 0x66);
1584          if (Matcher::_regEncode[dst_first] < 8) {
1585            if (Matcher::_regEncode[src_first] < 8) {
1586              emit_opcode(*cbuf, Assembler::REX_W);
1587            } else {
1588              emit_opcode(*cbuf, Assembler::REX_WR); // attention!
1589            }
1590          } else {
1591            if (Matcher::_regEncode[src_first] < 8) {
1592              emit_opcode(*cbuf, Assembler::REX_WB); // attention!
1593            } else {
1594              emit_opcode(*cbuf, Assembler::REX_WRB);
1595            }
1596          }
1597          emit_opcode(*cbuf, 0x0F);
1598          emit_opcode(*cbuf, 0x7E);
1599          emit_rm(*cbuf, 0x3,
1600                  Matcher::_regEncode[dst_first] & 7,
1601                  Matcher::_regEncode[src_first] & 7);
1602#ifndef PRODUCT
1603        } else if (!do_size) {
1604          st->print("movdq   %s, %s\t# spill",
1605                     Matcher::regName[dst_first],
1606                     Matcher::regName[src_first]);
1607#endif
1608        }
1609        return 5; // REX
1610      } else {
1611        // 32-bit
1612        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1613        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1614        if (cbuf) {
1615          emit_opcode(*cbuf, 0x66);
1616          if (Matcher::_regEncode[dst_first] < 8) {
1617            if (Matcher::_regEncode[src_first] >= 8) {
1618              emit_opcode(*cbuf, Assembler::REX_R); // attention!
1619            }
1620          } else {
1621            if (Matcher::_regEncode[src_first] < 8) {
1622              emit_opcode(*cbuf, Assembler::REX_B); // attention!
1623            } else {
1624              emit_opcode(*cbuf, Assembler::REX_RB);
1625            }
1626          }
1627          emit_opcode(*cbuf, 0x0F);
1628          emit_opcode(*cbuf, 0x7E);
1629          emit_rm(*cbuf, 0x3,
1630                  Matcher::_regEncode[dst_first] & 7,
1631                  Matcher::_regEncode[src_first] & 7);
1632#ifndef PRODUCT
1633        } else if (!do_size) {
1634          st->print("movdl   %s, %s\t# spill",
1635                     Matcher::regName[dst_first],
1636                     Matcher::regName[src_first]);
1637#endif
1638        }
1639        return
1640          (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1641          ? 4
1642          : 5; // REX
1643      }
1644    } else if (dst_first_rc == rc_float) {
1645      // xmm -> xmm
1646      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1647          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1648        // 64-bit
1649        if (cbuf) {
1650          emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
1651          if (Matcher::_regEncode[dst_first] < 8) {
1652            if (Matcher::_regEncode[src_first] >= 8) {
1653              emit_opcode(*cbuf, Assembler::REX_B);
1654            }
1655          } else {
1656            if (Matcher::_regEncode[src_first] < 8) {
1657              emit_opcode(*cbuf, Assembler::REX_R);
1658            } else {
1659              emit_opcode(*cbuf, Assembler::REX_RB);
1660            }
1661          }
1662          emit_opcode(*cbuf, 0x0F);
1663          emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
1664          emit_rm(*cbuf, 0x3,
1665                  Matcher::_regEncode[dst_first] & 7,
1666                  Matcher::_regEncode[src_first] & 7);
1667#ifndef PRODUCT
1668        } else if (!do_size) {
1669          st->print("%s  %s, %s\t# spill",
1670                     UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
1671                     Matcher::regName[dst_first],
1672                     Matcher::regName[src_first]);
1673#endif
1674        }
1675        return
1676          (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1677          ? 4
1678          : 5; // REX
1679      } else {
1680        // 32-bit
1681        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1682        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1683        if (cbuf) {
1684          if (!UseXmmRegToRegMoveAll)
1685            emit_opcode(*cbuf, 0xF3);
1686          if (Matcher::_regEncode[dst_first] < 8) {
1687            if (Matcher::_regEncode[src_first] >= 8) {
1688              emit_opcode(*cbuf, Assembler::REX_B);
1689            }
1690          } else {
1691            if (Matcher::_regEncode[src_first] < 8) {
1692              emit_opcode(*cbuf, Assembler::REX_R);
1693            } else {
1694              emit_opcode(*cbuf, Assembler::REX_RB);
1695            }
1696          }
1697          emit_opcode(*cbuf, 0x0F);
1698          emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
1699          emit_rm(*cbuf, 0x3,
1700                  Matcher::_regEncode[dst_first] & 7,
1701                  Matcher::_regEncode[src_first] & 7);
1702#ifndef PRODUCT
1703        } else if (!do_size) {
1704          st->print("%s  %s, %s\t# spill",
1705                     UseXmmRegToRegMoveAll ? "movaps" : "movss ",
1706                     Matcher::regName[dst_first],
1707                     Matcher::regName[src_first]);
1708#endif
1709        }
1710        return
1711          (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1712          ? (UseXmmRegToRegMoveAll ? 3 : 4)
1713          : (UseXmmRegToRegMoveAll ? 4 : 5); // REX
1714      }
1715    }
1716  }
1717
1718  assert(0," foo ");
1719  Unimplemented();
1720
1721  return 0;
1722}
1723
1724#ifndef PRODUCT
1725void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const
1726{
1727  implementation(NULL, ra_, false, st);
1728}
1729#endif
1730
1731void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const
1732{
1733  implementation(&cbuf, ra_, false, NULL);
1734}
1735
1736uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const
1737{
1738  return implementation(NULL, ra_, true, NULL);
1739}
1740
1741//=============================================================================
1742#ifndef PRODUCT
1743void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const
1744{
1745  st->print("nop \t# %d bytes pad for loops and calls", _count);
1746}
1747#endif
1748
1749void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const
1750{
1751  MacroAssembler _masm(&cbuf);
1752  __ nop(_count);
1753}
1754
1755uint MachNopNode::size(PhaseRegAlloc*) const
1756{
1757  return _count;
1758}
1759
1760
1761//=============================================================================
1762#ifndef PRODUCT
1763void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1764{
1765  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1766  int reg = ra_->get_reg_first(this);
1767  st->print("leaq    %s, [rsp + #%d]\t# box lock",
1768            Matcher::regName[reg], offset);
1769}
1770#endif
1771
1772void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1773{
1774  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1775  int reg = ra_->get_encode(this);
1776  if (offset >= 0x80) {
1777    emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1778    emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1779    emit_rm(cbuf, 0x2, reg & 7, 0x04);
1780    emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1781    emit_d32(cbuf, offset);
1782  } else {
1783    emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1784    emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1785    emit_rm(cbuf, 0x1, reg & 7, 0x04);
1786    emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1787    emit_d8(cbuf, offset);
1788  }
1789}
1790
1791uint BoxLockNode::size(PhaseRegAlloc *ra_) const
1792{
1793  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1794  return (offset < 0x80) ? 5 : 8; // REX
1795}
1796
1797//=============================================================================
1798
1799// emit call stub, compiled java to interpreter
1800void emit_java_to_interp(CodeBuffer& cbuf)
1801{
1802  // Stub is fixed up when the corresponding call is converted from
1803  // calling compiled code to calling interpreted code.
1804  // movq rbx, 0
1805  // jmp -5 # to self
1806
1807  address mark = cbuf.inst_mark();  // get mark within main instrs section
1808
1809  // Note that the code buffer's inst_mark is always relative to insts.
1810  // That's why we must use the macroassembler to generate a stub.
1811  MacroAssembler _masm(&cbuf);
1812
1813  address base =
1814  __ start_a_stub(Compile::MAX_stubs_size);
1815  if (base == NULL)  return;  // CodeBuffer::expand failed
1816  // static stub relocation stores the instruction address of the call
1817  __ relocate(static_stub_Relocation::spec(mark), RELOC_IMM64);
1818  // static stub relocation also tags the methodOop in the code-stream.
1819  __ movoop(rbx, (jobject) NULL);  // method is zapped till fixup time
1820  __ jump(RuntimeAddress(__ pc()));
1821
1822  // Update current stubs pointer and restore code_end.
1823  __ end_a_stub();
1824}
1825
1826// size of call stub, compiled java to interpretor
1827uint size_java_to_interp()
1828{
1829  return 15;  // movq (1+1+8); jmp (1+4)
1830}
1831
1832// relocation entries for call stub, compiled java to interpretor
1833uint reloc_java_to_interp()
1834{
1835  return 4; // 3 in emit_java_to_interp + 1 in Java_Static_Call
1836}
1837
1838//=============================================================================
1839#ifndef PRODUCT
1840void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1841{
1842  if (UseCompressedOops) {
1843    st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t", oopDesc::klass_offset_in_bytes());
1844    st->print_cr("leaq    rscratch1, [r12_heapbase, r, Address::times_8, 0]");
1845    st->print_cr("cmpq    rax, rscratch1\t # Inline cache check");
1846  } else {
1847    st->print_cr("cmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t"
1848                 "# Inline cache check", oopDesc::klass_offset_in_bytes());
1849  }
1850  st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
1851  st->print_cr("\tnop");
1852  if (!OptoBreakpoint) {
1853    st->print_cr("\tnop");
1854  }
1855}
1856#endif
1857
1858void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1859{
1860  MacroAssembler masm(&cbuf);
1861#ifdef ASSERT
1862  uint code_size = cbuf.code_size();
1863#endif
1864  if (UseCompressedOops) {
1865    masm.load_klass(rscratch1, j_rarg0);
1866    masm.cmpq(rax, rscratch1);
1867  } else {
1868    masm.cmpq(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
1869  }
1870
1871  masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1872
1873  /* WARNING these NOPs are critical so that verified entry point is properly
1874     aligned for patching by NativeJump::patch_verified_entry() */
1875  int nops_cnt = 1;
1876  if (!OptoBreakpoint) {
1877    // Leave space for int3
1878     nops_cnt += 1;
1879  }
1880  if (UseCompressedOops) {
1881    // ??? divisible by 4 is aligned?
1882    nops_cnt += 1;
1883  }
1884  masm.nop(nops_cnt);
1885
1886  assert(cbuf.code_size() - code_size == size(ra_),
1887         "checking code size of inline cache node");
1888}
1889
1890uint MachUEPNode::size(PhaseRegAlloc* ra_) const
1891{
1892  if (UseCompressedOops) {
1893    return OptoBreakpoint ? 19 : 20;
1894  } else {
1895    return OptoBreakpoint ? 11 : 12;
1896  }
1897}
1898
1899
1900//=============================================================================
1901uint size_exception_handler()
1902{
1903  // NativeCall instruction size is the same as NativeJump.
1904  // Note that this value is also credited (in output.cpp) to
1905  // the size of the code section.
1906  return NativeJump::instruction_size;
1907}
1908
1909// Emit exception handler code.
1910int emit_exception_handler(CodeBuffer& cbuf)
1911{
1912
1913  // Note that the code buffer's inst_mark is always relative to insts.
1914  // That's why we must use the macroassembler to generate a handler.
1915  MacroAssembler _masm(&cbuf);
1916  address base =
1917  __ start_a_stub(size_exception_handler());
1918  if (base == NULL)  return 0;  // CodeBuffer::expand failed
1919  int offset = __ offset();
1920  __ jump(RuntimeAddress(OptoRuntime::exception_blob()->instructions_begin()));
1921  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
1922  __ end_a_stub();
1923  return offset;
1924}
1925
1926uint size_deopt_handler()
1927{
1928  // three 5 byte instructions
1929  return 15;
1930}
1931
1932// Emit deopt handler code.
1933int emit_deopt_handler(CodeBuffer& cbuf)
1934{
1935
1936  // Note that the code buffer's inst_mark is always relative to insts.
1937  // That's why we must use the macroassembler to generate a handler.
1938  MacroAssembler _masm(&cbuf);
1939  address base =
1940  __ start_a_stub(size_deopt_handler());
1941  if (base == NULL)  return 0;  // CodeBuffer::expand failed
1942  int offset = __ offset();
1943  address the_pc = (address) __ pc();
1944  Label next;
1945  // push a "the_pc" on the stack without destroying any registers
1946  // as they all may be live.
1947
1948  // push address of "next"
1949  __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
1950  __ bind(next);
1951  // adjust it so it matches "the_pc"
1952  __ subq(Address(rsp, 0), __ offset() - offset);
1953  __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1954  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
1955  __ end_a_stub();
1956  return offset;
1957}
1958
1959static void emit_double_constant(CodeBuffer& cbuf, double x) {
1960  int mark = cbuf.insts()->mark_off();
1961  MacroAssembler _masm(&cbuf);
1962  address double_address = __ double_constant(x);
1963  cbuf.insts()->set_mark_off(mark);  // preserve mark across masm shift
1964  emit_d32_reloc(cbuf,
1965                 (int) (double_address - cbuf.code_end() - 4),
1966                 internal_word_Relocation::spec(double_address),
1967                 RELOC_DISP32);
1968}
1969
1970static void emit_float_constant(CodeBuffer& cbuf, float x) {
1971  int mark = cbuf.insts()->mark_off();
1972  MacroAssembler _masm(&cbuf);
1973  address float_address = __ float_constant(x);
1974  cbuf.insts()->set_mark_off(mark);  // preserve mark across masm shift
1975  emit_d32_reloc(cbuf,
1976                 (int) (float_address - cbuf.code_end() - 4),
1977                 internal_word_Relocation::spec(float_address),
1978                 RELOC_DISP32);
1979}
1980
1981
1982int Matcher::regnum_to_fpu_offset(int regnum)
1983{
1984  return regnum - 32; // The FP registers are in the second chunk
1985}
1986
1987// This is UltraSparc specific, true just means we have fast l2f conversion
1988const bool Matcher::convL2FSupported(void) {
1989  return true;
1990}
1991
1992// Vector width in bytes
1993const uint Matcher::vector_width_in_bytes(void) {
1994  return 8;
1995}
1996
1997// Vector ideal reg
1998const uint Matcher::vector_ideal_reg(void) {
1999  return Op_RegD;
2000}
2001
2002// Is this branch offset short enough that a short branch can be used?
2003//
2004// NOTE: If the platform does not provide any short branch variants, then
2005//       this method should return false for offset 0.
2006bool Matcher::is_short_branch_offset(int offset)
2007{
2008  return -0x80 <= offset && offset < 0x80;
2009}
2010
2011const bool Matcher::isSimpleConstant64(jlong value) {
2012  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
2013  //return value == (int) value;  // Cf. storeImmL and immL32.
2014
2015  // Probably always true, even if a temp register is required.
2016  return true;
2017}
2018
2019// The ecx parameter to rep stosq for the ClearArray node is in words.
2020const bool Matcher::init_array_count_is_in_bytes = false;
2021
2022// Threshold size for cleararray.
2023const int Matcher::init_array_short_size = 8 * BytesPerLong;
2024
2025// Should the Matcher clone shifts on addressing modes, expecting them
2026// to be subsumed into complex addressing expressions or compute them
2027// into registers?  True for Intel but false for most RISCs
2028const bool Matcher::clone_shift_expressions = true;
2029
2030// Is it better to copy float constants, or load them directly from
2031// memory?  Intel can load a float constant from a direct address,
2032// requiring no extra registers.  Most RISCs will have to materialize
2033// an address into a register first, so they would do better to copy
2034// the constant from stack.
2035const bool Matcher::rematerialize_float_constants = true; // XXX
2036
2037// If CPU can load and store mis-aligned doubles directly then no
2038// fixup is needed.  Else we split the double into 2 integer pieces
2039// and move it piece-by-piece.  Only happens when passing doubles into
2040// C code as the Java calling convention forces doubles to be aligned.
2041const bool Matcher::misaligned_doubles_ok = true;
2042
2043// No-op on amd64
2044void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {}
2045
2046// Advertise here if the CPU requires explicit rounding operations to
2047// implement the UseStrictFP mode.
2048const bool Matcher::strict_fp_requires_explicit_rounding = true;
2049
2050// Do floats take an entire double register or just half?
2051const bool Matcher::float_in_double = true;
2052// Do ints take an entire long register or just half?
2053const bool Matcher::int_in_long = true;
2054
2055// Return whether or not this register is ever used as an argument.
2056// This function is used on startup to build the trampoline stubs in
2057// generateOptoStub.  Registers not mentioned will be killed by the VM
2058// call in the trampoline, and arguments in those registers not be
2059// available to the callee.
2060bool Matcher::can_be_java_arg(int reg)
2061{
2062  return
2063    reg ==  RDI_num || reg ==  RDI_H_num ||
2064    reg ==  RSI_num || reg ==  RSI_H_num ||
2065    reg ==  RDX_num || reg ==  RDX_H_num ||
2066    reg ==  RCX_num || reg ==  RCX_H_num ||
2067    reg ==   R8_num || reg ==   R8_H_num ||
2068    reg ==   R9_num || reg ==   R9_H_num ||
2069    reg ==  R12_num || reg ==  R12_H_num ||
2070    reg == XMM0_num || reg == XMM0_H_num ||
2071    reg == XMM1_num || reg == XMM1_H_num ||
2072    reg == XMM2_num || reg == XMM2_H_num ||
2073    reg == XMM3_num || reg == XMM3_H_num ||
2074    reg == XMM4_num || reg == XMM4_H_num ||
2075    reg == XMM5_num || reg == XMM5_H_num ||
2076    reg == XMM6_num || reg == XMM6_H_num ||
2077    reg == XMM7_num || reg == XMM7_H_num;
2078}
2079
2080bool Matcher::is_spillable_arg(int reg)
2081{
2082  return can_be_java_arg(reg);
2083}
2084
2085// Register for DIVI projection of divmodI
2086RegMask Matcher::divI_proj_mask() {
2087  return INT_RAX_REG_mask;
2088}
2089
2090// Register for MODI projection of divmodI
2091RegMask Matcher::modI_proj_mask() {
2092  return INT_RDX_REG_mask;
2093}
2094
2095// Register for DIVL projection of divmodL
2096RegMask Matcher::divL_proj_mask() {
2097  return LONG_RAX_REG_mask;
2098}
2099
2100// Register for MODL projection of divmodL
2101RegMask Matcher::modL_proj_mask() {
2102  return LONG_RDX_REG_mask;
2103}
2104
2105static Address build_address(int b, int i, int s, int d) {
2106  Register index = as_Register(i);
2107  Address::ScaleFactor scale = (Address::ScaleFactor)s;
2108  if (index == rsp) {
2109    index = noreg;
2110    scale = Address::no_scale;
2111  }
2112  Address addr(as_Register(b), index, scale, d);
2113  return addr;
2114}
2115
2116%}
2117
2118//----------ENCODING BLOCK-----------------------------------------------------
2119// This block specifies the encoding classes used by the compiler to
2120// output byte streams.  Encoding classes are parameterized macros
2121// used by Machine Instruction Nodes in order to generate the bit
2122// encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are currently
// supported: REG_INTER, CONST_INTER, MEMORY_INTER, &
2125// COND_INTER.  REG_INTER causes an operand to generate a function
2126// which returns its register number when queried.  CONST_INTER causes
2127// an operand to generate a function which returns the value of the
2128// constant when queried.  MEMORY_INTER causes an operand to generate
2129// four functions which return the Base Register, the Index Register,
2130// the Scale Value, and the Offset Value of the operand when queried.
2131// COND_INTER causes an operand to generate six functions which return
// the encoding code (i.e. the encoding bits for the instruction)
2133// associated with each basic boolean condition for a conditional
2134// instruction.
2135//
2136// Instructions specify two basic values for encoding.  Again, a
2137// function is available to check if the constant displacement is an
2138// oop. They use the ins_encode keyword to specify their encoding
2139// classes (which must be a sequence of enc_class names, and their
2140// parameters, specified in the encoding block), and they use the
2141// opcode keyword to specify, in order, their primary, secondary, and
2142// tertiary opcode.  Only the opcode sections which a particular
2143// instruction needs for encoding need to be specified.
2144encode %{
2145  // Build emit functions for each basic byte or larger field in the
2146  // intel encoding scheme (opcode, rm, sib, immediate), and call them
2147  // from C++ code in the enc_class source block.  Emit functions will
2148  // live in the main source block for now.  In future, we can
2149  // generalize this by adding a syntax that specifies the sizes of
2150  // fields in an order, so that the adlc can build the emit functions
2151  // automagically
2152
2153  // Emit primary opcode
2154  enc_class OpcP
2155  %{
2156    emit_opcode(cbuf, $primary);
2157  %}
2158
2159  // Emit secondary opcode
2160  enc_class OpcS
2161  %{
2162    emit_opcode(cbuf, $secondary);
2163  %}
2164
2165  // Emit tertiary opcode
2166  enc_class OpcT
2167  %{
2168    emit_opcode(cbuf, $tertiary);
2169  %}
2170
2171  // Emit opcode directly
2172  enc_class Opcode(immI d8)
2173  %{
2174    emit_opcode(cbuf, $d8$$constant);
2175  %}
2176
2177  // Emit size prefix
2178  enc_class SizePrefix
2179  %{
2180    emit_opcode(cbuf, 0x66);
2181  %}
2182
2183  enc_class reg(rRegI reg)
2184  %{
2185    emit_rm(cbuf, 0x3, 0, $reg$$reg & 7);
2186  %}
2187
2188  enc_class reg_reg(rRegI dst, rRegI src)
2189  %{
2190    emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2191  %}
2192
2193  enc_class opc_reg_reg(immI opcode, rRegI dst, rRegI src)
2194  %{
2195    emit_opcode(cbuf, $opcode$$constant);
2196    emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2197  %}
2198
2199  enc_class cmpfp_fixup()
2200  %{
2201    // jnp,s exit
2202    emit_opcode(cbuf, 0x7B);
2203    emit_d8(cbuf, 0x0A);
2204
2205    // pushfq
2206    emit_opcode(cbuf, 0x9C);
2207
2208    // andq $0xffffff2b, (%rsp)
2209    emit_opcode(cbuf, Assembler::REX_W);
2210    emit_opcode(cbuf, 0x81);
2211    emit_opcode(cbuf, 0x24);
2212    emit_opcode(cbuf, 0x24);
2213    emit_d32(cbuf, 0xffffff2b);
2214
2215    // popfq
2216    emit_opcode(cbuf, 0x9D);
2217
2218    // nop (target for branch to avoid branch to branch)
2219    emit_opcode(cbuf, 0x90);
2220  %}
2221
2222  enc_class cmpfp3(rRegI dst)
2223  %{
2224    int dstenc = $dst$$reg;
2225
2226    // movl $dst, -1
2227    if (dstenc >= 8) {
2228      emit_opcode(cbuf, Assembler::REX_B);
2229    }
2230    emit_opcode(cbuf, 0xB8 | (dstenc & 7));
2231    emit_d32(cbuf, -1);
2232
2233    // jp,s done
2234    emit_opcode(cbuf, 0x7A);
2235    emit_d8(cbuf, dstenc < 4 ? 0x08 : 0x0A);
2236
2237    // jb,s done
2238    emit_opcode(cbuf, 0x72);
2239    emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);
2240
2241    // setne $dst
2242    if (dstenc >= 4) {
2243      emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
2244    }
2245    emit_opcode(cbuf, 0x0F);
2246    emit_opcode(cbuf, 0x95);
2247    emit_opcode(cbuf, 0xC0 | (dstenc & 7));
2248
2249    // movzbl $dst, $dst
2250    if (dstenc >= 4) {
2251      emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
2252    }
2253    emit_opcode(cbuf, 0x0F);
2254    emit_opcode(cbuf, 0xB6);
2255    emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
2256  %}
2257
2258  enc_class cdql_enc(no_rax_rdx_RegI div)
2259  %{
2260    // Full implementation of Java idiv and irem; checks for
2261    // special case as described in JVM spec., p.243 & p.271.
2262    //
2263    //         normal case                           special case
2264    //
2265    // input : rax: dividend                         min_int
2266    //         reg: divisor                          -1
2267    //
2268    // output: rax: quotient  (= rax idiv reg)       min_int
2269    //         rdx: remainder (= rax irem reg)       0
2270    //
2271    //  Code sequnce:
2272    //
2273    //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
2274    //    5:   75 07/08                jne    e <normal>
2275    //    7:   33 d2                   xor    %edx,%edx
2276    //  [div >= 8 -> offset + 1]
2277    //  [REX_B]
2278    //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
2279    //    c:   74 03/04                je     11 <done>
2280    // 000000000000000e <normal>:
2281    //    e:   99                      cltd
2282    //  [div >= 8 -> offset + 1]
2283    //  [REX_B]
2284    //    f:   f7 f9                   idiv   $div
2285    // 0000000000000011 <done>:
2286
2287    // cmp    $0x80000000,%eax
2288    emit_opcode(cbuf, 0x3d);
2289    emit_d8(cbuf, 0x00);
2290    emit_d8(cbuf, 0x00);
2291    emit_d8(cbuf, 0x00);
2292    emit_d8(cbuf, 0x80);
2293
2294    // jne    e <normal>
2295    emit_opcode(cbuf, 0x75);
2296    emit_d8(cbuf, $div$$reg < 8 ? 0x07 : 0x08);
2297
2298    // xor    %edx,%edx
2299    emit_opcode(cbuf, 0x33);
2300    emit_d8(cbuf, 0xD2);
2301
2302    // cmp    $0xffffffffffffffff,%ecx
2303    if ($div$$reg >= 8) {
2304      emit_opcode(cbuf, Assembler::REX_B);
2305    }
2306    emit_opcode(cbuf, 0x83);
2307    emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
2308    emit_d8(cbuf, 0xFF);
2309
2310    // je     11 <done>
2311    emit_opcode(cbuf, 0x74);
2312    emit_d8(cbuf, $div$$reg < 8 ? 0x03 : 0x04);
2313
2314    // <normal>
2315    // cltd
2316    emit_opcode(cbuf, 0x99);
2317
2318    // idivl (note: must be emitted by the user of this rule)
2319    // <done>
2320  %}
2321
2322  enc_class cdqq_enc(no_rax_rdx_RegL div)
2323  %{
2324    // Full implementation of Java ldiv and lrem; checks for
2325    // special case as described in JVM spec., p.243 & p.271.
2326    //
2327    //         normal case                           special case
2328    //
2329    // input : rax: dividend                         min_long
2330    //         reg: divisor                          -1
2331    //
2332    // output: rax: quotient  (= rax idiv reg)       min_long
2333    //         rdx: remainder (= rax irem reg)       0
2334    //
2335    //  Code sequnce:
2336    //
2337    //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
2338    //    7:   00 00 80
2339    //    a:   48 39 d0                cmp    %rdx,%rax
2340    //    d:   75 08                   jne    17 <normal>
2341    //    f:   33 d2                   xor    %edx,%edx
2342    //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
2343    //   15:   74 05                   je     1c <done>
2344    // 0000000000000017 <normal>:
2345    //   17:   48 99                   cqto
2346    //   19:   48 f7 f9                idiv   $div
2347    // 000000000000001c <done>:
2348
2349    // mov    $0x8000000000000000,%rdx
2350    emit_opcode(cbuf, Assembler::REX_W);
2351    emit_opcode(cbuf, 0xBA);
2352    emit_d8(cbuf, 0x00);
2353    emit_d8(cbuf, 0x00);
2354    emit_d8(cbuf, 0x00);
2355    emit_d8(cbuf, 0x00);
2356    emit_d8(cbuf, 0x00);
2357    emit_d8(cbuf, 0x00);
2358    emit_d8(cbuf, 0x00);
2359    emit_d8(cbuf, 0x80);
2360
2361    // cmp    %rdx,%rax
2362    emit_opcode(cbuf, Assembler::REX_W);
2363    emit_opcode(cbuf, 0x39);
2364    emit_d8(cbuf, 0xD0);
2365
2366    // jne    17 <normal>
2367    emit_opcode(cbuf, 0x75);
2368    emit_d8(cbuf, 0x08);
2369
2370    // xor    %edx,%edx
2371    emit_opcode(cbuf, 0x33);
2372    emit_d8(cbuf, 0xD2);
2373
2374    // cmp    $0xffffffffffffffff,$div
2375    emit_opcode(cbuf, $div$$reg < 8 ? Assembler::REX_W : Assembler::REX_WB);
2376    emit_opcode(cbuf, 0x83);
2377    emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
2378    emit_d8(cbuf, 0xFF);
2379
2380    // je     1e <done>
2381    emit_opcode(cbuf, 0x74);
2382    emit_d8(cbuf, 0x05);
2383
2384    // <normal>
2385    // cqto
2386    emit_opcode(cbuf, Assembler::REX_W);
2387    emit_opcode(cbuf, 0x99);
2388
2389    // idivq (note: must be emitted by the user of this rule)
2390    // <done>
2391  %}
2392
2393  // Opcde enc_class for 8/32 bit immediate instructions with sign-extension
2394  enc_class OpcSE(immI imm)
2395  %{
2396    // Emit primary opcode and set sign-extend bit
2397    // Check for 8-bit immediate, and set sign extend bit in opcode
2398    if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2399      emit_opcode(cbuf, $primary | 0x02);
2400    } else {
2401      // 32-bit immediate
2402      emit_opcode(cbuf, $primary);
2403    }
2404  %}
2405
2406  enc_class OpcSErm(rRegI dst, immI imm)
2407  %{
2408    // OpcSEr/m
2409    int dstenc = $dst$$reg;
2410    if (dstenc >= 8) {
2411      emit_opcode(cbuf, Assembler::REX_B);
2412      dstenc -= 8;
2413    }
2414    // Emit primary opcode and set sign-extend bit
2415    // Check for 8-bit immediate, and set sign extend bit in opcode
2416    if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2417      emit_opcode(cbuf, $primary | 0x02);
2418    } else {
2419      // 32-bit immediate
2420      emit_opcode(cbuf, $primary);
2421    }
2422    // Emit r/m byte with secondary opcode, after primary opcode.
2423    emit_rm(cbuf, 0x3, $secondary, dstenc);
2424  %}
2425
2426  enc_class OpcSErm_wide(rRegL dst, immI imm)
2427  %{
2428    // OpcSEr/m
2429    int dstenc = $dst$$reg;
2430    if (dstenc < 8) {
2431      emit_opcode(cbuf, Assembler::REX_W);
2432    } else {
2433      emit_opcode(cbuf, Assembler::REX_WB);
2434      dstenc -= 8;
2435    }
2436    // Emit primary opcode and set sign-extend bit
2437    // Check for 8-bit immediate, and set sign extend bit in opcode
2438    if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2439      emit_opcode(cbuf, $primary | 0x02);
2440    } else {
2441      // 32-bit immediate
2442      emit_opcode(cbuf, $primary);
2443    }
2444    // Emit r/m byte with secondary opcode, after primary opcode.
2445    emit_rm(cbuf, 0x3, $secondary, dstenc);
2446  %}
2447
2448  enc_class Con8or32(immI imm)
2449  %{
2450    // Check for 8-bit immediate, and set sign extend bit in opcode
2451    if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2452      $$$emit8$imm$$constant;
2453    } else {
2454      // 32-bit immediate
2455      $$$emit32$imm$$constant;
2456    }
2457  %}
2458
2459  enc_class Lbl(label labl)
2460  %{
2461    // JMP, CALL
2462    Label* l = $labl$$label;
2463    emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0);
2464  %}
2465
2466  enc_class LblShort(label labl)
2467  %{
2468    // JMP, CALL
2469    Label* l = $labl$$label;
2470    int disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0;
2471    assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
2472    emit_d8(cbuf, disp);
2473  %}
2474
2475  enc_class opc2_reg(rRegI dst)
2476  %{
2477    // BSWAP
2478    emit_cc(cbuf, $secondary, $dst$$reg);
2479  %}
2480
2481  enc_class opc3_reg(rRegI dst)
2482  %{
2483    // BSWAP
2484    emit_cc(cbuf, $tertiary, $dst$$reg);
2485  %}
2486
2487  enc_class reg_opc(rRegI div)
2488  %{
2489    // INC, DEC, IDIV, IMOD, JMP indirect, ...
2490    emit_rm(cbuf, 0x3, $secondary, $div$$reg & 7);
2491  %}
2492
2493  enc_class Jcc(cmpOp cop, label labl)
2494  %{
2495    // JCC
2496    Label* l = $labl$$label;
2497    $$$emit8$primary;
2498    emit_cc(cbuf, $secondary, $cop$$cmpcode);
2499    emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0);
2500  %}
2501
2502  enc_class JccShort (cmpOp cop, label labl)
2503  %{
2504  // JCC
2505    Label *l = $labl$$label;
2506    emit_cc(cbuf, $primary, $cop$$cmpcode);
2507    int disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0;
2508    assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
2509    emit_d8(cbuf, disp);
2510  %}
2511
2512  enc_class enc_cmov(cmpOp cop)
2513  %{
2514    // CMOV
2515    $$$emit8$primary;
2516    emit_cc(cbuf, $secondary, $cop$$cmpcode);
2517  %}
2518
2519  enc_class enc_cmovf_branch(cmpOp cop, regF dst, regF src)
2520  %{
2521    // Invert sense of branch from sense of cmov
2522    emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
2523    emit_d8(cbuf, ($dst$$reg < 8 && $src$$reg < 8)
2524                  ? (UseXmmRegToRegMoveAll ? 3 : 4)
2525                  : (UseXmmRegToRegMoveAll ? 4 : 5) ); // REX
2526    // UseXmmRegToRegMoveAll ? movaps(dst, src) : movss(dst, src)
2527    if (!UseXmmRegToRegMoveAll) emit_opcode(cbuf, 0xF3);
2528    if ($dst$$reg < 8) {
2529      if ($src$$reg >= 8) {
2530        emit_opcode(cbuf, Assembler::REX_B);
2531      }
2532    } else {
2533      if ($src$$reg < 8) {
2534        emit_opcode(cbuf, Assembler::REX_R);
2535      } else {
2536        emit_opcode(cbuf, Assembler::REX_RB);
2537      }
2538    }
2539    emit_opcode(cbuf, 0x0F);
2540    emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
2541    emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2542  %}
2543
2544  enc_class enc_cmovd_branch(cmpOp cop, regD dst, regD src)
2545  %{
2546    // Invert sense of branch from sense of cmov
2547    emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
2548    emit_d8(cbuf, $dst$$reg < 8 && $src$$reg < 8 ? 4 : 5); // REX
2549
2550    //  UseXmmRegToRegMoveAll ? movapd(dst, src) : movsd(dst, src)
2551    emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
2552    if ($dst$$reg < 8) {
2553      if ($src$$reg >= 8) {
2554        emit_opcode(cbuf, Assembler::REX_B);
2555      }
2556    } else {
2557      if ($src$$reg < 8) {
2558        emit_opcode(cbuf, Assembler::REX_R);
2559      } else {
2560        emit_opcode(cbuf, Assembler::REX_RB);
2561      }
2562    }
2563    emit_opcode(cbuf, 0x0F);
2564    emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
2565    emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2566  %}
2567
2568  enc_class enc_PartialSubtypeCheck()
2569  %{
2570    Register Rrdi = as_Register(RDI_enc); // result register
2571    Register Rrax = as_Register(RAX_enc); // super class
2572    Register Rrcx = as_Register(RCX_enc); // killed
2573    Register Rrsi = as_Register(RSI_enc); // sub class
2574    Label hit, miss, cmiss;
2575
2576    MacroAssembler _masm(&cbuf);
2577    // Compare super with sub directly, since super is not in its own SSA.
2578    // The compiler used to emit this test, but we fold it in here,
2579    // to allow platform-specific tweaking on sparc.
2580    __ cmpq(Rrax, Rrsi);
2581    __ jcc(Assembler::equal, hit);
2582#ifndef PRODUCT
2583    __ lea(Rrcx, ExternalAddress((address)&SharedRuntime::_partial_subtype_ctr));
2584    __ incrementl(Address(Rrcx, 0));
2585#endif //PRODUCT
2586    __ movq(Rrdi, Address(Rrsi,
2587                          sizeof(oopDesc) +
2588                          Klass::secondary_supers_offset_in_bytes()));
2589    __ movl(Rrcx, Address(Rrdi, arrayOopDesc::length_offset_in_bytes()));
2590    __ addq(Rrdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
2591    if (UseCompressedOops) {
2592      __ encode_heap_oop(Rrax);
2593      __ repne_scanl();
2594      __ jcc(Assembler::notEqual, cmiss);
2595      __ decode_heap_oop(Rrax);
2596      __ movq(Address(Rrsi,
2597                      sizeof(oopDesc) +
2598                      Klass::secondary_super_cache_offset_in_bytes()),
2599              Rrax);
2600      __ jmp(hit);
2601      __ bind(cmiss);
2602      __ decode_heap_oop(Rrax);
2603      __ jmp(miss);
2604    } else {
2605      __ repne_scanq();
2606      __ jcc(Assembler::notEqual, miss);
2607      __ movq(Address(Rrsi,
2608                      sizeof(oopDesc) +
2609                      Klass::secondary_super_cache_offset_in_bytes()),
2610              Rrax);
2611    }
2612    __ bind(hit);
2613    if ($primary) {
2614      __ xorq(Rrdi, Rrdi);
2615    }
2616    __ bind(miss);
2617  %}
2618
2619  enc_class Java_To_Interpreter(method meth)
2620  %{
2621    // CALL Java_To_Interpreter
2622    // This is the instruction starting address for relocation info.
2623    cbuf.set_inst_mark();
2624    $$$emit8$primary;
2625    // CALL directly to the runtime
2626    emit_d32_reloc(cbuf,
2627                   (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2628                   runtime_call_Relocation::spec(),
2629                   RELOC_DISP32);
2630  %}
2631
2632  enc_class Java_Static_Call(method meth)
2633  %{
2634    // JAVA STATIC CALL
2635    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
2636    // determine who we intended to call.
2637    cbuf.set_inst_mark();
2638    $$$emit8$primary;
2639
2640    if (!_method) {
2641      emit_d32_reloc(cbuf,
2642                     (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2643                     runtime_call_Relocation::spec(),
2644                     RELOC_DISP32);
2645    } else if (_optimized_virtual) {
2646      emit_d32_reloc(cbuf,
2647                     (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2648                     opt_virtual_call_Relocation::spec(),
2649                     RELOC_DISP32);
2650    } else {
2651      emit_d32_reloc(cbuf,
2652                     (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2653                     static_call_Relocation::spec(),
2654                     RELOC_DISP32);
2655    }
2656    if (_method) {
2657      // Emit stub for static call
2658      emit_java_to_interp(cbuf);
2659    }
2660  %}
2661
2662  enc_class Java_Dynamic_Call(method meth)
2663  %{
2664    // JAVA DYNAMIC CALL
2665    // !!!!!
2666    // Generate  "movq rax, -1", placeholder instruction to load oop-info
2667    // emit_call_dynamic_prologue( cbuf );
2668    cbuf.set_inst_mark();
2669
2670    // movq rax, -1
2671    emit_opcode(cbuf, Assembler::REX_W);
2672    emit_opcode(cbuf, 0xB8 | RAX_enc);
2673    emit_d64_reloc(cbuf,
2674                   (int64_t) Universe::non_oop_word(),
2675                   oop_Relocation::spec_for_immediate(), RELOC_IMM64);
2676    address virtual_call_oop_addr = cbuf.inst_mark();
2677    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
2678    // who we intended to call.
2679    cbuf.set_inst_mark();
2680    $$$emit8$primary;
2681    emit_d32_reloc(cbuf,
2682                   (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2683                   virtual_call_Relocation::spec(virtual_call_oop_addr),
2684                   RELOC_DISP32);
2685  %}
2686
2687  enc_class Java_Compiled_Call(method meth)
2688  %{
2689    // JAVA COMPILED CALL
2690    int disp = in_bytes(methodOopDesc:: from_compiled_offset());
2691
2692    // XXX XXX offset is 128 is 1.5 NON-PRODUCT !!!
2693    // assert(-0x80 <= disp && disp < 0x80, "compiled_code_offset isn't small");
2694
2695    // callq *disp(%rax)
2696    cbuf.set_inst_mark();
2697    $$$emit8$primary;
2698    if (disp < 0x80) {
2699      emit_rm(cbuf, 0x01, $secondary, RAX_enc); // R/M byte
2700      emit_d8(cbuf, disp); // Displacement
2701    } else {
2702      emit_rm(cbuf, 0x02, $secondary, RAX_enc); // R/M byte
2703      emit_d32(cbuf, disp); // Displacement
2704    }
2705  %}
2706
2707  enc_class reg_opc_imm(rRegI dst, immI8 shift)
2708  %{
2709    // SAL, SAR, SHR
2710    int dstenc = $dst$$reg;
2711    if (dstenc >= 8) {
2712      emit_opcode(cbuf, Assembler::REX_B);
2713      dstenc -= 8;
2714    }
2715    $$$emit8$primary;
2716    emit_rm(cbuf, 0x3, $secondary, dstenc);
2717    $$$emit8$shift$$constant;
2718  %}
2719
2720  enc_class reg_opc_imm_wide(rRegL dst, immI8 shift)
2721  %{
2722    // SAL, SAR, SHR
2723    int dstenc = $dst$$reg;
2724    if (dstenc < 8) {
2725      emit_opcode(cbuf, Assembler::REX_W);
2726    } else {
2727      emit_opcode(cbuf, Assembler::REX_WB);
2728      dstenc -= 8;
2729    }
2730    $$$emit8$primary;
2731    emit_rm(cbuf, 0x3, $secondary, dstenc);
2732    $$$emit8$shift$$constant;
2733  %}
2734
2735  enc_class load_immI(rRegI dst, immI src)
2736  %{
2737    int dstenc = $dst$$reg;
2738    if (dstenc >= 8) {
2739      emit_opcode(cbuf, Assembler::REX_B);
2740      dstenc -= 8;
2741    }
2742    emit_opcode(cbuf, 0xB8 | dstenc);
2743    $$$emit32$src$$constant;
2744  %}
2745
2746  enc_class load_immL(rRegL dst, immL src)
2747  %{
2748    int dstenc = $dst$$reg;
2749    if (dstenc < 8) {
2750      emit_opcode(cbuf, Assembler::REX_W);
2751    } else {
2752      emit_opcode(cbuf, Assembler::REX_WB);
2753      dstenc -= 8;
2754    }
2755    emit_opcode(cbuf, 0xB8 | dstenc);
2756    emit_d64(cbuf, $src$$constant);
2757  %}
2758
2759  enc_class load_immUL32(rRegL dst, immUL32 src)
2760  %{
2761    // same as load_immI, but this time we care about zeroes in the high word
2762    int dstenc = $dst$$reg;
2763    if (dstenc >= 8) {
2764      emit_opcode(cbuf, Assembler::REX_B);
2765      dstenc -= 8;
2766    }
2767    emit_opcode(cbuf, 0xB8 | dstenc);
2768    $$$emit32$src$$constant;
2769  %}
2770
2771  enc_class load_immL32(rRegL dst, immL32 src)
2772  %{
2773    int dstenc = $dst$$reg;
2774    if (dstenc < 8) {
2775      emit_opcode(cbuf, Assembler::REX_W);
2776    } else {
2777      emit_opcode(cbuf, Assembler::REX_WB);
2778      dstenc -= 8;
2779    }
2780    emit_opcode(cbuf, 0xC7);
2781    emit_rm(cbuf, 0x03, 0x00, dstenc);
2782    $$$emit32$src$$constant;
2783  %}
2784
2785  enc_class load_immP31(rRegP dst, immP32 src)
2786  %{
2787    // same as load_immI, but this time we care about zeroes in the high word
2788    int dstenc = $dst$$reg;
2789    if (dstenc >= 8) {
2790      emit_opcode(cbuf, Assembler::REX_B);
2791      dstenc -= 8;
2792    }
2793    emit_opcode(cbuf, 0xB8 | dstenc);
2794    $$$emit32$src$$constant;
2795  %}
2796
2797  enc_class load_immP(rRegP dst, immP src)
2798  %{
2799    int dstenc = $dst$$reg;
2800    if (dstenc < 8) {
2801      emit_opcode(cbuf, Assembler::REX_W);
2802    } else {
2803      emit_opcode(cbuf, Assembler::REX_WB);
2804      dstenc -= 8;
2805    }
2806    emit_opcode(cbuf, 0xB8 | dstenc);
2807    // This next line should be generated from ADLC
2808    if ($src->constant_is_oop()) {
2809      emit_d64_reloc(cbuf, $src$$constant, relocInfo::oop_type, RELOC_IMM64);
2810    } else {
2811      emit_d64(cbuf, $src$$constant);
2812    }
2813  %}
2814
2815  enc_class load_immF(regF dst, immF con)
2816  %{
2817    // XXX reg_mem doesn't support RIP-relative addressing yet
2818    emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // 00 reg 101
2819    emit_float_constant(cbuf, $con$$constant);
2820  %}
2821
2822  enc_class load_immD(regD dst, immD con)
2823  %{
2824    // XXX reg_mem doesn't support RIP-relative addressing yet
2825    emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // 00 reg 101
2826    emit_double_constant(cbuf, $con$$constant);
2827  %}
2828
2829  enc_class load_conF (regF dst, immF con) %{    // Load float constant
2830    emit_opcode(cbuf, 0xF3);
2831    if ($dst$$reg >= 8) {
2832      emit_opcode(cbuf, Assembler::REX_R);
2833    }
2834    emit_opcode(cbuf, 0x0F);
2835    emit_opcode(cbuf, 0x10);
2836    emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // 00 reg 101
2837    emit_float_constant(cbuf, $con$$constant);
2838  %}
2839
2840  enc_class load_conD (regD dst, immD con) %{    // Load double constant
2841    // UseXmmLoadAndClearUpper ? movsd(dst, con) : movlpd(dst, con)
2842    emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
2843    if ($dst$$reg >= 8) {
2844      emit_opcode(cbuf, Assembler::REX_R);
2845    }
2846    emit_opcode(cbuf, 0x0F);
2847    emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
2848    emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // 00 reg 101
2849    emit_double_constant(cbuf, $con$$constant);
2850  %}
2851
2852  // Encode a reg-reg copy.  If it is useless, then empty encoding.
2853  enc_class enc_copy(rRegI dst, rRegI src)
2854  %{
2855    encode_copy(cbuf, $dst$$reg, $src$$reg);
2856  %}
2857
2858  // Encode xmm reg-reg copy.  If it is useless, then empty encoding.
2859  enc_class enc_CopyXD( RegD dst, RegD src ) %{
2860    encode_CopyXD( cbuf, $dst$$reg, $src$$reg );
2861  %}
2862
2863  enc_class enc_copy_always(rRegI dst, rRegI src)
2864  %{
2865    int srcenc = $src$$reg;
2866    int dstenc = $dst$$reg;
2867
2868    if (dstenc < 8) {
2869      if (srcenc >= 8) {
2870        emit_opcode(cbuf, Assembler::REX_B);
2871        srcenc -= 8;
2872      }
2873    } else {
2874      if (srcenc < 8) {
2875        emit_opcode(cbuf, Assembler::REX_R);
2876      } else {
2877        emit_opcode(cbuf, Assembler::REX_RB);
2878        srcenc -= 8;
2879      }
2880      dstenc -= 8;
2881    }
2882
2883    emit_opcode(cbuf, 0x8B);
2884    emit_rm(cbuf, 0x3, dstenc, srcenc);
2885  %}
2886
2887  enc_class enc_copy_wide(rRegL dst, rRegL src)
2888  %{
2889    int srcenc = $src$$reg;
2890    int dstenc = $dst$$reg;
2891
2892    if (dstenc != srcenc) {
2893      if (dstenc < 8) {
2894        if (srcenc < 8) {
2895          emit_opcode(cbuf, Assembler::REX_W);
2896        } else {
2897          emit_opcode(cbuf, Assembler::REX_WB);
2898          srcenc -= 8;
2899        }
2900      } else {
2901        if (srcenc < 8) {
2902          emit_opcode(cbuf, Assembler::REX_WR);
2903        } else {
2904          emit_opcode(cbuf, Assembler::REX_WRB);
2905          srcenc -= 8;
2906        }
2907        dstenc -= 8;
2908      }
2909      emit_opcode(cbuf, 0x8B);
2910      emit_rm(cbuf, 0x3, dstenc, srcenc);
2911    }
2912  %}
2913
  enc_class Con32(immI src)
  %{
    // Output the immediate operand as a 32-bit value, using ADLC's built-in
    // emit32 form.
    $$$emit32$src$$constant;
  %}
2919
2920  enc_class Con64(immL src)
2921  %{
2922    // Output immediate
2923    emit_d64($src$$constant);
2924  %}
2925
2926  enc_class Con32F_as_bits(immF src)
2927  %{
2928    // Output Float immediate bits
2929    jfloat jf = $src$$constant;
2930    jint jf_as_bits = jint_cast(jf);
2931    emit_d32(cbuf, jf_as_bits);
2932  %}
2933
  enc_class Con16(immI src)
  %{
    // Output the immediate operand as a 16-bit value, using ADLC's built-in
    // emit16 form.
    $$$emit16$src$$constant;
  %}
2939
2940  // How is this different from Con32??? XXX
2941  enc_class Con_d32(immI src)
2942  %{
2943    emit_d32(cbuf,$src$$constant);
2944  %}
2945
2946  enc_class conmemref (rRegP t1) %{    // Con32(storeImmI)
2947    // Output immediate memory reference
2948    emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2949    emit_d32(cbuf, 0x00);
2950  %}
2951
2952  enc_class jump_enc(rRegL switch_val, rRegI dest) %{
2953    MacroAssembler masm(&cbuf);
2954
2955    Register switch_reg = as_Register($switch_val$$reg);
2956    Register dest_reg   = as_Register($dest$$reg);
2957    address table_base  = masm.address_table_constant(_index2label);
2958
2959    // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
2960    // to do that and the compiler is using that register as one it can allocate.
2961    // So we build it all by hand.
2962    // Address index(noreg, switch_reg, Address::times_1);
2963    // ArrayAddress dispatch(table, index);
2964
2965    Address dispatch(dest_reg, switch_reg, Address::times_1);
2966
2967    masm.lea(dest_reg, InternalAddress(table_base));
2968    masm.jmp(dispatch);
2969  %}
2970
2971  enc_class jump_enc_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
2972    MacroAssembler masm(&cbuf);
2973
2974    Register switch_reg = as_Register($switch_val$$reg);
2975    Register dest_reg   = as_Register($dest$$reg);
2976    address table_base  = masm.address_table_constant(_index2label);
2977
2978    // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
2979    // to do that and the compiler is using that register as one it can allocate.
2980    // So we build it all by hand.
2981    // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant, (int)$offset$$constant);
2982    // ArrayAddress dispatch(table, index);
2983
2984    Address dispatch(dest_reg, switch_reg, (Address::ScaleFactor)$shift$$constant, (int)$offset$$constant);
2985
2986    masm.lea(dest_reg, InternalAddress(table_base));
2987    masm.jmp(dispatch);
2988  %}
2989
2990  enc_class jump_enc_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
2991    MacroAssembler masm(&cbuf);
2992
2993    Register switch_reg = as_Register($switch_val$$reg);
2994    Register dest_reg   = as_Register($dest$$reg);
2995    address table_base  = masm.address_table_constant(_index2label);
2996
2997    // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
2998    // to do that and the compiler is using that register as one it can allocate.
2999    // So we build it all by hand.
3000    // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
3001    // ArrayAddress dispatch(table, index);
3002
3003    Address dispatch(dest_reg, switch_reg, (Address::ScaleFactor)$shift$$constant);
3004    masm.lea(dest_reg, InternalAddress(table_base));
3005    masm.jmp(dispatch);
3006
3007  %}
3008
3009  enc_class lock_prefix()
3010  %{
3011    if (os::is_MP()) {
3012      emit_opcode(cbuf, 0xF0); // lock
3013    }
3014  %}
3015
3016  enc_class REX_mem(memory mem)
3017  %{
3018    if ($mem$$base >= 8) {
3019      if ($mem$$index < 8) {
3020        emit_opcode(cbuf, Assembler::REX_B);
3021      } else {
3022        emit_opcode(cbuf, Assembler::REX_XB);
3023      }
3024    } else {
3025      if ($mem$$index >= 8) {
3026        emit_opcode(cbuf, Assembler::REX_X);
3027      }
3028    }
3029  %}
3030
3031  enc_class REX_mem_wide(memory mem)
3032  %{
3033    if ($mem$$base >= 8) {
3034      if ($mem$$index < 8) {
3035        emit_opcode(cbuf, Assembler::REX_WB);
3036      } else {
3037        emit_opcode(cbuf, Assembler::REX_WXB);
3038      }
3039    } else {
3040      if ($mem$$index < 8) {
3041        emit_opcode(cbuf, Assembler::REX_W);
3042      } else {
3043        emit_opcode(cbuf, Assembler::REX_WX);
3044      }
3045    }
3046  %}
3047
3048  // for byte regs
3049  enc_class REX_breg(rRegI reg)
3050  %{
3051    if ($reg$$reg >= 4) {
3052      emit_opcode(cbuf, $reg$$reg < 8 ? Assembler::REX : Assembler::REX_B);
3053    }
3054  %}
3055
3056  // for byte regs
3057  enc_class REX_reg_breg(rRegI dst, rRegI src)
3058  %{
3059    if ($dst$$reg < 8) {
3060      if ($src$$reg >= 4) {
3061        emit_opcode(cbuf, $src$$reg < 8 ? Assembler::REX : Assembler::REX_B);
3062      }
3063    } else {
3064      if ($src$$reg < 8) {
3065        emit_opcode(cbuf, Assembler::REX_R);
3066      } else {
3067        emit_opcode(cbuf, Assembler::REX_RB);
3068      }
3069    }
3070  %}
3071
3072  // for byte regs
3073  enc_class REX_breg_mem(rRegI reg, memory mem)
3074  %{
3075    if ($reg$$reg < 8) {
3076      if ($mem$$base < 8) {
3077        if ($mem$$index >= 8) {
3078          emit_opcode(cbuf, Assembler::REX_X);
3079        } else if ($reg$$reg >= 4) {
3080          emit_opcode(cbuf, Assembler::REX);
3081        }
3082      } else {
3083        if ($mem$$index < 8) {
3084          emit_opcode(cbuf, Assembler::REX_B);
3085        } else {
3086          emit_opcode(cbuf, Assembler::REX_XB);
3087        }
3088      }
3089    } else {
3090      if ($mem$$base < 8) {
3091        if ($mem$$index < 8) {
3092          emit_opcode(cbuf, Assembler::REX_R);
3093        } else {
3094          emit_opcode(cbuf, Assembler::REX_RX);
3095        }
3096      } else {
3097        if ($mem$$index < 8) {
3098          emit_opcode(cbuf, Assembler::REX_RB);
3099        } else {
3100          emit_opcode(cbuf, Assembler::REX_RXB);
3101        }
3102      }
3103    }
3104  %}
3105
3106  enc_class REX_reg(rRegI reg)
3107  %{
3108    if ($reg$$reg >= 8) {
3109      emit_opcode(cbuf, Assembler::REX_B);
3110    }
3111  %}
3112
3113  enc_class REX_reg_wide(rRegI reg)
3114  %{
3115    if ($reg$$reg < 8) {
3116      emit_opcode(cbuf, Assembler::REX_W);
3117    } else {
3118      emit_opcode(cbuf, Assembler::REX_WB);
3119    }
3120  %}
3121
3122  enc_class REX_reg_reg(rRegI dst, rRegI src)
3123  %{
3124    if ($dst$$reg < 8) {
3125      if ($src$$reg >= 8) {
3126        emit_opcode(cbuf, Assembler::REX_B);
3127      }
3128    } else {
3129      if ($src$$reg < 8) {
3130        emit_opcode(cbuf, Assembler::REX_R);
3131      } else {
3132        emit_opcode(cbuf, Assembler::REX_RB);
3133      }
3134    }
3135  %}
3136
3137  enc_class REX_reg_reg_wide(rRegI dst, rRegI src)
3138  %{
3139    if ($dst$$reg < 8) {
3140      if ($src$$reg < 8) {
3141        emit_opcode(cbuf, Assembler::REX_W);
3142      } else {
3143        emit_opcode(cbuf, Assembler::REX_WB);
3144      }
3145    } else {
3146      if ($src$$reg < 8) {
3147        emit_opcode(cbuf, Assembler::REX_WR);
3148      } else {
3149        emit_opcode(cbuf, Assembler::REX_WRB);
3150      }
3151    }
3152  %}
3153
3154  enc_class REX_reg_mem(rRegI reg, memory mem)
3155  %{
3156    if ($reg$$reg < 8) {
3157      if ($mem$$base < 8) {
3158        if ($mem$$index >= 8) {
3159          emit_opcode(cbuf, Assembler::REX_X);
3160        }
3161      } else {
3162        if ($mem$$index < 8) {
3163          emit_opcode(cbuf, Assembler::REX_B);
3164        } else {
3165          emit_opcode(cbuf, Assembler::REX_XB);
3166        }
3167      }
3168    } else {
3169      if ($mem$$base < 8) {
3170        if ($mem$$index < 8) {
3171          emit_opcode(cbuf, Assembler::REX_R);
3172        } else {
3173          emit_opcode(cbuf, Assembler::REX_RX);
3174        }
3175      } else {
3176        if ($mem$$index < 8) {
3177          emit_opcode(cbuf, Assembler::REX_RB);
3178        } else {
3179          emit_opcode(cbuf, Assembler::REX_RXB);
3180        }
3181      }
3182    }
3183  %}
3184
3185  enc_class REX_reg_mem_wide(rRegL reg, memory mem)
3186  %{
3187    if ($reg$$reg < 8) {
3188      if ($mem$$base < 8) {
3189        if ($mem$$index < 8) {
3190          emit_opcode(cbuf, Assembler::REX_W);
3191        } else {
3192          emit_opcode(cbuf, Assembler::REX_WX);
3193        }
3194      } else {
3195        if ($mem$$index < 8) {
3196          emit_opcode(cbuf, Assembler::REX_WB);
3197        } else {
3198          emit_opcode(cbuf, Assembler::REX_WXB);
3199        }
3200      }
3201    } else {
3202      if ($mem$$base < 8) {
3203        if ($mem$$index < 8) {
3204          emit_opcode(cbuf, Assembler::REX_WR);
3205        } else {
3206          emit_opcode(cbuf, Assembler::REX_WRX);
3207        }
3208      } else {
3209        if ($mem$$index < 8) {
3210          emit_opcode(cbuf, Assembler::REX_WRB);
3211        } else {
3212          emit_opcode(cbuf, Assembler::REX_WRXB);
3213        }
3214      }
3215    }
3216  %}
3217
3218  enc_class reg_mem(rRegI ereg, memory mem)
3219  %{
3220    // High registers handle in encode_RegMem
3221    int reg = $ereg$$reg;
3222    int base = $mem$$base;
3223    int index = $mem$$index;
3224    int scale = $mem$$scale;
3225    int disp = $mem$$disp;
3226    bool disp_is_oop = $mem->disp_is_oop();
3227
3228    encode_RegMem(cbuf, reg, base, index, scale, disp, disp_is_oop);
3229  %}
3230
3231  enc_class RM_opc_mem(immI rm_opcode, memory mem)
3232  %{
3233    int rm_byte_opcode = $rm_opcode$$constant;
3234
3235    // High registers handle in encode_RegMem
3236    int base = $mem$$base;
3237    int index = $mem$$index;
3238    int scale = $mem$$scale;
3239    int displace = $mem$$disp;
3240
3241    bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when
3242                                            // working with static
3243                                            // globals
3244    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace,
3245                  disp_is_oop);
3246  %}
3247
3248  enc_class reg_lea(rRegI dst, rRegI src0, immI src1)
3249  %{
3250    int reg_encoding = $dst$$reg;
3251    int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
3252    int index        = 0x04;            // 0x04 indicates no index
3253    int scale        = 0x00;            // 0x00 indicates no scale
3254    int displace     = $src1$$constant; // 0x00 indicates no displacement
3255    bool disp_is_oop = false;
3256    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace,
3257                  disp_is_oop);
3258  %}
3259
3260  enc_class neg_reg(rRegI dst)
3261  %{
3262    int dstenc = $dst$$reg;
3263    if (dstenc >= 8) {
3264      emit_opcode(cbuf, Assembler::REX_B);
3265      dstenc -= 8;
3266    }
3267    // NEG $dst
3268    emit_opcode(cbuf, 0xF7);
3269    emit_rm(cbuf, 0x3, 0x03, dstenc);
3270  %}
3271
3272  enc_class neg_reg_wide(rRegI dst)
3273  %{
3274    int dstenc = $dst$$reg;
3275    if (dstenc < 8) {
3276      emit_opcode(cbuf, Assembler::REX_W);
3277    } else {
3278      emit_opcode(cbuf, Assembler::REX_WB);
3279      dstenc -= 8;
3280    }
3281    // NEG $dst
3282    emit_opcode(cbuf, 0xF7);
3283    emit_rm(cbuf, 0x3, 0x03, dstenc);
3284  %}
3285
3286  enc_class setLT_reg(rRegI dst)
3287  %{
3288    int dstenc = $dst$$reg;
3289    if (dstenc >= 8) {
3290      emit_opcode(cbuf, Assembler::REX_B);
3291      dstenc -= 8;
3292    } else if (dstenc >= 4) {
3293      emit_opcode(cbuf, Assembler::REX);
3294    }
3295    // SETLT $dst
3296    emit_opcode(cbuf, 0x0F);
3297    emit_opcode(cbuf, 0x9C);
3298    emit_rm(cbuf, 0x3, 0x0, dstenc);
3299  %}
3300
3301  enc_class setNZ_reg(rRegI dst)
3302  %{
3303    int dstenc = $dst$$reg;
3304    if (dstenc >= 8) {
3305      emit_opcode(cbuf, Assembler::REX_B);
3306      dstenc -= 8;
3307    } else if (dstenc >= 4) {
3308      emit_opcode(cbuf, Assembler::REX);
3309    }
3310    // SETNZ $dst
3311    emit_opcode(cbuf, 0x0F);
3312    emit_opcode(cbuf, 0x95);
3313    emit_rm(cbuf, 0x3, 0x0, dstenc);
3314  %}
3315
3316  enc_class enc_cmpLTP(no_rcx_RegI p, no_rcx_RegI q, no_rcx_RegI y,
3317                       rcx_RegI tmp)
3318  %{
3319    // cadd_cmpLT
3320
3321    int tmpReg = $tmp$$reg;
3322
3323    int penc = $p$$reg;
3324    int qenc = $q$$reg;
3325    int yenc = $y$$reg;
3326
3327    // subl $p,$q
3328    if (penc < 8) {
3329      if (qenc >= 8) {
3330        emit_opcode(cbuf, Assembler::REX_B);
3331      }
3332    } else {
3333      if (qenc < 8) {
3334        emit_opcode(cbuf, Assembler::REX_R);
3335      } else {
3336        emit_opcode(cbuf, Assembler::REX_RB);
3337      }
3338    }
3339    emit_opcode(cbuf, 0x2B);
3340    emit_rm(cbuf, 0x3, penc & 7, qenc & 7);
3341
3342    // sbbl $tmp, $tmp
3343    emit_opcode(cbuf, 0x1B);
3344    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
3345
3346    // andl $tmp, $y
3347    if (yenc >= 8) {
3348      emit_opcode(cbuf, Assembler::REX_B);
3349    }
3350    emit_opcode(cbuf, 0x23);
3351    emit_rm(cbuf, 0x3, tmpReg, yenc & 7);
3352
3353    // addl $p,$tmp
3354    if (penc >= 8) {
3355        emit_opcode(cbuf, Assembler::REX_R);
3356    }
3357    emit_opcode(cbuf, 0x03);
3358    emit_rm(cbuf, 0x3, penc & 7, tmpReg);
3359  %}
3360
3361  // Compare the lonogs and set -1, 0, or 1 into dst
3362  enc_class cmpl3_flag(rRegL src1, rRegL src2, rRegI dst)
3363  %{
3364    int src1enc = $src1$$reg;
3365    int src2enc = $src2$$reg;
3366    int dstenc = $dst$$reg;
3367
3368    // cmpq $src1, $src2
3369    if (src1enc < 8) {
3370      if (src2enc < 8) {
3371        emit_opcode(cbuf, Assembler::REX_W);
3372      } else {
3373        emit_opcode(cbuf, Assembler::REX_WB);
3374      }
3375    } else {
3376      if (src2enc < 8) {
3377        emit_opcode(cbuf, Assembler::REX_WR);
3378      } else {
3379        emit_opcode(cbuf, Assembler::REX_WRB);
3380      }
3381    }
3382    emit_opcode(cbuf, 0x3B);
3383    emit_rm(cbuf, 0x3, src1enc & 7, src2enc & 7);
3384
3385    // movl $dst, -1
3386    if (dstenc >= 8) {
3387      emit_opcode(cbuf, Assembler::REX_B);
3388    }
3389    emit_opcode(cbuf, 0xB8 | (dstenc & 7));
3390    emit_d32(cbuf, -1);
3391
3392    // jl,s done
3393    emit_opcode(cbuf, 0x7C);
3394    emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);
3395
3396    // setne $dst
3397    if (dstenc >= 4) {
3398      emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
3399    }
3400    emit_opcode(cbuf, 0x0F);
3401    emit_opcode(cbuf, 0x95);
3402    emit_opcode(cbuf, 0xC0 | (dstenc & 7));
3403
3404    // movzbl $dst, $dst
3405    if (dstenc >= 4) {
3406      emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
3407    }
3408    emit_opcode(cbuf, 0x0F);
3409    emit_opcode(cbuf, 0xB6);
3410    emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
3411  %}
3412
3413  enc_class Push_ResultXD(regD dst) %{
3414    int dstenc = $dst$$reg;
3415
3416    store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [RSP]
3417
3418    // UseXmmLoadAndClearUpper ? movsd dst,[rsp] : movlpd dst,[rsp]
3419    emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
3420    if (dstenc >= 8) {
3421      emit_opcode(cbuf, Assembler::REX_R);
3422    }
3423    emit_opcode  (cbuf, 0x0F );
3424    emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12 );
3425    encode_RegMem(cbuf, dstenc, RSP_enc, 0x4, 0, 0, false);
3426
3427    // add rsp,8
3428    emit_opcode(cbuf, Assembler::REX_W);
3429    emit_opcode(cbuf,0x83);
3430    emit_rm(cbuf,0x3, 0x0, RSP_enc);
3431    emit_d8(cbuf,0x08);
3432  %}
3433
3434  enc_class Push_SrcXD(regD src) %{
3435    int srcenc = $src$$reg;
3436
3437    // subq rsp,#8
3438    emit_opcode(cbuf, Assembler::REX_W);
3439    emit_opcode(cbuf, 0x83);
3440    emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3441    emit_d8(cbuf, 0x8);
3442
3443    // movsd [rsp],src
3444    emit_opcode(cbuf, 0xF2);
3445    if (srcenc >= 8) {
3446      emit_opcode(cbuf, Assembler::REX_R);
3447    }
3448    emit_opcode(cbuf, 0x0F);
3449    emit_opcode(cbuf, 0x11);
3450    encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false);
3451
3452    // fldd [rsp]
3453    emit_opcode(cbuf, 0x66);
3454    emit_opcode(cbuf, 0xDD);
3455    encode_RegMem(cbuf, 0x0, RSP_enc, 0x4, 0, 0, false);
3456  %}
3457
3458
3459  enc_class movq_ld(regD dst, memory mem) %{
3460    MacroAssembler _masm(&cbuf);
3461    Address madr = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
3462    __ movq(as_XMMRegister($dst$$reg), madr);
3463  %}
3464
3465  enc_class movq_st(memory mem, regD src) %{
3466    MacroAssembler _masm(&cbuf);
3467    Address madr = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
3468    __ movq(madr, as_XMMRegister($src$$reg));
3469  %}
3470
3471  enc_class pshufd_8x8(regF dst, regF src) %{
3472    MacroAssembler _masm(&cbuf);
3473
3474    encode_CopyXD(cbuf, $dst$$reg, $src$$reg);
3475    __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg));
3476    __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00);
3477  %}
3478
3479  enc_class pshufd_4x16(regF dst, regF src) %{
3480    MacroAssembler _masm(&cbuf);
3481
3482    __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00);
3483  %}
3484
3485  enc_class pshufd(regD dst, regD src, int mode) %{
3486    MacroAssembler _masm(&cbuf);
3487
3488    __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode);
3489  %}
3490
3491  enc_class pxor(regD dst, regD src) %{
3492    MacroAssembler _masm(&cbuf);
3493
3494    __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg));
3495  %}
3496
3497  enc_class mov_i2x(regD dst, rRegI src) %{
3498    MacroAssembler _masm(&cbuf);
3499
3500    __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg));
3501  %}
3502
3503  // obj: object to lock
3504  // box: box address (header location) -- killed
3505  // tmp: rax -- killed
3506  // scr: rbx -- killed
3507  //
3508  // What follows is a direct transliteration of fast_lock() and fast_unlock()
3509  // from i486.ad.  See that file for comments.
3510  // TODO: where possible switch from movq (r, 0) to movl(r,0) and
3511  // use the shorter encoding.  (Movl clears the high-order 32-bits).
      //
      // Three code shapes are generated, selected at code-emission time by
      // EmitSync:
      //   EmitSync & 1 : store unused_mark into the box and compare rsp with 0.
      //   EmitSync & 2 : stack-lock CAS plus the recursive-lock check only.
      //   otherwise    : as above, plus a path for inflated monitors (mark
      //                  word bit 0x02 set).
      // NOTE(review): the outcome is presumably reported to the caller in ZF
      // (ZF=1 meaning success), matching the comments in Fast_Unlock -- confirm.
3512
3513
3514  enc_class Fast_Lock(rRegP obj, rRegP box, rax_RegI tmp, rRegP scr)
3515  %{
3516    Register objReg = as_Register((int)$obj$$reg);
3517    Register boxReg = as_Register((int)$box$$reg);
3518    Register tmpReg = as_Register($tmp$$reg);
3519    Register scrReg = as_Register($scr$$reg);
3520    MacroAssembler masm(&cbuf);
3521
3522    // Verify uniqueness of register assignments -- necessary but not sufficient
3523    assert (objReg != boxReg && objReg != tmpReg &&
3524            objReg != scrReg && tmpReg != scrReg, "invariant") ;
3525
        // Optional statistics: count every entry into the fast-lock sequence.
3526    if (_counters != NULL) {
3527      masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
3528    }
3529    if (EmitSync & 1) {
            // Minimal variant: mark the box as unused and compare rsp with 0.
            // NOTE(review): rsp is presumably never 0, so this leaves ZF=0 and
            // always reports failure -- confirm.
3530        masm.movptr (Address(boxReg, 0), intptr_t(markOopDesc::unused_mark())) ;
3531        masm.cmpq   (rsp, 0) ;
3532    } else
3533    if (EmitSync & 2) {
3534        Label DONE_LABEL;
3535        if (UseBiasedLocking) {
3536           // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
3537          masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
3538        }
            // tmpReg = mark word | 1; save it in the box as the displaced
            // header, then try to CAS the box address into the mark word.
3539        masm.movl(tmpReg, 0x1);
3540        masm.orq(tmpReg, Address(objReg, 0));
3541        masm.movq(Address(boxReg, 0), tmpReg);
3542        if (os::is_MP()) {
3543          masm.lock();
3544        }
3545        masm.cmpxchgq(boxReg, Address(objReg, 0)); // Updates tmpReg
3546        masm.jcc(Assembler::equal, DONE_LABEL);
3547
3548        // Recursive locking
            // tmpReg now holds the mark word seen by the failed CAS.  Subtract
            // rsp and mask with (7 - page size); the masked value is stored in
            // the box, and the AND leaves ZF=1 when it is zero.
3549        masm.subq(tmpReg, rsp);
3550        masm.andq(tmpReg, 7 - os::vm_page_size());
3551        masm.movq(Address(boxReg, 0), tmpReg);
3552
3553        masm.bind(DONE_LABEL);
3554        masm.nop(); // avoid branch to branch
3555    } else {
3556        Label DONE_LABEL, IsInflated, Egress;
3557
            // Load the mark word and branch on bit 0x02.
3558        masm.movq  (tmpReg, Address(objReg, 0)) ;
3559        masm.testq (tmpReg, 0x02) ;         // inflated vs stack-locked|neutral|biased
3560        masm.jcc   (Assembler::notZero, IsInflated) ;
3561
3562        // it's stack-locked, biased or neutral
3563        // TODO: optimize markword triage order to reduce the number of
3564        // conditional branches in the most common cases.
3565        // Beware -- there's a subtle invariant that fetch of the markword
3566        // at [FETCH], below, will never observe a biased encoding (*101b).
3567        // If this invariant is not held we'll suffer exclusion (safety) failure.
3568
3569        if (UseBiasedLocking) {
3570          masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, _counters);
3571          masm.movq  (tmpReg, Address(objReg, 0)) ;        // [FETCH]
3572        }
3573
            // Stack-lock attempt: box = mark | 1, then CAS the box address
            // into the object's mark word.
3574        masm.orq   (tmpReg, 1) ;
3575        masm.movq  (Address(boxReg, 0), tmpReg) ;
3576        if (os::is_MP()) { masm.lock(); }
3577        masm.cmpxchgq(boxReg, Address(objReg, 0)); // Updates tmpReg
3578        if (_counters != NULL) {
3579           masm.cond_inc32(Assembler::equal,
3580                           ExternalAddress((address) _counters->fast_path_entry_count_addr()));
3581        }
3582        masm.jcc   (Assembler::equal, DONE_LABEL);
3583
3584        // Recursive locking
            // Same computation as in the EmitSync & 2 variant: the masked
            // (mark - rsp) value goes into the box and sets ZF.
3585        masm.subq  (tmpReg, rsp);
3586        masm.andq  (tmpReg, 7 - os::vm_page_size());
3587        masm.movq  (Address(boxReg, 0), tmpReg);
3588        if (_counters != NULL) {
3589           masm.cond_inc32(Assembler::equal,
3590                           ExternalAddress((address) _counters->fast_path_entry_count_addr()));
3591        }
3592        masm.jmp   (DONE_LABEL) ;
3593
3594        masm.bind  (IsInflated) ;
3595        // It's inflated
3596
3597        // TODO: someday avoid the ST-before-CAS penalty by
3598        // relocating (deferring) the following ST.
3599        // We should also think about trying a CAS without having
3600        // fetched _owner.  If the CAS is successful we may
3601        // avoid an RTO->RTS upgrade on the $line.
3602        masm.movptr(Address(boxReg, 0), intptr_t(markOopDesc::unused_mark())) ;
3603
            // Keep the mark word in boxReg and load the owner field into tmpReg.
            // NOTE(review): the "-2" on the field offsets presumably cancels the
            // 0x02 tag bit still present in the mark word -- confirm.
3604        masm.movq  (boxReg, tmpReg) ;
3605        masm.movq  (tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3606        masm.testq (tmpReg, tmpReg) ;
            // A non-zero owner ends the fast path here with ZF=0.
3607        masm.jcc   (Assembler::notZero, DONE_LABEL) ;
3608
3609        // It's inflated and appears unlocked
            // tmpReg is zero here, so the CAS installs r15_thread as owner only
            // if the owner field is still zero.
3610        if (os::is_MP()) { masm.lock(); }
3611        masm.cmpxchgq(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3612        // Intentional fall-through into DONE_LABEL ...
3613
3614        masm.bind  (DONE_LABEL) ;
3615        masm.nop   () ;                 // avoid jmp to jmp
3616    }
3617  %}
3618
3619  // obj: object to unlock
3620  // box: box address (displaced header location), killed
3621  // RBX: killed tmp; cannot be obj nor box
      //
      // Three code shapes are generated, selected at code-emission time by
      // EmitSync:
      //   EmitSync & 4 : only compare rsp with 0.
      //   EmitSync & 8 : recursive-unlock check plus a CAS that restores the
      //                  displaced header.
      //   otherwise    : as above, plus a path for inflated monitors.
      // The outcome is reported in ZF (see the LGoSlowPath / LSuccess comments
      // in the body: ZF=0 failure, ZF=1 success).
3622  enc_class Fast_Unlock(rRegP obj, rax_RegP box, rRegP tmp)
3623  %{
3624
3625    Register objReg = as_Register($obj$$reg);
3626    Register boxReg = as_Register($box$$reg);
3627    Register tmpReg = as_Register($tmp$$reg);
3628    MacroAssembler masm(&cbuf);
3629
3630    if (EmitSync & 4) {
           // NOTE(review): rsp is presumably never 0, so this leaves ZF=0 and
           // always reports failure -- confirm.
3631       masm.cmpq  (rsp, 0) ;
3632    } else
3633    if (EmitSync & 8) {
3634       Label DONE_LABEL;
3635       if (UseBiasedLocking) {
3636         masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3637       }
3638
3639       // Check whether the displaced header is 0
3640       //(=> recursive unlock)
3641       masm.movq(tmpReg, Address(boxReg, 0));
3642       masm.testq(tmpReg, tmpReg);
3643       masm.jcc(Assembler::zero, DONE_LABEL);
3644
3645       // If not recursive lock, reset the header to displaced header
3646       if (os::is_MP()) {
3647         masm.lock();
3648       }
3649       masm.cmpxchgq(tmpReg, Address(objReg, 0)); // Uses RAX which is box
3650       masm.bind(DONE_LABEL);
3651       masm.nop(); // avoid branch to branch
3652    } else {
3653       Label DONE_LABEL, Stacked, CheckSucc ;
3654
3655       if (UseBiasedLocking) {
3656         masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3657       }
3658
           // Load the mark word.  A zero displaced header in the box ends the
           // sequence immediately with ZF=1 from the compare; otherwise bit
           // 0x02 of the mark word separates inflated from stack-locked.
3659       masm.movq  (tmpReg, Address(objReg, 0)) ;
3660       masm.cmpq  (Address(boxReg, 0), (int)NULL_WORD) ;
3661       masm.jcc   (Assembler::zero, DONE_LABEL) ;
3662       masm.testq (tmpReg, 0x02) ;
3663       masm.jcc   (Assembler::zero, Stacked) ;
3664
3665       // It's inflated
           // boxReg = (owner ^ current thread) | recursions.  A non-zero result
           // (different owner or a recursive hold) exits with ZF=0.
           // NOTE(review): the "-2" on the field offsets presumably cancels the
           // 0x02 tag bit carried in tmpReg -- confirm.
3666       masm.movq  (boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3667       masm.xorq  (boxReg, r15_thread) ;
3668       masm.orq   (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
3669       masm.jcc   (Assembler::notZero, DONE_LABEL) ;
           // If either cxq or EntryList is non-zero, go to CheckSucc; otherwise
           // clear the owner field and finish.
3670       masm.movq  (boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
3671       masm.orq   (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
3672       masm.jcc   (Assembler::notZero, CheckSucc) ;
3673       masm.mov64 (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int)NULL_WORD) ;
3674       masm.jmp   (DONE_LABEL) ;
3675
           // The successor-checking code is emitted only when EmitSync bit
           // 65536 is clear; when the bit is set, CheckSucc is instead bound at
           // the end next to DONE_LABEL.
3676       if ((EmitSync & 65536) == 0) {
3677         Label LSuccess, LGoSlowPath ;
3678         masm.bind  (CheckSucc) ;
3679         masm.cmpq  (Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int)NULL_WORD) ;
3680         masm.jcc   (Assembler::zero, LGoSlowPath) ;
3681
3682         // I'd much rather use lock:andl m->_owner, 0 as it's faster than the
3683         // explicit ST;MEMBAR combination, but masm doesn't currently support
3684         // "ANDQ M,IMM".  Don't use MFENCE here.  lock:add to TOS, xchg, etc
3685         // are all faster when the write buffer is populated.
3686         masm.movptr (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int)NULL_WORD) ;
3687         if (os::is_MP()) {
3688            masm.lock () ; masm.addq (Address(rsp, 0), 0) ;
3689         }
             // Re-read succ after the owner store; if it is set, report success.
3690         masm.cmpq  (Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int)NULL_WORD) ;
3691         masm.jcc   (Assembler::notZero, LSuccess) ;
3692
             // succ is clear: try to CAS the owner field from NULL back to the
             // current thread.  A failed CAS branches to LSuccess.
3693         masm.movptr (boxReg, (int)NULL_WORD) ;                   // box is really RAX
3694         if (os::is_MP()) { masm.lock(); }
3695         masm.cmpxchgq (r15_thread, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
3696         masm.jcc   (Assembler::notEqual, LSuccess) ;
3697         // Intentional fall-through into slow-path
3698
3699         masm.bind  (LGoSlowPath) ;
3700         masm.orl   (boxReg, 1) ;                      // set ICC.ZF=0 to indicate failure
3701         masm.jmp   (DONE_LABEL) ;
3702
3703         masm.bind  (LSuccess) ;
3704         masm.testl (boxReg, 0) ;                      // set ICC.ZF=1 to indicate success
3705         masm.jmp   (DONE_LABEL) ;
3706       }
3707
           // Stack-locked: CAS the displaced header from the box back into the
           // object's mark word; rax (the box register) is the compare value.
3708       masm.bind  (Stacked) ;
3709       masm.movq  (tmpReg, Address (boxReg, 0)) ;      // re-fetch
3710       if (os::is_MP()) { masm.lock(); }
3711       masm.cmpxchgq(tmpReg, Address(objReg, 0)); // Uses RAX which is box
3712
3713       if (EmitSync & 65536) {
3714          masm.bind (CheckSucc) ;
3715       }
3716       masm.bind(DONE_LABEL);
3717       if (EmitSync & 32768) {
3718          masm.nop();                      // avoid branch to branch
3719       }
3720    }
3721  %}
3722
3723  enc_class enc_String_Compare()
3724  %{
        // Emits an inline comparison of two java.lang.String objects whose
        // references arrive in rsi and rdi.  The result is left in rcx: either
        // the difference of the first mismatching pair of characters, or the
        // length difference popped from the stack when the strings agree up
        // to the shorter length.  rax, rbx, rcx, rdi and rsi are all
        // overwritten, and one 8-byte stack slot is used temporarily.
3725    Label RCX_GOOD_LABEL, LENGTH_DIFF_LABEL,
3726          POP_LABEL, DONE_LABEL, CONT_LABEL,
3727          WHILE_HEAD_LABEL;
3728    MacroAssembler masm(&cbuf);
3729
3730    // Get the first character position in both strings
3731    //         [8] char array, [12] offset, [16] count
3732    int value_offset  = java_lang_String::value_offset_in_bytes();
3733    int offset_offset = java_lang_String::offset_offset_in_bytes();
3734    int count_offset  = java_lang_String::count_offset_in_bytes();
3735    int base_offset   = arrayOopDesc::base_offset_in_bytes(T_CHAR);
3736
        // rax = address of the first char of the rsi string,
        // rbx = address of the first char of the rdi string
        // (array base + offset * 2 + array header size).
3737    masm.load_heap_oop(rax, Address(rsi, value_offset));
3738    masm.movl(rcx, Address(rsi, offset_offset));
3739    masm.leaq(rax, Address(rax, rcx, Address::times_2, base_offset));
3740    masm.load_heap_oop(rbx, Address(rdi, value_offset));
3741    masm.movl(rcx, Address(rdi, offset_offset));
3742    masm.leaq(rbx, Address(rbx, rcx, Address::times_2, base_offset));
3743
3744    // Compute the minimum of the string lengths(rsi) and the
3745    // difference of the string lengths (stack)
3746
3747    masm.movl(rdi, Address(rdi, count_offset));
3748    masm.movl(rsi, Address(rsi, count_offset));
3749    masm.movl(rcx, rdi);
3750    masm.subl(rdi, rsi);
3751    masm.pushq(rdi);
        // The cmov consumes the flags produced by the subl above.
3752    masm.cmovl(Assembler::lessEqual, rsi, rcx);
3753
3754    // Is the minimum length zero?
        // (RCX_GOOD_LABEL is bound here but nothing in this block jumps to it.)
3755    masm.bind(RCX_GOOD_LABEL);
3756    masm.testl(rsi, rsi);
3757    masm.jcc(Assembler::zero, LENGTH_DIFF_LABEL);
3758
3759    // Load first characters
3760    masm.load_unsigned_word(rcx, Address(rbx, 0));
3761    masm.load_unsigned_word(rdi, Address(rax, 0));
3762
3763    // Compare first characters
3764    masm.subl(rcx, rdi);
3765    masm.jcc(Assembler::notZero,  POP_LABEL);
3766    masm.decrementl(rsi);
3767    masm.jcc(Assembler::zero, LENGTH_DIFF_LABEL);
3768
3769    {
3770      // Check after comparing first character to see if strings are equivalent
3771      Label LSkip2;
3772      // Check if the strings start at same location
3773      masm.cmpq(rbx, rax);
3774      masm.jcc(Assembler::notEqual, LSkip2);
3775
3776      // Check if the length difference is zero (from stack)
3777      masm.cmpl(Address(rsp, 0), 0x0);
3778      masm.jcc(Assembler::equal,  LENGTH_DIFF_LABEL);
3779
3780      // Strings might not be equivalent
3781      masm.bind(LSkip2);
3782    }
3783
3784    // Shift RAX and RBX to the end of the arrays, negate min
        // The extra displacement of 2 skips the first character, which has
        // already been compared; rsi then counts up from -remaining to 0.
3785    masm.leaq(rax, Address(rax, rsi, Address::times_2, 2));
3786    masm.leaq(rbx, Address(rbx, rsi, Address::times_2, 2));
3787    masm.negq(rsi);
3788
3789    // Compare the rest of the characters
3790    masm.bind(WHILE_HEAD_LABEL);
3791    masm.load_unsigned_word(rcx, Address(rbx, rsi, Address::times_2, 0));
3792    masm.load_unsigned_word(rdi, Address(rax, rsi, Address::times_2, 0));
3793    masm.subl(rcx, rdi);
3794    masm.jcc(Assembler::notZero, POP_LABEL);
3795    masm.incrementq(rsi);
3796    masm.jcc(Assembler::notZero, WHILE_HEAD_LABEL);
3797
3798    // Strings are equal up to min length.  Return the length difference.
3799    masm.bind(LENGTH_DIFF_LABEL);
3800    masm.popq(rcx);
3801    masm.jmp(DONE_LABEL);
3802
3803    // Discard the stored length difference
3804    masm.bind(POP_LABEL);
3805    masm.addq(rsp, 8);
3806
3807    // That's it
3808    masm.bind(DONE_LABEL);
3809  %}
3810
3811  enc_class enc_rethrow()
3812  %{
        // Emits a 5-byte "jmp rel32" to OptoRuntime::rethrow_stub(), tagged
        // with a runtime-call relocation.
3813    cbuf.set_inst_mark();
3814    emit_opcode(cbuf, 0xE9); // jmp entry
        // code_end() is read after the opcode byte has been emitted, so the
        // "- 4" makes the displacement relative to the end of the instruction.
3815    emit_d32_reloc(cbuf,
3816                   (int) (OptoRuntime::rethrow_stub() - cbuf.code_end() - 4),
3817                   runtime_call_Relocation::spec(),
3818                   RELOC_DISP32);
3819  %}
3820
3821  enc_class absF_encoding(regF dst)
3822  %{
        // Hand-encodes opcode 0F 54 (andps) with $dst as the register operand
        // and the constant at StubRoutines::amd64::float_sign_mask() as the
        // memory operand, addressed through a relocated 32-bit displacement.
3823    int dstenc = $dst$$reg;
3824    address signmask_address = (address) StubRoutines::amd64::float_sign_mask();
3825
3826    cbuf.set_inst_mark();
        // Registers 8..15 need a REX.R prefix; only the low 3 bits of the
        // register number go into the ModRM byte.
3827    if (dstenc >= 8) {
3828      emit_opcode(cbuf, Assembler::REX_R);
3829      dstenc -= 8;
3830    }
3831    // XXX reg_mem doesn't support RIP-relative addressing yet
3832    emit_opcode(cbuf, 0x0F);
3833    emit_opcode(cbuf, 0x54);
3834    emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3835    emit_d32_reloc(cbuf, signmask_address);
3836  %}
3837
3838  enc_class absD_encoding(regD dst)
3839  %{
        // Double-precision counterpart of absF_encoding: hand-encodes
        // 66 0F 54 (andpd) with $dst as the register operand and the constant
        // at StubRoutines::amd64::double_sign_mask() as the memory operand.
3840    int dstenc = $dst$$reg;
3841    address signmask_address = (address) StubRoutines::amd64::double_sign_mask();
3842
3843    cbuf.set_inst_mark();
        // The 0x66 operand-size prefix is emitted before any REX prefix.
3844    emit_opcode(cbuf, 0x66);
3845    if (dstenc >= 8) {
3846      emit_opcode(cbuf, Assembler::REX_R);
3847      dstenc -= 8;
3848    }
3849    // XXX reg_mem doesn't support RIP-relative addressing yet
3850    emit_opcode(cbuf, 0x0F);
3851    emit_opcode(cbuf, 0x54);
3852    emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3853    emit_d32_reloc(cbuf, signmask_address);
3854  %}
3855
3856  enc_class negF_encoding(regF dst)
3857  %{
        // Hand-encodes opcode 0F 57 (xorps) with $dst as the register operand
        // and the constant at StubRoutines::amd64::float_sign_flip() as the
        // memory operand, addressed through a relocated 32-bit displacement.
3858    int dstenc = $dst$$reg;
3859    address signflip_address = (address) StubRoutines::amd64::float_sign_flip();
3860
3861    cbuf.set_inst_mark();
        // Registers 8..15 need a REX.R prefix; only the low 3 bits of the
        // register number go into the ModRM byte.
3862    if (dstenc >= 8) {
3863      emit_opcode(cbuf, Assembler::REX_R);
3864      dstenc -= 8;
3865    }
3866    // XXX reg_mem doesn't support RIP-relative addressing yet
3867    emit_opcode(cbuf, 0x0F);
3868    emit_opcode(cbuf, 0x57);
3869    emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3870    emit_d32_reloc(cbuf, signflip_address);
3871  %}
3872
3873  enc_class negD_encoding(regD dst)
3874  %{
        // Double-precision counterpart of negF_encoding: hand-encodes
        // 66 0F 57 (xorpd) with $dst as the register operand and the constant
        // at StubRoutines::amd64::double_sign_flip() as the memory operand.
3875    int dstenc = $dst$$reg;
3876    address signflip_address = (address) StubRoutines::amd64::double_sign_flip();
3877
3878    cbuf.set_inst_mark();
        // The 0x66 operand-size prefix is emitted before any REX prefix.
3879    emit_opcode(cbuf, 0x66);
3880    if (dstenc >= 8) {
3881      emit_opcode(cbuf, Assembler::REX_R);
3882      dstenc -= 8;
3883    }
3884    // XXX reg_mem doesn't support RIP-relative addressing yet
3885    emit_opcode(cbuf, 0x0F);
3886    emit_opcode(cbuf, 0x57);
3887    emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3888    emit_d32_reloc(cbuf, signflip_address);
3889  %}
3890
3891  enc_class f2i_fixup(rRegI dst, regF src)
3892  %{
        // Emits a fix-up sequence after a float-to-int conversion: if $dst
        // equals 0x80000000, spill $src to a freshly reserved stack slot, call
        // StubRoutines::amd64::f2i_fixup() and pop the slot into $dst;
        // otherwise skip the whole sequence.
        // NOTE(review): 0x80000000 is presumably the value the preceding
        // conversion instruction produces for NaN/out-of-range inputs -- confirm.
3893    int dstenc = $dst$$reg;
3894    int srcenc = $src$$reg;
3895
3896    // cmpl $dst, #0x80000000
3897    if (dstenc >= 8) {
3898      emit_opcode(cbuf, Assembler::REX_B);
3899    }
3900    emit_opcode(cbuf, 0x81);
3901    emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
3902    emit_d32(cbuf, 0x80000000);
3903
3904    // jne,s done
        // The 8-bit displacement is the hand-counted size of everything below:
        // subq (4) + movss (3 + 2) + call (5) + popq (1) = 15 = 0xF, plus one
        // byte for each REX prefix needed ($src >= 8 on the movss, $dst >= 8
        // on the popq).  Keep it in sync with any change to the code below.
3905    emit_opcode(cbuf, 0x75);
3906    if (srcenc < 8 && dstenc < 8) {
3907      emit_d8(cbuf, 0xF);
3908    } else if (srcenc >= 8 && dstenc >= 8) {
3909      emit_d8(cbuf, 0x11);
3910    } else {
3911      emit_d8(cbuf, 0x10);
3912    }
3913
3914    // subq rsp, #8
3915    emit_opcode(cbuf, Assembler::REX_W);
3916    emit_opcode(cbuf, 0x83);
3917    emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3918    emit_d8(cbuf, 8);
3919
3920    // movss [rsp], $src
3921    emit_opcode(cbuf, 0xF3);
3922    if (srcenc >= 8) {
3923      emit_opcode(cbuf, Assembler::REX_R);
3924    }
3925    emit_opcode(cbuf, 0x0F);
3926    emit_opcode(cbuf, 0x11);
3927    encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3928
3929    // call f2i_fixup
        // code_end() is read after the opcode byte, so "- 4" makes the
        // displacement relative to the end of the call instruction.
3930    cbuf.set_inst_mark();
3931    emit_opcode(cbuf, 0xE8);
3932    emit_d32_reloc(cbuf,
3933                   (int)
3934                   (StubRoutines::amd64::f2i_fixup() - cbuf.code_end() - 4),
3935                   runtime_call_Relocation::spec(),
3936                   RELOC_DISP32);
3937
3938    // popq $dst
3939    if (dstenc >= 8) {
3940      emit_opcode(cbuf, Assembler::REX_B);
3941    }
3942    emit_opcode(cbuf, 0x58 | (dstenc & 7));
3943
3944    // done:
3945  %}
3946
3947  enc_class f2l_fixup(rRegL dst, regF src)
3948  %{
        // Emits a fix-up sequence after a float-to-long conversion: if $dst
        // equals the 64-bit constant stored at double_sign_flip(), spill $src
        // to a freshly reserved stack slot, call
        // StubRoutines::amd64::f2l_fixup() and pop the slot into $dst;
        // otherwise skip the whole sequence.
3949    int dstenc = $dst$$reg;
3950    int srcenc = $src$$reg;
        // The double_sign_flip constant is reused as the in-memory comparand
        // (0x8000000000000000 per the comment below).
3951    address const_address = (address) StubRoutines::amd64::double_sign_flip();
3952
3953    // cmpq $dst, [0x8000000000000000]
3954    cbuf.set_inst_mark();
3955    emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
3956    emit_opcode(cbuf, 0x39);
3957    // XXX reg_mem doesn't support RIP-relative addressing yet
3958    emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
3959    emit_d32_reloc(cbuf, const_address);
3960
3961
3962    // jne,s done
        // The 8-bit displacement is the hand-counted size of everything below:
        // subq (4) + movss (3 + 2) + call (5) + popq (1) = 15 = 0xF, plus one
        // byte for each REX prefix needed ($src >= 8 on the movss, $dst >= 8
        // on the popq).  Keep it in sync with any change to the code below.
3963    emit_opcode(cbuf, 0x75);
3964    if (srcenc < 8 && dstenc < 8) {
3965      emit_d8(cbuf, 0xF);
3966    } else if (srcenc >= 8 && dstenc >= 8) {
3967      emit_d8(cbuf, 0x11);
3968    } else {
3969      emit_d8(cbuf, 0x10);
3970    }
3971
3972    // subq rsp, #8
3973    emit_opcode(cbuf, Assembler::REX_W);
3974    emit_opcode(cbuf, 0x83);
3975    emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3976    emit_d8(cbuf, 8);
3977
3978    // movss [rsp], $src
3979    emit_opcode(cbuf, 0xF3);
3980    if (srcenc >= 8) {
3981      emit_opcode(cbuf, Assembler::REX_R);
3982    }
3983    emit_opcode(cbuf, 0x0F);
3984    emit_opcode(cbuf, 0x11);
3985    encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3986
3987    // call f2l_fixup
        // code_end() is read after the opcode byte, so "- 4" makes the
        // displacement relative to the end of the call instruction.
3988    cbuf.set_inst_mark();
3989    emit_opcode(cbuf, 0xE8);
3990    emit_d32_reloc(cbuf,
3991                   (int)
3992                   (StubRoutines::amd64::f2l_fixup() - cbuf.code_end() - 4),
3993                   runtime_call_Relocation::spec(),
3994                   RELOC_DISP32);
3995
3996    // popq $dst
3997    if (dstenc >= 8) {
3998      emit_opcode(cbuf, Assembler::REX_B);
3999    }
4000    emit_opcode(cbuf, 0x58 | (dstenc & 7));
4001
4002    // done:
4003  %}
4004
4005  enc_class d2i_fixup(rRegI dst, regD src)
4006  %{
        // Emits a fix-up sequence after a double-to-int conversion: if $dst
        // equals 0x80000000, spill $src to a freshly reserved stack slot, call
        // StubRoutines::amd64::d2i_fixup() and pop the slot into $dst;
        // otherwise skip the whole sequence.
        // NOTE(review): 0x80000000 is presumably the value the preceding
        // conversion instruction produces for NaN/out-of-range inputs -- confirm.
4007    int dstenc = $dst$$reg;
4008    int srcenc = $src$$reg;
4009
4010    // cmpl $dst, #0x80000000
4011    if (dstenc >= 8) {
4012      emit_opcode(cbuf, Assembler::REX_B);
4013    }
4014    emit_opcode(cbuf, 0x81);
4015    emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
4016    emit_d32(cbuf, 0x80000000);
4017
4018    // jne,s done
        // The 8-bit displacement is the hand-counted size of everything below:
        // subq (4) + movsd (3 + 2) + call (5) + popq (1) = 15 = 0xF, plus one
        // byte for each REX prefix needed ($src >= 8 on the movsd, $dst >= 8
        // on the popq).  Keep it in sync with any change to the code below.
4019    emit_opcode(cbuf, 0x75);
4020    if (srcenc < 8 && dstenc < 8) {
4021      emit_d8(cbuf, 0xF);
4022    } else if (srcenc >= 8 && dstenc >= 8) {
4023      emit_d8(cbuf, 0x11);
4024    } else {
4025      emit_d8(cbuf, 0x10);
4026    }
4027
4028    // subq rsp, #8
4029    emit_opcode(cbuf, Assembler::REX_W);
4030    emit_opcode(cbuf, 0x83);
4031    emit_rm(cbuf, 0x3, 0x5, RSP_enc);
4032    emit_d8(cbuf, 8);
4033
4034    // movsd [rsp], $src
4035    emit_opcode(cbuf, 0xF2);
4036    if (srcenc >= 8) {
4037      emit_opcode(cbuf, Assembler::REX_R);
4038    }
4039    emit_opcode(cbuf, 0x0F);
4040    emit_opcode(cbuf, 0x11);
4041    encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
4042
4043    // call d2i_fixup
        // code_end() is read after the opcode byte, so "- 4" makes the
        // displacement relative to the end of the call instruction.
4044    cbuf.set_inst_mark();
4045    emit_opcode(cbuf, 0xE8);
4046    emit_d32_reloc(cbuf,
4047                   (int)
4048                   (StubRoutines::amd64::d2i_fixup() - cbuf.code_end() - 4),
4049                   runtime_call_Relocation::spec(),
4050                   RELOC_DISP32);
4051
4052    // popq $dst
4053    if (dstenc >= 8) {
4054      emit_opcode(cbuf, Assembler::REX_B);
4055    }
4056    emit_opcode(cbuf, 0x58 | (dstenc & 7));
4057
4058    // done:
4059  %}
4060
4061  enc_class d2l_fixup(rRegL dst, regD src)
4062  %{
        // Emits a fix-up sequence after a double-to-long conversion: if $dst
        // equals the 64-bit constant stored at double_sign_flip(), spill $src
        // to a freshly reserved stack slot, call
        // StubRoutines::amd64::d2l_fixup() and pop the slot into $dst;
        // otherwise skip the whole sequence.
4063    int dstenc = $dst$$reg;
4064    int srcenc = $src$$reg;
        // The double_sign_flip constant is reused as the in-memory comparand
        // (0x8000000000000000 per the comment below).
4065    address const_address = (address) StubRoutines::amd64::double_sign_flip();
4066
4067    // cmpq $dst, [0x8000000000000000]
4068    cbuf.set_inst_mark();
4069    emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
4070    emit_opcode(cbuf, 0x39);
4071    // XXX reg_mem doesn't support RIP-relative addressing yet
4072    emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
4073    emit_d32_reloc(cbuf, const_address);
4074
4075
4076    // jne,s done
        // The 8-bit displacement is the hand-counted size of everything below:
        // subq (4) + movsd (3 + 2) + call (5) + popq (1) = 15 = 0xF, plus one
        // byte for each REX prefix needed ($src >= 8 on the movsd, $dst >= 8
        // on the popq).  Keep it in sync with any change to the code below.
4077    emit_opcode(cbuf, 0x75);
4078    if (srcenc < 8 && dstenc < 8) {
4079      emit_d8(cbuf, 0xF);
4080    } else if (srcenc >= 8 && dstenc >= 8) {
4081      emit_d8(cbuf, 0x11);
4082    } else {
4083      emit_d8(cbuf, 0x10);
4084    }
4085
4086    // subq rsp, #8
4087    emit_opcode(cbuf, Assembler::REX_W);
4088    emit_opcode(cbuf, 0x83);
4089    emit_rm(cbuf, 0x3, 0x5, RSP_enc);
4090    emit_d8(cbuf, 8);
4091
4092    // movsd [rsp], $src
4093    emit_opcode(cbuf, 0xF2);
4094    if (srcenc >= 8) {
4095      emit_opcode(cbuf, Assembler::REX_R);
4096    }
4097    emit_opcode(cbuf, 0x0F);
4098    emit_opcode(cbuf, 0x11);
4099    encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
4100
4101    // call d2l_fixup
        // code_end() is read after the opcode byte, so "- 4" makes the
        // displacement relative to the end of the call instruction.
4102    cbuf.set_inst_mark();
4103    emit_opcode(cbuf, 0xE8);
4104    emit_d32_reloc(cbuf,
4105                   (int)
4106                   (StubRoutines::amd64::d2l_fixup() - cbuf.code_end() - 4),
4107                   runtime_call_Relocation::spec(),
4108                   RELOC_DISP32);
4109
4110    // popq $dst
4111    if (dstenc >= 8) {
4112      emit_opcode(cbuf, Assembler::REX_B);
4113    }
4114    emit_opcode(cbuf, 0x58 | (dstenc & 7));
4115
4116    // done:
4117  %}
4118
4119  enc_class enc_membar_acquire
4120  %{
        // Intentionally emits no code; the body below is kept only as a
        // commented-out template.
4121    // [jk] not needed currently, if you enable this and it really
4122    // emits code don't forget to remove the "size(0)" line in
4123    // membar_acquire()
4124    // MacroAssembler masm(&cbuf);
4125    // masm.membar(Assembler::Membar_mask_bits(Assembler::LoadStore |
4126    //                                         Assembler::LoadLoad));
4127  %}
4128
4129  enc_class enc_membar_release
4130  %{
        // Intentionally emits no code; the body below is kept only as a
        // commented-out template.
4131    // [jk] not needed currently, if you enable this and it really
4132    // emits code don't forget to remove the "size(0)" line in
4133    // membar_release()
4134    // MacroAssembler masm(&cbuf);
4135    // masm.membar(Assembler::Membar_mask_bits(Assembler::LoadStore |
4136    //                                         Assembler::StoreStore));
4137  %}
4138
4139  enc_class enc_membar_volatile
4140  %{
        // Emits a memory barrier through MacroAssembler::membar, requesting
        // StoreLoad and StoreStore ordering.
4141    MacroAssembler masm(&cbuf);
4142    masm.membar(Assembler::Membar_mask_bits(Assembler::StoreLoad |
4143                                            Assembler::StoreStore));
4144  %}
4145
4146  // Safepoint Poll.  This polls the safepoint page, and causes an
4147  // exception if it is not readable. Unfortunately, it kills
4148  // RFLAGS in the process.
4149  enc_class enc_safepoint_poll
4150  %{
4151    // testl %rax, off(%rip) // Opcode + ModRM + Disp32 == 6 bytes
4152    // XXX reg_mem doesn't support RIP-relative addressing yet
        // The poll_type relocation is recorded at the instruction mark, i.e.
        // at the first byte of the testl emitted below.
4153    cbuf.set_inst_mark();
4154    cbuf.relocate(cbuf.inst_mark(), relocInfo::poll_type, 0); // XXX
4155    emit_opcode(cbuf, 0x85); // testl
4156    emit_rm(cbuf, 0x0, RAX_enc, 0x5); // 00 rax 101 == 0x5
4157    // cbuf.inst_mark() is beginning of instruction
        // The displacement targets the polling page address returned by
        // os::get_polling_page().
4158    emit_d32_reloc(cbuf, os::get_polling_page());
4159//                    relocInfo::poll_type,
4160  %}
4161%}
4162
4163
4164
4165//----------FRAME--------------------------------------------------------------
4166// Definition of frame structure and management information.
4167//
4168//  S T A C K   L A Y O U T    Allocators stack-slot number
4169//                             |   (to get allocators register number
4170//  G  Owned by    |        |  v    add OptoReg::stack0())
4171//  r   CALLER     |        |
4172//  o     |        +--------+      pad to even-align allocators stack-slot
4173//  w     V        |  pad0  |        numbers; owned by CALLER
4174//  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
4175//  h     ^        |   in   |  5
4176//        |        |  args  |  4   Holes in incoming args owned by SELF
4177//  |     |        |        |  3
4178//  |     |        +--------+
4179//  V     |        | old out|      Empty on Intel, window on Sparc
4180//        |    old |preserve|      Must be even aligned.
4181//        |     SP-+--------+----> Matcher::_old_SP, even aligned
4182//        |        |   in   |  3   area for Intel ret address
4183//     Owned by    |preserve|      Empty on Sparc.
4184//       SELF      +--------+
4185//        |        |  pad2  |  2   pad to align old SP
4186//        |        +--------+  1
4187//        |        | locks  |  0
4188//        |        +--------+----> OptoReg::stack0(), even aligned
4189//        |        |  pad1  | 11   pad to align new SP
4190//        |        +--------+
4191//        |        |        | 10
4192//        |        | spills |  9   spills
4193//        V        |        |  8   (pad0 slot for callee)
4194//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
4195//        ^        |  out   |  7
4196//        |        |  args  |  6   Holes in outgoing args owned by CALLEE
4197//     Owned by    +--------+
4198//      CALLEE     | new out|  6   Empty on Intel, window on Sparc
4199//        |    new |preserve|      Must be even-aligned.
4200//        |     SP-+--------+----> Matcher::_new_SP, even aligned
4201//        |        |        |
4202//
4203// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
4204//         known from SELF's arguments and the Java calling convention.
4205//         Region 6-7 is determined per call site.
4206// Note 2: If the calling convention leaves holes in the incoming argument
4207//         area, those holes are owned by SELF.  Holes in the outgoing area
4208//         are owned by the CALLEE.  Holes should not be necessary in the
4209//         incoming area, as the Java calling convention is completely under
4210//         the control of the AD file.  Doubles can be sorted and packed to
4211//         avoid holes.  Holes in the outgoing arguments may be necessary for
4212//         varargs C calling conventions.
4213// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
4214//         even aligned with pad0 as needed.
4215//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
4216//         region 6-11 is even aligned; it may be padded out more so that
4217//         the region from SP to FP meets the minimum stack alignment.
4218// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
4219//         alignment.  Region 11, pad1, may be dynamically extended so that
4220//         SP meets the minimum alignment.
4221
4222frame
4223%{
      // Declares the stack-frame layout and calling-convention facts for this
      // port: stack growth direction, special registers, stack-slot counts,
      // the return-address location, and where arguments and return values
      // live.
4224  // What direction does stack grow in (assumed to be same for C & Java)
4225  stack_direction(TOWARDS_LOW);
4226
4227  // These three registers define part of the calling convention
4228  // between compiled code and the interpreter.
      // NOTE(review): only two registers are declared here (RAX and RBX);
      // the "three" above looks stale -- confirm.
4229  inline_cache_reg(RAX);                // Inline Cache Register
4230  interpreter_method_oop_reg(RBX);      // Method Oop Register when
4231                                        // calling interpreter
4232
4233  // Optional: name the operand used by cisc-spilling to access
4234  // [stack_pointer + offset]
4235  cisc_spilling_operand_name(indOffset32);
4236
4237  // Number of stack slots consumed by locking an object
4238  sync_stack_slots(2);
4239
4240  // Compiled code's Frame Pointer
4241  frame_pointer(RSP);
4242
4243  // Interpreter stores its frame pointer in a register which is
4244  // stored to the stack by I2CAdaptors.
4245  // I2CAdaptors convert from interpreted java to compiled java.
4246  interpreter_frame_pointer(RBP);
4247
4248  // Stack alignment requirement
4249  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
4250
4251  // Number of stack slots between incoming argument block and the start of
4252  // a new frame.  The PROLOG must add this many slots to the stack.  The
4253  // EPILOG must remove this many slots.  amd64 needs two slots for
4254  // return address.
      // NOTE(review): the value below is 4 slots (plus 2 when
      // VerifyStackAtCalls is set), not 2; the other two are presumably for
      // the saved RBP -- confirm.
4255  in_preserve_stack_slots(4 + 2 * VerifyStackAtCalls);
4256
4257  // Number of outgoing stack slots killed above the out_preserve_stack_slots
4258  // for calls to C.  Supports the var-args backing area for register parms.
4259  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
4260
4261  // The after-PROLOG location of the return address.  Location of
4262  // return address specifies a type (REG or STACK) and a number
4263  // representing the register number (i.e. - use a register name) or
4264  // stack slot.
4265  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
4266  // Otherwise, it is above the locks and verification slot and alignment word
4267  return_addr(STACK - 2 +
4268              round_to(2 + 2 * VerifyStackAtCalls +
4269                       Compile::current()->fixed_slots(),
4270                       WordsPerLong * 2));
4271
4272  // Body of function which returns an integer array locating
4273  // arguments either in registers or in stack slots.  Passed an array
4274  // of ideal registers called "sig" and a "length" count.  Stack-slot
4275  // offsets are based on outgoing arguments, i.e. a CALLER setting up
4276  // arguments for a CALLEE.  Incoming stack arguments are
4277  // automatically biased by the preserve_stack_slots field above.
4278
4279  calling_convention
4280  %{
4281    // No difference between ingoing/outgoing just pass false
4282    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
4283  %}
4284
4285  c_calling_convention
4286  %{
4287    // This is obviously always outgoing
4288    (void) SharedRuntime::c_calling_convention(sig_bt, regs, length);
4289  %}
4290
4291  // Location of compiled Java return values.  Same as C for now.
4292  return_value
4293  %{
4294    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
4295           "only return normal values");
4296
        // Both tables are indexed directly by ideal_reg.  The first two
        // entries are placeholders for the two index values below Op_RegN.
        // lo[] names the register holding the low half of the value, hi[] the
        // high half; OptoReg::Bad marks types with no high half.
4297    static const int lo[Op_RegL + 1] = {
4298      0,
4299      0,
4300      RAX_num,  // Op_RegN
4301      RAX_num,  // Op_RegI
4302      RAX_num,  // Op_RegP
4303      XMM0_num, // Op_RegF
4304      XMM0_num, // Op_RegD
4305      RAX_num   // Op_RegL
4306    };
4307    static const int hi[Op_RegL + 1] = {
4308      0,
4309      0,
4310      OptoReg::Bad, // Op_RegN
4311      OptoReg::Bad, // Op_RegI
4312      RAX_H_num,    // Op_RegP
4313      OptoReg::Bad, // Op_RegF
4314      XMM0_H_num,   // Op_RegD
4315      RAX_H_num     // Op_RegL
4316    };
        // Guards against a new ideal register type being added without a
        // matching table entry.
4317    assert(ARRAY_SIZE(hi) == _last_machine_leaf - 1, "missing type");
4318    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
4319  %}
4320%}
4321
4322//----------ATTRIBUTES---------------------------------------------------------
4323//----------Operand Attributes-------------------------------------------------
4324op_attrib op_cost(0);        // Required cost attribute
    // NOTE(review): the parenthesized value in each declaration is presumably
    // the default applied when an operand/instruction definition does not set
    // the attribute itself -- confirm against the ADLC documentation.
4325
4326//----------Instruction Attributes---------------------------------------------
4327ins_attrib ins_cost(100);       // Required cost attribute
4328ins_attrib ins_size(8);         // Required size attribute (in bits)
4329ins_attrib ins_pc_relative(0);  // Required PC Relative flag
4330ins_attrib ins_short_branch(0); // Required flag: is this instruction
4331                                // a non-matching short branch variant
4332                                // of some long branch?
4333ins_attrib ins_alignment(1);    // Required alignment attribute (must
4334                                // be a power of 2) specifies the
4335                                // alignment that some part of the
4336                                // instruction (not necessarily the
4337                                // start) requires.  If > 1, a
4338                                // compute_padding() function must be
4339                                // provided for the instruction
4340
4341//----------OPERANDS-----------------------------------------------------------
4342// Operand definitions must precede instruction definitions for correct parsing
4343// in the ADLC because operands constitute user defined types which are used in
4344// instruction definitions.
4345
4346//----------Simple Operands----------------------------------------------------
4347// Immediate Operands
4348// Integer Immediate: matches any ConI node (no predicate); operand cost 10.
4349operand immI()
4350%{
4351  match(ConI);
4352
4353  op_cost(10);
4354  format %{ %}
4355  interface(CONST_INTER);
4356%}
4357
4358// Constant for test vs zero: ConI whose value is exactly 0; operand cost 0.
4359operand immI0()
4360%{
4361  predicate(n->get_int() == 0);
4362  match(ConI);
4363
4364  op_cost(0);
4365  format %{ %}
4366  interface(CONST_INTER);
4367%}
4368
4369// Constant for increment: ConI whose value is exactly 1; operand cost 0.
4370operand immI1()
4371%{
4372  predicate(n->get_int() == 1);
4373  match(ConI);
4374
4375  op_cost(0);
4376  format %{ %}
4377  interface(CONST_INTER);
4378%}
4379
4380// Constant for decrement: ConI whose value is exactly -1; operand cost 0.
4381operand immI_M1()
4382%{
4383  predicate(n->get_int() == -1);
4384  match(ConI);
4385
4386  op_cost(0);
4387  format %{ %}
4388  interface(CONST_INTER);
4389%}
4390
4391// Valid scale values for addressing modes: ConI in the range 0..3 inclusive (no explicit op_cost).
4392operand immI2()
4393%{
4394  predicate(0 <= n->get_int() && (n->get_int() <= 3));
4395  match(ConI);
4396
4397  format %{ %}
4398  interface(CONST_INTER);
4399%}
4400
4401operand immI8()  // ConI in the signed 8-bit range (-0x80 <= value < 0x80); operand cost 5
4402%{
4403  predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
4404  match(ConI);
4405
4406  op_cost(5);
4407  format %{ %}
4408  interface(CONST_INTER);
4409%}
4410
4411operand immI16()  // ConI in the signed 16-bit range (-32768 .. 32767); operand cost 10
4412%{
4413  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
4414  match(ConI);
4415
4416  op_cost(10);
4417  format %{ %}
4418  interface(CONST_INTER);
4419%}
4420
4421// Constant for long shifts: ConI whose value is exactly 32; operand cost 0.
4422operand immI_32()
4423%{
4424  predicate( n->get_int() == 32 );
4425  match(ConI);
4426
4427  op_cost(0);
4428  format %{ %}
4429  interface(CONST_INTER);
4430%}
4431
4432// Constant for long shifts: ConI whose value is exactly 64; operand cost 0.
4433operand immI_64()
4434%{
4435  predicate( n->get_int() == 64 );
4436  match(ConI);
4437
4438  op_cost(0);
4439  format %{ %}
4440  interface(CONST_INTER);
4441%}
4442
4443// Pointer Immediate: matches any ConP node (no predicate); operand cost 10.
4444operand immP()
4445%{
4446  match(ConP);
4447
4448  op_cost(10);
4449  format %{ %}
4450  interface(CONST_INTER);
4451%}
4452
4453// NULL Pointer Immediate: ConP whose get_ptr() is 0; operand cost 5.
4454operand immP0()
4455%{
4456  predicate(n->get_ptr() == 0);
4457  match(ConP);
4458
4459  op_cost(5);
4460  format %{ %}
4461  interface(CONST_INTER);
4462%}
4463
4464// Narrow Pointer Immediate: matches any ConN node (no predicate); operand cost 10.
4465operand immN() %{
4466  match(ConN);
4467
4468  op_cost(10);
4469  format %{ %}
4470  interface(CONST_INTER);
4471%}
4472
4473// NULL Narrow Pointer Immediate: ConN whose get_narrowcon() is 0; operand cost 5.
4474operand immN0() %{
4475  predicate(n->get_narrowcon() == 0);
4476  match(ConN);
4477
4478  op_cost(5);
4479  format %{ %}
4480  interface(CONST_INTER);
4481%}
4482
4483operand immP31()  // ConP that is not an oop pointer and whose value >> 31 is zero; operand cost 5
4484%{
4485  predicate(!n->as_Type()->type()->isa_oopptr()
4486            && (n->get_ptr() >> 31) == 0);
4487  match(ConP);
4488
4489  op_cost(5);
4490  format %{ %}
4491  interface(CONST_INTER);
4492%}
4493
4494
4495// Long Immediate: matches any ConL node (no predicate); operand cost 20.
4496operand immL()
4497%{
4498  match(ConL);
4499
4500  op_cost(20);
4501  format %{ %}
4502  interface(CONST_INTER);
4503%}
4504
4505// Long Immediate 8-bit: ConL in the signed 8-bit range (-0x80 <= value < 0x80); operand cost 5.
4506operand immL8()
4507%{
4508  predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
4509  match(ConL);
4510
4511  op_cost(5);
4512  format %{ %}
4513  interface(CONST_INTER);
4514%}
4515
4516// Long Immediate 32-bit unsigned: ConL left unchanged by a cast to unsigned int; operand cost 10.
4517operand immUL32()
4518%{
4519  predicate(n->get_long() == (unsigned int) (n->get_long()));
4520  match(ConL);
4521
4522  op_cost(10);
4523  format %{ %}
4524  interface(CONST_INTER);
4525%}
4526
4527// Long Immediate 32-bit signed: ConL left unchanged by a cast to int; operand cost 15.
4528operand immL32()
4529%{
4530  predicate(n->get_long() == (int) (n->get_long()));
4531  match(ConL);
4532
4533  op_cost(15);
4534  format %{ %}
4535  interface(CONST_INTER);
4536%}
4537
4538// Long Immediate zero: ConL whose value is exactly 0; operand cost 10.
4539operand immL0()
4540%{
4541  predicate(n->get_long() == 0L);
4542  match(ConL);
4543
4544  op_cost(10);
4545  format %{ %}
4546  interface(CONST_INTER);
4547%}
4548
4549// Constant for increment: ConL whose value is exactly 1 (no explicit op_cost).
4550operand immL1()
4551%{
4552  predicate(n->get_long() == 1);
4553  match(ConL);
4554
4555  format %{ %}
4556  interface(CONST_INTER);
4557%}
4558
4559// Constant for decrement: ConL whose value is exactly -1 (no explicit op_cost).
4560operand immL_M1()
4561%{
4562  predicate(n->get_long() == -1);
4563  match(ConL);
4564
4565  format %{ %}
4566  interface(CONST_INTER);
4567%}
4568
4569// Long Immediate: the value 10 (ConL equal to 10; no explicit op_cost).
4570operand immL10()
4571%{
4572  predicate(n->get_long() == 10);
4573  match(ConL);
4574
4575  format %{ %}
4576  interface(CONST_INTER);
4577%}
4578
4579// Long immediate from 0 to 127 (predicate: 0 <= value < 0x80); operand cost 10.
4580// Used for a shorter form of long mul by 10.
4581operand immL_127()
4582%{
4583  predicate(0 <= n->get_long() && n->get_long() < 0x80);
4584  match(ConL);
4585
4586  op_cost(10);
4587  format %{ %}
4588  interface(CONST_INTER);
4589%}
4590
4591// Long Immediate: low 32-bit mask (ConL equal to 0xFFFFFFFF); operand cost 20.
4592operand immL_32bits()
4593%{
4594  predicate(n->get_long() == 0xFFFFFFFFL);
4595  match(ConL);
4596  op_cost(20);
4597
4598  format %{ %}
4599  interface(CONST_INTER);
4600%}
4601
4602// Float Immediate zero: ConF for which jint_cast(n->getf()) == 0; operand cost 5.
4603operand immF0()
4604%{
4605  predicate(jint_cast(n->getf()) == 0);  // presumably a bit-pattern test (would exclude -0.0f) - confirm
4606  match(ConF);
4607
4608  op_cost(5);
4609  format %{ %}
4610  interface(CONST_INTER);
4611%}
4612
4613// Float Immediate: matches any ConF node (no predicate); operand cost 15.
4614operand immF()
4615%{
4616  match(ConF);
4617
4618  op_cost(15);
4619  format %{ %}
4620  interface(CONST_INTER);
4621%}
4622
4623// Double Immediate zero: ConD for which jlong_cast(n->getd()) == 0; operand cost 5.
4624operand immD0()
4625%{
4626  predicate(jlong_cast(n->getd()) == 0);  // presumably a bit-pattern test (would exclude -0.0) - confirm
4627  match(ConD);
4628
4629  op_cost(5);
4630  format %{ %}
4631  interface(CONST_INTER);
4632%}
4633
4634// Double Immediate: matches any ConD node (no predicate); operand cost 15.
4635operand immD()
4636%{
4637  match(ConD);
4638
4639  op_cost(15);
4640  format %{ %}
4641  interface(CONST_INTER);
4642%}
4643
4644// Immediates for special shifts (sign extend)
4645
4646// Constant 16 for the special (sign-extend) shifts: ConI whose value is exactly 16.
4647operand immI_16()
4648%{
4649  predicate(n->get_int() == 16);
4650  match(ConI);
4651
4652  format %{ %}
4653  interface(CONST_INTER);
4654%}
4655
4656operand immI_24()  // ConI whose value is exactly 24 (no explicit op_cost)
4657%{
4658  predicate(n->get_int() == 24);
4659  match(ConI);
4660
4661  format %{ %}
4662  interface(CONST_INTER);
4663%}
4664
4665// Constant for byte-wide masking: ConI whose value is exactly 255.
4666operand immI_255()
4667%{
4668  predicate(n->get_int() == 255);
4669  match(ConI);
4670
4671  format %{ %}
4672  interface(CONST_INTER);
4673%}
4674
4675// Constant for short-wide masking: ConI whose value is exactly 65535.
4676operand immI_65535()
4677%{
4678  predicate(n->get_int() == 65535);
4679  match(ConI);
4680
4681  format %{ %}
4682  interface(CONST_INTER);
4683%}
4684
4685// Constant for byte-wide masking: ConL whose value is exactly 255.
4686operand immL_255()
4687%{
4688  predicate(n->get_long() == 255);
4689  match(ConL);
4690
4691  format %{ %}
4692  interface(CONST_INTER);
4693%}
4694
4695// Constant for short-wide masking: ConL whose value is exactly 65535.
4696operand immL_65535()
4697%{
4698  predicate(n->get_long() == 65535);
4699  match(ConL);
4700
4701  format %{ %}
4702  interface(CONST_INTER);
4703%}
4704
4705// Register Operands
4706// Integer Register: RegI allocated from class int_reg; also matches the fixed-register variants.
4707operand rRegI()
4708%{
4709  constraint(ALLOC_IN_RC(int_reg));
4710  match(RegI);
4711
4712  match(rax_RegI);
4713  match(rbx_RegI);
4714  match(rcx_RegI);
4715  match(rdx_RegI);
4716  match(rdi_RegI);
4717
4718  format %{ %}
4719  interface(REG_INTER);
4720%}
4721
4722// Special Registers: RegI allocated from class int_rax_reg; also matches rRegI.
4723operand rax_RegI()
4724%{
4725  constraint(ALLOC_IN_RC(int_rax_reg));
4726  match(RegI);
4727  match(rRegI);
4728
4729  format %{ "RAX" %}
4730  interface(REG_INTER);
4731%}
4732
4733// Special Registers: RegI allocated from class int_rbx_reg; also matches rRegI.
4734operand rbx_RegI()
4735%{
4736  constraint(ALLOC_IN_RC(int_rbx_reg));
4737  match(RegI);
4738  match(rRegI);
4739
4740  format %{ "RBX" %}
4741  interface(REG_INTER);
4742%}
4743
4744operand rcx_RegI()  // RegI allocated from class int_rcx_reg; also matches rRegI
4745%{
4746  constraint(ALLOC_IN_RC(int_rcx_reg));
4747  match(RegI);
4748  match(rRegI);
4749
4750  format %{ "RCX" %}
4751  interface(REG_INTER);
4752%}
4753
4754operand rdx_RegI()  // RegI allocated from class int_rdx_reg; also matches rRegI
4755%{
4756  constraint(ALLOC_IN_RC(int_rdx_reg));
4757  match(RegI);
4758  match(rRegI);
4759
4760  format %{ "RDX" %}
4761  interface(REG_INTER);
4762%}
4763
4764operand rdi_RegI()  // RegI allocated from class int_rdi_reg; also matches rRegI
4765%{
4766  constraint(ALLOC_IN_RC(int_rdi_reg));
4767  match(RegI);
4768  match(rRegI);
4769
4770  format %{ "RDI" %}
4771  interface(REG_INTER);
4772%}
4773
4774operand no_rcx_RegI()  // RegI allocated from class int_no_rcx_reg; matches the rax/rbx/rdx/rdi variants but not rcx_RegI
4775%{
4776  constraint(ALLOC_IN_RC(int_no_rcx_reg));
4777  match(RegI);
4778  match(rax_RegI);
4779  match(rbx_RegI);
4780  match(rdx_RegI);
4781  match(rdi_RegI);
4782
4783  format %{ %}
4784  interface(REG_INTER);
4785%}
4786
4787operand no_rax_rdx_RegI()  // RegI allocated from class int_no_rax_rdx_reg; matches only the rbx/rcx/rdi variants
4788%{
4789  constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
4790  match(RegI);
4791  match(rbx_RegI);
4792  match(rcx_RegI);
4793  match(rdi_RegI);
4794
4795  format %{ %}
4796  interface(REG_INTER);
4797%}
4798
4799// Pointer Register: RegP allocated from class any_reg; matches all six fixed pointer variants plus rRegP.
4800operand any_RegP()
4801%{
4802  constraint(ALLOC_IN_RC(any_reg));
4803  match(RegP);
4804  match(rax_RegP);
4805  match(rbx_RegP);
4806  match(rdi_RegP);
4807  match(rsi_RegP);
4808  match(rbp_RegP);
4809  match(r15_RegP);
4810  match(rRegP);
4811
4812  format %{ %}
4813  interface(REG_INTER);
4814%}
4815
4816operand rRegP()  // RegP allocated from class ptr_reg; matches the six fixed pointer variants as inputs
4817%{
4818  constraint(ALLOC_IN_RC(ptr_reg));
4819  match(RegP);
4820  match(rax_RegP);
4821  match(rbx_RegP);
4822  match(rdi_RegP);
4823  match(rsi_RegP);
4824  match(rbp_RegP);
4825  match(r15_RegP);  // See Q&A below about r15_RegP.
4826
4827  format %{ %}
4828  interface(REG_INTER);
4829%}
4830
4831
4832operand r12RegL() %{  // RegL allocated from class long_r12_reg
4833  constraint(ALLOC_IN_RC(long_r12_reg));
4834  match(RegL);
4835
4836  format %{ %}
4837  interface(REG_INTER);
4838%}
4839
4840operand rRegN() %{  // RegN allocated from class int_reg (the same class rRegI uses)
4841  constraint(ALLOC_IN_RC(int_reg));
4842  match(RegN);
4843
4844  format %{ %}
4845  interface(REG_INTER);
4846%}
4847
4848// Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
4849// Answer: Operand match rules govern the DFA as it processes instruction inputs.
4850// It's fine for an instruction input which expects rRegP to match a r15_RegP.
4851// The output of an instruction is controlled by the allocator, which respects
4852// register class masks, not match rules.  Unless an instruction mentions
4853// r15_RegP or any_RegP explicitly as its output, r15 will not be considered
4854// by the allocator as an input.
4855
4856operand no_rax_RegP()  // RegP allocated from class ptr_no_rax_reg; matches the rbx/rsi/rdi variants
4857%{
4858  constraint(ALLOC_IN_RC(ptr_no_rax_reg));
4859  match(RegP);
4860  match(rbx_RegP);
4861  match(rsi_RegP);
4862  match(rdi_RegP);
4863
4864  format %{ %}
4865  interface(REG_INTER);
4866%}
4867
4868operand no_rbp_RegP()  // RegP allocated from class ptr_no_rbp_reg; matches the rbx/rsi/rdi variants
4869%{
4870  constraint(ALLOC_IN_RC(ptr_no_rbp_reg));
4871  match(RegP);
4872  match(rbx_RegP);
4873  match(rsi_RegP);
4874  match(rdi_RegP);
4875
4876  format %{ %}
4877  interface(REG_INTER);
4878%}
4879
4880operand no_rax_rbx_RegP()  // RegP allocated from class ptr_no_rax_rbx_reg; matches the rsi/rdi variants
4881%{
4882  constraint(ALLOC_IN_RC(ptr_no_rax_rbx_reg));
4883  match(RegP);
4884  match(rsi_RegP);
4885  match(rdi_RegP);
4886
4887  format %{ %}
4888  interface(REG_INTER);
4889%}
4890
4891// Special Registers
4892// Return a pointer value: RegP allocated from class ptr_rax_reg; also matches rRegP.
4893operand rax_RegP()
4894%{
4895  constraint(ALLOC_IN_RC(ptr_rax_reg));
4896  match(RegP);
4897  match(rRegP);
4898
4899  format %{ %}
4900  interface(REG_INTER);
4901%}
4902
4903// Special Registers
4904// Return a compressed pointer value: RegN allocated from class int_rax_reg; also matches rRegN.
4905operand rax_RegN()
4906%{
4907  constraint(ALLOC_IN_RC(int_rax_reg));
4908  match(RegN);
4909  match(rRegN);
4910
4911  format %{ %}
4912  interface(REG_INTER);
4913%}
4914
4915// Used in AtomicAdd: RegP allocated from class ptr_rbx_reg; also matches rRegP.
4916operand rbx_RegP()
4917%{
4918  constraint(ALLOC_IN_RC(ptr_rbx_reg));
4919  match(RegP);
4920  match(rRegP);
4921
4922  format %{ %}
4923  interface(REG_INTER);
4924%}
4925
4926operand rsi_RegP()  // RegP allocated from class ptr_rsi_reg; also matches rRegP
4927%{
4928  constraint(ALLOC_IN_RC(ptr_rsi_reg));
4929  match(RegP);
4930  match(rRegP);
4931
4932  format %{ %}
4933  interface(REG_INTER);
4934%}
4935
4936// Used in rep stosq: RegP allocated from class ptr_rdi_reg; also matches rRegP.
4937operand rdi_RegP()
4938%{
4939  constraint(ALLOC_IN_RC(ptr_rdi_reg));
4940  match(RegP);
4941  match(rRegP);
4942
4943  format %{ %}
4944  interface(REG_INTER);
4945%}
4946
4947operand rbp_RegP()  // RegP allocated from class ptr_rbp_reg; also matches rRegP
4948%{
4949  constraint(ALLOC_IN_RC(ptr_rbp_reg));
4950  match(RegP);
4951  match(rRegP);
4952
4953  format %{ %}
4954  interface(REG_INTER);
4955%}
4956
4957operand r15_RegP()  // RegP allocated from class ptr_r15_reg (the read-only TLS register, per the Q&A above); also matches rRegP
4958%{
4959  constraint(ALLOC_IN_RC(ptr_r15_reg));
4960  match(RegP);
4961  match(rRegP);
4962
4963  format %{ %}
4964  interface(REG_INTER);
4965%}
4966
4967operand rRegL()  // Long Register: RegL allocated from class long_reg; also matches rax_RegL and rdx_RegL
4968%{
4969  constraint(ALLOC_IN_RC(long_reg));
4970  match(RegL);
4971  match(rax_RegL);
4972  match(rdx_RegL);
4973
4974  format %{ %}
4975  interface(REG_INTER);
4976%}
4977
4978// Special Registers: RegL allocated from class long_no_rax_rdx_reg; also matches rRegL.
4979operand no_rax_rdx_RegL()
4980%{
4981  constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
4982  match(RegL);
4983  match(rRegL);
4984
4985  format %{ %}
4986  interface(REG_INTER);
4987%}
4988
4989operand no_rax_RegL()  // RegL operand; also matches rRegL and rdx_RegL
4990%{
4991  constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));  // NOTE(review): same class as no_rax_rdx_RegL although rdx_RegL is matched below - confirm
4992  match(RegL);
4993  match(rRegL);
4994  match(rdx_RegL);
4995
4996  format %{ %}
4997  interface(REG_INTER);
4998%}
4999
5000operand no_rcx_RegL()  // RegL allocated from class long_no_rcx_reg; also matches rRegL
5001%{
5002  constraint(ALLOC_IN_RC(long_no_rcx_reg));
5003  match(RegL);
5004  match(rRegL);
5005
5006  format %{ %}
5007  interface(REG_INTER);
5008%}
5009
5010operand rax_RegL()  // RegL allocated from class long_rax_reg; also matches rRegL
5011%{
5012  constraint(ALLOC_IN_RC(long_rax_reg));
5013  match(RegL);
5014  match(rRegL);
5015
5016  format %{ "RAX" %}
5017  interface(REG_INTER);
5018%}
5019
5020operand rcx_RegL()  // RegL allocated from class long_rcx_reg; also matches rRegL
5021%{
5022  constraint(ALLOC_IN_RC(long_rcx_reg));
5023  match(RegL);
5024  match(rRegL);
5025
5026  format %{ %}
5027  interface(REG_INTER);
5028%}
5029
5030operand rdx_RegL()  // RegL allocated from class long_rdx_reg; also matches rRegL
5031%{
5032  constraint(ALLOC_IN_RC(long_rdx_reg));
5033  match(RegL);
5034  match(rRegL);
5035
5036  format %{ %}
5037  interface(REG_INTER);
5038%}
5039
5040// Flags register, used as output of compare instructions (RegFlags in class int_flags).
5041operand rFlagsReg()
5042%{
5043  constraint(ALLOC_IN_RC(int_flags));
5044  match(RegFlags);
5045
5046  format %{ "RFLAGS" %}
5047  interface(REG_INTER);
5048%}
5049
5050// Flags register, used as output of FLOATING POINT compare instructions (same class int_flags as rFlagsReg).
5051operand rFlagsRegU()
5052%{
5053  constraint(ALLOC_IN_RC(int_flags));
5054  match(RegFlags);
5055
5056  format %{ "RFLAGS_U" %}
5057  interface(REG_INTER);
5058%}
5059
5060// Float register operands: RegF allocated from class float_reg.
5061operand regF()
5062%{
5063  constraint(ALLOC_IN_RC(float_reg));
5064  match(RegF);
5065
5066  format %{ %}
5067  interface(REG_INTER);
5068%}
5069
5070// Double register operands: RegD allocated from class double_reg.
5071operand regD()
5072%{
5073  constraint(ALLOC_IN_RC(double_reg));
5074  match(RegD);
5075
5076  format %{ %}
5077  interface(REG_INTER);
5078%}
5079
5080
5081//----------Memory Operands----------------------------------------------------
5082// Direct Memory Operand
5083// operand direct(immP addr)
5084// %{
5085//   match(addr);
5086
5087//   format %{ "[$addr]" %}
5088//   interface(MEMORY_INTER) %{
5089//     base(0xFFFFFFFF);
5090//     index(0x4);
5091//     scale(0x0);
5092//     disp($addr);
5093//   %}
5094// %}
5095
5096// Indirect Memory Operand: the address is the base register alone (no index, scale 0, disp 0).
5097operand indirect(any_RegP reg)
5098%{
5099  constraint(ALLOC_IN_RC(ptr_reg));
5100  match(reg);
5101
5102  format %{ "[$reg]" %}
5103  interface(MEMORY_INTER) %{
5104    base($reg);
5105    index(0x4);  // 0x4 = no index (same convention as the stackSlot operands)
5106    scale(0x0);
5107    disp(0x0);
5108  %}
5109%}
5110
5111// Indirect Memory Plus Short Offset Operand: matches AddP reg off with an immL8 offset.
5112operand indOffset8(any_RegP reg, immL8 off)
5113%{
5114  constraint(ALLOC_IN_RC(ptr_reg));
5115  match(AddP reg off);
5116
5117  format %{ "[$reg + $off (8-bit)]" %}
5118  interface(MEMORY_INTER) %{
5119    base($reg);
5120    index(0x4);  // 0x4 = no index (same convention as the stackSlot operands)
5121    scale(0x0);
5122    disp($off);
5123  %}
5124%}
5125
5126// Indirect Memory Plus Long Offset Operand: matches AddP reg off with an immL32 offset.
5127operand indOffset32(any_RegP reg, immL32 off)
5128%{
5129  constraint(ALLOC_IN_RC(ptr_reg));
5130  match(AddP reg off);
5131
5132  format %{ "[$reg + $off (32-bit)]" %}
5133  interface(MEMORY_INTER) %{
5134    base($reg);
5135    index(0x4);  // 0x4 = no index (same convention as the stackSlot operands)
5136    scale(0x0);
5137    disp($off);
5138  %}
5139%}
5140
5141// Indirect Memory Plus Index Register Plus Offset Operand: [reg + lreg + off], unscaled index; cost 10.
5142operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
5143%{
5144  constraint(ALLOC_IN_RC(ptr_reg));
5145  match(AddP (AddP reg lreg) off);
5146
5147  op_cost(10);
5148  format %{"[$reg + $off + $lreg]" %}
5149  interface(MEMORY_INTER) %{
5150    base($reg);
5151    index($lreg);
5152    scale(0x0);
5153    disp($off);
5154  %}
5155%}
5156
5157// Indirect Memory Plus Index Register Operand: [reg + lreg], unscaled index and zero displacement; cost 10.
5158operand indIndex(any_RegP reg, rRegL lreg)
5159%{
5160  constraint(ALLOC_IN_RC(ptr_reg));
5161  match(AddP reg lreg);
5162
5163  op_cost(10);
5164  format %{"[$reg + $lreg]" %}
5165  interface(MEMORY_INTER) %{
5166    base($reg);
5167    index($lreg);
5168    scale(0x0);
5169    disp(0x0);
5170  %}
5171%}
5172
5173// Indirect Memory Times Scale Plus Index Register: [reg + (lreg << scale)], scale limited by immI2; cost 10.
5174operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
5175%{
5176  constraint(ALLOC_IN_RC(ptr_reg));
5177  match(AddP reg (LShiftL lreg scale));
5178
5179  op_cost(10);
5180  format %{"[$reg + $lreg << $scale]" %}
5181  interface(MEMORY_INTER) %{
5182    base($reg);
5183    index($lreg);
5184    scale($scale);
5185    disp(0x0);
5186  %}
5187%}
5188
5189// Indirect Memory Times Scale Plus Index Register Plus Offset Operand: [reg + (lreg << scale) + off]; cost 10.
5190operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
5191%{
5192  constraint(ALLOC_IN_RC(ptr_reg));
5193  match(AddP (AddP reg (LShiftL lreg scale)) off);
5194
5195  op_cost(10);
5196  format %{"[$reg + $off + $lreg << $scale]" %}
5197  interface(MEMORY_INTER) %{
5198    base($reg);
5199    index($lreg);
5200    scale($scale);
5201    disp($off);
5202  %}
5203%}
5204
5205// Compressed-pointer form: matches AddP (DecodeN src base) off and addresses [base + (src << 3) + off]; cost 10.
5206operand indIndexScaleOffsetComp(rRegN src, immL32 off, r12RegL base) %{
5207  constraint(ALLOC_IN_RC(ptr_reg));
5208  match(AddP (DecodeN src base) off);
5209
5210  op_cost(10);
5211  format %{"[$base + $src << 3 + $off] (compressed)" %}
5212  interface(MEMORY_INTER) %{
5213    base($base);
5214    index($src);
5215    scale(0x3);
5216    disp($off);
5217  %}
5218%}
5219
5220// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand (int index via ConvI2L); cost 10.
5221operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
5222%{
5223  constraint(ALLOC_IN_RC(ptr_reg));
5224  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);  // presumably walks to the ConvI2L node and requires its long type's lower bound to be non-negative - confirm
5225  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
5226
5227  op_cost(10);
5228  format %{"[$reg + $off + $idx << $scale]" %}
5229  interface(MEMORY_INTER) %{
5230    base($reg);
5231    index($idx);
5232    scale($scale);
5233    disp($off);
5234  %}
5235%}
5236
5237//----------Special Memory Operands--------------------------------------------
5238// Stack Slot Operand - This operand is used for loading and storing temporary
5239//                      values on the stack where a match requires a value to
5240//                      flow through memory.  This variant wraps an sRegP slot.
5241operand stackSlotP(sRegP reg)
5242%{
5243  constraint(ALLOC_IN_RC(stack_slots));
5244  // No match rule because this operand is only generated in matching
5245
5246  format %{ "[$reg]" %}
5247  interface(MEMORY_INTER) %{
5248    base(0x4);   // RSP
5249    index(0x4);  // No Index
5250    scale(0x0);  // No Scale
5251    disp($reg);  // Stack Offset
5252  %}
5253%}
5254
5255operand stackSlotI(sRegI reg)  // stack slot operand for an sRegI: RSP-based, displacement taken from $reg
5256%{
5257  constraint(ALLOC_IN_RC(stack_slots));
5258  // No match rule because this operand is only generated in matching
5259
5260  format %{ "[$reg]" %}
5261  interface(MEMORY_INTER) %{
5262    base(0x4);   // RSP
5263    index(0x4);  // No Index
5264    scale(0x0);  // No Scale
5265    disp($reg);  // Stack Offset
5266  %}
5267%}
5268
5269operand stackSlotF(sRegF reg)  // stack slot operand for an sRegF: RSP-based, displacement taken from $reg
5270%{
5271  constraint(ALLOC_IN_RC(stack_slots));
5272  // No match rule because this operand is only generated in matching
5273
5274  format %{ "[$reg]" %}
5275  interface(MEMORY_INTER) %{
5276    base(0x4);   // RSP
5277    index(0x4);  // No Index
5278    scale(0x0);  // No Scale
5279    disp($reg);  // Stack Offset
5280  %}
5281%}
5282
5283operand stackSlotD(sRegD reg)  // stack slot operand for an sRegD: RSP-based, displacement taken from $reg
5284%{
5285  constraint(ALLOC_IN_RC(stack_slots));
5286  // No match rule because this operand is only generated in matching
5287
5288  format %{ "[$reg]" %}
5289  interface(MEMORY_INTER) %{
5290    base(0x4);   // RSP
5291    index(0x4);  // No Index
5292    scale(0x0);  // No Scale
5293    disp($reg);  // Stack Offset
5294  %}
5295%}
5296operand stackSlotL(sRegL reg)  // stack slot operand for an sRegL: RSP-based, displacement taken from $reg
5297%{
5298  constraint(ALLOC_IN_RC(stack_slots));
5299  // No match rule because this operand is only generated in matching
5300
5301  format %{ "[$reg]" %}
5302  interface(MEMORY_INTER) %{
5303    base(0x4);   // RSP
5304    index(0x4);  // No Index
5305    scale(0x0);  // No Scale
5306    disp($reg);  // Stack Offset
5307  %}
5308%}
5309
5310//----------Conditional Branch Operands----------------------------------------
5311// Comparison Op  - This is the operation of the comparison, and is limited to
5312//                  the following set of codes:
5313//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
5314//
5315// Other attributes of the comparison, such as unsignedness, are specified
5316// by the comparison instruction that sets a condition code flags register.
5317// That result is represented by a flags operand whose subtype is appropriate
5318// to the unsignedness (etc.) of the comparison.
5319//
5320// Later, the instruction which matches both the Comparison Op (a Bool) and
5321// the flags (produced by the Cmp) specifies the coding of the comparison op
5322// by matching a specific subtype of Bool operand below, such as cmpOpU.
5323
5324// Comparison Code: Bool operand giving one encoding per test (presumably the x86 condition-code nibbles - confirm).
5325operand cmpOp()
5326%{
5327  match(Bool);
5328
5329  format %{ "" %}
5330  interface(COND_INTER) %{
5331    equal(0x4);
5332    not_equal(0x5);
5333    less(0xC);
5334    greater_equal(0xD);
5335    less_equal(0xE);
5336    greater(0xF);
5337  %}
5338%}
5339
5340// Comparison Code, unsigned compare.  Used by FP also, with
5341// C2 (unordered) turned into GT or LT already.  The other bits
5342// C0 and C3 are turned into Carry & Zero flags.  Differs from cmpOp only in the four ordering encodings.
5343operand cmpOpU()
5344%{
5345  match(Bool);
5346
5347  format %{ "" %}
5348  interface(COND_INTER) %{
5349    equal(0x4);
5350    not_equal(0x5);
5351    less(0x2);
5352    greater_equal(0x3);
5353    less_equal(0x6);
5354    greater(0x7);
5355  %}
5356%}
5357
5358
5359//----------OPERAND CLASSES----------------------------------------------------
5360// Operand Classes are groups of operands that are used to simplify
5361// instruction definitions by not requiring the AD writer to specify separate
5362// instructions for every form of operand when the instruction accepts
5363// multiple operand types with the same basic encoding and format.  The classic
5364// case of this is memory operands: "memory" groups the nine addressing-mode operands defined above.
5365
5366opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
5367               indIndexScale, indIndexScaleOffset, indPosIndexScaleOffset,
5368               indIndexScaleOffsetComp);
5369
5370//----------PIPELINE-----------------------------------------------------------
5371// Rules which define the behavior of the target architectures pipeline.
5372pipeline %{
5373
5374//----------ATTRIBUTES---------------------------------------------------------
5375attributes %{
5376  variable_size_instructions;        // Variable size instructions
5377  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
5378  instruction_unit_size = 1;         // An instruction unit is 1 byte long
5379  instruction_fetch_unit_size = 16;  // The processor fetches one line
5380  instruction_fetch_units = 1;       // of 16 bytes
5381
5382  // List of nop instructions
5383  nops( MachNop );
5384%}
5385
5386//----------RESOURCES----------------------------------------------------------
5387// Resources are the functional units available to the machine
5388
5389// Generic P2/P3 pipeline
5390// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
5391// 3 instructions decoded per cycle.
5392// 2 load/store ops per cycle, 1 branch, 1 FPU,  (NOTE(review): three MS units are declared below - confirm)
5393// 3 ALU ops, only ALU0 handles mul instructions.
5394resources( D0, D1, D2, DECODE = D0 | D1 | D2,
5395           MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
5396           BR, FPU,
5397           ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
5398
5399//----------PIPELINE DESCRIPTION-----------------------------------------------
5400// Pipeline Description specifies the stages in the machine's pipeline
5401
5402// Generic P2/P3 pipeline: six stages, S0 through S5, referenced by the pipe_class entries below.
5403pipe_desc(S0, S1, S2, S3, S4, S5);
5404
5405//----------PIPELINE CLASSES---------------------------------------------------
5406// Pipeline Classes describe the stages in which input and output are
5407// referenced by the hardware pipeline.
5408
5409// Naming convention: ialu or fpu
5410// Then: _reg
5411// Then: _reg if there is a 2nd register
5412// Then: _long if it's a pair of instructions implementing a long
5413// Then: _fat if it requires the big decoder
5414//   Or: _mem if it requires the big decoder and a memory unit.
5415
5416// Integer ALU reg operation: one instruction; dst is read in S3 and written in S4.
5417pipe_class ialu_reg(rRegI dst)
5418%{
5419    single_instruction;
5420    dst    : S4(write);
5421    dst    : S3(read);
5422    DECODE : S0;        // any decoder
5423    ALU    : S3;        // any alu
5424%}
5425
5426// Long ALU reg operation: two instructions; dst is read in S3 and written in S4.
5427pipe_class ialu_reg_long(rRegL dst)
5428%{
5429    instruction_count(2);
5430    dst    : S4(write);
5431    dst    : S3(read);
5432    DECODE : S0(2);     // any 2 decoders
5433    ALU    : S3(2);     // any 2 alus
5434%}
5435
5436// Integer ALU reg operation using big decoder: like ialu_reg but decoded on D0 only.
5437pipe_class ialu_reg_fat(rRegI dst)
5438%{
5439    single_instruction;
5440    dst    : S4(write);
5441    dst    : S3(read);
5442    D0     : S0;        // big decoder only
5443    ALU    : S3;        // any alu
5444%}
5445
5446// Long ALU reg operation using big decoder: like ialu_reg_long but decoded on D0 only.
5447pipe_class ialu_reg_long_fat(rRegL dst)
5448%{
5449    instruction_count(2);
5450    dst    : S4(write);
5451    dst    : S3(read);
5452    D0     : S0(2);     // big decoder only; twice
5453    ALU    : S3(2);     // any 2 alus
5454%}
5455
5456// Integer ALU reg-reg operation: one instruction; src is read in S3, dst is written in S4.
5457pipe_class ialu_reg_reg(rRegI dst, rRegI src)
5458%{
5459    single_instruction;
5460    dst    : S4(write);
5461    src    : S3(read);
5462    DECODE : S0;        // any decoder
5463    ALU    : S3;        // any alu
5464%}
5465
5466// Long ALU reg-reg operation: two instructions; src is read in S3, dst is written in S4.
5467pipe_class ialu_reg_reg_long(rRegL dst, rRegL src)
5468%{
5469    instruction_count(2);
5470    dst    : S4(write);
5471    src    : S3(read);
5472    DECODE : S0(2);     // any 2 decoders
5473    ALU    : S3(2);     // any 2 alus
5474%}
5475
5476// Integer ALU reg-reg operation using big decoder (NOTE(review): src is declared as a memory operand - confirm)
5477pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
5478%{
5479    single_instruction;
5480    dst    : S4(write);
5481    src    : S3(read);
5482    D0     : S0;        // big decoder only
5483    ALU    : S3;        // any alu
5484%}
5485
5486// Long ALU reg-reg operation using big decoder: two instructions, both decoded on D0.
5487pipe_class ialu_reg_reg_long_fat(rRegL dst, rRegL src)
5488%{
5489    instruction_count(2);
5490    dst    : S4(write);
5491    src    : S3(read);
5492    D0     : S0(2);     // big decoder only; twice
5493    ALU    : S3(2);     // any 2 alus
5494%}
5495
5496// Integer ALU reg-mem operation: mem is read in S3, the ALU runs in S4, dst is written in S5.
5497pipe_class ialu_reg_mem(rRegI dst, memory mem)
5498%{
5499    single_instruction;
5500    dst    : S5(write);
5501    mem    : S3(read);
5502    D0     : S0;        // big decoder only
5503    ALU    : S4;        // any alu
5504    MEM    : S3;        // any mem
5505%}
5506
5507// Integer mem operation (prefetch): reads mem in S3; no register operand and no ALU use.
5508pipe_class ialu_mem(memory mem)
5509%{
5510    single_instruction;
5511    mem    : S3(read);
5512    D0     : S0;        // big decoder only
5513    MEM    : S3;        // any mem
5514%}
5515
5516// Integer Store to Memory: mem is read in S3 and src in S5; ALU in S4.
5517pipe_class ialu_mem_reg(memory mem, rRegI src)
5518%{
5519    single_instruction;
5520    mem    : S3(read);
5521    src    : S5(read);
5522    D0     : S0;        // big decoder only
5523    ALU    : S4;        // any alu
5524    MEM    : S3;        // any mem
5525%}
5526
5527// // Long Store to Memory
5528// pipe_class ialu_mem_long_reg(memory mem, rRegL src)
5529// %{
5530//     instruction_count(2);
5531//     mem    : S3(read);
5532//     src    : S5(read);
5533//     D0     : S0(2);          // big decoder only; twice
5534//     ALU    : S4(2);     // any 2 alus
5535//     MEM    : S3(2);  // Both mems
5536// %}
5537
5538// Integer Store to Memory with no register source (presumably an immediate store, per the name - confirm)
5539pipe_class ialu_mem_imm(memory mem)
5540%{
5541    single_instruction;
5542    mem    : S3(read);
5543    D0     : S0;        // big decoder only
5544    ALU    : S4;        // any alu
5545    MEM    : S3;        // any mem
5546%}
5547
5548// Integer ALU0 reg-reg operation: like ialu_reg_reg but restricted to decoder D0 and unit ALU0.
5549pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
5550%{
5551    single_instruction;
5552    dst    : S4(write);
5553    src    : S3(read);
5554    D0     : S0;        // Big decoder only
5555    ALU0   : S3;        // only alu0
5556%}
5557
5558// Integer ALU0 reg-mem operation: like ialu_reg_mem but restricted to unit ALU0.
5559pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
5560%{
5561    single_instruction;
5562    dst    : S5(write);
5563    mem    : S3(read);
5564    D0     : S0;        // big decoder only
5565    ALU0   : S4;        // ALU0 only
5566    MEM    : S3;        // any mem
5567%}
5568
5569// Integer ALU reg-reg operation that writes the flags register: src1/src2 read in S3, cr written in S4.
5570pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
5571%{
5572    single_instruction;
5573    cr     : S4(write);
5574    src1   : S3(read);
5575    src2   : S3(read);
5576    DECODE : S0;        // any decoder
5577    ALU    : S3;        // any alu
5578%}
5579
5580// Integer ALU reg-imm operation that writes the flags register: src1 read in S3, cr written in S4.
5581pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
5582%{
5583    single_instruction;
5584    cr     : S4(write);
5585    src1   : S3(read);
5586    DECODE : S0;        // any decoder
5587    ALU    : S3;        // any alu
5588%}
5589
5590// Integer ALU reg-mem operation that writes the flags register: operands read in S3, cr written in S4.
5591pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
5592%{
5593    single_instruction;
5594    cr     : S4(write);
5595    src1   : S3(read);
5596    src2   : S3(read);
5597    D0     : S0;        // big decoder only
5598    ALU    : S4;        // any alu
5599    MEM    : S3;        // any mem
5600%}
5601
5602// Four-instruction compare sequence (original note: "Conditional move reg-reg"): reads p and q in S3, y in S4.
5603pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
5604%{
5605    instruction_count(4);
5606    y      : S4(read);
5607    q      : S3(read);
5608    p      : S3(read);
5609    DECODE : S0(4);     // 4 decoder slots
5610%}
5611
5612// Conditional move reg-reg: src and the flags (cr) are read in S3, dst is written in S4; no ALU listed.
5613pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
5614%{
5615    single_instruction;
5616    dst    : S4(write);
5617    src    : S3(read);
5618    cr     : S3(read);
5619    DECODE : S0;        // any decoder
5620%}
5621
5622// Conditional move reg-mem: src (memory) and the flags (cr) are read in S3, dst is written in S4.
5623pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
5624%{
5625    single_instruction;
5626    dst    : S4(write);
5627    src    : S3(read);
5628    cr     : S3(read);
5629    DECODE : S0;        // any decoder
5630    MEM    : S3;        // any mem
5631%}
5632
5633// Conditional move reg-reg long: src and the flags (cr) are read in S3, dst is written in S4.
5634pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
5635%{
5636    single_instruction;
5637    dst    : S4(write);
5638    src    : S3(read);
5639    cr     : S3(read);
5640    DECODE : S0(2);     // any 2 decoders (NOTE(review): declared single_instruction yet takes 2 decoders - confirm)
5641%}
5642
5643// XXX
5644// // Conditional move double reg-reg
5645// pipe_class pipe_cmovD_reg( rFlagsReg cr, regDPR1 dst, regD src)
5646// %{
5647//     single_instruction;
5648//     dst    : S4(write);
5649//     src    : S3(read);
5650//     cr     : S3(read);
5651//     DECODE : S0;     // any decoder
5652// %}
5653
5654// Float single-register operation
// Takes one double register operand.  Although it is named dst, it is
// declared as read (S3), not written.  Two instructions, two decoder slots
// in S0 and the FPU in S3.
5655pipe_class fpu_reg(regD dst)
5656%{
5657    instruction_count(2);
5658    dst    : S3(read);
5659    DECODE : S0(2);     // any 2 decoders
5660    FPU    : S3;
5661%}
5662
5663// Float reg-reg operation
// Two instructions: src is read in S3, dst is written in S4.  Reserves two
// decoder slots in S0 and the FPU in S3.
5664pipe_class fpu_reg_reg(regD dst, regD src)
5665%{
5666    instruction_count(2);
5667    dst    : S4(write);
5668    src    : S3(read);
5669    DECODE : S0(2);     // any 2 decoders
5670    FPU    : S3;
5671%}
5672
5673// Float reg-reg-reg operation
// Three instructions: both sources are read in S3, dst is written in S4.
// Reserves three decoder slots in S0 and FPU : S3(2).
5674pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
5675%{
5676    instruction_count(3);
5677    dst    : S4(write);
5678    src1   : S3(read);
5679    src2   : S3(read);
5680    DECODE : S0(3);     // any 3 decoders
5681    FPU    : S3(2);
5682%}
5683
5684// Float reg-reg-reg-reg operation
// Four instructions: the three sources are read in S3, dst is written in S4.
// Reserves four decoder slots in S0 and FPU : S3(2).
5685pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
5686%{
5687    instruction_count(4);
5688    dst    : S4(write);
5689    src1   : S3(read);
5690    src2   : S3(read);
5691    src3   : S3(read);
5692    DECODE : S0(4);     // any 4 decoders
5693    FPU    : S3(2);
5694%}
5695
5696// Float reg-mem-reg-reg operation
// Like fpu_reg_reg_reg_reg, but the first source (src1) is a memory operand.
// D0 is reserved in S0 and three more decoder slots in S1 (four decode slots
// for the four instructions); a MEM resource is reserved in S3.
5697pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
5698%{
5699    instruction_count(4);
5700    dst    : S4(write);
5701    src1   : S3(read);
5702    src2   : S3(read);
5703    src3   : S3(read);
5704    DECODE : S1(3);     // any 3 decoders
5705    D0     : S0;        // Big decoder only
5706    FPU    : S3(2);
5707    MEM    : S3;        // any mem
5708%}
5709
5710// Float reg-mem operation
// Two instructions with a memory source.  The memory operand is read in S3,
// the FPU is reserved in S4 and dst is written in S5, one stage later than
// in the register-only fpu_* classes.
5711pipe_class fpu_reg_mem(regD dst, memory mem)
5712%{
5713    instruction_count(2);
5714    dst    : S5(write);
5715    mem    : S3(read);
5716    D0     : S0;        // big decoder only
5717    DECODE : S1;        // any decoder for FPU POP
5718    FPU    : S4;
5719    MEM    : S3;        // any mem
5720%}
5721
5722// Float reg-reg-mem operation
// Three-instruction variant of fpu_reg_mem with an additional register
// source (src1, read in S3).  D0 is reserved in S0 and two further decoder
// slots in S1.
5723pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
5724%{
5725    instruction_count(3);
5726    dst    : S5(write);
5727    src1   : S3(read);
5728    mem    : S3(read);
5729    D0     : S0;        // big decoder only
5730    DECODE : S1(2);     // any decoder for FPU POP
5731    FPU    : S4;
5732    MEM    : S3;        // any mem
5733%}
5734
5735// Float mem-reg operation
// Two instructions with a memory operand listed first and a register source.
// No operand is declared as written: src is read in S5 and mem is read in
// S3.  Decode order is mirrored relative to fpu_reg_mem: any decoder in S0,
// then D0 in S1.
5736pipe_class fpu_mem_reg(memory mem, regD src)
5737%{
5738    instruction_count(2);
5739    src    : S5(read);
5740    mem    : S3(read);
5741    DECODE : S0;        // any decoder for FPU PUSH
5742    D0     : S1;        // big decoder only
5743    FPU    : S4;
5744    MEM    : S3;        // any mem
5745%}
5746
// Float mem-reg-reg operation
// Three instructions: a memory operand plus two register sources, all read
// in S3.  Two decoder slots in S0, D0 in S1, FPU in S4, one MEM in S3.
5747pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
5748%{
5749    instruction_count(3);
5750    src1   : S3(read);
5751    src2   : S3(read);
5752    mem    : S3(read);
5753    DECODE : S0(2);     // any decoder for FPU PUSH
5754    D0     : S1;        // big decoder only
5755    FPU    : S4;
5756    MEM    : S3;        // any mem
5757%}
5758
// Float mem-reg-mem operation
// Three instructions with two memory operands (mem, src2) and one register
// source (src1).  The sources are read in S3 and mem in S4.  Two D0 slots
// and one generic decoder are all reserved in S0; MEM : S3(2) covers the two
// memory operands.
5759pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
5760%{
5761    instruction_count(3);
5762    src1   : S3(read);
5763    src2   : S3(read);
5764    mem    : S4(read);
5765    DECODE : S0;        // any decoder for FPU PUSH
5766    D0     : S0(2);     // big decoder only
5767    FPU    : S4;
5768    MEM    : S3(2);     // any mem
5769%}
5770
// Float mem-mem operation
// Two instructions moving between two memory operands: src1 is read in S3,
// dst in S4 (dst is declared as read as well).  Reserves D0 : S0(2) and
// MEM : S3(2); no FPU resource is listed for this class.
5771pipe_class fpu_mem_mem(memory dst, memory src1)
5772%{
5773    instruction_count(2);
5774    src1   : S3(read);
5775    dst    : S4(read);
5776    D0     : S0(2);     // big decoder only
5777    MEM    : S3(2);     // any mem
5778%}
5779
// Float mem-mem-mem operation
// Three instructions over three memory operands: both sources are read in
// S3, dst in S4 (declared as read).  Reserves D0 : S0(3), the FPU in S4 and
// MEM : S3(3).
5780pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
5781%{
5782    instruction_count(3);
5783    src1   : S3(read);
5784    src2   : S3(read);
5785    dst    : S4(read);
5786    D0     : S0(3);     // big decoder only
5787    FPU    : S4;
5788    MEM    : S3(3);     // any mem
5789%}
5790
// Float mem-reg-constant operation
// Three instructions with a memory operand and one register source, both
// read in S4.  MEM : S3(2) reserves two memory slots although only one
// memory operand is declared.
// NOTE(review): the second MEM slot is presumably for loading the constant,
// going by the class name -- confirm.
5791pipe_class fpu_mem_reg_con(memory mem, regD src1)
5792%{
5793    instruction_count(3);
5794    src1   : S4(read);
5795    mem    : S4(read);
5796    DECODE : S0;        // any decoder for FPU PUSH
5797    D0     : S0(2);     // big decoder only
5798    FPU    : S4;
5799    MEM    : S3(2);     // any mem
5800%}
5801
5802// Float load constant
// Two instructions producing dst (written in S5) with no declared source
// operand.  The constant is fetched through the MEM resource in S3, with D0
// reserved in S0 for that load.
5803pipe_class fpu_reg_con(regD dst)
5804%{
5805    instruction_count(2);
5806    dst    : S5(write);
5807    D0     : S0;        // big decoder only for the load
5808    DECODE : S1;        // any decoder for FPU POP
5809    FPU    : S4;
5810    MEM    : S3;        // any mem
5811%}
5812
5813// Float reg-reg operation with a loaded constant
// Three-instruction variant of fpu_reg_con that additionally reads a
// register source (src) in S3; dst is written in S5.  D0 is reserved in S0
// for the load and two further decoder slots in S1.
5814pipe_class fpu_reg_reg_con(regD dst, regD src)
5815%{
5816    instruction_count(3);
5817    dst    : S5(write);
5818    src    : S3(read);
5819    D0     : S0;        // big decoder only for the load
5820    DECODE : S1(2);     // any decoder for FPU POP
5821    FPU    : S4;
5822    MEM    : S3;        // any mem
5823%}
5824
5825// UnConditional branch
// Single instruction that only reserves the BR resource in S3; the label
// operand has no read/write stage declared and no decoder is listed.
5826pipe_class pipe_jmp(label labl)
5827%{
5828    single_instruction;
5829    BR   : S3;
5830%}
5831
5832// Conditional branch
// Single instruction: the flags (cr) are read in S1 and the BR resource is
// reserved in S3.  The cmp and labl operands have no stage declared.
5833pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
5834%{
5835    single_instruction;
5836    cr    : S1(read);
5837    BR    : S3;
5838%}
5839
5840// Allocation idiom
// Compare-and-exchange style sequence modelled as one serializing
// instruction with a fixed latency of 6.  heap_ptr is read in S3 and dst is
// written in S5.  Resources: three decoder slots in S0, D0 in S2, MEM in S3,
// two ALU slots in S3 and BR in S5.
5841pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
5842%{
5843    instruction_count(1); force_serialization;
5844    fixed_latency(6);
5845    heap_ptr : S3(read);
5846    DECODE   : S0(3);
5847    D0       : S2;
5848    MEM      : S3;
5849    ALU      : S3(2);
5850    dst      : S5(write);
5851    BR       : S5;
5852%}
5853
5854// Generic big/slow expanded idiom
// Catch-all class with no operands: 10 instructions spanning multiple
// bundles, serializing, with a fixed latency of 100.  Reserves D0 : S0(2)
// and MEM : S3(2).  Many instructs in this file (e.g. the XMM loads and
// stores) use it in place of a more precise pipeline description.
5855pipe_class pipe_slow()
5856%{
5857    instruction_count(10); multiple_bundles; force_serialization;
5858    fixed_latency(100);
5859    D0  : S0(2);
5860    MEM : S3(2);
5861%}
5862
5863// The real do-nothing guy
// Pipe class with no operands, no resources and an instruction count of 0.
// It is the class bound to MachNop in the define block that follows.
5864pipe_class empty()
5865%{
5866    instruction_count(0);
5867%}
5868
5869// Define the class for the Nop node
// Binds MachNop to the zero-instruction `empty` pipe class declared above.
5870define
5871%{
5872   MachNop = empty;
5873%}
5874
5875%}
5876
5877//----------INSTRUCTIONS-------------------------------------------------------
5878//
5879// match      -- States which machine-independent subtree may be replaced
5880//               by this instruction.
5881// ins_cost   -- The estimated cost of this instruction is used by instruction
5882//               selection to identify a minimum cost tree of machine
5883//               instructions that matches a tree of machine-independent
5884//               instructions.
5885// format     -- A string providing the disassembly for this instruction.
5886//               The value of an instruction's operand may be inserted
5887//               by referring to it with a '$' prefix.
5888// opcode     -- Three instruction opcodes may be provided.  These are referred
5889//               to within an encode class as $primary, $secondary, and $tertiary
5890//               respectively.  The primary opcode is commonly used to
5891//               indicate the type of machine instruction, while secondary
5892//               and tertiary are often used for prefix options or addressing
5893//               modes.
5894// ins_encode -- A list of encode classes with parameters. The encode class
5895//               name must have been defined in an 'enc_class' specification
5896//               in the encode section of the architecture description.
5897
5898
5899//----------Load/Store/Move Instructions---------------------------------------
5900//----------Load Instructions--------------------------------------------------
5901
5902// Load Byte (8 bit signed)
// Matches a LoadB from memory into an int register and emits movsbl
// (sign-extending byte load, opcode 0F BE).
5903instruct loadB(rRegI dst, memory mem)
5904%{
5905  match(Set dst (LoadB mem));
5906
5907  ins_cost(125);
5908  format %{ "movsbl  $dst, $mem\t# byte" %}
5909  opcode(0x0F, 0xBE);
  // REX prefix, then the primary and secondary opcode bytes, then the
  // register/memory operand encoding.
5910  ins_encode(REX_reg_mem(dst, mem), OpcP, OpcS, reg_mem(dst, mem));
5911  ins_pipe(ialu_reg_mem);
5912%}
5913
5914// Load Byte (8 bit signed) into long
5915// instruct loadB2L(rRegL dst, memory mem)
5916// %{
5917//   match(Set dst (ConvI2L (LoadB mem)));
5918
5919//   ins_cost(125);
5920//   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
5921//   opcode(0x0F, 0xBE);
5922//   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, OpcS, reg_mem(dst, mem));
5923//   ins_pipe(ialu_reg_mem);
5924// %}
5925
5926// Load Byte (8 bit UNsigned)
// There is no separate unsigned-byte load node matched here: the rule
// recognizes the subtree (AndI (LoadB mem) bytemask), with bytemask typed as
// immI_255, and replaces the load+mask pair with a single zero-extending
// movzbl (opcode 0F B6).
5927instruct loadUB(rRegI dst, memory mem, immI_255 bytemask)
5928%{
5929  match(Set dst (AndI (LoadB mem) bytemask));
5930
5931  ins_cost(125);
5932  format %{ "movzbl  $dst, $mem\t# ubyte" %}
5933  opcode(0x0F, 0xB6);
5934  ins_encode(REX_reg_mem(dst, mem), OpcP, OpcS, reg_mem(dst, mem));
5935  ins_pipe(ialu_reg_mem);
5936%}
5937
5938// Load Byte (8 bit UNsigned) into long
5939// instruct loadUB2L(rRegL dst, memory mem, immI_255 bytemask)
5940// %{
5941//   match(Set dst (ConvI2L (AndI (LoadB mem) bytemask)));
5942
5943//   ins_cost(125);
5944//   format %{ "movzbl  $dst, $mem\t# ubyte -> long" %}
5945//   opcode(0x0F, 0xB6);
5946//   ins_encode(REX_reg_mem(dst, mem), OpcP, OpcS, reg_mem(dst, mem));
5947//   ins_pipe(ialu_reg_mem);
5948// %}
5949
5950// Load Short (16 bit signed)
5951instruct loadS(rRegI dst, memory mem)
5952%{
5953  match(Set dst (LoadS mem));
5954
5955  ins_cost(125); // XXX
5956  format %{ "movswl $dst, $mem\t# short" %}
5957  opcode(0x0F, 0xBF);
5958  ins_encode(REX_reg_mem(dst, mem), OpcP, OpcS, reg_mem(dst, mem));
5959  ins_pipe(ialu_reg_mem);
5960%}
5961
5962// Load Short (16 bit signed) into long
5963// instruct loadS2L(rRegL dst, memory mem)
5964// %{
5965//   match(Set dst (ConvI2L (LoadS mem)));
5966
5967//   ins_cost(125); // XXX
5968//   format %{ "movswq $dst, $mem\t# short -> long" %}
5969//   opcode(0x0F, 0xBF);
5970//   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, OpcS, reg_mem(dst, mem));
5971//   ins_pipe(ialu_reg_mem);
5972// %}
5973
5974// Load Char (16 bit UNsigned)
5975instruct loadC(rRegI dst, memory mem)
5976%{
5977  match(Set dst (LoadC mem));
5978
5979  ins_cost(125);
5980  format %{ "movzwl  $dst, $mem\t# char" %}
5981  opcode(0x0F, 0xB7);
5982  ins_encode(REX_reg_mem(dst, mem), OpcP, OpcS, reg_mem(dst, mem));
5983  ins_pipe(ialu_reg_mem);
5984%}
5985
5986// Load Char (16 bit UNsigned) into long
5987// instruct loadC2L(rRegL dst, memory mem)
5988// %{
5989//   match(Set dst (ConvI2L (LoadC mem)));
5990
5991//   ins_cost(125);
5992//   format %{ "movzwl  $dst, $mem\t# char -> long" %}
5993//   opcode(0x0F, 0xB7);
5994//   ins_encode(REX_reg_mem(dst, mem), OpcP, OpcS, reg_mem(dst, mem));
5995//   ins_pipe(ialu_reg_mem);
5996// %}
5997
5998// Load Integer
5999instruct loadI(rRegI dst, memory mem)
6000%{
6001  match(Set dst (LoadI mem));
6002
6003  ins_cost(125); // XXX
6004  format %{ "movl    $dst, $mem\t# int" %}
6005  opcode(0x8B);
6006  ins_encode(REX_reg_mem(dst, mem), OpcP, reg_mem(dst, mem));
6007  ins_pipe(ialu_reg_mem);
6008%}
6009
6010// Load Long
6011instruct loadL(rRegL dst, memory mem)
6012%{
6013  match(Set dst (LoadL mem));
6014
6015  ins_cost(125); // XXX
6016  format %{ "movq    $dst, $mem\t# long" %}
6017  opcode(0x8B);
6018  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6019  ins_pipe(ialu_reg_mem); // XXX
6020%}
6021
6022// Load Range
6023instruct loadRange(rRegI dst, memory mem)
6024%{
6025  match(Set dst (LoadRange mem));
6026
6027  ins_cost(125); // XXX
6028  format %{ "movl    $dst, $mem\t# range" %}
6029  opcode(0x8B);
6030  ins_encode(REX_reg_mem(dst, mem), OpcP, reg_mem(dst, mem));
6031  ins_pipe(ialu_reg_mem);
6032%}
6033
6034// Load Pointer
6035instruct loadP(rRegP dst, memory mem)
6036%{
6037  match(Set dst (LoadP mem));
6038
6039  ins_cost(125); // XXX
6040  format %{ "movq    $dst, $mem\t# ptr" %}
6041  opcode(0x8B);
6042  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6043  ins_pipe(ialu_reg_mem); // XXX
6044%}
6045
6046// Load Compressed Pointer
// Matches LoadN and loads the 32-bit value into a narrow-oop register
// (rRegN) with a plain movl; no decode is performed here.
// NOTE(review): the only instruction emitted is movl, which does not modify
// the condition codes on x86, so the KILL cr effect looks more conservative
// than necessary -- confirm before relying on or removing it.
6047instruct loadN(rRegN dst, memory mem, rFlagsReg cr)
6048%{
6049   match(Set dst (LoadN mem));
6050   effect(KILL cr);
6051
6052   ins_cost(125); // XXX
6053   format %{ "movl    $dst, $mem\t# compressed ptr" %}
6054   ins_encode %{
     // Build the effective address from the memory operand's components.
6055     Address addr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
6056     Register dst = as_Register($dst$$reg);
6057     __ movl(dst, addr);
6058   %}
6059   ins_pipe(ialu_reg_mem); // XXX
6060%}
6061
6062
6063// Load Klass Pointer
// Uncompressed form: selected when the type of the address input is not
// narrow (see predicate); loadKlassComp below handles the narrow case.
// Emits a 64-bit movq (opcode 8B with the wide REX prefix).
6064instruct loadKlass(rRegP dst, memory mem)
6065%{
6066  match(Set dst (LoadKlass mem));
6067  predicate(!n->in(MemNode::Address)->bottom_type()->is_narrow());
6068
6069  ins_cost(125); // XXX
6070  format %{ "movq    $dst, $mem\t# class" %}
6071  opcode(0x8B);
6072  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6073  ins_pipe(ialu_reg_mem); // XXX
6074%}
6075
6076// Load Klass Pointer (compressed form)
// Selected when the type of the address input is narrow (see predicate);
// loadKlass above handles the non-narrow case.  Emits a 32-bit movl followed
// by decode_heap_oop on the same register, so the result lives in a full
// pointer register (rRegP).
// NOTE(review): the format string lists only the movl, not the decode step.
// NOTE(review): no rFlagsReg operand / KILL effect is declared here.  If
// decode_heap_oop modifies the condition codes, this instruct needs
// effect(KILL cr) -- confirm against the MacroAssembler implementation.
6077instruct loadKlassComp(rRegP dst, memory mem)
6078%{
6079  match(Set dst (LoadKlass mem));
6080  predicate(n->in(MemNode::Address)->bottom_type()->is_narrow());
6081
6082  ins_cost(125); // XXX
6083  format %{ "movl    $dst, $mem\t# compressed class" %}
6084  ins_encode %{
6085    Address addr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
6086    Register dst = as_Register($dst$$reg);
6087    __ movl(dst, addr);
6088    // klass is never null in the header but this is generated for all
6089    // klass loads not just the _klass field in the header.
6090    __ decode_heap_oop(dst);
6091  %}
6092  ins_pipe(ialu_reg_mem); // XXX
6093%}
6094
6095// Load Float
6096instruct loadF(regF dst, memory mem)
6097%{
6098  match(Set dst (LoadF mem));
6099
6100  ins_cost(145); // XXX
6101  format %{ "movss   $dst, $mem\t# float" %}
6102  opcode(0xF3, 0x0F, 0x10);
6103  ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6104  ins_pipe(pipe_slow); // XXX
6105%}
6106
6107// Load Double
// Variant used when UseXmmLoadAndClearUpper is false: emits movlpd
// (66 0F 12).  The loadD instruct below matches the same LoadD node when the
// flag is true, so exactly one of the two is eligible at a time.
// NOTE(review): going by the flag name, movlpd presumably leaves the upper
// half of the XMM register untouched while movsd clears it -- confirm
// against the instruction set reference.
6108instruct loadD_partial(regD dst, memory mem)
6109%{
6110  predicate(!UseXmmLoadAndClearUpper);
6111  match(Set dst (LoadD mem));
6112
6113  ins_cost(145); // XXX
6114  format %{ "movlpd  $dst, $mem\t# double" %}
6115  opcode(0x66, 0x0F, 0x12);
  // The mandatory prefix (primary opcode) is emitted before the REX prefix.
6116  ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6117  ins_pipe(pipe_slow); // XXX
6118%}
6119
// Load Double
// Variant used when UseXmmLoadAndClearUpper is true: emits movsd (F2 0F 10).
// Complements loadD_partial, which matches the same LoadD node when the flag
// is false.
6120instruct loadD(regD dst, memory mem)
6121%{
6122  predicate(UseXmmLoadAndClearUpper);
6123  match(Set dst (LoadD mem));
6124
6125  ins_cost(145); // XXX
6126  format %{ "movsd   $dst, $mem\t# double" %}
6127  opcode(0xF2, 0x0F, 0x10);
  // The mandatory prefix (primary opcode) is emitted before the REX prefix.
6128  ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6129  ins_pipe(pipe_slow); // XXX
6130%}
6131
6132// Load Aligned Packed Byte to XMM register
6133instruct loadA8B(regD dst, memory mem) %{
6134  match(Set dst (Load8B mem));
6135  ins_cost(125);
6136  format %{ "MOVQ  $dst,$mem\t! packed8B" %}
6137  ins_encode( movq_ld(dst, mem));
6138  ins_pipe( pipe_slow );
6139%}
6140
6141// Load Aligned Packed Short to XMM register
6142instruct loadA4S(regD dst, memory mem) %{
6143  match(Set dst (Load4S mem));
6144  ins_cost(125);
6145  format %{ "MOVQ  $dst,$mem\t! packed4S" %}
6146  ins_encode( movq_ld(dst, mem));
6147  ins_pipe( pipe_slow );
6148%}
6149
6150// Load Aligned Packed Char to XMM register
6151instruct loadA4C(regD dst, memory mem) %{
6152  match(Set dst (Load4C mem));
6153  ins_cost(125);
6154  format %{ "MOVQ  $dst,$mem\t! packed4C" %}
6155  ins_encode( movq_ld(dst, mem));
6156  ins_pipe( pipe_slow );
6157%}
6158
6159// Load Aligned Packed Integer to XMM register
// Matches Load2I and uses the shared movq_ld encoding, like the other
// packed loads in this group.
// NOTE(review): the sibling instructs are named loadA8B / loadA4S / loadA4C /
// loadA2F; "load2IU" breaks that pattern.  The name is left unchanged here
// because other parts of the description may refer to it.
6160instruct load2IU(regD dst, memory mem) %{
6161  match(Set dst (Load2I mem));
6162  ins_cost(125);
6163  format %{ "MOVQ  $dst,$mem\t! packed2I" %}
6164  ins_encode( movq_ld(dst, mem));
6165  ins_pipe( pipe_slow );
6166%}
6167
6168// Load Aligned Packed Single to XMM
6169instruct loadA2F(regD dst, memory mem) %{
6170  match(Set dst (Load2F mem));
6171  ins_cost(145);
6172  format %{ "MOVQ  $dst,$mem\t! packed2F" %}
6173  ins_encode( movq_ld(dst, mem));
6174  ins_pipe( pipe_slow );
6175%}
6176
6177// Load Effective Address
6178instruct leaP8(rRegP dst, indOffset8 mem)
6179%{
6180  match(Set dst mem);
6181
6182  ins_cost(110); // XXX
6183  format %{ "leaq    $dst, $mem\t# ptr 8" %}
6184  opcode(0x8D);
6185  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6186  ins_pipe(ialu_reg_reg_fat);
6187%}
6188
6189instruct leaP32(rRegP dst, indOffset32 mem)
6190%{
6191  match(Set dst mem);
6192
6193  ins_cost(110);
6194  format %{ "leaq    $dst, $mem\t# ptr 32" %}
6195  opcode(0x8D);
6196  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6197  ins_pipe(ialu_reg_reg_fat);
6198%}
6199
6200// instruct leaPIdx(rRegP dst, indIndex mem)
6201// %{
6202//   match(Set dst mem);
6203
6204//   ins_cost(110);
6205//   format %{ "leaq    $dst, $mem\t# ptr idx" %}
6206//   opcode(0x8D);
6207//   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6208//   ins_pipe(ialu_reg_reg_fat);
6209// %}
6210
6211instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
6212%{
6213  match(Set dst mem);
6214
6215  ins_cost(110);
6216  format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
6217  opcode(0x8D);
6218  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6219  ins_pipe(ialu_reg_reg_fat);
6220%}
6221
6222instruct leaPIdxScale(rRegP dst, indIndexScale mem)
6223%{
6224  match(Set dst mem);
6225
6226  ins_cost(110);
6227  format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
6228  opcode(0x8D);
6229  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6230  ins_pipe(ialu_reg_reg_fat);
6231%}
6232
6233instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
6234%{
6235  match(Set dst mem);
6236
6237  ins_cost(110);
6238  format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
6239  opcode(0x8D);
6240  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6241  ins_pipe(ialu_reg_reg_fat);
6242%}
6243
6244instruct loadConI(rRegI dst, immI src)
6245%{
6246  match(Set dst src);
6247
6248  format %{ "movl    $dst, $src\t# int" %}
6249  ins_encode(load_immI(dst, src));
6250  ins_pipe(ialu_reg_fat); // XXX
6251%}
6252
// Load int constant zero
// Specialization of loadConI for the immI0 operand: zeroes the register with
// xorl dst, dst (opcode 33) instead of a move-immediate.  XOR modifies the
// condition codes, hence the KILL cr effect.  The explicit cost of 50 is
// presumably what makes the matcher prefer this rule over loadConI, which
// states no cost -- confirm the default cost.
6253instruct loadConI0(rRegI dst, immI0 src, rFlagsReg cr)
6254%{
6255  match(Set dst src);
6256  effect(KILL cr);
6257
6258  ins_cost(50);
6259  format %{ "xorl    $dst, $dst\t# int" %}
6260  opcode(0x33); /* + rd */
6261  ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
6262  ins_pipe(ialu_reg);
6263%}
6264
6265instruct loadConL(rRegL dst, immL src)
6266%{
6267  match(Set dst src);
6268
6269  ins_cost(150);
6270  format %{ "movq    $dst, $src\t# long" %}
6271  ins_encode(load_immL(dst, src));
6272  ins_pipe(ialu_reg);
6273%}
6274
// Load long constant zero
// Zeroes a long register with the 32-bit xorl form: the encoding uses
// REX_reg_reg rather than a *_wide REX variant, and the format string says
// xorl.  This relies on x86-64 zero-extending the result of 32-bit register
// operations into the upper 32 bits.  XOR modifies the condition codes,
// hence the KILL cr effect.
6275instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
6276%{
6277  match(Set dst src);
6278  effect(KILL cr);
6279
6280  ins_cost(50);
6281  format %{ "xorl    $dst, $dst\t# long" %}
6282  opcode(0x33); /* + rd */
6283  ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
6284  ins_pipe(ialu_reg); // XXX
6285%}
6286
6287instruct loadConUL32(rRegL dst, immUL32 src)
6288%{
6289  match(Set dst src);
6290
6291  ins_cost(60);
6292  format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
6293  ins_encode(load_immUL32(dst, src));
6294  ins_pipe(ialu_reg);
6295%}
6296
6297instruct loadConL32(rRegL dst, immL32 src)
6298%{
6299  match(Set dst src);
6300
6301  ins_cost(70);
6302  format %{ "movq    $dst, $src\t# long (32-bit)" %}
6303  ins_encode(load_immL32(dst, src));
6304  ins_pipe(ialu_reg);
6305%}
6306
6307instruct loadConP(rRegP dst, immP src)
6308%{
6309  match(Set dst src);
6310
6311  format %{ "movq    $dst, $src\t# ptr" %}
6312  ins_encode(load_immP(dst, src));
6313  ins_pipe(ialu_reg_fat); // XXX
6314%}
6315
6316instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
6317%{
6318  match(Set dst src);
6319  effect(KILL cr);
6320
6321  ins_cost(50);
6322  format %{ "xorl    $dst, $dst\t# ptr" %}
6323  opcode(0x33); /* + rd */
6324  ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
6325  ins_pipe(ialu_reg);
6326%}
6327
// Load pointer constant that fits in a positive 32-bit value
// Uses the load_immP31 encoding; per the format string this is a 32-bit
// movl.  Cost 60 places it between the zeroing form loadConP0 (50) and the
// generic loadConP, which states no cost.
// NOTE(review): a movl of an immediate does not modify the condition codes
// on x86, so the KILL cr effect looks more conservative than necessary --
// confirm against the load_immP31 encoding class.
6328instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
6329%{
6330  match(Set dst src);
6331  effect(KILL cr);
6332
6333  ins_cost(60);
6334  format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
6335  ins_encode(load_immP31(dst, src));
6336  ins_pipe(ialu_reg);
6337%}
6338
6339instruct loadConF(regF dst, immF src)
6340%{
6341  match(Set dst src);
6342  ins_cost(125);
6343
6344  format %{ "movss   $dst, [$src]" %}
6345  ins_encode(load_conF(dst, src));
6346  ins_pipe(pipe_slow);
6347%}
6348
// Load compressed-pointer constant zero
// Zeroes a narrow-oop register with xorq dst, dst.  XOR modifies the
// condition codes, hence the KILL cr effect.  The format string prints the
// register as both operands so that the listing matches the instruction
// actually emitted, as the other zeroing instructs (loadConI0, loadConL0,
// loadConP0) do.
6349instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
6350  match(Set dst src);
6351  effect(KILL cr);
6352  format %{ "xorq    $dst, $dst\t# compressed ptr" %}
6353  ins_encode %{
6354    Register dst = $dst$$Register;
6355    __ xorq(dst, dst);
6356  %}
6357  ins_pipe(ialu_reg);
6358%}
6359
// Load non-null compressed-pointer constant
// Materializes the full oop with movoop and then compresses it in place with
// encode_heap_oop_not_null.  A NULL constant is not expected here
// (ShouldNotReachHere); it is presumably always matched by loadConN0.
// NOTE(review): the format string shows a single movl, not the two-step
// sequence actually emitted.
// NOTE(review): no KILL cr effect is declared; if encode_heap_oop_not_null
// modifies the condition codes one is needed -- confirm against the
// MacroAssembler implementation.
6360instruct loadConN(rRegN dst, immN src) %{
6361  match(Set dst src);
6362
6363  ins_cost(125);
6364  format %{ "movl    $dst, $src\t# compressed ptr" %}
6365  ins_encode %{
6366    address con = (address)$src$$constant;
6367    Register dst = $dst$$Register;
6368    if (con == NULL) {
6369      ShouldNotReachHere();
6370    } else {
6371      __ movoop(dst, (jobject)$src$$constant);
6372      __ encode_heap_oop_not_null(dst);
6373    }
6374  %}
6375  ins_pipe(ialu_reg_fat); // XXX
6376%}
6377
6378instruct loadConF0(regF dst, immF0 src)
6379%{
6380  match(Set dst src);
6381  ins_cost(100);
6382
6383  format %{ "xorps   $dst, $dst\t# float 0.0" %}
6384  opcode(0x0F, 0x57);
6385  ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
6386  ins_pipe(pipe_slow);
6387%}
6388
6389// Use the same format since predicate() can not be used here.
6390instruct loadConD(regD dst, immD src)
6391%{
6392  match(Set dst src);
6393  ins_cost(125);
6394
6395  format %{ "movsd   $dst, [$src]" %}
6396  ins_encode(load_conD(dst, src));
6397  ins_pipe(pipe_slow);
6398%}
6399
6400instruct loadConD0(regD dst, immD0 src)
6401%{
6402  match(Set dst src);
6403  ins_cost(100);
6404
6405  format %{ "xorpd   $dst, $dst\t# double 0.0" %}
6406  opcode(0x66, 0x0F, 0x57);
6407  ins_encode(OpcP, REX_reg_reg(dst, dst), OpcS, OpcT, reg_reg(dst, dst));
6408  ins_pipe(pipe_slow);
6409%}
6410
6411instruct loadSSI(rRegI dst, stackSlotI src)
6412%{
6413  match(Set dst src);
6414
6415  ins_cost(125);
6416  format %{ "movl    $dst, $src\t# int stk" %}
6417  opcode(0x8B);
6418  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
6419  ins_pipe(ialu_reg_mem);
6420%}
6421
6422instruct loadSSL(rRegL dst, stackSlotL src)
6423%{
6424  match(Set dst src);
6425
6426  ins_cost(125);
6427  format %{ "movq    $dst, $src\t# long stk" %}
6428  opcode(0x8B);
6429  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
6430  ins_pipe(ialu_reg_mem);
6431%}
6432
6433instruct loadSSP(rRegP dst, stackSlotP src)
6434%{
6435  match(Set dst src);
6436
6437  ins_cost(125);
6438  format %{ "movq    $dst, $src\t# ptr stk" %}
6439  opcode(0x8B);
6440  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
6441  ins_pipe(ialu_reg_mem);
6442%}
6443
6444instruct loadSSF(regF dst, stackSlotF src)
6445%{
6446  match(Set dst src);
6447
6448  ins_cost(125);
6449  format %{ "movss   $dst, $src\t# float stk" %}
6450  opcode(0xF3, 0x0F, 0x10);
6451  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
6452  ins_pipe(pipe_slow); // XXX
6453%}
6454
6455// Use the same format since predicate() can not be used here.
// Load a double from a stack slot into an XMM register.  Unlike loadD /
// loadD_partial, there is a single rule here, and the choice of instruction
// is left to the movdbl macro.
// NOTE(review): movdbl presumably picks movsd or movlpd based on
// UseXmmLoadAndClearUpper, so the "movsd" in the format string may not match
// the emitted instruction -- confirm.
6456instruct loadSSD(regD dst, stackSlotD src)
6457%{
6458  match(Set dst src);
6459
6460  ins_cost(125);
6461  format %{ "movsd   $dst, $src\t# double stk" %}
6462  ins_encode  %{
    // The stack slot is addressed relative to rsp using the slot's
    // displacement.
6463    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
6464  %}
6465  ins_pipe(pipe_slow); // XXX
6466%}
6467
6468// Prefetch instructions.
6469// Must be safe to execute with invalid address (cannot fault).
6470
// Read prefetch, selected when ReadPrefetchInstr == 3.
// Encodes opcode 0F 0D with /0 in the ModRM reg field (the AMD PREFETCH
// form).  The other ReadPrefetchInstr values (0, 1, 2) select the 0F 18
// forms prefetchrNTA / prefetchrT0 / prefetchrT2 below.
6471instruct prefetchr( memory mem ) %{
6472  predicate(ReadPrefetchInstr==3);
6473  match(PrefetchRead mem);
6474  ins_cost(125);
6475
6476  format %{ "PREFETCHR $mem\t# Prefetch into level 1 cache" %}
6477  opcode(0x0F, 0x0D);     /* Opcode 0F 0D /0 */
6478  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
6479  ins_pipe(ialu_mem);
6480%}
6481
6482instruct prefetchrNTA( memory mem ) %{
6483  predicate(ReadPrefetchInstr==0);
6484  match(PrefetchRead mem);
6485  ins_cost(125);
6486
6487  format %{ "PREFETCHNTA $mem\t# Prefetch into non-temporal cache for read" %}
6488  opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
6489  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
6490  ins_pipe(ialu_mem);
6491%}
6492
6493instruct prefetchrT0( memory mem ) %{
6494  predicate(ReadPrefetchInstr==1);
6495  match(PrefetchRead mem);
6496  ins_cost(125);
6497
6498  format %{ "PREFETCHT0 $mem\t# prefetch into L1 and L2 caches for read" %}
6499  opcode(0x0F, 0x18); /* Opcode 0F 18 /1 */
6500  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
6501  ins_pipe(ialu_mem);
6502%}
6503
6504instruct prefetchrT2( memory mem ) %{
6505  predicate(ReadPrefetchInstr==2);
6506  match(PrefetchRead mem);
6507  ins_cost(125);
6508
6509  format %{ "PREFETCHT2 $mem\t# prefetch into L2 caches for read" %}
6510  opcode(0x0F, 0x18); /* Opcode 0F 18 /3 */
6511  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x03, mem));
6512  ins_pipe(ialu_mem);
6513%}
6514
// Write (allocation) prefetch, selected when AllocatePrefetchInstr == 3.
// Encodes opcode 0F 0D with /1 in the ModRM reg field (the AMD PREFETCHW
// form).  The other AllocatePrefetchInstr values (0, 1, 2) select the 0F 18
// forms prefetchwNTA / prefetchwT0 / prefetchwT2 below.
6515instruct prefetchw( memory mem ) %{
6516  predicate(AllocatePrefetchInstr==3);
6517  match(PrefetchWrite mem);
6518  ins_cost(125);
6519
6520  format %{ "PREFETCHW $mem\t# Prefetch into level 1 cache and mark modified" %}
6521  opcode(0x0F, 0x0D);     /* Opcode 0F 0D /1 */
6522  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
6523  ins_pipe(ialu_mem);
6524%}
6525
6526instruct prefetchwNTA( memory mem ) %{
6527  predicate(AllocatePrefetchInstr==0);
6528  match(PrefetchWrite mem);
6529  ins_cost(125);
6530
6531  format %{ "PREFETCHNTA $mem\t# Prefetch to non-temporal cache for write" %}
6532  opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
6533  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
6534  ins_pipe(ialu_mem);
6535%}
6536
6537instruct prefetchwT0( memory mem ) %{
6538  predicate(AllocatePrefetchInstr==1);
6539  match(PrefetchWrite mem);
6540  ins_cost(125);
6541
6542  format %{ "PREFETCHT0 $mem\t# Prefetch to level 1 and 2 caches for write" %}
6543  opcode(0x0F, 0x18);     /* Opcode 0F 18 /1 */
6544  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
6545  ins_pipe(ialu_mem);
6546%}
6547
6548instruct prefetchwT2( memory mem ) %{
6549  predicate(AllocatePrefetchInstr==2);
6550  match(PrefetchWrite mem);
6551  ins_cost(125);
6552
6553  format %{ "PREFETCHT2 $mem\t# Prefetch to level 2 cache for write" %}
6554  opcode(0x0F, 0x18);     /* Opcode 0F 18 /3 */
6555  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x03, mem));
6556  ins_pipe(ialu_mem);
6557%}
6558
6559//----------Store Instructions-------------------------------------------------
6560
6561// Store Byte
// Stores the low byte of an int register with movb (opcode 88).  This rule
// uses the REX_breg_mem prefix class instead of the REX_reg_mem class used
// by the wider stores.
// NOTE(review): presumably because some byte registers are only addressable
// with a REX prefix present -- confirm against the encoding class.
6562instruct storeB(memory mem, rRegI src)
6563%{
6564  match(Set mem (StoreB mem src));
6565
6566  ins_cost(125); // XXX
6567  format %{ "movb    $mem, $src\t# byte" %}
6568  opcode(0x88);
6569  ins_encode(REX_breg_mem(src, mem), OpcP, reg_mem(src, mem));
6570  ins_pipe(ialu_mem_reg);
6571%}
6572
6573// Store Char/Short
// 16-bit store (movw).  Uses the same opcode (89) as the 32-bit storeI
// below; the only difference in the encoding is the leading SizePrefix,
// which is emitted before the REX prefix.
6574instruct storeC(memory mem, rRegI src)
6575%{
6576  match(Set mem (StoreC mem src));
6577
6578  ins_cost(125); // XXX
6579  format %{ "movw    $mem, $src\t# char/short" %}
6580  opcode(0x89);
6581  ins_encode(SizePrefix, REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
6582  ins_pipe(ialu_mem_reg);
6583%}
6584
6585// Store Integer
6586instruct storeI(memory mem, rRegI src)
6587%{
6588  match(Set mem (StoreI mem src));
6589
6590  ins_cost(125); // XXX
6591  format %{ "movl    $mem, $src\t# int" %}
6592  opcode(0x89);
6593  ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
6594  ins_pipe(ialu_mem_reg);
6595%}
6596
6597// Store Long
6598instruct storeL(memory mem, rRegL src)
6599%{
6600  match(Set mem (StoreL mem src));
6601
6602  ins_cost(125); // XXX
6603  format %{ "movq    $mem, $src\t# long" %}
6604  opcode(0x89);
6605  ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
6606  ins_pipe(ialu_mem_reg); // XXX
6607%}
6608
6609// Store Pointer
6610instruct storeP(memory mem, any_RegP src)
6611%{
6612  match(Set mem (StoreP mem src));
6613
6614  ins_cost(125); // XXX
6615  format %{ "movq    $mem, $src\t# ptr" %}
6616  opcode(0x89);
6617  ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
6618  ins_pipe(ialu_mem_reg);
6619%}
6620
6621// Store NULL Pointer, mark word, or other simple pointer constant.
// Stores a pointer-sized immediate with movq (C7 /0 plus the wide REX
// prefix).  Only a 32-bit immediate is encoded (Con32), which the CPU
// sign-extends to 64 bits; the operand is therefore restricted to immP31.
// NOTE(review): immP31 is presumably limited to non-negative values that fit
// in 31 bits so that the sign extension is harmless -- confirm against the
// operand definition.
6622instruct storeImmP(memory mem, immP31 src)
6623%{
6624  match(Set mem (StoreP mem src));
6625
6626  ins_cost(125); // XXX
6627  format %{ "movq    $mem, $src\t# ptr" %}
6628  opcode(0xC7); /* C7 /0 */
6629  ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6630  ins_pipe(ialu_mem_imm);
6631%}
6632
6633// Store Compressed Pointer
// Matches StoreN and writes the 32-bit narrow-oop register to memory with a
// plain movl; no encoding is performed here.  The format string is tagged
// "compressed ptr" to match loadN and to distinguish it from the 64-bit
// storeP listing.
// NOTE(review): the only instruction emitted is movl, which does not modify
// the condition codes on x86, so the KILL cr effect looks more conservative
// than necessary -- confirm before relying on or removing it.
6634instruct storeN(memory mem, rRegN src, rFlagsReg cr)
6635%{
6636  match(Set mem (StoreN mem src));
6637  effect(KILL cr);
6638
6639  ins_cost(125); // XXX
6640  format %{ "movl    $mem, $src\t# compressed ptr" %}
6641  ins_encode %{
    // Build the effective address from the memory operand's components.
6642    Address addr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
6643    Register src = as_Register($src$$reg);
6644    __ movl(addr, src);
6645  %}
6646  ins_pipe(ialu_mem_reg);
6647%}
6648
6649// Store Integer Immediate
6650instruct storeImmI(memory mem, immI src)
6651%{
6652  match(Set mem (StoreI mem src));
6653
6654  ins_cost(150);
6655  format %{ "movl    $mem, $src\t# int" %}
6656  opcode(0xC7); /* C7 /0 */
6657  ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6658  ins_pipe(ialu_mem_imm);
6659%}
6660
6661// Store Long Immediate
6662instruct storeImmL(memory mem, immL32 src)
6663%{
6664  match(Set mem (StoreL mem src));
6665
6666  ins_cost(150);
6667  format %{ "movq    $mem, $src\t# long" %}
6668  opcode(0xC7); /* C7 /0 */
6669  ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6670  ins_pipe(ialu_mem_imm);
6671%}
6672
6673// Store Short/Char Immediate
// Only eligible when UseStoreImmI16 is set.  Encoded as the 32-bit
// store-immediate (C7 /0) preceded by SizePrefix, with a 16-bit immediate
// (Con16) instead of a 32-bit one.
6674instruct storeImmI16(memory mem, immI16 src)
6675%{
6676  predicate(UseStoreImmI16);
6677  match(Set mem (StoreC mem src));
6678
6679  ins_cost(150);
6680  format %{ "movw    $mem, $src\t# short/char" %}
6681  opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
6682  ins_encode(SizePrefix, REX_mem(mem), OpcP, RM_opc_mem(0x00, mem),Con16(src));
6683  ins_pipe(ialu_mem_imm);
6684%}
6685
6686// Store Byte Immediate
// Stores an 8-bit immediate with movb (C6 /0).  The immediate is emitted
// through the Con8or32 encoding class; the operand type immI8 restricts the
// constants this rule can match.
6687instruct storeImmB(memory mem, immI8 src)
6688%{
6689  match(Set mem (StoreB mem src));
6690
6691  ins_cost(150); // XXX
6692  format %{ "movb    $mem, $src\t# byte" %}
6693  opcode(0xC6); /* C6 /0 */
6694  ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
6695  ins_pipe(ialu_mem_imm);
6696%}
6697
6698// Store Aligned Packed Byte XMM register to memory
6699instruct storeA8B(memory mem, regD src) %{
6700  match(Set mem (Store8B mem src));
6701  ins_cost(145);
6702  format %{ "MOVQ  $mem,$src\t! packed8B" %}
6703  ins_encode( movq_st(mem, src));
6704  ins_pipe( pipe_slow );
6705%}
6706
6707// Store Aligned Packed Char/Short XMM register to memory
6708instruct storeA4C(memory mem, regD src) %{
6709  match(Set mem (Store4C mem src));
6710  ins_cost(145);
6711  format %{ "MOVQ  $mem,$src\t! packed4C" %}
6712  ins_encode( movq_st(mem, src));
6713  ins_pipe( pipe_slow );
6714%}
6715
6716// Store Aligned Packed Integer XMM register to memory
6717instruct storeA2I(memory mem, regD src) %{
6718  match(Set mem (Store2I mem src));
6719  ins_cost(145);
6720  format %{ "MOVQ  $mem,$src\t! packed2I" %}
6721  ins_encode( movq_st(mem, src));
6722  ins_pipe( pipe_slow );
6723%}
6724
6725// Store CMS card-mark Immediate
// Matches StoreCM only when the stored value is the constant zero (immI0)
// and emits the same byte store-immediate encoding as storeImmB (C6 /0).
// No register form of StoreCM appears in this part of the file.
6726instruct storeImmCM0(memory mem, immI0 src)
6727%{
6728  match(Set mem (StoreCM mem src));
6729
6730  ins_cost(150); // XXX
6731  format %{ "movb    $mem, $src\t# CMS card-mark byte 0" %}
6732  opcode(0xC6); /* C6 /0 */
6733  ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
6734  ins_pipe(ialu_mem_imm);
6735%}
6736
6737// Store Aligned Packed Single Float XMM register to memory
// Matches Store2F from a regD operand; emitted through the movq_st encoding
// (formatted as MOVQ).
6738instruct storeA2F(memory mem, regD src) %{
6739  match(Set mem (Store2F mem src));
6740  ins_cost(145);
6741  format %{ "MOVQ  $mem,$src\t! packed2F" %}
6742  ins_encode( movq_st(mem, src));
6743  ins_pipe( pipe_slow );
6744%}
6745
6746// Store Float
// Matches StoreF from a regF operand; formatted as "movss" with opcode bytes
// F3 0F 11.  The F3 byte (OpcP) is emitted before the REX prefix.
6747instruct storeF(memory mem, regF src)
6748%{
6749  match(Set mem (StoreF mem src));
6750
6751  ins_cost(95); // XXX
6752  format %{ "movss   $mem, $src\t# float" %}
6753  opcode(0xF3, 0x0F, 0x11);
6754  ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
6755  ins_pipe(pipe_slow); // XXX
6756%}
6757
6758// Store immediate Float value (it is faster than store from XMM register)
// Matches StoreF of an immF constant with cost 50 (below storeF's 95), so it
// is preferred for constants.  Formatted as "movl": opcode C7 /0 with the
// constant emitted via Con32F_as_bits (presumably the raw 32-bit float
// pattern -- confirm against the encoding class).
6759instruct storeF_imm(memory mem, immF src)
6760%{
6761  match(Set mem (StoreF mem src));
6762
6763  ins_cost(50);
6764  format %{ "movl    $mem, $src\t# float" %}
6765  opcode(0xC7); /* C7 /0 */
6766  ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
6767  ins_pipe(ialu_mem_imm);
6768%}
6769
6770// Store Double
// Matches StoreD from a regD operand; formatted as "movsd" with opcode bytes
// F2 0F 11.  The F2 byte (OpcP) is emitted before the REX prefix.
6771instruct storeD(memory mem, regD src)
6772%{
6773  match(Set mem (StoreD mem src));
6774
6775  ins_cost(95); // XXX
6776  format %{ "movsd   $mem, $src\t# double" %}
6777  opcode(0xF2, 0x0F, 0x11);
6778  ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
6779  ins_pipe(pipe_slow); // XXX
6780%}
6781
6782// Store immediate double 0.0 (it is faster than store from XMM register)
// Matches StoreD of the immD0 operand with cost 50 (below storeD's 95).
// Formatted as "movq": opcode C7 /0 under REX_mem_wide with a 32-bit
// immediate from Con32F_as_bits.
// NOTE(review): this relies on the constant's bit pattern being all zeroes so
// that a 32-bit immediate suffices -- confirm immD0 admits only +0.0.
6783instruct storeD0_imm(memory mem, immD0 src)
6784%{
6785  match(Set mem (StoreD mem src));
6786
6787  ins_cost(50);
6788  format %{ "movq    $mem, $src\t# double 0." %}
6789  opcode(0xC7); /* C7 /0 */
6790  ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
6791  ins_pipe(ialu_mem_imm);
6792%}
6793
// Copies an int register into an int stack slot (plain "Set dst src" match);
// formatted as "movl" with store opcode 0x89.
6794instruct storeSSI(stackSlotI dst, rRegI src)
6795%{
6796  match(Set dst src);
6797
6798  ins_cost(100);
6799  format %{ "movl    $dst, $src\t# int stk" %}
6800  opcode(0x89);
6801  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
6802  ins_pipe( ialu_mem_reg );
6803%}
6804
// Copies a long register into a long stack slot (plain "Set dst src" match);
// formatted as "movq": store opcode 0x89 with the wide REX variant.
6805instruct storeSSL(stackSlotL dst, rRegL src)
6806%{
6807  match(Set dst src);
6808
6809  ins_cost(100);
6810  format %{ "movq    $dst, $src\t# long stk" %}
6811  opcode(0x89);
6812  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
6813  ins_pipe(ialu_mem_reg);
6814%}
6815
// Copies a pointer register into a pointer stack slot (plain "Set dst src"
// match); formatted as "movq": store opcode 0x89 with the wide REX variant.
6816instruct storeSSP(stackSlotP dst, rRegP src)
6817%{
6818  match(Set dst src);
6819
6820  ins_cost(100);
6821  format %{ "movq    $dst, $src\t# ptr stk" %}
6822  opcode(0x89);
6823  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
6824  ins_pipe(ialu_mem_reg);
6825%}
6826
// Copies a float register into a float stack slot (plain "Set dst src"
// match); formatted as "movss" with opcode bytes F3 0F 11, as in storeF.
6827instruct storeSSF(stackSlotF dst, regF src)
6828%{
6829  match(Set dst src);
6830
6831  ins_cost(95); // XXX
6832  format %{ "movss   $dst, $src\t# float stk" %}
6833  opcode(0xF3, 0x0F, 0x11);
6834  ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
6835  ins_pipe(pipe_slow); // XXX
6836%}
6837
// Copies a double register into a double stack slot (plain "Set dst src"
// match); formatted as "movsd" with opcode bytes F2 0F 11, as in storeD.
6838instruct storeSSD(stackSlotD dst, regD src)
6839%{
6840  match(Set dst src);
6841
6842  ins_cost(95); // XXX
6843  format %{ "movsd   $dst, $src\t# double stk" %}
6844  opcode(0xF2, 0x0F, 0x11);
6845  ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
6846  ins_pipe(pipe_slow); // XXX
6847%}
6848
6849//----------BSWAP Instructions-------------------------------------------------
// In-place byte reversal of an int register: matches ReverseBytesI with dst
// as both input and result.  Formatted as "bswapl" (opcode bytes 0F C8, with
// the register folded in by opc2_reg).
6850instruct bytes_reverse_int(rRegI dst) %{
6851  match(Set dst (ReverseBytesI dst));
6852
6853  format %{ "bswapl  $dst" %}
6854  opcode(0x0F, 0xC8);  /*Opcode 0F /C8 */
6855  ins_encode( REX_reg(dst), OpcP, opc2_reg(dst) );
6856  ins_pipe( ialu_reg );
6857%}
6858
// In-place byte reversal of a long register: matches ReverseBytesL with dst
// as both input and result.  Formatted as "bswapq": same 0F C8 opcode as the
// int form but with the wide REX variant.
6859instruct bytes_reverse_long(rRegL dst) %{
6860  match(Set dst (ReverseBytesL dst));
6861
6862  format %{ "bswapq  $dst" %}
6863
6864  opcode(0x0F, 0xC8); /* Opcode 0F /C8 */
6865  ins_encode( REX_reg_wide(dst), OpcP, opc2_reg(dst) );
6866  ins_pipe( ialu_reg);
6867%}
6868
// Fuses an int load with a byte reversal: matches ReverseBytesI(LoadI src).
// The encoding is a two-instruction sequence: a load (opcode 8B) into dst,
// then a bswap of dst (opcode bytes 0F C8).
6869instruct loadI_reversed(rRegI dst, memory src) %{
6870  match(Set dst (ReverseBytesI (LoadI src)));
6871
6872  format %{ "bswap_movl $dst, $src" %}
6873  opcode(0x8B, 0x0F, 0xC8); /* Opcode 8B 0F C8 */
6874  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src), REX_reg(dst), OpcS, opc3_reg(dst));
6875  ins_pipe( ialu_reg_mem );
6876%}
6877
// Fuses a long load with a byte reversal: matches ReverseBytesL(LoadL src).
// Same two-instruction sequence as loadI_reversed (load 8B, then bswap 0F C8)
// using the wide REX variants for both instructions.
6878instruct loadL_reversed(rRegL dst, memory src) %{
6879  match(Set dst (ReverseBytesL (LoadL src)));
6880
6881  format %{ "bswap_movq $dst, $src" %}
6882  opcode(0x8B, 0x0F, 0xC8); /* Opcode 8B 0F C8 */
6883  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src), REX_reg_wide(dst), OpcS, opc3_reg(dst));
6884  ins_pipe( ialu_reg_mem );
6885%}
6886
// Fuses a byte reversal with an int store: matches StoreI of
// ReverseBytesI(src).  The encoding first applies the 0F C8 (bswap) opcode to
// src, then stores src with opcode 89.
// NOTE(review): the bswap appears to modify $src in place, yet no effect()
// clause marks src as clobbered -- confirm src is never live afterwards.
6887instruct storeI_reversed(memory dst, rRegI src) %{
6888  match(Set dst (StoreI dst (ReverseBytesI  src)));
6889
6890  format %{ "movl_bswap $dst, $src" %}
6891  opcode(0x0F, 0xC8, 0x89); /* Opcode 0F C8 89 */
6892  ins_encode( REX_reg(src), OpcP, opc2_reg(src), REX_reg_mem(src, dst), OpcT, reg_mem(src, dst) );
6893  ins_pipe( ialu_mem_reg );
6894%}
6895
// Fuses a byte reversal with a long store: matches StoreL of
// ReverseBytesL(src).  The encoding first applies the 0F C8 (bswap) opcode to
// src, then stores src with opcode 89, both with wide REX variants.
// NOTE(review): the bswap appears to modify $src in place, yet no effect()
// clause marks src as clobbered -- confirm src is never live afterwards.
6896instruct storeL_reversed(memory dst, rRegL src) %{
6897  match(Set dst (StoreL dst (ReverseBytesL  src)));
6898
6899  format %{ "movq_bswap $dst, $src" %}
6900  opcode(0x0F, 0xC8, 0x89); /* Opcode 0F C8 89 */
6901  ins_encode( REX_reg_wide(src), OpcP, opc2_reg(src), REX_reg_mem_wide(src, dst), OpcT, reg_mem(src, dst) );
6902  ins_pipe( ialu_mem_reg );
6903%}
6904
6905//----------MemBar Instructions-----------------------------------------------
6906// Memory barrier flavors
6907
// Acquire barrier: matches MemBarAcquire and emits nothing (size 0, empty
// encoding, cost 0).
6908instruct membar_acquire()
6909%{
6910  match(MemBarAcquire);
6911  ins_cost(0);
6912
6913  size(0);
6914  format %{ "MEMBAR-acquire" %}
6915  ins_encode();
6916  ins_pipe(empty);
6917%}
6918
// Acquire barrier variant selected when Matcher::prior_fast_lock(n) holds.
// Also emits nothing; the format text records that a preceding FastLock
// CMPXCHG is the reason.
6919instruct membar_acquire_lock()
6920%{
6921  match(MemBarAcquire);
6922  predicate(Matcher::prior_fast_lock(n));
6923  ins_cost(0);
6924
6925  size(0);
6926  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6927  ins_encode();
6928  ins_pipe(empty);
6929%}
6930
// Release barrier: matches MemBarRelease and emits nothing (size 0, empty
// encoding, cost 0).
6931instruct membar_release()
6932%{
6933  match(MemBarRelease);
6934  ins_cost(0);
6935
6936  size(0);
6937  format %{ "MEMBAR-release" %}
6938  ins_encode();
6939  ins_pipe(empty);
6940%}
6941
// Release barrier variant selected when Matcher::post_fast_unlock(n) holds.
// Also emits nothing; the format text records that a following FastUnlock is
// the reason.
6942instruct membar_release_lock()
6943%{
6944  match(MemBarRelease);
6945  predicate(Matcher::post_fast_unlock(n));
6946  ins_cost(0);
6947
6948  size(0);
6949  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6950  ins_encode();
6951  ins_pipe(empty);
6952%}
6953
// Volatile barrier: matches MemBarVolatile and is the only barrier in this
// group with a real encoding (enc_membar_volatile); cost 400, slow pipeline.
6954instruct membar_volatile()
6955%{
6956  match(MemBarVolatile);
6957  ins_cost(400);
6958
6959  format %{ "MEMBAR-volatile" %}
6960  ins_encode(enc_membar_volatile);
6961  ins_pipe(pipe_slow);
6962%}
6963
// Volatile barrier variant selected when Matcher::post_store_load_barrier(n)
// holds; emits nothing (size 0, cost 0), so it wins over membar_volatile
// (cost 400) whenever the predicate is true.
6964instruct unnecessary_membar_volatile()
6965%{
6966  match(MemBarVolatile);
6967  predicate(Matcher::post_store_load_barrier(n));
6968  ins_cost(0);
6969
6970  size(0);
6971  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6972  ins_encode();
6973  ins_pipe(empty);
6974%}
6975
6976//----------Move Instructions--------------------------------------------------
6977
// Reinterprets a long as a pointer: matches CastX2P and is emitted as a
// register-to-register "movq" through enc_copy_wide.
6978instruct castX2P(rRegP dst, rRegL src)
6979%{
6980  match(Set dst (CastX2P src));
6981
6982  format %{ "movq    $dst, $src\t# long->ptr" %}
6983  ins_encode(enc_copy_wide(dst, src));
6984  ins_pipe(ialu_reg_reg); // XXX
6985%}
6986
// Reinterprets a pointer as a long: matches CastP2X and is emitted as a
// register-to-register "movq" through enc_copy_wide.
6987instruct castP2X(rRegL dst, rRegP src)
6988%{
6989  match(Set dst (CastP2X src));
6990
6991  format %{ "movq    $dst, $src\t# ptr -> long" %}
6992  ins_encode(enc_copy_wide(dst, src));
6993  ins_pipe(ialu_reg_reg); // XXX
6994%}
6995
6996
6997// Convert oop pointer into compressed form
// Matches EncodeP.  The encoding copies src into dst when they are different
// registers and then calls the macro assembler's encode_heap_oop on dst.
// The flags register is declared killed.
6998instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
6999  match(Set dst (EncodeP src));
7000  effect(KILL cr);
7001  format %{ "encode_heap_oop $dst,$src" %}
7002  ins_encode %{
7003    Register s = $src$$Register;
7004    Register d = $dst$$Register;
    // Skip the copy when the allocator assigned the same register to both.
7005    if (s != d) {
7006      __ movq(d, s);
7007    }
7008    __ encode_heap_oop(d);
7009  %}
7010  ins_pipe(ialu_reg_long);
7011%}
7012
// Convert a compressed oop back into a full pointer.
// Matches DecodeN.  The encoding copies src into dst when they are different
// registers and then calls the macro assembler's decode_heap_oop on dst.
// The flags register is declared killed.
7013instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
7014  match(Set dst (DecodeN src));
7015  effect(KILL cr);
7016  format %{ "decode_heap_oop $dst,$src" %}
7017  ins_encode %{
7018    Register s = $src$$Register;
7019    Register d = $dst$$Register;
    // Skip the copy when the allocator assigned the same register to both.
7020    if (s != d) {
7021      __ movq(d, s);
7022    }
7023    __ decode_heap_oop(d);
7024  %}
7025  ins_pipe(ialu_reg_long);
7026%}
7027
7028
7029//----------Conditional Move---------------------------------------------------
7030// Jump
7031// dummy instruction for generating temp registers
// Shape: Jump on (switch_val << shift), with dest as a TEMP register for the
// table base.  predicate(false) means this rule is never selected by the
// matcher.
7032instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
7033  match(Jump (LShiftL switch_val shift));
7034  ins_cost(350);
7035  predicate(false);
7036  effect(TEMP dest);
7037
7038  format %{ "leaq    $dest, table_base\n\t"
7039            "jmp     [$dest + $switch_val << $shift]\n\t" %}
7040  ins_encode(jump_enc_offset(switch_val, shift, dest));
7041  ins_pipe(pipe_jmp);
7042  ins_pc_relative(1);
7043%}
7044
// Table jump on ((switch_val << shift) + offset): dest is a TEMP register
// that the format shows being loaded with the table base before the indirect
// jmp.  Emitted through jump_enc_addr.
7045instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
7046  match(Jump (AddL (LShiftL switch_val shift) offset));
7047  ins_cost(350);
7048  effect(TEMP dest);
7049
7050  format %{ "leaq    $dest, table_base\n\t"
7051            "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
7052  ins_encode(jump_enc_addr(switch_val, shift, offset, dest));
7053  ins_pipe(pipe_jmp);
7054  ins_pc_relative(1);
7055%}
7056
// Table jump on a plain switch_val: dest is a TEMP register that the format
// shows being loaded with the table base before the indirect jmp.  Emitted
// through jump_enc.
7057instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
7058  match(Jump switch_val);
7059  ins_cost(350);
7060  effect(TEMP dest);
7061
7062  format %{ "leaq    $dest, table_base\n\t"
7063            "jmp     [$dest + $switch_val]\n\t" %}
7064  ins_encode(jump_enc(switch_val, dest));
7065  ins_pipe(pipe_jmp);
7066  ins_pc_relative(1);
7067%}
7068
7069// Conditional move
// Int register-to-register form for signed conditions (cmpOp / rFlagsReg).
// dst is both the "keep" value and the result.  Opcode base is 0F 40;
// enc_cmov(cop) presumably folds the condition code into it.
7070instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
7071%{
7072  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7073
7074  ins_cost(200); // XXX
7075  format %{ "cmovl$cop $dst, $src\t# signed, int" %}
7076  opcode(0x0F, 0x40);
7077  ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7078  ins_pipe(pipe_cmov_reg);
7079%}
7080
// Conditional move, int register-to-register, for unsigned conditions
// (cmpOpU / rFlagsRegU).  Encoding is identical to cmovI_reg.
7081instruct cmovI_regU(rRegI dst, rRegI src, rFlagsRegU cr, cmpOpU cop)
7082%{
7083  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7084
7085  ins_cost(200); // XXX
7086  format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
7087  opcode(0x0F, 0x40);
7088  ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7089  ins_pipe(pipe_cmov_reg);
7090%}
7091
7092// Conditional move
// Int form whose source is folded from a LoadI (signed conditions).  Cost 250
// versus 200 for the register form.
7093instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src)
7094%{
7095  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7096
7097  ins_cost(250); // XXX
7098  format %{ "cmovl$cop $dst, $src\t# signed, int" %}
7099  opcode(0x0F, 0x40);
7100  ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
7101  ins_pipe(pipe_cmov_mem);
7102%}
7103
7104// Conditional move
// Int form whose source is folded from a LoadI, for unsigned conditions
// (cmpOpU / rFlagsRegU).  Encoding is identical to cmovI_mem.
7105instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
7106%{
7107  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7108
7109  ins_cost(250); // XXX
7110  format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
7111  opcode(0x0F, 0x40);
7112  ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
7113  ins_pipe(pipe_cmov_mem);
7114%}
7115
7116// Conditional move
// Pointer register-to-register form for signed conditions; same 0F 40 base
// opcode as the int form but with the wide REX variant ("cmovq").
7117instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
7118%{
7119  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7120
7121  ins_cost(200); // XXX
7122  format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
7123  opcode(0x0F, 0x40);
7124  ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7125  ins_pipe(pipe_cmov_reg);  // XXX
7126%}
7127
7128// Conditional move
// Pointer register-to-register form for unsigned conditions (cmpOpU /
// rFlagsRegU).  Encoding is identical to cmovP_reg.
7129instruct cmovP_regU(rRegP dst, rRegP src, rFlagsRegU cr, cmpOpU cop)
7130%{
7131  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7132
7133  ins_cost(200); // XXX
7134  format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
7135  opcode(0x0F, 0x40);
7136  ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7137  ins_pipe(pipe_cmov_reg); // XXX
7138%}
7139
7140// DISABLED: Requires the ADLC to emit a bottom_type call that
7141// correctly meets the two pointer arguments; one is an incoming
7142// register but the other is a memory operand.  ALSO appears to
7143// be buggy with implicit null checks.
7144//
7145//// Conditional move
7146//instruct cmovP_mem(cmpOp cop, rFlagsReg cr, rRegP dst, memory src)
7147//%{
7148//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7149//  ins_cost(250);
7150//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7151//  opcode(0x0F,0x40);
7152//  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
7153//  ins_pipe( pipe_cmov_mem );
7154//%}
7155//
7156//// Conditional move
7157//instruct cmovP_memU(cmpOpU cop, rFlagsRegU cr, rRegP dst, memory src)
7158//%{
7159//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7160//  ins_cost(250);
7161//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7162//  opcode(0x0F,0x40);
7163//  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
7164//  ins_pipe( pipe_cmov_mem );
7165//%}
7166
// Conditional move, long register-to-register, signed conditions.  Uses the
// 0F 40 base opcode with the wide REX variant ("cmovq").
7167instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
7168%{
7169  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7170
7171  ins_cost(200); // XXX
7172  format %{ "cmovq$cop $dst, $src\t# signed, long" %}
7173  opcode(0x0F, 0x40);
7174  ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7175  ins_pipe(pipe_cmov_reg);  // XXX
7176%}
7177
// Conditional move, long, with the source folded from a LoadL (signed
// conditions).
// NOTE(review): cost is 200, the same as the register form, whereas the int
// memory form cmovI_mem uses 250 -- confirm this is intended.
7178instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
7179%{
7180  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
7181
7182  ins_cost(200); // XXX
7183  format %{ "cmovq$cop $dst, $src\t# signed, long" %}
7184  opcode(0x0F, 0x40);
7185  ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
7186  ins_pipe(pipe_cmov_mem);  // XXX
7187%}
7188
// Conditional move, long register-to-register, unsigned conditions (cmpOpU /
// rFlagsRegU).  Encoding is identical to cmovL_reg.
7189instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
7190%{
7191  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7192
7193  ins_cost(200); // XXX
7194  format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
7195  opcode(0x0F, 0x40);
7196  ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7197  ins_pipe(pipe_cmov_reg); // XXX
7198%}
7199
// Conditional move, long, with the source folded from a LoadL, unsigned
// conditions (cmpOpU / rFlagsRegU).
// NOTE(review): cost is 200, the same as the register form, whereas the int
// memory form cmovI_memU uses 250 -- confirm this is intended.
7200instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
7201%{
7202  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
7203
7204  ins_cost(200); // XXX
7205  format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
7206  opcode(0x0F, 0x40);
7207  ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
7208  ins_pipe(pipe_cmov_mem); // XXX
7209%}
7210
// Conditional move of a float register (signed conditions).  Per the format
// text this is a branch on the negated condition around a movss rather than
// a single cmov instruction; emitted through enc_cmovf_branch.
7211instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
7212%{
7213  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7214
7215  ins_cost(200); // XXX
7216  format %{ "jn$cop    skip\t# signed cmove float\n\t"
7217            "movss     $dst, $src\n"
7218    "skip:" %}
7219  ins_encode(enc_cmovf_branch(cop, dst, src));
7220  ins_pipe(pipe_slow);
7221%}
7222
7223// instruct cmovF_mem(cmpOp cop, rFlagsReg cr, regF dst, memory src)
7224// %{
7225//   match(Set dst (CMoveF (Binary cop cr) (Binary dst (LoadL src))));
7226
7227//   ins_cost(200); // XXX
7228//   format %{ "jn$cop    skip\t# signed cmove float\n\t"
7229//             "movss     $dst, $src\n"
7230//     "skip:" %}
7231//   ins_encode(enc_cmovf_mem_branch(cop, dst, src));
7232//   ins_pipe(pipe_slow);
7233// %}
7234
// Conditional move of a float register for unsigned conditions (cmpOpU /
// rFlagsRegU).  Same branch-around-movss scheme and encoding class
// (enc_cmovf_branch) as cmovF_reg.
7235instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
7236%{
7237  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7238
7239  ins_cost(200); // XXX
7240  format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
7241            "movss     $dst, $src\n"
7242    "skip:" %}
7243  ins_encode(enc_cmovf_branch(cop, dst, src));
7244  ins_pipe(pipe_slow);
7245%}
7246
// Conditional move of a double register (signed conditions).  Per the format
// text this is a branch on the negated condition around a movsd; emitted
// through enc_cmovd_branch.
7247instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
7248%{
7249  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7250
7251  ins_cost(200); // XXX
7252  format %{ "jn$cop    skip\t# signed cmove double\n\t"
7253            "movsd     $dst, $src\n"
7254    "skip:" %}
7255  ins_encode(enc_cmovd_branch(cop, dst, src));
7256  ins_pipe(pipe_slow);
7257%}
7258
// Conditional move of a double register for unsigned conditions (cmpOpU /
// rFlagsRegU).  Same branch-around-movsd scheme and encoding class
// (enc_cmovd_branch) as cmovD_reg.
7259instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
7260%{
7261  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7262
7263  ins_cost(200); // XXX
7264  format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
7265            "movsd     $dst, $src\n"
7266    "skip:" %}
7267  ins_encode(enc_cmovd_branch(cop, dst, src));
7268  ins_pipe(pipe_slow);
7269%}
7270
7271//----------Arithmetic Instructions--------------------------------------------
7272//----------Addition Instructions----------------------------------------------
7273
// Int add, register += register.  Two-address form (dst is also the left
// input); opcode 03.  The flags register is declared killed.
7274instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
7275%{
7276  match(Set dst (AddI dst src));
7277  effect(KILL cr);
7278
7279  format %{ "addl    $dst, $src\t# int" %}
7280  opcode(0x03);
7281  ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
7282  ins_pipe(ialu_reg_reg);
7283%}
7284
// Int add, register += immediate.  Opcode 81 /0; OpcSErm together with
// Con8or32 presumably choose between the 8-bit and 32-bit immediate forms --
// confirm against the encoding classes.  Flags are declared killed.
7285instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
7286%{
7287  match(Set dst (AddI dst src));
7288  effect(KILL cr);
7289
7290  format %{ "addl    $dst, $src\t# int" %}
7291  opcode(0x81, 0x00); /* /0 id */
7292  ins_encode(OpcSErm(dst, src), Con8or32(src));
7293  ins_pipe( ialu_reg );
7294%}
7295
// Int add, register += memory: the right operand is folded from a LoadI.
// Opcode 03 with a memory operand; cost 125.  Flags are declared killed.
7296instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
7297%{
7298  match(Set dst (AddI dst (LoadI src)));
7299  effect(KILL cr);
7300
7301  ins_cost(125); // XXX
7302  format %{ "addl    $dst, $src\t# int" %}
7303  opcode(0x03);
7304  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
7305  ins_pipe(ialu_reg_mem);
7306%}
7307
// Int read-modify-write add, memory += register: matches a StoreI of
// (LoadI dst) + src back to the same dst operand.  Opcode 01 /r; cost 150.
// Flags are declared killed.
7308instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
7309%{
7310  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7311  effect(KILL cr);
7312
7313  ins_cost(150); // XXX
7314  format %{ "addl    $dst, $src\t# int" %}
7315  opcode(0x01); /* Opcode 01 /r */
7316  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
7317  ins_pipe(ialu_mem_reg);
7318%}
7319
// Int read-modify-write add, memory += immediate: matches a StoreI of
// (LoadI dst) + src back to the same dst operand.  Opcode 81 /0 with the
// immediate emitted via Con8or32; cost 125.  Flags are declared killed.
7320instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
7321%{
7322  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7323  effect(KILL cr);
7324
7325  ins_cost(125); // XXX
7326  format %{ "addl    $dst, $src\t# int" %}
7327  opcode(0x81); /* Opcode 81 /0 id */
7328  ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));
7329  ins_pipe(ialu_mem_imm);
7330%}
7331
// Int increment of a register: matches AddI with the immI1 operand and is
// only eligible when UseIncDec is set.  Opcode FF /0 ("incl"); the constant
// itself is not encoded.  Flags are declared killed.
7332instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
7333%{
7334  predicate(UseIncDec);
7335  match(Set dst (AddI dst src));
7336  effect(KILL cr);
7337
7338  format %{ "incl    $dst\t# int" %}
7339  opcode(0xFF, 0x00); // FF /0
7340  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
7341  ins_pipe(ialu_reg);
7342%}
7343
// Int increment of a memory location: matches a StoreI of (LoadI dst) + immI1
// back to dst, only when UseIncDec is set.  Opcode FF /0 ("incl") on the
// memory operand; cost 125.  Flags are declared killed.
7344instruct incI_mem(memory dst, immI1 src, rFlagsReg cr)
7345%{
7346  predicate(UseIncDec);
7347  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7348  effect(KILL cr);
7349
7350  ins_cost(125); // XXX
7351  format %{ "incl    $dst\t# int" %}
7352  opcode(0xFF); /* Opcode FF /0 */
7353  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x00, dst));
7354  ins_pipe(ialu_mem_imm);
7355%}
7356
7357// XXX why does that use AddI
// Int decrement of a register, only when UseIncDec is set.  The rule matches
// AddI with the immI_M1 operand (presumably the constant -1, i.e. the
// decrement is expressed as "add minus one" -- confirm against the operand
// definition).  Opcode FF /1 ("decl").  Flags are declared killed.
7358instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
7359%{
7360  predicate(UseIncDec);
7361  match(Set dst (AddI dst src));
7362  effect(KILL cr);
7363
7364  format %{ "decl    $dst\t# int" %}
7365  opcode(0xFF, 0x01); // FF /1
7366  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
7367  ins_pipe(ialu_reg);
7368%}
7369
7370// XXX why does that use AddI
// Int decrement of a memory location, only when UseIncDec is set.  Matches a
// StoreI of (LoadI dst) + immI_M1 back to dst (immI_M1 is presumably the
// constant -1 -- confirm against the operand definition).  Opcode FF /1
// ("decl") on the memory operand; cost 125.  Flags are declared killed.
7371instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
7372%{
7373  predicate(UseIncDec);
7374  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7375  effect(KILL cr);
7376
7377  ins_cost(125); // XXX
7378  format %{ "decl    $dst\t# int" %}
7379  opcode(0xFF); /* Opcode FF /1 */
7380  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x01, dst));
7381  ins_pipe(ialu_mem_imm);
7382%}
7383
// Three-address int add of register + immediate via LEA: dst is distinct from
// src0 and there is no effect() clause, so the flags are not declared killed.
// Opcode 8D preceded by a 0x67 prefix byte (shown as "addr32" in the format);
// cost 110.
7384instruct leaI_rReg_immI(rRegI dst, rRegI src0, immI src1)
7385%{
7386  match(Set dst (AddI src0 src1));
7387
7388  ins_cost(110);
7389  format %{ "addr32 leal $dst, [$src0 + $src1]\t# int" %}
7390  opcode(0x8D); /* 0x8D /r */
7391  ins_encode(Opcode(0x67), REX_reg_reg(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
7392  ins_pipe(ialu_reg_reg);
7393%}
7394
// Long add, register += register.  Two-address form; opcode 03 with the wide
// REX variant ("addq").  Flags are declared killed.
7395instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
7396%{
7397  match(Set dst (AddL dst src));
7398  effect(KILL cr);
7399
7400  format %{ "addq    $dst, $src\t# long" %}
7401  opcode(0x03);
7402  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
7403  ins_pipe(ialu_reg_reg);
7404%}
7405
// Long add, register += immediate restricted to the immL32 operand.  Opcode
// 81 /0 through OpcSErm_wide, with the constant emitted via Con8or32.
// Flags are declared killed.
7406instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
7407%{
7408  match(Set dst (AddL dst src));
7409  effect(KILL cr);
7410
7411  format %{ "addq    $dst, $src\t# long" %}
7412  opcode(0x81, 0x00); /* /0 id */
7413  ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
7414  ins_pipe( ialu_reg );
7415%}
7416
// Long add, register += memory: the right operand is folded from a LoadL.
// Opcode 03 with the wide REX variant; cost 125.  Flags are declared killed.
7417instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
7418%{
7419  match(Set dst (AddL dst (LoadL src)));
7420  effect(KILL cr);
7421
7422  ins_cost(125); // XXX
7423  format %{ "addq    $dst, $src\t# long" %}
7424  opcode(0x03);
7425  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
7426  ins_pipe(ialu_reg_mem);
7427%}
7428
// Long read-modify-write add, memory += register: matches a StoreL of
// (LoadL dst) + src back to the same dst operand.  Opcode 01 /r with the wide
// REX variant; cost 150.  Flags are declared killed.
7429instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
7430%{
7431  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
7432  effect(KILL cr);
7433
7434  ins_cost(150); // XXX
7435  format %{ "addq    $dst, $src\t# long" %}
7436  opcode(0x01); /* Opcode 01 /r */
7437  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
7438  ins_pipe(ialu_mem_reg);
7439%}
7440
// Long read-modify-write add, memory += immediate (immL32): matches a StoreL
// of (LoadL dst) + src back to the same dst operand.  Opcode 81 /0 with the
// wide REX variant and the constant emitted via Con8or32; cost 125.
// Flags are declared killed.
7441instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
7442%{
7443  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
7444  effect(KILL cr);
7445
7446  ins_cost(125); // XXX
7447  format %{ "addq    $dst, $src\t# long" %}
7448  opcode(0x81); /* Opcode 81 /0 id */
7449  ins_encode(REX_mem_wide(dst),
7450             OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));
7451  ins_pipe(ialu_mem_imm);
7452%}
7453
// Long increment of a register: matches AddL with the immL1 operand and is
// only eligible when UseIncDec is set.  Opcode FF /0 with the wide REX
// variant ("incq"); the constant itself is not encoded.  Flags are declared
// killed.
// dst is a long register operand (rRegL): the rule consumes and produces the
// 64-bit value of an AddL, exactly like its counterpart decL_rReg.  It was
// previously declared rRegI, which does not agree with the AddL match rule.
7454instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
7455%{
7456  predicate(UseIncDec);
7457  match(Set dst (AddL dst src));
7458  effect(KILL cr);
7459
7460  format %{ "incq    $dst\t# long" %}
7461  opcode(0xFF, 0x00); // FF /0
7462  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
7463  ins_pipe(ialu_reg);
7464%}
7465
// Long increment of a memory location: matches a StoreL of (LoadL dst) +
// immL1 back to dst, only when UseIncDec is set.  Opcode FF /0 with the wide
// REX variant ("incq"); cost 125.  Flags are declared killed.
7466instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
7467%{
7468  predicate(UseIncDec);
7469  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
7470  effect(KILL cr);
7471
7472  ins_cost(125); // XXX
7473  format %{ "incq    $dst\t# long" %}
7474  opcode(0xFF); /* Opcode FF /0 */
7475  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x00, dst));
7476  ins_pipe(ialu_mem_imm);
7477%}
7478
7479// XXX why does that use AddL
// Long decrement of a register, only when UseIncDec is set.  The rule matches
// AddL with the immL_M1 operand (presumably the constant -1, i.e. the
// decrement is expressed as "add minus one" -- confirm against the operand
// definition).  Opcode FF /1 with the wide REX variant ("decq").
// Flags are declared killed.
7480instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
7481%{
7482  predicate(UseIncDec);
7483  match(Set dst (AddL dst src));
7484  effect(KILL cr);
7485
7486  format %{ "decq    $dst\t# long" %}
7487  opcode(0xFF, 0x01); // FF /1
7488  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
7489  ins_pipe(ialu_reg);
7490%}
7491
7492// XXX why does that use AddL
// Long decrement of a memory location, only when UseIncDec is set.  Matches a
// StoreL of (LoadL dst) + immL_M1 back to dst (immL_M1 is presumably the
// constant -1 -- confirm against the operand definition).  Opcode FF /1 with
// the wide REX variant ("decq"); cost 125.  Flags are declared killed.
7493instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
7494%{
7495  predicate(UseIncDec);
7496  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
7497  effect(KILL cr);
7498
7499  ins_cost(125); // XXX
7500  format %{ "decq    $dst\t# long" %}
7501  opcode(0xFF); /* Opcode FF /1 */
7502  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x01, dst));
7503  ins_pipe(ialu_mem_imm);
7504%}
7505
// Three-address long add of register + immL32 via LEA: dst is distinct from
// src0 and there is no effect() clause, so the flags are not declared killed.
// Opcode 8D with the wide REX variant ("leaq"); cost 110.
7506instruct leaL_rReg_immL(rRegL dst, rRegL src0, immL32 src1)
7507%{
7508  match(Set dst (AddL src0 src1));
7509
7510  ins_cost(110);
7511  format %{ "leaq    $dst, [$src0 + $src1]\t# long" %}
7512  opcode(0x8D); /* 0x8D /r */
7513  ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
7514  ins_pipe(ialu_reg_reg);
7515%}
7516
// Pointer add, pointer register += long register.  Two-address form; opcode
// 03 with the wide REX variant ("addq").  Flags are declared killed.
7517instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
7518%{
7519  match(Set dst (AddP dst src));
7520  effect(KILL cr);
7521
7522  format %{ "addq    $dst, $src\t# ptr" %}
7523  opcode(0x03);
7524  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
7525  ins_pipe(ialu_reg_reg);
7526%}
7527
// Pointer add, pointer register += immediate (immL32).  Opcode 81 /0 through
// OpcSErm_wide, with the constant emitted via Con8or32.  Flags are declared
// killed.
7528instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
7529%{
7530  match(Set dst (AddP dst src));
7531  effect(KILL cr);
7532
7533  format %{ "addq    $dst, $src\t# ptr" %}
7534  opcode(0x81, 0x00); /* /0 id */
7535  ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
7536  ins_pipe( ialu_reg );
7537%}
7538
7539// XXX addP mem ops ????
7540
// Three-address pointer add of register + immL32 via LEA: dst is distinct
// from src0 and there is no effect() clause, so the flags are not declared
// killed.  Opcode 8D with the wide REX variant ("leaq"); cost 110.
7541instruct leaP_rReg_imm(rRegP dst, rRegP src0, immL32 src1)
7542%{
7543  match(Set dst (AddP src0 src1));
7544
7545  ins_cost(110);
7546  format %{ "leaq    $dst, [$src0 + $src1]\t# ptr" %}
7547  opcode(0x8D); /* 0x8D /r */
7548  ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1));// XXX
7549  ins_pipe(ialu_reg_reg);
7550%}
7551
// CheckCastPP on a pointer register: matched in place (dst is input and
// result) and emits no code (size 0, empty encoding).
7552instruct checkCastPP(rRegP dst)
7553%{
7554  match(Set dst (CheckCastPP dst));
7555
7556  size(0);
7557  format %{ "# checkcastPP of $dst" %}
7558  ins_encode(/* empty encoding */);
7559  ins_pipe(empty);
7560%}
7561
// CastPP on a pointer register: matched in place (dst is input and result)
// and emits no code (size 0, empty encoding).
7562instruct castPP(rRegP dst)
7563%{
7564  match(Set dst (CastPP dst));
7565
7566  size(0);
7567  format %{ "# castPP of $dst" %}
7568  ins_encode(/* empty encoding */);
7569  ins_pipe(empty);
7570%}
7571
// CastII on an int register: matched in place (dst is input and result) and
// emits no code (size 0, empty encoding).  Unlike the pointer casts above it
// also states an explicit cost of 0.
7572instruct castII(rRegI dst)
7573%{
7574  match(Set dst (CastII dst));
7575
7576  size(0);
7577  format %{ "# castII of $dst" %}
7578  ins_encode(/* empty encoding */);
7579  ins_cost(0);
7580  ins_pipe(empty);
7581%}
7582
7583// LoadP-locked same as a regular LoadP when used with compare-swap
// Matches LoadPLocked and is emitted as an ordinary 64-bit load: opcode 8B
// with the wide REX variant ("movq"); cost 125.
7584instruct loadPLocked(rRegP dst, memory mem)
7585%{
7586  match(Set dst (LoadPLocked mem));
7587
7588  ins_cost(125); // XXX
7589  format %{ "movq    $dst, $mem\t# ptr locked" %}
7590  opcode(0x8B);
7591  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
7592  ins_pipe(ialu_reg_mem); // XXX
7593%}
7594
7595// LoadL-locked - same as a regular LoadL when used with compare-swap
// Matches LoadLLocked and is emitted as an ordinary 64-bit load: opcode 8B
// with the wide REX variant ("movq"); cost 125.
7596instruct loadLLocked(rRegL dst, memory mem)
7597%{
7598  match(Set dst (LoadLLocked mem));
7599
7600  ins_cost(125); // XXX
7601  format %{ "movq    $dst, $mem\t# long locked" %}
7602  opcode(0x8B);
7603  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
7604  ins_pipe(ialu_reg_mem); // XXX
7605%}
7606
7607// Conditional-store of the updated heap-top.
7608// Used during allocation of the shared heap.
7609// Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
// The rule produces the flags register directly (Set cr ...); oldval is
// pinned to RAX through the rax_RegP operand.  Encoding: lock_prefix, wide
// REX, opcode bytes 0F B1, then the reg/mem operands.
// NOTE(review): unlike the compareAndSwap* rules, oldval is not declared
// KILLed here even though CMPXCHG writes RAX when the compare fails --
// confirm no consumer reuses oldval after a failed store.
7610
7611instruct storePConditional(memory heap_top_ptr,
7612                           rax_RegP oldval, rRegP newval,
7613                           rFlagsReg cr)
7614%{
7615  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7616
7617  format %{ "cmpxchgq $heap_top_ptr, $newval\t# (ptr) "
7618            "If rax == $heap_top_ptr then store $newval into $heap_top_ptr" %}
7619  opcode(0x0F, 0xB1);
7620  ins_encode(lock_prefix,
7621             REX_reg_mem_wide(newval, heap_top_ptr),
7622             OpcP, OpcS,
7623             reg_mem(newval, heap_top_ptr));
7624  ins_pipe(pipe_cmpxchg);
7625%}
7626
7627// Conditional-store of a long value
7628// Returns a boolean value (0/1) on success.  Implemented with a
7629// locked CMPXCHGQ (opcode bytes 0F B1 under a wide REX prefix), followed by
// sete / movzbl to materialize the 0/1 result in $res.
// oldval is pinned to RAX through the rax_RegL operand.  CMPXCHG loads the
// memory operand into RAX when the comparison fails, so oldval is declared
// KILLed in addition to the flags, in line with compareAndSwapL.
7630
7631instruct storeLConditional(rRegI res,
7632                           memory mem_ptr,
7633                           rax_RegL oldval, rRegL newval,
7634                           rFlagsReg cr)
7635%{
7636  match(Set res (StoreLConditional mem_ptr (Binary oldval newval)));
7637  effect(KILL cr, KILL oldval);
7638
7639  format %{ "cmpxchgq $mem_ptr, $newval\t# (long) "
7640            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7641            "sete    $res\n\t"
7642            "movzbl  $res, $res" %}
7643  opcode(0x0F, 0xB1);
7644  ins_encode(lock_prefix,
7645             REX_reg_mem_wide(newval, mem_ptr),
7646             OpcP, OpcS,
7647             reg_mem(newval, mem_ptr),
7648             REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7649             REX_reg_breg(res, res), // movzbl
7650             Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
7651  ins_pipe(pipe_cmpxchg);
7652%}
7653
7654// Conditional-store of a long value
7655// ZF flag is set on success, reset otherwise. Implemented with a
7656// locked CMPXCHGQ (opcode bytes 0F B1 under a wide REX prefix).
// This form folds the compare-against-zero of the StoreLConditional result
// into the instruction and produces the flags register directly.
// oldval is pinned to RAX through the rax_RegL operand.  CMPXCHG loads the
// memory operand into RAX when the comparison fails, so oldval is declared
// KILLed, in line with compareAndSwapL.
// NOTE(review): a CmpI of the 0/1 result against zero would set ZF when the
// store FAILED, whereas CMPXCHG sets ZF when it succeeded -- confirm that
// consumers of this rule expect the CMPXCHG polarity.
7657instruct storeLConditional_flags(memory mem_ptr,
7658                                 rax_RegL oldval, rRegL newval,
7659                                 rFlagsReg cr,
7660                                 immI0 zero)
7661%{
7662  match(Set cr (CmpI (StoreLConditional mem_ptr (Binary oldval newval)) zero));
  effect(KILL oldval);
7663
7664  format %{ "cmpxchgq $mem_ptr, $newval\t# (long) "
7665            "If rax == $mem_ptr then store $newval into $mem_ptr" %}
7666  opcode(0x0F, 0xB1);
7667  ins_encode(lock_prefix,
7668             REX_reg_mem_wide(newval, mem_ptr),
7669             OpcP, OpcS,
7670             reg_mem(newval, mem_ptr));
7671  ins_pipe(pipe_cmpxchg);
7672%}
7673
// Pointer compare-and-swap returning 0/1 in res.  oldval is pinned to RAX
// (rax_RegP) and is declared KILLed together with the flags.  Encoding:
// lock_prefix + wide-REX 0F B1 (cmpxchgq), then 0F 94 (sete) on res and
// 0F B6 (movzbl) to zero-extend the byte result.
7674instruct compareAndSwapP(rRegI res,
7675                         memory mem_ptr,
7676                         rax_RegP oldval, rRegP newval,
7677                         rFlagsReg cr)
7678%{
7679  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7680  effect(KILL cr, KILL oldval);
7681
7682  format %{ "cmpxchgq $mem_ptr,$newval\t# "
7683            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7684            "sete    $res\n\t"
7685            "movzbl  $res, $res" %}
7686  opcode(0x0F, 0xB1);
7687  ins_encode(lock_prefix,
7688             REX_reg_mem_wide(newval, mem_ptr),
7689             OpcP, OpcS,
7690             reg_mem(newval, mem_ptr),
7691             REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7692             REX_reg_breg(res, res), // movzbl
7693             Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
7694  ins_pipe( pipe_cmpxchg );
7695%}
7696
7697// XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
// Long compare-and-swap returning 0/1 in res.  oldval is pinned to RAX
// (rax_RegL) and is declared KILLed together with the flags.  Encoding:
// lock_prefix + wide-REX 0F B1 (cmpxchgq), then 0F 94 (sete) on res and
// 0F B6 (movzbl) to zero-extend the byte result.
7698instruct compareAndSwapL(rRegI res,
7699                         memory mem_ptr,
7700                         rax_RegL oldval, rRegL newval,
7701                         rFlagsReg cr)
7702%{
7703  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7704  effect(KILL cr, KILL oldval);
7705
7706  format %{ "cmpxchgq $mem_ptr,$newval\t# "
7707            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7708            "sete    $res\n\t"
7709            "movzbl  $res, $res" %}
7710  opcode(0x0F, 0xB1);
7711  ins_encode(lock_prefix,
7712             REX_reg_mem_wide(newval, mem_ptr),
7713             OpcP, OpcS,
7714             reg_mem(newval, mem_ptr),
7715             REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7716             REX_reg_breg(res, res), // movzbl
7717             Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
7718  ins_pipe( pipe_cmpxchg );
7719%}
7720
// Int compare-and-swap returning 0/1 in res.  oldval is pinned to RAX
// (rax_RegI) and is declared KILLed together with the flags.  Encoding:
// lock_prefix + 0F B1 with the non-wide REX variant (cmpxchgl), then 0F 94
// (sete) on res and 0F B6 (movzbl) to zero-extend the byte result.
7721instruct compareAndSwapI(rRegI res,
7722                         memory mem_ptr,
7723                         rax_RegI oldval, rRegI newval,
7724                         rFlagsReg cr)
7725%{
7726  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7727  effect(KILL cr, KILL oldval);
7728
7729  format %{ "cmpxchgl $mem_ptr,$newval\t# "
7730            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7731            "sete    $res\n\t"
7732            "movzbl  $res, $res" %}
7733  opcode(0x0F, 0xB1);
7734  ins_encode(lock_prefix,
7735             REX_reg_mem(newval, mem_ptr),
7736             OpcP, OpcS,
7737             reg_mem(newval, mem_ptr),
7738             REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7739             REX_reg_breg(res, res), // movzbl
7740             Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
7741  ins_pipe( pipe_cmpxchg );
7742%}
7743
7744
// Compare-and-swap on rax_RegN/rRegN operands (CompareAndSwapN).  Uses the
// same 32-bit encoding as compareAndSwapI (non-wide REX_reg_mem, cmpxchgl in
// the format string).  oldval and the flags are killed; res is produced by
// the trailing sete/movzbl pair.
7745instruct compareAndSwapN(rRegI res,
7746                          memory mem_ptr,
7747                          rax_RegN oldval, rRegN newval,
7748                          rFlagsReg cr) %{
7749  match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
7750  effect(KILL cr, KILL oldval);
7751
7752  format %{ "cmpxchgl $mem_ptr,$newval\t# "
7753            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7754            "sete    $res\n\t"
7755            "movzbl  $res, $res" %}
7756  opcode(0x0F, 0xB1);
7757  ins_encode(lock_prefix,
7758             REX_reg_mem(newval, mem_ptr),
7759             OpcP, OpcS,
7760             reg_mem(newval, mem_ptr),
7761             REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7762             REX_reg_breg(res, res), // movzbl
7763             Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
7764  ins_pipe( pipe_cmpxchg );
7765%}
7766
7767//----------Subtraction Instructions-------------------------------------------
7768
7769// Integer Subtraction Instructions
// Register-register int subtract: dst = dst - src (SubI).  Two-address form,
// so dst is both an input and the result; the flags register is killed.
7770instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
7771%{
7772  match(Set dst (SubI dst src));
7773  effect(KILL cr);
7774
7775  format %{ "subl    $dst, $src\t# int" %}
7776  opcode(0x2B);
7777  ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
7778  ins_pipe(ialu_reg_reg);
7779%}
7780
// Register-immediate int subtract: dst = dst - src where src is an immI
// constant.  Encoded as OpcSErm followed by Con8or32 for the constant; the
// flags register is killed.
7781instruct subI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
7782%{
7783  match(Set dst (SubI dst src));
7784  effect(KILL cr);
7785
7786  format %{ "subl    $dst, $src\t# int" %}
7787  opcode(0x81, 0x05);  /* Opcode 81 /5 */
7788  ins_encode(OpcSErm(dst, src), Con8or32(src));
7789  ins_pipe(ialu_reg);
7790%}
7791
// Register-memory int subtract: dst = dst - [src].  The LoadI of src is
// folded into the instruction, hence the higher ins_cost; flags are killed.
7792instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
7793%{
7794  match(Set dst (SubI dst (LoadI src)));
7795  effect(KILL cr);
7796
7797  ins_cost(125);
7798  format %{ "subl    $dst, $src\t# int" %}
7799  opcode(0x2B);
7800  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
7801  ins_pipe(ialu_reg_mem);
7802%}
7803
// Read-modify-write int subtract: [dst] = [dst] - src.  Matches a StoreI of
// (SubI (LoadI dst) src) to the same memory operand; flags are killed.
// Note the operand order in the encoding: src is the register field.
7804instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
7805%{
7806  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7807  effect(KILL cr);
7808
7809  ins_cost(150);
7810  format %{ "subl    $dst, $src\t# int" %}
7811  opcode(0x29); /* Opcode 29 /r */
7812  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
7813  ins_pipe(ialu_mem_reg);
7814%}
7815
// Read-modify-write int subtract of a constant: [dst] = [dst] - src, where
// src is an immI.  The /5 opcode extension is passed literally (0x05) to
// RM_opc_mem rather than through opcode()'s secondary slot.  Flags are killed.
7816instruct subI_mem_imm(memory dst, immI src, rFlagsReg cr)
7817%{
7818  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7819  effect(KILL cr);
7820
7821  ins_cost(125); // XXX
7822  format %{ "subl    $dst, $src\t# int" %}
7823  opcode(0x81); /* Opcode 81 /5 id */
7824  ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
7825  ins_pipe(ialu_mem_imm);
7826%}
7827
// Register-register long subtract: dst = dst - src (SubL).  Same opcode as
// subI_rReg but with the _wide REX encoding class (subq in the format
// string); the flags register is killed.
7828instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
7829%{
7830  match(Set dst (SubL dst src));
7831  effect(KILL cr);
7832
7833  format %{ "subq    $dst, $src\t# long" %}
7834  opcode(0x2B);
7835  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
7836  ins_pipe(ialu_reg_reg);
7837%}
7838
// Register-immediate long subtract: dst = dst - src where src is an immL32
// constant.  Encoded as OpcSErm_wide followed by Con8or32 for the constant;
// the flags register is killed.
// dst is an rRegL operand: the rule matches SubL and uses the wide (64-bit)
// encoding, so the destination must be a long register like in subL_rReg and
// subL_rReg_mem (it was previously declared rRegI).
7839instruct subL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
7840%{
7841  match(Set dst (SubL dst src));
7842  effect(KILL cr);
7843
7844  format %{ "subq    $dst, $src\t# long" %}
7845  opcode(0x81, 0x05);  /* Opcode 81 /5 */
7846  ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
7847  ins_pipe(ialu_reg);
7848%}
7849
// Register-memory long subtract: dst = dst - [src], with the LoadL folded
// into the instruction.  Wide REX encoding; flags are killed.
7850instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
7851%{
7852  match(Set dst (SubL dst (LoadL src)));
7853  effect(KILL cr);
7854
7855  ins_cost(125);
7856  format %{ "subq    $dst, $src\t# long" %}
7857  opcode(0x2B);
7858  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
7859  ins_pipe(ialu_reg_mem);
7860%}
7861
// Read-modify-write long subtract: [dst] = [dst] - src.  Matches a StoreL of
// (SubL (LoadL dst) src) to the same memory operand; flags are killed.
7862instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
7863%{
7864  match(Set dst (StoreL dst (SubL (LoadL dst) src)));
7865  effect(KILL cr);
7866
7867  ins_cost(150);
7868  format %{ "subq    $dst, $src\t# long" %}
7869  opcode(0x29); /* Opcode 29 /r */
7870  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
7871  ins_pipe(ialu_mem_reg);
7872%}
7873
// Read-modify-write long subtract of a constant: [dst] = [dst] - src, where
// src is an immL32.  As in subI_mem_imm the /5 extension is passed literally
// to RM_opc_mem; the REX prefix comes from the _wide class.  Flags are killed.
7874instruct subL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
7875%{
7876  match(Set dst (StoreL dst (SubL (LoadL dst) src)));
7877  effect(KILL cr);
7878
7879  ins_cost(125); // XXX
7880  format %{ "subq    $dst, $src\t# long" %}
7881  opcode(0x81); /* Opcode 81 /5 id */
7882  ins_encode(REX_mem_wide(dst),
7883             OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
7884  ins_pipe(ialu_mem_imm);
7885%}
7886
7887// Subtract from a pointer
7888// XXX hmpf???
// Matches AddP of dst and (0 - src), i.e. "pointer minus int", and emits it
// as a single subtract with the wide REX encoding.  The zero operand only
// participates in matching.  Flags are killed.
// NOTE(review): src is an rRegI operand but the subtract is 64-bit wide --
// confirm the upper half of src is well defined for this pattern.
7889instruct subP_rReg(rRegP dst, rRegI src, immI0 zero, rFlagsReg cr)
7890%{
7891  match(Set dst (AddP dst (SubI zero src)));
7892  effect(KILL cr);
7893
7894  format %{ "subq    $dst, $src\t# ptr - int" %}
7895  opcode(0x2B);
7896  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
7897  ins_pipe(ialu_reg_reg);
7898%}
7899
// Int negate: matches (0 - dst) and emits it in place on dst (negl in the
// format string).  The zero operand only participates in matching; the
// flags register is killed.
7900instruct negI_rReg(rRegI dst, immI0 zero, rFlagsReg cr)
7901%{
7902  match(Set dst (SubI zero dst));
7903  effect(KILL cr);
7904
7905  format %{ "negl    $dst\t# int" %}
7906  opcode(0xF7, 0x03);  // Opcode F7 /3
7907  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
7908  ins_pipe(ialu_reg);
7909%}
7910
// Int negate in memory: [dst] = 0 - [dst], matched as a StoreI of
// (SubI zero (LoadI dst)) to the same memory operand.  Flags are killed.
// NOTE(review): this memory form uses the ialu_reg pipe class, whereas the
// other read-modify-write rules in this section use ialu_mem_* classes --
// confirm whether that is intentional.
7911instruct negI_mem(memory dst, immI0 zero, rFlagsReg cr)
7912%{
7913  match(Set dst (StoreI dst (SubI zero (LoadI dst))));
7914  effect(KILL cr);
7915
7916  format %{ "negl    $dst\t# int" %}
7917  opcode(0xF7, 0x03);  // Opcode F7 /3
7918  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
7919  ins_pipe(ialu_reg);
7920%}
7921
// Long negate: matches (0L - dst) and emits it in place on dst with the wide
// REX encoding (negq in the format string).  Flags are killed.
7922instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
7923%{
7924  match(Set dst (SubL zero dst));
7925  effect(KILL cr);
7926
7927  format %{ "negq    $dst\t# long" %}
7928  opcode(0xF7, 0x03);  // Opcode F7 /3
7929  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
7930  ins_pipe(ialu_reg);
7931%}
7932
// Long negate in memory: [dst] = 0L - [dst], matched as a StoreL of
// (SubL zero (LoadL dst)) to the same memory operand.  Flags are killed.
// NOTE(review): like negI_mem this uses the ialu_reg pipe class for a memory
// operand -- confirm whether that is intentional.
7933instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
7934%{
7935  match(Set dst (StoreL dst (SubL zero (LoadL dst))));
7936  effect(KILL cr);
7937
7938  format %{ "negq    $dst\t# long" %}
7939  opcode(0xF7, 0x03);  // Opcode F7 /3
7940  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
7941  ins_pipe(ialu_reg);
7942%}
7943
7944
7945//----------Multiplication/Division Instructions-------------------------------
7946// Integer Multiplication Instructions
7947// Multiply Register
7948
// Register-register int multiply: dst = dst * src (MulI), two-byte opcode
// 0F AF (imull in the format string).  Flags are killed.
7949instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
7950%{
7951  match(Set dst (MulI dst src));
7952  effect(KILL cr);
7953
7954  ins_cost(300);
7955  format %{ "imull   $dst, $src\t# int" %}
7956  opcode(0x0F, 0xAF);
7957  ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
7958  ins_pipe(ialu_reg_reg_alu0);
7959%}
7960
// Three-operand int multiply by a constant: dst = src * imm.  Unlike
// mulI_rReg, dst is not an input of the match rule.  Flags are killed.
7961instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
7962%{
7963  match(Set dst (MulI src imm));
7964  effect(KILL cr);
7965
7966  ins_cost(300);
7967  format %{ "imull   $dst, $src, $imm\t# int" %}
7968  opcode(0x69); /* 69 /r id */
7969  ins_encode(REX_reg_reg(dst, src),
7970             OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
7971  ins_pipe(ialu_reg_reg_alu0);
7972%}
7973
// Register-memory int multiply: dst = dst * [src], with the LoadI folded
// into the instruction.  Flags are killed.
7974instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
7975%{
7976  match(Set dst (MulI dst (LoadI src)));
7977  effect(KILL cr);
7978
7979  ins_cost(350);
7980  format %{ "imull   $dst, $src\t# int" %}
7981  opcode(0x0F, 0xAF);
7982  ins_encode(REX_reg_mem(dst, src), OpcP, OpcS, reg_mem(dst, src));
7983  ins_pipe(ialu_reg_mem_alu0);
7984%}
7985
// Three-operand int multiply of a memory value by a constant:
// dst = [src] * imm, with the LoadI folded in.  Flags are killed.
7986instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
7987%{
7988  match(Set dst (MulI (LoadI src) imm));
7989  effect(KILL cr);
7990
7991  ins_cost(300);
7992  format %{ "imull   $dst, $src, $imm\t# int" %}
7993  opcode(0x69); /* 69 /r id */
7994  ins_encode(REX_reg_mem(dst, src),
7995             OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
7996  ins_pipe(ialu_reg_mem_alu0);
7997%}
7998
// Register-register long multiply: dst = dst * src (MulL).  Same opcode as
// mulI_rReg with the wide REX encoding (imulq).  Flags are killed.
7999instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8000%{
8001  match(Set dst (MulL dst src));
8002  effect(KILL cr);
8003
8004  ins_cost(300);
8005  format %{ "imulq   $dst, $src\t# long" %}
8006  opcode(0x0F, 0xAF);
8007  ins_encode(REX_reg_reg_wide(dst, src), OpcP, OpcS, reg_reg(dst, src));
8008  ins_pipe(ialu_reg_reg_alu0);
8009%}
8010
// Three-operand long multiply by a constant: dst = src * imm, where imm is
// an immL32.  Wide REX encoding; flags are killed.
8011instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
8012%{
8013  match(Set dst (MulL src imm));
8014  effect(KILL cr);
8015
8016  ins_cost(300);
8017  format %{ "imulq   $dst, $src, $imm\t# long" %}
8018  opcode(0x69); /* 69 /r id */
8019  ins_encode(REX_reg_reg_wide(dst, src),
8020             OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
8021  ins_pipe(ialu_reg_reg_alu0);
8022%}
8023
// Register-memory long multiply: dst = dst * [src], with the LoadL folded
// into the instruction.  Wide REX encoding; flags are killed.
8024instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
8025%{
8026  match(Set dst (MulL dst (LoadL src)));
8027  effect(KILL cr);
8028
8029  ins_cost(350);
8030  format %{ "imulq   $dst, $src\t# long" %}
8031  opcode(0x0F, 0xAF);
8032  ins_encode(REX_reg_mem_wide(dst, src), OpcP, OpcS, reg_mem(dst, src));
8033  ins_pipe(ialu_reg_mem_alu0);
8034%}
8035
// Three-operand long multiply of a memory value by a constant:
// dst = [src] * imm (immL32), with the LoadL folded in.  Flags are killed.
8036instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
8037%{
8038  match(Set dst (MulL (LoadL src) imm));
8039  effect(KILL cr);
8040
8041  ins_cost(300);
8042  format %{ "imulq   $dst, $src, $imm\t# long" %}
8043  opcode(0x69); /* 69 /r id */
8044  ins_encode(REX_reg_mem_wide(dst, src),
8045             OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
8046  ins_pipe(ialu_reg_mem_alu0);
8047%}
8048
// Signed int divide (DivI).  The dividend and the quotient share the
// rax_RegI operand; the rdx_RegI operand and the flags are killed.  The
// divisor is restricted to the no_rax_rdx_RegI class.
// Per the format string the emitted sequence skips idivl when rax is
// 0x80000000 and $div is -1 (zeroing rdx instead), otherwise runs cdql/idivl.
// NOTE(review): that guard is not visible in ins_encode here; presumably it
// is emitted by cdql_enc, which is defined outside this section -- confirm.
8049instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
8050                   rFlagsReg cr)
8051%{
8052  match(Set rax (DivI rax div));
8053  effect(KILL rdx, KILL cr);
8054
8055  ins_cost(30*100+10*100); // XXX
8056  format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
8057            "jne,s   normal\n\t"
8058            "xorl    rdx, rdx\n\t"
8059            "cmpl    $div, -1\n\t"
8060            "je,s    done\n"
8061    "normal: cdql\n\t"
8062            "idivl   $div\n"
8063    "done:"        %}
8064  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8065  ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8066  ins_pipe(ialu_reg_reg_alu0);
8067%}
8068
// Signed long divide (DivL).  The dividend and the quotient share the
// rax_RegL operand; the rdx_RegL operand and the flags are killed.  The
// divisor is restricted to the no_rax_rdx_RegL class.
// Per the format string the emitted sequence skips idivq when rax is
// 0x8000000000000000 and $div is -1, otherwise runs cdqq/idivq.
// NOTE(review): that guard is not visible in ins_encode here; presumably it
// is emitted by cdqq_enc, which is defined outside this section -- confirm.
8069instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
8070                   rFlagsReg cr)
8071%{
8072  match(Set rax (DivL rax div));
8073  effect(KILL rdx, KILL cr);
8074
8075  ins_cost(30*100+10*100); // XXX
8076  format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
8077            "cmpq    rax, rdx\n\t"
8078            "jne,s   normal\n\t"
8079            "xorl    rdx, rdx\n\t"
8080            "cmpq    $div, -1\n\t"
8081            "je,s    done\n"
8082    "normal: cdqq\n\t"
8083            "idivq   $div\n"
8084    "done:"        %}
8085  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8086  ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8087  ins_pipe(ialu_reg_reg_alu0);
8088%}
8089
8090// Integer DIVMOD with Register, both quotient and mod results
// Matches DivModI directly (no "Set"), so neither rax nor rdx is declared
// killed: only the flags appear in effect().  The encoding and format string
// are the same as divI_rReg; the pipe class is pipe_slow.
8091instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
8092                             rFlagsReg cr)
8093%{
8094  match(DivModI rax div);
8095  effect(KILL cr);
8096
8097  ins_cost(30*100+10*100); // XXX
8098  format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
8099            "jne,s   normal\n\t"
8100            "xorl    rdx, rdx\n\t"
8101            "cmpl    $div, -1\n\t"
8102            "je,s    done\n"
8103    "normal: cdql\n\t"
8104            "idivl   $div\n"
8105    "done:"        %}
8106  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8107  ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8108  ins_pipe(pipe_slow);
8109%}
8110
8111// Long DIVMOD with Register, both quotient and mod results
// Matches DivModL directly (no "Set"), so neither rax nor rdx is declared
// killed: only the flags appear in effect().  The encoding and format string
// are the same as divL_rReg; the pipe class is pipe_slow.
8112instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
8113                             rFlagsReg cr)
8114%{
8115  match(DivModL rax div);
8116  effect(KILL cr);
8117
8118  ins_cost(30*100+10*100); // XXX
8119  format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
8120            "cmpq    rax, rdx\n\t"
8121            "jne,s   normal\n\t"
8122            "xorl    rdx, rdx\n\t"
8123            "cmpq    $div, -1\n\t"
8124            "je,s    done\n"
8125    "normal: cdqq\n\t"
8126            "idivq   $div\n"
8127    "done:"        %}
8128  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8129  ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8130  ins_pipe(pipe_slow);
8131%}
8132
8133//----------- DivL-By-Constant-Expansions--------------------------------------
8134// DivI cases are handled by the compiler
8135
8136// Magic constant, reciprocal of 10
// Expand-only helper (no match rule): defines dst with the 64-bit constant
// 0x6666666666666667.  It is the first step of the divL_10 expansion.
// The format string now prints all sixteen hex digits so the listing agrees
// with the value actually passed to load_immL.
8137instruct loadConL_0x6666666666666667(rRegL dst)
8138%{
8139  effect(DEF dst);
8140
8141  format %{ "movq    $dst, #0x6666666666666667\t# Used in div-by-10" %}
8142  ins_encode(load_immL(dst, 0x6666666666666667));
8143  ins_pipe(ialu_reg);
8144%}
8145
// Expand-only helper (no match rule) used by divL_10.  One-operand multiply
// F7 /5 on src: per the format string the product lands in rdx:rax, and dst
// is pinned to the rdx_RegL class.  rax is both used and killed, src is only
// read, and the flags are killed.
8146instruct mul_hi(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
8147%{
8148  effect(DEF dst, USE src, USE_KILL rax, KILL cr);
8149
8150  format %{ "imulq   rdx:rax, rax, $src\t# Used in div-by-10" %}
8151  opcode(0xF7, 0x5); /* Opcode F7 /5 */
8152  ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));
8153  ins_pipe(ialu_reg_reg_alu0);
8154%}
8155
// Expand-only helper (no match rule) used by divL_10: arithmetic shift right
// of dst by the fixed count 0x3F (63), in place.  Flags are killed.
8156instruct sarL_rReg_63(rRegL dst, rFlagsReg cr)
8157%{
8158  effect(USE_DEF dst, KILL cr);
8159
8160  format %{ "sarq    $dst, #63\t# Used in div-by-10" %}
8161  opcode(0xC1, 0x7); /* C1 /7 ib */
8162  ins_encode(reg_opc_imm_wide(dst, 0x3F));
8163  ins_pipe(ialu_reg);
8164%}
8165
// Expand-only helper (no match rule) used by divL_10: arithmetic shift right
// of dst by the fixed count 2, in place.  Flags are killed.
8166instruct sarL_rReg_2(rRegL dst, rFlagsReg cr)
8167%{
8168  effect(USE_DEF dst, KILL cr);
8169
8170  format %{ "sarq    $dst, #2\t# Used in div-by-10" %}
8171  opcode(0xC1, 0x7); /* C1 /7 ib */
8172  ins_encode(reg_opc_imm_wide(dst, 0x2));
8173  ins_pipe(ialu_reg);
8174%}
8175
// Long divide by the constant 10 (immL10 divisor), expanded into a
// multiply-high by 0x6666666666666667 followed by shifts and a subtract
// instead of an idivq.  The result is pinned to the rdx_RegL class; rax and
// the flags are temporaries introduced by the expansion.
// NOTE(review): sarL_rReg_63 is applied to src, which it declares USE_DEF,
// while this rule does not list src as modified -- confirm how the expand
// machinery treats that.
8176instruct divL_10(rdx_RegL dst, no_rax_RegL src, immL10 div)
8177%{
8178  match(Set dst (DivL src div));
8179
8180  ins_cost((5+8)*100);
8181  expand %{
8182    rax_RegL rax;                     // Killed temp
8183    rFlagsReg cr;                     // Killed
8184    loadConL_0x6666666666666667(rax); // movq  rax, 0x6666666666666667
8185    mul_hi(dst, src, rax, cr);        // imulq rdx:rax <= rax * $src
8186    sarL_rReg_63(src, cr);            // sarq  src, 63
8187    sarL_rReg_2(dst, cr);             // sarq  rdx, 2
8188    subL_rReg(dst, src, cr);          // subq  rdx, src
8189  %}
8190%}
8191
8192//-----------------------------------------------------------------------------
8193
// Signed int remainder (ModI).  The dividend comes in through the rax_RegI
// operand, which is killed; the result is the rdx_RegI operand.  The format
// string and encoding are the same guarded cdql/idivl sequence as divI_rReg.
// NOTE(review): ins_cost is 300 here versus 30*100+10*100 on the divide
// rules that emit the same sequence (both are marked XXX) -- confirm.
8194instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
8195                   rFlagsReg cr)
8196%{
8197  match(Set rdx (ModI rax div));
8198  effect(KILL rax, KILL cr);
8199
8200  ins_cost(300); // XXX
8201  format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
8202            "jne,s   normal\n\t"
8203            "xorl    rdx, rdx\n\t"
8204            "cmpl    $div, -1\n\t"
8205            "je,s    done\n"
8206    "normal: cdql\n\t"
8207            "idivl   $div\n"
8208    "done:"        %}
8209  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8210  ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8211  ins_pipe(ialu_reg_reg_alu0);
8212%}
8213
// Signed long remainder (ModL).  The dividend comes in through the rax_RegL
// operand, which is killed; the result is the rdx_RegL operand.  The format
// string and encoding are the same guarded cdqq/idivq sequence as divL_rReg.
// NOTE(review): ins_cost is 300 here versus 30*100+10*100 on the divide
// rules that emit the same sequence (both are marked XXX) -- confirm.
8214instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
8215                   rFlagsReg cr)
8216%{
8217  match(Set rdx (ModL rax div));
8218  effect(KILL rax, KILL cr);
8219
8220  ins_cost(300); // XXX
8221  format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
8222            "cmpq    rax, rdx\n\t"
8223            "jne,s   normal\n\t"
8224            "xorl    rdx, rdx\n\t"
8225            "cmpq    $div, -1\n\t"
8226            "je,s    done\n"
8227    "normal: cdqq\n\t"
8228            "idivq   $div\n"
8229    "done:"        %}
8230  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8231  ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8232  ins_pipe(ialu_reg_reg_alu0);
8233%}
8234
8235// Integer Shift Instructions
8236// Shift Left by one
// Int register form: dst = dst << 1.  The immI1 shift operand only selects
// this rule; the D1 /4 encoding carries no immediate byte.  Flags are killed.
8237instruct salI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8238%{
8239  match(Set dst (LShiftI dst shift));
8240  effect(KILL cr);
8241
8242  format %{ "sall    $dst, $shift" %}
8243  opcode(0xD1, 0x4); /* D1 /4 */
8244  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8245  ins_pipe(ialu_reg);
8246%}
8247
8248// Shift Left by one
// Int memory form: [dst] = [dst] << 1, matched as StoreI of a shifted LoadI
// on the same memory operand.  No immediate byte is emitted.  Flags are killed.
8249instruct salI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8250%{
8251  match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8252  effect(KILL cr);
8253
8254  format %{ "sall    $dst, $shift\t" %}
8255  opcode(0xD1, 0x4); /* D1 /4 */
8256  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8257  ins_pipe(ialu_mem_imm);
8258%}
8259
8260// Shift Left by 8-bit immediate
// Int register form: dst = dst << shift with an immI8 count, encoded through
// the reg_opc_imm class (C1 /4 ib).  Flags are killed.
8261instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8262%{
8263  match(Set dst (LShiftI dst shift));
8264  effect(KILL cr);
8265
8266  format %{ "sall    $dst, $shift" %}
8267  opcode(0xC1, 0x4); /* C1 /4 ib */
8268  ins_encode(reg_opc_imm(dst, shift));
8269  ins_pipe(ialu_reg);
8270%}
8271
8272// Shift Left by 8-bit immediate
// Int memory form: [dst] = [dst] << shift with an immI8 count; the count is
// appended via Con8or32 after the ModRM bytes.  Flags are killed.
8273instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8274%{
8275  match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8276  effect(KILL cr);
8277
8278  format %{ "sall    $dst, $shift" %}
8279  opcode(0xC1, 0x4); /* C1 /4 ib */
8280  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
8281  ins_pipe(ialu_mem_imm);
8282%}
8283
8284// Shift Left by variable
// Int register form: dst = dst << shift, with the count constrained to the
// rcx_RegI class; the count is not part of the emitted bytes (D3 /4).
// Flags are killed.
8285instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
8286%{
8287  match(Set dst (LShiftI dst shift));
8288  effect(KILL cr);
8289
8290  format %{ "sall    $dst, $shift" %}
8291  opcode(0xD3, 0x4); /* D3 /4 */
8292  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8293  ins_pipe(ialu_reg_reg);
8294%}
8295
8296// Shift Left by variable
// Int memory form: [dst] = [dst] << shift, with the count constrained to the
// rcx_RegI class (D3 /4).  Flags are killed.
8297instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8298%{
8299  match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8300  effect(KILL cr);
8301
8302  format %{ "sall    $dst, $shift" %}
8303  opcode(0xD3, 0x4); /* D3 /4 */
8304  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8305  ins_pipe(ialu_mem_reg);
8306%}
8307
8308// Arithmetic shift right by one
// Int register form: dst = dst >> 1 (RShiftI).  The immI1 operand only
// selects this rule; D1 /7 carries no immediate byte.  Flags are killed.
8309instruct sarI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8310%{
8311  match(Set dst (RShiftI dst shift));
8312  effect(KILL cr);
8313
8314  format %{ "sarl    $dst, $shift" %}
8315  opcode(0xD1, 0x7); /* D1 /7 */
8316  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8317  ins_pipe(ialu_reg);
8318%}
8319
8320// Arithmetic shift right by one
// Int memory form: [dst] = [dst] >> 1, matched as StoreI of a shifted LoadI
// on the same memory operand (D1 /7).  Flags are killed.
8321instruct sarI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8322%{
8323  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8324  effect(KILL cr);
8325
8326  format %{ "sarl    $dst, $shift" %}
8327  opcode(0xD1, 0x7); /* D1 /7 */
8328  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8329  ins_pipe(ialu_mem_imm);
8330%}
8331
8332// Arithmetic Shift Right by 8-bit immediate
// Int register form: dst = dst >> shift with an immI8 count, encoded through
// the reg_opc_imm class (C1 /7 ib).  Flags are killed.
// Scheduled with the ialu_reg pipe class, matching the other register-only
// immediate shifts (salI_rReg_imm, shrI_rReg_imm); there is no memory operand.
8333instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8334%{
8335  match(Set dst (RShiftI dst shift));
8336  effect(KILL cr);
8337
8338  format %{ "sarl    $dst, $shift" %}
8339  opcode(0xC1, 0x7); /* C1 /7 ib */
8340  ins_encode(reg_opc_imm(dst, shift));
8341  ins_pipe(ialu_reg);
8342%}
8343
8344// Arithmetic Shift Right by 8-bit immediate
// Int memory form: [dst] = [dst] >> shift with an immI8 count; the count is
// appended via Con8or32 after the ModRM bytes.  Flags are killed.
8345instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8346%{
8347  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8348  effect(KILL cr);
8349
8350  format %{ "sarl    $dst, $shift" %}
8351  opcode(0xC1, 0x7); /* C1 /7 ib */
8352  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
8353  ins_pipe(ialu_mem_imm);
8354%}
8355
8356// Arithmetic Shift Right by variable
// Int register form: dst = dst >> shift, with the count constrained to the
// rcx_RegI class (D3 /7).  Flags are killed.
8357instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
8358%{
8359  match(Set dst (RShiftI dst shift));
8360  effect(KILL cr);
8361
8362  format %{ "sarl    $dst, $shift" %}
8363  opcode(0xD3, 0x7); /* D3 /7 */
8364  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8365  ins_pipe(ialu_reg_reg);
8366%}
8367
8368// Arithmetic Shift Right by variable
// Int memory form: [dst] = [dst] >> shift, with the count constrained to the
// rcx_RegI class (D3 /7).  Flags are killed.
8369instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8370%{
8371  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8372  effect(KILL cr);
8373
8374  format %{ "sarl    $dst, $shift" %}
8375  opcode(0xD3, 0x7); /* D3 /7 */
8376  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8377  ins_pipe(ialu_mem_reg);
8378%}
8379
8380// Logical shift right by one
// Int register form: dst = dst >>> 1 (URShiftI).  The immI1 operand only
// selects this rule; D1 /5 carries no immediate byte.  Flags are killed.
8381instruct shrI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8382%{
8383  match(Set dst (URShiftI dst shift));
8384  effect(KILL cr);
8385
8386  format %{ "shrl    $dst, $shift" %}
8387  opcode(0xD1, 0x5); /* D1 /5 */
8388  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8389  ins_pipe(ialu_reg);
8390%}
8391
8392// Logical shift right by one
// Int memory form: [dst] = [dst] >>> 1, matched as StoreI of a shifted LoadI
// on the same memory operand (D1 /5).  Flags are killed.
8393instruct shrI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8394%{
8395  match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
8396  effect(KILL cr);
8397
8398  format %{ "shrl    $dst, $shift" %}
8399  opcode(0xD1, 0x5); /* D1 /5 */
8400  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8401  ins_pipe(ialu_mem_imm);
8402%}
8403
8404// Logical Shift Right by 8-bit immediate
// Int register form: dst = dst >>> shift with an immI8 count, encoded
// through the reg_opc_imm class (C1 /5 ib).  Flags are killed.
8405instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8406%{
8407  match(Set dst (URShiftI dst shift));
8408  effect(KILL cr);
8409
8410  format %{ "shrl    $dst, $shift" %}
8411  opcode(0xC1, 0x5); /* C1 /5 ib */
8412  ins_encode(reg_opc_imm(dst, shift));
8413  ins_pipe(ialu_reg);
8414%}
8415
8416// Logical Shift Right by 8-bit immediate
// Int memory form: [dst] = [dst] >>> shift with an immI8 count; the count is
// appended via Con8or32 after the ModRM bytes.  Flags are killed.
8417instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8418%{
8419  match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
8420  effect(KILL cr);
8421
8422  format %{ "shrl    $dst, $shift" %}
8423  opcode(0xC1, 0x5); /* C1 /5 ib */
8424  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
8425  ins_pipe(ialu_mem_imm);
8426%}
8427
8428// Logical Shift Right by variable
// Int register form: dst = dst >>> shift, with the count constrained to the
// rcx_RegI class (D3 /5).  Flags are killed.
8429instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
8430%{
8431  match(Set dst (URShiftI dst shift));
8432  effect(KILL cr);
8433
8434  format %{ "shrl    $dst, $shift" %}
8435  opcode(0xD3, 0x5); /* D3 /5 */
8436  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8437  ins_pipe(ialu_reg_reg);
8438%}
8439
8440// Logical Shift Right by variable
// Int memory form: [dst] = [dst] >>> shift, with the count constrained to
// the rcx_RegI class (D3 /5).  Flags are killed.
8441instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8442%{
8443  match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
8444  effect(KILL cr);
8445
8446  format %{ "shrl    $dst, $shift" %}
8447  opcode(0xD3, 0x5); /* D3 /5 */
8448  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8449  ins_pipe(ialu_mem_reg);
8450%}
8451
8452// Long Shift Instructions
8453// Shift Left by one
// Long register form: dst = dst << 1 (LShiftL).  Same D1 /4 opcode as the
// int rule, with the wide REX encoding class.  Flags are killed.
8454instruct salL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
8455%{
8456  match(Set dst (LShiftL dst shift));
8457  effect(KILL cr);
8458
8459  format %{ "salq    $dst, $shift" %}
8460  opcode(0xD1, 0x4); /* D1 /4 */
8461  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8462  ins_pipe(ialu_reg);
8463%}
8464
8465// Shift Left by one
// Long memory form: [dst] = [dst] << 1, matched as StoreL of a shifted LoadL
// on the same memory operand.  Wide REX encoding; flags are killed.
8466instruct salL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8467%{
8468  match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
8469  effect(KILL cr);
8470
8471  format %{ "salq    $dst, $shift" %}
8472  opcode(0xD1, 0x4); /* D1 /4 */
8473  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8474  ins_pipe(ialu_mem_imm);
8475%}
8476
8477// Shift Left by 8-bit immediate
// Long register form: dst = dst << shift with an immI8 count, encoded
// through the reg_opc_imm_wide class (C1 /4 ib).  Flags are killed.
8478instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
8479%{
8480  match(Set dst (LShiftL dst shift));
8481  effect(KILL cr);
8482
8483  format %{ "salq    $dst, $shift" %}
8484  opcode(0xC1, 0x4); /* C1 /4 ib */
8485  ins_encode(reg_opc_imm_wide(dst, shift));
8486  ins_pipe(ialu_reg);
8487%}
8488
8489// Shift Left by 8-bit immediate
// Long memory form: [dst] = [dst] << shift with an immI8 count; the count is
// appended via Con8or32 after the ModRM bytes.  Flags are killed.
8490instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8491%{
8492  match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
8493  effect(KILL cr);
8494
8495  format %{ "salq    $dst, $shift" %}
8496  opcode(0xC1, 0x4); /* C1 /4 ib */
8497  ins_encode(REX_mem_wide(dst), OpcP,
8498             RM_opc_mem(secondary, dst), Con8or32(shift));
8499  ins_pipe(ialu_mem_imm);
8500%}
8501
8502// Shift Left by variable
// Long register form: dst = dst << shift.  The count is an int constrained
// to the rcx_RegI class (D3 /4, wide REX).  Flags are killed.
8503instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
8504%{
8505  match(Set dst (LShiftL dst shift));
8506  effect(KILL cr);
8507
8508  format %{ "salq    $dst, $shift" %}
8509  opcode(0xD3, 0x4); /* D3 /4 */
8510  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8511  ins_pipe(ialu_reg_reg);
8512%}
8513
8514// Shift Left by variable
// Long memory form: [dst] = [dst] << shift, with the count constrained to
// the rcx_RegI class (D3 /4, wide REX).  Flags are killed.
8515instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8516%{
8517  match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
8518  effect(KILL cr);
8519
8520  format %{ "salq    $dst, $shift" %}
8521  opcode(0xD3, 0x4); /* D3 /4 */
8522  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8523  ins_pipe(ialu_mem_reg);
8524%}
8525
8526// Arithmetic shift right by one
// Long register form: dst = dst >> 1 (RShiftL), D1 /7 with the wide REX
// encoding class.  Flags are killed.
8527instruct sarL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
8528%{
8529  match(Set dst (RShiftL dst shift));
8530  effect(KILL cr);
8531
8532  format %{ "sarq    $dst, $shift" %}
8533  opcode(0xD1, 0x7); /* D1 /7 */
8534  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8535  ins_pipe(ialu_reg);
8536%}
8537
8538// Arithmetic shift right by one
// Long memory form: [dst] = [dst] >> 1, matched as StoreL of a shifted LoadL
// on the same memory operand (D1 /7, wide REX).  Flags are killed.
8539instruct sarL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8540%{
8541  match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
8542  effect(KILL cr);
8543
8544  format %{ "sarq    $dst, $shift" %}
8545  opcode(0xD1, 0x7); /* D1 /7 */
8546  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8547  ins_pipe(ialu_mem_imm);
8548%}
8549
8550// Arithmetic Shift Right by 8-bit immediate
// Long register form: dst = dst >> shift with an immI8 count, encoded
// through the reg_opc_imm_wide class (C1 /7 ib).  Flags are killed.
// Scheduled with the ialu_reg pipe class, matching the other register-only
// immediate shifts (salL_rReg_imm, shrL_rReg_imm, sarL_rReg_63, sarL_rReg_2);
// there is no memory operand.
8551instruct sarL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
8552%{
8553  match(Set dst (RShiftL dst shift));
8554  effect(KILL cr);
8555
8556  format %{ "sarq    $dst, $shift" %}
8557  opcode(0xC1, 0x7); /* C1 /7 ib */
8558  ins_encode(reg_opc_imm_wide(dst, shift));
8559  ins_pipe(ialu_reg);
8560%}
8561
8562// Arithmetic Shift Right by 8-bit immediate
// Long memory form: [dst] = [dst] >> shift with an immI8 count; the count is
// appended via Con8or32 after the ModRM bytes.  Flags are killed.
8563instruct sarL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8564%{
8565  match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
8566  effect(KILL cr);
8567
8568  format %{ "sarq    $dst, $shift" %}
8569  opcode(0xC1, 0x7); /* C1 /7 ib */
8570  ins_encode(REX_mem_wide(dst), OpcP,
8571             RM_opc_mem(secondary, dst), Con8or32(shift));
8572  ins_pipe(ialu_mem_imm);
8573%}
8574
8575// Arithmetic Shift Right by variable
// Long register form: dst = dst >> shift, with the count constrained to the
// rcx_RegI class (D3 /7, wide REX).  Flags are killed.
8576instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
8577%{
8578  match(Set dst (RShiftL dst shift));
8579  effect(KILL cr);
8580
8581  format %{ "sarq    $dst, $shift" %}
8582  opcode(0xD3, 0x7); /* D3 /7 */
8583  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8584  ins_pipe(ialu_reg_reg);
8585%}
8586
8587// Arithmetic Shift Right by variable
// Long memory form: [dst] = [dst] >> shift, with the count constrained to
// the rcx_RegI class (D3 /7, wide REX).  Flags are killed.
8588instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8589%{
8590  match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
8591  effect(KILL cr);
8592
8593  format %{ "sarq    $dst, $shift" %}
8594  opcode(0xD3, 0x7); /* D3 /7 */
8595  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8596  ins_pipe(ialu_mem_reg);
8597%}
8598
8599// Logical shift right by one
// Long register form: dst = dst >>> 1 (URShiftL), D1 /5 with the wide REX
// encoding class.  Flags are killed.
8600instruct shrL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
8601%{
8602  match(Set dst (URShiftL dst shift));
8603  effect(KILL cr);
8604
8605  format %{ "shrq    $dst, $shift" %}
8606  opcode(0xD1, 0x5); /* D1 /5 */
8607  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst ));
8608  ins_pipe(ialu_reg);
8609%}
8610
8611// Logical shift right by one
// Long memory form: [dst] = [dst] >>> 1, matched as StoreL of a shifted
// LoadL on the same memory operand (D1 /5, wide REX).  Flags are killed.
8612instruct shrL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8613%{
8614  match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
8615  effect(KILL cr);
8616
8617  format %{ "shrq    $dst, $shift" %}
8618  opcode(0xD1, 0x5); /* D1 /5 */
8619  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8620  ins_pipe(ialu_mem_imm);
8621%}
8622
8623// Logical Shift Right by 8-bit immediate
// Long register form: dst = dst >>> shift with an immI8 count, encoded
// through the reg_opc_imm_wide class (C1 /5 ib).  Flags are killed.
8624instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
8625%{
8626  match(Set dst (URShiftL dst shift));
8627  effect(KILL cr);
8628
8629  format %{ "shrq    $dst, $shift" %}
8630  opcode(0xC1, 0x5); /* C1 /5 ib */
8631  ins_encode(reg_opc_imm_wide(dst, shift));
8632  ins_pipe(ialu_reg);
8633%}
8634
8635// Logical Shift Right by 8-bit immediate
// Long memory form: [dst] = [dst] >>> shift with an immI8 count; the count
// is appended via Con8or32 after the ModRM bytes.  Flags are killed.
8636instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8637%{
8638  match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
8639  effect(KILL cr);
8640
8641  format %{ "shrq    $dst, $shift" %}
8642  opcode(0xC1, 0x5); /* C1 /5 ib */
8643  ins_encode(REX_mem_wide(dst), OpcP,
8644             RM_opc_mem(secondary, dst), Con8or32(shift));
8645  ins_pipe(ialu_mem_imm);
8646%}
8647
8648// Logical Shift Right by variable
// Long register form: dst = dst >>> shift, with the count constrained to the
// rcx_RegI class (D3 /5, wide REX).  Flags are killed.
8649instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
8650%{
8651  match(Set dst (URShiftL dst shift));
8652  effect(KILL cr);
8653
8654  format %{ "shrq    $dst, $shift" %}
8655  opcode(0xD3, 0x5); /* D3 /5 */
8656  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8657  ins_pipe(ialu_reg_reg);
8658%}
8659
8660// Logical Shift Right by variable
// Long memory form: [dst] = [dst] >>> shift, with the count constrained to
// the rcx_RegI class (D3 /5, wide REX).  Flags are killed.
8661instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8662%{
8663  match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
8664  effect(KILL cr);
8665
8666  format %{ "shrq    $dst, $shift" %}
8667  opcode(0xD3, 0x5); /* D3 /5 */
8668  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8669  ins_pipe(ialu_mem_reg);
8670%}
8671
8672// Shift Left by 24, followed by Arithmetic Shift Right by 24.
8673// This idiom is used by the compiler for the i2b bytecode.
// The shift pair is replaced by a single byte sign-extending move (movsbl,
// opcode 0F BE).  No flags operand is declared for this rule.
8674instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
8675%{
8676  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
8677
8678  format %{ "movsbl  $dst, $src\t# i2b" %}
8679  opcode(0x0F, 0xBE);
8680  ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
8681  ins_pipe(ialu_reg_reg);
8682%}
8683
8684// Shift Left by 16, followed by Arithmetic Shift Right by 16.
8685// This idiom is used by the compiler for the i2s bytecode.
// The shift pair is replaced by a single 16-bit sign-extending move (movswl,
// opcode 0F BF).  No flags operand is declared for this rule.
8686instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
8687%{
8688  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
8689
8690  format %{ "movswl  $dst, $src\t# i2s" %}
8691  opcode(0x0F, 0xBF);
8692  ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
8693  ins_pipe(ialu_reg_reg);
8694%}
8695
8696// ROL/ROR instructions
8697
8698// ROL expand
// Expand-only helper (no match rule): rotate dst left by one in place
// (D1 /0).  Used by rolI_rReg_i1.  Flags are killed.
8699instruct rolI_rReg_imm1(rRegI dst, rFlagsReg cr) %{
8700  effect(KILL cr, USE_DEF dst);
8701
8702  format %{ "roll    $dst" %}
8703  opcode(0xD1, 0x0); /* Opcode  D1 /0 */
8704  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8705  ins_pipe(ialu_reg);
8706%}
8707
// Expand-only helper (no match rule): rotate dst left in place by an immI8
// count (C1 /0 ib).  Used by rolI_rReg_i8.  Flags are killed.
8708instruct rolI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr) %{
8709  effect(USE_DEF dst, USE shift, KILL cr);
8710
8711  format %{ "roll    $dst, $shift" %}
8712  opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
8713  ins_encode( reg_opc_imm(dst, shift) );
8714  ins_pipe(ialu_reg);
8715%}
8716
// Expand-only helper (no match rule): rotate dst left in place by a count
// constrained to the rcx_RegI class (D3 /0).  dst is restricted to the
// no_rcx_RegI class.  Used by the rolI_rReg_Var_* rules.  Flags are killed.
8717instruct rolI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
8718%{
8719  effect(USE_DEF dst, USE shift, KILL cr);
8720
8721  format %{ "roll    $dst, $shift" %}
8722  opcode(0xD3, 0x0); /* Opcode D3 /0 */
8723  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8724  ins_pipe(ialu_reg_reg);
8725%}
8726// end of ROL expand
8727
8728// Rotate Left by one
// Recognizes (dst << 1) | (dst >>> -1): lshift is an immI1 and rshift an
// immI_M1.  The pattern expands to rolI_rReg_imm1.
8729instruct rolI_rReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
8730%{
8731  match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8732
8733  expand %{
8734    rolI_rReg_imm1(dst, cr);
8735  %}
8736%}
8737
8738// Rotate Left by 8-bit immediate
// The predicate reads the second input of each child of the matched OrI
// node (the two shift counts) and accepts the pattern only when their sum
// is 0 modulo 32, i.e. when the shift pair really is a 32-bit rotate.
// Expands to rolI_rReg_imm8 using the left-shift count.
8739instruct rolI_rReg_i8(rRegI dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
8740%{
8741  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8742  match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8743
8744  expand %{
8745    rolI_rReg_imm8(dst, lshift, cr);
8746  %}
8747%}
8748
8749// Rotate Left by variable
// Matches the form OrI(LShiftI(dst, shift), URShiftI(dst, SubI(zero, shift))),
// i.e. the right-shift count is written as (0 - shift).
// Expands to rolI_rReg_CL.
8750instruct rolI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
8751%{
8752  match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
8753
8754  expand %{
8755    rolI_rReg_CL(dst, shift, cr);
8756  %}
8757%}
8758
8759// Rotate Left by variable
// Same as rolI_rReg_Var_C0 except that the right-shift count is written as
// SubI(c32, shift), with c32 an immI_32 operand (presumably the constant 32).
// Expands to rolI_rReg_CL.
8760instruct rolI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
8761%{
8762  match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
8763
8764  expand %{
8765    rolI_rReg_CL(dst, shift, cr);
8766  %}
8767%}
8768
8769// ROR expand
// Expand-only helper (no match rule): 32-bit rotate right by one,
// opcode D1 /1.  dst is declared USE_DEF and the flags register is killed.
// Referenced from the expand block of rorI_rReg_i1.
8770instruct rorI_rReg_imm1(rRegI dst, rFlagsReg cr)
8771%{
8772  effect(USE_DEF dst, KILL cr);
8773
8774  format %{ "rorl    $dst" %}
8775  opcode(0xD1, 0x1); /* D1 /1 */
8776  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8777  ins_pipe(ialu_reg);
8778%}
8779
// Expand-only helper (no match rule): 32-bit rotate right by an 8-bit
// immediate, opcode C1 /1 ib.  Referenced from the expand of rorI_rReg_i8.
// The whole encoding is delegated to reg_opc_imm (defined elsewhere).
8780instruct rorI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr)
8781%{
8782  effect(USE_DEF dst, USE shift, KILL cr);
8783
8784  format %{ "rorl    $dst, $shift" %}
8785  opcode(0xC1, 0x1); /* C1 /1 ib */
8786  ins_encode(reg_opc_imm(dst, shift));
8787  ins_pipe(ialu_reg);
8788%}
8789
// Expand-only helper (no match rule): 32-bit rotate right by a variable
// count, opcode D3 /1.  The count operand is restricted to rcx_RegI and dst
// to no_rcx_RegI.
// Referenced from the expands of rorI_rReg_Var_C0 and rorI_rReg_Var_C32.
8790instruct rorI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
8791%{
8792  effect(USE_DEF dst, USE shift, KILL cr);
8793
8794  format %{ "rorl    $dst, $shift" %}
8795  opcode(0xD3, 0x1); /* D3 /1 */
8796  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8797  ins_pipe(ialu_reg_reg);
8798%}
8799// end of ROR expand
8800
8801// Rotate Right by one
// Matches OrI(URShiftI(dst, rshift), LShiftI(dst, lshift)) where rshift is
// an immI1 and lshift an immI_M1 operand (presumably the constants 1 and -1;
// a 32-bit shift by -1 acts as a shift by 31).  Expands to rorI_rReg_imm1.
8802instruct rorI_rReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
8803%{
8804  match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8805
8806  expand %{
8807    rorI_rReg_imm1(dst, cr);
8808  %}
8809%}
8810
8811// Rotate Right by 8-bit immediate
// The predicate reads the second input of each child of the matched OrI
// node (the two shift counts) and accepts the pattern only when their sum
// is 0 modulo 32.  Expands to rorI_rReg_imm8 using the right-shift count.
8812instruct rorI_rReg_i8(rRegI dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
8813%{
8814  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8815  match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8816
8817  expand %{
8818    rorI_rReg_imm8(dst, rshift, cr);
8819  %}
8820%}
8821
8822// Rotate Right by variable
// Matches the form OrI(URShiftI(dst, shift), LShiftI(dst, SubI(zero, shift))),
// i.e. the left-shift count is written as (0 - shift).
// Expands to rorI_rReg_CL.
8823instruct rorI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
8824%{
8825  match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8826
8827  expand %{
8828    rorI_rReg_CL(dst, shift, cr);
8829  %}
8830%}
8831
8832// Rotate Right by variable
// Same as rorI_rReg_Var_C0 except that the left-shift count is written as
// SubI(c32, shift), with c32 an immI_32 operand (presumably the constant 32).
// Expands to rorI_rReg_CL.
8833instruct rorI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
8834%{
8835  match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8836
8837  expand %{
8838    rorI_rReg_CL(dst, shift, cr);
8839  %}
8840%}
8841
8842// for long rotate
8843// ROL expand
// Expand-only helper (no match rule): 64-bit rotate left by one (rolq),
// opcode D1 /0 behind the REX_reg_wide prefix encoding.
// Referenced from the expand block of rolL_rReg_i1.
8844instruct rolL_rReg_imm1(rRegL dst, rFlagsReg cr) %{
8845  effect(USE_DEF dst, KILL cr);
8846
8847  format %{ "rolq    $dst" %}
8848  opcode(0xD1, 0x0); /* Opcode  D1 /0 */
8849  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8850  ins_pipe(ialu_reg);
8851%}
8852
// Expand-only helper (no match rule): 64-bit rotate left by an 8-bit
// immediate, opcode C1 /0 ib.  Referenced from the expand of rolL_rReg_i8.
// The whole encoding is delegated to reg_opc_imm_wide (defined elsewhere).
8853instruct rolL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr) %{
8854  effect(USE_DEF dst, USE shift, KILL cr);
8855
8856  format %{ "rolq    $dst, $shift" %}
8857  opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
8858  ins_encode( reg_opc_imm_wide(dst, shift) );
8859  ins_pipe(ialu_reg);
8860%}
8861
// Expand-only helper (no match rule): 64-bit rotate left by a variable
// count, opcode D3 /0.  The count is an int operand restricted to rcx_RegI;
// dst is a long restricted to no_rcx_RegL.
// Referenced from the expands of rolL_rReg_Var_C0 and rolL_rReg_Var_C64.
8862instruct rolL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
8863%{
8864  effect(USE_DEF dst, USE shift, KILL cr);
8865
8866  format %{ "rolq    $dst, $shift" %}
8867  opcode(0xD3, 0x0); /* Opcode D3 /0 */
8868  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8869  ins_pipe(ialu_reg_reg);
8870%}
8871// end of ROL expand
8872
8873// Rotate Left by one
// Matches OrL(LShiftL(dst, lshift), URShiftL(dst, rshift)) where lshift is
// an immI1 and rshift an immI_M1 operand (presumably the constants 1 and -1;
// a 64-bit shift by -1 acts as a shift by 63).  Expands to rolL_rReg_imm1.
8874instruct rolL_rReg_i1(rRegL dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
8875%{
8876  match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
8877
8878  expand %{
8879    rolL_rReg_imm1(dst, cr);
8880  %}
8881%}
8882
8883// Rotate Left by 8-bit immediate
// The predicate reads the second input of each child of the matched OrL
// node (the two shift counts) and accepts the pattern only when their sum
// is 0 modulo 64.  Expands to rolL_rReg_imm8 using the left-shift count.
8884instruct rolL_rReg_i8(rRegL dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
8885%{
8886  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
8887  match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
8888
8889  expand %{
8890    rolL_rReg_imm8(dst, lshift, cr);
8891  %}
8892%}
8893
8894// Rotate Left by variable
// Matches the form OrL(LShiftL(dst, shift), URShiftL(dst, SubI(zero, shift))),
// i.e. the right-shift count is written as (0 - shift).
// Expands to rolL_rReg_CL.
8895instruct rolL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
8896%{
8897  match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI zero shift))));
8898
8899  expand %{
8900    rolL_rReg_CL(dst, shift, cr);
8901  %}
8902%}
8903
8904// Rotate Left by variable
// Same as rolL_rReg_Var_C0 except that the right-shift count is written as
// SubI(c64, shift), with c64 an immI_64 operand (presumably the constant 64).
// Expands to rolL_rReg_CL.
8905instruct rolL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
8906%{
8907  match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI c64 shift))));
8908
8909  expand %{
8910    rolL_rReg_CL(dst, shift, cr);
8911  %}
8912%}
8913
8914// ROR expand
// Expand-only helper (no match rule): 64-bit rotate right by one (rorq),
// opcode D1 /1 behind the REX_reg_wide prefix encoding.
// Referenced from the expand block of rorL_rReg_i1.
8915instruct rorL_rReg_imm1(rRegL dst, rFlagsReg cr)
8916%{
8917  effect(USE_DEF dst, KILL cr);
8918
8919  format %{ "rorq    $dst" %}
8920  opcode(0xD1, 0x1); /* D1 /1 */
8921  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8922  ins_pipe(ialu_reg);
8923%}
8924
// Expand-only helper (no match rule): 64-bit rotate right by an 8-bit
// immediate, opcode C1 /1 ib.  Referenced from the expand of rorL_rReg_i8.
// The whole encoding is delegated to reg_opc_imm_wide (defined elsewhere).
8925instruct rorL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr)
8926%{
8927  effect(USE_DEF dst, USE shift, KILL cr);
8928
8929  format %{ "rorq    $dst, $shift" %}
8930  opcode(0xC1, 0x1); /* C1 /1 ib */
8931  ins_encode(reg_opc_imm_wide(dst, shift));
8932  ins_pipe(ialu_reg);
8933%}
8934
// Expand-only helper (no match rule): 64-bit rotate right by a variable
// count, opcode D3 /1.  The count is an int operand restricted to rcx_RegI;
// dst is a long restricted to no_rcx_RegL.
// Referenced from the expands of rorL_rReg_Var_C0 and rorL_rReg_Var_C64.
8935instruct rorL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
8936%{
8937  effect(USE_DEF dst, USE shift, KILL cr);
8938
8939  format %{ "rorq    $dst, $shift" %}
8940  opcode(0xD3, 0x1); /* D3 /1 */
8941  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8942  ins_pipe(ialu_reg_reg);
8943%}
8944// end of ROR expand
8945
8946// Rotate Right by one
// Matches OrL(URShiftL(dst, rshift), LShiftL(dst, lshift)) where rshift is
// an immI1 and lshift an immI_M1 operand (presumably the constants 1 and -1;
// a 64-bit shift by -1 acts as a shift by 63).  Expands to rorL_rReg_imm1.
8947instruct rorL_rReg_i1(rRegL dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
8948%{
8949  match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
8950
8951  expand %{
8952    rorL_rReg_imm1(dst, cr);
8953  %}
8954%}
8955
8956// Rotate Right by 8-bit immediate
// The predicate reads the second input of each child of the matched OrL
// node (the two shift counts) and accepts the pattern only when their sum
// is 0 modulo 64.  Expands to rorL_rReg_imm8 using the right-shift count.
8957instruct rorL_rReg_i8(rRegL dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
8958%{
8959  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
8960  match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
8961
8962  expand %{
8963    rorL_rReg_imm8(dst, rshift, cr);
8964  %}
8965%}
8966
8967// Rotate Right by variable
// Matches the form OrL(URShiftL(dst, shift), LShiftL(dst, SubI(zero, shift))),
// i.e. the left-shift count is written as (0 - shift).
// Expands to rorL_rReg_CL.
8968instruct rorL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
8969%{
8970  match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI zero shift))));
8971
8972  expand %{
8973    rorL_rReg_CL(dst, shift, cr);
8974  %}
8975%}
8976
8977// Rotate Right by variable
// Same as rorL_rReg_Var_C0 except that the left-shift count is written as
// SubI(c64, shift), with c64 an immI_64 operand (presumably the constant 64).
// Expands to rorL_rReg_CL.
8978instruct rorL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
8979%{
8980  match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI c64 shift))));
8981
8982  expand %{
8983    rorL_rReg_CL(dst, shift, cr);
8984  %}
8985%}
8986
8987// Logical Instructions
8988
8989// Integer Logical Instructions
8990
8991// And Instructions
8992// And Register with Register
// Two-address form: matches dst = AndI(dst, src) on int registers and emits
// andl (opcode 0x23).  The flags register is declared killed.
8993instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8994%{
8995  match(Set dst (AndI dst src));
8996  effect(KILL cr);
8997
8998  format %{ "andl    $dst, $src\t# int" %}
8999  opcode(0x23);
9000  ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9001  ins_pipe(ialu_reg_reg);
9002%}
9003
9004// And Register with Immediate 255
// Masking with the immI_255 operand is emitted as a zero-extending byte
// move of dst onto itself (movzbl, opcode 0F B6) rather than an andl.
// Unlike andI_rReg, no flags operand or KILL effect is declared.
9005instruct andI_rReg_imm255(rRegI dst, immI_255 src)
9006%{
9007  match(Set dst (AndI dst src));
9008
9009  format %{ "movzbl  $dst, $dst\t# int & 0xFF" %}
9010  opcode(0x0F, 0xB6);
9011  ins_encode(REX_reg_breg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9012  ins_pipe(ialu_reg);
9013%}
9014
9015// And Register with Immediate 255 and promote to long
// Matches ConvI2L(AndI(src, mask)): the mask and the int-to-long conversion
// are covered by one movzbl (opcode 0F B6) from the int src into the long dst.
9016instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
9017%{
9018  match(Set dst (ConvI2L (AndI src mask)));
9019
9020  format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
9021  opcode(0x0F, 0xB6);
9022  ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9023  ins_pipe(ialu_reg);
9024%}
9025
9026// And Register with Immediate 65535
// Masking with the immI_65535 operand is emitted as a zero-extending word
// move of dst onto itself (movzwl, opcode 0F B7) rather than an andl.
// No flags operand or KILL effect is declared.
9027instruct andI_rReg_imm65535(rRegI dst, immI_65535 src)
9028%{
9029  match(Set dst (AndI dst src));
9030
9031  format %{ "movzwl  $dst, $dst\t# int & 0xFFFF" %}
9032  opcode(0x0F, 0xB7);
9033  ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9034  ins_pipe(ialu_reg);
9035%}
9036
9037// And Register with Immediate 65535 and promote to long
// Matches ConvI2L(AndI(src, mask)): the mask and the int-to-long conversion
// are covered by one movzwl (opcode 0F B7) from the int src into the long dst.
9038instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
9039%{
9040  match(Set dst (ConvI2L (AndI src mask)));
9041
9042  format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
9043  opcode(0x0F, 0xB7);
9044  ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9045  ins_pipe(ialu_reg);
9046%}
9047
9048// And Register with Immediate
// General int-immediate form: matches dst = AndI(dst, src), opcode 81 /4.
// OpcSErm and Con8or32 are enc_classes defined elsewhere; judging by their
// names they choose between an 8-bit and a 32-bit immediate (TODO confirm).
9049instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9050%{
9051  match(Set dst (AndI dst src));
9052  effect(KILL cr);
9053
9054  format %{ "andl    $dst, $src\t# int" %}
9055  opcode(0x81, 0x04); /* Opcode 81 /4 */
9056  ins_encode(OpcSErm(dst, src), Con8or32(src));
9057  ins_pipe(ialu_reg);
9058%}
9059
9060// And Register with Memory
// Folds the LoadI into the instruction: matches dst = AndI(dst, LoadI(src))
// and emits andl with a memory source (opcode 0x23).  ins_cost is 125.
9061instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9062%{
9063  match(Set dst (AndI dst (LoadI src)));
9064  effect(KILL cr);
9065
9066  ins_cost(125);
9067  format %{ "andl    $dst, $src\t# int" %}
9068  opcode(0x23);
9069  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9070  ins_pipe(ialu_reg_mem);
9071%}
9072
9073// And Memory with Register
// Read-modify-write form: matches a StoreI to dst of AndI(LoadI(dst), src),
// i.e. load, and, and store all address the same memory operand.
// Emitted as andl with a memory destination (opcode 21 /r); ins_cost is 150.
9074instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9075%{
9076  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
9077  effect(KILL cr);
9078
9079  ins_cost(150);
9080  format %{ "andl    $dst, $src\t# int" %}
9081  opcode(0x21); /* Opcode 21 /r */
9082  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
9083  ins_pipe(ialu_mem_reg);
9084%}
9085
9086// And Memory with Immediate
// Read-modify-write form with an immediate: matches a StoreI to dst of
// AndI(LoadI(dst), src).  Opcode 81 /4; the /4 extension is passed to
// RM_opc_mem as the secondary opcode.  ins_cost is 125.
9087instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
9088%{
9089  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
9090  effect(KILL cr);
9091
9092  ins_cost(125);
9093  format %{ "andl    $dst, $src\t# int" %}
9094  opcode(0x81, 0x4); /* Opcode 81 /4 id */
9095  ins_encode(REX_mem(dst), OpcSE(src),
9096             RM_opc_mem(secondary, dst), Con8or32(src));
9097  ins_pipe(ialu_mem_imm);
9098%}
9099
9100// Or Instructions
9101// Or Register with Register
// Two-address form: matches dst = OrI(dst, src) on int registers and emits
// orl (opcode 0x0B).  The flags register is declared killed.
9102instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9103%{
9104  match(Set dst (OrI dst src));
9105  effect(KILL cr);
9106
9107  format %{ "orl     $dst, $src\t# int" %}
9108  opcode(0x0B);
9109  ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9110  ins_pipe(ialu_reg_reg);
9111%}
9112
9113// Or Register with Immediate
// Matches dst = OrI(dst, src) with an int immediate; opcode 81 /1.
// The encoding is built by OpcSErm and Con8or32 (defined elsewhere).
9114instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9115%{
9116  match(Set dst (OrI dst src));
9117  effect(KILL cr);
9118
9119  format %{ "orl     $dst, $src\t# int" %}
9120  opcode(0x81, 0x01); /* Opcode 81 /1 id */
9121  ins_encode(OpcSErm(dst, src), Con8or32(src));
9122  ins_pipe(ialu_reg);
9123%}
9124
9125// Or Register with Memory
// Folds the LoadI into the instruction: matches dst = OrI(dst, LoadI(src))
// and emits orl with a memory source (opcode 0x0B).  ins_cost is 125.
9126instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9127%{
9128  match(Set dst (OrI dst (LoadI src)));
9129  effect(KILL cr);
9130
9131  ins_cost(125);
9132  format %{ "orl     $dst, $src\t# int" %}
9133  opcode(0x0B);
9134  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9135  ins_pipe(ialu_reg_mem);
9136%}
9137
9138// Or Memory with Register
// Read-modify-write form: matches a StoreI to dst of OrI(LoadI(dst), src).
// Emitted as orl with a memory destination (opcode 09 /r); ins_cost is 150.
9139instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9140%{
9141  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
9142  effect(KILL cr);
9143
9144  ins_cost(150);
9145  format %{ "orl     $dst, $src\t# int" %}
9146  opcode(0x09); /* Opcode 09 /r */
9147  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
9148  ins_pipe(ialu_mem_reg);
9149%}
9150
9151// Or Memory with Immediate
// Read-modify-write form with an immediate: matches a StoreI to dst of
// OrI(LoadI(dst), src).  Opcode 81 /1; the /1 extension is passed to
// RM_opc_mem as the secondary opcode.  ins_cost is 125.
9152instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
9153%{
9154  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
9155  effect(KILL cr);
9156
9157  ins_cost(125);
9158  format %{ "orl     $dst, $src\t# int" %}
9159  opcode(0x81, 0x1); /* Opcode 81 /1 id */
9160  ins_encode(REX_mem(dst), OpcSE(src),
9161             RM_opc_mem(secondary, dst), Con8or32(src));
9162  ins_pipe(ialu_mem_imm);
9163%}
9164
9165// Xor Instructions
9166// Xor Register with Register
// Two-address form: matches dst = XorI(dst, src) on int registers and emits
// xorl (opcode 0x33).  The flags register is declared killed.
9167instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9168%{
9169  match(Set dst (XorI dst src));
9170  effect(KILL cr);
9171
9172  format %{ "xorl    $dst, $src\t# int" %}
9173  opcode(0x33);
9174  ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9175  ins_pipe(ialu_reg_reg);
9176%}
9177
9178// Xor Register with Immediate
// Matches dst = XorI(dst, src) with an int immediate; opcode 81 /6.
// The encoding is built by OpcSErm and Con8or32 (defined elsewhere).
9179instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9180%{
9181  match(Set dst (XorI dst src));
9182  effect(KILL cr);
9183
9184  format %{ "xorl    $dst, $src\t# int" %}
9185  opcode(0x81, 0x06); /* Opcode 81 /6 id */
9186  ins_encode(OpcSErm(dst, src), Con8or32(src));
9187  ins_pipe(ialu_reg);
9188%}
9189
9190// Xor Register with Memory
// Folds the LoadI into the instruction: matches dst = XorI(dst, LoadI(src))
// and emits xorl with a memory source (opcode 0x33).  ins_cost is 125.
9191instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9192%{
9193  match(Set dst (XorI dst (LoadI src)));
9194  effect(KILL cr);
9195
9196  ins_cost(125);
9197  format %{ "xorl    $dst, $src\t# int" %}
9198  opcode(0x33);
9199  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9200  ins_pipe(ialu_reg_mem);
9201%}
9202
9203// Xor Memory with Register
// Read-modify-write form: matches a StoreI to dst of XorI(LoadI(dst), src).
// Emitted as xorl with a memory destination (opcode 31 /r); ins_cost is 150.
9204instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9205%{
9206  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
9207  effect(KILL cr);
9208
9209  ins_cost(150);
9210  format %{ "xorl    $dst, $src\t# int" %}
9211  opcode(0x31); /* Opcode 31 /r */
9212  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
9213  ins_pipe(ialu_mem_reg);
9214%}
9215
9216// Xor Memory with Immediate
// Read-modify-write form with an immediate: matches a StoreI to dst of
// XorI(LoadI(dst), src).  Opcode 81 /6; the /6 extension is passed to
// RM_opc_mem as the secondary opcode.  ins_cost is 125.
9217instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
9218%{
9219  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
9220  effect(KILL cr);
9221
9222  ins_cost(125);
9223  format %{ "xorl    $dst, $src\t# int" %}
9224  opcode(0x81, 0x6); /* Opcode 81 /6 id */
9225  ins_encode(REX_mem(dst), OpcSE(src),
9226             RM_opc_mem(secondary, dst), Con8or32(src));
9227  ins_pipe(ialu_mem_imm);
9228%}
9229
9230
9231// Long Logical Instructions
9232
9233// And Instructions
9234// And Register with Register
// 64-bit two-address form: matches dst = AndL(dst, src) on long registers
// and emits andq (opcode 0x23 behind the REX_reg_reg_wide prefix encoding).
// The flags register is declared killed.
9235instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
9236%{
9237  match(Set dst (AndL dst src));
9238  effect(KILL cr);
9239
9240  format %{ "andq    $dst, $src\t# long" %}
9241  opcode(0x23);
9242  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
9243  ins_pipe(ialu_reg_reg);
9244%}
9245
9246// And Register with Immediate 255
// Masking a long with the immL_255 operand is emitted as a zero-extending
// byte move of dst onto itself (movzbq, opcode 0F B6) rather than an andq.
// Both encoded operands are dst, so the format string prints $dst twice;
// $src is the immediate and never appears in the emitted instruction.
// No flags operand or KILL effect is declared.
9247instruct andL_rReg_imm255(rRegL dst, immL_255 src)
9248%{
9249  match(Set dst (AndL dst src));
9250
9251  format %{ "movzbq  $dst, $dst\t# long & 0xFF" %}
9252  opcode(0x0F, 0xB6);
9253  ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9254  ins_pipe(ialu_reg);
9255%}
9256
9257// And Register with Immediate 65535
// Masking a long with the immL_65535 operand is emitted as a zero-extending
// word move of dst onto itself (movzwq, opcode 0F B7) rather than an andq.
// dst must be a long register operand (rRegL): the rule matches an AndL,
// whose result is a long, and the encoding uses the _wide REX form, exactly
// as in andL_rReg_imm255.
// No flags operand or KILL effect is declared.
9258instruct andL_rReg_imm65535(rRegL dst, immL_65535 src)
9259%{
9260  match(Set dst (AndL dst src));
9261
9262  format %{ "movzwq  $dst, $dst\t# long & 0xFFFF" %}
9263  opcode(0x0F, 0xB7);
9264  ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9265  ins_pipe(ialu_reg);
9266%}
9267
9268// And Register with Immediate
// Matches dst = AndL(dst, src) where src is an immL32 operand; opcode 81 /4.
// The encoding is built by OpcSErm_wide and Con8or32 (defined elsewhere).
9269instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
9270%{
9271  match(Set dst (AndL dst src));
9272  effect(KILL cr);
9273
9274  format %{ "andq    $dst, $src\t# long" %}
9275  opcode(0x81, 0x04); /* Opcode 81 /4 */
9276  ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
9277  ins_pipe(ialu_reg);
9278%}
9279
9280// And Register with Memory
// Folds the LoadL into the instruction: matches dst = AndL(dst, LoadL(src))
// and emits andq with a memory source (opcode 0x23).  ins_cost is 125.
9281instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
9282%{
9283  match(Set dst (AndL dst (LoadL src)));
9284  effect(KILL cr);
9285
9286  ins_cost(125);
9287  format %{ "andq    $dst, $src\t# long" %}
9288  opcode(0x23);
9289  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
9290  ins_pipe(ialu_reg_mem);
9291%}
9292
9293// And Memory with Register
// Read-modify-write form: matches a StoreL to dst of AndL(LoadL(dst), src).
// Emitted as andq with a memory destination (opcode 21 /r); ins_cost is 150.
9294instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
9295%{
9296  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
9297  effect(KILL cr);
9298
9299  ins_cost(150);
9300  format %{ "andq    $dst, $src\t# long" %}
9301  opcode(0x21); /* Opcode 21 /r */
9302  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
9303  ins_pipe(ialu_mem_reg);
9304%}
9305
9306// And Memory with Immediate
// Read-modify-write form with an immL32 immediate: matches a StoreL to dst
// of AndL(LoadL(dst), src).  Opcode 81 /4; the /4 extension is passed to
// RM_opc_mem as the secondary opcode.  ins_cost is 125.
9307instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
9308%{
9309  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
9310  effect(KILL cr);
9311
9312  ins_cost(125);
9313  format %{ "andq    $dst, $src\t# long" %}
9314  opcode(0x81, 0x4); /* Opcode 81 /4 id */
9315  ins_encode(REX_mem_wide(dst), OpcSE(src),
9316             RM_opc_mem(secondary, dst), Con8or32(src));
9317  ins_pipe(ialu_mem_imm);
9318%}
9319
9320// Or Instructions
9321// Or Register with Register
// 64-bit two-address form: matches dst = OrL(dst, src) on long registers
// and emits orq (opcode 0x0B behind the REX_reg_reg_wide prefix encoding).
// The flags register is declared killed.
9322instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
9323%{
9324  match(Set dst (OrL dst src));
9325  effect(KILL cr);
9326
9327  format %{ "orq     $dst, $src\t# long" %}
9328  opcode(0x0B);
9329  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
9330  ins_pipe(ialu_reg_reg);
9331%}
9332
9333// Or Register with Immediate
// Matches dst = OrL(dst, src) where src is an immL32 operand; opcode 81 /1.
// The encoding is built by OpcSErm_wide and Con8or32 (defined elsewhere).
9334instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
9335%{
9336  match(Set dst (OrL dst src));
9337  effect(KILL cr);
9338
9339  format %{ "orq     $dst, $src\t# long" %}
9340  opcode(0x81, 0x01); /* Opcode 81 /1 id */
9341  ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
9342  ins_pipe(ialu_reg);
9343%}
9344
9345// Or Register with Memory
// Folds the LoadL into the instruction: matches dst = OrL(dst, LoadL(src))
// and emits orq with a memory source (opcode 0x0B).  ins_cost is 125.
9346instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
9347%{
9348  match(Set dst (OrL dst (LoadL src)));
9349  effect(KILL cr);
9350
9351  ins_cost(125);
9352  format %{ "orq     $dst, $src\t# long" %}
9353  opcode(0x0B);
9354  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
9355  ins_pipe(ialu_reg_mem);
9356%}
9357
9358// Or Memory with Register
// Read-modify-write form: matches a StoreL to dst of OrL(LoadL(dst), src).
// Emitted as orq with a memory destination (opcode 09 /r); ins_cost is 150.
9359instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
9360%{
9361  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
9362  effect(KILL cr);
9363
9364  ins_cost(150);
9365  format %{ "orq     $dst, $src\t# long" %}
9366  opcode(0x09); /* Opcode 09 /r */
9367  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
9368  ins_pipe(ialu_mem_reg);
9369%}
9370
9371// Or Memory with Immediate
// Read-modify-write form with an immL32 immediate: matches a StoreL to dst
// of OrL(LoadL(dst), src).  Opcode 81 /1; the /1 extension is passed to
// RM_opc_mem as the secondary opcode.  ins_cost is 125.
9372instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
9373%{
9374  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
9375  effect(KILL cr);
9376
9377  ins_cost(125);
9378  format %{ "orq     $dst, $src\t# long" %}
9379  opcode(0x81, 0x1); /* Opcode 81 /1 id */
9380  ins_encode(REX_mem_wide(dst), OpcSE(src),
9381             RM_opc_mem(secondary, dst), Con8or32(src));
9382  ins_pipe(ialu_mem_imm);
9383%}
9384
9385// Xor Instructions
9386// Xor Register with Register
// 64-bit two-address form: matches dst = XorL(dst, src) on long registers
// and emits xorq (opcode 0x33 behind the REX_reg_reg_wide prefix encoding).
// The flags register is declared killed.
9387instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
9388%{
9389  match(Set dst (XorL dst src));
9390  effect(KILL cr);
9391
9392  format %{ "xorq    $dst, $src\t# long" %}
9393  opcode(0x33);
9394  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
9395  ins_pipe(ialu_reg_reg);
9396%}
9397
9398// Xor Register with Immediate
// Matches dst = XorL(dst, src) where src is an immL32 operand; opcode 81 /6.
// The encoding is built by OpcSErm_wide and Con8or32 (defined elsewhere).
9399instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
9400%{
9401  match(Set dst (XorL dst src));
9402  effect(KILL cr);
9403
9404  format %{ "xorq    $dst, $src\t# long" %}
9405  opcode(0x81, 0x06); /* Opcode 81 /6 id */
9406  ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
9407  ins_pipe(ialu_reg);
9408%}
9409
9410// Xor Register with Memory
// Folds the LoadL into the instruction: matches dst = XorL(dst, LoadL(src))
// and emits xorq with a memory source (opcode 0x33).  ins_cost is 125.
9411instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
9412%{
9413  match(Set dst (XorL dst (LoadL src)));
9414  effect(KILL cr);
9415
9416  ins_cost(125);
9417  format %{ "xorq    $dst, $src\t# long" %}
9418  opcode(0x33);
9419  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
9420  ins_pipe(ialu_reg_mem);
9421%}
9422
9423// Xor Memory with Register
// Read-modify-write form: matches a StoreL to dst of XorL(LoadL(dst), src).
// Emitted as xorq with a memory destination (opcode 31 /r); ins_cost is 150.
9424instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
9425%{
9426  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
9427  effect(KILL cr);
9428
9429  ins_cost(150);
9430  format %{ "xorq    $dst, $src\t# long" %}
9431  opcode(0x31); /* Opcode 31 /r */
9432  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
9433  ins_pipe(ialu_mem_reg);
9434%}
9435
9436// Xor Memory with Immediate
// Read-modify-write form with an immL32 immediate: matches a StoreL to dst
// of XorL(LoadL(dst), src).  Opcode 81 /6; the /6 extension is passed to
// RM_opc_mem as the secondary opcode.  ins_cost is 125.
9437instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
9438%{
9439  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
9440  effect(KILL cr);
9441
9442  ins_cost(125);
9443  format %{ "xorq    $dst, $src\t# long" %}
9444  opcode(0x81, 0x6); /* Opcode 81 /6 id */
9445  ins_encode(REX_mem_wide(dst), OpcSE(src),
9446             RM_opc_mem(secondary, dst), Con8or32(src));
9447  ins_pipe(ialu_mem_imm);
9448%}
9449
9450// Convert Int to Boolean
// Matches Conv2B of an int register.  Three-instruction sequence, as listed
// in the format string: testl src,src sets the flags, setnz writes the low
// byte of dst, and movzbl zero-extends that byte to the full int register.
// The flags register is declared killed (testl writes it).
9451instruct convI2B(rRegI dst, rRegI src, rFlagsReg cr)
9452%{
9453  match(Set dst (Conv2B src));
9454  effect(KILL cr);
9455
9456  format %{ "testl   $src, $src\t# ci2b\n\t"
9457            "setnz   $dst\n\t"
9458            "movzbl  $dst, $dst" %}
9459  ins_encode(REX_reg_reg(src, src), opc_reg_reg(0x85, src, src), // testl
9460             setNZ_reg(dst),
9461             REX_reg_breg(dst, dst), // movzbl
9462             Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
9463  ins_pipe(pipe_slow); // XXX
9464%}
9465
9466// Convert Pointer to Boolean
// Pointer variant of convI2B: src is an rRegP, so the test is the 64-bit
// testq (REX_reg_reg_wide); setnz and movzbl then produce the int result in
// dst exactly as in the int version.  The flags register is declared killed.
9467instruct convP2B(rRegI dst, rRegP src, rFlagsReg cr)
9468%{
9469  match(Set dst (Conv2B src));
9470  effect(KILL cr);
9471
9472  format %{ "testq   $src, $src\t# cp2b\n\t"
9473            "setnz   $dst\n\t"
9474            "movzbl  $dst, $dst" %}
9475  ins_encode(REX_reg_reg_wide(src, src), opc_reg_reg(0x85, src, src), // testq
9476             setNZ_reg(dst),
9477             REX_reg_breg(dst, dst), // movzbl
9478             Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
9479  ins_pipe(pipe_slow); // XXX
9480%}
9481
// Matches CmpLTMask(p, q) into an int register.  Sequence per the format
// string: cmpl p,q; setlt dst; movzbl dst,dst; negl dst.  The final negate
// turns the 0/1 produced by setlt into 0/-1, i.e. an all-ones mask when the
// "less than" condition held.  ins_cost is 400; flags are declared killed.
9482instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
9483%{
9484  match(Set dst (CmpLTMask p q));
9485  effect(KILL cr);
9486
9487  ins_cost(400); // XXX
9488  format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
9489            "setlt   $dst\n\t"
9490            "movzbl  $dst, $dst\n\t"
9491            "negl    $dst" %}
9492  ins_encode(REX_reg_reg(p, q), opc_reg_reg(0x3B, p, q), // cmpl
9493             setLT_reg(dst),
9494             REX_reg_breg(dst, dst), // movzbl
9495             Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst),
9496             neg_reg(dst));
9497  ins_pipe(pipe_slow);
9498%}
9499
// Cheaper special case of cmpLTMask for a comparison against the immI0
// operand: matches dst = CmpLTMask(dst, zero) and emits a single arithmetic
// right shift by 31 (sarl, opcode C1 /7 with immediate 0x1F), which fills
// dst with copies of its sign bit.  ins_cost is 100.
9500instruct cmpLTMask0(rRegI dst, immI0 zero, rFlagsReg cr)
9501%{
9502  match(Set dst (CmpLTMask dst zero));
9503  effect(KILL cr);
9504
9505  ins_cost(100); // XXX
9506  format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
9507  opcode(0xC1, 0x7);  /* C1 /7 ib */
9508  ins_encode(reg_opc_imm(dst, 0x1F));
9509  ins_pipe(ialu_reg);
9510%}
9511
9512
// Fused conditional add: matches
//   p = AddI(AndI(CmpLTMask(p, q), y), SubI(p, q))
// Sequence per the format string: subl p,q; sbbl tmp,tmp; andl tmp,y;
// addl p,tmp.  tmp is declared TEMP and the flags register killed.
// The actual bytes are produced by enc_cmpLTP (defined elsewhere).
9513instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y,
9514                         rRegI tmp,
9515                         rFlagsReg cr)
9516%{
9517  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
9518  effect(TEMP tmp, KILL cr);
9519
9520  ins_cost(400); // XXX
9521  format %{ "subl    $p, $q\t# cadd_cmpLTMask1\n\t"
9522            "sbbl    $tmp, $tmp\n\t"
9523            "andl    $tmp, $y\n\t"
9524            "addl    $p, $tmp" %}
9525  ins_encode(enc_cmpLTP(p, q, y, tmp));
9526  ins_pipe(pipe_cmplt);
9527%}
9528
9529/* If I enable this, I encourage spilling in the inner loop of compress.
9530instruct cadd_cmpLTMask_mem( rRegI p, rRegI q, memory y, rRegI tmp, rFlagsReg cr )
9531%{
9532  match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
9533  effect( TEMP tmp, KILL cr );
9534  ins_cost(400);
9535
9536  format %{ "SUB    $p,$q\n\t"
9537            "SBB    RCX,RCX\n\t"
9538            "AND    RCX,$y\n\t"
9539            "ADD    $p,RCX" %}
9540  ins_encode( enc_cmpLTP_mem(p,q,y,tmp) );
9541%}
9542*/
9543
9544//---------- FP Instructions------------------------------------------------
9545
// Float compare into the unsigned-flags register class (rFlagsRegU).
// Emits ucomiss (opcode 0F 2E) followed by the cmpfp_fixup encoding.  Per
// the format string the fixup is skipped when the parity flag is clear
// (jnp); otherwise ("saw NaN") the flags are pushed, masked with
// 0xffffff2b on the stack and popped again.  ins_cost is 145.
9546instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
9547%{
9548  match(Set cr (CmpF src1 src2));
9549
9550  ins_cost(145);
9551  format %{ "ucomiss $src1, $src2\n\t"
9552            "jnp,s   exit\n\t"
9553            "pushfq\t# saw NaN, set CF\n\t"
9554            "andq    [rsp], #0xffffff2b\n\t"
9555            "popfq\n"
9556    "exit:   nop\t# avoid branch to branch" %}
9557  opcode(0x0F, 0x2E);
9558  ins_encode(REX_reg_reg(src1, src2), OpcP, OpcS, reg_reg(src1, src2),
9559             cmpfp_fixup);
9560  ins_pipe(pipe_slow);
9561%}
9562
// Same as cmpF_cc_reg, but the second operand is a LoadF folded into the
// ucomiss as a memory operand (REX_reg_mem / reg_mem encodings).
// The NaN fix-up sequence shown in the format string is unchanged.
9563instruct cmpF_cc_mem(rFlagsRegU cr, regF src1, memory src2)
9564%{
9565  match(Set cr (CmpF src1 (LoadF src2)));
9566
9567  ins_cost(145);
9568  format %{ "ucomiss $src1, $src2\n\t"
9569            "jnp,s   exit\n\t"
9570            "pushfq\t# saw NaN, set CF\n\t"
9571            "andq    [rsp], #0xffffff2b\n\t"
9572            "popfq\n"
9573    "exit:   nop\t# avoid branch to branch" %}
9574  opcode(0x0F, 0x2E);
9575  ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2),
9576             cmpfp_fixup);
9577  ins_pipe(pipe_slow);
9578%}
9579
// Same as cmpF_cc_reg, but the second operand is a float constant (immF).
// The constant is encoded through load_immF behind a REX_reg_mem prefix,
// i.e. as a memory operand, so the format string prints it in brackets as
// [$src2] like the other immediate-operand FP instructs (cmpD_cc_imm,
// cmpF_imm, addF_imm, ...).
// The NaN fix-up sequence shown in the format string is unchanged.
9580instruct cmpF_cc_imm(rFlagsRegU cr, regF src1, immF src2)
9581%{
9582  match(Set cr (CmpF src1 src2));
9583
9584  ins_cost(145);
9585  format %{ "ucomiss $src1, [$src2]\n\t"
9586            "jnp,s   exit\n\t"
9587            "pushfq\t# saw NaN, set CF\n\t"
9588            "andq    [rsp], #0xffffff2b\n\t"
9589            "popfq\n"
9590    "exit:   nop\t# avoid branch to branch" %}
9591  opcode(0x0F, 0x2E);
9592  ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, load_immF(src1, src2),
9593             cmpfp_fixup);
9594  ins_pipe(pipe_slow);
9595%}
9596
// Double compare into the unsigned-flags register class (rFlagsRegU).
// Emits ucomisd (opcode 66 0F 2E) followed by the cmpfp_fixup encoding.
// Note the ins_encode order: the 0x66 prefix (OpcP) is listed before the
// REX prefix encoding.  The NaN fix-up shown in the format string is the
// same as for the float compares.  ins_cost is 145.
9597instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
9598%{
9599  match(Set cr (CmpD src1 src2));
9600
9601  ins_cost(145);
9602  format %{ "ucomisd $src1, $src2\n\t"
9603            "jnp,s   exit\n\t"
9604            "pushfq\t# saw NaN, set CF\n\t"
9605            "andq    [rsp], #0xffffff2b\n\t"
9606            "popfq\n"
9607    "exit:   nop\t# avoid branch to branch" %}
9608  opcode(0x66, 0x0F, 0x2E);
9609  ins_encode(OpcP, REX_reg_reg(src1, src2), OpcS, OpcT, reg_reg(src1, src2),
9610             cmpfp_fixup);
9611  ins_pipe(pipe_slow);
9612%}
9613
// Same as cmpD_cc_reg, but the second operand is a LoadD folded into the
// ucomisd as a memory operand (REX_reg_mem / reg_mem encodings).
9614instruct cmpD_cc_mem(rFlagsRegU cr, regD src1, memory src2)
9615%{
9616  match(Set cr (CmpD src1 (LoadD src2)));
9617
9618  ins_cost(145);
9619  format %{ "ucomisd $src1, $src2\n\t"
9620            "jnp,s   exit\n\t"
9621            "pushfq\t# saw NaN, set CF\n\t"
9622            "andq    [rsp], #0xffffff2b\n\t"
9623            "popfq\n"
9624    "exit:   nop\t# avoid branch to branch" %}
9625  opcode(0x66, 0x0F, 0x2E);
9626  ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2),
9627             cmpfp_fixup);
9628  ins_pipe(pipe_slow);
9629%}
9630
// Same as cmpD_cc_reg, but the second operand is a double constant (immD)
// encoded through load_immD behind a REX_reg_mem prefix; the format string
// prints it in brackets as a memory operand.
9631instruct cmpD_cc_imm(rFlagsRegU cr, regD src1, immD src2)
9632%{
9633  match(Set cr (CmpD src1 src2));
9634
9635  ins_cost(145);
9636  format %{ "ucomisd $src1, [$src2]\n\t"
9637            "jnp,s   exit\n\t"
9638            "pushfq\t# saw NaN, set CF\n\t"
9639            "andq    [rsp], #0xffffff2b\n\t"
9640            "popfq\n"
9641    "exit:   nop\t# avoid branch to branch" %}
9642  opcode(0x66, 0x0F, 0x2E);
9643  ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, load_immD(src1, src2),
9644             cmpfp_fixup);
9645  ins_pipe(pipe_slow);
9646%}
9647
9648// Compare into -1,0,1
// Matches CmpF3(src1, src2) into an int register.  Per the format string:
// after ucomiss, dst is preset to -1 and kept if the parity flag (jp) or
// the below condition (jb) is set; otherwise setne/movzbl leave 1 when the
// operands differ and 0 when they are equal.
// The tail after ucomiss is emitted by cmpfp3 (defined elsewhere).
9649instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
9650%{
9651  match(Set dst (CmpF3 src1 src2));
9652  effect(KILL cr);
9653
9654  ins_cost(275);
9655  format %{ "ucomiss $src1, $src2\n\t"
9656            "movl    $dst, #-1\n\t"
9657            "jp,s    done\n\t"
9658            "jb,s    done\n\t"
9659            "setne   $dst\n\t"
9660            "movzbl  $dst, $dst\n"
9661    "done:" %}
9662
9663  opcode(0x0F, 0x2E);
9664  ins_encode(REX_reg_reg(src1, src2), OpcP, OpcS, reg_reg(src1, src2),
9665             cmpfp3(dst));
9666  ins_pipe(pipe_slow);
9667%}
9668
9669// Compare into -1,0,1
// Memory-operand variant of cmpF_reg: matches CmpF3(src1, LoadF(src2)) and
// folds the load into ucomiss.  The -1/0/1 tail is the same cmpfp3 sequence.
9670instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
9671%{
9672  match(Set dst (CmpF3 src1 (LoadF src2)));
9673  effect(KILL cr);
9674
9675  ins_cost(275);
9676  format %{ "ucomiss $src1, $src2\n\t"
9677            "movl    $dst, #-1\n\t"
9678            "jp,s    done\n\t"
9679            "jb,s    done\n\t"
9680            "setne   $dst\n\t"
9681            "movzbl  $dst, $dst\n"
9682    "done:" %}
9683
9684  opcode(0x0F, 0x2E);
9685  ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2),
9686             cmpfp3(dst));
9687  ins_pipe(pipe_slow);
9688%}
9689
9690// Compare into -1,0,1
// Constant-operand variant of cmpF_reg: the immF src2 is encoded through
// load_immF behind a REX_reg_mem prefix and printed in brackets as a memory
// operand.  The -1/0/1 tail is the same cmpfp3 sequence.
9691instruct cmpF_imm(rRegI dst, regF src1, immF src2, rFlagsReg cr)
9692%{
9693  match(Set dst (CmpF3 src1 src2));
9694  effect(KILL cr);
9695
9696  ins_cost(275);
9697  format %{ "ucomiss $src1, [$src2]\n\t"
9698            "movl    $dst, #-1\n\t"
9699            "jp,s    done\n\t"
9700            "jb,s    done\n\t"
9701            "setne   $dst\n\t"
9702            "movzbl  $dst, $dst\n"
9703    "done:" %}
9704
9705  opcode(0x0F, 0x2E);
9706  ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, load_immF(src1, src2),
9707             cmpfp3(dst));
9708  ins_pipe(pipe_slow);
9709%}
9710
9711// Compare into -1,0,1
// Double version of cmpF_reg: matches CmpD3(src1, src2) and uses ucomisd
// (opcode 66 0F 2E, with the 0x66 prefix listed before the REX encoding).
// Per the format string dst stays -1 on parity or below, otherwise
// setne/movzbl leave 1 or 0.  The tail is emitted by cmpfp3.
9712instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
9713%{
9714  match(Set dst (CmpD3 src1 src2));
9715  effect(KILL cr);
9716
9717  ins_cost(275);
9718  format %{ "ucomisd $src1, $src2\n\t"
9719            "movl    $dst, #-1\n\t"
9720            "jp,s    done\n\t"
9721            "jb,s    done\n\t"
9722            "setne   $dst\n\t"
9723            "movzbl  $dst, $dst\n"
9724    "done:" %}
9725
9726  opcode(0x66, 0x0F, 0x2E);
9727  ins_encode(OpcP, REX_reg_reg(src1, src2), OpcS, OpcT, reg_reg(src1, src2),
9728             cmpfp3(dst));
9729  ins_pipe(pipe_slow);
9730%}
9731
9732// Compare into -1,0,1
// Memory-operand variant of cmpD_reg: matches CmpD3(src1, LoadD(src2)) and
// folds the load into ucomisd.  The -1/0/1 tail is the same cmpfp3 sequence.
9733instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
9734%{
9735  match(Set dst (CmpD3 src1 (LoadD src2)));
9736  effect(KILL cr);
9737
9738  ins_cost(275);
9739  format %{ "ucomisd $src1, $src2\n\t"
9740            "movl    $dst, #-1\n\t"
9741            "jp,s    done\n\t"
9742            "jb,s    done\n\t"
9743            "setne   $dst\n\t"
9744            "movzbl  $dst, $dst\n"
9745    "done:" %}
9746
9747  opcode(0x66, 0x0F, 0x2E);
9748  ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2),
9749             cmpfp3(dst));
9750  ins_pipe(pipe_slow);
9751%}
9752
9753// Compare into -1,0,1
// Constant-operand variant of cmpD_reg: the immD src2 is encoded through
// load_immD behind a REX_reg_mem prefix and printed in brackets as a memory
// operand.  The -1/0/1 tail is the same cmpfp3 sequence.
9754instruct cmpD_imm(rRegI dst, regD src1, immD src2, rFlagsReg cr)
9755%{
9756  match(Set dst (CmpD3 src1 src2));
9757  effect(KILL cr);
9758
9759  ins_cost(275);
9760  format %{ "ucomisd $src1, [$src2]\n\t"
9761            "movl    $dst, #-1\n\t"
9762            "jp,s    done\n\t"
9763            "jb,s    done\n\t"
9764            "setne   $dst\n\t"
9765            "movzbl  $dst, $dst\n"
9766    "done:" %}
9767
9768  opcode(0x66, 0x0F, 0x2E);
9769  ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, load_immD(src1, src2),
9770             cmpfp3(dst));
9771  ins_pipe(pipe_slow);
9772%}
9773
// Scalar float add, two-address form: matches dst = AddF(dst, src) and
// emits addss (opcode F3 0F 58).  The F3 prefix (OpcP) is listed before the
// REX encoding.  No flags effect is declared.  ins_cost is 150.
9774instruct addF_reg(regF dst, regF src)
9775%{
9776  match(Set dst (AddF dst src));
9777
9778  format %{ "addss   $dst, $src" %}
9779  ins_cost(150); // XXX
9780  opcode(0xF3, 0x0F, 0x58);
9781  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
9782  ins_pipe(pipe_slow);
9783%}
9784
// Scalar float add with the LoadF folded in: matches
// dst = AddF(dst, LoadF(src)) and emits addss with a memory source.
9785instruct addF_mem(regF dst, memory src)
9786%{
9787  match(Set dst (AddF dst (LoadF src)));
9788
9789  format %{ "addss   $dst, $src" %}
9790  ins_cost(150); // XXX
9791  opcode(0xF3, 0x0F, 0x58);
9792  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
9793  ins_pipe(pipe_slow);
9794%}
9795
9796instruct addF_imm(regF dst, immF src)
9797%{
  // Float add with a float constant (immF) as right operand. The listing
  // shows the constant as a memory reference and it is encoded via load_immF.
9798  match(Set dst (AddF dst src));
9799
9800  format %{ "addss   $dst, [$src]" %}
9801  ins_cost(150); // XXX
9802  opcode(0xF3, 0x0F, 0x58);
9803  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
9804  ins_pipe(pipe_slow);
9805%}
9806
9807instruct addD_reg(regD dst, regD src)
9808%{
  // Double add, register-register. dst is both the left input and the
  // result: dst = dst + src.
9809  match(Set dst (AddD dst src));
9810
9811  format %{ "addsd   $dst, $src" %}
9812  ins_cost(150); // XXX
9813  opcode(0xF2, 0x0F, 0x58);
9814  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
9815  ins_pipe(pipe_slow);
9816%}
9817
9818instruct addD_mem(regD dst, memory src)
9819%{
  // Double add with a memory right operand: the LoadD is folded into the
  // match rule, giving dst = dst + [src].
9820  match(Set dst (AddD dst (LoadD src)));
9821
9822  format %{ "addsd   $dst, $src" %}
9823  ins_cost(150); // XXX
9824  opcode(0xF2, 0x0F, 0x58);
9825  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
9826  ins_pipe(pipe_slow);
9827%}
9828
9829instruct addD_imm(regD dst, immD src)
9830%{
  // Double add with a double constant (immD) as right operand. The listing
  // shows the constant as a memory reference and it is encoded via load_immD.
9831  match(Set dst (AddD dst src));
9832
9833  format %{ "addsd   $dst, [$src]" %}
9834  ins_cost(150); // XXX
9835  opcode(0xF2, 0x0F, 0x58);
9836  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
9837  ins_pipe(pipe_slow);
9838%}
9839
9840instruct subF_reg(regF dst, regF src)
9841%{
  // Float subtract, register-register. dst is the left input and the
  // result: dst = dst - src.
9842  match(Set dst (SubF dst src));
9843
9844  format %{ "subss   $dst, $src" %}
9845  ins_cost(150); // XXX
9846  opcode(0xF3, 0x0F, 0x5C);
9847  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
9848  ins_pipe(pipe_slow);
9849%}
9850
9851instruct subF_mem(regF dst, memory src)
9852%{
  // Float subtract with a memory right operand (LoadF folded into the
  // match rule): dst = dst - [src].
9853  match(Set dst (SubF dst (LoadF src)));
9854
9855  format %{ "subss   $dst, $src" %}
9856  ins_cost(150); // XXX
9857  opcode(0xF3, 0x0F, 0x5C);
9858  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
9859  ins_pipe(pipe_slow);
9860%}
9861
9862instruct subF_imm(regF dst, immF src)
9863%{
  // Float subtract of a float constant (immF) from dst. The listing shows
  // the constant as a memory reference and it is encoded via load_immF.
9864  match(Set dst (SubF dst src));
9865
9866  format %{ "subss   $dst, [$src]" %}
9867  ins_cost(150); // XXX
9868  opcode(0xF3, 0x0F, 0x5C);
9869  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
9870  ins_pipe(pipe_slow);
9871%}
9872
9873instruct subD_reg(regD dst, regD src)
9874%{
  // Double subtract, register-register. dst is the left input and the
  // result: dst = dst - src.
9875  match(Set dst (SubD dst src));
9876
9877  format %{ "subsd   $dst, $src" %}
9878  ins_cost(150); // XXX
9879  opcode(0xF2, 0x0F, 0x5C);
9880  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
9881  ins_pipe(pipe_slow);
9882%}
9883
9884instruct subD_mem(regD dst, memory src)
9885%{
  // Double subtract with a memory right operand (LoadD folded into the
  // match rule): dst = dst - [src].
9886  match(Set dst (SubD dst (LoadD src)));
9887
9888  format %{ "subsd   $dst, $src" %}
9889  ins_cost(150); // XXX
9890  opcode(0xF2, 0x0F, 0x5C);
9891  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
9892  ins_pipe(pipe_slow);
9893%}
9894
9895instruct subD_imm(regD dst, immD src)
9896%{
  // Double subtract of a double constant (immD) from dst. The listing shows
  // the constant as a memory reference and it is encoded via load_immD.
9897  match(Set dst (SubD dst src));
9898
9899  format %{ "subsd   $dst, [$src]" %}
9900  ins_cost(150); // XXX
9901  opcode(0xF2, 0x0F, 0x5C);
9902  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
9903  ins_pipe(pipe_slow);
9904%}
9905
9906instruct mulF_reg(regF dst, regF src)
9907%{
  // Float multiply, register-register. dst is both an input and the
  // result: dst = dst * src.
9908  match(Set dst (MulF dst src));
9909
9910  format %{ "mulss   $dst, $src" %}
9911  ins_cost(150); // XXX
9912  opcode(0xF3, 0x0F, 0x59);
9913  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
9914  ins_pipe(pipe_slow);
9915%}
9916
9917instruct mulF_mem(regF dst, memory src)
9918%{
  // Float multiply with a memory right operand (LoadF folded into the
  // match rule): dst = dst * [src].
9919  match(Set dst (MulF dst (LoadF src)));
9920
9921  format %{ "mulss   $dst, $src" %}
9922  ins_cost(150); // XXX
9923  opcode(0xF3, 0x0F, 0x59);
9924  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
9925  ins_pipe(pipe_slow);
9926%}
9927
9928instruct mulF_imm(regF dst, immF src)
9929%{
  // Float multiply by a float constant (immF). The listing shows the
  // constant as a memory reference and it is encoded via load_immF.
9930  match(Set dst (MulF dst src));
9931
9932  format %{ "mulss   $dst, [$src]" %}
9933  ins_cost(150); // XXX
9934  opcode(0xF3, 0x0F, 0x59);
9935  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
9936  ins_pipe(pipe_slow);
9937%}
9938
9939instruct mulD_reg(regD dst, regD src)
9940%{
  // Double multiply, register-register. dst is both an input and the
  // result: dst = dst * src.
9941  match(Set dst (MulD dst src));
9942
9943  format %{ "mulsd   $dst, $src" %}
9944  ins_cost(150); // XXX
9945  opcode(0xF2, 0x0F, 0x59);
9946  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
9947  ins_pipe(pipe_slow);
9948%}
9949
9950instruct mulD_mem(regD dst, memory src)
9951%{
  // Double multiply with a memory right operand (LoadD folded into the
  // match rule): dst = dst * [src].
9952  match(Set dst (MulD dst (LoadD src)));
9953
9954  format %{ "mulsd   $dst, $src" %}
9955  ins_cost(150); // XXX
9956  opcode(0xF2, 0x0F, 0x59);
9957  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
9958  ins_pipe(pipe_slow);
9959%}
9960
9961instruct mulD_imm(regD dst, immD src)
9962%{
  // Double multiply by a double constant (immD). The listing shows the
  // constant as a memory reference and it is encoded via load_immD.
9963  match(Set dst (MulD dst src));
9964
9965  format %{ "mulsd   $dst, [$src]" %}
9966  ins_cost(150); // XXX
9967  opcode(0xF2, 0x0F, 0x59);
9968  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
9969  ins_pipe(pipe_slow);
9970%}
9971
9972instruct divF_reg(regF dst, regF src)
9973%{
  // Float divide, register-register. dst is the dividend and the result:
  // dst = dst / src.
9974  match(Set dst (DivF dst src));
9975
9976  format %{ "divss   $dst, $src" %}
9977  ins_cost(150); // XXX
9978  opcode(0xF3, 0x0F, 0x5E);
9979  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
9980  ins_pipe(pipe_slow);
9981%}
9982
9983instruct divF_mem(regF dst, memory src)
9984%{
  // Float divide by a value in memory (LoadF folded into the match rule):
  // dst = dst / [src].
9985  match(Set dst (DivF dst (LoadF src)));
9986
9987  format %{ "divss   $dst, $src" %}
9988  ins_cost(150); // XXX
9989  opcode(0xF3, 0x0F, 0x5E);
9990  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
9991  ins_pipe(pipe_slow);
9992%}
9993
9994instruct divF_imm(regF dst, immF src)
9995%{
  // Float divide by a float constant (immF). The listing shows the constant
  // as a memory reference and it is encoded via load_immF.
9996  match(Set dst (DivF dst src));
9997
9998  format %{ "divss   $dst, [$src]" %}
9999  ins_cost(150); // XXX
10000  opcode(0xF3, 0x0F, 0x5E);
10001  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
10002  ins_pipe(pipe_slow);
10003%}
10004
10005instruct divD_reg(regD dst, regD src)
10006%{
  // Double divide, register-register. dst is the dividend and the result:
  // dst = dst / src.
10007  match(Set dst (DivD dst src));
10008
10009  format %{ "divsd   $dst, $src" %}
10010  ins_cost(150); // XXX
10011  opcode(0xF2, 0x0F, 0x5E);
10012  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10013  ins_pipe(pipe_slow);
10014%}
10015
10016instruct divD_mem(regD dst, memory src)
10017%{
  // Double divide by a value in memory (LoadD folded into the match rule):
  // dst = dst / [src].
10018  match(Set dst (DivD dst (LoadD src)));
10019
10020  format %{ "divsd   $dst, $src" %}
10021  ins_cost(150); // XXX
10022  opcode(0xF2, 0x0F, 0x5E);
10023  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10024  ins_pipe(pipe_slow);
10025%}
10026
10027instruct divD_imm(regD dst, immD src)
10028%{
  // Double divide by a double constant (immD). The listing shows the
  // constant as a memory reference and it is encoded via load_immD.
10029  match(Set dst (DivD dst src));
10030
10031  format %{ "divsd   $dst, [$src]" %}
10032  ins_cost(150); // XXX
10033  opcode(0xF2, 0x0F, 0x5E);
10034  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
10035  ins_pipe(pipe_slow);
10036%}
10037
10038instruct sqrtF_reg(regF dst, regF src)
10039%{
  // Float square root. The rule matches the whole chain "widen float to
  // double, SqrtD, narrow back to float" and prints it as one sqrtss.
10040  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
10041
10042  format %{ "sqrtss  $dst, $src" %}
10043  ins_cost(150); // XXX
10044  opcode(0xF3, 0x0F, 0x51);
10045  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10046  ins_pipe(pipe_slow);
10047%}
10048
10049instruct sqrtF_mem(regF dst, memory src)
10050%{
  // Float square root of a value in memory: same widen/SqrtD/narrow chain
  // as the register form, with the LoadF folded into the match rule.
10051  match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
10052
10053  format %{ "sqrtss  $dst, $src" %}
10054  ins_cost(150); // XXX
10055  opcode(0xF3, 0x0F, 0x51);
10056  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10057  ins_pipe(pipe_slow);
10058%}
10059
10060instruct sqrtF_imm(regF dst, immF src)
10061%{
  // Float square root of a float constant (immF): same widen/SqrtD/narrow
  // chain; the constant is printed as a memory reference and encoded via
  // load_immF.
10062  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
10063
10064  format %{ "sqrtss  $dst, [$src]" %}
10065  ins_cost(150); // XXX
10066  opcode(0xF3, 0x0F, 0x51);
10067  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
10068  ins_pipe(pipe_slow);
10069%}
10070
10071instruct sqrtD_reg(regD dst, regD src)
10072%{
  // Double square root, register-register. Unlike the arithmetic rules
  // above, dst is a pure result here (src is a separate operand).
10073  match(Set dst (SqrtD src));
10074
10075  format %{ "sqrtsd  $dst, $src" %}
10076  ins_cost(150); // XXX
10077  opcode(0xF2, 0x0F, 0x51);
10078  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10079  ins_pipe(pipe_slow);
10080%}
10081
10082instruct sqrtD_mem(regD dst, memory src)
10083%{
  // Double square root of a value in memory (LoadD folded into the match
  // rule).
10084  match(Set dst (SqrtD (LoadD src)));
10085
10086  format %{ "sqrtsd  $dst, $src" %}
10087  ins_cost(150); // XXX
10088  opcode(0xF2, 0x0F, 0x51);
10089  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10090  ins_pipe(pipe_slow);
10091%}
10092
10093instruct sqrtD_imm(regD dst, immD src)
10094%{
  // Double square root of a double constant (immD); the constant is printed
  // as a memory reference and encoded via load_immD.
10095  match(Set dst (SqrtD src));
10096
10097  format %{ "sqrtsd  $dst, [$src]" %}
10098  ins_cost(150); // XXX
10099  opcode(0xF2, 0x0F, 0x51);
10100  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
10101  ins_pipe(pipe_slow);
10102%}
10103
10104instruct absF_reg(regF dst)
10105%{
  // Float absolute value computed in place on dst. Per the listing this is
  // an andps with a 0x7fffffff mask; the bytes come from absF_encoding.
10106  match(Set dst (AbsF dst));
10107
10108  format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
10109  ins_encode(absF_encoding(dst));
10110  ins_pipe(pipe_slow);
10111%}
10112
10113instruct absD_reg(regD dst)
10114%{
  // Double absolute value computed in place on dst. Per the listing this is
  // an andpd with a 0x7fffffffffffffff mask; the bytes come from
  // absD_encoding.
10115  match(Set dst (AbsD dst));
10116
10117  format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
10118            "# abs double by sign masking" %}
10119  ins_encode(absD_encoding(dst));
10120  ins_pipe(pipe_slow);
10121%}
10122
10123instruct negF_reg(regF dst)
10124%{
  // Float negation computed in place on dst. Per the listing this is an
  // xorps with a 0x80000000 mask; the bytes come from negF_encoding.
10125  match(Set dst (NegF dst));
10126
10127  format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
10128  ins_encode(negF_encoding(dst));
10129  ins_pipe(pipe_slow);
10130%}
10131
10132instruct negD_reg(regD dst)
10133%{
  // Double negation computed in place on dst. Per the listing this is an
  // xorpd with a 0x8000000000000000 mask; the bytes come from negD_encoding.
10134  match(Set dst (NegD dst));
10135
10136  format %{ "xorpd   $dst, [0x8000000000000000]\t"
10137            "# neg double by sign flipping" %}
10138  ins_encode(negD_encoding(dst));
10139  ins_pipe(pipe_slow);
10140%}
10141
10142// -----------Trig and Transcendental Instructions-----------------------------
10143instruct cosD_reg(regD dst) %{
  // Double cosine computed in place on dst (CosD with dst as input and
  // result).
10144  match(Set dst (CosD dst));
10145
10146  format %{ "dcos   $dst\n\t" %}
10147  opcode(0xD9, 0xFF);
  // The two opcode bytes are bracketed by Push_SrcXD / Push_ResultXD.
  // NOTE(review): presumably these move dst to and from the x87 stack --
  // confirm against the enc_class definitions.
10148  ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
10149  ins_pipe( pipe_slow );
10150%}
10151
10152instruct sinD_reg(regD dst) %{
  // Double sine computed in place on dst (SinD with dst as input and
  // result).
10153  match(Set dst (SinD dst));
10154
10155  format %{ "dsin   $dst\n\t" %}
10156  opcode(0xD9, 0xFE);
  // The two opcode bytes are bracketed by Push_SrcXD / Push_ResultXD.
  // NOTE(review): presumably these move dst to and from the x87 stack --
  // confirm against the enc_class definitions.
10157  ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
10158  ins_pipe( pipe_slow );
10159%}
10160
10161instruct tanD_reg(regD dst) %{
  // Double tangent computed in place on dst (TanD with dst as input and
  // result).
10162  match(Set dst (TanD dst));
10163
10164  format %{ "dtan   $dst\n\t" %}
  // No opcode() clause here: the four raw bytes are emitted directly, an
  // fptan followed by an fstp st, between Push_SrcXD and Push_ResultXD.
10165  ins_encode( Push_SrcXD(dst),
10166              Opcode(0xD9), Opcode(0xF2),   //fptan
10167              Opcode(0xDD), Opcode(0xD8),   //fstp st
10168              Push_ResultXD(dst) );
10169  ins_pipe( pipe_slow );
10170%}
10171
10172instruct log10D_reg(regD dst) %{
  // Base-10 logarithm computed in place on dst.
10173  // The source and result Double operands in XMM registers
10174  match(Set dst (Log10D dst));
10175  // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
10176  // fyl2x        ; compute log_10(2) * log_2(x)
10177  format %{ "fldlg2\t\t\t#Log10\n\t"
10178            "fyl2x\t\t\t# Q=Log10*Log_2(x)\n\t"
10179         %}
  // Emission order: the fldlg2 constant first, then the source operand via
  // Push_SrcXD, then fyl2x, and finally Push_ResultXD back into dst.
10180   ins_encode(Opcode(0xD9), Opcode(0xEC),   // fldlg2
10181              Push_SrcXD(dst),
10182              Opcode(0xD9), Opcode(0xF1),   // fyl2x
10183              Push_ResultXD(dst));
10184
10185  ins_pipe( pipe_slow );
10186%}
10187
10188instruct logD_reg(regD dst) %{
  // Natural logarithm computed in place on dst.
10189  // The source and result Double operands in XMM registers
10190  match(Set dst (LogD dst));
10191  // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
10192  // fyl2x        ; compute log_e(2) * log_2(x)
10193  format %{ "fldln2\t\t\t#Log_e\n\t"
10194            "fyl2x\t\t\t# Q=Log_e*Log_2(x)\n\t"
10195         %}
  // Emission order: the fldln2 constant first, then the source operand via
  // Push_SrcXD, then fyl2x, and finally Push_ResultXD back into dst.
10196  ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
10197              Push_SrcXD(dst),
10198              Opcode(0xD9), Opcode(0xF1),   // fyl2x
10199              Push_ResultXD(dst));
10200  ins_pipe( pipe_slow );
10201%}
10202
10203
10204
10205//----------Arithmetic Conversion Instructions---------------------------------
10206
10207instruct roundFloat_nop(regF dst)
10208%{
  // RoundFloat matched in place on dst as a no-op: zero cost, an empty
  // encoding (no bytes emitted) and the empty pipeline class.
10209  match(Set dst (RoundFloat dst));
10210
10211  ins_cost(0);
10212  ins_encode();
10213  ins_pipe(empty);
10214%}
10215
10216instruct roundDouble_nop(regD dst)
10217%{
  // RoundDouble matched in place on dst as a no-op: zero cost, an empty
  // encoding (no bytes emitted) and the empty pipeline class.
10218  match(Set dst (RoundDouble dst));
10219
10220  ins_cost(0);
10221  ins_encode();
10222  ins_pipe(empty);
10223%}
10224
10225instruct convF2D_reg_reg(regD dst, regF src)
10226%{
  // Float-to-double conversion, register source (ConvF2D).
10227  match(Set dst (ConvF2D src));
10228
10229  format %{ "cvtss2sd $dst, $src" %}
10230  opcode(0xF3, 0x0F, 0x5A);
10231  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10232  ins_pipe(pipe_slow); // XXX
10233%}
10234
10235instruct convF2D_reg_mem(regD dst, memory src)
10236%{
  // Float-to-double conversion with the float read straight from memory
  // (LoadF folded into the match rule).
10237  match(Set dst (ConvF2D (LoadF src)));
10238
10239  format %{ "cvtss2sd $dst, $src" %}
10240  opcode(0xF3, 0x0F, 0x5A);
10241  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10242  ins_pipe(pipe_slow); // XXX
10243%}
10244
10245instruct convD2F_reg_reg(regF dst, regD src)
10246%{
  // Double-to-float conversion, register source (ConvD2F).
10247  match(Set dst (ConvD2F src));
10248
10249  format %{ "cvtsd2ss $dst, $src" %}
10250  opcode(0xF2, 0x0F, 0x5A);
10251  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10252  ins_pipe(pipe_slow); // XXX
10253%}
10254
10255instruct convD2F_reg_mem(regF dst, memory src)
10256%{
  // Double-to-float conversion with the double read straight from memory
  // (LoadD folded into the match rule).
10257  match(Set dst (ConvD2F (LoadD src)));
10258
10259  format %{ "cvtsd2ss $dst, $src" %}
10260  opcode(0xF2, 0x0F, 0x5A);
10261  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10262  ins_pipe(pipe_slow); // XXX
10263%}
10264
10265// XXX do mem variants
10266instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
10267%{
  // Float-to-int conversion (ConvF2I) with a fix-up path; the flags
  // register is declared killed.
10268  match(Set dst (ConvF2I src));
10269  effect(KILL cr);
10270
  // Listing: after cvttss2sil, dst is compared with 0x80000000. Only when
  // they are equal is src spilled to the stack and f2i_fixup called, with
  // the final result popped into dst.
10271  format %{ "cvttss2sil $dst, $src\t# f2i\n\t"
10272            "cmpl    $dst, #0x80000000\n\t"
10273            "jne,s   done\n\t"
10274            "subq    rsp, #8\n\t"
10275            "movss   [rsp], $src\n\t"
10276            "call    f2i_fixup\n\t"
10277            "popq    $dst\n"
10278    "done:   "%}
10279  opcode(0xF3, 0x0F, 0x2C);
10280  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
10281             f2i_fixup(dst, src));
10282  ins_pipe(pipe_slow);
10283%}
10284
10285instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
10286%{
  // Float-to-long conversion (ConvF2L) with a fix-up path; the flags
  // register is declared killed.
10287  match(Set dst (ConvF2L src));
10288  effect(KILL cr);
10289
  // Listing: after cvttss2siq, dst is compared with 0x8000000000000000.
  // Only when they are equal is src spilled to the stack and f2l_fixup
  // called, with the final result popped into dst.
10290  format %{ "cvttss2siq $dst, $src\t# f2l\n\t"
10291            "cmpq    $dst, [0x8000000000000000]\n\t"
10292            "jne,s   done\n\t"
10293            "subq    rsp, #8\n\t"
10294            "movss   [rsp], $src\n\t"
10295            "call    f2l_fixup\n\t"
10296            "popq    $dst\n"
10297    "done:   "%}
10298  opcode(0xF3, 0x0F, 0x2C);
  // Uses the _wide REX variant, unlike the int-result form above.
10299  ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
10300             f2l_fixup(dst, src));
10301  ins_pipe(pipe_slow);
10302%}
10303
10304instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
10305%{
  // Double-to-int conversion (ConvD2I) with a fix-up path; the flags
  // register is declared killed.
10306  match(Set dst (ConvD2I src));
10307  effect(KILL cr);
10308
  // Listing: after cvttsd2sil, dst is compared with 0x80000000. Only when
  // they are equal is src spilled to the stack and d2i_fixup called, with
  // the final result popped into dst.
10309  format %{ "cvttsd2sil $dst, $src\t# d2i\n\t"
10310            "cmpl    $dst, #0x80000000\n\t"
10311            "jne,s   done\n\t"
10312            "subq    rsp, #8\n\t"
10313            "movsd   [rsp], $src\n\t"
10314            "call    d2i_fixup\n\t"
10315            "popq    $dst\n"
10316    "done:   "%}
10317  opcode(0xF2, 0x0F, 0x2C);
10318  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
10319             d2i_fixup(dst, src));
10320  ins_pipe(pipe_slow);
10321%}
10322
10323instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
10324%{
  // Double-to-long conversion (ConvD2L) with a fix-up path; the flags
  // register is declared killed.
10325  match(Set dst (ConvD2L src));
10326  effect(KILL cr);
10327
  // Listing: after cvttsd2siq, dst is compared with 0x8000000000000000.
  // Only when they are equal is src spilled to the stack and d2l_fixup
  // called, with the final result popped into dst.
10328  format %{ "cvttsd2siq $dst, $src\t# d2l\n\t"
10329            "cmpq    $dst, [0x8000000000000000]\n\t"
10330            "jne,s   done\n\t"
10331            "subq    rsp, #8\n\t"
10332            "movsd   [rsp], $src\n\t"
10333            "call    d2l_fixup\n\t"
10334            "popq    $dst\n"
10335    "done:   "%}
10336  opcode(0xF2, 0x0F, 0x2C);
  // Uses the _wide REX variant, unlike the int-result form above.
10337  ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
10338             d2l_fixup(dst, src));
10339  ins_pipe(pipe_slow);
10340%}
10341
10342instruct convI2F_reg_reg(regF dst, rRegI src)
10343%{
  // Int-to-float conversion from an integer register. Only selectable when
  // UseXmmI2F is off; convXI2F_reg covers the same match rule when it is on.
10344  predicate(!UseXmmI2F);
10345  match(Set dst (ConvI2F src));
10346
10347  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
10348  opcode(0xF3, 0x0F, 0x2A);
10349  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10350  ins_pipe(pipe_slow); // XXX
10351%}
10352
10353instruct convI2F_reg_mem(regF dst, memory src)
10354%{
  // Int-to-float conversion with the int read straight from memory (LoadI
  // folded into the match rule). No predicate guards this form.
10355  match(Set dst (ConvI2F (LoadI src)));
10356
10357  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
10358  opcode(0xF3, 0x0F, 0x2A);
10359  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10360  ins_pipe(pipe_slow); // XXX
10361%}
10362
10363instruct convI2D_reg_reg(regD dst, rRegI src)
10364%{
  // Int-to-double conversion from an integer register. Only selectable when
  // UseXmmI2D is off; convXI2D_reg covers the same match rule when it is on.
10365  predicate(!UseXmmI2D);
10366  match(Set dst (ConvI2D src));
10367
10368  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
10369  opcode(0xF2, 0x0F, 0x2A);
10370  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10371  ins_pipe(pipe_slow); // XXX
10372%}
10373
10374instruct convI2D_reg_mem(regD dst, memory src)
10375%{
  // Int-to-double conversion with the int read straight from memory (LoadI
  // folded into the match rule). No predicate guards this form.
10376  match(Set dst (ConvI2D (LoadI src)));
10377
10378  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
10379  opcode(0xF2, 0x0F, 0x2A);
10380  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10381  ins_pipe(pipe_slow); // XXX
10382%}
10383
10384instruct convXI2F_reg(regF dst, rRegI src)
10385%{
  // Alternative int-to-float conversion, selectable only when UseXmmI2F is
  // on (the complement of convI2F_reg_reg's predicate).
10386  predicate(UseXmmI2F);
10387  match(Set dst (ConvI2F src));
10388
10389  format %{ "movdl $dst, $src\n\t"
10390            "cvtdq2psl $dst, $dst\t# i2f" %}
  // Two assembler calls: move the int into the XMM register, then convert
  // it there in place.
10391  ins_encode %{
10392    __ movdl($dst$$XMMRegister, $src$$Register);
10393    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
10394  %}
10395  ins_pipe(pipe_slow); // XXX
10396%}
10397
10398instruct convXI2D_reg(regD dst, rRegI src)
10399%{
  // Alternative int-to-double conversion, selectable only when UseXmmI2D is
  // on (the complement of convI2D_reg_reg's predicate).
10400  predicate(UseXmmI2D);
10401  match(Set dst (ConvI2D src));
10402
10403  format %{ "movdl $dst, $src\n\t"
10404            "cvtdq2pdl $dst, $dst\t# i2d" %}
  // Two assembler calls: move the int into the XMM register, then convert
  // it there in place.
10405  ins_encode %{
10406    __ movdl($dst$$XMMRegister, $src$$Register);
10407    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
10408  %}
10409  ins_pipe(pipe_slow); // XXX
10410%}
10411
10412instruct convL2F_reg_reg(regF dst, rRegL src)
10413%{
  // Long-to-float conversion from an integer register (ConvL2F). Same
  // opcode bytes as the int form, but with the _wide REX variant.
10414  match(Set dst (ConvL2F src));
10415
10416  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
10417  opcode(0xF3, 0x0F, 0x2A);
10418  ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
10419  ins_pipe(pipe_slow); // XXX
10420%}
10421
10422instruct convL2F_reg_mem(regF dst, memory src)
10423%{
  // Long-to-float conversion with the long read straight from memory (LoadL
  // folded into the match rule); uses the _wide REX variant.
10424  match(Set dst (ConvL2F (LoadL src)));
10425
10426  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
10427  opcode(0xF3, 0x0F, 0x2A);
10428  ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
10429  ins_pipe(pipe_slow); // XXX
10430%}
10431
10432instruct convL2D_reg_reg(regD dst, rRegL src)
10433%{
  // Long-to-double conversion from an integer register (ConvL2D). Same
  // opcode bytes as the int form, but with the _wide REX variant.
10434  match(Set dst (ConvL2D src));
10435
10436  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
10437  opcode(0xF2, 0x0F, 0x2A);
10438  ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
10439  ins_pipe(pipe_slow); // XXX
10440%}
10441
10442instruct convL2D_reg_mem(regD dst, memory src)
10443%{
  // Long-to-double conversion with the long read straight from memory
  // (LoadL folded into the match rule); uses the _wide REX variant.
10444  match(Set dst (ConvL2D (LoadL src)));
10445
10446  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
10447  opcode(0xF2, 0x0F, 0x2A);
10448  ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
10449  ins_pipe(pipe_slow); // XXX
10450%}
10451
10452instruct convI2L_reg_reg(rRegL dst, rRegI src)
10453%{
  // Int-to-long conversion between integer registers (ConvI2L), printed as
  // movslq.
10454  match(Set dst (ConvI2L src));
10455
10456  ins_cost(125);
10457  format %{ "movslq  $dst, $src\t# i2l" %}
10458  opcode(0x63); // needs REX.W
  // The wide REX prefix is emitted before the single opcode byte.
10459  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src));
10460  ins_pipe(ialu_reg_reg);
10461%}
10462
10463// instruct convI2L_reg_reg_foo(rRegL dst, rRegI src)
10464// %{
10465//   match(Set dst (ConvI2L src));
10466// //   predicate(_kids[0]->_leaf->as_Type()->type()->is_int()->_lo >= 0 &&
10467// //             _kids[0]->_leaf->as_Type()->type()->is_int()->_hi >= 0);
10468//   predicate(((const TypeNode*) n)->type()->is_long()->_hi ==
10469//             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_hi &&
10470//             ((const TypeNode*) n)->type()->is_long()->_lo ==
10471//             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_lo);
10472
10473//   format %{ "movl    $dst, $src\t# unsigned i2l" %}
10474//   ins_encode(enc_copy(dst, src));
10475// //   opcode(0x63); // needs REX.W
10476// //   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src));
10477//   ins_pipe(ialu_reg_reg);
10478// %}
10479
10480instruct convI2L_reg_mem(rRegL dst, memory src)
10481%{
  // Int-to-long conversion with the int read straight from memory (LoadI
  // folded into the match rule), printed as movslq.
10482  match(Set dst (ConvI2L (LoadI src)));
10483
10484  format %{ "movslq  $dst, $src\t# i2l" %}
10485  opcode(0x63); // needs REX.W
  // The wide REX prefix is emitted before the single opcode byte.
10486  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst,src));
10487  ins_pipe(ialu_reg_mem);
10488%}
10489
10490// Zero-extend convert int to long
10491instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
10492%{
  // Matches ConvI2L followed by an AndL with an immL_32bits mask and prints
  // the pair as a single 32-bit movl.
  // NOTE(review): immL_32bits is presumably the constant 0xFFFFFFFF --
  // confirm against the operand definition.
10493  match(Set dst (AndL (ConvI2L src) mask));
10494
10495  format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
10496  ins_encode(enc_copy(dst, src));
10497  ins_pipe(ialu_reg_reg);
10498%}
10499
10500// Zero-extend convert int to long
10501instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
10502%{
  // Memory form: matches LoadI, ConvI2L and an AndL with an immL_32bits
  // mask, and prints the chain as a single 32-bit movl load.
  // NOTE(review): immL_32bits is presumably the constant 0xFFFFFFFF --
  // confirm against the operand definition.
10503  match(Set dst (AndL (ConvI2L (LoadI src)) mask));
10504
10505  format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
10506  opcode(0x8B);
10507  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
10508  ins_pipe(ialu_reg_mem);
10509%}
10510
10511instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
10512%{
  // Matches a long AndL with an immL_32bits mask and prints it as a 32-bit
  // movl ("zero-extend long").
10513  match(Set dst (AndL src mask));
10514
10515  format %{ "movl    $dst, $src\t# zero-extend long" %}
  // Uses enc_copy_always where the i2l zero-extend form uses enc_copy.
  // NOTE(review): presumably this emits the move even when dst == src --
  // confirm against the enc_class definition.
10516  ins_encode(enc_copy_always(dst, src));
10517  ins_pipe(ialu_reg_reg);
10518%}
10519
10520instruct convL2I_reg_reg(rRegI dst, rRegL src)
10521%{
  // Long-to-int conversion between integer registers (ConvL2I), printed as
  // a 32-bit movl and encoded with enc_copy_always.
10522  match(Set dst (ConvL2I src));
10523
10524  format %{ "movl    $dst, $src\t# l2i" %}
10525  ins_encode(enc_copy_always(dst, src));
10526  ins_pipe(ialu_reg_reg);
10527%}
10528
10529
10530instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  // MoveF2I from a float stack slot into an integer register, printed as a
  // plain movl load.
10531  match(Set dst (MoveF2I src));
10532  effect(DEF dst, USE src);
10533
10534  ins_cost(125);
10535  format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
10536  opcode(0x8B);
10537  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
10538  ins_pipe(ialu_reg_mem);
10539%}
10540
10541instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
  // MoveI2F from an int stack slot into a float register, printed as a
  // movss load.
10542  match(Set dst (MoveI2F src));
10543  effect(DEF dst, USE src);
10544
10545  ins_cost(125);
10546  format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
10547  opcode(0xF3, 0x0F, 0x10);
10548  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10549  ins_pipe(pipe_slow);
10550%}
10551
10552instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
  // MoveD2L from a double stack slot into a long register, printed as a
  // movq load (wide REX variant).
10553  match(Set dst (MoveD2L src));
10554  effect(DEF dst, USE src);
10555
10556  ins_cost(125);
10557  format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
10558  opcode(0x8B);
10559  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
10560  ins_pipe(ialu_reg_mem);
10561%}
10562
10563instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
  // MoveL2D from a long stack slot into a double register, selectable only
  // when UseXmmLoadAndClearUpper is off; this variant prints as movlpd.
  // Note that the format tag reads "MoveL2D_stack_reg", the same as the
  // non-partial variant.
10564  predicate(!UseXmmLoadAndClearUpper);
10565  match(Set dst (MoveL2D src));
10566  effect(DEF dst, USE src);
10567
10568  ins_cost(125);
10569  format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
10570  opcode(0x66, 0x0F, 0x12);
10571  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10572  ins_pipe(pipe_slow);
10573%}
10574
10575instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
  // MoveL2D from a long stack slot into a double register, selectable only
  // when UseXmmLoadAndClearUpper is on; this variant prints as movsd.
10576  predicate(UseXmmLoadAndClearUpper);
10577  match(Set dst (MoveL2D src));
10578  effect(DEF dst, USE src);
10579
10580  ins_cost(125);
10581  format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
10582  opcode(0xF2, 0x0F, 0x10);
10583  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10584  ins_pipe(pipe_slow);
10585%}
10586
10587
10588instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
  // MoveF2I from a float register into an int stack slot, printed as a
  // movss store.
10589  match(Set dst (MoveF2I src));
10590  effect(DEF dst, USE src);
10591
10592  ins_cost(95); // XXX
10593  format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
10594  opcode(0xF3, 0x0F, 0x11);
  // Store form: the register (src) is passed first and the stack slot
  // (dst) second to the REX/ModRM encoders.
10595  ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
10596  ins_pipe(pipe_slow);
10597%}
10598
10599instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  // MoveI2F from an integer register into a float stack slot, printed as a
  // plain movl store.
10600  match(Set dst (MoveI2F src));
10601  effect(DEF dst, USE src);
10602
10603  ins_cost(100);
10604  format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
10605  opcode(0x89);
  // Store form: the register (src) is passed first and the stack slot
  // (dst) second to the REX/ModRM encoders.
10606  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
10607  ins_pipe( ialu_mem_reg );
10608%}
10609
10610instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
  // MoveD2L from a double register into a long stack slot, printed as a
  // movsd store.
10611  match(Set dst (MoveD2L src));
10612  effect(DEF dst, USE src);
10613
10614  ins_cost(95); // XXX
  // The trailing tag names this instruct, as every sibling Move* rule does;
  // it previously read "MoveL2D_reg_stack", mislabelling debug listings.
10615  format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
10616  opcode(0xF2, 0x0F, 0x11);
  // Store form: the register (src) is passed first and the stack slot
  // (dst) second to the REX/ModRM encoders.
10617  ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
10618  ins_pipe(pipe_slow);
10619%}
10620
10621instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
  // MoveL2D from a long register into a double stack slot, printed as a
  // movq store (wide REX variant).
10622  match(Set dst (MoveL2D src));
10623  effect(DEF dst, USE src);
10624
10625  ins_cost(100);
10626  format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
10627  opcode(0x89);
  // Store form: the register (src) is passed first and the stack slot
  // (dst) second to the REX/ModRM encoders.
10628  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
10629  ins_pipe(ialu_mem_reg);
10630%}
10631
10632instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
  // MoveF2I directly from an XMM register to an integer register, emitted
  // with the assembler's movdl. Cost 85, below the stack-slot forms.
10633  match(Set dst (MoveF2I src));
10634  effect(DEF dst, USE src);
10635  ins_cost(85);
10636  format %{ "movd    $dst,$src\t# MoveF2I" %}
10637  ins_encode %{ __ movdl($dst$$Register, $src$$XMMRegister); %}
10638  ins_pipe( pipe_slow );
10639%}
10640
10641instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
  // MoveD2L directly from an XMM register to a long register, emitted with
  // the assembler's movdq. Cost 85, below the stack-slot forms.
10642  match(Set dst (MoveD2L src));
10643  effect(DEF dst, USE src);
10644  ins_cost(85);
10645  format %{ "movd    $dst,$src\t# MoveD2L" %}
10646  ins_encode %{ __ movdq($dst$$Register, $src$$XMMRegister); %}
10647  ins_pipe( pipe_slow );
10648%}
10649
10650// The next instructions have long latency and use Int unit. Set high cost.
10651instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
  // MoveI2F directly from an integer register to an XMM register, emitted
  // with the assembler's movdl. Cost 300, well above the stack-slot forms.
10652  match(Set dst (MoveI2F src));
10653  effect(DEF dst, USE src);
10654  ins_cost(300);
10655  format %{ "movd    $dst,$src\t# MoveI2F" %}
10656  ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); %}
10657  ins_pipe( pipe_slow );
10658%}
10659
10660instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  // MoveL2D directly from a long register to an XMM register, emitted with
  // the assembler's movdq. Cost 300, well above the stack-slot forms.
10661  match(Set dst (MoveL2D src));
10662  effect(DEF dst, USE src);
10663  ins_cost(300);
10664  format %{ "movd    $dst,$src\t# MoveL2D" %}
10665  ins_encode %{ __ movdq($dst$$XMMRegister, $src$$Register); %}
10666  ins_pipe( pipe_slow );
10667%}
10668
10669// Replicate scalar to packed byte (1 byte) values in xmm
10670instruct Repl8B_reg(regD dst, regD src) %{
  // Source is already in an XMM register (regD). The listed three-step
  // sequence is produced by the single pshufd_8x8 encoding.
10671  match(Set dst (Replicate8B src));
10672  format %{ "MOVDQA  $dst,$src\n\t"
10673            "PUNPCKLBW $dst,$dst\n\t"
10674            "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
10675  ins_encode( pshufd_8x8(dst, src));
10676  ins_pipe( pipe_slow );
10677%}
10678
10679// Replicate scalar to packed byte (1 byte) values in xmm
10680instruct Repl8B_rRegI(regD dst, rRegI src) %{
  // Source is in an integer register: it is first moved into dst with
  // mov_i2x, then pshufd_8x8 is applied with dst as its own source.
10681  match(Set dst (Replicate8B src));
10682  format %{ "MOVD    $dst,$src\n\t"
10683            "PUNPCKLBW $dst,$dst\n\t"
10684            "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
10685  ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst));
10686  ins_pipe( pipe_slow );
10687%}
10688
10689// Replicate scalar zero to packed byte (1 byte) values in xmm
10690instruct Repl8B_immI0(regD dst, immI0 zero) %{
  // Zero special case: the operand is the immI0 constant, so dst is simply
  // cleared with a pxor of dst against itself.
10691  match(Set dst (Replicate8B zero));
10692  format %{ "PXOR  $dst,$dst\t! replicate8B" %}
10693  ins_encode( pxor(dst, dst));
10694  ins_pipe( fpu_reg_reg );
10695%}
10696
10697// Replicate scalar to packed short (2 byte) values in xmm
10698instruct Repl4S_reg(regD dst, regD src) %{
  // Source is already in an XMM register (regD); encoded by pshufd_4x16.
10699  match(Set dst (Replicate4S src));
10700  format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
10701  ins_encode( pshufd_4x16(dst, src));
10702  ins_pipe( fpu_reg_reg );
10703%}
10704
10705// Replicate scalar to packed short (2 byte) values in xmm
10706instruct Repl4S_rRegI(regD dst, rRegI src) %{
  // Source is in an integer register: it is first moved into dst with
  // mov_i2x, then pshufd_4x16 is applied with dst as its own source.
10707  match(Set dst (Replicate4S src));
10708  format %{ "MOVD    $dst,$src\n\t"
10709            "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
10710  ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
10711  ins_pipe( fpu_reg_reg );
10712%}
10713
10714// Replicate scalar zero to packed short (2 byte) values in xmm
10715instruct Repl4S_immI0(regD dst, immI0 zero) %{
  // Zero special case: the operand is the immI0 constant, so dst is simply
  // cleared with a pxor of dst against itself.
10716  match(Set dst (Replicate4S zero));
10717  format %{ "PXOR  $dst,$dst\t! replicate4S" %}
10718  ins_encode( pxor(dst, dst));
10719  ins_pipe( fpu_reg_reg );
10720%}
10721
10722// Replicate scalar to packed char (2 byte) values in xmm
10723instruct Repl4C_reg(regD dst, regD src) %{
  // Source is already in an XMM register (regD); uses the same pshufd_4x16
  // encoding as the packed-short form.
10724  match(Set dst (Replicate4C src));
10725  format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
10726  ins_encode( pshufd_4x16(dst, src));
10727  ins_pipe( fpu_reg_reg );
10728%}
10729
10730// Replicate scalar to packed char (2 byte) values in xmm
10731instruct Repl4C_rRegI(regD dst, rRegI src) %{
  // Source is in an integer register: it is first moved into dst with
  // mov_i2x, then pshufd_4x16 is applied with dst as its own source.
10732  match(Set dst (Replicate4C src));
10733  format %{ "MOVD    $dst,$src\n\t"
10734            "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
10735  ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
10736  ins_pipe( fpu_reg_reg );
10737%}
10738
10739// Replicate scalar zero to packed char (2 byte) values in xmm
10740instruct Repl4C_immI0(regD dst, immI0 zero) %{
  // Zero special case: the operand is the immI0 constant, so dst is simply
  // cleared with a pxor of dst against itself.
10741  match(Set dst (Replicate4C zero));
10742  format %{ "PXOR  $dst,$dst\t! replicate4C" %}
10743  ins_encode( pxor(dst, dst));
10744  ins_pipe( fpu_reg_reg );
10745%}
10746
10747// Replicate scalar to packed integer (4 byte) values in xmm
10748instruct Repl2I_reg(regD dst, regD src) %{
  // Source is already in an XMM register (regD); encoded as a pshufd with
  // shuffle immediate 0x00.
10749  match(Set dst (Replicate2I src));
10750  format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
10751  ins_encode( pshufd(dst, src, 0x00));
10752  ins_pipe( fpu_reg_reg );
10753%}
10754
10755// Replicate scalar to packed integer (4 byte) values in xmm
10756instruct Repl2I_rRegI(regD dst, rRegI src) %{
  // Source is in an integer register: it is first moved into dst with
  // mov_i2x, then a pshufd with immediate 0x00 is applied to dst in place.
10757  match(Set dst (Replicate2I src));
10758  format %{ "MOVD   $dst,$src\n\t"
10759            "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
10760  ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00));
10761  ins_pipe( fpu_reg_reg );
10762%}
10763
10764// Replicate scalar zero to packed integer (4 byte) values in xmm
10765instruct Repl2I_immI0(regD dst, immI0 zero) %{
  // Zero special case: the operand is the immI0 constant, so dst is simply
  // cleared with a pxor of dst against itself.
10766  match(Set dst (Replicate2I zero));
10767  format %{ "PXOR  $dst,$dst\t! replicate2I" %}
10768  ins_encode( pxor(dst, dst));
10769  ins_pipe( fpu_reg_reg );
10770%}
10771
10772// Replicate scalar to packed single precision floating point values in xmm
10773instruct Repl2F_reg(regD dst, regD src) %{
  // Source operand is declared regD; encoded as a pshufd with shuffle
  // immediate 0xe0 (the integer form uses 0x00).
10774  match(Set dst (Replicate2F src));
10775  format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
10776  ins_encode( pshufd(dst, src, 0xe0));
10777  ins_pipe( fpu_reg_reg );
10778%}
10779
10780// Replicate scalar to packed single precision floating point values in xmm
// Matches Replicate2F with a regF source.  Format and encoding are the same
// as Repl2F_reg (pshufd with immediate 0xe0); only the source operand class
// differs.
10781instruct Repl2F_regF(regD dst, regF src) %{
10782  match(Set dst (Replicate2F src));
10783  format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
10784  ins_encode( pshufd(dst, src, 0xe0));
10785  ins_pipe( fpu_reg_reg );
10786%}
10787
10788// Replicate scalar zero to packed single precision floating point values in xmm
// Matches Replicate2F of the immF0 constant.  The zero operand is never
// encoded: dst is produced by pxor(dst, dst).
10789instruct Repl2F_immF0(regD dst, immF0 zero) %{
10790  match(Set dst (Replicate2F zero));
10791  format %{ "PXOR  $dst,$dst\t! replicate2F" %}
10792  ins_encode( pxor(dst, dst));
10793  ins_pipe( fpu_reg_reg );
10794%}
10795
10796
10797// =======================================================================
10798// fast clearing of an array
// Matches ClearArray(cnt, base).  Operands use the fixed-register classes
// rcx_RegL (cnt), rdi_RegP (base) and rax_RegI (zero).  cnt and base are
// consumed and destroyed (USE_KILL); zero and the flags are only clobbered
// (KILL).  The encoding is an xorl of eax with itself followed by the raw
// bytes F3 48 AB, which the inline comment identifies as rep REX_W stos.
10799instruct rep_stos(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
10800                  rFlagsReg cr)
10801%{
10802  match(Set dummy (ClearArray cnt base));
10803  effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
10804
10805  format %{ "xorl    rax, rax\t# ClearArray:\n\t"
10806            "rep stosq\t# Store rax to *rdi++ while rcx--" %}
10807  ins_encode(opc_reg_reg(0x33, RAX, RAX), // xorl %eax, %eax
10808             Opcode(0xF3), Opcode(0x48), Opcode(0xAB)); // rep REX_W stos
10809  ins_pipe(pipe_slow);
10810%}
10811
// Matches StrComp(str1, str2), producing an int result in the rcx_RegI class.
// str1/str2 (rdi_RegP/rsi_RegP) are USE_KILL; tmp1 (rax_RegI), tmp2 (rbx_RegI)
// and the flags are KILLed.  All code comes from enc_String_Compare(), which
// takes no operands.  The ins_cost line is deliberately commented out.
10812instruct string_compare(rdi_RegP str1, rsi_RegP str2, rax_RegI tmp1,
10813                        rbx_RegI tmp2, rcx_RegI result, rFlagsReg cr)
10814%{
10815  match(Set result (StrComp str1 str2));
10816  effect(USE_KILL str1, USE_KILL str2, KILL tmp1, KILL tmp2, KILL cr);
10817  //ins_cost(300);
10818
10819  format %{ "String Compare $str1, $str2 -> $result    // XXX KILL RAX, RBX" %}
10820  ins_encode( enc_String_Compare() );
10821  ins_pipe( pipe_slow );
10822%}
10823
10824//----------Control Flow Instructions------------------------------------------
10825// Signed compare Instructions
10826
10827// XXX more variants!!
// Signed 32-bit compare of two int registers: matches CmpI(op1, op2) and
// defines the flags register cr.  Encoded as opcode 0x3B /r preceded by a
// REX_reg_reg prefix encoding.  The explicit effect() restates DEF/USE roles
// for the operands.
10828instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
10829%{
10830  match(Set cr (CmpI op1 op2));
10831  effect(DEF cr, USE op1, USE op2);
10832
10833  format %{ "cmpl    $op1, $op2" %}
10834  opcode(0x3B);  /* Opcode 3B /r */
10835  ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
10836  ins_pipe(ialu_cr_reg_reg);
10837%}
10838
// Signed 32-bit compare of an int register with an immI constant: matches
// CmpI(op1, op2).  Uses opcode 0x81 /7; the immediate is emitted by Con8or32
// (presumably as 8 or 32 bits depending on the value -- confirm in the
// encoding class).
10839instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
10840%{
10841  match(Set cr (CmpI op1 op2));
10842
10843  format %{ "cmpl    $op1, $op2" %}
10844  opcode(0x81, 0x07); /* Opcode 81 /7 */
10845  ins_encode(OpcSErm(op1, op2), Con8or32(op2));
10846  ins_pipe(ialu_cr_reg_imm);
10847%}
10848
// Signed 32-bit compare of an int register with a value loaded from memory:
// matches CmpI(op1, LoadI(op2)), folding the load into the compare.  Opcode
// 0x3B /r with a memory operand; cost is set to 500 (marked XXX by the author).
10849instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
10850%{
10851  match(Set cr (CmpI op1 (LoadI op2)));
10852
10853  ins_cost(500); // XXX
10854  format %{ "cmpl    $op1, $op2" %}
10855  opcode(0x3B); /* Opcode 3B /r */
10856  ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
10857  ins_pipe(ialu_cr_reg_mem);
10858%}
10859
// Compare of an int register against the constant zero: matches
// CmpI(src, immI0) but encodes opcode 0x85 with src as both operands
// (testl src, src), so the zero operand is never emitted.
10860instruct testI_reg(rFlagsReg cr, rRegI src, immI0 zero)
10861%{
10862  match(Set cr (CmpI src zero));
10863
10864  format %{ "testl   $src, $src" %}
10865  opcode(0x85);
10866  ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
10867  ins_pipe(ialu_cr_reg_imm);
10868%}
10869
// Fuses (src & con) == 0 into one instruction: matches
// CmpI(AndI(src, con), immI0).  Encoded as opcode 0xF7 with secondary opcode
// 0x00 and the constant emitted through Con32.
10870instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI0 zero)
10871%{
10872  match(Set cr (CmpI (AndI src con) zero));
10873
10874  format %{ "testl   $src, $con" %}
10875  opcode(0xF7, 0x00);
10876  ins_encode(REX_reg(src), OpcP, reg_opc(src), Con32(con));
10877  ins_pipe(ialu_cr_reg_imm);
10878%}
10879
// Fuses (src & *mem) == 0 into one instruction: matches
// CmpI(AndI(src, LoadI(mem)), immI0).  Encoded as opcode 0x85 with a
// register and a memory operand.
10880instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI0 zero)
10881%{
10882  match(Set cr (CmpI (AndI src (LoadI mem)) zero));
10883
10884  format %{ "testl   $src, $mem" %}
10885  opcode(0x85);
10886  ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
10887  ins_pipe(ialu_cr_reg_mem);
10888%}
10889
10890// Unsigned compare Instructions; really, same as signed except they
10891// produce an rFlagsRegU instead of rFlagsReg.
// Unsigned 32-bit compare of two int registers: matches CmpU(op1, op2).
// Opcode and encoding are the same as compI_rReg; the result is typed as
// rFlagsRegU rather than rFlagsReg.
10892instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
10893%{
10894  match(Set cr (CmpU op1 op2));
10895
10896  format %{ "cmpl    $op1, $op2\t# unsigned" %}
10897  opcode(0x3B); /* Opcode 3B /r */
10898  ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
10899  ins_pipe(ialu_cr_reg_reg);
10900%}
10901
// Unsigned 32-bit compare of an int register with an immI constant: matches
// CmpU(op1, op2).  Opcode 0x81 /7, same encoding classes as compI_rReg_imm;
// the result is typed as rFlagsRegU.
10902instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
10903%{
10904  match(Set cr (CmpU op1 op2));
10905
10906  format %{ "cmpl    $op1, $op2\t# unsigned" %}
10907  opcode(0x81,0x07); /* Opcode 81 /7 */
10908  ins_encode(OpcSErm(op1, op2), Con8or32(op2));
10909  ins_pipe(ialu_cr_reg_imm);
10910%}
10911
// Unsigned 32-bit compare of an int register with a value loaded from
// memory: matches CmpU(op1, LoadI(op2)).  Opcode 0x3B /r with a memory
// operand; cost is set to 500 (marked XXX by the author).
10912instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
10913%{
10914  match(Set cr (CmpU op1 (LoadI op2)));
10915
10916  ins_cost(500); // XXX
10917  format %{ "cmpl    $op1, $op2\t# unsigned" %}
10918  opcode(0x3B); /* Opcode 3B /r */
10919  ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
10920  ins_pipe(ialu_cr_reg_mem);
10921%}
10922
10923// // // Cisc-spilled version of compU_rReg
10924// //instruct compU_mem_rReg(rFlagsRegU cr, memory op1, rRegI op2)
10925// //%{
10926// //  match(Set cr (CmpU (LoadI op1) op2));
10927// //
10928// //  format %{ "CMPu   $op1,$op2" %}
10929// //  ins_cost(500);
10930// //  opcode(0x39);  /* Opcode 39 /r */
10931// //  ins_encode( OpcP, reg_mem( op1, op2) );
10932// //%}
10933
// Unsigned compare of an int register against zero: matches
// CmpU(src, immI0) and encodes opcode 0x85 with src as both operands
// (testl src, src).  The result is typed as rFlagsRegU.
10934instruct testU_reg(rFlagsRegU cr, rRegI src, immI0 zero)
10935%{
10936  match(Set cr (CmpU src zero));
10937
10938  format %{ "testl  $src, $src\t# unsigned" %}
10939  opcode(0x85);
10940  ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
10941  ins_pipe(ialu_cr_reg_imm);
10942%}
10943
// Pointer compare of two pointer registers: matches CmpP(op1, op2) and
// produces unsigned flags (rFlagsRegU).  Opcode 0x3B /r with the "wide"
// REX prefix encoding (REX_reg_reg_wide); the format shows a 64-bit cmpq.
10944instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
10945%{
10946  match(Set cr (CmpP op1 op2));
10947
10948  format %{ "cmpq    $op1, $op2\t# ptr" %}
10949  opcode(0x3B); /* Opcode 3B /r */
10950  ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
10951  ins_pipe(ialu_cr_reg_reg);
10952%}
10953
// Pointer compare of a register with a pointer loaded from memory: matches
// CmpP(op1, LoadP(op2)).  Opcode 0x3B /r, wide REX prefix, memory operand;
// cost is set to 500 (marked XXX by the author).
10954instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
10955%{
10956  match(Set cr (CmpP op1 (LoadP op2)));
10957
10958  ins_cost(500); // XXX
10959  format %{ "cmpq    $op1, $op2\t# ptr" %}
10960  opcode(0x3B); /* Opcode 3B /r */
10961  ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
10962  ins_pipe(ialu_cr_reg_mem);
10963%}
10964
10965// // // Cisc-spilled version of compP_rReg
10966// //instruct compP_mem_rReg(rFlagsRegU cr, memory op1, rRegP op2)
10967// //%{
10968// //  match(Set cr (CmpP (LoadP op1) op2));
10969// //
10970// //  format %{ "CMPu   $op1,$op2" %}
10971// //  ins_cost(500);
10972// //  opcode(0x39);  /* Opcode 39 /r */
10973// //  ins_encode( OpcP, reg_mem( op1, op2) );
10974// //%}
10975
10976// XXX this is generalized by compP_rReg_mem???
10977// Compare raw pointer (used in out-of-heap check).
10978// Only works because non-oop pointers must be raw pointers
10979// and raw pointers have no anti-dependencies.
// Same match rule and encoding as compP_rReg_mem, but guarded by a predicate:
// it applies only when the bottom type of the loaded address
// (n->in(2)->in(2)) is not an oop pointer.  Unlike compP_rReg_mem, no
// ins_cost is given here, so the default cost applies.
10980instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
10981%{
10982  predicate(!n->in(2)->in(2)->bottom_type()->isa_oop_ptr());
10983  match(Set cr (CmpP op1 (LoadP op2)));
10984
10985  format %{ "cmpq    $op1, $op2\t# raw ptr" %}
10986  opcode(0x3B); /* Opcode 3B /r */
10987  ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
10988  ins_pipe(ialu_cr_reg_mem);
10989%}
10990
10991// This will generate a signed flags result. This should be OK since
10992// any compare to a zero should be eq/neq.
// Null check of a pointer register: matches CmpP(src, immP0) and encodes
// opcode 0x85 with src as both operands and a wide REX prefix (testq src,
// src).  Note the result is typed rFlagsReg (signed), unlike the other CmpP
// rules in this section.
10993instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
10994%{
10995  match(Set cr (CmpP src zero));
10996
10997  format %{ "testq   $src, $src\t# ptr" %}
10998  opcode(0x85);
10999  ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
11000  ins_pipe(ialu_cr_reg_imm);
11001%}
11002
11003// This will generate a signed flags result. This should be OK since
11004// any compare to a zero should be eq/neq.
// Null check of a pointer held in memory: matches CmpP(LoadP(op), immP0).
// Encodes opcode 0xF7 /0 with a wide REX prefix and a 32-bit immediate of
// 0xFFFFFFFF; the format string displays the mask as 0xffffffffffffffff
// (presumably the imm32 is sign-extended by the 64-bit form -- confirm).
// NOTE(review): the operand is a memory reference but the pipeline class is
// ialu_cr_reg_imm rather than a *_mem class -- confirm this is intended.
11005instruct testP_reg_mem(rFlagsReg cr, memory op, immP0 zero)
11006%{
11007  match(Set cr (CmpP (LoadP op) zero));
11008
11009  ins_cost(500); // XXX
11010  format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
11011  opcode(0xF7); /* Opcode F7 /0 */
11012  ins_encode(REX_mem_wide(op),
11013             OpcP, RM_opc_mem(0x00, op), Con_d32(0xFFFFFFFF));
11014  ins_pipe(ialu_cr_reg_imm);
11015%}
11016
// Zero check of an rRegN register: matches CmpN(src, immN0).  Unlike the
// neighbouring rules this one uses the inline assembler form of ins_encode,
// emitting testl(src, src) directly.
11017instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
11018  match(Set cr (CmpN src zero));
11019
11020  format %{ "testl   $src, $src" %}
11021  ins_encode %{ __ testl($src$$Register, $src$$Register); %}
11022  ins_pipe(ialu_cr_reg_imm);
11023%}
11024
11025// Yanked all unsigned pointer compare operations.
11026// Pointer compares are done with CmpP which is already unsigned.
11027
// Signed 64-bit compare of two long registers: matches CmpL(op1, op2).
// Opcode 0x3B /r with the wide REX prefix encoding (cmpq).
11028instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
11029%{
11030  match(Set cr (CmpL op1 op2));
11031
11032  format %{ "cmpq    $op1, $op2" %}
11033  opcode(0x3B);  /* Opcode 3B /r */
11034  ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
11035  ins_pipe(ialu_cr_reg_reg);
11036%}
11037
// Signed 64-bit compare of a long register with an immL32 constant: matches
// CmpL(op1, op2).  Opcode 0x81 /7 through the wide OpcSErm_wide encoding,
// with the immediate emitted by Con8or32.
11038instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
11039%{
11040  match(Set cr (CmpL op1 op2));
11041
11042  format %{ "cmpq    $op1, $op2" %}
11043  opcode(0x81, 0x07); /* Opcode 81 /7 */
11044  ins_encode(OpcSErm_wide(op1, op2), Con8or32(op2));
11045  ins_pipe(ialu_cr_reg_imm);
11046%}
11047
// Signed 64-bit compare of a long register with a value loaded from memory:
// matches CmpL(op1, LoadL(op2)).  Opcode 0x3B /r, wide REX prefix, memory
// operand; cost is set to 500 (marked XXX by the author).
11048instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
11049%{
11050  match(Set cr (CmpL op1 (LoadL op2)));
11051
11052  ins_cost(500); // XXX
11053  format %{ "cmpq    $op1, $op2" %}
11054  opcode(0x3B); /* Opcode 3B /r */
11055  ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
11056  ins_pipe(ialu_cr_reg_mem);
11057%}
11058
// Compare of a long register against zero: matches CmpL(src, immL0) and
// encodes opcode 0x85 with src as both operands and a wide REX prefix
// (testq src, src).
11059instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
11060%{
11061  match(Set cr (CmpL src zero));
11062
11063  format %{ "testq   $src, $src" %}
11064  opcode(0x85);
11065  ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
11066  ins_pipe(ialu_cr_reg_imm);
11067%}
11068
// Fuses (src & con) == 0 for longs: matches CmpL(AndL(src, con), immL0)
// where con is an immL32.  Encoded as opcode 0xF7 with secondary opcode
// 0x00, wide REX prefix, and the constant emitted through Con32.
11069instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
11070%{
11071  match(Set cr (CmpL (AndL src con) zero));
11072
11073  format %{ "testq   $src, $con\t# long" %}
11074  opcode(0xF7, 0x00);
11075  ins_encode(REX_reg_wide(src), OpcP, reg_opc(src), Con32(con));
11076  ins_pipe(ialu_cr_reg_imm);
11077%}
11078
// Fuses (src & *mem) == 0 for longs: matches
// CmpL(AndL(src, LoadL(mem)), immL0).  Encoded as opcode 0x85 with a wide
// REX prefix and a memory operand.
11079instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
11080%{
11081  match(Set cr (CmpL (AndL src (LoadL mem)) zero));
11082
11083  format %{ "testq   $src, $mem" %}
11084  opcode(0x85);
11085  ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
11086  ins_pipe(ialu_cr_reg_mem);
11087%}
11088
11089// Manifest a CmpL result in an integer register.  Very painful.
11090// This is the test to avoid.
// Matches CmpL3(src1, src2), materialising the three-way compare result in
// an int register and clobbering the flags (KILL flags).  Per the format
// string the sequence is: cmpq, load -1 into dst, short jump to done if
// less, otherwise setne + movzbl on dst.  The whole sequence is emitted by
// the single encoding class cmpl3_flag.
11091instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
11092%{
11093  match(Set dst (CmpL3 src1 src2));
11094  effect(KILL flags);
11095
11096  ins_cost(275); // XXX
11097  format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
11098            "movl    $dst, -1\n\t"
11099            "jl,s    done\n\t"
11100            "setne   $dst\n\t"
11101            "movzbl  $dst, $dst\n\t"
11102    "done:" %}
11103  ins_encode(cmpl3_flag(src1, src2, dst));
11104  ins_pipe(pipe_slow);
11105%}
11106
11107//----------Max and Min--------------------------------------------------------
11108// Min Instructions
11109
// Helper with no match rule: it is only instantiated from the expand block
// of minI_rReg.  dst is both read and written (USE_DEF); src and the flags
// are read.  Encoded as the two-byte opcode 0x0F 0x4F, shown in the format
// as a conditional move on "greater".
11110instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
11111%{
11112  effect(USE_DEF dst, USE src, USE cr);
11113
11114  format %{ "cmovlgt $dst, $src\t# min" %}
11115  opcode(0x0F, 0x4F);
11116  ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
11117  ins_pipe(pipe_cmov_reg);
11118%}
11119
11120
// Matches MinI(dst, src) with dst as both input and output.  It has no
// encoding of its own: it expands into compI_rReg (compare dst with src into
// a fresh flags operand) followed by cmovI_reg_g (move src into dst when dst
// is greater).
11121instruct minI_rReg(rRegI dst, rRegI src)
11122%{
11123  match(Set dst (MinI dst src));
11124
11125  ins_cost(200);
11126  expand %{
11127    rFlagsReg cr;
11128    compI_rReg(cr, dst, src);
11129    cmovI_reg_g(dst, src, cr);
11130  %}
11131%}
11132
// Helper with no match rule: it is only instantiated from the expand block
// of maxI_rReg.  dst is both read and written (USE_DEF); src and the flags
// are read.  Encoded as the two-byte opcode 0x0F 0x4C, shown in the format
// as a conditional move on "less".
11133instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
11134%{
11135  effect(USE_DEF dst, USE src, USE cr);
11136
11137  format %{ "cmovllt $dst, $src\t# max" %}
11138  opcode(0x0F, 0x4C);
11139  ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
11140  ins_pipe(pipe_cmov_reg);
11141%}
11142
11143
// Matches MaxI(dst, src) with dst as both input and output.  It has no
// encoding of its own: it expands into compI_rReg (compare dst with src into
// a fresh flags operand) followed by cmovI_reg_l (move src into dst when dst
// is less).
11144instruct maxI_rReg(rRegI dst, rRegI src)
11145%{
11146  match(Set dst (MaxI dst src));
11147
11148  ins_cost(200);
11149  expand %{
11150    rFlagsReg cr;
11151    compI_rReg(cr, dst, src);
11152    cmovI_reg_l(dst, src, cr);
11153  %}
11154%}
11155
11156// ============================================================================
11157// Branch Instructions
11158
11159// Jump Direct - Label defines a relative address from JMP+1
// Unconditional long jump: matches the ideal Goto node.  Opcode 0xE9 plus
// the label displacement emitted by Lbl(labl); the instruction size is
// declared as exactly 5 bytes and it is marked PC-relative.
11160instruct jmpDir(label labl)
11161%{
11162  match(Goto);
11163  effect(USE labl);
11164
11165  ins_cost(300);
11166  format %{ "jmp     $labl" %}
11167  size(5);
11168  opcode(0xE9);
11169  ins_encode(OpcP, Lbl(labl));
11170  ins_pipe(pipe_jmp);
11171  ins_pc_relative(1);
11172%}
11173
11174// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Conditional long jump on signed flags: matches If(cop, cr).  Uses the
// two-byte opcode 0x0F 0x80 via the Jcc encoding class; size is declared as
// exactly 6 bytes and the instruction is marked PC-relative.
11175instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
11176%{
11177  match(If cop cr);
11178  effect(USE labl);
11179
11180  ins_cost(300);
11181  format %{ "j$cop     $labl" %}
11182  size(6);
11183  opcode(0x0F, 0x80);
11184  ins_encode(Jcc(cop, labl));
11185  ins_pipe(pipe_jcc);
11186  ins_pc_relative(1);
11187%}
11188
11189// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Conditional long jump closing a counted loop (signed flags): matches
// CountedLoopEnd(cop, cr).  Encoding, size (6 bytes) and opcode are the same
// as jmpCon; only the matched ideal node and the format annotation differ.
11190instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
11191%{
11192  match(CountedLoopEnd cop cr);
11193  effect(USE labl);
11194
11195  ins_cost(300);
11196  format %{ "j$cop     $labl\t# loop end" %}
11197  size(6);
11198  opcode(0x0F, 0x80);
11199  ins_encode(Jcc(cop, labl));
11200  ins_pipe(pipe_jcc);
11201  ins_pc_relative(1);
11202%}
11203
11204// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Unsigned-flags variant of jmpLoopEnd: matches CountedLoopEnd(cop, cmp)
// with a cmpOpU condition and an rFlagsRegU flags operand.  Same Jcc
// encoding, opcode 0x0F 0x80 and declared size of 6 bytes.
11205instruct jmpLoopEndU(cmpOpU cop, rFlagsRegU cmp, label labl)
11206%{
11207  match(CountedLoopEnd cop cmp);
11208  effect(USE labl);
11209
11210  ins_cost(300);
11211  format %{ "j$cop,u   $labl\t# loop end" %}
11212  size(6);
11213  opcode(0x0F, 0x80);
11214  ins_encode(Jcc(cop, labl));
11215  ins_pipe(pipe_jcc);
11216  ins_pc_relative(1);
11217%}
11218
11219// Jump Direct Conditional - using unsigned comparison
// Unsigned-flags variant of jmpCon: matches If(cop, cmp) with a cmpOpU
// condition and an rFlagsRegU flags operand.  Same Jcc encoding, opcode
// 0x0F 0x80 and declared size of 6 bytes.
11220instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl)
11221%{
11222  match(If cop cmp);
11223  effect(USE labl);
11224
11225  ins_cost(300);
11226  format %{ "j$cop,u   $labl" %}
11227  size(6);
11228  opcode(0x0F, 0x80);
11229  ins_encode(Jcc(cop, labl));
11230  ins_pipe(pipe_jcc);
11231  ins_pc_relative(1);
11232%}
11233
11234// ============================================================================
11235// The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
11236// superklass array for an instance of the superklass.  Set a hidden
11237// internal cache on a hit (cache is checked with exposed code in
11238// gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
11239// encoding ALSO sets flags.
11240
// Matches PartialSubtypeCheck(sub, super) when the result is needed as a
// value in a register.  Operands use fixed-register classes: result in
// rdi_RegP, sub in rsi_RegP, super in rax_RegP; rcx and the flags are KILLed.
// The code is emitted by enc_PartialSubtypeCheck(); the primary opcode value
// 0x1 asks that encoding to XOR rdi on a hit, so a hit yields zero.
// The format string is listing text only: it now spells arrayOopDesc
// correctly and marks the "Hit: rdi zero" annotation with '#', matching the
// other annotations in the listing.
11241instruct partialSubtypeCheck(rdi_RegP result,
11242                             rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
11243                             rFlagsReg cr)
11244%{
11245  match(Set result (PartialSubtypeCheck sub super));
11246  effect(KILL rcx, KILL cr);
11247
11248  ins_cost(1100);  // slightly larger than the next version
11249  format %{ "cmpq    rax, rsi\n\t"
11250            "jeq,s   hit\n\t"
11251            "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
11252            "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
11253            "addq    rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
11254            "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
11255            "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
11256            "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
11257    "hit:\n\t"
11258            "xorq    $result, $result\t\t# Hit: rdi zero\n\t"
11259    "miss:\t" %}
11260
11261  opcode(0x1); // Force a XOR of RDI
11262  ins_encode(enc_PartialSubtypeCheck());
11263  ins_pipe(pipe_slow);
11264%}
11265
// Matches CmpP(PartialSubtypeCheck(sub, super), zero), i.e. the case where
// only the flags outcome of the check is consumed.  Disabled when
// UseCompressedOops is set (see the predicate comment).  rcx and the rdi
// result register are KILLed rather than defined.  The code is emitted by
// the same enc_PartialSubtypeCheck() encoding, with primary opcode 0x0
// meaning no XOR of rdi is needed.  Cost 1000 makes this slightly cheaper
// than the value-producing partialSubtypeCheck rule.
// The format string is listing text only: it now spells arrayOopDesc
// correctly.
11266instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr,
11267                                     rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
11268                                     immP0 zero,
11269                                     rdi_RegP result)
11270%{
11271  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
11272  predicate(!UseCompressedOops); // decoding oop kills condition codes
11273  effect(KILL rcx, KILL result);
11274
11275  ins_cost(1000);
11276  format %{ "cmpq    rax, rsi\n\t"
11277            "jeq,s   miss\t# Actually a hit; we are done.\n\t"
11278            "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
11279            "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
11280            "addq    rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
11281            "repne   scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"
11282            "jne,s   miss\t\t# Missed: flags nz\n\t"
11283            "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
11284    "miss:\t" %}
11285
11286  opcode(0x0); // No need to XOR RDI
11287  ins_encode(enc_PartialSubtypeCheck());
11288  ins_pipe(pipe_slow);
11289%}
11290
11291// ============================================================================
11292// Branch Instructions -- short offset versions
11293//
11294// These instructions are used to replace jumps of a long offset (the default
11295// match) with jumps of a shorter offset.  These instructions are all tagged
11296// with the ins_short_branch attribute, which causes the ADLC to suppress the
11297// match rules in general matching.  Instead, the ADLC generates a conversion
11298// method in the MachNode which can be used to do in-place replacement of the
11299// long variant with the shorter variant.  The compiler will determine if a
11300// branch can be taken by the is_short_branch_offset() predicate in the machine
11301// specific code section of the file.
11302
11303// Jump Direct - Label defines a relative address from JMP+1
// Short form of jmpDir: same Goto match, but opcode 0xEB with the
// displacement emitted by LblShort(labl) and a declared size of 2 bytes.
// Tagged ins_short_branch(1), so (per the section comment) it is used as an
// in-place replacement for the long form rather than matched directly.
11304instruct jmpDir_short(label labl)
11305%{
11306  match(Goto);
11307  effect(USE labl);
11308
11309  ins_cost(300);
11310  format %{ "jmp,s   $labl" %}
11311  size(2);
11312  opcode(0xEB);
11313  ins_encode(OpcP, LblShort(labl));
11314  ins_pipe(pipe_jmp);
11315  ins_pc_relative(1);
11316  ins_short_branch(1);
11317%}
11318
11319// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short form of jmpCon: same If(cop, cr) match, but base opcode 0x70 via the
// JccShort encoding class and a declared size of 2 bytes.  Tagged
// ins_short_branch(1).
11320instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl)
11321%{
11322  match(If cop cr);
11323  effect(USE labl);
11324
11325  ins_cost(300);
11326  format %{ "j$cop,s   $labl" %}
11327  size(2);
11328  opcode(0x70);
11329  ins_encode(JccShort(cop, labl));
11330  ins_pipe(pipe_jcc);
11331  ins_pc_relative(1);
11332  ins_short_branch(1);
11333%}
11334
11335// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short form of jmpLoopEnd: same CountedLoopEnd(cop, cr) match, base opcode
// 0x70 via JccShort, declared size of 2 bytes, tagged ins_short_branch(1).
// Unlike the long form, the format string carries no "# loop end" note.
11336instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl)
11337%{
11338  match(CountedLoopEnd cop cr);
11339  effect(USE labl);
11340
11341  ins_cost(300);
11342  format %{ "j$cop,s   $labl" %}
11343  size(2);
11344  opcode(0x70);
11345  ins_encode(JccShort(cop, labl));
11346  ins_pipe(pipe_jcc);
11347  ins_pc_relative(1);
11348  ins_short_branch(1);
11349%}
11350
11351// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short form of jmpLoopEndU: same CountedLoopEnd(cop, cmp) match on unsigned
// flags, base opcode 0x70 via JccShort, declared size of 2 bytes, tagged
// ins_short_branch(1).
11352instruct jmpLoopEndU_short(cmpOpU cop, rFlagsRegU cmp, label labl)
11353%{
11354  match(CountedLoopEnd cop cmp);
11355  effect(USE labl);
11356
11357  ins_cost(300);
11358  format %{ "j$cop,us  $labl" %}
11359  size(2);
11360  opcode(0x70);
11361  ins_encode(JccShort(cop, labl));
11362  ins_pipe(pipe_jcc);
11363  ins_pc_relative(1);
11364  ins_short_branch(1);
11365%}
11366
11367// Jump Direct Conditional - using unsigned comparison
// Short form of jmpConU: same If(cop, cmp) match on unsigned flags, base
// opcode 0x70 via JccShort, declared size of 2 bytes, tagged
// ins_short_branch(1).
11368instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl)
11369%{
11370  match(If cop cmp);
11371  effect(USE labl);
11372
11373  ins_cost(300);
11374  format %{ "j$cop,us  $labl" %}
11375  size(2);
11376  opcode(0x70);
11377  ins_encode(JccShort(cop, labl));
11378  ins_pipe(pipe_jcc);
11379  ins_pc_relative(1);
11380  ins_short_branch(1);
11381%}
11382
11383// ============================================================================
11384// inlined locking and unlocking
11385
// Matches FastLock(object, box) and delivers its outcome in the flags
// register cr.  tmp (fixed to the rax_RegI class) and scr are scratch
// registers reserved via TEMP effects.  All code is emitted by the
// Fast_Lock encoding class.
11386instruct cmpFastLock(rFlagsReg cr,
11387                     rRegP object, rRegP box, rax_RegI tmp, rRegP scr)
11388%{
11389  match(Set cr (FastLock object box));
11390  effect(TEMP tmp, TEMP scr);
11391
11392  ins_cost(300);
11393  format %{ "fastlock $object,$box,$tmp,$scr" %}
11394  ins_encode(Fast_Lock(object, box, tmp, scr));
11395  ins_pipe(pipe_slow);
11396  ins_pc_relative(1);
11397%}
11398
// Matches FastUnlock(object, box) and delivers its outcome in the flags
// register cr.  Here box is fixed to the rax_RegP class and tmp is a scratch
// register reserved via a TEMP effect.  All code is emitted by the
// Fast_Unlock encoding class.
11399instruct cmpFastUnlock(rFlagsReg cr,
11400                       rRegP object, rax_RegP box, rRegP tmp)
11401%{
11402  match(Set cr (FastUnlock object box));
11403  effect(TEMP tmp);
11404
11405  ins_cost(300);
11406  format %{ "fastunlock $object, $box, $tmp" %}
11407  ins_encode(Fast_Unlock(object, box, tmp));
11408  ins_pipe(pipe_slow);
11409  ins_pc_relative(1);
11410%}
11411
11412
11413// ============================================================================
11414// Safepoint Instructions
// Matches the ideal SafePoint node.  Per the format string this is a testl
// of rax against a rip-relative polling-page address; the flags are
// clobbered (KILL cr).  Size is declared as exactly 6 bytes, broken down in
// the inline comment as opcode + ModRM + disp32.
11415instruct safePoint_poll(rFlagsReg cr)
11416%{
11417  match(SafePoint);
11418  effect(KILL cr);
11419
11420  format %{ "testl   rax, [rip + #offset_to_poll_page]\t"
11421            "# Safepoint: poll for GC" %}
11422  size(6); // Opcode + ModRM + Disp32 == 6 bytes
11423  ins_cost(125);
11424  ins_encode(enc_safepoint_poll);
11425  ins_pipe(ialu_reg_mem);
11426%}
11427
11428// ============================================================================
11429// Procedure Call/Return Instructions
11430// Call Java Static Instruction
11431// Note: If this code changes, the corresponding ret_addr_offset() and
11432//       compute_padding() functions will have to be adjusted.
// Matches the ideal CallStaticJava node.  Opcode 0xE8 (E8 cd); the call is
// emitted by Java_Static_Call(meth) followed by the call_epilog encoding.
// The instruction is PC-relative and requests an alignment of 4.
11433instruct CallStaticJavaDirect(method meth)
11434%{
11435  match(CallStaticJava);
11436  effect(USE meth);
11437
11438  ins_cost(300);
11439  format %{ "call,static " %}
11440  opcode(0xE8); /* E8 cd */
11441  ins_encode(Java_Static_Call(meth), call_epilog);
11442  ins_pipe(pipe_slow);
11443  ins_pc_relative(1);
11444  ins_alignment(4);
11445%}
11446
11447// Call Java Dynamic Instruction
11448// Note: If this code changes, the corresponding ret_addr_offset() and
11449//       compute_padding() functions will have to be adjusted.
// Matches the ideal CallDynamicJava node.  Per the format string, rax is
// first loaded with Universe::non_oop_word() and then the call is made.
// Emitted by Java_Dynamic_Call(meth) followed by call_epilog; PC-relative,
// alignment 4.
11450instruct CallDynamicJavaDirect(method meth)
11451%{
11452  match(CallDynamicJava);
11453  effect(USE meth);
11454
11455  ins_cost(300);
11456  format %{ "movq    rax, #Universe::non_oop_word()\n\t"
11457            "call,dynamic " %}
11458  opcode(0xE8); /* E8 cd */
11459  ins_encode(Java_Dynamic_Call(meth), call_epilog);
11460  ins_pipe(pipe_slow);
11461  ins_pc_relative(1);
11462  ins_alignment(4);
11463%}
11464
11465// Call Runtime Instruction
// Matches the ideal CallRuntime node.  Opcode 0xE8, emitted by
// Java_To_Runtime(meth).  Unlike the Java call rules there is no call_epilog
// encoding and no ins_alignment.
11466instruct CallRuntimeDirect(method meth)
11467%{
11468  match(CallRuntime);
11469  effect(USE meth);
11470
11471  ins_cost(300);
11472  format %{ "call,runtime " %}
11473  opcode(0xE8); /* E8 cd */
11474  ins_encode(Java_To_Runtime(meth));
11475  ins_pipe(pipe_slow);
11476  ins_pc_relative(1);
11477%}
11478
11479// Call runtime without safepoint
// Matches the ideal CallLeaf node.  Encoding is identical to
// CallRuntimeDirect (opcode 0xE8, Java_To_Runtime); only the matched node
// and the format text differ.
11480instruct CallLeafDirect(method meth)
11481%{
11482  match(CallLeaf);
11483  effect(USE meth);
11484
11485  ins_cost(300);
11486  format %{ "call_leaf,runtime " %}
11487  opcode(0xE8); /* E8 cd */
11488  ins_encode(Java_To_Runtime(meth));
11489  ins_pipe(pipe_slow);
11490  ins_pc_relative(1);
11491%}
11492
11493// Call runtime without safepoint
// Matches the ideal CallLeafNoFP node.  Encoding is identical to
// CallLeafDirect (opcode 0xE8, Java_To_Runtime); only the matched node and
// the format text differ.
11494instruct CallLeafNoFPDirect(method meth)
11495%{
11496  match(CallLeafNoFP);
11497  effect(USE meth);
11498
11499  ins_cost(300);
11500  format %{ "call_leaf_nofp,runtime " %}
11501  opcode(0xE8); /* E8 cd */
11502  ins_encode(Java_To_Runtime(meth));
11503  ins_pipe(pipe_slow);
11504  ins_pc_relative(1);
11505%}
11506
11507// Return Instruction
11508// Remove the return address & jump to it.
11509// Notice: We always emit a nop after a ret to make sure there is room
11510// for safepoint patching
// Matches the ideal Return node and encodes the single opcode byte 0xC3.
// NOTE(review): the comment preceding this rule mentions a nop after the
// ret, but the encoding here lists only OpcP -- confirm whether that note
// is stale or the nop is emitted elsewhere.
11511instruct Ret()
11512%{
11513  match(Return);
11514
11515  format %{ "ret" %}
11516  opcode(0xC3);
11517  ins_encode(OpcP);
11518  ins_pipe(pipe_jmp);
11519%}
11520
11521// Tail Call; Jump from runtime stub to Java code.
11522// Also known as an 'interprocedural jump'.
11523// Target of jump will eventually return to caller.
11524// TailJump below removes the return address.
// Matches TailCall(jump_target, method_oop).  Emits an indirect jump through
// jump_target (opcode 0xFF /4).  method_oop is pinned to the rbx_RegP class
// and is not referenced by the encoding; jump_target is drawn from the
// no_rbp_RegP class.
11525instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_oop)
11526%{
11527  match(TailCall jump_target method_oop);
11528
11529  ins_cost(300);
11530  format %{ "jmp     $jump_target\t# rbx holds method oop" %}
11531  opcode(0xFF, 0x4); /* Opcode FF /4 */
11532  ins_encode(REX_reg(jump_target), OpcP, reg_opc(jump_target));
11533  ins_pipe(pipe_jmp);
11534%}
11535
11536// Tail Jump; remove the return address; jump to target.
11537// TailCall above leaves the return address around.
// Matches TailJump(jump_target, ex_oop).  First emits the raw byte 0x5a
// (popq rdx, discarding the return address per the format string), then an
// indirect jump through jump_target (opcode 0xFF /4).  ex_oop is pinned to
// the rax_RegP class and is not referenced by the encoding.
11538instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
11539%{
11540  match(TailJump jump_target ex_oop);
11541
11542  ins_cost(300);
11543  format %{ "popq    rdx\t# pop return address\n\t"
11544            "jmp     $jump_target" %}
11545  opcode(0xFF, 0x4); /* Opcode FF /4 */
11546  ins_encode(Opcode(0x5a), // popq rdx
11547             REX_reg(jump_target), OpcP, reg_opc(jump_target));
11548  ins_pipe(pipe_jmp);
11549%}
11550
11551// Create exception oop: created by stack-crawling runtime code.
11552// Created exception is now available to this handler, and is setup
11553// just prior to jumping to this handler.  No code emitted.
// Matches CreateEx, binding its result to the rax_RegP operand class.  The
// rule emits nothing: size is declared as 0, ins_encode() is empty and the
// pipeline class is "empty".
11554instruct CreateException(rax_RegP ex_oop)
11555%{
11556  match(Set ex_oop (CreateEx));
11557
11558  size(0);
11559  // use the following format syntax
11560  format %{ "# exception oop is in rax; no code emitted" %}
11561  ins_encode();
11562  ins_pipe(empty);
11563%}
11564
11565// Rethrow exception:
11566// The exception oop will come in the first argument position.
11567// Then JUMP (not call) to the rethrow stub code.
// Matches the ideal Rethrow node.  Takes no operands; the jump to the
// rethrow stub is emitted entirely by the enc_rethrow encoding class.
11568instruct RethrowException()
11569%{
11570  match(Rethrow);
11571
11572  // use the following format syntax
11573  format %{ "jmp     rethrow_stub" %}
11574  ins_encode(enc_rethrow);
11575  ins_pipe(pipe_jmp);
11576%}
11577
11578
11579//----------PEEPHOLE RULES-----------------------------------------------------
11580// These must follow all instruction definitions as they use the names
11581// defined in the instructions definitions.
11582//
11583// peepmatch ( root_instr_name [preceding_instruction]* );
11584//
11585// peepconstraint %{
11586// (instruction_number.operand_name relational_op instruction_number.operand_name
11587//  [, ...] );
11588// // instruction numbers are zero-based using left to right order in peepmatch
11589//
11590// peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
11591// // provide an instruction_number.operand_name for each operand that appears
11592// // in the replacement instruction's match rule
11593//
11594// ---------VM FLAGS---------------------------------------------------------
11595//
11596// All peephole optimizations can be turned off using -XX:-OptoPeephole
11597//
11598// Each peephole rule is given an identifying number starting with zero and
11599// increasing by one in the order seen by the parser.  An individual peephole
11600// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
11601// on the command-line.
11602//
11603// ---------CURRENT LIMITATIONS----------------------------------------------
11604//
11605// Only match adjacent instructions in same basic block
11606// Only equality constraints
11607// Only constraints between operands, not (0.dest_reg == RAX_enc)
11608// Only one replacement instruction
11609//
11610// ---------EXAMPLE----------------------------------------------------------
11611//
11612// // pertinent parts of existing instructions in architecture description
11613// instruct movI(rRegI dst, rRegI src)
11614// %{
11615//   match(Set dst (CopyI src));
11616// %}
11617//
11618// instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
11619// %{
11620//   match(Set dst (AddI dst src));
11621//   effect(KILL cr);
11622// %}
11623//
11624// // Change (inc mov) to lea
11625// peephole %{
11626//   // increment preceded by register-register move
11627//   peepmatch ( incI_rReg movI );
11628//   // require that the destination register of the increment
11629//   // match the destination register of the move
11630//   peepconstraint ( 0.dst == 1.dst );
11631//   // construct a replacement instruction that sets
11632//   // the destination to ( move's source register + one )
11633//   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
11634// %}
11635//
11636
11637// Implementation no longer uses movX instructions since
11638// machine-independent system no longer uses CopyX nodes.
11639//
11640// peephole
11641// %{
11642//   peepmatch (incI_rReg movI);
11643//   peepconstraint (0.dst == 1.dst);
11644//   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
11645// %}
11646
11647// peephole
11648// %{
11649//   peepmatch (decI_rReg movI);
11650//   peepconstraint (0.dst == 1.dst);
11651//   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
11652// %}
11653
11654// peephole
11655// %{
11656//   peepmatch (addI_rReg_imm movI);
11657//   peepconstraint (0.dst == 1.dst);
11658//   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
11659// %}
11660
11661// peephole
11662// %{
11663//   peepmatch (incL_rReg movL);
11664//   peepconstraint (0.dst == 1.dst);
11665//   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
11666// %}
11667
11668// peephole
11669// %{
11670//   peepmatch (decL_rReg movL);
11671//   peepconstraint (0.dst == 1.dst);
11672//   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
11673// %}
11674
11675// peephole
11676// %{
11677//   peepmatch (addL_rReg_imm movL);
11678//   peepconstraint (0.dst == 1.dst);
11679//   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
11680// %}
11681
11682// peephole
11683// %{
11684//   peepmatch (addP_rReg_imm movP);
11685//   peepconstraint (0.dst == 1.dst);
11686//   peepreplace (leaP_rReg_imm(0.dst 1.src 0.src));
11687// %}
11688
11689// // Change load of spilled value to only a spill
11690// instruct storeI(memory mem, rRegI src)
11691// %{
11692//   match(Set mem (StoreI mem src));
11693// %}
11694//
11695// instruct loadI(rRegI dst, memory mem)
11696// %{
11697//   match(Set dst (LoadI mem));
11698// %}
11699//
11700
// Peephole: a loadI (instruction 0, the root) immediately preceded by a
// storeI (instruction 1).  When the load's destination register is the
// register just stored and both use the same memory operand, the pair is
// replaced by the storeI alone, since the register already holds the value.
11701peephole
11702%{
11703  peepmatch (loadI storeI);
11704  peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
11705  peepreplace (storeI(1.mem 1.mem 1.src));
11706%}
11707
// Peephole: long counterpart of the loadI/storeI rule.  A loadL (instruction
// 0, the root) immediately preceded by a storeL (instruction 1) of the same
// register to the same memory operand is replaced by the storeL alone.
11708peephole
11709%{
11710  peepmatch (loadL storeL);
11711  peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
11712  peepreplace (storeL(1.mem 1.mem 1.src));
11713%}
11714
11715//----------SMARTSPILL RULES---------------------------------------------------
11716// These must follow all instruction definitions as they use the names
11717// defined in the instructions definitions.
11718