x86_64.ad revision 647:bd441136a5ce
1148456Spjd//
2220922Spjd// Copyright 2003-2009 Sun Microsystems, Inc.  All Rights Reserved.
3148456Spjd// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4148456Spjd//
5148456Spjd// This code is free software; you can redistribute it and/or modify it
6148456Spjd// under the terms of the GNU General Public License version 2 only, as
7148456Spjd// published by the Free Software Foundation.
8148456Spjd//
9148456Spjd// This code is distributed in the hope that it will be useful, but WITHOUT
10148456Spjd// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11148456Spjd// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12148456Spjd// version 2 for more details (a copy is included in the LICENSE file that
13155174Spjd// accompanied this code).
14148456Spjd//
15148456Spjd// You should have received a copy of the GNU General Public License version
16148456Spjd// 2 along with this work; if not, write to the Free Software Foundation,
17148456Spjd// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18148456Spjd//
19148456Spjd// Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
20148456Spjd// CA 95054 USA or visit www.sun.com if you need additional information or
21148456Spjd// have any questions.
22148456Spjd//
23148456Spjd//
24148456Spjd
25148456Spjd// AMD64 Architecture Description File
26148456Spjd
27148456Spjd//----------REGISTER DEFINITION BLOCK------------------------------------------
28148456Spjd// This information is used by the matcher and the register allocator to
29148456Spjd// describe individual registers and classes of registers within the target
30148456Spjd// architecture.
31148456Spjd
32148456Spjdregister %{
33148456Spjd//----------Architecture Description Register Definitions----------------------
34148456Spjd// General Registers
35148456Spjd// "reg_def"  name ( register save type, C convention save type,
36148456Spjd//                   ideal register type, encoding, VMReg );
// VMReg is the handle written as the last argument of every definition below;
// each *_H entry passes ->next() of the same register's VMReg.
37148456Spjd// Register Save Types:
38148456Spjd//
39148456Spjd// NS  = No-Save:       The register allocator assumes that these registers
40148456Spjd//                      can be used without saving upon entry to the method, &
41148456Spjd//                      that they do not need to be saved at call sites.
42148456Spjd//
43148456Spjd// SOC = Save-On-Call:  The register allocator assumes that these registers
44148456Spjd//                      can be used without saving upon entry to the method,
45148456Spjd//                      but that they must be saved at call sites.
46213067Spjd//
47213067Spjd// SOE = Save-On-Entry: The register allocator assumes that these registers
48213067Spjd//                      must be saved before using them upon entry to the
49148456Spjd//                      method, but they do not need to be saved at call
50148456Spjd//                      sites.
51148456Spjd//
52148456Spjd// AS  = Always-Save:   The register allocator assumes that these registers
53148456Spjd//                      must be saved before using them upon entry to the
54148456Spjd//                      method, & that they must be saved at call sites.
55148456Spjd//
56148456Spjd// Ideal Register Type is used to determine how to save & restore a
57148456Spjd// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
58148456Spjd// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
59148456Spjd//
60148456Spjd// The encoding number is the actual bit-pattern placed into the opcodes.
61148456Spjd
62148456Spjd// General Registers
63148456Spjd// R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
64148456Spjd// used as byte registers)
65148456Spjd
66148456Spjd// Previously set RBX, RSI, and RDI as save-on-entry for java code
67148456Spjd// Turn off SOE in java-code due to frequent use of uncommon-traps.
68148456Spjd// Now that allocator is better, turn on RSI and RDI as SOE registers.
// NOTE(review): as defined below, RSI and RDI carry SOC in the first (register)
// save column on both branches; only the C-convention column is SOE, and only
// under _WIN64 -- confirm whether the sentence above is still current.
69148456Spjd
70148456Spjdreg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
71148456Spjdreg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());
72148456Spjd
73148456Spjdreg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
74148456Spjdreg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());
75148456Spjd
76148456Spjdreg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
77148456Spjdreg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());
78148456Spjd
79148456Spjdreg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
80148456Spjdreg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());
81148456Spjd
82148456Spjdreg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
83148456Spjdreg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());
84148456Spjd
85148456Spjd// now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
86148456Spjdreg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
87148456Spjdreg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());
88148456Spjd
// RSI and RDI: the C-convention save type is SOE under _WIN64 and SOC otherwise.
89148456Spjd#ifdef _WIN64
90148456Spjd
91148456Spjdreg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
92148456Spjdreg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());
93148456Spjd
94148456Spjdreg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
95148456Spjdreg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());
96148456Spjd
97148456Spjd#else
98148456Spjd
99148456Spjdreg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
100148456Spjdreg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());
101148456Spjd
102148456Spjdreg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
103148456Spjdreg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());
104148456Spjd
105148456Spjd#endif
106148456Spjd
107148456Spjdreg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
108148456Spjdreg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());
109148456Spjd
110148456Spjdreg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
111148456Spjdreg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());
112148456Spjd
113148456Spjdreg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
114148456Spjdreg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
115148456Spjd
116148456Spjdreg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
117148456Spjdreg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
118148456Spjd
119148456Spjdreg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
120148456Spjdreg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
121148456Spjd
122148456Spjdreg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
123148456Spjdreg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
124148456Spjd
125148456Spjdreg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
126148456Spjdreg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
127148456Spjd
128159307Spjdreg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
129148456Spjdreg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
130148456Spjd
131159307Spjd
132148456Spjd// Floating Point Registers
133148456Spjd
134148456Spjd// XMM registers.  128-bit registers or 4 words each, labeled (a)-d.
135148456Spjd// Word a in each register holds a Float, words ab hold a Double.  We
136148456Spjd// currently do not use the SIMD capabilities, so registers cd are
137148456Spjd// unused at the moment.
138148456Spjd// XMM8-XMM15 must be encoded with REX.
139148456Spjd// Linux ABI:   No register preserved across function calls
140148456Spjd//              XMM0-XMM7 might hold parameters
141148456Spjd// Windows ABI: XMM6-XMM15 preserved across function calls
142148456Spjd//              XMM0-XMM3 might hold parameters
143148456Spjd
144148456Spjdreg_def XMM0   (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg());
145148456Spjdreg_def XMM0_H (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg()->next());
146148456Spjd
147148456Spjdreg_def XMM1   (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg());
148148456Spjdreg_def XMM1_H (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg()->next());
149148456Spjd
150148456Spjdreg_def XMM2   (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg());
151148456Spjdreg_def XMM2_H (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg()->next());
152148456Spjd
153148456Spjdreg_def XMM3   (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg());
154148456Spjdreg_def XMM3_H (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg()->next());
155148456Spjd
156148456Spjdreg_def XMM4   (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg());
157148456Spjdreg_def XMM4_H (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg()->next());
158148456Spjd
159148456Spjdreg_def XMM5   (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg());
160148456Spjdreg_def XMM5_H (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg()->next());
161148456Spjd
// XMM6-XMM15: the C-convention save type is SOE under _WIN64 and SOC otherwise.
162148456Spjd#ifdef _WIN64
163148456Spjd
164148456Spjdreg_def XMM6   (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg());
165148456Spjdreg_def XMM6_H (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg()->next());
166148456Spjd
167148456Spjdreg_def XMM7   (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg());
168148456Spjdreg_def XMM7_H (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg()->next());
169148456Spjd
170148456Spjdreg_def XMM8   (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg());
171148456Spjdreg_def XMM8_H (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg()->next());
172148456Spjd
173148456Spjdreg_def XMM9   (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg());
174148456Spjdreg_def XMM9_H (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg()->next());
175148456Spjd
176148456Spjdreg_def XMM10  (SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
177148456Spjdreg_def XMM10_H(SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next());
178148456Spjd
179148456Spjdreg_def XMM11  (SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
180148456Spjdreg_def XMM11_H(SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next());
181148456Spjd
182159307Spjdreg_def XMM12  (SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
183159307Spjdreg_def XMM12_H(SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next());
184159307Spjd
185159307Spjdreg_def XMM13  (SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
186159307Spjdreg_def XMM13_H(SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next());
187159307Spjd
188159307Spjdreg_def XMM14  (SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
189159307Spjdreg_def XMM14_H(SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next());
190159307Spjd
191159307Spjdreg_def XMM15  (SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
192159307Spjdreg_def XMM15_H(SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next());
193159307Spjd
194159307Spjd#else
195159307Spjd
196159307Spjdreg_def XMM6   (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg());
197159307Spjdreg_def XMM6_H (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg()->next());
198159307Spjd
199213067Spjdreg_def XMM7   (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg());
200238114Spjdreg_def XMM7_H (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg()->next());
201213067Spjd
202213067Spjdreg_def XMM8   (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg());
203213067Spjdreg_def XMM8_H (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg()->next());
204213067Spjd
205159307Spjdreg_def XMM9   (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg());
206213067Spjdreg_def XMM9_H (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg()->next());
207159307Spjd
208159307Spjdreg_def XMM10  (SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
209220922Spjdreg_def XMM10_H(SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next());
210220922Spjd
211214225Spjdreg_def XMM11  (SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
212238114Spjdreg_def XMM11_H(SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next());
213214225Spjd
214214225Spjdreg_def XMM12  (SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
215214225Spjdreg_def XMM12_H(SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next());
216214225Spjd
217214225Spjdreg_def XMM13  (SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
218214225Spjdreg_def XMM13_H(SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next());
219214225Spjd
220214225Spjdreg_def XMM14  (SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
221214225Spjdreg_def XMM14_H(SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next());
222214225Spjd
223214225Spjdreg_def XMM15  (SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
224214225Spjdreg_def XMM15_H(SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next());
225214225Spjd
226214225Spjd#endif // _WIN64
227214225Spjd
// Condition codes: ideal register type is 0 and the VMReg is VMRegImpl::Bad().
228214225Spjdreg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
229214225Spjd
230214225Spjd// Specify priority of register selection within phases of register
231214225Spjd// allocation.  Highest priority is first.  A useful heuristic is to
232214225Spjd// give registers a low priority when they are required by machine
233214225Spjd// instructions, like EAX and EDX on I486, and choose no-save registers
234214225Spjd// before save-on-call, & save-on-call before save-on-entry.  Registers
235214225Spjd// which participate in fixed calling sequences should come last.
236159307Spjd// Registers which are used as pairs must fall on an even boundary.
237159307Spjd
// chunk0: all general-purpose registers, listed as (low, _H) pairs; RSP is last.
238alloc_class chunk0(R10,         R10_H,
239                   R11,         R11_H,
240                   R8,          R8_H,
241                   R9,          R9_H,
242                   R12,         R12_H,
243                   RCX,         RCX_H,
244                   RBX,         RBX_H,
245                   RDI,         RDI_H,
246                   RDX,         RDX_H,
247                   RSI,         RSI_H,
248                   RAX,         RAX_H,
249                   RBP,         RBP_H,
250                   R13,         R13_H,
251                   R14,         R14_H,
252                   R15,         R15_H,
253                   RSP,         RSP_H);
254
255// XXX probably use 8-15 first on Linux
// chunk1: all XMM registers in numeric order, listed as (low, _H) pairs.
256alloc_class chunk1(XMM0,  XMM0_H,
257                   XMM1,  XMM1_H,
258                   XMM2,  XMM2_H,
259                   XMM3,  XMM3_H,
260                   XMM4,  XMM4_H,
261                   XMM5,  XMM5_H,
262                   XMM6,  XMM6_H,
263                   XMM7,  XMM7_H,
264                   XMM8,  XMM8_H,
265                   XMM9,  XMM9_H,
266                   XMM10, XMM10_H,
267                   XMM11, XMM11_H,
268                   XMM12, XMM12_H,
269                   XMM13, XMM13_H,
270                   XMM14, XMM14_H,
271                   XMM15, XMM15_H);
272
// chunk2: the flags register on its own.
273alloc_class chunk2(RFLAGS);
274
275
276//----------Architecture Description Register Classes--------------------------
277// Several register classes are automatically defined based upon information in
278// this architecture description.
279// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
280// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
281// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
282// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
283//
284
285// Class for all pointer registers (including RSP)
286reg_class any_reg(RAX, RAX_H,
287                  RDX, RDX_H,
288                  RBP, RBP_H,
289                  RDI, RDI_H,
290                  RSI, RSI_H,
291                  RCX, RCX_H,
292                  RBX, RBX_H,
293                  RSP, RSP_H,
294                  R8,  R8_H,
295                  R9,  R9_H,
296                  R10, R10_H,
297                  R11, R11_H,
298                  R12, R12_H,
299                  R13, R13_H,
300                  R14, R14_H,
301                  R15, R15_H);
302
303// Class for all pointer registers except RSP (R12 and R15 are also left out)
// R15 has its own singleton class below, commented as the TLS pointer; R12 is
// presumably reserved as well -- TODO confirm why it is excluded.
304reg_class ptr_reg(RAX, RAX_H,
305                  RDX, RDX_H,
306                  RBP, RBP_H,
307                  RDI, RDI_H,
308                  RSI, RSI_H,
309                  RCX, RCX_H,
310                  RBX, RBX_H,
311                  R8,  R8_H,
312                  R9,  R9_H,
313                  R10, R10_H,
314                  R11, R11_H,
315                  R13, R13_H,
316                  R14, R14_H);
317
318// Class for all pointer registers except RAX and RSP (R12 and R15 are also left out)
319reg_class ptr_no_rax_reg(RDX, RDX_H,
320                         RBP, RBP_H,
321                         RDI, RDI_H,
322                         RSI, RSI_H,
323                         RCX, RCX_H,
324                         RBX, RBX_H,
325                         R8,  R8_H,
326                         R9,  R9_H,
327                         R10, R10_H,
328                         R11, R11_H,
329                         R13, R13_H,
330                         R14, R14_H);
331
// Class for all pointer registers except RBP and RSP (R12 and R15 are also left out)
332reg_class ptr_no_rbp_reg(RDX, RDX_H,
333                         RAX, RAX_H,
334                         RDI, RDI_H,
335                         RSI, RSI_H,
336                         RCX, RCX_H,
337                         RBX, RBX_H,
338                         R8,  R8_H,
339                         R9,  R9_H,
340                         R10, R10_H,
341                         R11, R11_H,
342                         R13, R13_H,
343                         R14, R14_H);
344
345// Class for all pointer registers except RAX, RBX and RSP (R12 and R15 are also left out)
346reg_class ptr_no_rax_rbx_reg(RDX, RDX_H,
347                             RBP, RBP_H,
348                             RDI, RDI_H,
349                             RSI, RSI_H,
350                             RCX, RCX_H,
351                             R8,  R8_H,
352                             R9,  R9_H,
353                             R10, R10_H,
354                             R11, R11_H,
355                             R13, R13_H,
356                             R14, R14_H);
357
358// Singleton class for RAX pointer register
359reg_class ptr_rax_reg(RAX, RAX_H);
360
361// Singleton class for RBX pointer register
362reg_class ptr_rbx_reg(RBX, RBX_H);
363
364// Singleton class for RSI pointer register
365reg_class ptr_rsi_reg(RSI, RSI_H);
366
367// Singleton class for RDI pointer register
368reg_class ptr_rdi_reg(RDI, RDI_H);
369
370// Singleton class for RBP pointer register
371reg_class ptr_rbp_reg(RBP, RBP_H);
372
373// Singleton class for stack pointer
374reg_class ptr_rsp_reg(RSP, RSP_H);
375
376// Singleton class for TLS pointer
377reg_class ptr_r15_reg(R15, R15_H);
378
379// Class for all long registers (except RSP; R12 and R15 are also left out)
380reg_class long_reg(RAX, RAX_H,
381                   RDX, RDX_H,
382                   RBP, RBP_H,
383                   RDI, RDI_H,
384                   RSI, RSI_H,
385                   RCX, RCX_H,
386                   RBX, RBX_H,
387                   R8,  R8_H,
388                   R9,  R9_H,
389                   R10, R10_H,
390                   R11, R11_H,
391                   R13, R13_H,
392                   R14, R14_H);
393
394// Class for all long registers except RAX, RDX (and RSP; R12 and R15 are also left out)
395reg_class long_no_rax_rdx_reg(RBP, RBP_H,
396                              RDI, RDI_H,
397                              RSI, RSI_H,
398                              RCX, RCX_H,
399                              RBX, RBX_H,
400                              R8,  R8_H,
401                              R9,  R9_H,
402                              R10, R10_H,
403                              R11, R11_H,
404                              R13, R13_H,
405                              R14, R14_H);
406
407// Class for all long registers except RCX (and RSP; R12 and R15 are also left out)
408reg_class long_no_rcx_reg(RBP, RBP_H,
409                          RDI, RDI_H,
410                          RSI, RSI_H,
411                          RAX, RAX_H,
412                          RDX, RDX_H,
413                          RBX, RBX_H,
414                          R8,  R8_H,
415                          R9,  R9_H,
416                          R10, R10_H,
417                          R11, R11_H,
418                          R13, R13_H,
419                          R14, R14_H);
420
421// Class for all long registers except RAX (and RSP; R12 and R15 are also left out)
422reg_class long_no_rax_reg(RBP, RBP_H,
423                          RDX, RDX_H,
424                          RDI, RDI_H,
425                          RSI, RSI_H,
426                          RCX, RCX_H,
427                          RBX, RBX_H,
428                          R8,  R8_H,
429                          R9,  R9_H,
430                          R10, R10_H,
431                          R11, R11_H,
432                          R13, R13_H,
433                          R14, R14_H);
434
435// Singleton class for RAX long register
436reg_class long_rax_reg(RAX, RAX_H);
437
438// Singleton class for RCX long register
439reg_class long_rcx_reg(RCX, RCX_H);
440
441// Singleton class for RDX long register
442reg_class long_rdx_reg(RDX, RDX_H);
443
444// Class for all int registers (except RSP; R12 and R15 are also left out)
// Int classes list only the low half of each register (no _H entries).
445reg_class int_reg(RAX,
446                  RDX,
447                  RBP,
448                  RDI,
449                  RSI,
450                  RCX,
451                  RBX,
452                  R8,
453                  R9,
454                  R10,
455                  R11,
456                  R13,
457                  R14);
458
459// Class for all int registers except RCX (and RSP; R12 and R15 are also left out)
460reg_class int_no_rcx_reg(RAX,
461                         RDX,
462                         RBP,
463                         RDI,
464                         RSI,
465                         RBX,
466                         R8,
467                         R9,
468                         R10,
469                         R11,
470                         R13,
471                         R14);
472
473// Class for all int registers except RAX, RDX (and RSP; R12 and R15 are also left out)
474reg_class int_no_rax_rdx_reg(RBP,
475                             RDI,
476                             RSI,
477                             RCX,
478                             RBX,
479                             R8,
480                             R9,
481                             R10,
482                             R11,
483                             R13,
484                             R14);
485
486// Singleton class for RAX int register
487reg_class int_rax_reg(RAX);
488
489// Singleton class for RBX int register
490reg_class int_rbx_reg(RBX);
491
492// Singleton class for RCX int register
493reg_class int_rcx_reg(RCX);
494
495// Singleton class for RDX int register
496reg_class int_rdx_reg(RDX);
497
498// Singleton class for RDI int register
499reg_class int_rdi_reg(RDI);
500
501// Singleton class for instruction pointer
502// reg_class ip_reg(RIP);
503
504// Singleton class for condition codes
505reg_class int_flags(RFLAGS);
506
507// Class for all float registers (the first word of each XMM register only)
508reg_class float_reg(XMM0,
509                    XMM1,
510                    XMM2,
511                    XMM3,
512                    XMM4,
513                    XMM5,
514                    XMM6,
515                    XMM7,
516                    XMM8,
517                    XMM9,
518                    XMM10,
519                    XMM11,
520                    XMM12,
521                    XMM13,
522                    XMM14,
523                    XMM15);
524
525// Class for all double registers (each XMM register together with its _H half)
526reg_class double_reg(XMM0,  XMM0_H,
527                     XMM1,  XMM1_H,
528                     XMM2,  XMM2_H,
529                     XMM3,  XMM3_H,
530                     XMM4,  XMM4_H,
531                     XMM5,  XMM5_H,
532                     XMM6,  XMM6_H,
533                     XMM7,  XMM7_H,
534                     XMM8,  XMM8_H,
535                     XMM9,  XMM9_H,
536                     XMM10, XMM10_H,
537                     XMM11, XMM11_H,
538                     XMM12, XMM12_H,
539                     XMM13, XMM13_H,
540                     XMM14, XMM14_H,
541                     XMM15, XMM15_H);
542%}
543
544
545//----------SOURCE BLOCK-------------------------------------------------------
546// This is a block of C++ code which provides values, functions, and
547// definitions necessary in the rest of the architecture description
548source %{
// Short names for two Assembler operand-format values.  RELOC_DISP32 is what
// the emitters in this block pass as the `format` argument when they relocate
// a 32-bit displacement.
549#define   RELOC_IMM64    Assembler::imm_operand
550#define   RELOC_DISP32   Assembler::disp32_operand
551
// `__ foo(...)` expands to `_masm.foo(...)`, so it can only be used where a
// local named _masm is in scope (as in encode_CopyXD).
552#define __ _masm.
553
554// !!!!! Special hack to get all types of calls to specify the byte offset
555//       from the start of the call to the point where the return address
556//       will point.
557int MachCallStaticJavaNode::ret_addr_offset()
558{
559  return 5; // 5 bytes from start of call to where return address points
560}
561
562int MachCallDynamicJavaNode::ret_addr_offset()
563{
564  return 15; // 15 bytes from start of call to where return address points
565}
566
567// In os_cpu .ad file
568// int MachCallRuntimeNode::ret_addr_offset()
569
570// Indicate if the safepoint node needs the polling page as an input.
571// Since amd64 does not have absolute addressing but RIP-relative
572// addressing and the polling page is within 2G, it doesn't.
573bool SafePointNode::needs_polling_address_input()
574{
575  return false;
576}
577
578//
579// Compute padding required for nodes which need alignment
580//
581
582// The address of the call instruction needs to be 4-byte aligned to
583// ensure that it does not span a cache line so that it can be patched.
584int CallStaticJavaDirectNode::compute_padding(int current_offset) const
585{
586  current_offset += 1; // skip call opcode byte
587  return round_to(current_offset, alignment_required()) - current_offset;
588}
589
590// The address of the call instruction needs to be 4-byte aligned to
591// ensure that it does not span a cache line so that it can be patched.
592int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
593{
594  current_offset += 11; // skip movq instruction + call opcode byte
595  return round_to(current_offset, alignment_required()) - current_offset;
596}
597
598#ifndef PRODUCT
599void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const
600{
601  st->print("INT3");
602}
603#endif
604
605// EMIT_RM()
606void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3)
607{
608  unsigned char c = (unsigned char) ((f1 << 6) | (f2 << 3) | f3);
609  *(cbuf.code_end()) = c;
610  cbuf.set_code_end(cbuf.code_end() + 1);
611}
612
613// EMIT_CC()
614void emit_cc(CodeBuffer &cbuf, int f1, int f2)
615{
616  unsigned char c = (unsigned char) (f1 | f2);
617  *(cbuf.code_end()) = c;
618  cbuf.set_code_end(cbuf.code_end() + 1);
619}
620
621// EMIT_OPCODE()
622void emit_opcode(CodeBuffer &cbuf, int code)
623{
624  *(cbuf.code_end()) = (unsigned char) code;
625  cbuf.set_code_end(cbuf.code_end() + 1);
626}
627
628// EMIT_OPCODE() w/ relocation information
629void emit_opcode(CodeBuffer &cbuf,
630                 int code, relocInfo::relocType reloc, int offset, int format)
631{
632  cbuf.relocate(cbuf.inst_mark() + offset, reloc, format);
633  emit_opcode(cbuf, code);
634}
635
636// EMIT_D8()
637void emit_d8(CodeBuffer &cbuf, int d8)
638{
639  *(cbuf.code_end()) = (unsigned char) d8;
640  cbuf.set_code_end(cbuf.code_end() + 1);
641}
642
643// EMIT_D16()
644void emit_d16(CodeBuffer &cbuf, int d16)
645{
646  *((short *)(cbuf.code_end())) = d16;
647  cbuf.set_code_end(cbuf.code_end() + 2);
648}
649
650// EMIT_D32()
651void emit_d32(CodeBuffer &cbuf, int d32)
652{
653  *((int *)(cbuf.code_end())) = d32;
654  cbuf.set_code_end(cbuf.code_end() + 4);
655}
656
657// EMIT_D64()
658void emit_d64(CodeBuffer &cbuf, int64_t d64)
659{
660  *((int64_t*) (cbuf.code_end())) = d64;
661  cbuf.set_code_end(cbuf.code_end() + 8);
662}
663
664// emit 32 bit value and construct relocation entry from relocInfo::relocType
665void emit_d32_reloc(CodeBuffer& cbuf,
666                    int d32,
667                    relocInfo::relocType reloc,
668                    int format)
669{
670  assert(reloc != relocInfo::external_word_type, "use 2-arg emit_d32_reloc");
671  cbuf.relocate(cbuf.inst_mark(), reloc, format);
672
673  *((int*) (cbuf.code_end())) = d32;
674  cbuf.set_code_end(cbuf.code_end() + 4);
675}
676
677// emit 32 bit value and construct relocation entry from RelocationHolder
678void emit_d32_reloc(CodeBuffer& cbuf,
679                    int d32,
680                    RelocationHolder const& rspec,
681                    int format)
682{
683#ifdef ASSERT
684  if (rspec.reloc()->type() == relocInfo::oop_type &&
685      d32 != 0 && d32 != (intptr_t) Universe::non_oop_word()) {
686    assert(oop((intptr_t)d32)->is_oop() && oop((intptr_t)d32)->is_perm(), "cannot embed non-perm oops in code");
687  }
688#endif
689  cbuf.relocate(cbuf.inst_mark(), rspec, format);
690
691  *((int* )(cbuf.code_end())) = d32;
692  cbuf.set_code_end(cbuf.code_end() + 4);
693}
694
695void emit_d32_reloc(CodeBuffer& cbuf, address addr) {
696  address next_ip = cbuf.code_end() + 4;
697  emit_d32_reloc(cbuf, (int) (addr - next_ip),
698                 external_word_Relocation::spec(addr),
699                 RELOC_DISP32);
700}
701
702
703// emit 64 bit value and construct relocation entry from relocInfo::relocType
704void emit_d64_reloc(CodeBuffer& cbuf,
705                    int64_t d64,
706                    relocInfo::relocType reloc,
707                    int format)
708{
709  cbuf.relocate(cbuf.inst_mark(), reloc, format);
710
711  *((int64_t*) (cbuf.code_end())) = d64;
712  cbuf.set_code_end(cbuf.code_end() + 8);
713}
714
715// emit 64 bit value and construct relocation entry from RelocationHolder
716void emit_d64_reloc(CodeBuffer& cbuf,
717                    int64_t d64,
718                    RelocationHolder const& rspec,
719                    int format)
720{
721#ifdef ASSERT
722  if (rspec.reloc()->type() == relocInfo::oop_type &&
723      d64 != 0 && d64 != (int64_t) Universe::non_oop_word()) {
724    assert(oop(d64)->is_oop() && oop(d64)->is_perm(),
725           "cannot embed non-perm oops in code");
726  }
727#endif
728  cbuf.relocate(cbuf.inst_mark(), rspec, format);
729
730  *((int64_t*) (cbuf.code_end())) = d64;
731  cbuf.set_code_end(cbuf.code_end() + 8);
732}
733
734// Access stack slot for load or store
735void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp)
736{
737  emit_opcode(cbuf, opcode);                  // (e.g., FILD   [RSP+src])
738  if (-0x80 <= disp && disp < 0x80) {
739    emit_rm(cbuf, 0x01, rm_field, RSP_enc);   // R/M byte
740    emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
741    emit_d8(cbuf, disp);     // Displacement  // R/M byte
742  } else {
743    emit_rm(cbuf, 0x02, rm_field, RSP_enc);   // R/M byte
744    emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
745    emit_d32(cbuf, disp);     // Displacement // R/M byte
746  }
747}
748
749   // rRegI ereg, memory mem) %{    // emit_reg_mem
750void encode_RegMem(CodeBuffer &cbuf,
751                   int reg,
752                   int base, int index, int scale, int disp, bool disp_is_oop)
753{
754  assert(!disp_is_oop, "cannot have disp");
755  int regenc = reg & 7;
756  int baseenc = base & 7;
757  int indexenc = index & 7;
758
759  // There is no index & no scale, use form without SIB byte
760  if (index == 0x4 && scale == 0 && base != RSP_enc && base != R12_enc) {
761    // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
762    if (disp == 0 && base != RBP_enc && base != R13_enc) {
763      emit_rm(cbuf, 0x0, regenc, baseenc); // *
764    } else if (-0x80 <= disp && disp < 0x80 && !disp_is_oop) {
765      // If 8-bit displacement, mode 0x1
766      emit_rm(cbuf, 0x1, regenc, baseenc); // *
767      emit_d8(cbuf, disp);
768    } else {
769      // If 32-bit displacement
770      if (base == -1) { // Special flag for absolute address
771        emit_rm(cbuf, 0x0, regenc, 0x5); // *
772        if (disp_is_oop) {
773          emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
774        } else {
775          emit_d32(cbuf, disp);
776        }
777      } else {
778        // Normal base + offset
779        emit_rm(cbuf, 0x2, regenc, baseenc); // *
780        if (disp_is_oop) {
781          emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
782        } else {
783          emit_d32(cbuf, disp);
784        }
785      }
786    }
787  } else {
788    // Else, encode with the SIB byte
789    // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
790    if (disp == 0 && base != RBP_enc && base != R13_enc) {
791      // If no displacement
792      emit_rm(cbuf, 0x0, regenc, 0x4); // *
793      emit_rm(cbuf, scale, indexenc, baseenc);
794    } else {
795      if (-0x80 <= disp && disp < 0x80 && !disp_is_oop) {
796        // If 8-bit displacement, mode 0x1
797        emit_rm(cbuf, 0x1, regenc, 0x4); // *
798        emit_rm(cbuf, scale, indexenc, baseenc);
799        emit_d8(cbuf, disp);
800      } else {
801        // If 32-bit displacement
802        if (base == 0x04 ) {
803          emit_rm(cbuf, 0x2, regenc, 0x4);
804          emit_rm(cbuf, scale, indexenc, 0x04); // XXX is this valid???
805        } else {
806          emit_rm(cbuf, 0x2, regenc, 0x4);
807          emit_rm(cbuf, scale, indexenc, baseenc); // *
808        }
809        if (disp_is_oop) {
810          emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
811        } else {
812          emit_d32(cbuf, disp);
813        }
814      }
815    }
816  }
817}
818
819void encode_copy(CodeBuffer &cbuf, int dstenc, int srcenc)
820{
821  if (dstenc != srcenc) {
822    if (dstenc < 8) {
823      if (srcenc >= 8) {
824        emit_opcode(cbuf, Assembler::REX_B);
825        srcenc -= 8;
826      }
827    } else {
828      if (srcenc < 8) {
829        emit_opcode(cbuf, Assembler::REX_R);
830      } else {
831        emit_opcode(cbuf, Assembler::REX_RB);
832        srcenc -= 8;
833      }
834      dstenc -= 8;
835    }
836
837    emit_opcode(cbuf, 0x8B);
838    emit_rm(cbuf, 0x3, dstenc, srcenc);
839  }
840}
841
842void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
843  if( dst_encoding == src_encoding ) {
844    // reg-reg copy, use an empty encoding
845  } else {
846    MacroAssembler _masm(&cbuf);
847
848    __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding));
849  }
850}
851
852
853//=============================================================================
854#ifndef PRODUCT
855void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const
856{
857  Compile* C = ra_->C;
858
859  int framesize = C->frame_slots() << LogBytesPerInt;
860  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
861  // Remove wordSize for return adr already pushed
862  // and another for the RBP we are going to save
863  framesize -= 2*wordSize;
864  bool need_nop = true;
865
866  // Calls to C2R adapters often do not accept exceptional returns.
867  // We require that their callers must bang for them.  But be
868  // careful, because some VM calls (such as call site linkage) can
869  // use several kilobytes of stack.  But the stack safety zone should
870  // account for that.  See bugs 4446381, 4468289, 4497237.
871  if (C->need_stack_bang(framesize)) {
872    st->print_cr("# stack bang"); st->print("\t");
873    need_nop = false;
874  }
875  st->print_cr("pushq   rbp"); st->print("\t");
876
877  if (VerifyStackAtCalls) {
878    // Majik cookie to verify stack depth
879    st->print_cr("pushq   0xffffffffbadb100d"
880                  "\t# Majik cookie for stack depth check");
881    st->print("\t");
882    framesize -= wordSize; // Remove 2 for cookie
883    need_nop = false;
884  }
885
886  if (framesize) {
887    st->print("subq    rsp, #%d\t# Create frame", framesize);
888    if (framesize < 0x80 && need_nop) {
889      st->print("\n\tnop\t# nop for patch_verified_entry");
890    }
891  }
892}
893#endif
894
895void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const
896{
897  Compile* C = ra_->C;
898
899  // WARNING: Initial instruction MUST be 5 bytes or longer so that
900  // NativeJump::patch_verified_entry will be able to patch out the entry
901  // code safely. The fldcw is ok at 6 bytes, the push to verify stack
902  // depth is ok at 5 bytes, the frame allocation can be either 3 or
903  // 6 bytes. So if we don't do the fldcw or the push then we must
904  // use the 6 byte frame allocation even if we have no frame. :-(
905  // If method sets FPU control word do it now
906
907  int framesize = C->frame_slots() << LogBytesPerInt;
908  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
909  // Remove wordSize for return adr already pushed
910  // and another for the RBP we are going to save
911  framesize -= 2*wordSize;
912  bool need_nop = true;
913
914  // Calls to C2R adapters often do not accept exceptional returns.
915  // We require that their callers must bang for them.  But be
916  // careful, because some VM calls (such as call site linkage) can
917  // use several kilobytes of stack.  But the stack safety zone should
918  // account for that.  See bugs 4446381, 4468289, 4497237.
919  if (C->need_stack_bang(framesize)) {
920    MacroAssembler masm(&cbuf);
921    masm.generate_stack_overflow_check(framesize);
922    need_nop = false;
923  }
924
925  // We always push rbp so that on return to interpreter rbp will be
926  // restored correctly and we can correct the stack.
927  emit_opcode(cbuf, 0x50 | RBP_enc);
928
929  if (VerifyStackAtCalls) {
930    // Majik cookie to verify stack depth
931    emit_opcode(cbuf, 0x68); // pushq (sign-extended) 0xbadb100d
932    emit_d32(cbuf, 0xbadb100d);
933    framesize -= wordSize; // Remove 2 for cookie
934    need_nop = false;
935  }
936
937  if (framesize) {
938    emit_opcode(cbuf, Assembler::REX_W);
939    if (framesize < 0x80) {
940      emit_opcode(cbuf, 0x83);   // sub  SP,#framesize
941      emit_rm(cbuf, 0x3, 0x05, RSP_enc);
942      emit_d8(cbuf, framesize);
943      if (need_nop) {
944        emit_opcode(cbuf, 0x90); // nop
945      }
946    } else {
947      emit_opcode(cbuf, 0x81);   // sub  SP,#framesize
948      emit_rm(cbuf, 0x3, 0x05, RSP_enc);
949      emit_d32(cbuf, framesize);
950    }
951  }
952
953  C->set_frame_complete(cbuf.code_end() - cbuf.code_begin());
954
955#ifdef ASSERT
956  if (VerifyStackAtCalls) {
957    Label L;
958    MacroAssembler masm(&cbuf);
959    masm.push(rax);
960    masm.mov(rax, rsp);
961    masm.andptr(rax, StackAlignmentInBytes-1);
962    masm.cmpptr(rax, StackAlignmentInBytes-wordSize);
963    masm.pop(rax);
964    masm.jcc(Assembler::equal, L);
965    masm.stop("Stack is not properly aligned!");
966    masm.bind(L);
967  }
968#endif
969}
970
971uint MachPrologNode::size(PhaseRegAlloc* ra_) const
972{
973  return MachNode::size(ra_); // too many variables; just compute it
974                              // the hard way
975}
976
977int MachPrologNode::reloc() const
978{
979  return 0; // a large enough number
980}
981
982//=============================================================================
983#ifndef PRODUCT
984void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
985{
986  Compile* C = ra_->C;
987  int framesize = C->frame_slots() << LogBytesPerInt;
988  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
989  // Remove word for return adr already pushed
990  // and RBP
991  framesize -= 2*wordSize;
992
993  if (framesize) {
994    st->print_cr("addq\trsp, %d\t# Destroy frame", framesize);
995    st->print("\t");
996  }
997
998  st->print_cr("popq\trbp");
999  if (do_polling() && C->is_method_compilation()) {
1000    st->print_cr("\ttestl\trax, [rip + #offset_to_poll_page]\t"
1001                  "# Safepoint: poll for GC");
1002    st->print("\t");
1003  }
1004}
1005#endif
1006
1007void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1008{
1009  Compile* C = ra_->C;
1010  int framesize = C->frame_slots() << LogBytesPerInt;
1011  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1012  // Remove word for return adr already pushed
1013  // and RBP
1014  framesize -= 2*wordSize;
1015
1016  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
1017
1018  if (framesize) {
1019    emit_opcode(cbuf, Assembler::REX_W);
1020    if (framesize < 0x80) {
1021      emit_opcode(cbuf, 0x83); // addq rsp, #framesize
1022      emit_rm(cbuf, 0x3, 0x00, RSP_enc);
1023      emit_d8(cbuf, framesize);
1024    } else {
1025      emit_opcode(cbuf, 0x81); // addq rsp, #framesize
1026      emit_rm(cbuf, 0x3, 0x00, RSP_enc);
1027      emit_d32(cbuf, framesize);
1028    }
1029  }
1030
1031  // popq rbp
1032  emit_opcode(cbuf, 0x58 | RBP_enc);
1033
1034  if (do_polling() && C->is_method_compilation()) {
1035    // testl %rax, off(%rip) // Opcode + ModRM + Disp32 == 6 bytes
1036    // XXX reg_mem doesn't support RIP-relative addressing yet
1037    cbuf.set_inst_mark();
1038    cbuf.relocate(cbuf.inst_mark(), relocInfo::poll_return_type, 0); // XXX
1039    emit_opcode(cbuf, 0x85); // testl
1040    emit_rm(cbuf, 0x0, RAX_enc, 0x5); // 00 rax 101 == 0x5
1041    // cbuf.inst_mark() is beginning of instruction
1042    emit_d32_reloc(cbuf, os::get_polling_page());
1043//                    relocInfo::poll_return_type,
1044  }
1045}
1046
1047uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
1048{
1049  Compile* C = ra_->C;
1050  int framesize = C->frame_slots() << LogBytesPerInt;
1051  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1052  // Remove word for return adr already pushed
1053  // and RBP
1054  framesize -= 2*wordSize;
1055
1056  uint size = 0;
1057
1058  if (do_polling() && C->is_method_compilation()) {
1059    size += 6;
1060  }
1061
1062  // count popq rbp
1063  size++;
1064
1065  if (framesize) {
1066    if (framesize < 0x80) {
1067      size += 4;
1068    } else if (framesize) {
1069      size += 7;
1070    }
1071  }
1072
1073  return size;
1074}
1075
1076int MachEpilogNode::reloc() const
1077{
1078  return 2; // a large enough number
1079}
1080
1081const Pipeline* MachEpilogNode::pipeline() const
1082{
1083  return MachNode::pipeline_class();
1084}
1085
1086int MachEpilogNode::safepoint_offset() const
1087{
1088  return 0;
1089}
1090
1091//=============================================================================
1092
// Register class of one half of a spill-copy operand, as computed by
// rc_class().
enum RC {
  rc_bad   = 0,  // not a valid register
  rc_int   = 1,  // general purpose register
  rc_float = 2,  // XMM register
  rc_stack = 3   // stack slot
};
1099
1100static enum RC rc_class(OptoReg::Name reg)
1101{
1102  if( !OptoReg::is_valid(reg)  ) return rc_bad;
1103
1104  if (OptoReg::is_stack(reg)) return rc_stack;
1105
1106  VMReg r = OptoReg::as_VMReg(reg);
1107
1108  if (r->is_Register()) return rc_int;
1109
1110  assert(r->is_XMMRegister(), "must be");
1111  return rc_float;
1112}
1113
1114uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
1115                                       PhaseRegAlloc* ra_,
1116                                       bool do_size,
1117                                       outputStream* st) const
1118{
1119
1120  // Get registers to move
1121  OptoReg::Name src_second = ra_->get_reg_second(in(1));
1122  OptoReg::Name src_first = ra_->get_reg_first(in(1));
1123  OptoReg::Name dst_second = ra_->get_reg_second(this);
1124  OptoReg::Name dst_first = ra_->get_reg_first(this);
1125
1126  enum RC src_second_rc = rc_class(src_second);
1127  enum RC src_first_rc = rc_class(src_first);
1128  enum RC dst_second_rc = rc_class(dst_second);
1129  enum RC dst_first_rc = rc_class(dst_first);
1130
1131  assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
1132         "must move at least 1 register" );
1133
1134  if (src_first == dst_first && src_second == dst_second) {
1135    // Self copy, no move
1136    return 0;
1137  } else if (src_first_rc == rc_stack) {
1138    // mem ->
1139    if (dst_first_rc == rc_stack) {
1140      // mem -> mem
1141      assert(src_second != dst_first, "overlap");
1142      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1143          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1144        // 64-bit
1145        int src_offset = ra_->reg2offset(src_first);
1146        int dst_offset = ra_->reg2offset(dst_first);
1147        if (cbuf) {
1148          emit_opcode(*cbuf, 0xFF);
1149          encode_RegMem(*cbuf, RSI_enc, RSP_enc, 0x4, 0, src_offset, false);
1150
1151          emit_opcode(*cbuf, 0x8F);
1152          encode_RegMem(*cbuf, RAX_enc, RSP_enc, 0x4, 0, dst_offset, false);
1153
1154#ifndef PRODUCT
1155        } else if (!do_size) {
1156          st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1157                     "popq    [rsp + #%d]",
1158                     src_offset,
1159                     dst_offset);
1160#endif
1161        }
1162        return
1163          3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) +
1164          3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4));
1165      } else {
1166        // 32-bit
1167        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1168        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1169        // No pushl/popl, so:
1170        int src_offset = ra_->reg2offset(src_first);
1171        int dst_offset = ra_->reg2offset(dst_first);
1172        if (cbuf) {
1173          emit_opcode(*cbuf, Assembler::REX_W);
1174          emit_opcode(*cbuf, 0x89);
1175          emit_opcode(*cbuf, 0x44);
1176          emit_opcode(*cbuf, 0x24);
1177          emit_opcode(*cbuf, 0xF8);
1178
1179          emit_opcode(*cbuf, 0x8B);
1180          encode_RegMem(*cbuf,
1181                        RAX_enc,
1182                        RSP_enc, 0x4, 0, src_offset,
1183                        false);
1184
1185          emit_opcode(*cbuf, 0x89);
1186          encode_RegMem(*cbuf,
1187                        RAX_enc,
1188                        RSP_enc, 0x4, 0, dst_offset,
1189                        false);
1190
1191          emit_opcode(*cbuf, Assembler::REX_W);
1192          emit_opcode(*cbuf, 0x8B);
1193          emit_opcode(*cbuf, 0x44);
1194          emit_opcode(*cbuf, 0x24);
1195          emit_opcode(*cbuf, 0xF8);
1196
1197#ifndef PRODUCT
1198        } else if (!do_size) {
1199          st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
1200                     "movl    rax, [rsp + #%d]\n\t"
1201                     "movl    [rsp + #%d], rax\n\t"
1202                     "movq    rax, [rsp - #8]",
1203                     src_offset,
1204                     dst_offset);
1205#endif
1206        }
1207        return
1208          5 + // movq
1209          3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) + // movl
1210          3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4)) + // movl
1211          5; // movq
1212      }
1213    } else if (dst_first_rc == rc_int) {
1214      // mem -> gpr
1215      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1216          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1217        // 64-bit
1218        int offset = ra_->reg2offset(src_first);
1219        if (cbuf) {
1220          if (Matcher::_regEncode[dst_first] < 8) {
1221            emit_opcode(*cbuf, Assembler::REX_W);
1222          } else {
1223            emit_opcode(*cbuf, Assembler::REX_WR);
1224          }
1225          emit_opcode(*cbuf, 0x8B);
1226          encode_RegMem(*cbuf,
1227                        Matcher::_regEncode[dst_first],
1228                        RSP_enc, 0x4, 0, offset,
1229                        false);
1230#ifndef PRODUCT
1231        } else if (!do_size) {
1232          st->print("movq    %s, [rsp + #%d]\t# spill",
1233                     Matcher::regName[dst_first],
1234                     offset);
1235#endif
1236        }
1237        return
1238          ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
1239      } else {
1240        // 32-bit
1241        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1242        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1243        int offset = ra_->reg2offset(src_first);
1244        if (cbuf) {
1245          if (Matcher::_regEncode[dst_first] >= 8) {
1246            emit_opcode(*cbuf, Assembler::REX_R);
1247          }
1248          emit_opcode(*cbuf, 0x8B);
1249          encode_RegMem(*cbuf,
1250                        Matcher::_regEncode[dst_first],
1251                        RSP_enc, 0x4, 0, offset,
1252                        false);
1253#ifndef PRODUCT
1254        } else if (!do_size) {
1255          st->print("movl    %s, [rsp + #%d]\t# spill",
1256                     Matcher::regName[dst_first],
1257                     offset);
1258#endif
1259        }
1260        return
1261          ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1262          ((Matcher::_regEncode[dst_first] < 8)
1263           ? 3
1264           : 4); // REX
1265      }
1266    } else if (dst_first_rc == rc_float) {
1267      // mem-> xmm
1268      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1269          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1270        // 64-bit
1271        int offset = ra_->reg2offset(src_first);
1272        if (cbuf) {
1273          emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
1274          if (Matcher::_regEncode[dst_first] >= 8) {
1275            emit_opcode(*cbuf, Assembler::REX_R);
1276          }
1277          emit_opcode(*cbuf, 0x0F);
1278          emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
1279          encode_RegMem(*cbuf,
1280                        Matcher::_regEncode[dst_first],
1281                        RSP_enc, 0x4, 0, offset,
1282                        false);
1283#ifndef PRODUCT
1284        } else if (!do_size) {
1285          st->print("%s  %s, [rsp + #%d]\t# spill",
1286                     UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
1287                     Matcher::regName[dst_first],
1288                     offset);
1289#endif
1290        }
1291        return
1292          ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1293          ((Matcher::_regEncode[dst_first] < 8)
1294           ? 5
1295           : 6); // REX
1296      } else {
1297        // 32-bit
1298        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1299        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1300        int offset = ra_->reg2offset(src_first);
1301        if (cbuf) {
1302          emit_opcode(*cbuf, 0xF3);
1303          if (Matcher::_regEncode[dst_first] >= 8) {
1304            emit_opcode(*cbuf, Assembler::REX_R);
1305          }
1306          emit_opcode(*cbuf, 0x0F);
1307          emit_opcode(*cbuf, 0x10);
1308          encode_RegMem(*cbuf,
1309                        Matcher::_regEncode[dst_first],
1310                        RSP_enc, 0x4, 0, offset,
1311                        false);
1312#ifndef PRODUCT
1313        } else if (!do_size) {
1314          st->print("movss   %s, [rsp + #%d]\t# spill",
1315                     Matcher::regName[dst_first],
1316                     offset);
1317#endif
1318        }
1319        return
1320          ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1321          ((Matcher::_regEncode[dst_first] < 8)
1322           ? 5
1323           : 6); // REX
1324      }
1325    }
1326  } else if (src_first_rc == rc_int) {
1327    // gpr ->
1328    if (dst_first_rc == rc_stack) {
1329      // gpr -> mem
1330      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1331          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1332        // 64-bit
1333        int offset = ra_->reg2offset(dst_first);
1334        if (cbuf) {
1335          if (Matcher::_regEncode[src_first] < 8) {
1336            emit_opcode(*cbuf, Assembler::REX_W);
1337          } else {
1338            emit_opcode(*cbuf, Assembler::REX_WR);
1339          }
1340          emit_opcode(*cbuf, 0x89);
1341          encode_RegMem(*cbuf,
1342                        Matcher::_regEncode[src_first],
1343                        RSP_enc, 0x4, 0, offset,
1344                        false);
1345#ifndef PRODUCT
1346        } else if (!do_size) {
1347          st->print("movq    [rsp + #%d], %s\t# spill",
1348                     offset,
1349                     Matcher::regName[src_first]);
1350#endif
1351        }
1352        return ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
1353      } else {
1354        // 32-bit
1355        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1356        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1357        int offset = ra_->reg2offset(dst_first);
1358        if (cbuf) {
1359          if (Matcher::_regEncode[src_first] >= 8) {
1360            emit_opcode(*cbuf, Assembler::REX_R);
1361          }
1362          emit_opcode(*cbuf, 0x89);
1363          encode_RegMem(*cbuf,
1364                        Matcher::_regEncode[src_first],
1365                        RSP_enc, 0x4, 0, offset,
1366                        false);
1367#ifndef PRODUCT
1368        } else if (!do_size) {
1369          st->print("movl    [rsp + #%d], %s\t# spill",
1370                     offset,
1371                     Matcher::regName[src_first]);
1372#endif
1373        }
1374        return
1375          ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1376          ((Matcher::_regEncode[src_first] < 8)
1377           ? 3
1378           : 4); // REX
1379      }
1380    } else if (dst_first_rc == rc_int) {
1381      // gpr -> gpr
1382      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1383          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1384        // 64-bit
1385        if (cbuf) {
1386          if (Matcher::_regEncode[dst_first] < 8) {
1387            if (Matcher::_regEncode[src_first] < 8) {
1388              emit_opcode(*cbuf, Assembler::REX_W);
1389            } else {
1390              emit_opcode(*cbuf, Assembler::REX_WB);
1391            }
1392          } else {
1393            if (Matcher::_regEncode[src_first] < 8) {
1394              emit_opcode(*cbuf, Assembler::REX_WR);
1395            } else {
1396              emit_opcode(*cbuf, Assembler::REX_WRB);
1397            }
1398          }
1399          emit_opcode(*cbuf, 0x8B);
1400          emit_rm(*cbuf, 0x3,
1401                  Matcher::_regEncode[dst_first] & 7,
1402                  Matcher::_regEncode[src_first] & 7);
1403#ifndef PRODUCT
1404        } else if (!do_size) {
1405          st->print("movq    %s, %s\t# spill",
1406                     Matcher::regName[dst_first],
1407                     Matcher::regName[src_first]);
1408#endif
1409        }
1410        return 3; // REX
1411      } else {
1412        // 32-bit
1413        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1414        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1415        if (cbuf) {
1416          if (Matcher::_regEncode[dst_first] < 8) {
1417            if (Matcher::_regEncode[src_first] >= 8) {
1418              emit_opcode(*cbuf, Assembler::REX_B);
1419            }
1420          } else {
1421            if (Matcher::_regEncode[src_first] < 8) {
1422              emit_opcode(*cbuf, Assembler::REX_R);
1423            } else {
1424              emit_opcode(*cbuf, Assembler::REX_RB);
1425            }
1426          }
1427          emit_opcode(*cbuf, 0x8B);
1428          emit_rm(*cbuf, 0x3,
1429                  Matcher::_regEncode[dst_first] & 7,
1430                  Matcher::_regEncode[src_first] & 7);
1431#ifndef PRODUCT
1432        } else if (!do_size) {
1433          st->print("movl    %s, %s\t# spill",
1434                     Matcher::regName[dst_first],
1435                     Matcher::regName[src_first]);
1436#endif
1437        }
1438        return
1439          (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1440          ? 2
1441          : 3; // REX
1442      }
1443    } else if (dst_first_rc == rc_float) {
1444      // gpr -> xmm
1445      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1446          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1447        // 64-bit
1448        if (cbuf) {
1449          emit_opcode(*cbuf, 0x66);
1450          if (Matcher::_regEncode[dst_first] < 8) {
1451            if (Matcher::_regEncode[src_first] < 8) {
1452              emit_opcode(*cbuf, Assembler::REX_W);
1453            } else {
1454              emit_opcode(*cbuf, Assembler::REX_WB);
1455            }
1456          } else {
1457            if (Matcher::_regEncode[src_first] < 8) {
1458              emit_opcode(*cbuf, Assembler::REX_WR);
1459            } else {
1460              emit_opcode(*cbuf, Assembler::REX_WRB);
1461            }
1462          }
1463          emit_opcode(*cbuf, 0x0F);
1464          emit_opcode(*cbuf, 0x6E);
1465          emit_rm(*cbuf, 0x3,
1466                  Matcher::_regEncode[dst_first] & 7,
1467                  Matcher::_regEncode[src_first] & 7);
1468#ifndef PRODUCT
1469        } else if (!do_size) {
1470          st->print("movdq   %s, %s\t# spill",
1471                     Matcher::regName[dst_first],
1472                     Matcher::regName[src_first]);
1473#endif
1474        }
1475        return 5; // REX
1476      } else {
1477        // 32-bit
1478        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1479        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1480        if (cbuf) {
1481          emit_opcode(*cbuf, 0x66);
1482          if (Matcher::_regEncode[dst_first] < 8) {
1483            if (Matcher::_regEncode[src_first] >= 8) {
1484              emit_opcode(*cbuf, Assembler::REX_B);
1485            }
1486          } else {
1487            if (Matcher::_regEncode[src_first] < 8) {
1488              emit_opcode(*cbuf, Assembler::REX_R);
1489            } else {
1490              emit_opcode(*cbuf, Assembler::REX_RB);
1491            }
1492          }
1493          emit_opcode(*cbuf, 0x0F);
1494          emit_opcode(*cbuf, 0x6E);
1495          emit_rm(*cbuf, 0x3,
1496                  Matcher::_regEncode[dst_first] & 7,
1497                  Matcher::_regEncode[src_first] & 7);
1498#ifndef PRODUCT
1499        } else if (!do_size) {
1500          st->print("movdl   %s, %s\t# spill",
1501                     Matcher::regName[dst_first],
1502                     Matcher::regName[src_first]);
1503#endif
1504        }
1505        return
1506          (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1507          ? 4
1508          : 5; // REX
1509      }
1510    }
1511  } else if (src_first_rc == rc_float) {
1512    // xmm ->
1513    if (dst_first_rc == rc_stack) {
1514      // xmm -> mem
1515      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1516          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1517        // 64-bit
1518        int offset = ra_->reg2offset(dst_first);
1519        if (cbuf) {
1520          emit_opcode(*cbuf, 0xF2);
1521          if (Matcher::_regEncode[src_first] >= 8) {
1522              emit_opcode(*cbuf, Assembler::REX_R);
1523          }
1524          emit_opcode(*cbuf, 0x0F);
1525          emit_opcode(*cbuf, 0x11);
1526          encode_RegMem(*cbuf,
1527                        Matcher::_regEncode[src_first],
1528                        RSP_enc, 0x4, 0, offset,
1529                        false);
1530#ifndef PRODUCT
1531        } else if (!do_size) {
1532          st->print("movsd   [rsp + #%d], %s\t# spill",
1533                     offset,
1534                     Matcher::regName[src_first]);
1535#endif
1536        }
1537        return
1538          ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1539          ((Matcher::_regEncode[src_first] < 8)
1540           ? 5
1541           : 6); // REX
1542      } else {
1543        // 32-bit
1544        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1545        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1546        int offset = ra_->reg2offset(dst_first);
1547        if (cbuf) {
1548          emit_opcode(*cbuf, 0xF3);
1549          if (Matcher::_regEncode[src_first] >= 8) {
1550              emit_opcode(*cbuf, Assembler::REX_R);
1551          }
1552          emit_opcode(*cbuf, 0x0F);
1553          emit_opcode(*cbuf, 0x11);
1554          encode_RegMem(*cbuf,
1555                        Matcher::_regEncode[src_first],
1556                        RSP_enc, 0x4, 0, offset,
1557                        false);
1558#ifndef PRODUCT
1559        } else if (!do_size) {
1560          st->print("movss   [rsp + #%d], %s\t# spill",
1561                     offset,
1562                     Matcher::regName[src_first]);
1563#endif
1564        }
1565        return
1566          ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1567          ((Matcher::_regEncode[src_first] < 8)
1568           ? 5
1569           : 6); // REX
1570      }
1571    } else if (dst_first_rc == rc_int) {
1572      // xmm -> gpr
1573      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1574          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1575        // 64-bit
1576        if (cbuf) {
1577          emit_opcode(*cbuf, 0x66);
1578          if (Matcher::_regEncode[dst_first] < 8) {
1579            if (Matcher::_regEncode[src_first] < 8) {
1580              emit_opcode(*cbuf, Assembler::REX_W);
1581            } else {
1582              emit_opcode(*cbuf, Assembler::REX_WR); // attention!
1583            }
1584          } else {
1585            if (Matcher::_regEncode[src_first] < 8) {
1586              emit_opcode(*cbuf, Assembler::REX_WB); // attention!
1587            } else {
1588              emit_opcode(*cbuf, Assembler::REX_WRB);
1589            }
1590          }
1591          emit_opcode(*cbuf, 0x0F);
1592          emit_opcode(*cbuf, 0x7E);
1593          emit_rm(*cbuf, 0x3,
1594                  Matcher::_regEncode[dst_first] & 7,
1595                  Matcher::_regEncode[src_first] & 7);
1596#ifndef PRODUCT
1597        } else if (!do_size) {
1598          st->print("movdq   %s, %s\t# spill",
1599                     Matcher::regName[dst_first],
1600                     Matcher::regName[src_first]);
1601#endif
1602        }
1603        return 5; // REX
1604      } else {
1605        // 32-bit
1606        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1607        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1608        if (cbuf) {
1609          emit_opcode(*cbuf, 0x66);
1610          if (Matcher::_regEncode[dst_first] < 8) {
1611            if (Matcher::_regEncode[src_first] >= 8) {
1612              emit_opcode(*cbuf, Assembler::REX_R); // attention!
1613            }
1614          } else {
1615            if (Matcher::_regEncode[src_first] < 8) {
1616              emit_opcode(*cbuf, Assembler::REX_B); // attention!
1617            } else {
1618              emit_opcode(*cbuf, Assembler::REX_RB);
1619            }
1620          }
1621          emit_opcode(*cbuf, 0x0F);
1622          emit_opcode(*cbuf, 0x7E);
1623          emit_rm(*cbuf, 0x3,
1624                  Matcher::_regEncode[dst_first] & 7,
1625                  Matcher::_regEncode[src_first] & 7);
1626#ifndef PRODUCT
1627        } else if (!do_size) {
1628          st->print("movdl   %s, %s\t# spill",
1629                     Matcher::regName[dst_first],
1630                     Matcher::regName[src_first]);
1631#endif
1632        }
1633        return
1634          (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1635          ? 4
1636          : 5; // REX
1637      }
1638    } else if (dst_first_rc == rc_float) {
1639      // xmm -> xmm
1640      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1641          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1642        // 64-bit
1643        if (cbuf) {
1644          emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
1645          if (Matcher::_regEncode[dst_first] < 8) {
1646            if (Matcher::_regEncode[src_first] >= 8) {
1647              emit_opcode(*cbuf, Assembler::REX_B);
1648            }
1649          } else {
1650            if (Matcher::_regEncode[src_first] < 8) {
1651              emit_opcode(*cbuf, Assembler::REX_R);
1652            } else {
1653              emit_opcode(*cbuf, Assembler::REX_RB);
1654            }
1655          }
1656          emit_opcode(*cbuf, 0x0F);
1657          emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
1658          emit_rm(*cbuf, 0x3,
1659                  Matcher::_regEncode[dst_first] & 7,
1660                  Matcher::_regEncode[src_first] & 7);
1661#ifndef PRODUCT
1662        } else if (!do_size) {
1663          st->print("%s  %s, %s\t# spill",
1664                     UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
1665                     Matcher::regName[dst_first],
1666                     Matcher::regName[src_first]);
1667#endif
1668        }
1669        return
1670          (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1671          ? 4
1672          : 5; // REX
1673      } else {
1674        // 32-bit
1675        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1676        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1677        if (cbuf) {
1678          if (!UseXmmRegToRegMoveAll)
1679            emit_opcode(*cbuf, 0xF3);
1680          if (Matcher::_regEncode[dst_first] < 8) {
1681            if (Matcher::_regEncode[src_first] >= 8) {
1682              emit_opcode(*cbuf, Assembler::REX_B);
1683            }
1684          } else {
1685            if (Matcher::_regEncode[src_first] < 8) {
1686              emit_opcode(*cbuf, Assembler::REX_R);
1687            } else {
1688              emit_opcode(*cbuf, Assembler::REX_RB);
1689            }
1690          }
1691          emit_opcode(*cbuf, 0x0F);
1692          emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
1693          emit_rm(*cbuf, 0x3,
1694                  Matcher::_regEncode[dst_first] & 7,
1695                  Matcher::_regEncode[src_first] & 7);
1696#ifndef PRODUCT
1697        } else if (!do_size) {
1698          st->print("%s  %s, %s\t# spill",
1699                     UseXmmRegToRegMoveAll ? "movaps" : "movss ",
1700                     Matcher::regName[dst_first],
1701                     Matcher::regName[src_first]);
1702#endif
1703        }
1704        return
1705          (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1706          ? (UseXmmRegToRegMoveAll ? 3 : 4)
1707          : (UseXmmRegToRegMoveAll ? 4 : 5); // REX
1708      }
1709    }
1710  }
1711
1712  assert(0," foo ");
1713  Unimplemented();
1714
1715  return 0;
1716}
1717
1718#ifndef PRODUCT
1719void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const
1720{
1721  implementation(NULL, ra_, false, st);
1722}
1723#endif
1724
1725void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const
1726{
1727  implementation(&cbuf, ra_, false, NULL);
1728}
1729
1730uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const
1731{
1732  return implementation(NULL, ra_, true, NULL);
1733}
1734
1735//=============================================================================
1736#ifndef PRODUCT
1737void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const
1738{
1739  st->print("nop \t# %d bytes pad for loops and calls", _count);
1740}
1741#endif
1742
1743void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const
1744{
1745  MacroAssembler _masm(&cbuf);
1746  __ nop(_count);
1747}
1748
1749uint MachNopNode::size(PhaseRegAlloc*) const
1750{
1751  return _count;
1752}
1753
1754
1755//=============================================================================
1756#ifndef PRODUCT
1757void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1758{
1759  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1760  int reg = ra_->get_reg_first(this);
1761  st->print("leaq    %s, [rsp + #%d]\t# box lock",
1762            Matcher::regName[reg], offset);
1763}
1764#endif
1765
1766void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1767{
1768  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1769  int reg = ra_->get_encode(this);
1770  if (offset >= 0x80) {
1771    emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1772    emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1773    emit_rm(cbuf, 0x2, reg & 7, 0x04);
1774    emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1775    emit_d32(cbuf, offset);
1776  } else {
1777    emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1778    emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1779    emit_rm(cbuf, 0x1, reg & 7, 0x04);
1780    emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1781    emit_d8(cbuf, offset);
1782  }
1783}
1784
1785uint BoxLockNode::size(PhaseRegAlloc *ra_) const
1786{
1787  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1788  return (offset < 0x80) ? 5 : 8; // REX
1789}
1790
1791//=============================================================================
1792
1793// emit call stub, compiled java to interpreter
1794void emit_java_to_interp(CodeBuffer& cbuf)
1795{
1796  // Stub is fixed up when the corresponding call is converted from
1797  // calling compiled code to calling interpreted code.
1798  // movq rbx, 0
1799  // jmp -5 # to self
1800
1801  address mark = cbuf.inst_mark();  // get mark within main instrs section
1802
1803  // Note that the code buffer's inst_mark is always relative to insts.
1804  // That's why we must use the macroassembler to generate a stub.
1805  MacroAssembler _masm(&cbuf);
1806
1807  address base =
1808  __ start_a_stub(Compile::MAX_stubs_size);
1809  if (base == NULL)  return;  // CodeBuffer::expand failed
1810  // static stub relocation stores the instruction address of the call
1811  __ relocate(static_stub_Relocation::spec(mark), RELOC_IMM64);
1812  // static stub relocation also tags the methodOop in the code-stream.
1813  __ movoop(rbx, (jobject) NULL);  // method is zapped till fixup time
1814  // This is recognized as unresolved by relocs/nativeinst/ic code
1815  __ jump(RuntimeAddress(__ pc()));
1816
1817  // Update current stubs pointer and restore code_end.
1818  __ end_a_stub();
1819}
1820
1821// size of call stub, compiled java to interpretor
1822uint size_java_to_interp()
1823{
1824  return 15;  // movq (1+1+8); jmp (1+4)
1825}
1826
1827// relocation entries for call stub, compiled java to interpretor
1828uint reloc_java_to_interp()
1829{
1830  return 4; // 3 in emit_java_to_interp + 1 in Java_Static_Call
1831}
1832
1833//=============================================================================
1834#ifndef PRODUCT
1835void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1836{
1837  if (UseCompressedOops) {
1838    st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t", oopDesc::klass_offset_in_bytes());
1839    if (Universe::narrow_oop_shift() != 0) {
1840      st->print_cr("leaq    rscratch1, [r12_heapbase, r, Address::times_8, 0]");
1841    }
1842    st->print_cr("cmpq    rax, rscratch1\t # Inline cache check");
1843  } else {
1844    st->print_cr("cmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t"
1845                 "# Inline cache check", oopDesc::klass_offset_in_bytes());
1846  }
1847  st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
1848  st->print_cr("\tnop");
1849  if (!OptoBreakpoint) {
1850    st->print_cr("\tnop");
1851  }
1852}
1853#endif
1854
1855void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1856{
1857  MacroAssembler masm(&cbuf);
1858#ifdef ASSERT
1859  uint code_size = cbuf.code_size();
1860#endif
1861  if (UseCompressedOops) {
1862    masm.load_klass(rscratch1, j_rarg0);
1863    masm.cmpptr(rax, rscratch1);
1864  } else {
1865    masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
1866  }
1867
1868  masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1869
1870  /* WARNING these NOPs are critical so that verified entry point is properly
1871     aligned for patching by NativeJump::patch_verified_entry() */
1872  int nops_cnt = 1;
1873  if (!OptoBreakpoint) {
1874    // Leave space for int3
1875     nops_cnt += 1;
1876  }
1877  if (UseCompressedOops) {
1878    // ??? divisible by 4 is aligned?
1879    nops_cnt += 1;
1880  }
1881  masm.nop(nops_cnt);
1882
1883  assert(cbuf.code_size() - code_size == size(ra_),
1884         "checking code size of inline cache node");
1885}
1886
1887uint MachUEPNode::size(PhaseRegAlloc* ra_) const
1888{
1889  if (UseCompressedOops) {
1890    if (Universe::narrow_oop_shift() == 0) {
1891      return OptoBreakpoint ? 15 : 16;
1892    } else {
1893      return OptoBreakpoint ? 19 : 20;
1894    }
1895  } else {
1896    return OptoBreakpoint ? 11 : 12;
1897  }
1898}
1899
1900
1901//=============================================================================
1902uint size_exception_handler()
1903{
1904  // NativeCall instruction size is the same as NativeJump.
1905  // Note that this value is also credited (in output.cpp) to
1906  // the size of the code section.
1907  return NativeJump::instruction_size;
1908}
1909
1910// Emit exception handler code.
1911int emit_exception_handler(CodeBuffer& cbuf)
1912{
1913
1914  // Note that the code buffer's inst_mark is always relative to insts.
1915  // That's why we must use the macroassembler to generate a handler.
1916  MacroAssembler _masm(&cbuf);
1917  address base =
1918  __ start_a_stub(size_exception_handler());
1919  if (base == NULL)  return 0;  // CodeBuffer::expand failed
1920  int offset = __ offset();
1921  __ jump(RuntimeAddress(OptoRuntime::exception_blob()->instructions_begin()));
1922  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
1923  __ end_a_stub();
1924  return offset;
1925}
1926
1927uint size_deopt_handler()
1928{
1929  // three 5 byte instructions
1930  return 15;
1931}
1932
1933// Emit deopt handler code.
1934int emit_deopt_handler(CodeBuffer& cbuf)
1935{
1936
1937  // Note that the code buffer's inst_mark is always relative to insts.
1938  // That's why we must use the macroassembler to generate a handler.
1939  MacroAssembler _masm(&cbuf);
1940  address base =
1941  __ start_a_stub(size_deopt_handler());
1942  if (base == NULL)  return 0;  // CodeBuffer::expand failed
1943  int offset = __ offset();
1944  address the_pc = (address) __ pc();
1945  Label next;
1946  // push a "the_pc" on the stack without destroying any registers
1947  // as they all may be live.
1948
1949  // push address of "next"
1950  __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
1951  __ bind(next);
1952  // adjust it so it matches "the_pc"
1953  __ subptr(Address(rsp, 0), __ offset() - offset);
1954  __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1955  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
1956  __ end_a_stub();
1957  return offset;
1958}
1959
1960static void emit_double_constant(CodeBuffer& cbuf, double x) {
1961  int mark = cbuf.insts()->mark_off();
1962  MacroAssembler _masm(&cbuf);
1963  address double_address = __ double_constant(x);
1964  cbuf.insts()->set_mark_off(mark);  // preserve mark across masm shift
1965  emit_d32_reloc(cbuf,
1966                 (int) (double_address - cbuf.code_end() - 4),
1967                 internal_word_Relocation::spec(double_address),
1968                 RELOC_DISP32);
1969}
1970
1971static void emit_float_constant(CodeBuffer& cbuf, float x) {
1972  int mark = cbuf.insts()->mark_off();
1973  MacroAssembler _masm(&cbuf);
1974  address float_address = __ float_constant(x);
1975  cbuf.insts()->set_mark_off(mark);  // preserve mark across masm shift
1976  emit_d32_reloc(cbuf,
1977                 (int) (float_address - cbuf.code_end() - 4),
1978                 internal_word_Relocation::spec(float_address),
1979                 RELOC_DISP32);
1980}
1981
1982
1983int Matcher::regnum_to_fpu_offset(int regnum)
1984{
1985  return regnum - 32; // The FP registers are in the second chunk
1986}
1987
1988// This is UltraSparc specific, true just means we have fast l2f conversion
1989const bool Matcher::convL2FSupported(void) {
1990  return true;
1991}
1992
1993// Vector width in bytes
1994const uint Matcher::vector_width_in_bytes(void) {
1995  return 8;
1996}
1997
1998// Vector ideal reg
1999const uint Matcher::vector_ideal_reg(void) {
2000  return Op_RegD;
2001}
2002
2003// Is this branch offset short enough that a short branch can be used?
2004//
2005// NOTE: If the platform does not provide any short branch variants, then
2006//       this method should return false for offset 0.
2007bool Matcher::is_short_branch_offset(int rule, int offset) {
2008  // the short version of jmpConUCF2 contains multiple branches,
2009  // making the reach slightly less
2010  if (rule == jmpConUCF2_rule)
2011    return (-126 <= offset && offset <= 125);
2012  return (-128 <= offset && offset <= 127);
2013}
2014
2015const bool Matcher::isSimpleConstant64(jlong value) {
2016  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
2017  //return value == (int) value;  // Cf. storeImmL and immL32.
2018
2019  // Probably always true, even if a temp register is required.
2020  return true;
2021}
2022
2023// The ecx parameter to rep stosq for the ClearArray node is in words, not bytes.
2024const bool Matcher::init_array_count_is_in_bytes = false;
2025
2026// Threshold size for cleararray, expressed as 8 * BytesPerLong.
2027const int Matcher::init_array_short_size = 8 * BytesPerLong;
2028
2029// Should the Matcher clone shifts on addressing modes, expecting them
2030// to be subsumed into complex addressing expressions or compute them
2031// into registers?  True for Intel but false for most RISCs
2032const bool Matcher::clone_shift_expressions = true;
2033
2034// Is it better to copy float constants, or load them directly from
2035// memory?  Intel can load a float constant from a direct address,
2036// requiring no extra registers.  Most RISCs will have to materialize
2037// an address into a register first, so they would do better to copy
2038// the constant from stack.
2039const bool Matcher::rematerialize_float_constants = true; // XXX
2040
2041// If CPU can load and store mis-aligned doubles directly then no
2042// fixup is needed.  Else we split the double into 2 integer pieces
2043// and move it piece-by-piece.  Only happens when passing doubles into
2044// C code as the Java calling convention forces doubles to be aligned.
2045const bool Matcher::misaligned_doubles_ok = true;
2046
2047// No-op on amd64: the body is empty and both arguments are ignored.
2048void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {}
2049
2050// Advertise here if the CPU requires explicit rounding operations to
2051// implement the UseStrictFP mode.
2052const bool Matcher::strict_fp_requires_explicit_rounding = true;
2053
2054// Do floats take an entire double register or just half?
2055const bool Matcher::float_in_double = true;
2056// Do ints take an entire long register or just half?
2057const bool Matcher::int_in_long = true;
2058
2059// Return whether or not this register is ever used as an argument.
2060// This function is used on startup to build the trampoline stubs in
2061// generateOptoStub.  Registers not mentioned will be killed by the VM
2062// call in the trampoline, and arguments in those registers not be
2063// available to the callee.
2064bool Matcher::can_be_java_arg(int reg)
2065{
2066  return
2067    reg ==  RDI_num || reg ==  RDI_H_num ||
2068    reg ==  RSI_num || reg ==  RSI_H_num ||
2069    reg ==  RDX_num || reg ==  RDX_H_num ||
2070    reg ==  RCX_num || reg ==  RCX_H_num ||
2071    reg ==   R8_num || reg ==   R8_H_num ||
2072    reg ==   R9_num || reg ==   R9_H_num ||
2073    reg ==  R12_num || reg ==  R12_H_num ||
2074    reg == XMM0_num || reg == XMM0_H_num ||
2075    reg == XMM1_num || reg == XMM1_H_num ||
2076    reg == XMM2_num || reg == XMM2_H_num ||
2077    reg == XMM3_num || reg == XMM3_H_num ||
2078    reg == XMM4_num || reg == XMM4_H_num ||
2079    reg == XMM5_num || reg == XMM5_H_num ||
2080    reg == XMM6_num || reg == XMM6_H_num ||
2081    reg == XMM7_num || reg == XMM7_H_num;
2082}
2083
2084bool Matcher::is_spillable_arg(int reg)
2085{
2086  return can_be_java_arg(reg);
2087}
2088
2089// Register for DIVI projection of divmodI
2090RegMask Matcher::divI_proj_mask() {
2091  return INT_RAX_REG_mask;
2092}
2093
2094// Register for MODI projection of divmodI
2095RegMask Matcher::modI_proj_mask() {
2096  return INT_RDX_REG_mask;
2097}
2098
2099// Register for DIVL projection of divmodL
2100RegMask Matcher::divL_proj_mask() {
2101  return LONG_RAX_REG_mask;
2102}
2103
2104// Register for MODL projection of divmodL
2105RegMask Matcher::modL_proj_mask() {
2106  return LONG_RDX_REG_mask;
2107}
2108
2109static Address build_address(int b, int i, int s, int d) {
2110  Register index = as_Register(i);
2111  Address::ScaleFactor scale = (Address::ScaleFactor)s;
2112  if (index == rsp) {
2113    index = noreg;
2114    scale = Address::no_scale;
2115  }
2116  Address addr(as_Register(b), index, scale, d);
2117  return addr;
2118}
2119
2120%}
2121
2122//----------ENCODING BLOCK-----------------------------------------------------
2123// This block specifies the encoding classes used by the compiler to
2124// output byte streams.  Encoding classes are parameterized macros
2125// used by Machine Instruction Nodes in order to generate the bit
2126// encoding of the instruction.  Operands specify their base encoding
2127// interface with the interface keyword.  Four interfaces are currently
2128// supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
2129// COND_INTER.  REG_INTER causes an operand to generate a function
2130// which returns its register number when queried.  CONST_INTER causes
2131// an operand to generate a function which returns the value of the
2132// constant when queried.  MEMORY_INTER causes an operand to generate
2133// four functions which return the Base Register, the Index Register,
2134// the Scale Value, and the Offset Value of the operand when queried.
2135// COND_INTER causes an operand to generate six functions which return
2136// the encoding code (ie - encoding bits for the instruction)
2137// associated with each basic boolean condition for a conditional
2138// instruction.
2139//
2140// Instructions specify two basic values for encoding.  Again, a
2141// function is available to check if the constant displacement is an
2142// oop. They use the ins_encode keyword to specify their encoding
2143// classes (which must be a sequence of enc_class names, and their
2144// parameters, specified in the encoding block), and they use the
2145// opcode keyword to specify, in order, their primary, secondary, and
2146// tertiary opcode.  Only the opcode sections which a particular
2147// instruction needs for encoding need to be specified.
2148encode %{
2149  // Build emit functions for each basic byte or larger field in the
2150  // intel encoding scheme (opcode, rm, sib, immediate), and call them
2151  // from C++ code in the enc_class source block.  Emit functions will
2152  // live in the main source block for now.  In future, we can
2153  // generalize this by adding a syntax that specifies the sizes of
2154  // fields in an order, so that the adlc can build the emit functions
2155  // automagically
2156
2157  // Emit primary opcode
2158  enc_class OpcP
2159  %{
2160    emit_opcode(cbuf, $primary);
2161  %}
2162
2163  // Emit secondary opcode
2164  enc_class OpcS
2165  %{
2166    emit_opcode(cbuf, $secondary);
2167  %}
2168
2169  // Emit tertiary opcode
2170  enc_class OpcT
2171  %{
2172    emit_opcode(cbuf, $tertiary);
2173  %}
2174
2175  // Emit opcode directly
2176  enc_class Opcode(immI d8)
2177  %{
2178    emit_opcode(cbuf, $d8$$constant);
2179  %}
2180
2181  // Emit size prefix
2182  enc_class SizePrefix
2183  %{
2184    emit_opcode(cbuf, 0x66);
2185  %}
2186
2187  enc_class reg(rRegI reg)
2188  %{
2189    emit_rm(cbuf, 0x3, 0, $reg$$reg & 7);
2190  %}
2191
2192  enc_class reg_reg(rRegI dst, rRegI src)
2193  %{
2194    emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2195  %}
2196
2197  enc_class opc_reg_reg(immI opcode, rRegI dst, rRegI src)
2198  %{
2199    emit_opcode(cbuf, $opcode$$constant);
2200    emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2201  %}
2202
2203  enc_class cmpfp_fixup()
2204  %{
2205    // jnp,s exit
2206    emit_opcode(cbuf, 0x7B);
2207    emit_d8(cbuf, 0x0A);
2208
2209    // pushfq
2210    emit_opcode(cbuf, 0x9C);
2211
2212    // andq $0xffffff2b, (%rsp)
2213    emit_opcode(cbuf, Assembler::REX_W);
2214    emit_opcode(cbuf, 0x81);
2215    emit_opcode(cbuf, 0x24);
2216    emit_opcode(cbuf, 0x24);
2217    emit_d32(cbuf, 0xffffff2b);
2218
2219    // popfq
2220    emit_opcode(cbuf, 0x9D);
2221
2222    // nop (target for branch to avoid branch to branch)
2223    emit_opcode(cbuf, 0x90);
2224  %}
2225
2226  enc_class cmpfp3(rRegI dst)
2227  %{
2228    int dstenc = $dst$$reg;
2229
2230    // movl $dst, -1
2231    if (dstenc >= 8) {
2232      emit_opcode(cbuf, Assembler::REX_B);
2233    }
2234    emit_opcode(cbuf, 0xB8 | (dstenc & 7));
2235    emit_d32(cbuf, -1);
2236
2237    // jp,s done
2238    emit_opcode(cbuf, 0x7A);
2239    emit_d8(cbuf, dstenc < 4 ? 0x08 : 0x0A);
2240
2241    // jb,s done
2242    emit_opcode(cbuf, 0x72);
2243    emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);
2244
2245    // setne $dst
2246    if (dstenc >= 4) {
2247      emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
2248    }
2249    emit_opcode(cbuf, 0x0F);
2250    emit_opcode(cbuf, 0x95);
2251    emit_opcode(cbuf, 0xC0 | (dstenc & 7));
2252
2253    // movzbl $dst, $dst
2254    if (dstenc >= 4) {
2255      emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
2256    }
2257    emit_opcode(cbuf, 0x0F);
2258    emit_opcode(cbuf, 0xB6);
2259    emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
2260  %}
2261
2262  enc_class cdql_enc(no_rax_rdx_RegI div)
2263  %{
2264    // Full implementation of Java idiv and irem; checks for
2265    // special case as described in JVM spec., p.243 & p.271.
2266    //
2267    //         normal case                           special case
2268    //
2269    // input : rax: dividend                         min_int
2270    //         reg: divisor                          -1
2271    //
2272    // output: rax: quotient  (= rax idiv reg)       min_int
2273    //         rdx: remainder (= rax irem reg)       0
2274    //
2275    //  Code sequnce:
2276    //
2277    //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
2278    //    5:   75 07/08                jne    e <normal>
2279    //    7:   33 d2                   xor    %edx,%edx
2280    //  [div >= 8 -> offset + 1]
2281    //  [REX_B]
2282    //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
2283    //    c:   74 03/04                je     11 <done>
2284    // 000000000000000e <normal>:
2285    //    e:   99                      cltd
2286    //  [div >= 8 -> offset + 1]
2287    //  [REX_B]
2288    //    f:   f7 f9                   idiv   $div
2289    // 0000000000000011 <done>:
2290
2291    // cmp    $0x80000000,%eax
2292    emit_opcode(cbuf, 0x3d);
2293    emit_d8(cbuf, 0x00);
2294    emit_d8(cbuf, 0x00);
2295    emit_d8(cbuf, 0x00);
2296    emit_d8(cbuf, 0x80);
2297
2298    // jne    e <normal>
2299    emit_opcode(cbuf, 0x75);
2300    emit_d8(cbuf, $div$$reg < 8 ? 0x07 : 0x08);
2301
2302    // xor    %edx,%edx
2303    emit_opcode(cbuf, 0x33);
2304    emit_d8(cbuf, 0xD2);
2305
2306    // cmp    $0xffffffffffffffff,%ecx
2307    if ($div$$reg >= 8) {
2308      emit_opcode(cbuf, Assembler::REX_B);
2309    }
2310    emit_opcode(cbuf, 0x83);
2311    emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
2312    emit_d8(cbuf, 0xFF);
2313
2314    // je     11 <done>
2315    emit_opcode(cbuf, 0x74);
2316    emit_d8(cbuf, $div$$reg < 8 ? 0x03 : 0x04);
2317
2318    // <normal>
2319    // cltd
2320    emit_opcode(cbuf, 0x99);
2321
2322    // idivl (note: must be emitted by the user of this rule)
2323    // <done>
2324  %}
2325
2326  enc_class cdqq_enc(no_rax_rdx_RegL div)
2327  %{
2328    // Full implementation of Java ldiv and lrem; checks for
2329    // special case as described in JVM spec., p.243 & p.271.
2330    //
2331    //         normal case                           special case
2332    //
2333    // input : rax: dividend                         min_long
2334    //         reg: divisor                          -1
2335    //
2336    // output: rax: quotient  (= rax idiv reg)       min_long
2337    //         rdx: remainder (= rax irem reg)       0
2338    //
2339    //  Code sequnce:
2340    //
2341    //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
2342    //    7:   00 00 80
2343    //    a:   48 39 d0                cmp    %rdx,%rax
2344    //    d:   75 08                   jne    17 <normal>
2345    //    f:   33 d2                   xor    %edx,%edx
2346    //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
2347    //   15:   74 05                   je     1c <done>
2348    // 0000000000000017 <normal>:
2349    //   17:   48 99                   cqto
2350    //   19:   48 f7 f9                idiv   $div
2351    // 000000000000001c <done>:
2352
2353    // mov    $0x8000000000000000,%rdx
2354    emit_opcode(cbuf, Assembler::REX_W);
2355    emit_opcode(cbuf, 0xBA);
2356    emit_d8(cbuf, 0x00);
2357    emit_d8(cbuf, 0x00);
2358    emit_d8(cbuf, 0x00);
2359    emit_d8(cbuf, 0x00);
2360    emit_d8(cbuf, 0x00);
2361    emit_d8(cbuf, 0x00);
2362    emit_d8(cbuf, 0x00);
2363    emit_d8(cbuf, 0x80);
2364
2365    // cmp    %rdx,%rax
2366    emit_opcode(cbuf, Assembler::REX_W);
2367    emit_opcode(cbuf, 0x39);
2368    emit_d8(cbuf, 0xD0);
2369
2370    // jne    17 <normal>
2371    emit_opcode(cbuf, 0x75);
2372    emit_d8(cbuf, 0x08);
2373
2374    // xor    %edx,%edx
2375    emit_opcode(cbuf, 0x33);
2376    emit_d8(cbuf, 0xD2);
2377
2378    // cmp    $0xffffffffffffffff,$div
2379    emit_opcode(cbuf, $div$$reg < 8 ? Assembler::REX_W : Assembler::REX_WB);
2380    emit_opcode(cbuf, 0x83);
2381    emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
2382    emit_d8(cbuf, 0xFF);
2383
2384    // je     1e <done>
2385    emit_opcode(cbuf, 0x74);
2386    emit_d8(cbuf, 0x05);
2387
2388    // <normal>
2389    // cqto
2390    emit_opcode(cbuf, Assembler::REX_W);
2391    emit_opcode(cbuf, 0x99);
2392
2393    // idivq (note: must be emitted by the user of this rule)
2394    // <done>
2395  %}
2396
2397  // Opcde enc_class for 8/32 bit immediate instructions with sign-extension
2398  enc_class OpcSE(immI imm)
2399  %{
2400    // Emit primary opcode and set sign-extend bit
2401    // Check for 8-bit immediate, and set sign extend bit in opcode
2402    if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2403      emit_opcode(cbuf, $primary | 0x02);
2404    } else {
2405      // 32-bit immediate
2406      emit_opcode(cbuf, $primary);
2407    }
2408  %}
2409
2410  enc_class OpcSErm(rRegI dst, immI imm)
2411  %{
2412    // OpcSEr/m
2413    int dstenc = $dst$$reg;
2414    if (dstenc >= 8) {
2415      emit_opcode(cbuf, Assembler::REX_B);
2416      dstenc -= 8;
2417    }
2418    // Emit primary opcode and set sign-extend bit
2419    // Check for 8-bit immediate, and set sign extend bit in opcode
2420    if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2421      emit_opcode(cbuf, $primary | 0x02);
2422    } else {
2423      // 32-bit immediate
2424      emit_opcode(cbuf, $primary);
2425    }
2426    // Emit r/m byte with secondary opcode, after primary opcode.
2427    emit_rm(cbuf, 0x3, $secondary, dstenc);
2428  %}
2429
2430  enc_class OpcSErm_wide(rRegL dst, immI imm)
2431  %{
2432    // OpcSEr/m
2433    int dstenc = $dst$$reg;
2434    if (dstenc < 8) {
2435      emit_opcode(cbuf, Assembler::REX_W);
2436    } else {
2437      emit_opcode(cbuf, Assembler::REX_WB);
2438      dstenc -= 8;
2439    }
2440    // Emit primary opcode and set sign-extend bit
2441    // Check for 8-bit immediate, and set sign extend bit in opcode
2442    if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2443      emit_opcode(cbuf, $primary | 0x02);
2444    } else {
2445      // 32-bit immediate
2446      emit_opcode(cbuf, $primary);
2447    }
2448    // Emit r/m byte with secondary opcode, after primary opcode.
2449    emit_rm(cbuf, 0x3, $secondary, dstenc);
2450  %}
2451
2452  enc_class Con8or32(immI imm)
2453  %{
2454    // Check for 8-bit immediate, and set sign extend bit in opcode
2455    if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2456      $$$emit8$imm$$constant;
2457    } else {
2458      // 32-bit immediate
2459      $$$emit32$imm$$constant;
2460    }
2461  %}
2462
2463  enc_class Lbl(label labl)
2464  %{
2465    // JMP, CALL
2466    Label* l = $labl$$label;
2467    emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0);
2468  %}
2469
2470  enc_class LblShort(label labl)
2471  %{
2472    // JMP, CALL
2473    Label* l = $labl$$label;
2474    int disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0;
2475    assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
2476    emit_d8(cbuf, disp);
2477  %}
2478
2479  enc_class opc2_reg(rRegI dst)
2480  %{
2481    // BSWAP
2482    emit_cc(cbuf, $secondary, $dst$$reg);
2483  %}
2484
2485  enc_class opc3_reg(rRegI dst)
2486  %{
2487    // BSWAP
2488    emit_cc(cbuf, $tertiary, $dst$$reg);
2489  %}
2490
2491  enc_class reg_opc(rRegI div)
2492  %{
2493    // INC, DEC, IDIV, IMOD, JMP indirect, ...
2494    emit_rm(cbuf, 0x3, $secondary, $div$$reg & 7);
2495  %}
2496
2497  enc_class Jcc(cmpOp cop, label labl)
2498  %{
2499    // JCC
2500    Label* l = $labl$$label;
2501    $$$emit8$primary;
2502    emit_cc(cbuf, $secondary, $cop$$cmpcode);
2503    emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0);
2504  %}
2505
2506  enc_class JccShort (cmpOp cop, label labl)
2507  %{
2508  // JCC
2509    Label *l = $labl$$label;
2510    emit_cc(cbuf, $primary, $cop$$cmpcode);
2511    int disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0;
2512    assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
2513    emit_d8(cbuf, disp);
2514  %}
2515
2516  enc_class enc_cmov(cmpOp cop)
2517  %{
2518    // CMOV
2519    $$$emit8$primary;
2520    emit_cc(cbuf, $secondary, $cop$$cmpcode);
2521  %}
2522
2523  enc_class enc_cmovf_branch(cmpOp cop, regF dst, regF src)
2524  %{
2525    // Invert sense of branch from sense of cmov
2526    emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
2527    emit_d8(cbuf, ($dst$$reg < 8 && $src$$reg < 8)
2528                  ? (UseXmmRegToRegMoveAll ? 3 : 4)
2529                  : (UseXmmRegToRegMoveAll ? 4 : 5) ); // REX
2530    // UseXmmRegToRegMoveAll ? movaps(dst, src) : movss(dst, src)
2531    if (!UseXmmRegToRegMoveAll) emit_opcode(cbuf, 0xF3);
2532    if ($dst$$reg < 8) {
2533      if ($src$$reg >= 8) {
2534        emit_opcode(cbuf, Assembler::REX_B);
2535      }
2536    } else {
2537      if ($src$$reg < 8) {
2538        emit_opcode(cbuf, Assembler::REX_R);
2539      } else {
2540        emit_opcode(cbuf, Assembler::REX_RB);
2541      }
2542    }
2543    emit_opcode(cbuf, 0x0F);
2544    emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
2545    emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2546  %}
2547
2548  enc_class enc_cmovd_branch(cmpOp cop, regD dst, regD src)
2549  %{
        // Double conditional move built from a short conditional jump (opcode
        // base 0x70) over an XMM register-to-register move. The move is
        // movapd or movsd depending on UseXmmRegToRegMoveAll.
2550    // Invert sense of branch from sense of cmov
2551    emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
        // Jump distance = byte length of the move emitted below: a prefix byte
        // is always present here (0x66 or 0xF2), then 0F + opcode + ModRM,
        // plus one byte when a REX prefix is needed.
2552    emit_d8(cbuf, $dst$$reg < 8 && $src$$reg < 8 ? 4 : 5); // REX
2553
2554    //  UseXmmRegToRegMoveAll ? movapd(dst, src) : movsd(dst, src)
2555    emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
        // REX_R extends dst (ModRM reg field), REX_B extends src (ModRM r/m field).
2556    if ($dst$$reg < 8) {
2557      if ($src$$reg >= 8) {
2558        emit_opcode(cbuf, Assembler::REX_B);
2559      }
2560    } else {
2561      if ($src$$reg < 8) {
2562        emit_opcode(cbuf, Assembler::REX_R);
2563      } else {
2564        emit_opcode(cbuf, Assembler::REX_RB);
2565      }
2566    }
2567    emit_opcode(cbuf, 0x0F);
2568    emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
2569    emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2570  %}
2571
2572  enc_class enc_PartialSubtypeCheck()
2573  %{
        // Emits the slow-path subtype check through the macro assembler, using
        // fixed registers: RSI = sub class, RAX = super class, RCX = scratch
        // (killed), RDI = result.
2574    Register Rrdi = as_Register(RDI_enc); // result register
2575    Register Rrax = as_Register(RAX_enc); // super class
2576    Register Rrcx = as_Register(RCX_enc); // killed
2577    Register Rrsi = as_Register(RSI_enc); // sub class
2578    Label miss;
2579    const bool set_cond_codes = true;
2580
2581    MacroAssembler _masm(&cbuf);
        // No explicit success label is passed (NULL); failures branch to 'miss'.
        // The named constant is passed instead of a bare literal so the local
        // declared above is actually used.
2582    __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
2583                                     NULL, &miss,
2584                                     set_cond_codes);
        // When $primary is non-zero the result register is zeroed on the
        // fall-through path; 'miss' is bound after it, so a miss skips the zeroing.
2585    if ($primary) {
2586      __ xorptr(Rrdi, Rrdi);
2587    }
2588    __ bind(miss);
2589  %}
2590
2591  enc_class Java_To_Interpreter(method meth)
2592  %{
        // Emits the primary opcode followed by a relocated 32-bit displacement
        // to $meth, tagged with a runtime_call relocation.
2593    // CALL Java_To_Interpreter
2594    // This is the instruction starting address for relocation info.
2595    cbuf.set_inst_mark();
2596    $$$emit8$primary;
2597    // CALL directly to the runtime
        // Displacement is relative to the end of the 4-byte field, hence "- 4"
        // after subtracting the current code end.
2598    emit_d32_reloc(cbuf,
2599                   (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2600                   runtime_call_Relocation::spec(),
2601                   RELOC_DISP32);
2602  %}
2603
2604  enc_class Java_Static_Call(method meth)
2605  %{
        // Emits the primary opcode plus a relocated 32-bit displacement to $meth.
        // The relocation kind depends on the node: runtime_call when _method is
        // null, opt_virtual_call when _optimized_virtual is set, otherwise
        // static_call. When _method is non-null, emit_java_to_interp is also
        // called to emit a stub.
2606    // JAVA STATIC CALL
2607    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
2608    // determine who we intended to call.
2609    cbuf.set_inst_mark();
2610    $$$emit8$primary;
2611
2612    if (!_method) {
2613      emit_d32_reloc(cbuf,
2614                     (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2615                     runtime_call_Relocation::spec(),
2616                     RELOC_DISP32);
2617    } else if (_optimized_virtual) {
2618      emit_d32_reloc(cbuf,
2619                     (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2620                     opt_virtual_call_Relocation::spec(),
2621                     RELOC_DISP32);
2622    } else {
2623      emit_d32_reloc(cbuf,
2624                     (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2625                     static_call_Relocation::spec(),
2626                     RELOC_DISP32);
2627    }
2628    if (_method) {
2629      // Emit stub for static call
2630      emit_java_to_interp(cbuf);
2631    }
2632  %}
2633
2634  enc_class Java_Dynamic_Call(method meth)
2635  %{
        // Emits two instructions: a 64-bit immediate load into RAX carrying an
        // oop relocation (placeholder value Universe::non_oop_word()), then the
        // call itself with a virtual_call relocation that refers back to the
        // address of that load.
2636    // JAVA DYNAMIC CALL
2637    // !!!!!
2638    // Generate  "movq rax, -1", placeholder instruction to load oop-info
2639    // emit_call_dynamic_prologue( cbuf );
2640    cbuf.set_inst_mark();
2641
2642    // movq rax, -1
2643    emit_opcode(cbuf, Assembler::REX_W);
2644    emit_opcode(cbuf, 0xB8 | RAX_enc);
2645    emit_d64_reloc(cbuf,
2646                   (int64_t) Universe::non_oop_word(),
2647                   oop_Relocation::spec_for_immediate(), RELOC_IMM64);
        // Remember the mark set before the load; it is passed to the call's
        // relocation spec below, before the mark is reset for the call.
2648    address virtual_call_oop_addr = cbuf.inst_mark();
2649    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
2650    // who we intended to call.
2651    cbuf.set_inst_mark();
2652    $$$emit8$primary;
2653    emit_d32_reloc(cbuf,
2654                   (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2655                   virtual_call_Relocation::spec(virtual_call_oop_addr),
2656                   RELOC_DISP32);
2657  %}
2658
2659  enc_class Java_Compiled_Call(method meth)
2660  %{
        // Emits an indirect call through memory at disp(%rax), where disp is
        // methodOopDesc::from_compiled_offset(). The primary opcode is followed
        // by a ModRM byte with $secondary in the reg field and RAX as base.
2661    // JAVA COMPILED CALL
2662    int disp = in_bytes(methodOopDesc:: from_compiled_offset());
2663
2664    // XXX XXX offset is 128 is 1.5 NON-PRODUCT !!!
2665    // assert(-0x80 <= disp && disp < 0x80, "compiled_code_offset isn't small");
2666
2667    // callq *disp(%rax)
2668    cbuf.set_inst_mark();
2669    $$$emit8$primary;
        // The one-byte displacement form (mod = 01) is sign-extended, so it is
        // only valid for -0x80 <= disp < 0x80. Both bounds are checked; any
        // other value uses the 32-bit form (mod = 10) instead of being truncated.
2670    if (-0x80 <= disp && disp < 0x80) {
2671      emit_rm(cbuf, 0x01, $secondary, RAX_enc); // R/M byte
2672      emit_d8(cbuf, disp); // Displacement
2673    } else {
2674      emit_rm(cbuf, 0x02, $secondary, RAX_enc); // R/M byte
2675      emit_d32(cbuf, disp); // Displacement
2676    }
2677  %}
2678
2679  enc_class reg_opc_imm(rRegI dst, immI8 shift)
2680  %{
        // 32-bit shift by an 8-bit immediate: optional REX_B, primary opcode,
        // ModRM (mod = 11, reg = $secondary, r/m = dst), then the shift count.
2681    // SAL, SAR, SHR
2682    int dstenc = $dst$$reg;
2683    if (dstenc >= 8) {
2684      emit_opcode(cbuf, Assembler::REX_B);
2685      dstenc -= 8;
2686    }
2687    $$$emit8$primary;
2688    emit_rm(cbuf, 0x3, $secondary, dstenc);
2689    $$$emit8$shift$$constant;
2690  %}
2691
2692  enc_class reg_opc_imm_wide(rRegL dst, immI8 shift)
2693  %{
        // 64-bit variant of reg_opc_imm: a REX prefix with W set is always
        // emitted (REX_W, or REX_WB when dst's encoding is >= 8).
2694    // SAL, SAR, SHR
2695    int dstenc = $dst$$reg;
2696    if (dstenc < 8) {
2697      emit_opcode(cbuf, Assembler::REX_W);
2698    } else {
2699      emit_opcode(cbuf, Assembler::REX_WB);
2700      dstenc -= 8;
2701    }
2702    $$$emit8$primary;
2703    emit_rm(cbuf, 0x3, $secondary, dstenc);
2704    $$$emit8$shift$$constant;
2705  %}
2706
2707  enc_class load_immI(rRegI dst, immI src)
2708  %{
        // Loads a 32-bit immediate into dst: optional REX_B, opcode 0xB8 with
        // the low three bits of the register encoding OR-ed in, then imm32.
2709    int dstenc = $dst$$reg;
2710    if (dstenc >= 8) {
2711      emit_opcode(cbuf, Assembler::REX_B);
2712      dstenc -= 8;
2713    }
2714    emit_opcode(cbuf, 0xB8 | dstenc);
2715    $$$emit32$src$$constant;
2716  %}
2717
2718  enc_class load_immL(rRegL dst, immL src)
2719  %{
        // Loads a full 64-bit immediate into dst: REX_W (or REX_WB for dst
        // encodings >= 8), opcode 0xB8 | reg, then the 8-byte constant.
2720    int dstenc = $dst$$reg;
2721    if (dstenc < 8) {
2722      emit_opcode(cbuf, Assembler::REX_W);
2723    } else {
2724      emit_opcode(cbuf, Assembler::REX_WB);
2725      dstenc -= 8;
2726    }
2727    emit_opcode(cbuf, 0xB8 | dstenc);
2728    emit_d64(cbuf, $src$$constant);
2729  %}
2730
2731  enc_class load_immUL32(rRegL dst, immUL32 src)
2732  %{
        // Emits the same byte sequence as load_immI (no REX.W): optional REX_B,
        // 0xB8 | reg, imm32. The destination operand is a long register.
2733    // same as load_immI, but this time we care about zeroes in the high word
2734    int dstenc = $dst$$reg;
2735    if (dstenc >= 8) {
2736      emit_opcode(cbuf, Assembler::REX_B);
2737      dstenc -= 8;
2738    }
2739    emit_opcode(cbuf, 0xB8 | dstenc);
2740    $$$emit32$src$$constant;
2741  %}
2742
2743  enc_class load_immL32(rRegL dst, immL32 src)
2744  %{
        // Loads a 32-bit immediate into a long register using the REX.W form
        // of opcode 0xC7 with ModRM (mod = 11, reg = 0, r/m = dst), then imm32.
2745    int dstenc = $dst$$reg;
2746    if (dstenc < 8) {
2747      emit_opcode(cbuf, Assembler::REX_W);
2748    } else {
2749      emit_opcode(cbuf, Assembler::REX_WB);
2750      dstenc -= 8;
2751    }
2752    emit_opcode(cbuf, 0xC7);
2753    emit_rm(cbuf, 0x03, 0x00, dstenc);
2754    $$$emit32$src$$constant;
2755  %}
2756
2757  enc_class load_immP31(rRegP dst, immP32 src)
2758  %{
        // Pointer-register counterpart of load_immI: optional REX_B,
        // 0xB8 | reg, then a 32-bit immediate (no REX.W is emitted).
2759    // same as load_immI, but this time we care about zeroes in the high word
2760    int dstenc = $dst$$reg;
2761    if (dstenc >= 8) {
2762      emit_opcode(cbuf, Assembler::REX_B);
2763      dstenc -= 8;
2764    }
2765    emit_opcode(cbuf, 0xB8 | dstenc);
2766    $$$emit32$src$$constant;
2767  %}
2768
2769  enc_class load_immP(rRegP dst, immP src)
2770  %{
        // Loads a 64-bit pointer immediate into dst: REX_W / REX_WB,
        // 0xB8 | reg, then the 8-byte constant. If the constant is an oop it
        // is emitted with an oop_type relocation; otherwise it is emitted raw.
2771    int dstenc = $dst$$reg;
2772    if (dstenc < 8) {
2773      emit_opcode(cbuf, Assembler::REX_W);
2774    } else {
2775      emit_opcode(cbuf, Assembler::REX_WB);
2776      dstenc -= 8;
2777    }
2778    emit_opcode(cbuf, 0xB8 | dstenc);
2779    // This next line should be generated from ADLC
2780    if ($src->constant_is_oop()) {
2781      emit_d64_reloc(cbuf, $src$$constant, relocInfo::oop_type, RELOC_IMM64);
2782    } else {
2783      emit_d64(cbuf, $src$$constant);
2784    }
2785  %}
2786
2787  enc_class load_immF(regF dst, immF con)
2788  %{
        // Emits only the operand part of a float-constant load: a ModRM byte
        // (mod = 00, reg = dst & 7, r/m = 101) followed by emit_float_constant.
        // NOTE(review): no prefix or opcode bytes are emitted here; presumably
        // the using instruction supplies them via other encodings - confirm.
2789    // XXX reg_mem doesn't support RIP-relative addressing yet
2790    emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // 00 reg 101
2791    emit_float_constant(cbuf, $con$$constant);
2792  %}
2793
2794  enc_class load_immD(regD dst, immD con)
2795  %{
        // Emits only the operand part of a double-constant load: a ModRM byte
        // (mod = 00, reg = dst & 7, r/m = 101) followed by emit_double_constant.
        // NOTE(review): no prefix or opcode bytes are emitted here; presumably
        // the using instruction supplies them via other encodings - confirm.
2796    // XXX reg_mem doesn't support RIP-relative addressing yet
2797    emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // 00 reg 101
2798    emit_double_constant(cbuf, $con$$constant);
2799  %}
2800
2801  enc_class load_conF (regF dst, immF con) %{    // Load float constant
        // Complete instruction: F3 [REX_R] 0F 10, ModRM (mod = 00,
        // reg = dst & 7, r/m = 101), then the constant via emit_float_constant.
        // REX_R is emitted only when dst's encoding is >= 8.
2802    emit_opcode(cbuf, 0xF3);
2803    if ($dst$$reg >= 8) {
2804      emit_opcode(cbuf, Assembler::REX_R);
2805    }
2806    emit_opcode(cbuf, 0x0F);
2807    emit_opcode(cbuf, 0x10);
2808    emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // 00 reg 101
2809    emit_float_constant(cbuf, $con$$constant);
2810  %}
2811
2812  enc_class load_conD (regD dst, immD con) %{    // Load double constant
        // Complete instruction whose prefix and opcode depend on
        // UseXmmLoadAndClearUpper: F2 ... 0F 10 when set, 66 ... 0F 12 otherwise.
        // REX_R is inserted after the prefix when dst's encoding is >= 8.
2813    // UseXmmLoadAndClearUpper ? movsd(dst, con) : movlpd(dst, con)
2814    emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
2815    if ($dst$$reg >= 8) {
2816      emit_opcode(cbuf, Assembler::REX_R);
2817    }
2818    emit_opcode(cbuf, 0x0F);
2819    emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
2820    emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // 00 reg 101
2821    emit_double_constant(cbuf, $con$$constant);
2822  %}
2823
2824  // Encode a reg-reg copy.  If it is useless, then empty encoding.
2825  enc_class enc_copy(rRegI dst, rRegI src)
2826  %{
        // Thin wrapper: passes the two register encodings to encode_copy.
2827    encode_copy(cbuf, $dst$$reg, $src$$reg);
2828  %}
2829
2830  // Encode xmm reg-reg copy.  If it is useless, then empty encoding.
2831  enc_class enc_CopyXD( RegD dst, RegD src ) %{
        // Thin wrapper: passes the two XMM register encodings to encode_CopyXD.
2832    encode_CopyXD( cbuf, $dst$$reg, $src$$reg );
2833  %}
2834
2835  enc_class enc_copy_always(rRegI dst, rRegI src)
2836  %{
        // Unconditionally emits a 32-bit register-to-register move (opcode
        // 0x8B), even when dst and src are the same register. dst goes in the
        // ModRM reg field, src in r/m.
2837    int srcenc = $src$$reg;
2838    int dstenc = $dst$$reg;
2839
        // Pick the REX prefix (if any) and reduce both encodings to 3 bits.
2840    if (dstenc < 8) {
2841      if (srcenc >= 8) {
2842        emit_opcode(cbuf, Assembler::REX_B);
2843        srcenc -= 8;
2844      }
2845    } else {
2846      if (srcenc < 8) {
2847        emit_opcode(cbuf, Assembler::REX_R);
2848      } else {
2849        emit_opcode(cbuf, Assembler::REX_RB);
2850        srcenc -= 8;
2851      }
2852      dstenc -= 8;
2853    }
2854
2855    emit_opcode(cbuf, 0x8B);
2856    emit_rm(cbuf, 0x3, dstenc, srcenc);
2857  %}
2858
2859  enc_class enc_copy_wide(rRegL dst, rRegL src)
2860  %{
        // 64-bit register-to-register move (REX.W + 0x8B). Nothing is emitted
        // when dst and src have the same encoding.
2861    int srcenc = $src$$reg;
2862    int dstenc = $dst$$reg;
2863
2864    if (dstenc != srcenc) {
          // A REX prefix with W set is always needed; R and B are added for
          // dst and src encodings >= 8 respectively.
2865      if (dstenc < 8) {
2866        if (srcenc < 8) {
2867          emit_opcode(cbuf, Assembler::REX_W);
2868        } else {
2869          emit_opcode(cbuf, Assembler::REX_WB);
2870          srcenc -= 8;
2871        }
2872      } else {
2873        if (srcenc < 8) {
2874          emit_opcode(cbuf, Assembler::REX_WR);
2875        } else {
2876          emit_opcode(cbuf, Assembler::REX_WRB);
2877          srcenc -= 8;
2878        }
2879        dstenc -= 8;
2880      }
2881      emit_opcode(cbuf, 0x8B);
2882      emit_rm(cbuf, 0x3, dstenc, srcenc);
2883    }
2884  %}
2885
2886  enc_class Con32(immI src)
2887  %{
        // Appends the operand's constant as a 32-bit immediate.
2888    // Output immediate
2889    $$$emit32$src$$constant;
2890  %}
2891
2892  enc_class Con64(immL src)
2893  %{
        // Appends the operand's constant as a 64-bit immediate. The code
        // buffer is passed explicitly, matching the other emit_d64 call sites
        // in this file (e.g. load_immL).
2894    // Output immediate
2895    emit_d64(cbuf, $src$$constant);
2896  %}
2897
2898  enc_class Con32F_as_bits(immF src)
2899  %{
        // Emits a float constant as a 32-bit immediate, converting it to an
        // integer with jint_cast first.
2900    // Output Float immediate bits
2901    jfloat jf = $src$$constant;
2902    jint jf_as_bits = jint_cast(jf);
2903    emit_d32(cbuf, jf_as_bits);
2904  %}
2905
2906  enc_class Con16(immI src)
2907  %{
        // Appends the operand's constant as a 16-bit immediate.
2908    // Output immediate
2909    $$$emit16$src$$constant;
2910  %}
2911
2912  // How is this different from Con32??? XXX
2913  enc_class Con_d32(immI src)
2914  %{
        // Appends the operand's constant as 32 bits through a direct emit_d32
        // call rather than the $$$emit32 form.
2915    emit_d32(cbuf,$src$$constant);
2916  %}
2917
2918  enc_class conmemref (rRegP t1) %{    // Con32(storeImmI)
        // Emits a ModRM byte (mod = 00, reg = t1, r/m = 101) followed by a
        // zero 32-bit displacement.
        // NOTE(review): t1's encoding is passed unmasked, with no REX handling
        // here - presumably callers guarantee it is < 8; confirm.
2919    // Output immediate memory reference
2920    emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2921    emit_d32(cbuf, 0x00);
2922  %}
2923
2924  enc_class jump_enc(rRegL switch_val, rRegI dest) %{
        // Jump-table dispatch: builds an address table constant from
        // _index2label, loads its base into dest with lea, then jumps through
        // memory at [dest + switch_val * 1].
2925    MacroAssembler masm(&cbuf);
2926
2927    Register switch_reg = as_Register($switch_val$$reg);
2928    Register dest_reg   = as_Register($dest$$reg);
2929    address table_base  = masm.address_table_constant(_index2label);
2930
2931    // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
2932    // to do that and the compiler is using that register as one it can allocate.
2933    // So we build it all by hand.
2934    // Address index(noreg, switch_reg, Address::times_1);
2935    // ArrayAddress dispatch(table, index);
2936
2937    Address dispatch(dest_reg, switch_reg, Address::times_1);
2938
2939    masm.lea(dest_reg, InternalAddress(table_base));
2940    masm.jmp(dispatch);
2941  %}
2942
2943  enc_class jump_enc_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
        // Jump-table dispatch with scale and displacement: loads the table
        // base into dest, then jumps through memory at
        // [dest + switch_val * scale + offset], with the scale factor taken
        // from $shift.
2944    MacroAssembler masm(&cbuf);
2945
2946    Register switch_reg = as_Register($switch_val$$reg);
2947    Register dest_reg   = as_Register($dest$$reg);
2948    address table_base  = masm.address_table_constant(_index2label);
2949
2950    // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
2951    // to do that and the compiler is using that register as one it can allocate.
2952    // So we build it all by hand.
2953    // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant, (int)$offset$$constant);
2954    // ArrayAddress dispatch(table, index);
2955
2956    Address dispatch(dest_reg, switch_reg, (Address::ScaleFactor)$shift$$constant, (int)$offset$$constant);
2957
2958    masm.lea(dest_reg, InternalAddress(table_base));
2959    masm.jmp(dispatch);
2960  %}
2961
2962  enc_class jump_enc_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
        // Jump-table dispatch with a scale but no displacement: loads the
        // table base into dest, then jumps through memory at
        // [dest + switch_val * scale], with the scale factor taken from $shift.
2963    MacroAssembler masm(&cbuf);
2964
2965    Register switch_reg = as_Register($switch_val$$reg);
2966    Register dest_reg   = as_Register($dest$$reg);
2967    address table_base  = masm.address_table_constant(_index2label);
2968
2969    // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
2970    // to do that and the compiler is using that register as one it can allocate.
2971    // So we build it all by hand.
2972    // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
2973    // ArrayAddress dispatch(table, index);
2974
2975    Address dispatch(dest_reg, switch_reg, (Address::ScaleFactor)$shift$$constant);
2976    masm.lea(dest_reg, InternalAddress(table_base));
2977    masm.jmp(dispatch);
2978
2979  %}
2980
2981  enc_class lock_prefix()
2982  %{
        // Emits the 0xF0 lock prefix, but only when os::is_MP() is true.
2983    if (os::is_MP()) {
2984      emit_opcode(cbuf, 0xF0); // lock
2985    }
2986  %}
2987
2988  enc_class REX_mem(memory mem)
2989  %{
        // Emits the REX prefix required by a memory operand alone: B when the
        // base encoding is >= 8, X when the index encoding is >= 8, XB for
        // both. Nothing is emitted when both are < 8.
2990    if ($mem$$base >= 8) {
2991      if ($mem$$index < 8) {
2992        emit_opcode(cbuf, Assembler::REX_B);
2993      } else {
2994        emit_opcode(cbuf, Assembler::REX_XB);
2995      }
2996    } else {
2997      if ($mem$$index >= 8) {
2998        emit_opcode(cbuf, Assembler::REX_X);
2999      }
3000    }
3001  %}
3002
3003  enc_class REX_mem_wide(memory mem)
3004  %{
        // 64-bit variant of REX_mem: a prefix with W set is always emitted,
        // with B and/or X added for base/index encodings >= 8.
3005    if ($mem$$base >= 8) {
3006      if ($mem$$index < 8) {
3007        emit_opcode(cbuf, Assembler::REX_WB);
3008      } else {
3009        emit_opcode(cbuf, Assembler::REX_WXB);
3010      }
3011    } else {
3012      if ($mem$$index < 8) {
3013        emit_opcode(cbuf, Assembler::REX_W);
3014      } else {
3015        emit_opcode(cbuf, Assembler::REX_WX);
3016      }
3017    }
3018  %}
3019
3020  // for byte regs
3021  enc_class REX_breg(rRegI reg)
3022  %{
        // Byte-register prefix: encodings 4..7 get a plain REX, encodings >= 8
        // get REX_B, and encodings 0..3 get no prefix.
3023    if ($reg$$reg >= 4) {
3024      emit_opcode(cbuf, $reg$$reg < 8 ? Assembler::REX : Assembler::REX_B);
3025    }
3026  %}
3027
3028  // for byte regs
3029  enc_class REX_reg_breg(rRegI dst, rRegI src)
3030  %{
        // Prefix for a reg,reg form whose source is used as a byte register.
        // With dst < 8, a prefix is emitted only when src >= 4 (plain REX for
        // 4..7, REX_B for >= 8). With dst >= 8, REX_R or REX_RB is always emitted.
3031    if ($dst$$reg < 8) {
3032      if ($src$$reg >= 4) {
3033        emit_opcode(cbuf, $src$$reg < 8 ? Assembler::REX : Assembler::REX_B);
3034      }
3035    } else {
3036      if ($src$$reg < 8) {
3037        emit_opcode(cbuf, Assembler::REX_R);
3038      } else {
3039        emit_opcode(cbuf, Assembler::REX_RB);
3040      }
3041    }
3042  %}
3043
3044  // for byte regs
3045  enc_class REX_breg_mem(rRegI reg, memory mem)
3046  %{
        // Prefix for a byte-register / memory form. R covers reg >= 8, B covers
        // base >= 8, X covers index >= 8. When none of those apply but reg is
        // in 4..7, a plain REX is emitted.
3047    if ($reg$$reg < 8) {
3048      if ($mem$$base < 8) {
3049        if ($mem$$index >= 8) {
3050          emit_opcode(cbuf, Assembler::REX_X);
3051        } else if ($reg$$reg >= 4) {
3052          emit_opcode(cbuf, Assembler::REX);
3053        }
3054      } else {
3055        if ($mem$$index < 8) {
3056          emit_opcode(cbuf, Assembler::REX_B);
3057        } else {
3058          emit_opcode(cbuf, Assembler::REX_XB);
3059        }
3060      }
3061    } else {
3062      if ($mem$$base < 8) {
3063        if ($mem$$index < 8) {
3064          emit_opcode(cbuf, Assembler::REX_R);
3065        } else {
3066          emit_opcode(cbuf, Assembler::REX_RX);
3067        }
3068      } else {
3069        if ($mem$$index < 8) {
3070          emit_opcode(cbuf, Assembler::REX_RB);
3071        } else {
3072          emit_opcode(cbuf, Assembler::REX_RXB);
3073        }
3074      }
3075    }
3076  %}
3077
3078  enc_class REX_reg(rRegI reg)
3079  %{
        // Emits REX_B when the register encoding is >= 8; otherwise nothing.
3080    if ($reg$$reg >= 8) {
3081      emit_opcode(cbuf, Assembler::REX_B);
3082    }
3083  %}
3084
3085  enc_class REX_reg_wide(rRegI reg)
3086  %{
        // Always emits a prefix with W set: REX_W for encodings < 8, REX_WB
        // otherwise.
3087    if ($reg$$reg < 8) {
3088      emit_opcode(cbuf, Assembler::REX_W);
3089    } else {
3090      emit_opcode(cbuf, Assembler::REX_WB);
3091    }
3092  %}
3093
3094  enc_class REX_reg_reg(rRegI dst, rRegI src)
3095  %{
        // Prefix for a 32-bit reg,reg form: R for dst >= 8, B for src >= 8,
        // RB for both. Nothing is emitted when both encodings are < 8.
3096    if ($dst$$reg < 8) {
3097      if ($src$$reg >= 8) {
3098        emit_opcode(cbuf, Assembler::REX_B);
3099      }
3100    } else {
3101      if ($src$$reg < 8) {
3102        emit_opcode(cbuf, Assembler::REX_R);
3103      } else {
3104        emit_opcode(cbuf, Assembler::REX_RB);
3105      }
3106    }
3107  %}
3108
3109  enc_class REX_reg_reg_wide(rRegI dst, rRegI src)
3110  %{
        // 64-bit variant of REX_reg_reg: a prefix with W set is always
        // emitted, with R for dst >= 8 and B for src >= 8.
3111    if ($dst$$reg < 8) {
3112      if ($src$$reg < 8) {
3113        emit_opcode(cbuf, Assembler::REX_W);
3114      } else {
3115        emit_opcode(cbuf, Assembler::REX_WB);
3116      }
3117    } else {
3118      if ($src$$reg < 8) {
3119        emit_opcode(cbuf, Assembler::REX_WR);
3120      } else {
3121        emit_opcode(cbuf, Assembler::REX_WRB);
3122      }
3123    }
3124  %}
3125
3126  enc_class REX_reg_mem(rRegI reg, memory mem)
3127  %{
        // Prefix for a 32-bit reg / memory form: R for reg >= 8, B for
        // base >= 8, X for index >= 8, in any combination. Nothing is emitted
        // when all three encodings are < 8.
3128    if ($reg$$reg < 8) {
3129      if ($mem$$base < 8) {
3130        if ($mem$$index >= 8) {
3131          emit_opcode(cbuf, Assembler::REX_X);
3132        }
3133      } else {
3134        if ($mem$$index < 8) {
3135          emit_opcode(cbuf, Assembler::REX_B);
3136        } else {
3137          emit_opcode(cbuf, Assembler::REX_XB);
3138        }
3139      }
3140    } else {
3141      if ($mem$$base < 8) {
3142        if ($mem$$index < 8) {
3143          emit_opcode(cbuf, Assembler::REX_R);
3144        } else {
3145          emit_opcode(cbuf, Assembler::REX_RX);
3146        }
3147      } else {
3148        if ($mem$$index < 8) {
3149          emit_opcode(cbuf, Assembler::REX_RB);
3150        } else {
3151          emit_opcode(cbuf, Assembler::REX_RXB);
3152        }
3153      }
3154    }
3155  %}
3156
3157  enc_class REX_reg_mem_wide(rRegL reg, memory mem)
3158  %{
        // 64-bit variant of REX_reg_mem: a prefix with W set is always
        // emitted, with R / X / B added for reg / index / base encodings >= 8.
3159    if ($reg$$reg < 8) {
3160      if ($mem$$base < 8) {
3161        if ($mem$$index < 8) {
3162          emit_opcode(cbuf, Assembler::REX_W);
3163        } else {
3164          emit_opcode(cbuf, Assembler::REX_WX);
3165        }
3166      } else {
3167        if ($mem$$index < 8) {
3168          emit_opcode(cbuf, Assembler::REX_WB);
3169        } else {
3170          emit_opcode(cbuf, Assembler::REX_WXB);
3171        }
3172      }
3173    } else {
3174      if ($mem$$base < 8) {
3175        if ($mem$$index < 8) {
3176          emit_opcode(cbuf, Assembler::REX_WR);
3177        } else {
3178          emit_opcode(cbuf, Assembler::REX_WRX);
3179        }
3180      } else {
3181        if ($mem$$index < 8) {
3182          emit_opcode(cbuf, Assembler::REX_WRB);
3183        } else {
3184          emit_opcode(cbuf, Assembler::REX_WRXB);
3185        }
3186      }
3187    }
3188  %}
3189
3190  enc_class reg_mem(rRegI ereg, memory mem)
3191  %{
        // Unpacks the register encoding and the memory operand's base, index,
        // scale and displacement, then delegates the operand encoding to
        // encode_RegMem.
3192    // High registers handle in encode_RegMem
3193    int reg = $ereg$$reg;
3194    int base = $mem$$base;
3195    int index = $mem$$index;
3196    int scale = $mem$$scale;
3197    int disp = $mem$$disp;
3198    bool disp_is_oop = $mem->disp_is_oop();
3199
3200    encode_RegMem(cbuf, reg, base, index, scale, disp, disp_is_oop);
3201  %}
3202
3203  enc_class RM_opc_mem(immI rm_opcode, memory mem)
3204  %{
        // Like reg_mem, but the value passed to encode_RegMem in the register
        // position is the constant $rm_opcode rather than a register encoding.
3205    int rm_byte_opcode = $rm_opcode$$constant;
3206
3207    // High registers handle in encode_RegMem
3208    int base = $mem$$base;
3209    int index = $mem$$index;
3210    int scale = $mem$$scale;
3211    int displace = $mem$$disp;
3212
3213    bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when
3214                                            // working with static
3215                                            // globals
3216    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace,
3217                  disp_is_oop);
3218  %}
3219
3220  enc_class reg_lea(rRegI dst, rRegI src0, immI src1)
3221  %{
        // Encodes the operand part of "dst, [src0 + src1]" through
        // encode_RegMem: src0 is the base, there is no index or scale, and the
        // immediate src1 is the displacement (never treated as an oop).
3222    int reg_encoding = $dst$$reg;
3223    int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
3224    int index        = 0x04;            // 0x04 indicates no index
3225    int scale        = 0x00;            // 0x00 indicates no scale
3226    int displace     = $src1$$constant; // 0x00 indicates no displacement
3227    bool disp_is_oop = false;
3228    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace,
3229                  disp_is_oop);
3230  %}
3231
3232  enc_class neg_reg(rRegI dst)
3233  %{
        // 32-bit negate of dst: optional REX_B, opcode 0xF7, ModRM
        // (mod = 11, reg = 3, r/m = dst).
3234    int dstenc = $dst$$reg;
3235    if (dstenc >= 8) {
3236      emit_opcode(cbuf, Assembler::REX_B);
3237      dstenc -= 8;
3238    }
3239    // NEG $dst
3240    emit_opcode(cbuf, 0xF7);
3241    emit_rm(cbuf, 0x3, 0x03, dstenc);
3242  %}
3243
3244  enc_class neg_reg_wide(rRegI dst)
3245  %{
        // 64-bit negate of dst: REX_W (or REX_WB for encodings >= 8), opcode
        // 0xF7, ModRM (mod = 11, reg = 3, r/m = dst).
3246    int dstenc = $dst$$reg;
3247    if (dstenc < 8) {
3248      emit_opcode(cbuf, Assembler::REX_W);
3249    } else {
3250      emit_opcode(cbuf, Assembler::REX_WB);
3251      dstenc -= 8;
3252    }
3253    // NEG $dst
3254    emit_opcode(cbuf, 0xF7);
3255    emit_rm(cbuf, 0x3, 0x03, dstenc);
3256  %}
3257
3258  enc_class setLT_reg(rRegI dst)
3259  %{
        // Set-byte-on-less-than into dst (0F 9C). The destination is a byte
        // register, so encodings 4..7 need a plain REX prefix and encodings
        // >= 8 need REX_B.
3260    int dstenc = $dst$$reg;
3261    if (dstenc >= 8) {
3262      emit_opcode(cbuf, Assembler::REX_B);
3263      dstenc -= 8;
3264    } else if (dstenc >= 4) {
3265      emit_opcode(cbuf, Assembler::REX);
3266    }
3267    // SETLT $dst
3268    emit_opcode(cbuf, 0x0F);
3269    emit_opcode(cbuf, 0x9C);
3270    emit_rm(cbuf, 0x3, 0x0, dstenc);
3271  %}
3272
3273  enc_class setNZ_reg(rRegI dst)
3274  %{
        // Set-byte-on-not-zero into dst (0F 95). The destination is a byte
        // register, so encodings 4..7 need a plain REX prefix and encodings
        // >= 8 need REX_B.
3275    int dstenc = $dst$$reg;
3276    if (dstenc >= 8) {
3277      emit_opcode(cbuf, Assembler::REX_B);
3278      dstenc -= 8;
3279    } else if (dstenc >= 4) {
3280      emit_opcode(cbuf, Assembler::REX);
3281    }
3282    // SETNZ $dst
3283    emit_opcode(cbuf, 0x0F);
3284    emit_opcode(cbuf, 0x95);
3285    emit_rm(cbuf, 0x3, 0x0, dstenc);
3286  %}
3287
3288  enc_class enc_cmpLTP(no_rcx_RegI p, no_rcx_RegI q, no_rcx_RegI y,
3289                       rcx_RegI tmp)
3290  %{
        // Branch-free four-instruction sequence:
        //   subl p, q ; sbbl tmp, tmp ; andl tmp, y ; addl p, tmp
        // tmp is used unmasked and without any REX bit. Its operand class
        // (rcx_RegI) presumably fixes it to RCX, which needs none - confirm
        // against the operand definition.
3291    // cadd_cmpLT
3292
3293    int tmpReg = $tmp$$reg;
3294
3295    int penc = $p$$reg;
3296    int qenc = $q$$reg;
3297    int yenc = $y$$reg;
3298
3299    // subl $p,$q
        // p is the ModRM reg field (REX_R), q the r/m field (REX_B).
3300    if (penc < 8) {
3301      if (qenc >= 8) {
3302        emit_opcode(cbuf, Assembler::REX_B);
3303      }
3304    } else {
3305      if (qenc < 8) {
3306        emit_opcode(cbuf, Assembler::REX_R);
3307      } else {
3308        emit_opcode(cbuf, Assembler::REX_RB);
3309      }
3310    }
3311    emit_opcode(cbuf, 0x2B);
3312    emit_rm(cbuf, 0x3, penc & 7, qenc & 7);
3313
3314    // sbbl $tmp, $tmp
3315    emit_opcode(cbuf, 0x1B);
3316    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
3317
3318    // andl $tmp, $y
        // y is in the r/m field, so only REX_B can be needed.
3319    if (yenc >= 8) {
3320      emit_opcode(cbuf, Assembler::REX_B);
3321    }
3322    emit_opcode(cbuf, 0x23);
3323    emit_rm(cbuf, 0x3, tmpReg, yenc & 7);
3324
3325    // addl $p,$tmp
        // p is in the reg field, so only REX_R can be needed.
3326    if (penc >= 8) {
3327        emit_opcode(cbuf, Assembler::REX_R);
3328    }
3329    emit_opcode(cbuf, 0x03);
3330    emit_rm(cbuf, 0x3, penc & 7, tmpReg);
3331  %}
3332
3333  // Compare the longs and set -1, 0, or 1 into dst
3334  enc_class cmpl3_flag(rRegL src1, rRegL src2, rRegI dst)
3335  %{
        // Three-way compare of two 64-bit registers into dst. Emitted sequence:
        //   cmpq src1, src2 ; movl dst, -1 ; jl,s done ;
        //   setne dst ; movzbl dst, dst ; done:
3336    int src1enc = $src1$$reg;
3337    int src2enc = $src2$$reg;
3338    int dstenc = $dst$$reg;
3339
3340    // cmpq $src1, $src2
        // src1 is the ModRM reg field (REX R bit), src2 the r/m field (B bit).
3341    if (src1enc < 8) {
3342      if (src2enc < 8) {
3343        emit_opcode(cbuf, Assembler::REX_W);
3344      } else {
3345        emit_opcode(cbuf, Assembler::REX_WB);
3346      }
3347    } else {
3348      if (src2enc < 8) {
3349        emit_opcode(cbuf, Assembler::REX_WR);
3350      } else {
3351        emit_opcode(cbuf, Assembler::REX_WRB);
3352      }
3353    }
3354    emit_opcode(cbuf, 0x3B);
3355    emit_rm(cbuf, 0x3, src1enc & 7, src2enc & 7);
3356
3357    // movl $dst, -1
3358    if (dstenc >= 8) {
3359      emit_opcode(cbuf, Assembler::REX_B);
3360    }
3361    emit_opcode(cbuf, 0xB8 | (dstenc & 7));
3362    emit_d32(cbuf, -1);
3363
3364    // jl,s done
        // Skip distance = length of setne + movzbl below: 3 bytes each, plus
        // one REX byte each when dst's encoding is >= 4.
3365    emit_opcode(cbuf, 0x7C);
3366    emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);
3367
3368    // setne $dst
3369    if (dstenc >= 4) {
3370      emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
3371    }
3372    emit_opcode(cbuf, 0x0F);
3373    emit_opcode(cbuf, 0x95);
3374    emit_opcode(cbuf, 0xC0 | (dstenc & 7));
3375
3376    // movzbl $dst, $dst
        // dst appears in both the reg and r/m fields, hence REX_RB for
        // encodings >= 8.
3377    if (dstenc >= 4) {
3378      emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
3379    }
3380    emit_opcode(cbuf, 0x0F);
3381    emit_opcode(cbuf, 0xB6);
3382    emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
3383  %}
3384
3385  enc_class Push_ResultXD(regD dst) %{
        // Moves an x87 result into XMM register dst through the stack: store
        // it to [RSP], load it into dst (movsd or movlpd, chosen by
        // UseXmmLoadAndClearUpper), then add 8 to RSP.
3386    int dstenc = $dst$$reg;
3387
3388    store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [RSP]
3389
3390    // UseXmmLoadAndClearUpper ? movsd dst,[rsp] : movlpd dst,[rsp]
3391    emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
3392    if (dstenc >= 8) {
3393      emit_opcode(cbuf, Assembler::REX_R);
3394    }
3395    emit_opcode  (cbuf, 0x0F );
3396    emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12 );
3397    encode_RegMem(cbuf, dstenc, RSP_enc, 0x4, 0, 0, false);
3398
3399    // add rsp,8
3400    emit_opcode(cbuf, Assembler::REX_W);
3401    emit_opcode(cbuf,0x83);
3402    emit_rm(cbuf,0x3, 0x0, RSP_enc);
3403    emit_d8(cbuf,0x08);
3404  %}
3405
3406  enc_class Push_SrcXD(regD src) %{
        // Pushes XMM register src onto the x87 stack through memory: subtract
        // 8 from RSP, store src to [RSP] with movsd, then load it from [RSP]
        // with the x87 load. RSP is not restored in this encoding.
3407    int srcenc = $src$$reg;
3408
3409    // subq rsp,#8
3410    emit_opcode(cbuf, Assembler::REX_W);
3411    emit_opcode(cbuf, 0x83);
3412    emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3413    emit_d8(cbuf, 0x8);
3414
3415    // movsd [rsp],src
3416    emit_opcode(cbuf, 0xF2);
3417    if (srcenc >= 8) {
3418      emit_opcode(cbuf, Assembler::REX_R);
3419    }
3420    emit_opcode(cbuf, 0x0F);
3421    emit_opcode(cbuf, 0x11);
3422    encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false);
3423
3424    // fldd [rsp]
        // NOTE(review): a 0x66 prefix byte is emitted ahead of the 0xDD
        // opcode; its purpose is not evident from this block - confirm it is
        // intended.
3425    emit_opcode(cbuf, 0x66);
3426    emit_opcode(cbuf, 0xDD);
3427    encode_RegMem(cbuf, 0x0, RSP_enc, 0x4, 0, 0, false);
3428  %}
3429
3430
3431  enc_class movq_ld(regD dst, memory mem) %{
        // movq load: memory operand -> XMM register, via the macro assembler.
3432    MacroAssembler _masm(&cbuf);
3433    __ movq($dst$$XMMRegister, $mem$$Address);
3434  %}
3435
3436  enc_class movq_st(memory mem, regD src) %{
        // movq store: XMM register -> memory operand, via the macro assembler.
3437    MacroAssembler _masm(&cbuf);
3438    __ movq($mem$$Address, $src$$XMMRegister);
3439  %}
3440
3441  enc_class pshufd_8x8(regF dst, regF src) %{
        // Three steps: copy src to dst (encode_CopyXD), then punpcklbw dst,dst,
        // then pshuflw dst,dst with mode 0x00.
3442    MacroAssembler _masm(&cbuf);
3443
3444    encode_CopyXD(cbuf, $dst$$reg, $src$$reg);
3445    __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg));
3446    __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00);
3447  %}
3448
3449  enc_class pshufd_4x16(regF dst, regF src) %{
        // Single pshuflw dst,src with mode 0x00, via the macro assembler.
3450    MacroAssembler _masm(&cbuf);
3451
3452    __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00);
3453  %}
3454
3455  enc_class pshufd(regD dst, regD src, int mode) %{
        // Single pshufd dst,src with the caller-supplied shuffle mode.
3456    MacroAssembler _masm(&cbuf);
3457
3458    __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode);
3459  %}
3460
3461  enc_class pxor(regD dst, regD src) %{
        // Single pxor dst,src, via the macro assembler.
3462    MacroAssembler _masm(&cbuf);
3463
3464    __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg));
3465  %}
3466
3467  enc_class mov_i2x(regD dst, rRegI src) %{
        // movdl from general-purpose register src to XMM register dst.
3468    MacroAssembler _masm(&cbuf);
3469
3470    __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg));
3471  %}
3472
3473  // obj: object to lock
3474  // box: box address (header location) -- killed
3475  // tmp: rax -- killed
3476  // scr: rbx -- killed
3477  //
3478  // What follows is a direct transliteration of fast_lock() and fast_unlock()
3479  // from i486.ad.  See that file for comments.
3480  // TODO: where possible switch from movq (r, 0) to movl(r,0) and
3481  // use the shorter encoding.  (Movl clears the high-order 32-bits).
3482
3483
3484  enc_class Fast_Lock(rRegP obj, rRegP box, rax_RegI tmp, rRegP scr)
3485  %{
        // Emits the inline lock sequence for obj, using box as the on-stack
        // lock record and tmp / scr as scratch registers. EmitSync selects one
        // of three code shapes:
        //   bit 0 set : minimal sequence (store unused_mark into the box and
        //               compare rsp against NULL_WORD)
        //   bit 1 set : simple stack-lock path (CAS plus recursion check)
        //   otherwise : full path that also handles inflated monitors
3486    Register objReg = as_Register((int)$obj$$reg);
3487    Register boxReg = as_Register((int)$box$$reg);
3488    Register tmpReg = as_Register($tmp$$reg);
3489    Register scrReg = as_Register($scr$$reg);
3490    MacroAssembler masm(&cbuf);
3491
3492    // Verify uniqueness of register assignments -- necessary but not sufficient
3493    assert (objReg != boxReg && objReg != tmpReg &&
3494            objReg != scrReg && tmpReg != scrReg, "invariant") ;
3495
        // Optional statistics: count every entry when counters are attached.
3496    if (_counters != NULL) {
3497      masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
3498    }
3499    if (EmitSync & 1) {
3500        // Without cast to int32_t a movptr will destroy r10 which is typically obj
3501        masm.movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ; 
3502        masm.cmpptr(rsp, (int32_t)NULL_WORD) ; 
3503    } else
3504    if (EmitSync & 2) {
3505        Label DONE_LABEL;
3506        if (UseBiasedLocking) {
3507           // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
3508          masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
3509        }
            // Build the expected mark word (word at [obj] with bit 0 set) in
            // tmp, save it in the box, then try to CAS the box address into [obj].
3510        // QQQ was movl...
3511        masm.movptr(tmpReg, 0x1);
3512        masm.orptr(tmpReg, Address(objReg, 0));
3513        masm.movptr(Address(boxReg, 0), tmpReg);
3514        if (os::is_MP()) {
3515          masm.lock();
3516        }
3517        masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
3518        masm.jcc(Assembler::equal, DONE_LABEL);
3519
3520        // Recursive locking
            // tmp = (tmp - rsp) & (7 - page_size); the result is stored into
            // the box.
3521        masm.subptr(tmpReg, rsp);
3522        masm.andptr(tmpReg, 7 - os::vm_page_size());
3523        masm.movptr(Address(boxReg, 0), tmpReg);
3524
3525        masm.bind(DONE_LABEL);
3526        masm.nop(); // avoid branch to branch
3527    } else {
3528        Label DONE_LABEL, IsInflated, Egress;
3529
            // Load the word at [obj] and branch to IsInflated when bit 0x02 is set.
3530        masm.movptr(tmpReg, Address(objReg, 0)) ; 
3531        masm.testl (tmpReg, 0x02) ;         // inflated vs stack-locked|neutral|biased
3532        masm.jcc   (Assembler::notZero, IsInflated) ; 
3533         
3534        // it's stack-locked, biased or neutral
3535        // TODO: optimize markword triage order to reduce the number of
3536        // conditional branches in the most common cases.
3537        // Beware -- there's a subtle invariant that fetch of the markword
3538        // at [FETCH], below, will never observe a biased encoding (*101b).
3539        // If this invariant is not held we'll suffer exclusion (safety) failure.
3540
3541        if (UseBiasedLocking && !UseOptoBiasInlining) {
3542          masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, _counters);
3543          masm.movptr(tmpReg, Address(objReg, 0)) ;        // [FETCH]
3544        }
3545
            // Same CAS attempt as the simple path: set bit 0 in tmp, save it
            // in the box, CAS the box address into [obj].
3546        // was q will it destroy high?
3547        masm.orl   (tmpReg, 1) ; 
3548        masm.movptr(Address(boxReg, 0), tmpReg) ;  
3549        if (os::is_MP()) { masm.lock(); } 
3550        masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
3551        if (_counters != NULL) {
3552           masm.cond_inc32(Assembler::equal,
3553                           ExternalAddress((address) _counters->fast_path_entry_count_addr()));
3554        }
3555        masm.jcc   (Assembler::equal, DONE_LABEL);
3556
3557        // Recursive locking
3558        masm.subptr(tmpReg, rsp);
3559        masm.andptr(tmpReg, 7 - os::vm_page_size());
3560        masm.movptr(Address(boxReg, 0), tmpReg);
3561        if (_counters != NULL) {
3562           masm.cond_inc32(Assembler::equal,
3563                           ExternalAddress((address) _counters->fast_path_entry_count_addr()));
3564        }
3565        masm.jmp   (DONE_LABEL) ;
3566
3567        masm.bind  (IsInflated) ;
3568        // It's inflated
3569
3570        // TODO: someday avoid the ST-before-CAS penalty by
3571        // relocating (deferring) the following ST.
3572        // We should also think about trying a CAS without having
3573        // fetched _owner.  If the CAS is successful we may
3574        // avoid an RTO->RTS upgrade on the $line.
3575        // Without cast to int32_t a movptr will destroy r10 which is typically obj
3576        masm.movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ; 
3577
            // tmp still holds the word loaded from [obj] above. Copy it into
            // box, then read the owner field at (owner_offset - 2) from it;
            // a non-zero owner exits to DONE_LABEL.
3578        masm.mov    (boxReg, tmpReg) ; 
3579        masm.movptr (tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 
3580        masm.testptr(tmpReg, tmpReg) ;   
3581        masm.jcc    (Assembler::notZero, DONE_LABEL) ; 
3582
3583        // It's inflated and appears unlocked
            // Try to install r15_thread as owner with a CAS on the same field.
3584        if (os::is_MP()) { masm.lock(); } 
3585        masm.cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 
3586        // Intentional fall-through into DONE_LABEL ...
3587
3588        masm.bind  (DONE_LABEL) ;
3589        masm.nop   () ;                 // avoid jmp to jmp
3590    }
3591  %}
3592
3593  // obj: object to unlock
3594  // box: box address (displaced header location), killed
3595  // RBX: killed tmp; cannot be obj nor box
  // NOTE(review): tmp is declared rRegP here, so the RBX pinning mentioned
  // above is presumably imposed by the instruct using this encoding -- confirm.
  //
  // Emits the inline monitor-exit fast path.  EmitSync selects the variant:
  //   EmitSync & 4 : emit nothing but "cmpptr rsp, 0"
  //   EmitSync & 8 : stack-lock-only path (recursive check, then CAS of the
  //                  displaced header back into the object)
  //   otherwise    : full path that also handles inflated monitors
  // The outcome is left in ZF for whatever follows this encoding: the
  // LGoSlowPath / LSuccess tails below force ZF=0 (failure) and ZF=1 (success).
3596  enc_class Fast_Unlock(rRegP obj, rax_RegP box, rRegP tmp)
3597  %{
3598
3599    Register objReg = as_Register($obj$$reg);
3600    Register boxReg = as_Register($box$$reg);
3601    Register tmpReg = as_Register($tmp$$reg);
3602    MacroAssembler masm(&cbuf);
3603
3604    if (EmitSync & 4) { 
3605       masm.cmpptr(rsp, 0) ; 
3606    } else
3607    if (EmitSync & 8) {
3608       Label DONE_LABEL;
3609       if (UseBiasedLocking) {
3610         masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3611       }
3612
3613       // Check whether the displaced header is 0
3614       //(=> recursive unlock)
3615       masm.movptr(tmpReg, Address(boxReg, 0));
3616       masm.testptr(tmpReg, tmpReg);
3617       masm.jcc(Assembler::zero, DONE_LABEL);
3618
3619       // If not recursive lock, reset the header to displaced header
3620       if (os::is_MP()) {
3621         masm.lock();
3622       }
3623       masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
3624       masm.bind(DONE_LABEL);
3625       masm.nop(); // avoid branch to branch
3626    } else {
3627       Label DONE_LABEL, Stacked, CheckSucc ;
3628
3629       if (UseBiasedLocking && !UseOptoBiasInlining) {
3630         masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3631       }
3632        
       // tmpReg = word at [obj+0]; a zero word in the box means a recursive
       // unlock, which exits immediately with ZF=1 from the cmpptr.
3633       masm.movptr(tmpReg, Address(objReg, 0)) ; 
3634       masm.cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD) ; 
3635       masm.jcc   (Assembler::zero, DONE_LABEL) ; 
       // Bit 0x02 clear in the fetched word => take the stack-lock path.
3636       masm.testl (tmpReg, 0x02) ; 
3637       masm.jcc   (Assembler::zero, Stacked) ; 
3638        
3639       // It's inflated
       // tmpReg still carries the 0x02 bit tested above, which is presumably
       // why every ObjectMonitor field offset below is biased by -2.
       // (owner ^ current thread) | recursions is non-zero unless this thread
       // owns the monitor with no recursions; non-zero exits with ZF=0.
3640       masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 
3641       masm.xorptr(boxReg, r15_thread) ; 
3642       masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ; 
3643       masm.jcc   (Assembler::notZero, DONE_LABEL) ; 
       // cxq | EntryList non-zero => go to CheckSucc; otherwise just clear
       // the owner field (ZF is still 1 from the orptr, since the store does
       // not touch the flags).
3644       masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ; 
3645       masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ; 
3646       masm.jcc   (Assembler::notZero, CheckSucc) ; 
3647       masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ; 
3648       masm.jmp   (DONE_LABEL) ; 
3649        
3650       if ((EmitSync & 65536) == 0) { 
3651         Label LSuccess, LGoSlowPath ;
3652         masm.bind  (CheckSucc) ;
         // A zero succ field sends us straight to the slow path.
3653         masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3654         masm.jcc   (Assembler::zero, LGoSlowPath) ;
3655
3656         // I'd much rather use lock:andl m->_owner, 0 as it's faster than
3657         // the explicit ST;MEMBAR combination, but masm doesn't currently support
3658         // "ANDQ M,IMM".  Don't use MFENCE here.  lock:add to TOS, xchg, etc
3659         // are all faster when the write buffer is populated.
3660         masm.movptr (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3661         if (os::is_MP()) {
3662            masm.lock () ; masm.addl (Address(rsp, 0), 0) ;
3663         }
         // Re-read succ after the store + fence; still non-zero => success.
3664         masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3665         masm.jcc   (Assembler::notZero, LSuccess) ;
3666
         // succ went to zero: CAS the owner field from NULL (in box/RAX) back
         // to this thread.  A failed CAS is treated as success; a successful
         // CAS falls into the slow path.
3667         masm.movptr (boxReg, (int32_t)NULL_WORD) ;                   // box is really EAX
3668         if (os::is_MP()) { masm.lock(); }
3669         masm.cmpxchgptr(r15_thread, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
3670         masm.jcc   (Assembler::notEqual, LSuccess) ;
3671         // Intentional fall-through into slow-path
3672
3673         masm.bind  (LGoSlowPath) ;
3674         masm.orl   (boxReg, 1) ;                      // set ICC.ZF=0 to indicate failure
3675         masm.jmp   (DONE_LABEL) ;
3676
3677         masm.bind  (LSuccess) ;
3678         masm.testl (boxReg, 0) ;                      // set ICC.ZF=1 to indicate success
3679         masm.jmp   (DONE_LABEL) ;
3680       }
3681
       // Stack-locked: CAS the displaced header from the box back into the
       // object; the CAS itself leaves the result in ZF.
3682       masm.bind  (Stacked) ; 
3683       masm.movptr(tmpReg, Address (boxReg, 0)) ;      // re-fetch
3684       if (os::is_MP()) { masm.lock(); } 
3685       masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
3686
       // With the succ check compiled out, CheckSucc is bound at the same
       // place as DONE_LABEL, so the branch to it simply exits with ZF=0.
3687       if (EmitSync & 65536) {
3688          masm.bind (CheckSucc) ;
3689       }
3690       masm.bind(DONE_LABEL);
3691       if (EmitSync & 32768) {
3692          masm.nop();                      // avoid branch to branch
3693       }
3694    }
3695  %}
3696
  // Emits an inline comparison of two java.lang.String objects using fixed
  // registers.  On entry rsi and rdi hold the two String references (their
  // value/offset/count fields are loaded below).  rax, rbx, rcx, rsi and rdi
  // are all overwritten.  The result is left in rcx:
  //   - the difference of the first mismatching characters (character taken
  //     from the rdi string minus the one from the rsi string), or
  //   - when one string is a prefix of the other, the length difference
  //     (rdi.count - rsi.count) that was pushed on the stack earlier.
  // The stack is balanced on both exits (pop on one, addptr rsp,8 on the other).
3697  enc_class enc_String_Compare()
3698  %{
      // NOTE(review): RCX_GOOD_LABEL is bound below but nothing in this block
      // branches to it.
3699    Label RCX_GOOD_LABEL, LENGTH_DIFF_LABEL,
3700          POP_LABEL, DONE_LABEL, CONT_LABEL,
3701          WHILE_HEAD_LABEL;
3702    MacroAssembler masm(&cbuf);
3703
3704    // Get the first character position in both strings
3705    //         [8] char array, [12] offset, [16] count
3706    int value_offset  = java_lang_String::value_offset_in_bytes();
3707    int offset_offset = java_lang_String::offset_offset_in_bytes();
3708    int count_offset  = java_lang_String::count_offset_in_bytes();
3709    int base_offset   = arrayOopDesc::base_offset_in_bytes(T_CHAR);
3710
      // rax = &rsi.value[rsi.offset], rbx = &rdi.value[rdi.offset]
      // (2-byte elements, hence times_2).
3711    masm.load_heap_oop(rax, Address(rsi, value_offset));
3712    masm.movl(rcx, Address(rsi, offset_offset));
3713    masm.lea(rax, Address(rax, rcx, Address::times_2, base_offset));
3714    masm.load_heap_oop(rbx, Address(rdi, value_offset));
3715    masm.movl(rcx, Address(rdi, offset_offset));
3716    masm.lea(rbx, Address(rbx, rcx, Address::times_2, base_offset));
3717
3718    // Compute the minimum of the string lengths(rsi) and the
3719    // difference of the string lengths (stack)
3720
      // The cmov consumes the flags of the subl: when rdi.count <= rsi.count,
      // rsi is replaced by rdi.count (saved in rcx), leaving rsi = min.
3721    masm.movl(rdi, Address(rdi, count_offset));
3722    masm.movl(rsi, Address(rsi, count_offset));
3723    masm.movl(rcx, rdi);
3724    masm.subl(rdi, rsi);
3725    masm.push(rdi);
3726    masm.cmov(Assembler::lessEqual, rsi, rcx);
3727
3728    // Is the minimum length zero?
3729    masm.bind(RCX_GOOD_LABEL);
3730    masm.testl(rsi, rsi);
3731    masm.jcc(Assembler::zero, LENGTH_DIFF_LABEL);
3732
3733    // Load first characters
3734    masm.load_unsigned_short(rcx, Address(rbx, 0));
3735    masm.load_unsigned_short(rdi, Address(rax, 0));
3736
3737    // Compare first characters
3738    masm.subl(rcx, rdi);
3739    masm.jcc(Assembler::notZero,  POP_LABEL);
3740    masm.decrementl(rsi);
3741    masm.jcc(Assembler::zero, LENGTH_DIFF_LABEL);
3742
3743    {
3744      // Check after comparing first character to see if strings are equivalent
3745      Label LSkip2;
3746      // Check if the strings start at same location
3747      masm.cmpptr(rbx, rax);
3748      masm.jcc(Assembler::notEqual, LSkip2);
3749
3750      // Check if the length difference is zero (from stack)
3751      masm.cmpl(Address(rsp, 0), 0x0);
3752      masm.jcc(Assembler::equal,  LENGTH_DIFF_LABEL);
3753
3754      // Strings might not be equivalent
3755      masm.bind(LSkip2);
3756    }
3757
3758    // Shift RAX and RBX to the end of the arrays, negate min
      // The extra displacement of 2 skips the first character, which has
      // already been compared; rsi then counts up from -(min-1) to 0.
3759    masm.lea(rax, Address(rax, rsi, Address::times_2, 2));
3760    masm.lea(rbx, Address(rbx, rsi, Address::times_2, 2));
3761    masm.negptr(rsi);
3762
3763    // Compare the rest of the characters
3764    masm.bind(WHILE_HEAD_LABEL);
3765    masm.load_unsigned_short(rcx, Address(rbx, rsi, Address::times_2, 0));
3766    masm.load_unsigned_short(rdi, Address(rax, rsi, Address::times_2, 0));
3767    masm.subl(rcx, rdi);
3768    masm.jcc(Assembler::notZero, POP_LABEL);
3769    masm.increment(rsi);
3770    masm.jcc(Assembler::notZero, WHILE_HEAD_LABEL);
3771
3772    // Strings are equal up to min length.  Return the length difference.
3773    masm.bind(LENGTH_DIFF_LABEL);
3774    masm.pop(rcx);
3775    masm.jmp(DONE_LABEL);
3776
3777    // Discard the stored length difference
      // (rcx already holds the character difference computed by the subl.)
3778    masm.bind(POP_LABEL);
3779    masm.addptr(rsp, 8);
3780       
3781    // That's it
3782    masm.bind(DONE_LABEL);
3783  %}
3784
  // Emits an inline equality test of two arrays whose element base offset is
  // that of T_CHAR (2-byte elements).  The result register receives 1 when the
  // arrays are equal and 0 otherwise.  Identical references compare equal, a
  // null reference on either side (when the references differ) compares
  // unequal, and differing lengths compare unequal.
  // The array registers are reused as scratch inside the compare loop, so
  // ary1, ary2, tmp1 and tmp2 are all overwritten.
3785  enc_class enc_Array_Equals(rdi_RegP ary1, rsi_RegP ary2, rax_RegI tmp1, rbx_RegI tmp2, rcx_RegI result) %{
3786    Label TRUE_LABEL, FALSE_LABEL, DONE_LABEL, COMPARE_LOOP_HDR, COMPARE_LOOP;
3787    MacroAssembler masm(&cbuf);
3788
3789    Register ary1Reg   = as_Register($ary1$$reg);
3790    Register ary2Reg   = as_Register($ary2$$reg);
3791    Register tmp1Reg   = as_Register($tmp1$$reg);
3792    Register tmp2Reg   = as_Register($tmp2$$reg);
3793    Register resultReg = as_Register($result$$reg);
3794
3795    int length_offset  = arrayOopDesc::length_offset_in_bytes();
3796    int base_offset    = arrayOopDesc::base_offset_in_bytes(T_CHAR);
3797
3798    // Check the input args
3799    masm.cmpq(ary1Reg, ary2Reg);                        
3800    masm.jcc(Assembler::equal, TRUE_LABEL);
3801    masm.testq(ary1Reg, ary1Reg);                       
3802    masm.jcc(Assembler::zero, FALSE_LABEL);
3803    masm.testq(ary2Reg, ary2Reg);                       
3804    masm.jcc(Assembler::zero, FALSE_LABEL);
3805
3806    // Check the lengths
      // tmp2Reg keeps the element count for the odd/even test below;
      // resultReg doubles as the working counter until the final result is set.
3807    masm.movl(tmp2Reg, Address(ary1Reg, length_offset));
3808    masm.movl(resultReg, Address(ary2Reg, length_offset));
3809    masm.cmpl(tmp2Reg, resultReg);
3810    masm.jcc(Assembler::notEqual, FALSE_LABEL);
3811    masm.testl(resultReg, resultReg);
3812    masm.jcc(Assembler::zero, TRUE_LABEL);
3813
3814    // Get the number of 4 byte vectors to compare
3815    masm.shrl(resultReg, 1);
3816
3817    // Check for odd-length arrays
      // NOTE(review): the andl already sets ZF, so the testl looks redundant.
3818    masm.andl(tmp2Reg, 1);
3819    masm.testl(tmp2Reg, tmp2Reg);
3820    masm.jcc(Assembler::zero, COMPARE_LOOP_HDR);
3821
3822    // Compare 2-byte "tail" at end of arrays
      // The tail element sits right after the last full 4-byte pair, i.e. at
      // base + resultReg*4.  If there are no full pairs the arrays are equal.
3823    masm.load_unsigned_short(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset));
3824    masm.load_unsigned_short(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset));
3825    masm.cmpl(tmp1Reg, tmp2Reg);
3826    masm.jcc(Assembler::notEqual, FALSE_LABEL);
3827    masm.testl(resultReg, resultReg);
3828    masm.jcc(Assembler::zero, TRUE_LABEL);
3829
3830    // Setup compare loop
3831    masm.bind(COMPARE_LOOP_HDR);
3832    // Shift tmp1Reg and tmp2Reg to the last 4-byte boundary of the arrays
3833    masm.leaq(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset));
3834    masm.leaq(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset));
3835    masm.negq(resultReg);
3836
3837    // 4-byte-wide compare loop
      // resultReg runs from -(pair count) up to 0; ary1Reg/ary2Reg are no
      // longer needed as pointers and hold the loaded 4-byte values.
3838    masm.bind(COMPARE_LOOP);
3839    masm.movl(ary1Reg, Address(tmp1Reg, resultReg, Address::times_4, 0));
3840    masm.movl(ary2Reg, Address(tmp2Reg, resultReg, Address::times_4, 0));
3841    masm.cmpl(ary1Reg, ary2Reg);
3842    masm.jcc(Assembler::notEqual, FALSE_LABEL);
3843    masm.incrementq(resultReg);
3844    masm.jcc(Assembler::notZero, COMPARE_LOOP);
3845
3846    masm.bind(TRUE_LABEL);
3847    masm.movl(resultReg, 1);   // return true
3848    masm.jmp(DONE_LABEL);
3849
3850    masm.bind(FALSE_LABEL);
3851    masm.xorl(resultReg, resultReg); // return false
3852
3853    // That's it
3854    masm.bind(DONE_LABEL);
3855  %}
3856
3857  enc_class enc_rethrow()
3858  %{
3859    cbuf.set_inst_mark();
3860    emit_opcode(cbuf, 0xE9); // jmp entry
3861    emit_d32_reloc(cbuf,
3862                   (int) (OptoRuntime::rethrow_stub() - cbuf.code_end() - 4),
3863                   runtime_call_Relocation::spec(),
3864                   RELOC_DISP32);
3865  %}
3866
3867  enc_class absF_encoding(regF dst)
3868  %{
3869    int dstenc = $dst$$reg;
3870    address signmask_address = (address) StubRoutines::x86::float_sign_mask();
3871
3872    cbuf.set_inst_mark();
3873    if (dstenc >= 8) {
3874      emit_opcode(cbuf, Assembler::REX_R);
3875      dstenc -= 8;
3876    }
3877    // XXX reg_mem doesn't support RIP-relative addressing yet
3878    emit_opcode(cbuf, 0x0F);
3879    emit_opcode(cbuf, 0x54);
3880    emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3881    emit_d32_reloc(cbuf, signmask_address);
3882  %}
3883
3884  enc_class absD_encoding(regD dst)
3885  %{
3886    int dstenc = $dst$$reg;
3887    address signmask_address = (address) StubRoutines::x86::double_sign_mask();
3888
3889    cbuf.set_inst_mark();
3890    emit_opcode(cbuf, 0x66);
3891    if (dstenc >= 8) {
3892      emit_opcode(cbuf, Assembler::REX_R);
3893      dstenc -= 8;
3894    }
3895    // XXX reg_mem doesn't support RIP-relative addressing yet
3896    emit_opcode(cbuf, 0x0F);
3897    emit_opcode(cbuf, 0x54);
3898    emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3899    emit_d32_reloc(cbuf, signmask_address);
3900  %}
3901
3902  enc_class negF_encoding(regF dst)
3903  %{
3904    int dstenc = $dst$$reg;
3905    address signflip_address = (address) StubRoutines::x86::float_sign_flip();
3906
3907    cbuf.set_inst_mark();
3908    if (dstenc >= 8) {
3909      emit_opcode(cbuf, Assembler::REX_R);
3910      dstenc -= 8;
3911    }
3912    // XXX reg_mem doesn't support RIP-relative addressing yet
3913    emit_opcode(cbuf, 0x0F);
3914    emit_opcode(cbuf, 0x57);
3915    emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3916    emit_d32_reloc(cbuf, signflip_address);
3917  %}
3918
3919  enc_class negD_encoding(regD dst)
3920  %{
3921    int dstenc = $dst$$reg;
3922    address signflip_address = (address) StubRoutines::x86::double_sign_flip();
3923
3924    cbuf.set_inst_mark();
3925    emit_opcode(cbuf, 0x66);
3926    if (dstenc >= 8) {
3927      emit_opcode(cbuf, Assembler::REX_R);
3928      dstenc -= 8;
3929    }
3930    // XXX reg_mem doesn't support RIP-relative addressing yet
3931    emit_opcode(cbuf, 0x0F);
3932    emit_opcode(cbuf, 0x57);
3933    emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3934    emit_d32_reloc(cbuf, signflip_address);
3935  %}
3936
3937  enc_class f2i_fixup(rRegI dst, regF src)
3938  %{
3939    int dstenc = $dst$$reg;
3940    int srcenc = $src$$reg;
3941
3942    // cmpl $dst, #0x80000000
3943    if (dstenc >= 8) {
3944      emit_opcode(cbuf, Assembler::REX_B);
3945    }
3946    emit_opcode(cbuf, 0x81);
3947    emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
3948    emit_d32(cbuf, 0x80000000);
3949
3950    // jne,s done
3951    emit_opcode(cbuf, 0x75);
3952    if (srcenc < 8 && dstenc < 8) {
3953      emit_d8(cbuf, 0xF);
3954    } else if (srcenc >= 8 && dstenc >= 8) {
3955      emit_d8(cbuf, 0x11);
3956    } else {
3957      emit_d8(cbuf, 0x10);
3958    }
3959
3960    // subq rsp, #8
3961    emit_opcode(cbuf, Assembler::REX_W);
3962    emit_opcode(cbuf, 0x83);
3963    emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3964    emit_d8(cbuf, 8);
3965
3966    // movss [rsp], $src
3967    emit_opcode(cbuf, 0xF3);
3968    if (srcenc >= 8) {
3969      emit_opcode(cbuf, Assembler::REX_R);
3970    }
3971    emit_opcode(cbuf, 0x0F);
3972    emit_opcode(cbuf, 0x11);
3973    encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3974
3975    // call f2i_fixup
3976    cbuf.set_inst_mark();
3977    emit_opcode(cbuf, 0xE8);
3978    emit_d32_reloc(cbuf,
3979                   (int)
3980                   (StubRoutines::x86::f2i_fixup() - cbuf.code_end() - 4),
3981                   runtime_call_Relocation::spec(),
3982                   RELOC_DISP32);
3983
3984    // popq $dst
3985    if (dstenc >= 8) {
3986      emit_opcode(cbuf, Assembler::REX_B);
3987    }
3988    emit_opcode(cbuf, 0x58 | (dstenc & 7));
3989
3990    // done:
3991  %}
3992
3993  enc_class f2l_fixup(rRegL dst, regF src)
3994  %{
3995    int dstenc = $dst$$reg;
3996    int srcenc = $src$$reg;
3997    address const_address = (address) StubRoutines::x86::double_sign_flip();
3998
3999    // cmpq $dst, [0x8000000000000000]
4000    cbuf.set_inst_mark();
4001    emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
4002    emit_opcode(cbuf, 0x39);
4003    // XXX reg_mem doesn't support RIP-relative addressing yet
4004    emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
4005    emit_d32_reloc(cbuf, const_address);
4006
4007
4008    // jne,s done
4009    emit_opcode(cbuf, 0x75);
4010    if (srcenc < 8 && dstenc < 8) {
4011      emit_d8(cbuf, 0xF);
4012    } else if (srcenc >= 8 && dstenc >= 8) {
4013      emit_d8(cbuf, 0x11);
4014    } else {
4015      emit_d8(cbuf, 0x10);
4016    }
4017
4018    // subq rsp, #8
4019    emit_opcode(cbuf, Assembler::REX_W);
4020    emit_opcode(cbuf, 0x83);
4021    emit_rm(cbuf, 0x3, 0x5, RSP_enc);
4022    emit_d8(cbuf, 8);
4023
4024    // movss [rsp], $src
4025    emit_opcode(cbuf, 0xF3);
4026    if (srcenc >= 8) {
4027      emit_opcode(cbuf, Assembler::REX_R);
4028    }
4029    emit_opcode(cbuf, 0x0F);
4030    emit_opcode(cbuf, 0x11);
4031    encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
4032
4033    // call f2l_fixup
4034    cbuf.set_inst_mark();
4035    emit_opcode(cbuf, 0xE8);
4036    emit_d32_reloc(cbuf,
4037                   (int)
4038                   (StubRoutines::x86::f2l_fixup() - cbuf.code_end() - 4),
4039                   runtime_call_Relocation::spec(),
4040                   RELOC_DISP32);
4041
4042    // popq $dst
4043    if (dstenc >= 8) {
4044      emit_opcode(cbuf, Assembler::REX_B);
4045    }
4046    emit_opcode(cbuf, 0x58 | (dstenc & 7));
4047
4048    // done:
4049  %}
4050
4051  enc_class d2i_fixup(rRegI dst, regD src)
4052  %{
4053    int dstenc = $dst$$reg;
4054    int srcenc = $src$$reg;
4055
4056    // cmpl $dst, #0x80000000
4057    if (dstenc >= 8) {
4058      emit_opcode(cbuf, Assembler::REX_B);
4059    }
4060    emit_opcode(cbuf, 0x81);
4061    emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
4062    emit_d32(cbuf, 0x80000000);
4063
4064    // jne,s done
4065    emit_opcode(cbuf, 0x75);
4066    if (srcenc < 8 && dstenc < 8) {
4067      emit_d8(cbuf, 0xF);
4068    } else if (srcenc >= 8 && dstenc >= 8) {
4069      emit_d8(cbuf, 0x11);
4070    } else {
4071      emit_d8(cbuf, 0x10);
4072    }
4073
4074    // subq rsp, #8
4075    emit_opcode(cbuf, Assembler::REX_W);
4076    emit_opcode(cbuf, 0x83);
4077    emit_rm(cbuf, 0x3, 0x5, RSP_enc);
4078    emit_d8(cbuf, 8);
4079
4080    // movsd [rsp], $src
4081    emit_opcode(cbuf, 0xF2);
4082    if (srcenc >= 8) {
4083      emit_opcode(cbuf, Assembler::REX_R);
4084    }
4085    emit_opcode(cbuf, 0x0F);
4086    emit_opcode(cbuf, 0x11);
4087    encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
4088
4089    // call d2i_fixup
4090    cbuf.set_inst_mark();
4091    emit_opcode(cbuf, 0xE8);
4092    emit_d32_reloc(cbuf,
4093                   (int)
4094                   (StubRoutines::x86::d2i_fixup() - cbuf.code_end() - 4),
4095                   runtime_call_Relocation::spec(),
4096                   RELOC_DISP32);
4097
4098    // popq $dst
4099    if (dstenc >= 8) {
4100      emit_opcode(cbuf, Assembler::REX_B);
4101    }
4102    emit_opcode(cbuf, 0x58 | (dstenc & 7));
4103
4104    // done:
4105  %}
4106
4107  enc_class d2l_fixup(rRegL dst, regD src)
4108  %{
4109    int dstenc = $dst$$reg;
4110    int srcenc = $src$$reg;
4111    address const_address = (address) StubRoutines::x86::double_sign_flip();
4112
4113    // cmpq $dst, [0x8000000000000000]
4114    cbuf.set_inst_mark();
4115    emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
4116    emit_opcode(cbuf, 0x39);
4117    // XXX reg_mem doesn't support RIP-relative addressing yet
4118    emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
4119    emit_d32_reloc(cbuf, const_address);
4120
4121
4122    // jne,s done
4123    emit_opcode(cbuf, 0x75);
4124    if (srcenc < 8 && dstenc < 8) {
4125      emit_d8(cbuf, 0xF);
4126    } else if (srcenc >= 8 && dstenc >= 8) {
4127      emit_d8(cbuf, 0x11);
4128    } else {
4129      emit_d8(cbuf, 0x10);
4130    }
4131
4132    // subq rsp, #8
4133    emit_opcode(cbuf, Assembler::REX_W);
4134    emit_opcode(cbuf, 0x83);
4135    emit_rm(cbuf, 0x3, 0x5, RSP_enc);
4136    emit_d8(cbuf, 8);
4137
4138    // movsd [rsp], $src
4139    emit_opcode(cbuf, 0xF2);
4140    if (srcenc >= 8) {
4141      emit_opcode(cbuf, Assembler::REX_R);
4142    }
4143    emit_opcode(cbuf, 0x0F);
4144    emit_opcode(cbuf, 0x11);
4145    encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
4146
4147    // call d2l_fixup
4148    cbuf.set_inst_mark();
4149    emit_opcode(cbuf, 0xE8);
4150    emit_d32_reloc(cbuf,
4151                   (int)
4152                   (StubRoutines::x86::d2l_fixup() - cbuf.code_end() - 4),
4153                   runtime_call_Relocation::spec(),
4154                   RELOC_DISP32);
4155
4156    // popq $dst
4157    if (dstenc >= 8) {
4158      emit_opcode(cbuf, Assembler::REX_B);
4159    }
4160    emit_opcode(cbuf, 0x58 | (dstenc & 7));
4161
4162    // done:
4163  %}
4164
  // Acquire barrier encoding.  The body is intentionally empty: it emits no
  // code, and only the disabled sketch below remains.
4165  enc_class enc_membar_acquire
4166  %{
4167    // [jk] not needed currently, if you enable this and it really
4168    // emits code don't forget to remove the "size(0)" line in
4169    // membar_acquire()
4170    // MacroAssembler masm(&cbuf);
4171    // masm.membar(Assembler::Membar_mask_bits(Assembler::LoadStore |
4172    //                                         Assembler::LoadLoad));
4173  %}
4174
  // Release barrier encoding.  The body is intentionally empty: it emits no
  // code, and only the disabled sketch below remains.
4175  enc_class enc_membar_release
4176  %{
4177    // [jk] not needed currently, if you enable this and it really
4178    // emits code don't forget to remove the "size(0)" line in
4179    // membar_release()
4180    // MacroAssembler masm(&cbuf);
4181    // masm.membar(Assembler::Membar_mask_bits(Assembler::LoadStore |
4182    //                                         Assembler::StoreStore));
4183  %}
4184
4185  enc_class enc_membar_volatile
4186  %{
4187    MacroAssembler masm(&cbuf);
4188    masm.membar(Assembler::Membar_mask_bits(Assembler::StoreLoad |
4189                                            Assembler::StoreStore));
4190  %}
4191
4192  // Safepoint Poll.  This polls the safepoint page, and causes an
4193  // exception if it is not readable. Unfortunately, it kills
4194  // RFLAGS in the process.
4195  enc_class enc_safepoint_poll
4196  %{
4197    // testl %rax, off(%rip) // Opcode + ModRM + Disp32 == 6 bytes
4198    // XXX reg_mem doesn't support RIP-relative addressing yet
4199    cbuf.set_inst_mark();
4200    cbuf.relocate(cbuf.inst_mark(), relocInfo::poll_type, 0); // XXX
4201    emit_opcode(cbuf, 0x85); // testl
4202    emit_rm(cbuf, 0x0, RAX_enc, 0x5); // 00 rax 101 == 0x5
4203    // cbuf.inst_mark() is beginning of instruction
4204    emit_d32_reloc(cbuf, os::get_polling_page());
4205//                    relocInfo::poll_type,
4206  %}
4207%}
4208
4209
4210
4211//----------FRAME--------------------------------------------------------------
4212// Definition of frame structure and management information.
4213//
4214//  S T A C K   L A Y O U T    Allocators stack-slot number
4215//                             |   (to get allocators register number
4216//  G  Owned by    |        |  v    add OptoReg::stack0())
4217//  r   CALLER     |        |
4218//  o     |        +--------+      pad to even-align allocators stack-slot
4219//  w     V        |  pad0  |        numbers; owned by CALLER
4220//  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
4221//  h     ^        |   in   |  5
4222//        |        |  args  |  4   Holes in incoming args owned by SELF
4223//  |     |        |        |  3
4224//  |     |        +--------+
4225//  V     |        | old out|      Empty on Intel, window on Sparc
4226//        |    old |preserve|      Must be even aligned.
4227//        |     SP-+--------+----> Matcher::_old_SP, even aligned
4228//        |        |   in   |  3   area for Intel ret address
4229//     Owned by    |preserve|      Empty on Sparc.
4230//       SELF      +--------+
4231//        |        |  pad2  |  2   pad to align old SP
4232//        |        +--------+  1
4233//        |        | locks  |  0
4234//        |        +--------+----> OptoReg::stack0(), even aligned
4235//        |        |  pad1  | 11   pad to align new SP
4236//        |        +--------+
4237//        |        |        | 10
4238//        |        | spills |  9   spills
4239//        V        |        |  8   (pad0 slot for callee)
4240//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
4241//        ^        |  out   |  7
4242//        |        |  args  |  6   Holes in outgoing args owned by CALLEE
4243//     Owned by    +--------+
4244//      CALLEE     | new out|  6   Empty on Intel, window on Sparc
4245//        |    new |preserve|      Must be even-aligned.
4246//        |     SP-+--------+----> Matcher::_new_SP, even aligned
4247//        |        |        |
4248//
4249// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
4250//         known from SELF's arguments and the Java calling convention.
4251//         Region 6-7 is determined per call site.
4252// Note 2: If the calling convention leaves holes in the incoming argument
4253//         area, those holes are owned by SELF.  Holes in the outgoing area
4254//         are owned by the CALLEE.  Holes should not be necessary in the
4255//         incoming area, as the Java calling convention is completely under
4256//         the control of the AD file.  Doubles can be sorted and packed to
4257//         avoid holes.  Holes in the outgoing arguments may be necessary for
4258//         varargs C calling conventions.
4259// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
4260//         even aligned with pad0 as needed.
4261//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
4262//         region 6-11 is even aligned; it may be padded out more so that
4263//         the region from SP to FP meets the minimum stack alignment.
4264// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
4265//         alignment.  Region 11, pad1, may be dynamically extended so that
4266//         SP meets the minimum alignment.
4267
// Frame description block: stack growth direction, the registers reserved by
// the compiled-code/interpreter calling convention, stack-slot accounting,
// the return-address location, the Java and C calling conventions, and the
// registers that carry return values.
4268frame
4269%{
4270  // What direction does stack grow in (assumed to be same for C & Java)
4271  stack_direction(TOWARDS_LOW);
4272
4273  // The following registers define part of the calling convention
4274  // between compiled code and the interpreter.
4275  inline_cache_reg(RAX);                // Inline Cache Register
4276  interpreter_method_oop_reg(RBX);      // Method Oop Register when
4277                                        // calling interpreter
4278
4279  // Optional: name the operand used by cisc-spilling to access
4280  // [stack_pointer + offset]
4281  cisc_spilling_operand_name(indOffset32);
4282
4283  // Number of stack slots consumed by locking an object
4284  sync_stack_slots(2);
4285
4286  // Compiled code's Frame Pointer
4287  frame_pointer(RSP);
4288
4289  // Interpreter stores its frame pointer in a register which is
4290  // stored to the stack by I2CAdaptors.
4291  // I2CAdaptors convert from interpreted java to compiled java.
4292  interpreter_frame_pointer(RBP);
4293
4294  // Stack alignment requirement
4295  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
4296
4297  // Number of stack slots between incoming argument block and the start of
4298  // a new frame.  The PROLOG must add this many slots to the stack.  The
4299  // EPILOG must remove this many slots.  amd64 needs two slots for
4300  // return address.
  // NOTE(review): the base value is 4 slots, of which the comment above
  // accounts for two; the other two are presumably the saved RBP -- confirm.
  // Two more slots are added when VerifyStackAtCalls is on.
4301  in_preserve_stack_slots(4 + 2 * VerifyStackAtCalls);
4302
4303  // Number of outgoing stack slots killed above the out_preserve_stack_slots
4304  // for calls to C.  Supports the var-args backing area for register parms.
4305  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
4306
4307  // The after-PROLOG location of the return address.  Location of
4308  // return address specifies a type (REG or STACK) and a number
4309  // representing the register number (i.e. - use a register name) or
4310  // stack slot.
4311  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
4312  // Otherwise, it is above the locks and verification slot and alignment word
  // The slot count (return address + optional verification slots + the
  // compilation's fixed slots) is rounded up to a multiple of
  // WordsPerLong * 2 before the 2 return-address slots are subtracted again.
4313  return_addr(STACK - 2 +
4314              round_to(2 + 2 * VerifyStackAtCalls +
4315                       Compile::current()->fixed_slots(),
4316                       WordsPerLong * 2));
4317
4318  // Body of function which returns an integer array locating
4319  // arguments either in registers or in stack slots.  Passed an array
4320  // of ideal registers called "sig" and a "length" count.  Stack-slot
4321  // offsets are based on outgoing arguments, i.e. a CALLER setting up
4322  // arguments for a CALLEE.  Incoming stack arguments are
4323  // automatically biased by the preserve_stack_slots field above.
4324
4325  calling_convention
4326  %{
4327    // No difference between ingoing/outgoing just pass false
4328    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
4329  %}
4330
4331  c_calling_convention
4332  %{
4333    // This is obviously always outgoing
4334    (void) SharedRuntime::c_calling_convention(sig_bt, regs, length);
4335  %}
4336
4337  // Location of compiled Java return values.  Same as C for now.
4338  return_value
4339  %{
4340    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
4341           "only return normal values");
4342
    // Both tables are indexed directly by ideal_reg.  "lo" names the
    // register holding the low half of the value, "hi" the high half;
    // OptoReg::Bad marks types with no high half.
    // NOTE(review): going by the per-entry labels, Op_RegN sits below
    // Op_RegI and is therefore rejected by the assert above -- confirm
    // that narrow oops are never returned through this path.
4343    static const int lo[Op_RegL + 1] = {
4344      0,
4345      0,
4346      RAX_num,  // Op_RegN
4347      RAX_num,  // Op_RegI
4348      RAX_num,  // Op_RegP
4349      XMM0_num, // Op_RegF
4350      XMM0_num, // Op_RegD
4351      RAX_num   // Op_RegL
4352    };
4353    static const int hi[Op_RegL + 1] = {
4354      0,
4355      0,
4356      OptoReg::Bad, // Op_RegN
4357      OptoReg::Bad, // Op_RegI
4358      RAX_H_num,    // Op_RegP
4359      OptoReg::Bad, // Op_RegF
4360      XMM0_H_num,   // Op_RegD
4361      RAX_H_num     // Op_RegL
4362    };
    // Guards against a new ideal register type being added without a
    // matching table entry.
4363    assert(ARRAY_SIZE(hi) == _last_machine_leaf - 1, "missing type");
4364    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
4365  %}
4366%}
4367
4368//----------ATTRIBUTES---------------------------------------------------------
// Each declaration below names an attribute and gives it a value in
// parentheses.
// NOTE(review): that value is presumably the default used when an operand or
// instruction does not set the attribute itself -- confirm against the ADLC
// documentation.
4369//----------Operand Attributes-------------------------------------------------
4370op_attrib op_cost(0);        // Required cost attribute
4371
4372//----------Instruction Attributes---------------------------------------------
4373ins_attrib ins_cost(100);       // Required cost attribute
4374ins_attrib ins_size(8);         // Required size attribute (in bits)
4375ins_attrib ins_pc_relative(0);  // Required PC Relative flag
4376ins_attrib ins_short_branch(0); // Required flag: is this instruction
4377                                // a non-matching short branch variant
4378                                // of some long branch?
4379ins_attrib ins_alignment(1);    // Required alignment attribute (must
4380                                // be a power of 2) specifies the
4381                                // alignment that some part of the
4382                                // instruction (not necessarily the
4383                                // start) requires.  If > 1, a
4384                                // compute_padding() function must be
4385                                // provided for the instruction
4386
4387//----------OPERANDS-----------------------------------------------------------
4388// Operand definitions must precede instruction definitions for correct parsing
4389// in the ADLC because operands constitute user defined types which are used in
4390// instruction definitions.
4391
4392//----------Simple Operands----------------------------------------------------
4393// Immediate Operands
4394// Integer Immediate: matches any ConI node (no value predicate)
4395operand immI()
4396%{
4397  match(ConI);
4398
4399  op_cost(10);
4400  format %{ %}
4401  interface(CONST_INTER);
4402%}
4403
4404// Constant for test vs zero: int immediate whose value is exactly 0
4405operand immI0()
4406%{
4407  predicate(n->get_int() == 0);
4408  match(ConI);
4409
4410  op_cost(0);
4411  format %{ %}
4412  interface(CONST_INTER);
4413%}
4414
4415// Constant for increment: int immediate whose value is exactly 1
4416operand immI1()
4417%{
4418  predicate(n->get_int() == 1);
4419  match(ConI);
4420
4421  op_cost(0);
4422  format %{ %}
4423  interface(CONST_INTER);
4424%}
4425
4426// Constant for decrement: int immediate whose value is exactly -1
4427operand immI_M1()
4428%{
4429  predicate(n->get_int() == -1);
4430  match(ConI);
4431
4432  op_cost(0);
4433  format %{ %}
4434  interface(CONST_INTER);
4435%}
4436
4437// Valid scale values for addressing modes: int immediate in the range 0..3 inclusive
4438operand immI2()
4439%{
4440  predicate(0 <= n->get_int() && (n->get_int() <= 3));
4441  match(ConI);
4442
4443  format %{ %}
4444  interface(CONST_INTER);
4445%}
4446
4447operand immI8()  // Int immediate within the signed 8-bit range -0x80 .. 0x7f
4448%{
4449  predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
4450  match(ConI);
4451
4452  op_cost(5);
4453  format %{ %}
4454  interface(CONST_INTER);
4455%}
4456
4457operand immI16()  // Int immediate within the signed 16-bit range -32768 .. 32767
4458%{
4459  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
4460  match(ConI);
4461
4462  op_cost(10);
4463  format %{ %}
4464  interface(CONST_INTER);
4465%}
4466
4467// Constant for long shifts: int immediate whose value is exactly 32
4468operand immI_32()
4469%{
4470  predicate( n->get_int() == 32 );
4471  match(ConI);
4472
4473  op_cost(0);
4474  format %{ %}
4475  interface(CONST_INTER);
4476%}
4477
4478// Constant for long shifts: int immediate whose value is exactly 64
4479operand immI_64()
4480%{
4481  predicate( n->get_int() == 64 );
4482  match(ConI);
4483
4484  op_cost(0);
4485  format %{ %}
4486  interface(CONST_INTER);
4487%}
4488
4489// Pointer Immediate: matches any ConP node (no value predicate)
4490operand immP()
4491%{
4492  match(ConP);
4493
4494  op_cost(10);
4495  format %{ %}
4496  interface(CONST_INTER);
4497%}
4498
4499// NULL Pointer Immediate: ConP whose pointer value is 0
4500operand immP0()
4501%{
4502  predicate(n->get_ptr() == 0);
4503  match(ConP);
4504
4505  op_cost(5);
4506  format %{ %}
4507  interface(CONST_INTER);
4508%}
4509
4510// Narrow Pointer Immediate: matches any ConN node (no value predicate)
4511operand immN() %{
4512  match(ConN);
4513
4514  op_cost(10);
4515  format %{ %}
4516  interface(CONST_INTER);
4517%}
4518
4519// Narrow NULL Pointer Immediate: ConN whose narrow constant is 0
4520operand immN0() %{
4521  predicate(n->get_narrowcon() == 0);
4522  match(ConN);
4523
4524  op_cost(5);
4525  format %{ %}
4526  interface(CONST_INTER);
4527%}
4528
4529operand immP31()  // Pointer immediate that is not an oop pointer and whose value >> 31 is zero
4530%{
4531  predicate(!n->as_Type()->type()->isa_oopptr()
4532            && (n->get_ptr() >> 31) == 0);
4533  match(ConP);
4534
4535  op_cost(5);
4536  format %{ %}
4537  interface(CONST_INTER);
4538%}
4539
4540
4541// Long Immediate: matches any ConL node (no value predicate)
4542operand immL()
4543%{
4544  match(ConL);
4545
4546  op_cost(20);
4547  format %{ %}
4548  interface(CONST_INTER);
4549%}
4550
4551// Long Immediate 8-bit: value within the signed range -0x80 .. 0x7f
4552operand immL8()
4553%{
4554  predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
4555  match(ConL);
4556
4557  op_cost(5);
4558  format %{ %}
4559  interface(CONST_INTER);
4560%}
4561
4562// Long Immediate 32-bit unsigned: value is unchanged by a cast to unsigned int
4563operand immUL32()
4564%{
4565  predicate(n->get_long() == (unsigned int) (n->get_long()));
4566  match(ConL);
4567
4568  op_cost(10);
4569  format %{ %}
4570  interface(CONST_INTER);
4571%}
4572
4573// Long Immediate 32-bit signed: value is unchanged by a cast to int
4574operand immL32()
4575%{
4576  predicate(n->get_long() == (int) (n->get_long()));
4577  match(ConL);
4578
4579  op_cost(15);
4580  format %{ %}
4581  interface(CONST_INTER);
4582%}
4583
4584// Long Immediate zero: ConL whose value is exactly 0
4585operand immL0()
4586%{
4587  predicate(n->get_long() == 0L);
4588  match(ConL);
4589
4590  op_cost(10);
4591  format %{ %}
4592  interface(CONST_INTER);
4593%}
4594
4595// Constant for increment: long immediate whose value is exactly 1
4596operand immL1()
4597%{
4598  predicate(n->get_long() == 1);
4599  match(ConL);
4600
4601  format %{ %}
4602  interface(CONST_INTER);
4603%}
4604
4605// Constant for decrement: long immediate whose value is exactly -1
4606operand immL_M1()
4607%{
4608  predicate(n->get_long() == -1);
4609  match(ConL);
4610
4611  format %{ %}
4612  interface(CONST_INTER);
4613%}
4614
4615// Long Immediate: the value 10 (matches only ConL nodes equal to 10)
4616operand immL10()
4617%{
4618  predicate(n->get_long() == 10);
4619  match(ConL);
4620
4621  format %{ %}
4622  interface(CONST_INTER);
4623%}
4624
4625// Long immediate from 0 to 127 (inclusive).
4626// Used for a shorter form of long mul by 10.
4627operand immL_127()
4628%{
4629  predicate(0 <= n->get_long() && n->get_long() < 0x80);
4630  match(ConL);
4631
4632  op_cost(10);
4633  format %{ %}
4634  interface(CONST_INTER);
4635%}
4636
4637// Long Immediate: low 32-bit mask (the value 0xFFFFFFFF)
4638operand immL_32bits()
4639%{
4640  predicate(n->get_long() == 0xFFFFFFFFL);
4641  match(ConL);
4642  op_cost(20);
4643
4644  format %{ %}
4645  interface(CONST_INTER);
4646%}
4647
4648// Float Immediate zero: ConF for which jint_cast of the value equals 0
4649operand immF0()
4650%{
4651  predicate(jint_cast(n->getf()) == 0);  // NOTE(review): presumably a bit-pattern test, which would exclude -0.0f; confirm
4652  match(ConF);
4653
4654  op_cost(5);
4655  format %{ %}
4656  interface(CONST_INTER);
4657%}
4658
4659// Float Immediate: matches any ConF node (no value predicate)
4660operand immF()
4661%{
4662  match(ConF);
4663
4664  op_cost(15);
4665  format %{ %}
4666  interface(CONST_INTER);
4667%}
4668
4669// Double Immediate zero: ConD for which jlong_cast of the value equals 0
4670operand immD0()
4671%{
4672  predicate(jlong_cast(n->getd()) == 0);  // NOTE(review): presumably a bit-pattern test, which would exclude -0.0; confirm
4673  match(ConD);
4674
4675  op_cost(5);
4676  format %{ %}
4677  interface(CONST_INTER);
4678%}
4679
4680// Double Immediate: matches any ConD node (no value predicate)
4681operand immD()
4682%{
4683  match(ConD);
4684
4685  op_cost(15);
4686  format %{ %}
4687  interface(CONST_INTER);
4688%}
4689
4690// Immediates for special shifts (sign extend)
4691
4692// Constant for special shifts: int immediate whose value is exactly 16
4693operand immI_16()
4694%{
4695  predicate(n->get_int() == 16);
4696  match(ConI);
4697
4698  format %{ %}
4699  interface(CONST_INTER);
4700%}
4701
4702operand immI_24()  // Int immediate whose value is exactly 24
4703%{
4704  predicate(n->get_int() == 24);
4705  match(ConI);
4706
4707  format %{ %}
4708  interface(CONST_INTER);
4709%}
4710
4711// Constant for byte-wide masking: int immediate whose value is exactly 255
4712operand immI_255()
4713%{
4714  predicate(n->get_int() == 255);
4715  match(ConI);
4716
4717  format %{ %}
4718  interface(CONST_INTER);
4719%}
4720
4721// Constant for short-wide masking: int immediate whose value is exactly 65535
4722operand immI_65535()
4723%{
4724  predicate(n->get_int() == 65535);
4725  match(ConI);
4726
4727  format %{ %}
4728  interface(CONST_INTER);
4729%}
4730
4731// Constant for byte-wide masking: long immediate whose value is exactly 255
4732operand immL_255()
4733%{
4734  predicate(n->get_long() == 255);
4735  match(ConL);
4736
4737  format %{ %}
4738  interface(CONST_INTER);
4739%}
4740
4741// Constant for short-wide masking: long immediate whose value is exactly 65535
4742operand immL_65535()
4743%{
4744  predicate(n->get_long() == 65535);
4745  match(ConL);
4746
4747  format %{ %}
4748  interface(CONST_INTER);
4749%}
4750
4751// Register Operands
4752// Integer Register: allocated in register class int_reg
4753operand rRegI()
4754%{
4755  constraint(ALLOC_IN_RC(int_reg));
4756  match(RegI);
4757
4758  match(rax_RegI);  // the fixed-register int operands are also listed as matches
4759  match(rbx_RegI);
4760  match(rcx_RegI);
4761  match(rdx_RegI);
4762  match(rdi_RegI);
4763
4764  format %{ %}
4765  interface(REG_INTER);
4766%}
4767
4768// Special Registers: int operand allocated in register class int_rax_reg
4769operand rax_RegI()
4770%{
4771  constraint(ALLOC_IN_RC(int_rax_reg));
4772  match(RegI);
4773  match(rRegI);
4774
4775  format %{ "RAX" %}
4776  interface(REG_INTER);
4777%}
4778
4779// Special Registers: int operand allocated in register class int_rbx_reg
4780operand rbx_RegI()
4781%{
4782  constraint(ALLOC_IN_RC(int_rbx_reg));
4783  match(RegI);
4784  match(rRegI);
4785
4786  format %{ "RBX" %}
4787  interface(REG_INTER);
4788%}
4789
4790operand rcx_RegI()  // Int operand allocated in register class int_rcx_reg
4791%{
4792  constraint(ALLOC_IN_RC(int_rcx_reg));
4793  match(RegI);
4794  match(rRegI);
4795
4796  format %{ "RCX" %}
4797  interface(REG_INTER);
4798%}
4799
4800operand rdx_RegI()  // Int operand allocated in register class int_rdx_reg
4801%{
4802  constraint(ALLOC_IN_RC(int_rdx_reg));
4803  match(RegI);
4804  match(rRegI);
4805
4806  format %{ "RDX" %}
4807  interface(REG_INTER);
4808%}
4809
4810operand rdi_RegI()  // Int operand allocated in register class int_rdi_reg
4811%{
4812  constraint(ALLOC_IN_RC(int_rdi_reg));
4813  match(RegI);
4814  match(rRegI);
4815
4816  format %{ "RDI" %}
4817  interface(REG_INTER);
4818%}
4819
4820operand no_rcx_RegI()  // Int operand allocated in register class int_no_rcx_reg
4821%{
4822  constraint(ALLOC_IN_RC(int_no_rcx_reg));
4823  match(RegI);
4824  match(rax_RegI);  // rax, rbx, rdx and rdi variants are listed; rcx_RegI is not
4825  match(rbx_RegI);
4826  match(rdx_RegI);
4827  match(rdi_RegI);
4828
4829  format %{ %}
4830  interface(REG_INTER);
4831%}
4832
4833operand no_rax_rdx_RegI()  // Int operand allocated in register class int_no_rax_rdx_reg
4834%{
4835  constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
4836  match(RegI);
4837  match(rbx_RegI);  // rbx, rcx and rdi variants are listed; rax_RegI and rdx_RegI are not
4838  match(rcx_RegI);
4839  match(rdi_RegI);
4840
4841  format %{ %}
4842  interface(REG_INTER);
4843%}
4844
4845// Pointer Register: allocated in register class any_reg
4846operand any_RegP()
4847%{
4848  constraint(ALLOC_IN_RC(any_reg));
4849  match(RegP);
4850  match(rax_RegP);
4851  match(rbx_RegP);
4852  match(rdi_RegP);
4853  match(rsi_RegP);
4854  match(rbp_RegP);
4855  match(r15_RegP);
4856  match(rRegP);  // the general pointer register operand is accepted as well
4857
4858  format %{ %}
4859  interface(REG_INTER);
4860%}
4861
4862operand rRegP()  // Pointer operand allocated in register class ptr_reg
4863%{
4864  constraint(ALLOC_IN_RC(ptr_reg));
4865  match(RegP);
4866  match(rax_RegP);
4867  match(rbx_RegP);
4868  match(rdi_RegP);
4869  match(rsi_RegP);
4870  match(rbp_RegP);
4871  match(r15_RegP);  // See Q&A below about r15_RegP.
4872
4873  format %{ %}
4874  interface(REG_INTER);
4875%}
4876
4877operand rRegN() %{  // RegN (narrow pointer) operand, allocated in register class int_reg
4878  constraint(ALLOC_IN_RC(int_reg));
4879  match(RegN);
4880
4881  format %{ %}
4882  interface(REG_INTER);
4883%}
4884
4885// Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
4886// Answer: Operand match rules govern the DFA as it processes instruction inputs.
4887// It's fine for an instruction input which expects rRegP to match a r15_RegP.
4888// The output of an instruction is controlled by the allocator, which respects
4889// register class masks, not match rules.  Unless an instruction mentions
4890// r15_RegP or any_RegP explicitly as its output, r15 will not be considered
4891// by the allocator as an input.
4892
4893operand no_rax_RegP()  // Pointer operand allocated in register class ptr_no_rax_reg
4894%{
4895  constraint(ALLOC_IN_RC(ptr_no_rax_reg));
4896  match(RegP);
4897  match(rbx_RegP);
4898  match(rsi_RegP);
4899  match(rdi_RegP);
4900
4901  format %{ %}
4902  interface(REG_INTER);
4903%}
4904
4905operand no_rbp_RegP()  // Pointer operand allocated in register class ptr_no_rbp_reg
4906%{
4907  constraint(ALLOC_IN_RC(ptr_no_rbp_reg));
4908  match(RegP);
4909  match(rbx_RegP);
4910  match(rsi_RegP);
4911  match(rdi_RegP);
4912
4913  format %{ %}
4914  interface(REG_INTER);
4915%}
4916
4917operand no_rax_rbx_RegP()  // Pointer operand allocated in register class ptr_no_rax_rbx_reg
4918%{
4919  constraint(ALLOC_IN_RC(ptr_no_rax_rbx_reg));
4920  match(RegP);
4921  match(rsi_RegP);
4922  match(rdi_RegP);
4923
4924  format %{ %}
4925  interface(REG_INTER);
4926%}
4927
4928// Special Registers
4929// Return a pointer value: pointer operand allocated in register class ptr_rax_reg
4930operand rax_RegP()
4931%{
4932  constraint(ALLOC_IN_RC(ptr_rax_reg));
4933  match(RegP);
4934  match(rRegP);
4935
4936  format %{ %}
4937  interface(REG_INTER);
4938%}
4939
4940// Special Registers
4941// Return a compressed pointer value: RegN operand allocated in register class int_rax_reg
4942operand rax_RegN()
4943%{
4944  constraint(ALLOC_IN_RC(int_rax_reg));
4945  match(RegN);
4946  match(rRegN);
4947
4948  format %{ %}
4949  interface(REG_INTER);
4950%}
4951
4952// Used in AtomicAdd: pointer operand allocated in register class ptr_rbx_reg
4953operand rbx_RegP()
4954%{
4955  constraint(ALLOC_IN_RC(ptr_rbx_reg));
4956  match(RegP);
4957  match(rRegP);
4958
4959  format %{ %}
4960  interface(REG_INTER);
4961%}
4962
4963operand rsi_RegP()  // Pointer operand allocated in register class ptr_rsi_reg
4964%{
4965  constraint(ALLOC_IN_RC(ptr_rsi_reg));
4966  match(RegP);
4967  match(rRegP);
4968
4969  format %{ %}
4970  interface(REG_INTER);
4971%}
4972
4973// Used in rep stosq: pointer operand allocated in register class ptr_rdi_reg
4974operand rdi_RegP()
4975%{
4976  constraint(ALLOC_IN_RC(ptr_rdi_reg));
4977  match(RegP);
4978  match(rRegP);
4979
4980  format %{ %}
4981  interface(REG_INTER);
4982%}
4983
4984operand rbp_RegP()  // Pointer operand allocated in register class ptr_rbp_reg
4985%{
4986  constraint(ALLOC_IN_RC(ptr_rbp_reg));
4987  match(RegP);
4988  match(rRegP);
4989
4990  format %{ %}
4991  interface(REG_INTER);
4992%}
4993
4994operand r15_RegP()  // Pointer operand in class ptr_r15_reg (r15 is the read-only TLS register per the Q&A note in this file)
4995%{
4996  constraint(ALLOC_IN_RC(ptr_r15_reg));
4997  match(RegP);
4998  match(rRegP);
4999
5000  format %{ %}
5001  interface(REG_INTER);
5002%}
5003
5004operand rRegL()  // Long operand allocated in register class long_reg
5005%{
5006  constraint(ALLOC_IN_RC(long_reg));
5007  match(RegL);
5008  match(rax_RegL);
5009  match(rdx_RegL);
5010
5011  format %{ %}
5012  interface(REG_INTER);
5013%}
5014
5015// Special Registers: long operand allocated in register class long_no_rax_rdx_reg
5016operand no_rax_rdx_RegL()
5017%{
5018  constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
5019  match(RegL);
5020  match(rRegL);
5021
5022  format %{ %}
5023  interface(REG_INTER);
5024%}
5025
5026operand no_rax_RegL()  // Long operand; see the review note on its register class below
5027%{
5028  constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));  // NOTE(review): same class as no_rax_rdx_RegL although rdx_RegL is matched below; confirm this is intended
5029  match(RegL);
5030  match(rRegL);
5031  match(rdx_RegL);
5032
5033  format %{ %}
5034  interface(REG_INTER);
5035%}
5036
5037operand no_rcx_RegL()  // Long operand allocated in register class long_no_rcx_reg
5038%{
5039  constraint(ALLOC_IN_RC(long_no_rcx_reg));
5040  match(RegL);
5041  match(rRegL);
5042
5043  format %{ %}
5044  interface(REG_INTER);
5045%}
5046
5047operand rax_RegL()  // Long operand allocated in register class long_rax_reg
5048%{
5049  constraint(ALLOC_IN_RC(long_rax_reg));
5050  match(RegL);
5051  match(rRegL);
5052
5053  format %{ "RAX" %}
5054  interface(REG_INTER);
5055%}
5056
5057operand rcx_RegL()  // Long operand allocated in register class long_rcx_reg
5058%{
5059  constraint(ALLOC_IN_RC(long_rcx_reg));
5060  match(RegL);
5061  match(rRegL);
5062
5063  format %{ %}
5064  interface(REG_INTER);
5065%}
5066
5067operand rdx_RegL()  // Long operand allocated in register class long_rdx_reg
5068%{
5069  constraint(ALLOC_IN_RC(long_rdx_reg));
5070  match(RegL);
5071  match(rRegL);
5072
5073  format %{ %}
5074  interface(REG_INTER);
5075%}
5076
5077// Flags register, used as output of compare instructions (register class int_flags)
5078operand rFlagsReg()
5079%{
5080  constraint(ALLOC_IN_RC(int_flags));
5081  match(RegFlags);
5082
5083  format %{ "RFLAGS" %}
5084  interface(REG_INTER);
5085%}
5086
5087// Flags register, used as output of FLOATING POINT compare instructions
5088operand rFlagsRegU()  // same register class (int_flags) as rFlagsReg; distinct operand type
5089%{
5090  constraint(ALLOC_IN_RC(int_flags));
5091  match(RegFlags);
5092
5093  format %{ "RFLAGS_U" %}
5094  interface(REG_INTER);
5095%}
5096
5097operand rFlagsRegUCF() %{  // Flags operand in class int_flags whose own predicate is constant false
5098  constraint(ALLOC_IN_RC(int_flags));
5099  match(RegFlags);
5100  predicate(false);  // NOTE(review): presumably keeps the matcher from selecting this operand for a plain RegFlags; confirm
5101
5102  format %{ "RFLAGS_U_CF" %}
5103  interface(REG_INTER);
5104%}
5105
5106// Float register operands: allocated in register class float_reg
5107operand regF()
5108%{
5109  constraint(ALLOC_IN_RC(float_reg));
5110  match(RegF);
5111
5112  format %{ %}
5113  interface(REG_INTER);
5114%}
5115
5116// Double register operands: allocated in register class double_reg
5117operand regD()
5118%{
5119  constraint(ALLOC_IN_RC(double_reg));
5120  match(RegD);
5121
5122  format %{ %}
5123  interface(REG_INTER);
5124%}
5125
5126
5127//----------Memory Operands----------------------------------------------------
5128// Direct Memory Operand
5129// operand direct(immP addr)
5130// %{
5131//   match(addr);
5132
5133//   format %{ "[$addr]" %}
5134//   interface(MEMORY_INTER) %{
5135//     base(0xFFFFFFFF);
5136//     index(0x4);
5137//     scale(0x0);
5138//     disp($addr);
5139//   %}
5140// %}
5141
5142// Indirect Memory Operand: address is the base register alone
5143operand indirect(any_RegP reg)
5144%{
5145  constraint(ALLOC_IN_RC(ptr_reg));
5146  match(reg);
5147
5148  format %{ "[$reg]" %}
5149  interface(MEMORY_INTER) %{
5150    base($reg);
5151    index(0x4);  // 0x4: no index
5152    scale(0x0);  // no scale
5153    disp(0x0);   // no displacement
5154  %}
5155%}
5156
5157// Indirect Memory Plus Short Offset Operand: base register plus an immL8 displacement
5158operand indOffset8(any_RegP reg, immL8 off)
5159%{
5160  constraint(ALLOC_IN_RC(ptr_reg));
5161  match(AddP reg off);
5162
5163  format %{ "[$reg + $off (8-bit)]" %}
5164  interface(MEMORY_INTER) %{
5165    base($reg);
5166    index(0x4);  // 0x4: no index
5167    scale(0x0);
5168    disp($off);
5169  %}
5170%}
5171
5172// Indirect Memory Plus Long Offset Operand: base register plus an immL32 displacement
5173operand indOffset32(any_RegP reg, immL32 off)
5174%{
5175  constraint(ALLOC_IN_RC(ptr_reg));
5176  match(AddP reg off);
5177
5178  format %{ "[$reg + $off (32-bit)]" %}
5179  interface(MEMORY_INTER) %{
5180    base($reg);
5181    index(0x4);  // 0x4: no index
5182    scale(0x0);
5183    disp($off);
5184  %}
5185%}
5186
5187// Indirect Memory Plus Index Register Plus Offset Operand (unscaled long index)
5188operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
5189%{
5190  constraint(ALLOC_IN_RC(ptr_reg));
5191  match(AddP (AddP reg lreg) off);
5192
5193  op_cost(10);
5194  format %{"[$reg + $off + $lreg]" %}
5195  interface(MEMORY_INTER) %{
5196    base($reg);
5197    index($lreg);
5198    scale(0x0);  // index is used unscaled
5199    disp($off);
5200  %}
5201%}
5202
5203// Indirect Memory Plus Index Register Operand (no scale, no displacement)
5204operand indIndex(any_RegP reg, rRegL lreg)
5205%{
5206  constraint(ALLOC_IN_RC(ptr_reg));
5207  match(AddP reg lreg);
5208
5209  op_cost(10);
5210  format %{"[$reg + $lreg]" %}
5211  interface(MEMORY_INTER) %{
5212    base($reg);
5213    index($lreg);
5214    scale(0x0);
5215    disp(0x0);
5216  %}
5217%}
5218
5219// Indirect Memory Times Scale Plus Index Register: base plus (long index << immI2 scale)
5220operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
5221%{
5222  constraint(ALLOC_IN_RC(ptr_reg));
5223  match(AddP reg (LShiftL lreg scale));
5224
5225  op_cost(10);
5226  format %{"[$reg + $lreg << $scale]" %}
5227  interface(MEMORY_INTER) %{
5228    base($reg);
5229    index($lreg);
5230    scale($scale);  // shift count from the matched LShiftL becomes the scale
5231    disp(0x0);
5232  %}
5233%}
5234
5235// Indirect Memory Times Scale Plus Index Register Plus Offset Operand (long index)
5236operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
5237%{
5238  constraint(ALLOC_IN_RC(ptr_reg));
5239  match(AddP (AddP reg (LShiftL lreg scale)) off);
5240
5241  op_cost(10);
5242  format %{"[$reg + $off + $lreg << $scale]" %}
5243  interface(MEMORY_INTER) %{
5244    base($reg);
5245    index($lreg);
5246    scale($scale);  // shift count from the matched LShiftL becomes the scale
5247    disp($off);
5248  %}
5249%}
5250
5251// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand (int index via ConvI2L)
5252operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
5253%{
5254  constraint(ALLOC_IN_RC(ptr_reg));
5255  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);  // requires a long type with _lo >= 0; presumably the node reached is the ConvI2L, confirm
5256  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
5257
5258  op_cost(10);
5259  format %{"[$reg + $off + $idx << $scale]" %}
5260  interface(MEMORY_INTER) %{
5261    base($reg);
5262    index($idx);  // the int register itself is used as the index
5263    scale($scale);
5264    disp($off);
5265  %}
5266%}
5267
5268// Indirect Narrow Oop Plus Offset Operand
5269// Note: x86 architecture doesn't support "scale * index + offset" without a base,
5270// so we can't free r12 even with Universe::narrow_oop_base() == NULL.
5271operand indCompressedOopOffset(rRegN reg, immL32 off) %{
5272  predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));  // only when compressed oops use a non-zero shift
5273  constraint(ALLOC_IN_RC(ptr_reg));
5274  match(AddP (DecodeN reg) off);
5275
5276  op_cost(10);
5277  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
5278  interface(MEMORY_INTER) %{
5279    base(0xc); // R12
5280    index($reg);  // the narrow oop register is the index
5281    scale(0x3);   // matches the "<< 3" shown in the format string
5282    disp($off);
5283  %}
5284%}
5285
5286// Indirect Memory Operand through a narrow oop register (only when the narrow oop shift is 0)
5287operand indirectNarrow(rRegN reg)
5288%{
5289  predicate(Universe::narrow_oop_shift() == 0);
5290  constraint(ALLOC_IN_RC(ptr_reg));
5291  match(DecodeN reg);
5292
5293  format %{ "[$reg]" %}
5294  interface(MEMORY_INTER) %{
5295    base($reg);  // the narrow register is used directly as the base
5296    index(0x4);  // 0x4: no index
5297    scale(0x0);
5298    disp(0x0);
5299  %}
5300%}
5301
5302// Indirect Memory Plus Short Offset Operand, narrow oop base (only when the narrow oop shift is 0)
5303operand indOffset8Narrow(rRegN reg, immL8 off)
5304%{
5305  predicate(Universe::narrow_oop_shift() == 0);
5306  constraint(ALLOC_IN_RC(ptr_reg));
5307  match(AddP (DecodeN reg) off);
5308
5309  format %{ "[$reg + $off (8-bit)]" %}
5310  interface(MEMORY_INTER) %{
5311    base($reg);
5312    index(0x4);  // 0x4: no index
5313    scale(0x0);
5314    disp($off);
5315  %}
5316%}
5317
5318// Indirect Memory Plus Long Offset Operand, narrow oop base (only when the narrow oop shift is 0)
5319operand indOffset32Narrow(rRegN reg, immL32 off)
5320%{
5321  predicate(Universe::narrow_oop_shift() == 0);
5322  constraint(ALLOC_IN_RC(ptr_reg));
5323  match(AddP (DecodeN reg) off);
5324
5325  format %{ "[$reg + $off (32-bit)]" %}
5326  interface(MEMORY_INTER) %{
5327    base($reg);
5328    index(0x4);  // 0x4: no index
5329    scale(0x0);
5330    disp($off);
5331  %}
5332%}
5333
5334// Indirect Memory Plus Index Register Plus Offset Operand, narrow oop base (only when the narrow oop shift is 0)
5335operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
5336%{
5337  predicate(Universe::narrow_oop_shift() == 0);
5338  constraint(ALLOC_IN_RC(ptr_reg));
5339  match(AddP (AddP (DecodeN reg) lreg) off);
5340
5341  op_cost(10);
5342  format %{"[$reg + $off + $lreg]" %}
5343  interface(MEMORY_INTER) %{
5344    base($reg);
5345    index($lreg);
5346    scale(0x0);  // index is used unscaled
5347    disp($off);
5348  %}
5349%}
5350
5351// Indirect Memory Plus Index Register Operand, narrow oop base (only when the narrow oop shift is 0)
5352operand indIndexNarrow(rRegN reg, rRegL lreg)
5353%{
5354  predicate(Universe::narrow_oop_shift() == 0);
5355  constraint(ALLOC_IN_RC(ptr_reg));
5356  match(AddP (DecodeN reg) lreg);
5357
5358  op_cost(10);
5359  format %{"[$reg + $lreg]" %}
5360  interface(MEMORY_INTER) %{
5361    base($reg);
5362    index($lreg);
5363    scale(0x0);
5364    disp(0x0);
5365  %}
5366%}
5367
5368// Indirect Memory Times Scale Plus Index Register, narrow oop base (only when the narrow oop shift is 0)
5369operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
5370%{
5371  predicate(Universe::narrow_oop_shift() == 0);
5372  constraint(ALLOC_IN_RC(ptr_reg));
5373  match(AddP (DecodeN reg) (LShiftL lreg scale));
5374
5375  op_cost(10);
5376  format %{"[$reg + $lreg << $scale]" %}
5377  interface(MEMORY_INTER) %{
5378    base($reg);
5379    index($lreg);
5380    scale($scale);  // shift count from the matched LShiftL becomes the scale
5381    disp(0x0);
5382  %}
5383%}
5384
5385// Indirect Memory Times Scale Plus Index Register Plus Offset Operand, narrow oop base (only when the narrow oop shift is 0)
5386operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
5387%{
5388  predicate(Universe::narrow_oop_shift() == 0);
5389  constraint(ALLOC_IN_RC(ptr_reg));
5390  match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
5391
5392  op_cost(10);
5393  format %{"[$reg + $off + $lreg << $scale]" %}
5394  interface(MEMORY_INTER) %{
5395    base($reg);
5396    index($lreg);
5397    scale($scale);  // shift count from the matched LShiftL becomes the scale
5398    disp($off);
5399  %}
5400%}
5401
5402// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand, narrow oop base
5403operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
5404%{
5405  constraint(ALLOC_IN_RC(ptr_reg));
5406  predicate(Universe::narrow_oop_shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);  // shift must be 0 and the reached long type must have _lo >= 0
5407  match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
5408
5409  op_cost(10);
5410  format %{"[$reg + $off + $idx << $scale]" %}
5411  interface(MEMORY_INTER) %{
5412    base($reg);
5413    index($idx);  // the int register itself is used as the index
5414    scale($scale);
5415    disp($off);
5416  %}
5417%}
5418
5419
5420//----------Special Memory Operands--------------------------------------------
5421// Stack Slot Operand - This operand is used for loading and storing temporary
5422//                      values on the stack where a match requires a value to
5423//                      flow through memory.
5424operand stackSlotP(sRegP reg)  // pointer-typed stack slot (sRegP)
5425%{
5426  constraint(ALLOC_IN_RC(stack_slots));
5427  // No match rule because this operand is only generated in matching
5428
5429  format %{ "[$reg]" %}
5430  interface(MEMORY_INTER) %{
5431    base(0x4);   // RSP
5432    index(0x4);  // No Index
5433    scale(0x0);  // No Scale
5434    disp($reg);  // Stack Offset
5435  %}
5436%}
5437
5438operand stackSlotI(sRegI reg)  // int-typed stack slot (sRegI), addressed off RSP
5439%{
5440  constraint(ALLOC_IN_RC(stack_slots));
5441  // No match rule because this operand is only generated in matching
5442
5443  format %{ "[$reg]" %}
5444  interface(MEMORY_INTER) %{
5445    base(0x4);   // RSP
5446    index(0x4);  // No Index
5447    scale(0x0);  // No Scale
5448    disp($reg);  // Stack Offset
5449  %}
5450%}
5451
5452operand stackSlotF(sRegF reg)  // float-typed stack slot (sRegF), addressed off RSP
5453%{
5454  constraint(ALLOC_IN_RC(stack_slots));
5455  // No match rule because this operand is only generated in matching
5456
5457  format %{ "[$reg]" %}
5458  interface(MEMORY_INTER) %{
5459    base(0x4);   // RSP
5460    index(0x4);  // No Index
5461    scale(0x0);  // No Scale
5462    disp($reg);  // Stack Offset
5463  %}
5464%}
5465
5466operand stackSlotD(sRegD reg)  // double-typed stack slot (sRegD), addressed off RSP
5467%{
5468  constraint(ALLOC_IN_RC(stack_slots));
5469  // No match rule because this operand is only generated in matching
5470
5471  format %{ "[$reg]" %}
5472  interface(MEMORY_INTER) %{
5473    base(0x4);   // RSP
5474    index(0x4);  // No Index
5475    scale(0x0);  // No Scale
5476    disp($reg);  // Stack Offset
5477  %}
5478%}
5479operand stackSlotL(sRegL reg)  // long-typed stack slot (sRegL), addressed off RSP
5480%{
5481  constraint(ALLOC_IN_RC(stack_slots));
5482  // No match rule because this operand is only generated in matching
5483
5484  format %{ "[$reg]" %}
5485  interface(MEMORY_INTER) %{
5486    base(0x4);   // RSP
5487    index(0x4);  // No Index
5488    scale(0x0);  // No Scale
5489    disp($reg);  // Stack Offset
5490  %}
5491%}
5492
5493//----------Conditional Branch Operands----------------------------------------
5494// Comparison Op  - This is the operation of the comparison, and is limited to
5495//                  the following set of codes:
5496//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
5497//
5498// Other attributes of the comparison, such as unsignedness, are specified
5499// by the comparison instruction that sets a condition code flags register.
5500// That result is represented by a flags operand whose subtype is appropriate
5501// to the unsignedness (etc.) of the comparison.
5502//
5503// Later, the instruction which matches both the Comparison Op (a Bool) and
5504// the flags (produced by the Cmp) specifies the coding of the comparison op
5505// by matching a specific subtype of Bool operand below, such as cmpOpU.
5506
5507// Comparison Code: maps each Bool test to a condition encoding and mnemonic (l/ge/le/g forms)
5508operand cmpOp()
5509%{
5510  match(Bool);
5511
5512  format %{ "" %}
5513  interface(COND_INTER) %{
5514    equal(0x4, "e");
5515    not_equal(0x5, "ne");
5516    less(0xC, "l");
5517    greater_equal(0xD, "ge");
5518    less_equal(0xE, "le");
5519    greater(0xF, "g");
5520  %}
5521%}
5522
5523// Comparison Code, unsigned compare.  Used by FP also, with
5524// C2 (unordered) turned into GT or LT already.  The other bits
5525// C0 and C3 are turned into Carry & Zero flags.
5526operand cmpOpU()  // same shape as cmpOp but with the b/nb/be/nbe encodings for the ordering tests
5527%{
5528  match(Bool);
5529
5530  format %{ "" %}
5531  interface(COND_INTER) %{
5532    equal(0x4, "e");
5533    not_equal(0x5, "ne");
5534    less(0x2, "b");
5535    greater_equal(0x3, "nb");
5536    less_equal(0x6, "be");
5537    greater(0x7, "nbe");
5538  %}
5539%}
5540
5541
5542// Floating comparisons that don't require any fixup for the unordered case
5543operand cmpOpUCF() %{
5544  match(Bool);
5545  predicate(n->as_Bool()->_test._test == BoolTest::lt ||  // only the four ordering tests qualify
5546            n->as_Bool()->_test._test == BoolTest::ge ||
5547            n->as_Bool()->_test._test == BoolTest::le ||
5548            n->as_Bool()->_test._test == BoolTest::gt);
5549  format %{ "" %}
5550  interface(COND_INTER) %{  // same encodings as cmpOpU
5551    equal(0x4, "e");
5552    not_equal(0x5, "ne");
5553    less(0x2, "b");
5554    greater_equal(0x3, "nb");
5555    less_equal(0x6, "be");
5556    greater(0x7, "nbe");
5557  %}
5558%}
5559
5560
5561// Floating comparisons that can be fixed up with extra conditional jumps
5562operand cmpOpUCF2() %{
5563  match(Bool);
5564  predicate(n->as_Bool()->_test._test == BoolTest::ne ||  // only the equality tests (ne, eq) qualify
5565            n->as_Bool()->_test._test == BoolTest::eq);
5566  format %{ "" %}
5567  interface(COND_INTER) %{  // same encodings as cmpOpU
5568    equal(0x4, "e");
5569    not_equal(0x5, "ne");
5570    less(0x2, "b");
5571    greater_equal(0x3, "nb");
5572    less_equal(0x6, "be");
5573    greater(0x7, "nbe");
5574  %}
5575%}
5576
5577
5578//----------OPERAND CLASSES----------------------------------------------------
5579// Operand Classes are groups of operands that are used to simplify
5580// instruction definitions by not requiring the AD writer to specify separate
5581// instructions for every form of operand when the instruction accepts
5582// multiple operand types with the same basic encoding and format.  The classic
5583// case of this is memory operands.
5584
5585opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
5586               indIndexScale, indIndexScaleOffset, indPosIndexScaleOffset,
5587               indCompressedOopOffset,
5588               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
5589               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
5590               indIndexScaleOffsetNarrow, indPosIndexScaleOffsetNarrow);  // groups the register-based and narrow-oop addressing operands; stack slot operands are not listed
5591
5592//----------PIPELINE-----------------------------------------------------------
5593// Rules which define the behavior of the target architectures pipeline.
5594pipeline %{
5595
5596//----------ATTRIBUTES---------------------------------------------------------
5597attributes %{
5598  variable_size_instructions;        // Instructions are variable-sized
5599  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
5600  instruction_unit_size = 1;         // Instruction size unit is 1 byte
5601  instruction_fetch_unit_size = 16;  // The processor fetches one line
5602  instruction_fetch_units = 1;       // of 16 bytes
5603
5604  // List of nop instructions
5605  nops( MachNop );
5606%}
5607
5608//----------RESOURCES----------------------------------------------------------
5609// Resources are the functional units available to the machine
5610
5611// Generic P2/P3 pipeline
5612// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
5613// 3 instructions decoded per cycle.
5614// 2 load/store ops per cycle, 1 branch, 1 FPU,
5615// 3 ALU op, only ALU0 handles mul instructions.
5616resources( D0, D1, D2, DECODE = D0 | D1 | D2,  // DECODE: any of the three decoders
5617           MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,  // NOTE(review): three MS units are declared although the comment above says 2 load/store ops; confirm
5618           BR, FPU,
5619           ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);  // ALU: any of the three ALUs
5620
5621//----------PIPELINE DESCRIPTION-----------------------------------------------
5622// Pipeline Description specifies the stages in the machine's pipeline
5623
5624// Generic P2/P3 pipeline: six stages, S0 through S5
5625pipe_desc(S0, S1, S2, S3, S4, S5);
5626
5627//----------PIPELINE CLASSES---------------------------------------------------
5628// Pipeline Classes describe the stages in which input and output are
5629// referenced by the hardware pipeline.
5630
5631// Naming convention: ialu or fpu
5632// Then: _reg
5633// Then: _reg if there is a 2nd register
5634// Then: _long if it's a pair of instructions implementing a long
5635// Then: _fat if it requires the big decoder
5636//   Or: _mem if it requires the big decoder and a memory unit.
5637
5638// Integer ALU reg operation: dst is both read (S3) and written (S4)
5639pipe_class ialu_reg(rRegI dst)
5640%{
5641    single_instruction;
5642    dst    : S4(write);
5643    dst    : S3(read);
5644    DECODE : S0;        // any decoder
5645    ALU    : S3;        // any alu
5646%}
5647
5648// Long ALU reg operation: counted as two instructions
5649pipe_class ialu_reg_long(rRegL dst)
5650%{
5651    instruction_count(2);
5652    dst    : S4(write);
5653    dst    : S3(read);
5654    DECODE : S0(2);     // any 2 decoders
5655    ALU    : S3(2);     // any 2 alus
5656%}
5657
5658// Integer ALU reg operation using big decoder (D0 instead of any DECODE unit)
5659pipe_class ialu_reg_fat(rRegI dst)
5660%{
5661    single_instruction;
5662    dst    : S4(write);
5663    dst    : S3(read);
5664    D0     : S0;        // big decoder only
5665    ALU    : S3;        // any alu
5666%}
5667
5668// Long ALU reg operation using big decoder: counted as two instructions
5669pipe_class ialu_reg_long_fat(rRegL dst)
5670%{
5671    instruction_count(2);
5672    dst    : S4(write);
5673    dst    : S3(read);
5674    D0     : S0(2);     // big decoder only; twice
5675    ALU    : S3(2);     // any 2 alus
5676%}
5677
5678// Integer ALU reg-reg operation: src is read in S3, dst is written in S4.
5679pipe_class ialu_reg_reg(rRegI dst, rRegI src)
5680%{
5681    single_instruction;
5682    dst    : S4(write);
5683    src    : S3(read);
5684    DECODE : S0;        // any decoder
5685    ALU    : S3;        // any alu
5686%}
5687
5688// Long ALU reg-reg operation: modelled as two instructions; src is read
// in S3, dst is written in S4.
5689pipe_class ialu_reg_reg_long(rRegL dst, rRegL src)
5690%{
5691    instruction_count(2);
5692    dst    : S4(write);
5693    src    : S3(read);
5694    DECODE : S0(2);     // any 2 decoders
5695    ALU    : S3(2);     // any 2 alus
5696%}
5697
5698// Integer ALU reg-reg operation using big decoder.  Despite the name,
// the source operand is declared as 'memory'; the lea instructions in this
// file use this class.
5699pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
5700%{
5701    single_instruction;
5702    dst    : S4(write);
5703    src    : S3(read);
5704    D0     : S0;        // big decoder only
5705    ALU    : S3;        // any alu
5706%}
5707
5708// Long ALU reg-reg operation using big decoder: two instructions, both
// decoded on D0.
5709pipe_class ialu_reg_reg_long_fat(rRegL dst, rRegL src)
5710%{
5711    instruction_count(2);
5712    dst    : S4(write);
5713    src    : S3(read);
5714    D0     : S0(2);     // big decoder only; twice
5715    ALU    : S3(2);     // any 2 alus
5716%}
5717
5718// Integer ALU reg-mem operation: the memory operand is read in S3, the
// ALU is used one stage later (S4) and dst is written in S5.
5719pipe_class ialu_reg_mem(rRegI dst, memory mem)
5720%{
5721    single_instruction;
5722    dst    : S5(write);
5723    mem    : S3(read);
5724    D0     : S0;        // big decoder only
5725    ALU    : S4;        // any alu
5726    MEM    : S3;        // any mem
5727%}
5728
5729// Integer mem operation (prefetch): touches a memory unit only; no ALU
// resource and no register operand is declared.
5730pipe_class ialu_mem(memory mem)
5731%{
5732    single_instruction;
5733    mem    : S3(read);
5734    D0     : S0;        // big decoder only
5735    MEM    : S3;        // any mem
5736%}
5737
5738// Integer Store to Memory: the address operand is read in S3 and the
// register being stored is read in S5.
5739pipe_class ialu_mem_reg(memory mem, rRegI src)
5740%{
5741    single_instruction;
5742    mem    : S3(read);
5743    src    : S5(read);
5744    D0     : S0;        // big decoder only
5745    ALU    : S4;        // any alu
5746    MEM    : S3;        // any mem
5747%}
5748
5749// // Long Store to Memory
5750// pipe_class ialu_mem_long_reg(memory mem, rRegL src)
5751// %{
5752//     instruction_count(2);
5753//     mem    : S3(read);
5754//     src    : S5(read);
5755//     D0     : S0(2);          // big decoder only; twice
5756//     ALU    : S4(2);     // any 2 alus
5757//     MEM    : S3(2);  // Both mems
5758// %}
5759
5760// Integer Store to Memory with no register source declared (the name
// suggests the stored value is an immediate).
5761pipe_class ialu_mem_imm(memory mem)
5762%{
5763    single_instruction;
5764    mem    : S3(read);
5765    D0     : S0;        // big decoder only
5766    ALU    : S4;        // any alu
5767    MEM    : S3;        // any mem
5768%}
5769
5770// Integer ALU0 reg-reg operation: like ialu_reg_reg, but pinned to the
// big decoder and to ALU0 (the resource commentary says only ALU0 handles
// mul instructions).
5771pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
5772%{
5773    single_instruction;
5774    dst    : S4(write);
5775    src    : S3(read);
5776    D0     : S0;        // Big decoder only
5777    ALU0   : S3;        // only alu0
5778%}
5779
5780// Integer ALU0 reg-mem operation: memory operand read in S3, ALU0 used
// in S4, dst written in S5.
5781pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
5782%{
5783    single_instruction;
5784    dst    : S5(write);
5785    mem    : S3(read);
5786    D0     : S0;        // big decoder only
5787    ALU0   : S4;        // ALU0 only
5788    MEM    : S3;        // any mem
5789%}
5790
5791// Integer ALU reg-reg operation that produces flags: both sources are
// read in S3 and the flags register is written in S4.
5792pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
5793%{
5794    single_instruction;
5795    cr     : S4(write);
5796    src1   : S3(read);
5797    src2   : S3(read);
5798    DECODE : S0;        // any decoder
5799    ALU    : S3;        // any alu
5800%}
5801
5802// Integer ALU reg-imm operation that produces flags: the single register
// source is read in S3 and the flags register is written in S4.
5803pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
5804%{
5805    single_instruction;
5806    cr     : S4(write);
5807    src1   : S3(read);
5808    DECODE : S0;        // any decoder
5809    ALU    : S3;        // any alu
5810%}
5811
5812// Integer ALU reg-mem operation that produces flags: register and memory
// sources are read in S3, the ALU is used in S4.
5813pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
5814%{
5815    single_instruction;
5816    cr     : S4(write);
5817    src1   : S3(read);
5818    src2   : S3(read);
5819    D0     : S0;        // big decoder only
5820    ALU    : S4;        // any alu
5821    MEM    : S3;        // any mem
5822%}
5823
5824// Four-instruction idiom over three int registers (the name suggests a
// compare-less-than sequence).  Only decode resources are claimed; no ALU
// usage is declared.
5825pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
5826%{
5827    instruction_count(4);
5828    y      : S4(read);
5829    q      : S3(read);
5830    p      : S3(read);
5831    DECODE : S0(4);     // any decoder
5832%}
5833
5834// Conditional move reg-reg: the source and the flags are read in S3 and
// dst is written in S4.  No ALU usage is declared.
5835pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
5836%{
5837    single_instruction;
5838    dst    : S4(write);
5839    src    : S3(read);
5840    cr     : S3(read);
5841    DECODE : S0;        // any decoder
5842%}
5843
5844// Conditional move reg-mem: like pipe_cmov_reg, but the source is a
// memory operand and a memory unit is claimed in S3.
5845pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
5846%{
5847    single_instruction;
5848    dst    : S4(write);
5849    src    : S3(read);
5850    cr     : S3(read);
5851    DECODE : S0;        // any decoder
5852    MEM    : S3;        // any mem
5853%}
5854
5855// Conditional move reg-reg long.
// NOTE(review): declared single_instruction yet claims DECODE twice; the
// other two-decoder classes here use instruction_count(2) -- confirm intent.
5856pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
5857%{
5858    single_instruction;
5859    dst    : S4(write);
5860    src    : S3(read);
5861    cr     : S3(read);
5862    DECODE : S0(2);     // any 2 decoders
5863%}
5864
5865// XXX
5866// // Conditional move double reg-reg
5867// pipe_class pipe_cmovD_reg( rFlagsReg cr, regDPR1 dst, regD src)
5868// %{
5869//     single_instruction;
5870//     dst    : S4(write);
5871//     src    : S3(read);
5872//     cr     : S3(read);
5873//     DECODE : S0;     // any decoder
5874// %}
5875
5876// Float single-register operation: two instructions; dst is only
// declared as read (S3), no write stage is given.
5877pipe_class fpu_reg(regD dst)
5878%{
5879    instruction_count(2);
5880    dst    : S3(read);
5881    DECODE : S0(2);     // any 2 decoders
5882    FPU    : S3;
5883%}
5884
5885// Float reg-reg operation: two instructions; src read in S3, dst
// written in S4.
5886pipe_class fpu_reg_reg(regD dst, regD src)
5887%{
5888    instruction_count(2);
5889    dst    : S4(write);
5890    src    : S3(read);
5891    DECODE : S0(2);     // any 2 decoders
5892    FPU    : S3;
5893%}
5894
5895// Float reg-reg-reg operation: three instructions, two register sources
// read in S3, dst written in S4.
5896pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
5897%{
5898    instruction_count(3);
5899    dst    : S4(write);
5900    src1   : S3(read);
5901    src2   : S3(read);
5902    DECODE : S0(3);     // any 3 decoders
5903    FPU    : S3(2);
5904%}
5905
5906// Float operation with three register sources: four instructions,
// sources read in S3, dst written in S4.
5907pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
5908%{
5909    instruction_count(4);
5910    dst    : S4(write);
5911    src1   : S3(read);
5912    src2   : S3(read);
5913    src3   : S3(read);
5914    DECODE : S0(4);     // 4 decode uses -- NOTE(review): only 3 decoders (D0..D2) are declared
5915    FPU    : S3(2);
5916%}
5917
5918// Float operation with one memory and two register sources: four
// instructions; one goes through the big decoder in S0, the remaining
// decode uses are placed in S1.
5919pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
5920%{
5921    instruction_count(4);
5922    dst    : S4(write);
5923    src1   : S3(read);
5924    src2   : S3(read);
5925    src3   : S3(read);
5926    DECODE : S1(3);     // any 3 decoders
5927    D0     : S0;        // Big decoder only
5928    FPU    : S3(2);
5929    MEM    : S3;        // any mem
5930%}
5931
5932// Float reg-mem operation: two instructions; memory read in S3, FPU
// used in S4, dst written in S5.
5933pipe_class fpu_reg_mem(regD dst, memory mem)
5934%{
5935    instruction_count(2);
5936    dst    : S5(write);
5937    mem    : S3(read);
5938    D0     : S0;        // big decoder only
5939    DECODE : S1;        // any decoder for FPU POP
5940    FPU    : S4;
5941    MEM    : S3;        // any mem
5942%}
5943
5944// Float reg-reg-mem operation: three instructions; register and memory
// sources read in S3, FPU used in S4, dst written in S5.
5945pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
5946%{
5947    instruction_count(3);
5948    dst    : S5(write);
5949    src1   : S3(read);
5950    mem    : S3(read);
5951    D0     : S0;        // big decoder only
5952    DECODE : S1(2);     // any decoder for FPU POP
5953    FPU    : S4;
5954    MEM    : S3;        // any mem
5955%}
5956
5957// Float mem-reg operation (store direction): two instructions; the
// address is read in S3 and the register source in S5.
5958pipe_class fpu_mem_reg(memory mem, regD src)
5959%{
5960    instruction_count(2);
5961    src    : S5(read);
5962    mem    : S3(read);
5963    DECODE : S0;        // any decoder for FPU PUSH
5964    D0     : S1;        // big decoder only
5965    FPU    : S4;
5966    MEM    : S3;        // any mem
5967%}
5968
// Float mem-reg-reg operation: three instructions; both register sources
// and the memory operand are read in S3, FPU used in S4.
5969pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
5970%{
5971    instruction_count(3);
5972    src1   : S3(read);
5973    src2   : S3(read);
5974    mem    : S3(read);
5975    DECODE : S0(2);     // any decoder for FPU PUSH
5976    D0     : S1;        // big decoder only
5977    FPU    : S4;
5978    MEM    : S3;        // any mem
5979%}
5980
// Float mem-reg-mem operation: three instructions with two memory
// operands; sources are read in S3 and 'mem' in S4.
5981pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
5982%{
5983    instruction_count(3);
5984    src1   : S3(read);
5985    src2   : S3(read);
5986    mem    : S4(read);
5987    DECODE : S0;        // any decoder for FPU PUSH
5988    D0     : S0(2);     // big decoder only
5989    FPU    : S4;
5990    MEM    : S3(2);     // any mem
5991%}
5992
// Float mem-mem operation: two instructions, both on the big decoder and
// both claiming a memory unit; no FPU usage is declared.
5993pipe_class fpu_mem_mem(memory dst, memory src1)
5994%{
5995    instruction_count(2);
5996    src1   : S3(read);
5997    dst    : S4(read);
5998    D0     : S0(2);     // big decoder only
5999    MEM    : S3(2);     // any mem
6000%}
6001
// Float mem-mem-mem operation: three instructions, all on the big decoder,
// each claiming a memory unit; FPU used in S4.
6002pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
6003%{
6004    instruction_count(3);
6005    src1   : S3(read);
6006    src2   : S3(read);
6007    dst    : S4(read);
6008    D0     : S0(3);     // big decoder only
6009    FPU    : S4;
6010    MEM    : S3(3);     // any mem
6011%}
6012
// Float mem-reg operation involving a constant (per the name): three
// instructions; both operands are read in S4.
6013pipe_class fpu_mem_reg_con(memory mem, regD src1)
6014%{
6015    instruction_count(3);
6016    src1   : S4(read);
6017    mem    : S4(read);
6018    DECODE : S0;        // any decoder for FPU PUSH
6019    D0     : S0(2);     // big decoder only
6020    FPU    : S4;
6021    MEM    : S3(2);     // any mem
6022%}
6023
6024// Float load constant: two instructions; the constant load claims a
// memory unit in S3 and dst is written in S5.
6025pipe_class fpu_reg_con(regD dst)
6026%{
6027    instruction_count(2);
6028    dst    : S5(write);
6029    D0     : S0;        // big decoder only for the load
6030    DECODE : S1;        // any decoder for FPU POP
6031    FPU    : S4;
6032    MEM    : S3;        // any mem
6033%}
6034
6035// Float reg-reg operation with a loaded constant: three instructions;
// src read in S3, dst written in S5.
6036pipe_class fpu_reg_reg_con(regD dst, regD src)
6037%{
6038    instruction_count(3);
6039    dst    : S5(write);
6040    src    : S3(read);
6041    D0     : S0;        // big decoder only for the load
6042    DECODE : S1(2);     // any decoder for FPU POP
6043    FPU    : S4;
6044    MEM    : S3;        // any mem
6045%}
6046
6047// Unconditional branch: claims only the branch unit, in S3.
6048pipe_class pipe_jmp(label labl)
6049%{
6050    single_instruction;
6051    BR   : S3;
6052%}
6053
6054// Conditional branch: the flags are read in S1 and the branch unit is
// claimed in S3.
6055pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
6056%{
6057    single_instruction;
6058    cr    : S1(read);
6059    BR    : S3;
6060%}
6061
6062// Allocation idiom (compare-and-exchange): forces serialization and is
// given a fixed latency of 6.
6063pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
6064%{
6065    instruction_count(1); force_serialization;
6066    fixed_latency(6);
6067    heap_ptr : S3(read);
6068    DECODE   : S0(3);
6069    D0       : S2;
6070    MEM      : S3;
6071    ALU      : S3(2);
6072    dst      : S5(write);
6073    BR       : S5;
6074%}
6075
6076// Generic big/slow expanded idiom: counted as 10 instructions spanning
// multiple bundles, serializing, with a fixed latency of 100.  Several
// XMM loads in this file use it as a placeholder (they are marked XXX).
6077pipe_class pipe_slow()
6078%{
6079    instruction_count(10); multiple_bundles; force_serialization;
6080    fixed_latency(100);
6081    D0  : S0(2);
6082    MEM : S3(2);
6083%}
6084
6085// The real do-nothing guy: zero instructions and no resources.
6086pipe_class empty()
6087%{
6088    instruction_count(0);
6089%}
6090
6091// Define the class for the Nop node: MachNop is scheduled with the
// zero-instruction 'empty' pipe class.
6092define
6093%{
6094   MachNop = empty;
6095%}
6096
6097%}
6098
6099//----------INSTRUCTIONS-------------------------------------------------------
6100//
6101// match      -- States which machine-independent subtree may be replaced
6102//               by this instruction.
6103// ins_cost   -- The estimated cost of this instruction is used by instruction
6104//               selection to identify a minimum cost tree of machine
6105//               instructions that matches a tree of machine-independent
6106//               instructions.
6107// format     -- A string providing the disassembly for this instruction.
6108//               The value of an instruction's operand may be inserted
6109//               by referring to it with a '$' prefix.
6110// opcode     -- Three instruction opcodes may be provided.  These are referred
6111//               to within an encode class as $primary, $secondary, and $tertiary
6112//               respectively.  The primary opcode is commonly used to
6113//               indicate the type of machine instruction, while secondary
6114//               and tertiary are often used for prefix options or addressing
6115//               modes.
6116// ins_encode -- A list of encode classes with parameters. The encode class
6117//               name must have been defined in an 'enc_class' specification
6118//               in the encode section of the architecture description.
6119
6120
6121//----------Load/Store/Move Instructions---------------------------------------
6122//----------Load Instructions--------------------------------------------------
6123
6124// Load Byte (8 bit signed): matches LoadB and emits movsbl, which
// sign-extends the byte at $mem into the int register $dst.
6125instruct loadB(rRegI dst, memory mem)
6126%{
6127  match(Set dst (LoadB mem));
6128
6129  ins_cost(125);
6130  format %{ "movsbl  $dst, $mem\t# byte" %}
6131
6132  ins_encode %{
6133    __ movsbl($dst$$Register, $mem$$Address);
6134  %}
6135
6136  ins_pipe(ialu_reg_mem);
6137%}
6138
6139// Load Byte (8 bit signed) into Long Register: folds ConvI2L(LoadB) into
// a single movsbq.
6140instruct loadB2L(rRegL dst, memory mem)
6141%{
6142  match(Set dst (ConvI2L (LoadB mem)));
6143
6144  ins_cost(125);
6145  format %{ "movsbq  $dst, $mem\t# byte -> long" %}
6146
6147  ins_encode %{
6148    __ movsbq($dst$$Register, $mem$$Address);
6149  %}
6150
6151  ins_pipe(ialu_reg_mem);
6152%}
6153
6154// Load Unsigned Byte (8 bit UNsigned): matches LoadUB and emits movzbl,
// which zero-extends the byte at $mem into $dst.
6155instruct loadUB(rRegI dst, memory mem)
6156%{
6157  match(Set dst (LoadUB mem));
6158
6159  ins_cost(125);
6160  format %{ "movzbl  $dst, $mem\t# ubyte" %}
6161
6162  ins_encode %{
6163    __ movzbl($dst$$Register, $mem$$Address);
6164  %}
6165
6166  ins_pipe(ialu_reg_mem);
6167%}
6168
6169// Load Unsigned Byte (8 bit UNsigned) into Long Register: folds
// ConvI2L(LoadUB) into a single movzbq.
6170instruct loadUB2L(rRegL dst, memory mem)
6171%{
6172  match(Set dst (ConvI2L (LoadUB mem)));
6173
6174  ins_cost(125);
6175  format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
6176
6177  ins_encode %{
6178    __ movzbq($dst$$Register, $mem$$Address);
6179  %}
6180
6181  ins_pipe(ialu_reg_mem);
6182%}
6183
6184// Load Short (16 bit signed): matches LoadS and emits movswl, which
// sign-extends the 16-bit value at $mem into $dst.
6185instruct loadS(rRegI dst, memory mem)
6186%{
6187  match(Set dst (LoadS mem));
6188
6189  ins_cost(125);
6190  format %{ "movswl $dst, $mem\t# short" %}
6191
6192  ins_encode %{
6193    __ movswl($dst$$Register, $mem$$Address);
6194  %}
6195
6196  ins_pipe(ialu_reg_mem);
6197%}
6198
6199// Load Short (16 bit signed) into Long Register: folds ConvI2L(LoadS)
// into a single movswq.
6200instruct loadS2L(rRegL dst, memory mem)
6201%{
6202  match(Set dst (ConvI2L (LoadS mem)));
6203
6204  ins_cost(125);
6205  format %{ "movswq $dst, $mem\t# short -> long" %}
6206
6207  ins_encode %{
6208    __ movswq($dst$$Register, $mem$$Address);
6209  %}
6210
6211  ins_pipe(ialu_reg_mem);
6212%}
6213
6214// Load Unsigned Short/Char (16 bit UNsigned): matches LoadUS and emits
// movzwl, which zero-extends the 16-bit value at $mem into $dst.
6215instruct loadUS(rRegI dst, memory mem)
6216%{
6217  match(Set dst (LoadUS mem));
6218
6219  ins_cost(125);
6220  format %{ "movzwl  $dst, $mem\t# ushort/char" %}
6221
6222  ins_encode %{
6223    __ movzwl($dst$$Register, $mem$$Address);
6224  %}
6225
6226  ins_pipe(ialu_reg_mem);
6227%}
6228
6229// Load Unsigned Short/Char (16 bit UNsigned) into Long Register: folds
// ConvI2L(LoadUS) into a single movzwq.
6230instruct loadUS2L(rRegL dst, memory mem)
6231%{
6232  match(Set dst (ConvI2L (LoadUS mem)));
6233
6234  ins_cost(125);
6235  format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
6236
6237  ins_encode %{
6238    __ movzwq($dst$$Register, $mem$$Address);
6239  %}
6240
6241  ins_pipe(ialu_reg_mem);
6242%}
6243
6244// Load Integer: matches LoadI and emits a 32-bit movl from memory.
6245instruct loadI(rRegI dst, memory mem)
6246%{
6247  match(Set dst (LoadI mem));
6248
6249  ins_cost(125);
6250  format %{ "movl    $dst, $mem\t# int" %}
6251
6252  ins_encode %{
6253    __ movl($dst$$Register, $mem$$Address);
6254  %}
6255
6256  ins_pipe(ialu_reg_mem);
6257%}
6258
6259// Load Integer into Long Register: folds ConvI2L(LoadI) into a single
// sign-extending movslq.
6260instruct loadI2L(rRegL dst, memory mem)
6261%{
6262  match(Set dst (ConvI2L (LoadI mem)));
6263
6264  ins_cost(125);
6265  format %{ "movslq  $dst, $mem\t# int -> long" %}
6266
6267  ins_encode %{
6268    __ movslq($dst$$Register, $mem$$Address);
6269  %}
6270
6271  ins_pipe(ialu_reg_mem);
6272%}
6273
6274// Load Unsigned Integer into Long Register: matches LoadUI2L with a
// plain 32-bit movl; on x86-64 a 32-bit register write zero-extends into
// the upper half, so no separate extension instruction is emitted.
6275instruct loadUI2L(rRegL dst, memory mem)
6276%{
6277  match(Set dst (LoadUI2L mem));
6278
6279  ins_cost(125);
6280  format %{ "movl    $dst, $mem\t# uint -> long" %}
6281
6282  ins_encode %{
6283    __ movl($dst$$Register, $mem$$Address);
6284  %}
6285
6286  ins_pipe(ialu_reg_mem);
6287%}
6288
6289// Load Long: matches LoadL and emits a 64-bit movq from memory.
6290instruct loadL(rRegL dst, memory mem)
6291%{
6292  match(Set dst (LoadL mem));
6293
6294  ins_cost(125);
6295  format %{ "movq    $dst, $mem\t# long" %}
6296
6297  ins_encode %{
6298    __ movq($dst$$Register, $mem$$Address);
6299  %}
6300
6301  ins_pipe(ialu_reg_mem); // XXX
6302%}
6303
6304// Load Range: matches LoadRange and encodes a 32-bit MOV (opcode 0x8B)
// by hand via the REX/opcode/ModRM encode classes.
6305instruct loadRange(rRegI dst, memory mem)
6306%{
6307  match(Set dst (LoadRange mem));
6308
6309  ins_cost(125); // XXX
6310  format %{ "movl    $dst, $mem\t# range" %}
6311  opcode(0x8B);
6312  ins_encode(REX_reg_mem(dst, mem), OpcP, reg_mem(dst, mem));
6313  ins_pipe(ialu_reg_mem);
6314%}
6315
6316// Load Pointer: matches LoadP; same 0x8B MOV as loadRange but with the
// wide REX encode class, i.e. a 64-bit movq.
6317instruct loadP(rRegP dst, memory mem)
6318%{
6319  match(Set dst (LoadP mem));
6320
6321  ins_cost(125); // XXX
6322  format %{ "movq    $dst, $mem\t# ptr" %}
6323  opcode(0x8B);
6324  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6325  ins_pipe(ialu_reg_mem); // XXX
6326%}
6327
6328// Load Compressed Pointer: matches LoadN and loads the 32-bit narrow
// oop with movl; no decoding is done here.
6329instruct loadN(rRegN dst, memory mem)
6330%{
6331   match(Set dst (LoadN mem));
6332
6333   ins_cost(125); // XXX
6334   format %{ "movl    $dst, $mem\t# compressed ptr" %}
6335   ins_encode %{
6336     __ movl($dst$$Register, $mem$$Address);
6337   %}
6338   ins_pipe(ialu_reg_mem); // XXX
6339%}
6340
6341
6342// Load Klass Pointer: matches LoadKlass and emits a 64-bit movq
// (opcode 0x8B with the wide REX encode class).
6343instruct loadKlass(rRegP dst, memory mem)
6344%{
6345  match(Set dst (LoadKlass mem));
6346
6347  ins_cost(125); // XXX
6348  format %{ "movq    $dst, $mem\t# class" %}
6349  opcode(0x8B);
6350  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6351  ins_pipe(ialu_reg_mem); // XXX
6352%}
6353
6354// Load narrow Klass Pointer: matches LoadNKlass and loads the 32-bit
// compressed klass value with movl.
6355instruct loadNKlass(rRegN dst, memory mem)
6356%{
6357  match(Set dst (LoadNKlass mem));
6358
6359  ins_cost(125); // XXX
6360  format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
6361  ins_encode %{
6362    __ movl($dst$$Register, $mem$$Address);
6363  %}
6364  ins_pipe(ialu_reg_mem); // XXX
6365%}
6366
6367// Load Float: matches LoadF and hand-encodes movss (F3 0F 10).  The
// prefix byte is emitted before the REX byte.
6368instruct loadF(regF dst, memory mem)
6369%{
6370  match(Set dst (LoadF mem));
6371
6372  ins_cost(145); // XXX
6373  format %{ "movss   $dst, $mem\t# float" %}
6374  opcode(0xF3, 0x0F, 0x10);
6375  ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6376  ins_pipe(pipe_slow); // XXX
6377%}
6378
6379// Load Double, movlpd variant (66 0F 12): selected when
// UseXmmLoadAndClearUpper is off; loadD is the complementary rule.
6380instruct loadD_partial(regD dst, memory mem)
6381%{
6382  predicate(!UseXmmLoadAndClearUpper);
6383  match(Set dst (LoadD mem));
6384
6385  ins_cost(145); // XXX
6386  format %{ "movlpd  $dst, $mem\t# double" %}
6387  opcode(0x66, 0x0F, 0x12);
6388  ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6389  ins_pipe(pipe_slow); // XXX
6390%}
6391
// Load Double, movsd variant (F2 0F 10): selected when
// UseXmmLoadAndClearUpper is on; loadD_partial is the complementary rule.
6392instruct loadD(regD dst, memory mem)
6393%{
6394  predicate(UseXmmLoadAndClearUpper);
6395  match(Set dst (LoadD mem));
6396
6397  ins_cost(145); // XXX
6398  format %{ "movsd   $dst, $mem\t# double" %}
6399  opcode(0xF2, 0x0F, 0x10);
6400  ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6401  ins_pipe(pipe_slow); // XXX
6402%}
6403
6404// Load Aligned Packed Byte to XMM register: matches Load8B and uses
// the movq_ld encode class (defined elsewhere in this file).
6405instruct loadA8B(regD dst, memory mem) %{
6406  match(Set dst (Load8B mem));
6407  ins_cost(125);
6408  format %{ "MOVQ  $dst,$mem\t! packed8B" %}
6409  ins_encode( movq_ld(dst, mem));
6410  ins_pipe( pipe_slow );
6411%}
6412
6413// Load Aligned Packed Short to XMM register: matches Load4S and uses
// the movq_ld encode class.
6414instruct loadA4S(regD dst, memory mem) %{
6415  match(Set dst (Load4S mem));
6416  ins_cost(125);
6417  format %{ "MOVQ  $dst,$mem\t! packed4S" %}
6418  ins_encode( movq_ld(dst, mem));
6419  ins_pipe( pipe_slow );
6420%}
6421
6422// Load Aligned Packed Char to XMM register: matches Load4C and uses
// the movq_ld encode class.
6423instruct loadA4C(regD dst, memory mem) %{
6424  match(Set dst (Load4C mem));
6425  ins_cost(125);
6426  format %{ "MOVQ  $dst,$mem\t! packed4C" %}
6427  ins_encode( movq_ld(dst, mem));
6428  ins_pipe( pipe_slow );
6429%}
6430
6431// Load Aligned Packed Integer to XMM register: matches Load2I and uses
// the movq_ld encode class.  (The rule name does not follow the loadA*
// pattern of its siblings.)
6432instruct load2IU(regD dst, memory mem) %{
6433  match(Set dst (Load2I mem));
6434  ins_cost(125);
6435  format %{ "MOVQ  $dst,$mem\t! packed2I" %}
6436  ins_encode( movq_ld(dst, mem));
6437  ins_pipe( pipe_slow );
6438%}
6439
6440// Load Aligned Packed Single to XMM: matches Load2F and uses the
// movq_ld encode class; costed at 145 rather than the 125 of the integer
// packed loads.
6441instruct loadA2F(regD dst, memory mem) %{
6442  match(Set dst (Load2F mem));
6443  ins_cost(145);
6444  format %{ "MOVQ  $dst,$mem\t! packed2F" %}
6445  ins_encode( movq_ld(dst, mem));
6446  ins_pipe( pipe_slow );
6447%}
6448
6449// Load Effective Address: materializes an indOffset8 address expression
// into a pointer register with a 64-bit LEA (opcode 0x8D).
6450instruct leaP8(rRegP dst, indOffset8 mem)
6451%{
6452  match(Set dst mem);
6453
6454  ins_cost(110); // XXX
6455  format %{ "leaq    $dst, $mem\t# ptr 8" %}
6456  opcode(0x8D);
6457  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6458  ins_pipe(ialu_reg_reg_fat);
6459%}
6460
// Load Effective Address for an indOffset32 operand (64-bit LEA, 0x8D).
6461instruct leaP32(rRegP dst, indOffset32 mem)
6462%{
6463  match(Set dst mem);
6464
6465  ins_cost(110);
6466  format %{ "leaq    $dst, $mem\t# ptr 32" %}
6467  opcode(0x8D);
6468  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6469  ins_pipe(ialu_reg_reg_fat);
6470%}
6471
6472// instruct leaPIdx(rRegP dst, indIndex mem)
6473// %{
6474//   match(Set dst mem);
6475
6476//   ins_cost(110);
6477//   format %{ "leaq    $dst, $mem\t# ptr idx" %}
6478//   opcode(0x8D);
6479//   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6480//   ins_pipe(ialu_reg_reg_fat);
6481// %}
6482
// Load Effective Address for an indIndexOffset operand (64-bit LEA, 0x8D).
6483instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
6484%{
6485  match(Set dst mem);
6486
6487  ins_cost(110);
6488  format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
6489  opcode(0x8D);
6490  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6491  ins_pipe(ialu_reg_reg_fat);
6492%}
6493
// Load Effective Address for an indIndexScale operand (64-bit LEA, 0x8D).
6494instruct leaPIdxScale(rRegP dst, indIndexScale mem)
6495%{
6496  match(Set dst mem);
6497
6498  ins_cost(110);
6499  format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
6500  opcode(0x8D);
6501  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6502  ins_pipe(ialu_reg_reg_fat);
6503%}
6504
// Load Effective Address for an indIndexScaleOffset operand (64-bit LEA,
// 0x8D).
6505instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
6506%{
6507  match(Set dst mem);
6508
6509  ins_cost(110);
6510  format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
6511  opcode(0x8D);
6512  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6513  ins_pipe(ialu_reg_reg_fat);
6514%}
6515
// Load Effective Address for an indPosIndexScaleOffset operand (64-bit
// LEA, 0x8D).
6516instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
6517%{
6518  match(Set dst mem);
6519
6520  ins_cost(110);
6521  format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
6522  opcode(0x8D);
6523  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6524  ins_pipe(ialu_reg_reg_fat);
6525%}
6526
6527// Load Effective Address which uses Narrow (32-bits) oop: only enabled
// when compressed oops are in use with a non-zero shift; the *Narrow rules
// in this file cover the zero-shift case.
6528instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
6529%{
6530  predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));
6531  match(Set dst mem);
6532
6533  ins_cost(110);
6534  format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
6535  opcode(0x8D);
6536  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6537  ins_pipe(ialu_reg_reg_fat);
6538%}
6539
// Load Effective Address for an indOffset8Narrow operand; only enabled
// when the narrow-oop shift is zero.
6540instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
6541%{
6542  predicate(Universe::narrow_oop_shift() == 0);
6543  match(Set dst mem);
6544
6545  ins_cost(110); // XXX
6546  format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
6547  opcode(0x8D);
6548  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6549  ins_pipe(ialu_reg_reg_fat);
6550%}
6551
// Load Effective Address for an indOffset32Narrow operand; only enabled
// when the narrow-oop shift is zero.
6552instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
6553%{
6554  predicate(Universe::narrow_oop_shift() == 0);
6555  match(Set dst mem);
6556
6557  ins_cost(110);
6558  format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
6559  opcode(0x8D);
6560  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6561  ins_pipe(ialu_reg_reg_fat);
6562%}
6563
// Load Effective Address for an indIndexOffsetNarrow operand; only
// enabled when the narrow-oop shift is zero.
6564instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
6565%{
6566  predicate(Universe::narrow_oop_shift() == 0);
6567  match(Set dst mem);
6568
6569  ins_cost(110);
6570  format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
6571  opcode(0x8D);
6572  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6573  ins_pipe(ialu_reg_reg_fat);
6574%}
6575
// Load Effective Address for an indIndexScaleNarrow operand; only
// enabled when the narrow-oop shift is zero.
6576instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
6577%{
6578  predicate(Universe::narrow_oop_shift() == 0);
6579  match(Set dst mem);
6580
6581  ins_cost(110);
6582  format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
6583  opcode(0x8D);
6584  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6585  ins_pipe(ialu_reg_reg_fat);
6586%}
6587
// Load Effective Address for an indIndexScaleOffsetNarrow operand; only
// enabled when the narrow-oop shift is zero.
6588instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
6589%{
6590  predicate(Universe::narrow_oop_shift() == 0);
6591  match(Set dst mem);
6592
6593  ins_cost(110);
6594  format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
6595  opcode(0x8D);
6596  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6597  ins_pipe(ialu_reg_reg_fat);
6598%}
6599
// Load Effective Address for an indPosIndexScaleOffsetNarrow operand;
// only enabled when the narrow-oop shift is zero.
6600instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
6601%{
6602  predicate(Universe::narrow_oop_shift() == 0);
6603  match(Set dst mem);
6604
6605  ins_cost(110);
6606  format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
6607  opcode(0x8D);
6608  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6609  ins_pipe(ialu_reg_reg_fat);
6610%}
6611
// Load int constant: general case, via the load_immI encode class.  No
// explicit ins_cost is given; loadConI0 handles zero at cost 50.
6612instruct loadConI(rRegI dst, immI src)
6613%{
6614  match(Set dst src);
6615
6616  format %{ "movl    $dst, $src\t# int" %}
6617  ins_encode(load_immI(dst, src));
6618  ins_pipe(ialu_reg_fat); // XXX
6619%}
6620
// Load int constant zero by XORing the register with itself (opcode
// 0x33).  XOR modifies the flags, hence the KILL cr effect.
6621instruct loadConI0(rRegI dst, immI0 src, rFlagsReg cr)
6622%{
6623  match(Set dst src);
6624  effect(KILL cr);
6625
6626  ins_cost(50);
6627  format %{ "xorl    $dst, $dst\t# int" %}
6628  opcode(0x33); /* + rd */
6629  ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
6630  ins_pipe(ialu_reg);
6631%}
6632
// Load long constant: general case via the load_immL encode class.  At
// cost 150 it is the most expensive of the long-constant rules, so the
// cheaper immL0/immUL32/immL32 rules are preferred when they match.
6633instruct loadConL(rRegL dst, immL src)
6634%{
6635  match(Set dst src);
6636
6637  ins_cost(150);
6638  format %{ "movq    $dst, $src\t# long" %}
6639  ins_encode(load_immL(dst, src));
6640  ins_pipe(ialu_reg);
6641%}
6642
// Load long constant zero with a 32-bit xorl (non-wide REX encode class);
// presumably relies on 32-bit register writes clearing the upper half.
// XOR modifies the flags, hence the KILL cr effect.
6643instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
6644%{
6645  match(Set dst src);
6646  effect(KILL cr);
6647
6648  ins_cost(50);
6649  format %{ "xorl    $dst, $dst\t# long" %}
6650  opcode(0x33); /* + rd */
6651  ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
6652  ins_pipe(ialu_reg); // XXX
6653%}
6654
// Load a long constant that fits in an unsigned 32-bit immediate
// (immUL32), via the load_immUL32 encode class; printed as movl.
6655instruct loadConUL32(rRegL dst, immUL32 src)
6656%{
6657  match(Set dst src);
6658
6659  ins_cost(60);
6660  format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
6661  ins_encode(load_immUL32(dst, src));
6662  ins_pipe(ialu_reg);
6663%}
6664
// Load a long constant that fits the immL32 operand, via the
// load_immL32 encode class; printed as movq.
6665instruct loadConL32(rRegL dst, immL32 src)
6666%{
6667  match(Set dst src);
6668
6669  ins_cost(70);
6670  format %{ "movq    $dst, $src\t# long (32-bit)" %}
6671  ins_encode(load_immL32(dst, src));
6672  ins_pipe(ialu_reg);
6673%}
6674
// Load pointer constant: general case via the load_immP encode class.
// No explicit ins_cost is given.
6675instruct loadConP(rRegP dst, immP src)
6676%{
6677  match(Set dst src);
6678
6679  format %{ "movq    $dst, $src\t# ptr" %}
6680  ins_encode(load_immP(dst, src));
6681  ins_pipe(ialu_reg_fat); // XXX
6682%}
6683
// Load NULL pointer constant by XORing the register with itself (opcode
// 0x33).  XOR modifies the flags, hence the KILL cr effect.
6684instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
6685%{
6686  match(Set dst src);
6687  effect(KILL cr);
6688
6689  ins_cost(50);
6690  format %{ "xorl    $dst, $dst\t# ptr" %}
6691  opcode(0x33); /* + rd */
6692  ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
6693  ins_pipe(ialu_reg);
6694%}
6695
// Load a pointer constant that fits the immP31 operand (a positive
// 32-bit value, per the format text) via the load_immP31 encode class.
// NOTE(review): declares KILL cr although the format shows a plain movl --
// confirm whether the flags are really clobbered.
6696instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
6697%{
6698  match(Set dst src);
6699  effect(KILL cr);
6700
6701  ins_cost(60);
6702  format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
6703  ins_encode(load_immP31(dst, src));
6704  ins_pipe(ialu_reg);
6705%}
6706
// Load float constant via the load_conF encode class; the format shows
// a movss from a memory location holding the constant.
6707instruct loadConF(regF dst, immF src)
6708%{
6709  match(Set dst src);
6710  ins_cost(125);
6711
6712  format %{ "movss   $dst, [$src]" %}
6713  ins_encode(load_conF(dst, src));
6714  ins_pipe(pipe_slow);
6715%}
6716
// Load compressed NULL pointer: zeroes the destination by XORing it with
// itself.  XOR modifies the flags, hence the KILL cr effect.
6717instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
6718  match(Set dst src);
6719  effect(KILL cr);
  // The printed form names $dst twice because the emitted instruction is
  // xorq dst, dst; this matches loadConI0/loadConL0/loadConP0.
6720  format %{ "xorq    $dst, $dst\t# compressed NULL ptr" %}
6721  ins_encode %{
6722    __ xorq($dst$$Register, $dst$$Register);
6723  %}
6724  ins_pipe(ialu_reg);
6725%}
6726
// Load a non-NULL compressed pointer constant via set_narrow_oop.  A
// NULL constant is not expected here (it trips ShouldNotReachHere);
// loadConN0 is the rule for the immN0 operand.
6727instruct loadConN(rRegN dst, immN src) %{
6728  match(Set dst src);
6729
6730  ins_cost(125);
6731  format %{ "movl    $dst, $src\t# compressed ptr" %}
6732  ins_encode %{
6733    address con = (address)$src$$constant;
6734    if (con == NULL) {
6735      ShouldNotReachHere();
6736    } else {
6737      __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
6738    }
6739  %}
6740  ins_pipe(ialu_reg_fat); // XXX
6741%}
6742
// Load float constant 0.0 by XORing the XMM register with itself
// (xorps, 0F 57); cheaper (cost 100) than the general loadConF.
6743instruct loadConF0(regF dst, immF0 src)
6744%{
6745  match(Set dst src);
6746  ins_cost(100);
6747
6748  format %{ "xorps   $dst, $dst\t# float 0.0" %}
6749  opcode(0x0F, 0x57);
6750  ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
6751  ins_pipe(pipe_slow);
6752%}
6753
6754// Load double constant via the load_conD encode class.
// Use the same format since predicate() can not be used here.
6755instruct loadConD(regD dst, immD src)
6756%{
6757  match(Set dst src);
6758  ins_cost(125);
6759
6760  format %{ "movsd   $dst, [$src]" %}
6761  ins_encode(load_conD(dst, src));
6762  ins_pipe(pipe_slow);
6763%}
6764
// Load double constant 0.0 by XORing the XMM register with itself
// (xorpd, 66 0F 57); cheaper (cost 100) than the general loadConD.
6765instruct loadConD0(regD dst, immD0 src)
6766%{
6767  match(Set dst src);
6768  ins_cost(100);
6769
6770  format %{ "xorpd   $dst, $dst\t# double 0.0" %}
6771  opcode(0x66, 0x0F, 0x57);
6772  ins_encode(OpcP, REX_reg_reg(dst, dst), OpcS, OpcT, reg_reg(dst, dst));
6773  ins_pipe(pipe_slow);
6774%}
6775
// Load an int from a stack slot into a register (32-bit MOV, 0x8B).
6776instruct loadSSI(rRegI dst, stackSlotI src)
6777%{
6778  match(Set dst src);
6779
6780  ins_cost(125);
6781  format %{ "movl    $dst, $src\t# int stk" %}
6782  opcode(0x8B);
6783  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
6784  ins_pipe(ialu_reg_mem);
6785%}
6786
// Load a long from a stack slot into a register (64-bit MOV, 0x8B with
// the wide REX encode class).
6787instruct loadSSL(rRegL dst, stackSlotL src)
6788%{
6789  match(Set dst src);
6790
6791  ins_cost(125);
6792  format %{ "movq    $dst, $src\t# long stk" %}
6793  opcode(0x8B);
6794  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
6795  ins_pipe(ialu_reg_mem);
6796%}
6797
// Load a pointer from a stack slot into a register (64-bit MOV, 0x8B
// with the wide REX encode class).
6798instruct loadSSP(rRegP dst, stackSlotP src)
6799%{
6800  match(Set dst src);
6801
6802  ins_cost(125);
6803  format %{ "movq    $dst, $src\t# ptr stk" %}
6804  opcode(0x8B);
6805  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
6806  ins_pipe(ialu_reg_mem);
6807%}
6808
// Load a float from a stack slot into an XMM register (movss, F3 0F 10).
6809instruct loadSSF(regF dst, stackSlotF src)
6810%{
6811  match(Set dst src);
6812
6813  ins_cost(125);
6814  format %{ "movss   $dst, $src\t# float stk" %}
6815  opcode(0xF3, 0x0F, 0x10);
6816  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
6817  ins_pipe(pipe_slow); // XXX
6818%}
6819
6820// Load a double from a stack slot (addressed off rsp by the slot's
// displacement) through the macro assembler's movdbl, so the printed
// "movsd" may not be the exact instruction emitted.
// Use the same format since predicate() can not be used here.
6821instruct loadSSD(regD dst, stackSlotD src)
6822%{
6823  match(Set dst src);
6824
6825  ins_cost(125);
6826  format %{ "movsd   $dst, $src\t# double stk" %}
6827  ins_encode  %{
6828    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
6829  %}
6830  ins_pipe(pipe_slow); // XXX
6831%}
6832
6833// Prefetch instructions.
6834// Must be safe to execute with invalid address (cannot fault).
6835
// Read prefetch variant chosen when ReadPrefetchInstr == 3.
// Matches PrefetchRead and emits 0F 0D with ModRM reg field /0.
6836instruct prefetchr( memory mem ) %{
6837  predicate(ReadPrefetchInstr==3);
6838  match(PrefetchRead mem);
6839  ins_cost(125);
6840
6841  format %{ "PREFETCHR $mem\t# Prefetch into level 1 cache" %}
6842  opcode(0x0F, 0x0D);     /* Opcode 0F 0D /0 */
6843  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
6844  ins_pipe(ialu_mem);
6845%}
6846
// Read prefetch variant chosen when ReadPrefetchInstr == 0.
// Matches PrefetchRead and emits PREFETCHNTA: 0F 18 with ModRM reg field /0.
6847instruct prefetchrNTA( memory mem ) %{
6848  predicate(ReadPrefetchInstr==0);
6849  match(PrefetchRead mem);
6850  ins_cost(125);
6851
6852  format %{ "PREFETCHNTA $mem\t# Prefetch into non-temporal cache for read" %}
6853  opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
6854  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
6855  ins_pipe(ialu_mem);
6856%}
6857
// Read prefetch variant chosen when ReadPrefetchInstr == 1.
// Matches PrefetchRead and emits PREFETCHT0: 0F 18 with ModRM reg field /1.
6858instruct prefetchrT0( memory mem ) %{
6859  predicate(ReadPrefetchInstr==1);
6860  match(PrefetchRead mem);
6861  ins_cost(125);
6862
6863  format %{ "PREFETCHT0 $mem\t# prefetch into L1 and L2 caches for read" %}
6864  opcode(0x0F, 0x18); /* Opcode 0F 18 /1 */
6865  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
6866  ins_pipe(ialu_mem);
6867%}
6868
// Read prefetch variant chosen when ReadPrefetchInstr == 2.
// Matches PrefetchRead and emits PREFETCHT2: 0F 18 with ModRM reg field /3.
6869instruct prefetchrT2( memory mem ) %{
6870  predicate(ReadPrefetchInstr==2);
6871  match(PrefetchRead mem);
6872  ins_cost(125);
6873
6874  format %{ "PREFETCHT2 $mem\t# prefetch into L2 caches for read" %}
6875  opcode(0x0F, 0x18); /* Opcode 0F 18 /3 */
6876  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x03, mem));
6877  ins_pipe(ialu_mem);
6878%}
6879
// Write (allocation) prefetch variant chosen when AllocatePrefetchInstr == 3.
// Matches PrefetchWrite and emits PREFETCHW: 0F 0D with ModRM reg field /1.
6880instruct prefetchw( memory mem ) %{
6881  predicate(AllocatePrefetchInstr==3);
6882  match(PrefetchWrite mem);
6883  ins_cost(125);
6884
6885  format %{ "PREFETCHW $mem\t# Prefetch into level 1 cache and mark modified" %}
6886  opcode(0x0F, 0x0D);     /* Opcode 0F 0D /1 */
6887  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
6888  ins_pipe(ialu_mem);
6889%}
6890
// Write (allocation) prefetch variant chosen when AllocatePrefetchInstr == 0.
// Matches PrefetchWrite and emits PREFETCHNTA: 0F 18 with ModRM reg field /0.
6891instruct prefetchwNTA( memory mem ) %{
6892  predicate(AllocatePrefetchInstr==0);
6893  match(PrefetchWrite mem);
6894  ins_cost(125);
6895
6896  format %{ "PREFETCHNTA $mem\t# Prefetch to non-temporal cache for write" %}
6897  opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
6898  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
6899  ins_pipe(ialu_mem);
6900%}
6901
// Write (allocation) prefetch variant chosen when AllocatePrefetchInstr == 1.
// Matches PrefetchWrite and emits PREFETCHT0: 0F 18 with ModRM reg field /1.
6902instruct prefetchwT0( memory mem ) %{
6903  predicate(AllocatePrefetchInstr==1);
6904  match(PrefetchWrite mem);
6905  ins_cost(125);
6906
6907  format %{ "PREFETCHT0 $mem\t# Prefetch to level 1 and 2 caches for write" %}
6908  opcode(0x0F, 0x18);     /* Opcode 0F 18 /1 */
6909  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
6910  ins_pipe(ialu_mem);
6911%}
6912
// Write (allocation) prefetch variant chosen when AllocatePrefetchInstr == 2.
// Matches PrefetchWrite and emits PREFETCHT2: 0F 18 with ModRM reg field /3.
6913instruct prefetchwT2( memory mem ) %{
6914  predicate(AllocatePrefetchInstr==2);
6915  match(PrefetchWrite mem);
6916  ins_cost(125);
6917
6918  format %{ "PREFETCHT2 $mem\t# Prefetch to level 2 cache for write" %}
6919  opcode(0x0F, 0x18);     /* Opcode 0F 18 /3 */
6920  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x03, mem));
6921  ins_pipe(ialu_mem);
6922%}
6923
6924//----------Store Instructions-------------------------------------------------
6925
6926// Store Byte
// Matches StoreB of an int register: movb $mem, $src (opcode 0x88).
// Uses the byte-register REX helper (REX_breg_mem) rather than REX_reg_mem.
6927instruct storeB(memory mem, rRegI src)
6928%{
6929  match(Set mem (StoreB mem src));
6930
6931  ins_cost(125); // XXX
6932  format %{ "movb    $mem, $src\t# byte" %}
6933  opcode(0x88);
6934  ins_encode(REX_breg_mem(src, mem), OpcP, reg_mem(src, mem));
6935  ins_pipe(ialu_mem_reg);
6936%}
6937
6938// Store Char/Short
// Matches StoreC of an int register: movw $mem, $src.
// Same opcode (0x89) as the int store, preceded by SizePrefix.
6939instruct storeC(memory mem, rRegI src)
6940%{
6941  match(Set mem (StoreC mem src));
6942
6943  ins_cost(125); // XXX
6944  format %{ "movw    $mem, $src\t# char/short" %}
6945  opcode(0x89);
6946  ins_encode(SizePrefix, REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
6947  ins_pipe(ialu_mem_reg);
6948%}
6949
6950// Store Integer
// Matches StoreI of an int register: movl $mem, $src (opcode 0x89).
6951instruct storeI(memory mem, rRegI src)
6952%{
6953  match(Set mem (StoreI mem src));
6954
6955  ins_cost(125); // XXX
6956  format %{ "movl    $mem, $src\t# int" %}
6957  opcode(0x89);
6958  ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
6959  ins_pipe(ialu_mem_reg);
6960%}
6961
6962// Store Long
// Matches StoreL of a long register: movq $mem, $src
// (opcode 0x89 with the wide REX helper).
6963instruct storeL(memory mem, rRegL src)
6964%{
6965  match(Set mem (StoreL mem src));
6966
6967  ins_cost(125); // XXX
6968  format %{ "movq    $mem, $src\t# long" %}
6969  opcode(0x89);
6970  ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
6971  ins_pipe(ialu_mem_reg); // XXX
6972%}
6973
6974// Store Pointer
// Matches StoreP of a pointer register (operand class any_RegP):
// movq $mem, $src (opcode 0x89 with the wide REX helper).
6975instruct storeP(memory mem, any_RegP src)
6976%{
6977  match(Set mem (StoreP mem src));
6978
6979  ins_cost(125); // XXX
6980  format %{ "movq    $mem, $src\t# ptr" %}
6981  opcode(0x89);
6982  ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
6983  ins_pipe(ialu_mem_reg);
6984%}
6985
// Store a NULL pointer (immP0) by writing R12 instead of an immediate.
// Selected only when UseCompressedOops is on and the narrow-oop base is NULL;
// per the format string, R12 (heap base) is zero in that mode.
// Cost 125 is below storeImmP's 150.
6986instruct storeImmP0(memory mem, immP0 zero)
6987%{
6988  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6989  match(Set mem (StoreP mem zero));
6990
6991  ins_cost(125); // XXX
6992  format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
6993  ins_encode %{
6994    __ movq($mem$$Address, r12);
6995  %}
6996  ins_pipe(ialu_mem_reg);
6997%}
6998
6999// Store NULL Pointer, mark word, or other simple pointer constant.
// Matches StoreP of an immP31 constant: movq $mem, imm
// encoded as C7 /0 with the wide REX helper and a 32-bit immediate (Con32).
7000instruct storeImmP(memory mem, immP31 src)
7001%{
7002  match(Set mem (StoreP mem src));
7003
7004  ins_cost(150); // XXX
7005  format %{ "movq    $mem, $src\t# ptr" %}
7006  opcode(0xC7); /* C7 /0 */
7007  ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
7008  ins_pipe(ialu_mem_imm);
7009%}
7010
7011// Store Compressed Pointer
// Matches StoreN of a compressed-pointer register; the encoding calls the
// assembler's 32-bit movl to memory.
7012instruct storeN(memory mem, rRegN src)
7013%{
7014  match(Set mem (StoreN mem src));
7015
7016  ins_cost(125); // XXX
7017  format %{ "movl    $mem, $src\t# compressed ptr" %}
7018  ins_encode %{
7019    __ movl($mem$$Address, $src$$Register);
7020  %}
7021  ins_pipe(ialu_mem_reg);
7022%}
7023
// Store a null compressed pointer (immN0) by writing R12 with a 32-bit movl.
// Selected only when the narrow-oop base is NULL; per the format string,
// R12 (heap base) is zero in that mode.  Unlike the sibling *0 store rules,
// the predicate does not test UseCompressedOops.
// Cost 125 is below storeImmN's 150.
7024instruct storeImmN0(memory mem, immN0 zero)
7025%{
7026  predicate(Universe::narrow_oop_base() == NULL);
7027  match(Set mem (StoreN mem zero));
7028
7029  ins_cost(125); // XXX
7030  format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
7031  ins_encode %{
7032    __ movl($mem$$Address, r12);
7033  %}
7034  ins_pipe(ialu_mem_reg);
7035%}
7036
// Matches StoreN of a compressed-pointer constant (immN).
// A non-NULL constant is written through set_narrow_oop; a NULL constant is
// written as a plain 32-bit zero.
7037instruct storeImmN(memory mem, immN src)
7038%{
7039  match(Set mem (StoreN mem src));
7040
7041  ins_cost(150); // XXX
7042  format %{ "movl    $mem, $src\t# compressed ptr" %}
7043  ins_encode %{
7044    address oop_con = (address)$src$$constant;
7045    if (oop_con != NULL) {
7046      __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
7047    } else {
7048      __ movl($mem$$Address, (int32_t)0);
7049    }
7050  %}
7051  ins_pipe(ialu_mem_imm);
7052%}
7053
7054// Store Integer Immediate
// Store int zero (immI0) by writing R12 with a 32-bit movl.
// Selected only when UseCompressedOops is on and the narrow-oop base is NULL;
// per the format string, R12 (heap base) is zero in that mode.
// Cost 125 is below storeImmI's 150.
7055instruct storeImmI0(memory mem, immI0 zero)
7056%{
7057  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7058  match(Set mem (StoreI mem zero));
7059
7060  ins_cost(125); // XXX
7061  format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
7062  ins_encode %{
7063    __ movl($mem$$Address, r12);
7064  %}
7065  ins_pipe(ialu_mem_reg);
7066%}
7067
// Matches StoreI of an int immediate: movl $mem, imm32
// encoded as C7 /0 followed by the 32-bit constant (Con32).
7068instruct storeImmI(memory mem, immI src)
7069%{
7070  match(Set mem (StoreI mem src));
7071
7072  ins_cost(150);
7073  format %{ "movl    $mem, $src\t# int" %}
7074  opcode(0xC7); /* C7 /0 */
7075  ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
7076  ins_pipe(ialu_mem_imm);
7077%}
7078
7079// Store Long Immediate
// Store long zero (immL0) by writing R12 with a 64-bit movq.
// Selected only when UseCompressedOops is on and the narrow-oop base is NULL;
// per the format string, R12 (heap base) is zero in that mode.
// Cost 125 is below storeImmL's 150.
7080instruct storeImmL0(memory mem, immL0 zero)
7081%{
7082  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7083  match(Set mem (StoreL mem zero));
7084
7085  ins_cost(125); // XXX
7086  format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
7087  ins_encode %{
7088    __ movq($mem$$Address, r12);
7089  %}
7090  ins_pipe(ialu_mem_reg);
7091%}
7092
// Matches StoreL of an immL32 constant: movq $mem, imm
// encoded as C7 /0 with the wide REX helper and a 32-bit immediate (Con32).
7093instruct storeImmL(memory mem, immL32 src)
7094%{
7095  match(Set mem (StoreL mem src));
7096
7097  ins_cost(150);
7098  format %{ "movq    $mem, $src\t# long" %}
7099  opcode(0xC7); /* C7 /0 */
7100  ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
7101  ins_pipe(ialu_mem_imm);
7102%}
7103
7104// Store Short/Char Immediate
// Store a zero short/char (StoreC of immI0) by writing R12 with a 16-bit movw.
// Selected only when UseCompressedOops is on and the narrow-oop base is NULL;
// per the format string, R12 (heap base) is zero in that mode.
// Cost 125 is below storeImmI16's 150.
7105instruct storeImmC0(memory mem, immI0 zero)
7106%{
7107  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7108  match(Set mem (StoreC mem zero));
7109
7110  ins_cost(125); // XXX
7111  format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
7112  ins_encode %{
7113    __ movw($mem$$Address, r12);
7114  %}
7115  ins_pipe(ialu_mem_reg);
7116%}
7117
// Matches StoreC of a 16-bit immediate, enabled only when UseStoreImmI16 is
// set.  Encoded as the 32-bit immediate store (C7 /0) with SizePrefix and a
// 16-bit constant (Con16).
7118instruct storeImmI16(memory mem, immI16 src)
7119%{
7120  predicate(UseStoreImmI16);
7121  match(Set mem (StoreC mem src));
7122
7123  ins_cost(150);
7124  format %{ "movw    $mem, $src\t# short/char" %}
7125  opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
7126  ins_encode(SizePrefix, REX_mem(mem), OpcP, RM_opc_mem(0x00, mem),Con16(src));
7127  ins_pipe(ialu_mem_imm);
7128%}
7129
7130// Store Byte Immediate
// Store a zero byte (StoreB of immI0) by writing R12's low byte with movb.
// Selected only when UseCompressedOops is on and the narrow-oop base is NULL;
// per the format string, R12 (heap base) is zero in that mode.
// Cost 125 is below storeImmB's 150.
// The format annotation says "byte": this rule matches StoreB and emits movb
// (it previously carried the "short/char" text of storeImmC0).
7131instruct storeImmB0(memory mem, immI0 zero)
7132%{
7133  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7134  match(Set mem (StoreB mem zero));
7135
7136  ins_cost(125); // XXX
7137  format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
7138  ins_encode %{
7139    __ movb($mem$$Address, r12);
7140  %}
7141  ins_pipe(ialu_mem_reg);
7142%}
7143
// Matches StoreB of an 8-bit immediate: movb $mem, imm
// encoded as C6 /0 followed by the constant (Con8or32).
7144instruct storeImmB(memory mem, immI8 src)
7145%{
7146  match(Set mem (StoreB mem src));
7147
7148  ins_cost(150); // XXX
7149  format %{ "movb    $mem, $src\t# byte" %}
7150  opcode(0xC6); /* C6 /0 */
7151  ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
7152  ins_pipe(ialu_mem_imm);
7153%}
7154
7155// Store Aligned Packed Byte XMM register to memory
// Matches Store8B (packed bytes held in a regD operand) and stores it with
// the movq_st encoding, shown as MOVQ.
7156instruct storeA8B(memory mem, regD src) %{
7157  match(Set mem (Store8B mem src));
7158  ins_cost(145);
7159  format %{ "MOVQ  $mem,$src\t! packed8B" %}
7160  ins_encode( movq_st(mem, src));
7161  ins_pipe( pipe_slow );
7162%}
7163
7164// Store Aligned Packed Char/Short XMM register to memory
// Matches Store4C (packed chars/shorts held in a regD operand) and stores it
// with the movq_st encoding, shown as MOVQ.
7165instruct storeA4C(memory mem, regD src) %{
7166  match(Set mem (Store4C mem src));
7167  ins_cost(145);
7168  format %{ "MOVQ  $mem,$src\t! packed4C" %}
7169  ins_encode( movq_st(mem, src));
7170  ins_pipe( pipe_slow );
7171%}
7172
7173// Store Aligned Packed Integer XMM register to memory
// Matches Store2I (packed ints held in a regD operand) and stores it with
// the movq_st encoding, shown as MOVQ.
7174instruct storeA2I(memory mem, regD src) %{
7175  match(Set mem (Store2I mem src));
7176  ins_cost(145);
7177  format %{ "MOVQ  $mem,$src\t! packed2I" %}
7178  ins_encode( movq_st(mem, src));
7179  ins_pipe( pipe_slow );
7180%}
7181
7182// Store CMS card-mark Immediate
// Store a zero card-mark byte (StoreCM of immI0) by writing R12's low byte.
// Selected only when UseCompressedOops is on and the narrow-oop base is NULL;
// per the format string, R12 (heap base) is zero in that mode.
// Cost 125 is below storeImmCM0's 150.
7183instruct storeImmCM0_reg(memory mem, immI0 zero)
7184%{
7185  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7186  match(Set mem (StoreCM mem zero));
7187
7188  ins_cost(125); // XXX
7189  format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
7190  ins_encode %{
7191    __ movb($mem$$Address, r12);
7192  %}
7193  ins_pipe(ialu_mem_reg);
7194%}
7195
// Matches StoreCM of the constant zero (immI0): movb $mem, imm
// encoded as C6 /0 followed by the constant (Con8or32).  No predicate.
7196instruct storeImmCM0(memory mem, immI0 src)
7197%{
7198  match(Set mem (StoreCM mem src));
7199
7200  ins_cost(150); // XXX
7201  format %{ "movb    $mem, $src\t# CMS card-mark byte 0" %}
7202  opcode(0xC6); /* C6 /0 */
7203  ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
7204  ins_pipe(ialu_mem_imm);
7205%}
7206
7207// Store Aligned Packed Single Float XMM register to memory
// Matches Store2F (packed floats held in a regD operand) and stores it with
// the movq_st encoding, shown as MOVQ.
7208instruct storeA2F(memory mem, regD src) %{
7209  match(Set mem (Store2F mem src));
7210  ins_cost(145);
7211  format %{ "MOVQ  $mem,$src\t! packed2F" %}
7212  ins_encode( movq_st(mem, src));
7213  ins_pipe( pipe_slow );
7214%}
7215
7216// Store Float
// Matches StoreF of an XMM float register: movss $mem, $src
// (opcode bytes F3 0F 11, REX emitted after the F3 prefix).
7217instruct storeF(memory mem, regF src)
7218%{
7219  match(Set mem (StoreF mem src));
7220
7221  ins_cost(95); // XXX
7222  format %{ "movss   $mem, $src\t# float" %}
7223  opcode(0xF3, 0x0F, 0x11);
7224  ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
7225  ins_pipe(pipe_slow); // XXX
7226%}
7227
7228// Store immediate Float value (it is faster than store from XMM register)
// Store float 0.0 (immF0) by writing R12 with a 32-bit movl.
// Selected only when UseCompressedOops is on and the narrow-oop base is NULL;
// per the format string, R12 (heap base) is zero in that mode.
// Cost 25 is below storeF_imm's 50.
7229instruct storeF0(memory mem, immF0 zero)
7230%{
7231  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7232  match(Set mem (StoreF mem zero));
7233
7234  ins_cost(25); // XXX
7235  format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
7236  ins_encode %{
7237    __ movl($mem$$Address, r12);
7238  %}
7239  ins_pipe(ialu_mem_reg);
7240%}
7241
// Matches StoreF of a float constant and stores it with an integer move:
// movl $mem, imm32 (C7 /0), the immediate being produced by Con32F_as_bits.
7242instruct storeF_imm(memory mem, immF src)
7243%{
7244  match(Set mem (StoreF mem src));
7245
7246  ins_cost(50);
7247  format %{ "movl    $mem, $src\t# float" %}
7248  opcode(0xC7); /* C7 /0 */
7249  ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
7250  ins_pipe(ialu_mem_imm);
7251%}
7252
7253// Store Double
// Matches StoreD of an XMM double register: movsd $mem, $src
// (opcode bytes F2 0F 11, REX emitted after the F2 prefix).
7254instruct storeD(memory mem, regD src)
7255%{
7256  match(Set mem (StoreD mem src));
7257
7258  ins_cost(95); // XXX
7259  format %{ "movsd   $mem, $src\t# double" %}
7260  opcode(0xF2, 0x0F, 0x11);
7261  ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
7262  ins_pipe(pipe_slow); // XXX
7263%}
7264
7265// Store immediate double 0.0 (it is faster than store from XMM register)
// Store double 0.0 (immD0) with an integer immediate move:
// movq $mem, imm (C7 /0 with the wide REX helper).
// The predicate is the exact complement of storeD0's, so this form is used
// when compressed oops are off or the narrow-oop base is non-NULL.
// NOTE(review): the immediate goes through Con32F_as_bits although the
// operand is a double; presumably fine because only 0.0 matches -- confirm.
7266instruct storeD0_imm(memory mem, immD0 src)
7267%{
7268  predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
7269  match(Set mem (StoreD mem src));
7270
7271  ins_cost(50);
7272  format %{ "movq    $mem, $src\t# double 0." %}
7273  opcode(0xC7); /* C7 /0 */
7274  ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
7275  ins_pipe(ialu_mem_imm);
7276%}
7277
// Store double 0.0 (immD0) by writing R12 with a 64-bit movq.
// Selected only when UseCompressedOops is on and the narrow-oop base is NULL;
// per the format string, R12 (heap base) is zero in that mode.
7278instruct storeD0(memory mem, immD0 zero)
7279%{
7280  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7281  match(Set mem (StoreD mem zero));
7282
7283  ins_cost(25); // XXX
7284  format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
7285  ins_encode %{
7286    __ movq($mem$$Address, r12);
7287  %}
7288  ins_pipe(ialu_mem_reg);
7289%}
7290
// Store an int register into a stack slot: movl $dst, $src (opcode 0x89).
7291instruct storeSSI(stackSlotI dst, rRegI src)
7292%{
7293  match(Set dst src);
7294
7295  ins_cost(100);
7296  format %{ "movl    $dst, $src\t# int stk" %}
7297  opcode(0x89);
7298  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
7299  ins_pipe( ialu_mem_reg );
7300%}
7301
// Store a long register into a stack slot: movq $dst, $src
// (opcode 0x89 with the wide REX helper).
7302instruct storeSSL(stackSlotL dst, rRegL src)
7303%{
7304  match(Set dst src);
7305
7306  ins_cost(100);
7307  format %{ "movq    $dst, $src\t# long stk" %}
7308  opcode(0x89);
7309  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
7310  ins_pipe(ialu_mem_reg);
7311%}
7312
// Store a pointer register into a stack slot: movq $dst, $src
// (opcode 0x89 with the wide REX helper).
7313instruct storeSSP(stackSlotP dst, rRegP src)
7314%{
7315  match(Set dst src);
7316
7317  ins_cost(100);
7318  format %{ "movq    $dst, $src\t# ptr stk" %}
7319  opcode(0x89);
7320  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
7321  ins_pipe(ialu_mem_reg);
7322%}
7323
// Store an XMM float register into a stack slot: movss $dst, $src
// (opcode bytes F3 0F 11).
7324instruct storeSSF(stackSlotF dst, regF src)
7325%{
7326  match(Set dst src);
7327
7328  ins_cost(95); // XXX
7329  format %{ "movss   $dst, $src\t# float stk" %}
7330  opcode(0xF3, 0x0F, 0x11);
7331  ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
7332  ins_pipe(pipe_slow); // XXX
7333%}
7334
// Store an XMM double register into a stack slot: movsd $dst, $src
// (opcode bytes F2 0F 11).
7335instruct storeSSD(stackSlotD dst, regD src)
7336%{
7337  match(Set dst src);
7338
7339  ins_cost(95); // XXX
7340  format %{ "movsd   $dst, $src\t# double stk" %}
7341  opcode(0xF2, 0x0F, 0x11);
7342  ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
7343  ins_pipe(pipe_slow); // XXX
7344%}
7345
7346//----------BSWAP Instructions-------------------------------------------------
// In-place byte reversal of an int register (ReverseBytesI with dst as both
// input and output): bswapl $dst, opcode 0F C8 via the opc2_reg helper.
7347instruct bytes_reverse_int(rRegI dst) %{
7348  match(Set dst (ReverseBytesI dst));
7349
7350  format %{ "bswapl  $dst" %}
7351  opcode(0x0F, 0xC8);  /*Opcode 0F /C8 */
7352  ins_encode( REX_reg(dst), OpcP, opc2_reg(dst) );
7353  ins_pipe( ialu_reg );
7354%}
7355
// In-place byte reversal of a long register (ReverseBytesL with dst as both
// input and output): bswapq $dst, opcode 0F C8 with the wide REX helper.
7356instruct bytes_reverse_long(rRegL dst) %{
7357  match(Set dst (ReverseBytesL dst));
7358
7359  format %{ "bswapq  $dst" %}
7360
7361  opcode(0x0F, 0xC8); /* Opcode 0F /C8 */
7362  ins_encode( REX_reg_wide(dst), OpcP, opc2_reg(dst) );
7363  ins_pipe( ialu_reg);
7364%}
7365
// Fused ReverseBytesI(LoadI): the encoding is a 0x8B load into $dst followed
// by the 0F C8 byte-swap opcode applied to $dst.
7366instruct loadI_reversed(rRegI dst, memory src) %{
7367  match(Set dst (ReverseBytesI (LoadI src)));
7368
7369  format %{ "bswap_movl $dst, $src" %}
7370  opcode(0x8B, 0x0F, 0xC8); /* Opcode 8B 0F C8 */
7371  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src), REX_reg(dst), OpcS, opc3_reg(dst));
7372  ins_pipe( ialu_reg_mem );
7373%}
7374
// Fused ReverseBytesL(LoadL): the encoding is a wide 0x8B load into $dst
// followed by the 0F C8 byte-swap opcode applied to $dst (wide REX).
7375instruct loadL_reversed(rRegL dst, memory src) %{
7376  match(Set dst (ReverseBytesL (LoadL src)));
7377
7378  format %{ "bswap_movq $dst, $src" %}
7379  opcode(0x8B, 0x0F, 0xC8); /* Opcode 8B 0F C8 */
7380  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src), REX_reg_wide(dst), OpcS, opc3_reg(dst));
7381  ins_pipe( ialu_reg_mem );
7382%}
7383
// Fused StoreI(ReverseBytesI src): the encoding applies the 0F C8 byte-swap
// opcode to $src and then stores $src with opcode 0x89.
// NOTE(review): the byte swap targets $src itself, yet no effect on src is
// declared here -- confirm src is dead after this instruction.
7384instruct storeI_reversed(memory dst, rRegI src) %{
7385  match(Set dst (StoreI dst (ReverseBytesI  src)));
7386
7387  format %{ "movl_bswap $dst, $src" %}
7388  opcode(0x0F, 0xC8, 0x89); /* Opcode 0F C8 89 */
7389  ins_encode( REX_reg(src), OpcP, opc2_reg(src), REX_reg_mem(src, dst), OpcT, reg_mem(src, dst) );
7390  ins_pipe( ialu_mem_reg );
7391%}
7392
// Fused StoreL(ReverseBytesL src): the encoding applies the 0F C8 byte-swap
// opcode to $src (wide REX) and then stores $src with a wide 0x89 store.
// NOTE(review): the byte swap targets $src itself, yet no effect on src is
// declared here -- confirm src is dead after this instruction.
7393instruct storeL_reversed(memory dst, rRegL src) %{
7394  match(Set dst (StoreL dst (ReverseBytesL  src)));
7395
7396  format %{ "movq_bswap $dst, $src" %}
7397  opcode(0x0F, 0xC8, 0x89); /* Opcode 0F C8 89 */
7398  ins_encode( REX_reg_wide(src), OpcP, opc2_reg(src), REX_reg_mem_wide(src, dst), OpcT, reg_mem(src, dst) );
7399  ins_pipe( ialu_mem_reg );
7400%}
7401
7402
7403//---------- Population Count Instructions -------------------------------------
7404
// Matches PopCountI of an int register when UsePopCountInstruction is set;
// the encoding calls the assembler's popcntl (register form).
7405instruct popCountI(rRegI dst, rRegI src) %{
7406  predicate(UsePopCountInstruction);
7407  match(Set dst (PopCountI src));
7408
7409  format %{ "popcnt  $dst, $src" %}
7410  ins_encode %{
7411    __ popcntl($dst$$Register, $src$$Register);
7412  %}
7413  ins_pipe(ialu_reg);
7414%}
7415
// Matches PopCountI(LoadI mem) when UsePopCountInstruction is set, folding
// the load into popcntl's memory-operand form.
7416instruct popCountI_mem(rRegI dst, memory mem) %{
7417  predicate(UsePopCountInstruction);
7418  match(Set dst (PopCountI (LoadI mem)));
7419
7420  format %{ "popcnt  $dst, $mem" %}
7421  ins_encode %{
7422    __ popcntl($dst$$Register, $mem$$Address);
7423  %}
7424  ins_pipe(ialu_reg);
7425%}
7426
7427// Note: Long.bitCount(long) returns an int.
// Matches PopCountL of a long register when UsePopCountInstruction is set;
// the result operand is an int register (rRegI) and the encoding calls the
// assembler's popcntq (register form).
7428instruct popCountL(rRegI dst, rRegL src) %{
7429  predicate(UsePopCountInstruction);
7430  match(Set dst (PopCountL src));
7431
7432  format %{ "popcnt  $dst, $src" %}
7433  ins_encode %{
7434    __ popcntq($dst$$Register, $src$$Register);
7435  %}
7436  ins_pipe(ialu_reg);
7437%}
7438
7439// Note: Long.bitCount(long) returns an int.
// Matches PopCountL(LoadL mem) when UsePopCountInstruction is set, folding
// the load into popcntq's memory-operand form; the result is an int register.
7440instruct popCountL_mem(rRegI dst, memory mem) %{
7441  predicate(UsePopCountInstruction);
7442  match(Set dst (PopCountL (LoadL mem)));
7443
7444  format %{ "popcnt  $dst, $mem" %}
7445  ins_encode %{
7446    __ popcntq($dst$$Register, $mem$$Address);
7447  %}
7448  ins_pipe(ialu_reg);
7449%}
7450
7451
7452//----------MemBar Instructions-----------------------------------------------
7453// Memory barrier flavors
7454
// Matches MemBarAcquire and emits nothing: size(0), empty ins_encode(),
// zero cost.
7455instruct membar_acquire()
7456%{
7457  match(MemBarAcquire);
7458  ins_cost(0);
7459
7460  size(0);
7461  format %{ "MEMBAR-acquire" %}
7462  ins_encode();
7463  ins_pipe(empty);
7464%}
7465
// Matches MemBarAcquire when Matcher::prior_fast_lock(n) holds and emits
// nothing (size(0), empty ins_encode()).  The format string gives the reason:
// a preceding CMPXCHG in FastLock.
7466instruct membar_acquire_lock()
7467%{
7468  match(MemBarAcquire);
7469  predicate(Matcher::prior_fast_lock(n));
7470  ins_cost(0);
7471
7472  size(0);
7473  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
7474  ins_encode();
7475  ins_pipe(empty);
7476%}
7477
// Matches MemBarRelease and emits nothing: size(0), empty ins_encode(),
// zero cost.
7478instruct membar_release()
7479%{
7480  match(MemBarRelease);
7481  ins_cost(0);
7482
7483  size(0);
7484  format %{ "MEMBAR-release" %}
7485  ins_encode();
7486  ins_pipe(empty);
7487%}
7488
// Matches MemBarRelease when Matcher::post_fast_unlock(n) holds and emits
// nothing (size(0), empty ins_encode()).  The format string gives the reason:
// a FastUnlock follows.
7489instruct membar_release_lock()
7490%{
7491  match(MemBarRelease);
7492  predicate(Matcher::post_fast_unlock(n));
7493  ins_cost(0);
7494
7495  size(0);
7496  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
7497  ins_encode();
7498  ins_pipe(empty);
7499%}
7500
// Matches MemBarVolatile.  The only membar rule in this group with a
// non-empty encoding (enc_membar_volatile) and a non-zero cost (400).
7501instruct membar_volatile()
7502%{
7503  match(MemBarVolatile);
7504  ins_cost(400);
7505
7506  format %{ "MEMBAR-volatile" %}
7507  ins_encode(enc_membar_volatile);
7508  ins_pipe(pipe_slow);
7509%}
7510
// Matches MemBarVolatile when Matcher::post_store_load_barrier(n) holds and
// emits nothing (size(0), empty ins_encode()); its zero cost is below
// membar_volatile's 400.
7511instruct unnecessary_membar_volatile()
7512%{
7513  match(MemBarVolatile);
7514  predicate(Matcher::post_store_load_barrier(n));
7515  ins_cost(0);
7516
7517  size(0);
7518  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
7519  ins_encode();
7520  ins_pipe(empty);
7521%}
7522
7523//----------Move Instructions--------------------------------------------------
7524
// Matches CastX2P (long -> pointer): a register copy, shown as movq and
// encoded by enc_copy_wide.
7525instruct castX2P(rRegP dst, rRegL src)
7526%{
7527  match(Set dst (CastX2P src));
7528
7529  format %{ "movq    $dst, $src\t# long->ptr" %}
7530  ins_encode(enc_copy_wide(dst, src));
7531  ins_pipe(ialu_reg_reg); // XXX
7532%}
7533
// Matches CastP2X (pointer -> long): a register copy, shown as movq and
// encoded by enc_copy_wide.
7534instruct castP2X(rRegL dst, rRegP src)
7535%{
7536  match(Set dst (CastP2X src));
7537
7538  format %{ "movq    $dst, $src\t# ptr -> long" %}
7539  ins_encode(enc_copy_wide(dst, src));
7540  ins_pipe(ialu_reg_reg); // XXX
7541%}
7542
7543
7544// Convert oop pointer into compressed form
// Compress an oop (EncodeP) whose type is not known to be NotNull.
// The source is first copied into the destination when they are different
// registers, then encode_heap_oop runs in place on the destination.
// Flags are declared killed.
7545instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
7546  match(Set dst (EncodeP src));
7547  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
7548  effect(KILL cr);
7549  format %{ "encode_heap_oop $dst,$src" %}
7550  ins_encode %{
7551    Register dst_reg = $dst$$Register;
7552    Register src_reg = $src$$Register;
7553    if (dst_reg != src_reg) {
7554      __ movq(dst_reg, src_reg);
7555    }
7556    __ encode_heap_oop(dst_reg);
7557  %}
7558  ins_pipe(ialu_reg_long);
7559%}
7560
// Compress an oop (EncodeP) whose type is known to be NotNull; the encoding
// is a single call to the two-register encode_heap_oop_not_null.
// Flags are declared killed.
7561instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
7562  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
7563  match(Set dst (EncodeP src));
7564  effect(KILL cr);
7565  format %{ "encode_heap_oop_not_null $dst,$src" %}
7566  ins_encode %{
7567    __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
7568  %}
7569  ins_pipe(ialu_reg_long);
7570%}
7571
// Decompress a narrow oop (DecodeN) whose type is neither NotNull nor
// Constant.  The source is first copied into the destination when they are
// different registers, then decode_heap_oop runs in place on the destination.
// Flags are declared killed.
7572instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
7573  predicate(n->bottom_type()->is_oopptr()->ptr() != TypePtr::NotNull &&
7574            n->bottom_type()->is_oopptr()->ptr() != TypePtr::Constant);
7575  match(Set dst (DecodeN src));
7576  effect(KILL cr);
7577  format %{ "decode_heap_oop $dst,$src" %}
7578  ins_encode %{
7579    Register dst_reg = $dst$$Register;
7580    Register src_reg = $src$$Register;
7581    if (dst_reg != src_reg) {
7582      __ movq(dst_reg, src_reg);
7583    }
7584    __ decode_heap_oop(dst_reg);
7585  %}
7586  ins_pipe(ialu_reg_long);
7587%}
7588
// Decompress a narrow oop (DecodeN) whose type is NotNull or Constant.
// Uses the one-register form of decode_heap_oop_not_null when source and
// destination coincide, otherwise the two-register form.
// NOTE(review): unlike decodeHeapOop, this rule declares no KILL of the flags
// register -- confirm decode_heap_oop_not_null never modifies flags.
7589instruct decodeHeapOop_not_null(rRegP dst, rRegN src) %{
7590  predicate(n->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull ||
7591            n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant);
7592  match(Set dst (DecodeN src));
7593  format %{ "decode_heap_oop_not_null $dst,$src" %}
7594  ins_encode %{
7595    Register dst_reg = $dst$$Register;
7596    Register src_reg = $src$$Register;
7597    if (dst_reg == src_reg) {
7598      __ decode_heap_oop_not_null(dst_reg);
7599    } else {
7600      __ decode_heap_oop_not_null(dst_reg, src_reg);
7601    }
7602  %}
7603  ins_pipe(ialu_reg_long);
7604%}
7605
7606
7607//----------Conditional Move---------------------------------------------------
7608// Jump
7609// dummy instruction for generating temp registers
// Table jump on a shifted index: matches Jump(LShiftL switch_val shift).
// predicate(false) is constant, so this rule is never selected as written.
// dest is a TEMP register; the format shows it being loaded with the table
// base before the indirect jmp.
7610instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
7611  match(Jump (LShiftL switch_val shift));
7612  ins_cost(350);
7613  predicate(false);
7614  effect(TEMP dest);
7615
7616  format %{ "leaq    $dest, table_base\n\t"
7617            "jmp     [$dest + $switch_val << $shift]\n\t" %}
7618  ins_encode(jump_enc_offset(switch_val, shift, dest));
7619  ins_pipe(pipe_jmp);
7620  ins_pc_relative(1);
7621%}
7622
// Table jump on a shifted index plus constant offset: matches
// Jump(AddL (LShiftL switch_val shift) offset).
// dest is a TEMP register; the format shows it being loaded with the table
// base before the indirect jmp.  Encoded by jump_enc_addr.
7623instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
7624  match(Jump (AddL (LShiftL switch_val shift) offset));
7625  ins_cost(350);
7626  effect(TEMP dest);
7627
7628  format %{ "leaq    $dest, table_base\n\t"
7629            "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
7630  ins_encode(jump_enc_addr(switch_val, shift, offset, dest));
7631  ins_pipe(pipe_jmp);
7632  ins_pc_relative(1);
7633%}
7634
// Table jump on a plain index: matches Jump switch_val.
// dest is a TEMP register; the format shows it being loaded with the table
// base before the indirect jmp.  Encoded by jump_enc.
7635instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
7636  match(Jump switch_val);
7637  ins_cost(350);
7638  effect(TEMP dest);
7639
7640  format %{ "leaq    $dest, table_base\n\t"
7641            "jmp     [$dest + $switch_val]\n\t" %}
7642  ins_encode(jump_enc(switch_val, dest));
7643  ins_pipe(pipe_jmp);
7644  ins_pc_relative(1);
7645%}
7646
7647// Conditional move
// Int conditional move, register source, signed condition (cmpOp/rFlagsReg).
// dst is both the value kept and the destination; opcode base 0F 40 is
// combined with the condition by enc_cmov.
7648instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
7649%{
7650  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7651
7652  ins_cost(200); // XXX
7653  format %{ "cmovl$cop $dst, $src\t# signed, int" %}
7654  opcode(0x0F, 0x40);
7655  ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7656  ins_pipe(pipe_cmov_reg);
7657%}
7658
// Int conditional move, register source, unsigned condition
// (cmpOpU/rFlagsRegU).  Same encoding as cmovI_reg.
7659instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
7660  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7661
7662  ins_cost(200); // XXX
7663  format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
7664  opcode(0x0F, 0x40);
7665  ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7666  ins_pipe(pipe_cmov_reg);
7667%}
7668
// Int conditional move for the cmpOpUCF/rFlagsRegUCF operand pair; has no
// encoding of its own and simply expands to cmovI_regU.
7669instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
7670  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7671  ins_cost(200);
7672  expand %{
7673    cmovI_regU(cop, cr, dst, src);
7674  %}
7675%}
7676
7677// Conditional move
// Int conditional move with the LoadI folded into a memory source operand,
// signed condition.  Costed higher (250) than the register form (200).
7678instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
7679  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7680
7681  ins_cost(250); // XXX
7682  format %{ "cmovl$cop $dst, $src\t# signed, int" %}
7683  opcode(0x0F, 0x40);
7684  ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
7685  ins_pipe(pipe_cmov_mem);
7686%}
7687
7688// Conditional move
// Int conditional move with the LoadI folded into a memory source operand,
// unsigned condition (cmpOpU/rFlagsRegU).  Same encoding as cmovI_mem.
7689instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
7690%{
7691  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7692
7693  ins_cost(250); // XXX
7694  format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
7695  opcode(0x0F, 0x40);
7696  ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
7697  ins_pipe(pipe_cmov_mem);
7698%}
7699
// Memory-source int conditional move for the cmpOpUCF/rFlagsRegUCF operand
// pair; has no encoding of its own and simply expands to cmovI_memU.
7700instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
7701  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7702  ins_cost(250);
7703  expand %{
7704    cmovI_memU(cop, cr, dst, src);
7705  %}
7706%}
7707
7708// Conditional move
// Compressed-pointer conditional move (CMoveN), register source, signed
// condition.  Encoded like the int form: non-wide REX, shown as cmovl.
7709instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
7710%{
7711  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
7712
7713  ins_cost(200); // XXX
7714  format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
7715  opcode(0x0F, 0x40);
7716  ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7717  ins_pipe(pipe_cmov_reg);
7718%}
7719
7720// Conditional move
// Compressed-pointer conditional move (CMoveN), register source, unsigned
// condition (cmpOpU/rFlagsRegU).  Same encoding as cmovN_reg.
7721instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
7722%{
7723  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
7724
7725  ins_cost(200); // XXX
7726  format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
7727  opcode(0x0F, 0x40);
7728  ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7729  ins_pipe(pipe_cmov_reg);
7730%}
7731
// Compressed-pointer conditional move for the cmpOpUCF/rFlagsRegUCF operand
// pair; has no encoding of its own and simply expands to cmovN_regU.
7732instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
7733  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
7734  ins_cost(200);
7735  expand %{
7736    cmovN_regU(cop, cr, dst, src);
7737  %}
7738%}
7739
7740// Conditional move
// Pointer conditional move (CMoveP), register source, signed condition.
// Uses the wide REX helper, shown as cmovq.
7741instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
7742%{
7743  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7744
7745  ins_cost(200); // XXX
7746  format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
7747  opcode(0x0F, 0x40);
7748  ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7749  ins_pipe(pipe_cmov_reg);  // XXX
7750%}
7751
7752// Conditional move
// Pointer conditional move (CMoveP), register source, unsigned condition
// (cmpOpU/rFlagsRegU).  Same encoding as cmovP_reg.
7753instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
7754%{
7755  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7756
7757  ins_cost(200); // XXX
7758  format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
7759  opcode(0x0F, 0x40);
7760  ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7761  ins_pipe(pipe_cmov_reg); // XXX
7762%}
7763
// Pointer conditional move for the cmpOpUCF/rFlagsRegUCF operand pair; has
// no encoding of its own and simply expands to cmovP_regU.
7764instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
7765  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7766  ins_cost(200);
7767  expand %{
7768    cmovP_regU(cop, cr, dst, src);
7769  %}
7770%}
7771
7772// DISABLED: Requires the ADLC to emit a bottom_type call that
7773// correctly meets the two pointer arguments; one is an incoming
7774// register but the other is a memory operand.  ALSO appears to
7775// be buggy with implicit null checks.
7776//
7777//// Conditional move
7778//instruct cmovP_mem(cmpOp cop, rFlagsReg cr, rRegP dst, memory src)
7779//%{
7780//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7781//  ins_cost(250);
7782//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7783//  opcode(0x0F,0x40);
7784//  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
7785//  ins_pipe( pipe_cmov_mem );
7786//%}
7787//
7788//// Conditional move
7789//instruct cmovP_memU(cmpOpU cop, rFlagsRegU cr, rRegP dst, memory src)
7790//%{
7791//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7792//  ins_cost(250);
7793//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7794//  opcode(0x0F,0x40);
7795//  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
7796//  ins_pipe( pipe_cmov_mem );
7797//%}
7798
// Long conditional move (CMoveL), register source, signed condition.
// Uses the wide REX helper, shown as cmovq.
7799instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
7800%{
7801  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7802
7803  ins_cost(200); // XXX
7804  format %{ "cmovq$cop $dst, $src\t# signed, long" %}
7805  opcode(0x0F, 0x40);
7806  ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7807  ins_pipe(pipe_cmov_reg);  // XXX
7808%}
7809
// Long conditional move with the LoadL folded into a memory source operand,
// signed condition.
// NOTE(review): cost is 200 here, whereas the int memory forms use 250 --
// confirm the difference is intentional.
7810instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
7811%{
7812  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
7813
7814  ins_cost(200); // XXX
7815  format %{ "cmovq$cop $dst, $src\t# signed, long" %}
7816  opcode(0x0F, 0x40);
7817  ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
7818  ins_pipe(pipe_cmov_mem);  // XXX
7819%}
7820
// Long conditional move (CMoveL), register source, unsigned condition
// (cmpOpU/rFlagsRegU).  Same encoding as cmovL_reg.
7821instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
7822%{
7823  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7824
7825  ins_cost(200); // XXX
7826  format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
7827  opcode(0x0F, 0x40);
7828  ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7829  ins_pipe(pipe_cmov_reg); // XXX
7830%}
7831
// Long conditional move for the cmpOpUCF/rFlagsRegUCF operand pair; has no
// encoding of its own and simply expands to cmovL_regU.
7832instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
7833  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7834  ins_cost(200);
7835  expand %{
7836    cmovL_regU(cop, cr, dst, src);
7837  %}
7838%}
7839
// Long conditional move with the LoadL folded into a memory source operand,
// unsigned condition (cmpOpU/rFlagsRegU).  Same encoding as cmovL_mem.
7840instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
7841%{
7842  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
7843
7844  ins_cost(200); // XXX
7845  format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
7846  opcode(0x0F, 0x40);
7847  ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
7848  ins_pipe(pipe_cmov_mem); // XXX
7849%}
7850
// CMoveL with a memory source for the cmpOpUCF / rFlagsRegUCF operand
// classes.  Has no encoding of its own: it expands into cmovL_memU.
7851instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
7852  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
7853  ins_cost(200);
7854  expand %{
7855    cmovL_memU(cop, cr, dst, src);
7856  %}
7857%}
7858
// Conditional move of a float between XMM registers, signed-compare flags.
// As the format shows, this is not a single cmov: it is a jump on the
// negated condition over a movss.  The whole sequence is produced by
// enc_cmovf_branch and is scheduled as pipe_slow.
7859instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
7860%{
7861  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7862
7863  ins_cost(200); // XXX
7864  format %{ "jn$cop    skip\t# signed cmove float\n\t"
7865            "movss     $dst, $src\n"
7866    "skip:" %}
7867  ins_encode(enc_cmovf_branch(cop, dst, src));
7868  ins_pipe(pipe_slow);
7869%}
7870
7871// instruct cmovF_mem(cmpOp cop, rFlagsReg cr, regF dst, memory src)
7872// %{
7873//   match(Set dst (CMoveF (Binary cop cr) (Binary dst (LoadF src))));
7874
7875//   ins_cost(200); // XXX
7876//   format %{ "jn$cop    skip\t# signed cmove float\n\t"
7877//             "movss     $dst, $src\n"
7878//     "skip:" %}
7879//   ins_encode(enc_cmovf_mem_branch(cop, dst, src));
7880//   ins_pipe(pipe_slow);
7881// %}
7882
// Conditional move of a float between XMM registers, unsigned-compare flags
// (cmpOpU / rFlagsRegU).  Emitted as a branch over a movss (see format) by
// enc_cmovf_branch, the same encoding class cmovF_reg uses.
7883instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
7884%{
7885  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7886
7887  ins_cost(200); // XXX
7888  format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
7889            "movss     $dst, $src\n"
7890    "skip:" %}
7891  ins_encode(enc_cmovf_branch(cop, dst, src));
7892  ins_pipe(pipe_slow);
7893%}
7894
// CMoveF for the cmpOpUCF / rFlagsRegUCF operand classes.
// Has no encoding of its own: it expands into cmovF_regU.
7895instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
7896  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7897  ins_cost(200);
7898  expand %{
7899    cmovF_regU(cop, cr, dst, src);
7900  %}
7901%}
7902
// Conditional move of a double between XMM registers, signed-compare flags.
// Emitted as a jump on the negated condition over a movsd (see format);
// the sequence comes from enc_cmovd_branch and is scheduled as pipe_slow.
7903instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
7904%{
7905  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7906
7907  ins_cost(200); // XXX
7908  format %{ "jn$cop    skip\t# signed cmove double\n\t"
7909            "movsd     $dst, $src\n"
7910    "skip:" %}
7911  ins_encode(enc_cmovd_branch(cop, dst, src));
7912  ins_pipe(pipe_slow);
7913%}
7914
// Conditional move of a double between XMM registers, unsigned-compare
// flags (cmpOpU / rFlagsRegU).  Emitted as a branch over a movsd (see
// format) by enc_cmovd_branch, the same encoding class cmovD_reg uses.
7915instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
7916%{
7917  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7918
7919  ins_cost(200); // XXX
7920  format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
7921            "movsd     $dst, $src\n"
7922    "skip:" %}
7923  ins_encode(enc_cmovd_branch(cop, dst, src));
7924  ins_pipe(pipe_slow);
7925%}
7926
// CMoveD for the cmpOpUCF / rFlagsRegUCF operand classes.
// Has no encoding of its own: it expands into cmovD_regU.
7927instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
7928  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7929  ins_cost(200);
7930  expand %{
7931    cmovD_regU(cop, cr, dst, src);
7932  %}
7933%}
7934
7935//----------Arithmetic Instructions--------------------------------------------
7936//----------Addition Instructions----------------------------------------------
7937
// 32-bit register/register add: dst = dst + src (two-address form).
// The flags register is declared killed.
7938instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
7939%{
7940  match(Set dst (AddI dst src));
7941  effect(KILL cr);
7942
7943  format %{ "addl    $dst, $src\t# int" %}
7944  opcode(0x03);
7945  ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
7946  ins_pipe(ialu_reg_reg);
7947%}
7948
// 32-bit add of an immediate to a register: dst = dst + src, opcode 0x81 /0.
// NOTE(review): OpcSErm / Con8or32 presumably select the sign-extended
// 8-bit immediate form when the constant fits - confirm in the encode block.
7949instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
7950%{
7951  match(Set dst (AddI dst src));
7952  effect(KILL cr);
7953
7954  format %{ "addl    $dst, $src\t# int" %}
7955  opcode(0x81, 0x00); /* /0 id */
7956  ins_encode(OpcSErm(dst, src), Con8or32(src));
7957  ins_pipe( ialu_reg );
7958%}
7959
// 32-bit add with a memory source: dst = dst + [src].  The LoadI is folded
// into the addl, so no separate load instruction is matched.
7960instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
7961%{
7962  match(Set dst (AddI dst (LoadI src)));
7963  effect(KILL cr);
7964
7965  ins_cost(125); // XXX
7966  format %{ "addl    $dst, $src\t# int" %}
7967  opcode(0x03);
7968  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
7969  ins_pipe(ialu_reg_mem);
7970%}
7971
// 32-bit read-modify-write add to memory: [dst] = [dst] + src.
// Matches a StoreI whose value is (LoadI of the same address) + src.
7972instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
7973%{
7974  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7975  effect(KILL cr);
7976
7977  ins_cost(150); // XXX
7978  format %{ "addl    $dst, $src\t# int" %}
7979  opcode(0x01); /* Opcode 01 /r */
7980  // Memory is the destination here, so src goes in the register slot.
7981  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
7982  ins_pipe(ialu_mem_reg);
7983%}
7983
// 32-bit read-modify-write add of an immediate to memory:
// [dst] = [dst] + src, opcode 0x81 with /0 supplied to RM_opc_mem.
7984instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
7985%{
7986  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7987  effect(KILL cr);
7988
7989  ins_cost(125); // XXX
7990  format %{ "addl    $dst, $src\t# int" %}
7991  opcode(0x81); /* Opcode 81 /0 id */
7992  ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));
7993  ins_pipe(ialu_mem_imm);
7994%}
7995
// 32-bit register increment (incl), opcode FF /0.  Only eligible when the
// UseIncDec flag is set; matches an AddI whose second input is the immI1
// operand.  src is not referenced by the encoding.
7996instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
7997%{
7998  predicate(UseIncDec);
7999  match(Set dst (AddI dst src));
8000  effect(KILL cr);
8001
8002  format %{ "incl    $dst\t# int" %}
8003  opcode(0xFF, 0x00); // FF /0
8004  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8005  ins_pipe(ialu_reg);
8006%}
8007
// 32-bit in-memory increment (incl [dst]), opcode FF /0.  Only eligible when
// UseIncDec is set; matches a StoreI of (LoadI of the same address) + immI1.
8008instruct incI_mem(memory dst, immI1 src, rFlagsReg cr)
8009%{
8010  predicate(UseIncDec);
8011  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8012  effect(KILL cr);
8013
8014  ins_cost(125); // XXX
8015  format %{ "incl    $dst\t# int" %}
8016  opcode(0xFF); /* Opcode FF /0 */
8017  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x00, dst));
8018  ins_pipe(ialu_mem_imm);
8019%}
8020
8021// XXX why does that use AddI
// 32-bit register decrement (decl), opcode FF /1.  Only eligible when
// UseIncDec is set.  The rule matches AddI with an immI_M1 operand
// (presumably the constant -1, i.e. dst + (-1) - confirm against the
// operand definition) rather than a SubI.
8022instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
8023%{
8024  predicate(UseIncDec);
8025  match(Set dst (AddI dst src));
8026  effect(KILL cr);
8027
8028  format %{ "decl    $dst\t# int" %}
8029  opcode(0xFF, 0x01); // FF /1
8030  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8031  ins_pipe(ialu_reg);
8032%}
8033
8034// XXX why does that use AddI
// 32-bit in-memory decrement (decl [dst]), opcode FF /1.  Only eligible when
// UseIncDec is set; matches a StoreI of (LoadI of the same address) plus an
// immI_M1 operand (presumably -1 - confirm against the operand definition).
8035instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
8036%{
8037  predicate(UseIncDec);
8038  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8039  effect(KILL cr);
8040
8041  ins_cost(125); // XXX
8042  format %{ "decl    $dst\t# int" %}
8043  opcode(0xFF); /* Opcode FF /1 */
8044  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x01, dst));
8045  ins_pipe(ialu_mem_imm);
8046%}
8047
// Three-address 32-bit add of a register and an immediate via lea:
// dst = src0 + src1.  Unlike the addl rules, dst is a separate operand and
// no flags kill is declared.  A 0x67 prefix byte is emitted first (shown as
// "addr32" in the format).
8048instruct leaI_rReg_immI(rRegI dst, rRegI src0, immI src1)
8049%{
8050  match(Set dst (AddI src0 src1));
8051
8052  ins_cost(110);
8053  format %{ "addr32 leal $dst, [$src0 + $src1]\t# int" %}
8054  opcode(0x8D); /* 0x8D /r */
8055  ins_encode(Opcode(0x67), REX_reg_reg(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
8056  ins_pipe(ialu_reg_reg);
8057%}
8058
// 64-bit register/register add: dst = dst + src (two-address form), using
// the wide REX encoding class.  The flags register is declared killed.
8059instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8060%{
8061  match(Set dst (AddL dst src));
8062  effect(KILL cr);
8063
8064  format %{ "addq    $dst, $src\t# long" %}
8065  opcode(0x03);
8066  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8067  ins_pipe(ialu_reg_reg);
8068%}
8069
// 64-bit add of an immediate to a register: dst = dst + src, opcode 0x81 /0.
// The immediate operand class is immL32, not a full 64-bit immL.
8070instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
8071%{
8072  match(Set dst (AddL dst src));
8073  effect(KILL cr);
8074
8075  format %{ "addq    $dst, $src\t# long" %}
8076  opcode(0x81, 0x00); /* /0 id */
8077  ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
8078  ins_pipe( ialu_reg );
8079%}
8080
// 64-bit add with a memory source: dst = dst + [src].  The LoadL is folded
// into the addq.
8081instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
8082%{
8083  match(Set dst (AddL dst (LoadL src)));
8084  effect(KILL cr);
8085
8086  ins_cost(125); // XXX
8087  format %{ "addq    $dst, $src\t# long" %}
8088  opcode(0x03);
8089  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
8090  ins_pipe(ialu_reg_mem);
8091%}
8092
// 64-bit read-modify-write add to memory: [dst] = [dst] + src.
// Matches a StoreL whose value is (LoadL of the same address) + src.
8093instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
8094%{
8095  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8096  effect(KILL cr);
8097
8098  ins_cost(150); // XXX
8099  format %{ "addq    $dst, $src\t# long" %}
8100  opcode(0x01); /* Opcode 01 /r */
8101  // Memory is the destination here, so src goes in the register slot.
8102  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
8103  ins_pipe(ialu_mem_reg);
8104%}
8104
// 64-bit read-modify-write add of an immL32 immediate to memory:
// [dst] = [dst] + src, opcode 0x81 with /0 supplied to RM_opc_mem.
8105instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
8106%{
8107  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8108  effect(KILL cr);
8109
8110  ins_cost(125); // XXX
8111  format %{ "addq    $dst, $src\t# long" %}
8112  opcode(0x81); /* Opcode 81 /0 id */
8113  ins_encode(REX_mem_wide(dst),
8114             OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));
8115  ins_pipe(ialu_mem_imm);
8116%}
8117
// 64-bit register increment (incq), opcode FF /0.  Only eligible when
// UseIncDec is set; matches an AddL whose second input is the immL1 operand.
// dst must be a long register class (rRegL): the rule matches AddL and emits
// the wide encoding (REX_reg_wide), like decL_rReg and addL_rReg.  It was
// previously declared rRegI, which does not agree with the long result.
8118instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
8119%{
8120  predicate(UseIncDec);
8121  match(Set dst (AddL dst src));
8122  effect(KILL cr);
8123
8124  format %{ "incq    $dst\t# long" %}
8125  opcode(0xFF, 0x00); // FF /0
8126  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8127  ins_pipe(ialu_reg);
8128%}
8129
// 64-bit in-memory increment (incq [dst]), opcode FF /0.  Only eligible when
// UseIncDec is set; matches a StoreL of (LoadL of the same address) + immL1.
8130instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
8131%{
8132  predicate(UseIncDec);
8133  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8134  effect(KILL cr);
8135
8136  ins_cost(125); // XXX
8137  format %{ "incq    $dst\t# long" %}
8138  opcode(0xFF); /* Opcode FF /0 */
8139  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x00, dst));
8140  ins_pipe(ialu_mem_imm);
8141%}
8142
8143// XXX why does that use AddL
// 64-bit register decrement (decq), opcode FF /1.  Only eligible when
// UseIncDec is set.  Matches AddL with an immL_M1 operand (presumably the
// constant -1 - confirm against the operand definition), not a SubL.
8144instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
8145%{
8146  predicate(UseIncDec);
8147  match(Set dst (AddL dst src));
8148  effect(KILL cr);
8149
8150  format %{ "decq    $dst\t# long" %}
8151  opcode(0xFF, 0x01); // FF /1
8152  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8153  ins_pipe(ialu_reg);
8154%}
8155
8156// XXX why does that use AddL
// 64-bit in-memory decrement (decq [dst]), opcode FF /1.  Only eligible when
// UseIncDec is set; matches a StoreL of (LoadL of the same address) plus an
// immL_M1 operand (presumably -1 - confirm against the operand definition).
8157instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
8158%{
8159  predicate(UseIncDec);
8160  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8161  effect(KILL cr);
8162
8163  ins_cost(125); // XXX
8164  format %{ "decq    $dst\t# long" %}
8165  opcode(0xFF); /* Opcode FF /1 */
8166  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x01, dst));
8167  ins_pipe(ialu_mem_imm);
8168%}
8169
// Three-address 64-bit add of a register and an immL32 immediate via lea:
// dst = src0 + src1.  dst is a separate operand and no flags kill is
// declared, unlike the addq rules.
8170instruct leaL_rReg_immL(rRegL dst, rRegL src0, immL32 src1)
8171%{
8172  match(Set dst (AddL src0 src1));
8173
8174  ins_cost(110);
8175  format %{ "leaq    $dst, [$src0 + $src1]\t# long" %}
8176  opcode(0x8D); /* 0x8D /r */
8177  ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
8178  ins_pipe(ialu_reg_reg);
8179%}
8180
// Pointer plus long offset in registers: dst = dst + src (two-address form).
// Same opcode and encoding classes as addL_rReg; only the dst operand class
// (rRegP) differs.
8181instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
8182%{
8183  match(Set dst (AddP dst src));
8184  effect(KILL cr);
8185
8186  format %{ "addq    $dst, $src\t# ptr" %}
8187  opcode(0x03);
8188  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8189  ins_pipe(ialu_reg_reg);
8190%}
8191
// Pointer plus immL32 immediate: dst = dst + src, opcode 0x81 /0 with the
// wide encoding class.
8192instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
8193%{
8194  match(Set dst (AddP dst src));
8195  effect(KILL cr);
8196
8197  format %{ "addq    $dst, $src\t# ptr" %}
8198  opcode(0x81, 0x00); /* /0 id */
8199  ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
8200  ins_pipe( ialu_reg );
8201%}
8202
8203// XXX addP mem ops ????
8204
// Three-address pointer plus immL32 immediate via lea: dst = src0 + src1.
// dst is a separate operand and no flags kill is declared, unlike
// addP_rReg_imm.
8205instruct leaP_rReg_imm(rRegP dst, rRegP src0, immL32 src1)
8206%{
8207  match(Set dst (AddP src0 src1));
8208
8209  ins_cost(110);
8210  format %{ "leaq    $dst, [$src0 + $src1]\t# ptr" %}
8211  opcode(0x8D); /* 0x8D /r */
8212  ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1));// XXX
8213  ins_pipe(ialu_reg_reg);
8214%}
8215
// CheckCastPP on a pointer register.  Emits no machine code: size is 0 and
// the encoding is empty; the value stays in dst.
8216instruct checkCastPP(rRegP dst)
8217%{
8218  match(Set dst (CheckCastPP dst));
8219
8220  size(0);
8221  format %{ "# checkcastPP of $dst" %}
8222  ins_encode(/* empty encoding */);
8223  ins_pipe(empty);
8224%}
8225
// CastPP on a pointer register.  Emits no machine code: size is 0 and the
// encoding is empty; the value stays in dst.
8226instruct castPP(rRegP dst)
8227%{
8228  match(Set dst (CastPP dst));
8229
8230  size(0);
8231  format %{ "# castPP of $dst" %}
8232  ins_encode(/* empty encoding */);
8233  ins_pipe(empty);
8234%}
8235
// CastII on an int register.  Emits no machine code: size is 0 and the
// encoding is empty.  Unlike castPP / checkCastPP, this rule also declares
// an explicit cost of 0.
8236instruct castII(rRegI dst)
8237%{
8238  match(Set dst (CastII dst));
8239
8240  size(0);
8241  format %{ "# castII of $dst" %}
8242  ins_encode(/* empty encoding */);
8243  ins_cost(0);
8244  ins_pipe(empty);
8245%}
8246
8247// LoadP-locked same as a regular LoadP when used with compare-swap
// LoadPLocked: encoded as an ordinary 64-bit movq load (opcode 0x8B with the
// wide REX encoding class); nothing extra is emitted for the "locked" part.
8248instruct loadPLocked(rRegP dst, memory mem)
8249%{
8250  match(Set dst (LoadPLocked mem));
8251
8252  ins_cost(125); // XXX
8253  format %{ "movq    $dst, $mem\t# ptr locked" %}
8254  opcode(0x8B);
8255  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
8256  ins_pipe(ialu_reg_mem); // XXX
8257%}
8258
8259// LoadL-locked - same as a regular LoadL when used with compare-swap
// LoadLLocked: encoded as an ordinary 64-bit movq load (opcode 0x8B with the
// wide REX encoding class); nothing extra is emitted for the "locked" part.
8260instruct loadLLocked(rRegL dst, memory mem)
8261%{
8262  match(Set dst (LoadLLocked mem));
8263
8264  ins_cost(125); // XXX
8265  format %{ "movq    $dst, $mem\t# long locked" %}
8266  opcode(0x8B);
8267  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
8268  ins_pipe(ialu_reg_mem); // XXX
8269%}
8270
8271// Conditional-store of the updated heap-top.
8272// Used during allocation of the shared heap.
8273// Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
8274
// StorePConditional: lock-prefixed 64-bit cmpxchg (0x0F 0xB1) of newval into
// heap_top_ptr.  The node's result is the flags register (Set cr ...), and
// oldval is constrained to the rax_RegP operand class.
// NOTE(review): unlike storeIConditional / storeLConditional, this rule has
// no effect(KILL oldval) - confirm that the omission is intentional.
8275instruct storePConditional(memory heap_top_ptr,
8276                           rax_RegP oldval, rRegP newval,
8277                           rFlagsReg cr)
8278%{
8279  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
8280 
8281  format %{ "cmpxchgq $heap_top_ptr, $newval\t# (ptr) "
8282            "If rax == $heap_top_ptr then store $newval into $heap_top_ptr" %}
8283  opcode(0x0F, 0xB1);
8284  ins_encode(lock_prefix,
8285             REX_reg_mem_wide(newval, heap_top_ptr),
8286             OpcP, OpcS,
8287             reg_mem(newval, heap_top_ptr));
8288  ins_pipe(pipe_cmpxchg);
8289%}
8290
8291// Conditional-store of an int value.
8292// ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
// StoreIConditional: lock-prefixed 32-bit cmpxchg (0x0F 0xB1) of newval into
// mem.  The result is the flags register; oldval (rax_RegI operand class) is
// declared killed.
8293instruct storeIConditional(memory mem, rax_RegI oldval, rRegI newval, rFlagsReg cr)
8294%{
8295  match(Set cr (StoreIConditional mem (Binary oldval newval)));
8296  effect(KILL oldval);
8297
8298  format %{ "cmpxchgl $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
8299  opcode(0x0F, 0xB1);
8300  ins_encode(lock_prefix,
8301             REX_reg_mem(newval, mem),
8302             OpcP, OpcS,
8303             reg_mem(newval, mem));
8304  ins_pipe(pipe_cmpxchg);
8305%}
8306
8307// Conditional-store of a long value.
8308// ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
// StoreLConditional: lock-prefixed 64-bit cmpxchg (0x0F 0xB1, wide REX
// encoding class) of newval into mem.  The result is the flags register;
// oldval (rax_RegL operand class) is declared killed.
8309instruct storeLConditional(memory mem, rax_RegL oldval, rRegL newval, rFlagsReg cr)
8310%{
8311  match(Set cr (StoreLConditional mem (Binary oldval newval)));
8312  effect(KILL oldval);
8313
8314  format %{ "cmpxchgq $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
8315  opcode(0x0F, 0xB1);
8316  ins_encode(lock_prefix,
8317             REX_reg_mem_wide(newval, mem),
8318             OpcP, OpcS,
8319             reg_mem(newval, mem));
8320  ins_pipe(pipe_cmpxchg);
8321%}
8322
8323
8324// XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
// CompareAndSwapP producing an int result in res.  Three instructions are
// emitted back to back: a lock-prefixed 64-bit cmpxchg of newval into
// mem_ptr, a sete into the byte form of res, and a movzbl that widens that
// byte to the full int register.  Flags and oldval (rax_RegP operand class)
// are declared killed.
8325instruct compareAndSwapP(rRegI res,
8326                         memory mem_ptr,
8327                         rax_RegP oldval, rRegP newval,
8328                         rFlagsReg cr)
8329%{
8330  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
8331  effect(KILL cr, KILL oldval);
8332
8333  format %{ "cmpxchgq $mem_ptr,$newval\t# "
8334            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8335            "sete    $res\n\t"
8336            "movzbl  $res, $res" %}
8337  opcode(0x0F, 0xB1);
8338  ins_encode(lock_prefix,
8339             REX_reg_mem_wide(newval, mem_ptr),
8340             OpcP, OpcS,
8341             reg_mem(newval, mem_ptr),
8342             REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8343             REX_reg_breg(res, res), // movzbl
8344             Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8345  ins_pipe( pipe_cmpxchg );
8346%}
8347
// CompareAndSwapL producing an int result in res.  Emits a lock-prefixed
// 64-bit cmpxchg of newval into mem_ptr, then sete into the byte form of
// res, then movzbl to widen it.  Flags and oldval (rax_RegL operand class)
// are declared killed.
8348instruct compareAndSwapL(rRegI res,
8349                         memory mem_ptr,
8350                         rax_RegL oldval, rRegL newval,
8351                         rFlagsReg cr)
8352%{
8353  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
8354  effect(KILL cr, KILL oldval);
8355
8356  format %{ "cmpxchgq $mem_ptr,$newval\t# "
8357            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8358            "sete    $res\n\t"
8359            "movzbl  $res, $res" %}
8360  opcode(0x0F, 0xB1);
8361  ins_encode(lock_prefix,
8362             REX_reg_mem_wide(newval, mem_ptr),
8363             OpcP, OpcS,
8364             reg_mem(newval, mem_ptr),
8365             REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8366             REX_reg_breg(res, res), // movzbl
8367             Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8368  ins_pipe( pipe_cmpxchg );
8369%}
8370
// CompareAndSwapI producing an int result in res.  Emits a lock-prefixed
// 32-bit cmpxchg (non-wide REX encoding class) of newval into mem_ptr, then
// sete into the byte form of res, then movzbl to widen it.  Flags and oldval
// (rax_RegI operand class) are declared killed.
8371instruct compareAndSwapI(rRegI res,
8372                         memory mem_ptr,
8373                         rax_RegI oldval, rRegI newval,
8374                         rFlagsReg cr)
8375%{
8376  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
8377  effect(KILL cr, KILL oldval);
8378
8379  format %{ "cmpxchgl $mem_ptr,$newval\t# "
8380            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8381            "sete    $res\n\t"
8382            "movzbl  $res, $res" %}
8383  opcode(0x0F, 0xB1);
8384  ins_encode(lock_prefix,
8385             REX_reg_mem(newval, mem_ptr),
8386             OpcP, OpcS,
8387             reg_mem(newval, mem_ptr),
8388             REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8389             REX_reg_breg(res, res), // movzbl
8390             Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8391  ins_pipe( pipe_cmpxchg );
8392%}
8393
8394
// CompareAndSwapN (rRegN / rax_RegN operands) producing an int result in
// res.  Uses the same 32-bit encoding sequence as compareAndSwapI: a
// lock-prefixed cmpxchgl, then sete, then movzbl.  Flags and oldval are
// declared killed.
8395instruct compareAndSwapN(rRegI res,
8396                          memory mem_ptr,
8397                          rax_RegN oldval, rRegN newval,
8398                          rFlagsReg cr) %{
8399  match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
8400  effect(KILL cr, KILL oldval);
8401
8402  format %{ "cmpxchgl $mem_ptr,$newval\t# "
8403            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8404            "sete    $res\n\t"
8405            "movzbl  $res, $res" %}
8406  opcode(0x0F, 0xB1);
8407  ins_encode(lock_prefix,
8408             REX_reg_mem(newval, mem_ptr),
8409             OpcP, OpcS,
8410             reg_mem(newval, mem_ptr),
8411             REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8412             REX_reg_breg(res, res), // movzbl
8413             Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8414  ins_pipe( pipe_cmpxchg );
8415%}
8416
8417//----------Subtraction Instructions-------------------------------------------
8418
8419// Integer Subtraction Instructions
// 32-bit register/register subtract: dst = dst - src (two-address form).
// The flags register is declared killed.
8420instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8421%{
8422  match(Set dst (SubI dst src));
8423  effect(KILL cr);
8424
8425  format %{ "subl    $dst, $src\t# int" %}
8426  opcode(0x2B);
8427  ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
8428  ins_pipe(ialu_reg_reg);
8429%}
8430
// 32-bit subtract of an immediate from a register: dst = dst - src,
// opcode 0x81 /5.
8431instruct subI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
8432%{
8433  match(Set dst (SubI dst src));
8434  effect(KILL cr);
8435
8436  format %{ "subl    $dst, $src\t# int" %}
8437  opcode(0x81, 0x05);  /* Opcode 81 /5 */
8438  ins_encode(OpcSErm(dst, src), Con8or32(src));
8439  ins_pipe(ialu_reg);
8440%}
8441
// 32-bit subtract with a memory source: dst = dst - [src].  The LoadI is
// folded into the subl.
8442instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
8443%{
8444  match(Set dst (SubI dst (LoadI src)));
8445  effect(KILL cr);
8446
8447  ins_cost(125);
8448  format %{ "subl    $dst, $src\t# int" %}
8449  opcode(0x2B);
8450  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
8451  ins_pipe(ialu_reg_mem);
8452%}
8453
// 32-bit read-modify-write subtract from memory: [dst] = [dst] - src.
// Matches a StoreI whose value is (LoadI of the same address) - src.
8454instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
8455%{
8456  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
8457  effect(KILL cr);
8458
8459  ins_cost(150);
8460  format %{ "subl    $dst, $src\t# int" %}
8461  opcode(0x29); /* Opcode 29 /r */
8462  // Memory is the destination here, so src goes in the register slot.
8463  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
8464  ins_pipe(ialu_mem_reg);
8465%}
8465
// 32-bit read-modify-write subtract of an immediate from memory:
// [dst] = [dst] - src, opcode 0x81 with /5 supplied to RM_opc_mem.
8466instruct subI_mem_imm(memory dst, immI src, rFlagsReg cr)
8467%{
8468  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
8469  effect(KILL cr);
8470
8471  ins_cost(125); // XXX
8472  format %{ "subl    $dst, $src\t# int" %}
8473  opcode(0x81); /* Opcode 81 /5 id */
8474  ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
8475  ins_pipe(ialu_mem_imm);
8476%}
8477
// 64-bit register/register subtract: dst = dst - src (two-address form),
// using the wide REX encoding class.  The flags register is declared killed.
8478instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8479%{
8480  match(Set dst (SubL dst src));
8481  effect(KILL cr);
8482
8483  format %{ "subq    $dst, $src\t# long" %}
8484  opcode(0x2B);
8485  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8486  ins_pipe(ialu_reg_reg);
8487%}
8488
// 64-bit subtract of an immL32 immediate from a register: dst = dst - src,
// opcode 0x81 /5 with the wide encoding class.
// dst must be a long register class (rRegL): the rule matches SubL and emits
// a 64-bit subq, like subL_rReg and subL_rReg_mem.  It was previously
// declared rRegI, which does not agree with the long result.
8489instruct subL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
8490%{
8491  match(Set dst (SubL dst src));
8492  effect(KILL cr);
8493
8494  format %{ "subq    $dst, $src\t# long" %}
8495  opcode(0x81, 0x05);  /* Opcode 81 /5 */
8496  ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
8497  ins_pipe(ialu_reg);
8498%}
8499
// 64-bit subtract with a memory source: dst = dst - [src].  The LoadL is
// folded into the subq.
8500instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
8501%{
8502  match(Set dst (SubL dst (LoadL src)));
8503  effect(KILL cr);
8504
8505  ins_cost(125);
8506  format %{ "subq    $dst, $src\t# long" %}
8507  opcode(0x2B);
8508  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
8509  ins_pipe(ialu_reg_mem);
8510%}
8511
// 64-bit read-modify-write subtract from memory: [dst] = [dst] - src.
// Matches a StoreL whose value is (LoadL of the same address) - src.
8512instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
8513%{
8514  match(Set dst (StoreL dst (SubL (LoadL dst) src)));
8515  effect(KILL cr);
8516
8517  ins_cost(150);
8518  format %{ "subq    $dst, $src\t# long" %}
8519  opcode(0x29); /* Opcode 29 /r */
8520  // Memory is the destination here, so src goes in the register slot.
8521  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
8522  ins_pipe(ialu_mem_reg);
8523%}
8523
// 64-bit read-modify-write subtract of an immL32 immediate from memory:
// [dst] = [dst] - src, opcode 0x81 with /5 supplied to RM_opc_mem.
8524instruct subL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
8525%{
8526  match(Set dst (StoreL dst (SubL (LoadL dst) src)));
8527  effect(KILL cr);
8528
8529  ins_cost(125); // XXX
8530  format %{ "subq    $dst, $src\t# long" %}
8531  opcode(0x81); /* Opcode 81 /5 id */
8532  ins_encode(REX_mem_wide(dst),
8533             OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
8534  ins_pipe(ialu_mem_imm);
8535%}
8536
8537// Subtract from a pointer
8538// XXX hmpf???
// Pointer minus int: matches AddP of dst with the negation (0 - src) of an
// int register and emits a single 64-bit subq instead of neg + add.
// NOTE(review): src is an rRegI but is consumed by a wide (64-bit) subq;
// confirm what guarantees the upper half of src holds a usable value, or
// whether this pattern is still reachable at all.
8539instruct subP_rReg(rRegP dst, rRegI src, immI0 zero, rFlagsReg cr)
8540%{
8541  match(Set dst (AddP dst (SubI zero src)));
8542  effect(KILL cr);
8543
8544  format %{ "subq    $dst, $src\t# ptr - int" %}
8545  opcode(0x2B);
8546  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8547  ins_pipe(ialu_reg_reg);
8548%}
8549
// 32-bit negate in a register: matches (0 - dst) and emits negl,
// opcode F7 /3.  The zero operand is not referenced by the encoding.
8550instruct negI_rReg(rRegI dst, immI0 zero, rFlagsReg cr)
8551%{
8552  match(Set dst (SubI zero dst));
8553  effect(KILL cr);
8554
8555  format %{ "negl    $dst\t# int" %}
8556  opcode(0xF7, 0x03);  // Opcode F7 /3
8557  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8558  ins_pipe(ialu_reg);
8559%}
8560
// 32-bit negate in memory: matches a StoreI of (0 - LoadI of the same
// address) and emits negl [dst], opcode F7 /3 (the /3 is passed to
// RM_opc_mem as the secondary opcode).
// NOTE(review): the pipe class is ialu_reg although the operand is memory;
// other read-modify-write rules in this file use ialu_mem_imm or
// ialu_mem_reg - confirm this is intended.
8561instruct negI_mem(memory dst, immI0 zero, rFlagsReg cr)
8562%{
8563  match(Set dst (StoreI dst (SubI zero (LoadI dst))));
8564  effect(KILL cr);
8565
8566  format %{ "negl    $dst\t# int" %}
8567  opcode(0xF7, 0x03);  // Opcode F7 /3
8568  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8569  ins_pipe(ialu_reg);
8570%}
8571
// 64-bit negate in a register: matches (0 - dst) and emits negq,
// opcode F7 /3 with the wide REX encoding class.
8572instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
8573%{
8574  match(Set dst (SubL zero dst));
8575  effect(KILL cr);
8576
8577  format %{ "negq    $dst\t# long" %}
8578  opcode(0xF7, 0x03);  // Opcode F7 /3
8579  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8580  ins_pipe(ialu_reg);
8581%}
8582
// 64-bit negate in memory: matches a StoreL of (0 - LoadL of the same
// address) and emits negq [dst], opcode F7 /3 (the /3 is passed to
// RM_opc_mem as the secondary opcode).
// NOTE(review): the pipe class is ialu_reg although the operand is memory;
// other read-modify-write rules in this file use ialu_mem_imm or
// ialu_mem_reg - confirm this is intended.
8583instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
8584%{
8585  match(Set dst (StoreL dst (SubL zero (LoadL dst))));
8586  effect(KILL cr);
8587
8588  format %{ "negq    $dst\t# long" %}
8589  opcode(0xF7, 0x03);  // Opcode F7 /3
8590  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8591  ins_pipe(ialu_reg);
8592%}
8593
8594
8595//----------Multiplication/Division Instructions-------------------------------
8596// Integer Multiplication Instructions
8597// Multiply Register
8598
// 32-bit register/register multiply: dst = dst * src (two-address form),
// two-byte opcode 0x0F 0xAF.  The flags register is declared killed.
8599instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8600%{
8601  match(Set dst (MulI dst src));
8602  effect(KILL cr);
8603
8604  ins_cost(300);
8605  format %{ "imull   $dst, $src\t# int" %}
8606  opcode(0x0F, 0xAF);
8607  ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
8608  ins_pipe(ialu_reg_reg_alu0);
8609%}
8610
// Three-operand 32-bit multiply by an immediate: dst = src * imm,
// opcode 0x69.  dst is a separate operand from src.
8611instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
8612%{
8613  match(Set dst (MulI src imm));
8614  effect(KILL cr);
8615
8616  ins_cost(300);
8617  format %{ "imull   $dst, $src, $imm\t# int" %}
8618  opcode(0x69); /* 69 /r id */
8619  ins_encode(REX_reg_reg(dst, src),
8620             OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
8621  ins_pipe(ialu_reg_reg_alu0);
8622%}
8623
// 32-bit multiply with a memory source: dst = dst * [src].  The LoadI is
// folded into the imull (opcode 0x0F 0xAF).
8624instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
8625%{
8626  match(Set dst (MulI dst (LoadI src)));
8627  effect(KILL cr);
8628
8629  ins_cost(350);
8630  format %{ "imull   $dst, $src\t# int" %}
8631  opcode(0x0F, 0xAF);
8632  ins_encode(REX_reg_mem(dst, src), OpcP, OpcS, reg_mem(dst, src));
8633  ins_pipe(ialu_reg_mem_alu0);
8634%}
8635
// Three-operand 32-bit multiply of a memory operand by an immediate:
// dst = [src] * imm, opcode 0x69, with the LoadI folded in.
8636instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
8637%{
8638  match(Set dst (MulI (LoadI src) imm));
8639  effect(KILL cr);
8640
8641  ins_cost(300);
8642  format %{ "imull   $dst, $src, $imm\t# int" %}
8643  opcode(0x69); /* 69 /r id */
8644  ins_encode(REX_reg_mem(dst, src),
8645             OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
8646  ins_pipe(ialu_reg_mem_alu0);
8647%}
8648
// 64-bit register/register multiply: dst = dst * src (two-address form),
// opcode 0x0F 0xAF with the wide REX encoding class.
8649instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8650%{
8651  match(Set dst (MulL dst src));
8652  effect(KILL cr);
8653
8654  ins_cost(300);
8655  format %{ "imulq   $dst, $src\t# long" %}
8656  opcode(0x0F, 0xAF);
8657  ins_encode(REX_reg_reg_wide(dst, src), OpcP, OpcS, reg_reg(dst, src));
8658  ins_pipe(ialu_reg_reg_alu0);
8659%}
8660
// Three-operand 64-bit multiply by an immL32 immediate: dst = src * imm,
// opcode 0x69 with the wide REX encoding class.
8661instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
8662%{
8663  match(Set dst (MulL src imm));
8664  effect(KILL cr);
8665
8666  ins_cost(300);
8667  format %{ "imulq   $dst, $src, $imm\t# long" %}
8668  opcode(0x69); /* 69 /r id */
8669  ins_encode(REX_reg_reg_wide(dst, src),
8670             OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
8671  ins_pipe(ialu_reg_reg_alu0);
8672%}
8673
// 64-bit multiply with a memory source: dst = dst * [src].  The LoadL is
// folded into the imulq (opcode 0x0F 0xAF, wide REX encoding class).
8674instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
8675%{
8676  match(Set dst (MulL dst (LoadL src)));
8677  effect(KILL cr);
8678
8679  ins_cost(350);
8680  format %{ "imulq   $dst, $src\t# long" %}
8681  opcode(0x0F, 0xAF);
8682  ins_encode(REX_reg_mem_wide(dst, src), OpcP, OpcS, reg_mem(dst, src));
8683  ins_pipe(ialu_reg_mem_alu0);
8684%}
8685
// Three-operand 64-bit multiply of a memory operand by an immL32 immediate:
// dst = [src] * imm, opcode 0x69, with the LoadL folded in.
8686instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
8687%{
8688  match(Set dst (MulL (LoadL src) imm));
8689  effect(KILL cr);
8690
8691  ins_cost(300);
8692  format %{ "imulq   $dst, $src, $imm\t# long" %}
8693  opcode(0x69); /* 69 /r id */
8694  ins_encode(REX_reg_mem_wide(dst, src),
8695             OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
8696  ins_pipe(ialu_reg_mem_alu0);
8697%}
8698
// MulHiL via the one-operand imulq (F7 /5), which the format shows as
// producing RDX:RAX from RAX and src.  The result operand is pinned to the
// rdx_RegL class, one input to rax_RegL (used and then killed), and src to
// no_rax_RegL.
8699instruct mulHiL_rReg(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
8700%{
8701  match(Set dst (MulHiL src rax));
8702  effect(USE_KILL rax, KILL cr);
8703
8704  ins_cost(300);
8705  format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
8706  opcode(0xF7, 0x5); /* Opcode F7 /5 */
8707  ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));
8708  ins_pipe(ialu_reg_reg_alu0);
8709%}
8710
// 32-bit signed divide: rax = rax / div.  rdx and the flags are killed, and
// the divisor is restricted to the no_rax_rdx_RegI class.
// Per the format, the sequence special-cases rax == 0x80000000 with
// div == -1: it zeroes rdx and jumps past the idivl.  Otherwise it runs
// cdql followed by idivl.
// NOTE(review): cdql_enc presumably emits everything shown before the idivl
// (the compare/branch guard and the cdql) - confirm in the encode block.
8711instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
8712                   rFlagsReg cr)
8713%{
8714  match(Set rax (DivI rax div));
8715  effect(KILL rdx, KILL cr);
8716
8717  ins_cost(30*100+10*100); // XXX
8718  format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
8719            "jne,s   normal\n\t"
8720            "xorl    rdx, rdx\n\t"
8721            "cmpl    $div, -1\n\t"
8722            "je,s    done\n"
8723    "normal: cdql\n\t"
8724            "idivl   $div\n"
8725    "done:"        %}
8726  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8727  ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8728  ins_pipe(ialu_reg_reg_alu0);
8729%}
8730
// 64-bit signed divide: rax = rax / div.  rdx and the flags are killed, and
// the divisor is restricted to the no_rax_rdx_RegL class.
// Per the format, the sequence special-cases rax == 0x8000000000000000 with
// div == -1: it zeroes rdx and jumps past the idivq.  Otherwise it runs
// cdqq followed by idivq.
// NOTE(review): cdqq_enc presumably emits everything shown before the idivq
// - confirm in the encode block.
8731instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
8732                   rFlagsReg cr)
8733%{
8734  match(Set rax (DivL rax div));
8735  effect(KILL rdx, KILL cr);
8736
8737  ins_cost(30*100+10*100); // XXX
8738  format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
8739            "cmpq    rax, rdx\n\t"
8740            "jne,s   normal\n\t"
8741            "xorl    rdx, rdx\n\t"
8742            "cmpq    $div, -1\n\t"
8743            "je,s    done\n"
8744    "normal: cdqq\n\t"
8745            "idivq   $div\n"
8746    "done:"        %}
8747  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8748  ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8749  ins_pipe(ialu_reg_reg_alu0);
8750%}
8751
8752// Integer DIVMOD with Register, both quotient and mod results
// 32-bit DivModI: the match rule has no "Set", and unlike divI_rReg /
// modI_rReg neither rax nor rdx is killed - only the flags are.  Presumably
// the quotient is taken from rax and the remainder from rdx (confirm
// against the DivModI projections).  Emits the same guarded cdql/idivl
// sequence as divI_rReg but is scheduled as pipe_slow.
8753instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
8754                             rFlagsReg cr)
8755%{
8756  match(DivModI rax div);
8757  effect(KILL cr);
8758
8759  ins_cost(30*100+10*100); // XXX
8760  format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
8761            "jne,s   normal\n\t"
8762            "xorl    rdx, rdx\n\t"
8763            "cmpl    $div, -1\n\t"
8764            "je,s    done\n"
8765    "normal: cdql\n\t"
8766            "idivl   $div\n"
8767    "done:"        %}
8768  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8769  ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8770  ins_pipe(pipe_slow);
8771%}
8772
8773// Long DIVMOD with Register, both quotient and mod results
// 64-bit DivModL: the match rule has no "Set", and neither rax nor rdx is
// killed - only the flags are.  Presumably the quotient is taken from rax
// and the remainder from rdx (confirm against the DivModL projections).
// Emits the same guarded cdqq/idivq sequence as divL_rReg but is scheduled
// as pipe_slow.
8774instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
8775                             rFlagsReg cr)
8776%{
8777  match(DivModL rax div);
8778  effect(KILL cr);
8779
8780  ins_cost(30*100+10*100); // XXX
8781  format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
8782            "cmpq    rax, rdx\n\t"
8783            "jne,s   normal\n\t"
8784            "xorl    rdx, rdx\n\t"
8785            "cmpq    $div, -1\n\t"
8786            "je,s    done\n"
8787    "normal: cdqq\n\t"
8788            "idivq   $div\n"
8789    "done:"        %}
8790  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8791  ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8792  ins_pipe(pipe_slow);
8793%}
8794
8795//----------- DivL-By-Constant-Expansions--------------------------------------
8796// DivI cases are handled by the compiler
8797
8798// Magic constant, reciprocal of 10
// Loads the 64-bit constant 0x6666666666666667 into dst.  There is no match
// rule: the instruction only defines dst and is instantiated from an expand
// block (divL_10).  The format text previously printed the constant with one
// '6' missing; it now shows the value that load_immL actually emits.
8799instruct loadConL_0x6666666666666667(rRegL dst)
8800%{
8801  effect(DEF dst);
8802
8803  format %{ "movq    $dst, #0x6666666666666667\t# Used in div-by-10" %}
8804  ins_encode(load_immL(dst, 0x6666666666666667));
8805  ins_pipe(ialu_reg);
8806%}
8807
// One-operand imulq (F7 /5) helper with no match rule; its operand roles are
// given purely by effect(): dst (rdx_RegL) is defined, src is used, rax is
// used and then killed, and flags are killed.  The format marks it as part
// of the div-by-10 expansion.
8808instruct mul_hi(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
8809%{
8810  effect(DEF dst, USE src, USE_KILL rax, KILL cr);
8811
8812  format %{ "imulq   rdx:rax, rax, $src\t# Used in div-by-10" %}
8813  opcode(0xF7, 0x5); /* Opcode F7 /5 */
8814  ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));
8815  ins_pipe(ialu_reg_reg_alu0);
8816%}
8817
// 64-bit arithmetic shift right by the fixed count 63 (0x3F), opcode C1 /7.
// No match rule: dst is both used and defined, flags are killed.  The format
// marks it as part of the div-by-10 expansion.
8818instruct sarL_rReg_63(rRegL dst, rFlagsReg cr)
8819%{
8820  effect(USE_DEF dst, KILL cr);
8821
8822  format %{ "sarq    $dst, #63\t# Used in div-by-10" %}
8823  opcode(0xC1, 0x7); /* C1 /7 ib */
8824  ins_encode(reg_opc_imm_wide(dst, 0x3F));
8825  ins_pipe(ialu_reg);
8826%}
8827
// 64-bit arithmetic shift right by the fixed count 2, opcode C1 /7.
// No match rule: dst is both used and defined, flags are killed.  The format
// marks it as part of the div-by-10 expansion.
8828instruct sarL_rReg_2(rRegL dst, rFlagsReg cr)
8829%{
8830  effect(USE_DEF dst, KILL cr);
8831
8832  format %{ "sarq    $dst, #2\t# Used in div-by-10" %}
8833  opcode(0xC1, 0x7); /* C1 /7 ib */
8834  ins_encode(reg_opc_imm_wide(dst, 0x2));
8835  ins_pipe(ialu_reg);
8836%}
8837
// Long division by the immL10 constant, expanded into a multiply-high by
// 0x6666666666666667 instead of an idivq.  The result is produced in the
// rdx_RegL operand.  Sequence: rdx = high half of (rax * src); src >>= 63
// (sign word of src); rdx >>= 2; rdx -= src.
8838instruct divL_10(rdx_RegL dst, no_rax_RegL src, immL10 div)
8839%{
8840  match(Set dst (DivL src div));
8841
8842  ins_cost((5+8)*100);
8843  expand %{
8844    rax_RegL rax;                     // Killed temp
8845    rFlagsReg cr;                     // Killed
8846    loadConL_0x6666666666666667(rax); // movq  rax, 0x6666666666666667
8847    mul_hi(dst, src, rax, cr);        // imulq rdx:rax <= rax * $src
8848    sarL_rReg_63(src, cr);            // sarq  src, 63
8849    sarL_rReg_2(dst, cr);             // sarq  rdx, 2
8850    subL_rReg(dst, src, cr);          // subq  rdx, src
8851  %}
8852%}
8853
8854//-----------------------------------------------------------------------------
8855
// 32-bit signed remainder: matches ModI with the dividend in rax and the
// result in rdx; the divisor may be neither rax nor rdx. rax and the flags
// are killed.
// Per the format text, the sequence special-cases rax == 0x80000000 with
// $div == -1 (rdx is zeroed and idivl is skipped); otherwise it runs
// cdql + idivl. The special-case bytes come from cdql_enc, which is defined
// outside this section; only the idivl (F7 /7) is encoded inline here.
8856instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
8857                   rFlagsReg cr)
8858%{
8859  match(Set rdx (ModI rax div));
8860  effect(KILL rax, KILL cr);
8861
8862  ins_cost(300); // XXX
8863  format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
8864            "jne,s   normal\n\t"
8865            "xorl    rdx, rdx\n\t"
8866            "cmpl    $div, -1\n\t"
8867            "je,s    done\n"
8868    "normal: cdql\n\t"
8869            "idivl   $div\n"
8870    "done:"        %}
8871  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8872  ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8873  ins_pipe(ialu_reg_reg_alu0);
8874%}
8875
// 64-bit signed remainder: matches ModL with the dividend in rax and the
// result in rdx; the divisor may be neither rax nor rdx. rax and the flags
// are killed.
// Per the format text, rax == 0x8000000000000000 with $div == -1 is
// special-cased (rdx is zeroed and idivq is skipped); otherwise it runs
// cdqq + idivq. The special-case bytes come from cdqq_enc, which is defined
// outside this section; only the idivq (REX.W F7 /7) is encoded inline here.
8876instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
8877                   rFlagsReg cr)
8878%{
8879  match(Set rdx (ModL rax div));
8880  effect(KILL rax, KILL cr);
8881
8882  ins_cost(300); // XXX
8883  format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
8884            "cmpq    rax, rdx\n\t"
8885            "jne,s   normal\n\t"
8886            "xorl    rdx, rdx\n\t"
8887            "cmpq    $div, -1\n\t"
8888            "je,s    done\n"
8889    "normal: cdqq\n\t"
8890            "idivq   $div\n"
8891    "done:"        %}
8892  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8893  ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8894  ins_pipe(ialu_reg_reg_alu0);
8895%}
8896
8897// Integer Shift Instructions
8898// Shift Left by one
// 32-bit left shift of a register by the constant matched by immI1.
// Two-address form: the result overwrites the first input ($dst).
// Encoded as D1 /4, so no immediate byte is emitted; flags are killed.
8899instruct salI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8900%{
8901  match(Set dst (LShiftI dst shift));
8902  effect(KILL cr);
8903
8904  format %{ "sall    $dst, $shift" %}
8905  opcode(0xD1, 0x4); /* D1 /4 */
8906  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8907  ins_pipe(ialu_reg);
8908%}
8909
8910// Shift Left by one
// Read-modify-write 32-bit left shift by the immI1 constant: matches a
// StoreI of (LShiftI (LoadI dst) shift) back to the same memory operand.
// Encoded as D1 /4 on the memory operand; flags are killed.
8911instruct salI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8912%{
8913  match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8914  effect(KILL cr);
8915
8916  format %{ "sall    $dst, $shift\t" %}
8917  opcode(0xD1, 0x4); /* D1 /4 */
8918  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8919  ins_pipe(ialu_mem_imm);
8920%}
8921
8922// Shift Left by 8-bit immediate
// 32-bit left shift of a register by an immI8 constant (C1 /4 ib).
// Two-address form: the result overwrites $dst; flags are killed.
8923instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8924%{
8925  match(Set dst (LShiftI dst shift));
8926  effect(KILL cr);
8927
8928  format %{ "sall    $dst, $shift" %}
8929  opcode(0xC1, 0x4); /* C1 /4 ib */
8930  ins_encode(reg_opc_imm(dst, shift));
8931  ins_pipe(ialu_reg);
8932%}
8933
8934// Shift Left by 8-bit immediate
// Read-modify-write 32-bit left shift by an immI8 constant: matches a StoreI
// of (LShiftI (LoadI dst) shift) back to the same memory operand.
// Encoded as C1 /4 followed by the shift constant; flags are killed.
8935instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8936%{
8937  match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8938  effect(KILL cr);
8939
8940  format %{ "sall    $dst, $shift" %}
8941  opcode(0xC1, 0x4); /* C1 /4 ib */
8942  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
8943  ins_pipe(ialu_mem_imm);
8944%}
8945
8946// Shift Left by variable
// 32-bit left shift of a register by a variable count. The count operand is
// of class rcx_RegI and is not encoded explicitly (D3 /4 form).
// Two-address form: the result overwrites $dst; flags are killed.
8947instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
8948%{
8949  match(Set dst (LShiftI dst shift));
8950  effect(KILL cr);
8951
8952  format %{ "sall    $dst, $shift" %}
8953  opcode(0xD3, 0x4); /* D3 /4 */
8954  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8955  ins_pipe(ialu_reg_reg);
8956%}
8957
8958// Shift Left by variable
// Read-modify-write 32-bit left shift by a variable count (rcx_RegI):
// matches a StoreI of (LShiftI (LoadI dst) shift) back to the same memory
// operand. Encoded as D3 /4 on the memory operand; flags are killed.
8959instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8960%{
8961  match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8962  effect(KILL cr);
8963
8964  format %{ "sall    $dst, $shift" %}
8965  opcode(0xD3, 0x4); /* D3 /4 */
8966  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8967  ins_pipe(ialu_mem_reg);
8968%}
8969
8970// Arithmetic shift right by one
// 32-bit arithmetic right shift of a register by the immI1 constant.
// Two-address form: the result overwrites $dst.
// Encoded as D1 /7, so no immediate byte is emitted; flags are killed.
8971instruct sarI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8972%{
8973  match(Set dst (RShiftI dst shift));
8974  effect(KILL cr);
8975
8976  format %{ "sarl    $dst, $shift" %}
8977  opcode(0xD1, 0x7); /* D1 /7 */
8978  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8979  ins_pipe(ialu_reg);
8980%}
8981
8982// Arithmetic shift right by one
// Read-modify-write 32-bit arithmetic right shift by the immI1 constant:
// matches a StoreI of (RShiftI (LoadI dst) shift) back to the same memory
// operand. Encoded as D1 /7 on the memory operand; flags are killed.
8983instruct sarI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8984%{
8985  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8986  effect(KILL cr);
8987
8988  format %{ "sarl    $dst, $shift" %}
8989  opcode(0xD1, 0x7); /* D1 /7 */
8990  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8991  ins_pipe(ialu_mem_imm);
8992%}
8993
8994// Arithmetic Shift Right by 8-bit immediate
// 32-bit arithmetic right shift of a register by an immI8 constant
// (C1 /7 ib). Two-address form: the result overwrites $dst; flags are killed.
8995instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8996%{
8997  match(Set dst (RShiftI dst shift));
8998  effect(KILL cr);
8999
9000  format %{ "sarl    $dst, $shift" %}
9001  opcode(0xC1, 0x7); /* C1 /7 ib */
9002  ins_encode(reg_opc_imm(dst, shift));
  // Register-only operation: use the same pipe class as the sibling
  // salI_rReg_imm / shrI_rReg_imm rules rather than the memory-operand class.
9003  ins_pipe(ialu_reg);
9004%}
9005
9006// Arithmetic Shift Right by 8-bit immediate
// Read-modify-write 32-bit arithmetic right shift by an immI8 constant:
// matches a StoreI of (RShiftI (LoadI dst) shift) back to the same memory
// operand. Encoded as C1 /7 followed by the shift constant; flags are killed.
9007instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9008%{
9009  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
9010  effect(KILL cr);
9011
9012  format %{ "sarl    $dst, $shift" %}
9013  opcode(0xC1, 0x7); /* C1 /7 ib */
9014  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
9015  ins_pipe(ialu_mem_imm);
9016%}
9017
9018// Arithmetic Shift Right by variable
// 32-bit arithmetic right shift of a register by a variable count. The count
// operand is of class rcx_RegI and is not encoded explicitly (D3 /7 form).
// Two-address form: the result overwrites $dst; flags are killed.
9019instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
9020%{
9021  match(Set dst (RShiftI dst shift));
9022  effect(KILL cr);
9023
9024  format %{ "sarl    $dst, $shift" %}
9025  opcode(0xD3, 0x7); /* D3 /7 */
9026  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9027  ins_pipe(ialu_reg_reg);
9028%}
9029
9030// Arithmetic Shift Right by variable
// Read-modify-write 32-bit arithmetic right shift by a variable count
// (rcx_RegI): matches a StoreI of (RShiftI (LoadI dst) shift) back to the
// same memory operand. Encoded as D3 /7; flags are killed.
9031instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9032%{
9033  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
9034  effect(KILL cr);
9035
9036  format %{ "sarl    $dst, $shift" %}
9037  opcode(0xD3, 0x7); /* D3 /7 */
9038  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9039  ins_pipe(ialu_mem_reg);
9040%}
9041
9042// Logical shift right by one
// 32-bit logical (unsigned) right shift of a register by the immI1 constant.
// Two-address form: the result overwrites $dst.
// Encoded as D1 /5, so no immediate byte is emitted; flags are killed.
9043instruct shrI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
9044%{
9045  match(Set dst (URShiftI dst shift));
9046  effect(KILL cr);
9047
9048  format %{ "shrl    $dst, $shift" %}
9049  opcode(0xD1, 0x5); /* D1 /5 */
9050  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9051  ins_pipe(ialu_reg);
9052%}
9053
9054// Logical shift right by one
// Read-modify-write 32-bit logical right shift by the immI1 constant:
// matches a StoreI of (URShiftI (LoadI dst) shift) back to the same memory
// operand. Encoded as D1 /5 on the memory operand; flags are killed.
9055instruct shrI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9056%{
9057  match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
9058  effect(KILL cr);
9059
9060  format %{ "shrl    $dst, $shift" %}
9061  opcode(0xD1, 0x5); /* D1 /5 */
9062  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9063  ins_pipe(ialu_mem_imm);
9064%}
9065
9066// Logical Shift Right by 8-bit immediate
// 32-bit logical right shift of a register by an immI8 constant (C1 /5 ib).
// Two-address form: the result overwrites $dst; flags are killed.
9067instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
9068%{
9069  match(Set dst (URShiftI dst shift));
9070  effect(KILL cr);
9071
9072  format %{ "shrl    $dst, $shift" %}
9073  opcode(0xC1, 0x5); /* C1 /5 ib */
9074  ins_encode(reg_opc_imm(dst, shift));
9075  ins_pipe(ialu_reg);
9076%}
9077
9078// Logical Shift Right by 8-bit immediate
// Read-modify-write 32-bit logical right shift by an immI8 constant: matches
// a StoreI of (URShiftI (LoadI dst) shift) back to the same memory operand.
// Encoded as C1 /5 followed by the shift constant; flags are killed.
9079instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9080%{
9081  match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
9082  effect(KILL cr);
9083
9084  format %{ "shrl    $dst, $shift" %}
9085  opcode(0xC1, 0x5); /* C1 /5 ib */
9086  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
9087  ins_pipe(ialu_mem_imm);
9088%}
9089
9090// Logical Shift Right by variable
// 32-bit logical right shift of a register by a variable count. The count
// operand is of class rcx_RegI and is not encoded explicitly (D3 /5 form).
// Two-address form: the result overwrites $dst; flags are killed.
9091instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
9092%{
9093  match(Set dst (URShiftI dst shift));
9094  effect(KILL cr);
9095
9096  format %{ "shrl    $dst, $shift" %}
9097  opcode(0xD3, 0x5); /* D3 /5 */
9098  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9099  ins_pipe(ialu_reg_reg);
9100%}
9101
9102// Logical Shift Right by variable
// Read-modify-write 32-bit logical right shift by a variable count
// (rcx_RegI): matches a StoreI of (URShiftI (LoadI dst) shift) back to the
// same memory operand. Encoded as D3 /5; flags are killed.
9103instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9104%{
9105  match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
9106  effect(KILL cr);
9107
9108  format %{ "shrl    $dst, $shift" %}
9109  opcode(0xD3, 0x5); /* D3 /5 */
9110  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9111  ins_pipe(ialu_mem_reg);
9112%}
9113
9114// Long Shift Instructions
9115// Shift Left by one
// 64-bit left shift of a register by the immI1 constant. Two-address form:
// the result overwrites $dst. Same D1 /4 opcode as the int rule, but the
// encoding uses the *_wide REX enc class; flags are killed.
9116instruct salL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9117%{
9118  match(Set dst (LShiftL dst shift));
9119  effect(KILL cr);
9120
9121  format %{ "salq    $dst, $shift" %}
9122  opcode(0xD1, 0x4); /* D1 /4 */
9123  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9124  ins_pipe(ialu_reg);
9125%}
9126
9127// Shift Left by one
// Read-modify-write 64-bit left shift by the immI1 constant: matches a
// StoreL of (LShiftL (LoadL dst) shift) back to the same memory operand.
// Encoded as D1 /4 with the wide REX enc class; flags are killed.
9128instruct salL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9129%{
9130  match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
9131  effect(KILL cr);
9132
9133  format %{ "salq    $dst, $shift" %}
9134  opcode(0xD1, 0x4); /* D1 /4 */
9135  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9136  ins_pipe(ialu_mem_imm);
9137%}
9138
9139// Shift Left by 8-bit immediate
// 64-bit left shift of a register by an immI8 constant (C1 /4 ib, wide
// encoding). Two-address form: the result overwrites $dst; flags are killed.
9140instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9141%{
9142  match(Set dst (LShiftL dst shift));
9143  effect(KILL cr);
9144
9145  format %{ "salq    $dst, $shift" %}
9146  opcode(0xC1, 0x4); /* C1 /4 ib */
9147  ins_encode(reg_opc_imm_wide(dst, shift));
9148  ins_pipe(ialu_reg);
9149%}
9150
9151// Shift Left by 8-bit immediate
// Read-modify-write 64-bit left shift by an immI8 constant: matches a StoreL
// of (LShiftL (LoadL dst) shift) back to the same memory operand.
// Encoded as C1 /4 (wide) followed by the shift constant; flags are killed.
9152instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9153%{
9154  match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
9155  effect(KILL cr);
9156
9157  format %{ "salq    $dst, $shift" %}
9158  opcode(0xC1, 0x4); /* C1 /4 ib */
9159  ins_encode(REX_mem_wide(dst), OpcP,
9160             RM_opc_mem(secondary, dst), Con8or32(shift));
9161  ins_pipe(ialu_mem_imm);
9162%}
9163
9164// Shift Left by variable
// 64-bit left shift of a register by a variable count. The count is an int
// operand of class rcx_RegI and is not encoded explicitly (D3 /4 form).
// Two-address form: the result overwrites $dst; flags are killed.
9165instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9166%{
9167  match(Set dst (LShiftL dst shift));
9168  effect(KILL cr);
9169
9170  format %{ "salq    $dst, $shift" %}
9171  opcode(0xD3, 0x4); /* D3 /4 */
9172  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9173  ins_pipe(ialu_reg_reg);
9174%}
9175
9176// Shift Left by variable
// Read-modify-write 64-bit left shift by a variable count (rcx_RegI):
// matches a StoreL of (LShiftL (LoadL dst) shift) back to the same memory
// operand. Encoded as D3 /4 (wide); flags are killed.
9177instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9178%{
9179  match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
9180  effect(KILL cr);
9181
9182  format %{ "salq    $dst, $shift" %}
9183  opcode(0xD3, 0x4); /* D3 /4 */
9184  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9185  ins_pipe(ialu_mem_reg);
9186%}
9187
9188// Arithmetic shift right by one
// 64-bit arithmetic right shift of a register by the immI1 constant.
// Two-address form: the result overwrites $dst.
// Encoded as D1 /7 with the wide REX enc class; flags are killed.
9189instruct sarL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9190%{
9191  match(Set dst (RShiftL dst shift));
9192  effect(KILL cr);
9193
9194  format %{ "sarq    $dst, $shift" %}
9195  opcode(0xD1, 0x7); /* D1 /7 */
9196  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9197  ins_pipe(ialu_reg);
9198%}
9199
9200// Arithmetic shift right by one
// Read-modify-write 64-bit arithmetic right shift by the immI1 constant:
// matches a StoreL of (RShiftL (LoadL dst) shift) back to the same memory
// operand. Encoded as D1 /7 (wide); flags are killed.
9201instruct sarL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9202%{
9203  match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
9204  effect(KILL cr);
9205
9206  format %{ "sarq    $dst, $shift" %}
9207  opcode(0xD1, 0x7); /* D1 /7 */
9208  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9209  ins_pipe(ialu_mem_imm);
9210%}
9211
9212// Arithmetic Shift Right by 8-bit immediate
// 64-bit arithmetic right shift of a register by an immI8 constant
// (C1 /7 ib, wide encoding). Two-address form: the result overwrites $dst;
// flags are killed.
9213instruct sarL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9214%{
9215  match(Set dst (RShiftL dst shift));
9216  effect(KILL cr);
9217
9218  format %{ "sarq    $dst, $shift" %}
9219  opcode(0xC1, 0x7); /* C1 /7 ib */
9220  ins_encode(reg_opc_imm_wide(dst, shift));
  // Register-only operation: use the same pipe class as the sibling
  // salL_rReg_imm / shrL_rReg_imm rules rather than the memory-operand class.
9221  ins_pipe(ialu_reg);
9222%}
9223
9224// Arithmetic Shift Right by 8-bit immediate
// Read-modify-write 64-bit arithmetic right shift by an immI8 constant:
// matches a StoreL of (RShiftL (LoadL dst) shift) back to the same memory
// operand. Encoded as C1 /7 (wide) plus the shift constant; flags are killed.
9225instruct sarL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9226%{
9227  match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
9228  effect(KILL cr);
9229
9230  format %{ "sarq    $dst, $shift" %}
9231  opcode(0xC1, 0x7); /* C1 /7 ib */
9232  ins_encode(REX_mem_wide(dst), OpcP,
9233             RM_opc_mem(secondary, dst), Con8or32(shift));
9234  ins_pipe(ialu_mem_imm);
9235%}
9236
9237// Arithmetic Shift Right by variable
// 64-bit arithmetic right shift of a register by a variable count. The count
// is an int operand of class rcx_RegI, not encoded explicitly (D3 /7 form).
// Two-address form: the result overwrites $dst; flags are killed.
9238instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9239%{
9240  match(Set dst (RShiftL dst shift));
9241  effect(KILL cr);
9242
9243  format %{ "sarq    $dst, $shift" %}
9244  opcode(0xD3, 0x7); /* D3 /7 */
9245  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9246  ins_pipe(ialu_reg_reg);
9247%}
9248
9249// Arithmetic Shift Right by variable
// Read-modify-write 64-bit arithmetic right shift by a variable count
// (rcx_RegI): matches a StoreL of (RShiftL (LoadL dst) shift) back to the
// same memory operand. Encoded as D3 /7 (wide); flags are killed.
9250instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9251%{
9252  match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
9253  effect(KILL cr);
9254
9255  format %{ "sarq    $dst, $shift" %}
9256  opcode(0xD3, 0x7); /* D3 /7 */
9257  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9258  ins_pipe(ialu_mem_reg);
9259%}
9260
9261// Logical shift right by one
// 64-bit logical right shift of a register by the immI1 constant.
// Two-address form: the result overwrites $dst.
// Encoded as D1 /5 with the wide REX enc class; flags are killed.
9262instruct shrL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9263%{
9264  match(Set dst (URShiftL dst shift));
9265  effect(KILL cr);
9266
9267  format %{ "shrq    $dst, $shift" %}
9268  opcode(0xD1, 0x5); /* D1 /5 */
9269  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst ));
9270  ins_pipe(ialu_reg);
9271%}
9272
9273// Logical shift right by one
// Read-modify-write 64-bit logical right shift by the immI1 constant:
// matches a StoreL of (URShiftL (LoadL dst) shift) back to the same memory
// operand. Encoded as D1 /5 (wide); flags are killed.
9274instruct shrL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9275%{
9276  match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
9277  effect(KILL cr);
9278
9279  format %{ "shrq    $dst, $shift" %}
9280  opcode(0xD1, 0x5); /* D1 /5 */
9281  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9282  ins_pipe(ialu_mem_imm);
9283%}
9284
9285// Logical Shift Right by 8-bit immediate
// 64-bit logical right shift of a register by an immI8 constant (C1 /5 ib,
// wide encoding). Two-address form: the result overwrites $dst; flags are
// killed.
9286instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9287%{
9288  match(Set dst (URShiftL dst shift));
9289  effect(KILL cr);
9290
9291  format %{ "shrq    $dst, $shift" %}
9292  opcode(0xC1, 0x5); /* C1 /5 ib */
9293  ins_encode(reg_opc_imm_wide(dst, shift));
9294  ins_pipe(ialu_reg);
9295%}
9296
9297
9298// Logical Shift Right by 8-bit immediate
// Read-modify-write 64-bit logical right shift by an immI8 constant: matches
// a StoreL of (URShiftL (LoadL dst) shift) back to the same memory operand.
// Encoded as C1 /5 (wide) followed by the shift constant; flags are killed.
9299instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9300%{
9301  match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
9302  effect(KILL cr);
9303
9304  format %{ "shrq    $dst, $shift" %}
9305  opcode(0xC1, 0x5); /* C1 /5 ib */
9306  ins_encode(REX_mem_wide(dst), OpcP,
9307             RM_opc_mem(secondary, dst), Con8or32(shift));
9308  ins_pipe(ialu_mem_imm);
9309%}
9310
9311// Logical Shift Right by variable
// 64-bit logical right shift of a register by a variable count. The count is
// an int operand of class rcx_RegI, not encoded explicitly (D3 /5 form).
// Two-address form: the result overwrites $dst; flags are killed.
9312instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9313%{
9314  match(Set dst (URShiftL dst shift));
9315  effect(KILL cr);
9316
9317  format %{ "shrq    $dst, $shift" %}
9318  opcode(0xD3, 0x5); /* D3 /5 */
9319  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9320  ins_pipe(ialu_reg_reg);
9321%}
9322
9323// Logical Shift Right by variable
// Read-modify-write 64-bit logical right shift by a variable count
// (rcx_RegI): matches a StoreL of (URShiftL (LoadL dst) shift) back to the
// same memory operand. Encoded as D3 /5 (wide); flags are killed.
9324instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9325%{
9326  match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
9327  effect(KILL cr);
9328
9329  format %{ "shrq    $dst, $shift" %}
9330  opcode(0xD3, 0x5); /* D3 /5 */
9331  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9332  ins_pipe(ialu_mem_reg);
9333%}
9334
9335// Shift Left by 24, followed by Arithmetic Shift Right by 24.
9336// This idiom is used by the compiler for the i2b bytecode.
// Collapses (src << 24) >> 24 (arithmetic right shift, both counts matched
// by the same immI_24 operand) into a single movsbl (0F BE).
// Three-operand form: dst and src are separate registers, and no flags
// effect is declared.
9337instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
9338%{
9339  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
9340
9341  format %{ "movsbl  $dst, $src\t# i2b" %}
9342  opcode(0x0F, 0xBE);
  // NOTE(review): REX_reg_breg presumably handles the byte-register source;
  // the enc class is defined outside this section.
9343  ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9344  ins_pipe(ialu_reg_reg);
9345%}
9346
9347// Shift Left by 16, followed by Arithmetic Shift Right by 16.
9348// This idiom is used by the compiler for the i2s bytecode.
// Collapses (src << 16) >> 16 (arithmetic right shift, both counts matched
// by the same immI_16 operand) into a single movswl (0F BF).
// Three-operand form: dst and src are separate registers, and no flags
// effect is declared.
9349instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
9350%{
9351  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
9352
9353  format %{ "movswl  $dst, $src\t# i2s" %}
9354  opcode(0x0F, 0xBF);
9355  ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9356  ins_pipe(ialu_reg_reg);
9357%}
9358
9359// ROL/ROR instructions
9360
9361// ROL expand
// Expand-only helper (no match rule): 32-bit rotate left of $dst by one,
// encoded as D1 /0. dst is read and rewritten; flags are killed.
9362instruct rolI_rReg_imm1(rRegI dst, rFlagsReg cr) %{
9363  effect(KILL cr, USE_DEF dst);
9364
9365  format %{ "roll    $dst" %}
9366  opcode(0xD1, 0x0); /* Opcode  D1 /0 */
9367  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9368  ins_pipe(ialu_reg);
9369%}
9370
// Expand-only helper (no match rule): 32-bit rotate left of $dst by an immI8
// constant, encoded as C1 /0 ib. dst is read and rewritten; flags are killed.
9371instruct rolI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr) %{
9372  effect(USE_DEF dst, USE shift, KILL cr);
9373
9374  format %{ "roll    $dst, $shift" %}
9375  opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
9376  ins_encode( reg_opc_imm(dst, shift) );
9377  ins_pipe(ialu_reg);
9378%}
9379
// Expand-only helper (no match rule): 32-bit rotate left of $dst by a
// variable count of class rcx_RegI, encoded as D3 /0. dst is restricted to
// no_rcx_RegI so it cannot share the count register; flags are killed.
9380instruct rolI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
9381%{
9382  effect(USE_DEF dst, USE shift, KILL cr);
9383
9384  format %{ "roll    $dst, $shift" %}
9385  opcode(0xD3, 0x0); /* Opcode D3 /0 */
9386  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9387  ins_pipe(ialu_reg_reg);
9388%}
9389// end of ROL expand
9390
9391// Rotate Left by one
// Recognizes the int rotate-left-by-one idiom
//   (dst << lshift) | (dst >>> rshift)
// with lshift of type immI1 and rshift of type immI_M1, and expands it to
// rolI_rReg_imm1.
9392instruct rolI_rReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
9393%{
9394  match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
9395
9396  expand %{
9397    rolI_rReg_imm1(dst, cr);
9398  %}
9399%}
9400
9401// Rotate Left by 8-bit immediate
// Recognizes the int rotate-left idiom (dst << lshift) | (dst >>> rshift)
// for constant counts and expands it to rolI_rReg_imm8 using lshift.
// The predicate admits the rule only when the two constants sum to a
// multiple of 32 (low five bits of the sum are zero).
9402instruct rolI_rReg_i8(rRegI dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
9403%{
9404  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
9405  match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
9406
9407  expand %{
9408    rolI_rReg_imm8(dst, lshift, cr);
9409  %}
9410%}
9411
9412// Rotate Left by variable
// Recognizes the variable int rotate-left idiom
//   (dst << shift) | (dst >>> (0 - shift))
// where the zero is matched by immI0, and expands it to rolI_rReg_CL.
9413instruct rolI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9414%{
9415  match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
9416
9417  expand %{
9418    rolI_rReg_CL(dst, shift, cr);
9419  %}
9420%}
9421
9422// Rotate Left by variable
// Recognizes the variable int rotate-left idiom
//   (dst << shift) | (dst >>> (32 - shift))
// where the 32 is matched by immI_32, and expands it to rolI_rReg_CL.
9423instruct rolI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
9424%{
9425  match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
9426
9427  expand %{
9428    rolI_rReg_CL(dst, shift, cr);
9429  %}
9430%}
9431
9432// ROR expand
// Expand-only helper (no match rule): 32-bit rotate right of $dst by one,
// encoded as D1 /1. dst is read and rewritten; flags are killed.
9433instruct rorI_rReg_imm1(rRegI dst, rFlagsReg cr)
9434%{
9435  effect(USE_DEF dst, KILL cr);
9436
9437  format %{ "rorl    $dst" %}
9438  opcode(0xD1, 0x1); /* D1 /1 */
9439  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9440  ins_pipe(ialu_reg);
9441%}
9442
// Expand-only helper (no match rule): 32-bit rotate right of $dst by an
// immI8 constant, encoded as C1 /1 ib. dst is read and rewritten; flags are
// killed.
9443instruct rorI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr)
9444%{
9445  effect(USE_DEF dst, USE shift, KILL cr);
9446
9447  format %{ "rorl    $dst, $shift" %}
9448  opcode(0xC1, 0x1); /* C1 /1 ib */
9449  ins_encode(reg_opc_imm(dst, shift));
9450  ins_pipe(ialu_reg);
9451%}
9452
// Expand-only helper (no match rule): 32-bit rotate right of $dst by a
// variable count of class rcx_RegI, encoded as D3 /1. dst is restricted to
// no_rcx_RegI so it cannot share the count register; flags are killed.
9453instruct rorI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
9454%{
9455  effect(USE_DEF dst, USE shift, KILL cr);
9456
9457  format %{ "rorl    $dst, $shift" %}
9458  opcode(0xD3, 0x1); /* D3 /1 */
9459  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9460  ins_pipe(ialu_reg_reg);
9461%}
9462// end of ROR expand
9463
9464// Rotate Right by one
// Recognizes the int rotate-right-by-one idiom
//   (dst >>> rshift) | (dst << lshift)
// with rshift of type immI1 and lshift of type immI_M1, and expands it to
// rorI_rReg_imm1.
9465instruct rorI_rReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
9466%{
9467  match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
9468
9469  expand %{
9470    rorI_rReg_imm1(dst, cr);
9471  %}
9472%}
9473
9474// Rotate Right by 8-bit immediate
// Recognizes the int rotate-right idiom (dst >>> rshift) | (dst << lshift)
// for constant counts and expands it to rorI_rReg_imm8 using rshift.
// The predicate admits the rule only when the two constants sum to a
// multiple of 32 (low five bits of the sum are zero).
9475instruct rorI_rReg_i8(rRegI dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
9476%{
9477  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
9478  match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
9479
9480  expand %{
9481    rorI_rReg_imm8(dst, rshift, cr);
9482  %}
9483%}
9484
9485// Rotate Right by variable
// Recognizes the variable int rotate-right idiom
//   (dst >>> shift) | (dst << (0 - shift))
// where the zero is matched by immI0, and expands it to rorI_rReg_CL.
9486instruct rorI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9487%{
9488  match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
9489
9490  expand %{
9491    rorI_rReg_CL(dst, shift, cr);
9492  %}
9493%}
9494
9495// Rotate Right by variable
// Recognizes the variable int rotate-right idiom
//   (dst >>> shift) | (dst << (32 - shift))
// where the 32 is matched by immI_32, and expands it to rorI_rReg_CL.
9496instruct rorI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
9497%{
9498  match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
9499
9500  expand %{
9501    rorI_rReg_CL(dst, shift, cr);
9502  %}
9503%}
9504
9505// for long rotate
9506// ROL expand
// Expand-only helper (no match rule): 64-bit rotate left of $dst by one,
// encoded as D1 /0 with the wide REX enc class. dst is read and rewritten;
// flags are killed.
9507instruct rolL_rReg_imm1(rRegL dst, rFlagsReg cr) %{
9508  effect(USE_DEF dst, KILL cr);
9509
9510  format %{ "rolq    $dst" %}
9511  opcode(0xD1, 0x0); /* Opcode  D1 /0 */
9512  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9513  ins_pipe(ialu_reg);
9514%}
9515
// Expand-only helper (no match rule): 64-bit rotate left of $dst by an immI8
// constant, encoded as C1 /0 ib (wide). dst is read and rewritten; flags are
// killed.
9516instruct rolL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr) %{
9517  effect(USE_DEF dst, USE shift, KILL cr);
9518
9519  format %{ "rolq    $dst, $shift" %}
9520  opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
9521  ins_encode( reg_opc_imm_wide(dst, shift) );
9522  ins_pipe(ialu_reg);
9523%}
9524
// Expand-only helper (no match rule): 64-bit rotate left of $dst by a
// variable count of class rcx_RegI, encoded as D3 /0 (wide). dst is
// restricted to no_rcx_RegL so it cannot share the count register; flags
// are killed.
9525instruct rolL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
9526%{
9527  effect(USE_DEF dst, USE shift, KILL cr);
9528
9529  format %{ "rolq    $dst, $shift" %}
9530  opcode(0xD3, 0x0); /* Opcode D3 /0 */
9531  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9532  ins_pipe(ialu_reg_reg);
9533%}
9534// end of ROL expand
9535
9536// Rotate Left by one
// Recognizes the long rotate-left-by-one idiom
//   (dst << lshift) | (dst >>> rshift)
// with lshift of type immI1 and rshift of type immI_M1, and expands it to
// rolL_rReg_imm1.
9537instruct rolL_rReg_i1(rRegL dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
9538%{
9539  match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
9540
9541  expand %{
9542    rolL_rReg_imm1(dst, cr);
9543  %}
9544%}
9545
9546// Rotate Left by 8-bit immediate
// Recognizes the long rotate-left idiom (dst << lshift) | (dst >>> rshift)
// for constant counts and expands it to rolL_rReg_imm8 using lshift.
// The predicate admits the rule only when the two constants sum to a
// multiple of 64 (low six bits of the sum are zero).
9547instruct rolL_rReg_i8(rRegL dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
9548%{
9549  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
9550  match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
9551
9552  expand %{
9553    rolL_rReg_imm8(dst, lshift, cr);
9554  %}
9555%}
9556
9557// Rotate Left by variable
// Recognizes the variable long rotate-left idiom
//   (dst << shift) | (dst >>> (0 - shift))
// where the zero is matched by immI0, and expands it to rolL_rReg_CL.
9558instruct rolL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9559%{
9560  match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI zero shift))));
9561
9562  expand %{
9563    rolL_rReg_CL(dst, shift, cr);
9564  %}
9565%}
9566
9567// Rotate Left by variable
// Recognizes the variable long rotate-left idiom
//   (dst << shift) | (dst >>> (64 - shift))
// where the 64 is matched by immI_64, and expands it to rolL_rReg_CL.
9568instruct rolL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
9569%{
9570  match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI c64 shift))));
9571
9572  expand %{
9573    rolL_rReg_CL(dst, shift, cr);
9574  %}
9575%}
9576
9577// ROR expand
// Expand-only helper (no match rule): 64-bit rotate right of $dst by one,
// encoded as D1 /1 with the wide REX enc class. dst is read and rewritten;
// flags are killed.
9578instruct rorL_rReg_imm1(rRegL dst, rFlagsReg cr)
9579%{
9580  effect(USE_DEF dst, KILL cr);
9581
9582  format %{ "rorq    $dst" %}
9583  opcode(0xD1, 0x1); /* D1 /1 */
9584  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9585  ins_pipe(ialu_reg);
9586%}
9587
// Expand-only helper (no match rule): 64-bit rotate right of $dst by an
// immI8 constant, encoded as C1 /1 ib (wide). dst is read and rewritten;
// flags are killed.
9588instruct rorL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr)
9589%{
9590  effect(USE_DEF dst, USE shift, KILL cr);
9591
9592  format %{ "rorq    $dst, $shift" %}
9593  opcode(0xC1, 0x1); /* C1 /1 ib */
9594  ins_encode(reg_opc_imm_wide(dst, shift));
9595  ins_pipe(ialu_reg);
9596%}
9597
// Expand-only helper (no match rule): 64-bit rotate right of $dst by a
// variable count of class rcx_RegI, encoded as D3 /1 (wide). dst is
// restricted to no_rcx_RegL so it cannot share the count register; flags
// are killed.
9598instruct rorL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
9599%{
9600  effect(USE_DEF dst, USE shift, KILL cr);
9601
9602  format %{ "rorq    $dst, $shift" %}
9603  opcode(0xD3, 0x1); /* D3 /1 */
9604  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9605  ins_pipe(ialu_reg_reg);
9606%}
9607// end of ROR expand
9608
9609// Rotate Right by one
// Recognizes the long rotate-right-by-one idiom
//   (dst >>> rshift) | (dst << lshift)
// with rshift of type immI1 and lshift of type immI_M1, and expands it to
// rorL_rReg_imm1.
9610instruct rorL_rReg_i1(rRegL dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
9611%{
9612  match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
9613
9614  expand %{
9615    rorL_rReg_imm1(dst, cr);
9616  %}
9617%}
9618
9619// Rotate Right by 8-bit immediate
// Recognizes the long rotate-right idiom (dst >>> rshift) | (dst << lshift)
// for constant counts and expands it to rorL_rReg_imm8 using rshift.
// The predicate admits the rule only when the two constants sum to a
// multiple of 64 (low six bits of the sum are zero).
9620instruct rorL_rReg_i8(rRegL dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
9621%{
9622  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
9623  match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
9624
9625  expand %{
9626    rorL_rReg_imm8(dst, rshift, cr);
9627  %}
9628%}
9629
9630// Rotate Right by variable
// Recognizes the variable long rotate-right idiom
//   (dst >>> shift) | (dst << (0 - shift))
// where the zero is matched by immI0, and expands it to rorL_rReg_CL.
9631instruct rorL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9632%{
9633  match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI zero shift))));
9634
9635  expand %{
9636    rorL_rReg_CL(dst, shift, cr);
9637  %}
9638%}
9639
9640// Rotate Right by variable
// Recognizes the variable long rotate-right idiom
//   (dst >>> shift) | (dst << (64 - shift))
// where the 64 is matched by immI_64, and expands it to rorL_rReg_CL.
9641instruct rorL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
9642%{
9643  match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI c64 shift))));
9644
9645  expand %{
9646    rorL_rReg_CL(dst, shift, cr);
9647  %}
9648%}
9649
9650// Logical Instructions
9651
9652// Integer Logical Instructions
9653
9654// And Instructions
9655// And Register with Register
// 32-bit AND of two registers (opcode 0x23). Two-address form: the result
// overwrites $dst; flags are killed.
9656instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9657%{
9658  match(Set dst (AndI dst src));
9659  effect(KILL cr);
9660
9661  format %{ "andl    $dst, $src\t# int" %}
9662  opcode(0x23);
9663  ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9664  ins_pipe(ialu_reg_reg);
9665%}
9666
9667// And Register with Immediate 255
// AND of an int register with the immI_255 constant, implemented as a
// zero-extending byte move of $dst onto itself (movzbl, 0F B6).
// No flags operand and no effect clause are declared for this rule.
9668instruct andI_rReg_imm255(rRegI dst, immI_255 src)
9669%{
9670  match(Set dst (AndI dst src));
9671
9672  format %{ "movzbl  $dst, $dst\t# int & 0xFF" %}
9673  opcode(0x0F, 0xB6);
9674  ins_encode(REX_reg_breg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9675  ins_pipe(ialu_reg);
9676%}
9677
9678// And Register with Immediate 255 and promote to long
// Fuses ConvI2L of (src & immI_255) into one movzbl (0F B6) from the int
// source register into the long destination register.
// No flags operand and no effect clause are declared for this rule.
9679instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
9680%{
9681  match(Set dst (ConvI2L (AndI src mask)));
9682
9683  format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
9684  opcode(0x0F, 0xB6);
9685  ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9686  ins_pipe(ialu_reg);
9687%}
9688
9689// And Register with Immediate 65535
// AND of an int register with the immI_65535 constant, implemented as a
// zero-extending 16-bit move of $dst onto itself (movzwl, 0F B7).
// No flags operand and no effect clause are declared for this rule.
9690instruct andI_rReg_imm65535(rRegI dst, immI_65535 src)
9691%{
9692  match(Set dst (AndI dst src));
9693
9694  format %{ "movzwl  $dst, $dst\t# int & 0xFFFF" %}
9695  opcode(0x0F, 0xB7);
9696  ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9697  ins_pipe(ialu_reg);
9698%}
9699
9700// And Register with Immediate 65535 and promote to long
// Fuses ConvI2L of (src & immI_65535) into one movzwl (0F B7) from the int
// source register into the long destination register.
// No flags operand and no effect clause are declared for this rule.
9701instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
9702%{
9703  match(Set dst (ConvI2L (AndI src mask)));
9704
9705  format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
9706  opcode(0x0F, 0xB7);
9707  ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9708  ins_pipe(ialu_reg);
9709%}
9710
9711// And Register with Immediate
// 32-bit AND of a register with an int immediate (opcode 81 /4).
// Two-address form: the result overwrites $dst; flags are killed.
// NOTE(review): OpcSErm/Con8or32 presumably choose between an 8-bit and a
// 32-bit immediate encoding; both are defined outside this section.
9712instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9713%{
9714  match(Set dst (AndI dst src));
9715  effect(KILL cr);
9716
9717  format %{ "andl    $dst, $src\t# int" %}
9718  opcode(0x81, 0x04); /* Opcode 81 /4 */
9719  ins_encode(OpcSErm(dst, src), Con8or32(src));
9720  ins_pipe(ialu_reg);
9721%}
9722
9723// And Register with Memory
// 32-bit AND of a register with an int loaded from memory: the LoadI is
// folded into the instruction (opcode 0x23 with a memory source).
// The result overwrites $dst; flags are killed. Cost is set to 125.
9724instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9725%{
9726  match(Set dst (AndI dst (LoadI src)));
9727  effect(KILL cr);
9728
9729  ins_cost(125);
9730  format %{ "andl    $dst, $src\t# int" %}
9731  opcode(0x23);
9732  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9733  ins_pipe(ialu_reg_mem);
9734%}
9735
9736// And Memory with Register
// Read-modify-write 32-bit AND of memory with a register: matches a StoreI
// of (AndI (LoadI dst) src) back to the same memory operand (opcode 21 /r).
// Flags are killed. Cost is set to 150.
9737instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9738%{
9739  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
9740  effect(KILL cr);
9741
9742  ins_cost(150);
9743  format %{ "andl    $dst, $src\t# int" %}
9744  opcode(0x21); /* Opcode 21 /r */
  // The register operand is passed first to the enc classes, the memory
  // operand second.
9745  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
9746  ins_pipe(ialu_mem_reg);
9747%}
9748
9749// And Memory with Immediate
// Read-modify-write 32-bit AND of memory with an int immediate: matches a
// StoreI of (AndI (LoadI dst) src) back to the same memory operand
// (opcode 81 /4). Flags are killed. Cost is set to 125.
9750instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
9751%{
9752  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
9753  effect(KILL cr);
9754
9755  ins_cost(125);
9756  format %{ "andl    $dst, $src\t# int" %}
9757  opcode(0x81, 0x4); /* Opcode 81 /4 id */
9758  ins_encode(REX_mem(dst), OpcSE(src),
9759             RM_opc_mem(secondary, dst), Con8or32(src));
9760  ins_pipe(ialu_mem_imm);
9761%}
9762
9763// Or Instructions
9764// Or Register with Register
// 32-bit OR of two registers (opcode 0x0B). Two-address form: the result
// overwrites $dst; flags are killed.
9765instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9766%{
9767  match(Set dst (OrI dst src));
9768  effect(KILL cr);
9769
9770  format %{ "orl     $dst, $src\t# int" %}
9771  opcode(0x0B);
9772  ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9773  ins_pipe(ialu_reg_reg);
9774%}
9775
9776// Or Register with Immediate
// 32-bit OR of a register with an int immediate (opcode 81 /1).
// Two-address form: the result overwrites $dst; flags are killed.
9777instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9778%{
9779  match(Set dst (OrI dst src));
9780  effect(KILL cr);
9781
9782  format %{ "orl     $dst, $src\t# int" %}
9783  opcode(0x81, 0x01); /* Opcode 81 /1 id */
9784  ins_encode(OpcSErm(dst, src), Con8or32(src));
9785  ins_pipe(ialu_reg);
9786%}
9787
9788// Or Register with Memory
// 32-bit OR of a register with an int loaded from memory: the LoadI is
// folded into the instruction (opcode 0x0B with a memory source).
// The result overwrites $dst; flags are killed. Cost is set to 125.
9789instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9790%{
9791  match(Set dst (OrI dst (LoadI src)));
9792  effect(KILL cr);
9793
9794  ins_cost(125);
9795  format %{ "orl     $dst, $src\t# int" %}
9796  opcode(0x0B);
9797  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9798  ins_pipe(ialu_reg_mem);
9799%}
9800
9801// Or Memory with Register
// Read-modify-write 32-bit OR of memory with a register: matches a StoreI
// of (OrI (LoadI dst) src) back to the same memory operand (opcode 09 /r).
// Flags are killed. Cost is set to 150.
9802instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9803%{
9804  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
9805  effect(KILL cr);
9806
9807  ins_cost(150);
9808  format %{ "orl     $dst, $src\t# int" %}
9809  opcode(0x09); /* Opcode 09 /r */
  // The register operand is passed first to the enc classes, the memory
  // operand second.
9810  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
9811  ins_pipe(ialu_mem_reg);
9812%}
9813
9814// Or Memory with Immediate
// Read-modify-write 32-bit OR of memory with an int immediate: matches a
// StoreI of (OrI (LoadI dst) src) back to the same memory operand
// (opcode 81 /1). Flags are killed. Cost is set to 125.
9815instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
9816%{
9817  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
9818  effect(KILL cr);
9819
9820  ins_cost(125);
9821  format %{ "orl     $dst, $src\t# int" %}
9822  opcode(0x81, 0x1); /* Opcode 81 /1 id */
9823  ins_encode(REX_mem(dst), OpcSE(src),
9824             RM_opc_mem(secondary, dst), Con8or32(src));
9825  ins_pipe(ialu_mem_imm);
9826%}
9827
9828// Xor Instructions
9829// Xor Register with Register
// 32-bit XOR of two registers (opcode 0x33). Two-address form: the result
// overwrites $dst; flags are killed.
9830instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9831%{
9832  match(Set dst (XorI dst src));
9833  effect(KILL cr);
9834
9835  format %{ "xorl    $dst, $src\t# int" %}
9836  opcode(0x33);
9837  ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9838  ins_pipe(ialu_reg_reg);
9839%}
9840
9841// Xor Register with Immediate -1
// XOR of an int register with the immI_M1 constant (-1), emitted as a
// bitwise NOT through the macro assembler (notl) instead of an
// opcode/enc-class list. No flags operand and no effect clause are declared.
9842instruct xorI_rReg_im1(rRegI dst, immI_M1 imm) %{
9843  match(Set dst (XorI dst imm));  
9844
  // The listing text prints "not" while the emitted instruction is notl.
9845  format %{ "not    $dst" %}  
9846  ins_encode %{
9847     __ notl($dst$$Register);
9848  %}
9849  ins_pipe(ialu_reg);
9850%}
9851
9852// Xor Register with Immediate
// 32-bit XOR of a register with an int immediate (opcode 81 /6).
// Two-address form: the result overwrites $dst; flags are killed.
9853instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9854%{
9855  match(Set dst (XorI dst src));
9856  effect(KILL cr);
9857
9858  format %{ "xorl    $dst, $src\t# int" %}
9859  opcode(0x81, 0x06); /* Opcode 81 /6 id */
9860  ins_encode(OpcSErm(dst, src), Con8or32(src));
9861  ins_pipe(ialu_reg);
9862%}
9863
9864// Xor Register with Memory
// 32-bit XOR of a register with an int loaded from memory: the LoadI is
// folded into the instruction (opcode 0x33 with a memory source).
// The result overwrites $dst; flags are killed. Cost is set to 125.
9865instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9866%{
9867  match(Set dst (XorI dst (LoadI src)));
9868  effect(KILL cr);
9869
9870  ins_cost(125);
9871  format %{ "xorl    $dst, $src\t# int" %}
9872  opcode(0x33);
9873  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9874  ins_pipe(ialu_reg_mem);
9875%}
9876
9877// Xor Memory with Register
// Read-modify-write 32-bit XOR of memory with a register: matches a StoreI
// of (XorI (LoadI dst) src) back to the same memory operand (opcode 31 /r).
// Flags are killed. Cost is set to 150.
9878instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9879%{
9880  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
9881  effect(KILL cr);
9882
9883  ins_cost(150);
9884  format %{ "xorl    $dst, $src\t# int" %}
9885  opcode(0x31); /* Opcode 31 /r */
  // The register operand is passed first to the enc classes, the memory
  // operand second.
9886  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
9887  ins_pipe(ialu_mem_reg);
9888%}
9889
9890// Xor Memory with Immediate
// Read-modify-write 32-bit XOR of memory with an int immediate: matches a
// StoreI of (XorI (LoadI dst) src) back to the same memory operand
// (opcode 81 /6). Flags are killed. Cost is set to 125.
9891instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
9892%{
9893  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
9894  effect(KILL cr);
9895
9896  ins_cost(125);
9897  format %{ "xorl    $dst, $src\t# int" %}
9898  opcode(0x81, 0x6); /* Opcode 81 /6 id */
9899  ins_encode(REX_mem(dst), OpcSE(src),
9900             RM_opc_mem(secondary, dst), Con8or32(src));
9901  ins_pipe(ialu_mem_imm);
9902%}
9903
9904
9905// Long Logical Instructions
9906
9907// And Instructions
9908// And Register with Register
// Selected for (AndL dst src) with both operands in rRegL registers; dst is
// both an input and the result. Uses the _wide REX helper; cr is KILLed.
9909instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
9910%{
9911  match(Set dst (AndL dst src));
9912  effect(KILL cr);
9913
9914  format %{ "andq    $dst, $src\t# long" %}
9915  opcode(0x23);
9916  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
9917  ins_pipe(ialu_reg_reg);
9918%}
9919
9920// And Register with Immediate 255
// Selected for (AndL dst src) where src is the immL_255 constant. Instead of
// an andq, the format shows a movzbq of dst onto itself (opcode 0x0F 0xB6).
// No flags operand or KILL effect is declared for this rule.
9921instruct andL_rReg_imm255(rRegL dst, immL_255 src)
9922%{
9923  match(Set dst (AndL dst src));
9924
9925  format %{ "movzbq  $dst, $dst\t# long & 0xFF" %}
9926  opcode(0x0F, 0xB6);
9927  ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9928  ins_pipe(ialu_reg);
9929%}
9930
9931// And Register with Immediate 65535
// Selected for (AndL dst src) where src is the immL_65535 constant. Instead
// of an andq, the format shows a movzwq of dst onto itself (opcode 0x0F 0xB7).
// No flags operand or KILL effect is declared for this rule.
9932instruct andL_rReg_imm65535(rRegL dst, immL_65535 src)
9933%{
9934  match(Set dst (AndL dst src));
9935
9936  format %{ "movzwq  $dst, $dst\t# long & 0xFFFF" %}
9937  opcode(0x0F, 0xB7);
9938  ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9939  ins_pipe(ialu_reg);
9940%}
9941
9942// And Register with Immediate
// Selected for (AndL dst src) where src is an immL32 constant. Uses opcode
// 0x81 with secondary opcode 0x04; the constant is appended by Con8or32.
// The flags operand cr is declared KILLed.
9943instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
9944%{
9945  match(Set dst (AndL dst src));
9946  effect(KILL cr);
9947
9948  format %{ "andq    $dst, $src\t# long" %}
9949  opcode(0x81, 0x04); /* Opcode 81 /4 */
9950  ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
9951  ins_pipe(ialu_reg);
9952%}
9953
9954// And Register with Memory
// Selected when the second AndL input is a (LoadL src): the load is folded
// into the andq as a memory operand. Costed at 125; cr is declared KILLed.
9955instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
9956%{
9957  match(Set dst (AndL dst (LoadL src)));
9958  effect(KILL cr);
9959
9960  ins_cost(125);
9961  format %{ "andq    $dst, $src\t# long" %}
9962  opcode(0x23);
9963  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
9964  ins_pipe(ialu_reg_mem);
9965%}
9966
9967// And Memory with Register
// Selected for a StoreL to dst of (AndL (LoadL dst) src): load, and, store
// all name the same memory operand and are covered by one rule.
// Costed at 150; cr is declared KILLed.
9968instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
9969%{
9970  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
9971  effect(KILL cr);
9972
9973  ins_cost(150);
9974  format %{ "andq    $dst, $src\t# long" %}
9975  opcode(0x21); /* Opcode 21 /r */
  // Register operand (src) is passed first, memory operand (dst) second.
9976  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
9977  ins_pipe(ialu_mem_reg);
9978%}
9979
9980// And Memory with Immediate
// Selected for a StoreL to dst of (AndL (LoadL dst) src) where src is an
// immL32 constant. Opcode 0x81 with secondary opcode 0x4 passed to
// RM_opc_mem. Costed at 125; cr is declared KILLed.
9981instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
9982%{
9983  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
9984  effect(KILL cr);
9985
9986  ins_cost(125);
9987  format %{ "andq    $dst, $src\t# long" %}
9988  opcode(0x81, 0x4); /* Opcode 81 /4 id */
9989  ins_encode(REX_mem_wide(dst), OpcSE(src),
9990             RM_opc_mem(secondary, dst), Con8or32(src));
9991  ins_pipe(ialu_mem_imm);
9992%}
9993
9994// Or Instructions
9995// Or Register with Register
// Selected for (OrL dst src) with both operands in rRegL registers; dst is
// both an input and the result. Uses the _wide REX helper; cr is KILLed.
9996instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
9997%{
9998  match(Set dst (OrL dst src));
9999  effect(KILL cr);
10000
10001  format %{ "orq     $dst, $src\t# long" %}
10002  opcode(0x0B);
10003  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
10004  ins_pipe(ialu_reg_reg);
10005%}
10006
10007// Use any_RegP to match R15 (TLS register) without spilling.
// Selected for (OrL dst (CastP2X src)): the pointer operand src is OR-ed
// into dst directly, so the CastP2X needs no instruction of its own. Opcode
// and encoding are the same as orL_rReg; cr is declared KILLed.
10008instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
10009  match(Set dst (OrL dst (CastP2X src)));
10010  effect(KILL cr);
10011
10012  format %{ "orq     $dst, $src\t# long" %}
10013  opcode(0x0B);
10014  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
10015  ins_pipe(ialu_reg_reg);
10016%}
10017
10018
10019// Or Register with Immediate
// Selected for (OrL dst src) where src is an immL32 constant. Uses opcode
// 0x81 with secondary opcode 0x01; the constant is appended by Con8or32.
// The flags operand cr is declared KILLed.
10020instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10021%{
10022  match(Set dst (OrL dst src));
10023  effect(KILL cr);
10024
10025  format %{ "orq     $dst, $src\t# long" %}
10026  opcode(0x81, 0x01); /* Opcode 81 /1 id */
10027  ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
10028  ins_pipe(ialu_reg);
10029%}
10030
10031// Or Register with Memory
// Selected when the second OrL input is a (LoadL src): the load is folded
// into the orq as a memory operand. Costed at 125; cr is declared KILLed.
10032instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10033%{
10034  match(Set dst (OrL dst (LoadL src)));
10035  effect(KILL cr);
10036
10037  ins_cost(125);
10038  format %{ "orq     $dst, $src\t# long" %}
10039  opcode(0x0B);
10040  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
10041  ins_pipe(ialu_reg_mem);
10042%}
10043
10044// Or Memory with Register
// Selected for a StoreL to dst of (OrL (LoadL dst) src): load, or, store all
// name the same memory operand and are covered by one rule.
// Costed at 150; cr is declared KILLed.
10045instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10046%{
10047  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
10048  effect(KILL cr);
10049
10050  ins_cost(150);
10051  format %{ "orq     $dst, $src\t# long" %}
10052  opcode(0x09); /* Opcode 09 /r */
  // Register operand (src) is passed first, memory operand (dst) second.
10053  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
10054  ins_pipe(ialu_mem_reg);
10055%}
10056
10057// Or Memory with Immediate
// Selected for a StoreL to dst of (OrL (LoadL dst) src) where src is an
// immL32 constant. Opcode 0x81 with secondary opcode 0x1 passed to
// RM_opc_mem. Costed at 125; cr is declared KILLed.
10058instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10059%{
10060  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
10061  effect(KILL cr);
10062
10063  ins_cost(125);
10064  format %{ "orq     $dst, $src\t# long" %}
10065  opcode(0x81, 0x1); /* Opcode 81 /1 id */
10066  ins_encode(REX_mem_wide(dst), OpcSE(src),
10067             RM_opc_mem(secondary, dst), Con8or32(src));
10068  ins_pipe(ialu_mem_imm);
10069%}
10070
10071// Xor Instructions
10072// Xor Register with Register
// Selected for (XorL dst src) with both operands in rRegL registers; dst is
// both an input and the result. Uses the _wide REX helper; cr is KILLed.
10073instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10074%{
10075  match(Set dst (XorL dst src));
10076  effect(KILL cr);
10077
10078  format %{ "xorq    $dst, $src\t# long" %}
10079  opcode(0x33);
10080  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
10081  ins_pipe(ialu_reg_reg);
10082%}
10083
10084// Xor Register with Immediate -1
// XOR with the constant -1 (immL_M1) is emitted as a single in-place notq on
// dst. Unlike the other XorL rules here, no flags operand or KILL effect is
// declared.
10085instruct xorL_rReg_im1(rRegL dst, immL_M1 imm) %{
10086  match(Set dst (XorL dst imm));  
10087
10088  format %{ "notq   $dst" %}  
10089  ins_encode %{
10090     __ notq($dst$$Register);
10091  %}
10092  ins_pipe(ialu_reg);
10093%}
10094
10095// Xor Register with Immediate
// Selected for (XorL dst src) where src is an immL32 constant. Uses opcode
// 0x81 with secondary opcode 0x06; the constant is appended by Con8or32.
// The flags operand cr is declared KILLed.
10096instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10097%{
10098  match(Set dst (XorL dst src));
10099  effect(KILL cr);
10100
10101  format %{ "xorq    $dst, $src\t# long" %}
10102  opcode(0x81, 0x06); /* Opcode 81 /6 id */
10103  ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
10104  ins_pipe(ialu_reg);
10105%}
10106
10107// Xor Register with Memory
// Selected when the second XorL input is a (LoadL src): the load is folded
// into the xorq as a memory operand. Costed at 125; cr is declared KILLed.
10108instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10109%{
10110  match(Set dst (XorL dst (LoadL src)));
10111  effect(KILL cr);
10112
10113  ins_cost(125);
10114  format %{ "xorq    $dst, $src\t# long" %}
10115  opcode(0x33);
10116  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
10117  ins_pipe(ialu_reg_mem);
10118%}
10119
10120// Xor Memory with Register
// Selected for a StoreL to dst of (XorL (LoadL dst) src): load, xor, store
// all name the same memory operand and are covered by one rule.
// Costed at 150; cr is declared KILLed.
10121instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10122%{
10123  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
10124  effect(KILL cr);
10125
10126  ins_cost(150);
10127  format %{ "xorq    $dst, $src\t# long" %}
10128  opcode(0x31); /* Opcode 31 /r */
  // Register operand (src) is passed first, memory operand (dst) second.
10129  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
10130  ins_pipe(ialu_mem_reg);
10131%}
10132
10133// Xor Memory with Immediate
// Selected for a StoreL to dst of (XorL (LoadL dst) src) where src is an
// immL32 constant. Opcode 0x81 with secondary opcode 0x6 passed to
// RM_opc_mem. Costed at 125; cr is declared KILLed.
10134instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10135%{
10136  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
10137  effect(KILL cr);
10138
10139  ins_cost(125);
10140  format %{ "xorq    $dst, $src\t# long" %}
10141  opcode(0x81, 0x6); /* Opcode 81 /6 id */
10142  ins_encode(REX_mem_wide(dst), OpcSE(src),
10143             RM_opc_mem(secondary, dst), Con8or32(src));
10144  ins_pipe(ialu_mem_imm);
10145%}
10146
10147// Convert Int to Boolean
// Implements (Conv2B src) for an int source. Three instructions are emitted
// in order: testl of src against itself, setnz into dst, then movzbl of dst
// onto itself. The flags operand cr is declared KILLed.
10148instruct convI2B(rRegI dst, rRegI src, rFlagsReg cr)
10149%{
10150  match(Set dst (Conv2B src));
10151  effect(KILL cr);
10152
10153  format %{ "testl   $src, $src\t# ci2b\n\t"
10154            "setnz   $dst\n\t"
10155            "movzbl  $dst, $dst" %}
10156  ins_encode(REX_reg_reg(src, src), opc_reg_reg(0x85, src, src), // testl
10157             setNZ_reg(dst),
10158             REX_reg_breg(dst, dst), // movzbl
10159             Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
10160  ins_pipe(pipe_slow); // XXX
10161%}
10162
10163// Convert Pointer to Boolean
// Implements (Conv2B src) for a pointer source. Same sequence as convI2B
// except the test uses the _wide REX helper (testq): testq src,src; setnz
// dst; movzbl dst,dst. The flags operand cr is declared KILLed.
10164instruct convP2B(rRegI dst, rRegP src, rFlagsReg cr)
10165%{
10166  match(Set dst (Conv2B src));
10167  effect(KILL cr);
10168
10169  format %{ "testq   $src, $src\t# cp2b\n\t"
10170            "setnz   $dst\n\t"
10171            "movzbl  $dst, $dst" %}
10172  ins_encode(REX_reg_reg_wide(src, src), opc_reg_reg(0x85, src, src), // testq
10173             setNZ_reg(dst),
10174             REX_reg_breg(dst, dst), // movzbl
10175             Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
10176  ins_pipe(pipe_slow); // XXX
10177%}
10178
// Implements (CmpLTMask p q) into dst. Emitted sequence, per the format:
// cmpl p,q; setlt dst; movzbl dst,dst; negl dst -- the final negate turns
// the 0/1 produced by setlt into 0/-1. Costed at 400; cr is declared KILLed.
10179instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
10180%{
10181  match(Set dst (CmpLTMask p q));
10182  effect(KILL cr);
10183
10184  ins_cost(400); // XXX
10185  format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
10186            "setlt   $dst\n\t"
10187            "movzbl  $dst, $dst\n\t"
10188            "negl    $dst" %}
10189  ins_encode(REX_reg_reg(p, q), opc_reg_reg(0x3B, p, q), // cmpl
10190             setLT_reg(dst),
10191             REX_reg_breg(dst, dst), // movzbl
10192             Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst),
10193             neg_reg(dst));
10194  ins_pipe(pipe_slow);
10195%}
10196
// Special case of CmpLTMask where the right operand is the immI0 constant:
// dst is compared against zero in place with a single sarl by 31 (0x1F).
// Costed at 100, versus 400 for the general cmpLTMask rule; cr is KILLed.
10197instruct cmpLTMask0(rRegI dst, immI0 zero, rFlagsReg cr)
10198%{
10199  match(Set dst (CmpLTMask dst zero));
10200  effect(KILL cr);
10201
10202  ins_cost(100); // XXX
10203  format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
10204  opcode(0xC1, 0x7);  /* C1 /7 ib */
10205  ins_encode(reg_opc_imm(dst, 0x1F));
10206  ins_pipe(ialu_reg);
10207%}
10208
10209
// Fused rule for p = ((CmpLTMask p q) & y) + (p - q). Per the format the
// sequence is: subl p,q; sbbl tmp,tmp; andl tmp,y; addl p,tmp. tmp is a TEMP
// scratch register and cr is KILLed. The bytes come from enc_cmpLTP, which
// is defined outside this section.
10210instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y,
10211                         rRegI tmp,
10212                         rFlagsReg cr)
10213%{
10214  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
10215  effect(TEMP tmp, KILL cr);
10216
10217  ins_cost(400); // XXX
10218  format %{ "subl    $p, $q\t# cadd_cmpLTMask1\n\t"
10219            "sbbl    $tmp, $tmp\n\t"
10220            "andl    $tmp, $y\n\t"
10221            "addl    $p, $tmp" %}
10222  ins_encode(enc_cmpLTP(p, q, y, tmp));
10223  ins_pipe(pipe_cmplt);
10224%}
10225
10226/* If I enable this, I encourage spilling in the inner loop of compress.
10227instruct cadd_cmpLTMask_mem( rRegI p, rRegI q, memory y, rRegI tmp, rFlagsReg cr )
10228%{
10229  match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
10230  effect( TEMP tmp, KILL cr );
10231  ins_cost(400);
10232
10233  format %{ "SUB    $p,$q\n\t"
10234            "SBB    RCX,RCX\n\t"
10235            "AND    RCX,$y\n\t"
10236            "ADD    $p,RCX" %}
10237  ins_encode( enc_cmpLTP_mem(p,q,y,tmp) );
10238%}
10239*/
10240
10241//---------- FP Instructions------------------------------------------------
10242
// Sets the rFlagsRegU operand cr from (CmpF src1 src2), both in regF
// registers. After the ucomiss, the cmpfp_fixup encoding is appended; per
// the format it skips ahead when parity is clear (jnp) and otherwise
// rewrites the pushed flags word ("saw NaN, set CF"). Costed at 145.
10243instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
10244%{
10245  match(Set cr (CmpF src1 src2));
10246
10247  ins_cost(145);
10248  format %{ "ucomiss $src1, $src2\n\t"
10249            "jnp,s   exit\n\t"
10250            "pushfq\t# saw NaN, set CF\n\t"
10251            "andq    [rsp], #0xffffff2b\n\t"
10252            "popfq\n"
10253    "exit:   nop\t# avoid branch to branch" %}
10254  opcode(0x0F, 0x2E);
10255  ins_encode(REX_reg_reg(src1, src2), OpcP, OpcS, reg_reg(src1, src2),
10256             cmpfp_fixup);
10257  ins_pipe(pipe_slow);
10258%}
10259
// Variant of the CmpF register compare that produces an rFlagsRegUCF result:
// only the ucomiss is emitted, with no fixup sequence after it.
// NOTE(review): costed at 145 here, while the other *CF compare rules in
// this section use ins_cost(100) -- confirm whether that is intentional.
10260instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
10261  match(Set cr (CmpF src1 src2));
10262
10263  ins_cost(145);
10264  format %{ "ucomiss $src1, $src2" %}
10265  ins_encode %{
10266    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10267  %}
10268  ins_pipe(pipe_slow);
10269%}
10270
// Sets the rFlagsRegU operand cr from (CmpF src1 (LoadF src2)): the load is
// folded into the ucomiss as a memory operand. The cmpfp_fixup encoding is
// appended after the compare (see the format for the sequence). Cost 145.
10271instruct cmpF_cc_mem(rFlagsRegU cr, regF src1, memory src2)
10272%{
10273  match(Set cr (CmpF src1 (LoadF src2)));
10274
10275  ins_cost(145);
10276  format %{ "ucomiss $src1, $src2\n\t"
10277            "jnp,s   exit\n\t"
10278            "pushfq\t# saw NaN, set CF\n\t"
10279            "andq    [rsp], #0xffffff2b\n\t"
10280            "popfq\n"
10281    "exit:   nop\t# avoid branch to branch" %}
10282  opcode(0x0F, 0x2E);
10283  ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2),
10284             cmpfp_fixup);
10285  ins_pipe(pipe_slow);
10286%}
10287
// Variant of the CmpF register/memory compare that produces an rFlagsRegUCF
// result: only the ucomiss is emitted, with no fixup sequence. Cost 100.
10288instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
10289  match(Set cr (CmpF src1 (LoadF src2)));
10290
10291  ins_cost(100);
10292  format %{ "ucomiss $src1, $src2" %}
10293  opcode(0x0F, 0x2E);
10294  ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2));
10295  ins_pipe(pipe_slow);
10296%}
10297
// Sets the rFlagsRegU operand cr from (CmpF src1 src2) where src2 is an immF
// constant, supplied to ucomiss through the load_immF encoding. The
// cmpfp_fixup encoding is appended after the compare. Costed at 145.
// The format shows the constant as [$src2], matching the bracketed form the
// other immediate FP rules in this section use.
10298instruct cmpF_cc_imm(rFlagsRegU cr, regF src1, immF src2)
10299%{
10300  match(Set cr (CmpF src1 src2));
10301
10302  ins_cost(145);
10303  format %{ "ucomiss $src1, [$src2]\n\t"
10304            "jnp,s   exit\n\t"
10305            "pushfq\t# saw NaN, set CF\n\t"
10306            "andq    [rsp], #0xffffff2b\n\t"
10307            "popfq\n"
10308    "exit:   nop\t# avoid branch to branch" %}
10309  opcode(0x0F, 0x2E);
10310  ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, load_immF(src1, src2),
10311             cmpfp_fixup);
10312  ins_pipe(pipe_slow);
10313%}
10314
// Variant of the CmpF register/immediate compare that produces an
// rFlagsRegUCF result: only the ucomiss is emitted, with no fixup sequence.
// Costed at 100. The format shows the constant as [$src2], matching the
// bracketed form used by cmpD_cc_immCF.
10315instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src1, immF src2) %{
10316  match(Set cr (CmpF src1 src2));
10317
10318  ins_cost(100);
10319  format %{ "ucomiss $src1, [$src2]" %}
10320  opcode(0x0F, 0x2E);
10321  ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, load_immF(src1, src2));
10322  ins_pipe(pipe_slow);
10323%}
10324
// Sets the rFlagsRegU operand cr from (CmpD src1 src2), both in regD
// registers. The 0x66 primary opcode byte is emitted before the REX helper,
// followed by 0x0F 0x2E. The cmpfp_fixup encoding is appended after the
// compare (see the format for the sequence). Costed at 145.
10325instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
10326%{
10327  match(Set cr (CmpD src1 src2));
10328
10329  ins_cost(145);
10330  format %{ "ucomisd $src1, $src2\n\t"
10331            "jnp,s   exit\n\t"
10332            "pushfq\t# saw NaN, set CF\n\t"
10333            "andq    [rsp], #0xffffff2b\n\t"
10334            "popfq\n"
10335    "exit:   nop\t# avoid branch to branch" %}
10336  opcode(0x66, 0x0F, 0x2E);
10337  ins_encode(OpcP, REX_reg_reg(src1, src2), OpcS, OpcT, reg_reg(src1, src2),
10338             cmpfp_fixup);
10339  ins_pipe(pipe_slow);
10340%}
10341
// Variant of the CmpD register compare that produces an rFlagsRegUCF result:
// only the ucomisd is emitted, with no fixup sequence. Costed at 100.
// The format text shows exactly the emitted instruction (a stray debugging
// word was dropped from it).
10342instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
10343  match(Set cr (CmpD src1 src2));
10344
10345  ins_cost(100);
10346  format %{ "ucomisd $src1, $src2" %}
10347  ins_encode %{
10348    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
10349  %}
10350  ins_pipe(pipe_slow);
10351%}
10352
// Sets the rFlagsRegU operand cr from (CmpD src1 (LoadD src2)): the load is
// folded into the ucomisd as a memory operand. The cmpfp_fixup encoding is
// appended after the compare (see the format for the sequence). Cost 145.
10353instruct cmpD_cc_mem(rFlagsRegU cr, regD src1, memory src2)
10354%{
10355  match(Set cr (CmpD src1 (LoadD src2)));
10356
10357  ins_cost(145);
10358  format %{ "ucomisd $src1, $src2\n\t"
10359            "jnp,s   exit\n\t"
10360            "pushfq\t# saw NaN, set CF\n\t"
10361            "andq    [rsp], #0xffffff2b\n\t"
10362            "popfq\n"
10363    "exit:   nop\t# avoid branch to branch" %}
10364  opcode(0x66, 0x0F, 0x2E);
10365  ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2),
10366             cmpfp_fixup);
10367  ins_pipe(pipe_slow);
10368%}
10369
// Variant of the CmpD register/memory compare that produces an rFlagsRegUCF
// result: only the ucomisd is emitted, with no fixup sequence. Cost 100.
10370instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
10371  match(Set cr (CmpD src1 (LoadD src2)));
10372
10373  ins_cost(100);
10374  format %{ "ucomisd $src1, $src2" %}
10375  opcode(0x66, 0x0F, 0x2E);
10376  ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2));
10377  ins_pipe(pipe_slow);
10378%}
10379
// Sets the rFlagsRegU operand cr from (CmpD src1 src2) where src2 is an immD
// constant, supplied to ucomisd through the load_immD encoding. The
// cmpfp_fixup encoding is appended after the compare. Costed at 145.
10380instruct cmpD_cc_imm(rFlagsRegU cr, regD src1, immD src2)
10381%{
10382  match(Set cr (CmpD src1 src2));
10383
10384  ins_cost(145);
10385  format %{ "ucomisd $src1, [$src2]\n\t"
10386            "jnp,s   exit\n\t"
10387            "pushfq\t# saw NaN, set CF\n\t"
10388            "andq    [rsp], #0xffffff2b\n\t"
10389            "popfq\n"
10390    "exit:   nop\t# avoid branch to branch" %}
10391  opcode(0x66, 0x0F, 0x2E);
10392  ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, load_immD(src1, src2),
10393             cmpfp_fixup);
10394  ins_pipe(pipe_slow);
10395%}
10396
// Variant of the CmpD register/immediate compare that produces an
// rFlagsRegUCF result: only the ucomisd is emitted, with no fixup sequence.
// Costed at 100.
10397instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src1, immD src2) %{
10398  match(Set cr (CmpD src1 src2));
10399
10400  ins_cost(100);
10401  format %{ "ucomisd $src1, [$src2]" %}
10402  opcode(0x66, 0x0F, 0x2E);
10403  ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, load_immD(src1, src2));
10404  ins_pipe(pipe_slow);
10405%}
10406
10407// Compare into -1,0,1
// Three-way compare (CmpF3 src1 src2) of two regF registers into the int
// register dst. Per the format: after ucomiss, dst is preset to -1 and kept
// when either jp or jb is taken; otherwise setne/movzbl produce the result.
// The tail comes from the cmpfp3 encoding. Costed at 275; cr is KILLed.
10408instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
10409%{
10410  match(Set dst (CmpF3 src1 src2));
10411  effect(KILL cr);
10412
10413  ins_cost(275);
10414  format %{ "ucomiss $src1, $src2\n\t"
10415            "movl    $dst, #-1\n\t"
10416            "jp,s    done\n\t"
10417            "jb,s    done\n\t"
10418            "setne   $dst\n\t"
10419            "movzbl  $dst, $dst\n"
10420    "done:" %}
10421
10422  opcode(0x0F, 0x2E);
10423  ins_encode(REX_reg_reg(src1, src2), OpcP, OpcS, reg_reg(src1, src2),
10424             cmpfp3(dst));
10425  ins_pipe(pipe_slow);
10426%}
10427
10428// Compare into -1,0,1
// Three-way compare (CmpF3 src1 (LoadF src2)) into the int register dst,
// with the load folded into the ucomiss as a memory operand. The result
// sequence after the compare is the same as cmpF_reg's (cmpfp3 encoding).
// Costed at 275; cr is declared KILLed.
10429instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
10430%{
10431  match(Set dst (CmpF3 src1 (LoadF src2)));
10432  effect(KILL cr);
10433
10434  ins_cost(275);
10435  format %{ "ucomiss $src1, $src2\n\t"
10436            "movl    $dst, #-1\n\t"
10437            "jp,s    done\n\t"
10438            "jb,s    done\n\t"
10439            "setne   $dst\n\t"
10440            "movzbl  $dst, $dst\n"
10441    "done:" %}
10442
10443  opcode(0x0F, 0x2E);
10444  ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2),
10445             cmpfp3(dst));
10446  ins_pipe(pipe_slow);
10447%}
10448
10449// Compare into -1,0,1
// Three-way compare (CmpF3 src1 src2) into the int register dst where src2
// is an immF constant, supplied to ucomiss through the load_immF encoding.
// The result sequence after the compare comes from the cmpfp3 encoding.
// Costed at 275; cr is declared KILLed.
10450instruct cmpF_imm(rRegI dst, regF src1, immF src2, rFlagsReg cr)
10451%{
10452  match(Set dst (CmpF3 src1 src2));
10453  effect(KILL cr);
10454
10455  ins_cost(275);
10456  format %{ "ucomiss $src1, [$src2]\n\t"
10457            "movl    $dst, #-1\n\t"
10458            "jp,s    done\n\t"
10459            "jb,s    done\n\t"
10460            "setne   $dst\n\t"
10461            "movzbl  $dst, $dst\n"
10462    "done:" %}
10463
10464  opcode(0x0F, 0x2E);
10465  ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, load_immF(src1, src2),
10466             cmpfp3(dst));
10467  ins_pipe(pipe_slow);
10468%}
10469
10470// Compare into -1,0,1
// Three-way compare (CmpD3 src1 src2) of two regD registers into the int
// register dst. Per the format: after ucomisd, dst is preset to -1 and kept
// when either jp or jb is taken; otherwise setne/movzbl produce the result.
// The tail comes from the cmpfp3 encoding. Costed at 275; cr is KILLed.
10471instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
10472%{
10473  match(Set dst (CmpD3 src1 src2));
10474  effect(KILL cr);
10475
10476  ins_cost(275);
10477  format %{ "ucomisd $src1, $src2\n\t"
10478            "movl    $dst, #-1\n\t"
10479            "jp,s    done\n\t"
10480            "jb,s    done\n\t"
10481            "setne   $dst\n\t"
10482            "movzbl  $dst, $dst\n"
10483    "done:" %}
10484
10485  opcode(0x66, 0x0F, 0x2E);
10486  ins_encode(OpcP, REX_reg_reg(src1, src2), OpcS, OpcT, reg_reg(src1, src2),
10487             cmpfp3(dst));
10488  ins_pipe(pipe_slow);
10489%}
10490
10491// Compare into -1,0,1
// Three-way compare (CmpD3 src1 (LoadD src2)) into the int register dst,
// with the load folded into the ucomisd as a memory operand. The result
// sequence after the compare comes from the cmpfp3 encoding.
// Costed at 275; cr is declared KILLed.
10492instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
10493%{
10494  match(Set dst (CmpD3 src1 (LoadD src2)));
10495  effect(KILL cr);
10496
10497  ins_cost(275);
10498  format %{ "ucomisd $src1, $src2\n\t"
10499            "movl    $dst, #-1\n\t"
10500            "jp,s    done\n\t"
10501            "jb,s    done\n\t"
10502            "setne   $dst\n\t"
10503            "movzbl  $dst, $dst\n"
10504    "done:" %}
10505
10506  opcode(0x66, 0x0F, 0x2E);
10507  ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2),
10508             cmpfp3(dst));
10509  ins_pipe(pipe_slow);
10510%}
10511
10512// Compare into -1,0,1
// Three-way compare (CmpD3 src1 src2) into the int register dst where src2
// is an immD constant, supplied to ucomisd through the load_immD encoding.
// The result sequence after the compare comes from the cmpfp3 encoding.
// Costed at 275; cr is declared KILLed.
10513instruct cmpD_imm(rRegI dst, regD src1, immD src2, rFlagsReg cr)
10514%{
10515  match(Set dst (CmpD3 src1 src2));
10516  effect(KILL cr);
10517
10518  ins_cost(275);
10519  format %{ "ucomisd $src1, [$src2]\n\t"
10520            "movl    $dst, #-1\n\t"
10521            "jp,s    done\n\t"
10522            "jb,s    done\n\t"
10523            "setne   $dst\n\t"
10524            "movzbl  $dst, $dst\n"
10525    "done:" %}
10526
10527  opcode(0x66, 0x0F, 0x2E);
10528  ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, load_immD(src1, src2),
10529             cmpfp3(dst));
10530  ins_pipe(pipe_slow);
10531%}
10532
// Float add, register form: (AddF dst src) with dst updated in place.
// Shown as addss (opcode bytes 0xF3 0x0F 0x58); no flags effect is declared.
10533instruct addF_reg(regF dst, regF src)
10534%{
10535  match(Set dst (AddF dst src));
10536
10537  format %{ "addss   $dst, $src" %}
10538  ins_cost(150); // XXX
10539  opcode(0xF3, 0x0F, 0x58);
10540  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10541  ins_pipe(pipe_slow);
10542%}
10543
// Float add, memory form: (AddF dst (LoadF src)) with the load folded into
// the addss as a memory operand. Same opcode bytes as addF_reg.
10544instruct addF_mem(regF dst, memory src)
10545%{
10546  match(Set dst (AddF dst (LoadF src)));
10547
10548  format %{ "addss   $dst, $src" %}
10549  ins_cost(150); // XXX
10550  opcode(0xF3, 0x0F, 0x58);
10551  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10552  ins_pipe(pipe_slow);
10553%}
10554
// Float add, constant form: (AddF dst src) where src is an immF constant,
// supplied to addss through the load_immF encoding (shown as [$src]).
10555instruct addF_imm(regF dst, immF src)
10556%{
10557  match(Set dst (AddF dst src));
10558
10559  format %{ "addss   $dst, [$src]" %}
10560  ins_cost(150); // XXX
10561  opcode(0xF3, 0x0F, 0x58);
10562  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
10563  ins_pipe(pipe_slow);
10564%}
10565
// Double add, register form: (AddD dst src) with dst updated in place.
// Shown as addsd (opcode bytes 0xF2 0x0F 0x58); no flags effect is declared.
10566instruct addD_reg(regD dst, regD src)
10567%{
10568  match(Set dst (AddD dst src));
10569
10570  format %{ "addsd   $dst, $src" %}
10571  ins_cost(150); // XXX
10572  opcode(0xF2, 0x0F, 0x58);
10573  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10574  ins_pipe(pipe_slow);
10575%}
10576
// Double add, memory form: (AddD dst (LoadD src)) with the load folded into
// the addsd as a memory operand. Same opcode bytes as addD_reg.
10577instruct addD_mem(regD dst, memory src)
10578%{
10579  match(Set dst (AddD dst (LoadD src)));
10580
10581  format %{ "addsd   $dst, $src" %}
10582  ins_cost(150); // XXX
10583  opcode(0xF2, 0x0F, 0x58);
10584  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10585  ins_pipe(pipe_slow);
10586%}
10587
// Double add, constant form: (AddD dst src) where src is an immD constant,
// supplied to addsd through the load_immD encoding (shown as [$src]).
10588instruct addD_imm(regD dst, immD src)
10589%{
10590  match(Set dst (AddD dst src));
10591
10592  format %{ "addsd   $dst, [$src]" %}
10593  ins_cost(150); // XXX
10594  opcode(0xF2, 0x0F, 0x58);
10595  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
10596  ins_pipe(pipe_slow);
10597%}
10598
// Float subtract, register form: (SubF dst src) with dst updated in place.
// Shown as subss (opcode bytes 0xF3 0x0F 0x5C); no flags effect is declared.
10599instruct subF_reg(regF dst, regF src)
10600%{
10601  match(Set dst (SubF dst src));
10602
10603  format %{ "subss   $dst, $src" %}
10604  ins_cost(150); // XXX
10605  opcode(0xF3, 0x0F, 0x5C);
10606  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10607  ins_pipe(pipe_slow);
10608%}
10609
// Float subtract, memory form: (SubF dst (LoadF src)) with the load folded
// into the subss as a memory operand. Same opcode bytes as subF_reg.
10610instruct subF_mem(regF dst, memory src)
10611%{
10612  match(Set dst (SubF dst (LoadF src)));
10613
10614  format %{ "subss   $dst, $src" %}
10615  ins_cost(150); // XXX
10616  opcode(0xF3, 0x0F, 0x5C);
10617  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10618  ins_pipe(pipe_slow);
10619%}
10620
// Float subtract, constant form: (SubF dst src) where src is an immF
// constant, supplied to subss through the load_immF encoding.
10621instruct subF_imm(regF dst, immF src)
10622%{
10623  match(Set dst (SubF dst src));
10624
10625  format %{ "subss   $dst, [$src]" %}
10626  ins_cost(150); // XXX
10627  opcode(0xF3, 0x0F, 0x5C);
10628  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
10629  ins_pipe(pipe_slow);
10630%}
10631
// Double subtract, register form: (SubD dst src) with dst updated in place.
// Shown as subsd (opcode bytes 0xF2 0x0F 0x5C); no flags effect is declared.
10632instruct subD_reg(regD dst, regD src)
10633%{
10634  match(Set dst (SubD dst src));
10635
10636  format %{ "subsd   $dst, $src" %}
10637  ins_cost(150); // XXX
10638  opcode(0xF2, 0x0F, 0x5C);
10639  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10640  ins_pipe(pipe_slow);
10641%}
10642
// Double subtract, memory form: (SubD dst (LoadD src)) with the load folded
// into the subsd as a memory operand. Same opcode bytes as subD_reg.
10643instruct subD_mem(regD dst, memory src)
10644%{
10645  match(Set dst (SubD dst (LoadD src)));
10646
10647  format %{ "subsd   $dst, $src" %}
10648  ins_cost(150); // XXX
10649  opcode(0xF2, 0x0F, 0x5C);
10650  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10651  ins_pipe(pipe_slow);
10652%}
10653
// Double subtract, constant form: (SubD dst src) where src is an immD
// constant, supplied to subsd through the load_immD encoding.
10654instruct subD_imm(regD dst, immD src)
10655%{
10656  match(Set dst (SubD dst src));
10657
10658  format %{ "subsd   $dst, [$src]" %}
10659  ins_cost(150); // XXX
10660  opcode(0xF2, 0x0F, 0x5C);
10661  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
10662  ins_pipe(pipe_slow);
10663%}
10664
// Float multiply, register form: (MulF dst src) with dst updated in place.
// Shown as mulss (opcode bytes 0xF3 0x0F 0x59); no flags effect is declared.
10665instruct mulF_reg(regF dst, regF src)
10666%{
10667  match(Set dst (MulF dst src));
10668
10669  format %{ "mulss   $dst, $src" %}
10670  ins_cost(150); // XXX
10671  opcode(0xF3, 0x0F, 0x59);
10672  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10673  ins_pipe(pipe_slow);
10674%}
10675
// Float multiply, memory form: (MulF dst (LoadF src)) with the load folded
// into the mulss as a memory operand. Same opcode bytes as mulF_reg.
10676instruct mulF_mem(regF dst, memory src)
10677%{
10678  match(Set dst (MulF dst (LoadF src)));
10679
10680  format %{ "mulss   $dst, $src" %}
10681  ins_cost(150); // XXX
10682  opcode(0xF3, 0x0F, 0x59);
10683  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10684  ins_pipe(pipe_slow);
10685%}
10686
// Float multiply, constant form: (MulF dst src) where src is an immF
// constant, supplied to mulss through the load_immF encoding.
10687instruct mulF_imm(regF dst, immF src)
10688%{
10689  match(Set dst (MulF dst src));
10690
10691  format %{ "mulss   $dst, [$src]" %}
10692  ins_cost(150); // XXX
10693  opcode(0xF3, 0x0F, 0x59);
10694  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
10695  ins_pipe(pipe_slow);
10696%}
10697
// Double multiply, register form: (MulD dst src) with dst updated in place.
// Shown as mulsd (opcode bytes 0xF2 0x0F 0x59); no flags effect is declared.
10698instruct mulD_reg(regD dst, regD src)
10699%{
10700  match(Set dst (MulD dst src));
10701
10702  format %{ "mulsd   $dst, $src" %}
10703  ins_cost(150); // XXX
10704  opcode(0xF2, 0x0F, 0x59);
10705  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10706  ins_pipe(pipe_slow);
10707%}
10708
// Double multiply, memory form: (MulD dst (LoadD src)) with the load folded
// into the mulsd as a memory operand. Same opcode bytes as mulD_reg.
10709instruct mulD_mem(regD dst, memory src)
10710%{
10711  match(Set dst (MulD dst (LoadD src)));
10712
10713  format %{ "mulsd   $dst, $src" %}
10714  ins_cost(150); // XXX
10715  opcode(0xF2, 0x0F, 0x59);
10716  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10717  ins_pipe(pipe_slow);
10718%}
10719
// Double multiply, constant form: (MulD dst src) where src is an immD
// constant, supplied to mulsd through the load_immD encoding.
10720instruct mulD_imm(regD dst, immD src)
10721%{
10722  match(Set dst (MulD dst src));
10723
10724  format %{ "mulsd   $dst, [$src]" %}
10725  ins_cost(150); // XXX
10726  opcode(0xF2, 0x0F, 0x59);
10727  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
10728  ins_pipe(pipe_slow);
10729%}
10730
// Float divide, register form: (DivF dst src) with dst updated in place.
// Shown as divss (opcode bytes 0xF3 0x0F 0x5E); no flags effect is declared.
10731instruct divF_reg(regF dst, regF src)
10732%{
10733  match(Set dst (DivF dst src));
10734
10735  format %{ "divss   $dst, $src" %}
10736  ins_cost(150); // XXX
10737  opcode(0xF3, 0x0F, 0x5E);
10738  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10739  ins_pipe(pipe_slow);
10740%}
10741
// Float divide, memory form: (DivF dst (LoadF src)) with the load folded
// into the divss as a memory operand. Same opcode bytes as divF_reg.
10742instruct divF_mem(regF dst, memory src)
10743%{
10744  match(Set dst (DivF dst (LoadF src)));
10745
10746  format %{ "divss   $dst, $src" %}
10747  ins_cost(150); // XXX
10748  opcode(0xF3, 0x0F, 0x5E);
10749  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10750  ins_pipe(pipe_slow);
10751%}
10752
// Float divide, constant form: (DivF dst src) where src is an immF constant,
// supplied to divss through the load_immF encoding.
10753instruct divF_imm(regF dst, immF src)
10754%{
10755  match(Set dst (DivF dst src));
10756
10757  format %{ "divss   $dst, [$src]" %}
10758  ins_cost(150); // XXX
10759  opcode(0xF3, 0x0F, 0x5E);
10760  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
10761  ins_pipe(pipe_slow);
10762%}
10763
// Double divide, register form: (DivD dst src) with dst updated in place.
// Shown as divsd (opcode bytes 0xF2 0x0F 0x5E); no flags effect is declared.
10764instruct divD_reg(regD dst, regD src)
10765%{
10766  match(Set dst (DivD dst src));
10767
10768  format %{ "divsd   $dst, $src" %}
10769  ins_cost(150); // XXX
10770  opcode(0xF2, 0x0F, 0x5E);
10771  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10772  ins_pipe(pipe_slow);
10773%}
10774
// Double divide, memory form: (DivD dst (LoadD src)) with the load folded
// into the divsd as a memory operand. Same opcode bytes as divD_reg.
10775instruct divD_mem(regD dst, memory src)
10776%{
10777  match(Set dst (DivD dst (LoadD src)));
10778
10779  format %{ "divsd   $dst, $src" %}
10780  ins_cost(150); // XXX
10781  opcode(0xF2, 0x0F, 0x5E);
10782  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10783  ins_pipe(pipe_slow);
10784%}
10785
// Double divide, constant form: (DivD dst src) where src is an immD
// constant, supplied to divsd through the load_immD encoding.
10786instruct divD_imm(regD dst, immD src)
10787%{
10788  match(Set dst (DivD dst src));
10789
10790  format %{ "divsd   $dst, [$src]" %}
10791  ins_cost(150); // XXX
10792  opcode(0xF2, 0x0F, 0x5E);
10793  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
10794  ins_pipe(pipe_slow);
10795%}
10796
// Float square root, register form. Matches the whole shape
// ConvD2F(SqrtD(ConvF2D src)) and covers it with one sqrtss (opcode bytes
// 0xF3 0x0F 0x51), so neither conversion gets an instruction of its own.
// Unlike the arithmetic rules above, dst is not also an input.
10797instruct sqrtF_reg(regF dst, regF src)
10798%{
10799  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
10800
10801  format %{ "sqrtss  $dst, $src" %}
10802  ins_cost(150); // XXX
10803  opcode(0xF3, 0x0F, 0x51);
10804  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10805  ins_pipe(pipe_slow);
10806%}
10807
// Float square root, memory form. Matches
// ConvD2F(SqrtD(ConvF2D(LoadF src))) and covers it with one sqrtss taking a
// memory operand. Same opcode bytes as sqrtF_reg.
10808instruct sqrtF_mem(regF dst, memory src)
10809%{
10810  match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
10811
10812  format %{ "sqrtss  $dst, $src" %}
10813  ins_cost(150); // XXX
10814  opcode(0xF3, 0x0F, 0x51);
10815  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10816  ins_pipe(pipe_slow);
10817%}
10818
// Float square root, constant form. Matches ConvD2F(SqrtD(ConvF2D src))
// where src is an immF constant, supplied to sqrtss through the load_immF
// encoding.
10819instruct sqrtF_imm(regF dst, immF src)
10820%{
10821  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
10822
10823  format %{ "sqrtss  $dst, [$src]" %}
10824  ins_cost(150); // XXX
10825  opcode(0xF3, 0x0F, 0x51);
10826  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
10827  ins_pipe(pipe_slow);
10828%}
10829
// Double square root, register form: (SqrtD src) into dst, shown as sqrtsd
// (opcode bytes 0xF2 0x0F 0x51). dst is not also an input.
10830instruct sqrtD_reg(regD dst, regD src)
10831%{
10832  match(Set dst (SqrtD src));
10833
10834  format %{ "sqrtsd  $dst, $src" %}
10835  ins_cost(150); // XXX
10836  opcode(0xF2, 0x0F, 0x51);
10837  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10838  ins_pipe(pipe_slow);
10839%}
10840
// Double square root, memory form: (SqrtD (LoadD src)) with the load folded
// into the sqrtsd as a memory operand. Same opcode bytes as sqrtD_reg.
10841instruct sqrtD_mem(regD dst, memory src)
10842%{
10843  match(Set dst (SqrtD (LoadD src)));
10844
10845  format %{ "sqrtsd  $dst, $src" %}
10846  ins_cost(150); // XXX
10847  opcode(0xF2, 0x0F, 0x51);
10848  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10849  ins_pipe(pipe_slow);
10850%}
10851
// Double square root, constant form: (SqrtD src) where src is an immD
// constant, supplied to sqrtsd through the load_immD encoding.
10852instruct sqrtD_imm(regD dst, immD src)
10853%{
10854  match(Set dst (SqrtD src));
10855
10856  format %{ "sqrtsd  $dst, [$src]" %}
10857  ins_cost(150); // XXX
10858  opcode(0xF2, 0x0F, 0x51);
10859  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
10860  ins_pipe(pipe_slow);
10861%}
10862
// Float absolute value in place: (AbsF dst). Per the format this is an andps
// of dst with the mask 0x7fffffff ("sign masking"). The bytes come from
// absF_encoding, which is defined outside this section.
10863instruct absF_reg(regF dst)
10864%{
10865  match(Set dst (AbsF dst));
10866
10867  format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
10868  ins_encode(absF_encoding(dst));
10869  ins_pipe(pipe_slow);
10870%}
10871
// Double absolute value in place: (AbsD dst). Per the format this is an
// andpd of dst with the mask 0x7fffffffffffffff ("sign masking"). The bytes
// come from absD_encoding, which is defined outside this section.
10872instruct absD_reg(regD dst)
10873%{
10874  match(Set dst (AbsD dst));
10875
10876  format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
10877            "# abs double by sign masking" %}
10878  ins_encode(absD_encoding(dst));
10879  ins_pipe(pipe_slow);
10880%}
10881
// Float negate in place: (NegF dst). Per the format this is an xorps of dst
// with the mask 0x80000000 ("sign flipping"). The bytes come from
// negF_encoding, which is defined outside this section.
10882instruct negF_reg(regF dst)
10883%{
10884  match(Set dst (NegF dst));
10885
10886  format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
10887  ins_encode(negF_encoding(dst));
10888  ins_pipe(pipe_slow);
10889%}
10890
// Double negate in place: (NegD dst). Per the format this is an xorpd of dst
// with the mask 0x8000000000000000 ("sign flipping"). The bytes come from
// negD_encoding, which is defined outside this section.
10891instruct negD_reg(regD dst)
10892%{
10893  match(Set dst (NegD dst));
10894
10895  format %{ "xorpd   $dst, [0x8000000000000000]\t"
10896            "# neg double by sign flipping" %}
10897  ins_encode(negD_encoding(dst));
10898  ins_pipe(pipe_slow);
10899%}
10900
10901// -----------Trig and Transcendental Instructions-----------------------------
// Double cosine in place: (CosD dst). The opcode bytes 0xD9 0xFF are emitted
// between Push_SrcXD(dst) and Push_ResultXD(dst).
// NOTE(review): those helpers are defined elsewhere; presumably they move
// the value from the XMM register onto the x87 stack and back -- confirm.
10902instruct cosD_reg(regD dst) %{
10903  match(Set dst (CosD dst));
10904
10905  format %{ "dcos   $dst\n\t" %}
10906  opcode(0xD9, 0xFF);
10907  ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
10908  ins_pipe( pipe_slow );
10909%}
10910
// Double sine in place: (SinD dst). The opcode bytes 0xD9 0xFE are emitted
// between Push_SrcXD(dst) and Push_ResultXD(dst).
// NOTE(review): those helpers are defined elsewhere; presumably they move
// the value from the XMM register onto the x87 stack and back -- confirm.
10911instruct sinD_reg(regD dst) %{
10912  match(Set dst (SinD dst));
10913
10914  format %{ "dsin   $dst\n\t" %}
10915  opcode(0xD9, 0xFE);
10916  ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
10917  ins_pipe( pipe_slow );
10918%}
10919
// Double tangent in place: (TanD dst). Between Push_SrcXD(dst) and
// Push_ResultXD(dst), two instructions are emitted byte by byte: fptan
// (0xD9 0xF2) followed by an fstp st (0xDD 0xD8), as the inline comments
// on the encoding note.
10920instruct tanD_reg(regD dst) %{
10921  match(Set dst (TanD dst));
10922
10923  format %{ "dtan   $dst\n\t" %}
10924  ins_encode( Push_SrcXD(dst),
10925              Opcode(0xD9), Opcode(0xF2),   //fptan
10926              Opcode(0xDD), Opcode(0xD8),   //fstp st
10927              Push_ResultXD(dst) );
10928  ins_pipe( pipe_slow );
10929%}
10930
// In-place base-10 logarithm of a double (Log10D dst -> dst).
// Emission order matters: the constant is loaded with fldlg2 first, then the
// operand is pushed with Push_SrcXD, then fyl2x combines the two, and finally
// Push_ResultXD takes the result for dst.
10931instruct log10D_reg(regD dst) %{
10932  // The source and result Double operands in XMM registers
10933  match(Set dst (Log10D dst));
10934  // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
10935  // fyl2x        ; compute log_10(2) * log_2(x)
10936  format %{ "fldlg2\t\t\t#Log10\n\t"
10937            "fyl2x\t\t\t# Q=Log10*Log_2(x)\n\t"
10938         %}
10939   ins_encode(Opcode(0xD9), Opcode(0xEC),   // fldlg2
10940              Push_SrcXD(dst),
10941              Opcode(0xD9), Opcode(0xF1),   // fyl2x
10942              Push_ResultXD(dst));
10943
10944  ins_pipe( pipe_slow );
10945%}
10946
// In-place natural logarithm of a double (LogD dst -> dst).
// Same shape as log10D_reg, but the constant is loaded with fldln2
// (0xD9 0xED) before the operand is pushed and fyl2x is applied.
10947instruct logD_reg(regD dst) %{
10948  // The source and result Double operands in XMM registers
10949  match(Set dst (LogD dst));
10950  // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
10951  // fyl2x        ; compute log_e(2) * log_2(x)
10952  format %{ "fldln2\t\t\t#Log_e\n\t"
10953            "fyl2x\t\t\t# Q=Log_e*Log_2(x)\n\t"
10954         %}
10955  ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
10956              Push_SrcXD(dst),
10957              Opcode(0xD9), Opcode(0xF1),   // fyl2x
10958              Push_ResultXD(dst));
10959  ins_pipe( pipe_slow );
10960%}
10961
10962
10963
10964//----------Arithmetic Conversion Instructions---------------------------------
10965
// RoundFloat is matched in place and emits nothing: the encoding list is
// empty, the cost is 0 and the pipeline class is "empty".
10966instruct roundFloat_nop(regF dst)
10967%{
10968  match(Set dst (RoundFloat dst));
10969
10970  ins_cost(0);
10971  ins_encode();
10972  ins_pipe(empty);
10973%}
10974
// RoundDouble is matched in place and emits nothing: the encoding list is
// empty, the cost is 0 and the pipeline class is "empty".
10975instruct roundDouble_nop(regD dst)
10976%{
10977  match(Set dst (RoundDouble dst));
10978
10979  ins_cost(0);
10980  ins_encode();
10981  ins_pipe(empty);
10982%}
10983
// Float -> double conversion, register source (cvtss2sd, F3 0F 5A).
// The F3 prefix (OpcP) is emitted before the REX byte, then 0F 5A and the
// register/register operand byte.
10984instruct convF2D_reg_reg(regD dst, regF src)
10985%{
10986  match(Set dst (ConvF2D src));
10987
10988  format %{ "cvtss2sd $dst, $src" %}
10989  opcode(0xF3, 0x0F, 0x5A);
10990  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10991  ins_pipe(pipe_slow); // XXX
10992%}
10993
// Float -> double conversion with the LoadF folded into the instruction:
// same cvtss2sd opcode (F3 0F 5A) with a memory source operand.
10994instruct convF2D_reg_mem(regD dst, memory src)
10995%{
10996  match(Set dst (ConvF2D (LoadF src)));
10997
10998  format %{ "cvtss2sd $dst, $src" %}
10999  opcode(0xF3, 0x0F, 0x5A);
11000  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11001  ins_pipe(pipe_slow); // XXX
11002%}
11003
// Double -> float conversion, register source (cvtsd2ss, F2 0F 5A).
11004instruct convD2F_reg_reg(regF dst, regD src)
11005%{
11006  match(Set dst (ConvD2F src));
11007
11008  format %{ "cvtsd2ss $dst, $src" %}
11009  opcode(0xF2, 0x0F, 0x5A);
11010  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11011  ins_pipe(pipe_slow); // XXX
11012%}
11013
// Double -> float conversion with the LoadD folded into the instruction:
// same cvtsd2ss opcode (F2 0F 5A) with a memory source operand.
11014instruct convD2F_reg_mem(regF dst, memory src)
11015%{
11016  match(Set dst (ConvD2F (LoadD src)));
11017
11018  format %{ "cvtsd2ss $dst, $src" %}
11019  opcode(0xF2, 0x0F, 0x5A);
11020  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11021  ins_pipe(pipe_slow); // XXX
11022%}
11023
11024// XXX do mem variants
// Float -> int truncating conversion (cvttss2si, F3 0F 2C) followed by the
// f2i_fixup encoding. As the format string lays out, a result of 0x80000000
// sends control through a stub call (f2i_fixup) with src spilled to the
// stack and the corrected value popped into dst. The sequence contains a
// cmpl, and cr is declared KILLed.
11025instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
11026%{
11027  match(Set dst (ConvF2I src));
11028  effect(KILL cr);
11029
11030  format %{ "cvttss2sil $dst, $src\t# f2i\n\t"
11031            "cmpl    $dst, #0x80000000\n\t"
11032            "jne,s   done\n\t"
11033            "subq    rsp, #8\n\t"
11034            "movss   [rsp], $src\n\t"
11035            "call    f2i_fixup\n\t"
11036            "popq    $dst\n"
11037    "done:   "%}
11038  opcode(0xF3, 0x0F, 0x2C);
11039  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
11040             f2i_fixup(dst, src));
11041  ins_pipe(pipe_slow);
11042%}
11043
// Float -> long truncating conversion: the same F3 0F 2C opcode as the int
// form, but with the wide REX prefix (REX_reg_reg_wide) for a 64-bit
// destination, followed by the f2l_fixup encoding. Per the format string a
// result of 0x8000000000000000 takes the stub-call slow path. cr is KILLed.
11044instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
11045%{
11046  match(Set dst (ConvF2L src));
11047  effect(KILL cr);
11048
11049  format %{ "cvttss2siq $dst, $src\t# f2l\n\t"
11050            "cmpq    $dst, [0x8000000000000000]\n\t"
11051            "jne,s   done\n\t"
11052            "subq    rsp, #8\n\t"
11053            "movss   [rsp], $src\n\t"
11054            "call    f2l_fixup\n\t"
11055            "popq    $dst\n"
11056    "done:   "%}
11057  opcode(0xF3, 0x0F, 0x2C);
11058  ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
11059             f2l_fixup(dst, src));
11060  ins_pipe(pipe_slow);
11061%}
11062
// Double -> int truncating conversion (cvttsd2si, F2 0F 2C) followed by the
// d2i_fixup encoding. Per the format string a result of 0x80000000 takes a
// stub-call slow path with src passed on the stack. cr is KILLed.
11063instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
11064%{
11065  match(Set dst (ConvD2I src));
11066  effect(KILL cr);
11067
11068  format %{ "cvttsd2sil $dst, $src\t# d2i\n\t"
11069            "cmpl    $dst, #0x80000000\n\t"
11070            "jne,s   done\n\t"
11071            "subq    rsp, #8\n\t"
11072            "movsd   [rsp], $src\n\t"
11073            "call    d2i_fixup\n\t"
11074            "popq    $dst\n"
11075    "done:   "%}
11076  opcode(0xF2, 0x0F, 0x2C);
11077  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
11078             d2i_fixup(dst, src));
11079  ins_pipe(pipe_slow);
11080%}
11081
// Double -> long truncating conversion: F2 0F 2C with the wide REX prefix
// for a 64-bit destination, followed by the d2l_fixup encoding. Per the
// format string a result of 0x8000000000000000 takes the stub-call slow
// path. cr is KILLed.
11082instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
11083%{
11084  match(Set dst (ConvD2L src));
11085  effect(KILL cr);
11086
11087  format %{ "cvttsd2siq $dst, $src\t# d2l\n\t"
11088            "cmpq    $dst, [0x8000000000000000]\n\t"
11089            "jne,s   done\n\t"
11090            "subq    rsp, #8\n\t"
11091            "movsd   [rsp], $src\n\t"
11092            "call    d2l_fixup\n\t"
11093            "popq    $dst\n"
11094    "done:   "%}
11095  opcode(0xF2, 0x0F, 0x2C);
11096  ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
11097             d2l_fixup(dst, src));
11098  ins_pipe(pipe_slow);
11099%}
11100
// Int -> float conversion from a general register (cvtsi2ss, F3 0F 2A).
// Only eligible when UseXmmI2F is off; convXI2F_reg covers the other case.
11101instruct convI2F_reg_reg(regF dst, rRegI src)
11102%{
11103  predicate(!UseXmmI2F);
11104  match(Set dst (ConvI2F src));
11105
11106  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
11107  opcode(0xF3, 0x0F, 0x2A);
11108  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11109  ins_pipe(pipe_slow); // XXX
11110%}
11111
// Int -> float conversion with the LoadI folded in (cvtsi2ss with a memory
// source). Unlike the register form, this rule carries no UseXmmI2F
// predicate.
11112instruct convI2F_reg_mem(regF dst, memory src)
11113%{
11114  match(Set dst (ConvI2F (LoadI src)));
11115
11116  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
11117  opcode(0xF3, 0x0F, 0x2A);
11118  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11119  ins_pipe(pipe_slow); // XXX
11120%}
11121
// Int -> double conversion from a general register (cvtsi2sd, F2 0F 2A).
// Only eligible when UseXmmI2D is off; convXI2D_reg covers the other case.
11122instruct convI2D_reg_reg(regD dst, rRegI src)
11123%{
11124  predicate(!UseXmmI2D);
11125  match(Set dst (ConvI2D src));
11126
11127  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
11128  opcode(0xF2, 0x0F, 0x2A);
11129  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11130  ins_pipe(pipe_slow); // XXX
11131%}
11132
// Int -> double conversion with the LoadI folded in (cvtsi2sd with a memory
// source). Unlike the register form, this rule carries no UseXmmI2D
// predicate.
11133instruct convI2D_reg_mem(regD dst, memory src)
11134%{
11135  match(Set dst (ConvI2D (LoadI src)));
11136
11137  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
11138  opcode(0xF2, 0x0F, 0x2A);
11139  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11140  ins_pipe(pipe_slow); // XXX
11141%}
11142
// Int -> float conversion used when UseXmmI2F is on. Two steps through the
// macro assembler: movdl copies the int into the XMM destination, then
// cvtdq2ps converts it in place (dst is both source and target of step two).
11143instruct convXI2F_reg(regF dst, rRegI src)
11144%{
11145  predicate(UseXmmI2F);
11146  match(Set dst (ConvI2F src));
11147
11148  format %{ "movdl $dst, $src\n\t"
11149            "cvtdq2psl $dst, $dst\t# i2f" %}
11150  ins_encode %{
11151    __ movdl($dst$$XMMRegister, $src$$Register);
11152    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11153  %}
11154  ins_pipe(pipe_slow); // XXX
11155%}
11156
// Int -> double conversion used when UseXmmI2D is on. Two steps through the
// macro assembler: movdl copies the int into the XMM destination, then
// cvtdq2pd converts it in place (dst is both source and target of step two).
11157instruct convXI2D_reg(regD dst, rRegI src)
11158%{
11159  predicate(UseXmmI2D);
11160  match(Set dst (ConvI2D src));
11161
11162  format %{ "movdl $dst, $src\n\t"
11163            "cvtdq2pdl $dst, $dst\t# i2d" %}
11164  ins_encode %{
11165    __ movdl($dst$$XMMRegister, $src$$Register);
11166    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11167  %}
11168  ins_pipe(pipe_slow); // XXX
11169%}
11170
// Long -> float conversion from a general register: the cvtsi2ss opcode
// (F3 0F 2A) with the wide REX prefix to select the 64-bit source form.
11171instruct convL2F_reg_reg(regF dst, rRegL src)
11172%{
11173  match(Set dst (ConvL2F src));
11174
11175  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
11176  opcode(0xF3, 0x0F, 0x2A);
11177  ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
11178  ins_pipe(pipe_slow); // XXX
11179%}
11180
// Long -> float conversion with the LoadL folded in: cvtsi2ss (F3 0F 2A)
// with the wide REX prefix and a memory source operand.
11181instruct convL2F_reg_mem(regF dst, memory src)
11182%{
11183  match(Set dst (ConvL2F (LoadL src)));
11184
11185  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
11186  opcode(0xF3, 0x0F, 0x2A);
11187  ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
11188  ins_pipe(pipe_slow); // XXX
11189%}
11190
// Long -> double conversion from a general register: the cvtsi2sd opcode
// (F2 0F 2A) with the wide REX prefix to select the 64-bit source form.
11191instruct convL2D_reg_reg(regD dst, rRegL src)
11192%{
11193  match(Set dst (ConvL2D src));
11194
11195  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
11196  opcode(0xF2, 0x0F, 0x2A);
11197  ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
11198  ins_pipe(pipe_slow); // XXX
11199%}
11200
// Long -> double conversion with the LoadL folded in: cvtsi2sd (F2 0F 2A)
// with the wide REX prefix and a memory source operand.
11201instruct convL2D_reg_mem(regD dst, memory src)
11202%{
11203  match(Set dst (ConvL2D (LoadL src)));
11204
11205  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
11206  opcode(0xF2, 0x0F, 0x2A);
11207  ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
11208  ins_pipe(pipe_slow); // XXX
11209%}
11210
// Sign-extending int -> long conversion (movslq). Opcode 0x63 only has this
// meaning with REX.W set, hence the wide REX prefix emitted ahead of it.
11211instruct convI2L_reg_reg(rRegL dst, rRegI src)
11212%{
11213  match(Set dst (ConvI2L src));
11214
11215  ins_cost(125);
11216  format %{ "movslq  $dst, $src\t# i2l" %}
11217  opcode(0x63); // needs REX.W
11218  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src));
11219  ins_pipe(ialu_reg_reg);
11220%}
11221
11222// instruct convI2L_reg_reg_foo(rRegL dst, rRegI src)
11223// %{
11224//   match(Set dst (ConvI2L src));
11225// //   predicate(_kids[0]->_leaf->as_Type()->type()->is_int()->_lo >= 0 &&
11226// //             _kids[0]->_leaf->as_Type()->type()->is_int()->_hi >= 0);
11227//   predicate(((const TypeNode*) n)->type()->is_long()->_hi ==
11228//             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_hi &&
11229//             ((const TypeNode*) n)->type()->is_long()->_lo ==
11230//             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_lo);
11231
11232//   format %{ "movl    $dst, $src\t# unsigned i2l" %}
11233//   ins_encode(enc_copy(dst, src));
11234// //   opcode(0x63); // needs REX.W
11235// //   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src));
11236//   ins_pipe(ialu_reg_reg);
11237// %}
11238
11239// Zero-extend convert int to long
// Folds (ConvI2L src) & mask into a single 32-bit register move (see the
// format string); the code is produced by enc_copy.
// NOTE(review): immL_32bits is presumably the constant 0xFFFFFFFF, which is
// what makes a plain movl equivalent -- confirm in the operand definition.
11240instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
11241%{
11242  match(Set dst (AndL (ConvI2L src) mask));
11243
11244  format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
11245  ins_encode(enc_copy(dst, src));
11246  ins_pipe(ialu_reg_reg);
11247%}
11248
11249// Zero-extend convert int to long
// Memory-source form of the zero-extending int -> long conversion: folds
// (ConvI2L (LoadI src)) & mask into one 32-bit load (opcode 0x8B, emitted
// with the non-wide REX prefix).
11250instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
11251%{
11252  match(Set dst (AndL (ConvI2L (LoadI src)) mask));
11253
11254  format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
11255  opcode(0x8B);
11256  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
11257  ins_pipe(ialu_reg_mem);
11258%}
11259
// Masks a long with the immL_32bits constant using a 32-bit register move
// (per the format string).
// NOTE(review): this uses enc_copy_always rather than enc_copy, presumably
// so the movl is emitted even when dst and src are the same register, since
// the move itself performs the masking -- confirm against the enc_class.
11260instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
11261%{
11262  match(Set dst (AndL src mask));
11263
11264  format %{ "movl    $dst, $src\t# zero-extend long" %}
11265  ins_encode(enc_copy_always(dst, src));
11266  ins_pipe(ialu_reg_reg);
11267%}
11268
// Long -> int narrowing, shown in the format string as a 32-bit register
// move and encoded with enc_copy_always.
// NOTE(review): presumably the unconditional form is used so the upper half
// is dropped even when dst == src -- confirm against the enc_class.
11269instruct convL2I_reg_reg(rRegI dst, rRegL src)
11270%{
11271  match(Set dst (ConvL2I src));
11272
11273  format %{ "movl    $dst, $src\t# l2i" %}
11274  ins_encode(enc_copy_always(dst, src));
11275  ins_pipe(ialu_reg_reg);
11276%}
11277
11278
// MoveF2I where the float lives in a stack slot: a 32-bit integer load
// (opcode 0x8B) from the slot into a general register.
11279instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11280  match(Set dst (MoveF2I src));
11281  effect(DEF dst, USE src);
11282
11283  ins_cost(125);
11284  format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
11285  opcode(0x8B);
11286  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
11287  ins_pipe(ialu_reg_mem);
11288%}
11289
// MoveI2F where the int lives in a stack slot: a movss load (F3 0F 10) from
// the slot into an XMM register.
11290instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
11291  match(Set dst (MoveI2F src));
11292  effect(DEF dst, USE src);
11293
11294  ins_cost(125);
11295  format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
11296  opcode(0xF3, 0x0F, 0x10);
11297  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11298  ins_pipe(pipe_slow);
11299%}
11300
// MoveD2L where the double lives in a stack slot: a 64-bit integer load
// (opcode 0x8B with the wide REX prefix) from the slot into a general
// register.
11301instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
11302  match(Set dst (MoveD2L src));
11303  effect(DEF dst, USE src);
11304
11305  ins_cost(125);
11306  format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
11307  opcode(0x8B);
11308  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
11309  ins_pipe(ialu_reg_mem);
11310%}
11311
// MoveL2D from a stack slot, variant chosen when UseXmmLoadAndClearUpper is
// off: loads with movlpd (66 0F 12). MoveL2D_stack_reg is the movsd
// counterpart for when the flag is on.
11312instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
11313  predicate(!UseXmmLoadAndClearUpper);
11314  match(Set dst (MoveL2D src));
11315  effect(DEF dst, USE src);
11316
11317  ins_cost(125);
11318  format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
11319  opcode(0x66, 0x0F, 0x12);
11320  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11321  ins_pipe(pipe_slow);
11322%}
11323
// MoveL2D from a stack slot, variant chosen when UseXmmLoadAndClearUpper is
// on: loads with movsd (F2 0F 10). MoveL2D_stack_reg_partial is the movlpd
// counterpart for when the flag is off.
11324instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
11325  predicate(UseXmmLoadAndClearUpper);
11326  match(Set dst (MoveL2D src));
11327  effect(DEF dst, USE src);
11328
11329  ins_cost(125);
11330  format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
11331  opcode(0xF2, 0x0F, 0x10);
11332  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11333  ins_pipe(pipe_slow);
11334%}
11335
11336
// MoveF2I with a stack-slot destination: a movss store (F3 0F 11) of the XMM
// source into the slot. The encoding helpers take (register, memory), so src
// is passed first and dst second.
11337instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
11338  match(Set dst (MoveF2I src));
11339  effect(DEF dst, USE src);
11340
11341  ins_cost(95); // XXX
11342  format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
11343  opcode(0xF3, 0x0F, 0x11);
11344  ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
11345  ins_pipe(pipe_slow);
11346%}
11347
// MoveI2F with a stack-slot destination: a 32-bit integer store (opcode
// 0x89) of the general register into the slot. The encoding helpers take
// (register, memory), so src is passed first and dst second.
11348instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11349  match(Set dst (MoveI2F src));
11350  effect(DEF dst, USE src);
11351
11352  ins_cost(100);
11353  format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
11354  opcode(0x89);
11355  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
11356  ins_pipe( ialu_mem_reg );
11357%}
11358
// MoveD2L with a stack-slot destination: a movsd store (F2 0F 11) of the XMM
// source into the slot. The encoding helpers take (register, memory), so src
// is passed first and dst second.
// The format tag names this rule (MoveD2L_reg_stack); it previously carried
// the sibling's name, which mislabelled the instruction in printed output.
11359instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
11360  match(Set dst (MoveD2L src));
11361  effect(DEF dst, USE src);
11362
11363  ins_cost(95); // XXX
11364  format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
11365  opcode(0xF2, 0x0F, 0x11);
11366  ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
11367  ins_pipe(pipe_slow);
11368%}
11369
// MoveL2D with a stack-slot destination: a 64-bit integer store (opcode 0x89
// with the wide REX prefix) of the general register into the slot. The
// encoding helpers take (register, memory), so src is passed first.
11370instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
11371  match(Set dst (MoveL2D src));
11372  effect(DEF dst, USE src);
11373
11374  ins_cost(100);
11375  format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
11376  opcode(0x89);
11377  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
11378  ins_pipe(ialu_mem_reg);
11379%}
11380
// MoveF2I directly between registers: the macro assembler's movdl from the
// XMM source into the general-register destination. Costed at 85, below the
// stack-based forms.
11381instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
11382  match(Set dst (MoveF2I src));
11383  effect(DEF dst, USE src);
11384  ins_cost(85);
11385  format %{ "movd    $dst,$src\t# MoveF2I" %}
11386  ins_encode %{ __ movdl($dst$$Register, $src$$XMMRegister); %}
11387  ins_pipe( pipe_slow );
11388%}
11389
// MoveD2L directly between registers: the macro assembler's movdq from the
// XMM source into the general-register destination. The format string
// prints the mnemonic as "movd" although the emitter called is movdq.
11390instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
11391  match(Set dst (MoveD2L src));
11392  effect(DEF dst, USE src);
11393  ins_cost(85);
11394  format %{ "movd    $dst,$src\t# MoveD2L" %}
11395  ins_encode %{ __ movdq($dst$$Register, $src$$XMMRegister); %}
11396  ins_pipe( pipe_slow );
11397%}
11398
11399// The next instructions have long latency and use Int unit. Set high cost.
// MoveI2F directly between registers: movdl from the general register into
// the XMM destination. Deliberately costed at 300, above the stack-based
// forms in this file.
11400instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
11401  match(Set dst (MoveI2F src));
11402  effect(DEF dst, USE src);
11403  ins_cost(300);
11404  format %{ "movd    $dst,$src\t# MoveI2F" %}
11405  ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); %}
11406  ins_pipe( pipe_slow );
11407%}
11408
// MoveL2D directly between registers: movdq from the general register into
// the XMM destination. Costed at 300 like MoveI2F_reg_reg. The format string
// prints the mnemonic as "movd" although the emitter called is movdq.
11409instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
11410  match(Set dst (MoveL2D src));
11411  effect(DEF dst, USE src);
11412  ins_cost(300);
11413  format %{ "movd    $dst,$src\t# MoveL2D" %}
11414  ins_encode %{ __ movdq($dst$$XMMRegister, $src$$Register); %}
11415  ins_pipe( pipe_slow );
11416%}
11417
11418// Replicate scalar to packed byte (1 byte) values in xmm
// Replicate8B with the scalar already in an XMM register. The whole
// three-instruction sequence shown in the format string (copy, PUNPCKLBW,
// PSHUFLW) is produced by the single pshufd_8x8 encoding.
11419instruct Repl8B_reg(regD dst, regD src) %{
11420  match(Set dst (Replicate8B src));
11421  format %{ "MOVDQA  $dst,$src\n\t"
11422            "PUNPCKLBW $dst,$dst\n\t"
11423            "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
11424  ins_encode( pshufd_8x8(dst, src));
11425  ins_pipe( pipe_slow );
11426%}
11427
11428// Replicate scalar to packed byte (1 byte) values in xmm
// Replicate8B with the scalar in a general register: mov_i2x first moves it
// into dst, then pshufd_8x8 is applied with dst as both source and
// destination.
11429instruct Repl8B_rRegI(regD dst, rRegI src) %{
11430  match(Set dst (Replicate8B src));
11431  format %{ "MOVD    $dst,$src\n\t"
11432            "PUNPCKLBW $dst,$dst\n\t"
11433            "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
11434  ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst));
11435  ins_pipe( pipe_slow );
11436%}
11437
11438// Replicate scalar zero to packed byte (1 byte) values in xmm
// Replicate8B of the constant zero: the zero operand is never materialized;
// dst is simply cleared by XOR-ing it with itself (pxor).
11439instruct Repl8B_immI0(regD dst, immI0 zero) %{
11440  match(Set dst (Replicate8B zero));
11441  format %{ "PXOR  $dst,$dst\t! replicate8B" %}
11442  ins_encode( pxor(dst, dst));
11443  ins_pipe( fpu_reg_reg );
11444%}
11445
11446// Replicate scalar to packed short (2 byte) values in xmm
// Replicate4S with the scalar already in an XMM register: one PSHUFLW with
// selector 0x00 (per the format string), produced by pshufd_4x16.
11447instruct Repl4S_reg(regD dst, regD src) %{
11448  match(Set dst (Replicate4S src));
11449  format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
11450  ins_encode( pshufd_4x16(dst, src));
11451  ins_pipe( fpu_reg_reg );
11452%}
11453
11454// Replicate scalar to packed short (2 byte) values in xmm
// Replicate4S with the scalar in a general register: mov_i2x moves it into
// dst, then pshufd_4x16 shuffles dst in place.
11455instruct Repl4S_rRegI(regD dst, rRegI src) %{
11456  match(Set dst (Replicate4S src));
11457  format %{ "MOVD    $dst,$src\n\t"
11458            "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
11459  ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
11460  ins_pipe( fpu_reg_reg );
11461%}
11462
11463// Replicate scalar zero to packed short (2 byte) values in xmm
// Replicate4S of the constant zero: dst is cleared with pxor; the zero
// operand itself is not encoded.
11464instruct Repl4S_immI0(regD dst, immI0 zero) %{
11465  match(Set dst (Replicate4S zero));
11466  format %{ "PXOR  $dst,$dst\t! replicate4S" %}
11467  ins_encode( pxor(dst, dst));
11468  ins_pipe( fpu_reg_reg );
11469%}
11470
11471// Replicate scalar to packed char (2 byte) values in xmm
// Replicate4C with the scalar already in an XMM register. Uses the same
// pshufd_4x16 encoding as the Replicate4S rule; only the matched ideal node
// differs.
11472instruct Repl4C_reg(regD dst, regD src) %{
11473  match(Set dst (Replicate4C src));
11474  format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
11475  ins_encode( pshufd_4x16(dst, src));
11476  ins_pipe( fpu_reg_reg );
11477%}
11478
11479// Replicate scalar to packed char (2 byte) values in xmm
// Replicate4C with the scalar in a general register: mov_i2x moves it into
// dst, then pshufd_4x16 shuffles dst in place.
11480instruct Repl4C_rRegI(regD dst, rRegI src) %{
11481  match(Set dst (Replicate4C src));
11482  format %{ "MOVD    $dst,$src\n\t"
11483            "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
11484  ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
11485  ins_pipe( fpu_reg_reg );
11486%}
11487
11488// Replicate scalar zero to packed char (2 byte) values in xmm
// Replicate4C of the constant zero: dst is cleared with pxor; the zero
// operand itself is not encoded.
11489instruct Repl4C_immI0(regD dst, immI0 zero) %{
11490  match(Set dst (Replicate4C zero));
11491  format %{ "PXOR  $dst,$dst\t! replicate4C" %}
11492  ins_encode( pxor(dst, dst));
11493  ins_pipe( fpu_reg_reg );
11494%}
11495
11496// Replicate scalar to packed integer (4 byte) values in xmm
// Replicate2I with the scalar already in an XMM register: one PSHUFD with
// selector 0x00, passed as the third argument of the pshufd encoding.
11497instruct Repl2I_reg(regD dst, regD src) %{
11498  match(Set dst (Replicate2I src));
11499  format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
11500  ins_encode( pshufd(dst, src, 0x00));
11501  ins_pipe( fpu_reg_reg );
11502%}
11503
11504// Replicate scalar to packed integer (4 byte) values in xmm
// Replicate2I with the scalar in a general register: mov_i2x moves it into
// dst, then PSHUFD with selector 0x00 shuffles dst in place.
11505instruct Repl2I_rRegI(regD dst, rRegI src) %{
11506  match(Set dst (Replicate2I src));
11507  format %{ "MOVD   $dst,$src\n\t"
11508            "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
11509  ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00));
11510  ins_pipe( fpu_reg_reg );
11511%}
11512
11513// Replicate scalar zero to packed integer (4 byte) values in xmm
// Replicate2I of the constant zero: dst is cleared with pxor; the zero
// operand itself is not encoded.
11514instruct Repl2I_immI0(regD dst, immI0 zero) %{
11515  match(Set dst (Replicate2I zero));
11516  format %{ "PXOR  $dst,$dst\t! replicate2I" %}
11517  ins_encode( pxor(dst, dst));
11518  ins_pipe( fpu_reg_reg );
11519%}
11520
11521// Replicate scalar to packed single precision floating point values in xmm
// Replicate2F with the source typed as regD: one PSHUFD with selector 0xe0
// (note: not the 0x00 used by the Replicate2I rules).
11522instruct Repl2F_reg(regD dst, regD src) %{
11523  match(Set dst (Replicate2F src));
11524  format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
11525  ins_encode( pshufd(dst, src, 0xe0));
11526  ins_pipe( fpu_reg_reg );
11527%}
11528
11529// Replicate scalar to packed single precision floating point values in xmm
// Replicate2F with the source typed as regF. Identical encoding to
// Repl2F_reg (PSHUFD, selector 0xe0); only the source operand class differs.
11530instruct Repl2F_regF(regD dst, regF src) %{
11531  match(Set dst (Replicate2F src));
11532  format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
11533  ins_encode( pshufd(dst, src, 0xe0));
11534  ins_pipe( fpu_reg_reg );
11535%}
11536
11537// Replicate scalar zero to packed single precision floating point values in xmm
// Replicate2F of the float constant zero (immF0): dst is cleared with pxor;
// the zero operand itself is not encoded.
11538instruct Repl2F_immF0(regD dst, immF0 zero) %{
11539  match(Set dst (Replicate2F zero));
11540  format %{ "PXOR  $dst,$dst\t! replicate2F" %}
11541  ins_encode( pxor(dst, dst));
11542  ins_pipe( fpu_reg_reg );
11543%}
11544
11545
11546// =======================================================================
11547// fast clearing of an array
// ClearArray implemented with "rep stosq". The operand classes pin the
// count to rcx, the base address to rdi and the zero scratch to rax, as the
// string instruction requires. cnt and base are consumed and clobbered
// (USE_KILL); rax and the flags are clobbered (KILL). The result operand
// "dummy" exists only to give the match rule a Set target.
// NOTE(review): since the store is the 8-byte stosq, cnt is presumably a
// count of 8-byte words rather than bytes -- confirm against ClearArray.
11548instruct rep_stos(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
11549                  rFlagsReg cr)
11550%{
11551  match(Set dummy (ClearArray cnt base));
11552  effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11553
11554  format %{ "xorl    rax, rax\t# ClearArray:\n\t"
11555            "rep stosq\t# Store rax to *rdi++ while rcx--" %}
11556  ins_encode(opc_reg_reg(0x33, RAX, RAX), // xorl %eax, %eax
11557             Opcode(0xF3), Opcode(0x48), Opcode(0xAB)); // rep REX_W stos
11558  ins_pipe(pipe_slow);
11559%}
11560
// StrComp intrinsic. All operands are pinned to fixed registers by their
// operand classes (str1 rdi, str2 rsi, tmp1 rax, tmp2 rbx, result rcx).
// Both string pointers are consumed and clobbered; the two temporaries and
// the flags are clobbered. The code itself comes from enc_String_Compare,
// which takes no operand arguments here.
11561instruct string_compare(rdi_RegP str1, rsi_RegP str2, rax_RegI tmp1,
11562                        rbx_RegI tmp2, rcx_RegI result, rFlagsReg cr)
11563%{
11564  match(Set result (StrComp str1 str2));
11565  effect(USE_KILL str1, USE_KILL str2, KILL tmp1, KILL tmp2, KILL cr);
11566  //ins_cost(300);
11567
11568  format %{ "String Compare $str1, $str2 -> $result    // XXX KILL RAX, RBX" %}
11569  ins_encode( enc_String_Compare() );
11570  ins_pipe( pipe_slow );
11571%}
11572
11573// fast array equals
// AryEq intrinsic. Operands are pinned to fixed registers by their operand
// classes (ary1 rdi, ary2 rsi, tmp1 rax, tmp2 rbx, result rcx). Both array
// pointers are consumed and clobbered; the temporaries and the flags are
// clobbered. Unlike string_compare, every operand is passed explicitly to
// the enc_Array_Equals encoding.
11574instruct array_equals(rdi_RegP ary1, rsi_RegP ary2, rax_RegI tmp1, 
11575                      rbx_RegI tmp2, rcx_RegI result, rFlagsReg cr) %{
11576  match(Set result (AryEq ary1 ary2));
11577  effect(USE_KILL ary1, USE_KILL ary2, KILL tmp1, KILL tmp2, KILL cr);
11578  //ins_cost(300);
11579
11580  format %{ "Array Equals $ary1,$ary2 -> $result    // KILL RAX, RBX" %}
11581  ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, result) );
11582  ins_pipe( pipe_slow );
11583%}
11584
11585//----------Control Flow Instructions------------------------------------------
11586// Signed compare Instructions
11587
11588// XXX more variants!!
// Signed 32-bit compare of two registers (cmpl, opcode 3B /r), setting the
// flags operand cr. This is the only compare rule in this group that also
// spells out an explicit effect() list.
11589instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
11590%{
11591  match(Set cr (CmpI op1 op2));
11592  effect(DEF cr, USE op1, USE op2);
11593
11594  format %{ "cmpl    $op1, $op2" %}
11595  opcode(0x3B);  /* Opcode 3B /r */
11596  ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
11597  ins_pipe(ialu_cr_reg_reg);
11598%}
11599
// Signed 32-bit compare of a register against an immediate (opcode 81 /7).
// NOTE(review): OpcSErm together with Con8or32 presumably selects the
// sign-extended 8-bit immediate form when op2 fits in a byte -- confirm
// against those enc_classes.
11600instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
11601%{
11602  match(Set cr (CmpI op1 op2));
11603
11604  format %{ "cmpl    $op1, $op2" %}
11605  opcode(0x81, 0x07); /* Opcode 81 /7 */
11606  ins_encode(OpcSErm(op1, op2), Con8or32(op2));
11607  ins_pipe(ialu_cr_reg_imm);
11608%}
11609
// Signed 32-bit compare of a register against a value loaded from memory,
// with the LoadI folded into the cmpl (opcode 3B /r). Costed at 500.
11610instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
11611%{
11612  match(Set cr (CmpI op1 (LoadI op2)));
11613
11614  ins_cost(500); // XXX
11615  format %{ "cmpl    $op1, $op2" %}
11616  opcode(0x3B); /* Opcode 3B /r */
11617  ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
11618  ins_pipe(ialu_cr_reg_mem);
11619%}
11620
// Compare of an int register against the constant zero, emitted as a testl
// of the register with itself (opcode 0x85); the zero operand is matched but
// never encoded.
11621instruct testI_reg(rFlagsReg cr, rRegI src, immI0 zero)
11622%{
11623  match(Set cr (CmpI src zero));
11624
11625  format %{ "testl   $src, $src" %}
11626  opcode(0x85);
11627  ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
11628  ins_pipe(ialu_cr_reg_imm);
11629%}
11630
// Folds (src & con) compared with zero into a single testl with a 32-bit
// immediate (opcode F7 /0); the AND result is never written to a register.
11631instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI0 zero)
11632%{
11633  match(Set cr (CmpI (AndI src con) zero));
11634
11635  format %{ "testl   $src, $con" %}
11636  opcode(0xF7, 0x00);
11637  ins_encode(REX_reg(src), OpcP, reg_opc(src), Con32(con));
11638  ins_pipe(ialu_cr_reg_imm);
11639%}
11640
// Folds (src & LoadI mem) compared with zero into a single testl with a
// memory operand (opcode 0x85).
11641instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI0 zero)
11642%{
11643  match(Set cr (CmpI (AndI src (LoadI mem)) zero));
11644
11645  format %{ "testl   $src, $mem" %}
11646  opcode(0x85);
11647  ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
11648  ins_pipe(ialu_cr_reg_mem);
11649%}
11650
11651// Unsigned compare Instructions; really, same as signed except they
11652// produce an rFlagsRegU instead of rFlagsReg.
// Unsigned 32-bit compare of two registers. The encoding is the same cmpl
// (3B /r) as the signed rule; the difference is the rFlagsRegU result class.
11653instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
11654%{
11655  match(Set cr (CmpU op1 op2));
11656
11657  format %{ "cmpl    $op1, $op2\t# unsigned" %}
11658  opcode(0x3B); /* Opcode 3B /r */
11659  ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
11660  ins_pipe(ialu_cr_reg_reg);
11661%}
11662
// Unsigned 32-bit compare of a register against an immediate. Same encoding
// as compI_rReg_imm (81 /7 via OpcSErm and Con8or32), producing an
// rFlagsRegU result.
11663instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
11664%{
11665  match(Set cr (CmpU op1 op2));
11666
11667  format %{ "cmpl    $op1, $op2\t# unsigned" %}
11668  opcode(0x81,0x07); /* Opcode 81 /7 */
11669  ins_encode(OpcSErm(op1, op2), Con8or32(op2));
11670  ins_pipe(ialu_cr_reg_imm);
11671%}
11672
// Unsigned 32-bit compare of a register against a value loaded from memory,
// with the LoadI folded into the cmpl (3B /r). Costed at 500.
11673instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
11674%{
11675  match(Set cr (CmpU op1 (LoadI op2)));
11676
11677  ins_cost(500); // XXX
11678  format %{ "cmpl    $op1, $op2\t# unsigned" %}
11679  opcode(0x3B); /* Opcode 3B /r */
11680  ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
11681  ins_pipe(ialu_cr_reg_mem);
11682%}
11683
11684// // // Cisc-spilled version of cmpU_rReg
11685// //instruct compU_mem_rReg(rFlagsRegU cr, memory op1, rRegI op2)
11686// //%{
11687// //  match(Set cr (CmpU (LoadI op1) op2));
11688// //
11689// //  format %{ "CMPu   $op1,$op2" %}
11690// //  ins_cost(500);
11691// //  opcode(0x39);  /* Opcode 39 /r */
11692// //  ins_encode( OpcP, reg_mem( op1, op2) );
11693// //%}
11694
// Unsigned compare of an int register against the constant zero, emitted as
// a testl of the register with itself (opcode 0x85).
11695instruct testU_reg(rFlagsRegU cr, rRegI src, immI0 zero)
11696%{
11697  match(Set cr (CmpU src zero));
11698
11699  format %{ "testl  $src, $src\t# unsigned" %}
11700  opcode(0x85);
11701  ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
11702  ins_pipe(ialu_cr_reg_imm);
11703%}
11704
// Pointer compare of two registers: 64-bit cmpq (3B /r with the wide REX
// prefix), producing an unsigned flags result (rFlagsRegU).
11705instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
11706%{
11707  match(Set cr (CmpP op1 op2));
11708
11709  format %{ "cmpq    $op1, $op2\t# ptr" %}
11710  opcode(0x3B); /* Opcode 3B /r */
11711  ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
11712  ins_pipe(ialu_cr_reg_reg);
11713%}
11714
// Pointer compare of a register against a pointer loaded from memory, with
// the LoadP folded into the cmpq. Costed at 500 and carries no predicate;
// compP_mem_rReg matches the same tree under a predicate and default cost.
11715instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
11716%{
11717  match(Set cr (CmpP op1 (LoadP op2)));
11718
11719  ins_cost(500); // XXX
11720  format %{ "cmpq    $op1, $op2\t# ptr" %}
11721  opcode(0x3B); /* Opcode 3B /r */
11722  ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
11723  ins_pipe(ialu_cr_reg_mem);
11724%}
11725
11726// // // Cisc-spilled version of cmpP_rReg
11727// //instruct compP_mem_rReg(rFlagsRegU cr, memory op1, rRegP op2)
11728// //%{
11729// //  match(Set cr (CmpP (LoadP op1) op2));
11730// //
11731// //  format %{ "CMPu   $op1,$op2" %}
11732// //  ins_cost(500);
11733// //  opcode(0x39);  /* Opcode 39 /r */
11734// //  ins_encode( OpcP, reg_mem( op1, op2) );
11735// //%}
11736
11737// XXX this is generalized by compP_rReg_mem???
11738// Compare raw pointer (used in out-of-heap check).
11739// Only works because non-oop pointers must be raw pointers
11740// and raw pointers have no anti-dependencies.
// Same match tree and encoding as compP_rReg_mem, but without the 500 cost
// and restricted by the predicate: n->in(2) is the LoadP and its in(2) is
// inspected, and the rule applies only when that node's bottom type is not
// an oop pointer (i.e. the raw-pointer case).
11741instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
11742%{
11743  predicate(!n->in(2)->in(2)->bottom_type()->isa_oop_ptr());
11744  match(Set cr (CmpP op1 (LoadP op2)));
11745
11746  format %{ "cmpq    $op1, $op2\t# raw ptr" %}
11747  opcode(0x3B); /* Opcode 3B /r */
11748  ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
11749  ins_pipe(ialu_cr_reg_mem);
11750%}
11751
11752// This will generate a signed flags result. This should be OK since
11753// any compare to a zero should be eq/neq.
// Pointer-against-null compare, emitted as a 64-bit testq of the register
// with itself (opcode 0x85 with the wide REX prefix). Note the result class
// is the signed rFlagsReg, unlike the other CmpP rules.
11754instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
11755%{
11756  match(Set cr (CmpP src zero));
11757
11758  format %{ "testq   $src, $src\t# ptr" %}
11759  opcode(0x85);
11760  ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
11761  ins_pipe(ialu_cr_reg_imm);
11762%}
11763
11764// This will generate a signed flags result. This should be OK since
11765// any compare to a zero should be eq/neq.
// Null check of a pointer held in memory without loading it: testq of the
// memory operand against an all-ones mask (F7 /0). Applies when compressed
// oops are off or the narrow-oop base is non-NULL; testP_mem_reg0 covers the
// remaining case.
// NOTE(review): the immediate is emitted as the 32-bit value 0xFFFFFFFF; the
// 64-bit mask shown in the format string relies on the CPU sign-extending
// the imm32 of a REX.W TEST -- confirm against the instruction reference.
11766instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
11767%{
11768  predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
11769  match(Set cr (CmpP (LoadP op) zero));
11770
11771  ins_cost(500); // XXX
11772  format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
11773  opcode(0xF7); /* Opcode F7 /0 */
11774  ins_encode(REX_mem_wide(op),
11775             OpcP, RM_opc_mem(0x00, op), Con_d32(0xFFFFFFFF));
11776  ins_pipe(ialu_cr_reg_imm);
11777%}
11778
// Null check of a pointer held in memory for the configuration where
// compressed oops are on and the narrow-oop base is NULL. As the format
// string notes, R12 (the heap-base register) then holds zero, so it is
// compared directly against the memory operand with cmpq.
// NOTE(review): the comparison is r12 - mem, i.e. reversed relative to the
// match tree; presumably harmless because only equality is tested -- confirm.
11779instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
11780%{
11781  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
11782  match(Set cr (CmpP (LoadP mem) zero));
11783
11784  format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
11785  ins_encode %{
11786    __ cmpq(r12, $mem$$Address);
11787  %}
11788  ins_pipe(ialu_cr_reg_mem);
11789%}
11790
// Compare of two compressed (narrow) oops held in registers: a 32-bit cmpl
// emitted through the macro assembler.
11791instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
11792%{
11793  match(Set cr (CmpN op1 op2));
11794
11795  format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
11796  ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
11797  ins_pipe(ialu_cr_reg_reg);
11798%}
11799
// Compare of a compressed oop in a register against one loaded from memory,
// with the LoadN folded into the 32-bit cmpl.
11800instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
11801%{
11802  match(Set cr (CmpN src (LoadN mem)));
11803
11804  format %{ "cmpl    $src, $mem\t# compressed ptr" %}
11805  ins_encode %{
11806    __ cmpl($src$$Register, $mem$$Address);
11807  %}
11808  ins_pipe(ialu_cr_reg_mem);
11809%}
11810
// Compare of a compressed oop in a register against a narrow-oop constant.
// The constant is handed to cmp_narrow_oop as a jobject rather than being
// emitted as a plain integer immediate.
11811instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
11812  match(Set cr (CmpN op1 op2));
11813
11814  format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
11815  ins_encode %{
11816    __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
11817  %}
11818  ins_pipe(ialu_cr_reg_imm);
11819%}
11820
// Compare of a narrow-oop constant against a compressed oop in memory, with
// the LoadN folded in; the constant is passed to cmp_narrow_oop as a jobject.
// NOTE(review): the match tree has the constant as the first CmpN input but
// the emitted compare has the memory operand first, so the flags describe
// mem - src. Presumably acceptable because narrow oops are only tested for
// equality -- confirm with the users of this flags result.
11821instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
11822%{
11823  match(Set cr (CmpN src (LoadN mem)));
11824
11825  format %{ "cmpl    $mem, $src\t# compressed ptr" %}
11826  ins_encode %{
11827    __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
11828  %}
11829  ins_pipe(ialu_cr_reg_mem);
11830%}
11831
// Null check of a compressed oop in a register: a 32-bit testl of the
// register with itself; the immN0 operand is matched but never encoded.
11832instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
11833  match(Set cr (CmpN src zero));
11834
11835  format %{ "testl   $src, $src\t# compressed ptr" %}
11836  ins_encode %{ __ testl($src$$Register, $src$$Register); %}
11837  ins_pipe(ialu_cr_reg_imm);
11838%}
11839
// Null check of a compressed oop held in memory, without loading it into a
// register first. Applies when the narrow-oop base is non-NULL;
// testN_mem_reg0 handles the NULL-base case.
//
// The memory word is compared against the immediate 0, so ZF is set exactly
// when the stored narrow oop is null, which is what CmpN (LoadN mem) zero
// asks for. The previous encoding compared against 0xFFFFFFFF (while the
// format string claimed a testl), which sets ZF for an all-ones word rather
// than for zero. The same cmpl(Address, imm32) emitter is used, so the
// instruction length is unchanged.
11840instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
11841%{
11842  predicate(Universe::narrow_oop_base() != NULL);
11843  match(Set cr (CmpN (LoadN mem) zero));
11844
11845  ins_cost(500); // XXX
11846  format %{ "cmpl    $mem, 0\t# compressed ptr" %}
11847  ins_encode %{
11848    __ cmpl($mem$$Address, 0);
11849  %}
11850  ins_pipe(ialu_cr_reg_mem);
11851%}
11852
// Null check of a compressed oop held in memory for the configuration where
// the narrow-oop base is NULL. As the format string notes, R12 (the
// heap-base register) then holds zero, so it is compared directly against
// the memory operand with a 32-bit cmpl.
// NOTE(review): the comparison is r12 - mem, i.e. reversed relative to the
// match tree; presumably harmless because only equality is tested -- confirm.
11853instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
11854%{
11855  predicate(Universe::narrow_oop_base() == NULL);
11856  match(Set cr (CmpN (LoadN mem) zero));
11857
11858  format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
11859  ins_encode %{
11860    __ cmpl(r12, $mem$$Address);
11861  %}
11862  ins_pipe(ialu_cr_reg_mem);
11863%}
11864
11865// Yanked all unsigned pointer compare operations.
11866// Pointer compares are done with CmpP which is already unsigned.
11867
// 64-bit signed compare of two long registers: (CmpL op1 op2) -> cmpq.
// Encoded as a wide REX prefix, the primary opcode 0x3B and a reg/reg ModRM.
11868instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
11869%{
11870  match(Set cr (CmpL op1 op2));
11871
11872  format %{ "cmpq    $op1, $op2" %}
11873  opcode(0x3B);  /* Opcode 3B /r */
11874  ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
11875  ins_pipe(ialu_cr_reg_reg);
11876%}
11877
// 64-bit compare of a long register against a long constant that fits the
// immL32 operand class.
// NOTE(review): OpcSErm_wide / Con8or32 are encoding classes defined
// elsewhere; their names suggest they pick between an 8-bit sign-extended and
// a 32-bit immediate form of opcode 81 /7 -- confirm in the encode block.
11878instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
11879%{
11880  match(Set cr (CmpL op1 op2));
11881
11882  format %{ "cmpq    $op1, $op2" %}
11883  opcode(0x81, 0x07); /* Opcode 81 /7 */
11884  ins_encode(OpcSErm_wide(op1, op2), Con8or32(op2));
11885  ins_pipe(ialu_cr_reg_imm);
11886%}
11887
// 64-bit compare of a long register against a long loaded from memory.
// Folds the LoadL into the compare: cmpq reg, [mem].
11888instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
11889%{
11890  match(Set cr (CmpL op1 (LoadL op2)));
11891
11892  format %{ "cmpq    $op1, $op2" %}
11893  opcode(0x3B); /* Opcode 3B /r */
11894  ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
11895  ins_pipe(ialu_cr_reg_mem);
11896%}
11897
// Compare a long register against zero (immL0).  Implemented as
// testq src, src (opcode 0x85 with src used for both operands), so no
// immediate is encoded.
11898instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
11899%{
11900  match(Set cr (CmpL src zero));
11901
11902  format %{ "testq   $src, $src" %}
11903  opcode(0x85);
11904  ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
11905  ins_pipe(ialu_cr_reg_imm);
11906%}
11907
// Test (src & con) against zero for a long register and an immL32 mask.
// Folds the AndL and the compare-with-zero into one testq reg, imm32
// (opcode F7 /0 followed by a 32-bit constant); src itself is not modified
// by the matched pattern.
11908instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
11909%{
11910  match(Set cr (CmpL (AndL src con) zero));
11911
11912  format %{ "testq   $src, $con\t# long" %}
11913  opcode(0xF7, 0x00);
11914  ins_encode(REX_reg_wide(src), OpcP, reg_opc(src), Con32(con));
11915  ins_pipe(ialu_cr_reg_imm);
11916%}
11917
// Test (src & [mem]) against zero for a long register and a long in memory.
// Folds the LoadL, the AndL and the compare-with-zero into one
// testq reg, [mem] (opcode 0x85).
11918instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
11919%{
11920  match(Set cr (CmpL (AndL src (LoadL mem)) zero));
11921
11922  format %{ "testq   $src, $mem" %}
11923  opcode(0x85);
11924  ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
11925  ins_pipe(ialu_cr_reg_mem);
11926%}
11927
11928// Manifest a CmpL result in an integer register.  Very painful.
11929// This is the test to avoid.
// Three-way long compare materialised into an int register (CmpL3).
// Per the format string the sequence leaves -1 in dst when src1 < src2
// (the jl skips the rest), otherwise setne/movzbl leave 0 for equal and 1
// for greater.  The flags register is clobbered, hence KILL flags.
// The actual bytes come from the cmpl3_flag encoding class defined elsewhere.
11930instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
11931%{
11932  match(Set dst (CmpL3 src1 src2));
11933  effect(KILL flags);
11934
11935  ins_cost(275); // XXX
11936  format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
11937            "movl    $dst, -1\n\t"
11938            "jl,s    done\n\t"
11939            "setne   $dst\n\t"
11940            "movzbl  $dst, $dst\n\t"
11941    "done:" %}
11942  ins_encode(cmpl3_flag(src1, src2, dst));
11943  ins_pipe(pipe_slow);
11944%}
11945
11946//----------Max and Min--------------------------------------------------------
11947// Min Instructions
11948
// Helper for minI_rReg: conditional move "if greater" (opcode 0F 4F).
// It has no match rule, only an effect list, so it is used solely from the
// expand block of minI_rReg.  dst is both read and written (USE_DEF); src
// and the flags are only read.
11949instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
11950%{
11951  effect(USE_DEF dst, USE src, USE cr);
11952
11953  format %{ "cmovlgt $dst, $src\t# min" %}
11954  opcode(0x0F, 0x4F);
11955  ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
11956  ins_pipe(pipe_cmov_reg);
11957%}
11958
11959
// Integer minimum: dst = min(dst, src).
// Expands into a compare of dst with src followed by a conditional move that
// replaces dst with src when dst is greater.  The flags register is a
// temporary declared inside the expand block.
11960instruct minI_rReg(rRegI dst, rRegI src)
11961%{
11962  match(Set dst (MinI dst src));
11963
11964  ins_cost(200);
11965  expand %{
11966    rFlagsReg cr;
11967    compI_rReg(cr, dst, src);
11968    cmovI_reg_g(dst, src, cr);
11969  %}
11970%}
11971
// Helper for maxI_rReg: conditional move "if less" (opcode 0F 4C).
// It has no match rule, only an effect list, so it is used solely from the
// expand block of maxI_rReg.  dst is both read and written (USE_DEF); src
// and the flags are only read.
11972instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
11973%{
11974  effect(USE_DEF dst, USE src, USE cr);
11975
11976  format %{ "cmovllt $dst, $src\t# max" %}
11977  opcode(0x0F, 0x4C);
11978  ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
11979  ins_pipe(pipe_cmov_reg);
11980%}
11981
11982
// Integer maximum: dst = max(dst, src).
// Expands into a compare of dst with src followed by a conditional move that
// replaces dst with src when dst is less.  The flags register is a temporary
// declared inside the expand block.
11983instruct maxI_rReg(rRegI dst, rRegI src)
11984%{
11985  match(Set dst (MaxI dst src));
11986
11987  ins_cost(200);
11988  expand %{
11989    rFlagsReg cr;
11990    compI_rReg(cr, dst, src);
11991    cmovI_reg_l(dst, src, cr);
11992  %}
11993%}
11994
11995// ============================================================================
11996// Branch Instructions
11997
11998// Jump Direct - Label defines a relative address from JMP+1
// Unconditional jump (Goto), long form: opcode 0xE9 followed by the label
// displacement emitted by Lbl; size(5) fixes the instruction at 5 bytes.
// jmpDir_short is the short-offset replacement for this rule.
11999instruct jmpDir(label labl)
12000%{
12001  match(Goto);
12002  effect(USE labl);
12003
12004  ins_cost(300);
12005  format %{ "jmp     $labl" %}
12006  size(5);
12007  opcode(0xE9);
12008  ins_encode(OpcP, Lbl(labl));
12009  ins_pipe(pipe_jmp);
12010  ins_pc_relative(1);
12011%}
12012
12013// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Conditional branch (If) on signed flags, long form: a 6-byte instruction
// built from opcode bytes 0x0F, 0x80 with the condition supplied by cop,
// followed by the label displacement (see the Jcc encoding class).
// jmpCon_short is the short-offset replacement for this rule.
12014instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
12015%{
12016  match(If cop cr);
12017  effect(USE labl);
12018
12019  ins_cost(300);
12020  format %{ "j$cop     $labl" %}
12021  size(6);
12022  opcode(0x0F, 0x80);
12023  ins_encode(Jcc(cop, labl));
12024  ins_pipe(pipe_jcc);
12025  ins_pc_relative(1);
12026%}
12027
12028// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Back-branch of a counted loop (CountedLoopEnd) on signed flags, long form.
// Encoded exactly like jmpCon; only the matched ideal node differs.
12029instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
12030%{
12031  match(CountedLoopEnd cop cr);
12032  effect(USE labl);
12033
12034  ins_cost(300);
12035  format %{ "j$cop     $labl\t# loop end" %}
12036  size(6);
12037  opcode(0x0F, 0x80);
12038  ins_encode(Jcc(cop, labl));
12039  ins_pipe(pipe_jcc);
12040  ins_pc_relative(1);
12041%}
12042
12043// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Back-branch of a counted loop when the flags come from an unsigned compare
// (cmpOpU / rFlagsRegU), long 6-byte form.
12044instruct jmpLoopEndU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12045  match(CountedLoopEnd cop cmp);
12046  effect(USE labl);
12047
12048  ins_cost(300);
12049  format %{ "j$cop,u   $labl\t# loop end" %}
12050  size(6);
12051  opcode(0x0F, 0x80);
12052  ins_encode(Jcc(cop, labl));
12053  ins_pipe(pipe_jcc);
12054  ins_pc_relative(1);
12055%}
12056
// Back-branch of a counted loop for the cmpOpUCF / rFlagsRegUCF operand
// classes, long 6-byte form.  Encoded like jmpLoopEndU but with a lower
// ins_cost (200 versus 300).
// NOTE(review): "UCF" presumably denotes flags produced by an unordered
// floating-point compare -- confirm against the operand definitions.
12057instruct jmpLoopEndUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
12058  match(CountedLoopEnd cop cmp);
12059  effect(USE labl);
12060
12061  ins_cost(200);
12062  format %{ "j$cop,u   $labl\t# loop end" %}
12063  size(6);
12064  opcode(0x0F, 0x80);
12065  ins_encode(Jcc(cop, labl));
12066  ins_pipe(pipe_jcc);
12067  ins_pc_relative(1);
12068%}
12069
12070// Jump Direct Conditional - using unsigned comparison
// Conditional branch (If) on flags from an unsigned compare
// (cmpOpU / rFlagsRegU), long 6-byte form.
12071instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12072  match(If cop cmp);
12073  effect(USE labl);
12074
12075  ins_cost(300);
12076  format %{ "j$cop,u  $labl" %}
12077  size(6);
12078  opcode(0x0F, 0x80);
12079  ins_encode(Jcc(cop, labl));
12080  ins_pipe(pipe_jcc);
12081  ins_pc_relative(1);
12082%}
12083
// Conditional branch (If) for the cmpOpUCF / rFlagsRegUCF operand classes,
// long 6-byte form.  Encoded like jmpConU but with a lower ins_cost
// (200 versus 300).
// NOTE(review): "UCF" presumably denotes flags produced by an unordered
// floating-point compare -- confirm against the operand definitions.
12084instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
12085  match(If cop cmp);
12086  effect(USE labl);
12087
12088  ins_cost(200);
12089  format %{ "j$cop,u  $labl" %}
12090  size(6);
12091  opcode(0x0F, 0x80);
12092  ins_encode(Jcc(cop, labl));
12093  ins_pipe(pipe_jcc);
12094  ins_pc_relative(1);
12095%}
12096
// Conditional branch (If) for the cmpOpUCF2 operand class, long form.
// Emits two 6-byte conditional jumps (12 bytes in total): first a jump on
// the parity condition, then the jump on the requested condition.
//   notEqual: both jumps target $labl.
//   equal:    the parity jump skips over the second jump, which targets $labl.
// Any other condition code is rejected with ShouldNotReachHere().
12097instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
12098  match(If cop cmp);
12099  effect(USE labl);
12100
12101  ins_cost(200);
12102  format %{ $$template
12103    if ($cop$$cmpcode == Assembler::notEqual) {
12104      $$emit$$"jp,u   $labl\n\t"
12105      $$emit$$"j$cop,u   $labl"
12106    } else {
12107      $$emit$$"jp,u   done\n\t"
12108      $$emit$$"j$cop,u   $labl\n\t"
12109      $$emit$$"done:"
12110    }
12111  %}
12112  size(12);
12113  opcode(0x0F, 0x80);
12114  ins_encode %{
12115    Label* l = $labl$$label;
    // First jump: primary opcode byte, then the secondary byte combined with
    // the parity condition.
12116    $$$emit8$primary;
12117    emit_cc(cbuf, $secondary, Assembler::parity);
12118    int parity_disp = -1;
12119    if ($cop$$cmpcode == Assembler::notEqual) {
12120       // Displacement is relative to the end of this jump's 4-byte disp32 field.
12121       parity_disp = l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0;
12122    } else if ($cop$$cmpcode == Assembler::equal) {
       // Skip the 6-byte conditional jump that follows.
12123       parity_disp = 6;
12124    } else {
12125       ShouldNotReachHere();
12126    }
12127    emit_d32(cbuf, parity_disp);
    // Second jump: the requested condition, always targeting the label
    // (displacement 0 when no label is available).
12128    $$$emit8$primary;
12129    emit_cc(cbuf, $secondary, $cop$$cmpcode);
12130    int disp = l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0;
12131    emit_d32(cbuf, disp);
12132  %}
12133  ins_pipe(pipe_jcc);
12134  ins_pc_relative(1);
12135%}
12136
12137// ============================================================================
12138// The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
12139// superklass array for an instance of the superklass.  Set a hidden
12140// internal cache on a hit (cache is checked with exposed code in
12141// gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
12142// encoding ALSO sets flags.
12143
// Slow half of a subtype check that produces its result in a register.
// Operands are pinned to fixed registers: result in rdi, sub in rsi, super
// in rax, with rcx used as the scan counter; rcx and the flags are
// clobbered.  Per the format string the secondary-supers array of $sub is
// scanned for $super with repne scasq; on a hit the secondary-super cache is
// updated and the result is zeroed, on a miss the result is left non-zero.
// opcode(0x1) tells the shared enc_PartialSubtypeCheck encoding to emit the
// final XOR of rdi.
//
// The format string is disassembly text only; it previously misspelled
// arrayOopDesc as "arrayOopDex" and omitted the '#' in front of the
// "Hit: rdi zero" annotation.
12144instruct partialSubtypeCheck(rdi_RegP result,
12145                             rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
12146                             rFlagsReg cr)
12147%{
12148  match(Set result (PartialSubtypeCheck sub super));
12149  effect(KILL rcx, KILL cr);
12150
12151  ins_cost(1100);  // slightly larger than the next version
12152  format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
12153            "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
12154            "addq    rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
12155            "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
12156            "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
12157            "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
12158            "xorq    $result, $result\t\t# Hit: rdi zero\n\t"
12159    "miss:\t" %}
12160
12161  opcode(0x1); // Force a XOR of RDI
12162  ins_encode(enc_PartialSubtypeCheck());
12163  ins_pipe(pipe_slow);
12164%}
12165
// Slow half of a subtype check whose result is immediately compared with
// NULL: matches (CmpP (PartialSubtypeCheck sub super) zero) and delivers the
// outcome in the flags instead of a register.  rdi is still used as the
// scan pointer and is therefore killed together with rcx.  opcode(0x0)
// tells the shared enc_PartialSubtypeCheck encoding to skip the final XOR of
// rdi.  The cost is slightly lower than partialSubtypeCheck (1000 versus
// 1100).
//
// The format string is disassembly text only; it previously misspelled
// arrayOopDesc as "arrayOopDex".
12166instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr,
12167                                     rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
12168                                     immP0 zero,
12169                                     rdi_RegP result)
12170%{
12171  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12172  effect(KILL rcx, KILL result);
12173
12174  ins_cost(1000);
12175  format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
12176            "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
12177            "addq    rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
12178            "repne   scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"
12179            "jne,s   miss\t\t# Missed: flags nz\n\t"
12180            "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
12181    "miss:\t" %}
12182
12183  opcode(0x0); // No need to XOR RDI
12184  ins_encode(enc_PartialSubtypeCheck());
12185  ins_pipe(pipe_slow);
12186%}
12187
12188// ============================================================================
12189// Branch Instructions -- short offset versions
12190//
12191// These instructions are used to replace jumps of a long offset (the default
12192// match) with jumps of a shorter offset.  These instructions are all tagged
12193// with the ins_short_branch attribute, which causes the ADLC to suppress the
12194// match rules in general matching.  Instead, the ADLC generates a conversion
12195// method in the MachNode which can be used to do in-place replacement of the
12196// long variant with the shorter variant.  The compiler will determine if a
12197// branch can be taken by the is_short_branch_offset() predicate in the machine
12198// specific code section of the file.
12199
12200// Jump Direct - Label defines a relative address from JMP+1
// Short-offset form of jmpDir: opcode 0xEB plus the displacement emitted by
// LblShort, 2 bytes in total.  ins_short_branch(1) marks it as an in-place
// replacement for the long variant rather than a normally matched rule.
12201instruct jmpDir_short(label labl) %{
12202  match(Goto);
12203  effect(USE labl);
12204
12205  ins_cost(300);
12206  format %{ "jmp,s   $labl" %}
12207  size(2);
12208  opcode(0xEB);
12209  ins_encode(OpcP, LblShort(labl));
12210  ins_pipe(pipe_jmp);
12211  ins_pc_relative(1);
12212  ins_short_branch(1);
12213%}
12214
12215// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short-offset form of jmpCon: a 2-byte conditional jump built from base
// opcode 0x70 and the condition in cop (see the JccShort encoding class).
// Tagged with ins_short_branch so it only replaces the long variant.
12216instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
12217  match(If cop cr);
12218  effect(USE labl);
12219
12220  ins_cost(300);
12221  format %{ "j$cop,s   $labl" %}
12222  size(2);
12223  opcode(0x70);
12224  ins_encode(JccShort(cop, labl));
12225  ins_pipe(pipe_jcc);
12226  ins_pc_relative(1);
12227  ins_short_branch(1);
12228%}
12229
12230// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short-offset form of jmpLoopEnd: 2-byte conditional back-branch of a
// counted loop on signed flags.  Tagged with ins_short_branch so it only
// replaces the long variant.
12231instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
12232  match(CountedLoopEnd cop cr);
12233  effect(USE labl);
12234
12235  ins_cost(300);
12236  format %{ "j$cop,s   $labl\t# loop end" %}
12237  size(2);
12238  opcode(0x70);
12239  ins_encode(JccShort(cop, labl));
12240  ins_pipe(pipe_jcc);
12241  ins_pc_relative(1);
12242  ins_short_branch(1);
12243%}
12244
12245// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short-offset form of jmpLoopEndU: 2-byte conditional back-branch of a
// counted loop on flags from an unsigned compare.  Tagged with
// ins_short_branch so it only replaces the long variant.
12246instruct jmpLoopEndU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12247  match(CountedLoopEnd cop cmp);
12248  effect(USE labl);
12249
12250  ins_cost(300);
12251  format %{ "j$cop,us  $labl\t# loop end" %}
12252  size(2);
12253  opcode(0x70);
12254  ins_encode(JccShort(cop, labl));
12255  ins_pipe(pipe_jcc);
12256  ins_pc_relative(1);
12257  ins_short_branch(1);
12258%}
12259
// Short-offset form of jmpLoopEndUCF: 2-byte conditional back-branch of a
// counted loop for the cmpOpUCF / rFlagsRegUCF operand classes.
// NOTE(review): the long variant jmpLoopEndUCF uses ins_cost(200) while this
// one uses 300; short-branch rules are excluded from general matching, so the
// difference is presumably harmless -- confirm.
12260instruct jmpLoopEndUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
12261  match(CountedLoopEnd cop cmp);
12262  effect(USE labl);
12263
12264  ins_cost(300);
12265  format %{ "j$cop,us  $labl\t# loop end" %}
12266  size(2);
12267  opcode(0x70);
12268  ins_encode(JccShort(cop, labl));
12269  ins_pipe(pipe_jcc);
12270  ins_pc_relative(1);
12271  ins_short_branch(1);
12272%}
12273
12274// Jump Direct Conditional - using unsigned comparison
// Short-offset form of jmpConU: 2-byte conditional branch on flags from an
// unsigned compare.  Tagged with ins_short_branch so it only replaces the
// long variant.
12275instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12276  match(If cop cmp);
12277  effect(USE labl);
12278
12279  ins_cost(300);
12280  format %{ "j$cop,us  $labl" %}
12281  size(2);
12282  opcode(0x70);
12283  ins_encode(JccShort(cop, labl));
12284  ins_pipe(pipe_jcc);
12285  ins_pc_relative(1);
12286  ins_short_branch(1);
12287%}
12288
// Short-offset form of jmpConUCF: 2-byte conditional branch for the
// cmpOpUCF / rFlagsRegUCF operand classes.
// NOTE(review): the long variant jmpConUCF uses ins_cost(200) while this one
// uses 300; short-branch rules are excluded from general matching, so the
// difference is presumably harmless -- confirm.
12289instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
12290  match(If cop cmp);
12291  effect(USE labl);
12292
12293  ins_cost(300);
12294  format %{ "j$cop,us  $labl" %}
12295  size(2);
12296  opcode(0x70);
12297  ins_encode(JccShort(cop, labl));
12298  ins_pipe(pipe_jcc);
12299  ins_pc_relative(1);
12300  ins_short_branch(1);
12301%}
12302
// Short-offset form of jmpConUCF2: two 2-byte conditional jumps (4 bytes in
// total), first on the parity condition and then on the requested condition.
//   notEqual: both jumps target $labl.
//   equal:    the parity jump skips over the second jump, which targets $labl.
// Any other condition code is rejected with ShouldNotReachHere().
12303instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
12304  match(If cop cmp);
12305  effect(USE labl);
12306
12307  ins_cost(300);
12308  format %{ $$template
12309    if ($cop$$cmpcode == Assembler::notEqual) {
12310      $$emit$$"jp,u,s   $labl\n\t"
12311      $$emit$$"j$cop,u,s   $labl"
12312    } else {
12313      $$emit$$"jp,u,s   done\n\t"
12314      $$emit$$"j$cop,u,s  $labl\n\t"
12315      $$emit$$"done:"
12316    }
12317  %}
12318  size(4);
12319  opcode(0x70);
12320  ins_encode %{
12321    Label* l = $labl$$label;
    // First jump: primary opcode combined with the parity condition.
12322    emit_cc(cbuf, $primary, Assembler::parity);
12323    int parity_disp = -1;
12324    if ($cop$$cmpcode == Assembler::notEqual) {
      // Displacement is relative to the end of this jump's 1-byte disp8 field.
12325      parity_disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0;
12326    } else if ($cop$$cmpcode == Assembler::equal) {
      // Skip the 2-byte conditional jump that follows.
12327      parity_disp = 2;
12328    } else {
12329      ShouldNotReachHere();
12330    }
12331    emit_d8(cbuf, parity_disp);
    // Second jump: the requested condition, always targeting the label.
12332    emit_cc(cbuf, $primary, $cop$$cmpcode);
12333    int disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0;
12334    emit_d8(cbuf, disp);
    // Both displacements must fit in a signed byte; note that these checks run
    // after the bytes have already been emitted.
12335    assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
12336    assert(-128 <= parity_disp && parity_disp <= 127, "Displacement too large for short jmp");
12337  %}
12338  ins_pipe(pipe_jcc);
12339  ins_pc_relative(1);
12340  ins_short_branch(1);
12341%}
12342
12343// ============================================================================
12344// inlined locking and unlocking
12345
// Inlined monitor-enter fast path: matches (FastLock object box) and reports
// the outcome in the flags register.  tmp (pinned to rax) and scr are
// scratch registers declared as TEMP.  The code itself is produced by the
// Fast_Lock encoding class defined elsewhere.
12346instruct cmpFastLock(rFlagsReg cr,
12347                     rRegP object, rRegP box, rax_RegI tmp, rRegP scr)
12348%{
12349  match(Set cr (FastLock object box));
12350  effect(TEMP tmp, TEMP scr);
12351
12352  ins_cost(300);
12353  format %{ "fastlock $object,$box,$tmp,$scr" %}
12354  ins_encode(Fast_Lock(object, box, tmp, scr));
12355  ins_pipe(pipe_slow);
12356  ins_pc_relative(1);
12357%}
12358
// Inlined monitor-exit fast path: matches (FastUnlock object box) and
// reports the outcome in the flags register.  box is pinned to rax and tmp
// is a scratch register declared as TEMP.  The code itself is produced by
// the Fast_Unlock encoding class defined elsewhere.
12359instruct cmpFastUnlock(rFlagsReg cr,
12360                       rRegP object, rax_RegP box, rRegP tmp)
12361%{
12362  match(Set cr (FastUnlock object box));
12363  effect(TEMP tmp);
12364
12365  ins_cost(300);
12366  format %{ "fastunlock $object, $box, $tmp" %}
12367  ins_encode(Fast_Unlock(object, box, tmp));
12368  ins_pipe(pipe_slow);
12369  ins_pc_relative(1);
12370%}
12371
12372
12373// ============================================================================
12374// Safepoint Instructions
// Safepoint poll: per the format string, a RIP-relative testl against the
// polling page.  The instruction is fixed at 6 bytes and clobbers the flags
// (KILL cr).  The bytes are produced by the enc_safepoint_poll encoding
// class defined elsewhere.
12375instruct safePoint_poll(rFlagsReg cr)
12376%{
12377  match(SafePoint);
12378  effect(KILL cr);
12379
12380  format %{ "testl   rax, [rip + #offset_to_poll_page]\t"
12381            "# Safepoint: poll for GC" %}
12382  size(6); // Opcode + ModRM + Disp32 == 6 bytes
12383  ins_cost(125);
12384  ins_encode(enc_safepoint_poll);
12385  ins_pipe(ialu_reg_mem);
12386%}
12387
12388// ============================================================================
12389// Procedure Call/Return Instructions
12390// Call Java Static Instruction
12391// Note: If this code changes, the corresponding ret_addr_offset() and
12392//       compute_padding() functions will have to be adjusted.
// Direct call to a statically bound Java method (CallStaticJava).
// Emitted via the Java_Static_Call encoding class (opcode E8) followed by
// call_epilog.
// NOTE(review): ins_alignment(4) presumably exists so the call displacement
// can be patched safely -- confirm with compute_padding().
12393instruct CallStaticJavaDirect(method meth)
12394%{
12395  match(CallStaticJava);
12396  effect(USE meth);
12397
12398  ins_cost(300);
12399  format %{ "call,static " %}
12400  opcode(0xE8); /* E8 cd */
12401  ins_encode(Java_Static_Call(meth), call_epilog);
12402  ins_pipe(pipe_slow);
12403  ins_pc_relative(1);
12404  ins_alignment(4);
12405%}
12406
12407// Call Java Dynamic Instruction
12408// Note: If this code changes, the corresponding ret_addr_offset() and
12409//       compute_padding() functions will have to be adjusted.
// Call to a dynamically bound Java method (CallDynamicJava).  Per the
// format string, rax is first loaded with Universe::non_oop_word() and then
// the call (opcode E8) is emitted; both come from the Java_Dynamic_Call
// encoding class, followed by call_epilog.
// NOTE(review): ins_alignment(4) presumably exists so the call displacement
// can be patched safely -- confirm with compute_padding().
12410instruct CallDynamicJavaDirect(method meth)
12411%{
12412  match(CallDynamicJava);
12413  effect(USE meth);
12414
12415  ins_cost(300);
12416  format %{ "movq    rax, #Universe::non_oop_word()\n\t"
12417            "call,dynamic " %}
12418  opcode(0xE8); /* E8 cd */
12419  ins_encode(Java_Dynamic_Call(meth), call_epilog);
12420  ins_pipe(pipe_slow);
12421  ins_pc_relative(1);
12422  ins_alignment(4);
12423%}
12424
12425// Call Runtime Instruction
// Call from compiled Java code into the VM runtime (CallRuntime), emitted
// via the Java_To_Runtime encoding class.  Unlike the Java call rules above,
// no call_epilog and no alignment attribute are specified here.
12426instruct CallRuntimeDirect(method meth)
12427%{
12428  match(CallRuntime);
12429  effect(USE meth);
12430
12431  ins_cost(300);
12432  format %{ "call,runtime " %}
12433  opcode(0xE8); /* E8 cd */
12434  ins_encode(Java_To_Runtime(meth));
12435  ins_pipe(pipe_slow);
12436  ins_pc_relative(1);
12437%}
12438
12439// Call runtime without safepoint
// Leaf call into the runtime (CallLeaf), i.e. a runtime call without a
// safepoint.  Uses the same Java_To_Runtime encoding as CallRuntimeDirect;
// only the matched ideal node and the format text differ.
12440instruct CallLeafDirect(method meth)
12441%{
12442  match(CallLeaf);
12443  effect(USE meth);
12444
12445  ins_cost(300);
12446  format %{ "call_leaf,runtime " %}
12447  opcode(0xE8); /* E8 cd */
12448  ins_encode(Java_To_Runtime(meth));
12449  ins_pipe(pipe_slow);
12450  ins_pc_relative(1);
12451%}
12452
12453// Call runtime without safepoint
// Leaf call into the runtime for the CallLeafNoFP ideal node (no safepoint).
// Uses the same Java_To_Runtime encoding as CallLeafDirect; only the matched
// ideal node and the format text differ.
12454instruct CallLeafNoFPDirect(method meth)
12455%{
12456  match(CallLeafNoFP);
12457  effect(USE meth);
12458
12459  ins_cost(300);
12460  format %{ "call_leaf_nofp,runtime " %}
12461  opcode(0xE8); /* E8 cd */
12462  ins_encode(Java_To_Runtime(meth));
12463  ins_pipe(pipe_slow);
12464  ins_pc_relative(1);
12465%}
12466
12467// Return Instruction
12468// Remove the return address & jump to it.
12469// NOTE(review): this comment used to state that a nop is always emitted
12470// after the ret to leave room for safepoint patching.  The encoding below
// emits only the single opcode byte 0xC3 (OpcP); confirm whether any such
// padding is added elsewhere before relying on it.
12471instruct Ret()
12472%{
12473  match(Return);
12474
12475  format %{ "ret" %}
12476  opcode(0xC3);
12477  ins_encode(OpcP);
12478  ins_pipe(pipe_jmp);
12479%}
12480
12481// Tail Call; Jump from runtime stub to Java code.
12482// Also known as an 'interprocedural jump'.
12483// Target of jump will eventually return to caller.
12484// TailJump below removes the return address.
// Tail call: indirect jump through a register (opcode FF /4) that leaves the
// return address on the stack.  The method oop is pinned to rbx, and the
// jump target must come from the no_rbp_RegP register class.
12485instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_oop)
12486%{
12487  match(TailCall jump_target method_oop);
12488
12489  ins_cost(300);
12490  format %{ "jmp     $jump_target\t# rbx holds method oop" %}
12491  opcode(0xFF, 0x4); /* Opcode FF /4 */
12492  ins_encode(REX_reg(jump_target), OpcP, reg_opc(jump_target));
12493  ins_pipe(pipe_jmp);
12494%}
12495
12496// Tail Jump; remove the return address; jump to target.
12497// TailCall above leaves the return address around.
// Tail jump: pops the return address into rdx (single byte 0x5a) and then
// jumps indirectly through a register (opcode FF /4).  The exception oop is
// pinned to rax, and the jump target must come from the no_rbp_RegP class.
12498instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
12499%{
12500  match(TailJump jump_target ex_oop);
12501
12502  ins_cost(300);
12503  format %{ "popq    rdx\t# pop return address\n\t"
12504            "jmp     $jump_target" %}
12505  opcode(0xFF, 0x4); /* Opcode FF /4 */
12506  ins_encode(Opcode(0x5a), // popq rdx
12507             REX_reg(jump_target), OpcP, reg_opc(jump_target));
12508  ins_pipe(pipe_jmp);
12509%}
12510
12511// Create exception oop: created by stack-crawling runtime code.
12512// Created exception is now available to this handler, and is setup
12513// just prior to jumping to this handler.  No code emitted.
// Placeholder for the CreateEx ideal node: binds the incoming exception oop
// to rax.  It has size(0) and an empty ins_encode, so no machine code is
// generated for it.
12514instruct CreateException(rax_RegP ex_oop)
12515%{
12516  match(Set ex_oop (CreateEx));
12517
12518  size(0);
12519  // use the following format syntax
12520  format %{ "# exception oop is in rax; no code emitted" %}
12521  ins_encode();
12522  ins_pipe(empty);
12523%}
12524
12525// Rethrow exception:
12526// The exception oop will come in the first argument position.
12527// Then JUMP (not call) to the rethrow stub code.
// Rethrow: jumps (does not call) to the rethrow stub.  The jump is produced
// by the enc_rethrow encoding class defined elsewhere; this rule takes no
// operands.
12528instruct RethrowException()
12529%{
12530  match(Rethrow);
12531
12532  // use the following format syntax
12533  format %{ "jmp     rethrow_stub" %}
12534  ins_encode(enc_rethrow);
12535  ins_pipe(pipe_jmp);
12536%}
12537
12538
12539//----------PEEPHOLE RULES-----------------------------------------------------
12540// These must follow all instruction definitions as they use the names
12541// defined in the instructions definitions.
12542//
12543// peepmatch ( root_instr_name [preceding_instruction]* );
12544//
12545// peepconstraint %{
12546// (instruction_number.operand_name relational_op instruction_number.operand_name
12547//  [, ...] );
12548// // instruction numbers are zero-based using left to right order in peepmatch
12549//
12550// peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
12551// // provide an instruction_number.operand_name for each operand that appears
12552// // in the replacement instruction's match rule
12553//
12554// ---------VM FLAGS---------------------------------------------------------
12555//
12556// All peephole optimizations can be turned off using -XX:-OptoPeephole
12557//
12558// Each peephole rule is given an identifying number starting with zero and
12559// increasing by one in the order seen by the parser.  An individual peephole
12560// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
12561// on the command-line.
12562//
12563// ---------CURRENT LIMITATIONS----------------------------------------------
12564//
12565// Only match adjacent instructions in same basic block
12566// Only equality constraints
12567// Only constraints between operands, not (0.dest_reg == RAX_enc)
12568// Only one replacement instruction
12569//
12570// ---------EXAMPLE----------------------------------------------------------
12571//
12572// // pertinent parts of existing instructions in architecture description
12573// instruct movI(rRegI dst, rRegI src)
12574// %{
12575//   match(Set dst (CopyI src));
12576// %}
12577//
12578// instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
12579// %{
12580//   match(Set dst (AddI dst src));
12581//   effect(KILL cr);
12582// %}
12583//
12584// // Change (inc mov) to lea
12585// peephole %{
12586//   // increment preceded by register-register move
12587//   peepmatch ( incI_rReg movI );
12588//   // require that the destination register of the increment
12589//   // match the destination register of the move
12590//   peepconstraint ( 0.dst == 1.dst );
12591//   // construct a replacement instruction that sets
12592//   // the destination to ( move's source register + one )
12593//   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
12594// %}
12595//
12596
12597// Implementation no longer uses movX instructions since
12598// machine-independent system no longer uses CopyX nodes.
12599//
12600// peephole
12601// %{
12602//   peepmatch (incI_rReg movI);
12603//   peepconstraint (0.dst == 1.dst);
12604//   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12605// %}
12606
12607// peephole
12608// %{
12609//   peepmatch (decI_rReg movI);
12610//   peepconstraint (0.dst == 1.dst);
12611//   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12612// %}
12613
12614// peephole
12615// %{
12616//   peepmatch (addI_rReg_imm movI);
12617//   peepconstraint (0.dst == 1.dst);
12618//   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12619// %}
12620
12621// peephole
12622// %{
12623//   peepmatch (incL_rReg movL);
12624//   peepconstraint (0.dst == 1.dst);
12625//   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12626// %}
12627
12628// peephole
12629// %{
12630//   peepmatch (decL_rReg movL);
12631//   peepconstraint (0.dst == 1.dst);
12632//   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12633// %}
12634
12635// peephole
12636// %{
12637//   peepmatch (addL_rReg_imm movL);
12638//   peepconstraint (0.dst == 1.dst);
12639//   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12640// %}
12641
12642// peephole
12643// %{
12644//   peepmatch (addP_rReg_imm movP);
12645//   peepconstraint (0.dst == 1.dst);
12646//   peepreplace (leaP_rReg_imm(0.dst 1.src 0.src));
12647// %}
12648
12649// // Change load of spilled value to only a spill
12650// instruct storeI(memory mem, rRegI src)
12651// %{
12652//   match(Set mem (StoreI mem src));
12653// %}
12654//
12655// instruct loadI(rRegI dst, memory mem)
12656// %{
12657//   match(Set dst (LoadI mem));
12658// %}
12659//
12660
// Peephole: an int load (instruction 0, the root) immediately preceded by an
// int store (instruction 1) where the load's destination register is the
// register just stored and both use the same memory operand.  The pair is
// replaced by the store alone, dropping the redundant reload.
12661peephole
12662%{
12663  peepmatch (loadI storeI);
12664  peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
12665  peepreplace (storeI(1.mem 1.mem 1.src));
12666%}
12667
// Peephole: a long load (instruction 0, the root) immediately preceded by a
// long store (instruction 1) where the load's destination register is the
// register just stored and both use the same memory operand.  The pair is
// replaced by the store alone, dropping the redundant reload.
12668peephole
12669%{
12670  peepmatch (loadL storeL);
12671  peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
12672  peepreplace (storeL(1.mem 1.mem 1.src));
12673%}
12674
12675//----------SMARTSPILL RULES---------------------------------------------------
12676// These must follow all instruction definitions as they use the names
12677// defined in the instructions definitions.
12678