x86_64.ad revision 0:a61af66fc99e
1//
2// Copyright 2003-2007 Sun Microsystems, Inc.  All Rights Reserved.
3// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4//
5// This code is free software; you can redistribute it and/or modify it
6// under the terms of the GNU General Public License version 2 only, as
7// published by the Free Software Foundation.
8//
9// This code is distributed in the hope that it will be useful, but WITHOUT
10// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12// version 2 for more details (a copy is included in the LICENSE file that
13// accompanied this code).
14//
15// You should have received a copy of the GNU General Public License version
16// 2 along with this work; if not, write to the Free Software Foundation,
17// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18//
19// Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
20// CA 95054 USA or visit www.sun.com if you need additional information or
21// have any questions.
22//
23//
24
25// AMD64 Architecture Description File
26
27//----------REGISTER DEFINITION BLOCK------------------------------------------
28// This information is used by the matcher and the register allocator to
29// describe individual registers and classes of registers within the target
30// architecture.
31
32register %{
33//----------Architecture Description Register Definitions----------------------
34// General Registers
35// "reg_def"  name ( register save type, C convention save type,
36//                   ideal register type, encoding );
37// Register Save Types:
38//
39// NS  = No-Save:       The register allocator assumes that these registers
40//                      can be used without saving upon entry to the method, &
41//                      that they do not need to be saved at call sites.
42//
43// SOC = Save-On-Call:  The register allocator assumes that these registers
44//                      can be used without saving upon entry to the method,
45//                      but that they must be saved at call sites.
46//
47// SOE = Save-On-Entry: The register allocator assumes that these registers
48//                      must be saved before using them upon entry to the
49//                      method, but they do not need to be saved at call
50//                      sites.
51//
52// AS  = Always-Save:   The register allocator assumes that these registers
53//                      must be saved before using them upon entry to the
54//                      method, & that they must be saved at call sites.
55//
56// Ideal Register Type is used to determine how to save & restore a
57// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
58// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
59//
60// The encoding number is the actual bit-pattern placed into the opcodes.
61
62// General Registers
63// R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
64// used as byte registers)
65
66// Previously set RBX, RSI, and RDI as save-on-entry for java code
67// Turn off SOE in java-code due to frequent use of uncommon-traps.
68// Now that allocator is better, turn on RSI and RDI as SOE registers.
69
70reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
71reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());
72
73reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
74reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());
75
76reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
77reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());
78
79reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
80reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());
81
82reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
83reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());
84
85// now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
86reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
87reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());
88
89#ifdef _WIN64
90
91reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
92reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());
93
94reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
95reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());
96
97#else
98
99reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
100reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());
101
102reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
103reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());
104
105#endif
106
107reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
108reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());
109
110reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
111reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());
112
113reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
114reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
115
116reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
117reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
118
119reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
120reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
121
122reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
123reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
124
125reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
126reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
127
128reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
129reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
130
131
132// Floating Point Registers
133
134// XMM registers.  128-bit registers or 4 words each, labeled (a)-d.
135// Word a in each register holds a Float, words ab hold a Double.  We
136// currently do not use the SIMD capabilities, so registers cd are
137// unused at the moment.
138// XMM8-XMM15 must be encoded with REX.
139// Linux ABI:   No register preserved across function calls
140//              XMM0-XMM7 might hold parameters
141// Windows ABI: XMM6-XMM15 preserved across function calls
142//              XMM0-XMM3 might hold parameters
143
144reg_def XMM0   (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg());
145reg_def XMM0_H (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg()->next());
146
147reg_def XMM1   (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg());
148reg_def XMM1_H (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg()->next());
149
150reg_def XMM2   (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg());
151reg_def XMM2_H (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg()->next());
152
153reg_def XMM3   (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg());
154reg_def XMM3_H (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg()->next());
155
156reg_def XMM4   (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg());
157reg_def XMM4_H (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg()->next());
158
159reg_def XMM5   (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg());
160reg_def XMM5_H (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg()->next());
161
162#ifdef _WIN64
163
164reg_def XMM6   (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg());
165reg_def XMM6_H (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg()->next());
166
167reg_def XMM7   (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg());
168reg_def XMM7_H (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg()->next());
169
170reg_def XMM8   (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg());
171reg_def XMM8_H (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg()->next());
172
173reg_def XMM9   (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg());
174reg_def XMM9_H (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg()->next());
175
176reg_def XMM10  (SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
177reg_def XMM10_H(SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next());
178
179reg_def XMM11  (SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
180reg_def XMM11_H(SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next());
181
182reg_def XMM12  (SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
183reg_def XMM12_H(SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next());
184
185reg_def XMM13  (SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
186reg_def XMM13_H(SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next());
187
188reg_def XMM14  (SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
189reg_def XMM14_H(SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next());
190
191reg_def XMM15  (SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
192reg_def XMM15_H(SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next());
193
194#else
195
196reg_def XMM6   (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg());
197reg_def XMM6_H (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg()->next());
198
199reg_def XMM7   (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg());
200reg_def XMM7_H (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg()->next());
201
202reg_def XMM8   (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg());
203reg_def XMM8_H (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg()->next());
204
205reg_def XMM9   (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg());
206reg_def XMM9_H (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg()->next());
207
208reg_def XMM10  (SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
209reg_def XMM10_H(SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next());
210
211reg_def XMM11  (SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
212reg_def XMM11_H(SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next());
213
214reg_def XMM12  (SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
215reg_def XMM12_H(SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next());
216
217reg_def XMM13  (SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
218reg_def XMM13_H(SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next());
219
220reg_def XMM14  (SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
221reg_def XMM14_H(SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next());
222
223reg_def XMM15  (SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
224reg_def XMM15_H(SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next());
225
226#endif // _WIN64
227
228reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
229
230// Specify priority of register selection within phases of register
231// allocation.  Highest priority is first.  A useful heuristic is to
232// give registers a low priority when they are required by machine
233// instructions, like EAX and EDX on I486, and choose no-save registers
234// before save-on-call, & save-on-call before save-on-entry.  Registers
235// which participate in fixed calling sequences should come last.
236// Registers which are used as pairs must fall on an even boundary.
237
238alloc_class chunk0(R10,         R10_H,
239                   R11,         R11_H,
240                   R8,          R8_H,
241                   R9,          R9_H,
242                   R12,         R12_H,
243                   RCX,         RCX_H,
244                   RBX,         RBX_H,
245                   RDI,         RDI_H,
246                   RDX,         RDX_H,
247                   RSI,         RSI_H,
248                   RAX,         RAX_H,
249                   RBP,         RBP_H,
250                   R13,         R13_H,
251                   R14,         R14_H,
252                   R15,         R15_H,
253                   RSP,         RSP_H);
254
255// XXX probably use 8-15 first on Linux
256alloc_class chunk1(XMM0,  XMM0_H,
257                   XMM1,  XMM1_H,
258                   XMM2,  XMM2_H,
259                   XMM3,  XMM3_H,
260                   XMM4,  XMM4_H,
261                   XMM5,  XMM5_H,
262                   XMM6,  XMM6_H,
263                   XMM7,  XMM7_H,
264                   XMM8,  XMM8_H,
265                   XMM9,  XMM9_H,
266                   XMM10, XMM10_H,
267                   XMM11, XMM11_H,
268                   XMM12, XMM12_H,
269                   XMM13, XMM13_H,
270                   XMM14, XMM14_H,
271                   XMM15, XMM15_H);
272
273alloc_class chunk2(RFLAGS);
274
275
276//----------Architecture Description Register Classes--------------------------
277// Several register classes are automatically defined based upon information in
278// this architecture description.
279// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
280// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
281// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
282// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
283//
284
285// Class for all pointer registers (including RSP)
286reg_class any_reg(RAX, RAX_H,
287                  RDX, RDX_H,
288                  RBP, RBP_H,
289                  RDI, RDI_H,
290                  RSI, RSI_H,
291                  RCX, RCX_H,
292                  RBX, RBX_H,
293                  RSP, RSP_H,
294                  R8,  R8_H,
295                  R9,  R9_H,
296                  R10, R10_H,
297                  R11, R11_H,
298                  R12, R12_H,
299                  R13, R13_H,
300                  R14, R14_H,
301                  R15, R15_H);
302
303// Class for all pointer registers except RSP
304reg_class ptr_reg(RAX, RAX_H,
305                  RDX, RDX_H,
306                  RBP, RBP_H,
307                  RDI, RDI_H,
308                  RSI, RSI_H,
309                  RCX, RCX_H,
310                  RBX, RBX_H,
311                  R8,  R8_H,
312                  R9,  R9_H,
313                  R10, R10_H,
314                  R11, R11_H,
315                  R12, R12_H,
316                  R13, R13_H,
317                  R14, R14_H);
318
319// Class for all pointer registers except RAX and RSP
320reg_class ptr_no_rax_reg(RDX, RDX_H,
321                         RBP, RBP_H,
322                         RDI, RDI_H,
323                         RSI, RSI_H,
324                         RCX, RCX_H,
325                         RBX, RBX_H,
326                         R8,  R8_H,
327                         R9,  R9_H,
328                         R10, R10_H,
329                         R11, R11_H,
330                         R12, R12_H,
331                         R13, R13_H,
332                         R14, R14_H);
333
// Class for all pointer registers except RBP (RSP and R15 are not listed either)
334reg_class ptr_no_rbp_reg(RDX, RDX_H,
335                         RAX, RAX_H,
336                         RDI, RDI_H,
337                         RSI, RSI_H,
338                         RCX, RCX_H,
339                         RBX, RBX_H,
340                         R8,  R8_H,
341                         R9,  R9_H,
342                         R10, R10_H,
343                         R11, R11_H,
344                         R12, R12_H,
345                         R13, R13_H,
346                         R14, R14_H);
347
348// Class for all pointer registers except RAX, RBX and RSP
349reg_class ptr_no_rax_rbx_reg(RDX, RDX_H,
350                             RBP, RBP_H,
351                             RDI, RDI_H,
352                             RSI, RSI_H,
353                             RCX, RCX_H,
354                             R8,  R8_H,
355                             R9,  R9_H,
356                             R10, R10_H,
357                             R11, R11_H,
358                             R12, R12_H,
359                             R13, R13_H,
360                             R14, R14_H);
361
362// Singleton class for RAX pointer register
363reg_class ptr_rax_reg(RAX, RAX_H);
364
365// Singleton class for RBX pointer register
366reg_class ptr_rbx_reg(RBX, RBX_H);
367
368// Singleton class for RSI pointer register
369reg_class ptr_rsi_reg(RSI, RSI_H);
370
371// Singleton class for RDI pointer register
372reg_class ptr_rdi_reg(RDI, RDI_H);
373
374// Singleton class for RBP pointer register
375reg_class ptr_rbp_reg(RBP, RBP_H);
376
377// Singleton class for stack pointer
378reg_class ptr_rsp_reg(RSP, RSP_H);
379
380// Singleton class for TLS pointer
381reg_class ptr_r15_reg(R15, R15_H);
382
383// Class for all long registers (except RSP)
384reg_class long_reg(RAX, RAX_H,
385                   RDX, RDX_H,
386                   RBP, RBP_H,
387                   RDI, RDI_H,
388                   RSI, RSI_H,
389                   RCX, RCX_H,
390                   RBX, RBX_H,
391                   R8,  R8_H,
392                   R9,  R9_H,
393                   R10, R10_H,
394                   R11, R11_H,
395                   R12, R12_H,
396                   R13, R13_H,
397                   R14, R14_H);
398
399// Class for all long registers except RAX, RDX (and RSP)
400reg_class long_no_rax_rdx_reg(RBP, RBP_H,
401                              RDI, RDI_H,
402                              RSI, RSI_H,
403                              RCX, RCX_H,
404                              RBX, RBX_H,
405                              R8,  R8_H,
406                              R9,  R9_H,
407                              R10, R10_H,
408                              R11, R11_H,
409                              R12, R12_H,
410                              R13, R13_H,
411                              R14, R14_H);
412
413// Class for all long registers except RCX (and RSP)
414reg_class long_no_rcx_reg(RBP, RBP_H,
415                          RDI, RDI_H,
416                          RSI, RSI_H,
417                          RAX, RAX_H,
418                          RDX, RDX_H,
419                          RBX, RBX_H,
420                          R8,  R8_H,
421                          R9,  R9_H,
422                          R10, R10_H,
423                          R11, R11_H,
424                          R12, R12_H,
425                          R13, R13_H,
426                          R14, R14_H);
427
428// Class for all long registers except RAX (and RSP)
429reg_class long_no_rax_reg(RBP, RBP_H,
430                          RDX, RDX_H,
431                          RDI, RDI_H,
432                          RSI, RSI_H,
433                          RCX, RCX_H,
434                          RBX, RBX_H,
435                          R8,  R8_H,
436                          R9,  R9_H,
437                          R10, R10_H,
438                          R11, R11_H,
439                          R12, R12_H,
440                          R13, R13_H,
441                          R14, R14_H);
442
443// Singleton class for RAX long register
444reg_class long_rax_reg(RAX, RAX_H);
445
446// Singleton class for RCX long register
447reg_class long_rcx_reg(RCX, RCX_H);
448
449// Singleton class for RDX long register
450reg_class long_rdx_reg(RDX, RDX_H);
451
452// Class for all int registers (except RSP)
453reg_class int_reg(RAX,
454                  RDX,
455                  RBP,
456                  RDI,
457                  RSI,
458                  RCX,
459                  RBX,
460                  R8,
461                  R9,
462                  R10,
463                  R11,
464                  R12,
465                  R13,
466                  R14);
467
468// Class for all int registers except RCX (and RSP)
469reg_class int_no_rcx_reg(RAX,
470                         RDX,
471                         RBP,
472                         RDI,
473                         RSI,
474                         RBX,
475                         R8,
476                         R9,
477                         R10,
478                         R11,
479                         R12,
480                         R13,
481                         R14);
482
483// Class for all int registers except RAX, RDX (and RSP)
// Members are the low (32-bit) halves only; RAX, RDX, RSP and R15 are not
// listed.  Every entry is comma-separated (the separator after RDI was
// previously missing).
reg_class int_no_rax_rdx_reg(RBP,
                             RDI,
                             RSI,
                             RCX,
                             RBX,
                             R8,
                             R9,
                             R10,
                             R11,
                             R12,
                             R13,
                             R14);
496
497// Singleton class for RAX int register
498reg_class int_rax_reg(RAX);
499
500// Singleton class for RBX int register
501reg_class int_rbx_reg(RBX);
502
503// Singleton class for RCX int register
504reg_class int_rcx_reg(RCX);
505
506// Singleton class for RDX int register
507reg_class int_rdx_reg(RDX);
508
509// Singleton class for RDI int register
510reg_class int_rdi_reg(RDI);
511
512// Singleton class for instruction pointer
513// reg_class ip_reg(RIP);
514
515// Singleton class for condition codes
516reg_class int_flags(RFLAGS);
517
518// Class for all float registers
519reg_class float_reg(XMM0,
520                    XMM1,
521                    XMM2,
522                    XMM3,
523                    XMM4,
524                    XMM5,
525                    XMM6,
526                    XMM7,
527                    XMM8,
528                    XMM9,
529                    XMM10,
530                    XMM11,
531                    XMM12,
532                    XMM13,
533                    XMM14,
534                    XMM15);
535
536// Class for all double registers
537reg_class double_reg(XMM0,  XMM0_H,
538                     XMM1,  XMM1_H,
539                     XMM2,  XMM2_H,
540                     XMM3,  XMM3_H,
541                     XMM4,  XMM4_H,
542                     XMM5,  XMM5_H,
543                     XMM6,  XMM6_H,
544                     XMM7,  XMM7_H,
545                     XMM8,  XMM8_H,
546                     XMM9,  XMM9_H,
547                     XMM10, XMM10_H,
548                     XMM11, XMM11_H,
549                     XMM12, XMM12_H,
550                     XMM13, XMM13_H,
551                     XMM14, XMM14_H,
552                     XMM15, XMM15_H);
553%}
554
555
556//----------SOURCE BLOCK-------------------------------------------------------
557// This is a block of C++ code which provides values, functions, and
558// definitions necessary in the rest of the architecture description
559source %{
560#define   RELOC_IMM64    Assembler::imm64_operand
561#define   RELOC_DISP32   Assembler::disp32_operand
562
563#define __ _masm.
564
565// !!!!! Special hack to get all types of calls to specify the byte offset
566//       from the start of the call to the point where the return address
567//       will point.
568int MachCallStaticJavaNode::ret_addr_offset()
569{
570  return 5; // 5 bytes from start of call to where return address points
571}
572
573int MachCallDynamicJavaNode::ret_addr_offset()
574{
575  return 15; // 15 bytes from start of call to where return address points
576}
577
578// In os_cpu .ad file
579// int MachCallRuntimeNode::ret_addr_offset()
580
581// Indicate if the safepoint node needs the polling page as an input.
582// Since amd64 does not have absolute addressing but RIP-relative
583// addressing and the polling page is within 2G, it doesn't.
584bool SafePointNode::needs_polling_address_input()
585{
586  return false;
587}
588
589//
590// Compute padding required for nodes which need alignment
591//
592
593// The call's displacement (the bytes after the call opcode byte) needs to be
594// 4-byte aligned to ensure that it does not span a cache line so that it can be patched.
595int CallStaticJavaDirectNode::compute_padding(int current_offset) const
596{
597  current_offset += 1; // skip call opcode byte
598  return round_to(current_offset, alignment_required()) - current_offset;
599}
600
601// The call's displacement (the bytes after the movq and the call opcode byte) needs to be
602// 4-byte aligned to ensure that it does not span a cache line so that it can be patched.
603int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
604{
605  current_offset += 11; // skip movq instruction + call opcode byte
606  return round_to(current_offset, alignment_required()) - current_offset;
607}
608
609#ifndef PRODUCT
610void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const
611{
612  st->print("INT3");
613}
614#endif
615
616// EMIT_RM()
617void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3)
618{
619  unsigned char c = (unsigned char) ((f1 << 6) | (f2 << 3) | f3);
620  *(cbuf.code_end()) = c;
621  cbuf.set_code_end(cbuf.code_end() + 1);
622}
623
624// EMIT_CC()
625void emit_cc(CodeBuffer &cbuf, int f1, int f2)
626{
627  unsigned char c = (unsigned char) (f1 | f2);
628  *(cbuf.code_end()) = c;
629  cbuf.set_code_end(cbuf.code_end() + 1);
630}
631
632// EMIT_OPCODE()
633void emit_opcode(CodeBuffer &cbuf, int code)
634{
635  *(cbuf.code_end()) = (unsigned char) code;
636  cbuf.set_code_end(cbuf.code_end() + 1);
637}
638
639// EMIT_OPCODE() w/ relocation information
640void emit_opcode(CodeBuffer &cbuf,
641                 int code, relocInfo::relocType reloc, int offset, int format)
642{
643  cbuf.relocate(cbuf.inst_mark() + offset, reloc, format);
644  emit_opcode(cbuf, code);
645}
646
647// EMIT_D8()
648void emit_d8(CodeBuffer &cbuf, int d8)
649{
650  *(cbuf.code_end()) = (unsigned char) d8;
651  cbuf.set_code_end(cbuf.code_end() + 1);
652}
653
654// EMIT_D16()
655void emit_d16(CodeBuffer &cbuf, int d16)
656{
657  *((short *)(cbuf.code_end())) = d16;
658  cbuf.set_code_end(cbuf.code_end() + 2);
659}
660
661// EMIT_D32()
662void emit_d32(CodeBuffer &cbuf, int d32)
663{
664  *((int *)(cbuf.code_end())) = d32;
665  cbuf.set_code_end(cbuf.code_end() + 4);
666}
667
668// EMIT_D64()
669void emit_d64(CodeBuffer &cbuf, int64_t d64)
670{
671  *((int64_t*) (cbuf.code_end())) = d64;
672  cbuf.set_code_end(cbuf.code_end() + 8);
673}
674
675// emit 32 bit value and construct relocation entry from relocInfo::relocType
676void emit_d32_reloc(CodeBuffer& cbuf,
677                    int d32,
678                    relocInfo::relocType reloc,
679                    int format)
680{
681  assert(reloc != relocInfo::external_word_type, "use 2-arg emit_d32_reloc");
682  cbuf.relocate(cbuf.inst_mark(), reloc, format);
683
684  *((int*) (cbuf.code_end())) = d32;
685  cbuf.set_code_end(cbuf.code_end() + 4);
686}
687
688// emit 32 bit value and construct relocation entry from RelocationHolder
689void emit_d32_reloc(CodeBuffer& cbuf,
690                    int d32,
691                    RelocationHolder const& rspec,
692                    int format)
693{
694#ifdef ASSERT
695  if (rspec.reloc()->type() == relocInfo::oop_type &&
696      d32 != 0 && d32 != (intptr_t) Universe::non_oop_word()) {
697    assert(oop((intptr_t)d32)->is_oop() && oop((intptr_t)d32)->is_perm(), "cannot embed non-perm oops in code");
698  }
699#endif
700  cbuf.relocate(cbuf.inst_mark(), rspec, format);
701
702  *((int* )(cbuf.code_end())) = d32;
703  cbuf.set_code_end(cbuf.code_end() + 4);
704}
705
706void emit_d32_reloc(CodeBuffer& cbuf, address addr) {
707  address next_ip = cbuf.code_end() + 4;
708  emit_d32_reloc(cbuf, (int) (addr - next_ip),
709                 external_word_Relocation::spec(addr),
710                 RELOC_DISP32);
711}
712
713
714// emit 64 bit value and construct relocation entry from relocInfo::relocType
715void emit_d64_reloc(CodeBuffer& cbuf,
716                    int64_t d64,
717                    relocInfo::relocType reloc,
718                    int format)
719{
720  cbuf.relocate(cbuf.inst_mark(), reloc, format);
721
722  *((int64_t*) (cbuf.code_end())) = d64;
723  cbuf.set_code_end(cbuf.code_end() + 8);
724}
725
726// emit 64 bit value and construct relocation entry from RelocationHolder
727void emit_d64_reloc(CodeBuffer& cbuf,
728                    int64_t d64,
729                    RelocationHolder const& rspec,
730                    int format)
731{
732#ifdef ASSERT
733  if (rspec.reloc()->type() == relocInfo::oop_type &&
734      d64 != 0 && d64 != (int64_t) Universe::non_oop_word()) {
735    assert(oop(d64)->is_oop() && oop(d64)->is_perm(),
736           "cannot embed non-perm oops in code");
737  }
738#endif
739  cbuf.relocate(cbuf.inst_mark(), rspec, format);
740
741  *((int64_t*) (cbuf.code_end())) = d64;
742  cbuf.set_code_end(cbuf.code_end() + 8);
743}
744
745// Access stack slot for load or store
746void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp)
747{
748  emit_opcode(cbuf, opcode);                  // (e.g., FILD   [RSP+src])
749  if (-0x80 <= disp && disp < 0x80) {
750    emit_rm(cbuf, 0x01, rm_field, RSP_enc);   // R/M byte
751    emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
752    emit_d8(cbuf, disp);     // Displacement  // R/M byte
753  } else {
754    emit_rm(cbuf, 0x02, rm_field, RSP_enc);   // R/M byte
755    emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
756    emit_d32(cbuf, disp);     // Displacement // R/M byte
757  }
758}
759
760   // rRegI ereg, memory mem) %{    // emit_reg_mem
761void encode_RegMem(CodeBuffer &cbuf,
762                   int reg,
763                   int base, int index, int scale, int disp, bool disp_is_oop)
764{
765  assert(!disp_is_oop, "cannot have disp");
766  int regenc = reg & 7;
767  int baseenc = base & 7;
768  int indexenc = index & 7;
769
770  // There is no index & no scale, use form without SIB byte
771  if (index == 0x4 && scale == 0 && base != RSP_enc && base != R12_enc) {
772    // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
773    if (disp == 0 && base != RBP_enc && base != R13_enc) {
774      emit_rm(cbuf, 0x0, regenc, baseenc); // *
775    } else if (-0x80 <= disp && disp < 0x80 && !disp_is_oop) {
776      // If 8-bit displacement, mode 0x1
777      emit_rm(cbuf, 0x1, regenc, baseenc); // *
778      emit_d8(cbuf, disp);
779    } else {
780      // If 32-bit displacement
781      if (base == -1) { // Special flag for absolute address
782        emit_rm(cbuf, 0x0, regenc, 0x5); // *
783        if (disp_is_oop) {
784          emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
785        } else {
786          emit_d32(cbuf, disp);
787        }
788      } else {
789        // Normal base + offset
790        emit_rm(cbuf, 0x2, regenc, baseenc); // *
791        if (disp_is_oop) {
792          emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
793        } else {
794          emit_d32(cbuf, disp);
795        }
796      }
797    }
798  } else {
799    // Else, encode with the SIB byte
800    // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
801    if (disp == 0 && base != RBP_enc && base != R13_enc) {
802      // If no displacement
803      emit_rm(cbuf, 0x0, regenc, 0x4); // *
804      emit_rm(cbuf, scale, indexenc, baseenc);
805    } else {
806      if (-0x80 <= disp && disp < 0x80 && !disp_is_oop) {
807        // If 8-bit displacement, mode 0x1
808        emit_rm(cbuf, 0x1, regenc, 0x4); // *
809        emit_rm(cbuf, scale, indexenc, baseenc);
810        emit_d8(cbuf, disp);
811      } else {
812        // If 32-bit displacement
813        if (base == 0x04 ) {
814          emit_rm(cbuf, 0x2, regenc, 0x4);
815          emit_rm(cbuf, scale, indexenc, 0x04); // XXX is this valid???
816        } else {
817          emit_rm(cbuf, 0x2, regenc, 0x4);
818          emit_rm(cbuf, scale, indexenc, baseenc); // *
819        }
820        if (disp_is_oop) {
821          emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
822        } else {
823          emit_d32(cbuf, disp);
824        }
825      }
826    }
827  }
828}
829
830void encode_copy(CodeBuffer &cbuf, int dstenc, int srcenc)
831{
832  if (dstenc != srcenc) {
833    if (dstenc < 8) {
834      if (srcenc >= 8) {
835        emit_opcode(cbuf, Assembler::REX_B);
836        srcenc -= 8;
837      }
838    } else {
839      if (srcenc < 8) {
840        emit_opcode(cbuf, Assembler::REX_R);
841      } else {
842        emit_opcode(cbuf, Assembler::REX_RB);
843        srcenc -= 8;
844      }
845      dstenc -= 8;
846    }
847
848    emit_opcode(cbuf, 0x8B);
849    emit_rm(cbuf, 0x3, dstenc, srcenc);
850  }
851}
852
853void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
854  if( dst_encoding == src_encoding ) {
855    // reg-reg copy, use an empty encoding
856  } else {
857    MacroAssembler _masm(&cbuf);
858
859    __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding));
860  }
861}
862
863
864//=============================================================================
865#ifndef PRODUCT
866void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const
867{
868  Compile* C = ra_->C;
869
870  int framesize = C->frame_slots() << LogBytesPerInt;
871  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
872  // Remove wordSize for return adr already pushed
873  // and another for the RBP we are going to save
874  framesize -= 2*wordSize;
875  bool need_nop = true;
876
877  // Calls to C2R adapters often do not accept exceptional returns.
878  // We require that their callers must bang for them.  But be
879  // careful, because some VM calls (such as call site linkage) can
880  // use several kilobytes of stack.  But the stack safety zone should
881  // account for that.  See bugs 4446381, 4468289, 4497237.
882  if (C->need_stack_bang(framesize)) {
883    st->print_cr("# stack bang"); st->print("\t");
884    need_nop = false;
885  }
886  st->print_cr("pushq   rbp"); st->print("\t");
887
888  if (VerifyStackAtCalls) {
889    // Majik cookie to verify stack depth
890    st->print_cr("pushq   0xffffffffbadb100d"
891                  "\t# Majik cookie for stack depth check");
892    st->print("\t");
893    framesize -= wordSize; // Remove 2 for cookie
894    need_nop = false;
895  }
896
897  if (framesize) {
898    st->print("subq    rsp, #%d\t# Create frame", framesize);
899    if (framesize < 0x80 && need_nop) {
900      st->print("\n\tnop\t# nop for patch_verified_entry");
901    }
902  }
903}
904#endif
905
906void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const
907{
908  Compile* C = ra_->C;
909
910  // WARNING: Initial instruction MUST be 5 bytes or longer so that
911  // NativeJump::patch_verified_entry will be able to patch out the entry
912  // code safely. The fldcw is ok at 6 bytes, the push to verify stack
913  // depth is ok at 5 bytes, the frame allocation can be either 3 or
914  // 6 bytes. So if we don't do the fldcw or the push then we must
915  // use the 6 byte frame allocation even if we have no frame. :-(
916  // If method sets FPU control word do it now
917
918  int framesize = C->frame_slots() << LogBytesPerInt;
919  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
920  // Remove wordSize for return adr already pushed
921  // and another for the RBP we are going to save
922  framesize -= 2*wordSize;
923  bool need_nop = true;
924
925  // Calls to C2R adapters often do not accept exceptional returns.
926  // We require that their callers must bang for them.  But be
927  // careful, because some VM calls (such as call site linkage) can
928  // use several kilobytes of stack.  But the stack safety zone should
929  // account for that.  See bugs 4446381, 4468289, 4497237.
930  if (C->need_stack_bang(framesize)) {
931    MacroAssembler masm(&cbuf);
932    masm.generate_stack_overflow_check(framesize);
933    need_nop = false;
934  }
935
936  // We always push rbp so that on return to interpreter rbp will be
937  // restored correctly and we can correct the stack.
938  emit_opcode(cbuf, 0x50 | RBP_enc);
939
940  if (VerifyStackAtCalls) {
941    // Majik cookie to verify stack depth
942    emit_opcode(cbuf, 0x68); // pushq (sign-extended) 0xbadb100d
943    emit_d32(cbuf, 0xbadb100d);
944    framesize -= wordSize; // Remove 2 for cookie
945    need_nop = false;
946  }
947
948  if (framesize) {
949    emit_opcode(cbuf, Assembler::REX_W);
950    if (framesize < 0x80) {
951      emit_opcode(cbuf, 0x83);   // sub  SP,#framesize
952      emit_rm(cbuf, 0x3, 0x05, RSP_enc);
953      emit_d8(cbuf, framesize);
954      if (need_nop) {
955        emit_opcode(cbuf, 0x90); // nop
956      }
957    } else {
958      emit_opcode(cbuf, 0x81);   // sub  SP,#framesize
959      emit_rm(cbuf, 0x3, 0x05, RSP_enc);
960      emit_d32(cbuf, framesize);
961    }
962  }
963
964  C->set_frame_complete(cbuf.code_end() - cbuf.code_begin());
965
966#ifdef ASSERT
967  if (VerifyStackAtCalls) {
968    Label L;
969    MacroAssembler masm(&cbuf);
970    masm.pushq(rax);
971    masm.movq(rax, rsp);
972    masm.andq(rax, StackAlignmentInBytes-1);
973    masm.cmpq(rax, StackAlignmentInBytes-wordSize);
974    masm.popq(rax);
975    masm.jcc(Assembler::equal, L);
976    masm.stop("Stack is not properly aligned!");
977    masm.bind(L);
978  }
979#endif
980}
981
982uint MachPrologNode::size(PhaseRegAlloc* ra_) const
983{
984  return MachNode::size(ra_); // too many variables; just compute it
985                              // the hard way
986}
987
988int MachPrologNode::reloc() const
989{
990  return 0; // a large enough number
991}
992
993//=============================================================================
994#ifndef PRODUCT
995void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
996{
997  Compile* C = ra_->C;
998  int framesize = C->frame_slots() << LogBytesPerInt;
999  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1000  // Remove word for return adr already pushed
1001  // and RBP
1002  framesize -= 2*wordSize;
1003
1004  if (framesize) {
1005    st->print_cr("addq\trsp, %d\t# Destroy frame", framesize);
1006    st->print("\t");
1007  }
1008
1009  st->print_cr("popq\trbp");
1010  if (do_polling() && C->is_method_compilation()) {
1011    st->print_cr("\ttestl\trax, [rip + #offset_to_poll_page]\t"
1012                  "# Safepoint: poll for GC");
1013    st->print("\t");
1014  }
1015}
1016#endif
1017
1018void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1019{
1020  Compile* C = ra_->C;
1021  int framesize = C->frame_slots() << LogBytesPerInt;
1022  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1023  // Remove word for return adr already pushed
1024  // and RBP
1025  framesize -= 2*wordSize;
1026
1027  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
1028
1029  if (framesize) {
1030    emit_opcode(cbuf, Assembler::REX_W);
1031    if (framesize < 0x80) {
1032      emit_opcode(cbuf, 0x83); // addq rsp, #framesize
1033      emit_rm(cbuf, 0x3, 0x00, RSP_enc);
1034      emit_d8(cbuf, framesize);
1035    } else {
1036      emit_opcode(cbuf, 0x81); // addq rsp, #framesize
1037      emit_rm(cbuf, 0x3, 0x00, RSP_enc);
1038      emit_d32(cbuf, framesize);
1039    }
1040  }
1041
1042  // popq rbp
1043  emit_opcode(cbuf, 0x58 | RBP_enc);
1044
1045  if (do_polling() && C->is_method_compilation()) {
1046    // testl %rax, off(%rip) // Opcode + ModRM + Disp32 == 6 bytes
1047    // XXX reg_mem doesn't support RIP-relative addressing yet
1048    cbuf.set_inst_mark();
1049    cbuf.relocate(cbuf.inst_mark(), relocInfo::poll_return_type, 0); // XXX
1050    emit_opcode(cbuf, 0x85); // testl
1051    emit_rm(cbuf, 0x0, RAX_enc, 0x5); // 00 rax 101 == 0x5
1052    // cbuf.inst_mark() is beginning of instruction
1053    emit_d32_reloc(cbuf, os::get_polling_page());
1054//                    relocInfo::poll_return_type,
1055  }
1056}
1057
1058uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
1059{
1060  Compile* C = ra_->C;
1061  int framesize = C->frame_slots() << LogBytesPerInt;
1062  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1063  // Remove word for return adr already pushed
1064  // and RBP
1065  framesize -= 2*wordSize;
1066
1067  uint size = 0;
1068
1069  if (do_polling() && C->is_method_compilation()) {
1070    size += 6;
1071  }
1072
1073  // count popq rbp
1074  size++;
1075
1076  if (framesize) {
1077    if (framesize < 0x80) {
1078      size += 4;
1079    } else if (framesize) {
1080      size += 7;
1081    }
1082  }
1083
1084  return size;
1085}
1086
1087int MachEpilogNode::reloc() const
1088{
1089  return 2; // a large enough number
1090}
1091
1092const Pipeline* MachEpilogNode::pipeline() const
1093{
1094  return MachNode::pipeline_class();
1095}
1096
1097int MachEpilogNode::safepoint_offset() const
1098{
1099  return 0;
1100}
1101
1102//=============================================================================
1103
// Location classes assigned by rc_class() to the operands of a spill copy.
enum RC {
  rc_bad   = 0, // not a valid register
  rc_int   = 1, // general register
  rc_float = 2, // XMM register
  rc_stack = 3  // stack slot
};
1110
1111static enum RC rc_class(OptoReg::Name reg)
1112{
1113  if( !OptoReg::is_valid(reg)  ) return rc_bad;
1114
1115  if (OptoReg::is_stack(reg)) return rc_stack;
1116
1117  VMReg r = OptoReg::as_VMReg(reg);
1118
1119  if (r->is_Register()) return rc_int;
1120
1121  assert(r->is_XMMRegister(), "must be");
1122  return rc_float;
1123}
1124
1125uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
1126                                       PhaseRegAlloc* ra_,
1127                                       bool do_size,
1128                                       outputStream* st) const
1129{
1130
1131  // Get registers to move
1132  OptoReg::Name src_second = ra_->get_reg_second(in(1));
1133  OptoReg::Name src_first = ra_->get_reg_first(in(1));
1134  OptoReg::Name dst_second = ra_->get_reg_second(this);
1135  OptoReg::Name dst_first = ra_->get_reg_first(this);
1136
1137  enum RC src_second_rc = rc_class(src_second);
1138  enum RC src_first_rc = rc_class(src_first);
1139  enum RC dst_second_rc = rc_class(dst_second);
1140  enum RC dst_first_rc = rc_class(dst_first);
1141
1142  assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
1143         "must move at least 1 register" );
1144
1145  if (src_first == dst_first && src_second == dst_second) {
1146    // Self copy, no move
1147    return 0;
1148  } else if (src_first_rc == rc_stack) {
1149    // mem ->
1150    if (dst_first_rc == rc_stack) {
1151      // mem -> mem
1152      assert(src_second != dst_first, "overlap");
1153      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1154          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1155        // 64-bit
1156        int src_offset = ra_->reg2offset(src_first);
1157        int dst_offset = ra_->reg2offset(dst_first);
1158        if (cbuf) {
1159          emit_opcode(*cbuf, 0xFF);
1160          encode_RegMem(*cbuf, RSI_enc, RSP_enc, 0x4, 0, src_offset, false);
1161
1162          emit_opcode(*cbuf, 0x8F);
1163          encode_RegMem(*cbuf, RAX_enc, RSP_enc, 0x4, 0, dst_offset, false);
1164
1165#ifndef PRODUCT
1166        } else if (!do_size) {
1167          st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1168                     "popq    [rsp + #%d]",
1169                     src_offset,
1170                     dst_offset);
1171#endif
1172        }
1173        return
1174          3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) +
1175          3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4));
1176      } else {
1177        // 32-bit
1178        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1179        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1180        // No pushl/popl, so:
1181        int src_offset = ra_->reg2offset(src_first);
1182        int dst_offset = ra_->reg2offset(dst_first);
1183        if (cbuf) {
1184          emit_opcode(*cbuf, Assembler::REX_W);
1185          emit_opcode(*cbuf, 0x89);
1186          emit_opcode(*cbuf, 0x44);
1187          emit_opcode(*cbuf, 0x24);
1188          emit_opcode(*cbuf, 0xF8);
1189
1190          emit_opcode(*cbuf, 0x8B);
1191          encode_RegMem(*cbuf,
1192                        RAX_enc,
1193                        RSP_enc, 0x4, 0, src_offset,
1194                        false);
1195
1196          emit_opcode(*cbuf, 0x89);
1197          encode_RegMem(*cbuf,
1198                        RAX_enc,
1199                        RSP_enc, 0x4, 0, dst_offset,
1200                        false);
1201
1202          emit_opcode(*cbuf, Assembler::REX_W);
1203          emit_opcode(*cbuf, 0x8B);
1204          emit_opcode(*cbuf, 0x44);
1205          emit_opcode(*cbuf, 0x24);
1206          emit_opcode(*cbuf, 0xF8);
1207
1208#ifndef PRODUCT
1209        } else if (!do_size) {
1210          st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
1211                     "movl    rax, [rsp + #%d]\n\t"
1212                     "movl    [rsp + #%d], rax\n\t"
1213                     "movq    rax, [rsp - #8]",
1214                     src_offset,
1215                     dst_offset);
1216#endif
1217        }
1218        return
1219          5 + // movq
1220          3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) + // movl
1221          3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4)) + // movl
1222          5; // movq
1223      }
1224    } else if (dst_first_rc == rc_int) {
1225      // mem -> gpr
1226      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1227          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1228        // 64-bit
1229        int offset = ra_->reg2offset(src_first);
1230        if (cbuf) {
1231          if (Matcher::_regEncode[dst_first] < 8) {
1232            emit_opcode(*cbuf, Assembler::REX_W);
1233          } else {
1234            emit_opcode(*cbuf, Assembler::REX_WR);
1235          }
1236          emit_opcode(*cbuf, 0x8B);
1237          encode_RegMem(*cbuf,
1238                        Matcher::_regEncode[dst_first],
1239                        RSP_enc, 0x4, 0, offset,
1240                        false);
1241#ifndef PRODUCT
1242        } else if (!do_size) {
1243          st->print("movq    %s, [rsp + #%d]\t# spill",
1244                     Matcher::regName[dst_first],
1245                     offset);
1246#endif
1247        }
1248        return
1249          ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
1250      } else {
1251        // 32-bit
1252        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1253        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1254        int offset = ra_->reg2offset(src_first);
1255        if (cbuf) {
1256          if (Matcher::_regEncode[dst_first] >= 8) {
1257            emit_opcode(*cbuf, Assembler::REX_R);
1258          }
1259          emit_opcode(*cbuf, 0x8B);
1260          encode_RegMem(*cbuf,
1261                        Matcher::_regEncode[dst_first],
1262                        RSP_enc, 0x4, 0, offset,
1263                        false);
1264#ifndef PRODUCT
1265        } else if (!do_size) {
1266          st->print("movl    %s, [rsp + #%d]\t# spill",
1267                     Matcher::regName[dst_first],
1268                     offset);
1269#endif
1270        }
1271        return
1272          ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1273          ((Matcher::_regEncode[dst_first] < 8)
1274           ? 3
1275           : 4); // REX
1276      }
1277    } else if (dst_first_rc == rc_float) {
1278      // mem-> xmm
1279      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1280          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1281        // 64-bit
1282        int offset = ra_->reg2offset(src_first);
1283        if (cbuf) {
1284          emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
1285          if (Matcher::_regEncode[dst_first] >= 8) {
1286            emit_opcode(*cbuf, Assembler::REX_R);
1287          }
1288          emit_opcode(*cbuf, 0x0F);
1289          emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
1290          encode_RegMem(*cbuf,
1291                        Matcher::_regEncode[dst_first],
1292                        RSP_enc, 0x4, 0, offset,
1293                        false);
1294#ifndef PRODUCT
1295        } else if (!do_size) {
1296          st->print("%s  %s, [rsp + #%d]\t# spill",
1297                     UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
1298                     Matcher::regName[dst_first],
1299                     offset);
1300#endif
1301        }
1302        return
1303          ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1304          ((Matcher::_regEncode[dst_first] < 8)
1305           ? 5
1306           : 6); // REX
1307      } else {
1308        // 32-bit
1309        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1310        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1311        int offset = ra_->reg2offset(src_first);
1312        if (cbuf) {
1313          emit_opcode(*cbuf, 0xF3);
1314          if (Matcher::_regEncode[dst_first] >= 8) {
1315            emit_opcode(*cbuf, Assembler::REX_R);
1316          }
1317          emit_opcode(*cbuf, 0x0F);
1318          emit_opcode(*cbuf, 0x10);
1319          encode_RegMem(*cbuf,
1320                        Matcher::_regEncode[dst_first],
1321                        RSP_enc, 0x4, 0, offset,
1322                        false);
1323#ifndef PRODUCT
1324        } else if (!do_size) {
1325          st->print("movss   %s, [rsp + #%d]\t# spill",
1326                     Matcher::regName[dst_first],
1327                     offset);
1328#endif
1329        }
1330        return
1331          ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1332          ((Matcher::_regEncode[dst_first] < 8)
1333           ? 5
1334           : 6); // REX
1335      }
1336    }
1337  } else if (src_first_rc == rc_int) {
1338    // gpr ->
1339    if (dst_first_rc == rc_stack) {
1340      // gpr -> mem
1341      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1342          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1343        // 64-bit
1344        int offset = ra_->reg2offset(dst_first);
1345        if (cbuf) {
1346          if (Matcher::_regEncode[src_first] < 8) {
1347            emit_opcode(*cbuf, Assembler::REX_W);
1348          } else {
1349            emit_opcode(*cbuf, Assembler::REX_WR);
1350          }
1351          emit_opcode(*cbuf, 0x89);
1352          encode_RegMem(*cbuf,
1353                        Matcher::_regEncode[src_first],
1354                        RSP_enc, 0x4, 0, offset,
1355                        false);
1356#ifndef PRODUCT
1357        } else if (!do_size) {
1358          st->print("movq    [rsp + #%d], %s\t# spill",
1359                     offset,
1360                     Matcher::regName[src_first]);
1361#endif
1362        }
1363        return ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
1364      } else {
1365        // 32-bit
1366        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1367        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1368        int offset = ra_->reg2offset(dst_first);
1369        if (cbuf) {
1370          if (Matcher::_regEncode[src_first] >= 8) {
1371            emit_opcode(*cbuf, Assembler::REX_R);
1372          }
1373          emit_opcode(*cbuf, 0x89);
1374          encode_RegMem(*cbuf,
1375                        Matcher::_regEncode[src_first],
1376                        RSP_enc, 0x4, 0, offset,
1377                        false);
1378#ifndef PRODUCT
1379        } else if (!do_size) {
1380          st->print("movl    [rsp + #%d], %s\t# spill",
1381                     offset,
1382                     Matcher::regName[src_first]);
1383#endif
1384        }
1385        return
1386          ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1387          ((Matcher::_regEncode[src_first] < 8)
1388           ? 3
1389           : 4); // REX
1390      }
1391    } else if (dst_first_rc == rc_int) {
1392      // gpr -> gpr
1393      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1394          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1395        // 64-bit
1396        if (cbuf) {
1397          if (Matcher::_regEncode[dst_first] < 8) {
1398            if (Matcher::_regEncode[src_first] < 8) {
1399              emit_opcode(*cbuf, Assembler::REX_W);
1400            } else {
1401              emit_opcode(*cbuf, Assembler::REX_WB);
1402            }
1403          } else {
1404            if (Matcher::_regEncode[src_first] < 8) {
1405              emit_opcode(*cbuf, Assembler::REX_WR);
1406            } else {
1407              emit_opcode(*cbuf, Assembler::REX_WRB);
1408            }
1409          }
1410          emit_opcode(*cbuf, 0x8B);
1411          emit_rm(*cbuf, 0x3,
1412                  Matcher::_regEncode[dst_first] & 7,
1413                  Matcher::_regEncode[src_first] & 7);
1414#ifndef PRODUCT
1415        } else if (!do_size) {
1416          st->print("movq    %s, %s\t# spill",
1417                     Matcher::regName[dst_first],
1418                     Matcher::regName[src_first]);
1419#endif
1420        }
1421        return 3; // REX
1422      } else {
1423        // 32-bit
1424        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1425        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1426        if (cbuf) {
1427          if (Matcher::_regEncode[dst_first] < 8) {
1428            if (Matcher::_regEncode[src_first] >= 8) {
1429              emit_opcode(*cbuf, Assembler::REX_B);
1430            }
1431          } else {
1432            if (Matcher::_regEncode[src_first] < 8) {
1433              emit_opcode(*cbuf, Assembler::REX_R);
1434            } else {
1435              emit_opcode(*cbuf, Assembler::REX_RB);
1436            }
1437          }
1438          emit_opcode(*cbuf, 0x8B);
1439          emit_rm(*cbuf, 0x3,
1440                  Matcher::_regEncode[dst_first] & 7,
1441                  Matcher::_regEncode[src_first] & 7);
1442#ifndef PRODUCT
1443        } else if (!do_size) {
1444          st->print("movl    %s, %s\t# spill",
1445                     Matcher::regName[dst_first],
1446                     Matcher::regName[src_first]);
1447#endif
1448        }
1449        return
1450          (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1451          ? 2
1452          : 3; // REX
1453      }
1454    } else if (dst_first_rc == rc_float) {
1455      // gpr -> xmm
1456      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1457          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1458        // 64-bit
1459        if (cbuf) {
1460          emit_opcode(*cbuf, 0x66);
1461          if (Matcher::_regEncode[dst_first] < 8) {
1462            if (Matcher::_regEncode[src_first] < 8) {
1463              emit_opcode(*cbuf, Assembler::REX_W);
1464            } else {
1465              emit_opcode(*cbuf, Assembler::REX_WB);
1466            }
1467          } else {
1468            if (Matcher::_regEncode[src_first] < 8) {
1469              emit_opcode(*cbuf, Assembler::REX_WR);
1470            } else {
1471              emit_opcode(*cbuf, Assembler::REX_WRB);
1472            }
1473          }
1474          emit_opcode(*cbuf, 0x0F);
1475          emit_opcode(*cbuf, 0x6E);
1476          emit_rm(*cbuf, 0x3,
1477                  Matcher::_regEncode[dst_first] & 7,
1478                  Matcher::_regEncode[src_first] & 7);
1479#ifndef PRODUCT
1480        } else if (!do_size) {
1481          st->print("movdq   %s, %s\t# spill",
1482                     Matcher::regName[dst_first],
1483                     Matcher::regName[src_first]);
1484#endif
1485        }
1486        return 5; // REX
1487      } else {
1488        // 32-bit
1489        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1490        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1491        if (cbuf) {
1492          emit_opcode(*cbuf, 0x66);
1493          if (Matcher::_regEncode[dst_first] < 8) {
1494            if (Matcher::_regEncode[src_first] >= 8) {
1495              emit_opcode(*cbuf, Assembler::REX_B);
1496            }
1497          } else {
1498            if (Matcher::_regEncode[src_first] < 8) {
1499              emit_opcode(*cbuf, Assembler::REX_R);
1500            } else {
1501              emit_opcode(*cbuf, Assembler::REX_RB);
1502            }
1503          }
1504          emit_opcode(*cbuf, 0x0F);
1505          emit_opcode(*cbuf, 0x6E);
1506          emit_rm(*cbuf, 0x3,
1507                  Matcher::_regEncode[dst_first] & 7,
1508                  Matcher::_regEncode[src_first] & 7);
1509#ifndef PRODUCT
1510        } else if (!do_size) {
1511          st->print("movdl   %s, %s\t# spill",
1512                     Matcher::regName[dst_first],
1513                     Matcher::regName[src_first]);
1514#endif
1515        }
1516        return
1517          (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1518          ? 4
1519          : 5; // REX
1520      }
1521    }
1522  } else if (src_first_rc == rc_float) {
1523    // xmm ->
1524    if (dst_first_rc == rc_stack) {
1525      // xmm -> mem
1526      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1527          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1528        // 64-bit
1529        int offset = ra_->reg2offset(dst_first);
1530        if (cbuf) {
1531          emit_opcode(*cbuf, 0xF2);
1532          if (Matcher::_regEncode[src_first] >= 8) {
1533              emit_opcode(*cbuf, Assembler::REX_R);
1534          }
1535          emit_opcode(*cbuf, 0x0F);
1536          emit_opcode(*cbuf, 0x11);
1537          encode_RegMem(*cbuf,
1538                        Matcher::_regEncode[src_first],
1539                        RSP_enc, 0x4, 0, offset,
1540                        false);
1541#ifndef PRODUCT
1542        } else if (!do_size) {
1543          st->print("movsd   [rsp + #%d], %s\t# spill",
1544                     offset,
1545                     Matcher::regName[src_first]);
1546#endif
1547        }
1548        return
1549          ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1550          ((Matcher::_regEncode[src_first] < 8)
1551           ? 5
1552           : 6); // REX
1553      } else {
1554        // 32-bit
1555        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1556        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1557        int offset = ra_->reg2offset(dst_first);
1558        if (cbuf) {
1559          emit_opcode(*cbuf, 0xF3);
1560          if (Matcher::_regEncode[src_first] >= 8) {
1561              emit_opcode(*cbuf, Assembler::REX_R);
1562          }
1563          emit_opcode(*cbuf, 0x0F);
1564          emit_opcode(*cbuf, 0x11);
1565          encode_RegMem(*cbuf,
1566                        Matcher::_regEncode[src_first],
1567                        RSP_enc, 0x4, 0, offset,
1568                        false);
1569#ifndef PRODUCT
1570        } else if (!do_size) {
1571          st->print("movss   [rsp + #%d], %s\t# spill",
1572                     offset,
1573                     Matcher::regName[src_first]);
1574#endif
1575        }
1576        return
1577          ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1578          ((Matcher::_regEncode[src_first] < 8)
1579           ? 5
1580           : 6); // REX
1581      }
1582    } else if (dst_first_rc == rc_int) {
1583      // xmm -> gpr
1584      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1585          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1586        // 64-bit
1587        if (cbuf) {
1588          emit_opcode(*cbuf, 0x66);
1589          if (Matcher::_regEncode[dst_first] < 8) {
1590            if (Matcher::_regEncode[src_first] < 8) {
1591              emit_opcode(*cbuf, Assembler::REX_W);
1592            } else {
1593              emit_opcode(*cbuf, Assembler::REX_WR); // attention!
1594            }
1595          } else {
1596            if (Matcher::_regEncode[src_first] < 8) {
1597              emit_opcode(*cbuf, Assembler::REX_WB); // attention!
1598            } else {
1599              emit_opcode(*cbuf, Assembler::REX_WRB);
1600            }
1601          }
1602          emit_opcode(*cbuf, 0x0F);
1603          emit_opcode(*cbuf, 0x7E);
1604          emit_rm(*cbuf, 0x3,
1605                  Matcher::_regEncode[dst_first] & 7,
1606                  Matcher::_regEncode[src_first] & 7);
1607#ifndef PRODUCT
1608        } else if (!do_size) {
1609          st->print("movdq   %s, %s\t# spill",
1610                     Matcher::regName[dst_first],
1611                     Matcher::regName[src_first]);
1612#endif
1613        }
1614        return 5; // REX
1615      } else {
1616        // 32-bit
1617        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1618        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1619        if (cbuf) {
1620          emit_opcode(*cbuf, 0x66);
1621          if (Matcher::_regEncode[dst_first] < 8) {
1622            if (Matcher::_regEncode[src_first] >= 8) {
1623              emit_opcode(*cbuf, Assembler::REX_R); // attention!
1624            }
1625          } else {
1626            if (Matcher::_regEncode[src_first] < 8) {
1627              emit_opcode(*cbuf, Assembler::REX_B); // attention!
1628            } else {
1629              emit_opcode(*cbuf, Assembler::REX_RB);
1630            }
1631          }
1632          emit_opcode(*cbuf, 0x0F);
1633          emit_opcode(*cbuf, 0x7E);
1634          emit_rm(*cbuf, 0x3,
1635                  Matcher::_regEncode[dst_first] & 7,
1636                  Matcher::_regEncode[src_first] & 7);
1637#ifndef PRODUCT
1638        } else if (!do_size) {
1639          st->print("movdl   %s, %s\t# spill",
1640                     Matcher::regName[dst_first],
1641                     Matcher::regName[src_first]);
1642#endif
1643        }
1644        return
1645          (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1646          ? 4
1647          : 5; // REX
1648      }
1649    } else if (dst_first_rc == rc_float) {
1650      // xmm -> xmm
1651      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1652          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1653        // 64-bit
1654        if (cbuf) {
1655          emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
1656          if (Matcher::_regEncode[dst_first] < 8) {
1657            if (Matcher::_regEncode[src_first] >= 8) {
1658              emit_opcode(*cbuf, Assembler::REX_B);
1659            }
1660          } else {
1661            if (Matcher::_regEncode[src_first] < 8) {
1662              emit_opcode(*cbuf, Assembler::REX_R);
1663            } else {
1664              emit_opcode(*cbuf, Assembler::REX_RB);
1665            }
1666          }
1667          emit_opcode(*cbuf, 0x0F);
1668          emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
1669          emit_rm(*cbuf, 0x3,
1670                  Matcher::_regEncode[dst_first] & 7,
1671                  Matcher::_regEncode[src_first] & 7);
1672#ifndef PRODUCT
1673        } else if (!do_size) {
1674          st->print("%s  %s, %s\t# spill",
1675                     UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
1676                     Matcher::regName[dst_first],
1677                     Matcher::regName[src_first]);
1678#endif
1679        }
1680        return
1681          (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1682          ? 4
1683          : 5; // REX
1684      } else {
1685        // 32-bit
1686        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1687        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1688        if (cbuf) {
1689          if (!UseXmmRegToRegMoveAll)
1690            emit_opcode(*cbuf, 0xF3);
1691          if (Matcher::_regEncode[dst_first] < 8) {
1692            if (Matcher::_regEncode[src_first] >= 8) {
1693              emit_opcode(*cbuf, Assembler::REX_B);
1694            }
1695          } else {
1696            if (Matcher::_regEncode[src_first] < 8) {
1697              emit_opcode(*cbuf, Assembler::REX_R);
1698            } else {
1699              emit_opcode(*cbuf, Assembler::REX_RB);
1700            }
1701          }
1702          emit_opcode(*cbuf, 0x0F);
1703          emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
1704          emit_rm(*cbuf, 0x3,
1705                  Matcher::_regEncode[dst_first] & 7,
1706                  Matcher::_regEncode[src_first] & 7);
1707#ifndef PRODUCT
1708        } else if (!do_size) {
1709          st->print("%s  %s, %s\t# spill",
1710                     UseXmmRegToRegMoveAll ? "movaps" : "movss ",
1711                     Matcher::regName[dst_first],
1712                     Matcher::regName[src_first]);
1713#endif
1714        }
1715        return
1716          (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1717          ? (UseXmmRegToRegMoveAll ? 3 : 4)
1718          : (UseXmmRegToRegMoveAll ? 4 : 5); // REX
1719      }
1720    }
1721  }
1722
1723  assert(0," foo ");
1724  Unimplemented();
1725
1726  return 0;
1727}
1728
1729#ifndef PRODUCT
1730void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const
1731{
1732  implementation(NULL, ra_, false, st);
1733}
1734#endif
1735
1736void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const
1737{
1738  implementation(&cbuf, ra_, false, NULL);
1739}
1740
1741uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const
1742{
1743  return implementation(NULL, ra_, true, NULL);
1744}
1745
1746//=============================================================================
1747#ifndef PRODUCT
1748void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const
1749{
1750  st->print("nop \t# %d bytes pad for loops and calls", _count);
1751}
1752#endif
1753
1754void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const
1755{
1756  MacroAssembler _masm(&cbuf);
1757  __ nop(_count);
1758}
1759
1760uint MachNopNode::size(PhaseRegAlloc*) const
1761{
1762  return _count;
1763}
1764
1765
1766//=============================================================================
1767#ifndef PRODUCT
1768void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1769{
1770  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1771  int reg = ra_->get_reg_first(this);
1772  st->print("leaq    %s, [rsp + #%d]\t# box lock",
1773            Matcher::regName[reg], offset);
1774}
1775#endif
1776
1777void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1778{
1779  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1780  int reg = ra_->get_encode(this);
1781  if (offset >= 0x80) {
1782    emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1783    emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1784    emit_rm(cbuf, 0x2, reg & 7, 0x04);
1785    emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1786    emit_d32(cbuf, offset);
1787  } else {
1788    emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1789    emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1790    emit_rm(cbuf, 0x1, reg & 7, 0x04);
1791    emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1792    emit_d8(cbuf, offset);
1793  }
1794}
1795
1796uint BoxLockNode::size(PhaseRegAlloc *ra_) const
1797{
1798  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1799  return (offset < 0x80) ? 5 : 8; // REX
1800}
1801
1802//=============================================================================
1803
1804// emit call stub, compiled java to interpreter
1805void emit_java_to_interp(CodeBuffer& cbuf)
1806{
1807  // Stub is fixed up when the corresponding call is converted from
1808  // calling compiled code to calling interpreted code.
1809  // movq rbx, 0
1810  // jmp -5 # to self
1811
1812  address mark = cbuf.inst_mark();  // get mark within main instrs section
1813
1814  // Note that the code buffer's inst_mark is always relative to insts.
1815  // That's why we must use the macroassembler to generate a stub.
1816  MacroAssembler _masm(&cbuf);
1817
1818  address base =
1819  __ start_a_stub(Compile::MAX_stubs_size);
1820  if (base == NULL)  return;  // CodeBuffer::expand failed
1821  // static stub relocation stores the instruction address of the call
1822  __ relocate(static_stub_Relocation::spec(mark), RELOC_IMM64);
1823  // static stub relocation also tags the methodOop in the code-stream.
1824  __ movoop(rbx, (jobject) NULL);  // method is zapped till fixup time
1825  __ jump(RuntimeAddress(__ pc()));
1826
1827  // Update current stubs pointer and restore code_end.
1828  __ end_a_stub();
1829}
1830
1831// size of call stub, compiled java to interpretor
1832uint size_java_to_interp()
1833{
1834  return 15;  // movq (1+1+8); jmp (1+4)
1835}
1836
1837// relocation entries for call stub, compiled java to interpretor
1838uint reloc_java_to_interp()
1839{
1840  return 4; // 3 in emit_java_to_interp + 1 in Java_Static_Call
1841}
1842
1843//=============================================================================
1844#ifndef PRODUCT
1845void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1846{
1847  st->print_cr("cmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t"
1848               "# Inline cache check", oopDesc::klass_offset_in_bytes());
1849  st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
1850  st->print_cr("\tnop");
1851  if (!OptoBreakpoint) {
1852    st->print_cr("\tnop");
1853  }
1854}
1855#endif
1856
1857void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1858{
1859  MacroAssembler masm(&cbuf);
1860#ifdef ASSERT
1861  uint code_size = cbuf.code_size();
1862#endif
1863  masm.cmpq(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
1864
1865  masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1866
1867  /* WARNING these NOPs are critical so that verified entry point is properly
1868     aligned for patching by NativeJump::patch_verified_entry() */
1869  int nops_cnt = 1;
1870  if (!OptoBreakpoint) {
1871    // Leave space for int3
1872     nops_cnt += 1;
1873  }
1874  masm.nop(nops_cnt);
1875
1876  assert(cbuf.code_size() - code_size == size(ra_),
1877         "checking code size of inline cache node");
1878}
1879
1880uint MachUEPNode::size(PhaseRegAlloc* ra_) const
1881{
1882  return OptoBreakpoint ? 11 : 12;
1883}
1884
1885
1886//=============================================================================
1887uint size_exception_handler()
1888{
1889  // NativeCall instruction size is the same as NativeJump.
1890  // Note that this value is also credited (in output.cpp) to
1891  // the size of the code section.
1892  return NativeJump::instruction_size;
1893}
1894
1895// Emit exception handler code.
1896int emit_exception_handler(CodeBuffer& cbuf)
1897{
1898
1899  // Note that the code buffer's inst_mark is always relative to insts.
1900  // That's why we must use the macroassembler to generate a handler.
1901  MacroAssembler _masm(&cbuf);
1902  address base =
1903  __ start_a_stub(size_exception_handler());
1904  if (base == NULL)  return 0;  // CodeBuffer::expand failed
1905  int offset = __ offset();
1906  __ jump(RuntimeAddress(OptoRuntime::exception_blob()->instructions_begin()));
1907  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
1908  __ end_a_stub();
1909  return offset;
1910}
1911
1912uint size_deopt_handler()
1913{
1914  // three 5 byte instructions
1915  return 15;
1916}
1917
1918// Emit deopt handler code.
1919int emit_deopt_handler(CodeBuffer& cbuf)
1920{
1921
1922  // Note that the code buffer's inst_mark is always relative to insts.
1923  // That's why we must use the macroassembler to generate a handler.
1924  MacroAssembler _masm(&cbuf);
1925  address base =
1926  __ start_a_stub(size_deopt_handler());
1927  if (base == NULL)  return 0;  // CodeBuffer::expand failed
1928  int offset = __ offset();
1929  address the_pc = (address) __ pc();
1930  Label next;
1931  // push a "the_pc" on the stack without destroying any registers
1932  // as they all may be live.
1933
1934  // push address of "next"
1935  __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
1936  __ bind(next);
1937  // adjust it so it matches "the_pc"
1938  __ subq(Address(rsp, 0), __ offset() - offset);
1939  __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1940  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
1941  __ end_a_stub();
1942  return offset;
1943}
1944
1945static void emit_double_constant(CodeBuffer& cbuf, double x) {
1946  int mark = cbuf.insts()->mark_off();
1947  MacroAssembler _masm(&cbuf);
1948  address double_address = __ double_constant(x);
1949  cbuf.insts()->set_mark_off(mark);  // preserve mark across masm shift
1950  emit_d32_reloc(cbuf,
1951                 (int) (double_address - cbuf.code_end() - 4),
1952                 internal_word_Relocation::spec(double_address),
1953                 RELOC_DISP32);
1954}
1955
1956static void emit_float_constant(CodeBuffer& cbuf, float x) {
1957  int mark = cbuf.insts()->mark_off();
1958  MacroAssembler _masm(&cbuf);
1959  address float_address = __ float_constant(x);
1960  cbuf.insts()->set_mark_off(mark);  // preserve mark across masm shift
1961  emit_d32_reloc(cbuf,
1962                 (int) (float_address - cbuf.code_end() - 4),
1963                 internal_word_Relocation::spec(float_address),
1964                 RELOC_DISP32);
1965}
1966
1967
1968int Matcher::regnum_to_fpu_offset(int regnum)
1969{
1970  return regnum - 32; // The FP registers are in the second chunk
1971}
1972
1973// This is UltraSparc specific, true just means we have fast l2f conversion
1974const bool Matcher::convL2FSupported(void) {
1975  return true;
1976}
1977
1978// Vector width in bytes
1979const uint Matcher::vector_width_in_bytes(void) {
1980  return 8;
1981}
1982
1983// Vector ideal reg
1984const uint Matcher::vector_ideal_reg(void) {
1985  return Op_RegD;
1986}
1987
1988// Is this branch offset short enough that a short branch can be used?
1989//
1990// NOTE: If the platform does not provide any short branch variants, then
1991//       this method should return false for offset 0.
1992bool Matcher::is_short_branch_offset(int offset)
1993{
1994  return -0x80 <= offset && offset < 0x80;
1995}
1996
1997const bool Matcher::isSimpleConstant64(jlong value) {
1998  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
1999  //return value == (int) value;  // Cf. storeImmL and immL32.
2000
2001  // Probably always true, even if a temp register is required.
2002  return true;
2003}
2004
2005// The ecx parameter to rep stosq for the ClearArray node is in words.
2006const bool Matcher::init_array_count_is_in_bytes = false;
2007
2008// Threshold size for cleararray.
2009const int Matcher::init_array_short_size = 8 * BytesPerLong;
2010
2011// Should the Matcher clone shifts on addressing modes, expecting them
2012// to be subsumed into complex addressing expressions or compute them
2013// into registers?  True for Intel but false for most RISCs
2014const bool Matcher::clone_shift_expressions = true;
2015
2016// Is it better to copy float constants, or load them directly from
2017// memory?  Intel can load a float constant from a direct address,
2018// requiring no extra registers.  Most RISCs will have to materialize
2019// an address into a register first, so they would do better to copy
2020// the constant from stack.
2021const bool Matcher::rematerialize_float_constants = true; // XXX
2022
2023// If CPU can load and store mis-aligned doubles directly then no
2024// fixup is needed.  Else we split the double into 2 integer pieces
2025// and move it piece-by-piece.  Only happens when passing doubles into
2026// C code as the Java calling convention forces doubles to be aligned.
2027const bool Matcher::misaligned_doubles_ok = true;
2028
2029// No-op on amd64
2030void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {}
2031
2032// Advertise here if the CPU requires explicit rounding operations to
2033// implement the UseStrictFP mode.
2034const bool Matcher::strict_fp_requires_explicit_rounding = true;
2035
2036// Do floats take an entire double register or just half?
2037const bool Matcher::float_in_double = true;
2038// Do ints take an entire long register or just half?
2039const bool Matcher::int_in_long = true;
2040
2041// Return whether or not this register is ever used as an argument.
2042// This function is used on startup to build the trampoline stubs in
2043// generateOptoStub.  Registers not mentioned will be killed by the VM
2044// call in the trampoline, and arguments in those registers not be
2045// available to the callee.
2046bool Matcher::can_be_java_arg(int reg)
2047{
2048  return
2049    reg ==  RDI_num || reg ==  RDI_H_num ||
2050    reg ==  RSI_num || reg ==  RSI_H_num ||
2051    reg ==  RDX_num || reg ==  RDX_H_num ||
2052    reg ==  RCX_num || reg ==  RCX_H_num ||
2053    reg ==   R8_num || reg ==   R8_H_num ||
2054    reg ==   R9_num || reg ==   R9_H_num ||
2055    reg == XMM0_num || reg == XMM0_H_num ||
2056    reg == XMM1_num || reg == XMM1_H_num ||
2057    reg == XMM2_num || reg == XMM2_H_num ||
2058    reg == XMM3_num || reg == XMM3_H_num ||
2059    reg == XMM4_num || reg == XMM4_H_num ||
2060    reg == XMM5_num || reg == XMM5_H_num ||
2061    reg == XMM6_num || reg == XMM6_H_num ||
2062    reg == XMM7_num || reg == XMM7_H_num;
2063}
2064
2065bool Matcher::is_spillable_arg(int reg)
2066{
2067  return can_be_java_arg(reg);
2068}
2069
2070// Register for DIVI projection of divmodI
2071RegMask Matcher::divI_proj_mask() {
2072  return INT_RAX_REG_mask;
2073}
2074
2075// Register for MODI projection of divmodI
2076RegMask Matcher::modI_proj_mask() {
2077  return INT_RDX_REG_mask;
2078}
2079
2080// Register for DIVL projection of divmodL
2081RegMask Matcher::divL_proj_mask() {
2082  return LONG_RAX_REG_mask;
2083}
2084
2085// Register for MODL projection of divmodL
2086RegMask Matcher::modL_proj_mask() {
2087  return LONG_RDX_REG_mask;
2088}
2089
2090%}
2091
2092//----------ENCODING BLOCK-----------------------------------------------------
2093// This block specifies the encoding classes used by the compiler to
2094// output byte streams.  Encoding classes are parameterized macros
2095// used by Machine Instruction Nodes in order to generate the bit
2096// encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are currently
// supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
// COND_INTER.  REG_INTER causes an operand to generate a function
2100// which returns its register number when queried.  CONST_INTER causes
2101// an operand to generate a function which returns the value of the
2102// constant when queried.  MEMORY_INTER causes an operand to generate
2103// four functions which return the Base Register, the Index Register,
2104// the Scale Value, and the Offset Value of the operand when queried.
2105// COND_INTER causes an operand to generate six functions which return
2106// the encoding code (ie - encoding bits for the instruction)
2107// associated with each basic boolean condition for a conditional
2108// instruction.
2109//
2110// Instructions specify two basic values for encoding.  Again, a
2111// function is available to check if the constant displacement is an
2112// oop. They use the ins_encode keyword to specify their encoding
2113// classes (which must be a sequence of enc_class names, and their
2114// parameters, specified in the encoding block), and they use the
2115// opcode keyword to specify, in order, their primary, secondary, and
2116// tertiary opcode.  Only the opcode sections which a particular
2117// instruction needs for encoding need to be specified.
2118encode %{
2119  // Build emit functions for each basic byte or larger field in the
2120  // intel encoding scheme (opcode, rm, sib, immediate), and call them
2121  // from C++ code in the enc_class source block.  Emit functions will
2122  // live in the main source block for now.  In future, we can
2123  // generalize this by adding a syntax that specifies the sizes of
2124  // fields in an order, so that the adlc can build the emit functions
2125  // automagically
2126
2127  // Emit primary opcode
2128  enc_class OpcP
2129  %{
2130    emit_opcode(cbuf, $primary);
2131  %}
2132
2133  // Emit secondary opcode
2134  enc_class OpcS
2135  %{
2136    emit_opcode(cbuf, $secondary);
2137  %}
2138
2139  // Emit tertiary opcode
2140  enc_class OpcT
2141  %{
2142    emit_opcode(cbuf, $tertiary);
2143  %}
2144
2145  // Emit opcode directly
2146  enc_class Opcode(immI d8)
2147  %{
2148    emit_opcode(cbuf, $d8$$constant);
2149  %}
2150
2151  // Emit size prefix
2152  enc_class SizePrefix
2153  %{
2154    emit_opcode(cbuf, 0x66);
2155  %}
2156
2157  enc_class reg(rRegI reg)
2158  %{
2159    emit_rm(cbuf, 0x3, 0, $reg$$reg & 7);
2160  %}
2161
2162  enc_class reg_reg(rRegI dst, rRegI src)
2163  %{
2164    emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2165  %}
2166
2167  enc_class opc_reg_reg(immI opcode, rRegI dst, rRegI src)
2168  %{
2169    emit_opcode(cbuf, $opcode$$constant);
2170    emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2171  %}
2172
2173  enc_class cmpfp_fixup()
2174  %{
2175    // jnp,s exit
2176    emit_opcode(cbuf, 0x7B);
2177    emit_d8(cbuf, 0x0A);
2178
2179    // pushfq
2180    emit_opcode(cbuf, 0x9C);
2181
2182    // andq $0xffffff2b, (%rsp)
2183    emit_opcode(cbuf, Assembler::REX_W);
2184    emit_opcode(cbuf, 0x81);
2185    emit_opcode(cbuf, 0x24);
2186    emit_opcode(cbuf, 0x24);
2187    emit_d32(cbuf, 0xffffff2b);
2188
2189    // popfq
2190    emit_opcode(cbuf, 0x9D);
2191
2192    // nop (target for branch to avoid branch to branch)
2193    emit_opcode(cbuf, 0x90);
2194  %}
2195
2196  enc_class cmpfp3(rRegI dst)
2197  %{
2198    int dstenc = $dst$$reg;
2199
2200    // movl $dst, -1
2201    if (dstenc >= 8) {
2202      emit_opcode(cbuf, Assembler::REX_B);
2203    }
2204    emit_opcode(cbuf, 0xB8 | (dstenc & 7));
2205    emit_d32(cbuf, -1);
2206
2207    // jp,s done
2208    emit_opcode(cbuf, 0x7A);
2209    emit_d8(cbuf, dstenc < 4 ? 0x08 : 0x0A);
2210
2211    // jb,s done
2212    emit_opcode(cbuf, 0x72);
2213    emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);
2214
2215    // setne $dst
2216    if (dstenc >= 4) {
2217      emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
2218    }
2219    emit_opcode(cbuf, 0x0F);
2220    emit_opcode(cbuf, 0x95);
2221    emit_opcode(cbuf, 0xC0 | (dstenc & 7));
2222
2223    // movzbl $dst, $dst
2224    if (dstenc >= 4) {
2225      emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
2226    }
2227    emit_opcode(cbuf, 0x0F);
2228    emit_opcode(cbuf, 0xB6);
2229    emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
2230  %}
2231
2232  enc_class cdql_enc(no_rax_rdx_RegI div)
2233  %{
2234    // Full implementation of Java idiv and irem; checks for
2235    // special case as described in JVM spec., p.243 & p.271.
2236    //
2237    //         normal case                           special case
2238    //
2239    // input : rax: dividend                         min_int
2240    //         reg: divisor                          -1
2241    //
2242    // output: rax: quotient  (= rax idiv reg)       min_int
2243    //         rdx: remainder (= rax irem reg)       0
2244    //
2245    //  Code sequnce:
2246    //
2247    //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
2248    //    5:   75 07/08                jne    e <normal>
2249    //    7:   33 d2                   xor    %edx,%edx
2250    //  [div >= 8 -> offset + 1]
2251    //  [REX_B]
2252    //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
2253    //    c:   74 03/04                je     11 <done>
2254    // 000000000000000e <normal>:
2255    //    e:   99                      cltd
2256    //  [div >= 8 -> offset + 1]
2257    //  [REX_B]
2258    //    f:   f7 f9                   idiv   $div
2259    // 0000000000000011 <done>:
2260
2261    // cmp    $0x80000000,%eax
2262    emit_opcode(cbuf, 0x3d);
2263    emit_d8(cbuf, 0x00);
2264    emit_d8(cbuf, 0x00);
2265    emit_d8(cbuf, 0x00);
2266    emit_d8(cbuf, 0x80);
2267
2268    // jne    e <normal>
2269    emit_opcode(cbuf, 0x75);
2270    emit_d8(cbuf, $div$$reg < 8 ? 0x07 : 0x08);
2271
2272    // xor    %edx,%edx
2273    emit_opcode(cbuf, 0x33);
2274    emit_d8(cbuf, 0xD2);
2275
2276    // cmp    $0xffffffffffffffff,%ecx
2277    if ($div$$reg >= 8) {
2278      emit_opcode(cbuf, Assembler::REX_B);
2279    }
2280    emit_opcode(cbuf, 0x83);
2281    emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
2282    emit_d8(cbuf, 0xFF);
2283
2284    // je     11 <done>
2285    emit_opcode(cbuf, 0x74);
2286    emit_d8(cbuf, $div$$reg < 8 ? 0x03 : 0x04);
2287
2288    // <normal>
2289    // cltd
2290    emit_opcode(cbuf, 0x99);
2291
2292    // idivl (note: must be emitted by the user of this rule)
2293    // <done>
2294  %}
2295
2296  enc_class cdqq_enc(no_rax_rdx_RegL div)
2297  %{
2298    // Full implementation of Java ldiv and lrem; checks for
2299    // special case as described in JVM spec., p.243 & p.271.
2300    //
2301    //         normal case                           special case
2302    //
2303    // input : rax: dividend                         min_long
2304    //         reg: divisor                          -1
2305    //
2306    // output: rax: quotient  (= rax idiv reg)       min_long
2307    //         rdx: remainder (= rax irem reg)       0
2308    //
2309    //  Code sequnce:
2310    //
2311    //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
2312    //    7:   00 00 80
2313    //    a:   48 39 d0                cmp    %rdx,%rax
2314    //    d:   75 08                   jne    17 <normal>
2315    //    f:   33 d2                   xor    %edx,%edx
2316    //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
2317    //   15:   74 05                   je     1c <done>
2318    // 0000000000000017 <normal>:
2319    //   17:   48 99                   cqto
2320    //   19:   48 f7 f9                idiv   $div
2321    // 000000000000001c <done>:
2322
2323    // mov    $0x8000000000000000,%rdx
2324    emit_opcode(cbuf, Assembler::REX_W);
2325    emit_opcode(cbuf, 0xBA);
2326    emit_d8(cbuf, 0x00);
2327    emit_d8(cbuf, 0x00);
2328    emit_d8(cbuf, 0x00);
2329    emit_d8(cbuf, 0x00);
2330    emit_d8(cbuf, 0x00);
2331    emit_d8(cbuf, 0x00);
2332    emit_d8(cbuf, 0x00);
2333    emit_d8(cbuf, 0x80);
2334
2335    // cmp    %rdx,%rax
2336    emit_opcode(cbuf, Assembler::REX_W);
2337    emit_opcode(cbuf, 0x39);
2338    emit_d8(cbuf, 0xD0);
2339
2340    // jne    17 <normal>
2341    emit_opcode(cbuf, 0x75);
2342    emit_d8(cbuf, 0x08);
2343
2344    // xor    %edx,%edx
2345    emit_opcode(cbuf, 0x33);
2346    emit_d8(cbuf, 0xD2);
2347
2348    // cmp    $0xffffffffffffffff,$div
2349    emit_opcode(cbuf, $div$$reg < 8 ? Assembler::REX_W : Assembler::REX_WB);
2350    emit_opcode(cbuf, 0x83);
2351    emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
2352    emit_d8(cbuf, 0xFF);
2353
2354    // je     1e <done>
2355    emit_opcode(cbuf, 0x74);
2356    emit_d8(cbuf, 0x05);
2357
2358    // <normal>
2359    // cqto
2360    emit_opcode(cbuf, Assembler::REX_W);
2361    emit_opcode(cbuf, 0x99);
2362
2363    // idivq (note: must be emitted by the user of this rule)
2364    // <done>
2365  %}
2366
2367  // Opcde enc_class for 8/32 bit immediate instructions with sign-extension
2368  enc_class OpcSE(immI imm)
2369  %{
2370    // Emit primary opcode and set sign-extend bit
2371    // Check for 8-bit immediate, and set sign extend bit in opcode
2372    if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2373      emit_opcode(cbuf, $primary | 0x02);
2374    } else {
2375      // 32-bit immediate
2376      emit_opcode(cbuf, $primary);
2377    }
2378  %}
2379
2380  enc_class OpcSErm(rRegI dst, immI imm)
2381  %{
2382    // OpcSEr/m
2383    int dstenc = $dst$$reg;
2384    if (dstenc >= 8) {
2385      emit_opcode(cbuf, Assembler::REX_B);
2386      dstenc -= 8;
2387    }
2388    // Emit primary opcode and set sign-extend bit
2389    // Check for 8-bit immediate, and set sign extend bit in opcode
2390    if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2391      emit_opcode(cbuf, $primary | 0x02);
2392    } else {
2393      // 32-bit immediate
2394      emit_opcode(cbuf, $primary);
2395    }
2396    // Emit r/m byte with secondary opcode, after primary opcode.
2397    emit_rm(cbuf, 0x3, $secondary, dstenc);
2398  %}
2399
2400  enc_class OpcSErm_wide(rRegL dst, immI imm)
2401  %{
2402    // OpcSEr/m
2403    int dstenc = $dst$$reg;
2404    if (dstenc < 8) {
2405      emit_opcode(cbuf, Assembler::REX_W);
2406    } else {
2407      emit_opcode(cbuf, Assembler::REX_WB);
2408      dstenc -= 8;
2409    }
2410    // Emit primary opcode and set sign-extend bit
2411    // Check for 8-bit immediate, and set sign extend bit in opcode
2412    if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2413      emit_opcode(cbuf, $primary | 0x02);
2414    } else {
2415      // 32-bit immediate
2416      emit_opcode(cbuf, $primary);
2417    }
2418    // Emit r/m byte with secondary opcode, after primary opcode.
2419    emit_rm(cbuf, 0x3, $secondary, dstenc);
2420  %}
2421
2422  enc_class Con8or32(immI imm)
2423  %{
2424    // Check for 8-bit immediate, and set sign extend bit in opcode
2425    if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2426      $$$emit8$imm$$constant;
2427    } else {
2428      // 32-bit immediate
2429      $$$emit32$imm$$constant;
2430    }
2431  %}
2432
2433  enc_class Lbl(label labl)
2434  %{
2435    // JMP, CALL
2436    Label* l = $labl$$label;
2437    emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0);
2438  %}
2439
2440  enc_class LblShort(label labl)
2441  %{
2442    // JMP, CALL
2443    Label* l = $labl$$label;
2444    int disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0;
2445    assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
2446    emit_d8(cbuf, disp);
2447  %}
2448
2449  enc_class opc2_reg(rRegI dst)
2450  %{
2451    // BSWAP
2452    emit_cc(cbuf, $secondary, $dst$$reg);
2453  %}
2454
2455  enc_class opc3_reg(rRegI dst)
2456  %{
2457    // BSWAP
2458    emit_cc(cbuf, $tertiary, $dst$$reg);
2459  %}
2460
2461  enc_class reg_opc(rRegI div)
2462  %{
2463    // INC, DEC, IDIV, IMOD, JMP indirect, ...
2464    emit_rm(cbuf, 0x3, $secondary, $div$$reg & 7);
2465  %}
2466
2467  enc_class Jcc(cmpOp cop, label labl)
2468  %{
2469    // JCC
2470    Label* l = $labl$$label;
2471    $$$emit8$primary;
2472    emit_cc(cbuf, $secondary, $cop$$cmpcode);
2473    emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0);
2474  %}
2475
2476  enc_class JccShort (cmpOp cop, label labl)
2477  %{
2478  // JCC
2479    Label *l = $labl$$label;
2480    emit_cc(cbuf, $primary, $cop$$cmpcode);
2481    int disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0;
2482    assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
2483    emit_d8(cbuf, disp);
2484  %}
2485
2486  enc_class enc_cmov(cmpOp cop)
2487  %{
2488    // CMOV
2489    $$$emit8$primary;
2490    emit_cc(cbuf, $secondary, $cop$$cmpcode);
2491  %}
2492
2493  enc_class enc_cmovf_branch(cmpOp cop, regF dst, regF src)
2494  %{
2495    // Invert sense of branch from sense of cmov
2496    emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
2497    emit_d8(cbuf, ($dst$$reg < 8 && $src$$reg < 8)
2498                  ? (UseXmmRegToRegMoveAll ? 3 : 4)
2499                  : (UseXmmRegToRegMoveAll ? 4 : 5) ); // REX
2500    // UseXmmRegToRegMoveAll ? movaps(dst, src) : movss(dst, src)
2501    if (!UseXmmRegToRegMoveAll) emit_opcode(cbuf, 0xF3);
2502    if ($dst$$reg < 8) {
2503      if ($src$$reg >= 8) {
2504        emit_opcode(cbuf, Assembler::REX_B);
2505      }
2506    } else {
2507      if ($src$$reg < 8) {
2508        emit_opcode(cbuf, Assembler::REX_R);
2509      } else {
2510        emit_opcode(cbuf, Assembler::REX_RB);
2511      }
2512    }
2513    emit_opcode(cbuf, 0x0F);
2514    emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
2515    emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2516  %}
2517
2518  enc_class enc_cmovd_branch(cmpOp cop, regD dst, regD src)
2519  %{
2520    // Invert sense of branch from sense of cmov
2521    emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
2522    emit_d8(cbuf, $dst$$reg < 8 && $src$$reg < 8 ? 4 : 5); // REX
2523
2524    //  UseXmmRegToRegMoveAll ? movapd(dst, src) : movsd(dst, src)
2525    emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
2526    if ($dst$$reg < 8) {
2527      if ($src$$reg >= 8) {
2528        emit_opcode(cbuf, Assembler::REX_B);
2529      }
2530    } else {
2531      if ($src$$reg < 8) {
2532        emit_opcode(cbuf, Assembler::REX_R);
2533      } else {
2534        emit_opcode(cbuf, Assembler::REX_RB);
2535      }
2536    }
2537    emit_opcode(cbuf, 0x0F);
2538    emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
2539    emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2540  %}
2541
2542  enc_class enc_PartialSubtypeCheck()
2543  %{
2544    Register Rrdi = as_Register(RDI_enc); // result register
2545    Register Rrax = as_Register(RAX_enc); // super class
2546    Register Rrcx = as_Register(RCX_enc); // killed
2547    Register Rrsi = as_Register(RSI_enc); // sub class
2548    Label hit, miss;
2549
2550    MacroAssembler _masm(&cbuf);
2551    // Compare super with sub directly, since super is not in its own SSA.
2552    // The compiler used to emit this test, but we fold it in here,
2553    // to allow platform-specific tweaking on sparc.
2554    __ cmpq(Rrax, Rrsi);
2555    __ jcc(Assembler::equal, hit);
2556#ifndef PRODUCT
2557    __ lea(Rrcx, ExternalAddress((address)&SharedRuntime::_partial_subtype_ctr));
2558    __ incrementl(Address(Rrcx, 0));
2559#endif //PRODUCT
2560    __ movq(Rrdi, Address(Rrsi,
2561                          sizeof(oopDesc) +
2562                          Klass::secondary_supers_offset_in_bytes()));
2563    __ movl(Rrcx, Address(Rrdi, arrayOopDesc::length_offset_in_bytes()));
2564    __ addq(Rrdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
2565    __ repne_scan();
2566    __ jcc(Assembler::notEqual, miss);
2567    __ movq(Address(Rrsi,
2568                    sizeof(oopDesc) +
2569                    Klass::secondary_super_cache_offset_in_bytes()),
2570            Rrax);
2571    __ bind(hit);
2572    if ($primary) {
2573      __ xorq(Rrdi, Rrdi);
2574    }
2575    __ bind(miss);
2576  %}
2577
2578  enc_class Java_To_Interpreter(method meth)
2579  %{
2580    // CALL Java_To_Interpreter
2581    // This is the instruction starting address for relocation info.
2582    cbuf.set_inst_mark();
2583    $$$emit8$primary;
2584    // CALL directly to the runtime
2585    emit_d32_reloc(cbuf,
2586                   (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2587                   runtime_call_Relocation::spec(),
2588                   RELOC_DISP32);
2589  %}
2590
2591  enc_class Java_Static_Call(method meth)
2592  %{
2593    // JAVA STATIC CALL
2594    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
2595    // determine who we intended to call.
2596    cbuf.set_inst_mark();
2597    $$$emit8$primary;
2598
2599    if (!_method) {
2600      emit_d32_reloc(cbuf,
2601                     (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2602                     runtime_call_Relocation::spec(),
2603                     RELOC_DISP32);
2604    } else if (_optimized_virtual) {
2605      emit_d32_reloc(cbuf,
2606                     (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2607                     opt_virtual_call_Relocation::spec(),
2608                     RELOC_DISP32);
2609    } else {
2610      emit_d32_reloc(cbuf,
2611                     (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2612                     static_call_Relocation::spec(),
2613                     RELOC_DISP32);
2614    }
2615    if (_method) {
2616      // Emit stub for static call
2617      emit_java_to_interp(cbuf);
2618    }
2619  %}
2620
2621  enc_class Java_Dynamic_Call(method meth)
2622  %{
2623    // JAVA DYNAMIC CALL: a 64-bit immediate load into rax followed by a CALL.
2624    // The immediate is Universe::non_oop_word() and carries an oop relocation.
2625    // Generate  "movq rax, -1", placeholder instruction to load oop-info
2626    // emit_call_dynamic_prologue( cbuf );
2627    cbuf.set_inst_mark();  // mark the start of the movq
2628
2629    // movq rax, -1
2630    emit_opcode(cbuf, Assembler::REX_W);
2631    emit_opcode(cbuf, 0xB8 | RAX_enc);
2632    emit_d64_reloc(cbuf,
2633                   (int64_t) Universe::non_oop_word(),
2634                   oop_Relocation::spec_for_immediate(), RELOC_IMM64);
2635    address virtual_call_oop_addr = cbuf.inst_mark();  // address of the movq; handed to the call's relocation below
2636    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
2637    // who we intended to call.
2638    cbuf.set_inst_mark();  // re-mark: the CALL instruction starts here
2639    $$$emit8$primary;  // primary opcode byte supplied by the instruct definition
2640    emit_d32_reloc(cbuf,
2641                   (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),  // relative to the end of this 4-byte field
2642                   virtual_call_Relocation::spec(virtual_call_oop_addr),
2643                   RELOC_DISP32);
2644  %}
2645
2646  enc_class Java_Compiled_Call(method meth)
2647  %{
2648    // JAVA COMPILED CALL: callq *disp(%rax), disp = from_compiled_offset()
2649    int disp = in_bytes(methodOopDesc:: from_compiled_offset());
2650
2651    // The offset is not guaranteed to fit in a signed byte (the old "isn't
2652    // small" assert is disabled), so the displacement width is chosen from
2653    // the value.  Both bounds are tested: a disp below -0x80 must not be
2654    // truncated to 8 bits.
2655    cbuf.set_inst_mark();
2656    $$$emit8$primary;
2657    if (-0x80 <= disp && disp < 0x80) {
2658      emit_rm(cbuf, 0x01, $secondary, RAX_enc); // mod=01: disp8 follows
2659      emit_d8(cbuf, disp); // Displacement
2660    } else {
2661      emit_rm(cbuf, 0x02, $secondary, RAX_enc); // mod=10: disp32 follows
2662      emit_d32(cbuf, disp); // Displacement
2663    }
2664  %}
2665
2666  enc_class reg_opc_imm(rRegI dst, immI8 shift)
2667  %{
2668    // SAL, SAR, SHR by an 8-bit immediate on a 32-bit register:
2669    // [REX.B] opcode, ModRM (reg field = $secondary, r/m = $dst), imm8.
2670    const int enc = $dst$$reg;
2671    if (enc >= 8) {
2672      emit_opcode(cbuf, Assembler::REX_B);
2673    }
2674    $$$emit8$primary;
2675    emit_rm(cbuf, 0x3, $secondary, enc >= 8 ? enc - 8 : enc);
2676    $$$emit8$shift$$constant;
2677  %}
2678
2679  enc_class reg_opc_imm_wide(rRegL dst, immI8 shift)
2680  %{
2681    // SAL, SAR, SHR by an 8-bit immediate on a 64-bit register.  A REX.W
2682    // prefix is always emitted; REX.B is added for registers 8 and above.
2683    const int enc = $dst$$reg;
2684    const bool extended = (enc >= 8);
2685    emit_opcode(cbuf, extended ? Assembler::REX_WB : Assembler::REX_W);
2686    $$$emit8$primary;
2687    emit_rm(cbuf, 0x3, $secondary, extended ? enc - 8 : enc);
2688    $$$emit8$shift$$constant;
2689  %}
2693
2694  enc_class load_immI(rRegI dst, immI src)
2695  %{
2696    // 32-bit immediate load: [REX.B] opcode 0xB8+reg, then 4 immediate bytes.
2697    const int enc = $dst$$reg;
2698    if (enc >= 8) {
2699      emit_opcode(cbuf, Assembler::REX_B);
2700    }
2701    emit_opcode(cbuf, 0xB8 | (enc >= 8 ? enc - 8 : enc));
2702    $$$emit32$src$$constant;
2703  %}
2704
2705  enc_class load_immL(rRegL dst, immL src)
2706  %{
2707    // 64-bit immediate load: REX.W (plus .B for registers 8 and above),
2708    // opcode 0xB8+reg, then the 8 immediate bytes.
2709    const int enc = $dst$$reg;
2710    const bool extended = (enc >= 8);
2711    emit_opcode(cbuf, extended ? Assembler::REX_WB : Assembler::REX_W);
2712    emit_opcode(cbuf, 0xB8 | (extended ? enc - 8 : enc));
2713    emit_d64(cbuf, $src$$constant);
2714  %}
2717
2718  enc_class load_immUL32(rRegL dst, immUL32 src)
2719  %{
2720    // Same bytes as load_immI (no REX.W, opcode 0xB8+reg, 4-byte immediate),
2721    // but this time we care about zeroes in the high word of the long.
2722    const int enc = $dst$$reg;
2723    if (enc >= 8) {
2724      emit_opcode(cbuf, Assembler::REX_B);
2725    }
2726    emit_opcode(cbuf, 0xB8 | (enc >= 8 ? enc - 8 : enc));
2727    $$$emit32$src$$constant;
2728  %}
2729
2730  enc_class load_immL32(rRegL dst, immL32 src)
2731  %{
2732    // Long register loaded from a 32-bit immediate: REX.W (plus .B for
2733    // registers 8 and above), opcode 0xC7 with a register-direct ModRM
2734    // (reg field 0), then 4 immediate bytes.
2735    const int enc = $dst$$reg;
2736    const bool extended = (enc >= 8);
2737    emit_opcode(cbuf, extended ? Assembler::REX_WB : Assembler::REX_W);
2738    emit_opcode(cbuf, 0xC7);
2739    emit_rm(cbuf, 0x03, 0x00, extended ? enc - 8 : enc);
2740    $$$emit32$src$$constant;
2741  %}
2743
2744  enc_class load_immP31(rRegP dst, immP32 src)
2745  %{
2746    // Same bytes as load_immI (no REX.W, opcode 0xB8+reg, 4-byte immediate),
2747    // but this time we care about zeroes in the high word of the pointer.
2748    const int enc = $dst$$reg;
2749    if (enc >= 8) {
2750      emit_opcode(cbuf, Assembler::REX_B);
2751    }
2752    emit_opcode(cbuf, 0xB8 | (enc >= 8 ? enc - 8 : enc));
2753    $$$emit32$src$$constant;
2754  %}
2755
2756  enc_class load_immP(rRegP dst, immP src)
2757  %{
2758    // 64-bit pointer constant load: REX.W (plus .B for registers 8 and
2759    // above), opcode 0xB8+reg, then the 8 immediate bytes.
2760    const int enc = $dst$$reg;
2761    const bool extended = (enc >= 8);
2762    emit_opcode(cbuf, extended ? Assembler::REX_WB : Assembler::REX_W);
2763    emit_opcode(cbuf, 0xB8 | (extended ? enc - 8 : enc));
2764    // This next part should be generated from ADLC
2765    if (!$src->constant_is_oop()) {
2766      emit_d64(cbuf, $src$$constant);
2767    } else {
2768      // Oop constants get an oop relocation on the immediate.
2769      emit_d64_reloc(cbuf, $src$$constant, relocInfo::oop_type, RELOC_IMM64);
2770    }
2771  %}
2773
2774  enc_class load_immF(regF dst, immF con)
2775  %{  // Emits only a ModRM byte and the float constant; no prefix or opcode bytes are emitted here
2776    // XXX reg_mem doesn't support RIP-relative addressing yet, so the ModRM byte is built by hand
2777    emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // mod=00, reg=low 3 bits of dst, r/m=101
2778    emit_float_constant(cbuf, $con$$constant);
2779  %}
2780
2781  enc_class load_immD(regD dst, immD con)
2782  %{  // Emits only a ModRM byte and the double constant; no prefix or opcode bytes are emitted here
2783    // XXX reg_mem doesn't support RIP-relative addressing yet, so the ModRM byte is built by hand
2784    emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // mod=00, reg=low 3 bits of dst, r/m=101
2785    emit_double_constant(cbuf, $con$$constant);
2786  %}
2787
2788  enc_class load_conF (regF dst, immF con) %{    // Load float constant into an XMM register
2789    emit_opcode(cbuf, 0xF3);  // prefix byte; must precede any REX prefix
2790    if ($dst$$reg >= 8) {
2791      emit_opcode(cbuf, Assembler::REX_R);  // dst is register 8 or above
2792    }
2793    emit_opcode(cbuf, 0x0F);
2794    emit_opcode(cbuf, 0x10);  // F3 0F 10: presumably movss (scalar single load) -- confirm against the ISA manual
2795    emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // mod=00, reg=low 3 bits of dst, r/m=101
2796    emit_float_constant(cbuf, $con$$constant);
2797  %}
2798
2799  enc_class load_conD (regD dst, immD con) %{    // Load double constant into an XMM register
2800    // UseXmmLoadAndClearUpper ? movsd(dst, con) : movlpd(dst, con)
2801    emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);  // prefix: F2 for movsd, 66 for movlpd
2802    if ($dst$$reg >= 8) {
2803      emit_opcode(cbuf, Assembler::REX_R);  // dst is register 8 or above
2804    }
2805    emit_opcode(cbuf, 0x0F);
2806    emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);  // opcode: 10 for movsd, 12 for movlpd
2807    emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // mod=00, reg=low 3 bits of dst, r/m=101
2808    emit_double_constant(cbuf, $con$$constant);
2809  %}
2810
2811  // Encode a reg-reg copy.  If it is useless (presumably dst == src), then empty encoding.
2812  enc_class enc_copy(rRegI dst, rRegI src)
2813  %{
2814    encode_copy(cbuf, $dst$$reg, $src$$reg);  // all of the work is delegated to encode_copy
2815  %}
2816
2817  // Encode xmm reg-reg copy.  If it is useless (presumably dst == src), then empty encoding.
2818  enc_class enc_CopyXD( RegD dst, RegD src ) %{
2819    encode_CopyXD( cbuf, $dst$$reg, $src$$reg );  // all of the work is delegated to encode_CopyXD
2820  %}
2821
2822  enc_class enc_copy_always(rRegI dst, rRegI src)
2823  %{
2824    // 32-bit register-to-register move (opcode 0x8B, register-direct
2825    // ModRM), emitted even when dst == src.  $dst sits in the ModRM reg
2826    // field (extended by REX.R), $src in the r/m field (extended by REX.B).
2827    const int dst_enc = $dst$$reg;
2828    const int src_enc = $src$$reg;
2829    const bool dst_ext = (dst_enc >= 8);
2830    const bool src_ext = (src_enc >= 8);
2831
2832    if (dst_ext && src_ext) {
2833      emit_opcode(cbuf, Assembler::REX_RB);
2834    } else if (dst_ext) {
2835      emit_opcode(cbuf, Assembler::REX_R);
2836    } else if (src_ext) {
2837      emit_opcode(cbuf, Assembler::REX_B);
2838    }
2839
2840    emit_opcode(cbuf, 0x8B);
2841    emit_rm(cbuf, 0x3, dst_ext ? dst_enc - 8 : dst_enc,
2842            src_ext ? src_enc - 8 : src_enc);
2843  %}
2845
2846  enc_class enc_copy_wide(rRegL dst, rRegL src)
2847  %{
2848    // 64-bit register-to-register move (REX.W + opcode 0x8B).  Nothing at
2849    // all is emitted when both operands are the same register.
2850    const int dst_enc = $dst$$reg;
2851    const int src_enc = $src$$reg;
2852
2853    if (dst_enc != src_enc) {
2854      const bool dst_ext = (dst_enc >= 8);
2855      const bool src_ext = (src_enc >= 8);
2856      if (dst_ext) {
2857        emit_opcode(cbuf, src_ext ? Assembler::REX_WRB : Assembler::REX_WR);
2858      } else {
2859        emit_opcode(cbuf, src_ext ? Assembler::REX_WB : Assembler::REX_W);
2860      }
2861      emit_opcode(cbuf, 0x8B);
2862      emit_rm(cbuf, 0x3, dst_ext ? dst_enc - 8 : dst_enc,
2863              src_ext ? src_enc - 8 : src_enc);
2864    }
2865  %}
2872
2873  enc_class Con32(immI src)
2874  %{
2875    // Output the constant $src as a 32-bit immediate (ADLC $$$emit32 form)
2876    $$$emit32$src$$constant;
2877  %}
2878
2879  enc_class Con64(immL src)
2880  %{
2881    // Output the constant $src as a 64-bit immediate into the code buffer.
2882    emit_d64(cbuf, $src$$constant);
2883  %}
2884
2885  enc_class Con32F_as_bits(immF src)
2886  %{
2887    // Output Float immediate bits: the float constant is emitted as a 32-bit integer
2888    jfloat jf = $src$$constant;
2889    jint jf_as_bits = jint_cast(jf);  // presumably the float's raw bit pattern rather than a numeric conversion -- confirm jint_cast
2890    emit_d32(cbuf, jf_as_bits);
2891  %}
2892
2893  enc_class Con16(immI src)
2894  %{
2895    // Output the constant $src as a 16-bit immediate (ADLC $$$emit16 form)
2896    $$$emit16$src$$constant;
2897  %}
2898
2899  // How is this different from Con32??? XXX  (this one calls emit_d32 directly; Con32 uses the $$$emit32 form)
2900  enc_class Con_d32(immI src)
2901  %{
2902    emit_d32(cbuf,$src$$constant);  // 32-bit immediate taken from $src
2903  %}
2904
2905  enc_class conmemref (rRegP t1) %{    // Con32(storeImmI)
2906    // Output immediate memory reference: ModRM byte followed by a zero 32-bit displacement
2907    emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );  // mod=00, reg=$t1 (not masked to 3 bits here), r/m=101
2908    emit_d32(cbuf, 0x00);  // displacement bytes, all zero
2909  %}
2910
2911  enc_class jump_enc(rRegL switch_val, rRegI dest) %{  // Table jump: load the table base into dest, then jmp via [dest + switch_val]
2912    MacroAssembler masm(&cbuf);
2913
2914    Register switch_reg = as_Register($switch_val$$reg);
2915    Register dest_reg   = as_Register($dest$$reg);
2916    address table_base  = masm.address_table_constant(_index2label);  // table built from this node's _index2label
2917
2918    // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
2919    // to do that and the compiler is using that register as one it can allocate.
2920    // So we build it all by hand.
2921    // Address index(noreg, switch_reg, Address::times_1);
2922    // ArrayAddress dispatch(table, index);
2923
2924    Address dispatch(dest_reg, switch_reg, Address::times_1);  // base = dest_reg, index = switch_reg, scale 1
2925
2926    masm.lea(dest_reg, InternalAddress(table_base));  // dest_reg is overwritten with the table base
2927    masm.jmp(dispatch);
2928  %}
2929
2930  enc_class jump_enc_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{  // Table jump with a scaled index plus a constant offset
2931    MacroAssembler masm(&cbuf);
2932
2933    Register switch_reg = as_Register($switch_val$$reg);
2934    Register dest_reg   = as_Register($dest$$reg);
2935    address table_base  = masm.address_table_constant(_index2label);  // table built from this node's _index2label
2936
2937    // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
2938    // to do that and the compiler is using that register as one it can allocate.
2939    // So we build it all by hand.
2940    // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant, (int)$offset$$constant);
2941    // ArrayAddress dispatch(table, index);
2942
2943    Address dispatch(dest_reg, switch_reg, (Address::ScaleFactor)$shift$$constant, (int)$offset$$constant);  // base = dest_reg, index = switch_reg, scale = $shift, disp = $offset
2944
2945    masm.lea(dest_reg, InternalAddress(table_base));  // dest_reg is overwritten with the table base
2946    masm.jmp(dispatch);
2947  %}
2948
2949  enc_class jump_enc_offset(rRegL switch_val, immI2 shift, rRegI dest) %{  // Table jump with a scaled index and no constant offset
2950    MacroAssembler masm(&cbuf);
2951
2952    Register switch_reg = as_Register($switch_val$$reg);
2953    Register dest_reg   = as_Register($dest$$reg);
2954    address table_base  = masm.address_table_constant(_index2label);  // table built from this node's _index2label
2955
2956    // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
2957    // to do that and the compiler is using that register as one it can allocate.
2958    // So we build it all by hand.
2959    // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
2960    // ArrayAddress dispatch(table, index);
2961
2962    Address dispatch(dest_reg, switch_reg, (Address::ScaleFactor)$shift$$constant);  // base = dest_reg, index = switch_reg, scale = $shift
2963    masm.lea(dest_reg, InternalAddress(table_base));  // dest_reg is overwritten with the table base
2964    masm.jmp(dispatch);
2965
2966  %}
2967
2968  enc_class lock_prefix()
2969  %{  // LOCK prefix byte (0xF0), emitted only when os::is_MP() is true
2970    if (os::is_MP()) {
2971      emit_opcode(cbuf, 0xF0); // lock
2972    }
2973  %}
2974
2975  enc_class REX_mem(memory mem)
2976  %{
2977    // Optional REX prefix for a memory operand: REX.B when the base is 8 or
2978    // above, REX.X when the index is.  Nothing is emitted if neither is.
2979    const bool base_ext  = ($mem$$base >= 8);
2980    const bool index_ext = ($mem$$index >= 8);
2981    if (base_ext && index_ext) {
2982      emit_opcode(cbuf, Assembler::REX_XB);
2983    } else if (base_ext) {
2984      emit_opcode(cbuf, Assembler::REX_B);
2985    } else if (index_ext) {
2986      emit_opcode(cbuf, Assembler::REX_X);
2987    }
2988  %}
2989
2990  enc_class REX_mem_wide(memory mem)
2991  %{
2992    // REX.W prefix (always emitted) for a memory operand, with REX.B added
2993    // when the base is 8 or above and REX.X when the index is.
2994    const bool base_ext  = ($mem$$base >= 8);
2995    const bool index_ext = ($mem$$index >= 8);
2996    if (base_ext) {
2997      emit_opcode(cbuf, index_ext ? Assembler::REX_WXB : Assembler::REX_WB);
2998    } else {
2999      emit_opcode(cbuf, index_ext ? Assembler::REX_WX : Assembler::REX_W);
3000    }
3001  %}
3006
3007  // for byte regs: encodings 4..7 get a bare REX prefix, 8 and above get REX.B, 0..3 get nothing
3008  enc_class REX_breg(rRegI reg)
3009  %{
3010    if ($reg$$reg >= 4) {
3011      emit_opcode(cbuf, $reg$$reg < 8 ? Assembler::REX : Assembler::REX_B);
3012    }
3013  %}
3014
3015  // for byte regs
3016  enc_class REX_reg_breg(rRegI dst, rRegI src)
3017  %{
3018    // $src is the byte register: 4..7 force a bare REX, 8 and above need
3019    // REX.B.  $dst contributes REX.R when it is 8 or above.
3020    const int src_enc = $src$$reg;
3021    const bool src_ext = (src_enc >= 8);
3022    if ($dst$$reg >= 8) {
3023      emit_opcode(cbuf, src_ext ? Assembler::REX_RB : Assembler::REX_R);
3024    } else if (src_ext) {
3025      emit_opcode(cbuf, Assembler::REX_B);
3026    } else if (src_enc >= 4) {
3027      emit_opcode(cbuf, Assembler::REX);
3028    }
3029  %}
3030
3031  // for byte regs
3032  enc_class REX_breg_mem(rRegI reg, memory mem)
3033  %{
3034    // Optional REX prefix for a byte register + memory operand pair.
3035    // Selector: 4 = $reg is 8 or above (R), 2 = index is 8 or above (X),
3036    // 1 = base is 8 or above (B).  With no extension bit needed, byte
3037    // registers 4..7 still force a bare REX.
3038    const int reg_enc = $reg$$reg;
3039    const int sel = ((reg_enc >= 8)     ? 4 : 0) |
3040                    (($mem$$index >= 8) ? 2 : 0) |
3041                    (($mem$$base >= 8)  ? 1 : 0);
3042    switch (sel) {
3043    case 0:
3044      if (reg_enc >= 4) {
3045        emit_opcode(cbuf, Assembler::REX);
3046      }
3047      break;
3048    case 1: emit_opcode(cbuf, Assembler::REX_B);   break;
3049    case 2: emit_opcode(cbuf, Assembler::REX_X);   break;
3050    case 3: emit_opcode(cbuf, Assembler::REX_XB);  break;
3051    case 4: emit_opcode(cbuf, Assembler::REX_R);   break;
3052    case 5: emit_opcode(cbuf, Assembler::REX_RB);  break;
3053    case 6: emit_opcode(cbuf, Assembler::REX_RX);  break;
3054    case 7: emit_opcode(cbuf, Assembler::REX_RXB); break;
3055    }
3056  %}
3064
3065  enc_class REX_reg(rRegI reg)
3066  %{  // REX.B prefix when $reg is 8 or above; otherwise nothing is emitted
3067    if ($reg$$reg >= 8) {
3068      emit_opcode(cbuf, Assembler::REX_B);
3069    }
3070  %}
3071
3072  enc_class REX_reg_wide(rRegI reg)
3073  %{
3074    // REX.W prefix, always emitted; REX.B is added when $reg is 8 or above.
3075    emit_opcode(cbuf, $reg$$reg >= 8 ? Assembler::REX_WB : Assembler::REX_W);
3076  %}
3080
3081  enc_class REX_reg_reg(rRegI dst, rRegI src)
3082  %{
3083    // Optional REX prefix for a register pair: REX.R when $dst is 8 or
3084    // above, REX.B when $src is.  Nothing is emitted when both are below 8.
3085    const bool dst_ext = ($dst$$reg >= 8);
3086    const bool src_ext = ($src$$reg >= 8);
3087    if (dst_ext) {
3088      emit_opcode(cbuf, src_ext ? Assembler::REX_RB : Assembler::REX_R);
3089    } else if (src_ext) {
3090      emit_opcode(cbuf, Assembler::REX_B);
3091    }
3092  %}
3095
3096  enc_class REX_reg_reg_wide(rRegI dst, rRegI src)
3097  %{
3098    // REX.W prefix (always emitted) for a register pair; REX.R is added
3099    // when $dst is 8 or above and REX.B when $src is.
3100    const bool dst_ext = ($dst$$reg >= 8);
3101    const bool src_ext = ($src$$reg >= 8);
3102    if (dst_ext) {
3103      emit_opcode(cbuf, src_ext ? Assembler::REX_WRB : Assembler::REX_WR);
3104    } else {
3105      emit_opcode(cbuf, src_ext ? Assembler::REX_WB : Assembler::REX_W);
3106    }
3107  %}
3112
3113  enc_class REX_reg_mem(rRegI reg, memory mem)
3114  %{
3115    // Optional REX prefix for a register + memory operand pair.
3116    // Selector: 4 = $reg is 8 or above (R), 2 = index is 8 or above (X),
3117    // 1 = base is 8 or above (B).  Selector 0 emits nothing.
3118    const int sel = (($reg$$reg >= 8)   ? 4 : 0) |
3119                    (($mem$$index >= 8) ? 2 : 0) |
3120                    (($mem$$base >= 8)  ? 1 : 0);
3121    switch (sel) {
3122    case 0: /* no prefix needed */                 break;
3123    case 1: emit_opcode(cbuf, Assembler::REX_B);   break;
3124    case 2: emit_opcode(cbuf, Assembler::REX_X);   break;
3125    case 3: emit_opcode(cbuf, Assembler::REX_XB);  break;
3126    case 4: emit_opcode(cbuf, Assembler::REX_R);   break;
3127    case 5: emit_opcode(cbuf, Assembler::REX_RB);  break;
3128    case 6: emit_opcode(cbuf, Assembler::REX_RX);  break;
3129    case 7: emit_opcode(cbuf, Assembler::REX_RXB); break;
3130    }
3131  %}
3143
3144  enc_class REX_reg_mem_wide(rRegL reg, memory mem)
3145  %{
3146    // REX.W prefix (always emitted) for a register + memory operand pair.
3147    // Selector: 4 = $reg is 8 or above (R), 2 = index is 8 or above (X),
3148    // 1 = base is 8 or above (B).
3149    const int sel = (($reg$$reg >= 8)   ? 4 : 0) |
3150                    (($mem$$index >= 8) ? 2 : 0) |
3151                    (($mem$$base >= 8)  ? 1 : 0);
3152    switch (sel) {
3153    case 0: emit_opcode(cbuf, Assembler::REX_W);    break;
3154    case 1: emit_opcode(cbuf, Assembler::REX_WB);   break;
3155    case 2: emit_opcode(cbuf, Assembler::REX_WX);   break;
3156    case 3: emit_opcode(cbuf, Assembler::REX_WXB);  break;
3157    case 4: emit_opcode(cbuf, Assembler::REX_WR);   break;
3158    case 5: emit_opcode(cbuf, Assembler::REX_WRB);  break;
3159    case 6: emit_opcode(cbuf, Assembler::REX_WRX);  break;
3160    case 7: emit_opcode(cbuf, Assembler::REX_WRXB); break;
3161    }
3162  %}
3176
3177  enc_class reg_mem(rRegI ereg, memory mem)
3178  %{
3179    // Hands $ereg and the components of $mem (base, index, scale,
3180    // displacement, and whether the displacement is an oop) to
3181    // encode_RegMem.  High registers are handled in encode_RegMem.
3182    encode_RegMem(cbuf, $ereg$$reg,
3183                  $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
3184                  $mem->disp_is_oop());
3185  %}
3189
3190  enc_class RM_opc_mem(immI rm_opcode, memory mem)
3191  %{
3192    // Passes the constant $rm_opcode to encode_RegMem in the register
3193    // position, together with the components of $mem.  High registers are
3194    // handled in encode_RegMem.  The displacement can be an oop
3195    // (disp-as-oop when working with static globals).
3196    const int opc_ext = $rm_opcode$$constant;
3197    encode_RegMem(cbuf, opc_ext,
3198                  $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
3199                  $mem->disp_is_oop());
3200  %}
3206
3207  enc_class reg_lea(rRegI dst, rRegI src0, immI src1)
3208  %{
3209    // Address form [$src0 + $src1] with no index register, encoded against
3210    // $dst.  Index 0x04 indicates no index and scale 0x00 no scale; the
3211    // displacement is never an oop.  (A base of 0xFFFFFFFF would indicate
3212    // no base.)
3213    const int no_index = 0x04;
3214    const int no_scale = 0x00;
3215    encode_RegMem(cbuf, $dst$$reg, $src0$$reg, no_index, no_scale,
3216                  $src1$$constant, false);
3217  %}
3218
3219  enc_class neg_reg(rRegI dst)
3220  %{
3221    // NEG $dst, 32-bit form: [REX.B] 0xF7 with ModRM reg field 3,
3222    // register-direct.
3223    const int enc = $dst$$reg;
3224    if (enc >= 8) {
3225      emit_opcode(cbuf, Assembler::REX_B);
3226    }
3227    emit_opcode(cbuf, 0xF7);
3228    emit_rm(cbuf, 0x3, 0x03, enc >= 8 ? enc - 8 : enc);
3229  %}
3230
3231  enc_class neg_reg_wide(rRegI dst)
3232  %{
3233    // NEG $dst, 64-bit form: REX.W (plus .B for registers 8 and above),
3234    // 0xF7 with ModRM reg field 3, register-direct.
3235    const int enc = $dst$$reg;
3236    const bool extended = (enc >= 8);
3237    emit_opcode(cbuf, extended ? Assembler::REX_WB : Assembler::REX_W);
3238    emit_opcode(cbuf, 0xF7);
3239    emit_rm(cbuf, 0x3, 0x03, extended ? enc - 8 : enc);
3240  %}
3244
3245  enc_class setLT_reg(rRegI dst)
3246  %{
3247    // SETLT $dst: [REX] 0x0F 0x9C, register-direct ModRM.  Byte-register
3248    // encodings 4..7 get a bare REX prefix; 8 and above get REX.B.
3249    const int enc = $dst$$reg;
3250    const bool extended = (enc >= 8);
3251    if (enc >= 4) {
3252      emit_opcode(cbuf, extended ? Assembler::REX_B : Assembler::REX);
3253    }
3254    emit_opcode(cbuf, 0x0F);
3255    emit_opcode(cbuf, 0x9C);
3256    emit_rm(cbuf, 0x3, 0x0, extended ? enc - 8 : enc);
3257  %}
3259
3260  enc_class setNZ_reg(rRegI dst)
3261  %{
3262    // SETNZ $dst: [REX] 0x0F 0x95, register-direct ModRM.  Byte-register
3263    // encodings 4..7 get a bare REX prefix; 8 and above get REX.B.
3264    const int enc = $dst$$reg;
3265    const bool extended = (enc >= 8);
3266    if (enc >= 4) {
3267      emit_opcode(cbuf, extended ? Assembler::REX_B : Assembler::REX);
3268    }
3269    emit_opcode(cbuf, 0x0F);
3270    emit_opcode(cbuf, 0x95);
3271    emit_rm(cbuf, 0x3, 0x0, extended ? enc - 8 : enc);
3272  %}
3274
3275  enc_class enc_cmpLTP(no_rcx_RegI p, no_rcx_RegI q, no_rcx_RegI y,
3276                       rcx_RegI tmp)
3277  %{
3278    // cadd_cmpLT: emits subl p,q ; sbbl tmp,tmp ; andl tmp,y ; addl p,tmp

3280    int tmpReg = $tmp$$reg;  // used unmasked and without REX below; the operand class is rcx_RegI
3281
3282    int penc = $p$$reg;
3283    int qenc = $q$$reg;
3284    int yenc = $y$$reg;
3285
3286    // subl $p,$q
3287    if (penc < 8) {
3288      if (qenc >= 8) {
3289        emit_opcode(cbuf, Assembler::REX_B);  // q is in the r/m field
3290      }
3291    } else {
3292      if (qenc < 8) {
3293        emit_opcode(cbuf, Assembler::REX_R);  // p is in the reg field
3294      } else {
3295        emit_opcode(cbuf, Assembler::REX_RB);
3296      }
3297    }
3298    emit_opcode(cbuf, 0x2B);
3299    emit_rm(cbuf, 0x3, penc & 7, qenc & 7);
3300
3301    // sbbl $tmp, $tmp
3302    emit_opcode(cbuf, 0x1B);
3303    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
3304
3305    // andl $tmp, $y
3306    if (yenc >= 8) {
3307      emit_opcode(cbuf, Assembler::REX_B);  // y is in the r/m field
3308    }
3309    emit_opcode(cbuf, 0x23);
3310    emit_rm(cbuf, 0x3, tmpReg, yenc & 7);
3311
3312    // addl $p,$tmp
3313    if (penc >= 8) {
3314        emit_opcode(cbuf, Assembler::REX_R);  // p is in the reg field
3315    }
3316    emit_opcode(cbuf, 0x03);
3317    emit_rm(cbuf, 0x3, penc & 7, tmpReg);
3318  %}
3319
3320  // Compare the longs and set -1, 0, or 1 into dst
3321  enc_class cmpl3_flag(rRegL src1, rRegL src2, rRegI dst)
3322  %{
3323    int src1enc = $src1$$reg;
3324    int src2enc = $src2$$reg;
3325    int dstenc = $dst$$reg;
3326
3327    // cmpq $src1, $src2
3328    if (src1enc < 8) {
3329      if (src2enc < 8) {
3330        emit_opcode(cbuf, Assembler::REX_W);
3331      } else {
3332        emit_opcode(cbuf, Assembler::REX_WB);
3333      }
3334    } else {
3335      if (src2enc < 8) {
3336        emit_opcode(cbuf, Assembler::REX_WR);
3337      } else {
3338        emit_opcode(cbuf, Assembler::REX_WRB);
3339      }
3340    }
3341    emit_opcode(cbuf, 0x3B);
3342    emit_rm(cbuf, 0x3, src1enc & 7, src2enc & 7);
3343
3344    // movl $dst, -1
3345    if (dstenc >= 8) {
3346      emit_opcode(cbuf, Assembler::REX_B);
3347    }
3348    emit_opcode(cbuf, 0xB8 | (dstenc & 7));
3349    emit_d32(cbuf, -1);
3350
3351    // jl,s done -- skips the setne and movzbl below, leaving -1 in dst
3352    emit_opcode(cbuf, 0x7C);
3353    emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);  // 3 + 3 bytes, plus one prefix byte each when dstenc >= 4
3354
3355    // setne $dst
3356    if (dstenc >= 4) {
3357      emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
3358    }
3359    emit_opcode(cbuf, 0x0F);
3360    emit_opcode(cbuf, 0x95);
3361    emit_opcode(cbuf, 0xC0 | (dstenc & 7));
3362
3363    // movzbl $dst, $dst
3364    if (dstenc >= 4) {
3365      emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);  // dst is in both the reg and r/m fields
3366    }
3367    emit_opcode(cbuf, 0x0F);
3368    emit_opcode(cbuf, 0xB6);
3369    emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
3370  %}
3371
3372  enc_class Push_ResultXD(regD dst) %{  // Stores the x87 top of stack to [rsp], loads it into XMM dst, then adds 8 to rsp
3373    int dstenc = $dst$$reg;
3374
3375    store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [RSP]
3376
3377    // UseXmmLoadAndClearUpper ? movsd dst,[rsp] : movlpd dst,[rsp]
3378    emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
3379    if (dstenc >= 8) {
3380      emit_opcode(cbuf, Assembler::REX_R);  // dst is register 8 or above
3381    }
3382    emit_opcode  (cbuf, 0x0F );
3383    emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12 );
3384    encode_RegMem(cbuf, dstenc, RSP_enc, 0x4, 0, 0, false);  // base rsp, index 0x4, scale 0, disp 0
3385
3386    // add rsp,8
3387    emit_opcode(cbuf, Assembler::REX_W);
3388    emit_opcode(cbuf,0x83);
3389    emit_rm(cbuf,0x3, 0x0, RSP_enc);
3390    emit_d8(cbuf,0x08);
3391  %}
3392
3393  enc_class Push_SrcXD(regD src) %{  // Subtracts 8 from rsp, stores XMM src to [rsp], then loads [rsp] onto the x87 stack
3394    int srcenc = $src$$reg;
3395
3396    // subq rsp,#8
3397    emit_opcode(cbuf, Assembler::REX_W);
3398    emit_opcode(cbuf, 0x83);
3399    emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3400    emit_d8(cbuf, 0x8);
3401
3402    // movsd [rsp],src
3403    emit_opcode(cbuf, 0xF2);
3404    if (srcenc >= 8) {
3405      emit_opcode(cbuf, Assembler::REX_R);  // src is register 8 or above
3406    }
3407    emit_opcode(cbuf, 0x0F);
3408    emit_opcode(cbuf, 0x11);
3409    encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false);  // base rsp, index 0x4, scale 0, disp 0
3410
3411    // fldd [rsp]
3412    emit_opcode(cbuf, 0x66);  // NOTE(review): a 0x66 prefix before an x87 load looks unnecessary -- confirm before touching
3413    emit_opcode(cbuf, 0xDD);
3414    encode_RegMem(cbuf, 0x0, RSP_enc, 0x4, 0, 0, false);
3415  %}
3416
3417
3418  enc_class movq_ld(regD dst, memory mem) %{  // movq load: memory operand $mem into XMM register $dst
3419    MacroAssembler _masm(&cbuf);
3420    Address madr = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);  // address built from the operand's raw components
3421    __ movq(as_XMMRegister($dst$$reg), madr);
3422  %}
3423
3424  enc_class movq_st(memory mem, regD src) %{  // movq store: XMM register $src into memory operand $mem
3425    MacroAssembler _masm(&cbuf);
3426    Address madr = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);  // address built from the operand's raw components
3427    __ movq(madr, as_XMMRegister($src$$reg));
3428  %}
3429
3430  enc_class pshufd_8x8(regF dst, regF src) %{  // Copies src to dst, then applies punpcklbw and pshuflw (mode 0x00) to dst in place
3431    MacroAssembler _masm(&cbuf);
3432
3433    encode_CopyXD(cbuf, $dst$$reg, $src$$reg);  // dst = src
3434    __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg));
3435    __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00);
3436  %}
3437
3438  enc_class pshufd_4x16(regF dst, regF src) %{  // Despite the name, this emits a single pshuflw dst, src with mode 0x00
3439    MacroAssembler _masm(&cbuf);
3440
3441    __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00);
3442  %}
3443
3444  enc_class pshufd(regD dst, regD src, int mode) %{  // pshufd dst, src with the shuffle selector taken from $mode
3445    MacroAssembler _masm(&cbuf);
3446
3447    __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode);
3448  %}
3449
3450  enc_class pxor(regD dst, regD src) %{  // pxor dst, src on XMM registers
3451    MacroAssembler _masm(&cbuf);
3452
3453    __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg));
3454  %}
3455
3456  enc_class mov_i2x(regD dst, rRegI src) %{  // movdl: general register $src into XMM register $dst
3457    MacroAssembler _masm(&cbuf);
3458
3459    __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg));
3460  %}
3461
3462  // obj: object to lock
3463  // box: box address (header location) -- killed
3464  // tmp: rax -- killed
3465  // scr: rbx -- killed
3466  //
3467  // What follows is a direct transliteration of fast_lock() and fast_unlock()
3468  // from i486.ad.  See that file for comments.
3469  // TODO: where possible switch from movq (r, 0) to movl(r,0) and
3470  // use the shorter encoding.  (Movl clears the high-order 32-bits).
3471
3472
3473  enc_class Fast_Lock(rRegP obj, rRegP box, rax_RegI tmp, rRegP scr)
3474  %{  // Inline monitor-enter fast path; EmitSync bits 1 and 2 select alternative code shapes
3475    Register objReg = as_Register((int)$obj$$reg);
3476    Register boxReg = as_Register((int)$box$$reg);
3477    Register tmpReg = as_Register($tmp$$reg);
3478    Register scrReg = as_Register($scr$$reg);
3479    MacroAssembler masm(&cbuf);
3480
3481    // Verify uniqueness of register assignments -- necessary but not sufficient
3482    assert (objReg != boxReg && objReg != tmpReg &&
3483            objReg != scrReg && tmpReg != scrReg, "invariant") ;
3484
3485    if (_counters != NULL) {
3486      masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));  // count every entry when counters are attached
3487    }
3488    if (EmitSync & 1) {
3489        masm.movptr (Address(boxReg, 0), intptr_t(markOopDesc::unused_mark())) ;
3490        masm.cmpq   (rsp, 0) ;  // NOTE(review): presumably leaves "not equal" so the caller takes the slow path -- confirm
3491    } else
3492    if (EmitSync & 2) {
3493        Label DONE_LABEL;
3494        if (UseBiasedLocking) {
3495           // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
3496          masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
3497        }
3498        masm.movl(tmpReg, 0x1);
3499        masm.orq(tmpReg, Address(objReg, 0));  // tmpReg = word at [obj] with bit 0 set
3500        masm.movq(Address(boxReg, 0), tmpReg);  // store it into the box
3501        if (os::is_MP()) {
3502          masm.lock();
3503        }
3504        masm.cmpxchgq(boxReg, Address(objReg, 0)); // Updates tmpReg
3505        masm.jcc(Assembler::equal, DONE_LABEL);  // exchange succeeded
3506
3507        // Recursive locking
3508        masm.subq(tmpReg, rsp);
3509        masm.andq(tmpReg, 7 - os::vm_page_size());
3510        masm.movq(Address(boxReg, 0), tmpReg);  // result of the subtract-and-mask goes into the box
3511
3512        masm.bind(DONE_LABEL);
3513        masm.nop(); // avoid branch to branch
3514    } else {
3515        Label DONE_LABEL, IsInflated, Egress;
3516
3517        masm.movq  (tmpReg, Address(objReg, 0)) ;
3518        masm.testq (tmpReg, 0x02) ;         // inflated vs stack-locked|neutral|biased
3519        masm.jcc   (Assembler::notZero, IsInflated) ;
3520
3521        // it's stack-locked, biased or neutral
3522        // TODO: optimize markword triage order to reduce the number of
3523        // conditional branches in the most common cases.
3524        // Beware -- there's a subtle invariant that fetch of the markword
3525        // at [FETCH], below, will never observe a biased encoding (*101b).
3526        // If this invariant is not held we'll suffer exclusion (safety) failure.
3527
3528        if (UseBiasedLocking) {
3529          masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, _counters);
3530          masm.movq  (tmpReg, Address(objReg, 0)) ;        // [FETCH]
3531        }
3532
3533        masm.orq   (tmpReg, 1) ;
3534        masm.movq  (Address(boxReg, 0), tmpReg) ;
3535        if (os::is_MP()) { masm.lock(); }
3536        masm.cmpxchgq(boxReg, Address(objReg, 0)); // Updates tmpReg
3537        if (_counters != NULL) {
3538           masm.cond_inc32(Assembler::equal,
3539                           ExternalAddress((address) _counters->fast_path_entry_count_addr()));
3540        }
3541        masm.jcc   (Assembler::equal, DONE_LABEL);  // exchange succeeded
3542
3543        // Recursive locking
3544        masm.subq  (tmpReg, rsp);
3545        masm.andq  (tmpReg, 7 - os::vm_page_size());
3546        masm.movq  (Address(boxReg, 0), tmpReg);
3547        if (_counters != NULL) {
3548           masm.cond_inc32(Assembler::equal,
3549                           ExternalAddress((address) _counters->fast_path_entry_count_addr()));
3550        }
3551        masm.jmp   (DONE_LABEL) ;
3552
3553        masm.bind  (IsInflated) ;
3554        // It's inflated
3555
3556        // TODO: someday avoid the ST-before-CAS penalty by
3557        // relocating (deferring) the following ST.
3558        // We should also think about trying a CAS without having
3559        // fetched _owner.  If the CAS is successful we may
3560        // avoid an RTO->RTS upgrade on the $line.
3561        masm.movptr(Address(boxReg, 0), intptr_t(markOopDesc::unused_mark())) ;
3562
3563        masm.movq  (boxReg, tmpReg) ;  // boxReg is reused to hold the value loaded from [obj]
3564        masm.movq  (tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;  // the -2 presumably cancels the 0x02 tag bit tested above -- confirm
3565        masm.testq (tmpReg, tmpReg) ;
3566        masm.jcc   (Assembler::notZero, DONE_LABEL) ;  // owner field is non-zero
3567
3568        // It's inflated and appears unlocked
3569        if (os::is_MP()) { masm.lock(); }
3570        masm.cmpxchgq(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3571        // Intentional fall-through into DONE_LABEL ...
3572
3573        masm.bind  (DONE_LABEL) ;
3574        masm.nop   () ;                 // avoid jmp to jmp
3575    }
3576  %}
3577
3578  // Fast_Unlock: emits the inline monitor-exit fast path into cbuf.
      // The outcome is reported in ZF (ZF=1 success, ZF=0 failure; see the
      // LSuccess / LGoSlowPath comments in the body).
      // obj: object to unlock
3579  // box: box address (displaced header location), killed; constrained to RAX
3580  // tmp: killed scratch (RBX per the original note); cannot be obj nor box
3581  enc_class Fast_Unlock(rRegP obj, rax_RegP box, rRegP tmp)
3582  %{
3583
3584    Register objReg = as_Register($obj$$reg);
3585    Register boxReg = as_Register($box$$reg);
3586    Register tmpReg = as_Register($tmp$$reg);
3587    MacroAssembler masm(&cbuf);
3588
        // EmitSync selects the generated variant: (EmitSync & 4) emits only a
        // compare, (EmitSync & 8) emits a stack-lock-only path, and otherwise
        // the full path that also handles inflated monitors is emitted.
3589    if (EmitSync & 4) {
           // Compares rsp with 0 -- presumably just to force ZF=0 so the slow
           // path is always taken; TODO confirm.
3590       masm.cmpq  (rsp, 0) ;
3591    } else
3592    if (EmitSync & 8) {
3593       Label DONE_LABEL;
3594       if (UseBiasedLocking) {
3595         masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3596       }
3597
3598       // Check whether the displaced header is 0
3599       //(=> recursive unlock)
3600       masm.movq(tmpReg, Address(boxReg, 0));
3601       masm.testq(tmpReg, tmpReg);
3602       masm.jcc(Assembler::zero, DONE_LABEL);
3603
3604       // If not recursive lock, reset the header to displaced header
3605       if (os::is_MP()) {
3606         masm.lock();
3607       }
3608       masm.cmpxchgq(tmpReg, Address(objReg, 0)); // Uses RAX which is box
3609       masm.bind(DONE_LABEL);
3610       masm.nop(); // avoid branch to branch
3611    } else {
3612       Label DONE_LABEL, Stacked, CheckSucc ;
3613
3614       if (UseBiasedLocking) {
3615         masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3616       }
3617
           // tmp = word at offset 0 of the object (its header word).
3618       masm.movq  (tmpReg, Address(objReg, 0)) ;
           // A zero displaced header in the box means a recursive unlock: done.
3619       masm.cmpq  (Address(boxReg, 0), (int)NULL_WORD) ;
3620       masm.jcc   (Assembler::zero, DONE_LABEL) ;
           // Bit 0x02 clear in the header -> not inflated, use the Stacked path.
3621       masm.testq (tmpReg, 0x02) ;
3622       masm.jcc   (Assembler::zero, Stacked) ;
3623
3624       // It's inflated
           // tmpReg is used as the monitor base; the "-2" in every offset below
           // presumably cancels the 0x02 tag bit tested above -- TODO confirm.
           // box = (owner ^ current thread) | recursions: non-zero means this
           // thread is not the owner or the lock is held recursively.
3625       masm.movq  (boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3626       masm.xorq  (boxReg, r15_thread) ;
3627       masm.orq   (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
3628       masm.jcc   (Assembler::notZero, DONE_LABEL) ;
           // box = cxq | EntryList: non-zero means there may be waiters, so a
           // successor check is needed before releasing.
3629       masm.movq  (boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
3630       masm.orq   (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
3631       masm.jcc   (Assembler::notZero, CheckSucc) ;
           // No waiters: release by clearing the owner field.
3632       masm.mov64 (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int)NULL_WORD) ;
3633       masm.jmp   (DONE_LABEL) ;
3634
           // Successor-check path; compiled out when (EmitSync & 65536) is set,
           // in which case CheckSucc is bound just before DONE_LABEL instead.
3635       if ((EmitSync & 65536) == 0) {
3636         Label LSuccess, LGoSlowPath ;
3637         masm.bind  (CheckSucc) ;
3638         masm.cmpq  (Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int)NULL_WORD) ;
3639         masm.jcc   (Assembler::zero, LGoSlowPath) ;
3640
3641         // I'd much rather use lock:andl m->_owner, 0 as it's faster than the
3642         // explicit ST;MEMBAR combination, but masm doesn't currently support
3643         // "ANDQ M,IMM".  Don't use MFENCE here.  lock:add to TOS, xchg, etc
3644         // are all faster when the write buffer is populated.
3645         masm.movptr (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int)NULL_WORD) ;
3646         if (os::is_MP()) {
3647            masm.lock () ; masm.addq (Address(rsp, 0), 0) ;
3648         }
             // Re-check succ after the store + locked add; still set -> success.
3649         masm.cmpq  (Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int)NULL_WORD) ;
3650         masm.jcc   (Assembler::notZero, LSuccess) ;
3651
             // succ is now clear: try to put this thread back as owner
             // (cmpxchg compares the owner field against RAX == NULL).
3652         masm.movptr (boxReg, (int)NULL_WORD) ;                   // box is really RAX
3653         if (os::is_MP()) { masm.lock(); }
3654         masm.cmpxchgq (r15_thread, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
3655         masm.jcc   (Assembler::notEqual, LSuccess) ;
3656         // Intentional fall-through into slow-path
3657
3658         masm.bind  (LGoSlowPath) ;
3659         masm.orl   (boxReg, 1) ;                      // set ZF=0 to indicate failure
3660         masm.jmp   (DONE_LABEL) ;
3661
3662         masm.bind  (LSuccess) ;
3663         masm.testl (boxReg, 0) ;                      // set ZF=1 to indicate success
3664         masm.jmp   (DONE_LABEL) ;
3665       }
3666
           // Stack-locked case: swap the displaced header saved in the box back
           // into the object's header word.
3667       masm.bind  (Stacked) ;
3668       masm.movq  (tmpReg, Address (boxReg, 0)) ;      // re-fetch
3669       if (os::is_MP()) { masm.lock(); }
3670       masm.cmpxchgq(tmpReg, Address(objReg, 0)); // Uses RAX which is box
3671
3672       if (EmitSync & 65536) {
3673          masm.bind (CheckSucc) ;
3674       }
3675       masm.bind(DONE_LABEL);
3676       if (EmitSync & 32768) {
3677          masm.nop();                      // avoid branch to branch
3678       }
3679    }
3680  %}
3681
3682  enc_class enc_String_Compare()
3683  %{
        // Emits an inline comparison of two String objects whose addresses
        // are taken from rsi and rdi.  The result is left in rcx: the
        // difference of the first mismatching characters (rdi string minus
        // rsi string) or, when no mismatch is found within the shorter
        // length, the difference of the counts (rdi count minus rsi count).
        // rax, rbx, rsi and rdi are overwritten and one stack slot is used
        // temporarily for the length difference.
        // NOTE(review): CONT_LABEL is declared but never used, and
        // RCX_GOOD_LABEL is bound but never branched to in this block.
3684    Label RCX_GOOD_LABEL, LENGTH_DIFF_LABEL,
3685          POP_LABEL, DONE_LABEL, CONT_LABEL,
3686          WHILE_HEAD_LABEL;
3687    MacroAssembler masm(&cbuf);
3688
3689    // Get the first character position in both strings
3690    //         [8] char array, [12] offset, [16] count
3691    int value_offset  = java_lang_String::value_offset_in_bytes();
3692    int offset_offset = java_lang_String::offset_offset_in_bytes();
3693    int count_offset  = java_lang_String::count_offset_in_bytes();
3694    int base_offset   = arrayOopDesc::base_offset_in_bytes(T_CHAR);
3695
        // rax = address of first char of the rsi string,
        // rbx = address of first char of the rdi string (rcx is scratch).
3696    masm.movq(rax, Address(rsi, value_offset));
3697    masm.movl(rcx, Address(rsi, offset_offset));
3698    masm.leaq(rax, Address(rax, rcx, Address::times_2, base_offset));
3699    masm.movq(rbx, Address(rdi, value_offset));
3700    masm.movl(rcx, Address(rdi, offset_offset));
3701    masm.leaq(rbx, Address(rbx, rcx, Address::times_2, base_offset));
3702
3703    // Compute the minimum of the string lengths(rsi) and the
3704    // difference of the string lengths (stack)
3705
3706    masm.movl(rdi, Address(rdi, count_offset));
3707    masm.movl(rsi, Address(rsi, count_offset));
3708    masm.movl(rcx, rdi);
3709    masm.subl(rdi, rsi);
3710    masm.pushq(rdi);
        // Flags are still those of the subl: if rdi count <= rsi count, take
        // the rdi count (saved in rcx) as the minimum.
3711    masm.cmovl(Assembler::lessEqual, rsi, rcx);
3712
3713    // Is the minimum length zero?
3714    masm.bind(RCX_GOOD_LABEL);
3715    masm.testl(rsi, rsi);
3716    masm.jcc(Assembler::zero, LENGTH_DIFF_LABEL);
3717
3718    // Load first characters
3719    masm.load_unsigned_word(rcx, Address(rbx, 0));
3720    masm.load_unsigned_word(rdi, Address(rax, 0));
3721
3722    // Compare first characters
3723    masm.subl(rcx, rdi);
3724    masm.jcc(Assembler::notZero,  POP_LABEL);
3725    masm.decrementl(rsi);
3726    masm.jcc(Assembler::zero, LENGTH_DIFF_LABEL);
3727
3728    {
3729      // Check after comparing first character to see if strings are equivalent
3730      Label LSkip2;
3731      // Check if the strings start at same location
3732      masm.cmpq(rbx, rax);
3733      masm.jcc(Assembler::notEqual, LSkip2);
3734
3735      // Check if the length difference is zero (from stack)
3736      masm.cmpl(Address(rsp, 0), 0x0);
3737      masm.jcc(Assembler::equal,  LENGTH_DIFF_LABEL);
3738
3739      // Strings might not be equivalent
3740      masm.bind(LSkip2);
3741    }
3742
3743    // Shift RAX and RBX to the end of the arrays, negate min
        // (the extra +2 bytes skips the first character, already compared)
3744    masm.leaq(rax, Address(rax, rsi, Address::times_2, 2));
3745    masm.leaq(rbx, Address(rbx, rsi, Address::times_2, 2));
3746    masm.negq(rsi);
3747
3748    // Compare the rest of the characters
        // rsi counts up from -remaining to 0 and indexes back from the ends.
3749    masm.bind(WHILE_HEAD_LABEL);
3750    masm.load_unsigned_word(rcx, Address(rbx, rsi, Address::times_2, 0));
3751    masm.load_unsigned_word(rdi, Address(rax, rsi, Address::times_2, 0));
3752    masm.subl(rcx, rdi);
3753    masm.jcc(Assembler::notZero, POP_LABEL);
3754    masm.incrementq(rsi);
3755    masm.jcc(Assembler::notZero, WHILE_HEAD_LABEL);
3756
3757    // Strings are equal up to min length.  Return the length difference.
3758    masm.bind(LENGTH_DIFF_LABEL);
3759    masm.popq(rcx);
3760    masm.jmp(DONE_LABEL);
3761
3762    // Discard the stored length difference
        // (rcx already holds the character difference)
3763    masm.bind(POP_LABEL);
3764    masm.addq(rsp, 8);
3765
3766    // That's it
3767    masm.bind(DONE_LABEL);
3768  %}
3769
3770  enc_class enc_rethrow()
3771  %{
        // Emits a 5-byte "jmp rel32" (opcode 0xE9) to OptoRuntime::rethrow_stub()
        // with a runtime-call relocation.  The displacement is computed from
        // code_end() after the opcode byte, minus the 4 displacement bytes.
3772    cbuf.set_inst_mark();
3773    emit_opcode(cbuf, 0xE9); // jmp entry
3774    emit_d32_reloc(cbuf,
3775                   (int) (OptoRuntime::rethrow_stub() - cbuf.code_end() - 4),
3776                   runtime_call_Relocation::spec(),
3777                   RELOC_DISP32);
3778  %}
3779
3780  enc_class absF_encoding(regF dst)
3781  %{
        // Hand-encodes opcode 0F 54 (ANDPS) with $dst as the register operand
        // and the constant at StubRoutines::amd64::float_sign_mask() as the
        // memory operand.
3782    int dstenc = $dst$$reg;
3783    address signmask_address = (address) StubRoutines::amd64::float_sign_mask();
3784
3785    cbuf.set_inst_mark();
        // Registers 8..15 need a REX.R prefix; the ModRM reg field then only
        // carries the low three bits.
3786    if (dstenc >= 8) {
3787      emit_opcode(cbuf, Assembler::REX_R);
3788      dstenc -= 8;
3789    }
3790    // XXX reg_mem doesn't support RIP-relative addressing yet
3791    emit_opcode(cbuf, 0x0F);
3792    emit_opcode(cbuf, 0x54);
3793    emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3794    emit_d32_reloc(cbuf, signmask_address);
3795  %}
3796
3797  enc_class absD_encoding(regD dst)
3798  %{
        // Hand-encodes opcode 66 0F 54 (ANDPD) with $dst as the register
        // operand and the constant at StubRoutines::amd64::double_sign_mask()
        // as the memory operand.
3799    int dstenc = $dst$$reg;
3800    address signmask_address = (address) StubRoutines::amd64::double_sign_mask();
3801
3802    cbuf.set_inst_mark();
        // The 0x66 prefix is emitted before any REX prefix.
3803    emit_opcode(cbuf, 0x66);
3804    if (dstenc >= 8) {
3805      emit_opcode(cbuf, Assembler::REX_R);
3806      dstenc -= 8;
3807    }
3808    // XXX reg_mem doesn't support RIP-relative addressing yet
3809    emit_opcode(cbuf, 0x0F);
3810    emit_opcode(cbuf, 0x54);
3811    emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3812    emit_d32_reloc(cbuf, signmask_address);
3813  %}
3814
3815  enc_class negF_encoding(regF dst)
3816  %{
        // Hand-encodes opcode 0F 57 (XORPS) with $dst as the register operand
        // and the constant at StubRoutines::amd64::float_sign_flip() as the
        // memory operand.
3817    int dstenc = $dst$$reg;
3818    address signflip_address = (address) StubRoutines::amd64::float_sign_flip();
3819
3820    cbuf.set_inst_mark();
        // Registers 8..15 need a REX.R prefix; the ModRM reg field then only
        // carries the low three bits.
3821    if (dstenc >= 8) {
3822      emit_opcode(cbuf, Assembler::REX_R);
3823      dstenc -= 8;
3824    }
3825    // XXX reg_mem doesn't support RIP-relative addressing yet
3826    emit_opcode(cbuf, 0x0F);
3827    emit_opcode(cbuf, 0x57);
3828    emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3829    emit_d32_reloc(cbuf, signflip_address);
3830  %}
3831
3832  enc_class negD_encoding(regD dst)
3833  %{
        // Hand-encodes opcode 66 0F 57 (XORPD) with $dst as the register
        // operand and the constant at StubRoutines::amd64::double_sign_flip()
        // as the memory operand.
3834    int dstenc = $dst$$reg;
3835    address signflip_address = (address) StubRoutines::amd64::double_sign_flip();
3836
3837    cbuf.set_inst_mark();
        // The 0x66 prefix is emitted before any REX prefix.
3838    emit_opcode(cbuf, 0x66);
3839    if (dstenc >= 8) {
3840      emit_opcode(cbuf, Assembler::REX_R);
3841      dstenc -= 8;
3842    }
3843    // XXX reg_mem doesn't support RIP-relative addressing yet
3844    emit_opcode(cbuf, 0x0F);
3845    emit_opcode(cbuf, 0x57);
3846    emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3847    emit_d32_reloc(cbuf, signflip_address);
3848  %}
3849
3850  enc_class f2i_fixup(rRegI dst, regF src)
3851  %{
        // Conditional fixup of a float->int result already in $dst (the
        // conversion itself is presumably emitted by the caller's encoding).
        // If $dst != 0x80000000 nothing more happens; otherwise $src is stored
        // to a fresh 8-byte stack slot, StubRoutines::amd64::f2i_fixup() is
        // called, and the slot is popped into $dst.
3852    int dstenc = $dst$$reg;
3853    int srcenc = $src$$reg;
3854
3855    // cmpl $dst, #0x80000000
3856    if (dstenc >= 8) {
3857      emit_opcode(cbuf, Assembler::REX_B);
3858    }
3859    emit_opcode(cbuf, 0x81);
3860    emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
3861    emit_d32(cbuf, 0x80000000);
3862
3863    // jne,s done
        // The rel8 is the byte length of everything emitted up to "done:":
        // subq (4) + movss (5) + call (5) + popq (1) = 0xF, plus one byte for
        // each REX prefix required (src >= 8 for movss, dst >= 8 for popq).
        // Keep these constants in sync with the code below.
3864    emit_opcode(cbuf, 0x75);
3865    if (srcenc < 8 && dstenc < 8) {
3866      emit_d8(cbuf, 0xF);
3867    } else if (srcenc >= 8 && dstenc >= 8) {
3868      emit_d8(cbuf, 0x11);
3869    } else {
3870      emit_d8(cbuf, 0x10);
3871    }
3872
3873    // subq rsp, #8
3874    emit_opcode(cbuf, Assembler::REX_W);
3875    emit_opcode(cbuf, 0x83);
3876    emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3877    emit_d8(cbuf, 8);
3878
3879    // movss [rsp], $src
3880    emit_opcode(cbuf, 0xF3);
3881    if (srcenc >= 8) {
3882      emit_opcode(cbuf, Assembler::REX_R);
3883    }
3884    emit_opcode(cbuf, 0x0F);
3885    emit_opcode(cbuf, 0x11);
3886    encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3887
3888    // call f2i_fixup
        // disp32 is relative to the end of the call: code_end() after the
        // opcode byte plus the 4 displacement bytes.
3889    cbuf.set_inst_mark();
3890    emit_opcode(cbuf, 0xE8);
3891    emit_d32_reloc(cbuf,
3892                   (int)
3893                   (StubRoutines::amd64::f2i_fixup() - cbuf.code_end() - 4),
3894                   runtime_call_Relocation::spec(),
3895                   RELOC_DISP32);
3896
3897    // popq $dst
3898    if (dstenc >= 8) {
3899      emit_opcode(cbuf, Assembler::REX_B);
3900    }
3901    emit_opcode(cbuf, 0x58 | (dstenc & 7));
3902
3903    // done:
3904  %}
3905
3906  enc_class f2l_fixup(rRegL dst, regF src)
3907  %{
        // Conditional fixup of a float->long result already in $dst (the
        // conversion itself is presumably emitted by the caller's encoding).
        // $dst is compared with the 64-bit constant stored at
        // StubRoutines::amd64::double_sign_flip(); if they differ nothing more
        // happens, otherwise $src is stored to a fresh 8-byte stack slot,
        // StubRoutines::amd64::f2l_fixup() is called, and the slot is popped
        // into $dst.
3908    int dstenc = $dst$$reg;
3909    int srcenc = $src$$reg;
3910    address const_address = (address) StubRoutines::amd64::double_sign_flip();
3911
3912    // cmpq $dst, [0x8000000000000000]
        // Encoded as opcode 0x39 with a disp32 memory operand; only the
        // equal / not-equal outcome is used below.
3913    cbuf.set_inst_mark();
3914    emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
3915    emit_opcode(cbuf, 0x39);
3916    // XXX reg_mem doesn't support RIP-relative addressing yet
3917    emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
3918    emit_d32_reloc(cbuf, const_address);
3919
3920
3921    // jne,s done
        // The rel8 is the byte length of everything emitted up to "done:":
        // subq (4) + movss (5) + call (5) + popq (1) = 0xF, plus one byte for
        // each REX prefix required (src >= 8 for movss, dst >= 8 for popq).
        // Keep these constants in sync with the code below.
3922    emit_opcode(cbuf, 0x75);
3923    if (srcenc < 8 && dstenc < 8) {
3924      emit_d8(cbuf, 0xF);
3925    } else if (srcenc >= 8 && dstenc >= 8) {
3926      emit_d8(cbuf, 0x11);
3927    } else {
3928      emit_d8(cbuf, 0x10);
3929    }
3930
3931    // subq rsp, #8
3932    emit_opcode(cbuf, Assembler::REX_W);
3933    emit_opcode(cbuf, 0x83);
3934    emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3935    emit_d8(cbuf, 8);
3936
3937    // movss [rsp], $src
3938    emit_opcode(cbuf, 0xF3);
3939    if (srcenc >= 8) {
3940      emit_opcode(cbuf, Assembler::REX_R);
3941    }
3942    emit_opcode(cbuf, 0x0F);
3943    emit_opcode(cbuf, 0x11);
3944    encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3945
3946    // call f2l_fixup
        // disp32 is relative to the end of the call: code_end() after the
        // opcode byte plus the 4 displacement bytes.
3947    cbuf.set_inst_mark();
3948    emit_opcode(cbuf, 0xE8);
3949    emit_d32_reloc(cbuf,
3950                   (int)
3951                   (StubRoutines::amd64::f2l_fixup() - cbuf.code_end() - 4),
3952                   runtime_call_Relocation::spec(),
3953                   RELOC_DISP32);
3954
3955    // popq $dst
3956    if (dstenc >= 8) {
3957      emit_opcode(cbuf, Assembler::REX_B);
3958    }
3959    emit_opcode(cbuf, 0x58 | (dstenc & 7));
3960
3961    // done:
3962  %}
3963
3964  enc_class d2i_fixup(rRegI dst, regD src)
3965  %{
        // Conditional fixup of a double->int result already in $dst (the
        // conversion itself is presumably emitted by the caller's encoding).
        // If $dst != 0x80000000 nothing more happens; otherwise $src is stored
        // to a fresh 8-byte stack slot, StubRoutines::amd64::d2i_fixup() is
        // called, and the slot is popped into $dst.
3966    int dstenc = $dst$$reg;
3967    int srcenc = $src$$reg;
3968
3969    // cmpl $dst, #0x80000000
3970    if (dstenc >= 8) {
3971      emit_opcode(cbuf, Assembler::REX_B);
3972    }
3973    emit_opcode(cbuf, 0x81);
3974    emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
3975    emit_d32(cbuf, 0x80000000);
3976
3977    // jne,s done
        // The rel8 is the byte length of everything emitted up to "done:":
        // subq (4) + movsd (5) + call (5) + popq (1) = 0xF, plus one byte for
        // each REX prefix required (src >= 8 for movsd, dst >= 8 for popq).
        // Keep these constants in sync with the code below.
3978    emit_opcode(cbuf, 0x75);
3979    if (srcenc < 8 && dstenc < 8) {
3980      emit_d8(cbuf, 0xF);
3981    } else if (srcenc >= 8 && dstenc >= 8) {
3982      emit_d8(cbuf, 0x11);
3983    } else {
3984      emit_d8(cbuf, 0x10);
3985    }
3986
3987    // subq rsp, #8
3988    emit_opcode(cbuf, Assembler::REX_W);
3989    emit_opcode(cbuf, 0x83);
3990    emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3991    emit_d8(cbuf, 8);
3992
3993    // movsd [rsp], $src
3994    emit_opcode(cbuf, 0xF2);
3995    if (srcenc >= 8) {
3996      emit_opcode(cbuf, Assembler::REX_R);
3997    }
3998    emit_opcode(cbuf, 0x0F);
3999    emit_opcode(cbuf, 0x11);
4000    encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
4001
4002    // call d2i_fixup
        // disp32 is relative to the end of the call: code_end() after the
        // opcode byte plus the 4 displacement bytes.
4003    cbuf.set_inst_mark();
4004    emit_opcode(cbuf, 0xE8);
4005    emit_d32_reloc(cbuf,
4006                   (int)
4007                   (StubRoutines::amd64::d2i_fixup() - cbuf.code_end() - 4),
4008                   runtime_call_Relocation::spec(),
4009                   RELOC_DISP32);
4010
4011    // popq $dst
4012    if (dstenc >= 8) {
4013      emit_opcode(cbuf, Assembler::REX_B);
4014    }
4015    emit_opcode(cbuf, 0x58 | (dstenc & 7));
4016
4017    // done:
4018  %}
4019
4020  enc_class d2l_fixup(rRegL dst, regD src)
4021  %{
        // Conditional fixup of a double->long result already in $dst (the
        // conversion itself is presumably emitted by the caller's encoding).
        // $dst is compared with the 64-bit constant stored at
        // StubRoutines::amd64::double_sign_flip(); if they differ nothing more
        // happens, otherwise $src is stored to a fresh 8-byte stack slot,
        // StubRoutines::amd64::d2l_fixup() is called, and the slot is popped
        // into $dst.
4022    int dstenc = $dst$$reg;
4023    int srcenc = $src$$reg;
4024    address const_address = (address) StubRoutines::amd64::double_sign_flip();
4025
4026    // cmpq $dst, [0x8000000000000000]
        // Encoded as opcode 0x39 with a disp32 memory operand; only the
        // equal / not-equal outcome is used below.
4027    cbuf.set_inst_mark();
4028    emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
4029    emit_opcode(cbuf, 0x39);
4030    // XXX reg_mem doesn't support RIP-relative addressing yet
4031    emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
4032    emit_d32_reloc(cbuf, const_address);
4033
4034
4035    // jne,s done
        // The rel8 is the byte length of everything emitted up to "done:":
        // subq (4) + movsd (5) + call (5) + popq (1) = 0xF, plus one byte for
        // each REX prefix required (src >= 8 for movsd, dst >= 8 for popq).
        // Keep these constants in sync with the code below.
4036    emit_opcode(cbuf, 0x75);
4037    if (srcenc < 8 && dstenc < 8) {
4038      emit_d8(cbuf, 0xF);
4039    } else if (srcenc >= 8 && dstenc >= 8) {
4040      emit_d8(cbuf, 0x11);
4041    } else {
4042      emit_d8(cbuf, 0x10);
4043    }
4044
4045    // subq rsp, #8
4046    emit_opcode(cbuf, Assembler::REX_W);
4047    emit_opcode(cbuf, 0x83);
4048    emit_rm(cbuf, 0x3, 0x5, RSP_enc);
4049    emit_d8(cbuf, 8);
4050
4051    // movsd [rsp], $src
4052    emit_opcode(cbuf, 0xF2);
4053    if (srcenc >= 8) {
4054      emit_opcode(cbuf, Assembler::REX_R);
4055    }
4056    emit_opcode(cbuf, 0x0F);
4057    emit_opcode(cbuf, 0x11);
4058    encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
4059
4060    // call d2l_fixup
        // disp32 is relative to the end of the call: code_end() after the
        // opcode byte plus the 4 displacement bytes.
4061    cbuf.set_inst_mark();
4062    emit_opcode(cbuf, 0xE8);
4063    emit_d32_reloc(cbuf,
4064                   (int)
4065                   (StubRoutines::amd64::d2l_fixup() - cbuf.code_end() - 4),
4066                   runtime_call_Relocation::spec(),
4067                   RELOC_DISP32);
4068
4069    // popq $dst
4070    if (dstenc >= 8) {
4071      emit_opcode(cbuf, Assembler::REX_B);
4072    }
4073    emit_opcode(cbuf, 0x58 | (dstenc & 7));
4074
4075    // done:
4076  %}
4077
4078  enc_class enc_membar_acquire
4079  %{
        // Intentionally emits no code: the whole body is commented out.
4080    // [jk] not needed currently, if you enable this and it really
4081    // emits code don't forget to remove the "size(0)" line in
4082    // membar_acquire()
4083    // MacroAssembler masm(&cbuf);
4084    // masm.membar(Assembler::Membar_mask_bits(Assembler::LoadStore |
4085    //                                         Assembler::LoadLoad));
4086  %}
4087
4088  enc_class enc_membar_release
4089  %{
        // Intentionally emits no code: the whole body is commented out.
4090    // [jk] not needed currently, if you enable this and it really
4091    // emits code don't forget to remove the "size(0)" line in
4092    // membar_release()
4093    // MacroAssembler masm(&cbuf);
4094    // masm.membar(Assembler::Membar_mask_bits(Assembler::LoadStore |
4095    //                                         Assembler::StoreStore));
4096  %}
4097
4098  enc_class enc_membar_volatile
4099  %{
        // Unlike the acquire/release encodings above, this one does emit
        // code: a MacroAssembler membar with the StoreLoad | StoreStore mask.
4100    MacroAssembler masm(&cbuf);
4101    masm.membar(Assembler::Membar_mask_bits(Assembler::StoreLoad |
4102                                            Assembler::StoreStore));
4103  %}
4104
4105  // Safepoint Poll.  This polls the safepoint page, and causes an
4106  // exception if it is not readable. Unfortunately, it kills
4107  // RFLAGS in the process.
4108  enc_class enc_safepoint_poll
4109  %{
4110    // testl %rax, off(%rip) // Opcode + ModRM + Disp32 == 6 bytes
4111    // XXX reg_mem doesn't support RIP-relative addressing yet
        // The poll_type relocation is attached at the instruction start
        // (inst_mark) before any bytes of the instruction are emitted.
4112    cbuf.set_inst_mark();
4113    cbuf.relocate(cbuf.inst_mark(), relocInfo::poll_type, 0); // XXX
4114    emit_opcode(cbuf, 0x85); // testl
4115    emit_rm(cbuf, 0x0, RAX_enc, 0x5); // 00 rax 101 == 0x5
4116    // cbuf.inst_mark() is beginning of instruction
        // Displacement targets os::get_polling_page().
4117    emit_d32_reloc(cbuf, os::get_polling_page());
4118//                    relocInfo::poll_type,
4119  %}
4120%}
4121
4122
4123//----------FRAME--------------------------------------------------------------
4124// Definition of frame structure and management information.
4125//
4126//  S T A C K   L A Y O U T    Allocators stack-slot number
4127//                             |   (to get allocators register number
4128//  G  Owned by    |        |  v    add OptoReg::stack0())
4129//  r   CALLER     |        |
4130//  o     |        +--------+      pad to even-align allocators stack-slot
4131//  w     V        |  pad0  |        numbers; owned by CALLER
4132//  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
4133//  h     ^        |   in   |  5
4134//        |        |  args  |  4   Holes in incoming args owned by SELF
4135//  |     |        |        |  3
4136//  |     |        +--------+
4137//  V     |        | old out|      Empty on Intel, window on Sparc
4138//        |    old |preserve|      Must be even aligned.
4139//        |     SP-+--------+----> Matcher::_old_SP, even aligned
4140//        |        |   in   |  3   area for Intel ret address
4141//     Owned by    |preserve|      Empty on Sparc.
4142//       SELF      +--------+
4143//        |        |  pad2  |  2   pad to align old SP
4144//        |        +--------+  1
4145//        |        | locks  |  0
4146//        |        +--------+----> OptoReg::stack0(), even aligned
4147//        |        |  pad1  | 11   pad to align new SP
4148//        |        +--------+
4149//        |        |        | 10
4150//        |        | spills |  9   spills
4151//        V        |        |  8   (pad0 slot for callee)
4152//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
4153//        ^        |  out   |  7
4154//        |        |  args  |  6   Holes in outgoing args owned by CALLEE
4155//     Owned by    +--------+
4156//      CALLEE     | new out|  6   Empty on Intel, window on Sparc
4157//        |    new |preserve|      Must be even-aligned.
4158//        |     SP-+--------+----> Matcher::_new_SP, even aligned
4159//        |        |        |
4160//
4161// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
4162//         known from SELF's arguments and the Java calling convention.
4163//         Region 6-7 is determined per call site.
4164// Note 2: If the calling convention leaves holes in the incoming argument
4165//         area, those holes are owned by SELF.  Holes in the outgoing area
4166//         are owned by the CALLEE.  Holes should not be necessary in the
4167//         incoming area, as the Java calling convention is completely under
4168//         the control of the AD file.  Doubles can be sorted and packed to
4169//         avoid holes.  Holes in the outgoing arguments may be necessary for
4170//         varargs C calling conventions.
4171// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
4172//         even aligned with pad0 as needed.
4173//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
4174//         region 6-11 is even aligned; it may be padded out more so that
4175//         the region from SP to FP meets the minimum stack alignment.
4176// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
4177//         alignment.  Region 11, pad1, may be dynamically extended so that
4178//         SP meets the minimum alignment.
4179
4180frame
4181%{
      // Frame description: stack growth direction, the registers and slot
      // counts that make up the compiled-code calling convention, and the
      // helper bodies that locate arguments and return values.
4182  // What direction does stack grow in (assumed to be same for C & Java)
4183  stack_direction(TOWARDS_LOW);
4184
4185  // These three registers define part of the calling convention
4186  // between compiled code and the interpreter.
      // NOTE(review): only two registers are named in the two entries below.
4187  inline_cache_reg(RAX);                // Inline Cache Register
4188  interpreter_method_oop_reg(RBX);      // Method Oop Register when
4189                                        // calling interpreter
4190
4191  // Optional: name the operand used by cisc-spilling to access
4192  // [stack_pointer + offset]
4193  cisc_spilling_operand_name(indOffset32);
4194
4195  // Number of stack slots consumed by locking an object
4196  sync_stack_slots(2);
4197
4198  // Compiled code's Frame Pointer
4199  frame_pointer(RSP);
4200
4201  // Interpreter stores its frame pointer in a register which is
4202  // stored to the stack by I2CAdaptors.
4203  // I2CAdaptors convert from interpreted java to compiled java.
4204  interpreter_frame_pointer(RBP);
4205
4206  // Stack alignment requirement
4207  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
4208
4209  // Number of stack slots between incoming argument block and the start of
4210  // a new frame.  The PROLOG must add this many slots to the stack.  The
4211  // EPILOG must remove this many slots.  amd64 needs two slots for
4212  // return address.
      // NOTE(review): the value passed is 4 (+2 when VerifyStackAtCalls is
      // set), not 2; the other two slots are presumably for the saved frame
      // pointer -- confirm against the prolog.
4213  in_preserve_stack_slots(4 + 2 * VerifyStackAtCalls);
4214
4215  // Number of outgoing stack slots killed above the out_preserve_stack_slots
4216  // for calls to C.  Supports the var-args backing area for register parms.
4217  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
4218
4219  // The after-PROLOG location of the return address.  Location of
4220  // return address specifies a type (REG or STACK) and a number
4221  // representing the register number (i.e. - use a register name) or
4222  // stack slot.
4223  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
4224  // Otherwise, it is above the locks and verification slot and alignment word
4225  return_addr(STACK - 2 +
4226              round_to(2 + 2 * VerifyStackAtCalls +
4227                       Compile::current()->fixed_slots(),
4228                       WordsPerLong * 2));
4229
4230  // Body of function which returns an integer array locating
4231  // arguments either in registers or in stack slots.  Passed an array
4232  // of ideal registers called "sig" and a "length" count.  Stack-slot
4233  // offsets are based on outgoing arguments, i.e. a CALLER setting up
4234  // arguments for a CALLEE.  Incoming stack arguments are
4235  // automatically biased by the preserve_stack_slots field above.
4236
4237  calling_convention
4238  %{
4239    // No difference between ingoing/outgoing just pass false
4240    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
4241  %}
4242
4243  c_calling_convention
4244  %{
4245    // This is obviously always outgoing
4246    (void) SharedRuntime::c_calling_convention(sig_bt, regs, length);
4247  %}
4248
4249  // Location of compiled Java return values.  Same as C for now.
4250  return_value
4251  %{
4252    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
4253           "only return normal values");
4254
        // Both tables are indexed directly by ideal_reg.  The two leading
        // zero entries are placeholders for indices below Op_RegI, which the
        // assert above rules out.  lo[] is the register holding the value
        // (or its low half); hi[] is the paired high half, or OptoReg::Bad
        // for the 32-bit types.
4255    static const int lo[Op_RegL + 1] = {
4256      0,
4257      0,
4258      RAX_num,  // Op_RegI
4259      RAX_num,  // Op_RegP
4260      XMM0_num, // Op_RegF
4261      XMM0_num, // Op_RegD
4262      RAX_num   // Op_RegL
4263    };
4264    static const int hi[Op_RegL + 1] = {
4265      0,
4266      0,
4267      OptoReg::Bad, // Op_RegI
4268      RAX_H_num,    // Op_RegP
4269      OptoReg::Bad, // Op_RegF
4270      XMM0_H_num,   // Op_RegD
4271      RAX_H_num     // Op_RegL
4272    };
4273
4274    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
4275  %}
4276%}
4277
4278//----------ATTRIBUTES---------------------------------------------------------
4279//----------Operand Attributes-------------------------------------------------
4280op_attrib op_cost(0);        // Required cost attribute; the 0 is presumably
                                 // the default for operands that set no op_cost
4281
4282//----------Instruction Attributes---------------------------------------------
    // The value in parentheses is presumably the default used when an
    // instruction does not set the attribute itself -- TODO confirm.
4283ins_attrib ins_cost(100);       // Required cost attribute
4284ins_attrib ins_size(8);         // Required size attribute (in bits)
4285ins_attrib ins_pc_relative(0);  // Required PC Relative flag
4286ins_attrib ins_short_branch(0); // Required flag: is this instruction
4287                                // a non-matching short branch variant
4288                                // of some long branch?
4289ins_attrib ins_alignment(1);    // Required alignment attribute (must
4290                                // be a power of 2) specifies the
4291                                // alignment that some part of the
4292                                // instruction (not necessarily the
4293                                // start) requires.  If > 1, a
4294                                // compute_padding() function must be
4295                                // provided for the instruction
4296
4297//----------OPERANDS-----------------------------------------------------------
4298// Operand definitions must precede instruction definitions for correct parsing
4299// in the ADLC because operands constitute user defined types which are used in
4300// instruction definitions.
4301
4302//----------Simple Operands----------------------------------------------------
4303// Immediate Operands
4304// Integer Immediate: matches any ConI node (no predicate).
4305operand immI()
4306%{
4307  match(ConI);
4308
4309  op_cost(10);
4310  format %{ %}
4311  interface(CONST_INTER);
4312%}
4313
4314// Constant for test vs zero: a ConI whose value is exactly 0.
4315operand immI0()
4316%{
4317  predicate(n->get_int() == 0);
4318  match(ConI);
4319
4320  op_cost(0);
4321  format %{ %}
4322  interface(CONST_INTER);
4323%}
4324
4325// Constant for increment: a ConI whose value is exactly 1.
4326operand immI1()
4327%{
4328  predicate(n->get_int() == 1);
4329  match(ConI);
4330
4331  op_cost(0);
4332  format %{ %}
4333  interface(CONST_INTER);
4334%}
4335
4336// Constant for decrement: a ConI whose value is exactly -1.
4337operand immI_M1()
4338%{
4339  predicate(n->get_int() == -1);
4340  match(ConI);
4341
4342  op_cost(0);
4343  format %{ %}
4344  interface(CONST_INTER);
4345%}
4346
4347// Valid scale values for addressing modes: a ConI in the range 0..3 inclusive.
4348operand immI2()
4349%{
4350  predicate(0 <= n->get_int() && (n->get_int() <= 3));
4351  match(ConI);
4352
4353  format %{ %}
4354  interface(CONST_INTER);
4355%}
4356
4357operand immI8()
4358%{
      // A ConI that fits in a signed 8-bit immediate: -0x80 .. 0x7F.
4359  predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
4360  match(ConI);
4361
4362  op_cost(5);
4363  format %{ %}
4364  interface(CONST_INTER);
4365%}
4366
4367operand immI16()
4368%{
      // A ConI that fits in a signed 16-bit immediate: -32768 .. 32767.
4369  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
4370  match(ConI);
4371
4372  op_cost(10);
4373  format %{ %}
4374  interface(CONST_INTER);
4375%}
4376
4377// Constant for long shifts: a ConI whose value is exactly 32.
4378operand immI_32()
4379%{
4380  predicate( n->get_int() == 32 );
4381  match(ConI);
4382
4383  op_cost(0);
4384  format %{ %}
4385  interface(CONST_INTER);
4386%}
4387
4388// Constant for long shifts: a ConI whose value is exactly 64.
4389operand immI_64()
4390%{
4391  predicate( n->get_int() == 64 );
4392  match(ConI);
4393
4394  op_cost(0);
4395  format %{ %}
4396  interface(CONST_INTER);
4397%}
4398
4399// Pointer Immediate: matches any ConP node (no predicate).
4400operand immP()
4401%{
4402  match(ConP);
4403
4404  op_cost(10);
4405  format %{ %}
4406  interface(CONST_INTER);
4407%}
4408
4409// NULL Pointer Immediate: a ConP whose pointer value is 0.
4410operand immP0()
4411%{
4412  predicate(n->get_ptr() == 0);
4413  match(ConP);
4414
4415  op_cost(5);
4416  format %{ %}
4417  interface(CONST_INTER);
4418%}
4419
4420// Unsigned 31-bit Pointer Immediate
4421// Can be used in both 32-bit signed and 32-bit unsigned insns.
4422// Works for nulls and markOops; not for relocatable (oop) pointers.
4423operand immP31()
4424%{
      // Two conditions: the node's type is not an oop pointer, and no bit at
      // position 31 or above is set in the pointer value.
4425  predicate(!n->as_Type()->type()->isa_oopptr()
4426            && (n->get_ptr() >> 31) == 0);
4427  match(ConP);
4428
4429  op_cost(5);
4430  format %{ %}
4431  interface(CONST_INTER);
4432%}
4433
4434// Long Immediate: matches any ConL node (no predicate).
4435operand immL()
4436%{
4437  match(ConL);
4438
4439  op_cost(20);
4440  format %{ %}
4441  interface(CONST_INTER);
4442%}
4443
4444// Long Immediate 8-bit: a ConL in the signed 8-bit range -0x80 .. 0x7F.
4445operand immL8()
4446%{
4447  predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
4448  match(ConL);
4449
4450  op_cost(5);
4451  format %{ %}
4452  interface(CONST_INTER);
4453%}
4454
4455// Long Immediate 32-bit unsigned: a ConL that survives a round trip
    // through (unsigned int) unchanged.
4456operand immUL32()
4457%{
4458  predicate(n->get_long() == (unsigned int) (n->get_long()));
4459  match(ConL);
4460
4461  op_cost(10);
4462  format %{ %}
4463  interface(CONST_INTER);
4464%}
4465
4466// Long Immediate 32-bit signed: a ConL that survives a round trip
    // through (int) unchanged.
4467operand immL32()
4468%{
4469  predicate(n->get_long() == (int) (n->get_long()));
4470  match(ConL);
4471
4472  op_cost(15);
4473  format %{ %}
4474  interface(CONST_INTER);
4475%}
4476
4477// Long Immediate zero: a ConL whose value is exactly 0.
4478operand immL0()
4479%{
4480  predicate(n->get_long() == 0L);
4481  match(ConL);
4482
4483  op_cost(10);
4484  format %{ %}
4485  interface(CONST_INTER);
4486%}
4487
4488// Constant for increment: a ConL whose value is exactly 1.
4489operand immL1()
4490%{
4491  predicate(n->get_long() == 1);
4492  match(ConL);
4493
4494  format %{ %}
4495  interface(CONST_INTER);
4496%}
4497
4498// Constant for decrement: a ConL whose value is exactly -1.
4499operand immL_M1()
4500%{
4501  predicate(n->get_long() == -1);
4502  match(ConL);
4503
4504  format %{ %}
4505  interface(CONST_INTER);
4506%}
4507
4508// Long Immediate: the value 10 (a ConL equal to 10 and nothing else).
4509operand immL10()
4510%{
4511  predicate(n->get_long() == 10);
4512  match(ConL);
4513
4514  format %{ %}
4515  interface(CONST_INTER);
4516%}
4517
4518// Long immediate from 0 to 127 (inclusive; the upper bound is < 0x80).
4519// Used for a shorter form of long mul by 10.
4520operand immL_127()
4521%{
4522  predicate(0 <= n->get_long() && n->get_long() < 0x80);
4523  match(ConL);
4524
4525  op_cost(10);
4526  format %{ %}
4527  interface(CONST_INTER);
4528%}
4529
4530// Long Immediate: low 32-bit mask, i.e. a ConL equal to 0xFFFFFFFF.
4531operand immL_32bits()
4532%{
4533  predicate(n->get_long() == 0xFFFFFFFFL);
4534  match(ConL);
4535  op_cost(20);
4536
4537  format %{ %}
4538  interface(CONST_INTER);
4539%}
4540
4541// Float Immediate zero: a ConF whose jint_cast() value is 0.  The test is
    // on the cast value rather than a floating-point comparison -- presumably
    // so that only +0.0f matches and -0.0f does not; TODO confirm.
4542operand immF0()
4543%{
4544  predicate(jint_cast(n->getf()) == 0);
4545  match(ConF);
4546
4547  op_cost(5);
4548  format %{ %}
4549  interface(CONST_INTER);
4550%}
4551
4552// Float Immediate: matches any ConF node (no predicate).
4553operand immF()
4554%{
4555  match(ConF);
4556
4557  op_cost(15);
4558  format %{ %}
4559  interface(CONST_INTER);
4560%}
4561
4562// Double Immediate zero: a ConD whose jlong_cast() value is 0.  The test is
    // on the cast value rather than a floating-point comparison -- presumably
    // so that only +0.0 matches and -0.0 does not; TODO confirm.
4563operand immD0()
4564%{
4565  predicate(jlong_cast(n->getd()) == 0);
4566  match(ConD);
4567
4568  op_cost(5);
4569  format %{ %}
4570  interface(CONST_INTER);
4571%}
4572
4573// Double Immediate: matches any ConD node (no predicate).
4574operand immD()
4575%{
4576  match(ConD);
4577
4578  op_cost(15);
4579  format %{ %}
4580  interface(CONST_INTER);
4581%}
4582
4583// Immediates for special shifts (sign extend)
4584
4585// Int Immediate: the value 16 (one of the special-shift constants)
4586operand immI_16()
4587%{
4588  predicate(n->get_int() == 16);
4589  match(ConI);
4590
4591  format %{ %}
4592  interface(CONST_INTER);
4593%}
4594
// Int Immediate: the value 24 (one of the special-shift constants)
4595operand immI_24()
4596%{
4597  predicate(n->get_int() == 24);
4598  match(ConI);
4599
4600  format %{ %}
4601  interface(CONST_INTER);
4602%}
4603
4604// Constant for byte-wide masking
4605operand immI_255()
4606%{
4607  predicate(n->get_int() == 255);
4608  match(ConI);
4609
4610  format %{ %}
4611  interface(CONST_INTER);
4612%}
4613
4614// Constant for short-wide masking
4615operand immI_65535()
4616%{
4617  predicate(n->get_int() == 65535);
4618  match(ConI);
4619
4620  format %{ %}
4621  interface(CONST_INTER);
4622%}
4623
4624// Constant for byte-wide masking
4625operand immL_255()
4626%{
4627  predicate(n->get_long() == 255);
4628  match(ConL);
4629
4630  format %{ %}
4631  interface(CONST_INTER);
4632%}
4633
4634// Constant for short-wide masking
4635operand immL_65535()
4636%{
4637  predicate(n->get_long() == 65535);
4638  match(ConL);
4639
4640  format %{ %}
4641  interface(CONST_INTER);
4642%}
4643
4644// Register Operands
4645// Integer Register
4646operand rRegI()
4647%{
4648  constraint(ALLOC_IN_RC(int_reg));
4649  match(RegI);
4650
4651  match(rax_RegI);
4652  match(rbx_RegI);
4653  match(rcx_RegI);
4654  match(rdx_RegI);
4655  match(rdi_RegI);
4656
4657  format %{ %}
4658  interface(REG_INTER);
4659%}
4660
4661// Special Registers
4662operand rax_RegI()
4663%{
4664  constraint(ALLOC_IN_RC(int_rax_reg));
4665  match(RegI);
4666  match(rRegI);
4667
4668  format %{ "RAX" %}
4669  interface(REG_INTER);
4670%}
4671
4672// Special Registers
4673operand rbx_RegI()
4674%{
4675  constraint(ALLOC_IN_RC(int_rbx_reg));
4676  match(RegI);
4677  match(rRegI);
4678
4679  format %{ "RBX" %}
4680  interface(REG_INTER);
4681%}
4682
4683operand rcx_RegI()
4684%{
4685  constraint(ALLOC_IN_RC(int_rcx_reg));
4686  match(RegI);
4687  match(rRegI);
4688
4689  format %{ "RCX" %}
4690  interface(REG_INTER);
4691%}
4692
4693operand rdx_RegI()
4694%{
4695  constraint(ALLOC_IN_RC(int_rdx_reg));
4696  match(RegI);
4697  match(rRegI);
4698
4699  format %{ "RDX" %}
4700  interface(REG_INTER);
4701%}
4702
4703operand rdi_RegI()
4704%{
4705  constraint(ALLOC_IN_RC(int_rdi_reg));
4706  match(RegI);
4707  match(rRegI);
4708
4709  format %{ "RDI" %}
4710  interface(REG_INTER);
4711%}
4712
4713operand no_rcx_RegI()
4714%{
4715  constraint(ALLOC_IN_RC(int_no_rcx_reg));
4716  match(RegI);
4717  match(rax_RegI);
4718  match(rbx_RegI);
4719  match(rdx_RegI);
4720  match(rdi_RegI);
4721
4722  format %{ %}
4723  interface(REG_INTER);
4724%}
4725
4726operand no_rax_rdx_RegI()
4727%{
4728  constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
4729  match(RegI);
4730  match(rbx_RegI);
4731  match(rcx_RegI);
4732  match(rdi_RegI);
4733
4734  format %{ %}
4735  interface(REG_INTER);
4736%}
4737
4738// Pointer Register
4739operand any_RegP()
4740%{
4741  constraint(ALLOC_IN_RC(any_reg));
4742  match(RegP);
4743  match(rax_RegP);
4744  match(rbx_RegP);
4745  match(rdi_RegP);
4746  match(rsi_RegP);
4747  match(rbp_RegP);
4748  match(r15_RegP);
4749  match(rRegP);
4750
4751  format %{ %}
4752  interface(REG_INTER);
4753%}
4754
4755operand rRegP()
4756%{
4757  constraint(ALLOC_IN_RC(ptr_reg));
4758  match(RegP);
4759  match(rax_RegP);
4760  match(rbx_RegP);
4761  match(rdi_RegP);
4762  match(rsi_RegP);
4763  match(rbp_RegP);
4764  match(r15_RegP);  // See Q&A below about r15_RegP.
4765
4766  format %{ %}
4767  interface(REG_INTER);
4768%}
4769
4770// Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
4771// Answer: Operand match rules govern the DFA as it processes instruction inputs.
4772// It's fine for an instruction input which expects rRegP to match a r15_RegP.
4773// The output of an instruction is controlled by the allocator, which respects
4774// register class masks, not match rules.  Unless an instruction mentions
4775// r15_RegP or any_RegP explicitly as its output, r15 will not be considered
4776// by the allocator as an input.
4777
4778operand no_rax_RegP()
4779%{
4780  constraint(ALLOC_IN_RC(ptr_no_rax_reg));
4781  match(RegP);
4782  match(rbx_RegP);
4783  match(rsi_RegP);
4784  match(rdi_RegP);
4785
4786  format %{ %}
4787  interface(REG_INTER);
4788%}
4789
4790operand no_rbp_RegP()
4791%{
4792  constraint(ALLOC_IN_RC(ptr_no_rbp_reg));
4793  match(RegP);
4794  match(rbx_RegP);
4795  match(rsi_RegP);
4796  match(rdi_RegP);
4797
4798  format %{ %}
4799  interface(REG_INTER);
4800%}
4801
4802operand no_rax_rbx_RegP()
4803%{
4804  constraint(ALLOC_IN_RC(ptr_no_rax_rbx_reg));
4805  match(RegP);
4806  match(rsi_RegP);
4807  match(rdi_RegP);
4808
4809  format %{ %}
4810  interface(REG_INTER);
4811%}
4812
4813// Special Registers
4814// Return a pointer value
4815operand rax_RegP()
4816%{
4817  constraint(ALLOC_IN_RC(ptr_rax_reg));
4818  match(RegP);
4819  match(rRegP);
4820
4821  format %{ %}
4822  interface(REG_INTER);
4823%}
4824
4825// Used in AtomicAdd
4826operand rbx_RegP()
4827%{
4828  constraint(ALLOC_IN_RC(ptr_rbx_reg));
4829  match(RegP);
4830  match(rRegP);
4831
4832  format %{ %}
4833  interface(REG_INTER);
4834%}
4835
4836operand rsi_RegP()
4837%{
4838  constraint(ALLOC_IN_RC(ptr_rsi_reg));
4839  match(RegP);
4840  match(rRegP);
4841
4842  format %{ %}
4843  interface(REG_INTER);
4844%}
4845
4846// Used in rep stosq
4847operand rdi_RegP()
4848%{
4849  constraint(ALLOC_IN_RC(ptr_rdi_reg));
4850  match(RegP);
4851  match(rRegP);
4852
4853  format %{ %}
4854  interface(REG_INTER);
4855%}
4856
4857operand rbp_RegP()
4858%{
4859  constraint(ALLOC_IN_RC(ptr_rbp_reg));
4860  match(RegP);
4861  match(rRegP);
4862
4863  format %{ %}
4864  interface(REG_INTER);
4865%}
4866
4867operand r15_RegP()
4868%{
4869  constraint(ALLOC_IN_RC(ptr_r15_reg));
4870  match(RegP);
4871  match(rRegP);
4872
4873  format %{ %}
4874  interface(REG_INTER);
4875%}
4876
4877operand rRegL()
4878%{
4879  constraint(ALLOC_IN_RC(long_reg));
4880  match(RegL);
4881  match(rax_RegL);
4882  match(rdx_RegL);
4883
4884  format %{ %}
4885  interface(REG_INTER);
4886%}
4887
4888// Special Registers
4889operand no_rax_rdx_RegL()
4890%{
4891  constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
4892  match(RegL);
4893  match(rRegL);
4894
4895  format %{ %}
4896  interface(REG_INTER);
4897%}
4898
// Long register operand intended to exclude RAX.
// NOTE(review): the constraint below is long_no_rax_rdx_reg, the same class
// used by no_rax_rdx_RegL, whose name suggests RDX is excluded as well, yet
// this operand also lists rdx_RegL as a match.  Confirm whether a dedicated
// "no rax" register class was intended.
4899operand no_rax_RegL()
4900%{
4901  constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
4902  match(RegL);
4903  match(rRegL);
4904  match(rdx_RegL);
4905
4906  format %{ %}
4907  interface(REG_INTER);
4908%}
4909
4910operand no_rcx_RegL()
4911%{
4912  constraint(ALLOC_IN_RC(long_no_rcx_reg));
4913  match(RegL);
4914  match(rRegL);
4915
4916  format %{ %}
4917  interface(REG_INTER);
4918%}
4919
4920operand rax_RegL()
4921%{
4922  constraint(ALLOC_IN_RC(long_rax_reg));
4923  match(RegL);
4924  match(rRegL);
4925
4926  format %{ "RAX" %}
4927  interface(REG_INTER);
4928%}
4929
4930operand rcx_RegL()
4931%{
4932  constraint(ALLOC_IN_RC(long_rcx_reg));
4933  match(RegL);
4934  match(rRegL);
4935
4936  format %{ %}
4937  interface(REG_INTER);
4938%}
4939
4940operand rdx_RegL()
4941%{
4942  constraint(ALLOC_IN_RC(long_rdx_reg));
4943  match(RegL);
4944  match(rRegL);
4945
4946  format %{ %}
4947  interface(REG_INTER);
4948%}
4949
4950// Flags register, used as output of compare instructions
4951operand rFlagsReg()
4952%{
4953  constraint(ALLOC_IN_RC(int_flags));
4954  match(RegFlags);
4955
4956  format %{ "RFLAGS" %}
4957  interface(REG_INTER);
4958%}
4959
4960// Flags register, used as output of FLOATING POINT compare instructions
4961operand rFlagsRegU()
4962%{
4963  constraint(ALLOC_IN_RC(int_flags));
4964  match(RegFlags);
4965
4966  format %{ "RFLAGS_U" %}
4967  interface(REG_INTER);
4968%}
4969
4970// Float register operands
4971operand regF()
4972%{
4973  constraint(ALLOC_IN_RC(float_reg));
4974  match(RegF);
4975
4976  format %{ %}
4977  interface(REG_INTER);
4978%}
4979
4980// Double register operands
4981operand regD()
4982%{
4983  constraint(ALLOC_IN_RC(double_reg));
4984  match(RegD);
4985
4986  format %{ %}
4987  interface(REG_INTER);
4988%}
4989
4990
4991//----------Memory Operands----------------------------------------------------
4992// Direct Memory Operand
4993// operand direct(immP addr)
4994// %{
4995//   match(addr);
4996
4997//   format %{ "[$addr]" %}
4998//   interface(MEMORY_INTER) %{
4999//     base(0xFFFFFFFF);
5000//     index(0x4);
5001//     scale(0x0);
5002//     disp($addr);
5003//   %}
5004// %}
5005
5006// Indirect Memory Operand: address is the pointer register itself, [$reg]
5007operand indirect(any_RegP reg)
5008%{
5009  constraint(ALLOC_IN_RC(ptr_reg));
5010  match(reg);
5011
5012  format %{ "[$reg]" %}
5013  interface(MEMORY_INTER) %{
5014    base($reg);
5015    index(0x4);  // No Index
5016    scale(0x0);  // No Scale
5017    disp(0x0);   // No Displacement
5018  %}
5019%}
5020
5021// Indirect Memory Plus Short Offset Operand: [$reg + $off], where the
// offset is a long constant accepted by immL8
5022operand indOffset8(any_RegP reg, immL8 off)
5023%{
5024  constraint(ALLOC_IN_RC(ptr_reg));
5025  match(AddP reg off);
5026
5027  format %{ "[$reg + $off (8-bit)]" %}
5028  interface(MEMORY_INTER) %{
5029    base($reg);
5030    index(0x4);  // No Index
5031    scale(0x0);  // No Scale
5032    disp($off);
5033  %}
5034%}
5035
5036// Indirect Memory Plus Long Offset Operand: [$reg + $off], where the
// offset is a long constant accepted by immL32
5037operand indOffset32(any_RegP reg, immL32 off)
5038%{
5039  constraint(ALLOC_IN_RC(ptr_reg));
5040  match(AddP reg off);
5041
5042  format %{ "[$reg + $off (32-bit)]" %}
5043  interface(MEMORY_INTER) %{
5044    base($reg);
5045    index(0x4);  // No Index
5046    scale(0x0);  // No Scale
5047    disp($off);
5048  %}
5049%}
5050
5051// Indirect Memory Plus Index Register Plus Offset Operand
5052operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
5053%{
5054  constraint(ALLOC_IN_RC(ptr_reg));
5055  match(AddP (AddP reg lreg) off);
5056
5057  op_cost(10);
5058  format %{"[$reg + $off + $lreg]" %}
5059  interface(MEMORY_INTER) %{
5060    base($reg);
5061    index($lreg);
5062    scale(0x0);
5063    disp($off);
5064  %}
5065%}
5066
5067// Indirect Memory Plus Index Register Operand (no scale, no offset)
5068operand indIndex(any_RegP reg, rRegL lreg)
5069%{
5070  constraint(ALLOC_IN_RC(ptr_reg));
5071  match(AddP reg lreg);
5072
5073  op_cost(10);
5074  format %{"[$reg + $lreg]" %}
5075  interface(MEMORY_INTER) %{
5076    base($reg);
5077    index($lreg);
5078    scale(0x0);  // No Scale
5079    disp(0x0);   // No Displacement
5080  %}
5081%}
5082
5083// Indirect Memory Times Scale Plus Index Register
5084operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
5085%{
5086  constraint(ALLOC_IN_RC(ptr_reg));
5087  match(AddP reg (LShiftL lreg scale));
5088
5089  op_cost(10);
5090  format %{"[$reg + $lreg << $scale]" %}
5091  interface(MEMORY_INTER) %{
5092    base($reg);
5093    index($lreg);
5094    scale($scale);
5095    disp(0x0);
5096  %}
5097%}
5098
5099// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5100operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
5101%{
5102  constraint(ALLOC_IN_RC(ptr_reg));
5103  match(AddP (AddP reg (LShiftL lreg scale)) off);
5104
5105  op_cost(10);
5106  format %{"[$reg + $off + $lreg << $scale]" %}
5107  interface(MEMORY_INTER) %{
5108    base($reg);
5109    index($lreg);
5110    scale($scale);
5111    disp($off);
5112  %}
5113%}
5114
5115// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
// Like indIndexScaleOffset, but the index is an int register widened by
// ConvI2L.  The predicate accepts the match only when the lower bound (_lo)
// of a long type reached from the matched node is non-negative.
// Presumably the in(2)->in(3)->in(1) walk goes outer AddP -> inner AddP ->
// LShiftL -> ConvI2L, i.e. the widened index must be known to be >= 0;
// verify against the ideal-graph input layout.
5116operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
5117%{
5118  constraint(ALLOC_IN_RC(ptr_reg));
5119  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5120  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
5121
5122  op_cost(10);
5123  format %{"[$reg + $off + $idx << $scale]" %}
5124  interface(MEMORY_INTER) %{
5125    base($reg);
5126    index($idx);
5127    scale($scale);
5128    disp($off);
5129  %}
5130%}
5131
5132//----------Special Memory Operands--------------------------------------------
5133// Stack Slot Operand - This operand is used for loading and storing temporary
5134//                      values on the stack where a match requires a value to
5135//                      flow through memory.
5136operand stackSlotP(sRegP reg)
5137%{
5138  constraint(ALLOC_IN_RC(stack_slots));
5139  // No match rule because this operand is only generated in matching
5140
5141  format %{ "[$reg]" %}
5142  interface(MEMORY_INTER) %{
5143    base(0x4);   // RSP
5144    index(0x4);  // No Index
5145    scale(0x0);  // No Scale
5146    disp($reg);  // Stack Offset
5147  %}
5148%}
5149
5150operand stackSlotI(sRegI reg)
5151%{
5152  constraint(ALLOC_IN_RC(stack_slots));
5153  // No match rule because this operand is only generated in matching
5154
5155  format %{ "[$reg]" %}
5156  interface(MEMORY_INTER) %{
5157    base(0x4);   // RSP
5158    index(0x4);  // No Index
5159    scale(0x0);  // No Scale
5160    disp($reg);  // Stack Offset
5161  %}
5162%}
5163
5164operand stackSlotF(sRegF reg)
5165%{
5166  constraint(ALLOC_IN_RC(stack_slots));
5167  // No match rule because this operand is only generated in matching
5168
5169  format %{ "[$reg]" %}
5170  interface(MEMORY_INTER) %{
5171    base(0x4);   // RSP
5172    index(0x4);  // No Index
5173    scale(0x0);  // No Scale
5174    disp($reg);  // Stack Offset
5175  %}
5176%}
5177
5178operand stackSlotD(sRegD reg)
5179%{
5180  constraint(ALLOC_IN_RC(stack_slots));
5181  // No match rule because this operand is only generated in matching
5182
5183  format %{ "[$reg]" %}
5184  interface(MEMORY_INTER) %{
5185    base(0x4);   // RSP
5186    index(0x4);  // No Index
5187    scale(0x0);  // No Scale
5188    disp($reg);  // Stack Offset
5189  %}
5190%}
5191operand stackSlotL(sRegL reg)
5192%{
5193  constraint(ALLOC_IN_RC(stack_slots));
5194  // No match rule because this operand is only generated in matching
5195
5196  format %{ "[$reg]" %}
5197  interface(MEMORY_INTER) %{
5198    base(0x4);   // RSP
5199    index(0x4);  // No Index
5200    scale(0x0);  // No Scale
5201    disp($reg);  // Stack Offset
5202  %}
5203%}
5204
5205//----------Conditional Branch Operands----------------------------------------
5206// Comparison Op  - This is the operation of the comparison, and is limited to
5207//                  the following set of codes:
5208//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
5209//
5210// Other attributes of the comparison, such as unsignedness, are specified
5211// by the comparison instruction that sets a condition code flags register.
5212// That result is represented by a flags operand whose subtype is appropriate
5213// to the unsignedness (etc.) of the comparison.
5214//
5215// Later, the instruction which matches both the Comparison Op (a Bool) and
5216// the flags (produced by the Cmp) specifies the coding of the comparison op
5217// by matching a specific subtype of Bool operand below, such as cmpOpU.
5218
5219// Comparison Code, signed compare.
// The values correspond to the x86 condition-code encodings noted per line.
5220operand cmpOp()
5221%{
5222  match(Bool);
5223
5224  format %{ "" %}
5225  interface(COND_INTER) %{
5226    equal(0x4);          // E  / Z
5227    not_equal(0x5);      // NE / NZ
5228    less(0xC);           // L  (signed)
5229    greater_equal(0xD);  // GE (signed)
5230    less_equal(0xE);     // LE (signed)
5231    greater(0xF);        // G  (signed)
5232  %}
5233%}
5234
5235// Comparison Code, unsigned compare.  Used by FP also, with
5236// C2 (unordered) turned into GT or LT already.  The other bits
5237// C0 and C3 are turned into Carry & Zero flags.
// The values correspond to the x86 unsigned condition-code encodings noted
// per line; equal/not_equal are the same as in cmpOp.
5238operand cmpOpU()
5239%{
5240  match(Bool);
5241
5242  format %{ "" %}
5243  interface(COND_INTER) %{
5244    equal(0x4);          // E  / Z
5245    not_equal(0x5);      // NE / NZ
5246    less(0x2);           // B  (below, carry set)
5247    greater_equal(0x3);  // AE (above or equal, carry clear)
5248    less_equal(0x6);     // BE (below or equal)
5249    greater(0x7);        // A  (above)
5250  %}
5251%}
5252
5253
5254//----------OPERAND CLASSES----------------------------------------------------
5255// Operand Classes are groups of operands that are used to simplify
5256// instruction definitions by not requiring the AD writer to specify separate
5257// instructions for every form of operand when the instruction accepts
5258// multiple operand types with the same basic encoding and format.  The classic
5259// case of this is memory operands.
5260
5261opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
5262               indIndexScale, indIndexScaleOffset, indPosIndexScaleOffset);
5263
5264//----------PIPELINE-----------------------------------------------------------
5265// Rules which define the behavior of the target architectures pipeline.
5266pipeline %{
5267
5268//----------ATTRIBUTES---------------------------------------------------------
5269attributes %{
5270  variable_size_instructions;        // Instructions are variable-sized
5271  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
5272  instruction_unit_size = 1;         // Instruction size granularity: 1 byte
5273  instruction_fetch_unit_size = 16;  // The processor fetches one line
5274  instruction_fetch_units = 1;       // of 16 bytes
5275
5276  // List of nop instructions
5277  nops( MachNop );
5278%}
5279
5280//----------RESOURCES----------------------------------------------------------
5281// Resources are the functional units available to the machine
5282
5283// Generic P2/P3 pipeline
5284// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
5285// 3 instructions decoded per cycle.
5286// 2 load/store ops per cycle, 1 branch, 1 FPU,
5287// 3 ALU op, only ALU0 handles mul instructions.
// NOTE(review): the text above says 2 load/store ops per cycle, but three
// memory units (MS0, MS1, MS2) are declared below -- confirm which is intended.
// DECODE, MEM and ALU are masks meaning "any one of" the listed units.
5288resources( D0, D1, D2, DECODE = D0 | D1 | D2,
5289           MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
5290           BR, FPU,
5291           ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
5292
5293//----------PIPELINE DESCRIPTION-----------------------------------------------
5294// Pipeline Description specifies the stages in the machine's pipeline
5295
5296// Generic P2/P3 pipeline
5297pipe_desc(S0, S1, S2, S3, S4, S5);
5298
5299//----------PIPELINE CLASSES---------------------------------------------------
5300// Pipeline Classes describe the stages in which input and output are
5301// referenced by the hardware pipeline.
5302
5303// Naming convention: ialu or fpu
5304// Then: _reg
5305// Then: _reg if there is a 2nd register
5306// Then: _long if it's a pair of instructions implementing a long
5307// Then: _fat if it requires the big decoder
5308//   Or: _mem if it requires the big decoder and a memory unit.
5309
5310// Integer ALU reg operation
5311pipe_class ialu_reg(rRegI dst)
5312%{
5313    single_instruction;
5314    dst    : S4(write);
5315    dst    : S3(read);
5316    DECODE : S0;        // any decoder
5317    ALU    : S3;        // any alu
5318%}
5319
5320// Long ALU reg operation
5321pipe_class ialu_reg_long(rRegL dst)
5322%{
5323    instruction_count(2);
5324    dst    : S4(write);
5325    dst    : S3(read);
5326    DECODE : S0(2);     // any 2 decoders
5327    ALU    : S3(2);     // both alus
5328%}
5329
5330// Integer ALU reg operation using big decoder
5331pipe_class ialu_reg_fat(rRegI dst)
5332%{
5333    single_instruction;
5334    dst    : S4(write);
5335    dst    : S3(read);
5336    D0     : S0;        // big decoder only
5337    ALU    : S3;        // any alu
5338%}
5339
5340// Long ALU reg operation using big decoder
5341pipe_class ialu_reg_long_fat(rRegL dst)
5342%{
5343    instruction_count(2);
5344    dst    : S4(write);
5345    dst    : S3(read);
5346    D0     : S0(2);     // big decoder only; twice
5347    ALU    : S3(2);     // any 2 alus
5348%}
5349
5350// Integer ALU reg-reg operation
5351pipe_class ialu_reg_reg(rRegI dst, rRegI src)
5352%{
5353    single_instruction;
5354    dst    : S4(write);
5355    src    : S3(read);
5356    DECODE : S0;        // any decoder
5357    ALU    : S3;        // any alu
5358%}
5359
5360// Long ALU reg-reg operation
5361pipe_class ialu_reg_reg_long(rRegL dst, rRegL src)
5362%{
5363    instruction_count(2);
5364    dst    : S4(write);
5365    src    : S3(read);
5366    DECODE : S0(2);     // any 2 decoders
5367    ALU    : S3(2);     // both alus
5368%}
5369
5370// Integer ALU reg-reg operation using big decoder.
// Note that, despite the name, src is declared as a memory operand here.
5371pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
5372%{
5373    single_instruction;
5374    dst    : S4(write);
5375    src    : S3(read);
5376    D0     : S0;        // big decoder only
5377    ALU    : S3;        // any alu
5378%}
5379
5380// Long ALU reg-reg operation using big decoder
5381pipe_class ialu_reg_reg_long_fat(rRegL dst, rRegL src)
5382%{
5383    instruction_count(2);
5384    dst    : S4(write);
5385    src    : S3(read);
5386    D0     : S0(2);     // big decoder only; twice
5387    ALU    : S3(2);     // 2 alu uses, any alu
5388%}
5389
5390// Integer ALU reg-mem operation
5391pipe_class ialu_reg_mem(rRegI dst, memory mem)
5392%{
5393    single_instruction;
5394    dst    : S5(write);
5395    mem    : S3(read);
5396    D0     : S0;        // big decoder only
5397    ALU    : S4;        // any alu
5398    MEM    : S3;        // any mem
5399%}
5400
5401// Integer mem operation (prefetch)
5402pipe_class ialu_mem(memory mem)
5403%{
5404    single_instruction;
5405    mem    : S3(read);
5406    D0     : S0;        // big decoder only
5407    MEM    : S3;        // any mem
5408%}
5409
5410// Integer Store to Memory
5411pipe_class ialu_mem_reg(memory mem, rRegI src)
5412%{
5413    single_instruction;
5414    mem    : S3(read);
5415    src    : S5(read);
5416    D0     : S0;        // big decoder only
5417    ALU    : S4;        // any alu
5418    MEM    : S3;
5419%}
5420
5421// // Long Store to Memory
5422// pipe_class ialu_mem_long_reg(memory mem, rRegL src)
5423// %{
5424//     instruction_count(2);
5425//     mem    : S3(read);
5426//     src    : S5(read);
5427//     D0     : S0(2);          // big decoder only; twice
5428//     ALU    : S4(2);     // any 2 alus
5429//     MEM    : S3(2);  // Both mems
5430// %}
5431
5432// Integer Store to Memory
5433pipe_class ialu_mem_imm(memory mem)
5434%{
5435    single_instruction;
5436    mem    : S3(read);
5437    D0     : S0;        // big decoder only
5438    ALU    : S4;        // any alu
5439    MEM    : S3;
5440%}
5441
5442// Integer ALU0 reg-reg operation
5443pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
5444%{
5445    single_instruction;
5446    dst    : S4(write);
5447    src    : S3(read);
5448    D0     : S0;        // Big decoder only
5449    ALU0   : S3;        // only alu0
5450%}
5451
5452// Integer ALU0 reg-mem operation
5453pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
5454%{
5455    single_instruction;
5456    dst    : S5(write);
5457    mem    : S3(read);
5458    D0     : S0;        // big decoder only
5459    ALU0   : S4;        // ALU0 only
5460    MEM    : S3;        // any mem
5461%}
5462
5463// Integer ALU reg-reg operation
5464pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
5465%{
5466    single_instruction;
5467    cr     : S4(write);
5468    src1   : S3(read);
5469    src2   : S3(read);
5470    DECODE : S0;        // any decoder
5471    ALU    : S3;        // any alu
5472%}
5473
5474// Integer ALU reg-imm operation
5475pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
5476%{
5477    single_instruction;
5478    cr     : S4(write);
5479    src1   : S3(read);
5480    DECODE : S0;        // any decoder
5481    ALU    : S3;        // any alu
5482%}
5483
5484// Integer ALU reg-mem operation
5485pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
5486%{
5487    single_instruction;
5488    cr     : S4(write);
5489    src1   : S3(read);
5490    src2   : S3(read);
5491    D0     : S0;        // big decoder only
5492    ALU    : S4;        // any alu
5493    MEM    : S3;
5494%}
5495
5496// Four-instruction sequence that reads three integer registers (p, q, y).
// No register write and no ALU resource are declared for it.
// NOTE(review): the name suggests a compare-less-than idiom rather than a
// plain conditional move -- confirm against the instructions that use it.
5497pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
5498%{
5499    instruction_count(4);
5500    y      : S4(read);
5501    q      : S3(read);
5502    p      : S3(read);
5503    DECODE : S0(4);     // 4 decodes, any decoder
5504%}
5505
5506// Conditional move reg-reg
5507pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
5508%{
5509    single_instruction;
5510    dst    : S4(write);
5511    src    : S3(read);
5512    cr     : S3(read);
5513    DECODE : S0;        // any decoder
5514%}
5515
5516// Conditional move reg-mem
5517pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
5518%{
5519    single_instruction;
5520    dst    : S4(write);
5521    src    : S3(read);
5522    cr     : S3(read);
5523    DECODE : S0;        // any decoder
5524    MEM    : S3;
5525%}
5526
5527// Conditional move reg-reg long
// NOTE(review): declared single_instruction yet reserves two decoder slots,
// unlike pipe_cmov_reg which reserves one -- confirm this is intentional.
5528pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
5529%{
5530    single_instruction;
5531    dst    : S4(write);
5532    src    : S3(read);
5533    cr     : S3(read);
5534    DECODE : S0(2);     // any 2 decoders
5535%}
5536
5537// XXX
5538// // Conditional move double reg-reg
5539// pipe_class pipe_cmovD_reg( rFlagsReg cr, regDPR1 dst, regD src)
5540// %{
5541//     single_instruction;
5542//     dst    : S4(write);
5543//     src    : S3(read);
5544//     cr     : S3(read);
5545//     DECODE : S0;     // any decoder
5546// %}
5547
5548// Float single-register operation (dst is declared as read only; no write
// stage is listed)
5549pipe_class fpu_reg(regD dst)
5550%{
5551    instruction_count(2);
5552    dst    : S3(read);
5553    DECODE : S0(2);     // any 2 decoders
5554    FPU    : S3;
5555%}
5556
5557// Float reg-reg operation
5558pipe_class fpu_reg_reg(regD dst, regD src)
5559%{
5560    instruction_count(2);
5561    dst    : S4(write);
5562    src    : S3(read);
5563    DECODE : S0(2);     // any 2 decoders
5564    FPU    : S3;
5565%}
5566
5567// Float reg-reg operation
5568pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
5569%{
5570    instruction_count(3);
5571    dst    : S4(write);
5572    src1   : S3(read);
5573    src2   : S3(read);
5574    DECODE : S0(3);     // any 3 decoders
5575    FPU    : S3(2);
5576%}
5577
5578// Float operation with one destination and three source registers
5579pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
5580%{
5581    instruction_count(4);
5582    dst    : S4(write);
5583    src1   : S3(read);
5584    src2   : S3(read);
5585    src3   : S3(read);
5586    DECODE : S0(4);     // 4 decodes, any decoder
5587    FPU    : S3(2);
5588%}
5589
5590// Float operation with a register destination, one memory source and two
// register sources
5591pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
5592%{
5593    instruction_count(4);
5594    dst    : S4(write);
5595    src1   : S3(read);
5596    src2   : S3(read);
5597    src3   : S3(read);
5598    DECODE : S1(3);     // any 3 decoders
5599    D0     : S0;        // Big decoder only
5600    FPU    : S3(2);
5601    MEM    : S3;        // any mem
5602%}
5603
5604// Float reg-mem operation
5605pipe_class fpu_reg_mem(regD dst, memory mem)
5606%{
5607    instruction_count(2);
5608    dst    : S5(write);
5609    mem    : S3(read);
5610    D0     : S0;        // big decoder only
5611    DECODE : S1;        // any decoder for FPU POP
5612    FPU    : S4;
5613    MEM    : S3;        // any mem
5614%}
5615
5616// Float reg-mem operation
5617pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
5618%{
5619    instruction_count(3);
5620    dst    : S5(write);
5621    src1   : S3(read);
5622    mem    : S3(read);
5623    D0     : S0;        // big decoder only
5624    DECODE : S1(2);     // any decoder for FPU POP
5625    FPU    : S4;
5626    MEM    : S3;        // any mem
5627%}
5628
5629// Float mem-reg operation
5630pipe_class fpu_mem_reg(memory mem, regD src)
5631%{
5632    instruction_count(2);
5633    src    : S5(read);
5634    mem    : S3(read);
5635    DECODE : S0;        // any decoder for FPU PUSH
5636    D0     : S1;        // big decoder only
5637    FPU    : S4;
5638    MEM    : S3;        // any mem
5639%}
5640
5641pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
5642%{
5643    instruction_count(3);
5644    src1   : S3(read);
5645    src2   : S3(read);
5646    mem    : S3(read);
5647    DECODE : S0(2);     // any decoder for FPU PUSH
5648    D0     : S1;        // big decoder only
5649    FPU    : S4;
5650    MEM    : S3;        // any mem
5651%}
5652
5653pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
5654%{
5655    instruction_count(3);
5656    src1   : S3(read);
5657    src2   : S3(read);
5658    mem    : S4(read);
5659    DECODE : S0;        // any decoder for FPU PUSH
5660    D0     : S0(2);     // big decoder only
5661    FPU    : S4;
5662    MEM    : S3(2);     // any mem
5663%}
5664
5665pipe_class fpu_mem_mem(memory dst, memory src1)
5666%{
5667    instruction_count(2);
5668    src1   : S3(read);
5669    dst    : S4(read);
5670    D0     : S0(2);     // big decoder only
5671    MEM    : S3(2);     // any mem
5672%}
5673
5674pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
5675%{
5676    instruction_count(3);
5677    src1   : S3(read);
5678    src2   : S3(read);
5679    dst    : S4(read);
5680    D0     : S0(3);     // big decoder only
5681    FPU    : S4;
5682    MEM    : S3(3);     // any mem
5683%}
5684
5685pipe_class fpu_mem_reg_con(memory mem, regD src1)
5686%{
5687    instruction_count(3);
5688    src1   : S4(read);
5689    mem    : S4(read);
5690    DECODE : S0;        // any decoder for FPU PUSH
5691    D0     : S0(2);     // big decoder only
5692    FPU    : S4;
5693    MEM    : S3(2);     // any mem
5694%}
5695
5696// Float load constant
5697pipe_class fpu_reg_con(regD dst)
5698%{
5699    instruction_count(2);
5700    dst    : S5(write);
5701    D0     : S0;        // big decoder only for the load
5702    DECODE : S1;        // any decoder for FPU POP
5703    FPU    : S4;
5704    MEM    : S3;        // any mem
5705%}
5706
5707// Float load constant
5708pipe_class fpu_reg_reg_con(regD dst, regD src)
5709%{
5710    instruction_count(3);
5711    dst    : S5(write);
5712    src    : S3(read);
5713    D0     : S0;        // big decoder only for the load
5714    DECODE : S1(2);     // any decoder for FPU POP
5715    FPU    : S4;
5716    MEM    : S3;        // any mem
5717%}
5718
5719// UnConditional branch
5720pipe_class pipe_jmp(label labl)
5721%{
5722    single_instruction;
5723    BR   : S3;
5724%}
5725
5726// Conditional branch
5727pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
5728%{
5729    single_instruction;
5730    cr    : S1(read);
5731    BR    : S3;
5732%}
5733
5734// Allocation idiom
// Modeled as a single serializing instruction with a fixed latency of 6:
// reads heap_ptr in S3, writes dst in S5, and also occupies the branch unit.
5735pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
5736%{
5737    instruction_count(1); force_serialization;
5738    fixed_latency(6);
5739    heap_ptr : S3(read);
5740    DECODE   : S0(3);   // 3 decodes, any decoder
5741    D0       : S2;      // big decoder
5742    MEM      : S3;      // any mem
5743    ALU      : S3(2);   // 2 alu uses, any alu
5744    dst      : S5(write);
5745    BR       : S5;
5746%}
5747
5748// Generic big/slow expanded idiom
5749pipe_class pipe_slow()
5750%{
5751    instruction_count(10); multiple_bundles; force_serialization;
5752    fixed_latency(100);
5753    D0  : S0(2);
5754    MEM : S3(2);
5755%}
5756
5757// The real do-nothing guy
5758pipe_class empty()
5759%{
5760    instruction_count(0);
5761%}
5762
5763// Define the class for the Nop node
5764define
5765%{
5766   MachNop = empty;
5767%}
5768
5769%}
5770
5771//----------INSTRUCTIONS-------------------------------------------------------
5772//
5773// match      -- States which machine-independent subtree may be replaced
5774//               by this instruction.
5775// ins_cost   -- The estimated cost of this instruction is used by instruction
5776//               selection to identify a minimum cost tree of machine
5777//               instructions that matches a tree of machine-independent
5778//               instructions.
5779// format     -- A string providing the disassembly for this instruction.
5780//               The value of an instruction's operand may be inserted
5781//               by referring to it with a '$' prefix.
5782// opcode     -- Three instruction opcodes may be provided.  These are referred
5783//               to within an encode class as $primary, $secondary, and $tertiary
5784//               respectively.  The primary opcode is commonly used to
5785//               indicate the type of machine instruction, while secondary
5786//               and tertiary are often used for prefix options or addressing
5787//               modes.
5788// ins_encode -- A list of encode classes with parameters. The encode class
5789//               name must have been defined in an 'enc_class' specification
5790//               in the encode section of the architecture description.
5791
5792
5793//----------Load/Store/Move Instructions---------------------------------------
5794//----------Load Instructions--------------------------------------------------
5795
5796// Load Byte (8 bit signed): LoadB into an int register, printed as movsbl
5797instruct loadB(rRegI dst, memory mem)
5798%{
5799  match(Set dst (LoadB mem));
5800
5801  ins_cost(125);
5802  format %{ "movsbl  $dst, $mem\t# byte" %}
5803  opcode(0x0F, 0xBE); // two opcode bytes: $primary = 0F, $secondary = BE
5804  ins_encode(REX_reg_mem(dst, mem), OpcP, OpcS, reg_mem(dst, mem)); // listed order: REX encode class, the opcode bytes, then the reg/mem operand encoding
5805  ins_pipe(ialu_reg_mem);
5806%}
5807
5808// Load Byte (8 bit signed) into long
5809// instruct loadB2L(rRegL dst, memory mem)
5810// %{
5811//   match(Set dst (ConvI2L (LoadB mem)));
5812
5813//   ins_cost(125);
5814//   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
5815//   opcode(0x0F, 0xBE);
5816//   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, OpcS, reg_mem(dst, mem));
5817//   ins_pipe(ialu_reg_mem);
5818// %}
5819
5820// Load Byte (8 bit UNsigned): a LoadB masked by bytemask is matched as one movzbl
5821instruct loadUB(rRegI dst, memory mem, immI_255 bytemask)
5822%{
5823  match(Set dst (AndI (LoadB mem) bytemask)); // the AndI and the LoadB are both absorbed by this rule
5824
5825  ins_cost(125);
5826  format %{ "movzbl  $dst, $mem\t# ubyte" %}
5827  opcode(0x0F, 0xB6);
5828  ins_encode(REX_reg_mem(dst, mem), OpcP, OpcS, reg_mem(dst, mem)); // bytemask is not encoded; NOTE(review): immI_255 presumably only matches the constant 255 -- confirm in the operand section
5829  ins_pipe(ialu_reg_mem);
5830%}
5831
5832// Load Byte (8 bit UNsigned) into long
5833// instruct loadUB2L(rRegL dst, memory mem, immI_255 bytemask)
5834// %{
5835//   match(Set dst (ConvI2L (AndI (LoadB mem) bytemask)));
5836
5837//   ins_cost(125);
5838//   format %{ "movzbl  $dst, $mem\t# ubyte -> long" %}
5839//   opcode(0x0F, 0xB6);
5840//   ins_encode(REX_reg_mem(dst, mem), OpcP, OpcS, reg_mem(dst, mem));
5841//   ins_pipe(ialu_reg_mem);
5842// %}
5843
5844// Load Short (16 bit signed)
5845instruct loadS(rRegI dst, memory mem)
5846%{
5847  match(Set dst (LoadS mem));
5848
5849  ins_cost(125); // XXX
5850  format %{ "movswl $dst, $mem\t# short" %}
5851  opcode(0x0F, 0xBF);
5852  ins_encode(REX_reg_mem(dst, mem), OpcP, OpcS, reg_mem(dst, mem));
5853  ins_pipe(ialu_reg_mem);
5854%}
5855
5856// Load Short (16 bit signed) into long
5857// instruct loadS2L(rRegL dst, memory mem)
5858// %{
5859//   match(Set dst (ConvI2L (LoadS mem)));
5860
5861//   ins_cost(125); // XXX
5862//   format %{ "movswq $dst, $mem\t# short -> long" %}
5863//   opcode(0x0F, 0xBF);
5864//   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, OpcS, reg_mem(dst, mem));
5865//   ins_pipe(ialu_reg_mem);
5866// %}
5867
5868// Load Char (16 bit UNsigned)
5869instruct loadC(rRegI dst, memory mem)
5870%{
5871  match(Set dst (LoadC mem));
5872
5873  ins_cost(125);
5874  format %{ "movzwl  $dst, $mem\t# char" %}
5875  opcode(0x0F, 0xB7);
5876  ins_encode(REX_reg_mem(dst, mem), OpcP, OpcS, reg_mem(dst, mem));
5877  ins_pipe(ialu_reg_mem);
5878%}
5879
5880// Load Char (16 bit UNsigned) into long
5881// instruct loadC2L(rRegL dst, memory mem)
5882// %{
5883//   match(Set dst (ConvI2L (LoadC mem)));
5884
5885//   ins_cost(125);
5886//   format %{ "movzwl  $dst, $mem\t# char -> long" %}
5887//   opcode(0x0F, 0xB7);
5888//   ins_encode(REX_reg_mem(dst, mem), OpcP, OpcS, reg_mem(dst, mem));
5889//   ins_pipe(ialu_reg_mem);
5890// %}
5891
5892// Load Integer
5893instruct loadI(rRegI dst, memory mem)
5894%{
5895  match(Set dst (LoadI mem));
5896
5897  ins_cost(125); // XXX
5898  format %{ "movl    $dst, $mem\t# int" %}
5899  opcode(0x8B);
5900  ins_encode(REX_reg_mem(dst, mem), OpcP, reg_mem(dst, mem));
5901  ins_pipe(ialu_reg_mem);
5902%}
5903
5904// Load Long: LoadL into a long register, printed as movq
5905instruct loadL(rRegL dst, memory mem)
5906%{
5907  match(Set dst (LoadL mem));
5908
5909  ins_cost(125); // XXX
5910  format %{ "movq    $dst, $mem\t# long" %}
5911  opcode(0x8B); // same opcode byte as loadI; only the REX encode class differs
5912  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem)); // NOTE(review): the _wide variant presumably sets REX.W for the 64-bit operand size -- confirm in the encode section
5913  ins_pipe(ialu_reg_mem); // XXX
5914%}
5915
5916// Load Range
5917instruct loadRange(rRegI dst, memory mem)
5918%{
5919  match(Set dst (LoadRange mem));
5920
5921  ins_cost(125); // XXX
5922  format %{ "movl    $dst, $mem\t# range" %}
5923  opcode(0x8B);
5924  ins_encode(REX_reg_mem(dst, mem), OpcP, reg_mem(dst, mem));
5925  ins_pipe(ialu_reg_mem);
5926%}
5927
5928// Load Pointer
5929instruct loadP(rRegP dst, memory mem)
5930%{
5931  match(Set dst (LoadP mem));
5932
5933  ins_cost(125); // XXX
5934  format %{ "movq    $dst, $mem\t# ptr" %}
5935  opcode(0x8B);
5936  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5937  ins_pipe(ialu_reg_mem); // XXX
5938%}
5939
5940// Load Klass Pointer
5941instruct loadKlass(rRegP dst, memory mem)
5942%{
5943  match(Set dst (LoadKlass mem));
5944
5945  ins_cost(125); // XXX
5946  format %{ "movq    $dst, $mem\t# class" %}
5947  opcode(0x8B);
5948  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5949  ins_pipe(ialu_reg_mem); // XXX
5950%}
5951
5952// Load Float
5953instruct loadF(regF dst, memory mem)
5954%{
5955  match(Set dst (LoadF mem));
5956
5957  ins_cost(145); // XXX
5958  format %{ "movss   $dst, $mem\t# float" %}
5959  opcode(0xF3, 0x0F, 0x10);
5960  ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
5961  ins_pipe(pipe_slow); // XXX
5962%}
5963
5964// Load Double, movlpd form (loadD is the movsd form of the same match)
5965instruct loadD_partial(regD dst, memory mem)
5966%{
5967  predicate(!UseXmmLoadAndClearUpper); // exact negation of loadD's predicate, so only one of the two rules is eligible
5968  match(Set dst (LoadD mem));
5969
5970  ins_cost(145); // XXX
5971  format %{ "movlpd  $dst, $mem\t# double" %}
5972  opcode(0x66, 0x0F, 0x12); // 66 0F 12 /r = MOVLPD xmm, m64: loads the low quadword, upper half of the register is unchanged
5973  ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem)); // OpcP is listed before the REX encode class, OpcS/OpcT after it
5974  ins_pipe(pipe_slow); // XXX
5975%}
5976
5977instruct loadD(regD dst, memory mem)
5978%{
5979  predicate(UseXmmLoadAndClearUpper); // loadD_partial carries the negated predicate for the same match
5980  match(Set dst (LoadD mem));
5981
5982  ins_cost(145); // XXX
5983  format %{ "movsd   $dst, $mem\t# double" %}
5984  opcode(0xF2, 0x0F, 0x10); // F2 0F 10 /r = MOVSD xmm, m64: the memory form also zeroes the upper half of the register
5985  ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem)); // OpcP is listed before the REX encode class, OpcS/OpcT after it
5986  ins_pipe(pipe_slow); // XXX
5987%}
5988
5989// Load Aligned Packed Byte to XMM register
5990instruct loadA8B(regD dst, memory mem) %{
5991  match(Set dst (Load8B mem));
5992  ins_cost(125);
5993  format %{ "MOVQ  $dst,$mem\t! packed8B" %}
5994  ins_encode( movq_ld(dst, mem));
5995  ins_pipe( pipe_slow );
5996%}
5997
5998// Load Aligned Packed Short to XMM register
5999instruct loadA4S(regD dst, memory mem) %{
6000  match(Set dst (Load4S mem));
6001  ins_cost(125);
6002  format %{ "MOVQ  $dst,$mem\t! packed4S" %}
6003  ins_encode( movq_ld(dst, mem));
6004  ins_pipe( pipe_slow );
6005%}
6006
6007// Load Aligned Packed Char to XMM register
6008instruct loadA4C(regD dst, memory mem) %{
6009  match(Set dst (Load4C mem));
6010  ins_cost(125);
6011  format %{ "MOVQ  $dst,$mem\t! packed4C" %}
6012  ins_encode( movq_ld(dst, mem));
6013  ins_pipe( pipe_slow );
6014%}
6015
6016// Load Aligned Packed Integer to XMM register
6017instruct load2IU(regD dst, memory mem) %{
6018  match(Set dst (Load2I mem));
6019  ins_cost(125);
6020  format %{ "MOVQ  $dst,$mem\t! packed2I" %}
6021  ins_encode( movq_ld(dst, mem));
6022  ins_pipe( pipe_slow );
6023%}
6024
6025// Load Aligned Packed Single to XMM
6026instruct loadA2F(regD dst, memory mem) %{
6027  match(Set dst (Load2F mem));
6028  ins_cost(145);
6029  format %{ "MOVQ  $dst,$mem\t! packed2F" %}
6030  ins_encode( movq_ld(dst, mem));
6031  ins_pipe( pipe_slow );
6032%}
6033
6034// Load Effective Address
6035instruct leaP8(rRegP dst, indOffset8 mem)
6036%{
6037  match(Set dst mem);
6038
6039  ins_cost(110); // XXX
6040  format %{ "leaq    $dst, $mem\t# ptr 8" %}
6041  opcode(0x8D);
6042  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6043  ins_pipe(ialu_reg_reg_fat);
6044%}
6045
6046instruct leaP32(rRegP dst, indOffset32 mem)
6047%{
6048  match(Set dst mem);
6049
6050  ins_cost(110);
6051  format %{ "leaq    $dst, $mem\t# ptr 32" %}
6052  opcode(0x8D);
6053  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6054  ins_pipe(ialu_reg_reg_fat);
6055%}
6056
6057// instruct leaPIdx(rRegP dst, indIndex mem)
6058// %{
6059//   match(Set dst mem);
6060
6061//   ins_cost(110);
6062//   format %{ "leaq    $dst, $mem\t# ptr idx" %}
6063//   opcode(0x8D);
6064//   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6065//   ins_pipe(ialu_reg_reg_fat);
6066// %}
6067
6068instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
6069%{
6070  match(Set dst mem);
6071
6072  ins_cost(110);
6073  format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
6074  opcode(0x8D);
6075  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6076  ins_pipe(ialu_reg_reg_fat);
6077%}
6078
6079instruct leaPIdxScale(rRegP dst, indIndexScale mem)
6080%{
6081  match(Set dst mem);
6082
6083  ins_cost(110);
6084  format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
6085  opcode(0x8D);
6086  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6087  ins_pipe(ialu_reg_reg_fat);
6088%}
6089
6090instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
6091%{
6092  match(Set dst mem);
6093
6094  ins_cost(110);
6095  format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
6096  opcode(0x8D);
6097  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6098  ins_pipe(ialu_reg_reg_fat);
6099%}
6100
6101instruct loadConI(rRegI dst, immI src)
6102%{
6103  match(Set dst src);
6104
6105  format %{ "movl    $dst, $src\t# int" %}
6106  ins_encode(load_immI(dst, src));
6107  ins_pipe(ialu_reg_fat); // XXX
6108%}
6109
6110instruct loadConI0(rRegI dst, immI0 src, rFlagsReg cr)
6111%{
6112  match(Set dst src); // src only selects the rule; it does not appear in ins_encode
6113  effect(KILL cr);    // xor writes the flags, so cr is declared killed
6114
6115  ins_cost(50);
6116  format %{ "xorl    $dst, $dst\t# int" %}
6117  opcode(0x33); /* + rd */
6118  ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst)); // dst is passed as both operands: xor dst, dst
6119  ins_pipe(ialu_reg);
6120%}
6121
6122instruct loadConL(rRegL dst, immL src)
6123%{
6124  match(Set dst src);
6125
6126  ins_cost(150);
6127  format %{ "movq    $dst, $src\t# long" %}
6128  ins_encode(load_immL(dst, src));
6129  ins_pipe(ialu_reg);
6130%}
6131
6132instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
6133%{
6134  match(Set dst src); // src only selects the rule; it does not appear in ins_encode
6135  effect(KILL cr);    // xor writes the flags, so cr is declared killed
6136
6137  ins_cost(50);
6138  format %{ "xorl    $dst, $dst\t# long" %}
6139  opcode(0x33); /* + rd */
6140  ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst)); // same non-wide encoding as loadConI0; on x86-64 a 32-bit xor also clears bits 63:32 of the register
6141  ins_pipe(ialu_reg); // XXX
6142%}
6143
6144instruct loadConUL32(rRegL dst, immUL32 src)
6145%{
6146  match(Set dst src);
6147
6148  ins_cost(60);
6149  format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
6150  ins_encode(load_immUL32(dst, src));
6151  ins_pipe(ialu_reg);
6152%}
6153
6154instruct loadConL32(rRegL dst, immL32 src)
6155%{
6156  match(Set dst src);
6157
6158  ins_cost(70);
6159  format %{ "movq    $dst, $src\t# long (32-bit)" %}
6160  ins_encode(load_immL32(dst, src));
6161  ins_pipe(ialu_reg);
6162%}
6163
6164instruct loadConP(rRegP dst, immP src)
6165%{
6166  match(Set dst src);
6167
6168  format %{ "movq    $dst, $src\t# ptr" %}
6169  ins_encode(load_immP(dst, src));
6170  ins_pipe(ialu_reg_fat); // XXX
6171%}
6172
6173instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
6174%{
6175  match(Set dst src);
6176  effect(KILL cr);
6177
6178  ins_cost(50);
6179  format %{ "xorl    $dst, $dst\t# ptr" %}
6180  opcode(0x33); /* + rd */
6181  ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
6182  ins_pipe(ialu_reg);
6183%}
6184
6185instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
6186%{
6187  match(Set dst src);
6188  effect(KILL cr);
6189
6190  ins_cost(60);
6191  format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
6192  ins_encode(load_immP31(dst, src));
6193  ins_pipe(ialu_reg);
6194%}
6195
6196instruct loadConF(regF dst, immF src)
6197%{
6198  match(Set dst src);
6199  ins_cost(125);
6200
6201  format %{ "movss   $dst, [$src]" %}
6202  ins_encode(load_conF(dst, src));
6203  ins_pipe(pipe_slow);
6204%}
6205
6206instruct loadConF0(regF dst, immF0 src)
6207%{
6208  match(Set dst src);
6209  ins_cost(100);
6210
6211  format %{ "xorps   $dst, $dst\t# float 0.0" %}
6212  opcode(0x0F, 0x57);
6213  ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
6214  ins_pipe(pipe_slow);
6215%}
6216
6217// Use the same format since predicate() can not be used here.
6218instruct loadConD(regD dst, immD src)
6219%{
6220  match(Set dst src);
6221  ins_cost(125);
6222
6223  format %{ "movsd   $dst, [$src]" %}
6224  ins_encode(load_conD(dst, src));
6225  ins_pipe(pipe_slow);
6226%}
6227
6228instruct loadConD0(regD dst, immD0 src)
6229%{
6230  match(Set dst src);
6231  ins_cost(100);
6232
6233  format %{ "xorpd   $dst, $dst\t# double 0.0" %}
6234  opcode(0x66, 0x0F, 0x57);
6235  ins_encode(OpcP, REX_reg_reg(dst, dst), OpcS, OpcT, reg_reg(dst, dst));
6236  ins_pipe(pipe_slow);
6237%}
6238
6239instruct loadSSI(rRegI dst, stackSlotI src)
6240%{
6241  match(Set dst src);
6242
6243  ins_cost(125);
6244  format %{ "movl    $dst, $src\t# int stk" %}
6245  opcode(0x8B);
6246  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
6247  ins_pipe(ialu_reg_mem);
6248%}
6249
6250instruct loadSSL(rRegL dst, stackSlotL src)
6251%{
6252  match(Set dst src);
6253
6254  ins_cost(125);
6255  format %{ "movq    $dst, $src\t# long stk" %}
6256  opcode(0x8B);
6257  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
6258  ins_pipe(ialu_reg_mem);
6259%}
6260
6261instruct loadSSP(rRegP dst, stackSlotP src)
6262%{
6263  match(Set dst src);
6264
6265  ins_cost(125);
6266  format %{ "movq    $dst, $src\t# ptr stk" %}
6267  opcode(0x8B);
6268  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
6269  ins_pipe(ialu_reg_mem);
6270%}
6271
6272instruct loadSSF(regF dst, stackSlotF src)
6273%{
6274  match(Set dst src);
6275
6276  ins_cost(125);
6277  format %{ "movss   $dst, $src\t# float stk" %}
6278  opcode(0xF3, 0x0F, 0x10);
6279  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
6280  ins_pipe(pipe_slow); // XXX
6281%}
6282
6283// Load a double from a stack slot.  The format always prints movsd since predicate() can not be used here.
6284instruct loadSSD(regD dst, stackSlotD src)
6285%{
6286  match(Set dst src);
6287
6288  ins_cost(125);
6289  format %{ "movsd   $dst, $src\t# double stk" %}
6290  ins_encode  %{
6291    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); // rsp-relative slot address; NOTE(review): movdbl presumably picks movsd or movlpd at emit time -- confirm in the assembler
6292  %}
6293  ins_pipe(pipe_slow); // XXX
6294%}
6296// Prefetch instructions.
6297// Must be safe to execute with invalid address (cannot fault).
6298
6299instruct prefetchr( memory mem ) %{
6300  predicate(ReadPrefetchInstr==3);
6301  match(PrefetchRead mem);
6302  ins_cost(125);
6303
6304  format %{ "PREFETCHR $mem\t# Prefetch into level 1 cache" %}
6305  opcode(0x0F, 0x0D);     /* Opcode 0F 0D /0 */
6306  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
6307  ins_pipe(ialu_mem);
6308%}
6309
6310instruct prefetchrNTA( memory mem ) %{
6311  predicate(ReadPrefetchInstr==0);
6312  match(PrefetchRead mem);
6313  ins_cost(125);
6314
6315  format %{ "PREFETCHNTA $mem\t# Prefetch into non-temporal cache for read" %}
6316  opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
6317  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
6318  ins_pipe(ialu_mem);
6319%}
6320
6321instruct prefetchrT0( memory mem ) %{
6322  predicate(ReadPrefetchInstr==1);
6323  match(PrefetchRead mem);
6324  ins_cost(125);
6325
6326  format %{ "PREFETCHT0 $mem\t# prefetch into L1 and L2 caches for read" %}
6327  opcode(0x0F, 0x18); /* Opcode 0F 18 /1 */
6328  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
6329  ins_pipe(ialu_mem);
6330%}
6331
6332instruct prefetchrT2( memory mem ) %{
6333  predicate(ReadPrefetchInstr==2);
6334  match(PrefetchRead mem);
6335  ins_cost(125);
6336
6337  format %{ "PREFETCHT2 $mem\t# prefetch into L2 caches for read" %}
6338  opcode(0x0F, 0x18); /* Opcode 0F 18 /3 */
6339  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x03, mem));
6340  ins_pipe(ialu_mem);
6341%}
6342
6343instruct prefetchw( memory mem ) %{
6344  predicate(AllocatePrefetchInstr==3);
6345  match(PrefetchWrite mem);
6346  ins_cost(125);
6347
6348  format %{ "PREFETCHW $mem\t# Prefetch into level 1 cache and mark modified" %}
6349  opcode(0x0F, 0x0D);     /* Opcode 0F 0D /1 */
6350  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
6351  ins_pipe(ialu_mem);
6352%}
6353
6354instruct prefetchwNTA( memory mem ) %{
6355  predicate(AllocatePrefetchInstr==0);
6356  match(PrefetchWrite mem);
6357  ins_cost(125);
6358
6359  format %{ "PREFETCHNTA $mem\t# Prefetch to non-temporal cache for write" %}
6360  opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
6361  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
6362  ins_pipe(ialu_mem);
6363%}
6364
6365instruct prefetchwT0( memory mem ) %{
6366  predicate(AllocatePrefetchInstr==1);
6367  match(PrefetchWrite mem);
6368  ins_cost(125);
6369
6370  format %{ "PREFETCHT0 $mem\t# Prefetch to level 1 and 2 caches for write" %}
6371  opcode(0x0F, 0x18);     /* Opcode 0F 18 /1 */
6372  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
6373  ins_pipe(ialu_mem);
6374%}
6375
6376instruct prefetchwT2( memory mem ) %{
6377  predicate(AllocatePrefetchInstr==2);
6378  match(PrefetchWrite mem);
6379  ins_cost(125);
6380
6381  format %{ "PREFETCHT2 $mem\t# Prefetch to level 2 cache for write" %}
6382  opcode(0x0F, 0x18);     /* Opcode 0F 18 /3 */
6383  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x03, mem));
6384  ins_pipe(ialu_mem);
6385%}
6386
6387//----------Store Instructions-------------------------------------------------
6388
6389// Store Byte: StoreB of an int register, printed as movb
6390instruct storeB(memory mem, rRegI src)
6391%{
6392  match(Set mem (StoreB mem src));
6393
6394  ins_cost(125); // XXX
6395  format %{ "movb    $mem, $src\t# byte" %}
6396  opcode(0x88);
6397  ins_encode(REX_breg_mem(src, mem), OpcP, reg_mem(src, mem)); // NOTE(review): REX_breg_mem (not REX_reg_mem) presumably handles the REX prefix needed for byte-register encodings -- confirm
6398  ins_pipe(ialu_mem_reg);
6399%}
6400
6401// Store Char/Short: StoreC of an int register, printed as movw
6402instruct storeC(memory mem, rRegI src)
6403%{
6404  match(Set mem (StoreC mem src));
6405
6406  ins_cost(125); // XXX
6407  format %{ "movw    $mem, $src\t# char/short" %}
6408  opcode(0x89); // same opcode byte as storeI
6409  ins_encode(SizePrefix, REX_reg_mem(src, mem), OpcP, reg_mem(src, mem)); // SizePrefix is listed ahead of the REX encode class; NOTE(review): presumably the 0x66 operand-size prefix -- confirm
6410  ins_pipe(ialu_mem_reg);
6411%}
6412
6413// Store Integer
6414instruct storeI(memory mem, rRegI src)
6415%{
6416  match(Set mem (StoreI mem src));
6417
6418  ins_cost(125); // XXX
6419  format %{ "movl    $mem, $src\t# int" %}
6420  opcode(0x89);
6421  ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
6422  ins_pipe(ialu_mem_reg);
6423%}
6424
6425// Store Long
6426instruct storeL(memory mem, rRegL src)
6427%{
6428  match(Set mem (StoreL mem src));
6429
6430  ins_cost(125); // XXX
6431  format %{ "movq    $mem, $src\t# long" %}
6432  opcode(0x89);
6433  ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
6434  ins_pipe(ialu_mem_reg); // XXX
6435%}
6436
6437// Store Pointer
6438instruct storeP(memory mem, any_RegP src)
6439%{
6440  match(Set mem (StoreP mem src));
6441
6442  ins_cost(125); // XXX
6443  format %{ "movq    $mem, $src\t# ptr" %}
6444  opcode(0x89);
6445  ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
6446  ins_pipe(ialu_mem_reg);
6447%}
6448
6449// Store NULL Pointer, mark word, or other simple pointer constant.
6450instruct storeImmP(memory mem, immP31 src)
6451%{
6452  match(Set mem (StoreP mem src));
6453
6454  ins_cost(125); // XXX
6455  format %{ "movq    $mem, $src\t# ptr" %}
6456  opcode(0xC7); /* C7 /0 */
6457  ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6458  ins_pipe(ialu_mem_imm);
6459%}
6460
6461// Store Integer Immediate
6462instruct storeImmI(memory mem, immI src)
6463%{
6464  match(Set mem (StoreI mem src));
6465
6466  ins_cost(150);
6467  format %{ "movl    $mem, $src\t# int" %}
6468  opcode(0xC7); /* C7 /0 */
6469  ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6470  ins_pipe(ialu_mem_imm);
6471%}
6472
6473// Store Long Immediate
6474instruct storeImmL(memory mem, immL32 src)
6475%{
6476  match(Set mem (StoreL mem src));
6477
6478  ins_cost(150);
6479  format %{ "movq    $mem, $src\t# long" %}
6480  opcode(0xC7); /* C7 /0 */
6481  ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6482  ins_pipe(ialu_mem_imm);
6483%}
6484
6485// Store Short/Char Immediate
6486instruct storeImmI16(memory mem, immI16 src)
6487%{
6488  predicate(UseStoreImmI16);
6489  match(Set mem (StoreC mem src));
6490
6491  ins_cost(150);
6492  format %{ "movw    $mem, $src\t# short/char" %}
6493  opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
6494  ins_encode(SizePrefix, REX_mem(mem), OpcP, RM_opc_mem(0x00, mem),Con16(src));
6495  ins_pipe(ialu_mem_imm);
6496%}
6497
6498// Store Byte Immediate
6499instruct storeImmB(memory mem, immI8 src)
6500%{
6501  match(Set mem (StoreB mem src));
6502
6503  ins_cost(150); // XXX
6504  format %{ "movb    $mem, $src\t# byte" %}
6505  opcode(0xC6); /* C6 /0 */
6506  ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
6507  ins_pipe(ialu_mem_imm);
6508%}
6509
6510// Store Aligned Packed Byte XMM register to memory
6511instruct storeA8B(memory mem, regD src) %{
6512  match(Set mem (Store8B mem src));
6513  ins_cost(145);
6514  format %{ "MOVQ  $mem,$src\t! packed8B" %}
6515  ins_encode( movq_st(mem, src));
6516  ins_pipe( pipe_slow );
6517%}
6518
6519// Store Aligned Packed Char/Short XMM register to memory
6520instruct storeA4C(memory mem, regD src) %{
6521  match(Set mem (Store4C mem src));
6522  ins_cost(145);
6523  format %{ "MOVQ  $mem,$src\t! packed4C" %}
6524  ins_encode( movq_st(mem, src));
6525  ins_pipe( pipe_slow );
6526%}
6527
6528// Store Aligned Packed Integer XMM register to memory
6529instruct storeA2I(memory mem, regD src) %{
6530  match(Set mem (Store2I mem src));
6531  ins_cost(145);
6532  format %{ "MOVQ  $mem,$src\t! packed2I" %}
6533  ins_encode( movq_st(mem, src));
6534  ins_pipe( pipe_slow );
6535%}
6536
6537// Store CMS card-mark Immediate
6538instruct storeImmCM0(memory mem, immI0 src)
6539%{
6540  match(Set mem (StoreCM mem src));
6541
6542  ins_cost(150); // XXX
6543  format %{ "movb    $mem, $src\t# CMS card-mark byte 0" %}
6544  opcode(0xC6); /* C6 /0 */
6545  ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
6546  ins_pipe(ialu_mem_imm);
6547%}
6548
6549// Store Aligned Packed Single Float XMM register to memory
6550instruct storeA2F(memory mem, regD src) %{
6551  match(Set mem (Store2F mem src));
6552  ins_cost(145);
6553  format %{ "MOVQ  $mem,$src\t! packed2F" %}
6554  ins_encode( movq_st(mem, src));
6555  ins_pipe( pipe_slow );
6556%}
6557
6558// Store Float
6559instruct storeF(memory mem, regF src)
6560%{
6561  match(Set mem (StoreF mem src));
6562
6563  ins_cost(95); // XXX
6564  format %{ "movss   $mem, $src\t# float" %}
6565  opcode(0xF3, 0x0F, 0x11);
6566  ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
6567  ins_pipe(pipe_slow); // XXX
6568%}
6569
6570// Store immediate Float value (it is faster than store from XMM register)
6571instruct storeF_imm(memory mem, immF src)
6572%{
6573  match(Set mem (StoreF mem src));
6574
6575  ins_cost(50);
6576  format %{ "movl    $mem, $src\t# float" %}
6577  opcode(0xC7); /* C7 /0 */
6578  ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
6579  ins_pipe(ialu_mem_imm);
6580%}
6581
6582// Store Double
6583instruct storeD(memory mem, regD src)
6584%{
6585  match(Set mem (StoreD mem src));
6586
6587  ins_cost(95); // XXX
6588  format %{ "movsd   $mem, $src\t# double" %}
6589  opcode(0xF2, 0x0F, 0x11);
6590  ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
6591  ins_pipe(pipe_slow); // XXX
6592%}
6593
6594// Store immediate double 0.0 (it is faster than store from XMM register)
6595instruct storeD0_imm(memory mem, immD0 src)
6596%{
6597  match(Set mem (StoreD mem src));
6598
6599  ins_cost(50); // below storeD's cost of 95
6600  format %{ "movq    $mem, $src\t# double 0." %}
6601  opcode(0xC7); /* C7 /0 */
6602  ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src)); // NOTE(review): Con32F_as_bits is the encode class storeF_imm uses for floats; presumably safe here only because the operand is 0.0 -- confirm
6603  ins_pipe(ialu_mem_imm);
6604%}
6605
6606instruct storeSSI(stackSlotI dst, rRegI src)
6607%{
6608  match(Set dst src);
6609
6610  ins_cost(100);
6611  format %{ "movl    $dst, $src\t# int stk" %}
6612  opcode(0x89);
6613  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
6614  ins_pipe( ialu_mem_reg );
6615%}
6616
6617instruct storeSSL(stackSlotL dst, rRegL src)
6618%{
6619  match(Set dst src);
6620
6621  ins_cost(100);
6622  format %{ "movq    $dst, $src\t# long stk" %}
6623  opcode(0x89);
6624  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
6625  ins_pipe(ialu_mem_reg);
6626%}
6627
6628instruct storeSSP(stackSlotP dst, rRegP src)
6629%{
6630  match(Set dst src);
6631
6632  ins_cost(100);
6633  format %{ "movq    $dst, $src\t# ptr stk" %}
6634  opcode(0x89);
6635  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
6636  ins_pipe(ialu_mem_reg);
6637%}
6638
6639instruct storeSSF(stackSlotF dst, regF src)
6640%{
6641  match(Set dst src);
6642
6643  ins_cost(95); // XXX
6644  format %{ "movss   $dst, $src\t# float stk" %}
6645  opcode(0xF3, 0x0F, 0x11);
6646  ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
6647  ins_pipe(pipe_slow); // XXX
6648%}
6649
6650instruct storeSSD(stackSlotD dst, regD src)
6651%{
6652  match(Set dst src);
6653
6654  ins_cost(95); // XXX
6655  format %{ "movsd   $dst, $src\t# double stk" %}
6656  opcode(0xF2, 0x0F, 0x11);
6657  ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
6658  ins_pipe(pipe_slow); // XXX
6659%}
6660
6661//----------BSWAP Instructions-------------------------------------------------
6662instruct bytes_reverse_int(rRegI dst) %{
6663  match(Set dst (ReverseBytesI dst)); // dst is both the input and the result: the swap is done in place
6664
6665  format %{ "bswapl  $dst" %}
6666  opcode(0x0F, 0xC8);  /* Opcode 0F C8+rd */
6667  ins_encode( REX_reg(dst), OpcP, opc2_reg(dst) ); // NOTE(review): opc2_reg presumably emits the secondary opcode plus the register number -- confirm
6668  ins_pipe( ialu_reg );
6669%}
6670
6671instruct bytes_reverse_long(rRegL dst) %{
6672  match(Set dst (ReverseBytesL dst));
6673
6674  format %{ "bswapq  $dst" %}
6675
6676  opcode(0x0F, 0xC8); /* Opcode 0F /C8 */
6677  ins_encode( REX_reg_wide(dst), OpcP, opc2_reg(dst) );
6678  ins_pipe( ialu_reg);
6679%}
6680
6681instruct loadI_reversed(rRegI dst, memory src) %{
6682  match(Set dst (ReverseBytesI (LoadI src))); // the LoadI and the ReverseBytesI of its result are covered by one rule
6683
6684  format %{ "bswap_movl $dst, $src" %}
6685  opcode(0x8B, 0x0F, 0xC8); /* Opcode 8B 0F C8 */
6686  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src), REX_reg(dst), OpcS, opc3_reg(dst)); // load into dst first, then bswap dst; only the result register is modified
6687  ins_pipe( ialu_reg_mem );
6688%}
6689
6690instruct loadL_reversed(rRegL dst, memory src) %{
6691  match(Set dst (ReverseBytesL (LoadL src)));
6692
6693  format %{ "bswap_movq $dst, $src" %}
6694  opcode(0x8B, 0x0F, 0xC8); /* Opcode 8B 0F C8 */
6695  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src), REX_reg_wide(dst), OpcS, opc3_reg(dst));
6696  ins_pipe( ialu_reg_mem );
6697%}
6698// DISABLED via predicate(false): the encoding byte-swaps $src in place before storing it.
6699instruct storeI_reversed(memory dst, rRegI src) %{
6700  match(Set dst (StoreI dst (ReverseBytesI  src))); // with this rule off, bytes_reverse_int followed by storeI covers the same tree
6701  predicate(false); // src is a plain input (no effect() marks it killed), so any later use of src would read the swapped value
6702  format %{ "movl_bswap $dst, $src" %}
6703  opcode(0x0F, 0xC8, 0x89); /* Opcode 0F C8 89 */
6704  ins_encode( REX_reg(src), OpcP, opc2_reg(src), REX_reg_mem(src, dst), OpcT, reg_mem(src, dst) ); // bswap src, then mov [dst], src
6705  ins_pipe( ialu_mem_reg );
6706%}
6707// DISABLED via predicate(false): the encoding byte-swaps $src in place before storing it.
6708instruct storeL_reversed(memory dst, rRegL src) %{
6709  match(Set dst (StoreL dst (ReverseBytesL  src))); // with this rule off, bytes_reverse_long followed by storeL covers the same tree
6710  predicate(false); // src is a plain input (no effect() marks it killed), so any later use of src would read the swapped value
6711  format %{ "movq_bswap $dst, $src" %}
6712  opcode(0x0F, 0xC8, 0x89); /* Opcode 0F C8 89 */
6713  ins_encode( REX_reg_wide(src), OpcP, opc2_reg(src), REX_reg_mem_wide(src, dst), OpcT, reg_mem(src, dst) ); // bswap src, then mov [dst], src
6714  ins_pipe( ialu_mem_reg );
6715%}
6716
6717//----------MemBar Instructions-----------------------------------------------
6718// Memory barrier flavors
6719
6720instruct membar_acquire()
6721%{
6722  match(MemBarAcquire);
6723  ins_cost(0);
6724
6725  size(0);
6726  format %{ "MEMBAR-acquire" %}
6727  ins_encode();
6728  ins_pipe(empty);
6729%}
6730
6731instruct membar_acquire_lock()
6732%{
6733  match(MemBarAcquire); // same match as membar_acquire
6734  predicate(Matcher::prior_fast_lock(n)); // the only functional difference from membar_acquire
6735  ins_cost(0);
6736
6737  size(0);
6738  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6739  ins_encode(); // nothing is emitted, matching the declared size of 0
6740  ins_pipe(empty);
6741%}
6742
6743instruct membar_release()
6744%{
6745  match(MemBarRelease);
6746  ins_cost(0);
6747
6748  size(0);
6749  format %{ "MEMBAR-release" %}
6750  ins_encode();
6751  ins_pipe(empty);
6752%}
6753
6754instruct membar_release_lock()
6755%{
6756  match(MemBarRelease);
6757  predicate(Matcher::post_fast_unlock(n));
6758  ins_cost(0);
6759
6760  size(0);
6761  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6762  ins_encode();
6763  ins_pipe(empty);
6764%}
6765
6766instruct membar_volatile()
6767%{
6768  match(MemBarVolatile);
6769  ins_cost(400); // unnecessary_membar_volatile matches the same node at cost 0 when its predicate holds
6770
6771  format %{ "MEMBAR-volatile" %}
6772  ins_encode(enc_membar_volatile); // the only membar rule in this section with a non-empty encoding
6773  ins_pipe(pipe_slow);
6774%}
6775
6776instruct unnecessary_membar_volatile()
6777%{
6778  match(MemBarVolatile); // same match as membar_volatile
6779  predicate(Matcher::post_store_load_barrier(n)); // eligible only when this predicate accepts the node
6780  ins_cost(0); // cost 0 versus membar_volatile's 400
6781
6782  size(0);
6783  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6784  ins_encode(); // nothing is emitted, matching the declared size of 0
6785  ins_pipe(empty);
6786%}
6787
6788//----------Move Instructions--------------------------------------------------
6789
6790instruct castX2P(rRegP dst, rRegL src)
6791%{
6792  match(Set dst (CastX2P src));
6793
6794  format %{ "movq    $dst, $src\t# long->ptr" %}
6795  ins_encode(enc_copy_wide(dst, src));
6796  ins_pipe(ialu_reg_reg); // XXX
6797%}
6798
6799instruct castP2X(rRegL dst, rRegP src)
6800%{
6801  match(Set dst (CastP2X src));
6802
6803  format %{ "movq    $dst, $src\t# ptr -> long" %}
6804  ins_encode(enc_copy_wide(dst, src));
6805  ins_pipe(ialu_reg_reg); // XXX
6806%}
6807
6808//----------Conditional Move---------------------------------------------------
6809// Jump
6810// dummy instruction for generating temp registers
6811instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
6812  match(Jump (LShiftL switch_val shift));
6813  ins_cost(350);
6814  predicate(false); // constant-false predicate: this rule is never eligible for matching
6815  effect(TEMP dest); // dest is a scratch register; the format shows it holding table_base
6816
6817  format %{ "leaq    $dest, table_base\n\t"
6818            "jmp     [$dest + $switch_val << $shift]\n\t" %}
6819  ins_encode(jump_enc_offset(switch_val, shift, dest));
6820  ins_pipe(pipe_jmp);
6821  ins_pc_relative(1);
6822%}
6823
6824instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
6825  match(Jump (AddL (LShiftL switch_val shift) offset));
6826  ins_cost(350);
6827  effect(TEMP dest);
6828
6829  format %{ "leaq    $dest, table_base\n\t"
6830            "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
6831  ins_encode(jump_enc_addr(switch_val, shift, offset, dest));
6832  ins_pipe(pipe_jmp);
6833  ins_pc_relative(1);
6834%}
6835
6836instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
6837  match(Jump switch_val);
6838  ins_cost(350);
6839  effect(TEMP dest);
6840
6841  format %{ "leaq    $dest, table_base\n\t"
6842            "jmp     [$dest + $switch_val]\n\t" %}
6843  ins_encode(jump_enc(switch_val, dest));
6844  ins_pipe(pipe_jmp);
6845  ins_pc_relative(1);
6846%}
6847
6848// Conditional move, int register source, signed condition (cmpOp / rFlagsReg)
6849instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
6850%{
6851  match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); // dst appears as an input too: it keeps its value when the move is not taken
6852
6853  ins_cost(200); // XXX
6854  format %{ "cmovl$cop $dst, $src\t# signed, int" %}
6855  opcode(0x0F, 0x40);
6856  ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src)); // no OpcP/OpcS here; NOTE(review): enc_cmov presumably emits the opcode bytes combined with the condition from cop -- confirm
6857  ins_pipe(pipe_cmov_reg);
6858%}
6859
// Int conditional move, register source, unsigned condition
// (cmpOpU/rFlagsRegU).  Same match shape and encoding as cmovI_reg.
6860instruct cmovI_regU(rRegI dst, rRegI src, rFlagsRegU cr, cmpOpU cop)
6861%{
6862  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6863
6864  ins_cost(200); // XXX
6865  format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
6866  opcode(0x0F, 0x40);  // base opcode; enc_cmov(cop) presumably folds in the condition code
6867  ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
6868  ins_pipe(pipe_cmov_reg);
6869%}
6870
6871// Conditional move
// Int conditional move with the source folded in from memory (LoadI src),
// signed condition.  Costed higher (250) than the register form (200).
6872instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src)
6873%{
6874  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6875
6876  ins_cost(250); // XXX
6877  format %{ "cmovl$cop $dst, $src\t# signed, int" %}
6878  opcode(0x0F, 0x40);
6879  ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
6880  ins_pipe(pipe_cmov_mem);
6881%}
6882
6883// Conditional move
// Int conditional move with the source folded in from memory (LoadI src),
// unsigned condition (cmpOpU/rFlagsRegU).
6884instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
6885%{
6886  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6887
6888  ins_cost(250); // XXX
6889  format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
6890  opcode(0x0F, 0x40);
6891  ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
6892  ins_pipe(pipe_cmov_mem);
6893%}
6894
6895// Conditional move
// Pointer conditional move, register source, signed condition.  Uses the
// 64-bit form (cmovq in the format, *_wide REX encoding class).
6896instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
6897%{
6898  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6899
6900  ins_cost(200); // XXX
6901  format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
6902  opcode(0x0F, 0x40);
6903  ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
6904  ins_pipe(pipe_cmov_reg);  // XXX
6905%}
6906
6907// Conditional move
// Pointer conditional move, register source, unsigned condition
// (cmpOpU/rFlagsRegU).  Same encoding as cmovP_reg.
6908instruct cmovP_regU(rRegP dst, rRegP src, rFlagsRegU cr, cmpOpU cop)
6909%{
6910  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6911
6912  ins_cost(200); // XXX
6913  format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
6914  opcode(0x0F, 0x40);
6915  ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
6916  ins_pipe(pipe_cmov_reg); // XXX
6917%}
6918
6919// DISABLED: Requires the ADLC to emit a bottom_type call that
6920// correctly meets the two pointer arguments; one is an incoming
6921// register but the other is a memory operand.  ALSO appears to
6922// be buggy with implicit null checks.
6923//
6924//// Conditional move
6925//instruct cmovP_mem(cmpOp cop, rFlagsReg cr, rRegP dst, memory src)
6926//%{
6927//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6928//  ins_cost(250);
6929//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6930//  opcode(0x0F,0x40);
6931//  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
6932//  ins_pipe( pipe_cmov_mem );
6933//%}
6934//
6935//// Conditional move
6936//instruct cmovP_memU(cmpOpU cop, rFlagsRegU cr, rRegP dst, memory src)
6937//%{
6938//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6939//  ins_cost(250);
6940//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6941//  opcode(0x0F,0x40);
6942//  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
6943//  ins_pipe( pipe_cmov_mem );
6944//%}
6945
// Long conditional move, register source, signed condition (cmovq).
6946instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
6947%{
6948  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6949
6950  ins_cost(200); // XXX
6951  format %{ "cmovq$cop $dst, $src\t# signed, long" %}
6952  opcode(0x0F, 0x40);
6953  ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
6954  ins_pipe(pipe_cmov_reg);  // XXX
6955%}
6956
// Long conditional move with the source folded in from memory (LoadL src),
// signed condition.
// NOTE(review): cost is 200 here, the same as the register form, whereas
// cmovI_mem uses 250 -- confirm whether that is intentional.
6957instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
6958%{
6959  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
6960
6961  ins_cost(200); // XXX
6962  format %{ "cmovq$cop $dst, $src\t# signed, long" %}
6963  opcode(0x0F, 0x40);
6964  ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
6965  ins_pipe(pipe_cmov_mem);  // XXX
6966%}
6967
// Long conditional move, register source, unsigned condition
// (cmpOpU/rFlagsRegU).
6968instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
6969%{
6970  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6971
6972  ins_cost(200); // XXX
6973  format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
6974  opcode(0x0F, 0x40);
6975  ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
6976  ins_pipe(pipe_cmov_reg); // XXX
6977%}
6978
// Long conditional move with the source folded in from memory (LoadL src),
// unsigned condition.
// NOTE(review): cost is 200 here, the same as the register form, whereas
// cmovI_memU uses 250 -- confirm whether that is intentional.
6979instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
6980%{
6981  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
6982
6983  ins_cost(200); // XXX
6984  format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
6985  opcode(0x0F, 0x40);
6986  ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
6987  ins_pipe(pipe_cmov_mem); // XXX
6988%}
6989
// Float conditional move, signed condition.  Per the format this is not a
// cmov instruction: it is a branch on the negated condition around a movss.
6990instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
6991%{
6992  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6993
6994  ins_cost(200); // XXX
6995  format %{ "jn$cop    skip\t# signed cmove float\n\t"
6996            "movss     $dst, $src\n"
6997    "skip:" %}
6998  ins_encode(enc_cmovf_branch(cop, dst, src));
6999  ins_pipe(pipe_slow);
7000%}
7001
7002// instruct cmovF_mem(cmpOp cop, rFlagsReg cr, regF dst, memory src)
7003// %{
7004//   match(Set dst (CMoveF (Binary cop cr) (Binary dst (LoadL src))));
7005
7006//   ins_cost(200); // XXX
7007//   format %{ "jn$cop    skip\t# signed cmove float\n\t"
7008//             "movss     $dst, $src\n"
7009//     "skip:" %}
7010//   ins_encode(enc_cmovf_mem_branch(cop, dst, src));
7011//   ins_pipe(pipe_slow);
7012// %}
7013
// Float conditional move, unsigned condition (cmpOpU/rFlagsRegU).  Per the
// format: a branch on the negated condition around a movss.
7014instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
7015%{
7016  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7017
7018  ins_cost(200); // XXX
7019  format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
7020            "movss     $dst, $src\n"
7021    "skip:" %}
7022  ins_encode(enc_cmovf_branch(cop, dst, src));
7023  ins_pipe(pipe_slow);
7024%}
7025
// Double conditional move, signed condition.  Per the format: a branch on
// the negated condition around a movsd.
7026instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
7027%{
7028  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7029
7030  ins_cost(200); // XXX
7031  format %{ "jn$cop    skip\t# signed cmove double\n\t"
7032            "movsd     $dst, $src\n"
7033    "skip:" %}
7034  ins_encode(enc_cmovd_branch(cop, dst, src));
7035  ins_pipe(pipe_slow);
7036%}
7037
// Double conditional move, unsigned condition (cmpOpU/rFlagsRegU).  Per the
// format: a branch on the negated condition around a movsd.
7038instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
7039%{
7040  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7041
7042  ins_cost(200); // XXX
7043  format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
7044            "movsd     $dst, $src\n"
7045    "skip:" %}
7046  ins_encode(enc_cmovd_branch(cop, dst, src));
7047  ins_pipe(pipe_slow);
7048%}
7049
7050//----------Arithmetic Instructions--------------------------------------------
7051//----------Addition Instructions----------------------------------------------
7052
// Int add, register + register, two-address form (dst = dst + src).
7053instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
7054%{
7055  match(Set dst (AddI dst src));
7056  effect(KILL cr);  // flags are clobbered
7057
7058  format %{ "addl    $dst, $src\t# int" %}
7059  opcode(0x03);
7060  ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
7061  ins_pipe(ialu_reg_reg);
7062%}
7063
// Int add, register + immediate (dst = dst + src).  The encoding classes
// OpcSErm/Con8or32 suggest a short 8-bit immediate form is chosen when the
// constant fits -- TODO confirm against their definitions.
7064instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
7065%{
7066  match(Set dst (AddI dst src));
7067  effect(KILL cr);  // flags are clobbered
7068
7069  format %{ "addl    $dst, $src\t# int" %}
7070  opcode(0x81, 0x00); /* /0 id */
7071  ins_encode(OpcSErm(dst, src), Con8or32(src));
7072  ins_pipe( ialu_reg );
7073%}
7074
// Int add with the second operand folded in from memory:
// dst = dst + LoadI(src).
7075instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
7076%{
7077  match(Set dst (AddI dst (LoadI src)));
7078  effect(KILL cr);  // flags are clobbered
7079
7080  ins_cost(125); // XXX
7081  format %{ "addl    $dst, $src\t# int" %}
7082  opcode(0x03);
7083  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
7084  ins_pipe(ialu_reg_mem);
7085%}
7086
// Int read-modify-write add to memory: matches a StoreI to dst of
// (LoadI dst) + src, i.e. load, add and store on the same address.
7087instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
7088%{
7089  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7090  effect(KILL cr);  // flags are clobbered
7091
7092  ins_cost(150); // XXX
7093  format %{ "addl    $dst, $src\t# int" %}
7094  opcode(0x01); /* Opcode 01 /r */
7095  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));  // register operand is src, memory operand is dst
7096  ins_pipe(ialu_mem_reg);
7097%}
7098
// Int read-modify-write add of an immediate to memory: matches a StoreI to
// dst of (LoadI dst) + src.
7099instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
7100%{
7101  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7102  effect(KILL cr);  // flags are clobbered
7103
7104  ins_cost(125); // XXX
7105  format %{ "addl    $dst, $src\t# int" %}
7106  opcode(0x81); /* Opcode 81 /0 id */
7107  ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));  // 0x00 is the /0 opcode extension
7108  ins_pipe(ialu_mem_imm);
7109%}
7110
// Int increment of a register: matches dst + 1 (immI1) and formats as incl.
// Only eligible when the UseIncDec flag is set.
7111instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
7112%{
7113  predicate(UseIncDec);
7114  match(Set dst (AddI dst src));
7115  effect(KILL cr);  // flags are clobbered
7116
7117  format %{ "incl    $dst\t# int" %}
7118  opcode(0xFF, 0x00); // FF /0
7119  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
7120  ins_pipe(ialu_reg);
7121%}
7122
// Int increment of a memory location: matches a StoreI to dst of
// (LoadI dst) + 1 and formats as incl.  Only eligible when UseIncDec is set.
7123instruct incI_mem(memory dst, immI1 src, rFlagsReg cr)
7124%{
7125  predicate(UseIncDec);
7126  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7127  effect(KILL cr);  // flags are clobbered
7128
7129  ins_cost(125); // XXX
7130  format %{ "incl    $dst\t# int" %}
7131  opcode(0xFF); /* Opcode FF /0 */
7132  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x00, dst));  // 0x00 is the /0 opcode extension
7133  ins_pipe(ialu_mem_imm);
7134%}
7135
7136// XXX why does that use AddI
// Int decrement of a register: matches dst + (-1) (immI_M1) and formats as
// decl.  Only eligible when UseIncDec is set.
// The rule matches AddI with a -1 constant rather than SubI; presumably the
// ideal graph presents "x - 1" in that form -- TODO confirm.
7137instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
7138%{
7139  predicate(UseIncDec);
7140  match(Set dst (AddI dst src));
7141  effect(KILL cr);  // flags are clobbered
7142
7143  format %{ "decl    $dst\t# int" %}
7144  opcode(0xFF, 0x01); // FF /1
7145  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
7146  ins_pipe(ialu_reg);
7147%}
7148
7149// XXX why does that use AddI
// Int decrement of a memory location: matches a StoreI to dst of
// (LoadI dst) + (-1) and formats as decl.  Only eligible when UseIncDec is set.
7150instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
7151%{
7152  predicate(UseIncDec);
7153  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7154  effect(KILL cr);  // flags are clobbered
7155
7156  ins_cost(125); // XXX
7157  format %{ "decl    $dst\t# int" %}
7158  opcode(0xFF); /* Opcode FF /1 */
7159  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x01, dst));  // 0x01 is the /1 opcode extension
7160  ins_pipe(ialu_mem_imm);
7161%}
7162
// Three-operand int add via lea: dst = src0 + src1 with dst distinct from
// src0.  Declares no KILL of the flags register, unlike the add forms.
// Cost 110 is higher than the unspecified cost of the two-address addl rules.
7163instruct leaI_rReg_immI(rRegI dst, rRegI src0, immI src1)
7164%{
7165  match(Set dst (AddI src0 src1));
7166
7167  ins_cost(110);
7168  format %{ "addr32 leal $dst, [$src0 + $src1]\t# int" %}
7169  opcode(0x8D); /* 0x8D /r */
7170  ins_encode(Opcode(0x67), REX_reg_reg(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
7171  ins_pipe(ialu_reg_reg);
7172%}
7173
// Long add, register + register, two-address form (dst = dst + src).
7174instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
7175%{
7176  match(Set dst (AddL dst src));
7177  effect(KILL cr);  // flags are clobbered
7178
7179  format %{ "addq    $dst, $src\t# long" %}
7180  opcode(0x03);
7181  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
7182  ins_pipe(ialu_reg_reg);
7183%}
7184
// Long add, register + immediate.  The constant operand is immL32, i.e. a
// long constant restricted to what that operand class accepts.
7185instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
7186%{
7187  match(Set dst (AddL dst src));
7188  effect(KILL cr);  // flags are clobbered
7189
7190  format %{ "addq    $dst, $src\t# long" %}
7191  opcode(0x81, 0x00); /* /0 id */
7192  ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
7193  ins_pipe( ialu_reg );
7194%}
7195
// Long add with the second operand folded in from memory:
// dst = dst + LoadL(src).
7196instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
7197%{
7198  match(Set dst (AddL dst (LoadL src)));
7199  effect(KILL cr);  // flags are clobbered
7200
7201  ins_cost(125); // XXX
7202  format %{ "addq    $dst, $src\t# long" %}
7203  opcode(0x03);
7204  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
7205  ins_pipe(ialu_reg_mem);
7206%}
7207
// Long read-modify-write add to memory: matches a StoreL to dst of
// (LoadL dst) + src.
7208instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
7209%{
7210  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
7211  effect(KILL cr);  // flags are clobbered
7212
7213  ins_cost(150); // XXX
7214  format %{ "addq    $dst, $src\t# long" %}
7215  opcode(0x01); /* Opcode 01 /r */
7216  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));  // register operand is src, memory operand is dst
7217  ins_pipe(ialu_mem_reg);
7218%}
7219
// Long read-modify-write add of an immediate (immL32) to memory: matches a
// StoreL to dst of (LoadL dst) + src.
7220instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
7221%{
7222  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
7223  effect(KILL cr);  // flags are clobbered
7224
7225  ins_cost(125); // XXX
7226  format %{ "addq    $dst, $src\t# long" %}
7227  opcode(0x81); /* Opcode 81 /0 id */
7228  ins_encode(REX_mem_wide(dst),
7229             OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));
7230  ins_pipe(ialu_mem_imm);
7231%}
7232
// Long increment of a register: matches dst + 1 (immL1) and formats as incq.
// Only eligible when UseIncDec is set.
// dst is a long register (rRegL): the rule matches AddL and emits the
// 64-bit form, consistent with decL_rReg.  It was previously declared
// rRegI, which does not agree with the AddL result type.
7233instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
7234%{
7235  predicate(UseIncDec);
7236  match(Set dst (AddL dst src));
7237  effect(KILL cr);  // flags are clobbered
7238
7239  format %{ "incq    $dst\t# long" %}
7240  opcode(0xFF, 0x00); // FF /0
7241  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
7242  ins_pipe(ialu_reg);
7243%}
7244
// Long increment of a memory location: matches a StoreL to dst of
// (LoadL dst) + 1 and formats as incq.  Only eligible when UseIncDec is set.
7245instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
7246%{
7247  predicate(UseIncDec);
7248  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
7249  effect(KILL cr);  // flags are clobbered
7250
7251  ins_cost(125); // XXX
7252  format %{ "incq    $dst\t# long" %}
7253  opcode(0xFF); /* Opcode FF /0 */
7254  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x00, dst));  // 0x00 is the /0 opcode extension
7255  ins_pipe(ialu_mem_imm);
7256%}
7257
7258// XXX why does that use AddL
// Long decrement of a register: matches dst + (-1) (immL_M1) and formats as
// decq.  Only eligible when UseIncDec is set.
7259instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
7260%{
7261  predicate(UseIncDec);
7262  match(Set dst (AddL dst src));
7263  effect(KILL cr);  // flags are clobbered
7264
7265  format %{ "decq    $dst\t# long" %}
7266  opcode(0xFF, 0x01); // FF /1
7267  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
7268  ins_pipe(ialu_reg);
7269%}
7270
7271// XXX why does that use AddL
// Long decrement of a memory location: matches a StoreL to dst of
// (LoadL dst) + (-1) and formats as decq.  Only eligible when UseIncDec is set.
7272instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
7273%{
7274  predicate(UseIncDec);
7275  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
7276  effect(KILL cr);  // flags are clobbered
7277
7278  ins_cost(125); // XXX
7279  format %{ "decq    $dst\t# long" %}
7280  opcode(0xFF); /* Opcode FF /1 */
7281  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x01, dst));  // 0x01 is the /1 opcode extension
7282  ins_pipe(ialu_mem_imm);
7283%}
7284
// Three-operand long add via lea: dst = src0 + src1 with dst distinct from
// src0.  Declares no KILL of the flags register, unlike the add forms.
7285instruct leaL_rReg_immL(rRegL dst, rRegL src0, immL32 src1)
7286%{
7287  match(Set dst (AddL src0 src1));
7288
7289  ins_cost(110);
7290  format %{ "leaq    $dst, [$src0 + $src1]\t# long" %}
7291  opcode(0x8D); /* 0x8D /r */
7292  ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
7293  ins_pipe(ialu_reg_reg);
7294%}
7295
// Pointer add: dst (pointer) = dst + src, where src is a long register.
7296instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
7297%{
7298  match(Set dst (AddP dst src));
7299  effect(KILL cr);  // flags are clobbered
7300
7301  format %{ "addq    $dst, $src\t# ptr" %}
7302  opcode(0x03);
7303  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
7304  ins_pipe(ialu_reg_reg);
7305%}
7306
// Pointer add of an immediate (immL32): dst (pointer) = dst + src.
7307instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
7308%{
7309  match(Set dst (AddP dst src));
7310  effect(KILL cr);  // flags are clobbered
7311
7312  format %{ "addq    $dst, $src\t# ptr" %}
7313  opcode(0x81, 0x00); /* /0 id */
7314  ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
7315  ins_pipe( ialu_reg );
7316%}
7317
7318// XXX addP mem ops ????
7319
// Three-operand pointer add via lea: dst = src0 + src1 with dst distinct
// from src0.  Declares no KILL of the flags register.
7320instruct leaP_rReg_imm(rRegP dst, rRegP src0, immL32 src1)
7321%{
7322  match(Set dst (AddP src0 src1));
7323
7324  ins_cost(110);
7325  format %{ "leaq    $dst, [$src0 + $src1]\t# ptr" %}
7326  opcode(0x8D); /* 0x8D /r */
7327  ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1));// XXX
7328  ins_pipe(ialu_reg_reg);
7329%}
7330
// CheckCastPP emits no machine code: size(0) and an empty encoding.  The
// value stays in the same register (dst is both input and output).
7331instruct checkCastPP(rRegP dst)
7332%{
7333  match(Set dst (CheckCastPP dst));
7334
7335  size(0);
7336  format %{ "# checkcastPP of $dst" %}
7337  ins_encode(/* empty encoding */);
7338  ins_pipe(empty);
7339%}
7340
// CastPP emits no machine code: size(0) and an empty encoding.  The value
// stays in the same register (dst is both input and output).
7341instruct castPP(rRegP dst)
7342%{
7343  match(Set dst (CastPP dst));
7344
7345  size(0);
7346  format %{ "# castPP of $dst" %}
7347  ins_encode(/* empty encoding */);
7348  ins_pipe(empty);
7349%}
7350
// CastII emits no machine code: size(0), an empty encoding and an explicit
// cost of 0.  The value stays in the same register.
7351instruct castII(rRegI dst)
7352%{
7353  match(Set dst (CastII dst));
7354
7355  size(0);
7356  format %{ "# castII of $dst" %}
7357  ins_encode(/* empty encoding */);
7358  ins_cost(0);
7359  ins_pipe(empty);
7360%}
7361
7362// LoadP-locked same as a regular LoadP when used with compare-swap
// LoadPLocked is emitted as an ordinary 64-bit load (movq $dst, $mem); no
// extra locking instruction appears in the encoding.
7363instruct loadPLocked(rRegP dst, memory mem)
7364%{
7365  match(Set dst (LoadPLocked mem));
7366
7367  ins_cost(125); // XXX
7368  format %{ "movq    $dst, $mem\t# ptr locked" %}
7369  opcode(0x8B);
7370  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
7371  ins_pipe(ialu_reg_mem); // XXX
7372%}
7373
7374// LoadL-locked - same as a regular LoadL when used with compare-swap
// LoadLLocked is emitted as an ordinary 64-bit load (movq $dst, $mem); no
// extra locking instruction appears in the encoding.
7375instruct loadLLocked(rRegL dst, memory mem)
7376%{
7377  match(Set dst (LoadLLocked mem));
7378
7379  ins_cost(125); // XXX
7380  format %{ "movq    $dst, $mem\t# long locked" %}
7381  opcode(0x8B);
7382  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
7383  ins_pipe(ialu_reg_mem); // XXX
7384%}
7385
7386// Conditional-store of the updated heap-top.
7387// Used during allocation of the shared heap.
7388// Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
7389
// Conditional store of a pointer via cmpxchgq.  The expected old value is
// pinned to RAX (rax_RegP); the result of the node is the flags register
// itself (Set cr ...), so no boolean is materialized.
7390instruct storePConditional(memory heap_top_ptr,
7391                           rax_RegP oldval, rRegP newval,
7392                           rFlagsReg cr)
7393%{
7394  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7395
7396  format %{ "cmpxchgq $heap_top_ptr, $newval\t# (ptr) "
7397            "If rax == $heap_top_ptr then store $newval into $heap_top_ptr" %}
7398  opcode(0x0F, 0xB1);
7399  ins_encode(lock_prefix,
7400             REX_reg_mem_wide(newval, heap_top_ptr),
7401             OpcP, OpcS,
7402             reg_mem(newval, heap_top_ptr));
7403  ins_pipe(pipe_cmpxchg);
7404%}
7405
7406// Conditional-store of a long value
7407// Returns a boolean value: 1 on success, 0 on failure.  Implemented with a
7408// CMPXCHGQ on AMD64; oldval must be in RAX, mem_ptr is any memory operand.
7409
// Conditional store of a long via cmpxchgq, producing a 0/1 int in res.
// The expected old value is pinned to RAX (rax_RegL).  After the cmpxchg,
// sete + movzbl turn the ZF outcome into a zero-extended boolean.
7410instruct storeLConditional(rRegI res,
7411                           memory mem_ptr,
7412                           rax_RegL oldval, rRegL newval,
7413                           rFlagsReg cr)
7414%{
7415  match(Set res (StoreLConditional mem_ptr (Binary oldval newval)));
7416  effect(KILL cr);  // flags are clobbered
7417
7418  format %{ "cmpxchgq $mem_ptr, $newval\t# (long) "
7419            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7420            "sete    $res\n\t"
7421            "movzbl  $res, $res" %}
7422  opcode(0x0F, 0xB1);
7423  ins_encode(lock_prefix,
7424             REX_reg_mem_wide(newval, mem_ptr),
7425             OpcP, OpcS,
7426             reg_mem(newval, mem_ptr),
7427             REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7428             REX_reg_breg(res, res), // movzbl
7429             Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
7430  ins_pipe(pipe_cmpxchg);
7431%}
7432
7433// Conditional-store of a long value
7434// ZF flag is set on success, reset otherwise.  Implemented with a
7435// CMPXCHGQ on AMD64; oldval must be in RAX, mem_ptr is any memory operand.
// Flags-only variant of storeLConditional: matches the pattern where the
// StoreLConditional result is immediately compared with zero, so the
// cmpxchgq flags are used directly and the sete/movzbl pair is omitted.
7436instruct storeLConditional_flags(memory mem_ptr,
7437                                 rax_RegL oldval, rRegL newval,
7438                                 rFlagsReg cr,
7439                                 immI0 zero)
7440%{
7441  match(Set cr (CmpI (StoreLConditional mem_ptr (Binary oldval newval)) zero));
7442
7443  format %{ "cmpxchgq $mem_ptr, $newval\t# (long) "
7444            "If rax == $mem_ptr then store $newval into $mem_ptr" %}
7445  opcode(0x0F, 0xB1);
7446  ins_encode(lock_prefix,
7447             REX_reg_mem_wide(newval, mem_ptr),
7448             OpcP, OpcS,
7449             reg_mem(newval, mem_ptr));
7450  ins_pipe(pipe_cmpxchg);
7451%}
7452
// Pointer compare-and-swap via cmpxchgq, producing a 0/1 int in res.
// The expected value is pinned to RAX and is declared killed, as are the
// flags.  sete + movzbl convert the ZF outcome into the boolean result.
7453instruct compareAndSwapP(rRegI res,
7454                         memory mem_ptr,
7455                         rax_RegP oldval, rRegP newval,
7456                         rFlagsReg cr)
7457%{
7458  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7459  effect(KILL cr, KILL oldval);
7460
7461  format %{ "cmpxchgq $mem_ptr,$newval\t# "
7462            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7463            "sete    $res\n\t"
7464            "movzbl  $res, $res" %}
7465  opcode(0x0F, 0xB1);
7466  ins_encode(lock_prefix,
7467             REX_reg_mem_wide(newval, mem_ptr),
7468             OpcP, OpcS,
7469             reg_mem(newval, mem_ptr),
7470             REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7471             REX_reg_breg(res, res), // movzbl
7472             Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
7473  ins_pipe( pipe_cmpxchg );
7474%}
7475
7476// XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
// Long compare-and-swap via cmpxchgq, producing a 0/1 int in res.
// The expected value is pinned to RAX and is declared killed, as are the
// flags.  sete + movzbl convert the ZF outcome into the boolean result.
7477instruct compareAndSwapL(rRegI res,
7478                         memory mem_ptr,
7479                         rax_RegL oldval, rRegL newval,
7480                         rFlagsReg cr)
7481%{
7482  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7483  effect(KILL cr, KILL oldval);
7484
7485  format %{ "cmpxchgq $mem_ptr,$newval\t# "
7486            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7487            "sete    $res\n\t"
7488            "movzbl  $res, $res" %}
7489  opcode(0x0F, 0xB1);
7490  ins_encode(lock_prefix,
7491             REX_reg_mem_wide(newval, mem_ptr),
7492             OpcP, OpcS,
7493             reg_mem(newval, mem_ptr),
7494             REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7495             REX_reg_breg(res, res), // movzbl
7496             Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
7497  ins_pipe( pipe_cmpxchg );
7498%}
7499
// Int compare-and-swap via cmpxchgl (non-wide REX encoding class),
// producing a 0/1 int in res.  The expected value is pinned to RAX
// (rax_RegI) and is declared killed, as are the flags.
7500instruct compareAndSwapI(rRegI res,
7501                         memory mem_ptr,
7502                         rax_RegI oldval, rRegI newval,
7503                         rFlagsReg cr)
7504%{
7505  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7506  effect(KILL cr, KILL oldval);
7507
7508  format %{ "cmpxchgl $mem_ptr,$newval\t# "
7509            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7510            "sete    $res\n\t"
7511            "movzbl  $res, $res" %}
7512  opcode(0x0F, 0xB1);
7513  ins_encode(lock_prefix,
7514             REX_reg_mem(newval, mem_ptr),
7515             OpcP, OpcS,
7516             reg_mem(newval, mem_ptr),
7517             REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7518             REX_reg_breg(res, res), // movzbl
7519             Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
7520  ins_pipe( pipe_cmpxchg );
7521%}
7522
7523
7524//----------Subtraction Instructions-------------------------------------------
7525
7526// Integer Subtraction Instructions
// Int subtract, register - register, two-address form (dst = dst - src).
7527instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
7528%{
7529  match(Set dst (SubI dst src));
7530  effect(KILL cr);  // flags are clobbered
7531
7532  format %{ "subl    $dst, $src\t# int" %}
7533  opcode(0x2B);
7534  ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
7535  ins_pipe(ialu_reg_reg);
7536%}
7537
// Int subtract, register - immediate (dst = dst - src).
7538instruct subI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
7539%{
7540  match(Set dst (SubI dst src));
7541  effect(KILL cr);  // flags are clobbered
7542
7543  format %{ "subl    $dst, $src\t# int" %}
7544  opcode(0x81, 0x05);  /* Opcode 81 /5 */
7545  ins_encode(OpcSErm(dst, src), Con8or32(src));
7546  ins_pipe(ialu_reg);
7547%}
7548
// Int subtract with the subtrahend folded in from memory:
// dst = dst - LoadI(src).
7549instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
7550%{
7551  match(Set dst (SubI dst (LoadI src)));
7552  effect(KILL cr);  // flags are clobbered
7553
7554  ins_cost(125);
7555  format %{ "subl    $dst, $src\t# int" %}
7556  opcode(0x2B);
7557  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
7558  ins_pipe(ialu_reg_mem);
7559%}
7560
// Int read-modify-write subtract on memory: matches a StoreI to dst of
// (LoadI dst) - src.
7561instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
7562%{
7563  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7564  effect(KILL cr);  // flags are clobbered
7565
7566  ins_cost(150);
7567  format %{ "subl    $dst, $src\t# int" %}
7568  opcode(0x29); /* Opcode 29 /r */
7569  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));  // register operand is src, memory operand is dst
7570  ins_pipe(ialu_mem_reg);
7571%}
7572
// Int read-modify-write subtract of an immediate on memory: matches a
// StoreI to dst of (LoadI dst) - src.
7573instruct subI_mem_imm(memory dst, immI src, rFlagsReg cr)
7574%{
7575  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7576  effect(KILL cr);  // flags are clobbered
7577
7578  ins_cost(125); // XXX
7579  format %{ "subl    $dst, $src\t# int" %}
7580  opcode(0x81); /* Opcode 81 /5 id */
7581  ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));  // 0x05 is the /5 opcode extension
7582  ins_pipe(ialu_mem_imm);
7583%}
7584
// Long subtract, register - register, two-address form (dst = dst - src).
7585instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
7586%{
7587  match(Set dst (SubL dst src));
7588  effect(KILL cr);  // flags are clobbered
7589
7590  format %{ "subq    $dst, $src\t# long" %}
7591  opcode(0x2B);
7592  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
7593  ins_pipe(ialu_reg_reg);
7594%}
7595
// Long subtract, register - immediate (immL32): dst = dst - src.
// dst is a long register (rRegL): the rule matches SubL and emits the
// 64-bit subq, consistent with subL_rReg and addL_rReg_imm.  It was
// previously declared rRegI, which does not agree with the SubL result type.
7596instruct subL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
7597%{
7598  match(Set dst (SubL dst src));
7599  effect(KILL cr);  // flags are clobbered
7600
7601  format %{ "subq    $dst, $src\t# long" %}
7602  opcode(0x81, 0x05);  /* Opcode 81 /5 */
7603  ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
7604  ins_pipe(ialu_reg);
7605%}
7606
// Long subtract with the subtrahend folded in from memory:
// dst = dst - LoadL(src).
7607instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
7608%{
7609  match(Set dst (SubL dst (LoadL src)));
7610  effect(KILL cr);  // flags are clobbered
7611
7612  ins_cost(125);
7613  format %{ "subq    $dst, $src\t# long" %}
7614  opcode(0x2B);
7615  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
7616  ins_pipe(ialu_reg_mem);
7617%}
7618
// Long read-modify-write subtract on memory: matches a StoreL to dst of
// (LoadL dst) - src.
7619instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
7620%{
7621  match(Set dst (StoreL dst (SubL (LoadL dst) src)));
7622  effect(KILL cr);  // flags are clobbered
7623
7624  ins_cost(150);
7625  format %{ "subq    $dst, $src\t# long" %}
7626  opcode(0x29); /* Opcode 29 /r */
7627  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));  // register operand is src, memory operand is dst
7628  ins_pipe(ialu_mem_reg);
7629%}
7630
// Long read-modify-write subtract of an immediate (immL32) on memory:
// matches a StoreL to dst of (LoadL dst) - src.
7631instruct subL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
7632%{
7633  match(Set dst (StoreL dst (SubL (LoadL dst) src)));
7634  effect(KILL cr);  // flags are clobbered
7635
7636  ins_cost(125); // XXX
7637  format %{ "subq    $dst, $src\t# long" %}
7638  opcode(0x81); /* Opcode 81 /5 id */
7639  ins_encode(REX_mem_wide(dst),
7640             OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
7641  ins_pipe(ialu_mem_imm);
7642%}
7643
7644// Subtract from a pointer
7645// XXX hmpf???
// Pointer minus int: matches AddP of dst with a negated int (0 - src) and
// emits a single subq instead of a negate followed by an add.
// NOTE(review): src is an int register (rRegI) but the encoding is the
// 64-bit subq, so the upper 32 bits of src take part in the subtraction --
// confirm whether this pattern can actually occur on this platform and
// whether src is guaranteed to be suitably extended.
7646instruct subP_rReg(rRegP dst, rRegI src, immI0 zero, rFlagsReg cr)
7647%{
7648  match(Set dst (AddP dst (SubI zero src)));
7649  effect(KILL cr);  // flags are clobbered
7650
7651  format %{ "subq    $dst, $src\t# ptr - int" %}
7652  opcode(0x2B);
7653  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
7654  ins_pipe(ialu_reg_reg);
7655%}
7656
// Int negate of a register: matches (0 - dst) and formats as negl.
7657instruct negI_rReg(rRegI dst, immI0 zero, rFlagsReg cr)
7658%{
7659  match(Set dst (SubI zero dst));
7660  effect(KILL cr);  // flags are clobbered
7661
7662  format %{ "negl    $dst\t# int" %}
7663  opcode(0xF7, 0x03);  // Opcode F7 /3
7664  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
7665  ins_pipe(ialu_reg);
7666%}
7667
// Int negate of a memory location: matches a StoreI to dst of
// (0 - LoadI dst) and formats as negl on the memory operand.
// NOTE(review): uses ins_pipe(ialu_reg) although this is a memory
// read-modify-write; the other *_mem rules here use ialu_mem_* pipes --
// confirm whether that is intentional.
7668instruct negI_mem(memory dst, immI0 zero, rFlagsReg cr)
7669%{
7670  match(Set dst (StoreI dst (SubI zero (LoadI dst))));
7671  effect(KILL cr);  // flags are clobbered
7672
7673  format %{ "negl    $dst\t# int" %}
7674  opcode(0xF7, 0x03);  // Opcode F7 /3
7675  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));  // secondary is the 0x03 given in opcode()
7676  ins_pipe(ialu_reg);
7677%}
7678
// Long negate of a register: matches (0L - dst) and formats as negq.
7679instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
7680%{
7681  match(Set dst (SubL zero dst));
7682  effect(KILL cr);  // flags are clobbered
7683
7684  format %{ "negq    $dst\t# long" %}
7685  opcode(0xF7, 0x03);  // Opcode F7 /3
7686  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
7687  ins_pipe(ialu_reg);
7688%}
7689
// Long negate of a memory location: matches a StoreL to dst of
// (0L - LoadL dst) and formats as negq on the memory operand.
// NOTE(review): uses ins_pipe(ialu_reg) although this is a memory
// read-modify-write; the other *_mem rules here use ialu_mem_* pipes --
// confirm whether that is intentional.
7690instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
7691%{
7692  match(Set dst (StoreL dst (SubL zero (LoadL dst))));
7693  effect(KILL cr);  // flags are clobbered
7694
7695  format %{ "negq    $dst\t# long" %}
7696  opcode(0xF7, 0x03);  // Opcode F7 /3
7697  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));  // secondary is the 0x03 given in opcode()
7698  ins_pipe(ialu_reg);
7699%}
7700
7701
7702//----------Multiplication/Division Instructions-------------------------------
7703// Integer Multiplication Instructions
7704// Multiply Register
7705
// Int multiply, register * register, two-address form (dst = dst * src).
7706instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
7707%{
7708  match(Set dst (MulI dst src));
7709  effect(KILL cr);  // flags are clobbered
7710
7711  ins_cost(300);
7712  format %{ "imull   $dst, $src\t# int" %}
7713  opcode(0x0F, 0xAF);
7714  ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
7715  ins_pipe(ialu_reg_reg_alu0);
7716%}
7717
// Int multiply by an immediate, three-operand form: dst = src * imm, with
// dst independent of src.
7718instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
7719%{
7720  match(Set dst (MulI src imm));
7721  effect(KILL cr);  // flags are clobbered
7722
7723  ins_cost(300);
7724  format %{ "imull   $dst, $src, $imm\t# int" %}
7725  opcode(0x69); /* 69 /r id */
7726  ins_encode(REX_reg_reg(dst, src),
7727             OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
7728  ins_pipe(ialu_reg_reg_alu0);
7729%}
7730
// Int multiply with the second operand folded in from memory:
// dst = dst * LoadI(src).
7731instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
7732%{
7733  match(Set dst (MulI dst (LoadI src)));
7734  effect(KILL cr);  // flags are clobbered
7735
7736  ins_cost(350);
7737  format %{ "imull   $dst, $src\t# int" %}
7738  opcode(0x0F, 0xAF);
7739  ins_encode(REX_reg_mem(dst, src), OpcP, OpcS, reg_mem(dst, src));
7740  ins_pipe(ialu_reg_mem_alu0);
7741%}
7742
// Int multiply of a memory operand by an immediate, three-operand form:
// dst = LoadI(src) * imm.
7743instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
7744%{
7745  match(Set dst (MulI (LoadI src) imm));
7746  effect(KILL cr);  // flags are clobbered
7747
7748  ins_cost(300);
7749  format %{ "imull   $dst, $src, $imm\t# int" %}
7750  opcode(0x69); /* 69 /r id */
7751  ins_encode(REX_reg_mem(dst, src),
7752             OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
7753  ins_pipe(ialu_reg_mem_alu0);
7754%}
7755
// Long multiply, register * register, two-address form (dst = dst * src).
7756instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
7757%{
7758  match(Set dst (MulL dst src));
7759  effect(KILL cr);  // flags are clobbered
7760
7761  ins_cost(300);
7762  format %{ "imulq   $dst, $src\t# long" %}
7763  opcode(0x0F, 0xAF);
7764  ins_encode(REX_reg_reg_wide(dst, src), OpcP, OpcS, reg_reg(dst, src));
7765  ins_pipe(ialu_reg_reg_alu0);
7766%}
7767
// Long multiply by an immediate (immL32), three-operand form:
// dst = src * imm, with dst independent of src.
7768instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
7769%{
7770  match(Set dst (MulL src imm));
7771  effect(KILL cr);  // flags are clobbered
7772
7773  ins_cost(300);
7774  format %{ "imulq   $dst, $src, $imm\t# long" %}
7775  opcode(0x69); /* 69 /r id */
7776  ins_encode(REX_reg_reg_wide(dst, src),
7777             OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
7778  ins_pipe(ialu_reg_reg_alu0);
7779%}
7780
// Long multiply with the second operand folded in from memory:
// dst = dst * LoadL(src).
7781instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
7782%{
7783  match(Set dst (MulL dst (LoadL src)));
7784  effect(KILL cr);  // flags are clobbered
7785
7786  ins_cost(350);
7787  format %{ "imulq   $dst, $src\t# long" %}
7788  opcode(0x0F, 0xAF);
7789  ins_encode(REX_reg_mem_wide(dst, src), OpcP, OpcS, reg_mem(dst, src));
7790  ins_pipe(ialu_reg_mem_alu0);
7791%}
7792
// Long multiply of a memory operand by an immediate (immL32), three-operand
// form: dst = LoadL(src) * imm.
7793instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
7794%{
7795  match(Set dst (MulL (LoadL src) imm));
7796  effect(KILL cr);  // flags are clobbered
7797
7798  ins_cost(300);
7799  format %{ "imulq   $dst, $src, $imm\t# long" %}
7800  opcode(0x69); /* 69 /r id */
7801  ins_encode(REX_reg_mem_wide(dst, src),
7802             OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
7803  ins_pipe(ialu_reg_mem_alu0);
7804%}
7805
// Signed int division: dividend and quotient in RAX, RDX killed, divisor in
// any int register other than RAX/RDX.
// Per the format, the sequence guards the one overflowing case: when
// rax == 0x80000000 and $div == -1 the idivl is skipped, with rdx zeroed and
// rax left unchanged.  Otherwise it sign-extends with cdql and executes idivl.
// The guard is presumably emitted by cdql_enc; the F7 /7 opcode is the idivl.
7806instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
7807                   rFlagsReg cr)
7808%{
7809  match(Set rax (DivI rax div));
7810  effect(KILL rdx, KILL cr);
7811
7812  ins_cost(30*100+10*100); // XXX
7813  format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
7814            "jne,s   normal\n\t"
7815            "xorl    rdx, rdx\n\t"
7816            "cmpl    $div, -1\n\t"
7817            "je,s    done\n"
7818    "normal: cdql\n\t"
7819            "idivl   $div\n"
7820    "done:"        %}
7821  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7822  ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
7823  ins_pipe(ialu_reg_reg_alu0);
7824%}
7825
// Signed long division: dividend and quotient in RAX, RDX killed, divisor in
// any long register other than RAX/RDX.
// Per the format, the sequence guards the one overflowing case: when
// rax == 0x8000000000000000 and $div == -1 the idivq is skipped, with rdx
// zeroed and rax left unchanged.  Otherwise it sign-extends with cdqq and
// executes idivq.  RDX is also used as scratch to hold the min-long constant.
7826instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
7827                   rFlagsReg cr)
7828%{
7829  match(Set rax (DivL rax div));
7830  effect(KILL rdx, KILL cr);
7831
7832  ins_cost(30*100+10*100); // XXX
7833  format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
7834            "cmpq    rax, rdx\n\t"
7835            "jne,s   normal\n\t"
7836            "xorl    rdx, rdx\n\t"
7837            "cmpq    $div, -1\n\t"
7838            "je,s    done\n"
7839    "normal: cdqq\n\t"
7840            "idivq   $div\n"
7841    "done:"        %}
7842  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7843  ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
7844  ins_pipe(ialu_reg_reg_alu0);
7845%}
7846
7847// Integer DIVMOD with Register, both quotient and mod results
// Combined int divide/modulo: one idivl yields both results.  Same guarded
// sequence and encoding as divI_rReg, but the match has no Set and RDX is
// not declared killed -- only the flags are.  Presumably the quotient (RAX)
// and remainder (RDX) are picked up as separate projections; TODO confirm.
7848instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
7849                             rFlagsReg cr)
7850%{
7851  match(DivModI rax div);
7852  effect(KILL cr);
7853
7854  ins_cost(30*100+10*100); // XXX
7855  format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
7856            "jne,s   normal\n\t"
7857            "xorl    rdx, rdx\n\t"
7858            "cmpl    $div, -1\n\t"
7859            "je,s    done\n"
7860    "normal: cdql\n\t"
7861            "idivl   $div\n"
7862    "done:"        %}
7863  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7864  ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
7865  ins_pipe(pipe_slow);
7866%}
7867
7868// Long DIVMOD with Register, both quotient and mod results
// Same emitted sequence and encoding as divL_rReg, but matched against
// DivModL with no Set: only the flags are killed, rdx is not.
// NOTE(review): presumably the quotient is consumed from rax and the
// remainder from rdx -- confirm against the DivModL projections.
7869instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
7870                             rFlagsReg cr)
7871%{
7872  match(DivModL rax div);
7873  effect(KILL cr);
7874
7875  ins_cost(30*100+10*100); // XXX
7876  format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
7877            "cmpq    rax, rdx\n\t"
7878            "jne,s   normal\n\t"
7879            "xorl    rdx, rdx\n\t"
7880            "cmpq    $div, -1\n\t"
7881            "je,s    done\n"
7882    "normal: cdqq\n\t"
7883            "idivq   $div\n"
7884    "done:"        %}
7885  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7886  ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
7887  ins_pipe(pipe_slow);
7888%}
7889
7890//----------- DivL-By-Constant-Expansions--------------------------------------
7891// DivI cases are handled by the compiler
7892
7893// Magic constant, reciprocal of 10
// Loads the 64-bit multiplier 0x6666666666666667 into $dst.
// There is no match rule: this instruct is only instantiated from expand
// rules (divL_10 in this file).
7894instruct loadConL_0x6666666666666667(rRegL dst)
7895%{
7896  effect(DEF dst);
7897
7898  format %{ "movq    $dst, #0x6666666666666667\t# Used in div-by-10" %}
7899  ins_encode(load_immL(dst, 0x6666666666666667));
7900  ins_pipe(ialu_reg);
7901%}
7902
// One-operand imulq (F7 /5): multiplies rax by $src into rdx:rax.
// $dst is pinned to rdx and so receives the high half of the product;
// rax is an input that is destroyed (USE_KILL).  No match rule: used only
// from expand rules.
7903instruct mul_hi(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
7904%{
7905  effect(DEF dst, USE src, USE_KILL rax, KILL cr);
7906
7907  format %{ "imulq   rdx:rax, rax, $src\t# Used in div-by-10" %}
7908  opcode(0xF7, 0x5); /* Opcode F7 /5 */
7909  ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));
7910  ins_pipe(ialu_reg_reg_alu0);
7911%}
7912
// In-place arithmetic right shift of $dst by the fixed count 63 (0x3F).
// No match rule: used only from expand rules.
7913instruct sarL_rReg_63(rRegL dst, rFlagsReg cr)
7914%{
7915  effect(USE_DEF dst, KILL cr);
7916
7917  format %{ "sarq    $dst, #63\t# Used in div-by-10" %}
7918  opcode(0xC1, 0x7); /* C1 /7 ib */
7919  ins_encode(reg_opc_imm_wide(dst, 0x3F));
7920  ins_pipe(ialu_reg);
7921%}
7922
// In-place arithmetic right shift of $dst by the fixed count 2.
// No match rule: used only from expand rules.
7923instruct sarL_rReg_2(rRegL dst, rFlagsReg cr)
7924%{
7925  effect(USE_DEF dst, KILL cr);
7926
7927  format %{ "sarq    $dst, #2\t# Used in div-by-10" %}
7928  opcode(0xC1, 0x7); /* C1 /7 ib */
7929  ins_encode(reg_opc_imm_wide(dst, 0x2));
7930  ins_pipe(ialu_reg);
7931%}
7932
// Long divide by the immL10 constant without using idivq.
// The expansion computes
//   dst = (high64(src * 0x6666666666666667) >> 2) - (src >> 63)
// with both shifts arithmetic.  Note that sarL_rReg_63 is USE_DEF on its
// operand, so the expansion rewrites src as well as dst.
7933instruct divL_10(rdx_RegL dst, no_rax_RegL src, immL10 div)
7934%{
7935  match(Set dst (DivL src div));
7936
7937  ins_cost((5+8)*100);
7938  expand %{
7939    rax_RegL rax;                     // Killed temp
7940    rFlagsReg cr;                     // Killed
7941    loadConL_0x6666666666666667(rax); // movq  rax, 0x6666666666666667
7942    mul_hi(dst, src, rax, cr);        // imulq rdx:rax <= rax * $src
7943    sarL_rReg_63(src, cr);            // sarq  src, 63
7944    sarL_rReg_2(dst, cr);             // sarq  rdx, 2
7945    subL_rReg(dst, src, cr);          // subq  rdx, src
7946  %}
7947%}
7948
7949//-----------------------------------------------------------------------------
7950
// Integer remainder: rdx = rax % $div; rax and the flags are clobbered.
// Same emitted sequence as divI_rReg.  As the format string describes, when
// rax == 0x80000000 and $div == -1 the idivl is skipped and rdx, already
// zeroed by the xorl, is the result.
7951instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
7952                   rFlagsReg cr)
7953%{
7954  match(Set rdx (ModI rax div));
7955  effect(KILL rax, KILL cr);
7956
7957  ins_cost(300); // XXX
7958  format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
7959            "jne,s   normal\n\t"
7960            "xorl    rdx, rdx\n\t"
7961            "cmpl    $div, -1\n\t"
7962            "je,s    done\n"
7963    "normal: cdql\n\t"
7964            "idivl   $div\n"
7965    "done:"        %}
7966  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7967  ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
7968  ins_pipe(ialu_reg_reg_alu0);
7969%}
7970
// Long remainder: rdx = rax % $div; rax and the flags are clobbered.
// Same emitted sequence as divL_rReg.  As the format string describes, when
// rax == 0x8000000000000000 and $div == -1 the idivq is skipped and rdx,
// already zeroed by the xorl, is the result.
7971instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
7972                   rFlagsReg cr)
7973%{
7974  match(Set rdx (ModL rax div));
7975  effect(KILL rax, KILL cr);
7976
7977  ins_cost(300); // XXX
7978  format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
7979            "cmpq    rax, rdx\n\t"
7980            "jne,s   normal\n\t"
7981            "xorl    rdx, rdx\n\t"
7982            "cmpq    $div, -1\n\t"
7983            "je,s    done\n"
7984    "normal: cdqq\n\t"
7985            "idivq   $div\n"
7986    "done:"        %}
7987  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7988  ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
7989  ins_pipe(ialu_reg_reg_alu0);
7990%}
7991
7992// Integer Shift Instructions
7993// Shift Left by one
7994instruct salI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
7995%{
7996  match(Set dst (LShiftI dst shift));
7997  effect(KILL cr);
7998
7999  format %{ "sall    $dst, $shift" %}
8000  opcode(0xD1, 0x4); /* D1 /4 */
8001  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8002  ins_pipe(ialu_reg);
8003%}
8004
8005// Shift Left by one
8006instruct salI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8007%{
8008  match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8009  effect(KILL cr);
8010
8011  format %{ "sall    $dst, $shift\t" %}
8012  opcode(0xD1, 0x4); /* D1 /4 */
8013  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8014  ins_pipe(ialu_mem_imm);
8015%}
8016
8017// Shift Left by 8-bit immediate
8018instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8019%{
8020  match(Set dst (LShiftI dst shift));
8021  effect(KILL cr);
8022
8023  format %{ "sall    $dst, $shift" %}
8024  opcode(0xC1, 0x4); /* C1 /4 ib */
8025  ins_encode(reg_opc_imm(dst, shift));
8026  ins_pipe(ialu_reg);
8027%}
8028
8029// Shift Left by 8-bit immediate
8030instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8031%{
8032  match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8033  effect(KILL cr);
8034
8035  format %{ "sall    $dst, $shift" %}
8036  opcode(0xC1, 0x4); /* C1 /4 ib */
8037  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
8038  ins_pipe(ialu_mem_imm);
8039%}
8040
8041// Shift Left by variable
8042instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
8043%{
8044  match(Set dst (LShiftI dst shift));
8045  effect(KILL cr);
8046
8047  format %{ "sall    $dst, $shift" %}
8048  opcode(0xD3, 0x4); /* D3 /4 */
8049  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8050  ins_pipe(ialu_reg_reg);
8051%}
8052
8053// Shift Left by variable
8054instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8055%{
8056  match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8057  effect(KILL cr);
8058
8059  format %{ "sall    $dst, $shift" %}
8060  opcode(0xD3, 0x4); /* D3 /4 */
8061  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8062  ins_pipe(ialu_mem_reg);
8063%}
8064
8065// Arithmetic shift right by one
8066instruct sarI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8067%{
8068  match(Set dst (RShiftI dst shift));
8069  effect(KILL cr);
8070
8071  format %{ "sarl    $dst, $shift" %}
8072  opcode(0xD1, 0x7); /* D1 /7 */
8073  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8074  ins_pipe(ialu_reg);
8075%}
8076
8077// Arithmetic shift right by one
8078instruct sarI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8079%{
8080  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8081  effect(KILL cr);
8082
8083  format %{ "sarl    $dst, $shift" %}
8084  opcode(0xD1, 0x7); /* D1 /7 */
8085  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8086  ins_pipe(ialu_mem_imm);
8087%}
8088
8089// Arithmetic Shift Right by 8-bit immediate
// Encodes C1 /7 ib on a register operand.  The pipeline class is ialu_reg,
// matching the other register/imm8 shifts (salI_rReg_imm, shrI_rReg_imm);
// there is no memory operand here.
8090instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8091%{
8092  match(Set dst (RShiftI dst shift));
8093  effect(KILL cr);
8094
8095  format %{ "sarl    $dst, $shift" %}
8096  opcode(0xC1, 0x7); /* C1 /7 ib */
8097  ins_encode(reg_opc_imm(dst, shift));
8098  ins_pipe(ialu_reg);
8099%}
8100
8101// Arithmetic Shift Right by 8-bit immediate
8102instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8103%{
8104  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8105  effect(KILL cr);
8106
8107  format %{ "sarl    $dst, $shift" %}
8108  opcode(0xC1, 0x7); /* C1 /7 ib */
8109  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
8110  ins_pipe(ialu_mem_imm);
8111%}
8112
8113// Arithmetic Shift Right by variable
8114instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
8115%{
8116  match(Set dst (RShiftI dst shift));
8117  effect(KILL cr);
8118
8119  format %{ "sarl    $dst, $shift" %}
8120  opcode(0xD3, 0x7); /* D3 /7 */
8121  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8122  ins_pipe(ialu_reg_reg);
8123%}
8124
8125// Arithmetic Shift Right by variable
8126instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8127%{
8128  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8129  effect(KILL cr);
8130
8131  format %{ "sarl    $dst, $shift" %}
8132  opcode(0xD3, 0x7); /* D3 /7 */
8133  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8134  ins_pipe(ialu_mem_reg);
8135%}
8136
8137// Logical shift right by one
8138instruct shrI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8139%{
8140  match(Set dst (URShiftI dst shift));
8141  effect(KILL cr);
8142
8143  format %{ "shrl    $dst, $shift" %}
8144  opcode(0xD1, 0x5); /* D1 /5 */
8145  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8146  ins_pipe(ialu_reg);
8147%}
8148
8149// Logical shift right by one
8150instruct shrI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8151%{
8152  match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
8153  effect(KILL cr);
8154
8155  format %{ "shrl    $dst, $shift" %}
8156  opcode(0xD1, 0x5); /* D1 /5 */
8157  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8158  ins_pipe(ialu_mem_imm);
8159%}
8160
8161// Logical Shift Right by 8-bit immediate
8162instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8163%{
8164  match(Set dst (URShiftI dst shift));
8165  effect(KILL cr);
8166
8167  format %{ "shrl    $dst, $shift" %}
8168  opcode(0xC1, 0x5); /* C1 /5 ib */
8169  ins_encode(reg_opc_imm(dst, shift));
8170  ins_pipe(ialu_reg);
8171%}
8172
8173// Logical Shift Right by 8-bit immediate
8174instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8175%{
8176  match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
8177  effect(KILL cr);
8178
8179  format %{ "shrl    $dst, $shift" %}
8180  opcode(0xC1, 0x5); /* C1 /5 ib */
8181  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
8182  ins_pipe(ialu_mem_imm);
8183%}
8184
8185// Logical Shift Right by variable
8186instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
8187%{
8188  match(Set dst (URShiftI dst shift));
8189  effect(KILL cr);
8190
8191  format %{ "shrl    $dst, $shift" %}
8192  opcode(0xD3, 0x5); /* D3 /5 */
8193  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8194  ins_pipe(ialu_reg_reg);
8195%}
8196
8197// Logical Shift Right by variable
8198instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8199%{
8200  match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
8201  effect(KILL cr);
8202
8203  format %{ "shrl    $dst, $shift" %}
8204  opcode(0xD3, 0x5); /* D3 /5 */
8205  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8206  ins_pipe(ialu_mem_reg);
8207%}
8208
8209// Long Shift Instructions
8210// Shift Left by one
8211instruct salL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
8212%{
8213  match(Set dst (LShiftL dst shift));
8214  effect(KILL cr);
8215
8216  format %{ "salq    $dst, $shift" %}
8217  opcode(0xD1, 0x4); /* D1 /4 */
8218  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8219  ins_pipe(ialu_reg);
8220%}
8221
8222// Shift Left by one
8223instruct salL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8224%{
8225  match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
8226  effect(KILL cr);
8227
8228  format %{ "salq    $dst, $shift" %}
8229  opcode(0xD1, 0x4); /* D1 /4 */
8230  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8231  ins_pipe(ialu_mem_imm);
8232%}
8233
8234// Shift Left by 8-bit immediate
8235instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
8236%{
8237  match(Set dst (LShiftL dst shift));
8238  effect(KILL cr);
8239
8240  format %{ "salq    $dst, $shift" %}
8241  opcode(0xC1, 0x4); /* C1 /4 ib */
8242  ins_encode(reg_opc_imm_wide(dst, shift));
8243  ins_pipe(ialu_reg);
8244%}
8245
8246// Shift Left by 8-bit immediate
8247instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8248%{
8249  match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
8250  effect(KILL cr);
8251
8252  format %{ "salq    $dst, $shift" %}
8253  opcode(0xC1, 0x4); /* C1 /4 ib */
8254  ins_encode(REX_mem_wide(dst), OpcP,
8255             RM_opc_mem(secondary, dst), Con8or32(shift));
8256  ins_pipe(ialu_mem_imm);
8257%}
8258
8259// Shift Left by variable
8260instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
8261%{
8262  match(Set dst (LShiftL dst shift));
8263  effect(KILL cr);
8264
8265  format %{ "salq    $dst, $shift" %}
8266  opcode(0xD3, 0x4); /* D3 /4 */
8267  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8268  ins_pipe(ialu_reg_reg);
8269%}
8270
8271// Shift Left by variable
8272instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8273%{
8274  match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
8275  effect(KILL cr);
8276
8277  format %{ "salq    $dst, $shift" %}
8278  opcode(0xD3, 0x4); /* D3 /4 */
8279  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8280  ins_pipe(ialu_mem_reg);
8281%}
8282
8283// Arithmetic shift right by one
8284instruct sarL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
8285%{
8286  match(Set dst (RShiftL dst shift));
8287  effect(KILL cr);
8288
8289  format %{ "sarq    $dst, $shift" %}
8290  opcode(0xD1, 0x7); /* D1 /7 */
8291  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8292  ins_pipe(ialu_reg);
8293%}
8294
8295// Arithmetic shift right by one
8296instruct sarL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8297%{
8298  match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
8299  effect(KILL cr);
8300
8301  format %{ "sarq    $dst, $shift" %}
8302  opcode(0xD1, 0x7); /* D1 /7 */
8303  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8304  ins_pipe(ialu_mem_imm);
8305%}
8306
8307// Arithmetic Shift Right by 8-bit immediate
// Encodes REX.W C1 /7 ib on a register operand.  The pipeline class is
// ialu_reg, matching the other register/imm8 long shifts (salL_rReg_imm,
// shrL_rReg_imm); there is no memory operand here.
8308instruct sarL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
8309%{
8310  match(Set dst (RShiftL dst shift));
8311  effect(KILL cr);
8312
8313  format %{ "sarq    $dst, $shift" %}
8314  opcode(0xC1, 0x7); /* C1 /7 ib */
8315  ins_encode(reg_opc_imm_wide(dst, shift));
8316  ins_pipe(ialu_reg);
8317%}
8318
8319// Arithmetic Shift Right by 8-bit immediate
8320instruct sarL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8321%{
8322  match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
8323  effect(KILL cr);
8324
8325  format %{ "sarq    $dst, $shift" %}
8326  opcode(0xC1, 0x7); /* C1 /7 ib */
8327  ins_encode(REX_mem_wide(dst), OpcP,
8328             RM_opc_mem(secondary, dst), Con8or32(shift));
8329  ins_pipe(ialu_mem_imm);
8330%}
8331
8332// Arithmetic Shift Right by variable
8333instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
8334%{
8335  match(Set dst (RShiftL dst shift));
8336  effect(KILL cr);
8337
8338  format %{ "sarq    $dst, $shift" %}
8339  opcode(0xD3, 0x7); /* D3 /7 */
8340  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8341  ins_pipe(ialu_reg_reg);
8342%}
8343
8344// Arithmetic Shift Right by variable
8345instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8346%{
8347  match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
8348  effect(KILL cr);
8349
8350  format %{ "sarq    $dst, $shift" %}
8351  opcode(0xD3, 0x7); /* D3 /7 */
8352  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8353  ins_pipe(ialu_mem_reg);
8354%}
8355
8356// Logical shift right by one
8357instruct shrL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
8358%{
8359  match(Set dst (URShiftL dst shift));
8360  effect(KILL cr);
8361
8362  format %{ "shrq    $dst, $shift" %}
8363  opcode(0xD1, 0x5); /* D1 /5 */
8364  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst ));
8365  ins_pipe(ialu_reg);
8366%}
8367
8368// Logical shift right by one
8369instruct shrL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8370%{
8371  match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
8372  effect(KILL cr);
8373
8374  format %{ "shrq    $dst, $shift" %}
8375  opcode(0xD1, 0x5); /* D1 /5 */
8376  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8377  ins_pipe(ialu_mem_imm);
8378%}
8379
8380// Logical Shift Right by 8-bit immediate
8381instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
8382%{
8383  match(Set dst (URShiftL dst shift));
8384  effect(KILL cr);
8385
8386  format %{ "shrq    $dst, $shift" %}
8387  opcode(0xC1, 0x5); /* C1 /5 ib */
8388  ins_encode(reg_opc_imm_wide(dst, shift));
8389  ins_pipe(ialu_reg);
8390%}
8391
8392// Logical Shift Right by 8-bit immediate
8393instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8394%{
8395  match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
8396  effect(KILL cr);
8397
8398  format %{ "shrq    $dst, $shift" %}
8399  opcode(0xC1, 0x5); /* C1 /5 ib */
8400  ins_encode(REX_mem_wide(dst), OpcP,
8401             RM_opc_mem(secondary, dst), Con8or32(shift));
8402  ins_pipe(ialu_mem_imm);
8403%}
8404
8405// Logical Shift Right by variable
8406instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
8407%{
8408  match(Set dst (URShiftL dst shift));
8409  effect(KILL cr);
8410
8411  format %{ "shrq    $dst, $shift" %}
8412  opcode(0xD3, 0x5); /* D3 /5 */
8413  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8414  ins_pipe(ialu_reg_reg);
8415%}
8416
8417// Logical Shift Right by variable
8418instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8419%{
8420  match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
8421  effect(KILL cr);
8422
8423  format %{ "shrq    $dst, $shift" %}
8424  opcode(0xD3, 0x5); /* D3 /5 */
8425  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8426  ins_pipe(ialu_mem_reg);
8427%}
8428
8429// Shift Left by 24, followed by Arithmetic Shift Right by 24.
8430// This idiom is used by the compiler for the i2b bytecode.
8431instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
8432%{
8433  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
8434
8435  format %{ "movsbl  $dst, $src\t# i2b" %}
8436  opcode(0x0F, 0xBE);
8437  ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
8438  ins_pipe(ialu_reg_reg);
8439%}
8440
8441// Shift Left by 16, followed by Arithmetic Shift Right by 16.
8442// This idiom is used by the compiler for the i2s bytecode.
8443instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
8444%{
8445  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
8446
8447  format %{ "movswl  $dst, $src\t# i2s" %}
8448  opcode(0x0F, 0xBF);
8449  ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
8450  ins_pipe(ialu_reg_reg);
8451%}
8452
8453// ROL/ROR instructions
8454
8455// ROL expand
8456instruct rolI_rReg_imm1(rRegI dst, rFlagsReg cr) %{
8457  effect(KILL cr, USE_DEF dst);
8458
8459  format %{ "roll    $dst" %}
8460  opcode(0xD1, 0x0); /* Opcode  D1 /0 */
8461  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8462  ins_pipe(ialu_reg);
8463%}
8464
8465instruct rolI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr) %{
8466  effect(USE_DEF dst, USE shift, KILL cr);
8467
8468  format %{ "roll    $dst, $shift" %}
8469  opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
8470  ins_encode( reg_opc_imm(dst, shift) );
8471  ins_pipe(ialu_reg);
8472%}
8473
8474instruct rolI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
8475%{
8476  effect(USE_DEF dst, USE shift, KILL cr);
8477
8478  format %{ "roll    $dst, $shift" %}
8479  opcode(0xD3, 0x0); /* Opcode D3 /0 */
8480  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8481  ins_pipe(ialu_reg_reg);
8482%}
8483// end of ROL expand
8484
8485// Rotate Left by one
8486instruct rolI_rReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
8487%{
8488  match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8489
8490  expand %{
8491    rolI_rReg_imm1(dst, cr);
8492  %}
8493%}
8494
8495// Rotate Left by 8-bit immediate
// The predicate adds the shift-count constants of the two children of n
// (the LShiftI and the URShiftI in the match below) and accepts the pattern
// only when that sum is a multiple of 32.  The expansion rotates left by
// $lshift.
8496instruct rolI_rReg_i8(rRegI dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
8497%{
8498  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8499  match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8500
8501  expand %{
8502    rolI_rReg_imm8(dst, lshift, cr);
8503  %}
8504%}
8505
8506// Rotate Left by variable
8507instruct rolI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
8508%{
8509  match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
8510
8511  expand %{
8512    rolI_rReg_CL(dst, shift, cr);
8513  %}
8514%}
8515
8516// Rotate Left by variable
8517instruct rolI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
8518%{
8519  match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
8520
8521  expand %{
8522    rolI_rReg_CL(dst, shift, cr);
8523  %}
8524%}
8525
8526// ROR expand
8527instruct rorI_rReg_imm1(rRegI dst, rFlagsReg cr)
8528%{
8529  effect(USE_DEF dst, KILL cr);
8530
8531  format %{ "rorl    $dst" %}
8532  opcode(0xD1, 0x1); /* D1 /1 */
8533  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8534  ins_pipe(ialu_reg);
8535%}
8536
8537instruct rorI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr)
8538%{
8539  effect(USE_DEF dst, USE shift, KILL cr);
8540
8541  format %{ "rorl    $dst, $shift" %}
8542  opcode(0xC1, 0x1); /* C1 /1 ib */
8543  ins_encode(reg_opc_imm(dst, shift));
8544  ins_pipe(ialu_reg);
8545%}
8546
8547instruct rorI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
8548%{
8549  effect(USE_DEF dst, USE shift, KILL cr);
8550
8551  format %{ "rorl    $dst, $shift" %}
8552  opcode(0xD3, 0x1); /* D3 /1 */
8553  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8554  ins_pipe(ialu_reg_reg);
8555%}
8556// end of ROR expand
8557
8558// Rotate Right by one
8559instruct rorI_rReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
8560%{
8561  match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8562
8563  expand %{
8564    rorI_rReg_imm1(dst, cr);
8565  %}
8566%}
8567
8568// Rotate Right by 8-bit immediate
// The predicate adds the shift-count constants of the two children of n
// (the URShiftI and the LShiftI in the match below) and accepts the pattern
// only when that sum is a multiple of 32.  The expansion rotates right by
// $rshift.
8569instruct rorI_rReg_i8(rRegI dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
8570%{
8571  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8572  match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8573
8574  expand %{
8575    rorI_rReg_imm8(dst, rshift, cr);
8576  %}
8577%}
8578
8579// Rotate Right by variable
8580instruct rorI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
8581%{
8582  match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8583
8584  expand %{
8585    rorI_rReg_CL(dst, shift, cr);
8586  %}
8587%}
8588
8589// Rotate Right by variable
8590instruct rorI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
8591%{
8592  match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8593
8594  expand %{
8595    rorI_rReg_CL(dst, shift, cr);
8596  %}
8597%}
8598
8599// for long rotate
8600// ROL expand
8601instruct rolL_rReg_imm1(rRegL dst, rFlagsReg cr) %{
8602  effect(USE_DEF dst, KILL cr);
8603
8604  format %{ "rolq    $dst" %}
8605  opcode(0xD1, 0x0); /* Opcode  D1 /0 */
8606  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8607  ins_pipe(ialu_reg);
8608%}
8609
8610instruct rolL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr) %{
8611  effect(USE_DEF dst, USE shift, KILL cr);
8612
8613  format %{ "rolq    $dst, $shift" %}
8614  opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
8615  ins_encode( reg_opc_imm_wide(dst, shift) );
8616  ins_pipe(ialu_reg);
8617%}
8618
8619instruct rolL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
8620%{
8621  effect(USE_DEF dst, USE shift, KILL cr);
8622
8623  format %{ "rolq    $dst, $shift" %}
8624  opcode(0xD3, 0x0); /* Opcode D3 /0 */
8625  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8626  ins_pipe(ialu_reg_reg);
8627%}
8628// end of ROL expand
8629
8630// Rotate Left by one
8631instruct rolL_rReg_i1(rRegL dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
8632%{
8633  match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
8634
8635  expand %{
8636    rolL_rReg_imm1(dst, cr);
8637  %}
8638%}
8639
8640// Rotate Left by 8-bit immediate
// The predicate adds the shift-count constants of the two children of n
// (the LShiftL and the URShiftL in the match below) and accepts the pattern
// only when that sum is a multiple of 64.  The expansion rotates left by
// $lshift.
8641instruct rolL_rReg_i8(rRegL dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
8642%{
8643  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
8644  match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
8645
8646  expand %{
8647    rolL_rReg_imm8(dst, lshift, cr);
8648  %}
8649%}
8650
8651// Rotate Left by variable
8652instruct rolL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
8653%{
8654  match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI zero shift))));
8655
8656  expand %{
8657    rolL_rReg_CL(dst, shift, cr);
8658  %}
8659%}
8660
8661// Rotate Left by variable
8662instruct rolL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
8663%{
8664  match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI c64 shift))));
8665
8666  expand %{
8667    rolL_rReg_CL(dst, shift, cr);
8668  %}
8669%}
8670
8671// ROR expand
8672instruct rorL_rReg_imm1(rRegL dst, rFlagsReg cr)
8673%{
8674  effect(USE_DEF dst, KILL cr);
8675
8676  format %{ "rorq    $dst" %}
8677  opcode(0xD1, 0x1); /* D1 /1 */
8678  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8679  ins_pipe(ialu_reg);
8680%}
8681
8682instruct rorL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr)
8683%{
8684  effect(USE_DEF dst, USE shift, KILL cr);
8685
8686  format %{ "rorq    $dst, $shift" %}
8687  opcode(0xC1, 0x1); /* C1 /1 ib */
8688  ins_encode(reg_opc_imm_wide(dst, shift));
8689  ins_pipe(ialu_reg);
8690%}
8691
8692instruct rorL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
8693%{
8694  effect(USE_DEF dst, USE shift, KILL cr);
8695
8696  format %{ "rorq    $dst, $shift" %}
8697  opcode(0xD3, 0x1); /* D3 /1 */
8698  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8699  ins_pipe(ialu_reg_reg);
8700%}
8701// end of ROR expand
8702
8703// Rotate Right by one
8704instruct rorL_rReg_i1(rRegL dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
8705%{
8706  match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
8707
8708  expand %{
8709    rorL_rReg_imm1(dst, cr);
8710  %}
8711%}
8712
8713// Rotate Right by 8-bit immediate
// The predicate adds the shift-count constants of the two children of n
// (the URShiftL and the LShiftL in the match below) and accepts the pattern
// only when that sum is a multiple of 64.  The expansion rotates right by
// $rshift.
8714instruct rorL_rReg_i8(rRegL dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
8715%{
8716  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
8717  match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
8718
8719  expand %{
8720    rorL_rReg_imm8(dst, rshift, cr);
8721  %}
8722%}
8723
8724// Rotate Right by variable
8725instruct rorL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
8726%{
8727  match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI zero shift))));
8728
8729  expand %{
8730    rorL_rReg_CL(dst, shift, cr);
8731  %}
8732%}
8733
8734// Rotate Right by variable
8735instruct rorL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
8736%{
8737  match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI c64 shift))));
8738
8739  expand %{
8740    rorL_rReg_CL(dst, shift, cr);
8741  %}
8742%}
8743
8744// Logical Instructions
8745
8746// Integer Logical Instructions
8747
8748// And Instructions
8749// And Register with Register
8750instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8751%{
8752  match(Set dst (AndI dst src));
8753  effect(KILL cr);
8754
8755  format %{ "andl    $dst, $src\t# int" %}
8756  opcode(0x23);
8757  ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
8758  ins_pipe(ialu_reg_reg);
8759%}
8760
8761// And Register with Immediate 255
8762instruct andI_rReg_imm255(rRegI dst, immI_255 src)
8763%{
8764  match(Set dst (AndI dst src));
8765
8766  format %{ "movzbl  $dst, $dst\t# int & 0xFF" %}
8767  opcode(0x0F, 0xB6);
8768  ins_encode(REX_reg_breg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
8769  ins_pipe(ialu_reg);
8770%}
8771
8772// And Register with Immediate 255 and promote to long
8773instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
8774%{
8775  match(Set dst (ConvI2L (AndI src mask)));
8776
8777  format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
8778  opcode(0x0F, 0xB6);
8779  ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
8780  ins_pipe(ialu_reg);
8781%}
8782
8783// And Register with Immediate 65535
8784instruct andI_rReg_imm65535(rRegI dst, immI_65535 src)
8785%{
8786  match(Set dst (AndI dst src));
8787
8788  format %{ "movzwl  $dst, $dst\t# int & 0xFFFF" %}
8789  opcode(0x0F, 0xB7);
8790  ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
8791  ins_pipe(ialu_reg);
8792%}
8793
8794// And Register with Immediate 65535 and promote to long
8795instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
8796%{
8797  match(Set dst (ConvI2L (AndI src mask)));
8798
8799  format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
8800  opcode(0x0F, 0xB7);
8801  ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
8802  ins_pipe(ialu_reg);
8803%}
8804
8805// And Register with Immediate
8806instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
8807%{
8808  match(Set dst (AndI dst src));
8809  effect(KILL cr);
8810
8811  format %{ "andl    $dst, $src\t# int" %}
8812  opcode(0x81, 0x04); /* Opcode 81 /4 */
8813  ins_encode(OpcSErm(dst, src), Con8or32(src));
8814  ins_pipe(ialu_reg);
8815%}
8816
8817// And Register with Memory
8818instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
8819%{
8820  match(Set dst (AndI dst (LoadI src)));
8821  effect(KILL cr);
8822
8823  ins_cost(125);
8824  format %{ "andl    $dst, $src\t# int" %}
8825  opcode(0x23);
8826  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
8827  ins_pipe(ialu_reg_mem);
8828%}
8829
8830// And Memory with Register
8831instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
8832%{
8833  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8834  effect(KILL cr);
8835
8836  ins_cost(150);
8837  format %{ "andl    $dst, $src\t# int" %}
8838  opcode(0x21); /* Opcode 21 /r */
8839  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
8840  ins_pipe(ialu_mem_reg);
8841%}
8842
8843// And Memory with Immediate
8844instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
8845%{
8846  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8847  effect(KILL cr);
8848
8849  ins_cost(125);
8850  format %{ "andl    $dst, $src\t# int" %}
8851  opcode(0x81, 0x4); /* Opcode 81 /4 id */
8852  ins_encode(REX_mem(dst), OpcSE(src),
8853             RM_opc_mem(secondary, dst), Con8or32(src));
8854  ins_pipe(ialu_mem_imm);
8855%}
8856
8857// Or Instructions
8858// Or Register with Register
8859instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8860%{
8861  match(Set dst (OrI dst src));
8862  effect(KILL cr);
8863
8864  format %{ "orl     $dst, $src\t# int" %}
8865  opcode(0x0B);
8866  ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
8867  ins_pipe(ialu_reg_reg);
8868%}
8869
8870// Or Register with Immediate
8871instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
8872%{
8873  match(Set dst (OrI dst src));
8874  effect(KILL cr);
8875
8876  format %{ "orl     $dst, $src\t# int" %}
8877  opcode(0x81, 0x01); /* Opcode 81 /1 id */
8878  ins_encode(OpcSErm(dst, src), Con8or32(src));
8879  ins_pipe(ialu_reg);
8880%}
8881
// Or Register with Memory
// dst = dst | (int loaded from src); the flags register is declared killed.
instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  format %{ "orl     $dst, $src\t# int" %}
  ins_cost(125);
  opcode(0x0B);
  ins_encode(REX_reg_mem(dst, src),
             OpcP,
             reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
8894
// Or Memory with Register
// In-place update: the int at dst is loaded, ORed with src and stored back
// to the same address.
instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  format %{ "orl     $dst, $src\t# int" %}
  ins_cost(150);
  opcode(0x09);  // 09 /r
  ins_encode(REX_reg_mem(src, dst),
             OpcP,
             reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
8907
// Or Memory with Immediate
// In-place update of the int at dst with an int constant.
// Opcode 81 /1 id: the secondary opcode value (0x1) is handed to RM_opc_mem.
instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  format %{ "orl     $dst, $src\t# int" %}
  ins_cost(125);
  opcode(0x81, 0x1);
  ins_encode(REX_mem(dst),
             OpcSE(src),
             RM_opc_mem(secondary, dst),
             Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
8921
8922// Xor Instructions
// Xor Register with Register
// dst = dst ^ src (int); the flags register is declared killed.
instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "xorl    $dst, $src\t# int" %}
  opcode(0x33);
  ins_encode(REX_reg_reg(dst, src),
             OpcP,
             reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
8934
// Xor Register with Immediate
// dst = dst ^ constant (int). Opcode 81 /6 id.
instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "xorl    $dst, $src\t# int" %}
  opcode(0x81, 0x06);
  ins_encode(OpcSErm(dst, src),
             Con8or32(src));
  ins_pipe(ialu_reg);
%}
8946
// Xor Register with Memory
// dst = dst ^ (int loaded from src); the flags register is declared killed.
instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  format %{ "xorl    $dst, $src\t# int" %}
  ins_cost(125);
  opcode(0x33);
  ins_encode(REX_reg_mem(dst, src),
             OpcP,
             reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
8959
// Xor Memory with Register
// In-place update: the int at dst is loaded, XORed with src and stored back
// to the same address.
instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  format %{ "xorl    $dst, $src\t# int" %}
  ins_cost(150);
  opcode(0x31);  // 31 /r
  ins_encode(REX_reg_mem(src, dst),
             OpcP,
             reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
8972
// Xor Memory with Immediate
// In-place update of the int at dst with an int constant.
// Opcode 81 /6 id: the secondary opcode value (0x6) is handed to RM_opc_mem.
instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  format %{ "xorl    $dst, $src\t# int" %}
  ins_cost(125);
  opcode(0x81, 0x6);
  ins_encode(REX_mem(dst),
             OpcSE(src),
             RM_opc_mem(secondary, dst),
             Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
8986
8987
8988// Long Logical Instructions
8989
8990// And Instructions
// And Register with Register
// dst = dst & src (long); uses the wide REX helper for the 64-bit form.
instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (AndL dst src));
  effect(KILL cr);

  format %{ "andq    $dst, $src\t# long" %}
  opcode(0x23);
  ins_encode(REX_reg_reg_wide(dst, src),
             OpcP,
             reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
9002
// And Register with Immediate 255
// A long AND with the constant 0xFF is emitted as a zero-extending byte move
// of dst onto itself (opcode 0F B6 behind a wide REX prefix) rather than as
// an AND. No flags effect is declared for this rule.
instruct andL_rReg_imm255(rRegL dst, immL_255 src)
%{
  match(Set dst (AndL dst src));

  // The encoding below uses dst for both operands; src is only the matched
  // constant, so the printed instruction must name $dst twice.
  format %{ "movzbq  $dst, $dst\t# long & 0xFF" %}
  opcode(0x0F, 0xB6);
  ins_encode(REX_reg_reg_wide(dst, dst),
             OpcP,
             OpcS,
             reg_reg(dst, dst));
  ins_pipe(ialu_reg);
%}
9013
// And Register with Immediate 65535
// A long AND with the constant 0xFFFF is emitted as a zero-extending word
// move of dst onto itself (opcode 0F B7 behind a wide REX prefix) rather than
// as an AND. No flags effect is declared for this rule.
// dst is a long register operand: the rule matches AndL and emits the wide
// form, consistent with every other AndL rule in this section.
instruct andL_rReg_imm65535(rRegL dst, immL_65535 src)
%{
  match(Set dst (AndL dst src));

  format %{ "movzwq  $dst, $dst\t# long & 0xFFFF" %}
  opcode(0x0F, 0xB7);
  ins_encode(REX_reg_reg_wide(dst, dst),
             OpcP,
             OpcS,
             reg_reg(dst, dst));
  ins_pipe(ialu_reg);
%}
9024
// And Register with Immediate
// dst = dst & constant (long, constant restricted to immL32). Opcode 81 /4.
instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (AndL dst src));
  effect(KILL cr);

  format %{ "andq    $dst, $src\t# long" %}
  opcode(0x81, 0x04);
  ins_encode(OpcSErm_wide(dst, src),
             Con8or32(src));
  ins_pipe(ialu_reg);
%}
9036
// And Register with Memory
// dst = dst & (long loaded from src); the flags register is declared killed.
instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  match(Set dst (AndL dst (LoadL src)));
  effect(KILL cr);

  format %{ "andq    $dst, $src\t# long" %}
  ins_cost(125);
  opcode(0x23);
  ins_encode(REX_reg_mem_wide(dst, src),
             OpcP,
             reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
9049
// And Memory with Register
// In-place update: the long at dst is loaded, ANDed with src and stored back
// to the same address.
instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  effect(KILL cr);

  format %{ "andq    $dst, $src\t# long" %}
  ins_cost(150);
  opcode(0x21);  // 21 /r
  ins_encode(REX_reg_mem_wide(src, dst),
             OpcP,
             reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
9062
// And Memory with Immediate
// In-place update of the long at dst with an immL32 constant.
// Opcode 81 /4 id: the secondary opcode value (0x4) is handed to RM_opc_mem.
instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  effect(KILL cr);

  format %{ "andq    $dst, $src\t# long" %}
  ins_cost(125);
  opcode(0x81, 0x4);
  ins_encode(REX_mem_wide(dst),
             OpcSE(src),
             RM_opc_mem(secondary, dst),
             Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
9076
9077// Or Instructions
// Or Register with Register
// dst = dst | src (long); uses the wide REX helper for the 64-bit form.
instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (OrL dst src));
  effect(KILL cr);

  format %{ "orq     $dst, $src\t# long" %}
  opcode(0x0B);
  ins_encode(REX_reg_reg_wide(dst, src),
             OpcP,
             reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
9089
// Or Register with Immediate
// dst = dst | constant (long, constant restricted to immL32). Opcode 81 /1 id.
instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (OrL dst src));
  effect(KILL cr);

  format %{ "orq     $dst, $src\t# long" %}
  opcode(0x81, 0x01);
  ins_encode(OpcSErm_wide(dst, src),
             Con8or32(src));
  ins_pipe(ialu_reg);
%}
9101
// Or Register with Memory
// dst = dst | (long loaded from src); the flags register is declared killed.
instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  match(Set dst (OrL dst (LoadL src)));
  effect(KILL cr);

  format %{ "orq     $dst, $src\t# long" %}
  ins_cost(125);
  opcode(0x0B);
  ins_encode(REX_reg_mem_wide(dst, src),
             OpcP,
             reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
9114
// Or Memory with Register
// In-place update: the long at dst is loaded, ORed with src and stored back
// to the same address.
instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  effect(KILL cr);

  format %{ "orq     $dst, $src\t# long" %}
  ins_cost(150);
  opcode(0x09);  // 09 /r
  ins_encode(REX_reg_mem_wide(src, dst),
             OpcP,
             reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
9127
// Or Memory with Immediate
// In-place update of the long at dst with an immL32 constant.
// Opcode 81 /1 id: the secondary opcode value (0x1) is handed to RM_opc_mem.
instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  effect(KILL cr);

  format %{ "orq     $dst, $src\t# long" %}
  ins_cost(125);
  opcode(0x81, 0x1);
  ins_encode(REX_mem_wide(dst),
             OpcSE(src),
             RM_opc_mem(secondary, dst),
             Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
9141
9142// Xor Instructions
// Xor Register with Register
// dst = dst ^ src (long); uses the wide REX helper for the 64-bit form.
instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (XorL dst src));
  effect(KILL cr);

  format %{ "xorq    $dst, $src\t# long" %}
  opcode(0x33);
  ins_encode(REX_reg_reg_wide(dst, src),
             OpcP,
             reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
9154
// Xor Register with Immediate
// dst = dst ^ constant (long, constant restricted to immL32). Opcode 81 /6 id.
instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (XorL dst src));
  effect(KILL cr);

  format %{ "xorq    $dst, $src\t# long" %}
  opcode(0x81, 0x06);
  ins_encode(OpcSErm_wide(dst, src),
             Con8or32(src));
  ins_pipe(ialu_reg);
%}
9166
// Xor Register with Memory
// dst = dst ^ (long loaded from src); the flags register is declared killed.
instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  match(Set dst (XorL dst (LoadL src)));
  effect(KILL cr);

  format %{ "xorq    $dst, $src\t# long" %}
  ins_cost(125);
  opcode(0x33);
  ins_encode(REX_reg_mem_wide(dst, src),
             OpcP,
             reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
9179
// Xor Memory with Register
// In-place update: the long at dst is loaded, XORed with src and stored back
// to the same address.
instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  effect(KILL cr);

  format %{ "xorq    $dst, $src\t# long" %}
  ins_cost(150);
  opcode(0x31);  // 31 /r
  ins_encode(REX_reg_mem_wide(src, dst),
             OpcP,
             reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
9192
// Xor Memory with Immediate
// In-place update of the long at dst with an immL32 constant.
// Opcode 81 /6 id: the secondary opcode value (0x6) is handed to RM_opc_mem.
instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  effect(KILL cr);

  format %{ "xorq    $dst, $src\t# long" %}
  ins_cost(125);
  opcode(0x81, 0x6);
  ins_encode(REX_mem_wide(dst),
             OpcSE(src),
             RM_opc_mem(secondary, dst),
             Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
9206
// Convert Int to Boolean
// Three-instruction sequence (see format): test src against itself, set the
// low byte of dst when the result is non-zero, then zero-extend that byte.
instruct convI2B(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{ "testl   $src, $src\t# ci2b\n\t"
            "setnz   $dst\n\t"
            "movzbl  $dst, $dst" %}
  ins_encode(REX_reg_reg(src, src),         // testl
             opc_reg_reg(0x85, src, src),
             setNZ_reg(dst),
             REX_reg_breg(dst, dst),        // movzbl
             Opcode(0x0F),
             Opcode(0xB6),
             reg_reg(dst, dst));
  ins_pipe(pipe_slow); // XXX
%}
9222
// Convert Pointer to Boolean
// Same sequence as the int variant, except that the test of src uses the
// wide REX helper (testq) because src is a pointer register.
instruct convP2B(rRegI dst, rRegP src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{ "testq   $src, $src\t# cp2b\n\t"
            "setnz   $dst\n\t"
            "movzbl  $dst, $dst" %}
  ins_encode(REX_reg_reg_wide(src, src),    // testq
             opc_reg_reg(0x85, src, src),
             setNZ_reg(dst),
             REX_reg_breg(dst, dst),        // movzbl
             Opcode(0x0F),
             Opcode(0xB6),
             reg_reg(dst, dst));
  ins_pipe(pipe_slow); // XXX
%}
9238
// CmpLTMask of two int registers.
// Sequence per the format text: compare p with q, set the low byte of dst on
// "less than", zero-extend it, then negate it (so a set byte becomes -1).
instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
            "setlt   $dst\n\t"
            "movzbl  $dst, $dst\n\t"
            "negl    $dst" %}
  ins_cost(400); // XXX
  ins_encode(REX_reg_reg(p, q),             // cmpl
             opc_reg_reg(0x3B, p, q),
             setLT_reg(dst),
             REX_reg_breg(dst, dst),        // movzbl
             Opcode(0x0F),
             Opcode(0xB6),
             reg_reg(dst, dst),
             neg_reg(dst));
  ins_pipe(pipe_slow);
%}
9256
// CmpLTMask against the constant zero, done in place on dst.
// A single arithmetic right shift by 31 (0x1F) replaces the compare sequence;
// the shift spreads the sign bit of dst over the whole register.
instruct cmpLTMask0(rRegI dst, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask dst zero));
  effect(KILL cr);

  format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
  ins_cost(100); // XXX
  opcode(0xC1, 0x7);  // C1 /7 ib
  ins_encode(reg_opc_imm(dst, 0x1F));
  ins_pipe(ialu_reg);
%}
9268
9269
// Conditional add fused with CmpLTMask:
//   p = (p - q) + ((CmpLTMask p q) & y)
// Per the format text the mask is produced in tmp by a subtract-with-borrow
// of tmp from itself right after the subtraction. tmp is a TEMP register and
// the flags register is killed.
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rRegI tmp, rFlagsReg cr)
%{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(TEMP tmp, KILL cr);

  format %{ "subl    $p, $q\t# cadd_cmpLTMask1\n\t"
            "sbbl    $tmp, $tmp\n\t"
            "andl    $tmp, $y\n\t"
            "addl    $p, $tmp" %}
  ins_cost(400); // XXX
  ins_encode(enc_cmpLTP(p, q, y, tmp));
  ins_pipe(pipe_cmplt);
%}
9285
9286/* If I enable this, I encourage spilling in the inner loop of compress.
9287instruct cadd_cmpLTMask_mem( rRegI p, rRegI q, memory y, rRegI tmp, rFlagsReg cr )
9288%{
9289  match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
9290  effect( TEMP tmp, KILL cr );
9291  ins_cost(400);
9292
9293  format %{ "SUB    $p,$q\n\t"
9294            "SBB    RCX,RCX\n\t"
9295            "AND    RCX,$y\n\t"
9296            "ADD    $p,RCX" %}
9297  ins_encode( enc_cmpLTP_mem(p,q,y,tmp) );
9298%}
9299*/
9300
9301//---------- FP Instructions------------------------------------------------
9302
// Float compare into the (unsigned-style) flags register, register operands.
// ucomiss is followed by cmpfp_fixup; per the format text, when the parity
// flag reports a NaN the saved flags are masked on the stack and reloaded.
instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
%{
  match(Set cr (CmpF src1 src2));

  format %{ "ucomiss $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:   nop\t# avoid branch to branch" %}
  ins_cost(145);
  opcode(0x0F, 0x2E);
  ins_encode(REX_reg_reg(src1, src2),
             OpcP,
             OpcS,
             reg_reg(src1, src2),
             cmpfp_fixup);
  ins_pipe(pipe_slow);
%}
9319
// Float compare into the flags register; second operand is loaded from
// memory. ucomiss is followed by cmpfp_fixup; per the format text, when the
// parity flag reports a NaN the saved flags are masked and reloaded.
instruct cmpF_cc_mem(rFlagsRegU cr, regF src1, memory src2)
%{
  match(Set cr (CmpF src1 (LoadF src2)));

  format %{ "ucomiss $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:   nop\t# avoid branch to branch" %}
  ins_cost(145);
  opcode(0x0F, 0x2E);
  ins_encode(REX_reg_mem(src1, src2),
             OpcP,
             OpcS,
             reg_mem(src1, src2),
             cmpfp_fixup);
  ins_pipe(pipe_slow);
%}
9336
// Float compare into the flags register; second operand is a float constant.
// The constant is encoded through load_immF, so the format prints it in
// brackets as a memory operand, matching cmpD_cc_imm, cmpF_imm and cmpD_imm.
// ucomiss is followed by cmpfp_fixup; per the format text, when the parity
// flag reports a NaN the saved flags are masked and reloaded.
instruct cmpF_cc_imm(rFlagsRegU cr, regF src1, immF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(145);
  format %{ "ucomiss $src1, [$src2]\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:   nop\t# avoid branch to branch" %}
  opcode(0x0F, 0x2E);
  ins_encode(REX_reg_mem(src1, src2),
             OpcP,
             OpcS,
             load_immF(src1, src2),
             cmpfp_fixup);
  ins_pipe(pipe_slow);
%}
9353
// Double compare into the flags register, register operands.
// The 0x66 prefix (primary opcode) is listed ahead of the REX helper.
// ucomisd is followed by cmpfp_fixup; per the format text, when the parity
// flag reports a NaN the saved flags are masked and reloaded.
instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
%{
  match(Set cr (CmpD src1 src2));

  format %{ "ucomisd $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:   nop\t# avoid branch to branch" %}
  ins_cost(145);
  opcode(0x66, 0x0F, 0x2E);
  ins_encode(OpcP,
             REX_reg_reg(src1, src2),
             OpcS,
             OpcT,
             reg_reg(src1, src2),
             cmpfp_fixup);
  ins_pipe(pipe_slow);
%}
9370
// Double compare into the flags register; second operand is loaded from
// memory. The 0x66 prefix (primary opcode) is listed ahead of the REX helper.
// ucomisd is followed by cmpfp_fixup (NaN handling, see format text).
instruct cmpD_cc_mem(rFlagsRegU cr, regD src1, memory src2)
%{
  match(Set cr (CmpD src1 (LoadD src2)));

  format %{ "ucomisd $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:   nop\t# avoid branch to branch" %}
  ins_cost(145);
  opcode(0x66, 0x0F, 0x2E);
  ins_encode(OpcP,
             REX_reg_mem(src1, src2),
             OpcS,
             OpcT,
             reg_mem(src1, src2),
             cmpfp_fixup);
  ins_pipe(pipe_slow);
%}
9387
// Double compare into the flags register; second operand is a double
// constant encoded through load_immD (printed as a memory operand).
// ucomisd is followed by cmpfp_fixup (NaN handling, see format text).
instruct cmpD_cc_imm(rFlagsRegU cr, regD src1, immD src2)
%{
  match(Set cr (CmpD src1 src2));

  format %{ "ucomisd $src1, [$src2]\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:   nop\t# avoid branch to branch" %}
  ins_cost(145);
  opcode(0x66, 0x0F, 0x2E);
  ins_encode(OpcP,
             REX_reg_mem(src1, src2),
             OpcS,
             OpcT,
             load_immD(src1, src2),
             cmpfp_fixup);
  ins_pipe(pipe_slow);
%}
9404
// Compare into -1,0,1
// Three-way float compare, register operands. Per the format text dst is
// preloaded with -1 and left there on parity (unordered) or "below";
// otherwise setne/movzbl leave 0 or 1 in dst.
instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  format %{ "ucomiss $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_cost(275);

  opcode(0x0F, 0x2E);
  ins_encode(REX_reg_reg(src1, src2),
             OpcP,
             OpcS,
             reg_reg(src1, src2),
             cmpfp3(dst));
  ins_pipe(pipe_slow);
%}
9425
// Compare into -1,0,1
// Three-way float compare; second operand is loaded from memory. Per the
// format text dst is preloaded with -1 and left there on parity (unordered)
// or "below"; otherwise setne/movzbl leave 0 or 1 in dst.
instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);

  format %{ "ucomiss $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_cost(275);

  opcode(0x0F, 0x2E);
  ins_encode(REX_reg_mem(src1, src2),
             OpcP,
             OpcS,
             reg_mem(src1, src2),
             cmpfp3(dst));
  ins_pipe(pipe_slow);
%}
9446
// Compare into -1,0,1
// Three-way float compare; second operand is a float constant encoded
// through load_immF. Per the format text dst is preloaded with -1 and left
// there on parity (unordered) or "below"; otherwise it ends up 0 or 1.
instruct cmpF_imm(rRegI dst, regF src1, immF src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  format %{ "ucomiss $src1, [$src2]\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_cost(275);

  opcode(0x0F, 0x2E);
  ins_encode(REX_reg_mem(src1, src2),
             OpcP,
             OpcS,
             load_immF(src1, src2),
             cmpfp3(dst));
  ins_pipe(pipe_slow);
%}
9467
// Compare into -1,0,1
// Three-way double compare, register operands. Per the format text dst is
// preloaded with -1 and left there on parity (unordered) or "below";
// otherwise setne/movzbl leave 0 or 1 in dst.
instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  format %{ "ucomisd $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_cost(275);

  opcode(0x66, 0x0F, 0x2E);
  ins_encode(OpcP,
             REX_reg_reg(src1, src2),
             OpcS,
             OpcT,
             reg_reg(src1, src2),
             cmpfp3(dst));
  ins_pipe(pipe_slow);
%}
9488
// Compare into -1,0,1
// Three-way double compare; second operand is loaded from memory. Per the
// format text dst is preloaded with -1 and left there on parity (unordered)
// or "below"; otherwise setne/movzbl leave 0 or 1 in dst.
instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 (LoadD src2)));
  effect(KILL cr);

  format %{ "ucomisd $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_cost(275);

  opcode(0x66, 0x0F, 0x2E);
  ins_encode(OpcP,
             REX_reg_mem(src1, src2),
             OpcS,
             OpcT,
             reg_mem(src1, src2),
             cmpfp3(dst));
  ins_pipe(pipe_slow);
%}
9509
// Compare into -1,0,1
// Three-way double compare; second operand is a double constant encoded
// through load_immD. Per the format text dst is preloaded with -1 and left
// there on parity (unordered) or "below"; otherwise it ends up 0 or 1.
instruct cmpD_imm(rRegI dst, regD src1, immD src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  format %{ "ucomisd $src1, [$src2]\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_cost(275);

  opcode(0x66, 0x0F, 0x2E);
  ins_encode(OpcP,
             REX_reg_mem(src1, src2),
             OpcS,
             OpcT,
             load_immD(src1, src2),
             cmpfp3(dst));
  ins_pipe(pipe_slow);
%}
9530
// Float add, register form: dst = dst + src (addss).
// The prefix byte (primary opcode) is listed ahead of the REX helper.
instruct addF_reg(regF dst, regF src)
%{
  match(Set dst (AddF dst src));

  ins_cost(150); // XXX
  format %{ "addss   $dst, $src" %}
  opcode(0xF3, 0x0F, 0x58);
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS, OpcT,
             reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
9541
// Float add, memory form: dst = dst + (float loaded from src) (addss).
instruct addF_mem(regF dst, memory src)
%{
  match(Set dst (AddF dst (LoadF src)));

  ins_cost(150); // XXX
  format %{ "addss   $dst, $src" %}
  opcode(0xF3, 0x0F, 0x58);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
9552
// Float add, constant form: dst = dst + float constant (addss).
// The constant is encoded through load_immF and printed as a memory operand.
instruct addF_imm(regF dst, immF src)
%{
  match(Set dst (AddF dst src));

  ins_cost(150); // XXX
  format %{ "addss   $dst, [$src]" %}
  opcode(0xF3, 0x0F, 0x58);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             load_immF(dst, src));
  ins_pipe(pipe_slow);
%}
9563
// Double add, register form: dst = dst + src (addsd).
// The prefix byte (primary opcode) is listed ahead of the REX helper.
instruct addD_reg(regD dst, regD src)
%{
  match(Set dst (AddD dst src));

  ins_cost(150); // XXX
  format %{ "addsd   $dst, $src" %}
  opcode(0xF2, 0x0F, 0x58);
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS, OpcT,
             reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
9574
// Double add, memory form: dst = dst + (double loaded from src) (addsd).
instruct addD_mem(regD dst, memory src)
%{
  match(Set dst (AddD dst (LoadD src)));

  ins_cost(150); // XXX
  format %{ "addsd   $dst, $src" %}
  opcode(0xF2, 0x0F, 0x58);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
9585
// Double add, constant form: dst = dst + double constant (addsd).
// The constant is encoded through load_immD and printed as a memory operand.
instruct addD_imm(regD dst, immD src)
%{
  match(Set dst (AddD dst src));

  ins_cost(150); // XXX
  format %{ "addsd   $dst, [$src]" %}
  opcode(0xF2, 0x0F, 0x58);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             load_immD(dst, src));
  ins_pipe(pipe_slow);
%}
9596
// Float subtract, register form: dst = dst - src (subss).
instruct subF_reg(regF dst, regF src)
%{
  match(Set dst (SubF dst src));

  ins_cost(150); // XXX
  format %{ "subss   $dst, $src" %}
  opcode(0xF3, 0x0F, 0x5C);
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS, OpcT,
             reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
9607
// Float subtract, memory form: dst = dst - (float loaded from src) (subss).
instruct subF_mem(regF dst, memory src)
%{
  match(Set dst (SubF dst (LoadF src)));

  ins_cost(150); // XXX
  format %{ "subss   $dst, $src" %}
  opcode(0xF3, 0x0F, 0x5C);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
9618
// Float subtract, constant form: dst = dst - float constant (subss).
// The constant is encoded through load_immF and printed as a memory operand.
instruct subF_imm(regF dst, immF src)
%{
  match(Set dst (SubF dst src));

  ins_cost(150); // XXX
  format %{ "subss   $dst, [$src]" %}
  opcode(0xF3, 0x0F, 0x5C);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             load_immF(dst, src));
  ins_pipe(pipe_slow);
%}
9629
// Double subtract, register form: dst = dst - src (subsd).
instruct subD_reg(regD dst, regD src)
%{
  match(Set dst (SubD dst src));

  ins_cost(150); // XXX
  format %{ "subsd   $dst, $src" %}
  opcode(0xF2, 0x0F, 0x5C);
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS, OpcT,
             reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
9640
// Double subtract, memory form: dst = dst - (double loaded from src) (subsd).
instruct subD_mem(regD dst, memory src)
%{
  match(Set dst (SubD dst (LoadD src)));

  ins_cost(150); // XXX
  format %{ "subsd   $dst, $src" %}
  opcode(0xF2, 0x0F, 0x5C);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
9651
// Double subtract, constant form: dst = dst - double constant (subsd).
// The constant is encoded through load_immD and printed as a memory operand.
instruct subD_imm(regD dst, immD src)
%{
  match(Set dst (SubD dst src));

  ins_cost(150); // XXX
  format %{ "subsd   $dst, [$src]" %}
  opcode(0xF2, 0x0F, 0x5C);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             load_immD(dst, src));
  ins_pipe(pipe_slow);
%}
9662
// Float multiply, register form: dst = dst * src (mulss).
instruct mulF_reg(regF dst, regF src)
%{
  match(Set dst (MulF dst src));

  ins_cost(150); // XXX
  format %{ "mulss   $dst, $src" %}
  opcode(0xF3, 0x0F, 0x59);
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS, OpcT,
             reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
9673
// Float multiply, memory form: dst = dst * (float loaded from src) (mulss).
instruct mulF_mem(regF dst, memory src)
%{
  match(Set dst (MulF dst (LoadF src)));

  ins_cost(150); // XXX
  format %{ "mulss   $dst, $src" %}
  opcode(0xF3, 0x0F, 0x59);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
9684
// Float multiply, constant form: dst = dst * float constant (mulss).
// The constant is encoded through load_immF and printed as a memory operand.
instruct mulF_imm(regF dst, immF src)
%{
  match(Set dst (MulF dst src));

  ins_cost(150); // XXX
  format %{ "mulss   $dst, [$src]" %}
  opcode(0xF3, 0x0F, 0x59);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             load_immF(dst, src));
  ins_pipe(pipe_slow);
%}
9695
// Double multiply, register form: dst = dst * src (mulsd).
instruct mulD_reg(regD dst, regD src)
%{
  match(Set dst (MulD dst src));

  ins_cost(150); // XXX
  format %{ "mulsd   $dst, $src" %}
  opcode(0xF2, 0x0F, 0x59);
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS, OpcT,
             reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
9706
// Double multiply, memory form: dst = dst * (double loaded from src) (mulsd).
instruct mulD_mem(regD dst, memory src)
%{
  match(Set dst (MulD dst (LoadD src)));

  ins_cost(150); // XXX
  format %{ "mulsd   $dst, $src" %}
  opcode(0xF2, 0x0F, 0x59);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
9717
// Double multiply, constant form: dst = dst * double constant (mulsd).
// The constant is encoded through load_immD and printed as a memory operand.
instruct mulD_imm(regD dst, immD src)
%{
  match(Set dst (MulD dst src));

  ins_cost(150); // XXX
  format %{ "mulsd   $dst, [$src]" %}
  opcode(0xF2, 0x0F, 0x59);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             load_immD(dst, src));
  ins_pipe(pipe_slow);
%}
9728
// Float divide, register form: dst = dst / src (divss).
instruct divF_reg(regF dst, regF src)
%{
  match(Set dst (DivF dst src));

  ins_cost(150); // XXX
  format %{ "divss   $dst, $src" %}
  opcode(0xF3, 0x0F, 0x5E);
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS, OpcT,
             reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
9739
// Float divide, memory form: dst = dst / (float loaded from src) (divss).
instruct divF_mem(regF dst, memory src)
%{
  match(Set dst (DivF dst (LoadF src)));

  ins_cost(150); // XXX
  format %{ "divss   $dst, $src" %}
  opcode(0xF3, 0x0F, 0x5E);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
9750
// Float divide, constant form: dst = dst / float constant (divss).
// The constant is encoded through load_immF and printed as a memory operand.
instruct divF_imm(regF dst, immF src)
%{
  match(Set dst (DivF dst src));

  ins_cost(150); // XXX
  format %{ "divss   $dst, [$src]" %}
  opcode(0xF3, 0x0F, 0x5E);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             load_immF(dst, src));
  ins_pipe(pipe_slow);
%}
9761
// Double divide, register form: dst = dst / src (divsd).
instruct divD_reg(regD dst, regD src)
%{
  match(Set dst (DivD dst src));

  ins_cost(150); // XXX
  format %{ "divsd   $dst, $src" %}
  opcode(0xF2, 0x0F, 0x5E);
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS, OpcT,
             reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
9772
// Double divide, memory form: dst = dst / (double loaded from src) (divsd).
instruct divD_mem(regD dst, memory src)
%{
  match(Set dst (DivD dst (LoadD src)));

  ins_cost(150); // XXX
  format %{ "divsd   $dst, $src" %}
  opcode(0xF2, 0x0F, 0x5E);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
9783
// Double divide, constant form: dst = dst / double constant (divsd).
// The constant is encoded through load_immD and printed as a memory operand.
instruct divD_imm(regD dst, immD src)
%{
  match(Set dst (DivD dst src));

  ins_cost(150); // XXX
  format %{ "divsd   $dst, [$src]" %}
  opcode(0xF2, 0x0F, 0x5E);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             load_immD(dst, src));
  ins_pipe(pipe_slow);
%}
9794
// Float square root, register form (sqrtss).
// Matches the widen / SqrtD / narrow chain ConvD2F(SqrtD(ConvF2D src)) and
// replaces it with a single instruction.
instruct sqrtF_reg(regF dst, regF src)
%{
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  ins_cost(150); // XXX
  format %{ "sqrtss  $dst, $src" %}
  opcode(0xF3, 0x0F, 0x51);
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS, OpcT,
             reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
9805
// Float square root, memory form (sqrtss).
// Matches ConvD2F(SqrtD(ConvF2D(LoadF src))), i.e. the widen / SqrtD / narrow
// chain applied to a float loaded from memory.
instruct sqrtF_mem(regF dst, memory src)
%{
  match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));

  ins_cost(150); // XXX
  format %{ "sqrtss  $dst, $src" %}
  opcode(0xF3, 0x0F, 0x51);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
9816
// Float square root, constant form (sqrtss).
// Matches ConvD2F(SqrtD(ConvF2D src)) with a float constant; the constant is
// encoded through load_immF and printed as a memory operand.
instruct sqrtF_imm(regF dst, immF src)
%{
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  ins_cost(150); // XXX
  format %{ "sqrtss  $dst, [$src]" %}
  opcode(0xF3, 0x0F, 0x51);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             load_immF(dst, src));
  ins_pipe(pipe_slow);
%}
9827
// Double square root, register form: dst = SqrtD(src) (sqrtsd).
instruct sqrtD_reg(regD dst, regD src)
%{
  match(Set dst (SqrtD src));

  ins_cost(150); // XXX
  format %{ "sqrtsd  $dst, $src" %}
  opcode(0xF2, 0x0F, 0x51);
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS, OpcT,
             reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
9838
// Double square root, memory form: dst = SqrtD(double loaded from src).
instruct sqrtD_mem(regD dst, memory src)
%{
  match(Set dst (SqrtD (LoadD src)));

  ins_cost(150); // XXX
  format %{ "sqrtsd  $dst, $src" %}
  opcode(0xF2, 0x0F, 0x51);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
9849
// Double square root, constant form: dst = SqrtD(double constant).
// The constant is encoded through load_immD and printed as a memory operand.
instruct sqrtD_imm(regD dst, immD src)
%{
  match(Set dst (SqrtD src));

  ins_cost(150); // XXX
  format %{ "sqrtsd  $dst, [$src]" %}
  opcode(0xF2, 0x0F, 0x51);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             load_immD(dst, src));
  ins_pipe(pipe_slow);
%}
9860
// Float absolute value, in place on dst.
// Per the format text this is an andps with the mask 0x7fffffff (sign bit
// cleared); the bytes come from absF_encoding.
instruct absF_reg(regF dst)
%{
  match(Set dst (AbsF dst));

  format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode(absF_encoding(dst));
  ins_pipe(pipe_slow);
%}
9869
// Double absolute value, in place on dst.
// Per the format text this is an andpd with the mask 0x7fffffffffffffff
// (sign bit cleared); the bytes come from absD_encoding.
instruct absD_reg(regD dst)
%{
  match(Set dst (AbsD dst));

  format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode(absD_encoding(dst));
  ins_pipe(pipe_slow);
%}
9879
// Float negate, in place on dst.
// Per the format text this is an xorps with the mask 0x80000000 (sign bit
// flipped); the bytes come from negF_encoding.
instruct negF_reg(regF dst)
%{
  match(Set dst (NegF dst));

  format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode(negF_encoding(dst));
  ins_pipe(pipe_slow);
%}
9888
// Double negate, in place on dst.
// Per the format text this is an xorpd with the mask 0x8000000000000000
// (sign bit flipped); the bytes come from negD_encoding.
instruct negD_reg(regD dst)
%{
  match(Set dst (NegD dst));

  format %{ "xorpd   $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode(negD_encoding(dst));
  ins_pipe(pipe_slow);
%}
9898
// -----------Trig and Transcendental Instructions------------------------------
// Double cosine, in place on dst.
// The two opcode bytes D9 FF are emitted between Push_SrcXD(dst) and
// Push_ResultXD(dst).
// NOTE(review): the Push_* helpers presumably move the value between the XMM
// register and the x87 stack -- confirm in the encode block.
instruct cosD_reg(regD dst)
%{
  match(Set dst (CosD dst));

  format %{ "dcos   $dst\n\t" %}
  opcode(0xD9, 0xFF);
  ins_encode(Push_SrcXD(dst),
             OpcP, OpcS,
             Push_ResultXD(dst));
  ins_pipe(pipe_slow);
%}
9908
// Double sine, in place on dst.
// The two opcode bytes D9 FE are emitted between Push_SrcXD(dst) and
// Push_ResultXD(dst).
// NOTE(review): the Push_* helpers presumably move the value between the XMM
// register and the x87 stack -- confirm in the encode block.
instruct sinD_reg(regD dst)
%{
  match(Set dst (SinD dst));

  format %{ "dsin   $dst\n\t" %}
  opcode(0xD9, 0xFE);
  ins_encode(Push_SrcXD(dst),
             OpcP, OpcS,
             Push_ResultXD(dst));
  ins_pipe(pipe_slow);
%}
9917
// Double tangent, in place on dst.
// fptan is followed by an fstp of the stack top before the result is moved
// back with Push_ResultXD(dst).
instruct tanD_reg(regD dst)
%{
  match(Set dst (TanD dst));

  format %{ "dtan   $dst\n\t" %}
  ins_encode(Push_SrcXD(dst),
             Opcode(0xD9), Opcode(0xF2),    // fptan
             Opcode(0xDD), Opcode(0xD8),    // fstp st
             Push_ResultXD(dst));
  ins_pipe(pipe_slow);
%}
9928
// Base-10 logarithm of a double, in place on dst (an XMM register holds both
// the source and the result).
instruct log10D_reg(regD dst)
%{
  match(Set dst (Log10D dst));

  // fldlg2 pushes log_10(2) on the FPU stack as a full 80-bit number;
  // fyl2x then computes log_10(2) * log_2(x).
  format %{ "fldlg2\t\t\t#Log10\n\t"
            "fyl2x\t\t\t# Q=Log10*Log_2(x)\n\t"
         %}
  ins_encode(Opcode(0xD9), Opcode(0xEC),    // fldlg2
             Push_SrcXD(dst),
             Opcode(0xD9), Opcode(0xF1),    // fyl2x
             Push_ResultXD(dst));

  ins_pipe(pipe_slow);
%}
9944
// Natural logarithm of a double, in place on dst (an XMM register holds both
// the source and the result).
instruct logD_reg(regD dst)
%{
  match(Set dst (LogD dst));

  // fldln2 pushes log_e(2) on the FPU stack as a full 80-bit number;
  // fyl2x then computes log_e(2) * log_2(x).
  format %{ "fldln2\t\t\t#Log_e\n\t"
            "fyl2x\t\t\t# Q=Log_e*Log_2(x)\n\t"
         %}
  ins_encode(Opcode(0xD9), Opcode(0xED),    // fldln2
             Push_SrcXD(dst),
             Opcode(0xD9), Opcode(0xF1),    // fyl2x
             Push_ResultXD(dst));
  ins_pipe(pipe_slow);
%}
9959
9960
9961
9962//----------Arithmetic Conversion Instructions---------------------------------
9963
// RoundFloat is a no-op on this platform: the rule has an empty encoding,
// zero cost and the `empty` pipeline class, so no code is emitted for it.
9964instruct roundFloat_nop(regF dst)
9965%{
9966  match(Set dst (RoundFloat dst));
9967
9968  ins_cost(0);
9969  ins_encode();
9970  ins_pipe(empty);
9971%}
9972
// RoundDouble is a no-op on this platform: the rule has an empty encoding,
// zero cost and the `empty` pipeline class, so no code is emitted for it.
9973instruct roundDouble_nop(regD dst)
9974%{
9975  match(Set dst (RoundDouble dst));
9976
9977  ins_cost(0);
9978  ins_encode();
9979  ins_pipe(empty);
9980%}
9981
// Widen float to double, register source.  F3 0F 5A is CVTSS2SD; the F3
// prefix is emitted before the REX byte, as x86-64 encoding requires.
9982instruct convF2D_reg_reg(regD dst, regF src)
9983%{
9984  match(Set dst (ConvF2D src));
9985
9986  format %{ "cvtss2sd $dst, $src" %}
9987  opcode(0xF3, 0x0F, 0x5A);
9988  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
9989  ins_pipe(pipe_slow); // XXX
9990%}
9991
// Widen float to double with the LoadF folded into the instruction's memory
// operand.  F3 0F 5A is CVTSS2SD.
9992instruct convF2D_reg_mem(regD dst, memory src)
9993%{
9994  match(Set dst (ConvF2D (LoadF src)));
9995
9996  format %{ "cvtss2sd $dst, $src" %}
9997  opcode(0xF3, 0x0F, 0x5A);
9998  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
9999  ins_pipe(pipe_slow); // XXX
10000%}
10001
// Narrow double to float, register source.  F2 0F 5A is CVTSD2SS.
10002instruct convD2F_reg_reg(regF dst, regD src)
10003%{
10004  match(Set dst (ConvD2F src));
10005
10006  format %{ "cvtsd2ss $dst, $src" %}
10007  opcode(0xF2, 0x0F, 0x5A);
10008  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10009  ins_pipe(pipe_slow); // XXX
10010%}
10011
// Narrow double to float with the LoadD folded into the instruction's memory
// operand.  F2 0F 5A is CVTSD2SS.
10012instruct convD2F_reg_mem(regF dst, memory src)
10013%{
10014  match(Set dst (ConvD2F (LoadD src)));
10015
10016  format %{ "cvtsd2ss $dst, $src" %}
10017  opcode(0xF2, 0x0F, 0x5A);
10018  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10019  ins_pipe(pipe_slow); // XXX
10020%}
10021
10022// XXX do mem variants
//
// Float -> int.  F3 0F 2C is CVTTSS2SI (truncating convert), which yields
// 0x80000000 when the input is NaN or out of int range.  As the format
// string shows, the result is compared against that sentinel and, on a
// match, the source is spilled to the stack and a fixup stub is called to
// produce the final value.  The compare clobbers the flags, hence KILL cr.
// The fixup sequence itself is emitted by the f2i_fixup enc_class, defined
// outside this section.
10023instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
10024%{
10025  match(Set dst (ConvF2I src));
10026  effect(KILL cr);
10027
10028  format %{ "cvttss2sil $dst, $src\t# f2i\n\t"
10029            "cmpl    $dst, #0x80000000\n\t"
10030            "jne,s   done\n\t"
10031            "subq    rsp, #8\n\t"
10032            "movss   [rsp], $src\n\t"
10033            "call    f2i_fixup\n\t"
10034            "popq    $dst\n"
10035    "done:   "%}
10036  opcode(0xF3, 0x0F, 0x2C);
10037  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
10038             f2i_fixup(dst, src));
10039  ins_pipe(pipe_slow);
10040%}
10041
// Float -> long.  Same scheme as convF2I_reg_reg, but the REX prefix comes
// from the *_wide helper (64-bit destination) and the sentinel checked for
// is 0x8000000000000000.  On a sentinel hit the f2l_fixup enc_class
// (defined elsewhere) emits the spill / call / pop sequence shown in the
// format string.  The compare clobbers the flags, hence KILL cr.
10042instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
10043%{
10044  match(Set dst (ConvF2L src));
10045  effect(KILL cr);
10046
10047  format %{ "cvttss2siq $dst, $src\t# f2l\n\t"
10048            "cmpq    $dst, [0x8000000000000000]\n\t"
10049            "jne,s   done\n\t"
10050            "subq    rsp, #8\n\t"
10051            "movss   [rsp], $src\n\t"
10052            "call    f2l_fixup\n\t"
10053            "popq    $dst\n"
10054    "done:   "%}
10055  opcode(0xF3, 0x0F, 0x2C);
10056  ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
10057             f2l_fixup(dst, src));
10058  ins_pipe(pipe_slow);
10059%}
10060
// Double -> int.  F2 0F 2C is CVTTSD2SI (truncating convert), which yields
// 0x80000000 when the input is NaN or out of int range.  The format string
// shows the follow-up: compare against that sentinel and, on a match, spill
// the source and call a fixup stub (emitted by the d2i_fixup enc_class,
// defined elsewhere).  The compare clobbers the flags, hence KILL cr.
10061instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
10062%{
10063  match(Set dst (ConvD2I src));
10064  effect(KILL cr);
10065
10066  format %{ "cvttsd2sil $dst, $src\t# d2i\n\t"
10067            "cmpl    $dst, #0x80000000\n\t"
10068            "jne,s   done\n\t"
10069            "subq    rsp, #8\n\t"
10070            "movsd   [rsp], $src\n\t"
10071            "call    d2i_fixup\n\t"
10072            "popq    $dst\n"
10073    "done:   "%}
10074  opcode(0xF2, 0x0F, 0x2C);
10075  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
10076             d2i_fixup(dst, src));
10077  ins_pipe(pipe_slow);
10078%}
10079
// Double -> long.  Same scheme as convD2I_reg_reg, but the REX prefix comes
// from the *_wide helper (64-bit destination) and the sentinel checked for
// is 0x8000000000000000.  The fixup sequence is emitted by the d2l_fixup
// enc_class, defined elsewhere.  The compare clobbers the flags, hence
// KILL cr.
10080instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
10081%{
10082  match(Set dst (ConvD2L src));
10083  effect(KILL cr);
10084
10085  format %{ "cvttsd2siq $dst, $src\t# d2l\n\t"
10086            "cmpq    $dst, [0x8000000000000000]\n\t"
10087            "jne,s   done\n\t"
10088            "subq    rsp, #8\n\t"
10089            "movsd   [rsp], $src\n\t"
10090            "call    d2l_fixup\n\t"
10091            "popq    $dst\n"
10092    "done:   "%}
10093  opcode(0xF2, 0x0F, 0x2C);
10094  ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
10095             d2l_fixup(dst, src));
10096  ins_pipe(pipe_slow);
10097%}
10098
// Int -> float, register source.  F3 0F 2A is CVTSI2SS; without REX.W the
// source is read as a 32-bit integer.
10099instruct convI2F_reg_reg(regF dst, rRegI src)
10100%{
10101  match(Set dst (ConvI2F src));
10102
10103  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
10104  opcode(0xF3, 0x0F, 0x2A);
10105  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10106  ins_pipe(pipe_slow); // XXX
10107%}
10108
// Int -> float with the LoadI folded into the instruction's memory operand.
// F3 0F 2A is CVTSI2SS.
10109instruct convI2F_reg_mem(regF dst, memory src)
10110%{
10111  match(Set dst (ConvI2F (LoadI src)));
10112
10113  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
10114  opcode(0xF3, 0x0F, 0x2A);
10115  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10116  ins_pipe(pipe_slow); // XXX
10117%}
10118
// Int -> double, register source.  F2 0F 2A is CVTSI2SD; without REX.W the
// source is read as a 32-bit integer.
10119instruct convI2D_reg_reg(regD dst, rRegI src)
10120%{
10121  match(Set dst (ConvI2D src));
10122
10123  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
10124  opcode(0xF2, 0x0F, 0x2A);
10125  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10126  ins_pipe(pipe_slow); // XXX
10127%}
10128
// Int -> double with the LoadI folded into the instruction's memory operand.
// F2 0F 2A is CVTSI2SD.
10129instruct convI2D_reg_mem(regD dst, memory src)
10130%{
10131  match(Set dst (ConvI2D (LoadI src)));
10132
10133  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
10134  opcode(0xF2, 0x0F, 0x2A);
10135  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10136  ins_pipe(pipe_slow); // XXX
10137%}
10138
// Long -> float, register source.  Same opcode as the int form (F3 0F 2A,
// CVTSI2SS); the *_wide REX helper is what selects the 64-bit source.
10139instruct convL2F_reg_reg(regF dst, rRegL src)
10140%{
10141  match(Set dst (ConvL2F src));
10142
10143  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
10144  opcode(0xF3, 0x0F, 0x2A);
10145  ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
10146  ins_pipe(pipe_slow); // XXX
10147%}
10148
// Long -> float with the LoadL folded into the instruction's memory operand.
// F3 0F 2A is CVTSI2SS; the *_wide REX helper selects the 64-bit source.
10149instruct convL2F_reg_mem(regF dst, memory src)
10150%{
10151  match(Set dst (ConvL2F (LoadL src)));
10152
10153  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
10154  opcode(0xF3, 0x0F, 0x2A);
10155  ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
10156  ins_pipe(pipe_slow); // XXX
10157%}
10158
// Long -> double, register source.  Same opcode as the int form (F2 0F 2A,
// CVTSI2SD); the *_wide REX helper is what selects the 64-bit source.
10159instruct convL2D_reg_reg(regD dst, rRegL src)
10160%{
10161  match(Set dst (ConvL2D src));
10162
10163  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
10164  opcode(0xF2, 0x0F, 0x2A);
10165  ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
10166  ins_pipe(pipe_slow); // XXX
10167%}
10168
// Long -> double with the LoadL folded into the instruction's memory operand.
// F2 0F 2A is CVTSI2SD; the *_wide REX helper selects the 64-bit source.
10169instruct convL2D_reg_mem(regD dst, memory src)
10170%{
10171  match(Set dst (ConvL2D (LoadL src)));
10172
10173  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
10174  opcode(0xF2, 0x0F, 0x2A);
10175  ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
10176  ins_pipe(pipe_slow); // XXX
10177%}
10178
// Sign-extend int to long, register source.  Opcode 0x63 with REX.W is
// MOVSXD (printed here as movslq).
10179instruct convI2L_reg_reg(rRegL dst, rRegI src)
10180%{
10181  match(Set dst (ConvI2L src));
10182
10183  ins_cost(125);
10184  format %{ "movslq  $dst, $src\t# i2l" %}
10185  opcode(0x63); // needs REX.W
10186  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src));
10187  ins_pipe(ialu_reg_reg);
10188%}
10189
10190// instruct convI2L_reg_reg_foo(rRegL dst, rRegI src)
10191// %{
10192//   match(Set dst (ConvI2L src));
10193// //   predicate(_kids[0]->_leaf->as_Type()->type()->is_int()->_lo >= 0 &&
10194// //             _kids[0]->_leaf->as_Type()->type()->is_int()->_hi >= 0);
10195//   predicate(((const TypeNode*) n)->type()->is_long()->_hi ==
10196//             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_hi &&
10197//             ((const TypeNode*) n)->type()->is_long()->_lo ==
10198//             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_lo);
10199
10200//   format %{ "movl    $dst, $src\t# unsigned i2l" %}
10201//   ins_encode(enc_copy(dst, src));
10202// //   opcode(0x63); // needs REX.W
10203// //   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src));
10204//   ins_pipe(ialu_reg_reg);
10205// %}
10206
// Sign-extend int to long with the LoadI folded into the instruction's
// memory operand.  Opcode 0x63 with REX.W is MOVSXD (printed as movslq).
10207instruct convI2L_reg_mem(rRegL dst, memory src)
10208%{
10209  match(Set dst (ConvI2L (LoadI src)));
10210
10211  format %{ "movslq  $dst, $src\t# i2l" %}
10212  opcode(0x63); // needs REX.W
10213  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst,src));
10214  ins_pipe(ialu_reg_mem);
10215%}
10216
10217// Zero-extend convert int to long.
// Matches an int->long conversion whose result is immediately ANDed with an
// immL_32bits constant (operand defined elsewhere; presumably 0xFFFFFFFF).
// On x86-64 a 32-bit movl clears the upper half of the destination, so a
// single register copy does both jobs.  enc_copy is defined elsewhere.
10218instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
10219%{
10220  match(Set dst (AndL (ConvI2L src) mask));
10221
10222  format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
10223  ins_encode(enc_copy(dst, src));
10224  ins_pipe(ialu_reg_reg);
10225%}
10226
10227// Zero-extend convert int to long, memory source.
// Matches (AndL (ConvI2L (LoadI src)) mask) with an immL_32bits constant
// (operand defined elsewhere; presumably 0xFFFFFFFF).  Opcode 0x8B without
// REX.W is a 32-bit MOV load, which clears the upper half of the 64-bit
// destination register.
10228instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
10229%{
10230  match(Set dst (AndL (ConvI2L (LoadI src)) mask));
10231
10232  format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
10233  opcode(0x8B);
10234  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
10235  ins_pipe(ialu_reg_mem);
10236%}
10237
// Zero-extend the low 32 bits of a long: matches (AndL src mask) with an
// immL_32bits constant (operand defined elsewhere; presumably 0xFFFFFFFF)
// and is printed as a 32-bit movl.  Note this uses enc_copy_always rather
// than enc_copy; presumably the move must be emitted even when dst == src,
// because the move itself is what clears the upper bits -- confirm against
// the enc_class definitions.
10238instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
10239%{
10240  match(Set dst (AndL src mask));
10241
10242  format %{ "movl    $dst, $src\t# zero-extend long" %}
10243  ins_encode(enc_copy_always(dst, src));
10244  ins_pipe(ialu_reg_reg);
10245%}
10246
// Truncate long to int, printed as a 32-bit movl.  Uses enc_copy_always
// (defined elsewhere); presumably the move is emitted even when dst == src.
10247instruct convL2I_reg_reg(rRegI dst, rRegL src)
10248%{
10249  match(Set dst (ConvL2I src));
10250
10251  format %{ "movl    $dst, $src\t# l2i" %}
10252  ins_encode(enc_copy_always(dst, src));
10253  ins_pipe(ialu_reg_reg);
10254%}
10255
10256
// Raw bit move float -> int where the float lives in a stack slot: a plain
// 32-bit MOV load (opcode 0x8B) into the integer register.
10257instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
10258  match(Set dst (MoveF2I src));
10259  effect(DEF dst, USE src);
10260
10261  ins_cost(125);
10262  format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
10263  opcode(0x8B);
10264  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
10265  ins_pipe(ialu_reg_mem);
10266%}
10267
// Raw bit move int -> float where the int lives in a stack slot: a MOVSS
// load (F3 0F 10) into the XMM register.
10268instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
10269  match(Set dst (MoveI2F src));
10270  effect(DEF dst, USE src);
10271
10272  ins_cost(125);
10273  format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
10274  opcode(0xF3, 0x0F, 0x10);
10275  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10276  ins_pipe(pipe_slow);
10277%}
10278
// Raw bit move double -> long where the double lives in a stack slot: a
// 64-bit MOV load (opcode 0x8B with the *_wide REX helper).
10279instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
10280  match(Set dst (MoveD2L src));
10281  effect(DEF dst, USE src);
10282
10283  ins_cost(125);
10284  format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
10285  opcode(0x8B);
10286  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
10287  ins_pipe(ialu_reg_mem);
10288%}
10289
// Raw bit move long -> double from a stack slot, selected when
// UseXmmLoadAndClearUpper is off.  66 0F 12 is MOVLPD, which loads the low
// 64 bits of the XMM register and leaves the upper 64 bits unchanged
// (hence "partial").  The other setting is handled by MoveL2D_stack_reg.
10290instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
10291  predicate(!UseXmmLoadAndClearUpper);
10292  match(Set dst (MoveL2D src));
10293  effect(DEF dst, USE src);
10294
10295  ins_cost(125);
10296  format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
10297  opcode(0x66, 0x0F, 0x12);
10298  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10299  ins_pipe(pipe_slow);
10300%}
10301
// Raw bit move long -> double from a stack slot, selected when
// UseXmmLoadAndClearUpper is on.  F2 0F 10 is a MOVSD load, which also
// zeroes the upper 64 bits of the XMM register.  The other setting is
// handled by MoveL2D_stack_reg_partial.
10302instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
10303  predicate(UseXmmLoadAndClearUpper);
10304  match(Set dst (MoveL2D src));
10305  effect(DEF dst, USE src);
10306
10307  ins_cost(125);
10308  format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
10309  opcode(0xF2, 0x0F, 0x10);
10310  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10311  ins_pipe(pipe_slow);
10312%}
10313
10314
// Raw bit move float -> int where the result goes to a stack slot: a MOVSS
// store (F3 0F 11).  The helpers take (src, dst) because the XMM register
// is the reg field and the stack slot is the memory operand.
10315instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
10316  match(Set dst (MoveF2I src));
10317  effect(DEF dst, USE src);
10318
10319  ins_cost(95); // XXX
10320  format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
10321  opcode(0xF3, 0x0F, 0x11);
10322  ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
10323  ins_pipe(pipe_slow);
10324%}
10325
// Raw bit move int -> float where the result goes to a stack slot: a 32-bit
// MOV store (opcode 0x89).  The helpers take (src, dst) because the integer
// register is the reg field and the stack slot is the memory operand.
10326instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
10327  match(Set dst (MoveI2F src));
10328  effect(DEF dst, USE src);
10329
10330  ins_cost(100);
10331  format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
10332  opcode(0x89);
10333  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
10334  ins_pipe( ialu_mem_reg );
10335%}
10336
// Raw bit move double -> long where the result goes to a stack slot: a MOVSD
// store (F2 0F 11).  The helpers take (src, dst) because the XMM register
// is the reg field and the stack slot is the memory operand.
// The format tag names this instruct (it previously read MoveL2D_reg_stack,
// which made it indistinguishable from its sibling in printed output).
10337instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
10338  match(Set dst (MoveD2L src));
10339  effect(DEF dst, USE src);
10340
10341  ins_cost(95); // XXX
10342  format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
10343  opcode(0xF2, 0x0F, 0x11);
10344  ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
10345  ins_pipe(pipe_slow);
10346%}
10347
// Raw bit move long -> double where the result goes to a stack slot: a
// 64-bit MOV store (opcode 0x89 with the *_wide REX helper).  The helpers
// take (src, dst) because the integer register is the reg field and the
// stack slot is the memory operand.
10348instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
10349  match(Set dst (MoveL2D src));
10350  effect(DEF dst, USE src);
10351
10352  ins_cost(100);
10353  format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
10354  opcode(0x89);
10355  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
10356  ins_pipe(ialu_mem_reg);
10357%}
10358
// Raw bit move float -> int directly between registers, emitted through the
// macro assembler's movdl.  Cost 85 is lower than the stack-based
// MoveF2I variants above (95 and 125).
10359instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
10360  match(Set dst (MoveF2I src));
10361  effect(DEF dst, USE src);
10362  ins_cost(85);
10363  format %{ "movd    $dst,$src\t# MoveF2I" %}
10364  ins_encode %{ __ movdl($dst$$Register, $src$$XMMRegister); %}
10365  ins_pipe( pipe_slow );
10366%}
10367
// Raw bit move double -> long directly between registers, emitted through
// the macro assembler's movdq (the format string prints it as "movd").
// Cost 85 is lower than the stack-based MoveD2L variants above.
10368instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
10369  match(Set dst (MoveD2L src));
10370  effect(DEF dst, USE src);
10371  ins_cost(85);
10372  format %{ "movd    $dst,$src\t# MoveD2L" %}
10373  ins_encode %{ __ movdq($dst$$Register, $src$$XMMRegister); %}
10374  ins_pipe( pipe_slow );
10375%}
10376
10377// The next instructions have long latency and use Int unit. Set high cost.
// Raw bit move int -> float directly between registers, emitted through the
// macro assembler's movdl.  Cost 300 is well above the stack-based MoveI2F
// variants (100 and 125), so the matcher prefers those when they apply.
10378instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
10379  match(Set dst (MoveI2F src));
10380  effect(DEF dst, USE src);
10381  ins_cost(300);
10382  format %{ "movd    $dst,$src\t# MoveI2F" %}
10383  ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); %}
10384  ins_pipe( pipe_slow );
10385%}
10386
// Raw bit move long -> double directly between registers, emitted through
// the macro assembler's movdq (the format string prints it as "movd").
// Cost 300 is well above the stack-based MoveL2D variants (100 and 125).
10387instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
10388  match(Set dst (MoveL2D src));
10389  effect(DEF dst, USE src);
10390  ins_cost(300);
10391  format %{ "movd    $dst,$src\t# MoveL2D" %}
10392  ins_encode %{ __ movdq($dst$$XMMRegister, $src$$Register); %}
10393  ins_pipe( pipe_slow );
10394%}
10395
10396// Replicate scalar to packed byte (1 byte) values in xmm.
// Source is already in an XMM register.  Per the format string: copy the
// source, interleave the low bytes with themselves (PUNPCKLBW) so the byte
// fills a 16-bit word, then broadcast word 0 across the low four words
// (PSHUFLW 0x00).  All of this is emitted by the pshufd_8x8 enc_class,
// defined outside this section.
10397instruct Repl8B_reg(regD dst, regD src) %{
10398  match(Set dst (Replicate8B src));
10399  format %{ "MOVDQA  $dst,$src\n\t"
10400            "PUNPCKLBW $dst,$dst\n\t"
10401            "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
10402  ins_encode( pshufd_8x8(dst, src));
10403  ins_pipe( pipe_slow );
10404%}
10405
10406// Replicate scalar to packed byte (1 byte) values in xmm.
// Source is in a general-purpose register: it is first moved into the XMM
// destination (mov_i2x), and the byte broadcast then runs in place on dst
// (pshufd_8x8(dst, dst)).  Both enc_classes are defined elsewhere.
10407instruct Repl8B_rRegI(regD dst, rRegI src) %{
10408  match(Set dst (Replicate8B src));
10409  format %{ "MOVD    $dst,$src\n\t"
10410            "PUNPCKLBW $dst,$dst\n\t"
10411            "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
10412  ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst));
10413  ins_pipe( pipe_slow );
10414%}
10415
10416// Replicate scalar zero to packed byte (1 byte) values in xmm.
// A vector of zeros needs no shuffle: XORing the register with itself
// clears every lane.
10417instruct Repl8B_immI0(regD dst, immI0 zero) %{
10418  match(Set dst (Replicate8B zero));
10419  format %{ "PXOR  $dst,$dst\t! replicate8B" %}
10420  ins_encode( pxor(dst, dst));
10421  ins_pipe( fpu_reg_reg );
10422%}
10423
10424// Replicate scalar to packed short (2 byte) values in xmm.
// Source is already in an XMM register; per the format string, PSHUFLW with
// selector 0x00 copies word 0 into each of the low four words.  Emitted by
// the pshufd_4x16 enc_class, defined outside this section.
10425instruct Repl4S_reg(regD dst, regD src) %{
10426  match(Set dst (Replicate4S src));
10427  format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
10428  ins_encode( pshufd_4x16(dst, src));
10429  ins_pipe( fpu_reg_reg );
10430%}
10431
10432// Replicate scalar to packed short (2 byte) values in xmm.
// Source is in a general-purpose register: it is first moved into the XMM
// destination (mov_i2x), then word 0 is broadcast in place with PSHUFLW 0x00.
10433instruct Repl4S_rRegI(regD dst, rRegI src) %{
10434  match(Set dst (Replicate4S src));
10435  format %{ "MOVD    $dst,$src\n\t"
10436            "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
10437  ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
10438  ins_pipe( fpu_reg_reg );
10439%}
10440
10441// Replicate scalar zero to packed short (2 byte) values in xmm.
// A vector of zeros needs no shuffle: XORing the register with itself
// clears every lane.
10442instruct Repl4S_immI0(regD dst, immI0 zero) %{
10443  match(Set dst (Replicate4S zero));
10444  format %{ "PXOR  $dst,$dst\t! replicate4S" %}
10445  ins_encode( pxor(dst, dst));
10446  ins_pipe( fpu_reg_reg );
10447%}
10448
10449// Replicate scalar to packed char (2 byte) values in xmm.
// Same encoding as Repl4S_reg (pshufd_4x16): PSHUFLW with selector 0x00
// copies word 0 into each of the low four words.
10450instruct Repl4C_reg(regD dst, regD src) %{
10451  match(Set dst (Replicate4C src));
10452  format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
10453  ins_encode( pshufd_4x16(dst, src));
10454  ins_pipe( fpu_reg_reg );
10455%}
10456
10457// Replicate scalar to packed char (2 byte) values in xmm.
// Source is in a general-purpose register: it is first moved into the XMM
// destination (mov_i2x), then word 0 is broadcast in place with PSHUFLW 0x00.
10458instruct Repl4C_rRegI(regD dst, rRegI src) %{
10459  match(Set dst (Replicate4C src));
10460  format %{ "MOVD    $dst,$src\n\t"
10461            "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
10462  ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
10463  ins_pipe( fpu_reg_reg );
10464%}
10465
10466// Replicate scalar zero to packed char (2 byte) values in xmm.
// A vector of zeros needs no shuffle: XORing the register with itself
// clears every lane.
10467instruct Repl4C_immI0(regD dst, immI0 zero) %{
10468  match(Set dst (Replicate4C zero));
10469  format %{ "PXOR  $dst,$dst\t! replicate4C" %}
10470  ins_encode( pxor(dst, dst));
10471  ins_pipe( fpu_reg_reg );
10472%}
10473
10474// Replicate scalar to packed integer (4 byte) values in xmm.
// Source is already in an XMM register.  PSHUFD with selector 0x00 copies
// dword 0 of the source into every dword of the destination.
10475instruct Repl2I_reg(regD dst, regD src) %{
10476  match(Set dst (Replicate2I src));
10477  format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
10478  ins_encode( pshufd(dst, src, 0x00));
10479  ins_pipe( fpu_reg_reg );
10480%}
10481
10482// Replicate scalar to packed integer (4 byte) values in xmm.
// Source is in a general-purpose register: it is first moved into the XMM
// destination (mov_i2x), then dword 0 is broadcast in place (PSHUFD 0x00).
10483instruct Repl2I_rRegI(regD dst, rRegI src) %{
10484  match(Set dst (Replicate2I src));
10485  format %{ "MOVD   $dst,$src\n\t"
10486            "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
10487  ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00));
10488  ins_pipe( fpu_reg_reg );
10489%}
10490
10491// Replicate scalar zero to packed integer (4 byte) values in xmm.
// A vector of zeros needs no shuffle: XORing the register with itself
// clears every lane.
10492instruct Repl2I_immI0(regD dst, immI0 zero) %{
10493  match(Set dst (Replicate2I zero));
10494  format %{ "PXOR  $dst,$dst\t! replicate2I" %}
10495  ins_encode( pxor(dst, dst));
10496  ins_pipe( fpu_reg_reg );
10497%}
10498
10499// Replicate scalar to packed single precision floating point values in xmm.
// Source operand is a regD.  PSHUFD selector 0xe0 (binary 11 10 00 00) puts
// source dword 0 into destination dwords 0 and 1 and passes dwords 2 and 3
// through, so only the low two float lanes are replicated.
10500instruct Repl2F_reg(regD dst, regD src) %{
10501  match(Set dst (Replicate2F src));
10502  format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
10503  ins_encode( pshufd(dst, src, 0xe0));
10504  ins_pipe( fpu_reg_reg );
10505%}
10506
10507// Replicate scalar to packed single precision floating point values in xmm.
// Same encoding as Repl2F_reg, but the source operand is a regF.  PSHUFD
// selector 0xe0 puts source dword 0 into destination dwords 0 and 1 and
// passes dwords 2 and 3 through.
10508instruct Repl2F_regF(regD dst, regF src) %{
10509  match(Set dst (Replicate2F src));
10510  format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
10511  ins_encode( pshufd(dst, src, 0xe0));
10512  ins_pipe( fpu_reg_reg );
10513%}
10514
10515// Replicate scalar zero to packed single precision floating point values
// in xmm.  A vector of zeros needs no shuffle: XORing the register with
// itself clears every lane.
10516instruct Repl2F_immF0(regD dst, immF0 zero) %{
10517  match(Set dst (Replicate2F zero));
10518  format %{ "PXOR  $dst,$dst\t! replicate2F" %}
10519  ins_encode( pxor(dst, dst));
10520  ins_pipe( fpu_reg_reg );
10521%}
10522
10523
10524// =======================================================================
10525// fast clearing of an array
// Matches ClearArray.  The operand classes pin the count to RCX, the base
// address to RDI and the zero value to RAX, which are the registers REP STOS
// uses implicitly.  RAX is zeroed with a 32-bit XOR, then F3 48 AB
// (REP + REX.W + STOS) stores RAX to [RDI] RCX times, 8 bytes per
// iteration.  cnt is therefore presumably a count of 8-byte words -- confirm
// against the ClearArray node.  RCX and RDI are consumed (USE_KILL); RAX and
// the flags are clobbered.
10526instruct rep_stos(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
10527                  rFlagsReg cr)
10528%{
10529  match(Set dummy (ClearArray cnt base));
10530  effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
10531
10532  format %{ "xorl    rax, rax\t# ClearArray:\n\t"
10533            "rep stosq\t# Store rax to *rdi++ while rcx--" %}
10534  ins_encode(opc_reg_reg(0x33, RAX, RAX), // xorl %eax, %eax
10535             Opcode(0xF3), Opcode(0x48), Opcode(0xAB)); // rep REX_W stos
10536  ins_pipe(pipe_slow);
10537%}
10538
// Inline string comparison (StrComp).  The operand classes pin str1 to RDI,
// str2 to RSI and the result to RCX; RAX and RBX are scratch.  Both string
// registers are consumed (USE_KILL) and the flags are clobbered.  The code
// itself is emitted by the enc_String_Compare enc_class, defined outside
// this section.
10539instruct string_compare(rdi_RegP str1, rsi_RegP str2, rax_RegI tmp1,
10540                        rbx_RegI tmp2, rcx_RegI result, rFlagsReg cr)
10541%{
10542  match(Set result (StrComp str1 str2));
10543  effect(USE_KILL str1, USE_KILL str2, KILL tmp1, KILL tmp2, KILL cr);
10544  //ins_cost(300);
10545
10546  format %{ "String Compare $str1, $str2 -> $result    // XXX KILL RAX, RBX" %}
10547  ins_encode( enc_String_Compare() );
10548  ins_pipe( pipe_slow );
10549%}
10550
10551//----------Control Flow Instructions------------------------------------------
10552// Signed compare Instructions
10553
10554// XXX more variants!!
// Signed 32-bit compare of two registers; sets the flags register.
// Opcode 0x3B is CMP r32, r/m32.
10555instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
10556%{
10557  match(Set cr (CmpI op1 op2));
10558  effect(DEF cr, USE op1, USE op2);
10559
10560  format %{ "cmpl    $op1, $op2" %}
10561  opcode(0x3B);  /* Opcode 3B /r */
10562  ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
10563  ins_pipe(ialu_cr_reg_reg);
10564%}
10565
// Signed 32-bit compare of a register with an immediate.  81 /7 is
// CMP r/m32, imm32.  OpcSErm and Con8or32 are defined elsewhere; their
// names suggest the shorter sign-extended 8-bit immediate form is chosen
// when the constant fits -- confirm against the enc_class definitions.
10566instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
10567%{
10568  match(Set cr (CmpI op1 op2));
10569
10570  format %{ "cmpl    $op1, $op2" %}
10571  opcode(0x81, 0x07); /* Opcode 81 /7 */
10572  ins_encode(OpcSErm(op1, op2), Con8or32(op2));
10573  ins_pipe(ialu_cr_reg_imm);
10574%}
10575
// Signed 32-bit compare of a register with a value in memory; the LoadI is
// folded into the CMP's memory operand.  Opcode 0x3B is CMP r32, r/m32.
10576instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
10577%{
10578  match(Set cr (CmpI op1 (LoadI op2)));
10579
10580  ins_cost(500); // XXX
10581  format %{ "cmpl    $op1, $op2" %}
10582  opcode(0x3B); /* Opcode 3B /r */
10583  ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
10584  ins_pipe(ialu_cr_reg_mem);
10585%}
10586
// Compare an int register against zero.  Instead of CMP with an immediate,
// the register is TESTed against itself (opcode 0x85), which sets the flags
// from the register's own value without encoding a constant.
10587instruct testI_reg(rFlagsReg cr, rRegI src, immI0 zero)
10588%{
10589  match(Set cr (CmpI src zero));
10590
10591  format %{ "testl   $src, $src" %}
10592  opcode(0x85);
10593  ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
10594  ins_pipe(ialu_cr_reg_imm);
10595%}
10596
// Fuses (src & con) compared with zero into a single TEST, so the AND result
// never needs a register.  F7 /0 is TEST r/m32, imm32.
10597instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI0 zero)
10598%{
10599  match(Set cr (CmpI (AndI src con) zero));
10600
10601  format %{ "testl   $src, $con" %}
10602  opcode(0xF7, 0x00);
10603  ins_encode(REX_reg(src), OpcP, reg_opc(src), Con32(con));
10604  ins_pipe(ialu_cr_reg_imm);
10605%}
10606
// Fuses (src & load) compared with zero into a single TEST with a memory
// operand.  Opcode 0x85 is TEST r/m32, r32.
10607instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI0 zero)
10608%{
10609  match(Set cr (CmpI (AndI src (LoadI mem)) zero));
10610
10611  format %{ "testl   $src, $mem" %}
10612  opcode(0x85);
10613  ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
10614  ins_pipe(ialu_cr_reg_mem);
10615%}
10616
10617// Unsigned compare Instructions; really, same as signed except they
10618// produce an rFlagsRegU instead of rFlagsReg.
// Unsigned 32-bit compare of two registers.  The emitted CMP (opcode 0x3B)
// is identical to the signed form; only the flags operand class differs.
10619instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
10620%{
10621  match(Set cr (CmpU op1 op2));
10622
10623  format %{ "cmpl    $op1, $op2\t# unsigned" %}
10624  opcode(0x3B); /* Opcode 3B /r */
10625  ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
10626  ins_pipe(ialu_cr_reg_reg);
10627%}
10628
// Unsigned 32-bit compare of a register with an immediate.  Same encoding
// as compI_rReg_imm (81 /7, CMP r/m32, imm32); the result is typed as
// rFlagsRegU.
10629instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
10630%{
10631  match(Set cr (CmpU op1 op2));
10632
10633  format %{ "cmpl    $op1, $op2\t# unsigned" %}
10634  opcode(0x81,0x07); /* Opcode 81 /7 */
10635  ins_encode(OpcSErm(op1, op2), Con8or32(op2));
10636  ins_pipe(ialu_cr_reg_imm);
10637%}
10638
// Unsigned 32-bit compare of a register with a value in memory; the LoadI
// is folded into the CMP's memory operand.  Opcode 0x3B is CMP r32, r/m32.
10639instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
10640%{
10641  match(Set cr (CmpU op1 (LoadI op2)));
10642
10643  ins_cost(500); // XXX
10644  format %{ "cmpl    $op1, $op2\t# unsigned" %}
10645  opcode(0x3B); /* Opcode 3B /r */
10646  ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
10647  ins_pipe(ialu_cr_reg_mem);
10648%}
10649
10650// // // Cisc-spilled version of cmpU_rReg
10651// //instruct compU_mem_rReg(rFlagsRegU cr, memory op1, rRegI op2)
10652// //%{
10653// //  match(Set cr (CmpU (LoadI op1) op2));
10654// //
10655// //  format %{ "CMPu   $op1,$op2" %}
10656// //  ins_cost(500);
10657// //  opcode(0x39);  /* Opcode 39 /r */
10658// //  ins_encode( OpcP, reg_mem( op1, op2) );
10659// //%}
10660
// Unsigned compare of an int register against zero, done by TESTing the
// register against itself (opcode 0x85).
10661instruct testU_reg(rFlagsRegU cr, rRegI src, immI0 zero)
10662%{
10663  match(Set cr (CmpU src zero));
10664
10665  format %{ "testl  $src, $src\t# unsigned" %}
10666  opcode(0x85);
10667  ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
10668  ins_pipe(ialu_cr_reg_imm);
10669%}
10670
// Pointer compare of two registers.  64-bit CMP (opcode 0x3B with the
// *_wide REX helper); the result is typed as unsigned flags.
10671instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
10672%{
10673  match(Set cr (CmpP op1 op2));
10674
10675  format %{ "cmpq    $op1, $op2\t# ptr" %}
10676  opcode(0x3B); /* Opcode 3B /r */
10677  ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
10678  ins_pipe(ialu_cr_reg_reg);
10679%}
10680
// Pointer compare of a register with a pointer loaded from memory; the
// LoadP is folded into the CMP's memory operand.  This rule has no
// predicate and cost 500; compP_mem_rReg matches the same tree for
// non-oop loads at the default cost.
10681instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
10682%{
10683  match(Set cr (CmpP op1 (LoadP op2)));
10684
10685  ins_cost(500); // XXX
10686  format %{ "cmpq    $op1, $op2\t# ptr" %}
10687  opcode(0x3B); /* Opcode 3B /r */
10688  ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
10689  ins_pipe(ialu_cr_reg_mem);
10690%}
10691
10692// // // Cisc-spilled version of cmpP_rReg
10693// //instruct compP_mem_rReg(rFlagsRegU cr, memory op1, rRegP op2)
10694// //%{
10695// //  match(Set cr (CmpP (LoadP op1) op2));
10696// //
10697// //  format %{ "CMPu   $op1,$op2" %}
10698// //  ins_cost(500);
10699// //  opcode(0x39);  /* Opcode 39 /r */
10700// //  ins_encode( OpcP, reg_mem( op1, op2) );
10701// //%}
10702
10703// XXX this is generalized by compP_rReg_mem???
10704// Compare raw pointer (used in out-of-heap check).
10705// Only works because non-oop pointers must be raw pointers
10706// and raw pointers have no anti-dependencies.
// The predicate inspects the node feeding the LoadP (n->in(2)->in(2)) and
// accepts the rule only when its type is not an oop pointer.  The match
// tree and encoding are the same as compP_rReg_mem; this rule simply has
// no explicit ins_cost.
10707instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
10708%{
10709  predicate(!n->in(2)->in(2)->bottom_type()->isa_oop_ptr());
10710  match(Set cr (CmpP op1 (LoadP op2)));
10711
10712  format %{ "cmpq    $op1, $op2\t# raw ptr" %}
10713  opcode(0x3B); /* Opcode 3B /r */
10714  ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
10715  ins_pipe(ialu_cr_reg_mem);
10716%}
10717
10718// This will generate a signed flags result. This should be OK since
10719// any compare to a zero should be eq/neq.
// Null check of a pointer register: a 64-bit TEST of the register against
// itself (opcode 0x85 with the *_wide REX helper).
10720instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
10721%{
10722  match(Set cr (CmpP src zero));
10723
10724  format %{ "testq   $src, $src\t# ptr" %}
10725  opcode(0x85);
10726  ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
10727  ins_pipe(ialu_cr_reg_imm);
10728%}
10729
10730// This will generate a signed flags result. This should be OK since
10731// any compare to a zero should be eq/neq.
// Null check of a pointer held in memory, without loading it into a
// register.  F7 /0 with REX.W is TEST r/m64, imm32; the 32-bit immediate
// 0xFFFFFFFF is sign-extended to all ones, so the zero flag is set exactly
// when the 64-bit memory value is zero.
10732instruct testP_reg_mem(rFlagsReg cr, memory op, immP0 zero)
10733%{
10734  match(Set cr (CmpP (LoadP op) zero));
10735
10736  ins_cost(500); // XXX
10737  format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
10738  opcode(0xF7); /* Opcode F7 /0 */
10739  ins_encode(REX_mem_wide(op),
10740             OpcP, RM_opc_mem(0x00, op), Con_d32(0xFFFFFFFF));
10741  ins_pipe(ialu_cr_reg_imm);
10742%}
10743
10744// Yanked all unsigned pointer compare operations.
10745// Pointer compares are done with CmpP which is already unsigned.
10746
// Signed 64-bit compare of two registers.  Opcode 0x3B with the *_wide REX
// helper is CMP r64, r/m64.
10747instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
10748%{
10749  match(Set cr (CmpL op1 op2));
10750
10751  format %{ "cmpq    $op1, $op2" %}
10752  opcode(0x3B);  /* Opcode 3B /r */
10753  ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
10754  ins_pipe(ialu_cr_reg_reg);
10755%}
10756
// Signed 64-bit compare of a register with an immediate.  The operand is
// immL32 because CMP (81 /7) only takes an immediate of at most 32 bits,
// which the CPU sign-extends to 64.  OpcSErm_wide and Con8or32 are defined
// elsewhere.
10757instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
10758%{
10759  match(Set cr (CmpL op1 op2));
10760
10761  format %{ "cmpq    $op1, $op2" %}
10762  opcode(0x81, 0x07); /* Opcode 81 /7 */
10763  ins_encode(OpcSErm_wide(op1, op2), Con8or32(op2));
10764  ins_pipe(ialu_cr_reg_imm);
10765%}
10766
// Signed 64-bit compare of a register with a value in memory; the LoadL is
// folded into the CMP's memory operand.
10767instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
10768%{
10769  match(Set cr (CmpL op1 (LoadL op2)));
10770
10771  ins_cost(500); // XXX
10772  format %{ "cmpq    $op1, $op2" %}
10773  opcode(0x3B); /* Opcode 3B /r */
10774  ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
10775  ins_pipe(ialu_cr_reg_mem);
10776%}
10777
// Compare a long register against zero by TESTing it against itself
// (opcode 0x85 with the *_wide REX helper for 64-bit operands).
10778instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
10779%{
10780  match(Set cr (CmpL src zero));
10781
10782  format %{ "testq   $src, $src" %}
10783  opcode(0x85);
10784  ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
10785  ins_pipe(ialu_cr_reg_imm);
10786%}
10787
// Fuses (src & con) compared with zero into a single 64-bit TEST.  F7 /0
// with REX.W is TEST r/m64, imm32; the operand is immL32 because the
// immediate is limited to 32 bits, sign-extended by the CPU.
10788instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
10789%{
10790  match(Set cr (CmpL (AndL src con) zero));
10791
10792  format %{ "testq   $src, $con\t# long" %}
10793  opcode(0xF7, 0x00);
10794  ins_encode(REX_reg_wide(src), OpcP, reg_opc(src), Con32(con));
10795  ins_pipe(ialu_cr_reg_imm);
10796%}
10797
// Fuses (src & load) compared with zero into a single 64-bit TEST with a
// memory operand (opcode 0x85 with the *_wide REX helper).
10798instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
10799%{
10800  match(Set cr (CmpL (AndL src (LoadL mem)) zero));
10801
10802  format %{ "testq   $src, $mem" %}
10803  opcode(0x85);
10804  ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
10805  ins_pipe(ialu_cr_reg_mem);
10806%}
10807
10808// Manifest a CmpL result in an integer register.  Very painful.
10809// This is the test to avoid.
// Per the format string: after the compare, dst is preset to -1 and kept if
// src1 < src2; otherwise setne/movzbl turn "not equal" into 1 and "equal"
// into 0.  So dst ends up as -1, 0 or 1.  The code is emitted by the
// cmpl3_flag enc_class, defined outside this section.  Flags are clobbered.
10810instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
10811%{
10812  match(Set dst (CmpL3 src1 src2));
10813  effect(KILL flags);
10814
10815  ins_cost(275); // XXX
10816  format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
10817            "movl    $dst, -1\n\t"
10818            "jl,s    done\n\t"
10819            "setne   $dst\n\t"
10820            "movzbl  $dst, $dst\n\t"
10821    "done:" %}
10822  ins_encode(cmpl3_flag(src1, src2, dst));
10823  ins_pipe(pipe_slow);
10824%}
10825
10826//----------Max and Min--------------------------------------------------------
10827// Min Instructions
10828
// Helper for minI_rReg; it has no match rule of its own and is only reached
// through that instruct's expand block.  0F 4F is CMOVG: following
// "cmp dst, src", dst is replaced by src when dst > src, leaving the
// smaller value in dst.
10829instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
10830%{
10831  effect(USE_DEF dst, USE src, USE cr);
10832
10833  format %{ "cmovlgt $dst, $src\t# min" %}
10834  opcode(0x0F, 0x4F);
10835  ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
10836  ins_pipe(pipe_cmov_reg);
10837%}
10838
10839
// Integer minimum, in place.  Expands into a compare of dst with src
// followed by a conditional move that overwrites dst when it is the greater
// of the two; cr is a temporary flags operand local to the expansion.
10840instruct minI_rReg(rRegI dst, rRegI src)
10841%{
10842  match(Set dst (MinI dst src));
10843
10844  ins_cost(200);
10845  expand %{
10846    rFlagsReg cr;
10847    compI_rReg(cr, dst, src);
10848    cmovI_reg_g(dst, src, cr);
10849  %}
10850%}
10851
// Helper for maxI_rReg; it has no match rule of its own and is only reached
// through that instruct's expand block.  0F 4C is CMOVL: following
// "cmp dst, src", dst is replaced by src when dst < src, leaving the
// larger value in dst.
10852instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
10853%{
10854  effect(USE_DEF dst, USE src, USE cr);
10855
10856  format %{ "cmovllt $dst, $src\t# max" %}
10857  opcode(0x0F, 0x4C);
10858  ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
10859  ins_pipe(pipe_cmov_reg);
10860%}
10861
10862
// Integer maximum, in place.  Expands into a compare of dst with src
// followed by a conditional move that overwrites dst when it is the lesser
// of the two; cr is a temporary flags operand local to the expansion.
10863instruct maxI_rReg(rRegI dst, rRegI src)
10864%{
10865  match(Set dst (MaxI dst src));
10866
10867  ins_cost(200);
10868  expand %{
10869    rFlagsReg cr;
10870    compI_rReg(cr, dst, src);
10871    cmovI_reg_l(dst, src, cr);
10872  %}
10873%}
10874
10875// ============================================================================
10876// Branch Instructions
10877
10878// Jump Direct - Label defines a relative address from JMP+1
// Unconditional branch for Goto.  Opcode E9 is JMP rel32, so the declared
// size of 5 bytes is one opcode byte plus a 4-byte displacement.
10879instruct jmpDir(label labl)
10880%{
10881  match(Goto);
10882  effect(USE labl);
10883
10884  ins_cost(300);
10885  format %{ "jmp     $labl" %}
10886  size(5);
10887  opcode(0xE9);
10888  ins_encode(OpcP, Lbl(labl));
10889  ins_pipe(pipe_jmp);
10890  ins_pc_relative(1);
10891%}
10892
10893// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Conditional branch for If on signed flags.  0F 80 is the base of the
// Jcc rel32 opcode family; the Jcc enc_class (defined elsewhere) presumably
// merges the condition code from cop into the second byte.  Declared size
// of 6 bytes is two opcode bytes plus a 4-byte displacement.
10894instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
10895%{
10896  match(If cop cr);
10897  effect(USE labl);
10898
10899  ins_cost(300);
10900  format %{ "j$cop     $labl" %}
10901  size(6);
10902  opcode(0x0F, 0x80);
10903  ins_encode(Jcc(cop, labl));
10904  ins_pipe(pipe_jcc);
10905  ins_pc_relative(1);
10906%}
10907
10908// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Same encoding as jmpCon (0x0F 0x80 base, six bytes), but matches the
// ideal CountedLoopEnd node instead of If.  The format string tags the
// printed instruction with "# loop end".
10909instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
10910%{
10911  match(CountedLoopEnd cop cr);
10912  effect(USE labl);
10913
10914  ins_cost(300);
10915  format %{ "j$cop     $labl\t# loop end" %}
10916  size(6);
10917  opcode(0x0F, 0x80);
10918  ins_encode(Jcc(cop, labl));
10919  ins_pipe(pipe_jcc);
10920  ins_pc_relative(1);
10921%}
10922
10923// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Variant of jmpLoopEnd that takes cmpOpU / rFlagsRegU operands; it also
// matches CountedLoopEnd and uses the same 0x0F 0x80 opcode base and
// six-byte size.  The format string adds a ",u" suffix to the mnemonic.
10924instruct jmpLoopEndU(cmpOpU cop, rFlagsRegU cmp, label labl)
10925%{
10926  match(CountedLoopEnd cop cmp);
10927  effect(USE labl);
10928
10929  ins_cost(300);
10930  format %{ "j$cop,u   $labl\t# loop end" %}
10931  size(6);
10932  opcode(0x0F, 0x80);
10933  ins_encode(Jcc(cop, labl));
10934  ins_pipe(pipe_jcc);
10935  ins_pc_relative(1);
10936%}
10937
10938// Jump Direct Conditional - using unsigned comparison
// Variant of jmpCon that takes cmpOpU / rFlagsRegU operands; it matches the
// ideal If node and uses the same 0x0F 0x80 opcode base and six-byte size
// (two opcode bytes plus a 32-bit displacement).
10939instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl)
10940%{
10941  match(If cop cmp);
10942  effect(USE labl);
10943
10944  ins_cost(300);
10945  format %{ "j$cop,u   $labl" %}
10946  size(6);
10947  opcode(0x0F, 0x80);
10948  ins_encode(Jcc(cop, labl));
10949  ins_pipe(pipe_jcc);
10950  ins_pc_relative(1);
10951%}
10952
10953// ============================================================================
10954// The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
10955// superklass array for an instance of the superklass.  Set a hidden
10956// internal cache on a hit (cache is checked with exposed code in
10957// gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
10958// encoding ALSO sets flags.
10959
// Value-producing form of the slow subtype check: matches
// (Set result (PartialSubtypeCheck sub super)).  The operand classes name
// the fixed registers that the format string spells out literally
// (result = rdi, sub = rsi, super = rax, scan counter = rcx).  rcx and the
// flags are declared killed.  Per the format text, result is xor-ed to zero
// on a hit and left non-zero on a miss.
// The actual bytes come from enc_PartialSubtypeCheck (defined elsewhere in
// this file), shared with partialSubtypeCheck_vs_Zero; presumably it reads
// the primary opcode value below as a "xor rdi" switch -- confirm there.
10960instruct partialSubtypeCheck(rdi_RegP result,
10961                             rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
10962                             rFlagsReg cr)
10963%{
10964  match(Set result (PartialSubtypeCheck sub super));
10965  effect(KILL rcx, KILL cr);
10966
10967  ins_cost(1100);  // slightly larger than the next version
10968  format %{ "cmpq    rax, rsi\n\t"
10969            "jeq,s   hit\n\t"
10970            "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
10971            "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
10972            "addq    rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
10973            "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
10974            "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
10975            "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
10976    "hit:\n\t"
10977            "xorq    $result, $result\t\t# Hit: rdi zero\n\t"
10978    "miss:\t" %}
10979
10980  opcode(0x1); // Force a XOR of RDI
10981  ins_encode(enc_PartialSubtypeCheck());
10982  ins_pipe(pipe_slow);
10983%}
10984
// Flags-producing form of the slow subtype check: matches
// (Set cr (CmpP (PartialSubtypeCheck sub super) zero)), i.e. the check
// fused with the compare of its result against the null pointer constant.
// Only the flags are the result here; rcx and rdi (result) are declared
// killed.  The operand classes name the fixed registers that the format
// string spells out literally (sub = rsi, super = rax).
// Shares enc_PartialSubtypeCheck (defined elsewhere in this file) with
// partialSubtypeCheck; presumably the primary opcode value below tells the
// encoding to skip the final "xor rdi" -- confirm there.  The lower
// ins_cost (1000 vs 1100) makes this rule cheaper than the
// value-producing one.
10985instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr,
10986                                     rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
10987                                     immP0 zero,
10988                                     rdi_RegP result)
10989%{
10990  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
10991  effect(KILL rcx, KILL result);
10992
10993  ins_cost(1000);
10994  format %{ "cmpq    rax, rsi\n\t"
10995            "jeq,s   miss\t# Actually a hit; we are done.\n\t"
10996            "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
10997            "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
10998            "addq    rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
10999            "repne   scasq\t# Scan *rdi++ for a match with rax while rcx-- != 0\n\t"
11000            "jne,s   miss\t\t# Missed: flags nz\n\t"
11001            "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
11002    "miss:\t" %}
11003
11004  opcode(0x0); // No need to XOR RDI
11005  ins_encode(enc_PartialSubtypeCheck());
11006  ins_pipe(pipe_slow);
11007%}
11008
11009// ============================================================================
11010// Branch Instructions -- short offset versions
11011//
11012// These instructions are used to replace jumps of a long offset (the default
11013// match) with jumps of a shorter offset.  These instructions are all tagged
11014// with the ins_short_branch attribute, which causes the ADLC to suppress the
11015// match rules in general matching.  Instead, the ADLC generates a conversion
11016// method in the MachNode which can be used to do in-place replacement of the
11017// long variant with the shorter variant.  The compiler will determine if a
11018// branch can be taken by the is_short_branch_offset() predicate in the machine
11019// specific code section of the file.
11020
11021// Jump Direct - Label defines a relative address from JMP+1
// Short-offset counterpart of jmpDir: same Goto match, but opcode 0xEB is
// the x86 JMP with an 8-bit relative displacement, so size(2) is one opcode
// byte plus one displacement byte.  ins_short_branch(1) tags it as a
// short-branch replacement as described in the section comment above.
11022instruct jmpDir_short(label labl)
11023%{
11024  match(Goto);
11025  effect(USE labl);
11026
11027  ins_cost(300);
11028  format %{ "jmp,s   $labl" %}
11029  size(2);
11030  opcode(0xEB);
11031  ins_encode(OpcP, LblShort(labl));
11032  ins_pipe(pipe_jmp);
11033  ins_pc_relative(1);
11034  ins_short_branch(1);
11035%}
11036
11037// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short-offset counterpart of jmpCon: same If match, but 0x70 is the base of
// the one-byte x86 Jcc rel8 opcodes, so size(2) is one opcode byte plus one
// displacement byte.  The JccShort encoding class is defined elsewhere;
// presumably it merges the condition from cop into the opcode byte.
// ins_short_branch(1) tags it as a short-branch replacement.
11038instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl)
11039%{
11040  match(If cop cr);
11041  effect(USE labl);
11042
11043  ins_cost(300);
11044  format %{ "j$cop,s   $labl" %}
11045  size(2);
11046  opcode(0x70);
11047  ins_encode(JccShort(cop, labl));
11048  ins_pipe(pipe_jcc);
11049  ins_pc_relative(1);
11050  ins_short_branch(1);
11051%}
11052
11053// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short-offset counterpart of jmpLoopEnd: same CountedLoopEnd match, but
// 0x70 is the base of the one-byte x86 Jcc rel8 opcodes, so size(2) is one
// opcode byte plus one displacement byte.  ins_short_branch(1) tags it as a
// short-branch replacement.  The format string carries the same
// "# loop end" tag as the long form so listings identify the loop back-edge
// whichever variant was selected.
11054instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl)
11055%{
11056  match(CountedLoopEnd cop cr);
11057  effect(USE labl);
11058
11059  ins_cost(300);
11060  format %{ "j$cop,s   $labl\t# loop end" %}
11061  size(2);
11062  opcode(0x70);
11063  ins_encode(JccShort(cop, labl));
11064  ins_pipe(pipe_jcc);
11065  ins_pc_relative(1);
11066  ins_short_branch(1);
11067%}
11068
11069// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short-offset counterpart of jmpLoopEndU: same CountedLoopEnd match with
// cmpOpU / rFlagsRegU operands, but 0x70 is the base of the one-byte x86
// Jcc rel8 opcodes, so size(2) is one opcode byte plus one displacement
// byte.  ins_short_branch(1) tags it as a short-branch replacement.  The
// format string carries the same "# loop end" tag as the long form so
// listings identify the loop back-edge whichever variant was selected.
11070instruct jmpLoopEndU_short(cmpOpU cop, rFlagsRegU cmp, label labl)
11071%{
11072  match(CountedLoopEnd cop cmp);
11073  effect(USE labl);
11074
11075  ins_cost(300);
11076  format %{ "j$cop,us  $labl\t# loop end" %}
11077  size(2);
11078  opcode(0x70);
11079  ins_encode(JccShort(cop, labl));
11080  ins_pipe(pipe_jcc);
11081  ins_pc_relative(1);
11082  ins_short_branch(1);
11083%}
11084
11085// Jump Direct Conditional - using unsigned comparison
// Short-offset counterpart of jmpConU: same If match with cmpOpU /
// rFlagsRegU operands, but 0x70 is the base of the one-byte x86 Jcc rel8
// opcodes, so size(2) is one opcode byte plus one displacement byte.
// ins_short_branch(1) tags it as a short-branch replacement.
11086instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl)
11087%{
11088  match(If cop cmp);
11089  effect(USE labl);
11090
11091  ins_cost(300);
11092  format %{ "j$cop,us  $labl" %}
11093  size(2);
11094  opcode(0x70);
11095  ins_encode(JccShort(cop, labl));
11096  ins_pipe(pipe_jcc);
11097  ins_pc_relative(1);
11098  ins_short_branch(1);
11099%}
11100
11101// ============================================================================
11102// inlined locking and unlocking
11103
// Inlined lock fast path: matches (Set cr (FastLock object box)), so the
// only result is the flags operand.  tmp (operand class rax_RegI) and scr
// are declared TEMP, i.e. scratch registers for the encoding.  The emitted
// code comes entirely from the Fast_Lock encoding class, defined elsewhere
// in this file.
11104instruct cmpFastLock(rFlagsReg cr,
11105                     rRegP object, rRegP box, rax_RegI tmp, rRegP scr)
11106%{
11107  match(Set cr (FastLock object box));
11108  effect(TEMP tmp, TEMP scr);
11109
11110  ins_cost(300);
11111  format %{ "fastlock $object,$box,$tmp,$scr" %}
11112  ins_encode(Fast_Lock(object, box, tmp, scr));
11113  ins_pipe(pipe_slow);
11114  ins_pc_relative(1);
11115%}
11116
// Inlined unlock fast path: matches (Set cr (FastUnlock object box)), so
// the only result is the flags operand.  box uses operand class rax_RegP and
// tmp is declared TEMP (scratch for the encoding).  The emitted code comes
// entirely from the Fast_Unlock encoding class, defined elsewhere in this
// file.
11117instruct cmpFastUnlock(rFlagsReg cr,
11118                       rRegP object, rax_RegP box, rRegP tmp)
11119%{
11120  match(Set cr (FastUnlock object box));
11121  effect(TEMP tmp);
11122
11123  ins_cost(300);
11124  format %{ "fastunlock $object, $box, $tmp" %}
11125  ins_encode(Fast_Unlock(object, box, tmp));
11126  ins_pipe(pipe_slow);
11127  ins_pc_relative(1);
11128%}
11129
11130
11131// ============================================================================
11132// Safepoint Instructions
// Matches the ideal SafePoint node.  Per the format string the poll is a
// RIP-relative "testl" against the poll page; the instruction produces no
// value and only the flags are declared killed.  The bytes are produced by
// the enc_safepoint_poll encoding class, defined elsewhere in this file.
11133instruct safePoint_poll(rFlagsReg cr)
11134%{
11135  match(SafePoint);
11136  effect(KILL cr);
11137
11138  format %{ "testl   rax, [rip + #offset_to_poll_page]\t"
11139            "# Safepoint: poll for GC" %}
11140  size(6); // Opcode + ModRM + Disp32 == 6 bytes
11141  ins_cost(125);
11142  ins_encode(enc_safepoint_poll);
11143  ins_pipe(ialu_reg_mem);
11144%}
11145
11146// ============================================================================
11147// Procedure Call/Return Instructions
11148// Call Java Static Instruction
11149// Note: If this code changes, the corresponding ret_addr_offset() and
11150//       compute_padding() functions will have to be adjusted.
// Matches the ideal CallStaticJava node.  Opcode 0xE8 is the x86 near CALL
// with a 32-bit relative displacement.  The encoding is Java_Static_Call
// followed by call_epilog, both defined elsewhere in this file.
// ins_alignment(4) requests 4-byte alignment for this instruction
// (presumably so the call displacement can be patched safely -- confirm
// against compute_padding()).
11151instruct CallStaticJavaDirect(method meth)
11152%{
11153  match(CallStaticJava);
11154  effect(USE meth);
11155
11156  ins_cost(300);
11157  format %{ "call,static " %}
11158  opcode(0xE8); /* E8 cd */
11159  ins_encode(Java_Static_Call(meth), call_epilog);
11160  ins_pipe(pipe_slow);
11161  ins_pc_relative(1);
11162  ins_alignment(4);
11163%}
11164
11165// Call Java Dynamic Instruction
11166// Note: If this code changes, the corresponding ret_addr_offset() and
11167//       compute_padding() functions will have to be adjusted.
// Matches the ideal CallDynamicJava node.  Per the format string the call is
// preceded by a movq that loads rax with Universe::non_oop_word(); both
// instructions are emitted by the Java_Dynamic_Call encoding class, which
// is followed by call_epilog (both defined elsewhere in this file).
// Opcode 0xE8 is the x86 near CALL with a 32-bit relative displacement.
11168instruct CallDynamicJavaDirect(method meth)
11169%{
11170  match(CallDynamicJava);
11171  effect(USE meth);
11172
11173  ins_cost(300);
11174  format %{ "movq    rax, #Universe::non_oop_word()\n\t"
11175            "call,dynamic " %}
11176  opcode(0xE8); /* E8 cd */
11177  ins_encode(Java_Dynamic_Call(meth), call_epilog);
11178  ins_pipe(pipe_slow);
11179  ins_pc_relative(1);
11180  ins_alignment(4);
11181%}
11182
11183// Call Runtime Instruction
// Matches the ideal CallRuntime node.  Opcode 0xE8 is the x86 near CALL with
// a 32-bit relative displacement; the bytes are produced by the
// Java_To_Runtime encoding class (defined elsewhere in this file).  Unlike
// the Java call rules above, no call_epilog and no ins_alignment are
// specified here.
11184instruct CallRuntimeDirect(method meth)
11185%{
11186  match(CallRuntime);
11187  effect(USE meth);
11188
11189  ins_cost(300);
11190  format %{ "call,runtime " %}
11191  opcode(0xE8); /* E8 cd */
11192  ins_encode(Java_To_Runtime(meth));
11193  ins_pipe(pipe_slow);
11194  ins_pc_relative(1);
11195%}
11196
11197// Call runtime without safepoint
// Matches the ideal CallLeaf node.  The encoding is identical to
// CallRuntimeDirect (opcode 0xE8 near CALL via Java_To_Runtime); only the
// matched node and the format text differ.
11198instruct CallLeafDirect(method meth)
11199%{
11200  match(CallLeaf);
11201  effect(USE meth);
11202
11203  ins_cost(300);
11204  format %{ "call_leaf,runtime " %}
11205  opcode(0xE8); /* E8 cd */
11206  ins_encode(Java_To_Runtime(meth));
11207  ins_pipe(pipe_slow);
11208  ins_pc_relative(1);
11209%}
11210
11211// Call runtime without safepoint
// Matches the ideal CallLeafNoFP node.  The encoding is identical to
// CallLeafDirect (opcode 0xE8 near CALL via Java_To_Runtime); only the
// matched node and the format text differ.
11212instruct CallLeafNoFPDirect(method meth)
11213%{
11214  match(CallLeafNoFP);
11215  effect(USE meth);
11216
11217  ins_cost(300);
11218  format %{ "call_leaf_nofp,runtime " %}
11219  opcode(0xE8); /* E8 cd */
11220  ins_encode(Java_To_Runtime(meth));
11221  ins_pipe(pipe_slow);
11222  ins_pc_relative(1);
11223%}
11224
11225// Return Instruction
11226// Remove the return address & jump to it.
11227// Notice: We always emit a nop after a ret to make sure there is room
11228// for safepoint patching
// NOTE(review): the ins_encode below lists only OpcP (the single opcode
// byte 0xC3, the x86 near RET); no nop is emitted by this definition as
// written.  Confirm whether the notice above is still accurate or whether
// padding is added elsewhere.
11229instruct Ret()
11230%{
11231  match(Return);
11232
11233  format %{ "ret" %}
11234  opcode(0xC3);
11235  ins_encode(OpcP);
11236  ins_pipe(pipe_jmp);
11237%}
11238
11239// Tail Call; Jump from runtime stub to Java code.
11240// Also known as an 'interprocedural jump'.
11241// Target of jump will eventually return to caller.
11242// TailJump below removes the return address.
// Matches (TailCall jump_target method_oop).  Opcode FF /4 is the x86
// indirect near JMP through a register; only jump_target takes part in the
// encoding.  method_oop uses operand class rbx_RegP and is not referenced
// by the encoding -- per the format string, rbx simply holds the method oop
// when the jump is taken.
11243instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_oop)
11244%{
11245  match(TailCall jump_target method_oop);
11246
11247  ins_cost(300);
11248  format %{ "jmp     $jump_target\t# rbx holds method oop" %}
11249  opcode(0xFF, 0x4); /* Opcode FF /4 */
11250  ins_encode(REX_reg(jump_target), OpcP, reg_opc(jump_target));
11251  ins_pipe(pipe_jmp);
11252%}
11253
11254// Tail Jump; remove the return address; jump to target.
11255// TailCall above leaves the return address around.
// Matches (TailJump jump_target ex_oop).  Two instructions are emitted: the
// literal byte 0x5a (popq rdx, discarding the return address into rdx) and
// then the FF /4 indirect near JMP through jump_target.  ex_oop uses operand
// class rax_RegP and is not referenced by the encoding.  rdx is overwritten
// by the pop but no effect is declared for it.
11256instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
11257%{
11258  match(TailJump jump_target ex_oop);
11259
11260  ins_cost(300);
11261  format %{ "popq    rdx\t# pop return address\n\t"
11262            "jmp     $jump_target" %}
11263  opcode(0xFF, 0x4); /* Opcode FF /4 */
11264  ins_encode(Opcode(0x5a), // popq rdx
11265             REX_reg(jump_target), OpcP, reg_opc(jump_target));
11266  ins_pipe(pipe_jmp);
11267%}
11268
11269// Create exception oop: created by stack-crawling runtime code.
11270// Created exception is now available to this handler, and is setup
11271// just prior to jumping to this handler.  No code emitted.
// Matches (Set ex_oop (CreateEx)) with ex_oop in operand class rax_RegP.
// size(0) together with the empty ins_encode() means the rule only tells
// the register allocator where the exception oop lives.
11272instruct CreateException(rax_RegP ex_oop)
11273%{
11274  match(Set ex_oop (CreateEx));
11275
11276  size(0);
11277  // use the following format syntax
11278  format %{ "# exception oop is in rax; no code emitted" %}
11279  ins_encode();
11280  ins_pipe(empty);
11281%}
11282
11283// Rethrow exception:
11284// The exception oop will come in the first argument position.
11285// Then JUMP (not call) to the rethrow stub code.
// Matches the ideal Rethrow node.  The rule has no operands; the jump is
// emitted by the enc_rethrow encoding class, defined elsewhere in this file.
11286instruct RethrowException()
11287%{
11288  match(Rethrow);
11289
11290  // use the following format syntax
11291  format %{ "jmp     rethrow_stub" %}
11292  ins_encode(enc_rethrow);
11293  ins_pipe(pipe_jmp);
11294%}
11295
11296
11297//----------PEEPHOLE RULES-----------------------------------------------------
11298// These must follow all instruction definitions as they use the names
11299// defined in the instruction definitions.
11300//
11301// peepmatch ( root_instr_name [preceding_instruction]* );
11302//
11303// peepconstraint %{
11304// (instruction_number.operand_name relational_op instruction_number.operand_name
11305//  [, ...] );
11306// // instruction numbers are zero-based using left to right order in peepmatch
11307//
11308// peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
11309// // provide an instruction_number.operand_name for each operand that appears
11310// // in the replacement instruction's match rule
11311//
11312// ---------VM FLAGS---------------------------------------------------------
11313//
11314// All peephole optimizations can be turned off using -XX:-OptoPeephole
11315//
11316// Each peephole rule is given an identifying number starting with zero and
11317// increasing by one in the order seen by the parser.  An individual peephole
11318// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
11319// on the command-line.
11320//
11321// ---------CURRENT LIMITATIONS----------------------------------------------
11322//
11323// Only match adjacent instructions in same basic block
11324// Only equality constraints
11325// Only constraints between operands, not (0.dest_reg == RAX_enc)
11326// Only one replacement instruction
11327//
11328// ---------EXAMPLE----------------------------------------------------------
11329//
11330// // pertinent parts of existing instructions in architecture description
11331// instruct movI(rRegI dst, rRegI src)
11332// %{
11333//   match(Set dst (CopyI src));
11334// %}
11335//
11336// instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
11337// %{
11338//   match(Set dst (AddI dst src));
11339//   effect(KILL cr);
11340// %}
11341//
11342// // Change (inc mov) to lea
11343// peephole %{
11344//   // increment preceded by register-register move
11345//   peepmatch ( incI_rReg movI );
11346//   // require that the destination register of the increment
11347//   // match the destination register of the move
11348//   peepconstraint ( 0.dst == 1.dst );
11349//   // construct a replacement instruction that sets
11350//   // the destination to ( move's source register + one )
11351//   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
11352// %}
11353//
11354
11355// Implementation no longer uses movX instructions since
11356// machine-independent system no longer uses CopyX nodes.
11357//
11358// peephole
11359// %{
11360//   peepmatch (incI_rReg movI);
11361//   peepconstraint (0.dst == 1.dst);
11362//   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
11363// %}
11364
11365// peephole
11366// %{
11367//   peepmatch (decI_rReg movI);
11368//   peepconstraint (0.dst == 1.dst);
11369//   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
11370// %}
11371
11372// peephole
11373// %{
11374//   peepmatch (addI_rReg_imm movI);
11375//   peepconstraint (0.dst == 1.dst);
11376//   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
11377// %}
11378
11379// peephole
11380// %{
11381//   peepmatch (incL_rReg movL);
11382//   peepconstraint (0.dst == 1.dst);
11383//   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
11384// %}
11385
11386// peephole
11387// %{
11388//   peepmatch (decL_rReg movL);
11389//   peepconstraint (0.dst == 1.dst);
11390//   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
11391// %}
11392
11393// peephole
11394// %{
11395//   peepmatch (addL_rReg_imm movL);
11396//   peepconstraint (0.dst == 1.dst);
11397//   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
11398// %}
11399
11400// peephole
11401// %{
11402//   peepmatch (addP_rReg_imm movP);
11403//   peepconstraint (0.dst == 1.dst);
11404//   peepreplace (leaP_rReg_imm(0.dst 1.src 0.src));
11405// %}
11406
11407// // Change load of spilled value to only a spill
11408// instruct storeI(memory mem, rRegI src)
11409// %{
11410//   match(Set mem (StoreI mem src));
11411// %}
11412//
11413// instruct loadI(rRegI dst, memory mem)
11414// %{
11415//   match(Set dst (LoadI mem));
11416// %}
11417//
11418
// Peephole: a 32-bit load (root, instruction 0) immediately preceded by a
// store (instruction 1) is replaced by the store alone.  The constraint
// requires the store's source register to be the load's destination and
// both to use the same memory operand, so the load would only re-read the
// value that was just stored.
11419peephole
11420%{
11421  peepmatch (loadI storeI);
11422  peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
11423  peepreplace (storeI(1.mem 1.mem 1.src));
11424%}
11425
// Peephole: the 64-bit (long) counterpart of the loadI/storeI rule.  A load
// (root, instruction 0) immediately preceded by a store (instruction 1) of
// the same register to the same memory operand is replaced by the store
// alone.
11426peephole
11427%{
11428  peepmatch (loadL storeL);
11429  peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
11430  peepreplace (storeL(1.mem 1.mem 1.src));
11431%}
11432
11433//----------SMARTSPILL RULES---------------------------------------------------
11434// These must follow all instruction definitions as they use the names
11435// defined in the instruction definitions.
11436