1//
2// Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
3// Copyright (c) 2014, Red Hat Inc. All rights reserved.
4// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5//
6// This code is free software; you can redistribute it and/or modify it
7// under the terms of the GNU General Public License version 2 only, as
8// published by the Free Software Foundation.
9//
10// This code is distributed in the hope that it will be useful, but WITHOUT
11// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
13// version 2 for more details (a copy is included in the LICENSE file that
14// accompanied this code).
15//
16// You should have received a copy of the GNU General Public License version
17// 2 along with this work; if not, write to the Free Software Foundation,
18// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19//
20// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21// or visit www.oracle.com if you need additional information or have any
22// questions.
23//
24//
25
26// AArch64 Architecture Description File
27
28//----------REGISTER DEFINITION BLOCK------------------------------------------
29// This information is used by the matcher and the register allocator to
30// describe individual registers and classes of registers within the target
31// archtecture.
32
33register %{
34//----------Architecture Description Register Definitions----------------------
35// General Registers
36// "reg_def"  name ( register save type, C convention save type,
37//                   ideal register type, encoding );
38// Register Save Types:
39//
40// NS  = No-Save:       The register allocator assumes that these registers
41//                      can be used without saving upon entry to the method, &
42//                      that they do not need to be saved at call sites.
43//
44// SOC = Save-On-Call:  The register allocator assumes that these registers
45//                      can be used without saving upon entry to the method,
46//                      but that they must be saved at call sites.
47//
48// SOE = Save-On-Entry: The register allocator assumes that these registers
49//                      must be saved before using them upon entry to the
50//                      method, but they do not need to be saved at call
51//                      sites.
52//
53// AS  = Always-Save:   The register allocator assumes that these registers
54//                      must be saved before using them upon entry to the
55//                      method, & that they must be saved at call sites.
56//
57// Ideal Register Type is used to determine how to save & restore a
58// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
59// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
60//
61// The encoding number is the actual bit-pattern placed into the opcodes.
62
63// We must define the 64 bit int registers in two 32 bit halves, the
64// real lower register and a virtual upper half register. upper halves
65// are used by the register allocator but are not actually supplied as
66// operands to memory ops.
67//
68// follow the C1 compiler in making registers
69//
70//   r0-r7,r10-r26 volatile (caller save)
71//   r27-r32 system (no save, no allocate)
72//   r8-r9 invisible to the allocator (so we can use them as scratch regs)
73//
74// as regards Java usage. we don't use any callee save registers
75// because this makes it difficult to de-optimise a frame (see comment
76// in x86 implementation of Deoptimization::unwind_callee_save_values)
77//
78
79// General Registers
80
81reg_def R0      ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()         );
82reg_def R0_H    ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()->next() );
83reg_def R1      ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()         );
84reg_def R1_H    ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()->next() );
85reg_def R2      ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()         );
86reg_def R2_H    ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()->next() );
87reg_def R3      ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()         );
88reg_def R3_H    ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()->next() );
89reg_def R4      ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()         );
90reg_def R4_H    ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()->next() );
91reg_def R5      ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()         );
92reg_def R5_H    ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()->next() );
93reg_def R6      ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()         );
94reg_def R6_H    ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()->next() );
95reg_def R7      ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()         );
96reg_def R7_H    ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()->next() );
97reg_def R10     ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()        );
98reg_def R10_H   ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
99reg_def R11     ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()        );
100reg_def R11_H   ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
101reg_def R12     ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()        );
102reg_def R12_H   ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()->next());
103reg_def R13     ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()        );
104reg_def R13_H   ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()->next());
105reg_def R14     ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()        );
106reg_def R14_H   ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()->next());
107reg_def R15     ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()        );
108reg_def R15_H   ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()->next());
109reg_def R16     ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()        );
110reg_def R16_H   ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
111reg_def R17     ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()        );
112reg_def R17_H   ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
113reg_def R18     ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()        );
114reg_def R18_H   ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
115reg_def R19     ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()        );
116reg_def R19_H   ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()->next());
117reg_def R20     ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()        ); // caller esp
118reg_def R20_H   ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()->next());
119reg_def R21     ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()        );
120reg_def R21_H   ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()->next());
121reg_def R22     ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()        );
122reg_def R22_H   ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()->next());
123reg_def R23     ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()        );
124reg_def R23_H   ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()->next());
125reg_def R24     ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()        );
126reg_def R24_H   ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()->next());
127reg_def R25     ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()        );
128reg_def R25_H   ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()->next());
129reg_def R26     ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()        );
130reg_def R26_H   ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()->next());
131reg_def R27     (  NS, SOE, Op_RegI, 27, r27->as_VMReg()        ); // heapbase
132reg_def R27_H   (  NS, SOE, Op_RegI, 27, r27->as_VMReg()->next());
133reg_def R28     (  NS, SOE, Op_RegI, 28, r28->as_VMReg()        ); // thread
134reg_def R28_H   (  NS, SOE, Op_RegI, 28, r28->as_VMReg()->next());
135reg_def R29     (  NS,  NS, Op_RegI, 29, r29->as_VMReg()        ); // fp
136reg_def R29_H   (  NS,  NS, Op_RegI, 29, r29->as_VMReg()->next());
137reg_def R30     (  NS,  NS, Op_RegI, 30, r30->as_VMReg()        ); // lr
138reg_def R30_H   (  NS,  NS, Op_RegI, 30, r30->as_VMReg()->next());
139reg_def R31     (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()     ); // sp
140reg_def R31_H   (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
141
142// ----------------------------
143// Float/Double Registers
144// ----------------------------
145
146// Double Registers
147
148// The rules of ADL require that double registers be defined in pairs.
149// Each pair must be two 32-bit values, but not necessarily a pair of
150// single float registers. In each pair, ADLC-assigned register numbers
151// must be adjacent, with the lower number even. Finally, when the
152// CPU stores such a register pair to memory, the word associated with
153// the lower ADLC-assigned number must be stored to the lower address.
154
155// AArch64 has 32 floating-point registers. Each can store a vector of
156// single or double precision floating-point values up to 8 * 32
157// floats, 4 * 64 bit floats or 2 * 128 bit floats.  We currently only
158// use the first float or double element of the vector.
159
160// for Java use float registers v0-v15 are always save on call whereas
161// the platform ABI treats v8-v15 as callee save). float registers
162// v16-v31 are SOC as per the platform spec
163
164  reg_def V0   ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()          );
165  reg_def V0_H ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next()  );
166  reg_def V0_J ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(2) );
167  reg_def V0_K ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(3) );
168
169  reg_def V1   ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()          );
170  reg_def V1_H ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next()  );
171  reg_def V1_J ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(2) );
172  reg_def V1_K ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(3) );
173
174  reg_def V2   ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()          );
175  reg_def V2_H ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next()  );
176  reg_def V2_J ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(2) );
177  reg_def V2_K ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(3) );
178
179  reg_def V3   ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()          );
180  reg_def V3_H ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next()  );
181  reg_def V3_J ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(2) );
182  reg_def V3_K ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(3) );
183
184  reg_def V4   ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()          );
185  reg_def V4_H ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next()  );
186  reg_def V4_J ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(2) );
187  reg_def V4_K ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(3) );
188
189  reg_def V5   ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()          );
190  reg_def V5_H ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next()  );
191  reg_def V5_J ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(2) );
192  reg_def V5_K ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(3) );
193
194  reg_def V6   ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()          );
195  reg_def V6_H ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next()  );
196  reg_def V6_J ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(2) );
197  reg_def V6_K ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(3) );
198
199  reg_def V7   ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()          );
200  reg_def V7_H ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next()  );
201  reg_def V7_J ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(2) );
202  reg_def V7_K ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(3) );
203
204  reg_def V8   ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()          );
205  reg_def V8_H ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next()  );
206  reg_def V8_J ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(2) );
207  reg_def V8_K ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(3) );
208
209  reg_def V9   ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()          );
210  reg_def V9_H ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next()  );
211  reg_def V9_J ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(2) );
212  reg_def V9_K ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(3) );
213
214  reg_def V10  ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()         );
215  reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() );
216  reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2));
217  reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3));
218
219  reg_def V11  ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()         );
220  reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() );
221  reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2));
222  reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3));
223
224  reg_def V12  ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()         );
225  reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() );
226  reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2));
227  reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3));
228
229  reg_def V13  ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()         );
230  reg_def V13_H( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() );
231  reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2));
232  reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3));
233
234  reg_def V14  ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()         );
235  reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() );
236  reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2));
237  reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3));
238
239  reg_def V15  ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()         );
240  reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() );
241  reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2));
242  reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3));
243
244  reg_def V16  ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()         );
245  reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() );
246  reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2));
247  reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3));
248
249  reg_def V17  ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()         );
250  reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() );
251  reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2));
252  reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3));
253
254  reg_def V18  ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()         );
255  reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() );
256  reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2));
257  reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3));
258
259  reg_def V19  ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()         );
260  reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() );
261  reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2));
262  reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3));
263
264  reg_def V20  ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()         );
265  reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() );
266  reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2));
267  reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3));
268
269  reg_def V21  ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()         );
270  reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() );
271  reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2));
272  reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3));
273
274  reg_def V22  ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()         );
275  reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() );
276  reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2));
277  reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3));
278
279  reg_def V23  ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()         );
280  reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() );
281  reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2));
282  reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3));
283
284  reg_def V24  ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()         );
285  reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() );
286  reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2));
287  reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3));
288
289  reg_def V25  ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()         );
290  reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() );
291  reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2));
292  reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3));
293
294  reg_def V26  ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()         );
295  reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() );
296  reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2));
297  reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3));
298
299  reg_def V27  ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()         );
300  reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() );
301  reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2));
302  reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3));
303
304  reg_def V28  ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()         );
305  reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() );
306  reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2));
307  reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3));
308
309  reg_def V29  ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()         );
310  reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() );
311  reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2));
312  reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3));
313
314  reg_def V30  ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()         );
315  reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() );
316  reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2));
317  reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3));
318
319  reg_def V31  ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()         );
320  reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() );
321  reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2));
322  reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3));
323
324// ----------------------------
325// Special Registers
326// ----------------------------
327
328// the AArch64 CSPR status flag register is not directly acessible as
329// instruction operand. the FPSR status flag register is a system
330// register which can be written/read using MSR/MRS but again does not
331// appear as an operand (a code identifying the FSPR occurs as an
332// immediate value in the instruction).
333
334reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad());
335
336
337// Specify priority of register selection within phases of register
338// allocation.  Highest priority is first.  A useful heuristic is to
339// give registers a low priority when they are required by machine
340// instructions, like EAX and EDX on I486, and choose no-save registers
341// before save-on-call, & save-on-call before save-on-entry.  Registers
342// which participate in fixed calling sequences should come last.
343// Registers which are used as pairs must fall on an even boundary.
344
345alloc_class chunk0(
346    // volatiles
347    R10, R10_H,
348    R11, R11_H,
349    R12, R12_H,
350    R13, R13_H,
351    R14, R14_H,
352    R15, R15_H,
353    R16, R16_H,
354    R17, R17_H,
355    R18, R18_H,
356
357    // arg registers
358    R0, R0_H,
359    R1, R1_H,
360    R2, R2_H,
361    R3, R3_H,
362    R4, R4_H,
363    R5, R5_H,
364    R6, R6_H,
365    R7, R7_H,
366
367    // non-volatiles
368    R19, R19_H,
369    R20, R20_H,
370    R21, R21_H,
371    R22, R22_H,
372    R23, R23_H,
373    R24, R24_H,
374    R25, R25_H,
375    R26, R26_H,
376
377    // non-allocatable registers
378
379    R27, R27_H, // heapbase
380    R28, R28_H, // thread
381    R29, R29_H, // fp
382    R30, R30_H, // lr
383    R31, R31_H, // sp
384);
385
386alloc_class chunk1(
387
388    // no save
389    V16, V16_H, V16_J, V16_K,
390    V17, V17_H, V17_J, V17_K,
391    V18, V18_H, V18_J, V18_K,
392    V19, V19_H, V19_J, V19_K,
393    V20, V20_H, V20_J, V20_K,
394    V21, V21_H, V21_J, V21_K,
395    V22, V22_H, V22_J, V22_K,
396    V23, V23_H, V23_J, V23_K,
397    V24, V24_H, V24_J, V24_K,
398    V25, V25_H, V25_J, V25_K,
399    V26, V26_H, V26_J, V26_K,
400    V27, V27_H, V27_J, V27_K,
401    V28, V28_H, V28_J, V28_K,
402    V29, V29_H, V29_J, V29_K,
403    V30, V30_H, V30_J, V30_K,
404    V31, V31_H, V31_J, V31_K,
405
406    // arg registers
407    V0, V0_H, V0_J, V0_K,
408    V1, V1_H, V1_J, V1_K,
409    V2, V2_H, V2_J, V2_K,
410    V3, V3_H, V3_J, V3_K,
411    V4, V4_H, V4_J, V4_K,
412    V5, V5_H, V5_J, V5_K,
413    V6, V6_H, V6_J, V6_K,
414    V7, V7_H, V7_J, V7_K,
415
416    // non-volatiles
417    V8, V8_H, V8_J, V8_K,
418    V9, V9_H, V9_J, V9_K,
419    V10, V10_H, V10_J, V10_K,
420    V11, V11_H, V11_J, V11_K,
421    V12, V12_H, V12_J, V12_K,
422    V13, V13_H, V13_J, V13_K,
423    V14, V14_H, V14_J, V14_K,
424    V15, V15_H, V15_J, V15_K,
425);
426
427alloc_class chunk2(RFLAGS);
428
429//----------Architecture Description Register Classes--------------------------
430// Several register classes are automatically defined based upon information in
431// this architecture description.
432// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
433// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
434// 2) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
435// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
436//
437
438// Class for all 32 bit integer registers -- excludes SP which will
439// never be used as an integer register
440reg_class any_reg32(
441    R0,
442    R1,
443    R2,
444    R3,
445    R4,
446    R5,
447    R6,
448    R7,
449    R10,
450    R11,
451    R12,
452    R13,
453    R14,
454    R15,
455    R16,
456    R17,
457    R18,
458    R19,
459    R20,
460    R21,
461    R22,
462    R23,
463    R24,
464    R25,
465    R26,
466    R27,
467    R28,
468    R29,
469    R30
470);
471
472// Singleton class for R0 int register
473reg_class int_r0_reg(R0);
474
475// Singleton class for R2 int register
476reg_class int_r2_reg(R2);
477
478// Singleton class for R3 int register
479reg_class int_r3_reg(R3);
480
481// Singleton class for R4 int register
482reg_class int_r4_reg(R4);
483
484// Class for all long integer registers (including RSP)
485reg_class any_reg(
486    R0, R0_H,
487    R1, R1_H,
488    R2, R2_H,
489    R3, R3_H,
490    R4, R4_H,
491    R5, R5_H,
492    R6, R6_H,
493    R7, R7_H,
494    R10, R10_H,
495    R11, R11_H,
496    R12, R12_H,
497    R13, R13_H,
498    R14, R14_H,
499    R15, R15_H,
500    R16, R16_H,
501    R17, R17_H,
502    R18, R18_H,
503    R19, R19_H,
504    R20, R20_H,
505    R21, R21_H,
506    R22, R22_H,
507    R23, R23_H,
508    R24, R24_H,
509    R25, R25_H,
510    R26, R26_H,
511    R27, R27_H,
512    R28, R28_H,
513    R29, R29_H,
514    R30, R30_H,
515    R31, R31_H
516);
517
518// Class for all non-special integer registers
519reg_class no_special_reg32_no_fp(
520    R0,
521    R1,
522    R2,
523    R3,
524    R4,
525    R5,
526    R6,
527    R7,
528    R10,
529    R11,
530    R12,                        // rmethod
531    R13,
532    R14,
533    R15,
534    R16,
535    R17,
536    R18,
537    R19,
538    R20,
539    R21,
540    R22,
541    R23,
542    R24,
543    R25,
544    R26
545 /* R27, */                     // heapbase
546 /* R28, */                     // thread
547 /* R29, */                     // fp
548 /* R30, */                     // lr
549 /* R31 */                      // sp
550);
551
552reg_class no_special_reg32_with_fp(
553    R0,
554    R1,
555    R2,
556    R3,
557    R4,
558    R5,
559    R6,
560    R7,
561    R10,
562    R11,
563    R12,                        // rmethod
564    R13,
565    R14,
566    R15,
567    R16,
568    R17,
569    R18,
570    R19,
571    R20,
572    R21,
573    R22,
574    R23,
575    R24,
576    R25,
577    R26
578 /* R27, */                     // heapbase
579 /* R28, */                     // thread
580    R29,                        // fp
581 /* R30, */                     // lr
582 /* R31 */                      // sp
583);
584
585reg_class_dynamic no_special_reg32(no_special_reg32_no_fp, no_special_reg32_with_fp, %{ PreserveFramePointer %});
586
587// Class for all non-special long integer registers
588reg_class no_special_reg_no_fp(
589    R0, R0_H,
590    R1, R1_H,
591    R2, R2_H,
592    R3, R3_H,
593    R4, R4_H,
594    R5, R5_H,
595    R6, R6_H,
596    R7, R7_H,
597    R10, R10_H,
598    R11, R11_H,
599    R12, R12_H,                 // rmethod
600    R13, R13_H,
601    R14, R14_H,
602    R15, R15_H,
603    R16, R16_H,
604    R17, R17_H,
605    R18, R18_H,
606    R19, R19_H,
607    R20, R20_H,
608    R21, R21_H,
609    R22, R22_H,
610    R23, R23_H,
611    R24, R24_H,
612    R25, R25_H,
613    R26, R26_H,
614 /* R27, R27_H, */              // heapbase
615 /* R28, R28_H, */              // thread
616 /* R29, R29_H, */              // fp
617 /* R30, R30_H, */              // lr
618 /* R31, R31_H */               // sp
619);
620
621reg_class no_special_reg_with_fp(
622    R0, R0_H,
623    R1, R1_H,
624    R2, R2_H,
625    R3, R3_H,
626    R4, R4_H,
627    R5, R5_H,
628    R6, R6_H,
629    R7, R7_H,
630    R10, R10_H,
631    R11, R11_H,
632    R12, R12_H,                 // rmethod
633    R13, R13_H,
634    R14, R14_H,
635    R15, R15_H,
636    R16, R16_H,
637    R17, R17_H,
638    R18, R18_H,
639    R19, R19_H,
640    R20, R20_H,
641    R21, R21_H,
642    R22, R22_H,
643    R23, R23_H,
644    R24, R24_H,
645    R25, R25_H,
646    R26, R26_H,
647 /* R27, R27_H, */              // heapbase
648 /* R28, R28_H, */              // thread
649    R29, R29_H,                 // fp
650 /* R30, R30_H, */              // lr
651 /* R31, R31_H */               // sp
652);
653
654reg_class_dynamic no_special_reg(no_special_reg_no_fp, no_special_reg_with_fp, %{ PreserveFramePointer %});
655
656// Class for 64 bit register r0
657reg_class r0_reg(
658    R0, R0_H
659);
660
661// Class for 64 bit register r1
662reg_class r1_reg(
663    R1, R1_H
664);
665
666// Class for 64 bit register r2
667reg_class r2_reg(
668    R2, R2_H
669);
670
671// Class for 64 bit register r3
672reg_class r3_reg(
673    R3, R3_H
674);
675
676// Class for 64 bit register r4
677reg_class r4_reg(
678    R4, R4_H
679);
680
681// Class for 64 bit register r5
682reg_class r5_reg(
683    R5, R5_H
684);
685
686// Class for 64 bit register r10
687reg_class r10_reg(
688    R10, R10_H
689);
690
691// Class for 64 bit register r11
692reg_class r11_reg(
693    R11, R11_H
694);
695
696// Class for method register
697reg_class method_reg(
698    R12, R12_H
699);
700
701// Class for heapbase register
702reg_class heapbase_reg(
703    R27, R27_H
704);
705
706// Class for thread register
707reg_class thread_reg(
708    R28, R28_H
709);
710
711// Class for frame pointer register
712reg_class fp_reg(
713    R29, R29_H
714);
715
716// Class for link register
717reg_class lr_reg(
718    R30, R30_H
719);
720
721// Class for long sp register
722reg_class sp_reg(
723  R31, R31_H
724);
725
726// Class for all pointer registers
727reg_class ptr_reg(
728    R0, R0_H,
729    R1, R1_H,
730    R2, R2_H,
731    R3, R3_H,
732    R4, R4_H,
733    R5, R5_H,
734    R6, R6_H,
735    R7, R7_H,
736    R10, R10_H,
737    R11, R11_H,
738    R12, R12_H,
739    R13, R13_H,
740    R14, R14_H,
741    R15, R15_H,
742    R16, R16_H,
743    R17, R17_H,
744    R18, R18_H,
745    R19, R19_H,
746    R20, R20_H,
747    R21, R21_H,
748    R22, R22_H,
749    R23, R23_H,
750    R24, R24_H,
751    R25, R25_H,
752    R26, R26_H,
753    R27, R27_H,
754    R28, R28_H,
755    R29, R29_H,
756    R30, R30_H,
757    R31, R31_H
758);
759
760// Class for all non_special pointer registers
761reg_class no_special_ptr_reg(
762    R0, R0_H,
763    R1, R1_H,
764    R2, R2_H,
765    R3, R3_H,
766    R4, R4_H,
767    R5, R5_H,
768    R6, R6_H,
769    R7, R7_H,
770    R10, R10_H,
771    R11, R11_H,
772    R12, R12_H,
773    R13, R13_H,
774    R14, R14_H,
775    R15, R15_H,
776    R16, R16_H,
777    R17, R17_H,
778    R18, R18_H,
779    R19, R19_H,
780    R20, R20_H,
781    R21, R21_H,
782    R22, R22_H,
783    R23, R23_H,
784    R24, R24_H,
785    R25, R25_H,
786    R26, R26_H,
787 /* R27, R27_H, */              // heapbase
788 /* R28, R28_H, */              // thread
789 /* R29, R29_H, */              // fp
790 /* R30, R30_H, */              // lr
791 /* R31, R31_H */               // sp
792);
793
794// Class for all float registers
795reg_class float_reg(
796    V0,
797    V1,
798    V2,
799    V3,
800    V4,
801    V5,
802    V6,
803    V7,
804    V8,
805    V9,
806    V10,
807    V11,
808    V12,
809    V13,
810    V14,
811    V15,
812    V16,
813    V17,
814    V18,
815    V19,
816    V20,
817    V21,
818    V22,
819    V23,
820    V24,
821    V25,
822    V26,
823    V27,
824    V28,
825    V29,
826    V30,
827    V31
828);
829
830// Double precision float registers have virtual `high halves' that
831// are needed by the allocator.
832// Class for all double registers
833reg_class double_reg(
834    V0, V0_H,
835    V1, V1_H,
836    V2, V2_H,
837    V3, V3_H,
838    V4, V4_H,
839    V5, V5_H,
840    V6, V6_H,
841    V7, V7_H,
842    V8, V8_H,
843    V9, V9_H,
844    V10, V10_H,
845    V11, V11_H,
846    V12, V12_H,
847    V13, V13_H,
848    V14, V14_H,
849    V15, V15_H,
850    V16, V16_H,
851    V17, V17_H,
852    V18, V18_H,
853    V19, V19_H,
854    V20, V20_H,
855    V21, V21_H,
856    V22, V22_H,
857    V23, V23_H,
858    V24, V24_H,
859    V25, V25_H,
860    V26, V26_H,
861    V27, V27_H,
862    V28, V28_H,
863    V29, V29_H,
864    V30, V30_H,
865    V31, V31_H
866);
867
868// Class for all 64bit vector registers
869reg_class vectord_reg(
870    V0, V0_H,
871    V1, V1_H,
872    V2, V2_H,
873    V3, V3_H,
874    V4, V4_H,
875    V5, V5_H,
876    V6, V6_H,
877    V7, V7_H,
878    V8, V8_H,
879    V9, V9_H,
880    V10, V10_H,
881    V11, V11_H,
882    V12, V12_H,
883    V13, V13_H,
884    V14, V14_H,
885    V15, V15_H,
886    V16, V16_H,
887    V17, V17_H,
888    V18, V18_H,
889    V19, V19_H,
890    V20, V20_H,
891    V21, V21_H,
892    V22, V22_H,
893    V23, V23_H,
894    V24, V24_H,
895    V25, V25_H,
896    V26, V26_H,
897    V27, V27_H,
898    V28, V28_H,
899    V29, V29_H,
900    V30, V30_H,
901    V31, V31_H
902);
903
904// Class for all 128bit vector registers
905reg_class vectorx_reg(
906    V0, V0_H, V0_J, V0_K,
907    V1, V1_H, V1_J, V1_K,
908    V2, V2_H, V2_J, V2_K,
909    V3, V3_H, V3_J, V3_K,
910    V4, V4_H, V4_J, V4_K,
911    V5, V5_H, V5_J, V5_K,
912    V6, V6_H, V6_J, V6_K,
913    V7, V7_H, V7_J, V7_K,
914    V8, V8_H, V8_J, V8_K,
915    V9, V9_H, V9_J, V9_K,
916    V10, V10_H, V10_J, V10_K,
917    V11, V11_H, V11_J, V11_K,
918    V12, V12_H, V12_J, V12_K,
919    V13, V13_H, V13_J, V13_K,
920    V14, V14_H, V14_J, V14_K,
921    V15, V15_H, V15_J, V15_K,
922    V16, V16_H, V16_J, V16_K,
923    V17, V17_H, V17_J, V17_K,
924    V18, V18_H, V18_J, V18_K,
925    V19, V19_H, V19_J, V19_K,
926    V20, V20_H, V20_J, V20_K,
927    V21, V21_H, V21_J, V21_K,
928    V22, V22_H, V22_J, V22_K,
929    V23, V23_H, V23_J, V23_K,
930    V24, V24_H, V24_J, V24_K,
931    V25, V25_H, V25_J, V25_K,
932    V26, V26_H, V26_J, V26_K,
933    V27, V27_H, V27_J, V27_K,
934    V28, V28_H, V28_J, V28_K,
935    V29, V29_H, V29_J, V29_K,
936    V30, V30_H, V30_J, V30_K,
937    V31, V31_H, V31_J, V31_K
938);
939
940// Class for 128 bit register v0
941reg_class v0_reg(
942    V0, V0_H
943);
944
945// Class for 128 bit register v1
946reg_class v1_reg(
947    V1, V1_H
948);
949
950// Class for 128 bit register v2
951reg_class v2_reg(
952    V2, V2_H
953);
954
955// Class for 128 bit register v3
956reg_class v3_reg(
957    V3, V3_H
958);
959
960// Singleton class for condition codes
961reg_class int_flags(RFLAGS);
962
963%}
964
965//----------DEFINITION BLOCK---------------------------------------------------
966// Define name --> value mappings to inform the ADLC of an integer valued name
967// Current support includes integer values in the range [0, 0x7FFFFFFF]
968// Format:
969//        int_def  <name>         ( <int_value>, <expression>);
970// Generated Code in ad_<arch>.hpp
971//        #define  <name>   (<expression>)
972//        // value == <int_value>
973// Generated code in ad_<arch>.cpp adlc_verification()
974//        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
975//
976
977// we follow the ppc-aix port in using a simple cost model which ranks
978// register operations as cheap, memory ops as more expensive and
979// branches as most expensive. the first two have a low as well as a
980// normal cost. huge cost appears to be a way of saying don't do
981// something
982
983definitions %{
984  // The default cost (of a register move instruction).
985  int_def INSN_COST            (    100,     100);
986  int_def BRANCH_COST          (    200,     2 * INSN_COST);
987  int_def CALL_COST            (    200,     2 * INSN_COST);
988  int_def VOLATILE_REF_COST    (   1000,     10 * INSN_COST);
989%}
990
991
992//----------SOURCE BLOCK-------------------------------------------------------
993// This is a block of C++ code which provides values, functions, and
994// definitions necessary in the rest of the architecture description
995
996source_hpp %{
997
998#include "gc/shared/cardTableModRefBS.hpp"
999#include "opto/addnode.hpp"
1000
1001class CallStubImpl {
1002
1003  //--------------------------------------------------------------
1004  //---<  Used for optimization in Compile::shorten_branches  >---
1005  //--------------------------------------------------------------
1006
1007 public:
1008  // Size of call trampoline stub.
1009  static uint size_call_trampoline() {
1010    return 0; // no call trampolines on this platform
1011  }
1012
1013  // number of relocations needed by a call trampoline stub
1014  static uint reloc_call_trampoline() {
1015    return 0; // no call trampolines on this platform
1016  }
1017};
1018
1019class HandlerImpl {
1020
1021 public:
1022
1023  static int emit_exception_handler(CodeBuffer &cbuf);
1024  static int emit_deopt_handler(CodeBuffer& cbuf);
1025
1026  static uint size_exception_handler() {
1027    return MacroAssembler::far_branch_size();
1028  }
1029
1030  static uint size_deopt_handler() {
1031    // count one adr and one far branch instruction
1032    return 4 * NativeInstruction::instruction_size;
1033  }
1034};
1035
1036  // graph traversal helpers
1037
1038  MemBarNode *parent_membar(const Node *n);
1039  MemBarNode *child_membar(const MemBarNode *n);
1040  bool leading_membar(const MemBarNode *barrier);
1041
1042  bool is_card_mark_membar(const MemBarNode *barrier);
1043  bool is_CAS(int opcode);
1044
1045  MemBarNode *leading_to_normal(MemBarNode *leading);
1046  MemBarNode *normal_to_leading(const MemBarNode *barrier);
1047  MemBarNode *card_mark_to_trailing(const MemBarNode *barrier);
1048  MemBarNode *trailing_to_card_mark(const MemBarNode *trailing);
1049  MemBarNode *trailing_to_leading(const MemBarNode *trailing);
1050
1051  // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
1052
1053  bool unnecessary_acquire(const Node *barrier);
1054  bool needs_acquiring_load(const Node *load);
1055
1056  // predicates controlling emit of str<x>/stlr<x> and associated dmbs
1057
1058  bool unnecessary_release(const Node *barrier);
1059  bool unnecessary_volatile(const Node *barrier);
1060  bool needs_releasing_store(const Node *store);
1061
1062  // predicate controlling translation of CompareAndSwapX
1063  bool needs_acquiring_load_exclusive(const Node *load);
1064
1065  // predicate controlling translation of StoreCM
1066  bool unnecessary_storestore(const Node *storecm);
1067
1068  // predicate controlling addressing modes
1069  bool size_fits_all_mem_uses(AddPNode* addp, int shift);
1070%}
1071
1072source %{
1073
1074  // Optimizaton of volatile gets and puts
1075  // -------------------------------------
1076  //
1077  // AArch64 has ldar<x> and stlr<x> instructions which we can safely
1078  // use to implement volatile reads and writes. For a volatile read
1079  // we simply need
1080  //
1081  //   ldar<x>
1082  //
1083  // and for a volatile write we need
1084  //
1085  //   stlr<x>
1086  //
1087  // Alternatively, we can implement them by pairing a normal
1088  // load/store with a memory barrier. For a volatile read we need
1089  //
1090  //   ldr<x>
1091  //   dmb ishld
1092  //
1093  // for a volatile write
1094  //
1095  //   dmb ish
1096  //   str<x>
1097  //   dmb ish
1098  //
1099  // We can also use ldaxr and stlxr to implement compare and swap CAS
1100  // sequences. These are normally translated to an instruction
1101  // sequence like the following
1102  //
1103  //   dmb      ish
1104  // retry:
1105  //   ldxr<x>   rval raddr
1106  //   cmp       rval rold
1107  //   b.ne done
1108  //   stlxr<x>  rval, rnew, rold
1109  //   cbnz      rval retry
1110  // done:
1111  //   cset      r0, eq
1112  //   dmb ishld
1113  //
1114  // Note that the exclusive store is already using an stlxr
1115  // instruction. That is required to ensure visibility to other
1116  // threads of the exclusive write (assuming it succeeds) before that
1117  // of any subsequent writes.
1118  //
1119  // The following instruction sequence is an improvement on the above
1120  //
1121  // retry:
1122  //   ldaxr<x>  rval raddr
1123  //   cmp       rval rold
1124  //   b.ne done
1125  //   stlxr<x>  rval, rnew, rold
1126  //   cbnz      rval retry
1127  // done:
1128  //   cset      r0, eq
1129  //
1130  // We don't need the leading dmb ish since the stlxr guarantees
1131  // visibility of prior writes in the case that the swap is
1132  // successful. Crucially we don't have to worry about the case where
1133  // the swap is not successful since no valid program should be
1134  // relying on visibility of prior changes by the attempting thread
1135  // in the case where the CAS fails.
1136  //
1137  // Similarly, we don't need the trailing dmb ishld if we substitute
1138  // an ldaxr instruction since that will provide all the guarantees we
1139  // require regarding observation of changes made by other threads
1140  // before any change to the CAS address observed by the load.
1141  //
1142  // In order to generate the desired instruction sequence we need to
1143  // be able to identify specific 'signature' ideal graph node
1144  // sequences which i) occur as a translation of a volatile reads or
1145  // writes or CAS operations and ii) do not occur through any other
1146  // translation or graph transformation. We can then provide
1147  // alternative aldc matching rules which translate these node
1148  // sequences to the desired machine code sequences. Selection of the
1149  // alternative rules can be implemented by predicates which identify
1150  // the relevant node sequences.
1151  //
1152  // The ideal graph generator translates a volatile read to the node
1153  // sequence
1154  //
1155  //   LoadX[mo_acquire]
1156  //   MemBarAcquire
1157  //
1158  // As a special case when using the compressed oops optimization we
1159  // may also see this variant
1160  //
1161  //   LoadN[mo_acquire]
1162  //   DecodeN
1163  //   MemBarAcquire
1164  //
1165  // A volatile write is translated to the node sequence
1166  //
1167  //   MemBarRelease
1168  //   StoreX[mo_release] {CardMark}-optional
1169  //   MemBarVolatile
1170  //
1171  // n.b. the above node patterns are generated with a strict
1172  // 'signature' configuration of input and output dependencies (see
1173  // the predicates below for exact details). The card mark may be as
1174  // simple as a few extra nodes or, in a few GC configurations, may
1175  // include more complex control flow between the leading and
1176  // trailing memory barriers. However, whatever the card mark
1177  // configuration these signatures are unique to translated volatile
1178  // reads/stores -- they will not appear as a result of any other
1179  // bytecode translation or inlining nor as a consequence of
1180  // optimizing transforms.
1181  //
1182  // We also want to catch inlined unsafe volatile gets and puts and
1183  // be able to implement them using either ldar<x>/stlr<x> or some
1184  // combination of ldr<x>/stlr<x> and dmb instructions.
1185  //
1186  // Inlined unsafe volatiles puts manifest as a minor variant of the
1187  // normal volatile put node sequence containing an extra cpuorder
1188  // membar
1189  //
1190  //   MemBarRelease
1191  //   MemBarCPUOrder
1192  //   StoreX[mo_release] {CardMark}-optional
1193  //   MemBarVolatile
1194  //
1195  // n.b. as an aside, the cpuorder membar is not itself subject to
1196  // matching and translation by adlc rules.  However, the rule
1197  // predicates need to detect its presence in order to correctly
1198  // select the desired adlc rules.
1199  //
1200  // Inlined unsafe volatile gets manifest as a somewhat different
1201  // node sequence to a normal volatile get
1202  //
1203  //   MemBarCPUOrder
1204  //        ||       \\
1205  //   MemBarAcquire LoadX[mo_acquire]
1206  //        ||
1207  //   MemBarCPUOrder
1208  //
1209  // In this case the acquire membar does not directly depend on the
1210  // load. However, we can be sure that the load is generated from an
1211  // inlined unsafe volatile get if we see it dependent on this unique
1212  // sequence of membar nodes. Similarly, given an acquire membar we
1213  // can know that it was added because of an inlined unsafe volatile
1214  // get if it is fed and feeds a cpuorder membar and if its feed
1215  // membar also feeds an acquiring load.
1216  //
1217  // Finally an inlined (Unsafe) CAS operation is translated to the
1218  // following ideal graph
1219  //
1220  //   MemBarRelease
1221  //   MemBarCPUOrder
1222  //   CompareAndSwapX {CardMark}-optional
1223  //   MemBarCPUOrder
1224  //   MemBarAcquire
1225  //
1226  // So, where we can identify these volatile read and write
1227  // signatures we can choose to plant either of the above two code
1228  // sequences. For a volatile read we can simply plant a normal
1229  // ldr<x> and translate the MemBarAcquire to a dmb. However, we can
1230  // also choose to inhibit translation of the MemBarAcquire and
1231  // inhibit planting of the ldr<x>, instead planting an ldar<x>.
1232  //
1233  // When we recognise a volatile store signature we can choose to
1234  // plant at a dmb ish as a translation for the MemBarRelease, a
1235  // normal str<x> and then a dmb ish for the MemBarVolatile.
1236  // Alternatively, we can inhibit translation of the MemBarRelease
1237  // and MemBarVolatile and instead plant a simple stlr<x>
1238  // instruction.
1239  //
1240  // when we recognise a CAS signature we can choose to plant a dmb
1241  // ish as a translation for the MemBarRelease, the conventional
1242  // macro-instruction sequence for the CompareAndSwap node (which
1243  // uses ldxr<x>) and then a dmb ishld for the MemBarAcquire.
1244  // Alternatively, we can elide generation of the dmb instructions
1245  // and plant the alternative CompareAndSwap macro-instruction
1246  // sequence (which uses ldaxr<x>).
1247  //
1248  // Of course, the above only applies when we see these signature
1249  // configurations. We still want to plant dmb instructions in any
1250  // other cases where we may see a MemBarAcquire, MemBarRelease or
1251  // MemBarVolatile. For example, at the end of a constructor which
1252  // writes final/volatile fields we will see a MemBarRelease
1253  // instruction and this needs a 'dmb ish' lest we risk the
1254  // constructed object being visible without making the
1255  // final/volatile field writes visible.
1256  //
1257  // n.b. the translation rules below which rely on detection of the
1258  // volatile signatures and insert ldar<x> or stlr<x> are failsafe.
1259  // If we see anything other than the signature configurations we
1260  // always just translate the loads and stores to ldr<x> and str<x>
1261  // and translate acquire, release and volatile membars to the
1262  // relevant dmb instructions.
1263  //
1264
1265  // graph traversal helpers used for volatile put/get and CAS
1266  // optimization
1267
1268  // 1) general purpose helpers
1269
1270  // if node n is linked to a parent MemBarNode by an intervening
1271  // Control and Memory ProjNode return the MemBarNode otherwise return
1272  // NULL.
1273  //
1274  // n may only be a Load or a MemBar.
1275
1276  MemBarNode *parent_membar(const Node *n)
1277  {
1278    Node *ctl = NULL;
1279    Node *mem = NULL;
1280    Node *membar = NULL;
1281
1282    if (n->is_Load()) {
1283      ctl = n->lookup(LoadNode::Control);
1284      mem = n->lookup(LoadNode::Memory);
1285    } else if (n->is_MemBar()) {
1286      ctl = n->lookup(TypeFunc::Control);
1287      mem = n->lookup(TypeFunc::Memory);
1288    } else {
1289	return NULL;
1290    }
1291
1292    if (!ctl || !mem || !ctl->is_Proj() || !mem->is_Proj()) {
1293      return NULL;
1294    }
1295
1296    membar = ctl->lookup(0);
1297
1298    if (!membar || !membar->is_MemBar()) {
1299      return NULL;
1300    }
1301
1302    if (mem->lookup(0) != membar) {
1303      return NULL;
1304    }
1305
1306    return membar->as_MemBar();
1307  }
1308
1309  // if n is linked to a child MemBarNode by intervening Control and
1310  // Memory ProjNodes return the MemBarNode otherwise return NULL.
1311
1312  MemBarNode *child_membar(const MemBarNode *n)
1313  {
1314    ProjNode *ctl = n->proj_out(TypeFunc::Control);
1315    ProjNode *mem = n->proj_out(TypeFunc::Memory);
1316
1317    // MemBar needs to have both a Ctl and Mem projection
1318    if (! ctl || ! mem)
1319      return NULL;
1320
1321    MemBarNode *child = NULL;
1322    Node *x;
1323
1324    for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
1325      x = ctl->fast_out(i);
1326      // if we see a membar we keep hold of it. we may also see a new
1327      // arena copy of the original but it will appear later
1328      if (x->is_MemBar()) {
1329	  child = x->as_MemBar();
1330	  break;
1331      }
1332    }
1333
1334    if (child == NULL) {
1335      return NULL;
1336    }
1337
1338    for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
1339      x = mem->fast_out(i);
1340      // if we see a membar we keep hold of it. we may also see a new
1341      // arena copy of the original but it will appear later
1342      if (x == child) {
1343	return child;
1344      }
1345    }
1346    return NULL;
1347  }
1348
1349  // helper predicate use to filter candidates for a leading memory
1350  // barrier
1351  //
1352  // returns true if barrier is a MemBarRelease or a MemBarCPUOrder
1353  // whose Ctl and Mem feeds come from a MemBarRelease otherwise false
1354
1355  bool leading_membar(const MemBarNode *barrier)
1356  {
1357    int opcode = barrier->Opcode();
1358    // if this is a release membar we are ok
1359    if (opcode == Op_MemBarRelease) {
1360      return true;
1361    }
1362    // if its a cpuorder membar . . .
1363    if (opcode != Op_MemBarCPUOrder) {
1364      return false;
1365    }
1366    // then the parent has to be a release membar
1367    MemBarNode *parent = parent_membar(barrier);
1368    if (!parent) {
1369      return false;
1370    }
1371    opcode = parent->Opcode();
1372    return opcode == Op_MemBarRelease;
1373  }
1374
1375  // 2) card mark detection helper
1376
1377  // helper predicate which can be used to detect a volatile membar
1378  // introduced as part of a conditional card mark sequence either by
1379  // G1 or by CMS when UseCondCardMark is true.
1380  //
1381  // membar can be definitively determined to be part of a card mark
1382  // sequence if and only if all the following hold
1383  //
1384  // i) it is a MemBarVolatile
1385  //
1386  // ii) either UseG1GC or (UseConcMarkSweepGC && UseCondCardMark) is
1387  // true
1388  //
1389  // iii) the node's Mem projection feeds a StoreCM node.
1390
1391  bool is_card_mark_membar(const MemBarNode *barrier)
1392  {
1393    if (!UseG1GC && !(UseConcMarkSweepGC && UseCondCardMark)) {
1394      return false;
1395    }
1396
1397    if (barrier->Opcode() != Op_MemBarVolatile) {
1398      return false;
1399    }
1400
1401    ProjNode *mem = barrier->proj_out(TypeFunc::Memory);
1402
1403    for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax ; i++) {
1404      Node *y = mem->fast_out(i);
1405      if (y->Opcode() == Op_StoreCM) {
1406	return true;
1407      }
1408    }
1409
1410    return false;
1411  }
1412
1413
1414  // 3) helper predicates to traverse volatile put or CAS graphs which
1415  // may contain GC barrier subgraphs
1416
1417  // Preamble
1418  // --------
1419  //
1420  // for volatile writes we can omit generating barriers and employ a
1421  // releasing store when we see a node sequence sequence with a
1422  // leading MemBarRelease and a trailing MemBarVolatile as follows
1423  //
1424  //   MemBarRelease
1425  //  {      ||      } -- optional
1426  //  {MemBarCPUOrder}
1427  //         ||     \\
1428  //         ||     StoreX[mo_release]
1429  //         | \     /
1430  //         | MergeMem
1431  //         | /
1432  //   MemBarVolatile
1433  //
1434  // where
1435  //  || and \\ represent Ctl and Mem feeds via Proj nodes
1436  //  | \ and / indicate further routing of the Ctl and Mem feeds
1437  //
1438  // this is the graph we see for non-object stores. however, for a
1439  // volatile Object store (StoreN/P) we may see other nodes below the
1440  // leading membar because of the need for a GC pre- or post-write
1441  // barrier.
1442  //
1443  // with most GC configurations we with see this simple variant which
1444  // includes a post-write barrier card mark.
1445  //
1446  //   MemBarRelease______________________________
1447  //         ||    \\               Ctl \        \\
1448  //         ||    StoreN/P[mo_release] CastP2X  StoreB/CM
1449  //         | \     /                       . . .  /
1450  //         | MergeMem
1451  //         | /
1452  //         ||      /
1453  //   MemBarVolatile
1454  //
1455  // i.e. the leading membar feeds Ctl to a CastP2X (which converts
1456  // the object address to an int used to compute the card offset) and
1457  // Ctl+Mem to a StoreB node (which does the actual card mark).
1458  //
1459  // n.b. a StoreCM node will only appear in this configuration when
1460  // using CMS. StoreCM differs from a normal card mark write (StoreB)
1461  // because it implies a requirement to order visibility of the card
1462  // mark (StoreCM) relative to the object put (StoreP/N) using a
1463  // StoreStore memory barrier (arguably this ought to be represented
1464  // explicitly in the ideal graph but that is not how it works). This
1465  // ordering is required for both non-volatile and volatile
1466  // puts. Normally that means we need to translate a StoreCM using
1467  // the sequence
1468  //
1469  //   dmb ishst
1470  //   stlrb
1471  //
1472  // However, in the case of a volatile put if we can recognise this
1473  // configuration and plant an stlr for the object write then we can
1474  // omit the dmb and just plant an strb since visibility of the stlr
1475  // is ordered before visibility of subsequent stores. StoreCM nodes
1476  // also arise when using G1 or using CMS with conditional card
1477  // marking. In these cases (as we shall see) we don't need to insert
1478  // the dmb when translating StoreCM because there is already an
1479  // intervening StoreLoad barrier between it and the StoreP/N.
1480  //
1481  // It is also possible to perform the card mark conditionally on it
1482  // currently being unmarked in which case the volatile put graph
1483  // will look slightly different
1484  //
1485  //   MemBarRelease____________________________________________
1486  //         ||    \\               Ctl \     Ctl \     \\  Mem \
1487  //         ||    StoreN/P[mo_release] CastP2X   If   LoadB     |
1488  //         | \     /                              \            |
1489  //         | MergeMem                            . . .      StoreB
1490  //         | /                                                /
1491  //         ||     /
1492  //   MemBarVolatile
1493  //
1494  // It is worth noting at this stage that both the above
1495  // configurations can be uniquely identified by checking that the
1496  // memory flow includes the following subgraph:
1497  //
1498  //   MemBarRelease
1499  //  {MemBarCPUOrder}
1500  //          |  \      . . .
1501  //          |  StoreX[mo_release]  . . .
1502  //          |   /
1503  //         MergeMem
1504  //          |
1505  //   MemBarVolatile
1506  //
1507  // This is referred to as a *normal* subgraph. It can easily be
1508  // detected starting from any candidate MemBarRelease,
1509  // StoreX[mo_release] or MemBarVolatile.
1510  //
1511  // A simple variation on this normal case occurs for an unsafe CAS
1512  // operation. The basic graph for a non-object CAS is
1513  //
1514  //   MemBarRelease
1515  //         ||
1516  //   MemBarCPUOrder
1517  //         ||     \\   . . .
1518  //         ||     CompareAndSwapX
1519  //         ||       |
1520  //         ||     SCMemProj
1521  //         | \     /
1522  //         | MergeMem
1523  //         | /
1524  //   MemBarCPUOrder
1525  //         ||
1526  //   MemBarAcquire
1527  //
1528  // The same basic variations on this arrangement (mutatis mutandis)
1529  // occur when a card mark is introduced. i.e. we se the same basic
1530  // shape but the StoreP/N is replaced with CompareAndSawpP/N and the
1531  // tail of the graph is a pair comprising a MemBarCPUOrder +
1532  // MemBarAcquire.
1533  //
1534  // So, in the case of a CAS the normal graph has the variant form
1535  //
1536  //   MemBarRelease
1537  //   MemBarCPUOrder
1538  //          |   \      . . .
1539  //          |  CompareAndSwapX  . . .
1540  //          |    |
1541  //          |   SCMemProj
1542  //          |   /  . . .
1543  //         MergeMem
1544  //          |
1545  //   MemBarCPUOrder
1546  //   MemBarAcquire
1547  //
1548  // This graph can also easily be detected starting from any
1549  // candidate MemBarRelease, CompareAndSwapX or MemBarAcquire.
1550  //
1551  // the code below uses two helper predicates, leading_to_normal and
1552  // normal_to_leading to identify these normal graphs, one validating
1553  // the layout starting from the top membar and searching down and
1554  // the other validating the layout starting from the lower membar
1555  // and searching up.
1556  //
1557  // There are two special case GC configurations when a normal graph
1558  // may not be generated: when using G1 (which always employs a
1559  // conditional card mark); and when using CMS with conditional card
1560  // marking configured. These GCs are both concurrent rather than
1561  // stop-the world GCs. So they introduce extra Ctl+Mem flow into the
1562  // graph between the leading and trailing membar nodes, in
1563  // particular enforcing stronger memory serialisation beween the
1564  // object put and the corresponding conditional card mark. CMS
1565  // employs a post-write GC barrier while G1 employs both a pre- and
1566  // post-write GC barrier. Of course the extra nodes may be absent --
1567  // they are only inserted for object puts. This significantly
1568  // complicates the task of identifying whether a MemBarRelease,
1569  // StoreX[mo_release] or MemBarVolatile forms part of a volatile put
1570  // when using these GC configurations (see below). It adds similar
1571  // complexity to the task of identifying whether a MemBarRelease,
1572  // CompareAndSwapX or MemBarAcquire forms part of a CAS.
1573  //
1574  // In both cases the post-write subtree includes an auxiliary
1575  // MemBarVolatile (StoreLoad barrier) separating the object put and
1576  // the read of the corresponding card. This poses two additional
1577  // problems.
1578  //
1579  // Firstly, a card mark MemBarVolatile needs to be distinguished
1580  // from a normal trailing MemBarVolatile. Resolving this first
1581  // problem is straightforward: a card mark MemBarVolatile always
1582  // projects a Mem feed to a StoreCM node and that is a unique marker
1583  //
1584  //      MemBarVolatile (card mark)
1585  //       C |    \     . . .
1586  //         |   StoreCM   . . .
1587  //       . . .
1588  //
1589  // The second problem is how the code generator is to translate the
1590  // card mark barrier? It always needs to be translated to a "dmb
1591  // ish" instruction whether or not it occurs as part of a volatile
1592  // put. A StoreLoad barrier is needed after the object put to ensure
1593  // i) visibility to GC threads of the object put and ii) visibility
1594  // to the mutator thread of any card clearing write by a GC
1595  // thread. Clearly a normal store (str) will not guarantee this
1596  // ordering but neither will a releasing store (stlr). The latter
1597  // guarantees that the object put is visible but does not guarantee
1598  // that writes by other threads have also been observed.
1599  //
1600  // So, returning to the task of translating the object put and the
1601  // leading/trailing membar nodes: what do the non-normal node graph
1602  // look like for these 2 special cases? and how can we determine the
1603  // status of a MemBarRelease, StoreX[mo_release] or MemBarVolatile
1604  // in both normal and non-normal cases?
1605  //
1606  // A CMS GC post-barrier wraps its card write (StoreCM) inside an If
1607  // which selects conditonal execution based on the value loaded
1608  // (LoadB) from the card. Ctl and Mem are fed to the If via an
1609  // intervening StoreLoad barrier (MemBarVolatile).
1610  //
1611  // So, with CMS we may see a node graph for a volatile object store
1612  // which looks like this
1613  //
1614  //   MemBarRelease
1615  //   MemBarCPUOrder_(leading)__________________
1616  //     C |    M \       \\                   C \
1617  //       |       \    StoreN/P[mo_release]  CastP2X
1618  //       |    Bot \    /
1619  //       |       MergeMem
1620  //       |         /
1621  //      MemBarVolatile (card mark)
1622  //     C |  ||    M |
1623  //       | LoadB    |
1624  //       |   |      |
1625  //       | Cmp      |\
1626  //       | /        | \
1627  //       If         |  \
1628  //       | \        |   \
1629  // IfFalse  IfTrue  |    \
1630  //       \     / \  |     \
1631  //        \   / StoreCM    |
1632  //         \ /      |      |
1633  //        Region   . . .   |
1634  //          | \           /
1635  //          |  . . .  \  / Bot
1636  //          |       MergeMem
1637  //          |          |
1638  //        MemBarVolatile (trailing)
1639  //
1640  // The first MergeMem merges the AliasIdxBot Mem slice from the
1641  // leading membar and the oopptr Mem slice from the Store into the
1642  // card mark membar. The trailing MergeMem merges the AliasIdxBot
1643  // Mem slice from the card mark membar and the AliasIdxRaw slice
1644  // from the StoreCM into the trailing membar (n.b. the latter
1645  // proceeds via a Phi associated with the If region).
1646  //
1647  // The graph for a CAS varies slightly, the obvious difference being
1648  // that the StoreN/P node is replaced by a CompareAndSwapP/N node
1649  // and the trailing MemBarVolatile by a MemBarCPUOrder +
1650  // MemBarAcquire pair. The other important difference is that the
1651  // CompareAndSwap node's SCMemProj is not merged into the card mark
1652  // membar - it still feeds the trailing MergeMem. This also means
1653  // that the card mark membar receives its Mem feed directly from the
1654  // leading membar rather than via a MergeMem.
1655  //
1656  //   MemBarRelease
1657  //   MemBarCPUOrder__(leading)_________________________
1658  //       ||                       \\                 C \
1659  //   MemBarVolatile (card mark)  CompareAndSwapN/P  CastP2X
1660  //     C |  ||    M |              |
1661  //       | LoadB    |       ______/|
1662  //       |   |      |      /       |
1663  //       | Cmp      |     /      SCMemProj
1664  //       | /        |    /         |
1665  //       If         |   /         /
1666  //       | \        |  /         /
1667  // IfFalse  IfTrue  | /         /
1668  //       \     / \  |/ prec    /
1669  //        \   / StoreCM       /
1670  //         \ /      |        /
1671  //        Region   . . .    /
1672  //          | \            /
1673  //          |  . . .  \   / Bot
1674  //          |       MergeMem
1675  //          |          |
1676  //        MemBarCPUOrder
1677  //        MemBarAcquire (trailing)
1678  //
1679  // This has a slightly different memory subgraph to the one seen
1680  // previously but the core of it is the same as for the CAS normal
1681  // sungraph
1682  //
1683  //   MemBarRelease
1684  //   MemBarCPUOrder____
1685  //      ||             \      . . .
1686  //   MemBarVolatile  CompareAndSwapX  . . .
1687  //      |  \            |
1688  //        . . .   SCMemProj
1689  //          |     /  . . .
1690  //         MergeMem
1691  //          |
1692  //   MemBarCPUOrder
1693  //   MemBarAcquire
1694  //
1695  //
1696  // G1 is quite a lot more complicated. The nodes inserted on behalf
1697  // of G1 may comprise: a pre-write graph which adds the old value to
1698  // the SATB queue; the releasing store itself; and, finally, a
1699  // post-write graph which performs a card mark.
1700  //
1701  // The pre-write graph may be omitted, but only when the put is
1702  // writing to a newly allocated (young gen) object and then only if
1703  // there is a direct memory chain to the Initialize node for the
1704  // object allocation. This will not happen for a volatile put since
1705  // any memory chain passes through the leading membar.
1706  //
1707  // The pre-write graph includes a series of 3 If tests. The outermost
1708  // If tests whether SATB is enabled (no else case). The next If tests
1709  // whether the old value is non-NULL (no else case). The third tests
1710  // whether the SATB queue index is > 0, if so updating the queue. The
1711  // else case for this third If calls out to the runtime to allocate a
1712  // new queue buffer.
1713  //
1714  // So with G1 the pre-write and releasing store subgraph looks like
1715  // this (the nested Ifs are omitted).
1716  //
1717  //  MemBarRelease (leading)____________
1718  //     C |  ||  M \   M \    M \  M \ . . .
1719  //       | LoadB   \  LoadL  LoadN   \
1720  //       | /        \                 \
1721  //       If         |\                 \
1722  //       | \        | \                 \
1723  //  IfFalse  IfTrue |  \                 \
1724  //       |     |    |   \                 |
1725  //       |     If   |   /\                |
1726  //       |     |          \               |
1727  //       |                 \              |
1728  //       |    . . .         \             |
1729  //       | /       | /       |            |
1730  //      Region  Phi[M]       |            |
1731  //       | \       |         |            |
1732  //       |  \_____ | ___     |            |
1733  //     C | C \     |   C \ M |            |
1734  //       | CastP2X | StoreN/P[mo_release] |
1735  //       |         |         |            |
1736  //     C |       M |       M |          M |
1737  //        \        |         |           /
1738  //                  . . .
1739  //          (post write subtree elided)
1740  //                    . . .
1741  //             C \         M /
1742  //         MemBarVolatile (trailing)
1743  //
1744  // n.b. the LoadB in this subgraph is not the card read -- it's a
1745  // read of the SATB queue active flag.
1746  //
1747  // Once again the CAS graph is a minor variant on the above with the
1748  // expected substitutions of CompareAndSawpX for StoreN/P and
1749  // MemBarCPUOrder + MemBarAcquire for trailing MemBarVolatile.
1750  //
1751  // The G1 post-write subtree is also optional, this time when the
1752  // new value being written is either null or can be identified as a
1753  // newly allocated (young gen) object with no intervening control
1754  // flow. The latter cannot happen but the former may, in which case
1755  // the card mark membar is omitted and the memory feeds form the
1756  // leading membar and the SToreN/P are merged direct into the
1757  // trailing membar as per the normal subgraph. So, the only special
1758  // case which arises is when the post-write subgraph is generated.
1759  //
1760  // The kernel of the post-write G1 subgraph is the card mark itself
1761  // which includes a card mark memory barrier (MemBarVolatile), a
1762  // card test (LoadB), and a conditional update (If feeding a
1763  // StoreCM). These nodes are surrounded by a series of nested Ifs
1764  // which try to avoid doing the card mark. The top level If skips if
1765  // the object reference does not cross regions (i.e. it tests if
1766  // (adr ^ val) >> log2(regsize) != 0) -- intra-region references
1767  // need not be recorded. The next If, which skips on a NULL value,
1768  // may be absent (it is not generated if the type of value is >=
1769  // OopPtr::NotNull). The 3rd If skips writes to young regions (by
1770  // checking if card_val != young).  n.b. although this test requires
1771  // a pre-read of the card it can safely be done before the StoreLoad
1772  // barrier. However that does not bypass the need to reread the card
1773  // after the barrier.
1774  //
1775  //                (pre-write subtree elided)
1776  //        . . .                  . . .    . . .  . . .
1777  //        C |                    M |     M |    M |
1778  //       Region                  Phi[M] StoreN    |
1779  //          |                     / \      |      |
1780  //         / \_______            /   \     |      |
1781  //      C / C \      . . .            \    |      |
1782  //       If   CastP2X . . .            |   |      |
1783  //       / \                           |   |      |
1784  //      /   \                          |   |      |
1785  // IfFalse IfTrue                      |   |      |
1786  //   |       |                         |   |     /|
1787  //   |       If                        |   |    / |
1788  //   |      / \                        |   |   /  |
1789  //   |     /   \                        \  |  /   |
1790  //   | IfFalse IfTrue                   MergeMem  |
1791  //   |  . . .    / \                       /      |
1792  //   |          /   \                     /       |
1793  //   |     IfFalse IfTrue                /        |
1794  //   |      . . .    |                  /         |
1795  //   |               If                /          |
1796  //   |               / \              /           |
1797  //   |              /   \            /            |
1798  //   |         IfFalse IfTrue       /             |
1799  //   |           . . .   |         /              |
1800  //   |                    \       /               |
1801  //   |                     \     /                |
1802  //   |             MemBarVolatile__(card mark)    |
1803  //   |                ||   C |  M \  M \          |
1804  //   |               LoadB   If    |    |         |
1805  //   |                      / \    |    |         |
1806  //   |                     . . .   |    |         |
1807  //   |                          \  |    |        /
1808  //   |                        StoreCM   |       /
1809  //   |                          . . .   |      /
1810  //   |                        _________/      /
1811  //   |                       /  _____________/
1812  //   |   . . .       . . .  |  /            /
1813  //   |    |                 | /   _________/
1814  //   |    |               Phi[M] /        /
1815  //   |    |                 |   /        /
1816  //   |    |                 |  /        /
1817  //   |  Region  . . .     Phi[M]  _____/
1818  //   |    /                 |    /
1819  //   |                      |   /
1820  //   | . . .   . . .        |  /
1821  //   | /                    | /
1822  // Region           |  |  Phi[M]
1823  //   |              |  |  / Bot
1824  //    \            MergeMem
1825  //     \            /
1826  //     MemBarVolatile
1827  //
1828  // As with CMS the initial MergeMem merges the AliasIdxBot Mem slice
1829  // from the leading membar and the oopptr Mem slice from the Store
1830  // into the card mark membar i.e. the memory flow to the card mark
1831  // membar still looks like a normal graph.
1832  //
1833  // The trailing MergeMem merges an AliasIdxBot Mem slice with other
1834  // Mem slices (from the StoreCM and other card mark queue stores).
1835  // However in this case the AliasIdxBot Mem slice does not come
1836  // direct from the card mark membar. It is merged through a series
1837  // of Phi nodes. These are needed to merge the AliasIdxBot Mem flow
1838  // from the leading membar with the Mem feed from the card mark
1839  // membar. Each Phi corresponds to one of the Ifs which may skip
1840  // around the card mark membar. So when the If implementing the NULL
1841  // value check has been elided the total number of Phis is 2
1842  // otherwise it is 3.
1843  //
1844  // The CAS graph when using G1GC also includes a pre-write subgraph
1845  // and an optional post-write subgraph. Teh sam evarioations are
1846  // introduced as for CMS with conditional card marking i.e. the
1847  // StoreP/N is swapped for a CompareAndSwapP/N, the tariling
1848  // MemBarVolatile for a MemBarCPUOrder + MemBarAcquire pair and the
1849  // Mem feed from the CompareAndSwapP/N includes a precedence
1850  // dependency feed to the StoreCM and a feed via an SCMemProj to the
1851  // trailing membar. So, as before the configuration includes the
1852  // normal CAS graph as a subgraph of the memory flow.
1853  //
1854  // So, the upshot is that in all cases the volatile put graph will
1855  // include a *normal* memory subgraph betwen the leading membar and
1856  // its child membar, either a volatile put graph (including a
1857  // releasing StoreX) or a CAS graph (including a CompareAndSwapX).
1858  // When that child is not a card mark membar then it marks the end
1859  // of the volatile put or CAS subgraph. If the child is a card mark
1860  // membar then the normal subgraph will form part of a volatile put
1861  // subgraph if and only if the child feeds an AliasIdxBot Mem feed
1862  // to a trailing barrier via a MergeMem. That feed is either direct
1863  // (for CMS) or via 2 or 3 Phi nodes merging the leading barrier
1864  // memory flow (for G1).
1865  //
1866  // The predicates controlling generation of instructions for store
1867  // and barrier nodes employ a few simple helper functions (described
1868  // below) which identify the presence or absence of all these
1869  // subgraph configurations and provide a means of traversing from
1870  // one node in the subgraph to another.
1871
1872  // is_CAS(int opcode)
1873  //
1874  // return true if opcode is one of the possible CompareAndSwapX
1875  // values otherwise false.
1876
1877  bool is_CAS(int opcode)
1878  {
1879    switch(opcode) {
1880      // We handle these
1881    case Op_CompareAndSwapI:
1882    case Op_CompareAndSwapL:
1883    case Op_CompareAndSwapP:
1884    case Op_CompareAndSwapN:
1885 // case Op_CompareAndSwapB:
1886 // case Op_CompareAndSwapS:
1887      return true;
1888      // These are TBD
1889    case Op_WeakCompareAndSwapB:
1890    case Op_WeakCompareAndSwapS:
1891    case Op_WeakCompareAndSwapI:
1892    case Op_WeakCompareAndSwapL:
1893    case Op_WeakCompareAndSwapP:
1894    case Op_WeakCompareAndSwapN:
1895    case Op_CompareAndExchangeB:
1896    case Op_CompareAndExchangeS:
1897    case Op_CompareAndExchangeI:
1898    case Op_CompareAndExchangeL:
1899    case Op_CompareAndExchangeP:
1900    case Op_CompareAndExchangeN:
1901      return false;
1902    default:
1903      return false;
1904    }
1905  }
1906
1907
1908  // leading_to_normal
1909  //
1910  //graph traversal helper which detects the normal case Mem feed from
1911  // a release membar (or, optionally, its cpuorder child) to a
1912  // dependent volatile membar i.e. it ensures that one or other of
1913  // the following Mem flow subgraph is present.
1914  //
1915  //   MemBarRelease
1916  //   MemBarCPUOrder {leading}
1917  //          |  \      . . .
1918  //          |  StoreN/P[mo_release]  . . .
1919  //          |   /
1920  //         MergeMem
1921  //          |
1922  //   MemBarVolatile {trailing or card mark}
1923  //
1924  //   MemBarRelease
1925  //   MemBarCPUOrder {leading}
1926  //      |       \      . . .
1927  //      |     CompareAndSwapX  . . .
1928  //               |
1929  //     . . .    SCMemProj
1930  //           \   |
1931  //      |    MergeMem
1932  //      |       /
1933  //    MemBarCPUOrder
1934  //    MemBarAcquire {trailing}
1935  //
1936  // if the correct configuration is present returns the trailing
1937  // membar otherwise NULL.
1938  //
1939  // the input membar is expected to be either a cpuorder membar or a
1940  // release membar. in the latter case it should not have a cpu membar
1941  // child.
1942  //
1943  // the returned value may be a card mark or trailing membar
1944  //
1945
1946  MemBarNode *leading_to_normal(MemBarNode *leading)
1947  {
1948    assert((leading->Opcode() == Op_MemBarRelease ||
1949	    leading->Opcode() == Op_MemBarCPUOrder),
1950	   "expecting a volatile or cpuroder membar!");
1951
1952    // check the mem flow
1953    ProjNode *mem = leading->proj_out(TypeFunc::Memory);
1954
1955    if (!mem) {
1956      return NULL;
1957    }
1958
1959    Node *x = NULL;
1960    StoreNode * st = NULL;
1961    LoadStoreNode *cas = NULL;
1962    MergeMemNode *mm = NULL;
1963
1964    for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
1965      x = mem->fast_out(i);
1966      if (x->is_MergeMem()) {
1967	if (mm != NULL) {
1968	  return NULL;
1969	}
1970	// two merge mems is one too many
1971	mm = x->as_MergeMem();
1972      } else if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
1973	// two releasing stores/CAS nodes is one too many
1974	if (st != NULL || cas != NULL) {
1975	  return NULL;
1976	}
1977	st = x->as_Store();
1978      } else if (is_CAS(x->Opcode())) {
1979	if (st != NULL || cas != NULL) {
1980	  return NULL;
1981	}
1982	cas = x->as_LoadStore();
1983      }
1984    }
1985
1986    // must have a store or a cas
1987    if (!st && !cas) {
1988      return NULL;
1989    }
1990
1991    // must have a merge if we also have st
1992    if (st && !mm) {
1993      return NULL;
1994    }
1995
1996    Node *y = NULL;
1997    if (cas) {
1998      // look for an SCMemProj
1999      for (DUIterator_Fast imax, i = cas->fast_outs(imax); i < imax; i++) {
2000	x = cas->fast_out(i);
2001	if (x->is_Proj()) {
2002	  y = x;
2003	  break;
2004	}
2005      }
2006      if (y == NULL) {
2007	return NULL;
2008      }
2009      // the proj must feed a MergeMem
2010      for (DUIterator_Fast imax, i = y->fast_outs(imax); i < imax; i++) {
2011	x = y->fast_out(i);
2012	if (x->is_MergeMem()) {
2013	  mm = x->as_MergeMem();
2014	  break;
2015	}
2016      }
2017      if (mm == NULL)
2018	return NULL;
2019    } else {
2020      // ensure the store feeds the existing mergemem;
2021      for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
2022	if (st->fast_out(i) == mm) {
2023	  y = st;
2024	  break;
2025	}
2026      }
2027      if (y == NULL) {
2028	return NULL;
2029      }
2030    }
2031
2032    MemBarNode *mbar = NULL;
2033    // ensure the merge feeds to the expected type of membar
2034    for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
2035      x = mm->fast_out(i);
2036      if (x->is_MemBar()) {
2037	int opcode = x->Opcode();
2038	if (opcode == Op_MemBarVolatile && st) {
2039	  mbar = x->as_MemBar();
2040	} else if (cas && opcode == Op_MemBarCPUOrder) {
2041	  MemBarNode *y =  x->as_MemBar();
2042	  y = child_membar(y);
2043	  if (y != NULL && y->Opcode() == Op_MemBarAcquire) {
2044	    mbar = y;
2045	  }
2046	}
2047	break;
2048      }
2049    }
2050
2051    return mbar;
2052  }
2053
2054  // normal_to_leading
2055  //
2056  // graph traversal helper which detects the normal case Mem feed
2057  // from either a card mark or a trailing membar to a preceding
2058  // release membar (optionally its cpuorder child) i.e. it ensures
2059  // that one or other of the following Mem flow subgraphs is present.
2060  //
2061  //   MemBarRelease
2062  //   MemBarCPUOrder {leading}
2063  //          |  \      . . .
2064  //          |  StoreN/P[mo_release]  . . .
2065  //          |   /
2066  //         MergeMem
2067  //          |
2068  //   MemBarVolatile {card mark or trailing}
2069  //
2070  //   MemBarRelease
2071  //   MemBarCPUOrder {leading}
2072  //      |       \      . . .
2073  //      |     CompareAndSwapX  . . .
2074  //               |
2075  //     . . .    SCMemProj
2076  //           \   |
2077  //      |    MergeMem
2078  //      |        /
2079  //    MemBarCPUOrder
2080  //    MemBarAcquire {trailing}
2081  //
2082  // this predicate checks for the same flow as the previous predicate
2083  // but starting from the bottom rather than the top.
2084  //
2085  // if the configuration is present returns the cpuorder member for
2086  // preference or when absent the release membar otherwise NULL.
2087  //
2088  // n.b. the input membar is expected to be a MemBarVolatile but
2089  // need not be a card mark membar.
2090
2091  MemBarNode *normal_to_leading(const MemBarNode *barrier)
2092  {
2093    // input must be a volatile membar
2094    assert((barrier->Opcode() == Op_MemBarVolatile ||
2095	    barrier->Opcode() == Op_MemBarAcquire),
2096	   "expecting a volatile or an acquire membar");
2097    Node *x;
2098    bool is_cas = barrier->Opcode() == Op_MemBarAcquire;
2099
2100    // if we have an acquire membar then it must be fed via a CPUOrder
2101    // membar
2102
2103    if (is_cas) {
2104      // skip to parent barrier which must be a cpuorder
2105      x = parent_membar(barrier);
2106      if (x->Opcode() != Op_MemBarCPUOrder)
2107	return NULL;
2108    } else {
2109      // start from the supplied barrier
2110      x = (Node *)barrier;
2111    }
2112
2113    // the Mem feed to the membar should be a merge
2114    x = x ->in(TypeFunc::Memory);
2115    if (!x->is_MergeMem())
2116      return NULL;
2117
2118    MergeMemNode *mm = x->as_MergeMem();
2119
2120    if (is_cas) {
2121      // the merge should be fed from the CAS via an SCMemProj node
2122      x = NULL;
2123      for (uint idx = 1; idx < mm->req(); idx++) {
2124	if (mm->in(idx)->Opcode() == Op_SCMemProj) {
2125	  x = mm->in(idx);
2126	  break;
2127	}
2128      }
2129      if (x == NULL) {
2130	return NULL;
2131      }
2132      // check for a CAS feeding this proj
2133      x = x->in(0);
2134      int opcode = x->Opcode();
2135      if (!is_CAS(opcode)) {
2136	return NULL;
2137      }
2138      // the CAS should get its mem feed from the leading membar
2139      x = x->in(MemNode::Memory);
2140    } else {
2141      // the merge should get its Bottom mem feed from the leading membar
2142      x = mm->in(Compile::AliasIdxBot);
2143    }
2144
2145    // ensure this is a non control projection
2146    if (!x->is_Proj() || x->is_CFG()) {
2147      return NULL;
2148    }
2149    // if it is fed by a membar that's the one we want
2150    x = x->in(0);
2151
2152    if (!x->is_MemBar()) {
2153      return NULL;
2154    }
2155
2156    MemBarNode *leading = x->as_MemBar();
2157    // reject invalid candidates
2158    if (!leading_membar(leading)) {
2159      return NULL;
2160    }
2161
2162    // ok, we have a leading membar, now for the sanity clauses
2163
2164    // the leading membar must feed Mem to a releasing store or CAS
2165    ProjNode *mem = leading->proj_out(TypeFunc::Memory);
2166    StoreNode *st = NULL;
2167    LoadStoreNode *cas = NULL;
2168    for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
2169      x = mem->fast_out(i);
2170      if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
2171	// two stores or CASes is one too many
2172	if (st != NULL || cas != NULL) {
2173	  return NULL;
2174	}
2175	st = x->as_Store();
2176      } else if (is_CAS(x->Opcode())) {
2177	if (st != NULL || cas != NULL) {
2178	  return NULL;
2179	}
2180	cas = x->as_LoadStore();
2181      }
2182    }
2183
2184    // we should not have both a store and a cas
2185    if (st == NULL & cas == NULL) {
2186      return NULL;
2187    }
2188
2189    if (st == NULL) {
2190      // nothing more to check
2191      return leading;
2192    } else {
2193      // we should not have a store if we started from an acquire
2194      if (is_cas) {
2195	return NULL;
2196      }
2197
2198      // the store should feed the merge we used to get here
2199      for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
2200	if (st->fast_out(i) == mm) {
2201	  return leading;
2202	}
2203      }
2204    }
2205
2206    return NULL;
2207  }
2208
2209  // card_mark_to_trailing
2210  //
2211  // graph traversal helper which detects extra, non-normal Mem feed
2212  // from a card mark volatile membar to a trailing membar i.e. it
2213  // ensures that one of the following three GC post-write Mem flow
2214  // subgraphs is present.
2215  //
2216  // 1)
2217  //     . . .
2218  //       |
2219  //   MemBarVolatile (card mark)
2220  //      |          |
2221  //      |        StoreCM
2222  //      |          |
2223  //      |        . . .
2224  //  Bot |  /
2225  //   MergeMem
2226  //      |
2227  //      |
2228  //    MemBarVolatile {trailing}
2229  //
2230  // 2)
2231  //   MemBarRelease/CPUOrder (leading)
2232  //    |
2233  //    |
2234  //    |\       . . .
2235  //    | \        |
2236  //    |  \  MemBarVolatile (card mark)
2237  //    |   \   |     |
2238  //     \   \  |   StoreCM    . . .
2239  //      \   \ |
2240  //       \  Phi
2241  //        \ /
2242  //        Phi  . . .
2243  //     Bot |   /
2244  //       MergeMem
2245  //         |
2246  //    MemBarVolatile {trailing}
2247  //
2248  //
2249  // 3)
2250  //   MemBarRelease/CPUOrder (leading)
2251  //    |
2252  //    |\
2253  //    | \
2254  //    |  \      . . .
2255  //    |   \       |
2256  //    |\   \  MemBarVolatile (card mark)
2257  //    | \   \   |     |
2258  //    |  \   \  |   StoreCM    . . .
2259  //    |   \   \ |
2260  //     \   \  Phi
2261  //      \   \ /
2262  //       \  Phi
2263  //        \ /
2264  //        Phi  . . .
2265  //     Bot |   /
2266  //       MergeMem
2267  //         |
2268  //         |
2269  //    MemBarVolatile {trailing}
2270  //
2271  // configuration 1 is only valid if UseConcMarkSweepGC &&
2272  // UseCondCardMark
2273  //
2274  // configurations 2 and 3 are only valid if UseG1GC.
2275  //
2276  // if a valid configuration is present returns the trailing membar
2277  // otherwise NULL.
2278  //
2279  // n.b. the supplied membar is expected to be a card mark
2280  // MemBarVolatile i.e. the caller must ensure the input node has the
2281  // correct operand and feeds Mem to a StoreCM node
2282
2283  MemBarNode *card_mark_to_trailing(const MemBarNode *barrier)
2284  {
2285    // input must be a card mark volatile membar
2286    assert(is_card_mark_membar(barrier), "expecting a card mark membar");
2287
2288    Node *feed = barrier->proj_out(TypeFunc::Memory);
2289    Node *x;
2290    MergeMemNode *mm = NULL;
2291
2292    const int MAX_PHIS = 3;	// max phis we will search through
2293    int phicount = 0; 		// current search count
2294
2295    bool retry_feed = true;
2296    while (retry_feed) {
2297      // see if we have a direct MergeMem feed
2298      for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) {
2299	x = feed->fast_out(i);
2300	// the correct Phi will be merging a Bot memory slice
2301	if (x->is_MergeMem()) {
2302	  mm = x->as_MergeMem();
2303	  break;
2304	}
2305      }
2306      if (mm) {
2307	retry_feed = false;
2308      } else if (UseG1GC & phicount++ < MAX_PHIS) {
2309	// the barrier may feed indirectly via one or two Phi nodes
2310	PhiNode *phi = NULL;
2311	for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) {
2312	  x = feed->fast_out(i);
2313	  // the correct Phi will be merging a Bot memory slice
2314	  if (x->is_Phi() && x->adr_type() == TypePtr::BOTTOM) {
2315	    phi = x->as_Phi();
2316	    break;
2317	  }
2318	}
2319	if (!phi) {
2320	  return NULL;
2321	}
2322	// look for another merge below this phi
2323	feed = phi;
2324      } else {
2325	// couldn't find a merge
2326	return NULL;
2327      }
2328    }
2329
2330    // sanity check this feed turns up as the expected slice
2331    assert(mm->as_MergeMem()->in(Compile::AliasIdxBot) == feed, "expecting membar to feed AliasIdxBot slice to Merge");
2332
2333    MemBarNode *trailing = NULL;
2334    // be sure we have a trailing membar the merge
2335    for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
2336      x = mm->fast_out(i);
2337      if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) {
2338	trailing = x->as_MemBar();
2339	break;
2340      }
2341    }
2342
2343    return trailing;
2344  }
2345
2346  // trailing_to_card_mark
2347  //
2348  // graph traversal helper which detects extra, non-normal Mem feed
2349  // from a trailing volatile membar to a preceding card mark volatile
2350  // membar i.e. it identifies whether one of the three possible extra
2351  // GC post-write Mem flow subgraphs is present
2352  //
2353  // this predicate checks for the same flow as the previous predicate
2354  // but starting from the bottom rather than the top.
2355  //
2356  // if the configuration is present returns the card mark membar
2357  // otherwise NULL
2358  //
2359  // n.b. the supplied membar is expected to be a trailing
2360  // MemBarVolatile i.e. the caller must ensure the input node has the
2361  // correct opcode
2362
2363  MemBarNode *trailing_to_card_mark(const MemBarNode *trailing)
2364  {
2365    assert(trailing->Opcode() == Op_MemBarVolatile,
2366	   "expecting a volatile membar");
2367    assert(!is_card_mark_membar(trailing),
2368	   "not expecting a card mark membar");
2369
2370    // the Mem feed to the membar should be a merge
2371    Node *x = trailing->in(TypeFunc::Memory);
2372    if (!x->is_MergeMem()) {
2373      return NULL;
2374    }
2375
2376    MergeMemNode *mm = x->as_MergeMem();
2377
2378    x = mm->in(Compile::AliasIdxBot);
2379    // with G1 we may possibly see a Phi or two before we see a Memory
2380    // Proj from the card mark membar
2381
2382    const int MAX_PHIS = 3;	// max phis we will search through
2383    int phicount = 0; 		// current search count
2384
2385    bool retry_feed = !x->is_Proj();
2386
2387    while (retry_feed) {
2388      if (UseG1GC && x->is_Phi() && phicount++ < MAX_PHIS) {
2389	PhiNode *phi = x->as_Phi();
2390	ProjNode *proj = NULL;
2391	PhiNode *nextphi = NULL;
2392	bool found_leading = false;
2393	for (uint i = 1; i < phi->req(); i++) {
2394	  x = phi->in(i);
2395	  if (x->is_Phi()) {
2396	    nextphi = x->as_Phi();
2397	  } else if (x->is_Proj()) {
2398	    int opcode = x->in(0)->Opcode();
2399	    if (opcode == Op_MemBarVolatile) {
2400	      proj = x->as_Proj();
2401	    } else if (opcode == Op_MemBarRelease ||
2402		       opcode == Op_MemBarCPUOrder) {
2403	      // probably a leading membar
2404	      found_leading = true;
2405	    }
2406	  }
2407	}
2408	// if we found a correct looking proj then retry from there
2409	// otherwise we must see a leading and a phi or this the
2410	// wrong config
2411	if (proj != NULL) {
2412	  x = proj;
2413	  retry_feed = false;
2414	} else if (found_leading && nextphi != NULL) {
2415	  // retry from this phi to check phi2
2416	  x = nextphi;
2417	} else {
2418	  // not what we were looking for
2419	  return NULL;
2420	}
2421      } else {
2422	return NULL;
2423      }
2424    }
2425    // the proj has to come from the card mark membar
2426    x = x->in(0);
2427    if (!x->is_MemBar()) {
2428      return NULL;
2429    }
2430
2431    MemBarNode *card_mark_membar = x->as_MemBar();
2432
2433    if (!is_card_mark_membar(card_mark_membar)) {
2434      return NULL;
2435    }
2436
2437    return card_mark_membar;
2438  }
2439
2440  // trailing_to_leading
2441  //
2442  // graph traversal helper which checks the Mem flow up the graph
2443  // from a (non-card mark) trailing membar attempting to locate and
2444  // return an associated leading membar. it first looks for a
2445  // subgraph in the normal configuration (relying on helper
2446  // normal_to_leading). failing that it then looks for one of the
2447  // possible post-write card mark subgraphs linking the trailing node
2448  // to a the card mark membar (relying on helper
2449  // trailing_to_card_mark), and then checks that the card mark membar
2450  // is fed by a leading membar (once again relying on auxiliary
2451  // predicate normal_to_leading).
2452  //
2453  // if the configuration is valid returns the cpuorder member for
2454  // preference or when absent the release membar otherwise NULL.
2455  //
2456  // n.b. the input membar is expected to be either a volatile or
2457  // acquire membar but in the former case must *not* be a card mark
2458  // membar.
2459
2460  MemBarNode *trailing_to_leading(const MemBarNode *trailing)
2461  {
2462    assert((trailing->Opcode() == Op_MemBarAcquire ||
2463	    trailing->Opcode() == Op_MemBarVolatile),
2464	   "expecting an acquire or volatile membar");
2465    assert((trailing->Opcode() != Op_MemBarVolatile ||
2466	    !is_card_mark_membar(trailing)),
2467	   "not expecting a card mark membar");
2468
2469    MemBarNode *leading = normal_to_leading(trailing);
2470
2471    if (leading) {
2472      return leading;
2473    }
2474
2475    // nothing more to do if this is an acquire
2476    if (trailing->Opcode() == Op_MemBarAcquire) {
2477      return NULL;
2478    }
2479
2480    MemBarNode *card_mark_membar = trailing_to_card_mark(trailing);
2481
2482    if (!card_mark_membar) {
2483      return NULL;
2484    }
2485
2486    return normal_to_leading(card_mark_membar);
2487  }
2488
2489  // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
2490
2491bool unnecessary_acquire(const Node *barrier)
2492{
2493  assert(barrier->is_MemBar(), "expecting a membar");
2494
2495  if (UseBarriersForVolatile) {
2496    // we need to plant a dmb
2497    return false;
2498  }
2499
2500  // a volatile read derived from bytecode (or also from an inlined
2501  // SHA field read via LibraryCallKit::load_field_from_object)
2502  // manifests as a LoadX[mo_acquire] followed by an acquire membar
2503  // with a bogus read dependency on it's preceding load. so in those
2504  // cases we will find the load node at the PARMS offset of the
2505  // acquire membar.  n.b. there may be an intervening DecodeN node.
2506  //
2507  // a volatile load derived from an inlined unsafe field access
2508  // manifests as a cpuorder membar with Ctl and Mem projections
2509  // feeding both an acquire membar and a LoadX[mo_acquire]. The
2510  // acquire then feeds another cpuorder membar via Ctl and Mem
2511  // projections. The load has no output dependency on these trailing
2512  // membars because subsequent nodes inserted into the graph take
2513  // their control feed from the final membar cpuorder meaning they
2514  // are all ordered after the load.
2515
2516  Node *x = barrier->lookup(TypeFunc::Parms);
2517  if (x) {
2518    // we are starting from an acquire and it has a fake dependency
2519    //
2520    // need to check for
2521    //
2522    //   LoadX[mo_acquire]
2523    //   {  |1   }
2524    //   {DecodeN}
2525    //      |Parms
2526    //   MemBarAcquire*
2527    //
2528    // where * tags node we were passed
2529    // and |k means input k
2530    if (x->is_DecodeNarrowPtr()) {
2531      x = x->in(1);
2532    }
2533
2534    return (x->is_Load() && x->as_Load()->is_acquire());
2535  }
2536
2537  // now check for an unsafe volatile get
2538
2539  // need to check for
2540  //
2541  //   MemBarCPUOrder
2542  //        ||       \\
2543  //   MemBarAcquire* LoadX[mo_acquire]
2544  //        ||
2545  //   MemBarCPUOrder
2546  //
2547  // where * tags node we were passed
2548  // and || or \\ are Ctl+Mem feeds via intermediate Proj Nodes
2549
2550  // check for a parent MemBarCPUOrder
2551  ProjNode *ctl;
2552  ProjNode *mem;
2553  MemBarNode *parent = parent_membar(barrier);
2554  if (!parent || parent->Opcode() != Op_MemBarCPUOrder)
2555    return false;
2556  ctl = parent->proj_out(TypeFunc::Control);
2557  mem = parent->proj_out(TypeFunc::Memory);
2558  if (!ctl || !mem) {
2559    return false;
2560  }
2561  // ensure the proj nodes both feed a LoadX[mo_acquire]
2562  LoadNode *ld = NULL;
2563  for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
2564    x = ctl->fast_out(i);
2565    // if we see a load we keep hold of it and stop searching
2566    if (x->is_Load()) {
2567      ld = x->as_Load();
2568      break;
2569    }
2570  }
2571  // it must be an acquiring load
2572  if (ld && ld->is_acquire()) {
2573
2574    for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
2575      x = mem->fast_out(i);
2576      // if we see the same load we drop it and stop searching
2577      if (x == ld) {
2578	ld = NULL;
2579	break;
2580      }
2581    }
2582    // we must have dropped the load
2583    if (ld == NULL) {
2584      // check for a child cpuorder membar
2585      MemBarNode *child  = child_membar(barrier->as_MemBar());
2586      if (child && child->Opcode() == Op_MemBarCPUOrder)
2587	return true;
2588    }
2589  }
2590
2591  // final option for unnecessary mebar is that it is a trailing node
2592  // belonging to a CAS
2593
2594  MemBarNode *leading = trailing_to_leading(barrier->as_MemBar());
2595
2596  return leading != NULL;
2597}
2598
2599bool needs_acquiring_load(const Node *n)
2600{
2601  assert(n->is_Load(), "expecting a load");
2602  if (UseBarriersForVolatile) {
2603    // we use a normal load and a dmb
2604    return false;
2605  }
2606
2607  LoadNode *ld = n->as_Load();
2608
2609  if (!ld->is_acquire()) {
2610    return false;
2611  }
2612
2613  // check if this load is feeding an acquire membar
2614  //
2615  //   LoadX[mo_acquire]
2616  //   {  |1   }
2617  //   {DecodeN}
2618  //      |Parms
2619  //   MemBarAcquire*
2620  //
2621  // where * tags node we were passed
2622  // and |k means input k
2623
2624  Node *start = ld;
2625  Node *mbacq = NULL;
2626
2627  // if we hit a DecodeNarrowPtr we reset the start node and restart
2628  // the search through the outputs
2629 restart:
2630
2631  for (DUIterator_Fast imax, i = start->fast_outs(imax); i < imax; i++) {
2632    Node *x = start->fast_out(i);
2633    if (x->is_MemBar() && x->Opcode() == Op_MemBarAcquire) {
2634      mbacq = x;
2635    } else if (!mbacq &&
2636	       (x->is_DecodeNarrowPtr() ||
2637		(x->is_Mach() && x->Opcode() == Op_DecodeN))) {
2638      start = x;
2639      goto restart;
2640    }
2641  }
2642
2643  if (mbacq) {
2644    return true;
2645  }
2646
2647  // now check for an unsafe volatile get
2648
2649  // check if Ctl and Proj feed comes from a MemBarCPUOrder
2650  //
2651  //     MemBarCPUOrder
2652  //        ||       \\
2653  //   MemBarAcquire* LoadX[mo_acquire]
2654  //        ||
2655  //   MemBarCPUOrder
2656
2657  MemBarNode *membar;
2658
2659  membar = parent_membar(ld);
2660
2661  if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) {
2662    return false;
2663  }
2664
2665  // ensure that there is a CPUOrder->Acquire->CPUOrder membar chain
2666
2667  membar = child_membar(membar);
2668
2669  if (!membar || !membar->Opcode() == Op_MemBarAcquire) {
2670    return false;
2671  }
2672
2673  membar = child_membar(membar);
2674
2675  if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) {
2676    return false;
2677  }
2678
2679  return true;
2680}
2681
2682bool unnecessary_release(const Node *n)
2683{
2684  assert((n->is_MemBar() &&
2685	  n->Opcode() == Op_MemBarRelease),
2686	 "expecting a release membar");
2687
2688  if (UseBarriersForVolatile) {
2689    // we need to plant a dmb
2690    return false;
2691  }
2692
2693  // if there is a dependent CPUOrder barrier then use that as the
2694  // leading
2695
2696  MemBarNode *barrier = n->as_MemBar();
2697  // check for an intervening cpuorder membar
2698  MemBarNode *b = child_membar(barrier);
2699  if (b && b->Opcode() == Op_MemBarCPUOrder) {
2700    // ok, so start the check from the dependent cpuorder barrier
2701    barrier = b;
2702  }
2703
2704  // must start with a normal feed
2705  MemBarNode *child_barrier = leading_to_normal(barrier);
2706
2707  if (!child_barrier) {
2708    return false;
2709  }
2710
2711  if (!is_card_mark_membar(child_barrier)) {
2712    // this is the trailing membar and we are done
2713    return true;
2714  }
2715
2716  // must be sure this card mark feeds a trailing membar
2717  MemBarNode *trailing = card_mark_to_trailing(child_barrier);
2718  return (trailing != NULL);
2719}
2720
2721bool unnecessary_volatile(const Node *n)
2722{
2723  // assert n->is_MemBar();
2724  if (UseBarriersForVolatile) {
2725    // we need to plant a dmb
2726    return false;
2727  }
2728
2729  MemBarNode *mbvol = n->as_MemBar();
2730
2731  // first we check if this is part of a card mark. if so then we have
2732  // to generate a StoreLoad barrier
2733
2734  if (is_card_mark_membar(mbvol)) {
2735      return false;
2736  }
2737
2738  // ok, if it's not a card mark then we still need to check if it is
2739  // a trailing membar of a volatile put hgraph.
2740
2741  return (trailing_to_leading(mbvol) != NULL);
2742}
2743
2744// predicates controlling emit of str<x>/stlr<x> and associated dmbs
2745
2746bool needs_releasing_store(const Node *n)
2747{
2748  // assert n->is_Store();
2749  if (UseBarriersForVolatile) {
2750    // we use a normal store and dmb combination
2751    return false;
2752  }
2753
2754  StoreNode *st = n->as_Store();
2755
2756  // the store must be marked as releasing
2757  if (!st->is_release()) {
2758    return false;
2759  }
2760
2761  // the store must be fed by a membar
2762
2763  Node *x = st->lookup(StoreNode::Memory);
2764
2765  if (! x || !x->is_Proj()) {
2766    return false;
2767  }
2768
2769  ProjNode *proj = x->as_Proj();
2770
2771  x = proj->lookup(0);
2772
2773  if (!x || !x->is_MemBar()) {
2774    return false;
2775  }
2776
2777  MemBarNode *barrier = x->as_MemBar();
2778
2779  // if the barrier is a release membar or a cpuorder mmebar fed by a
2780  // release membar then we need to check whether that forms part of a
2781  // volatile put graph.
2782
2783  // reject invalid candidates
2784  if (!leading_membar(barrier)) {
2785    return false;
2786  }
2787
2788  // does this lead a normal subgraph?
2789  MemBarNode *mbvol = leading_to_normal(barrier);
2790
2791  if (!mbvol) {
2792    return false;
2793  }
2794
2795  // all done unless this is a card mark
2796  if (!is_card_mark_membar(mbvol)) {
2797    return true;
2798  }
2799
2800  // we found a card mark -- just make sure we have a trailing barrier
2801
2802  return (card_mark_to_trailing(mbvol) != NULL);
2803}
2804
2805// predicate controlling translation of CAS
2806//
2807// returns true if CAS needs to use an acquiring load otherwise false
2808
2809bool needs_acquiring_load_exclusive(const Node *n)
2810{
2811  assert(is_CAS(n->Opcode()), "expecting a compare and swap");
2812  if (UseBarriersForVolatile) {
2813    return false;
2814  }
2815
2816  // CAS nodes only ought to turn up in inlined unsafe CAS operations
2817#ifdef ASSERT
2818  LoadStoreNode *st = n->as_LoadStore();
2819
2820  // the store must be fed by a membar
2821
2822  Node *x = st->lookup(StoreNode::Memory);
2823
2824  assert (x && x->is_Proj(), "CAS not fed by memory proj!");
2825
2826  ProjNode *proj = x->as_Proj();
2827
2828  x = proj->lookup(0);
2829
2830  assert (x && x->is_MemBar(), "CAS not fed by membar!");
2831
2832  MemBarNode *barrier = x->as_MemBar();
2833
2834  // the barrier must be a cpuorder mmebar fed by a release membar
2835
2836  assert(barrier->Opcode() == Op_MemBarCPUOrder,
2837	 "CAS not fed by cpuorder membar!");
2838
2839  MemBarNode *b = parent_membar(barrier);
2840  assert ((b != NULL && b->Opcode() == Op_MemBarRelease),
2841	  "CAS not fed by cpuorder+release membar pair!");
2842
2843  // does this lead a normal subgraph?
2844  MemBarNode *mbar = leading_to_normal(barrier);
2845
2846  assert(mbar != NULL, "CAS not embedded in normal graph!");
2847
2848  assert(mbar->Opcode() == Op_MemBarAcquire, "trailing membar should be an acquire");
2849#endif // ASSERT
2850  // so we can just return true here
2851  return true;
2852}
2853
2854// predicate controlling translation of StoreCM
2855//
2856// returns true if a StoreStore must precede the card write otherwise
2857// false
2858
2859bool unnecessary_storestore(const Node *storecm)
2860{
2861  assert(storecm->Opcode()  == Op_StoreCM, "expecting a StoreCM");
2862
2863  // we only ever need to generate a dmb ishst between an object put
2864  // and the associated card mark when we are using CMS without
2865  // conditional card marking
2866
2867  if (!UseConcMarkSweepGC || UseCondCardMark) {
2868    return true;
2869  }
2870
2871  // if we are implementing volatile puts using barriers then the
2872  // object put as an str so we must insert the dmb ishst
2873
2874  if (UseBarriersForVolatile) {
2875    return false;
2876  }
2877
2878  // we can omit the dmb ishst if this StoreCM is part of a volatile
2879  // put because in thta case the put will be implemented by stlr
2880  //
2881  // we need to check for a normal subgraph feeding this StoreCM.
2882  // that means the StoreCM must be fed Memory from a leading membar,
2883  // either a MemBarRelease or its dependent MemBarCPUOrder, and the
2884  // leading membar must be part of a normal subgraph
2885
2886  Node *x = storecm->in(StoreNode::Memory);
2887
2888  if (!x->is_Proj()) {
2889    return false;
2890  }
2891
2892  x = x->in(0);
2893
2894  if (!x->is_MemBar()) {
2895    return false;
2896  }
2897
2898  MemBarNode *leading = x->as_MemBar();
2899
2900  // reject invalid candidates
2901  if (!leading_membar(leading)) {
2902    return false;
2903  }
2904
2905  // we can omit the StoreStore if it is the head of a normal subgraph
2906  return (leading_to_normal(leading) != NULL);
2907}
2908
2909
2910#define __ _masm.
2911
2912// advance declarations for helper functions to convert register
2913// indices to register objects
2914
2915// the ad file has to provide implementations of certain methods
2916// expected by the generic code
2917//
2918// REQUIRED FUNCTIONALITY
2919
2920//=============================================================================
2921
2922// !!!!! Special hack to get all types of calls to specify the byte offset
2923//       from the start of the call to the point where the return address
2924//       will point.
2925
2926int MachCallStaticJavaNode::ret_addr_offset()
2927{
2928  // call should be a simple bl
2929  int off = 4;
2930  return off;
2931}
2932
2933int MachCallDynamicJavaNode::ret_addr_offset()
2934{
2935  return 16; // movz, movk, movk, bl
2936}
2937
2938int MachCallRuntimeNode::ret_addr_offset() {
2939  // for generated stubs the call will be
2940  //   far_call(addr)
2941  // for real runtime callouts it will be six instructions
2942  // see aarch64_enc_java_to_runtime
2943  //   adr(rscratch2, retaddr)
2944  //   lea(rscratch1, RuntimeAddress(addr)
2945  //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
2946  //   blrt rscratch1
2947  CodeBlob *cb = CodeCache::find_blob(_entry_point);
2948  if (cb) {
2949    return MacroAssembler::far_branch_size();
2950  } else {
2951    return 6 * NativeInstruction::instruction_size;
2952  }
2953}
2954
2955// Indicate if the safepoint node needs the polling page as an input
2956
2957// the shared code plants the oop data at the start of the generated
2958// code for the safepoint node and that needs ot be at the load
2959// instruction itself. so we cannot plant a mov of the safepoint poll
2960// address followed by a load. setting this to true means the mov is
2961// scheduled as a prior instruction. that's better for scheduling
2962// anyway.
2963
2964bool SafePointNode::needs_polling_address_input()
2965{
2966  return true;
2967}
2968
2969//=============================================================================
2970
2971#ifndef PRODUCT
2972void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
2973  st->print("BREAKPOINT");
2974}
2975#endif
2976
2977void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
2978  MacroAssembler _masm(&cbuf);
2979  __ brk(0);
2980}
2981
2982uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
2983  return MachNode::size(ra_);
2984}
2985
2986//=============================================================================
2987
2988#ifndef PRODUCT
2989  void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
2990    st->print("nop \t# %d bytes pad for loops and calls", _count);
2991  }
2992#endif
2993
2994  void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
2995    MacroAssembler _masm(&cbuf);
2996    for (int i = 0; i < _count; i++) {
2997      __ nop();
2998    }
2999  }
3000
3001  uint MachNopNode::size(PhaseRegAlloc*) const {
3002    return _count * NativeInstruction::instruction_size;
3003  }
3004
3005//=============================================================================
3006const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
3007
3008int Compile::ConstantTable::calculate_table_base_offset() const {
3009  return 0;  // absolute addressing, no offset
3010}
3011
3012bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
3013void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
3014  ShouldNotReachHere();
3015}
3016
3017void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
3018  // Empty encoding
3019}
3020
3021uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
3022  return 0;
3023}
3024
3025#ifndef PRODUCT
3026void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
3027  st->print("-- \t// MachConstantBaseNode (empty encoding)");
3028}
3029#endif
3030
3031#ifndef PRODUCT
3032void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
3033  Compile* C = ra_->C;
3034
3035  int framesize = C->frame_slots() << LogBytesPerInt;
3036
3037  if (C->need_stack_bang(framesize))
3038    st->print("# stack bang size=%d\n\t", framesize);
3039
3040  if (framesize < ((1 << 9) + 2 * wordSize)) {
3041    st->print("sub  sp, sp, #%d\n\t", framesize);
3042    st->print("stp  rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
3043    if (PreserveFramePointer) st->print("\n\tadd  rfp, sp, #%d", framesize - 2 * wordSize);
3044  } else {
3045    st->print("stp  lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize));
3046    if (PreserveFramePointer) st->print("mov  rfp, sp\n\t");
3047    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
3048    st->print("sub  sp, sp, rscratch1");
3049  }
3050}
3051#endif
3052
3053void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
3054  Compile* C = ra_->C;
3055  MacroAssembler _masm(&cbuf);
3056
3057  // n.b. frame size includes space for return pc and rfp
3058  const long framesize = C->frame_size_in_bytes();
3059  assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");
3060
3061  // insert a nop at the start of the prolog so we can patch in a
3062  // branch if we need to invalidate the method later
3063  __ nop();
3064
3065  int bangsize = C->bang_size_in_bytes();
3066  if (C->need_stack_bang(bangsize) && UseStackBanging)
3067    __ generate_stack_overflow_check(bangsize);
3068
3069  __ build_frame(framesize);
3070
3071  if (NotifySimulator) {
3072    __ notify(Assembler::method_entry);
3073  }
3074
3075  if (VerifyStackAtCalls) {
3076    Unimplemented();
3077  }
3078
3079  C->set_frame_complete(cbuf.insts_size());
3080
3081  if (C->has_mach_constant_base_node()) {
3082    // NOTE: We set the table base offset here because users might be
3083    // emitted before MachConstantBaseNode.
3084    Compile::ConstantTable& constant_table = C->constant_table();
3085    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
3086  }
3087}
3088
3089uint MachPrologNode::size(PhaseRegAlloc* ra_) const
3090{
3091  return MachNode::size(ra_); // too many variables; just compute it
3092                              // the hard way
3093}
3094
3095int MachPrologNode::reloc() const
3096{
3097  return 0;
3098}
3099
3100//=============================================================================
3101
3102#ifndef PRODUCT
3103void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
3104  Compile* C = ra_->C;
3105  int framesize = C->frame_slots() << LogBytesPerInt;
3106
3107  st->print("# pop frame %d\n\t",framesize);
3108
3109  if (framesize == 0) {
3110    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
3111  } else if (framesize < ((1 << 9) + 2 * wordSize)) {
3112    st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
3113    st->print("add  sp, sp, #%d\n\t", framesize);
3114  } else {
3115    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
3116    st->print("add  sp, sp, rscratch1\n\t");
3117    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
3118  }
3119
3120  if (do_polling() && C->is_method_compilation()) {
3121    st->print("# touch polling page\n\t");
3122    st->print("mov  rscratch1, #0x%lx\n\t", p2i(os::get_polling_page()));
3123    st->print("ldr zr, [rscratch1]");
3124  }
3125}
3126#endif
3127
3128void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
3129  Compile* C = ra_->C;
3130  MacroAssembler _masm(&cbuf);
3131  int framesize = C->frame_slots() << LogBytesPerInt;
3132
3133  __ remove_frame(framesize);
3134
3135  if (NotifySimulator) {
3136    __ notify(Assembler::method_reentry);
3137  }
3138
3139  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
3140    __ reserved_stack_check();
3141  }
3142
3143  if (do_polling() && C->is_method_compilation()) {
3144    __ read_polling_page(rscratch1, os::get_polling_page(), relocInfo::poll_return_type);
3145  }
3146}
3147
3148uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
3149  // Variable size. Determine dynamically.
3150  return MachNode::size(ra_);
3151}
3152
3153int MachEpilogNode::reloc() const {
3154  // Return number of relocatable values contained in this instruction.
3155  return 1; // 1 for polling page.
3156}
3157
3158const Pipeline * MachEpilogNode::pipeline() const {
3159  return MachNode::pipeline_class();
3160}
3161
3162// This method seems to be obsolete. It is declared in machnode.hpp
3163// and defined in all *.ad files, but it is never called. Should we
3164// get rid of it?
3165int MachEpilogNode::safepoint_offset() const {
3166  assert(do_polling(), "no return for this epilog node");
3167  return 4;
3168}
3169
3170//=============================================================================
3171
3172// Figure out which register class each belongs in: rc_int, rc_float or
3173// rc_stack.
3174enum RC { rc_bad, rc_int, rc_float, rc_stack };
3175
3176static enum RC rc_class(OptoReg::Name reg) {
3177
3178  if (reg == OptoReg::Bad) {
3179    return rc_bad;
3180  }
3181
3182  // we have 30 int registers * 2 halves
3183  // (rscratch1 and rscratch2 are omitted)
3184
3185  if (reg < 60) {
3186    return rc_int;
3187  }
3188
3189  // we have 32 float register * 2 halves
3190  if (reg < 60 + 128) {
3191    return rc_float;
3192  }
3193
3194  // Between float regs & stack is the flags regs.
3195  assert(OptoReg::is_stack(reg), "blow up if spilling flags");
3196
3197  return rc_stack;
3198}
3199
3200uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
3201  Compile* C = ra_->C;
3202
3203  // Get registers to move.
3204  OptoReg::Name src_hi = ra_->get_reg_second(in(1));
3205  OptoReg::Name src_lo = ra_->get_reg_first(in(1));
3206  OptoReg::Name dst_hi = ra_->get_reg_second(this);
3207  OptoReg::Name dst_lo = ra_->get_reg_first(this);
3208
3209  enum RC src_hi_rc = rc_class(src_hi);
3210  enum RC src_lo_rc = rc_class(src_lo);
3211  enum RC dst_hi_rc = rc_class(dst_hi);
3212  enum RC dst_lo_rc = rc_class(dst_lo);
3213
3214  assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
3215
3216  if (src_hi != OptoReg::Bad) {
3217    assert((src_lo&1)==0 && src_lo+1==src_hi &&
3218           (dst_lo&1)==0 && dst_lo+1==dst_hi,
3219           "expected aligned-adjacent pairs");
3220  }
3221
3222  if (src_lo == dst_lo && src_hi == dst_hi) {
3223    return 0;            // Self copy, no move.
3224  }
3225
3226  bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi &&
3227              (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi;
3228  int src_offset = ra_->reg2offset(src_lo);
3229  int dst_offset = ra_->reg2offset(dst_lo);
3230
3231  if (bottom_type()->isa_vect() != NULL) {
3232    uint ireg = ideal_reg();
3233    assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
3234    if (cbuf) {
3235      MacroAssembler _masm(cbuf);
3236      assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
3237      if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
3238        // stack->stack
3239        assert((src_offset & 7) == 0 && (dst_offset & 7) == 0, "unaligned stack offset");
3240        if (ireg == Op_VecD) {
3241          __ unspill(rscratch1, true, src_offset);
3242          __ spill(rscratch1, true, dst_offset);
3243        } else {
3244          __ spill_copy128(src_offset, dst_offset);
3245        }
3246      } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
3247        __ mov(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3248               ireg == Op_VecD ? __ T8B : __ T16B,
3249               as_FloatRegister(Matcher::_regEncode[src_lo]));
3250      } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
3251        __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
3252                       ireg == Op_VecD ? __ D : __ Q,
3253                       ra_->reg2offset(dst_lo));
3254      } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
3255        __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3256                       ireg == Op_VecD ? __ D : __ Q,
3257                       ra_->reg2offset(src_lo));
3258      } else {
3259        ShouldNotReachHere();
3260      }
3261    }
3262  } else if (cbuf) {
3263    MacroAssembler _masm(cbuf);
3264    switch (src_lo_rc) {
3265    case rc_int:
3266      if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
3267        if (is64) {
3268            __ mov(as_Register(Matcher::_regEncode[dst_lo]),
3269                   as_Register(Matcher::_regEncode[src_lo]));
3270        } else {
3271            MacroAssembler _masm(cbuf);
3272            __ movw(as_Register(Matcher::_regEncode[dst_lo]),
3273                    as_Register(Matcher::_regEncode[src_lo]));
3274        }
3275      } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy
3276        if (is64) {
3277            __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3278                     as_Register(Matcher::_regEncode[src_lo]));
3279        } else {
3280            __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3281                     as_Register(Matcher::_regEncode[src_lo]));
3282        }
3283      } else {                    // gpr --> stack spill
3284        assert(dst_lo_rc == rc_stack, "spill to bad register class");
3285        __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset);
3286      }
3287      break;
3288    case rc_float:
3289      if (dst_lo_rc == rc_int) {  // fpr --> gpr copy
3290        if (is64) {
3291            __ fmovd(as_Register(Matcher::_regEncode[dst_lo]),
3292                     as_FloatRegister(Matcher::_regEncode[src_lo]));
3293        } else {
3294            __ fmovs(as_Register(Matcher::_regEncode[dst_lo]),
3295                     as_FloatRegister(Matcher::_regEncode[src_lo]));
3296        }
3297      } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy
3298          if (cbuf) {
3299            __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3300                     as_FloatRegister(Matcher::_regEncode[src_lo]));
3301        } else {
3302            __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3303                     as_FloatRegister(Matcher::_regEncode[src_lo]));
3304        }
3305      } else {                    // fpr --> stack spill
3306        assert(dst_lo_rc == rc_stack, "spill to bad register class");
3307        __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
3308                 is64 ? __ D : __ S, dst_offset);
3309      }
3310      break;
3311    case rc_stack:
3312      if (dst_lo_rc == rc_int) {  // stack --> gpr load
3313        __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset);
3314      } else if (dst_lo_rc == rc_float) { // stack --> fpr load
3315        __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3316                   is64 ? __ D : __ S, src_offset);
3317      } else {                    // stack --> stack copy
3318        assert(dst_lo_rc == rc_stack, "spill to bad register class");
3319        __ unspill(rscratch1, is64, src_offset);
3320        __ spill(rscratch1, is64, dst_offset);
3321      }
3322      break;
3323    default:
3324      assert(false, "bad rc_class for spill");
3325      ShouldNotReachHere();
3326    }
3327  }
3328
3329  if (st) {
3330    st->print("spill ");
3331    if (src_lo_rc == rc_stack) {
3332      st->print("[sp, #%d] -> ", ra_->reg2offset(src_lo));
3333    } else {
3334      st->print("%s -> ", Matcher::regName[src_lo]);
3335    }
3336    if (dst_lo_rc == rc_stack) {
3337      st->print("[sp, #%d]", ra_->reg2offset(dst_lo));
3338    } else {
3339      st->print("%s", Matcher::regName[dst_lo]);
3340    }
3341    if (bottom_type()->isa_vect() != NULL) {
3342      st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128);
3343    } else {
3344      st->print("\t# spill size = %d", is64 ? 64:32);
3345    }
3346  }
3347
3348  return 0;
3349
3350}
3351
3352#ifndef PRODUCT
3353void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
3354  if (!ra_)
3355    st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
3356  else
3357    implementation(NULL, ra_, false, st);
3358}
3359#endif
3360
3361void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
3362  implementation(&cbuf, ra_, false, NULL);
3363}
3364
3365uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
3366  return MachNode::size(ra_);
3367}
3368
3369//=============================================================================
3370
3371#ifndef PRODUCT
3372void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
3373  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
3374  int reg = ra_->get_reg_first(this);
3375  st->print("add %s, rsp, #%d]\t# box lock",
3376            Matcher::regName[reg], offset);
3377}
3378#endif
3379
3380void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
3381  MacroAssembler _masm(&cbuf);
3382
3383  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
3384  int reg    = ra_->get_encode(this);
3385
3386  if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
3387    __ add(as_Register(reg), sp, offset);
3388  } else {
3389    ShouldNotReachHere();
3390  }
3391}
3392
3393uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
3394  // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
3395  return 4;
3396}
3397
3398//=============================================================================
3399
3400#ifndef PRODUCT
3401void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
3402{
3403  st->print_cr("# MachUEPNode");
3404  if (UseCompressedClassPointers) {
3405    st->print_cr("\tldrw rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
3406    if (Universe::narrow_klass_shift() != 0) {
3407      st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
3408    }
3409  } else {
3410   st->print_cr("\tldr rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
3411  }
3412  st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
3413  st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
3414}
3415#endif
3416
3417void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
3418{
3419  // This is the unverified entry point.
3420  MacroAssembler _masm(&cbuf);
3421
3422  __ cmp_klass(j_rarg0, rscratch2, rscratch1);
3423  Label skip;
3424  // TODO
3425  // can we avoid this skip and still use a reloc?
3426  __ br(Assembler::EQ, skip);
3427  __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
3428  __ bind(skip);
3429}
3430
3431uint MachUEPNode::size(PhaseRegAlloc* ra_) const
3432{
3433  return MachNode::size(ra_);
3434}
3435
3436// REQUIRED EMIT CODE
3437
3438//=============================================================================
3439
3440// Emit exception handler code.
3441int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
3442{
3443  // mov rscratch1 #exception_blob_entry_point
3444  // br rscratch1
3445  // Note that the code buffer's insts_mark is always relative to insts.
3446  // That's why we must use the macroassembler to generate a handler.
3447  MacroAssembler _masm(&cbuf);
3448  address base = __ start_a_stub(size_exception_handler());
3449  if (base == NULL) {
3450    ciEnv::current()->record_failure("CodeCache is full");
3451    return 0;  // CodeBuffer::expand failed
3452  }
3453  int offset = __ offset();
3454  __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
3455  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
3456  __ end_a_stub();
3457  return offset;
3458}
3459
3460// Emit deopt handler code.
3461int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
3462{
3463  // Note that the code buffer's insts_mark is always relative to insts.
3464  // That's why we must use the macroassembler to generate a handler.
3465  MacroAssembler _masm(&cbuf);
3466  address base = __ start_a_stub(size_deopt_handler());
3467  if (base == NULL) {
3468    ciEnv::current()->record_failure("CodeCache is full");
3469    return 0;  // CodeBuffer::expand failed
3470  }
3471  int offset = __ offset();
3472
3473  __ adr(lr, __ pc());
3474  __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
3475
3476  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
3477  __ end_a_stub();
3478  return offset;
3479}
3480
3481// REQUIRED MATCHER CODE
3482
3483//=============================================================================
3484
3485const bool Matcher::match_rule_supported(int opcode) {
3486
3487  switch (opcode) {
3488  default:
3489    break;
3490  }
3491
3492  if (!has_match_rule(opcode)) {
3493    return false;
3494  }
3495
3496  return true;  // Per default match rules are supported.
3497}
3498
3499const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
3500
3501  // TODO
3502  // identify extra cases that we might want to provide match rules for
3503  // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
3504  bool ret_value = match_rule_supported(opcode);
3505  // Add rules here.
3506
3507  return ret_value;  // Per default match rules are supported.
3508}
3509
3510const bool Matcher::has_predicated_vectors(void) {
3511  return false;
3512}
3513
3514const int Matcher::float_pressure(int default_pressure_threshold) {
3515  return default_pressure_threshold;
3516}
3517
3518int Matcher::regnum_to_fpu_offset(int regnum)
3519{
3520  Unimplemented();
3521  return 0;
3522}
3523
3524// Is this branch offset short enough that a short branch can be used?
3525//
3526// NOTE: If the platform does not provide any short branch variants, then
3527//       this method should return false for offset 0.
3528bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
3529  // The passed offset is relative to address of the branch.
3530
3531  return (-32768 <= offset && offset < 32768);
3532}
3533
3534const bool Matcher::isSimpleConstant64(jlong value) {
3535  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
3536  // Probably always true, even if a temp register is required.
3537  return true;
3538}
3539
3540// true just means we have fast l2f conversion
3541const bool Matcher::convL2FSupported(void) {
3542  return true;
3543}
3544
3545// Vector width in bytes.
3546const int Matcher::vector_width_in_bytes(BasicType bt) {
3547  int size = MIN2(16,(int)MaxVectorSize);
3548  // Minimum 2 values in vector
3549  if (size < 2*type2aelembytes(bt)) size = 0;
3550  // But never < 4
3551  if (size < 4) size = 0;
3552  return size;
3553}
3554
3555// Limits on vector size (number of elements) loaded into vector.
3556const int Matcher::max_vector_size(const BasicType bt) {
3557  return vector_width_in_bytes(bt)/type2aelembytes(bt);
3558}
3559const int Matcher::min_vector_size(const BasicType bt) {
3560//  For the moment limit the vector size to 8 bytes
3561    int size = 8 / type2aelembytes(bt);
3562    if (size < 2) size = 2;
3563    return size;
3564}
3565
3566// Vector ideal reg.
3567const uint Matcher::vector_ideal_reg(int len) {
3568  switch(len) {
3569    case  8: return Op_VecD;
3570    case 16: return Op_VecX;
3571  }
3572  ShouldNotReachHere();
3573  return 0;
3574}
3575
3576const uint Matcher::vector_shift_count_ideal_reg(int size) {
3577  return Op_VecX;
3578}
3579
3580// AES support not yet implemented
3581const bool Matcher::pass_original_key_for_aes() {
3582  return false;
3583}
3584
3585// x86 supports misaligned vectors store/load.
3586const bool Matcher::misaligned_vectors_ok() {
3587  return !AlignVector; // can be changed by flag
3588}
3589
3590// false => size gets scaled to BytesPerLong, ok.
3591const bool Matcher::init_array_count_is_in_bytes = false;
3592
3593// Use conditional move (CMOVL)
3594const int Matcher::long_cmove_cost() {
3595  // long cmoves are no more expensive than int cmoves
3596  return 0;
3597}
3598
3599const int Matcher::float_cmove_cost() {
3600  // float cmoves are no more expensive than int cmoves
3601  return 0;
3602}
3603
3604// Does the CPU require late expand (see block.cpp for description of late expand)?
3605const bool Matcher::require_postalloc_expand = false;
3606
3607// Do we need to mask the count passed to shift instructions or does
3608// the cpu only look at the lower 5/6 bits anyway?
3609const bool Matcher::need_masked_shift_count = false;
3610
3611// This affects two different things:
3612//  - how Decode nodes are matched
3613//  - how ImplicitNullCheck opportunities are recognized
3614// If true, the matcher will try to remove all Decodes and match them
3615// (as operands) into nodes. NullChecks are not prepared to deal with
3616// Decodes by final_graph_reshaping().
3617// If false, final_graph_reshaping() forces the decode behind the Cmp
3618// for a NullCheck. The matcher matches the Decode node into a register.
3619// Implicit_null_check optimization moves the Decode along with the
3620// memory operation back up before the NullCheck.
3621bool Matcher::narrow_oop_use_complex_address() {
3622  return Universe::narrow_oop_shift() == 0;
3623}
3624
3625bool Matcher::narrow_klass_use_complex_address() {
3626// TODO
3627// decide whether we need to set this to true
3628  return false;
3629}
3630
3631bool Matcher::const_oop_prefer_decode() {
3632  // Prefer ConN+DecodeN over ConP in simple compressed oops mode.
3633  return Universe::narrow_oop_base() == NULL;
3634}
3635
3636bool Matcher::const_klass_prefer_decode() {
3637  // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode.
3638  return Universe::narrow_klass_base() == NULL;
3639}
3640
3641// Is it better to copy float constants, or load them directly from
3642// memory?  Intel can load a float constant from a direct address,
3643// requiring no extra registers.  Most RISCs will have to materialize
3644// an address into a register first, so they would do better to copy
3645// the constant from stack.
3646const bool Matcher::rematerialize_float_constants = false;
3647
3648// If CPU can load and store mis-aligned doubles directly then no
3649// fixup is needed.  Else we split the double into 2 integer pieces
3650// and move it piece-by-piece.  Only happens when passing doubles into
3651// C code as the Java calling convention forces doubles to be aligned.
3652const bool Matcher::misaligned_doubles_ok = true;
3653
3654// No-op on amd64
3655void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
3656  Unimplemented();
3657}
3658
3659// Advertise here if the CPU requires explicit rounding operations to
3660// implement the UseStrictFP mode.
3661const bool Matcher::strict_fp_requires_explicit_rounding = false;
3662
3663// Are floats converted to double when stored to stack during
3664// deoptimization?
3665bool Matcher::float_in_double() { return true; }
3666
3667// Do ints take an entire long register or just half?
3668// The relevant question is how the int is callee-saved:
3669// the whole long is written but de-opt'ing will have to extract
3670// the relevant 32 bits.
3671const bool Matcher::int_in_long = true;
3672
3673// Return whether or not this register is ever used as an argument.
3674// This function is used on startup to build the trampoline stubs in
3675// generateOptoStub.  Registers not mentioned will be killed by the VM
3676// call in the trampoline, and arguments in those registers not be
3677// available to the callee.
3678bool Matcher::can_be_java_arg(int reg)
3679{
3680  return
3681    reg ==  R0_num || reg == R0_H_num ||
3682    reg ==  R1_num || reg == R1_H_num ||
3683    reg ==  R2_num || reg == R2_H_num ||
3684    reg ==  R3_num || reg == R3_H_num ||
3685    reg ==  R4_num || reg == R4_H_num ||
3686    reg ==  R5_num || reg == R5_H_num ||
3687    reg ==  R6_num || reg == R6_H_num ||
3688    reg ==  R7_num || reg == R7_H_num ||
3689    reg ==  V0_num || reg == V0_H_num ||
3690    reg ==  V1_num || reg == V1_H_num ||
3691    reg ==  V2_num || reg == V2_H_num ||
3692    reg ==  V3_num || reg == V3_H_num ||
3693    reg ==  V4_num || reg == V4_H_num ||
3694    reg ==  V5_num || reg == V5_H_num ||
3695    reg ==  V6_num || reg == V6_H_num ||
3696    reg ==  V7_num || reg == V7_H_num;
3697}
3698
3699bool Matcher::is_spillable_arg(int reg)
3700{
3701  return can_be_java_arg(reg);
3702}
3703
3704bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
3705  return false;
3706}
3707
3708RegMask Matcher::divI_proj_mask() {
3709  ShouldNotReachHere();
3710  return RegMask();
3711}
3712
3713// Register for MODI projection of divmodI.
3714RegMask Matcher::modI_proj_mask() {
3715  ShouldNotReachHere();
3716  return RegMask();
3717}
3718
3719// Register for DIVL projection of divmodL.
3720RegMask Matcher::divL_proj_mask() {
3721  ShouldNotReachHere();
3722  return RegMask();
3723}
3724
3725// Register for MODL projection of divmodL.
3726RegMask Matcher::modL_proj_mask() {
3727  ShouldNotReachHere();
3728  return RegMask();
3729}
3730
3731const RegMask Matcher::method_handle_invoke_SP_save_mask() {
3732  return FP_REG_mask();
3733}
3734
3735bool size_fits_all_mem_uses(AddPNode* addp, int shift) {
3736  for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
3737    Node* u = addp->fast_out(i);
3738    if (u->is_Mem()) {
3739      int opsize = u->as_Mem()->memory_size();
3740      assert(opsize > 0, "unexpected memory operand size");
3741      if (u->as_Mem()->memory_size() != (1<<shift)) {
3742        return false;
3743      }
3744    }
3745  }
3746  return true;
3747}
3748
3749const bool Matcher::convi2l_type_required = false;
3750
3751// Should the Matcher clone shifts on addressing modes, expecting them
3752// to be subsumed into complex addressing expressions or compute them
3753// into registers?
3754bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
3755  if (clone_base_plus_offset_address(m, mstack, address_visited)) {
3756    return true;
3757  }
3758
3759  Node *off = m->in(AddPNode::Offset);
3760  if (off->Opcode() == Op_LShiftL && off->in(2)->is_Con() &&
3761      size_fits_all_mem_uses(m, off->in(2)->get_int()) &&
3762      // Are there other uses besides address expressions?
3763      !is_visited(off)) {
3764    address_visited.set(off->_idx); // Flag as address_visited
3765    mstack.push(off->in(2), Visit);
3766    Node *conv = off->in(1);
3767    if (conv->Opcode() == Op_ConvI2L &&
3768        // Are there other uses besides address expressions?
3769        !is_visited(conv)) {
3770      address_visited.set(conv->_idx); // Flag as address_visited
3771      mstack.push(conv->in(1), Pre_Visit);
3772    } else {
3773      mstack.push(conv, Pre_Visit);
3774    }
3775    address_visited.test_set(m->_idx); // Flag as address_visited
3776    mstack.push(m->in(AddPNode::Address), Pre_Visit);
3777    mstack.push(m->in(AddPNode::Base), Pre_Visit);
3778    return true;
3779  } else if (off->Opcode() == Op_ConvI2L &&
3780             // Are there other uses besides address expressions?
3781             !is_visited(off)) {
3782    address_visited.test_set(m->_idx); // Flag as address_visited
3783    address_visited.set(off->_idx); // Flag as address_visited
3784    mstack.push(off->in(1), Pre_Visit);
3785    mstack.push(m->in(AddPNode::Address), Pre_Visit);
3786    mstack.push(m->in(AddPNode::Base), Pre_Visit);
3787    return true;
3788  }
3789  return false;
3790}
3791
3792// Transform:
3793// (AddP base (AddP base address (LShiftL index con)) offset)
3794// into:
3795// (AddP base (AddP base offset) (LShiftL index con))
3796// to take full advantage of ARM's addressing modes
3797void Compile::reshape_address(AddPNode* addp) {
3798  Node *addr = addp->in(AddPNode::Address);
3799  if (addr->is_AddP() && addr->in(AddPNode::Base) == addp->in(AddPNode::Base)) {
3800    const AddPNode *addp2 = addr->as_AddP();
3801    if ((addp2->in(AddPNode::Offset)->Opcode() == Op_LShiftL &&
3802         addp2->in(AddPNode::Offset)->in(2)->is_Con() &&
3803         size_fits_all_mem_uses(addp, addp2->in(AddPNode::Offset)->in(2)->get_int())) ||
3804        addp2->in(AddPNode::Offset)->Opcode() == Op_ConvI2L) {
3805
3806      // Any use that can't embed the address computation?
3807      for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
3808        Node* u = addp->fast_out(i);
3809        if (!u->is_Mem()) {
3810          return;
3811        }
3812        if (u->is_LoadVector() || u->is_StoreVector() || u->Opcode() == Op_StoreCM) {
3813          return;
3814        }
3815        if (addp2->in(AddPNode::Offset)->Opcode() != Op_ConvI2L) {
3816          int scale = 1 << addp2->in(AddPNode::Offset)->in(2)->get_int();
3817          if (VM_Version::expensive_load(u->as_Mem()->memory_size(), scale)) {
3818            return;
3819          }
3820        }
3821      }
3822
3823      Node* off = addp->in(AddPNode::Offset);
3824      Node* addr2 = addp2->in(AddPNode::Address);
3825      Node* base = addp->in(AddPNode::Base);
3826
3827      Node* new_addr = NULL;
3828      // Check whether the graph already has the new AddP we need
3829      // before we create one (no GVN available here).
3830      for (DUIterator_Fast imax, i = addr2->fast_outs(imax); i < imax; i++) {
3831        Node* u = addr2->fast_out(i);
3832        if (u->is_AddP() &&
3833            u->in(AddPNode::Base) == base &&
3834            u->in(AddPNode::Address) == addr2 &&
3835            u->in(AddPNode::Offset) == off) {
3836          new_addr = u;
3837          break;
3838        }
3839      }
3840
3841      if (new_addr == NULL) {
3842        new_addr = new AddPNode(base, addr2, off);
3843      }
3844      Node* new_off = addp2->in(AddPNode::Offset);
3845      addp->set_req(AddPNode::Address, new_addr);
3846      if (addr->outcnt() == 0) {
3847        addr->disconnect_inputs(NULL, this);
3848      }
3849      addp->set_req(AddPNode::Offset, new_off);
3850      if (off->outcnt() == 0) {
3851        off->disconnect_inputs(NULL, this);
3852      }
3853    }
3854  }
3855}
3856
3857// helper for encoding java_to_runtime calls on sim
3858//
3859// this is needed to compute the extra arguments required when
3860// planting a call to the simulator blrt instruction. the TypeFunc
3861// can be queried to identify the counts for integral, and floating
3862// arguments and the return type
3863
3864static void getCallInfo(const TypeFunc *tf, int &gpcnt, int &fpcnt, int &rtype)
3865{
3866  int gps = 0;
3867  int fps = 0;
3868  const TypeTuple *domain = tf->domain();
3869  int max = domain->cnt();
3870  for (int i = TypeFunc::Parms; i < max; i++) {
3871    const Type *t = domain->field_at(i);
3872    switch(t->basic_type()) {
3873    case T_FLOAT:
3874    case T_DOUBLE:
3875      fps++;
3876    default:
3877      gps++;
3878    }
3879  }
3880  gpcnt = gps;
3881  fpcnt = fps;
3882  BasicType rt = tf->return_type();
3883  switch (rt) {
3884  case T_VOID:
3885    rtype = MacroAssembler::ret_type_void;
3886    break;
3887  default:
3888    rtype = MacroAssembler::ret_type_integral;
3889    break;
3890  case T_FLOAT:
3891    rtype = MacroAssembler::ret_type_float;
3892    break;
3893  case T_DOUBLE:
3894    rtype = MacroAssembler::ret_type_double;
3895    break;
3896  }
3897}
3898
3899#define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN)      \
3900  MacroAssembler _masm(&cbuf);                                          \
3901  {                                                                     \
3902    guarantee(INDEX == -1, "mode not permitted for volatile");          \
3903    guarantee(DISP == 0, "mode not permitted for volatile");            \
3904    guarantee(SCALE == 0, "mode not permitted for volatile");           \
3905    __ INSN(REG, as_Register(BASE));                                    \
3906  }
3907
3908typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
3909typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
3910typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
3911                                  MacroAssembler::SIMD_RegVariant T, const Address &adr);
3912
3913  // Used for all non-volatile memory accesses.  The use of
3914  // $mem->opcode() to discover whether this pattern uses sign-extended
3915  // offsets is something of a kludge.
3916  static void loadStore(MacroAssembler masm, mem_insn insn,
3917                         Register reg, int opcode,
3918                         Register base, int index, int size, int disp)
3919  {
3920    Address::extend scale;
3921
3922    // Hooboy, this is fugly.  We need a way to communicate to the
3923    // encoder that the index needs to be sign extended, so we have to
3924    // enumerate all the cases.
3925    switch (opcode) {
3926    case INDINDEXSCALEDI2L:
3927    case INDINDEXSCALEDI2LN:
3928    case INDINDEXI2L:
3929    case INDINDEXI2LN:
3930      scale = Address::sxtw(size);
3931      break;
3932    default:
3933      scale = Address::lsl(size);
3934    }
3935
3936    if (index == -1) {
3937      (masm.*insn)(reg, Address(base, disp));
3938    } else {
3939      assert(disp == 0, "unsupported address mode: disp = %d", disp);
3940      (masm.*insn)(reg, Address(base, as_Register(index), scale));
3941    }
3942  }
3943
3944  static void loadStore(MacroAssembler masm, mem_float_insn insn,
3945                         FloatRegister reg, int opcode,
3946                         Register base, int index, int size, int disp)
3947  {
3948    Address::extend scale;
3949
3950    switch (opcode) {
3951    case INDINDEXSCALEDI2L:
3952    case INDINDEXSCALEDI2LN:
3953      scale = Address::sxtw(size);
3954      break;
3955    default:
3956      scale = Address::lsl(size);
3957    }
3958
3959     if (index == -1) {
3960      (masm.*insn)(reg, Address(base, disp));
3961    } else {
3962      assert(disp == 0, "unsupported address mode: disp = %d", disp);
3963      (masm.*insn)(reg, Address(base, as_Register(index), scale));
3964    }
3965  }
3966
3967  static void loadStore(MacroAssembler masm, mem_vector_insn insn,
3968                         FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
3969                         int opcode, Register base, int index, int size, int disp)
3970  {
3971    if (index == -1) {
3972      (masm.*insn)(reg, T, Address(base, disp));
3973    } else {
3974      assert(disp == 0, "unsupported address mode");
3975      (masm.*insn)(reg, T, Address(base, as_Register(index), Address::lsl(size)));
3976    }
3977  }
3978
3979%}
3980
3981
3982
3983//----------ENCODING BLOCK-----------------------------------------------------
3984// This block specifies the encoding classes used by the compiler to
3985// output byte streams.  Encoding classes are parameterized macros
3986// used by Machine Instruction Nodes in order to generate the bit
3987// encoding of the instruction.  Operands specify their base encoding
3988// interface with the interface keyword.  There are currently
3989// supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, &
3990// COND_INTER.  REG_INTER causes an operand to generate a function
3991// which returns its register number when queried.  CONST_INTER causes
3992// an operand to generate a function which returns the value of the
3993// constant when queried.  MEMORY_INTER causes an operand to generate
3994// four functions which return the Base Register, the Index Register,
3995// the Scale Value, and the Offset Value of the operand when queried.
3996// COND_INTER causes an operand to generate six functions which return
3997// the encoding code (ie - encoding bits for the instruction)
3998// associated with each basic boolean condition for a conditional
3999// instruction.
4000//
4001// Instructions specify two basic values for encoding.  Again, a
4002// function is available to check if the constant displacement is an
4003// oop. They use the ins_encode keyword to specify their encoding
4004// classes (which must be a sequence of enc_class names, and their
4005// parameters, specified in the encoding block), and they use the
4006// opcode keyword to specify, in order, their primary, secondary, and
4007// tertiary opcode.  Only the opcode sections which a particular
4008// instruction needs for encoding need to be specified.
4009encode %{
4010  // Build emit functions for each basic byte or larger field in the
4011  // intel encoding scheme (opcode, rm, sib, immediate), and call them
4012  // from C++ code in the enc_class source block.  Emit functions will
4013  // live in the main source block for now.  In future, we can
4014  // generalize this by adding a syntax that specifies the sizes of
4015  // fields in an order, so that the adlc can build the emit functions
4016  // automagically
4017
4018  // catch all for unimplemented encodings
4019  enc_class enc_unimplemented %{
4020    MacroAssembler _masm(&cbuf);
4021    __ unimplemented("C2 catch all");
4022  %}
4023
4024  // BEGIN Non-volatile memory access
4025
4026  enc_class aarch64_enc_ldrsbw(iRegI dst, memory mem) %{
4027    Register dst_reg = as_Register($dst$$reg);
4028    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, dst_reg, $mem->opcode(),
4029               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4030  %}
4031
4032  enc_class aarch64_enc_ldrsb(iRegI dst, memory mem) %{
4033    Register dst_reg = as_Register($dst$$reg);
4034    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsb, dst_reg, $mem->opcode(),
4035               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4036  %}
4037
4038  enc_class aarch64_enc_ldrb(iRegI dst, memory mem) %{
4039    Register dst_reg = as_Register($dst$$reg);
4040    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
4041               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4042  %}
4043
4044  enc_class aarch64_enc_ldrb(iRegL dst, memory mem) %{
4045    Register dst_reg = as_Register($dst$$reg);
4046    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
4047               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4048  %}
4049
4050  enc_class aarch64_enc_ldrshw(iRegI dst, memory mem) %{
4051    Register dst_reg = as_Register($dst$$reg);
4052    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrshw, dst_reg, $mem->opcode(),
4053               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4054  %}
4055
4056  enc_class aarch64_enc_ldrsh(iRegI dst, memory mem) %{
4057    Register dst_reg = as_Register($dst$$reg);
4058    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsh, dst_reg, $mem->opcode(),
4059               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4060  %}
4061
4062  enc_class aarch64_enc_ldrh(iRegI dst, memory mem) %{
4063    Register dst_reg = as_Register($dst$$reg);
4064    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
4065               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4066  %}
4067
4068  enc_class aarch64_enc_ldrh(iRegL dst, memory mem) %{
4069    Register dst_reg = as_Register($dst$$reg);
4070    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
4071               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4072  %}
4073
4074  enc_class aarch64_enc_ldrw(iRegI dst, memory mem) %{
4075    Register dst_reg = as_Register($dst$$reg);
4076    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
4077               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4078  %}
4079
4080  enc_class aarch64_enc_ldrw(iRegL dst, memory mem) %{
4081    Register dst_reg = as_Register($dst$$reg);
4082    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
4083               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4084  %}
4085
4086  enc_class aarch64_enc_ldrsw(iRegL dst, memory mem) %{
4087    Register dst_reg = as_Register($dst$$reg);
4088    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsw, dst_reg, $mem->opcode(),
4089               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4090  %}
4091
4092  enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{
4093    Register dst_reg = as_Register($dst$$reg);
4094    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(),
4095               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4096  %}
4097
4098  enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{
4099    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
4100    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
4101               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4102  %}
4103
4104  enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{
4105    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
4106    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
4107               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4108  %}
4109
4110  enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{
4111    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
4112    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
4113       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4114  %}
4115
4116  enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{
4117    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
4118    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D,
4119       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4120  %}
4121
4122  enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{
4123    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
4124    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q,
4125       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4126  %}
4127
4128  enc_class aarch64_enc_strb(iRegI src, memory mem) %{
4129    Register src_reg = as_Register($src$$reg);
4130    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(),
4131               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4132  %}
4133
4134  enc_class aarch64_enc_strb0(memory mem) %{
4135    MacroAssembler _masm(&cbuf);
4136    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
4137               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4138  %}
4139
4140  enc_class aarch64_enc_strb0_ordered(memory mem) %{
4141    MacroAssembler _masm(&cbuf);
4142    __ membar(Assembler::StoreStore);
4143    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
4144               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4145  %}
4146
4147  enc_class aarch64_enc_strh(iRegI src, memory mem) %{
4148    Register src_reg = as_Register($src$$reg);
4149    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(),
4150               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4151  %}
4152
4153  enc_class aarch64_enc_strh0(memory mem) %{
4154    MacroAssembler _masm(&cbuf);
4155    loadStore(_masm, &MacroAssembler::strh, zr, $mem->opcode(),
4156               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4157  %}
4158
4159  enc_class aarch64_enc_strw(iRegI src, memory mem) %{
4160    Register src_reg = as_Register($src$$reg);
4161    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strw, src_reg, $mem->opcode(),
4162               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4163  %}
4164
4165  enc_class aarch64_enc_strw0(memory mem) %{
4166    MacroAssembler _masm(&cbuf);
4167    loadStore(_masm, &MacroAssembler::strw, zr, $mem->opcode(),
4168               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4169  %}
4170
4171  enc_class aarch64_enc_str(iRegL src, memory mem) %{
4172    Register src_reg = as_Register($src$$reg);
4173    // we sometimes get asked to store the stack pointer into the
4174    // current thread -- we cannot do that directly on AArch64
4175    if (src_reg == r31_sp) {
4176      MacroAssembler _masm(&cbuf);
4177      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
4178      __ mov(rscratch2, sp);
4179      src_reg = rscratch2;
4180    }
4181    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, $mem->opcode(),
4182               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4183  %}
4184
4185  enc_class aarch64_enc_str0(memory mem) %{
4186    MacroAssembler _masm(&cbuf);
4187    loadStore(_masm, &MacroAssembler::str, zr, $mem->opcode(),
4188               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4189  %}
4190
4191  enc_class aarch64_enc_strs(vRegF src, memory mem) %{
4192    FloatRegister src_reg = as_FloatRegister($src$$reg);
4193    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(),
4194               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4195  %}
4196
4197  enc_class aarch64_enc_strd(vRegD src, memory mem) %{
4198    FloatRegister src_reg = as_FloatRegister($src$$reg);
4199    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(),
4200               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4201  %}
4202
4203  enc_class aarch64_enc_strvS(vecD src, memory mem) %{
4204    FloatRegister src_reg = as_FloatRegister($src$$reg);
4205    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
4206       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4207  %}
4208
4209  enc_class aarch64_enc_strvD(vecD src, memory mem) %{
4210    FloatRegister src_reg = as_FloatRegister($src$$reg);
4211    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D,
4212       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4213  %}
4214
4215  enc_class aarch64_enc_strvQ(vecX src, memory mem) %{
4216    FloatRegister src_reg = as_FloatRegister($src$$reg);
4217    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q,
4218       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4219  %}
4220
4221  // END Non-volatile memory access
4222
4223  // volatile loads and stores
4224
4225  enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{
4226    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4227                 rscratch1, stlrb);
4228  %}
4229
4230  enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{
4231    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4232                 rscratch1, stlrh);
4233  %}
4234
4235  enc_class aarch64_enc_stlrw(iRegI src, memory mem) %{
4236    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4237                 rscratch1, stlrw);
4238  %}
4239
4240
4241  enc_class aarch64_enc_ldarsbw(iRegI dst, memory mem) %{
4242    Register dst_reg = as_Register($dst$$reg);
4243    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4244             rscratch1, ldarb);
4245    __ sxtbw(dst_reg, dst_reg);
4246  %}
4247
4248  enc_class aarch64_enc_ldarsb(iRegL dst, memory mem) %{
4249    Register dst_reg = as_Register($dst$$reg);
4250    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4251             rscratch1, ldarb);
4252    __ sxtb(dst_reg, dst_reg);
4253  %}
4254
4255  enc_class aarch64_enc_ldarbw(iRegI dst, memory mem) %{
4256    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4257             rscratch1, ldarb);
4258  %}
4259
4260  enc_class aarch64_enc_ldarb(iRegL dst, memory mem) %{
4261    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4262             rscratch1, ldarb);
4263  %}
4264
4265  enc_class aarch64_enc_ldarshw(iRegI dst, memory mem) %{
4266    Register dst_reg = as_Register($dst$$reg);
4267    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4268             rscratch1, ldarh);
4269    __ sxthw(dst_reg, dst_reg);
4270  %}
4271
4272  enc_class aarch64_enc_ldarsh(iRegL dst, memory mem) %{
4273    Register dst_reg = as_Register($dst$$reg);
4274    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4275             rscratch1, ldarh);
4276    __ sxth(dst_reg, dst_reg);
4277  %}
4278
4279  enc_class aarch64_enc_ldarhw(iRegI dst, memory mem) %{
4280    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4281             rscratch1, ldarh);
4282  %}
4283
4284  enc_class aarch64_enc_ldarh(iRegL dst, memory mem) %{
4285    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4286             rscratch1, ldarh);
4287  %}
4288
4289  enc_class aarch64_enc_ldarw(iRegI dst, memory mem) %{
4290    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4291             rscratch1, ldarw);
4292  %}
4293
4294  enc_class aarch64_enc_ldarw(iRegL dst, memory mem) %{
4295    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4296             rscratch1, ldarw);
4297  %}
4298
4299  enc_class aarch64_enc_ldar(iRegL dst, memory mem) %{
4300    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4301             rscratch1, ldar);
4302  %}
4303
4304  enc_class aarch64_enc_fldars(vRegF dst, memory mem) %{
4305    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4306             rscratch1, ldarw);
4307    __ fmovs(as_FloatRegister($dst$$reg), rscratch1);
4308  %}
4309
4310  enc_class aarch64_enc_fldard(vRegD dst, memory mem) %{
4311    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4312             rscratch1, ldar);
4313    __ fmovd(as_FloatRegister($dst$$reg), rscratch1);
4314  %}
4315
4316  enc_class aarch64_enc_stlr(iRegL src, memory mem) %{
4317    Register src_reg = as_Register($src$$reg);
4318    // we sometimes get asked to store the stack pointer into the
4319    // current thread -- we cannot do that directly on AArch64
4320    if (src_reg == r31_sp) {
4321        MacroAssembler _masm(&cbuf);
4322      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
4323      __ mov(rscratch2, sp);
4324      src_reg = rscratch2;
4325    }
4326    MOV_VOLATILE(src_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4327                 rscratch1, stlr);
4328  %}
4329
4330  enc_class aarch64_enc_fstlrs(vRegF src, memory mem) %{
4331    {
4332      MacroAssembler _masm(&cbuf);
4333      FloatRegister src_reg = as_FloatRegister($src$$reg);
4334      __ fmovs(rscratch2, src_reg);
4335    }
4336    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4337                 rscratch1, stlrw);
4338  %}
4339
4340  enc_class aarch64_enc_fstlrd(vRegD src, memory mem) %{
4341    {
4342      MacroAssembler _masm(&cbuf);
4343      FloatRegister src_reg = as_FloatRegister($src$$reg);
4344      __ fmovd(rscratch2, src_reg);
4345    }
4346    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4347                 rscratch1, stlr);
4348  %}
4349
4350  // synchronized read/update encodings
4351
4352  enc_class aarch64_enc_ldaxr(iRegL dst, memory mem) %{
4353    MacroAssembler _masm(&cbuf);
4354    Register dst_reg = as_Register($dst$$reg);
4355    Register base = as_Register($mem$$base);
4356    int index = $mem$$index;
4357    int scale = $mem$$scale;
4358    int disp = $mem$$disp;
4359    if (index == -1) {
4360       if (disp != 0) {
4361        __ lea(rscratch1, Address(base, disp));
4362        __ ldaxr(dst_reg, rscratch1);
4363      } else {
4364        // TODO
4365        // should we ever get anything other than this case?
4366        __ ldaxr(dst_reg, base);
4367      }
4368    } else {
4369      Register index_reg = as_Register(index);
4370      if (disp == 0) {
4371        __ lea(rscratch1, Address(base, index_reg, Address::lsl(scale)));
4372        __ ldaxr(dst_reg, rscratch1);
4373      } else {
4374        __ lea(rscratch1, Address(base, disp));
4375        __ lea(rscratch1, Address(rscratch1, index_reg, Address::lsl(scale)));
4376        __ ldaxr(dst_reg, rscratch1);
4377      }
4378    }
4379  %}
4380
4381  enc_class aarch64_enc_stlxr(iRegLNoSp src, memory mem) %{
4382    MacroAssembler _masm(&cbuf);
4383    Register src_reg = as_Register($src$$reg);
4384    Register base = as_Register($mem$$base);
4385    int index = $mem$$index;
4386    int scale = $mem$$scale;
4387    int disp = $mem$$disp;
4388    if (index == -1) {
4389       if (disp != 0) {
4390        __ lea(rscratch2, Address(base, disp));
4391        __ stlxr(rscratch1, src_reg, rscratch2);
4392      } else {
4393        // TODO
4394        // should we ever get anything other than this case?
4395        __ stlxr(rscratch1, src_reg, base);
4396      }
4397    } else {
4398      Register index_reg = as_Register(index);
4399      if (disp == 0) {
4400        __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
4401        __ stlxr(rscratch1, src_reg, rscratch2);
4402      } else {
4403        __ lea(rscratch2, Address(base, disp));
4404        __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
4405        __ stlxr(rscratch1, src_reg, rscratch2);
4406      }
4407    }
4408    __ cmpw(rscratch1, zr);
4409  %}
4410
4411  enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
4412    MacroAssembler _masm(&cbuf);
4413    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
4414    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
4415               Assembler::xword, /*acquire*/ false, /*release*/ true,
4416               /*weak*/ false, noreg);
4417  %}
4418
4419  enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
4420    MacroAssembler _masm(&cbuf);
4421    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
4422    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
4423               Assembler::word, /*acquire*/ false, /*release*/ true,
4424               /*weak*/ false, noreg);
4425  %}
4426
4427  enc_class aarch64_enc_cmpxchgs(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
4428    MacroAssembler _masm(&cbuf);
4429    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
4430    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
4431               Assembler::halfword, /*acquire*/ false, /*release*/ true,
4432               /*weak*/ false, noreg);
4433  %}
4434
4435  enc_class aarch64_enc_cmpxchgb(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
4436    MacroAssembler _masm(&cbuf);
4437    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
4438    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
4439               Assembler::byte, /*acquire*/ false, /*release*/ true,
4440               /*weak*/ false, noreg);
4441  %}  
4442    
4443
4444  // The only difference between aarch64_enc_cmpxchg and
4445  // aarch64_enc_cmpxchg_acq is that we use load-acquire in the
4446  // CompareAndSwap sequence to serve as a barrier on acquiring a
4447  // lock.
4448  enc_class aarch64_enc_cmpxchg_acq(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
4449    MacroAssembler _masm(&cbuf);
4450    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
4451    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
4452               Assembler::xword, /*acquire*/ true, /*release*/ true,
4453               /*weak*/ false, noreg);
4454  %}
4455
4456  enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
4457    MacroAssembler _masm(&cbuf);
4458    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
4459    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
4460               Assembler::word, /*acquire*/ true, /*release*/ true,
4461               /*weak*/ false, noreg);
4462  %}
4463
4464
4465  // auxiliary used for CompareAndSwapX to set result register
4466  enc_class aarch64_enc_cset_eq(iRegINoSp res) %{
4467    MacroAssembler _masm(&cbuf);
4468    Register res_reg = as_Register($res$$reg);
4469    __ cset(res_reg, Assembler::EQ);
4470  %}
4471
4472  // prefetch encodings
4473
4474  enc_class aarch64_enc_prefetchw(memory mem) %{
4475    MacroAssembler _masm(&cbuf);
4476    Register base = as_Register($mem$$base);
4477    int index = $mem$$index;
4478    int scale = $mem$$scale;
4479    int disp = $mem$$disp;
4480    if (index == -1) {
4481      __ prfm(Address(base, disp), PSTL1KEEP);
4482    } else {
4483      Register index_reg = as_Register(index);
4484      if (disp == 0) {
4485        __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1KEEP);
4486      } else {
4487        __ lea(rscratch1, Address(base, disp));
4488	__ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1KEEP);
4489      }
4490    }
4491  %}
4492
4493  /// mov envcodings
4494
4495  enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
4496    MacroAssembler _masm(&cbuf);
4497    u_int32_t con = (u_int32_t)$src$$constant;
4498    Register dst_reg = as_Register($dst$$reg);
4499    if (con == 0) {
4500      __ movw(dst_reg, zr);
4501    } else {
4502      __ movw(dst_reg, con);
4503    }
4504  %}
4505
4506  enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
4507    MacroAssembler _masm(&cbuf);
4508    Register dst_reg = as_Register($dst$$reg);
4509    u_int64_t con = (u_int64_t)$src$$constant;
4510    if (con == 0) {
4511      __ mov(dst_reg, zr);
4512    } else {
4513      __ mov(dst_reg, con);
4514    }
4515  %}
4516
4517  enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
4518    MacroAssembler _masm(&cbuf);
4519    Register dst_reg = as_Register($dst$$reg);
4520    address con = (address)$src$$constant;
4521    if (con == NULL || con == (address)1) {
4522      ShouldNotReachHere();
4523    } else {
4524      relocInfo::relocType rtype = $src->constant_reloc();
4525      if (rtype == relocInfo::oop_type) {
4526        __ movoop(dst_reg, (jobject)con, /*immediate*/true);
4527      } else if (rtype == relocInfo::metadata_type) {
4528        __ mov_metadata(dst_reg, (Metadata*)con);
4529      } else {
4530        assert(rtype == relocInfo::none, "unexpected reloc type");
4531        if (con < (address)(uintptr_t)os::vm_page_size()) {
4532          __ mov(dst_reg, con);
4533        } else {
4534          unsigned long offset;
4535          __ adrp(dst_reg, con, offset);
4536          __ add(dst_reg, dst_reg, offset);
4537        }
4538      }
4539    }
4540  %}
4541
4542  enc_class aarch64_enc_mov_p0(iRegP dst, immP0 src) %{
4543    MacroAssembler _masm(&cbuf);
4544    Register dst_reg = as_Register($dst$$reg);
4545    __ mov(dst_reg, zr);
4546  %}
4547
4548  enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
4549    MacroAssembler _masm(&cbuf);
4550    Register dst_reg = as_Register($dst$$reg);
4551    __ mov(dst_reg, (u_int64_t)1);
4552  %}
4553
4554  enc_class aarch64_enc_mov_poll_page(iRegP dst, immPollPage src) %{
4555    MacroAssembler _masm(&cbuf);
4556    address page = (address)$src$$constant;
4557    Register dst_reg = as_Register($dst$$reg);
4558    unsigned long off;
4559    __ adrp(dst_reg, Address(page, relocInfo::poll_type), off);
4560    assert(off == 0, "assumed offset == 0");
4561  %}
4562
4563  enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
4564    MacroAssembler _masm(&cbuf);
4565    __ load_byte_map_base($dst$$Register);
4566  %}
4567
4568  enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
4569    MacroAssembler _masm(&cbuf);
4570    Register dst_reg = as_Register($dst$$reg);
4571    address con = (address)$src$$constant;
4572    if (con == NULL) {
4573      ShouldNotReachHere();
4574    } else {
4575      relocInfo::relocType rtype = $src->constant_reloc();
4576      assert(rtype == relocInfo::oop_type, "unexpected reloc type");
4577      __ set_narrow_oop(dst_reg, (jobject)con);
4578    }
4579  %}
4580
4581  enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
4582    MacroAssembler _masm(&cbuf);
4583    Register dst_reg = as_Register($dst$$reg);
4584    __ mov(dst_reg, zr);
4585  %}
4586
4587  enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
4588    MacroAssembler _masm(&cbuf);
4589    Register dst_reg = as_Register($dst$$reg);
4590    address con = (address)$src$$constant;
4591    if (con == NULL) {
4592      ShouldNotReachHere();
4593    } else {
4594      relocInfo::relocType rtype = $src->constant_reloc();
4595      assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
4596      __ set_narrow_klass(dst_reg, (Klass *)con);
4597    }
4598  %}
4599
4600  // arithmetic encodings
4601
4602  enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
4603    MacroAssembler _masm(&cbuf);
4604    Register dst_reg = as_Register($dst$$reg);
4605    Register src_reg = as_Register($src1$$reg);
4606    int32_t con = (int32_t)$src2$$constant;
4607    // add has primary == 0, subtract has primary == 1
4608    if ($primary) { con = -con; }
4609    if (con < 0) {
4610      __ subw(dst_reg, src_reg, -con);
4611    } else {
4612      __ addw(dst_reg, src_reg, con);
4613    }
4614  %}
4615
4616  enc_class aarch64_enc_addsub_imm(iRegL dst, iRegL src1, immLAddSub src2) %{
4617    MacroAssembler _masm(&cbuf);
4618    Register dst_reg = as_Register($dst$$reg);
4619    Register src_reg = as_Register($src1$$reg);
4620    int32_t con = (int32_t)$src2$$constant;
4621    // add has primary == 0, subtract has primary == 1
4622    if ($primary) { con = -con; }
4623    if (con < 0) {
4624      __ sub(dst_reg, src_reg, -con);
4625    } else {
4626      __ add(dst_reg, src_reg, con);
4627    }
4628  %}
4629
4630  enc_class aarch64_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
4631    MacroAssembler _masm(&cbuf);
4632   Register dst_reg = as_Register($dst$$reg);
4633   Register src1_reg = as_Register($src1$$reg);
4634   Register src2_reg = as_Register($src2$$reg);
4635    __ corrected_idivl(dst_reg, src1_reg, src2_reg, false, rscratch1);
4636  %}
4637
4638  enc_class aarch64_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
4639    MacroAssembler _masm(&cbuf);
4640   Register dst_reg = as_Register($dst$$reg);
4641   Register src1_reg = as_Register($src1$$reg);
4642   Register src2_reg = as_Register($src2$$reg);
4643    __ corrected_idivq(dst_reg, src1_reg, src2_reg, false, rscratch1);
4644  %}
4645
4646  enc_class aarch64_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
4647    MacroAssembler _masm(&cbuf);
4648   Register dst_reg = as_Register($dst$$reg);
4649   Register src1_reg = as_Register($src1$$reg);
4650   Register src2_reg = as_Register($src2$$reg);
4651    __ corrected_idivl(dst_reg, src1_reg, src2_reg, true, rscratch1);
4652  %}
4653
4654  enc_class aarch64_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
4655    MacroAssembler _masm(&cbuf);
4656   Register dst_reg = as_Register($dst$$reg);
4657   Register src1_reg = as_Register($src1$$reg);
4658   Register src2_reg = as_Register($src2$$reg);
4659    __ corrected_idivq(dst_reg, src1_reg, src2_reg, true, rscratch1);
4660  %}
4661
4662  // compare instruction encodings
4663
4664  enc_class aarch64_enc_cmpw(iRegI src1, iRegI src2) %{
4665    MacroAssembler _masm(&cbuf);
4666    Register reg1 = as_Register($src1$$reg);
4667    Register reg2 = as_Register($src2$$reg);
4668    __ cmpw(reg1, reg2);
4669  %}
4670
4671  enc_class aarch64_enc_cmpw_imm_addsub(iRegI src1, immIAddSub src2) %{
4672    MacroAssembler _masm(&cbuf);
4673    Register reg = as_Register($src1$$reg);
4674    int32_t val = $src2$$constant;
4675    if (val >= 0) {
4676      __ subsw(zr, reg, val);
4677    } else {
4678      __ addsw(zr, reg, -val);
4679    }
4680  %}
4681
4682  enc_class aarch64_enc_cmpw_imm(iRegI src1, immI src2) %{
4683    MacroAssembler _masm(&cbuf);
4684    Register reg1 = as_Register($src1$$reg);
4685    u_int32_t val = (u_int32_t)$src2$$constant;
4686    __ movw(rscratch1, val);
4687    __ cmpw(reg1, rscratch1);
4688  %}
4689
4690  enc_class aarch64_enc_cmp(iRegL src1, iRegL src2) %{
4691    MacroAssembler _masm(&cbuf);
4692    Register reg1 = as_Register($src1$$reg);
4693    Register reg2 = as_Register($src2$$reg);
4694    __ cmp(reg1, reg2);
4695  %}
4696
4697  enc_class aarch64_enc_cmp_imm_addsub(iRegL src1, immL12 src2) %{
4698    MacroAssembler _masm(&cbuf);
4699    Register reg = as_Register($src1$$reg);
4700    int64_t val = $src2$$constant;
4701    if (val >= 0) {
4702      __ subs(zr, reg, val);
4703    } else if (val != -val) {
4704      __ adds(zr, reg, -val);
4705    } else {
4706    // aargh, Long.MIN_VALUE is a special case
4707      __ orr(rscratch1, zr, (u_int64_t)val);
4708      __ subs(zr, reg, rscratch1);
4709    }
4710  %}
4711
4712  enc_class aarch64_enc_cmp_imm(iRegL src1, immL src2) %{
4713    MacroAssembler _masm(&cbuf);
4714    Register reg1 = as_Register($src1$$reg);
4715    u_int64_t val = (u_int64_t)$src2$$constant;
4716    __ mov(rscratch1, val);
4717    __ cmp(reg1, rscratch1);
4718  %}
4719
4720  enc_class aarch64_enc_cmpp(iRegP src1, iRegP src2) %{
4721    MacroAssembler _masm(&cbuf);
4722    Register reg1 = as_Register($src1$$reg);
4723    Register reg2 = as_Register($src2$$reg);
4724    __ cmp(reg1, reg2);
4725  %}
4726
4727  enc_class aarch64_enc_cmpn(iRegN src1, iRegN src2) %{
4728    MacroAssembler _masm(&cbuf);
4729    Register reg1 = as_Register($src1$$reg);
4730    Register reg2 = as_Register($src2$$reg);
4731    __ cmpw(reg1, reg2);
4732  %}
4733
4734  enc_class aarch64_enc_testp(iRegP src) %{
4735    MacroAssembler _masm(&cbuf);
4736    Register reg = as_Register($src$$reg);
4737    __ cmp(reg, zr);
4738  %}
4739
4740  enc_class aarch64_enc_testn(iRegN src) %{
4741    MacroAssembler _masm(&cbuf);
4742    Register reg = as_Register($src$$reg);
4743    __ cmpw(reg, zr);
4744  %}
4745
4746  enc_class aarch64_enc_b(label lbl) %{
4747    MacroAssembler _masm(&cbuf);
4748    Label *L = $lbl$$label;
4749    __ b(*L);
4750  %}
4751
4752  enc_class aarch64_enc_br_con(cmpOp cmp, label lbl) %{
4753    MacroAssembler _masm(&cbuf);
4754    Label *L = $lbl$$label;
4755    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
4756  %}
4757
4758  enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
4759    MacroAssembler _masm(&cbuf);
4760    Label *L = $lbl$$label;
4761    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
4762  %}
4763
4764  enc_class aarch64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result)
4765  %{
4766     Register sub_reg = as_Register($sub$$reg);
4767     Register super_reg = as_Register($super$$reg);
4768     Register temp_reg = as_Register($temp$$reg);
4769     Register result_reg = as_Register($result$$reg);
4770
4771     Label miss;
4772     MacroAssembler _masm(&cbuf);
4773     __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
4774                                     NULL, &miss,
4775                                     /*set_cond_codes:*/ true);
4776     if ($primary) {
4777       __ mov(result_reg, zr);
4778     }
4779     __ bind(miss);
4780  %}
4781
4782  enc_class aarch64_enc_java_static_call(method meth) %{
4783    MacroAssembler _masm(&cbuf);
4784
4785    address addr = (address)$meth$$method;
4786    address call;
4787    if (!_method) {
4788      // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
4789      call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
4790    } else {
4791      int method_index = resolved_method_index(cbuf);
4792      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
4793                                                  : static_call_Relocation::spec(method_index);
4794      call = __ trampoline_call(Address(addr, rspec), &cbuf);
4795
4796      // Emit stub for static call
4797      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
4798      if (stub == NULL) {
4799        ciEnv::current()->record_failure("CodeCache is full");
4800        return;
4801      }
4802    }
4803    if (call == NULL) {
4804      ciEnv::current()->record_failure("CodeCache is full");
4805      return;
4806    }
4807  %}
4808
4809  enc_class aarch64_enc_java_dynamic_call(method meth) %{
4810    MacroAssembler _masm(&cbuf);
4811    int method_index = resolved_method_index(cbuf);
4812    address call = __ ic_call((address)$meth$$method, method_index);
4813    if (call == NULL) {
4814      ciEnv::current()->record_failure("CodeCache is full");
4815      return;
4816    }
4817  %}
4818
4819  enc_class aarch64_enc_call_epilog() %{
4820    MacroAssembler _masm(&cbuf);
4821    if (VerifyStackAtCalls) {
4822      // Check that stack depth is unchanged: find majik cookie on stack
4823      __ call_Unimplemented();
4824    }
4825  %}
4826
4827  enc_class aarch64_enc_java_to_runtime(method meth) %{
4828    MacroAssembler _masm(&cbuf);
4829
4830    // some calls to generated routines (arraycopy code) are scheduled
4831    // by C2 as runtime calls. if so we can call them using a br (they
4832    // will be in a reachable segment) otherwise we have to use a blrt
4833    // which loads the absolute address into a register.
4834    address entry = (address)$meth$$method;
4835    CodeBlob *cb = CodeCache::find_blob(entry);
4836    if (cb) {
4837      address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
4838      if (call == NULL) {
4839        ciEnv::current()->record_failure("CodeCache is full");
4840        return;
4841      }
4842    } else {
4843      int gpcnt;
4844      int fpcnt;
4845      int rtype;
4846      getCallInfo(tf(), gpcnt, fpcnt, rtype);
4847      Label retaddr;
4848      __ adr(rscratch2, retaddr);
4849      __ lea(rscratch1, RuntimeAddress(entry));
4850      // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc()
4851      __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
4852      __ blrt(rscratch1, gpcnt, fpcnt, rtype);
4853      __ bind(retaddr);
4854      __ add(sp, sp, 2 * wordSize);
4855    }
4856  %}
4857
4858  enc_class aarch64_enc_rethrow() %{
4859    MacroAssembler _masm(&cbuf);
4860    __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
4861  %}
4862
4863  enc_class aarch64_enc_ret() %{
4864    MacroAssembler _masm(&cbuf);
4865    __ ret(lr);
4866  %}
4867
4868  enc_class aarch64_enc_tail_call(iRegP jump_target) %{
4869    MacroAssembler _masm(&cbuf);
4870    Register target_reg = as_Register($jump_target$$reg);
4871    __ br(target_reg);
4872  %}
4873
4874  enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
4875    MacroAssembler _masm(&cbuf);
4876    Register target_reg = as_Register($jump_target$$reg);
4877    // exception oop should be in r0
4878    // ret addr has been popped into lr
4879    // callee expects it in r3
4880    __ mov(r3, lr);
4881    __ br(target_reg);
4882  %}
4883
4884  enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
4885    MacroAssembler _masm(&cbuf);
4886    Register oop = as_Register($object$$reg);
4887    Register box = as_Register($box$$reg);
4888    Register disp_hdr = as_Register($tmp$$reg);
4889    Register tmp = as_Register($tmp2$$reg);
4890    Label cont;
4891    Label object_has_monitor;
4892    Label cas_failed;
4893
4894    assert_different_registers(oop, box, tmp, disp_hdr);
4895
4896    // Load markOop from object into displaced_header.
4897    __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));
4898
4899    // Always do locking in runtime.
4900    if (EmitSync & 0x01) {
4901      __ cmp(oop, zr);
4902      return;
4903    }
4904
4905    if (UseBiasedLocking && !UseOptoBiasInlining) {
4906      __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont);
4907    }
4908
4909    // Handle existing monitor
4910    if ((EmitSync & 0x02) == 0) {
4911      // we can use AArch64's bit test and branch here but
4912      // markoopDesc does not define a bit index just the bit value
4913      // so assert in case the bit pos changes
4914#     define __monitor_value_log2 1
4915      assert(markOopDesc::monitor_value == (1 << __monitor_value_log2), "incorrect bit position");
4916      __ tbnz(disp_hdr, __monitor_value_log2, object_has_monitor);
4917#     undef __monitor_value_log2
4918    }
4919
4920    // Set displaced_header to be (markOop of object | UNLOCK_VALUE).
4921    __ orr(disp_hdr, disp_hdr, markOopDesc::unlocked_value);
4922
4923    // Load Compare Value application register.
4924
4925    // Initialize the box. (Must happen before we update the object mark!)
4926    __ str(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));
4927
4928    // Compare object markOop with mark and if equal exchange scratch1
4929    // with object markOop.
4930    if (UseLSE) {
4931      __ mov(tmp, disp_hdr);
4932      __ casal(Assembler::xword, tmp, box, oop);
4933      __ cmp(tmp, disp_hdr);
4934      __ br(Assembler::EQ, cont);
4935    } else {
4936      Label retry_load;
4937      if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
4938        __ prfm(Address(oop), PSTL1STRM);
4939      __ bind(retry_load);
4940      __ ldaxr(tmp, oop);
4941      __ cmp(tmp, disp_hdr);
4942      __ br(Assembler::NE, cas_failed);
4943      // use stlxr to ensure update is immediately visible
4944      __ stlxr(tmp, box, oop);
4945      __ cbzw(tmp, cont);
4946      __ b(retry_load);
4947    }
4948
4949    // Formerly:
4950    // __ cmpxchgptr(/*oldv=*/disp_hdr,
4951    //               /*newv=*/box,
4952    //               /*addr=*/oop,
4953    //               /*tmp=*/tmp,
4954    //               cont,
4955    //               /*fail*/NULL);
4956
4957    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
4958
4959    // If the compare-and-exchange succeeded, then we found an unlocked
4960    // object, will have now locked it will continue at label cont
4961
4962    __ bind(cas_failed);
4963    // We did not see an unlocked object so try the fast recursive case.
4964
4965    // Check if the owner is self by comparing the value in the
4966    // markOop of object (disp_hdr) with the stack pointer.
4967    __ mov(rscratch1, sp);
4968    __ sub(disp_hdr, disp_hdr, rscratch1);
4969    __ mov(tmp, (address) (~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place));
4970    // If condition is true we are cont and hence we can store 0 as the
4971    // displaced header in the box, which indicates that it is a recursive lock.
4972    __ ands(tmp/*==0?*/, disp_hdr, tmp);
4973    __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));
4974
4975    // Handle existing monitor.
4976    if ((EmitSync & 0x02) == 0) {
4977      __ b(cont);
4978
4979      __ bind(object_has_monitor);
4980      // The object's monitor m is unlocked iff m->owner == NULL,
4981      // otherwise m->owner may contain a thread or a stack address.
4982      //
4983      // Try to CAS m->owner from NULL to current thread.
4984      __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
4985      __ mov(disp_hdr, zr);
4986
4987      if (UseLSE) {
4988        __ mov(rscratch1, disp_hdr);
4989        __ casal(Assembler::xword, rscratch1, rthread, tmp);
4990        __ cmp(rscratch1, disp_hdr);
4991      } else {
4992        Label retry_load, fail;
4993        if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
4994          __ prfm(Address(tmp), PSTL1STRM);
4995        __ bind(retry_load);
4996        __ ldaxr(rscratch1, tmp);
4997        __ cmp(disp_hdr, rscratch1);
4998        __ br(Assembler::NE, fail);
4999        // use stlxr to ensure update is immediately visible
5000        __ stlxr(rscratch1, rthread, tmp);
5001        __ cbnzw(rscratch1, retry_load);
5002        __ bind(fail);
5003      }
5004
5005      // Label next;
5006      // __ cmpxchgptr(/*oldv=*/disp_hdr,
5007      //               /*newv=*/rthread,
5008      //               /*addr=*/tmp,
5009      //               /*tmp=*/rscratch1,
5010      //               /*succeed*/next,
5011      //               /*fail*/NULL);
5012      // __ bind(next);
5013
5014      // store a non-null value into the box.
5015      __ str(box, Address(box, BasicLock::displaced_header_offset_in_bytes()));
5016
5017      // PPC port checks the following invariants
5018      // #ifdef ASSERT
5019      // bne(flag, cont);
5020      // We have acquired the monitor, check some invariants.
5021      // addw(/*monitor=*/tmp, tmp, -ObjectMonitor::owner_offset_in_bytes());
5022      // Invariant 1: _recursions should be 0.
5023      // assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size");
5024      // assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), tmp,
5025      //                        "monitor->_recursions should be 0", -1);
5026      // Invariant 2: OwnerIsThread shouldn't be 0.
5027      // assert(ObjectMonitor::OwnerIsThread_size_in_bytes() == 4, "unexpected size");
5028      //assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), tmp,
5029      //                           "monitor->OwnerIsThread shouldn't be 0", -1);
5030      // #endif
5031    }
5032
5033    __ bind(cont);
5034    // flag == EQ indicates success
5035    // flag == NE indicates failure
5036
5037  %}
5038
5039  // TODO
5040  // reimplement this with custom cmpxchgptr code
5041  // which avoids some of the unnecessary branching
5042  enc_class aarch64_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
5043    MacroAssembler _masm(&cbuf);
5044    Register oop = as_Register($object$$reg);
5045    Register box = as_Register($box$$reg);
5046    Register disp_hdr = as_Register($tmp$$reg);
5047    Register tmp = as_Register($tmp2$$reg);
5048    Label cont;
5049    Label object_has_monitor;
5050    Label cas_failed;
5051
5052    assert_different_registers(oop, box, tmp, disp_hdr);
5053
5054    // Always do locking in runtime.
5055    if (EmitSync & 0x01) {
5056      __ cmp(oop, zr); // Oop can't be 0 here => always false.
5057      return;
5058    }
5059
5060    if (UseBiasedLocking && !UseOptoBiasInlining) {
5061      __ biased_locking_exit(oop, tmp, cont);
5062    }
5063
5064    // Find the lock address and load the displaced header from the stack.
5065    __ ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));
5066
5067    // If the displaced header is 0, we have a recursive unlock.
5068    __ cmp(disp_hdr, zr);
5069    __ br(Assembler::EQ, cont);
5070
5071
5072    // Handle existing monitor.
5073    if ((EmitSync & 0x02) == 0) {
5074      __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
5075      __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
5076    }
5077
5078    // Check if it is still a light weight lock, this is is true if we
5079    // see the stack address of the basicLock in the markOop of the
5080    // object.
5081
5082      if (UseLSE) {
5083        __ mov(tmp, box);
5084        __ casl(Assembler::xword, tmp, disp_hdr, oop);
5085        __ cmp(tmp, box);
5086      } else {
5087        Label retry_load;
5088        if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
5089          __ prfm(Address(oop), PSTL1STRM);
5090        __ bind(retry_load);
5091        __ ldxr(tmp, oop);
5092        __ cmp(box, tmp);
5093        __ br(Assembler::NE, cas_failed);
5094        // use stlxr to ensure update is immediately visible
5095        __ stlxr(tmp, disp_hdr, oop);
5096        __ cbzw(tmp, cont);
5097        __ b(retry_load);
5098      }
5099
5100    // __ cmpxchgptr(/*compare_value=*/box,
5101    //               /*exchange_value=*/disp_hdr,
5102    //               /*where=*/oop,
5103    //               /*result=*/tmp,
5104    //               cont,
5105    //               /*cas_failed*/NULL);
5106    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
5107
5108    __ bind(cas_failed);
5109
5110    // Handle existing monitor.
5111    if ((EmitSync & 0x02) == 0) {
5112      __ b(cont);
5113
5114      __ bind(object_has_monitor);
5115      __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor
5116      __ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
5117      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
5118      __ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner.
5119      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if there are 0 recursions
5120      __ cmp(rscratch1, zr);
5121      __ br(Assembler::NE, cont);
5122
5123      __ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
5124      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
5125      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
5126      __ cmp(rscratch1, zr);
5127      __ cbnz(rscratch1, cont);
5128      // need a release store here
5129      __ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
5130      __ stlr(rscratch1, tmp); // rscratch1 is zero
5131    }
5132
5133    __ bind(cont);
5134    // flag == EQ indicates success
5135    // flag == NE indicates failure
5136  %}
5137
5138%}
5139
5140//----------FRAME--------------------------------------------------------------
5141// Definition of frame structure and management information.
5142//
5143//  S T A C K   L A Y O U T    Allocators stack-slot number
5144//                             |   (to get allocators register number
5145//  G  Owned by    |        |  v    add OptoReg::stack0())
5146//  r   CALLER     |        |
5147//  o     |        +--------+      pad to even-align allocators stack-slot
5148//  w     V        |  pad0  |        numbers; owned by CALLER
5149//  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
5150//  h     ^        |   in   |  5
5151//        |        |  args  |  4   Holes in incoming args owned by SELF
5152//  |     |        |        |  3
5153//  |     |        +--------+
5154//  V     |        | old out|      Empty on Intel, window on Sparc
5155//        |    old |preserve|      Must be even aligned.
5156//        |     SP-+--------+----> Matcher::_old_SP, even aligned
5157//        |        |   in   |  3   area for Intel ret address
5158//     Owned by    |preserve|      Empty on Sparc.
5159//       SELF      +--------+
5160//        |        |  pad2  |  2   pad to align old SP
5161//        |        +--------+  1
5162//        |        | locks  |  0
5163//        |        +--------+----> OptoReg::stack0(), even aligned
5164//        |        |  pad1  | 11   pad to align new SP
5165//        |        +--------+
5166//        |        |        | 10
5167//        |        | spills |  9   spills
5168//        V        |        |  8   (pad0 slot for callee)
5169//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
5170//        ^        |  out   |  7
5171//        |        |  args  |  6   Holes in outgoing args owned by CALLEE
5172//     Owned by    +--------+
5173//      CALLEE     | new out|  6   Empty on Intel, window on Sparc
5174//        |    new |preserve|      Must be even-aligned.
5175//        |     SP-+--------+----> Matcher::_new_SP, even aligned
5176//        |        |        |
5177//
5178// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
5179//         known from SELF's arguments and the Java calling convention.
5180//         Region 6-7 is determined per call site.
5181// Note 2: If the calling convention leaves holes in the incoming argument
5182//         area, those holes are owned by SELF.  Holes in the outgoing area
5183//         are owned by the CALLEE.  Holes should not be nessecary in the
5184//         incoming area, as the Java calling convention is completely under
5185//         the control of the AD file.  Doubles can be sorted and packed to
5186//         avoid holes.  Holes in the outgoing arguments may be nessecary for
5187//         varargs C calling conventions.
5188// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
5189//         even aligned with pad0 as needed.
5190//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
5191//           (the latter is true on Intel but is it false on AArch64?)
5192//         region 6-11 is even aligned; it may be padded out more so that
5193//         the region from SP to FP meets the minimum stack alignment.
5194// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
5195//         alignment.  Region 11, pad1, may be dynamically extended so that
5196//         SP meets the minimum alignment.
5197
5198frame %{
5199  // What direction does stack grow in (assumed to be same for C & Java)
5200  stack_direction(TOWARDS_LOW);
5201
5202  // These three registers define part of the calling convention
5203  // between compiled code and the interpreter.
5204
5205  // Inline Cache Register or methodOop for I2C.
5206  inline_cache_reg(R12);
5207
5208  // Method Oop Register when calling interpreter.
5209  interpreter_method_oop_reg(R12);
5210
5211  // Number of stack slots consumed by locking an object
5212  sync_stack_slots(2);
5213
5214  // Compiled code's Frame Pointer
5215  frame_pointer(R31);
5216
5217  // Interpreter stores its frame pointer in a register which is
5218  // stored to the stack by I2CAdaptors.
5219  // I2CAdaptors convert from interpreted java to compiled java.
5220  interpreter_frame_pointer(R29);
5221
5222  // Stack alignment requirement
5223  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
5224
5225  // Number of stack slots between incoming argument block and the start of
5226  // a new frame.  The PROLOG must add this many slots to the stack.  The
5227  // EPILOG must remove this many slots. aarch64 needs two slots for
5228  // return address and fp.
5229  // TODO think this is correct but check
5230  in_preserve_stack_slots(4);
5231
5232  // Number of outgoing stack slots killed above the out_preserve_stack_slots
5233  // for calls to C.  Supports the var-args backing area for register parms.
5234  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
5235
5236  // The after-PROLOG location of the return address.  Location of
5237  // return address specifies a type (REG or STACK) and a number
5238  // representing the register number (i.e. - use a register name) or
5239  // stack slot.
5240  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
5241  // Otherwise, it is above the locks and verification slot and alignment word
5242  // TODO this may well be correct but need to check why that - 2 is there
5243  // ppc port uses 0 but we definitely need to allow for fixed_slots
5244  // which folds in the space used for monitors
5245  return_addr(STACK - 2 +
5246              align_up((Compile::current()->in_preserve_stack_slots() +
5247                        Compile::current()->fixed_slots()),
5248                       stack_alignment_in_slots()));
5249
5250  // Body of function which returns an integer array locating
5251  // arguments either in registers or in stack slots.  Passed an array
5252  // of ideal registers called "sig" and a "length" count.  Stack-slot
5253  // offsets are based on outgoing arguments, i.e. a CALLER setting up
5254  // arguments for a CALLEE.  Incoming stack arguments are
5255  // automatically biased by the preserve_stack_slots field above.
5256
5257  calling_convention
5258  %{
5259    // No difference between ingoing/outgoing just pass false
5260    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
5261  %}
5262
5263  c_calling_convention
5264  %{
5265    // This is obviously always outgoing
5266    (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length);
5267  %}
5268
5269  // Location of compiled Java return values.  Same as C for now.
5270  return_value
5271  %{
5272    // TODO do we allow ideal_reg == Op_RegN???
5273    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
5274           "only return normal values");
5275
5276    static const int lo[Op_RegL + 1] = { // enum name
5277      0,                                 // Op_Node
5278      0,                                 // Op_Set
5279      R0_num,                            // Op_RegN
5280      R0_num,                            // Op_RegI
5281      R0_num,                            // Op_RegP
5282      V0_num,                            // Op_RegF
5283      V0_num,                            // Op_RegD
5284      R0_num                             // Op_RegL
5285    };
5286
5287    static const int hi[Op_RegL + 1] = { // enum name
5288      0,                                 // Op_Node
5289      0,                                 // Op_Set
5290      OptoReg::Bad,                       // Op_RegN
5291      OptoReg::Bad,                      // Op_RegI
5292      R0_H_num,                          // Op_RegP
5293      OptoReg::Bad,                      // Op_RegF
5294      V0_H_num,                          // Op_RegD
5295      R0_H_num                           // Op_RegL
5296    };
5297
5298    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
5299  %}
5300%}
5301
5302//----------ATTRIBUTES---------------------------------------------------------
5303//----------Operand Attributes-------------------------------------------------
5304op_attrib op_cost(1);        // Required cost attribute
5305
5306//----------Instruction Attributes---------------------------------------------
5307ins_attrib ins_cost(INSN_COST); // Required cost attribute
5308ins_attrib ins_size(32);        // Required size attribute (in bits)
5309ins_attrib ins_short_branch(0); // Required flag: is this instruction
5310                                // a non-matching short branch variant
5311                                // of some long branch?
5312ins_attrib ins_alignment(4);    // Required alignment attribute (must
5313                                // be a power of 2) specifies the
5314                                // alignment that some part of the
5315                                // instruction (not necessarily the
5316                                // start) requires.  If > 1, a
5317                                // compute_padding() function must be
5318                                // provided for the instruction
5319
5320//----------OPERANDS-----------------------------------------------------------
5321// Operand definitions must precede instruction definitions for correct parsing
5322// in the ADLC because operands constitute user defined types which are used in
5323// instruction definitions.
5324
5325//----------Simple Operands----------------------------------------------------
5326
5327// Integer operands 32 bit
5328// 32 bit immediate
5329operand immI()
5330%{
5331  match(ConI);
5332
5333  op_cost(0);
5334  format %{ %}
5335  interface(CONST_INTER);
5336%}
5337
5338// 32 bit zero
5339operand immI0()
5340%{
5341  predicate(n->get_int() == 0);
5342  match(ConI);
5343
5344  op_cost(0);
5345  format %{ %}
5346  interface(CONST_INTER);
5347%}
5348
5349// 32 bit unit increment
5350operand immI_1()
5351%{
5352  predicate(n->get_int() == 1);
5353  match(ConI);
5354
5355  op_cost(0);
5356  format %{ %}
5357  interface(CONST_INTER);
5358%}
5359
5360// 32 bit unit decrement
5361operand immI_M1()
5362%{
5363  predicate(n->get_int() == -1);
5364  match(ConI);
5365
5366  op_cost(0);
5367  format %{ %}
5368  interface(CONST_INTER);
5369%}
5370
5371// Shift values for add/sub extension shift
5372operand immIExt()
5373%{
5374  predicate(0 <= n->get_int() && (n->get_int() <= 4));
5375  match(ConI);
5376
5377  op_cost(0);
5378  format %{ %}
5379  interface(CONST_INTER);
5380%}
5381
5382operand immI_le_4()
5383%{
5384  predicate(n->get_int() <= 4);
5385  match(ConI);
5386
5387  op_cost(0);
5388  format %{ %}
5389  interface(CONST_INTER);
5390%}
5391
5392operand immI_31()
5393%{
5394  predicate(n->get_int() == 31);
5395  match(ConI);
5396
5397  op_cost(0);
5398  format %{ %}
5399  interface(CONST_INTER);
5400%}
5401
5402operand immI_8()
5403%{
5404  predicate(n->get_int() == 8);
5405  match(ConI);
5406
5407  op_cost(0);
5408  format %{ %}
5409  interface(CONST_INTER);
5410%}
5411
5412operand immI_16()
5413%{
5414  predicate(n->get_int() == 16);
5415  match(ConI);
5416
5417  op_cost(0);
5418  format %{ %}
5419  interface(CONST_INTER);
5420%}
5421
5422operand immI_24()
5423%{
5424  predicate(n->get_int() == 24);
5425  match(ConI);
5426
5427  op_cost(0);
5428  format %{ %}
5429  interface(CONST_INTER);
5430%}
5431
5432operand immI_32()
5433%{
5434  predicate(n->get_int() == 32);
5435  match(ConI);
5436
5437  op_cost(0);
5438  format %{ %}
5439  interface(CONST_INTER);
5440%}
5441
5442operand immI_48()
5443%{
5444  predicate(n->get_int() == 48);
5445  match(ConI);
5446
5447  op_cost(0);
5448  format %{ %}
5449  interface(CONST_INTER);
5450%}
5451
5452operand immI_56()
5453%{
5454  predicate(n->get_int() == 56);
5455  match(ConI);
5456
5457  op_cost(0);
5458  format %{ %}
5459  interface(CONST_INTER);
5460%}
5461
5462operand immI_63()
5463%{
5464  predicate(n->get_int() == 63);
5465  match(ConI);
5466
5467  op_cost(0);
5468  format %{ %}
5469  interface(CONST_INTER);
5470%}
5471
5472operand immI_64()
5473%{
5474  predicate(n->get_int() == 64);
5475  match(ConI);
5476
5477  op_cost(0);
5478  format %{ %}
5479  interface(CONST_INTER);
5480%}
5481
5482operand immI_255()
5483%{
5484  predicate(n->get_int() == 255);
5485  match(ConI);
5486
5487  op_cost(0);
5488  format %{ %}
5489  interface(CONST_INTER);
5490%}
5491
5492operand immI_65535()
5493%{
5494  predicate(n->get_int() == 65535);
5495  match(ConI);
5496
5497  op_cost(0);
5498  format %{ %}
5499  interface(CONST_INTER);
5500%}
5501
5502operand immL_255()
5503%{
5504  predicate(n->get_long() == 255L);
5505  match(ConL);
5506
5507  op_cost(0);
5508  format %{ %}
5509  interface(CONST_INTER);
5510%}
5511
5512operand immL_65535()
5513%{
5514  predicate(n->get_long() == 65535L);
5515  match(ConL);
5516
5517  op_cost(0);
5518  format %{ %}
5519  interface(CONST_INTER);
5520%}
5521
5522operand immL_4294967295()
5523%{
5524  predicate(n->get_long() == 4294967295L);
5525  match(ConL);
5526
5527  op_cost(0);
5528  format %{ %}
5529  interface(CONST_INTER);
5530%}
5531
5532operand immL_bitmask()
5533%{
5534  predicate(((n->get_long() & 0xc000000000000000l) == 0)
5535            && is_power_of_2(n->get_long() + 1));
5536  match(ConL);
5537
5538  op_cost(0);
5539  format %{ %}
5540  interface(CONST_INTER);
5541%}
5542
5543operand immI_bitmask()
5544%{
5545  predicate(((n->get_int() & 0xc0000000) == 0)
5546            && is_power_of_2(n->get_int() + 1));
5547  match(ConI);
5548
5549  op_cost(0);
5550  format %{ %}
5551  interface(CONST_INTER);
5552%}
5553
5554// Scale values for scaled offset addressing modes (up to long but not quad)
5555operand immIScale()
5556%{
5557  predicate(0 <= n->get_int() && (n->get_int() <= 3));
5558  match(ConI);
5559
5560  op_cost(0);
5561  format %{ %}
5562  interface(CONST_INTER);
5563%}
5564
5565// 26 bit signed offset -- for pc-relative branches
5566operand immI26()
5567%{
5568  predicate(((-(1 << 25)) <= n->get_int()) && (n->get_int() < (1 << 25)));
5569  match(ConI);
5570
5571  op_cost(0);
5572  format %{ %}
5573  interface(CONST_INTER);
5574%}
5575
5576// 19 bit signed offset -- for pc-relative loads
5577operand immI19()
5578%{
5579  predicate(((-(1 << 18)) <= n->get_int()) && (n->get_int() < (1 << 18)));
5580  match(ConI);
5581
5582  op_cost(0);
5583  format %{ %}
5584  interface(CONST_INTER);
5585%}
5586
5587// 12 bit unsigned offset -- for base plus immediate loads
5588operand immIU12()
5589%{
5590  predicate((0 <= n->get_int()) && (n->get_int() < (1 << 12)));
5591  match(ConI);
5592
5593  op_cost(0);
5594  format %{ %}
5595  interface(CONST_INTER);
5596%}
5597
5598operand immLU12()
5599%{
5600  predicate((0 <= n->get_long()) && (n->get_long() < (1 << 12)));
5601  match(ConL);
5602
5603  op_cost(0);
5604  format %{ %}
5605  interface(CONST_INTER);
5606%}
5607
5608// Offset for scaled or unscaled immediate loads and stores
5609operand immIOffset()
5610%{
5611  predicate(Address::offset_ok_for_immed(n->get_int()));
5612  match(ConI);
5613
5614  op_cost(0);
5615  format %{ %}
5616  interface(CONST_INTER);
5617%}
5618
5619operand immIOffset4()
5620%{
5621  predicate(Address::offset_ok_for_immed(n->get_int(), 2));
5622  match(ConI);
5623
5624  op_cost(0);
5625  format %{ %}
5626  interface(CONST_INTER);
5627%}
5628
5629operand immIOffset8()
5630%{
5631  predicate(Address::offset_ok_for_immed(n->get_int(), 3));
5632  match(ConI);
5633
5634  op_cost(0);
5635  format %{ %}
5636  interface(CONST_INTER);
5637%}
5638
5639operand immIOffset16()
5640%{
5641  predicate(Address::offset_ok_for_immed(n->get_int(), 4));
5642  match(ConI);
5643
5644  op_cost(0);
5645  format %{ %}
5646  interface(CONST_INTER);
5647%}
5648
5649operand immLoffset()
5650%{
5651  predicate(Address::offset_ok_for_immed(n->get_long()));
5652  match(ConL);
5653
5654  op_cost(0);
5655  format %{ %}
5656  interface(CONST_INTER);
5657%}
5658
5659operand immLoffset4()
5660%{
5661  predicate(Address::offset_ok_for_immed(n->get_long(), 2));
5662  match(ConL);
5663
5664  op_cost(0);
5665  format %{ %}
5666  interface(CONST_INTER);
5667%}
5668
5669operand immLoffset8()
5670%{
5671  predicate(Address::offset_ok_for_immed(n->get_long(), 3));
5672  match(ConL);
5673
5674  op_cost(0);
5675  format %{ %}
5676  interface(CONST_INTER);
5677%}
5678
5679operand immLoffset16()
5680%{
5681  predicate(Address::offset_ok_for_immed(n->get_long(), 4));
5682  match(ConL);
5683
5684  op_cost(0);
5685  format %{ %}
5686  interface(CONST_INTER);
5687%}
5688
5689// 32 bit integer valid for add sub immediate
5690operand immIAddSub()
5691%{
5692  predicate(Assembler::operand_valid_for_add_sub_immediate((long)n->get_int()));
5693  match(ConI);
5694  op_cost(0);
5695  format %{ %}
5696  interface(CONST_INTER);
5697%}
5698
5699// 32 bit unsigned integer valid for logical immediate
5700// TODO -- check this is right when e.g the mask is 0x80000000
5701operand immILog()
5702%{
5703  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/true, (unsigned long)n->get_int()));
5704  match(ConI);
5705
5706  op_cost(0);
5707  format %{ %}
5708  interface(CONST_INTER);
5709%}
5710
5711// Integer operands 64 bit
5712// 64 bit immediate
5713operand immL()
5714%{
5715  match(ConL);
5716
5717  op_cost(0);
5718  format %{ %}
5719  interface(CONST_INTER);
5720%}
5721
5722// 64 bit zero
5723operand immL0()
5724%{
5725  predicate(n->get_long() == 0);
5726  match(ConL);
5727
5728  op_cost(0);
5729  format %{ %}
5730  interface(CONST_INTER);
5731%}
5732
5733// 64 bit unit increment
5734operand immL_1()
5735%{
5736  predicate(n->get_long() == 1);
5737  match(ConL);
5738
5739  op_cost(0);
5740  format %{ %}
5741  interface(CONST_INTER);
5742%}
5743
5744// 64 bit unit decrement
5745operand immL_M1()
5746%{
5747  predicate(n->get_long() == -1);
5748  match(ConL);
5749
5750  op_cost(0);
5751  format %{ %}
5752  interface(CONST_INTER);
5753%}
5754
5755// 32 bit offset of pc in thread anchor
5756
5757operand immL_pc_off()
5758%{
5759  predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) +
5760                             in_bytes(JavaFrameAnchor::last_Java_pc_offset()));
5761  match(ConL);
5762
5763  op_cost(0);
5764  format %{ %}
5765  interface(CONST_INTER);
5766%}
5767
5768// 64 bit integer valid for add sub immediate
5769operand immLAddSub()
5770%{
5771  predicate(Assembler::operand_valid_for_add_sub_immediate(n->get_long()));
5772  match(ConL);
5773  op_cost(0);
5774  format %{ %}
5775  interface(CONST_INTER);
5776%}
5777
5778// 64 bit integer valid for logical immediate
5779operand immLLog()
5780%{
5781  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/false, (unsigned long)n->get_long()));
5782  match(ConL);
5783  op_cost(0);
5784  format %{ %}
5785  interface(CONST_INTER);
5786%}
5787
5788// Long Immediate: low 32-bit mask
5789operand immL_32bits()
5790%{
5791  predicate(n->get_long() == 0xFFFFFFFFL);
5792  match(ConL);
5793  op_cost(0);
5794  format %{ %}
5795  interface(CONST_INTER);
5796%}
5797
5798// Pointer operands
5799// Pointer Immediate
5800operand immP()
5801%{
5802  match(ConP);
5803
5804  op_cost(0);
5805  format %{ %}
5806  interface(CONST_INTER);
5807%}
5808
5809// NULL Pointer Immediate
5810operand immP0()
5811%{
5812  predicate(n->get_ptr() == 0);
5813  match(ConP);
5814
5815  op_cost(0);
5816  format %{ %}
5817  interface(CONST_INTER);
5818%}
5819
5820// Pointer Immediate One
5821// this is used in object initialization (initial object header)
5822operand immP_1()
5823%{
5824  predicate(n->get_ptr() == 1);
5825  match(ConP);
5826
5827  op_cost(0);
5828  format %{ %}
5829  interface(CONST_INTER);
5830%}
5831
5832// Polling Page Pointer Immediate
5833operand immPollPage()
5834%{
5835  predicate((address)n->get_ptr() == os::get_polling_page());
5836  match(ConP);
5837
5838  op_cost(0);
5839  format %{ %}
5840  interface(CONST_INTER);
5841%}
5842
5843// Card Table Byte Map Base
5844operand immByteMapBase()
5845%{
5846  // Get base of card map
5847  predicate((jbyte*)n->get_ptr() ==
5848        ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base);
5849  match(ConP);
5850
5851  op_cost(0);
5852  format %{ %}
5853  interface(CONST_INTER);
5854%}
5855
5856// Pointer Immediate Minus One
5857// this is used when we want to write the current PC to the thread anchor
5858operand immP_M1()
5859%{
5860  predicate(n->get_ptr() == -1);
5861  match(ConP);
5862
5863  op_cost(0);
5864  format %{ %}
5865  interface(CONST_INTER);
5866%}
5867
5868// Pointer Immediate Minus Two
5869// this is used when we want to write the current PC to the thread anchor
5870operand immP_M2()
5871%{
5872  predicate(n->get_ptr() == -2);
5873  match(ConP);
5874
5875  op_cost(0);
5876  format %{ %}
5877  interface(CONST_INTER);
5878%}
5879
5880// Float and Double operands
5881// Double Immediate
5882operand immD()
5883%{
5884  match(ConD);
5885  op_cost(0);
5886  format %{ %}
5887  interface(CONST_INTER);
5888%}
5889
5890// Double Immediate: +0.0d
5891operand immD0()
5892%{
5893  predicate(jlong_cast(n->getd()) == 0);
5894  match(ConD);
5895
5896  op_cost(0);
5897  format %{ %}
5898  interface(CONST_INTER);
5899%}
5900
5901// constant 'double +0.0'.
5902operand immDPacked()
5903%{
5904  predicate(Assembler::operand_valid_for_float_immediate(n->getd()));
5905  match(ConD);
5906  op_cost(0);
5907  format %{ %}
5908  interface(CONST_INTER);
5909%}
5910
5911// Float Immediate
5912operand immF()
5913%{
5914  match(ConF);
5915  op_cost(0);
5916  format %{ %}
5917  interface(CONST_INTER);
5918%}
5919
5920// Float Immediate: +0.0f.
5921operand immF0()
5922%{
5923  predicate(jint_cast(n->getf()) == 0);
5924  match(ConF);
5925
5926  op_cost(0);
5927  format %{ %}
5928  interface(CONST_INTER);
5929%}
5930
5931//
5932operand immFPacked()
5933%{
5934  predicate(Assembler::operand_valid_for_float_immediate((double)n->getf()));
5935  match(ConF);
5936  op_cost(0);
5937  format %{ %}
5938  interface(CONST_INTER);
5939%}
5940
5941// Narrow pointer operands
5942// Narrow Pointer Immediate
5943operand immN()
5944%{
5945  match(ConN);
5946
5947  op_cost(0);
5948  format %{ %}
5949  interface(CONST_INTER);
5950%}
5951
5952// Narrow NULL Pointer Immediate
5953operand immN0()
5954%{
5955  predicate(n->get_narrowcon() == 0);
5956  match(ConN);
5957
5958  op_cost(0);
5959  format %{ %}
5960  interface(CONST_INTER);
5961%}
5962
5963operand immNKlass()
5964%{
5965  match(ConNKlass);
5966
5967  op_cost(0);
5968  format %{ %}
5969  interface(CONST_INTER);
5970%}
5971
5972// Integer 32 bit Register Operands
5973// Integer 32 bitRegister (excludes SP)
5974operand iRegI()
5975%{
5976  constraint(ALLOC_IN_RC(any_reg32));
5977  match(RegI);
5978  match(iRegINoSp);
5979  op_cost(0);
5980  format %{ %}
5981  interface(REG_INTER);
5982%}
5983
5984// Integer 32 bit Register not Special
5985operand iRegINoSp()
5986%{
5987  constraint(ALLOC_IN_RC(no_special_reg32));
5988  match(RegI);
5989  op_cost(0);
5990  format %{ %}
5991  interface(REG_INTER);
5992%}
5993
5994// Integer 64 bit Register Operands
5995// Integer 64 bit Register (includes SP)
5996operand iRegL()
5997%{
5998  constraint(ALLOC_IN_RC(any_reg));
5999  match(RegL);
6000  match(iRegLNoSp);
6001  op_cost(0);
6002  format %{ %}
6003  interface(REG_INTER);
6004%}
6005
6006// Integer 64 bit Register not Special
6007operand iRegLNoSp()
6008%{
6009  constraint(ALLOC_IN_RC(no_special_reg));
6010  match(RegL);
6011  match(iRegL_R0);
6012  format %{ %}
6013  interface(REG_INTER);
6014%}
6015
6016// Pointer Register Operands
6017// Pointer Register
6018operand iRegP()
6019%{
6020  constraint(ALLOC_IN_RC(ptr_reg));
6021  match(RegP);
6022  match(iRegPNoSp);
6023  match(iRegP_R0);
6024  //match(iRegP_R2);
6025  //match(iRegP_R4);
6026  //match(iRegP_R5);
6027  match(thread_RegP);
6028  op_cost(0);
6029  format %{ %}
6030  interface(REG_INTER);
6031%}
6032
6033// Pointer 64 bit Register not Special
6034operand iRegPNoSp()
6035%{
6036  constraint(ALLOC_IN_RC(no_special_ptr_reg));
6037  match(RegP);
6038  // match(iRegP);
6039  // match(iRegP_R0);
6040  // match(iRegP_R2);
6041  // match(iRegP_R4);
6042  // match(iRegP_R5);
6043  // match(thread_RegP);
6044  op_cost(0);
6045  format %{ %}
6046  interface(REG_INTER);
6047%}
6048
6049// Pointer 64 bit Register R0 only
6050operand iRegP_R0()
6051%{
6052  constraint(ALLOC_IN_RC(r0_reg));
6053  match(RegP);
6054  // match(iRegP);
6055  match(iRegPNoSp);
6056  op_cost(0);
6057  format %{ %}
6058  interface(REG_INTER);
6059%}
6060
6061// Pointer 64 bit Register R1 only
6062operand iRegP_R1()
6063%{
6064  constraint(ALLOC_IN_RC(r1_reg));
6065  match(RegP);
6066  // match(iRegP);
6067  match(iRegPNoSp);
6068  op_cost(0);
6069  format %{ %}
6070  interface(REG_INTER);
6071%}
6072
6073// Pointer 64 bit Register R2 only
6074operand iRegP_R2()
6075%{
6076  constraint(ALLOC_IN_RC(r2_reg));
6077  match(RegP);
6078  // match(iRegP);
6079  match(iRegPNoSp);
6080  op_cost(0);
6081  format %{ %}
6082  interface(REG_INTER);
6083%}
6084
6085// Pointer 64 bit Register R3 only
6086operand iRegP_R3()
6087%{
6088  constraint(ALLOC_IN_RC(r3_reg));
6089  match(RegP);
6090  // match(iRegP);
6091  match(iRegPNoSp);
6092  op_cost(0);
6093  format %{ %}
6094  interface(REG_INTER);
6095%}
6096
6097// Pointer 64 bit Register R4 only
6098operand iRegP_R4()
6099%{
6100  constraint(ALLOC_IN_RC(r4_reg));
6101  match(RegP);
6102  // match(iRegP);
6103  match(iRegPNoSp);
6104  op_cost(0);
6105  format %{ %}
6106  interface(REG_INTER);
6107%}
6108
6109// Pointer 64 bit Register R5 only
6110operand iRegP_R5()
6111%{
6112  constraint(ALLOC_IN_RC(r5_reg));
6113  match(RegP);
6114  // match(iRegP);
6115  match(iRegPNoSp);
6116  op_cost(0);
6117  format %{ %}
6118  interface(REG_INTER);
6119%}
6120
6121// Pointer 64 bit Register R10 only
6122operand iRegP_R10()
6123%{
6124  constraint(ALLOC_IN_RC(r10_reg));
6125  match(RegP);
6126  // match(iRegP);
6127  match(iRegPNoSp);
6128  op_cost(0);
6129  format %{ %}
6130  interface(REG_INTER);
6131%}
6132
6133// Long 64 bit Register R0 only
6134operand iRegL_R0()
6135%{
6136  constraint(ALLOC_IN_RC(r0_reg));
6137  match(RegL);
6138  match(iRegLNoSp);
6139  op_cost(0);
6140  format %{ %}
6141  interface(REG_INTER);
6142%}
6143
6144// Long 64 bit Register R2 only
6145operand iRegL_R2()
6146%{
6147  constraint(ALLOC_IN_RC(r2_reg));
6148  match(RegL);
6149  match(iRegLNoSp);
6150  op_cost(0);
6151  format %{ %}
6152  interface(REG_INTER);
6153%}
6154
6155// Long 64 bit Register R3 only
6156operand iRegL_R3()
6157%{
6158  constraint(ALLOC_IN_RC(r3_reg));
6159  match(RegL);
6160  match(iRegLNoSp);
6161  op_cost(0);
6162  format %{ %}
6163  interface(REG_INTER);
6164%}
6165
6166// Long 64 bit Register R11 only
6167operand iRegL_R11()
6168%{
6169  constraint(ALLOC_IN_RC(r11_reg));
6170  match(RegL);
6171  match(iRegLNoSp);
6172  op_cost(0);
6173  format %{ %}
6174  interface(REG_INTER);
6175%}
6176
6177// Pointer 64 bit Register FP only
6178operand iRegP_FP()
6179%{
6180  constraint(ALLOC_IN_RC(fp_reg));
6181  match(RegP);
6182  // match(iRegP);
6183  op_cost(0);
6184  format %{ %}
6185  interface(REG_INTER);
6186%}
6187
6188// Register R0 only
6189operand iRegI_R0()
6190%{
6191  constraint(ALLOC_IN_RC(int_r0_reg));
6192  match(RegI);
6193  match(iRegINoSp);
6194  op_cost(0);
6195  format %{ %}
6196  interface(REG_INTER);
6197%}
6198
6199// Register R2 only
6200operand iRegI_R2()
6201%{
6202  constraint(ALLOC_IN_RC(int_r2_reg));
6203  match(RegI);
6204  match(iRegINoSp);
6205  op_cost(0);
6206  format %{ %}
6207  interface(REG_INTER);
6208%}
6209
6210// Register R3 only
6211operand iRegI_R3()
6212%{
6213  constraint(ALLOC_IN_RC(int_r3_reg));
6214  match(RegI);
6215  match(iRegINoSp);
6216  op_cost(0);
6217  format %{ %}
6218  interface(REG_INTER);
6219%}
6220
6221
6222// Register R4 only
6223operand iRegI_R4()
6224%{
6225  constraint(ALLOC_IN_RC(int_r4_reg));
6226  match(RegI);
6227  match(iRegINoSp);
6228  op_cost(0);
6229  format %{ %}
6230  interface(REG_INTER);
6231%}
6232
6233
6234// Pointer Register Operands
6235// Narrow Pointer Register
6236operand iRegN()
6237%{
6238  constraint(ALLOC_IN_RC(any_reg32));
6239  match(RegN);
6240  match(iRegNNoSp);
6241  op_cost(0);
6242  format %{ %}
6243  interface(REG_INTER);
6244%}
6245
6246operand iRegN_R0()
6247%{
6248  constraint(ALLOC_IN_RC(r0_reg));
6249  match(iRegN);
6250  op_cost(0);
6251  format %{ %}
6252  interface(REG_INTER);
6253%}
6254
6255operand iRegN_R2()
6256%{
6257  constraint(ALLOC_IN_RC(r2_reg));
6258  match(iRegN);
6259  op_cost(0);
6260  format %{ %}
6261  interface(REG_INTER);
6262%}
6263
6264operand iRegN_R3()
6265%{
6266  constraint(ALLOC_IN_RC(r3_reg));
6267  match(iRegN);
6268  op_cost(0);
6269  format %{ %}
6270  interface(REG_INTER);
6271%}
6272
6273// Integer 64 bit Register not Special
6274operand iRegNNoSp()
6275%{
6276  constraint(ALLOC_IN_RC(no_special_reg32));
6277  match(RegN);
6278  op_cost(0);
6279  format %{ %}
6280  interface(REG_INTER);
6281%}
6282
6283// heap base register -- used for encoding immN0
6284
6285operand iRegIHeapbase()
6286%{
6287  constraint(ALLOC_IN_RC(heapbase_reg));
6288  match(RegI);
6289  op_cost(0);
6290  format %{ %}
6291  interface(REG_INTER);
6292%}
6293
6294// Float Register
6295// Float register operands
6296operand vRegF()
6297%{
6298  constraint(ALLOC_IN_RC(float_reg));
6299  match(RegF);
6300
6301  op_cost(0);
6302  format %{ %}
6303  interface(REG_INTER);
6304%}
6305
6306// Double Register
6307// Double register operands
6308operand vRegD()
6309%{
6310  constraint(ALLOC_IN_RC(double_reg));
6311  match(RegD);
6312
6313  op_cost(0);
6314  format %{ %}
6315  interface(REG_INTER);
6316%}
6317
6318operand vecD()
6319%{
6320  constraint(ALLOC_IN_RC(vectord_reg));
6321  match(VecD);
6322
6323  op_cost(0);
6324  format %{ %}
6325  interface(REG_INTER);
6326%}
6327
6328operand vecX()
6329%{
6330  constraint(ALLOC_IN_RC(vectorx_reg));
6331  match(VecX);
6332
6333  op_cost(0);
6334  format %{ %}
6335  interface(REG_INTER);
6336%}
6337
6338operand vRegD_V0()
6339%{
6340  constraint(ALLOC_IN_RC(v0_reg));
6341  match(RegD);
6342  op_cost(0);
6343  format %{ %}
6344  interface(REG_INTER);
6345%}
6346
6347operand vRegD_V1()
6348%{
6349  constraint(ALLOC_IN_RC(v1_reg));
6350  match(RegD);
6351  op_cost(0);
6352  format %{ %}
6353  interface(REG_INTER);
6354%}
6355
6356operand vRegD_V2()
6357%{
6358  constraint(ALLOC_IN_RC(v2_reg));
6359  match(RegD);
6360  op_cost(0);
6361  format %{ %}
6362  interface(REG_INTER);
6363%}
6364
6365operand vRegD_V3()
6366%{
6367  constraint(ALLOC_IN_RC(v3_reg));
6368  match(RegD);
6369  op_cost(0);
6370  format %{ %}
6371  interface(REG_INTER);
6372%}
6373
6374// Flags register, used as output of signed compare instructions
6375
6376// note that on AArch64 we also use this register as the output for
6377// for floating point compare instructions (CmpF CmpD). this ensures
6378// that ordered inequality tests use GT, GE, LT or LE none of which
6379// pass through cases where the result is unordered i.e. one or both
6380// inputs to the compare is a NaN. this means that the ideal code can
6381// replace e.g. a GT with an LE and not end up capturing the NaN case
6382// (where the comparison should always fail). EQ and NE tests are
6383// always generated in ideal code so that unordered folds into the NE
6384// case, matching the behaviour of AArch64 NE.
6385//
6386// This differs from x86 where the outputs of FP compares use a
6387// special FP flags registers and where compares based on this
6388// register are distinguished into ordered inequalities (cmpOpUCF) and
6389// EQ/NEQ tests (cmpOpUCF2). x86 has to special case the latter tests
6390// to explicitly handle the unordered case in branches. x86 also has
6391// to include extra CMoveX rules to accept a cmpOpUCF input.
6392
6393operand rFlagsReg()
6394%{
6395  constraint(ALLOC_IN_RC(int_flags));
6396  match(RegFlags);
6397
6398  op_cost(0);
6399  format %{ "RFLAGS" %}
6400  interface(REG_INTER);
6401%}
6402
6403// Flags register, used as output of unsigned compare instructions
6404operand rFlagsRegU()
6405%{
6406  constraint(ALLOC_IN_RC(int_flags));
6407  match(RegFlags);
6408
6409  op_cost(0);
6410  format %{ "RFLAGSU" %}
6411  interface(REG_INTER);
6412%}
6413
6414// Special Registers
6415
6416// Method Register
6417operand inline_cache_RegP(iRegP reg)
6418%{
6419  constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg
6420  match(reg);
6421  match(iRegPNoSp);
6422  op_cost(0);
6423  format %{ %}
6424  interface(REG_INTER);
6425%}
6426
6427operand interpreter_method_oop_RegP(iRegP reg)
6428%{
6429  constraint(ALLOC_IN_RC(method_reg)); // interpreter_method_oop_reg
6430  match(reg);
6431  match(iRegPNoSp);
6432  op_cost(0);
6433  format %{ %}
6434  interface(REG_INTER);
6435%}
6436
6437// Thread Register
6438operand thread_RegP(iRegP reg)
6439%{
6440  constraint(ALLOC_IN_RC(thread_reg)); // link_reg
6441  match(reg);
6442  op_cost(0);
6443  format %{ %}
6444  interface(REG_INTER);
6445%}
6446
6447operand lr_RegP(iRegP reg)
6448%{
6449  constraint(ALLOC_IN_RC(lr_reg)); // link_reg
6450  match(reg);
6451  op_cost(0);
6452  format %{ %}
6453  interface(REG_INTER);
6454%}
6455
6456//----------Memory Operands----------------------------------------------------
6457
6458operand indirect(iRegP reg)
6459%{
6460  constraint(ALLOC_IN_RC(ptr_reg));
6461  match(reg);
6462  op_cost(0);
6463  format %{ "[$reg]" %}
6464  interface(MEMORY_INTER) %{
6465    base($reg);
6466    index(0xffffffff);
6467    scale(0x0);
6468    disp(0x0);
6469  %}
6470%}
6471
6472operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale)
6473%{
6474  constraint(ALLOC_IN_RC(ptr_reg));
6475  predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
6476  match(AddP reg (LShiftL (ConvI2L ireg) scale));
6477  op_cost(0);
6478  format %{ "$reg, $ireg sxtw($scale), 0, I2L" %}
6479  interface(MEMORY_INTER) %{
6480    base($reg);
6481    index($ireg);
6482    scale($scale);
6483    disp(0x0);
6484  %}
6485%}
6486
6487operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale)
6488%{
6489  constraint(ALLOC_IN_RC(ptr_reg));
6490  predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
6491  match(AddP reg (LShiftL lreg scale));
6492  op_cost(0);
6493  format %{ "$reg, $lreg lsl($scale)" %}
6494  interface(MEMORY_INTER) %{
6495    base($reg);
6496    index($lreg);
6497    scale($scale);
6498    disp(0x0);
6499  %}
6500%}
6501
6502operand indIndexI2L(iRegP reg, iRegI ireg)
6503%{
6504  constraint(ALLOC_IN_RC(ptr_reg));
6505  match(AddP reg (ConvI2L ireg));
6506  op_cost(0);
6507  format %{ "$reg, $ireg, 0, I2L" %}
6508  interface(MEMORY_INTER) %{
6509    base($reg);
6510    index($ireg);
6511    scale(0x0);
6512    disp(0x0);
6513  %}
6514%}
6515
6516operand indIndex(iRegP reg, iRegL lreg)
6517%{
6518  constraint(ALLOC_IN_RC(ptr_reg));
6519  match(AddP reg lreg);
6520  op_cost(0);
6521  format %{ "$reg, $lreg" %}
6522  interface(MEMORY_INTER) %{
6523    base($reg);
6524    index($lreg);
6525    scale(0x0);
6526    disp(0x0);
6527  %}
6528%}
6529
6530operand indOffI(iRegP reg, immIOffset off)
6531%{
6532  constraint(ALLOC_IN_RC(ptr_reg));
6533  match(AddP reg off);
6534  op_cost(0);
6535  format %{ "[$reg, $off]" %}
6536  interface(MEMORY_INTER) %{
6537    base($reg);
6538    index(0xffffffff);
6539    scale(0x0);
6540    disp($off);
6541  %}
6542%}
6543
6544operand indOffI4(iRegP reg, immIOffset4 off)
6545%{
6546  constraint(ALLOC_IN_RC(ptr_reg));
6547  match(AddP reg off);
6548  op_cost(0);
6549  format %{ "[$reg, $off]" %}
6550  interface(MEMORY_INTER) %{
6551    base($reg);
6552    index(0xffffffff);
6553    scale(0x0);
6554    disp($off);
6555  %}
6556%}
6557
6558operand indOffI8(iRegP reg, immIOffset8 off)
6559%{
6560  constraint(ALLOC_IN_RC(ptr_reg));
6561  match(AddP reg off);
6562  op_cost(0);
6563  format %{ "[$reg, $off]" %}
6564  interface(MEMORY_INTER) %{
6565    base($reg);
6566    index(0xffffffff);
6567    scale(0x0);
6568    disp($off);
6569  %}
6570%}
6571
6572operand indOffI16(iRegP reg, immIOffset16 off)
6573%{
6574  constraint(ALLOC_IN_RC(ptr_reg));
6575  match(AddP reg off);
6576  op_cost(0);
6577  format %{ "[$reg, $off]" %}
6578  interface(MEMORY_INTER) %{
6579    base($reg);
6580    index(0xffffffff);
6581    scale(0x0);
6582    disp($off);
6583  %}
6584%}
6585
6586operand indOffL(iRegP reg, immLoffset off)
6587%{
6588  constraint(ALLOC_IN_RC(ptr_reg));
6589  match(AddP reg off);
6590  op_cost(0);
6591  format %{ "[$reg, $off]" %}
6592  interface(MEMORY_INTER) %{
6593    base($reg);
6594    index(0xffffffff);
6595    scale(0x0);
6596    disp($off);
6597  %}
6598%}
6599
6600operand indOffL4(iRegP reg, immLoffset4 off)
6601%{
6602  constraint(ALLOC_IN_RC(ptr_reg));
6603  match(AddP reg off);
6604  op_cost(0);
6605  format %{ "[$reg, $off]" %}
6606  interface(MEMORY_INTER) %{
6607    base($reg);
6608    index(0xffffffff);
6609    scale(0x0);
6610    disp($off);
6611  %}
6612%}
6613
6614operand indOffL8(iRegP reg, immLoffset8 off)
6615%{
6616  constraint(ALLOC_IN_RC(ptr_reg));
6617  match(AddP reg off);
6618  op_cost(0);
6619  format %{ "[$reg, $off]" %}
6620  interface(MEMORY_INTER) %{
6621    base($reg);
6622    index(0xffffffff);
6623    scale(0x0);
6624    disp($off);
6625  %}
6626%}
6627
6628operand indOffL16(iRegP reg, immLoffset16 off)
6629%{
6630  constraint(ALLOC_IN_RC(ptr_reg));
6631  match(AddP reg off);
6632  op_cost(0);
6633  format %{ "[$reg, $off]" %}
6634  interface(MEMORY_INTER) %{
6635    base($reg);
6636    index(0xffffffff);
6637    scale(0x0);
6638    disp($off);
6639  %}
6640%}
6641
6642operand indirectN(iRegN reg)
6643%{
6644  predicate(Universe::narrow_oop_shift() == 0);
6645  constraint(ALLOC_IN_RC(ptr_reg));
6646  match(DecodeN reg);
6647  op_cost(0);
6648  format %{ "[$reg]\t# narrow" %}
6649  interface(MEMORY_INTER) %{
6650    base($reg);
6651    index(0xffffffff);
6652    scale(0x0);
6653    disp(0x0);
6654  %}
6655%}
6656
6657operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale)
6658%{
6659  predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
6660  constraint(ALLOC_IN_RC(ptr_reg));
6661  match(AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale));
6662  op_cost(0);
6663  format %{ "$reg, $ireg sxtw($scale), 0, I2L\t# narrow" %}
6664  interface(MEMORY_INTER) %{
6665    base($reg);
6666    index($ireg);
6667    scale($scale);
6668    disp(0x0);
6669  %}
6670%}
6671
6672operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale)
6673%{
6674  predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
6675  constraint(ALLOC_IN_RC(ptr_reg));
6676  match(AddP (DecodeN reg) (LShiftL lreg scale));
6677  op_cost(0);
6678  format %{ "$reg, $lreg lsl($scale)\t# narrow" %}
6679  interface(MEMORY_INTER) %{
6680    base($reg);
6681    index($lreg);
6682    scale($scale);
6683    disp(0x0);
6684  %}
6685%}
6686
6687operand indIndexI2LN(iRegN reg, iRegI ireg)
6688%{
6689  predicate(Universe::narrow_oop_shift() == 0);
6690  constraint(ALLOC_IN_RC(ptr_reg));
6691  match(AddP (DecodeN reg) (ConvI2L ireg));
6692  op_cost(0);
6693  format %{ "$reg, $ireg, 0, I2L\t# narrow" %}
6694  interface(MEMORY_INTER) %{
6695    base($reg);
6696    index($ireg);
6697    scale(0x0);
6698    disp(0x0);
6699  %}
6700%}
6701
6702operand indIndexN(iRegN reg, iRegL lreg)
6703%{
6704  predicate(Universe::narrow_oop_shift() == 0);
6705  constraint(ALLOC_IN_RC(ptr_reg));
6706  match(AddP (DecodeN reg) lreg);
6707  op_cost(0);
6708  format %{ "$reg, $lreg\t# narrow" %}
6709  interface(MEMORY_INTER) %{
6710    base($reg);
6711    index($lreg);
6712    scale(0x0);
6713    disp(0x0);
6714  %}
6715%}
6716
6717operand indOffIN(iRegN reg, immIOffset off)
6718%{
6719  predicate(Universe::narrow_oop_shift() == 0);
6720  constraint(ALLOC_IN_RC(ptr_reg));
6721  match(AddP (DecodeN reg) off);
6722  op_cost(0);
6723  format %{ "[$reg, $off]\t# narrow" %}
6724  interface(MEMORY_INTER) %{
6725    base($reg);
6726    index(0xffffffff);
6727    scale(0x0);
6728    disp($off);
6729  %}
6730%}
6731
6732operand indOffLN(iRegN reg, immLoffset off)
6733%{
6734  predicate(Universe::narrow_oop_shift() == 0);
6735  constraint(ALLOC_IN_RC(ptr_reg));
6736  match(AddP (DecodeN reg) off);
6737  op_cost(0);
6738  format %{ "[$reg, $off]\t# narrow" %}
6739  interface(MEMORY_INTER) %{
6740    base($reg);
6741    index(0xffffffff);
6742    scale(0x0);
6743    disp($off);
6744  %}
6745%}
6746
6747
6748
6749// AArch64 opto stubs need to write to the pc slot in the thread anchor
6750operand thread_anchor_pc(thread_RegP reg, immL_pc_off off)
6751%{
6752  constraint(ALLOC_IN_RC(ptr_reg));
6753  match(AddP reg off);
6754  op_cost(0);
6755  format %{ "[$reg, $off]" %}
6756  interface(MEMORY_INTER) %{
6757    base($reg);
6758    index(0xffffffff);
6759    scale(0x0);
6760    disp($off);
6761  %}
6762%}
6763
6764//----------Special Memory Operands--------------------------------------------
6765// Stack Slot Operand - This operand is used for loading and storing temporary
6766//                      values on the stack where a match requires a value to
6767//                      flow through memory.
6768operand stackSlotP(sRegP reg)
6769%{
6770  constraint(ALLOC_IN_RC(stack_slots));
6771  op_cost(100);
6772  // No match rule because this operand is only generated in matching
6773  // match(RegP);
6774  format %{ "[$reg]" %}
6775  interface(MEMORY_INTER) %{
6776    base(0x1e);  // RSP
6777    index(0x0);  // No Index
6778    scale(0x0);  // No Scale
6779    disp($reg);  // Stack Offset
6780  %}
6781%}
6782
6783operand stackSlotI(sRegI reg)
6784%{
6785  constraint(ALLOC_IN_RC(stack_slots));
6786  // No match rule because this operand is only generated in matching
6787  // match(RegI);
6788  format %{ "[$reg]" %}
6789  interface(MEMORY_INTER) %{
6790    base(0x1e);  // RSP
6791    index(0x0);  // No Index
6792    scale(0x0);  // No Scale
6793    disp($reg);  // Stack Offset
6794  %}
6795%}
6796
6797operand stackSlotF(sRegF reg)
6798%{
6799  constraint(ALLOC_IN_RC(stack_slots));
6800  // No match rule because this operand is only generated in matching
6801  // match(RegF);
6802  format %{ "[$reg]" %}
6803  interface(MEMORY_INTER) %{
6804    base(0x1e);  // RSP
6805    index(0x0);  // No Index
6806    scale(0x0);  // No Scale
6807    disp($reg);  // Stack Offset
6808  %}
6809%}
6810
6811operand stackSlotD(sRegD reg)
6812%{
6813  constraint(ALLOC_IN_RC(stack_slots));
6814  // No match rule because this operand is only generated in matching
6815  // match(RegD);
6816  format %{ "[$reg]" %}
6817  interface(MEMORY_INTER) %{
6818    base(0x1e);  // RSP
6819    index(0x0);  // No Index
6820    scale(0x0);  // No Scale
6821    disp($reg);  // Stack Offset
6822  %}
6823%}
6824
6825operand stackSlotL(sRegL reg)
6826%{
6827  constraint(ALLOC_IN_RC(stack_slots));
6828  // No match rule because this operand is only generated in matching
6829  // match(RegL);
6830  format %{ "[$reg]" %}
6831  interface(MEMORY_INTER) %{
6832    base(0x1e);  // RSP
6833    index(0x0);  // No Index
6834    scale(0x0);  // No Scale
6835    disp($reg);  // Stack Offset
6836  %}
6837%}
6838
6839// Operands for expressing Control Flow
6840// NOTE: Label is a predefined operand which should not be redefined in
6841//       the AD file. It is generically handled within the ADLC.
6842
6843//----------Conditional Branch Operands----------------------------------------
6844// Comparison Op  - This is the operation of the comparison, and is limited to
6845//                  the following set of codes:
6846//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
6847//
6848// Other attributes of the comparison, such as unsignedness, are specified
6849// by the comparison instruction that sets a condition code flags register.
6850// That result is represented by a flags operand whose subtype is appropriate
6851// to the unsignedness (etc.) of the comparison.
6852//
6853// Later, the instruction which matches both the Comparison Op (a Bool) and
6854// the flags (produced by the Cmp) specifies the coding of the comparison op
6855// by matching a specific subtype of Bool operand below, such as cmpOpU.
6856
6857// used for signed integral comparisons and fp comparisons
6858
6859operand cmpOp()
6860%{
6861  match(Bool);
6862
6863  format %{ "" %}
6864  interface(COND_INTER) %{
6865    equal(0x0, "eq");
6866    not_equal(0x1, "ne");
6867    less(0xb, "lt");
6868    greater_equal(0xa, "ge");
6869    less_equal(0xd, "le");
6870    greater(0xc, "gt");
6871    overflow(0x6, "vs");
6872    no_overflow(0x7, "vc");
6873  %}
6874%}
6875
6876// used for unsigned integral comparisons
6877
6878operand cmpOpU()
6879%{
6880  match(Bool);
6881
6882  format %{ "" %}
6883  interface(COND_INTER) %{
6884    equal(0x0, "eq");
6885    not_equal(0x1, "ne");
6886    less(0x3, "lo");
6887    greater_equal(0x2, "hs");
6888    less_equal(0x9, "ls");
6889    greater(0x8, "hi");
6890    overflow(0x6, "vs");
6891    no_overflow(0x7, "vc");
6892  %}
6893%}
6894
6895// used for certain integral comparisons which can be
6896// converted to cbxx or tbxx instructions
6897
6898operand cmpOpEqNe()
6899%{
6900  match(Bool);
6901  match(CmpOp);
6902  op_cost(0);
6903  predicate(n->as_Bool()->_test._test == BoolTest::ne
6904            || n->as_Bool()->_test._test == BoolTest::eq);
6905
6906  format %{ "" %}
6907  interface(COND_INTER) %{
6908    equal(0x0, "eq");
6909    not_equal(0x1, "ne");
6910    less(0xb, "lt");
6911    greater_equal(0xa, "ge");
6912    less_equal(0xd, "le");
6913    greater(0xc, "gt");
6914    overflow(0x6, "vs");
6915    no_overflow(0x7, "vc");
6916  %}
6917%}
6918
6919// used for certain integral comparisons which can be
6920// converted to cbxx or tbxx instructions
6921
6922operand cmpOpLtGe()
6923%{
6924  match(Bool);
6925  match(CmpOp);
6926  op_cost(0);
6927
6928  predicate(n->as_Bool()->_test._test == BoolTest::lt
6929            || n->as_Bool()->_test._test == BoolTest::ge);
6930
6931  format %{ "" %}
6932  interface(COND_INTER) %{
6933    equal(0x0, "eq");
6934    not_equal(0x1, "ne");
6935    less(0xb, "lt");
6936    greater_equal(0xa, "ge");
6937    less_equal(0xd, "le");
6938    greater(0xc, "gt");
6939    overflow(0x6, "vs");
6940    no_overflow(0x7, "vc");
6941  %}
6942%}
6943
6944// used for certain unsigned integral comparisons which can be
6945// converted to cbxx or tbxx instructions
6946
6947operand cmpOpUEqNeLtGe()
6948%{
6949  match(Bool);
6950  match(CmpOp);
6951  op_cost(0);
6952
6953  predicate(n->as_Bool()->_test._test == BoolTest::eq
6954            || n->as_Bool()->_test._test == BoolTest::ne
6955            || n->as_Bool()->_test._test == BoolTest::lt
6956            || n->as_Bool()->_test._test == BoolTest::ge);
6957
6958  format %{ "" %}
6959  interface(COND_INTER) %{
6960    equal(0x0, "eq");
6961    not_equal(0x1, "ne");
6962    less(0xb, "lt");
6963    greater_equal(0xa, "ge");
6964    less_equal(0xd, "le");
6965    greater(0xc, "gt");
6966    overflow(0x6, "vs");
6967    no_overflow(0x7, "vc");
6968  %}
6969%}
6970
6971// Special operand allowing long args to int ops to be truncated for free
6972
6973operand iRegL2I(iRegL reg) %{
6974
6975  op_cost(0);
6976
6977  match(ConvL2I reg);
6978
6979  format %{ "l2i($reg)" %}
6980
6981  interface(REG_INTER)
6982%}
6983
6984opclass vmem4(indirect, indIndex, indOffI4, indOffL4);
6985opclass vmem8(indirect, indIndex, indOffI8, indOffL8);
6986opclass vmem16(indirect, indIndex, indOffI16, indOffL16);
6987
6988//----------OPERAND CLASSES----------------------------------------------------
6989// Operand Classes are groups of operands that are used as to simplify
6990// instruction definitions by not requiring the AD writer to specify
6991// separate instructions for every form of operand when the
6992// instruction accepts multiple operand types with the same basic
6993// encoding and format. The classic case of this is memory operands.
6994
6995// memory is used to define read/write location for load/store
6996// instruction defs. we can turn a memory op into an Address
6997
6998opclass memory(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI, indOffL,
6999               indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN);
7000
7001// iRegIorL2I is used for src inputs in rules for 32 bit int (I)
7002// operations. it allows the src to be either an iRegI or a (ConvL2I
7003// iRegL). in the latter case the l2i normally planted for a ConvL2I
7004// can be elided because the 32-bit instruction will just employ the
7005// lower 32 bits anyway.
7006//
7007// n.b. this does not elide all L2I conversions. if the truncated
7008// value is consumed by more than one operation then the ConvL2I
7009// cannot be bundled into the consuming nodes so an l2i gets planted
7010// (actually a movw $dst $src) and the downstream instructions consume
7011// the result of the l2i as an iRegI input. That's a shame since the
7012// movw is actually redundant but its not too costly.
7013
7014opclass iRegIorL2I(iRegI, iRegL2I);
7015
7016//----------PIPELINE-----------------------------------------------------------
7017// Rules which define the behavior of the target architectures pipeline.
7018
7019// For specific pipelines, eg A53, define the stages of that pipeline
7020//pipe_desc(ISS, EX1, EX2, WR);
7021#define ISS S0
7022#define EX1 S1
7023#define EX2 S2
7024#define WR  S3
7025
7026// Integer ALU reg operation
7027pipeline %{
7028
7029attributes %{
7030  // ARM instructions are of fixed length
7031  fixed_size_instructions;        // Fixed size instructions TODO does
7032  max_instructions_per_bundle = 2;   // A53 = 2, A57 = 4
7033  // ARM instructions come in 32-bit word units
7034  instruction_unit_size = 4;         // An instruction is 4 bytes long
7035  instruction_fetch_unit_size = 64;  // The processor fetches one line
7036  instruction_fetch_units = 1;       // of 64 bytes
7037
7038  // List of nop instructions
7039  nops( MachNop );
7040%}
7041
7042// We don't use an actual pipeline model so don't care about resources
7043// or description. we do use pipeline classes to introduce fixed
7044// latencies
7045
7046//----------RESOURCES----------------------------------------------------------
7047// Resources are the functional units available to the machine
7048
7049resources( INS0, INS1, INS01 = INS0 | INS1,
7050           ALU0, ALU1, ALU = ALU0 | ALU1,
7051           MAC,
7052           DIV,
7053           BRANCH,
7054           LDST,
7055           NEON_FP);
7056
7057//----------PIPELINE DESCRIPTION-----------------------------------------------
7058// Pipeline Description specifies the stages in the machine's pipeline
7059
7060// Define the pipeline as a generic 6 stage pipeline
7061pipe_desc(S0, S1, S2, S3, S4, S5);
7062
7063//----------PIPELINE CLASSES---------------------------------------------------
7064// Pipeline Classes describe the stages in which input and output are
7065// referenced by the hardware pipeline.
7066
7067pipe_class fp_dop_reg_reg_s(vRegF dst, vRegF src1, vRegF src2)
7068%{
7069  single_instruction;
7070  src1   : S1(read);
7071  src2   : S2(read);
7072  dst    : S5(write);
7073  INS01  : ISS;
7074  NEON_FP : S5;
7075%}
7076
7077pipe_class fp_dop_reg_reg_d(vRegD dst, vRegD src1, vRegD src2)
7078%{
7079  single_instruction;
7080  src1   : S1(read);
7081  src2   : S2(read);
7082  dst    : S5(write);
7083  INS01  : ISS;
7084  NEON_FP : S5;
7085%}
7086
7087pipe_class fp_uop_s(vRegF dst, vRegF src)
7088%{
7089  single_instruction;
7090  src    : S1(read);
7091  dst    : S5(write);
7092  INS01  : ISS;
7093  NEON_FP : S5;
7094%}
7095
7096pipe_class fp_uop_d(vRegD dst, vRegD src)
7097%{
7098  single_instruction;
7099  src    : S1(read);
7100  dst    : S5(write);
7101  INS01  : ISS;
7102  NEON_FP : S5;
7103%}
7104
7105pipe_class fp_d2f(vRegF dst, vRegD src)
7106%{
7107  single_instruction;
7108  src    : S1(read);
7109  dst    : S5(write);
7110  INS01  : ISS;
7111  NEON_FP : S5;
7112%}
7113
7114pipe_class fp_f2d(vRegD dst, vRegF src)
7115%{
7116  single_instruction;
7117  src    : S1(read);
7118  dst    : S5(write);
7119  INS01  : ISS;
7120  NEON_FP : S5;
7121%}
7122
7123pipe_class fp_f2i(iRegINoSp dst, vRegF src)
7124%{
7125  single_instruction;
7126  src    : S1(read);
7127  dst    : S5(write);
7128  INS01  : ISS;
7129  NEON_FP : S5;
7130%}
7131
7132pipe_class fp_f2l(iRegLNoSp dst, vRegF src)
7133%{
7134  single_instruction;
7135  src    : S1(read);
7136  dst    : S5(write);
7137  INS01  : ISS;
7138  NEON_FP : S5;
7139%}
7140
7141pipe_class fp_i2f(vRegF dst, iRegIorL2I src)
7142%{
7143  single_instruction;
7144  src    : S1(read);
7145  dst    : S5(write);
7146  INS01  : ISS;
7147  NEON_FP : S5;
7148%}
7149
7150pipe_class fp_l2f(vRegF dst, iRegL src)
7151%{
7152  single_instruction;
7153  src    : S1(read);
7154  dst    : S5(write);
7155  INS01  : ISS;
7156  NEON_FP : S5;
7157%}
7158
7159pipe_class fp_d2i(iRegINoSp dst, vRegD src)
7160%{
7161  single_instruction;
7162  src    : S1(read);
7163  dst    : S5(write);
7164  INS01  : ISS;
7165  NEON_FP : S5;
7166%}
7167
7168pipe_class fp_d2l(iRegLNoSp dst, vRegD src)
7169%{
7170  single_instruction;
7171  src    : S1(read);
7172  dst    : S5(write);
7173  INS01  : ISS;
7174  NEON_FP : S5;
7175%}
7176
7177pipe_class fp_i2d(vRegD dst, iRegIorL2I src)
7178%{
7179  single_instruction;
7180  src    : S1(read);
7181  dst    : S5(write);
7182  INS01  : ISS;
7183  NEON_FP : S5;
7184%}
7185
7186pipe_class fp_l2d(vRegD dst, iRegIorL2I src)
7187%{
7188  single_instruction;
7189  src    : S1(read);
7190  dst    : S5(write);
7191  INS01  : ISS;
7192  NEON_FP : S5;
7193%}
7194
7195pipe_class fp_div_s(vRegF dst, vRegF src1, vRegF src2)
7196%{
7197  single_instruction;
7198  src1   : S1(read);
7199  src2   : S2(read);
7200  dst    : S5(write);
7201  INS0   : ISS;
7202  NEON_FP : S5;
7203%}
7204
7205pipe_class fp_div_d(vRegD dst, vRegD src1, vRegD src2)
7206%{
7207  single_instruction;
7208  src1   : S1(read);
7209  src2   : S2(read);
7210  dst    : S5(write);
7211  INS0   : ISS;
7212  NEON_FP : S5;
7213%}
7214
7215pipe_class fp_cond_reg_reg_s(vRegF dst, vRegF src1, vRegF src2, rFlagsReg cr)
7216%{
7217  single_instruction;
7218  cr     : S1(read);
7219  src1   : S1(read);
7220  src2   : S1(read);
7221  dst    : S3(write);
7222  INS01  : ISS;
7223  NEON_FP : S3;
7224%}
7225
7226pipe_class fp_cond_reg_reg_d(vRegD dst, vRegD src1, vRegD src2, rFlagsReg cr)
7227%{
7228  single_instruction;
7229  cr     : S1(read);
7230  src1   : S1(read);
7231  src2   : S1(read);
7232  dst    : S3(write);
7233  INS01  : ISS;
7234  NEON_FP : S3;
7235%}
7236
7237pipe_class fp_imm_s(vRegF dst)
7238%{
7239  single_instruction;
7240  dst    : S3(write);
7241  INS01  : ISS;
7242  NEON_FP : S3;
7243%}
7244
7245pipe_class fp_imm_d(vRegD dst)
7246%{
7247  single_instruction;
7248  dst    : S3(write);
7249  INS01  : ISS;
7250  NEON_FP : S3;
7251%}
7252
7253pipe_class fp_load_constant_s(vRegF dst)
7254%{
7255  single_instruction;
7256  dst    : S4(write);
7257  INS01  : ISS;
7258  NEON_FP : S4;
7259%}
7260
7261pipe_class fp_load_constant_d(vRegD dst)
7262%{
7263  single_instruction;
7264  dst    : S4(write);
7265  INS01  : ISS;
7266  NEON_FP : S4;
7267%}
7268
7269pipe_class vmul64(vecD dst, vecD src1, vecD src2)
7270%{
7271  single_instruction;
7272  dst    : S5(write);
7273  src1   : S1(read);
7274  src2   : S1(read);
7275  INS01  : ISS;
7276  NEON_FP : S5;
7277%}
7278
7279pipe_class vmul128(vecX dst, vecX src1, vecX src2)
7280%{
7281  single_instruction;
7282  dst    : S5(write);
7283  src1   : S1(read);
7284  src2   : S1(read);
7285  INS0   : ISS;
7286  NEON_FP : S5;
7287%}
7288
7289pipe_class vmla64(vecD dst, vecD src1, vecD src2)
7290%{
7291  single_instruction;
7292  dst    : S5(write);
7293  src1   : S1(read);
7294  src2   : S1(read);
7295  dst    : S1(read);
7296  INS01  : ISS;
7297  NEON_FP : S5;
7298%}
7299
7300pipe_class vmla128(vecX dst, vecX src1, vecX src2)
7301%{
7302  single_instruction;
7303  dst    : S5(write);
7304  src1   : S1(read);
7305  src2   : S1(read);
7306  dst    : S1(read);
7307  INS0   : ISS;
7308  NEON_FP : S5;
7309%}
7310
7311pipe_class vdop64(vecD dst, vecD src1, vecD src2)
7312%{
7313  single_instruction;
7314  dst    : S4(write);
7315  src1   : S2(read);
7316  src2   : S2(read);
7317  INS01  : ISS;
7318  NEON_FP : S4;
7319%}
7320
7321pipe_class vdop128(vecX dst, vecX src1, vecX src2)
7322%{
7323  single_instruction;
7324  dst    : S4(write);
7325  src1   : S2(read);
7326  src2   : S2(read);
7327  INS0   : ISS;
7328  NEON_FP : S4;
7329%}
7330
7331pipe_class vlogical64(vecD dst, vecD src1, vecD src2)
7332%{
7333  single_instruction;
7334  dst    : S3(write);
7335  src1   : S2(read);
7336  src2   : S2(read);
7337  INS01  : ISS;
7338  NEON_FP : S3;
7339%}
7340
7341pipe_class vlogical128(vecX dst, vecX src1, vecX src2)
7342%{
7343  single_instruction;
7344  dst    : S3(write);
7345  src1   : S2(read);
7346  src2   : S2(read);
7347  INS0   : ISS;
7348  NEON_FP : S3;
7349%}
7350
7351pipe_class vshift64(vecD dst, vecD src, vecX shift)
7352%{
7353  single_instruction;
7354  dst    : S3(write);
7355  src    : S1(read);
7356  shift  : S1(read);
7357  INS01  : ISS;
7358  NEON_FP : S3;
7359%}
7360
7361pipe_class vshift128(vecX dst, vecX src, vecX shift)
7362%{
7363  single_instruction;
7364  dst    : S3(write);
7365  src    : S1(read);
7366  shift  : S1(read);
7367  INS0   : ISS;
7368  NEON_FP : S3;
7369%}
7370
7371pipe_class vshift64_imm(vecD dst, vecD src, immI shift)
7372%{
7373  single_instruction;
7374  dst    : S3(write);
7375  src    : S1(read);
7376  INS01  : ISS;
7377  NEON_FP : S3;
7378%}
7379
7380pipe_class vshift128_imm(vecX dst, vecX src, immI shift)
7381%{
7382  single_instruction;
7383  dst    : S3(write);
7384  src    : S1(read);
7385  INS0   : ISS;
7386  NEON_FP : S3;
7387%}
7388
7389pipe_class vdop_fp64(vecD dst, vecD src1, vecD src2)
7390%{
7391  single_instruction;
7392  dst    : S5(write);
7393  src1   : S1(read);
7394  src2   : S1(read);
7395  INS01  : ISS;
7396  NEON_FP : S5;
7397%}
7398
7399pipe_class vdop_fp128(vecX dst, vecX src1, vecX src2)
7400%{
7401  single_instruction;
7402  dst    : S5(write);
7403  src1   : S1(read);
7404  src2   : S1(read);
7405  INS0   : ISS;
7406  NEON_FP : S5;
7407%}
7408
7409pipe_class vmuldiv_fp64(vecD dst, vecD src1, vecD src2)
7410%{
7411  single_instruction;
7412  dst    : S5(write);
7413  src1   : S1(read);
7414  src2   : S1(read);
7415  INS0   : ISS;
7416  NEON_FP : S5;
7417%}
7418
7419pipe_class vmuldiv_fp128(vecX dst, vecX src1, vecX src2)
7420%{
7421  single_instruction;
7422  dst    : S5(write);
7423  src1   : S1(read);
7424  src2   : S1(read);
7425  INS0   : ISS;
7426  NEON_FP : S5;
7427%}
7428
7429pipe_class vsqrt_fp128(vecX dst, vecX src)
7430%{
7431  single_instruction;
7432  dst    : S5(write);
7433  src    : S1(read);
7434  INS0   : ISS;
7435  NEON_FP : S5;
7436%}
7437
7438pipe_class vunop_fp64(vecD dst, vecD src)
7439%{
7440  single_instruction;
7441  dst    : S5(write);
7442  src    : S1(read);
7443  INS01  : ISS;
7444  NEON_FP : S5;
7445%}
7446
7447pipe_class vunop_fp128(vecX dst, vecX src)
7448%{
7449  single_instruction;
7450  dst    : S5(write);
7451  src    : S1(read);
7452  INS0   : ISS;
7453  NEON_FP : S5;
7454%}
7455
7456pipe_class vdup_reg_reg64(vecD dst, iRegI src)
7457%{
7458  single_instruction;
7459  dst    : S3(write);
7460  src    : S1(read);
7461  INS01  : ISS;
7462  NEON_FP : S3;
7463%}
7464
7465pipe_class vdup_reg_reg128(vecX dst, iRegI src)
7466%{
7467  single_instruction;
7468  dst    : S3(write);
7469  src    : S1(read);
7470  INS01  : ISS;
7471  NEON_FP : S3;
7472%}
7473
7474pipe_class vdup_reg_freg64(vecD dst, vRegF src)
7475%{
7476  single_instruction;
7477  dst    : S3(write);
7478  src    : S1(read);
7479  INS01  : ISS;
7480  NEON_FP : S3;
7481%}
7482
7483pipe_class vdup_reg_freg128(vecX dst, vRegF src)
7484%{
7485  single_instruction;
7486  dst    : S3(write);
7487  src    : S1(read);
7488  INS01  : ISS;
7489  NEON_FP : S3;
7490%}
7491
7492pipe_class vdup_reg_dreg128(vecX dst, vRegD src)
7493%{
7494  single_instruction;
7495  dst    : S3(write);
7496  src    : S1(read);
7497  INS01  : ISS;
7498  NEON_FP : S3;
7499%}
7500
7501pipe_class vmovi_reg_imm64(vecD dst)
7502%{
7503  single_instruction;
7504  dst    : S3(write);
7505  INS01  : ISS;
7506  NEON_FP : S3;
7507%}
7508
7509pipe_class vmovi_reg_imm128(vecX dst)
7510%{
7511  single_instruction;
7512  dst    : S3(write);
7513  INS0   : ISS;
7514  NEON_FP : S3;
7515%}
7516
7517pipe_class vload_reg_mem64(vecD dst, vmem8 mem)
7518%{
7519  single_instruction;
7520  dst    : S5(write);
7521  mem    : ISS(read);
7522  INS01  : ISS;
7523  NEON_FP : S3;
7524%}
7525
7526pipe_class vload_reg_mem128(vecX dst, vmem16 mem)
7527%{
7528  single_instruction;
7529  dst    : S5(write);
7530  mem    : ISS(read);
7531  INS01  : ISS;
7532  NEON_FP : S3;
7533%}
7534
7535pipe_class vstore_reg_mem64(vecD src, vmem8 mem)
7536%{
7537  single_instruction;
7538  mem    : ISS(read);
7539  src    : S2(read);
7540  INS01  : ISS;
7541  NEON_FP : S3;
7542%}
7543
7544pipe_class vstore_reg_mem128(vecD src, vmem16 mem)
7545%{
7546  single_instruction;
7547  mem    : ISS(read);
7548  src    : S2(read);
7549  INS01  : ISS;
7550  NEON_FP : S3;
7551%}
7552
7553//------- Integer ALU operations --------------------------
7554
7555// Integer ALU reg-reg operation
7556// Operands needed in EX1, result generated in EX2
7557// Eg.  ADD     x0, x1, x2
7558pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2)
7559%{
7560  single_instruction;
7561  dst    : EX2(write);
7562  src1   : EX1(read);
7563  src2   : EX1(read);
7564  INS01  : ISS; // Dual issue as instruction 0 or 1
7565  ALU    : EX2;
7566%}
7567
7568// Integer ALU reg-reg operation with constant shift
7569// Shifted register must be available in LATE_ISS instead of EX1
7570// Eg.  ADD     x0, x1, x2, LSL #2
7571pipe_class ialu_reg_reg_shift(iRegI dst, iRegI src1, iRegI src2, immI shift)
7572%{
7573  single_instruction;
7574  dst    : EX2(write);
7575  src1   : EX1(read);
7576  src2   : ISS(read);
7577  INS01  : ISS;
7578  ALU    : EX2;
7579%}
7580
7581// Integer ALU reg operation with constant shift
7582// Eg.  LSL     x0, x1, #shift
7583pipe_class ialu_reg_shift(iRegI dst, iRegI src1)
7584%{
7585  single_instruction;
7586  dst    : EX2(write);
7587  src1   : ISS(read);
7588  INS01  : ISS;
7589  ALU    : EX2;
7590%}
7591
7592// Integer ALU reg-reg operation with variable shift
7593// Both operands must be available in LATE_ISS instead of EX1
7594// Result is available in EX1 instead of EX2
7595// Eg.  LSLV    x0, x1, x2
7596pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2)
7597%{
7598  single_instruction;
7599  dst    : EX1(write);
7600  src1   : ISS(read);
7601  src2   : ISS(read);
7602  INS01  : ISS;
7603  ALU    : EX1;
7604%}
7605
7606// Integer ALU reg-reg operation with extract
7607// As for _vshift above, but result generated in EX2
7608// Eg.  EXTR    x0, x1, x2, #N
7609pipe_class ialu_reg_reg_extr(iRegI dst, iRegI src1, iRegI src2)
7610%{
7611  single_instruction;
7612  dst    : EX2(write);
7613  src1   : ISS(read);
7614  src2   : ISS(read);
7615  INS1   : ISS; // Can only dual issue as Instruction 1
7616  ALU    : EX1;
7617%}
7618
7619// Integer ALU reg operation
7620// Eg.  NEG     x0, x1
7621pipe_class ialu_reg(iRegI dst, iRegI src)
7622%{
7623  single_instruction;
7624  dst    : EX2(write);
7625  src    : EX1(read);
7626  INS01  : ISS;
7627  ALU    : EX2;
7628%}
7629
7630// Integer ALU reg mmediate operation
7631// Eg.  ADD     x0, x1, #N
7632pipe_class ialu_reg_imm(iRegI dst, iRegI src1)
7633%{
7634  single_instruction;
7635  dst    : EX2(write);
7636  src1   : EX1(read);
7637  INS01  : ISS;
7638  ALU    : EX2;
7639%}
7640
7641// Integer ALU immediate operation (no source operands)
7642// Eg.  MOV     x0, #N
7643pipe_class ialu_imm(iRegI dst)
7644%{
7645  single_instruction;
7646  dst    : EX1(write);
7647  INS01  : ISS;
7648  ALU    : EX1;
7649%}
7650
7651//------- Compare operation -------------------------------
7652
7653// Compare reg-reg
7654// Eg.  CMP     x0, x1
7655pipe_class icmp_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
7656%{
7657  single_instruction;
7658//  fixed_latency(16);
7659  cr     : EX2(write);
7660  op1    : EX1(read);
7661  op2    : EX1(read);
7662  INS01  : ISS;
7663  ALU    : EX2;
7664%}
7665
7666// Compare reg-reg
7667// Eg.  CMP     x0, #N
7668pipe_class icmp_reg_imm(rFlagsReg cr, iRegI op1)
7669%{
7670  single_instruction;
7671//  fixed_latency(16);
7672  cr     : EX2(write);
7673  op1    : EX1(read);
7674  INS01  : ISS;
7675  ALU    : EX2;
7676%}
7677
7678//------- Conditional instructions ------------------------
7679
7680// Conditional no operands
7681// Eg.  CSINC   x0, zr, zr, <cond>
7682pipe_class icond_none(iRegI dst, rFlagsReg cr)
7683%{
7684  single_instruction;
7685  cr     : EX1(read);
7686  dst    : EX2(write);
7687  INS01  : ISS;
7688  ALU    : EX2;
7689%}
7690
7691// Conditional 2 operand
7692// EG.  CSEL    X0, X1, X2, <cond>
7693pipe_class icond_reg_reg(iRegI dst, iRegI src1, iRegI src2, rFlagsReg cr)
7694%{
7695  single_instruction;
7696  cr     : EX1(read);
7697  src1   : EX1(read);
7698  src2   : EX1(read);
7699  dst    : EX2(write);
7700  INS01  : ISS;
7701  ALU    : EX2;
7702%}
7703
7704// Conditional 2 operand
7705// EG.  CSEL    X0, X1, X2, <cond>
7706pipe_class icond_reg(iRegI dst, iRegI src, rFlagsReg cr)
7707%{
7708  single_instruction;
7709  cr     : EX1(read);
7710  src    : EX1(read);
7711  dst    : EX2(write);
7712  INS01  : ISS;
7713  ALU    : EX2;
7714%}
7715
7716//------- Multiply pipeline operations --------------------
7717
7718// Multiply reg-reg
7719// Eg.  MUL     w0, w1, w2
7720pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
7721%{
7722  single_instruction;
7723  dst    : WR(write);
7724  src1   : ISS(read);
7725  src2   : ISS(read);
7726  INS01  : ISS;
7727  MAC    : WR;
7728%}
7729
7730// Multiply accumulate
7731// Eg.  MADD    w0, w1, w2, w3
7732pipe_class imac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
7733%{
7734  single_instruction;
7735  dst    : WR(write);
7736  src1   : ISS(read);
7737  src2   : ISS(read);
7738  src3   : ISS(read);
7739  INS01  : ISS;
7740  MAC    : WR;
7741%}
7742
7743// Eg.  MUL     w0, w1, w2
7744pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
7745%{
7746  single_instruction;
7747  fixed_latency(3); // Maximum latency for 64 bit mul
7748  dst    : WR(write);
7749  src1   : ISS(read);
7750  src2   : ISS(read);
7751  INS01  : ISS;
7752  MAC    : WR;
7753%}
7754
7755// Multiply accumulate
7756// Eg.  MADD    w0, w1, w2, w3
7757pipe_class lmac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
7758%{
7759  single_instruction;
7760  fixed_latency(3); // Maximum latency for 64 bit mul
7761  dst    : WR(write);
7762  src1   : ISS(read);
7763  src2   : ISS(read);
7764  src3   : ISS(read);
7765  INS01  : ISS;
7766  MAC    : WR;
7767%}
7768
7769//------- Divide pipeline operations --------------------
7770
7771// Eg.  SDIV    w0, w1, w2
7772pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
7773%{
7774  single_instruction;
7775  fixed_latency(8); // Maximum latency for 32 bit divide
7776  dst    : WR(write);
7777  src1   : ISS(read);
7778  src2   : ISS(read);
7779  INS0   : ISS; // Can only dual issue as instruction 0
7780  DIV    : WR;
7781%}
7782
7783// Eg.  SDIV    x0, x1, x2
7784pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
7785%{
7786  single_instruction;
7787  fixed_latency(16); // Maximum latency for 64 bit divide
7788  dst    : WR(write);
7789  src1   : ISS(read);
7790  src2   : ISS(read);
7791  INS0   : ISS; // Can only dual issue as instruction 0
7792  DIV    : WR;
7793%}
7794
7795//------- Load pipeline operations ------------------------
7796
7797// Load - prefetch
7798// Eg.  PFRM    <mem>
7799pipe_class iload_prefetch(memory mem)
7800%{
7801  single_instruction;
7802  mem    : ISS(read);
7803  INS01  : ISS;
7804  LDST   : WR;
7805%}
7806
7807// Load - reg, mem
7808// Eg.  LDR     x0, <mem>
7809pipe_class iload_reg_mem(iRegI dst, memory mem)
7810%{
7811  single_instruction;
7812  dst    : WR(write);
7813  mem    : ISS(read);
7814  INS01  : ISS;
7815  LDST   : WR;
7816%}
7817
7818// Load - reg, reg
7819// Eg.  LDR     x0, [sp, x1]
7820pipe_class iload_reg_reg(iRegI dst, iRegI src)
7821%{
7822  single_instruction;
7823  dst    : WR(write);
7824  src    : ISS(read);
7825  INS01  : ISS;
7826  LDST   : WR;
7827%}
7828
7829//------- Store pipeline operations -----------------------
7830
7831// Store - zr, mem
7832// Eg.  STR     zr, <mem>
7833pipe_class istore_mem(memory mem)
7834%{
7835  single_instruction;
7836  mem    : ISS(read);
7837  INS01  : ISS;
7838  LDST   : WR;
7839%}
7840
7841// Store - reg, mem
7842// Eg.  STR     x0, <mem>
7843pipe_class istore_reg_mem(iRegI src, memory mem)
7844%{
7845  single_instruction;
7846  mem    : ISS(read);
7847  src    : EX2(read);
7848  INS01  : ISS;
7849  LDST   : WR;
7850%}
7851
7852// Store - reg, reg
7853// Eg. STR      x0, [sp, x1]
7854pipe_class istore_reg_reg(iRegI dst, iRegI src)
7855%{
7856  single_instruction;
7857  dst    : ISS(read);
7858  src    : EX2(read);
7859  INS01  : ISS;
7860  LDST   : WR;
7861%}
7862
7863//------- Store pipeline operations -----------------------
7864
7865// Branch
7866pipe_class pipe_branch()
7867%{
7868  single_instruction;
7869  INS01  : ISS;
7870  BRANCH : EX1;
7871%}
7872
7873// Conditional branch
7874pipe_class pipe_branch_cond(rFlagsReg cr)
7875%{
7876  single_instruction;
7877  cr     : EX1(read);
7878  INS01  : ISS;
7879  BRANCH : EX1;
7880%}
7881
7882// Compare & Branch
7883// EG.  CBZ/CBNZ
7884pipe_class pipe_cmp_branch(iRegI op1)
7885%{
7886  single_instruction;
7887  op1    : EX1(read);
7888  INS01  : ISS;
7889  BRANCH : EX1;
7890%}
7891
7892//------- Synchronisation operations ----------------------
7893
7894// Any operation requiring serialization.
7895// EG.  DMB/Atomic Ops/Load Acquire/Str Release
7896pipe_class pipe_serial()
7897%{
7898  single_instruction;
7899  force_serialization;
7900  fixed_latency(16);
7901  INS01  : ISS(2); // Cannot dual issue with any other instruction
7902  LDST   : WR;
7903%}
7904
7905// Generic big/slow expanded idiom - also serialized
7906pipe_class pipe_slow()
7907%{
7908  instruction_count(10);
7909  multiple_bundles;
7910  force_serialization;
7911  fixed_latency(16);
7912  INS01  : ISS(2); // Cannot dual issue with any other instruction
7913  LDST   : WR;
7914%}
7915
7916// Empty pipeline class
7917pipe_class pipe_class_empty()
7918%{
7919  single_instruction;
7920  fixed_latency(0);
7921%}
7922
7923// Default pipeline class.
7924pipe_class pipe_class_default()
7925%{
7926  single_instruction;
7927  fixed_latency(2);
7928%}
7929
7930// Pipeline class for compares.
7931pipe_class pipe_class_compare()
7932%{
7933  single_instruction;
7934  fixed_latency(16);
7935%}
7936
7937// Pipeline class for memory operations.
7938pipe_class pipe_class_memory()
7939%{
7940  single_instruction;
7941  fixed_latency(16);
7942%}
7943
7944// Pipeline class for call.
7945pipe_class pipe_class_call()
7946%{
7947  single_instruction;
7948  fixed_latency(100);
7949%}
7950
7951// Define the class for the Nop node.
7952define %{
7953   MachNop = pipe_class_empty;
7954%}
7955
7956%}
7957//----------INSTRUCTIONS-------------------------------------------------------
7958//
7959// match      -- States which machine-independent subtree may be replaced
7960//               by this instruction.
7961// ins_cost   -- The estimated cost of this instruction is used by instruction
7962//               selection to identify a minimum cost tree of machine
7963//               instructions that matches a tree of machine-independent
7964//               instructions.
7965// format     -- A string providing the disassembly for this instruction.
7966//               The value of an instruction's operand may be inserted
7967//               by referring to it with a '$' prefix.
7968// opcode     -- Three instruction opcodes may be provided.  These are referred
7969//               to within an encode class as $primary, $secondary, and $tertiary
7970//               rrspectively.  The primary opcode is commonly used to
7971//               indicate the type of machine instruction, while secondary
7972//               and tertiary are often used for prefix options or addressing
7973//               modes.
7974// ins_encode -- A list of encode classes with parameters. The encode class
7975//               name must have been defined in an 'enc_class' specification
7976//               in the encode section of the architecture description.
7977
7978// ============================================================================
7979// Memory (Load/Store) Instructions
7980
7981// Load Instructions
7982
7983// Load Byte (8 bit signed)
7984instruct loadB(iRegINoSp dst, memory mem)
7985%{
7986  match(Set dst (LoadB mem));
7987  predicate(!needs_acquiring_load(n));
7988
7989  ins_cost(4 * INSN_COST);
7990  format %{ "ldrsbw  $dst, $mem\t# byte" %}
7991
7992  ins_encode(aarch64_enc_ldrsbw(dst, mem));
7993
7994  ins_pipe(iload_reg_mem);
7995%}
7996
7997// Load Byte (8 bit signed) into long
7998instruct loadB2L(iRegLNoSp dst, memory mem)
7999%{
8000  match(Set dst (ConvI2L (LoadB mem)));
8001  predicate(!needs_acquiring_load(n->in(1)));
8002
8003  ins_cost(4 * INSN_COST);
8004  format %{ "ldrsb  $dst, $mem\t# byte" %}
8005
8006  ins_encode(aarch64_enc_ldrsb(dst, mem));
8007
8008  ins_pipe(iload_reg_mem);
8009%}
8010
8011// Load Byte (8 bit unsigned)
8012instruct loadUB(iRegINoSp dst, memory mem)
8013%{
8014  match(Set dst (LoadUB mem));
8015  predicate(!needs_acquiring_load(n));
8016
8017  ins_cost(4 * INSN_COST);
8018  format %{ "ldrbw  $dst, $mem\t# byte" %}
8019
8020  ins_encode(aarch64_enc_ldrb(dst, mem));
8021
8022  ins_pipe(iload_reg_mem);
8023%}
8024
8025// Load Byte (8 bit unsigned) into long
8026instruct loadUB2L(iRegLNoSp dst, memory mem)
8027%{
8028  match(Set dst (ConvI2L (LoadUB mem)));
8029  predicate(!needs_acquiring_load(n->in(1)));
8030
8031  ins_cost(4 * INSN_COST);
8032  format %{ "ldrb  $dst, $mem\t# byte" %}
8033
8034  ins_encode(aarch64_enc_ldrb(dst, mem));
8035
8036  ins_pipe(iload_reg_mem);
8037%}
8038
8039// Load Short (16 bit signed)
8040instruct loadS(iRegINoSp dst, memory mem)
8041%{
8042  match(Set dst (LoadS mem));
8043  predicate(!needs_acquiring_load(n));
8044
8045  ins_cost(4 * INSN_COST);
8046  format %{ "ldrshw  $dst, $mem\t# short" %}
8047
8048  ins_encode(aarch64_enc_ldrshw(dst, mem));
8049
8050  ins_pipe(iload_reg_mem);
8051%}
8052
8053// Load Short (16 bit signed) into long
8054instruct loadS2L(iRegLNoSp dst, memory mem)
8055%{
8056  match(Set dst (ConvI2L (LoadS mem)));
8057  predicate(!needs_acquiring_load(n->in(1)));
8058
8059  ins_cost(4 * INSN_COST);
8060  format %{ "ldrsh  $dst, $mem\t# short" %}
8061
8062  ins_encode(aarch64_enc_ldrsh(dst, mem));
8063
8064  ins_pipe(iload_reg_mem);
8065%}
8066
8067// Load Char (16 bit unsigned)
8068instruct loadUS(iRegINoSp dst, memory mem)
8069%{
8070  match(Set dst (LoadUS mem));
8071  predicate(!needs_acquiring_load(n));
8072
8073  ins_cost(4 * INSN_COST);
8074  format %{ "ldrh  $dst, $mem\t# short" %}
8075
8076  ins_encode(aarch64_enc_ldrh(dst, mem));
8077
8078  ins_pipe(iload_reg_mem);
8079%}
8080
8081// Load Short/Char (16 bit unsigned) into long
8082instruct loadUS2L(iRegLNoSp dst, memory mem)
8083%{
8084  match(Set dst (ConvI2L (LoadUS mem)));
8085  predicate(!needs_acquiring_load(n->in(1)));
8086
8087  ins_cost(4 * INSN_COST);
8088  format %{ "ldrh  $dst, $mem\t# short" %}
8089
8090  ins_encode(aarch64_enc_ldrh(dst, mem));
8091
8092  ins_pipe(iload_reg_mem);
8093%}
8094
8095// Load Integer (32 bit signed)
8096instruct loadI(iRegINoSp dst, memory mem)
8097%{
8098  match(Set dst (LoadI mem));
8099  predicate(!needs_acquiring_load(n));
8100
8101  ins_cost(4 * INSN_COST);
8102  format %{ "ldrw  $dst, $mem\t# int" %}
8103
8104  ins_encode(aarch64_enc_ldrw(dst, mem));
8105
8106  ins_pipe(iload_reg_mem);
8107%}
8108
8109// Load Integer (32 bit signed) into long
8110instruct loadI2L(iRegLNoSp dst, memory mem)
8111%{
8112  match(Set dst (ConvI2L (LoadI mem)));
8113  predicate(!needs_acquiring_load(n->in(1)));
8114
8115  ins_cost(4 * INSN_COST);
8116  format %{ "ldrsw  $dst, $mem\t# int" %}
8117
8118  ins_encode(aarch64_enc_ldrsw(dst, mem));
8119
8120  ins_pipe(iload_reg_mem);
8121%}
8122
8123// Load Integer (32 bit unsigned) into long
8124instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask)
8125%{
8126  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
8127  predicate(!needs_acquiring_load(n->in(1)->in(1)->as_Load()));
8128
8129  ins_cost(4 * INSN_COST);
8130  format %{ "ldrw  $dst, $mem\t# int" %}
8131
8132  ins_encode(aarch64_enc_ldrw(dst, mem));
8133
8134  ins_pipe(iload_reg_mem);
8135%}
8136
8137// Load Long (64 bit signed)
8138instruct loadL(iRegLNoSp dst, memory mem)
8139%{
8140  match(Set dst (LoadL mem));
8141  predicate(!needs_acquiring_load(n));
8142
8143  ins_cost(4 * INSN_COST);
8144  format %{ "ldr  $dst, $mem\t# int" %}
8145
8146  ins_encode(aarch64_enc_ldr(dst, mem));
8147
8148  ins_pipe(iload_reg_mem);
8149%}
8150
8151// Load Range
8152instruct loadRange(iRegINoSp dst, memory mem)
8153%{
8154  match(Set dst (LoadRange mem));
8155
8156  ins_cost(4 * INSN_COST);
8157  format %{ "ldrw  $dst, $mem\t# range" %}
8158
8159  ins_encode(aarch64_enc_ldrw(dst, mem));
8160
8161  ins_pipe(iload_reg_mem);
8162%}
8163
8164// Load Pointer
8165instruct loadP(iRegPNoSp dst, memory mem)
8166%{
8167  match(Set dst (LoadP mem));
8168  predicate(!needs_acquiring_load(n));
8169
8170  ins_cost(4 * INSN_COST);
8171  format %{ "ldr  $dst, $mem\t# ptr" %}
8172
8173  ins_encode(aarch64_enc_ldr(dst, mem));
8174
8175  ins_pipe(iload_reg_mem);
8176%}
8177
8178// Load Compressed Pointer
8179instruct loadN(iRegNNoSp dst, memory mem)
8180%{
8181  match(Set dst (LoadN mem));
8182  predicate(!needs_acquiring_load(n));
8183
8184  ins_cost(4 * INSN_COST);
8185  format %{ "ldrw  $dst, $mem\t# compressed ptr" %}
8186
8187  ins_encode(aarch64_enc_ldrw(dst, mem));
8188
8189  ins_pipe(iload_reg_mem);
8190%}
8191
8192// Load Klass Pointer
8193instruct loadKlass(iRegPNoSp dst, memory mem)
8194%{
8195  match(Set dst (LoadKlass mem));
8196  predicate(!needs_acquiring_load(n));
8197
8198  ins_cost(4 * INSN_COST);
8199  format %{ "ldr  $dst, $mem\t# class" %}
8200
8201  ins_encode(aarch64_enc_ldr(dst, mem));
8202
8203  ins_pipe(iload_reg_mem);
8204%}
8205
8206// Load Narrow Klass Pointer
8207instruct loadNKlass(iRegNNoSp dst, memory mem)
8208%{
8209  match(Set dst (LoadNKlass mem));
8210  predicate(!needs_acquiring_load(n));
8211
8212  ins_cost(4 * INSN_COST);
8213  format %{ "ldrw  $dst, $mem\t# compressed class ptr" %}
8214
8215  ins_encode(aarch64_enc_ldrw(dst, mem));
8216
8217  ins_pipe(iload_reg_mem);
8218%}
8219
8220// Load Float
8221instruct loadF(vRegF dst, memory mem)
8222%{
8223  match(Set dst (LoadF mem));
8224  predicate(!needs_acquiring_load(n));
8225
8226  ins_cost(4 * INSN_COST);
8227  format %{ "ldrs  $dst, $mem\t# float" %}
8228
8229  ins_encode( aarch64_enc_ldrs(dst, mem) );
8230
8231  ins_pipe(pipe_class_memory);
8232%}
8233
8234// Load Double
8235instruct loadD(vRegD dst, memory mem)
8236%{
8237  match(Set dst (LoadD mem));
8238  predicate(!needs_acquiring_load(n));
8239
8240  ins_cost(4 * INSN_COST);
8241  format %{ "ldrd  $dst, $mem\t# double" %}
8242
8243  ins_encode( aarch64_enc_ldrd(dst, mem) );
8244
8245  ins_pipe(pipe_class_memory);
8246%}
8247
8248
8249// Load Int Constant
8250instruct loadConI(iRegINoSp dst, immI src)
8251%{
8252  match(Set dst src);
8253
8254  ins_cost(INSN_COST);
8255  format %{ "mov $dst, $src\t# int" %}
8256
8257  ins_encode( aarch64_enc_movw_imm(dst, src) );
8258
8259  ins_pipe(ialu_imm);
8260%}
8261
8262// Load Long Constant
8263instruct loadConL(iRegLNoSp dst, immL src)
8264%{
8265  match(Set dst src);
8266
8267  ins_cost(INSN_COST);
8268  format %{ "mov $dst, $src\t# long" %}
8269
8270  ins_encode( aarch64_enc_mov_imm(dst, src) );
8271
8272  ins_pipe(ialu_imm);
8273%}
8274
8275// Load Pointer Constant
8276
8277instruct loadConP(iRegPNoSp dst, immP con)
8278%{
8279  match(Set dst con);
8280
8281  ins_cost(INSN_COST * 4);
8282  format %{
8283    "mov  $dst, $con\t# ptr\n\t"
8284  %}
8285
8286  ins_encode(aarch64_enc_mov_p(dst, con));
8287
8288  ins_pipe(ialu_imm);
8289%}
8290
8291// Load Null Pointer Constant
8292
8293instruct loadConP0(iRegPNoSp dst, immP0 con)
8294%{
8295  match(Set dst con);
8296
8297  ins_cost(INSN_COST);
8298  format %{ "mov  $dst, $con\t# NULL ptr" %}
8299
8300  ins_encode(aarch64_enc_mov_p0(dst, con));
8301
8302  ins_pipe(ialu_imm);
8303%}
8304
8305// Load Pointer Constant One
8306
8307instruct loadConP1(iRegPNoSp dst, immP_1 con)
8308%{
8309  match(Set dst con);
8310
8311  ins_cost(INSN_COST);
8312  format %{ "mov  $dst, $con\t# NULL ptr" %}
8313
8314  ins_encode(aarch64_enc_mov_p1(dst, con));
8315
8316  ins_pipe(ialu_imm);
8317%}
8318
8319// Load Poll Page Constant
8320
8321instruct loadConPollPage(iRegPNoSp dst, immPollPage con)
8322%{
8323  match(Set dst con);
8324
8325  ins_cost(INSN_COST);
8326  format %{ "adr  $dst, $con\t# Poll Page Ptr" %}
8327
8328  ins_encode(aarch64_enc_mov_poll_page(dst, con));
8329
8330  ins_pipe(ialu_imm);
8331%}
8332
8333// Load Byte Map Base Constant
8334
8335instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
8336%{
8337  match(Set dst con);
8338
8339  ins_cost(INSN_COST);
8340  format %{ "adr  $dst, $con\t# Byte Map Base" %}
8341
8342  ins_encode(aarch64_enc_mov_byte_map_base(dst, con));
8343
8344  ins_pipe(ialu_imm);
8345%}
8346
8347// Load Narrow Pointer Constant
8348
8349instruct loadConN(iRegNNoSp dst, immN con)
8350%{
8351  match(Set dst con);
8352
8353  ins_cost(INSN_COST * 4);
8354  format %{ "mov  $dst, $con\t# compressed ptr" %}
8355
8356  ins_encode(aarch64_enc_mov_n(dst, con));
8357
8358  ins_pipe(ialu_imm);
8359%}
8360
8361// Load Narrow Null Pointer Constant
8362
8363instruct loadConN0(iRegNNoSp dst, immN0 con)
8364%{
8365  match(Set dst con);
8366
8367  ins_cost(INSN_COST);
8368  format %{ "mov  $dst, $con\t# compressed NULL ptr" %}
8369
8370  ins_encode(aarch64_enc_mov_n0(dst, con));
8371
8372  ins_pipe(ialu_imm);
8373%}
8374
8375// Load Narrow Klass Constant
8376
8377instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
8378%{
8379  match(Set dst con);
8380
8381  ins_cost(INSN_COST);
8382  format %{ "mov  $dst, $con\t# compressed klass ptr" %}
8383
8384  ins_encode(aarch64_enc_mov_nk(dst, con));
8385
8386  ins_pipe(ialu_imm);
8387%}
8388
8389// Load Packed Float Constant
8390
8391instruct loadConF_packed(vRegF dst, immFPacked con) %{
8392  match(Set dst con);
8393  ins_cost(INSN_COST * 4);
8394  format %{ "fmovs  $dst, $con"%}
8395  ins_encode %{
8396    __ fmovs(as_FloatRegister($dst$$reg), (double)$con$$constant);
8397  %}
8398
8399  ins_pipe(fp_imm_s);
8400%}
8401
8402// Load Float Constant
8403
8404instruct loadConF(vRegF dst, immF con) %{
8405  match(Set dst con);
8406
8407  ins_cost(INSN_COST * 4);
8408
8409  format %{
8410    "ldrs $dst, [$constantaddress]\t# load from constant table: float=$con\n\t"
8411  %}
8412
8413  ins_encode %{
8414    __ ldrs(as_FloatRegister($dst$$reg), $constantaddress($con));
8415  %}
8416
8417  ins_pipe(fp_load_constant_s);
8418%}
8419
8420// Load Packed Double Constant
8421
8422instruct loadConD_packed(vRegD dst, immDPacked con) %{
8423  match(Set dst con);
8424  ins_cost(INSN_COST);
8425  format %{ "fmovd  $dst, $con"%}
8426  ins_encode %{
8427    __ fmovd(as_FloatRegister($dst$$reg), $con$$constant);
8428  %}
8429
8430  ins_pipe(fp_imm_d);
8431%}
8432
8433// Load Double Constant
8434
8435instruct loadConD(vRegD dst, immD con) %{
8436  match(Set dst con);
8437
8438  ins_cost(INSN_COST * 5);
8439  format %{
8440    "ldrd $dst, [$constantaddress]\t# load from constant table: float=$con\n\t"
8441  %}
8442
8443  ins_encode %{
8444    __ ldrd(as_FloatRegister($dst$$reg), $constantaddress($con));
8445  %}
8446
8447  ins_pipe(fp_load_constant_d);
8448%}
8449
8450// Store Instructions
8451
8452// Store CMS card-mark Immediate
8453instruct storeimmCM0(immI0 zero, memory mem)
8454%{
8455  match(Set mem (StoreCM mem zero));
8456  predicate(unnecessary_storestore(n));
8457
8458  ins_cost(INSN_COST);
8459  format %{ "strb zr, $mem\t# byte" %}
8460
8461  ins_encode(aarch64_enc_strb0(mem));
8462
8463  ins_pipe(istore_mem);
8464%}
8465
8466// Store CMS card-mark Immediate with intervening StoreStore
8467// needed when using CMS with no conditional card marking
8468instruct storeimmCM0_ordered(immI0 zero, memory mem)
8469%{
8470  match(Set mem (StoreCM mem zero));
8471
8472  ins_cost(INSN_COST * 2);
8473  format %{ "dmb ishst"
8474      "\n\tstrb zr, $mem\t# byte" %}
8475
8476  ins_encode(aarch64_enc_strb0_ordered(mem));
8477
8478  ins_pipe(istore_mem);
8479%}
8480
8481// Store Byte
8482instruct storeB(iRegIorL2I src, memory mem)
8483%{
8484  match(Set mem (StoreB mem src));
8485  predicate(!needs_releasing_store(n));
8486
8487  ins_cost(INSN_COST);
8488  format %{ "strb  $src, $mem\t# byte" %}
8489
8490  ins_encode(aarch64_enc_strb(src, mem));
8491
8492  ins_pipe(istore_reg_mem);
8493%}
8494
8495
8496instruct storeimmB0(immI0 zero, memory mem)
8497%{
8498  match(Set mem (StoreB mem zero));
8499  predicate(!needs_releasing_store(n));
8500
8501  ins_cost(INSN_COST);
8502  format %{ "strb rscractch2, $mem\t# byte" %}
8503
8504  ins_encode(aarch64_enc_strb0(mem));
8505
8506  ins_pipe(istore_mem);
8507%}
8508
8509// Store Char/Short
8510instruct storeC(iRegIorL2I src, memory mem)
8511%{
8512  match(Set mem (StoreC mem src));
8513  predicate(!needs_releasing_store(n));
8514
8515  ins_cost(INSN_COST);
8516  format %{ "strh  $src, $mem\t# short" %}
8517
8518  ins_encode(aarch64_enc_strh(src, mem));
8519
8520  ins_pipe(istore_reg_mem);
8521%}
8522
8523instruct storeimmC0(immI0 zero, memory mem)
8524%{
8525  match(Set mem (StoreC mem zero));
8526  predicate(!needs_releasing_store(n));
8527
8528  ins_cost(INSN_COST);
8529  format %{ "strh  zr, $mem\t# short" %}
8530
8531  ins_encode(aarch64_enc_strh0(mem));
8532
8533  ins_pipe(istore_mem);
8534%}
8535
8536// Store Integer
8537
8538instruct storeI(iRegIorL2I src, memory mem)
8539%{
8540  match(Set mem(StoreI mem src));
8541  predicate(!needs_releasing_store(n));
8542
8543  ins_cost(INSN_COST);
8544  format %{ "strw  $src, $mem\t# int" %}
8545
8546  ins_encode(aarch64_enc_strw(src, mem));
8547
8548  ins_pipe(istore_reg_mem);
8549%}
8550
8551instruct storeimmI0(immI0 zero, memory mem)
8552%{
8553  match(Set mem(StoreI mem zero));
8554  predicate(!needs_releasing_store(n));
8555
8556  ins_cost(INSN_COST);
8557  format %{ "strw  zr, $mem\t# int" %}
8558
8559  ins_encode(aarch64_enc_strw0(mem));
8560
8561  ins_pipe(istore_mem);
8562%}
8563
8564// Store Long (64 bit signed)
8565instruct storeL(iRegL src, memory mem)
8566%{
8567  match(Set mem (StoreL mem src));
8568  predicate(!needs_releasing_store(n));
8569
8570  ins_cost(INSN_COST);
8571  format %{ "str  $src, $mem\t# int" %}
8572
8573  ins_encode(aarch64_enc_str(src, mem));
8574
8575  ins_pipe(istore_reg_mem);
8576%}
8577
8578// Store Long (64 bit signed)
8579instruct storeimmL0(immL0 zero, memory mem)
8580%{
8581  match(Set mem (StoreL mem zero));
8582  predicate(!needs_releasing_store(n));
8583
8584  ins_cost(INSN_COST);
8585  format %{ "str  zr, $mem\t# int" %}
8586
8587  ins_encode(aarch64_enc_str0(mem));
8588
8589  ins_pipe(istore_mem);
8590%}
8591
8592// Store Pointer
8593instruct storeP(iRegP src, memory mem)
8594%{
8595  match(Set mem (StoreP mem src));
8596  predicate(!needs_releasing_store(n));
8597
8598  ins_cost(INSN_COST);
8599  format %{ "str  $src, $mem\t# ptr" %}
8600
8601  ins_encode(aarch64_enc_str(src, mem));
8602
8603  ins_pipe(istore_reg_mem);
8604%}
8605
8606// Store Pointer
8607instruct storeimmP0(immP0 zero, memory mem)
8608%{
8609  match(Set mem (StoreP mem zero));
8610  predicate(!needs_releasing_store(n));
8611
8612  ins_cost(INSN_COST);
8613  format %{ "str zr, $mem\t# ptr" %}
8614
8615  ins_encode(aarch64_enc_str0(mem));
8616
8617  ins_pipe(istore_mem);
8618%}
8619
8620// Store Compressed Pointer
8621instruct storeN(iRegN src, memory mem)
8622%{
8623  match(Set mem (StoreN mem src));
8624  predicate(!needs_releasing_store(n));
8625
8626  ins_cost(INSN_COST);
8627  format %{ "strw  $src, $mem\t# compressed ptr" %}
8628
8629  ins_encode(aarch64_enc_strw(src, mem));
8630
8631  ins_pipe(istore_reg_mem);
8632%}
8633
8634instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem)
8635%{
8636  match(Set mem (StoreN mem zero));
8637  predicate(Universe::narrow_oop_base() == NULL &&
8638            Universe::narrow_klass_base() == NULL &&
8639            (!needs_releasing_store(n)));
8640
8641  ins_cost(INSN_COST);
8642  format %{ "strw  rheapbase, $mem\t# compressed ptr (rheapbase==0)" %}
8643
8644  ins_encode(aarch64_enc_strw(heapbase, mem));
8645
8646  ins_pipe(istore_reg_mem);
8647%}
8648
8649// Store Float
8650instruct storeF(vRegF src, memory mem)
8651%{
8652  match(Set mem (StoreF mem src));
8653  predicate(!needs_releasing_store(n));
8654
8655  ins_cost(INSN_COST);
8656  format %{ "strs  $src, $mem\t# float" %}
8657
8658  ins_encode( aarch64_enc_strs(src, mem) );
8659
8660  ins_pipe(pipe_class_memory);
8661%}
8662
8663// TODO
8664// implement storeImmF0 and storeFImmPacked
8665
8666// Store Double
8667instruct storeD(vRegD src, memory mem)
8668%{
8669  match(Set mem (StoreD mem src));
8670  predicate(!needs_releasing_store(n));
8671
8672  ins_cost(INSN_COST);
8673  format %{ "strd  $src, $mem\t# double" %}
8674
8675  ins_encode( aarch64_enc_strd(src, mem) );
8676
8677  ins_pipe(pipe_class_memory);
8678%}
8679
8680// Store Compressed Klass Pointer
8681instruct storeNKlass(iRegN src, memory mem)
8682%{
8683  predicate(!needs_releasing_store(n));
8684  match(Set mem (StoreNKlass mem src));
8685
8686  ins_cost(INSN_COST);
8687  format %{ "strw  $src, $mem\t# compressed klass ptr" %}
8688
8689  ins_encode(aarch64_enc_strw(src, mem));
8690
8691  ins_pipe(istore_reg_mem);
8692%}
8693
8694// TODO
8695// implement storeImmD0 and storeDImmPacked
8696
8697// prefetch instructions
8698// Must be safe to execute with invalid address (cannot fault).
8699
8700instruct prefetchalloc( memory mem ) %{
8701  match(PrefetchAllocation mem);
8702
8703  ins_cost(INSN_COST);
8704  format %{ "prfm $mem, PSTL1KEEP\t# Prefetch into level 1 cache write keep" %}
8705
8706  ins_encode( aarch64_enc_prefetchw(mem) );
8707
8708  ins_pipe(iload_prefetch);
8709%}
8710
8711//  ---------------- volatile loads and stores ----------------
8712
8713// Load Byte (8 bit signed)
8714instruct loadB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
8715%{
8716  match(Set dst (LoadB mem));
8717
8718  ins_cost(VOLATILE_REF_COST);
8719  format %{ "ldarsb  $dst, $mem\t# byte" %}
8720
8721  ins_encode(aarch64_enc_ldarsb(dst, mem));
8722
8723  ins_pipe(pipe_serial);
8724%}
8725
8726// Load Byte (8 bit signed) into long
8727instruct loadB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
8728%{
8729  match(Set dst (ConvI2L (LoadB mem)));
8730
8731  ins_cost(VOLATILE_REF_COST);
8732  format %{ "ldarsb  $dst, $mem\t# byte" %}
8733
8734  ins_encode(aarch64_enc_ldarsb(dst, mem));
8735
8736  ins_pipe(pipe_serial);
8737%}
8738
8739// Load Byte (8 bit unsigned)
8740instruct loadUB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
8741%{
8742  match(Set dst (LoadUB mem));
8743
8744  ins_cost(VOLATILE_REF_COST);
8745  format %{ "ldarb  $dst, $mem\t# byte" %}
8746
8747  ins_encode(aarch64_enc_ldarb(dst, mem));
8748
8749  ins_pipe(pipe_serial);
8750%}
8751
8752// Load Byte (8 bit unsigned) into long
8753instruct loadUB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
8754%{
8755  match(Set dst (ConvI2L (LoadUB mem)));
8756
8757  ins_cost(VOLATILE_REF_COST);
8758  format %{ "ldarb  $dst, $mem\t# byte" %}
8759
8760  ins_encode(aarch64_enc_ldarb(dst, mem));
8761
8762  ins_pipe(pipe_serial);
8763%}
8764
8765// Load Short (16 bit signed)
8766instruct loadS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
8767%{
8768  match(Set dst (LoadS mem));
8769
8770  ins_cost(VOLATILE_REF_COST);
8771  format %{ "ldarshw  $dst, $mem\t# short" %}
8772
8773  ins_encode(aarch64_enc_ldarshw(dst, mem));
8774
8775  ins_pipe(pipe_serial);
8776%}
8777
8778instruct loadUS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
8779%{
8780  match(Set dst (LoadUS mem));
8781
8782  ins_cost(VOLATILE_REF_COST);
8783  format %{ "ldarhw  $dst, $mem\t# short" %}
8784
8785  ins_encode(aarch64_enc_ldarhw(dst, mem));
8786
8787  ins_pipe(pipe_serial);
8788%}
8789
8790// Load Short/Char (16 bit unsigned) into long
8791instruct loadUS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
8792%{
8793  match(Set dst (ConvI2L (LoadUS mem)));
8794
8795  ins_cost(VOLATILE_REF_COST);
8796  format %{ "ldarh  $dst, $mem\t# short" %}
8797
8798  ins_encode(aarch64_enc_ldarh(dst, mem));
8799
8800  ins_pipe(pipe_serial);
8801%}
8802
8803// Load Short/Char (16 bit signed) into long
8804instruct loadS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
8805%{
8806  match(Set dst (ConvI2L (LoadS mem)));
8807
8808  ins_cost(VOLATILE_REF_COST);
8809  format %{ "ldarh  $dst, $mem\t# short" %}
8810
8811  ins_encode(aarch64_enc_ldarsh(dst, mem));
8812
8813  ins_pipe(pipe_serial);
8814%}
8815
8816// Load Integer (32 bit signed)
8817instruct loadI_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
8818%{
8819  match(Set dst (LoadI mem));
8820
8821  ins_cost(VOLATILE_REF_COST);
8822  format %{ "ldarw  $dst, $mem\t# int" %}
8823
8824  ins_encode(aarch64_enc_ldarw(dst, mem));
8825
8826  ins_pipe(pipe_serial);
8827%}
8828
8829// Load Integer (32 bit unsigned) into long
8830instruct loadUI2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem, immL_32bits mask)
8831%{
8832  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
8833
8834  ins_cost(VOLATILE_REF_COST);
8835  format %{ "ldarw  $dst, $mem\t# int" %}
8836
8837  ins_encode(aarch64_enc_ldarw(dst, mem));
8838
8839  ins_pipe(pipe_serial);
8840%}
8841
8842// Load Long (64 bit signed)
8843instruct loadL_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
8844%{
8845  match(Set dst (LoadL mem));
8846
8847  ins_cost(VOLATILE_REF_COST);
8848  format %{ "ldar  $dst, $mem\t# int" %}
8849
8850  ins_encode(aarch64_enc_ldar(dst, mem));
8851
8852  ins_pipe(pipe_serial);
8853%}
8854
8855// Load Pointer
8856instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem)
8857%{
8858  match(Set dst (LoadP mem));
8859
8860  ins_cost(VOLATILE_REF_COST);
8861  format %{ "ldar  $dst, $mem\t# ptr" %}
8862
8863  ins_encode(aarch64_enc_ldar(dst, mem));
8864
8865  ins_pipe(pipe_serial);
8866%}
8867
8868// Load Compressed Pointer
8869instruct loadN_volatile(iRegNNoSp dst, /* sync_memory*/indirect mem)
8870%{
8871  match(Set dst (LoadN mem));
8872
8873  ins_cost(VOLATILE_REF_COST);
8874  format %{ "ldarw  $dst, $mem\t# compressed ptr" %}
8875
8876  ins_encode(aarch64_enc_ldarw(dst, mem));
8877
8878  ins_pipe(pipe_serial);
8879%}
8880
8881// Load Float
8882instruct loadF_volatile(vRegF dst, /* sync_memory*/indirect mem)
8883%{
8884  match(Set dst (LoadF mem));
8885
8886  ins_cost(VOLATILE_REF_COST);
8887  format %{ "ldars  $dst, $mem\t# float" %}
8888
8889  ins_encode( aarch64_enc_fldars(dst, mem) );
8890
8891  ins_pipe(pipe_serial);
8892%}
8893
8894// Load Double
8895instruct loadD_volatile(vRegD dst, /* sync_memory*/indirect mem)
8896%{
8897  match(Set dst (LoadD mem));
8898
8899  ins_cost(VOLATILE_REF_COST);
8900  format %{ "ldard  $dst, $mem\t# double" %}
8901
8902  ins_encode( aarch64_enc_fldard(dst, mem) );
8903
8904  ins_pipe(pipe_serial);
8905%}
8906
8907// Store Byte
8908instruct storeB_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
8909%{
8910  match(Set mem (StoreB mem src));
8911
8912  ins_cost(VOLATILE_REF_COST);
8913  format %{ "stlrb  $src, $mem\t# byte" %}
8914
8915  ins_encode(aarch64_enc_stlrb(src, mem));
8916
8917  ins_pipe(pipe_class_memory);
8918%}
8919
8920// Store Char/Short
8921instruct storeC_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
8922%{
8923  match(Set mem (StoreC mem src));
8924
8925  ins_cost(VOLATILE_REF_COST);
8926  format %{ "stlrh  $src, $mem\t# short" %}
8927
8928  ins_encode(aarch64_enc_stlrh(src, mem));
8929
8930  ins_pipe(pipe_class_memory);
8931%}
8932
8933// Store Integer
8934
8935instruct storeI_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
8936%{
8937  match(Set mem(StoreI mem src));
8938
8939  ins_cost(VOLATILE_REF_COST);
8940  format %{ "stlrw  $src, $mem\t# int" %}
8941
8942  ins_encode(aarch64_enc_stlrw(src, mem));
8943
8944  ins_pipe(pipe_class_memory);
8945%}
8946
8947// Store Long (64 bit signed)
8948instruct storeL_volatile(iRegL src, /* sync_memory*/indirect mem)
8949%{
8950  match(Set mem (StoreL mem src));
8951
8952  ins_cost(VOLATILE_REF_COST);
8953  format %{ "stlr  $src, $mem\t# int" %}
8954
8955  ins_encode(aarch64_enc_stlr(src, mem));
8956
8957  ins_pipe(pipe_class_memory);
8958%}
8959
8960// Store Pointer
8961instruct storeP_volatile(iRegP src, /* sync_memory*/indirect mem)
8962%{
8963  match(Set mem (StoreP mem src));
8964
8965  ins_cost(VOLATILE_REF_COST);
8966  format %{ "stlr  $src, $mem\t# ptr" %}
8967
8968  ins_encode(aarch64_enc_stlr(src, mem));
8969
8970  ins_pipe(pipe_class_memory);
8971%}
8972
8973// Store Compressed Pointer
8974instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem)
8975%{
8976  match(Set mem (StoreN mem src));
8977
8978  ins_cost(VOLATILE_REF_COST);
8979  format %{ "stlrw  $src, $mem\t# compressed ptr" %}
8980
8981  ins_encode(aarch64_enc_stlrw(src, mem));
8982
8983  ins_pipe(pipe_class_memory);
8984%}
8985
8986// Store Float
8987instruct storeF_volatile(vRegF src, /* sync_memory*/indirect mem)
8988%{
8989  match(Set mem (StoreF mem src));
8990
8991  ins_cost(VOLATILE_REF_COST);
8992  format %{ "stlrs  $src, $mem\t# float" %}
8993
8994  ins_encode( aarch64_enc_fstlrs(src, mem) );
8995
8996  ins_pipe(pipe_class_memory);
8997%}
8998
8999// TODO
9000// implement storeImmF0 and storeFImmPacked
9001
9002// Store Double
9003instruct storeD_volatile(vRegD src, /* sync_memory*/indirect mem)
9004%{
9005  match(Set mem (StoreD mem src));
9006
9007  ins_cost(VOLATILE_REF_COST);
9008  format %{ "stlrd  $src, $mem\t# double" %}
9009
9010  ins_encode( aarch64_enc_fstlrd(src, mem) );
9011
9012  ins_pipe(pipe_class_memory);
9013%}
9014
9015//  ---------------- end of volatile loads and stores ----------------
9016
9017// ============================================================================
9018// BSWAP Instructions
9019
9020instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src) %{
9021  match(Set dst (ReverseBytesI src));
9022
9023  ins_cost(INSN_COST);
9024  format %{ "revw  $dst, $src" %}
9025
9026  ins_encode %{
9027    __ revw(as_Register($dst$$reg), as_Register($src$$reg));
9028  %}
9029
9030  ins_pipe(ialu_reg);
9031%}
9032
9033instruct bytes_reverse_long(iRegLNoSp dst, iRegL src) %{
9034  match(Set dst (ReverseBytesL src));
9035
9036  ins_cost(INSN_COST);
9037  format %{ "rev  $dst, $src" %}
9038
9039  ins_encode %{
9040    __ rev(as_Register($dst$$reg), as_Register($src$$reg));
9041  %}
9042
9043  ins_pipe(ialu_reg);
9044%}
9045
9046instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{
9047  match(Set dst (ReverseBytesUS src));
9048
9049  ins_cost(INSN_COST);
9050  format %{ "rev16w  $dst, $src" %}
9051
9052  ins_encode %{
9053    __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
9054  %}
9055
9056  ins_pipe(ialu_reg);
9057%}
9058
9059instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{
9060  match(Set dst (ReverseBytesS src));
9061
9062  ins_cost(INSN_COST);
9063  format %{ "rev16w  $dst, $src\n\t"
9064            "sbfmw $dst, $dst, #0, #15" %}
9065
9066  ins_encode %{
9067    __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
9068    __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 15U);
9069  %}
9070
9071  ins_pipe(ialu_reg);
9072%}
9073
9074// ============================================================================
9075// Zero Count Instructions
9076
9077instruct countLeadingZerosI(iRegINoSp dst, iRegIorL2I src) %{
9078  match(Set dst (CountLeadingZerosI src));
9079
9080  ins_cost(INSN_COST);
9081  format %{ "clzw  $dst, $src" %}
9082  ins_encode %{
9083    __ clzw(as_Register($dst$$reg), as_Register($src$$reg));
9084  %}
9085
9086  ins_pipe(ialu_reg);
9087%}
9088
9089instruct countLeadingZerosL(iRegINoSp dst, iRegL src) %{
9090  match(Set dst (CountLeadingZerosL src));
9091
9092  ins_cost(INSN_COST);
9093  format %{ "clz   $dst, $src" %}
9094  ins_encode %{
9095    __ clz(as_Register($dst$$reg), as_Register($src$$reg));
9096  %}
9097
9098  ins_pipe(ialu_reg);
9099%}
9100
9101instruct countTrailingZerosI(iRegINoSp dst, iRegIorL2I src) %{
9102  match(Set dst (CountTrailingZerosI src));
9103
9104  ins_cost(INSN_COST * 2);
9105  format %{ "rbitw  $dst, $src\n\t"
9106            "clzw   $dst, $dst" %}
9107  ins_encode %{
9108    __ rbitw(as_Register($dst$$reg), as_Register($src$$reg));
9109    __ clzw(as_Register($dst$$reg), as_Register($dst$$reg));
9110  %}
9111
9112  ins_pipe(ialu_reg);
9113%}
9114
9115instruct countTrailingZerosL(iRegINoSp dst, iRegL src) %{
9116  match(Set dst (CountTrailingZerosL src));
9117
9118  ins_cost(INSN_COST * 2);
9119  format %{ "rbit   $dst, $src\n\t"
9120            "clz    $dst, $dst" %}
9121  ins_encode %{
9122    __ rbit(as_Register($dst$$reg), as_Register($src$$reg));
9123    __ clz(as_Register($dst$$reg), as_Register($dst$$reg));
9124  %}
9125
9126  ins_pipe(ialu_reg);
9127%}
9128
9129//---------- Population Count Instructions -------------------------------------
9130//
9131
9132instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
9133  predicate(UsePopCountInstruction);
9134  match(Set dst (PopCountI src));
9135  effect(TEMP tmp);
9136  ins_cost(INSN_COST * 13);
9137
9138  format %{ "movw   $src, $src\n\t"
9139            "mov    $tmp, $src\t# vector (1D)\n\t"
9140            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
9141            "addv   $tmp, $tmp\t# vector (8B)\n\t"
9142            "mov    $dst, $tmp\t# vector (1D)" %}
9143  ins_encode %{
9144    __ movw($src$$Register, $src$$Register); // ensure top 32 bits 0
9145    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
9146    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
9147    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
9148    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
9149  %}
9150
9151  ins_pipe(pipe_class_default);
9152%}
9153
9154instruct popCountI_mem(iRegINoSp dst, memory mem, vRegF tmp) %{
9155  predicate(UsePopCountInstruction);
9156  match(Set dst (PopCountI (LoadI mem)));
9157  effect(TEMP tmp);
9158  ins_cost(INSN_COST * 13);
9159
9160  format %{ "ldrs   $tmp, $mem\n\t"
9161            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
9162            "addv   $tmp, $tmp\t# vector (8B)\n\t"
9163            "mov    $dst, $tmp\t# vector (1D)" %}
9164  ins_encode %{
9165    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
9166    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, tmp_reg, $mem->opcode(),
9167               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
9168    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
9169    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
9170    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
9171  %}
9172
9173  ins_pipe(pipe_class_default);
9174%}
9175
9176// Note: Long.bitCount(long) returns an int.
9177instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
9178  predicate(UsePopCountInstruction);
9179  match(Set dst (PopCountL src));
9180  effect(TEMP tmp);
9181  ins_cost(INSN_COST * 13);
9182
9183  format %{ "mov    $tmp, $src\t# vector (1D)\n\t"
9184            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
9185            "addv   $tmp, $tmp\t# vector (8B)\n\t"
9186            "mov    $dst, $tmp\t# vector (1D)" %}
9187  ins_encode %{
9188    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
9189    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
9190    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
9191    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
9192  %}
9193
9194  ins_pipe(pipe_class_default);
9195%}
9196
9197instruct popCountL_mem(iRegINoSp dst, memory mem, vRegD tmp) %{
9198  predicate(UsePopCountInstruction);
9199  match(Set dst (PopCountL (LoadL mem)));
9200  effect(TEMP tmp);
9201  ins_cost(INSN_COST * 13);
9202
9203  format %{ "ldrd   $tmp, $mem\n\t"
9204            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
9205            "addv   $tmp, $tmp\t# vector (8B)\n\t"
9206            "mov    $dst, $tmp\t# vector (1D)" %}
9207  ins_encode %{
9208    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
9209    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, tmp_reg, $mem->opcode(),
9210               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
9211    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
9212    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
9213    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
9214  %}
9215
9216  ins_pipe(pipe_class_default);
9217%}
9218
9219// ============================================================================
9220// MemBar Instruction
9221
9222instruct load_fence() %{
9223  match(LoadFence);
9224  ins_cost(VOLATILE_REF_COST);
9225
9226  format %{ "load_fence" %}
9227
9228  ins_encode %{
9229    __ membar(Assembler::LoadLoad|Assembler::LoadStore);
9230  %}
9231  ins_pipe(pipe_serial);
9232%}
9233
9234instruct unnecessary_membar_acquire() %{
9235  predicate(unnecessary_acquire(n));
9236  match(MemBarAcquire);
9237  ins_cost(0);
9238
9239  format %{ "membar_acquire (elided)" %}
9240
9241  ins_encode %{
9242    __ block_comment("membar_acquire (elided)");
9243  %}
9244
9245  ins_pipe(pipe_class_empty);
9246%}
9247
9248instruct membar_acquire() %{
9249  match(MemBarAcquire);
9250  ins_cost(VOLATILE_REF_COST);
9251
9252  format %{ "membar_acquire" %}
9253
9254  ins_encode %{
9255    __ block_comment("membar_acquire");
9256    __ membar(Assembler::LoadLoad|Assembler::LoadStore);
9257  %}
9258
9259  ins_pipe(pipe_serial);
9260%}
9261
9262
9263instruct membar_acquire_lock() %{
9264  match(MemBarAcquireLock);
9265  ins_cost(VOLATILE_REF_COST);
9266
9267  format %{ "membar_acquire_lock (elided)" %}
9268
9269  ins_encode %{
9270    __ block_comment("membar_acquire_lock (elided)");
9271  %}
9272
9273  ins_pipe(pipe_serial);
9274%}
9275
9276instruct store_fence() %{
9277  match(StoreFence);
9278  ins_cost(VOLATILE_REF_COST);
9279
9280  format %{ "store_fence" %}
9281
9282  ins_encode %{
9283    __ membar(Assembler::LoadStore|Assembler::StoreStore);
9284  %}
9285  ins_pipe(pipe_serial);
9286%}
9287
9288instruct unnecessary_membar_release() %{
9289  predicate(unnecessary_release(n));
9290  match(MemBarRelease);
9291  ins_cost(0);
9292
9293  format %{ "membar_release (elided)" %}
9294
9295  ins_encode %{
9296    __ block_comment("membar_release (elided)");
9297  %}
9298  ins_pipe(pipe_serial);
9299%}
9300
9301instruct membar_release() %{
9302  match(MemBarRelease);
9303  ins_cost(VOLATILE_REF_COST);
9304
9305  format %{ "membar_release" %}
9306
9307  ins_encode %{
9308    __ block_comment("membar_release");
9309    __ membar(Assembler::LoadStore|Assembler::StoreStore);
9310  %}
9311  ins_pipe(pipe_serial);
9312%}
9313
9314instruct membar_storestore() %{
9315  match(MemBarStoreStore);
9316  ins_cost(VOLATILE_REF_COST);
9317
9318  format %{ "MEMBAR-store-store" %}
9319
9320  ins_encode %{
9321    __ membar(Assembler::StoreStore);
9322  %}
9323  ins_pipe(pipe_serial);
9324%}
9325
9326instruct membar_release_lock() %{
9327  match(MemBarReleaseLock);
9328  ins_cost(VOLATILE_REF_COST);
9329
9330  format %{ "membar_release_lock (elided)" %}
9331
9332  ins_encode %{
9333    __ block_comment("membar_release_lock (elided)");
9334  %}
9335
9336  ins_pipe(pipe_serial);
9337%}
9338
9339instruct unnecessary_membar_volatile() %{
9340  predicate(unnecessary_volatile(n));
9341  match(MemBarVolatile);
9342  ins_cost(0);
9343
9344  format %{ "membar_volatile (elided)" %}
9345
9346  ins_encode %{
9347    __ block_comment("membar_volatile (elided)");
9348  %}
9349
9350  ins_pipe(pipe_serial);
9351%}
9352
9353instruct membar_volatile() %{
9354  match(MemBarVolatile);
9355  ins_cost(VOLATILE_REF_COST*100);
9356
9357  format %{ "membar_volatile" %}
9358
9359  ins_encode %{
9360    __ block_comment("membar_volatile");
9361    __ membar(Assembler::StoreLoad);
9362  %}
9363
9364  ins_pipe(pipe_serial);
9365%}
9366
9367// ============================================================================
9368// Cast/Convert Instructions
9369
9370instruct castX2P(iRegPNoSp dst, iRegL src) %{
9371  match(Set dst (CastX2P src));
9372
9373  ins_cost(INSN_COST);
9374  format %{ "mov $dst, $src\t# long -> ptr" %}
9375
9376  ins_encode %{
9377    if ($dst$$reg != $src$$reg) {
9378      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
9379    }
9380  %}
9381
9382  ins_pipe(ialu_reg);
9383%}
9384
9385instruct castP2X(iRegLNoSp dst, iRegP src) %{
9386  match(Set dst (CastP2X src));
9387
9388  ins_cost(INSN_COST);
9389  format %{ "mov $dst, $src\t# ptr -> long" %}
9390
9391  ins_encode %{
9392    if ($dst$$reg != $src$$reg) {
9393      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
9394    }
9395  %}
9396
9397  ins_pipe(ialu_reg);
9398%}
9399
9400// Convert oop into int for vectors alignment masking
9401instruct convP2I(iRegINoSp dst, iRegP src) %{
9402  match(Set dst (ConvL2I (CastP2X src)));
9403
9404  ins_cost(INSN_COST);
9405  format %{ "movw $dst, $src\t# ptr -> int" %}
9406  ins_encode %{
9407    __ movw($dst$$Register, $src$$Register);
9408  %}
9409
9410  ins_pipe(ialu_reg);
9411%}
9412
9413// Convert compressed oop into int for vectors alignment masking
9414// in case of 32bit oops (heap < 4Gb).
9415instruct convN2I(iRegINoSp dst, iRegN src)
9416%{
9417  predicate(Universe::narrow_oop_shift() == 0);
9418  match(Set dst (ConvL2I (CastP2X (DecodeN src))));
9419
9420  ins_cost(INSN_COST);
9421  format %{ "mov dst, $src\t# compressed ptr -> int" %}
9422  ins_encode %{
9423    __ movw($dst$$Register, $src$$Register);
9424  %}
9425
9426  ins_pipe(ialu_reg);
9427%}
9428
9429
9430// Convert oop pointer into compressed form
9431instruct encodeHeapOop(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
9432  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
9433  match(Set dst (EncodeP src));
9434  effect(KILL cr);
9435  ins_cost(INSN_COST * 3);
9436  format %{ "encode_heap_oop $dst, $src" %}
9437  ins_encode %{
9438    Register s = $src$$Register;
9439    Register d = $dst$$Register;
9440    __ encode_heap_oop(d, s);
9441  %}
9442  ins_pipe(ialu_reg);
9443%}
9444
9445instruct encodeHeapOop_not_null(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
9446  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
9447  match(Set dst (EncodeP src));
9448  ins_cost(INSN_COST * 3);
9449  format %{ "encode_heap_oop_not_null $dst, $src" %}
9450  ins_encode %{
9451    __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
9452  %}
9453  ins_pipe(ialu_reg);
9454%}
9455
9456instruct decodeHeapOop(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
9457  predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
9458            n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
9459  match(Set dst (DecodeN src));
9460  ins_cost(INSN_COST * 3);
9461  format %{ "decode_heap_oop $dst, $src" %}
9462  ins_encode %{
9463    Register s = $src$$Register;
9464    Register d = $dst$$Register;
9465    __ decode_heap_oop(d, s);
9466  %}
9467  ins_pipe(ialu_reg);
9468%}
9469
9470instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
9471  predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
9472            n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
9473  match(Set dst (DecodeN src));
9474  ins_cost(INSN_COST * 3);
9475  format %{ "decode_heap_oop_not_null $dst, $src" %}
9476  ins_encode %{
9477    Register s = $src$$Register;
9478    Register d = $dst$$Register;
9479    __ decode_heap_oop_not_null(d, s);
9480  %}
9481  ins_pipe(ialu_reg);
9482%}
9483
9484// n.b. AArch64 implementations of encode_klass_not_null and
9485// decode_klass_not_null do not modify the flags register so, unlike
9486// Intel, we don't kill CR as a side effect here
9487
9488instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{
9489  match(Set dst (EncodePKlass src));
9490
9491  ins_cost(INSN_COST * 3);
9492  format %{ "encode_klass_not_null $dst,$src" %}
9493
9494  ins_encode %{
9495    Register src_reg = as_Register($src$$reg);
9496    Register dst_reg = as_Register($dst$$reg);
9497    __ encode_klass_not_null(dst_reg, src_reg);
9498  %}
9499
9500   ins_pipe(ialu_reg);
9501%}
9502
9503instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{
9504  match(Set dst (DecodeNKlass src));
9505
9506  ins_cost(INSN_COST * 3);
9507  format %{ "decode_klass_not_null $dst,$src" %}
9508
9509  ins_encode %{
9510    Register src_reg = as_Register($src$$reg);
9511    Register dst_reg = as_Register($dst$$reg);
9512    if (dst_reg != src_reg) {
9513      __ decode_klass_not_null(dst_reg, src_reg);
9514    } else {
9515      __ decode_klass_not_null(dst_reg);
9516    }
9517  %}
9518
9519   ins_pipe(ialu_reg);
9520%}
9521
9522instruct checkCastPP(iRegPNoSp dst)
9523%{
9524  match(Set dst (CheckCastPP dst));
9525
9526  size(0);
9527  format %{ "# checkcastPP of $dst" %}
9528  ins_encode(/* empty encoding */);
9529  ins_pipe(pipe_class_empty);
9530%}
9531
9532instruct castPP(iRegPNoSp dst)
9533%{
9534  match(Set dst (CastPP dst));
9535
9536  size(0);
9537  format %{ "# castPP of $dst" %}
9538  ins_encode(/* empty encoding */);
9539  ins_pipe(pipe_class_empty);
9540%}
9541
9542instruct castII(iRegI dst)
9543%{
9544  match(Set dst (CastII dst));
9545
9546  size(0);
9547  format %{ "# castII of $dst" %}
9548  ins_encode(/* empty encoding */);
9549  ins_cost(0);
9550  ins_pipe(pipe_class_empty);
9551%}
9552
9553// ============================================================================
9554// Atomic operation instructions
9555//
9556// Intel and SPARC both implement Ideal Node LoadPLocked and
9557// Store{PIL}Conditional instructions using a normal load for the
9558// LoadPLocked and a CAS for the Store{PIL}Conditional.
9559//
9560// The ideal code appears only to use LoadPLocked/StorePLocked as a
9561// pair to lock object allocations from Eden space when not using
9562// TLABs.
9563//
9564// There does not appear to be a Load{IL}Locked Ideal Node and the
9565// Ideal code appears to use Store{IL}Conditional as an alias for CAS
9566// and to use StoreIConditional only for 32-bit and StoreLConditional
9567// only for 64-bit.
9568//
9569// We implement LoadPLocked and StorePLocked instructions using,
9570// respectively the AArch64 hw load-exclusive and store-conditional
9571// instructions. Whereas we must implement each of
9572// Store{IL}Conditional using a CAS which employs a pair of
9573// instructions comprising a load-exclusive followed by a
9574// store-conditional.
9575
9576
9577// Locked-load (linked load) of the current heap-top
9578// used when updating the eden heap top
9579// implemented using ldaxr on AArch64
9580
9581instruct loadPLocked(iRegPNoSp dst, indirect mem)
9582%{
9583  match(Set dst (LoadPLocked mem));
9584
9585  ins_cost(VOLATILE_REF_COST);
9586
9587  format %{ "ldaxr $dst, $mem\t# ptr linked acquire" %}
9588
9589  ins_encode(aarch64_enc_ldaxr(dst, mem));
9590
9591  ins_pipe(pipe_serial);
9592%}
9593
9594// Conditional-store of the updated heap-top.
9595// Used during allocation of the shared heap.
9596// Sets flag (EQ) on success.
9597// implemented using stlxr on AArch64.
9598
9599instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr)
9600%{
9601  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
9602
9603  ins_cost(VOLATILE_REF_COST);
9604
9605 // TODO
9606 // do we need to do a store-conditional release or can we just use a
9607 // plain store-conditional?
9608
9609  format %{
9610    "stlxr rscratch1, $newval, $heap_top_ptr\t# ptr cond release"
9611    "cmpw rscratch1, zr\t# EQ on successful write"
9612  %}
9613
9614  ins_encode(aarch64_enc_stlxr(newval, heap_top_ptr));
9615
9616  ins_pipe(pipe_serial);
9617%}
9618
9619
9620// storeLConditional is used by PhaseMacroExpand::expand_lock_node
9621// when attempting to rebias a lock towards the current thread.  We
9622// must use the acquire form of cmpxchg in order to guarantee acquire
9623// semantics in this case.
9624instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr)
9625%{
9626  match(Set cr (StoreLConditional mem (Binary oldval newval)));
9627
9628  ins_cost(VOLATILE_REF_COST);
9629
9630  format %{
9631    "cmpxchg rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
9632    "cmpw rscratch1, zr\t# EQ on successful write"
9633  %}
9634
9635  ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval));
9636
9637  ins_pipe(pipe_slow);
9638%}
9639
9640// storeIConditional also has acquire semantics, for no better reason
9641// than matching storeLConditional.  At the time of writing this
9642// comment storeIConditional was not used anywhere by AArch64.
9643instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr)
9644%{
9645  match(Set cr (StoreIConditional mem (Binary oldval newval)));
9646
9647  ins_cost(VOLATILE_REF_COST);
9648
9649  format %{
9650    "cmpxchgw rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
9651    "cmpw rscratch1, zr\t# EQ on successful write"
9652  %}
9653
9654  ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval));
9655
9656  ins_pipe(pipe_slow);
9657%}
9658
9659// standard CompareAndSwapX when we are using barriers
9660// these have higher priority than the rules selected by a predicate
9661
9662// XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher
9663// can't match them
9664
9665instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{
9666
9667  match(Set res (CompareAndSwapB mem (Binary oldval newval)));
9668  ins_cost(2 * VOLATILE_REF_COST);
9669
9670  effect(KILL cr);
9671
9672  format %{
9673    "cmpxchgb $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
9674    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9675  %}
9676
9677  ins_encode(aarch64_enc_cmpxchgb(mem, oldval, newval),
9678            aarch64_enc_cset_eq(res));
9679
9680  ins_pipe(pipe_slow);
9681%}
9682
9683instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{
9684
9685  match(Set res (CompareAndSwapS mem (Binary oldval newval)));
9686  ins_cost(2 * VOLATILE_REF_COST);
9687
9688  effect(KILL cr);
9689
9690  format %{
9691    "cmpxchgs $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
9692    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9693  %}
9694
9695  ins_encode(aarch64_enc_cmpxchgs(mem, oldval, newval),
9696            aarch64_enc_cset_eq(res));
9697
9698  ins_pipe(pipe_slow);
9699%}
9700
9701instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{
9702
9703  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
9704  ins_cost(2 * VOLATILE_REF_COST);
9705
9706  effect(KILL cr);
9707
9708 format %{
9709    "cmpxchgw $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
9710    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9711 %}
9712
9713 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
9714            aarch64_enc_cset_eq(res));
9715
9716  ins_pipe(pipe_slow);
9717%}
9718
9719instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{
9720
9721  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
9722  ins_cost(2 * VOLATILE_REF_COST);
9723
9724  effect(KILL cr);
9725
9726 format %{
9727    "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
9728    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9729 %}
9730
9731 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
9732            aarch64_enc_cset_eq(res));
9733
9734  ins_pipe(pipe_slow);
9735%}
9736
9737instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
9738
9739  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
9740  ins_cost(2 * VOLATILE_REF_COST);
9741
9742  effect(KILL cr);
9743
9744 format %{
9745    "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
9746    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9747 %}
9748
9749 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
9750            aarch64_enc_cset_eq(res));
9751
9752  ins_pipe(pipe_slow);
9753%}
9754
9755instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{
9756
9757  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
9758  ins_cost(2 * VOLATILE_REF_COST);
9759
9760  effect(KILL cr);
9761
9762 format %{
9763    "cmpxchgw $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
9764    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9765 %}
9766
9767 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
9768            aarch64_enc_cset_eq(res));
9769
9770  ins_pipe(pipe_slow);
9771%}
9772
9773// alternative CompareAndSwapX when we are eliding barriers
9774
9775instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{
9776
9777  predicate(needs_acquiring_load_exclusive(n));
9778  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
9779  ins_cost(VOLATILE_REF_COST);
9780
9781  effect(KILL cr);
9782
9783 format %{
9784    "cmpxchgw_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
9785    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9786 %}
9787
9788 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
9789            aarch64_enc_cset_eq(res));
9790
9791  ins_pipe(pipe_slow);
9792%}
9793
9794instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{
9795
9796  predicate(needs_acquiring_load_exclusive(n));
9797  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
9798  ins_cost(VOLATILE_REF_COST);
9799
9800  effect(KILL cr);
9801
9802 format %{
9803    "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
9804    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9805 %}
9806
9807 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
9808            aarch64_enc_cset_eq(res));
9809
9810  ins_pipe(pipe_slow);
9811%}
9812
9813instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
9814
9815  predicate(needs_acquiring_load_exclusive(n));
9816  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
9817  ins_cost(VOLATILE_REF_COST);
9818
9819  effect(KILL cr);
9820
9821 format %{
9822    "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
9823    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9824 %}
9825
9826 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
9827            aarch64_enc_cset_eq(res));
9828
9829  ins_pipe(pipe_slow);
9830%}
9831
9832instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{
9833
9834  predicate(needs_acquiring_load_exclusive(n));
9835  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
9836  ins_cost(VOLATILE_REF_COST);
9837
9838  effect(KILL cr);
9839
9840 format %{
9841    "cmpxchgw_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
9842    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9843 %}
9844
9845 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
9846            aarch64_enc_cset_eq(res));
9847
9848  ins_pipe(pipe_slow);
9849%}
9850
9851
9852// ---------------------------------------------------------------------
9853
9854
9855// BEGIN This section of the file is automatically generated. Do not edit --------------
9856
9857// Sundry CAS operations.  Note that release is always true,
9858// regardless of the memory ordering of the CAS.  This is because we
9859// need the volatile case to be sequentially consistent but there is
9860// no trailing StoreLoad barrier emitted by C2.  Unfortunately we
9861// can't check the type of memory ordering here, so we always emit a
9862// STLXR.
9863
9864// This section is generated from aarch64_ad_cas.m4
9865
9866
9867
9868instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
9869  match(Set res (CompareAndExchangeB mem (Binary oldval newval)));
9870  ins_cost(2 * VOLATILE_REF_COST);
9871  effect(TEMP_DEF res, KILL cr);
9872  format %{
9873    "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
9874  %}
9875  ins_encode %{
9876    __ uxtbw(rscratch2, $oldval$$Register);
9877    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
9878               Assembler::byte, /*acquire*/ false, /*release*/ true,
9879               /*weak*/ false, $res$$Register);
9880    __ sxtbw($res$$Register, $res$$Register);
9881  %}
9882  ins_pipe(pipe_slow);
9883%}
9884
9885instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
9886  match(Set res (CompareAndExchangeS mem (Binary oldval newval)));
9887  ins_cost(2 * VOLATILE_REF_COST);
9888  effect(TEMP_DEF res, KILL cr);
9889  format %{
9890    "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
9891  %}
9892  ins_encode %{
9893    __ uxthw(rscratch2, $oldval$$Register);
9894    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
9895               Assembler::halfword, /*acquire*/ false, /*release*/ true,
9896               /*weak*/ false, $res$$Register);
9897    __ sxthw($res$$Register, $res$$Register);
9898  %}
9899  ins_pipe(pipe_slow);
9900%}
9901
9902instruct compareAndExchangeI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
9903  match(Set res (CompareAndExchangeI mem (Binary oldval newval)));
9904  ins_cost(2 * VOLATILE_REF_COST);
9905  effect(TEMP_DEF res, KILL cr);
9906  format %{
9907    "cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
9908  %}
9909  ins_encode %{
9910    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
9911               Assembler::word, /*acquire*/ false, /*release*/ true,
9912               /*weak*/ false, $res$$Register);
9913  %}
9914  ins_pipe(pipe_slow);
9915%}
9916
9917instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
9918  match(Set res (CompareAndExchangeL mem (Binary oldval newval)));
9919  ins_cost(2 * VOLATILE_REF_COST);
9920  effect(TEMP_DEF res, KILL cr);
9921  format %{
9922    "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
9923  %}
9924  ins_encode %{
9925    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
9926               Assembler::xword, /*acquire*/ false, /*release*/ true,
9927               /*weak*/ false, $res$$Register);
9928  %}
9929  ins_pipe(pipe_slow);
9930%}
9931
9932instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
9933  match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
9934  ins_cost(2 * VOLATILE_REF_COST);
9935  effect(TEMP_DEF res, KILL cr);
9936  format %{
9937    "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
9938  %}
9939  ins_encode %{
9940    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
9941               Assembler::word, /*acquire*/ false, /*release*/ true,
9942               /*weak*/ false, $res$$Register);
9943  %}
9944  ins_pipe(pipe_slow);
9945%}
9946
9947instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
9948  match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
9949  ins_cost(2 * VOLATILE_REF_COST);
9950  effect(TEMP_DEF res, KILL cr);
9951  format %{
9952    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
9953  %}
9954  ins_encode %{
9955    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
9956               Assembler::xword, /*acquire*/ false, /*release*/ true,
9957               /*weak*/ false, $res$$Register);
9958  %}
9959  ins_pipe(pipe_slow);
9960%}
9961
9962instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
9963  match(Set res (WeakCompareAndSwapB mem (Binary oldval newval)));
9964  ins_cost(2 * VOLATILE_REF_COST);
9965  effect(KILL cr);
9966  format %{
9967    "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
9968    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9969  %}
9970  ins_encode %{
9971    __ uxtbw(rscratch2, $oldval$$Register);
9972    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
9973               Assembler::byte, /*acquire*/ false, /*release*/ true,
9974               /*weak*/ true, noreg);
9975    __ csetw($res$$Register, Assembler::EQ);
9976  %}
9977  ins_pipe(pipe_slow);
9978%}
9979
9980instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
9981  match(Set res (WeakCompareAndSwapS mem (Binary oldval newval)));
9982  ins_cost(2 * VOLATILE_REF_COST);
9983  effect(KILL cr);
9984  format %{
9985    "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
9986    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9987  %}
9988  ins_encode %{
9989    __ uxthw(rscratch2, $oldval$$Register);
9990    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
9991               Assembler::halfword, /*acquire*/ false, /*release*/ true,
9992               /*weak*/ true, noreg);
9993    __ csetw($res$$Register, Assembler::EQ);
9994  %}
9995  ins_pipe(pipe_slow);
9996%}
9997
9998instruct weakCompareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
9999  match(Set res (WeakCompareAndSwapI mem (Binary oldval newval)));
10000  ins_cost(2 * VOLATILE_REF_COST);
10001  effect(KILL cr);
10002  format %{
10003    "cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
10004    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
10005  %}
10006  ins_encode %{
10007    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
10008               Assembler::word, /*acquire*/ false, /*release*/ true,
10009               /*weak*/ true, noreg);
10010    __ csetw($res$$Register, Assembler::EQ);
10011  %}
10012  ins_pipe(pipe_slow);
10013%}
10014
10015instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
10016  match(Set res (WeakCompareAndSwapL mem (Binary oldval newval)));
10017  ins_cost(2 * VOLATILE_REF_COST);
10018  effect(KILL cr);
10019  format %{
10020    "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
10021    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
10022  %}
10023  ins_encode %{
10024    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
10025               Assembler::xword, /*acquire*/ false, /*release*/ true,
10026               /*weak*/ true, noreg);
10027    __ csetw($res$$Register, Assembler::EQ);
10028  %}
10029  ins_pipe(pipe_slow);
10030%}
10031
10032instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
10033  match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
10034  ins_cost(2 * VOLATILE_REF_COST);
10035  effect(KILL cr);
10036  format %{
10037    "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
10038    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
10039  %}
10040  ins_encode %{
10041    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
10042               Assembler::word, /*acquire*/ false, /*release*/ true,
10043               /*weak*/ true, noreg);
10044    __ csetw($res$$Register, Assembler::EQ);
10045  %}
10046  ins_pipe(pipe_slow);
10047%}
10048
10049instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
10050  match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
10051  ins_cost(2 * VOLATILE_REF_COST);
10052  effect(KILL cr);
10053  format %{
10054    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
10055    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
10056  %}
10057  ins_encode %{
10058    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
10059               Assembler::xword, /*acquire*/ false, /*release*/ true,
10060               /*weak*/ true, noreg);
10061    __ csetw($res$$Register, Assembler::EQ);
10062  %}
10063  ins_pipe(pipe_slow);
10064%}
10065
10066// END This section of the file is automatically generated. Do not edit --------------
10067// ---------------------------------------------------------------------
10068
10069instruct get_and_setI(indirect mem, iRegI newv, iRegINoSp prev) %{
10070  match(Set prev (GetAndSetI mem newv));
10071  format %{ "atomic_xchgw  $prev, $newv, [$mem]" %}
10072  ins_encode %{
10073    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
10074  %}
10075  ins_pipe(pipe_serial);
10076%}
10077
10078instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev) %{
10079  match(Set prev (GetAndSetL mem newv));
10080  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
10081  ins_encode %{
10082    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
10083  %}
10084  ins_pipe(pipe_serial);
10085%}
10086
10087instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) %{
10088  match(Set prev (GetAndSetN mem newv));
10089  format %{ "atomic_xchgw $prev, $newv, [$mem]" %}
10090  ins_encode %{
10091    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
10092  %}
10093  ins_pipe(pipe_serial);
10094%}
10095
10096instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) %{
10097  match(Set prev (GetAndSetP mem newv));
10098  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
10099  ins_encode %{
10100    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
10101  %}
10102  ins_pipe(pipe_serial);
10103%}
10104
10105
10106instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) %{
10107  match(Set newval (GetAndAddL mem incr));
10108  ins_cost(INSN_COST * 10);
10109  format %{ "get_and_addL $newval, [$mem], $incr" %}
10110  ins_encode %{
10111    __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base));
10112  %}
10113  ins_pipe(pipe_serial);
10114%}
10115
10116instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) %{
10117  predicate(n->as_LoadStore()->result_not_used());
10118  match(Set dummy (GetAndAddL mem incr));
10119  ins_cost(INSN_COST * 9);
10120  format %{ "get_and_addL [$mem], $incr" %}
10121  ins_encode %{
10122    __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base));
10123  %}
10124  ins_pipe(pipe_serial);
10125%}
10126
10127instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
10128  match(Set newval (GetAndAddL mem incr));
10129  ins_cost(INSN_COST * 10);
10130  format %{ "get_and_addL $newval, [$mem], $incr" %}
10131  ins_encode %{
10132    __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base));
10133  %}
10134  ins_pipe(pipe_serial);
10135%}
10136
10137instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAddSub incr) %{
10138  predicate(n->as_LoadStore()->result_not_used());
10139  match(Set dummy (GetAndAddL mem incr));
10140  ins_cost(INSN_COST * 9);
10141  format %{ "get_and_addL [$mem], $incr" %}
10142  ins_encode %{
10143    __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base));
10144  %}
10145  ins_pipe(pipe_serial);
10146%}
10147
10148instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
10149  match(Set newval (GetAndAddI mem incr));
10150  ins_cost(INSN_COST * 10);
10151  format %{ "get_and_addI $newval, [$mem], $incr" %}
10152  ins_encode %{
10153    __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base));
10154  %}
10155  ins_pipe(pipe_serial);
10156%}
10157
10158instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) %{
10159  predicate(n->as_LoadStore()->result_not_used());
10160  match(Set dummy (GetAndAddI mem incr));
10161  ins_cost(INSN_COST * 9);
10162  format %{ "get_and_addI [$mem], $incr" %}
10163  ins_encode %{
10164    __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base));
10165  %}
10166  ins_pipe(pipe_serial);
10167%}
10168
10169instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAddSub incr) %{
10170  match(Set newval (GetAndAddI mem incr));
10171  ins_cost(INSN_COST * 10);
10172  format %{ "get_and_addI $newval, [$mem], $incr" %}
10173  ins_encode %{
10174    __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base));
10175  %}
10176  ins_pipe(pipe_serial);
10177%}
10178
10179instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAddSub incr) %{
10180  predicate(n->as_LoadStore()->result_not_used());
10181  match(Set dummy (GetAndAddI mem incr));
10182  ins_cost(INSN_COST * 9);
10183  format %{ "get_and_addI [$mem], $incr" %}
10184  ins_encode %{
10185    __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base));
10186  %}
10187  ins_pipe(pipe_serial);
10188%}
10189
10190// Manifest a CmpL result in an integer register.
10191// (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0)
10192instruct cmpL3_reg_reg(iRegINoSp dst, iRegL src1, iRegL src2, rFlagsReg flags)
10193%{
10194  match(Set dst (CmpL3 src1 src2));
10195  effect(KILL flags);
10196
10197  ins_cost(INSN_COST * 6);
10198  format %{
10199      "cmp $src1, $src2"
10200      "csetw $dst, ne"
10201      "cnegw $dst, lt"
10202  %}
10203  // format %{ "CmpL3 $dst, $src1, $src2" %}
10204  ins_encode %{
10205    __ cmp($src1$$Register, $src2$$Register);
10206    __ csetw($dst$$Register, Assembler::NE);
10207    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
10208  %}
10209
10210  ins_pipe(pipe_class_default);
10211%}
10212
10213instruct cmpL3_reg_imm(iRegINoSp dst, iRegL src1, immLAddSub src2, rFlagsReg flags)
10214%{
10215  match(Set dst (CmpL3 src1 src2));
10216  effect(KILL flags);
10217
10218  ins_cost(INSN_COST * 6);
10219  format %{
10220      "cmp $src1, $src2"
10221      "csetw $dst, ne"
10222      "cnegw $dst, lt"
10223  %}
10224  ins_encode %{
10225    int32_t con = (int32_t)$src2$$constant;
10226     if (con < 0) {
10227      __ adds(zr, $src1$$Register, -con);
10228    } else {
10229      __ subs(zr, $src1$$Register, con);
10230    }
10231    __ csetw($dst$$Register, Assembler::NE);
10232    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
10233  %}
10234
10235  ins_pipe(pipe_class_default);
10236%}
10237
10238// ============================================================================
10239// Conditional Move Instructions
10240
10241// n.b. we have identical rules for both a signed compare op (cmpOp)
10242// and an unsigned compare op (cmpOpU). it would be nice if we could
10243// define an op class which merged both inputs and use it to type the
10244// argument to a single rule. unfortunatelyt his fails because the
10245// opclass does not live up to the COND_INTER interface of its
10246// component operands. When the generic code tries to negate the
10247// operand it ends up running the generci Machoper::negate method
10248// which throws a ShouldNotHappen. So, we have to provide two flavours
10249// of each rule, one for a cmpOp and a second for a cmpOpU (sigh).
10250
10251instruct cmovI_reg_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
10252  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));
10253
10254  ins_cost(INSN_COST * 2);
10255  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, int"  %}
10256
10257  ins_encode %{
10258    __ cselw(as_Register($dst$$reg),
10259             as_Register($src2$$reg),
10260             as_Register($src1$$reg),
10261             (Assembler::Condition)$cmp$$cmpcode);
10262  %}
10263
10264  ins_pipe(icond_reg_reg);
10265%}
10266
10267instruct cmovUI_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
10268  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));
10269
10270  ins_cost(INSN_COST * 2);
10271  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, int"  %}
10272
10273  ins_encode %{
10274    __ cselw(as_Register($dst$$reg),
10275             as_Register($src2$$reg),
10276             as_Register($src1$$reg),
10277             (Assembler::Condition)$cmp$$cmpcode);
10278  %}
10279
10280  ins_pipe(icond_reg_reg);
10281%}
10282
10283// special cases where one arg is zero
10284
10285// n.b. this is selected in preference to the rule above because it
10286// avoids loading constant 0 into a source register
10287
10288// TODO
10289// we ought only to be able to cull one of these variants as the ideal
10290// transforms ought always to order the zero consistently (to left/right?)
10291
10292instruct cmovI_zero_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
10293  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));
10294
10295  ins_cost(INSN_COST * 2);
10296  format %{ "cselw $dst, $src, zr $cmp\t# signed, int"  %}
10297
10298  ins_encode %{
10299    __ cselw(as_Register($dst$$reg),
10300             as_Register($src$$reg),
10301             zr,
10302             (Assembler::Condition)$cmp$$cmpcode);
10303  %}
10304
10305  ins_pipe(icond_reg);
10306%}
10307
10308instruct cmovUI_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
10309  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));
10310
10311  ins_cost(INSN_COST * 2);
10312  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, int"  %}
10313
10314  ins_encode %{
10315    __ cselw(as_Register($dst$$reg),
10316             as_Register($src$$reg),
10317             zr,
10318             (Assembler::Condition)$cmp$$cmpcode);
10319  %}
10320
10321  ins_pipe(icond_reg);
10322%}
10323
10324instruct cmovI_reg_zero(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
10325  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));
10326
10327  ins_cost(INSN_COST * 2);
10328  format %{ "cselw $dst, zr, $src $cmp\t# signed, int"  %}
10329
10330  ins_encode %{
10331    __ cselw(as_Register($dst$$reg),
10332             zr,
10333             as_Register($src$$reg),
10334             (Assembler::Condition)$cmp$$cmpcode);
10335  %}
10336
10337  ins_pipe(icond_reg);
10338%}
10339
10340instruct cmovUI_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
10341  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));
10342
10343  ins_cost(INSN_COST * 2);
10344  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, int"  %}
10345
10346  ins_encode %{
10347    __ cselw(as_Register($dst$$reg),
10348             zr,
10349             as_Register($src$$reg),
10350             (Assembler::Condition)$cmp$$cmpcode);
10351  %}
10352
10353  ins_pipe(icond_reg);
10354%}
10355
10356// special case for creating a boolean 0 or 1
10357
10358// n.b. this is selected in preference to the rule above because it
10359// avoids loading constants 0 and 1 into a source register
10360
10361instruct cmovI_reg_zero_one(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
10362  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));
10363
10364  ins_cost(INSN_COST * 2);
10365  format %{ "csincw $dst, zr, zr $cmp\t# signed, int"  %}
10366
10367  ins_encode %{
10368    // equivalently
10369    // cset(as_Register($dst$$reg),
10370    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
10371    __ csincw(as_Register($dst$$reg),
10372             zr,
10373             zr,
10374             (Assembler::Condition)$cmp$$cmpcode);
10375  %}
10376
10377  ins_pipe(icond_none);
10378%}
10379
10380instruct cmovUI_reg_zero_one(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
10381  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));
10382
10383  ins_cost(INSN_COST * 2);
10384  format %{ "csincw $dst, zr, zr $cmp\t# unsigned, int"  %}
10385
10386  ins_encode %{
10387    // equivalently
10388    // cset(as_Register($dst$$reg),
10389    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
10390    __ csincw(as_Register($dst$$reg),
10391             zr,
10392             zr,
10393             (Assembler::Condition)$cmp$$cmpcode);
10394  %}
10395
10396  ins_pipe(icond_none);
10397%}
10398
10399instruct cmovL_reg_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
10400  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));
10401
10402  ins_cost(INSN_COST * 2);
10403  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, long"  %}
10404
10405  ins_encode %{
10406    __ csel(as_Register($dst$$reg),
10407            as_Register($src2$$reg),
10408            as_Register($src1$$reg),
10409            (Assembler::Condition)$cmp$$cmpcode);
10410  %}
10411
10412  ins_pipe(icond_reg_reg);
10413%}
10414
10415instruct cmovUL_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
10416  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));
10417
10418  ins_cost(INSN_COST * 2);
10419  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, long"  %}
10420
10421  ins_encode %{
10422    __ csel(as_Register($dst$$reg),
10423            as_Register($src2$$reg),
10424            as_Register($src1$$reg),
10425            (Assembler::Condition)$cmp$$cmpcode);
10426  %}
10427
10428  ins_pipe(icond_reg_reg);
10429%}
10430
10431// special cases where one arg is zero
10432
10433instruct cmovL_reg_zero(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
10434  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));
10435
10436  ins_cost(INSN_COST * 2);
10437  format %{ "csel $dst, zr, $src $cmp\t# signed, long"  %}
10438
10439  ins_encode %{
10440    __ csel(as_Register($dst$$reg),
10441            zr,
10442            as_Register($src$$reg),
10443            (Assembler::Condition)$cmp$$cmpcode);
10444  %}
10445
10446  ins_pipe(icond_reg);
10447%}
10448
10449instruct cmovUL_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
10450  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));
10451
10452  ins_cost(INSN_COST * 2);
10453  format %{ "csel $dst, zr, $src $cmp\t# unsigned, long"  %}
10454
10455  ins_encode %{
10456    __ csel(as_Register($dst$$reg),
10457            zr,
10458            as_Register($src$$reg),
10459            (Assembler::Condition)$cmp$$cmpcode);
10460  %}
10461
10462  ins_pipe(icond_reg);
10463%}
10464
10465instruct cmovL_zero_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
10466  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));
10467
10468  ins_cost(INSN_COST * 2);
10469  format %{ "csel $dst, $src, zr $cmp\t# signed, long"  %}
10470
10471  ins_encode %{
10472    __ csel(as_Register($dst$$reg),
10473            as_Register($src$$reg),
10474            zr,
10475            (Assembler::Condition)$cmp$$cmpcode);
10476  %}
10477
10478  ins_pipe(icond_reg);
10479%}
10480
10481instruct cmovUL_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
10482  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));
10483
10484  ins_cost(INSN_COST * 2);
10485  format %{ "csel $dst, $src, zr $cmp\t# unsigned, long"  %}
10486
10487  ins_encode %{
10488    __ csel(as_Register($dst$$reg),
10489            as_Register($src$$reg),
10490            zr,
10491            (Assembler::Condition)$cmp$$cmpcode);
10492  %}
10493
10494  ins_pipe(icond_reg);
10495%}
10496
10497instruct cmovP_reg_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
10498  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));
10499
10500  ins_cost(INSN_COST * 2);
10501  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, ptr"  %}
10502
10503  ins_encode %{
10504    __ csel(as_Register($dst$$reg),
10505            as_Register($src2$$reg),
10506            as_Register($src1$$reg),
10507            (Assembler::Condition)$cmp$$cmpcode);
10508  %}
10509
10510  ins_pipe(icond_reg_reg);
10511%}
10512
10513instruct cmovUP_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
10514  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));
10515
10516  ins_cost(INSN_COST * 2);
10517  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, ptr"  %}
10518
10519  ins_encode %{
10520    __ csel(as_Register($dst$$reg),
10521            as_Register($src2$$reg),
10522            as_Register($src1$$reg),
10523            (Assembler::Condition)$cmp$$cmpcode);
10524  %}
10525
10526  ins_pipe(icond_reg_reg);
10527%}
10528
10529// special cases where one arg is zero
10530
10531instruct cmovP_reg_zero(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
10532  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));
10533
10534  ins_cost(INSN_COST * 2);
10535  format %{ "csel $dst, zr, $src $cmp\t# signed, ptr"  %}
10536
10537  ins_encode %{
10538    __ csel(as_Register($dst$$reg),
10539            zr,
10540            as_Register($src$$reg),
10541            (Assembler::Condition)$cmp$$cmpcode);
10542  %}
10543
10544  ins_pipe(icond_reg);
10545%}
10546
10547instruct cmovUP_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
10548  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));
10549
10550  ins_cost(INSN_COST * 2);
10551  format %{ "csel $dst, zr, $src $cmp\t# unsigned, ptr"  %}
10552
10553  ins_encode %{
10554    __ csel(as_Register($dst$$reg),
10555            zr,
10556            as_Register($src$$reg),
10557            (Assembler::Condition)$cmp$$cmpcode);
10558  %}
10559
10560  ins_pipe(icond_reg);
10561%}
10562
10563instruct cmovP_zero_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
10564  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));
10565
10566  ins_cost(INSN_COST * 2);
10567  format %{ "csel $dst, $src, zr $cmp\t# signed, ptr"  %}
10568
10569  ins_encode %{
10570    __ csel(as_Register($dst$$reg),
10571            as_Register($src$$reg),
10572            zr,
10573            (Assembler::Condition)$cmp$$cmpcode);
10574  %}
10575
10576  ins_pipe(icond_reg);
10577%}
10578
10579instruct cmovUP_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
10580  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));
10581
10582  ins_cost(INSN_COST * 2);
10583  format %{ "csel $dst, $src, zr $cmp\t# unsigned, ptr"  %}
10584
10585  ins_encode %{
10586    __ csel(as_Register($dst$$reg),
10587            as_Register($src$$reg),
10588            zr,
10589            (Assembler::Condition)$cmp$$cmpcode);
10590  %}
10591
10592  ins_pipe(icond_reg);
10593%}
10594
10595instruct cmovN_reg_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
10596  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));
10597
10598  ins_cost(INSN_COST * 2);
10599  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, compressed ptr"  %}
10600
10601  ins_encode %{
10602    __ cselw(as_Register($dst$$reg),
10603             as_Register($src2$$reg),
10604             as_Register($src1$$reg),
10605             (Assembler::Condition)$cmp$$cmpcode);
10606  %}
10607
10608  ins_pipe(icond_reg_reg);
10609%}
10610
10611instruct cmovUN_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
10612  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));
10613
10614  ins_cost(INSN_COST * 2);
10615  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, compressed ptr"  %}
10616
10617  ins_encode %{
10618    __ cselw(as_Register($dst$$reg),
10619             as_Register($src2$$reg),
10620             as_Register($src1$$reg),
10621             (Assembler::Condition)$cmp$$cmpcode);
10622  %}
10623
10624  ins_pipe(icond_reg_reg);
10625%}
10626
10627// special cases where one arg is zero
10628
10629instruct cmovN_reg_zero(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
10630  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));
10631
10632  ins_cost(INSN_COST * 2);
10633  format %{ "cselw $dst, zr, $src $cmp\t# signed, compressed ptr"  %}
10634
10635  ins_encode %{
10636    __ cselw(as_Register($dst$$reg),
10637             zr,
10638             as_Register($src$$reg),
10639             (Assembler::Condition)$cmp$$cmpcode);
10640  %}
10641
10642  ins_pipe(icond_reg);
10643%}
10644
10645instruct cmovUN_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
10646  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));
10647
10648  ins_cost(INSN_COST * 2);
10649  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, compressed ptr"  %}
10650
10651  ins_encode %{
10652    __ cselw(as_Register($dst$$reg),
10653             zr,
10654             as_Register($src$$reg),
10655             (Assembler::Condition)$cmp$$cmpcode);
10656  %}
10657
10658  ins_pipe(icond_reg);
10659%}
10660
10661instruct cmovN_zero_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
10662  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));
10663
10664  ins_cost(INSN_COST * 2);
10665  format %{ "cselw $dst, $src, zr $cmp\t# signed, compressed ptr"  %}
10666
10667  ins_encode %{
10668    __ cselw(as_Register($dst$$reg),
10669             as_Register($src$$reg),
10670             zr,
10671             (Assembler::Condition)$cmp$$cmpcode);
10672  %}
10673
10674  ins_pipe(icond_reg);
10675%}
10676
10677instruct cmovUN_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
10678  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));
10679
10680  ins_cost(INSN_COST * 2);
10681  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, compressed ptr"  %}
10682
10683  ins_encode %{
10684    __ cselw(as_Register($dst$$reg),
10685             as_Register($src$$reg),
10686             zr,
10687             (Assembler::Condition)$cmp$$cmpcode);
10688  %}
10689
10690  ins_pipe(icond_reg);
10691%}
10692
10693instruct cmovF_reg(cmpOp cmp, rFlagsReg cr, vRegF dst, vRegF src1,  vRegF src2)
10694%{
10695  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));
10696
10697  ins_cost(INSN_COST * 3);
10698
10699  format %{ "fcsels $dst, $src1, $src2, $cmp\t# signed cmove float\n\t" %}
10700  ins_encode %{
10701    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
10702    __ fcsels(as_FloatRegister($dst$$reg),
10703              as_FloatRegister($src2$$reg),
10704              as_FloatRegister($src1$$reg),
10705              cond);
10706  %}
10707
10708  ins_pipe(fp_cond_reg_reg_s);
10709%}
10710
10711instruct cmovUF_reg(cmpOpU cmp, rFlagsRegU cr, vRegF dst, vRegF src1,  vRegF src2)
10712%{
10713  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));
10714
10715  ins_cost(INSN_COST * 3);
10716
10717  format %{ "fcsels $dst, $src1, $src2, $cmp\t# unsigned cmove float\n\t" %}
10718  ins_encode %{
10719    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
10720    __ fcsels(as_FloatRegister($dst$$reg),
10721              as_FloatRegister($src2$$reg),
10722              as_FloatRegister($src1$$reg),
10723              cond);
10724  %}
10725
10726  ins_pipe(fp_cond_reg_reg_s);
10727%}
10728
10729instruct cmovD_reg(cmpOp cmp, rFlagsReg cr, vRegD dst, vRegD src1,  vRegD src2)
10730%{
10731  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));
10732
10733  ins_cost(INSN_COST * 3);
10734
10735  format %{ "fcseld $dst, $src1, $src2, $cmp\t# signed cmove float\n\t" %}
10736  ins_encode %{
10737    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
10738    __ fcseld(as_FloatRegister($dst$$reg),
10739              as_FloatRegister($src2$$reg),
10740              as_FloatRegister($src1$$reg),
10741              cond);
10742  %}
10743
10744  ins_pipe(fp_cond_reg_reg_d);
10745%}
10746
10747instruct cmovUD_reg(cmpOpU cmp, rFlagsRegU cr, vRegD dst, vRegD src1,  vRegD src2)
10748%{
10749  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));
10750
10751  ins_cost(INSN_COST * 3);
10752
10753  format %{ "fcseld $dst, $src1, $src2, $cmp\t# unsigned cmove float\n\t" %}
10754  ins_encode %{
10755    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
10756    __ fcseld(as_FloatRegister($dst$$reg),
10757              as_FloatRegister($src2$$reg),
10758              as_FloatRegister($src1$$reg),
10759              cond);
10760  %}
10761
10762  ins_pipe(fp_cond_reg_reg_d);
10763%}
10764
10765// ============================================================================
10766// Arithmetic Instructions
10767//
10768
10769// Integer Addition
10770
10771// TODO
10772// these currently employ operations which do not set CR and hence are
10773// not flagged as killing CR but we would like to isolate the cases
10774// where we want to set flags from those where we don't. need to work
10775// out how to do that.
10776
10777instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
10778  match(Set dst (AddI src1 src2));
10779
10780  ins_cost(INSN_COST);
10781  format %{ "addw  $dst, $src1, $src2" %}
10782
10783  ins_encode %{
10784    __ addw(as_Register($dst$$reg),
10785            as_Register($src1$$reg),
10786            as_Register($src2$$reg));
10787  %}
10788
10789  ins_pipe(ialu_reg_reg);
10790%}
10791
10792instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
10793  match(Set dst (AddI src1 src2));
10794
10795  ins_cost(INSN_COST);
10796  format %{ "addw $dst, $src1, $src2" %}
10797
10798  // use opcode to indicate that this is an add not a sub
10799  opcode(0x0);
10800
10801  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));
10802
10803  ins_pipe(ialu_reg_imm);
10804%}
10805
10806instruct addI_reg_imm_i2l(iRegINoSp dst, iRegL src1, immIAddSub src2) %{
10807  match(Set dst (AddI (ConvL2I src1) src2));
10808
10809  ins_cost(INSN_COST);
10810  format %{ "addw $dst, $src1, $src2" %}
10811
10812  // use opcode to indicate that this is an add not a sub
10813  opcode(0x0);
10814
10815  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));
10816
10817  ins_pipe(ialu_reg_imm);
10818%}
10819
10820// Pointer Addition
10821instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
10822  match(Set dst (AddP src1 src2));
10823
10824  ins_cost(INSN_COST);
10825  format %{ "add $dst, $src1, $src2\t# ptr" %}
10826
10827  ins_encode %{
10828    __ add(as_Register($dst$$reg),
10829           as_Register($src1$$reg),
10830           as_Register($src2$$reg));
10831  %}
10832
10833  ins_pipe(ialu_reg_reg);
10834%}
10835
10836instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{
10837  match(Set dst (AddP src1 (ConvI2L src2)));
10838
10839  ins_cost(1.9 * INSN_COST);
10840  format %{ "add $dst, $src1, $src2, sxtw\t# ptr" %}
10841
10842  ins_encode %{
10843    __ add(as_Register($dst$$reg),
10844           as_Register($src1$$reg),
10845           as_Register($src2$$reg), ext::sxtw);
10846  %}
10847
10848  ins_pipe(ialu_reg_reg);
10849%}
10850
10851instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale) %{
10852  match(Set dst (AddP src1 (LShiftL src2 scale)));
10853
10854  ins_cost(1.9 * INSN_COST);
10855  format %{ "add $dst, $src1, $src2, LShiftL $scale\t# ptr" %}
10856
10857  ins_encode %{
10858    __ lea(as_Register($dst$$reg),
10859           Address(as_Register($src1$$reg), as_Register($src2$$reg),
10860                   Address::lsl($scale$$constant)));
10861  %}
10862
10863  ins_pipe(ialu_reg_reg_shift);
10864%}
10865
10866instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegP src1, iRegIorL2I src2, immIScale scale) %{
10867  match(Set dst (AddP src1 (LShiftL (ConvI2L src2) scale)));
10868
10869  ins_cost(1.9 * INSN_COST);
10870  format %{ "add $dst, $src1, $src2, I2L $scale\t# ptr" %}
10871
10872  ins_encode %{
10873    __ lea(as_Register($dst$$reg),
10874           Address(as_Register($src1$$reg), as_Register($src2$$reg),
10875                   Address::sxtw($scale$$constant)));
10876  %}
10877
10878  ins_pipe(ialu_reg_reg_shift);
10879%}
10880
10881instruct lshift_ext(iRegLNoSp dst, iRegIorL2I src, immI scale, rFlagsReg cr) %{
10882  match(Set dst (LShiftL (ConvI2L src) scale));
10883
10884  ins_cost(INSN_COST);
10885  format %{ "sbfiz $dst, $src, $scale & 63, -$scale & 63\t" %}
10886
10887  ins_encode %{
10888    __ sbfiz(as_Register($dst$$reg),
10889          as_Register($src$$reg),
10890          $scale$$constant & 63, MIN(32, (-$scale$$constant) & 63));
10891  %}
10892
10893  ins_pipe(ialu_reg_shift);
10894%}
10895
10896// Pointer Immediate Addition
10897// n.b. this needs to be more expensive than using an indirect memory
10898// operand
10899instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAddSub src2) %{
10900  match(Set dst (AddP src1 src2));
10901
10902  ins_cost(INSN_COST);
10903  format %{ "add $dst, $src1, $src2\t# ptr" %}
10904
10905  // use opcode to indicate that this is an add not a sub
10906  opcode(0x0);
10907
10908  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );
10909
10910  ins_pipe(ialu_reg_imm);
10911%}
10912
10913// Long Addition
10914instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
10915
10916  match(Set dst (AddL src1 src2));
10917
10918  ins_cost(INSN_COST);
10919  format %{ "add  $dst, $src1, $src2" %}
10920
10921  ins_encode %{
10922    __ add(as_Register($dst$$reg),
10923           as_Register($src1$$reg),
10924           as_Register($src2$$reg));
10925  %}
10926
10927  ins_pipe(ialu_reg_reg);
10928%}
10929
10930// No constant pool entries requiredLong Immediate Addition.
10931instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
10932  match(Set dst (AddL src1 src2));
10933
10934  ins_cost(INSN_COST);
10935  format %{ "add $dst, $src1, $src2" %}
10936
10937  // use opcode to indicate that this is an add not a sub
10938  opcode(0x0);
10939
10940  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );
10941
10942  ins_pipe(ialu_reg_imm);
10943%}
10944
10945// Integer Subtraction
10946instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
10947  match(Set dst (SubI src1 src2));
10948
10949  ins_cost(INSN_COST);
10950  format %{ "subw  $dst, $src1, $src2" %}
10951
10952  ins_encode %{
10953    __ subw(as_Register($dst$$reg),
10954            as_Register($src1$$reg),
10955            as_Register($src2$$reg));
10956  %}
10957
10958  ins_pipe(ialu_reg_reg);
10959%}
10960
10961// Immediate Subtraction
10962instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
10963  match(Set dst (SubI src1 src2));
10964
10965  ins_cost(INSN_COST);
10966  format %{ "subw $dst, $src1, $src2" %}
10967
10968  // use opcode to indicate that this is a sub not an add
10969  opcode(0x1);
10970
10971  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));
10972
10973  ins_pipe(ialu_reg_imm);
10974%}
10975
10976// Long Subtraction
10977instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
10978
10979  match(Set dst (SubL src1 src2));
10980
10981  ins_cost(INSN_COST);
10982  format %{ "sub  $dst, $src1, $src2" %}
10983
10984  ins_encode %{
10985    __ sub(as_Register($dst$$reg),
10986           as_Register($src1$$reg),
10987           as_Register($src2$$reg));
10988  %}
10989
10990  ins_pipe(ialu_reg_reg);
10991%}
10992
10993// No constant pool entries requiredLong Immediate Subtraction.
10994instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
10995  match(Set dst (SubL src1 src2));
10996
10997  ins_cost(INSN_COST);
10998  format %{ "sub$dst, $src1, $src2" %}
10999
11000  // use opcode to indicate that this is a sub not an add
11001  opcode(0x1);
11002
11003  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );
11004
11005  ins_pipe(ialu_reg_imm);
11006%}
11007
11008// Integer Negation (special case for sub)
11009
11010instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr) %{
11011  match(Set dst (SubI zero src));
11012
11013  ins_cost(INSN_COST);
11014  format %{ "negw $dst, $src\t# int" %}
11015
11016  ins_encode %{
11017    __ negw(as_Register($dst$$reg),
11018            as_Register($src$$reg));
11019  %}
11020
11021  ins_pipe(ialu_reg);
11022%}
11023
11024// Long Negation
11025
11026instruct negL_reg(iRegLNoSp dst, iRegL src, immL0 zero, rFlagsReg cr) %{
11027  match(Set dst (SubL zero src));
11028
11029  ins_cost(INSN_COST);
11030  format %{ "neg $dst, $src\t# long" %}
11031
11032  ins_encode %{
11033    __ neg(as_Register($dst$$reg),
11034           as_Register($src$$reg));
11035  %}
11036
11037  ins_pipe(ialu_reg);
11038%}
11039
11040// Integer Multiply
11041
11042instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
11043  match(Set dst (MulI src1 src2));
11044
11045  ins_cost(INSN_COST * 3);
11046  format %{ "mulw  $dst, $src1, $src2" %}
11047
11048  ins_encode %{
11049    __ mulw(as_Register($dst$$reg),
11050            as_Register($src1$$reg),
11051            as_Register($src2$$reg));
11052  %}
11053
11054  ins_pipe(imul_reg_reg);
11055%}
11056
11057instruct smulI(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
11058  match(Set dst (MulL (ConvI2L src1) (ConvI2L src2)));
11059
11060  ins_cost(INSN_COST * 3);
11061  format %{ "smull  $dst, $src1, $src2" %}
11062
11063  ins_encode %{
11064    __ smull(as_Register($dst$$reg),
11065             as_Register($src1$$reg),
11066             as_Register($src2$$reg));
11067  %}
11068
11069  ins_pipe(imul_reg_reg);
11070%}
11071
11072// Long Multiply
11073
11074instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
11075  match(Set dst (MulL src1 src2));
11076
11077  ins_cost(INSN_COST * 5);
11078  format %{ "mul  $dst, $src1, $src2" %}
11079
11080  ins_encode %{
11081    __ mul(as_Register($dst$$reg),
11082           as_Register($src1$$reg),
11083           as_Register($src2$$reg));
11084  %}
11085
11086  ins_pipe(lmul_reg_reg);
11087%}
11088
11089instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr)
11090%{
11091  match(Set dst (MulHiL src1 src2));
11092
11093  ins_cost(INSN_COST * 7);
11094  format %{ "smulh   $dst, $src1, $src2, \t# mulhi" %}
11095
11096  ins_encode %{
11097    __ smulh(as_Register($dst$$reg),
11098             as_Register($src1$$reg),
11099             as_Register($src2$$reg));
11100  %}
11101
11102  ins_pipe(lmul_reg_reg);
11103%}
11104
11105// Combined Integer Multiply & Add/Sub
11106
11107instruct maddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
11108  match(Set dst (AddI src3 (MulI src1 src2)));
11109
11110  ins_cost(INSN_COST * 3);
11111  format %{ "madd  $dst, $src1, $src2, $src3" %}
11112
11113  ins_encode %{
11114    __ maddw(as_Register($dst$$reg),
11115             as_Register($src1$$reg),
11116             as_Register($src2$$reg),
11117             as_Register($src3$$reg));
11118  %}
11119
11120  ins_pipe(imac_reg_reg);
11121%}
11122
11123instruct msubI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
11124  match(Set dst (SubI src3 (MulI src1 src2)));
11125
11126  ins_cost(INSN_COST * 3);
11127  format %{ "msub  $dst, $src1, $src2, $src3" %}
11128
11129  ins_encode %{
11130    __ msubw(as_Register($dst$$reg),
11131             as_Register($src1$$reg),
11132             as_Register($src2$$reg),
11133             as_Register($src3$$reg));
11134  %}
11135
11136  ins_pipe(imac_reg_reg);
11137%}
11138
11139// Combined Long Multiply & Add/Sub
11140
11141instruct maddL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
11142  match(Set dst (AddL src3 (MulL src1 src2)));
11143
11144  ins_cost(INSN_COST * 5);
11145  format %{ "madd  $dst, $src1, $src2, $src3" %}
11146
11147  ins_encode %{
11148    __ madd(as_Register($dst$$reg),
11149            as_Register($src1$$reg),
11150            as_Register($src2$$reg),
11151            as_Register($src3$$reg));
11152  %}
11153
11154  ins_pipe(lmac_reg_reg);
11155%}
11156
11157instruct msubL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
11158  match(Set dst (SubL src3 (MulL src1 src2)));
11159
11160  ins_cost(INSN_COST * 5);
11161  format %{ "msub  $dst, $src1, $src2, $src3" %}
11162
11163  ins_encode %{
11164    __ msub(as_Register($dst$$reg),
11165            as_Register($src1$$reg),
11166            as_Register($src2$$reg),
11167            as_Register($src3$$reg));
11168  %}
11169
11170  ins_pipe(lmac_reg_reg);
11171%}
11172
11173// Integer Divide
11174
11175instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
11176  match(Set dst (DivI src1 src2));
11177
11178  ins_cost(INSN_COST * 19);
11179  format %{ "sdivw  $dst, $src1, $src2" %}
11180
11181  ins_encode(aarch64_enc_divw(dst, src1, src2));
11182  ins_pipe(idiv_reg_reg);
11183%}
11184
11185instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{
11186  match(Set dst (URShiftI (RShiftI src1 div1) div2));
11187  ins_cost(INSN_COST);
11188  format %{ "lsrw $dst, $src1, $div1" %}
11189  ins_encode %{
11190    __ lsrw(as_Register($dst$$reg), as_Register($src1$$reg), 31);
11191  %}
11192  ins_pipe(ialu_reg_shift);
11193%}
11194
11195instruct div2Round(iRegINoSp dst, iRegIorL2I src, immI_31 div1, immI_31 div2) %{
11196  match(Set dst (AddI src (URShiftI (RShiftI src div1) div2)));
11197  ins_cost(INSN_COST);
11198  format %{ "addw $dst, $src, LSR $div1" %}
11199
11200  ins_encode %{
11201    __ addw(as_Register($dst$$reg),
11202              as_Register($src$$reg),
11203              as_Register($src$$reg),
11204              Assembler::LSR, 31);
11205  %}
11206  ins_pipe(ialu_reg);
11207%}
11208
11209// Long Divide
11210
11211instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
11212  match(Set dst (DivL src1 src2));
11213
11214  ins_cost(INSN_COST * 35);
11215  format %{ "sdiv   $dst, $src1, $src2" %}
11216
11217  ins_encode(aarch64_enc_div(dst, src1, src2));
11218  ins_pipe(ldiv_reg_reg);
11219%}
11220
11221instruct signExtractL(iRegLNoSp dst, iRegL src1, immI_63 div1, immI_63 div2) %{
11222  match(Set dst (URShiftL (RShiftL src1 div1) div2));
11223  ins_cost(INSN_COST);
11224  format %{ "lsr $dst, $src1, $div1" %}
11225  ins_encode %{
11226    __ lsr(as_Register($dst$$reg), as_Register($src1$$reg), 63);
11227  %}
11228  ins_pipe(ialu_reg_shift);
11229%}
11230
11231instruct div2RoundL(iRegLNoSp dst, iRegL src, immI_63 div1, immI_63 div2) %{
11232  match(Set dst (AddL src (URShiftL (RShiftL src div1) div2)));
11233  ins_cost(INSN_COST);
11234  format %{ "add $dst, $src, $div1" %}
11235
11236  ins_encode %{
11237    __ add(as_Register($dst$$reg),
11238              as_Register($src$$reg),
11239              as_Register($src$$reg),
11240              Assembler::LSR, 63);
11241  %}
11242  ins_pipe(ialu_reg);
11243%}
11244
11245// Integer Remainder
11246
11247instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
11248  match(Set dst (ModI src1 src2));
11249
11250  ins_cost(INSN_COST * 22);
11251  format %{ "sdivw  rscratch1, $src1, $src2\n\t"
11252            "msubw($dst, rscratch1, $src2, $src1" %}
11253
11254  ins_encode(aarch64_enc_modw(dst, src1, src2));
11255  ins_pipe(idiv_reg_reg);
11256%}
11257
11258// Long Remainder
11259
11260instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
11261  match(Set dst (ModL src1 src2));
11262
11263  ins_cost(INSN_COST * 38);
11264  format %{ "sdiv   rscratch1, $src1, $src2\n"
11265            "msub($dst, rscratch1, $src2, $src1" %}
11266
11267  ins_encode(aarch64_enc_mod(dst, src1, src2));
11268  ins_pipe(ldiv_reg_reg);
11269%}
11270
11271// Integer Shifts
11272
11273// Shift Left Register
11274instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
11275  match(Set dst (LShiftI src1 src2));
11276
11277  ins_cost(INSN_COST * 2);
11278  format %{ "lslvw  $dst, $src1, $src2" %}
11279
11280  ins_encode %{
11281    __ lslvw(as_Register($dst$$reg),
11282             as_Register($src1$$reg),
11283             as_Register($src2$$reg));
11284  %}
11285
11286  ins_pipe(ialu_reg_reg_vshift);
11287%}
11288
11289// Shift Left Immediate
11290instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
11291  match(Set dst (LShiftI src1 src2));
11292
11293  ins_cost(INSN_COST);
11294  format %{ "lslw $dst, $src1, ($src2 & 0x1f)" %}
11295
11296  ins_encode %{
11297    __ lslw(as_Register($dst$$reg),
11298            as_Register($src1$$reg),
11299            $src2$$constant & 0x1f);
11300  %}
11301
11302  ins_pipe(ialu_reg_shift);
11303%}
11304
11305// Shift Right Logical Register
11306instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
11307  match(Set dst (URShiftI src1 src2));
11308
11309  ins_cost(INSN_COST * 2);
11310  format %{ "lsrvw  $dst, $src1, $src2" %}
11311
11312  ins_encode %{
11313    __ lsrvw(as_Register($dst$$reg),
11314             as_Register($src1$$reg),
11315             as_Register($src2$$reg));
11316  %}
11317
11318  ins_pipe(ialu_reg_reg_vshift);
11319%}
11320
11321// Shift Right Logical Immediate
11322instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
11323  match(Set dst (URShiftI src1 src2));
11324
11325  ins_cost(INSN_COST);
11326  format %{ "lsrw $dst, $src1, ($src2 & 0x1f)" %}
11327
11328  ins_encode %{
11329    __ lsrw(as_Register($dst$$reg),
11330            as_Register($src1$$reg),
11331            $src2$$constant & 0x1f);
11332  %}
11333
11334  ins_pipe(ialu_reg_shift);
11335%}
11336
11337// Shift Right Arithmetic Register
11338instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
11339  match(Set dst (RShiftI src1 src2));
11340
11341  ins_cost(INSN_COST * 2);
11342  format %{ "asrvw  $dst, $src1, $src2" %}
11343
11344  ins_encode %{
11345    __ asrvw(as_Register($dst$$reg),
11346             as_Register($src1$$reg),
11347             as_Register($src2$$reg));
11348  %}
11349
11350  ins_pipe(ialu_reg_reg_vshift);
11351%}
11352
11353// Shift Right Arithmetic Immediate
11354instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
11355  match(Set dst (RShiftI src1 src2));
11356
11357  ins_cost(INSN_COST);
11358  format %{ "asrw $dst, $src1, ($src2 & 0x1f)" %}
11359
11360  ins_encode %{
11361    __ asrw(as_Register($dst$$reg),
11362            as_Register($src1$$reg),
11363            $src2$$constant & 0x1f);
11364  %}
11365
11366  ins_pipe(ialu_reg_shift);
11367%}
11368
11369// Combined Int Mask and Right Shift (using UBFM)
11370// TODO
11371
11372// Long Shifts
11373
11374// Shift Left Register
11375instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
11376  match(Set dst (LShiftL src1 src2));
11377
11378  ins_cost(INSN_COST * 2);
11379  format %{ "lslv  $dst, $src1, $src2" %}
11380
11381  ins_encode %{
11382    __ lslv(as_Register($dst$$reg),
11383            as_Register($src1$$reg),
11384            as_Register($src2$$reg));
11385  %}
11386
11387  ins_pipe(ialu_reg_reg_vshift);
11388%}
11389
11390// Shift Left Immediate
11391instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
11392  match(Set dst (LShiftL src1 src2));
11393
11394  ins_cost(INSN_COST);
11395  format %{ "lsl $dst, $src1, ($src2 & 0x3f)" %}
11396
11397  ins_encode %{
11398    __ lsl(as_Register($dst$$reg),
11399            as_Register($src1$$reg),
11400            $src2$$constant & 0x3f);
11401  %}
11402
11403  ins_pipe(ialu_reg_shift);
11404%}
11405
11406// Shift Right Logical Register
11407instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
11408  match(Set dst (URShiftL src1 src2));
11409
11410  ins_cost(INSN_COST * 2);
11411  format %{ "lsrv  $dst, $src1, $src2" %}
11412
11413  ins_encode %{
11414    __ lsrv(as_Register($dst$$reg),
11415            as_Register($src1$$reg),
11416            as_Register($src2$$reg));
11417  %}
11418
11419  ins_pipe(ialu_reg_reg_vshift);
11420%}
11421
11422// Shift Right Logical Immediate
11423instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
11424  match(Set dst (URShiftL src1 src2));
11425
11426  ins_cost(INSN_COST);
11427  format %{ "lsr $dst, $src1, ($src2 & 0x3f)" %}
11428
11429  ins_encode %{
11430    __ lsr(as_Register($dst$$reg),
11431           as_Register($src1$$reg),
11432           $src2$$constant & 0x3f);
11433  %}
11434
11435  ins_pipe(ialu_reg_shift);
11436%}
11437
11438// A special-case pattern for card table stores.
11439instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{
11440  match(Set dst (URShiftL (CastP2X src1) src2));
11441
11442  ins_cost(INSN_COST);
11443  format %{ "lsr $dst, p2x($src1), ($src2 & 0x3f)" %}
11444
11445  ins_encode %{
11446    __ lsr(as_Register($dst$$reg),
11447           as_Register($src1$$reg),
11448           $src2$$constant & 0x3f);
11449  %}
11450
11451  ins_pipe(ialu_reg_shift);
11452%}
11453
11454// Shift Right Arithmetic Register
11455instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
11456  match(Set dst (RShiftL src1 src2));
11457
11458  ins_cost(INSN_COST * 2);
11459  format %{ "asrv  $dst, $src1, $src2" %}
11460
11461  ins_encode %{
11462    __ asrv(as_Register($dst$$reg),
11463            as_Register($src1$$reg),
11464            as_Register($src2$$reg));
11465  %}
11466
11467  ins_pipe(ialu_reg_reg_vshift);
11468%}
11469
11470// Shift Right Arithmetic Immediate
11471instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
11472  match(Set dst (RShiftL src1 src2));
11473
11474  ins_cost(INSN_COST);
11475  format %{ "asr $dst, $src1, ($src2 & 0x3f)" %}
11476
11477  ins_encode %{
11478    __ asr(as_Register($dst$$reg),
11479           as_Register($src1$$reg),
11480           $src2$$constant & 0x3f);
11481  %}
11482
11483  ins_pipe(ialu_reg_shift);
11484%}
11485
11486// BEGIN This section of the file is automatically generated. Do not edit --------------
11487
11488instruct regL_not_reg(iRegLNoSp dst,
11489                         iRegL src1, immL_M1 m1,
11490                         rFlagsReg cr) %{
11491  match(Set dst (XorL src1 m1));
11492  ins_cost(INSN_COST);
11493  format %{ "eon  $dst, $src1, zr" %}
11494
11495  ins_encode %{
11496    __ eon(as_Register($dst$$reg),
11497              as_Register($src1$$reg),
11498              zr,
11499              Assembler::LSL, 0);
11500  %}
11501
11502  ins_pipe(ialu_reg);
11503%}
11504instruct regI_not_reg(iRegINoSp dst,
11505                         iRegIorL2I src1, immI_M1 m1,
11506                         rFlagsReg cr) %{
11507  match(Set dst (XorI src1 m1));
11508  ins_cost(INSN_COST);
11509  format %{ "eonw  $dst, $src1, zr" %}
11510
11511  ins_encode %{
11512    __ eonw(as_Register($dst$$reg),
11513              as_Register($src1$$reg),
11514              zr,
11515              Assembler::LSL, 0);
11516  %}
11517
11518  ins_pipe(ialu_reg);
11519%}
11520
11521instruct AndI_reg_not_reg(iRegINoSp dst,
11522                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
11523                         rFlagsReg cr) %{
11524  match(Set dst (AndI src1 (XorI src2 m1)));
11525  ins_cost(INSN_COST);
11526  format %{ "bicw  $dst, $src1, $src2" %}
11527
11528  ins_encode %{
11529    __ bicw(as_Register($dst$$reg),
11530              as_Register($src1$$reg),
11531              as_Register($src2$$reg),
11532              Assembler::LSL, 0);
11533  %}
11534
11535  ins_pipe(ialu_reg_reg);
11536%}
11537
11538instruct AndL_reg_not_reg(iRegLNoSp dst,
11539                         iRegL src1, iRegL src2, immL_M1 m1,
11540                         rFlagsReg cr) %{
11541  match(Set dst (AndL src1 (XorL src2 m1)));
11542  ins_cost(INSN_COST);
11543  format %{ "bic  $dst, $src1, $src2" %}
11544
11545  ins_encode %{
11546    __ bic(as_Register($dst$$reg),
11547              as_Register($src1$$reg),
11548              as_Register($src2$$reg),
11549              Assembler::LSL, 0);
11550  %}
11551
11552  ins_pipe(ialu_reg_reg);
11553%}
11554
11555instruct OrI_reg_not_reg(iRegINoSp dst,
11556                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
11557                         rFlagsReg cr) %{
11558  match(Set dst (OrI src1 (XorI src2 m1)));
11559  ins_cost(INSN_COST);
11560  format %{ "ornw  $dst, $src1, $src2" %}
11561
11562  ins_encode %{
11563    __ ornw(as_Register($dst$$reg),
11564              as_Register($src1$$reg),
11565              as_Register($src2$$reg),
11566              Assembler::LSL, 0);
11567  %}
11568
11569  ins_pipe(ialu_reg_reg);
11570%}
11571
11572instruct OrL_reg_not_reg(iRegLNoSp dst,
11573                         iRegL src1, iRegL src2, immL_M1 m1,
11574                         rFlagsReg cr) %{
11575  match(Set dst (OrL src1 (XorL src2 m1)));
11576  ins_cost(INSN_COST);
11577  format %{ "orn  $dst, $src1, $src2" %}
11578
11579  ins_encode %{
11580    __ orn(as_Register($dst$$reg),
11581              as_Register($src1$$reg),
11582              as_Register($src2$$reg),
11583              Assembler::LSL, 0);
11584  %}
11585
11586  ins_pipe(ialu_reg_reg);
11587%}
11588
11589instruct XorI_reg_not_reg(iRegINoSp dst,
11590                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
11591                         rFlagsReg cr) %{
11592  match(Set dst (XorI m1 (XorI src2 src1)));
11593  ins_cost(INSN_COST);
11594  format %{ "eonw  $dst, $src1, $src2" %}
11595
11596  ins_encode %{
11597    __ eonw(as_Register($dst$$reg),
11598              as_Register($src1$$reg),
11599              as_Register($src2$$reg),
11600              Assembler::LSL, 0);
11601  %}
11602
11603  ins_pipe(ialu_reg_reg);
11604%}
11605
11606instruct XorL_reg_not_reg(iRegLNoSp dst,
11607                         iRegL src1, iRegL src2, immL_M1 m1,
11608                         rFlagsReg cr) %{
11609  match(Set dst (XorL m1 (XorL src2 src1)));
11610  ins_cost(INSN_COST);
11611  format %{ "eon  $dst, $src1, $src2" %}
11612
11613  ins_encode %{
11614    __ eon(as_Register($dst$$reg),
11615              as_Register($src1$$reg),
11616              as_Register($src2$$reg),
11617              Assembler::LSL, 0);
11618  %}
11619
11620  ins_pipe(ialu_reg_reg);
11621%}
11622
11623instruct AndI_reg_URShift_not_reg(iRegINoSp dst,
11624                         iRegIorL2I src1, iRegIorL2I src2,
11625                         immI src3, immI_M1 src4, rFlagsReg cr) %{
11626  match(Set dst (AndI src1 (XorI(URShiftI src2 src3) src4)));
11627  ins_cost(1.9 * INSN_COST);
11628  format %{ "bicw  $dst, $src1, $src2, LSR $src3" %}
11629
11630  ins_encode %{
11631    __ bicw(as_Register($dst$$reg),
11632              as_Register($src1$$reg),
11633              as_Register($src2$$reg),
11634              Assembler::LSR,
11635              $src3$$constant & 0x1f);
11636  %}
11637
11638  ins_pipe(ialu_reg_reg_shift);
11639%}
11640
11641instruct AndL_reg_URShift_not_reg(iRegLNoSp dst,
11642                         iRegL src1, iRegL src2,
11643                         immI src3, immL_M1 src4, rFlagsReg cr) %{
11644  match(Set dst (AndL src1 (XorL(URShiftL src2 src3) src4)));
11645  ins_cost(1.9 * INSN_COST);
11646  format %{ "bic  $dst, $src1, $src2, LSR $src3" %}
11647
11648  ins_encode %{
11649    __ bic(as_Register($dst$$reg),
11650              as_Register($src1$$reg),
11651              as_Register($src2$$reg),
11652              Assembler::LSR,
11653              $src3$$constant & 0x3f);
11654  %}
11655
11656  ins_pipe(ialu_reg_reg_shift);
11657%}
11658
11659instruct AndI_reg_RShift_not_reg(iRegINoSp dst,
11660                         iRegIorL2I src1, iRegIorL2I src2,
11661                         immI src3, immI_M1 src4, rFlagsReg cr) %{
11662  match(Set dst (AndI src1 (XorI(RShiftI src2 src3) src4)));
11663  ins_cost(1.9 * INSN_COST);
11664  format %{ "bicw  $dst, $src1, $src2, ASR $src3" %}
11665
11666  ins_encode %{
11667    __ bicw(as_Register($dst$$reg),
11668              as_Register($src1$$reg),
11669              as_Register($src2$$reg),
11670              Assembler::ASR,
11671              $src3$$constant & 0x1f);
11672  %}
11673
11674  ins_pipe(ialu_reg_reg_shift);
11675%}
11676
11677instruct AndL_reg_RShift_not_reg(iRegLNoSp dst,
11678                         iRegL src1, iRegL src2,
11679                         immI src3, immL_M1 src4, rFlagsReg cr) %{
11680  match(Set dst (AndL src1 (XorL(RShiftL src2 src3) src4)));
11681  ins_cost(1.9 * INSN_COST);
11682  format %{ "bic  $dst, $src1, $src2, ASR $src3" %}
11683
11684  ins_encode %{
11685    __ bic(as_Register($dst$$reg),
11686              as_Register($src1$$reg),
11687              as_Register($src2$$reg),
11688              Assembler::ASR,
11689              $src3$$constant & 0x3f);
11690  %}
11691
11692  ins_pipe(ialu_reg_reg_shift);
11693%}
11694
11695instruct AndI_reg_LShift_not_reg(iRegINoSp dst,
11696                         iRegIorL2I src1, iRegIorL2I src2,
11697                         immI src3, immI_M1 src4, rFlagsReg cr) %{
11698  match(Set dst (AndI src1 (XorI(LShiftI src2 src3) src4)));
11699  ins_cost(1.9 * INSN_COST);
11700  format %{ "bicw  $dst, $src1, $src2, LSL $src3" %}
11701
11702  ins_encode %{
11703    __ bicw(as_Register($dst$$reg),
11704              as_Register($src1$$reg),
11705              as_Register($src2$$reg),
11706              Assembler::LSL,
11707              $src3$$constant & 0x1f);
11708  %}
11709
11710  ins_pipe(ialu_reg_reg_shift);
11711%}
11712
11713instruct AndL_reg_LShift_not_reg(iRegLNoSp dst,
11714                         iRegL src1, iRegL src2,
11715                         immI src3, immL_M1 src4, rFlagsReg cr) %{
11716  match(Set dst (AndL src1 (XorL(LShiftL src2 src3) src4)));
11717  ins_cost(1.9 * INSN_COST);
11718  format %{ "bic  $dst, $src1, $src2, LSL $src3" %}
11719
11720  ins_encode %{
11721    __ bic(as_Register($dst$$reg),
11722              as_Register($src1$$reg),
11723              as_Register($src2$$reg),
11724              Assembler::LSL,
11725              $src3$$constant & 0x3f);
11726  %}
11727
11728  ins_pipe(ialu_reg_reg_shift);
11729%}
11730
11731instruct XorI_reg_URShift_not_reg(iRegINoSp dst,
11732                         iRegIorL2I src1, iRegIorL2I src2,
11733                         immI src3, immI_M1 src4, rFlagsReg cr) %{
11734  match(Set dst (XorI src4 (XorI(URShiftI src2 src3) src1)));
11735  ins_cost(1.9 * INSN_COST);
11736  format %{ "eonw  $dst, $src1, $src2, LSR $src3" %}
11737
11738  ins_encode %{
11739    __ eonw(as_Register($dst$$reg),
11740              as_Register($src1$$reg),
11741              as_Register($src2$$reg),
11742              Assembler::LSR,
11743              $src3$$constant & 0x1f);
11744  %}
11745
11746  ins_pipe(ialu_reg_reg_shift);
11747%}
11748
11749instruct XorL_reg_URShift_not_reg(iRegLNoSp dst,
11750                         iRegL src1, iRegL src2,
11751                         immI src3, immL_M1 src4, rFlagsReg cr) %{
11752  match(Set dst (XorL src4 (XorL(URShiftL src2 src3) src1)));
11753  ins_cost(1.9 * INSN_COST);
11754  format %{ "eon  $dst, $src1, $src2, LSR $src3" %}
11755
11756  ins_encode %{
11757    __ eon(as_Register($dst$$reg),
11758              as_Register($src1$$reg),
11759              as_Register($src2$$reg),
11760              Assembler::LSR,
11761              $src3$$constant & 0x3f);
11762  %}
11763
11764  ins_pipe(ialu_reg_reg_shift);
11765%}
11766
11767instruct XorI_reg_RShift_not_reg(iRegINoSp dst,
11768                         iRegIorL2I src1, iRegIorL2I src2,
11769                         immI src3, immI_M1 src4, rFlagsReg cr) %{
11770  match(Set dst (XorI src4 (XorI(RShiftI src2 src3) src1)));
11771  ins_cost(1.9 * INSN_COST);
11772  format %{ "eonw  $dst, $src1, $src2, ASR $src3" %}
11773
11774  ins_encode %{
11775    __ eonw(as_Register($dst$$reg),
11776              as_Register($src1$$reg),
11777              as_Register($src2$$reg),
11778              Assembler::ASR,
11779              $src3$$constant & 0x1f);
11780  %}
11781
11782  ins_pipe(ialu_reg_reg_shift);
11783%}
11784
11785instruct XorL_reg_RShift_not_reg(iRegLNoSp dst,
11786                         iRegL src1, iRegL src2,
11787                         immI src3, immL_M1 src4, rFlagsReg cr) %{
11788  match(Set dst (XorL src4 (XorL(RShiftL src2 src3) src1)));
11789  ins_cost(1.9 * INSN_COST);
11790  format %{ "eon  $dst, $src1, $src2, ASR $src3" %}
11791
11792  ins_encode %{
11793    __ eon(as_Register($dst$$reg),
11794              as_Register($src1$$reg),
11795              as_Register($src2$$reg),
11796              Assembler::ASR,
11797              $src3$$constant & 0x3f);
11798  %}
11799
11800  ins_pipe(ialu_reg_reg_shift);
11801%}
11802
11803instruct XorI_reg_LShift_not_reg(iRegINoSp dst,
11804                         iRegIorL2I src1, iRegIorL2I src2,
11805                         immI src3, immI_M1 src4, rFlagsReg cr) %{
11806  match(Set dst (XorI src4 (XorI(LShiftI src2 src3) src1)));
11807  ins_cost(1.9 * INSN_COST);
11808  format %{ "eonw  $dst, $src1, $src2, LSL $src3" %}
11809
11810  ins_encode %{
11811    __ eonw(as_Register($dst$$reg),
11812              as_Register($src1$$reg),
11813              as_Register($src2$$reg),
11814              Assembler::LSL,
11815              $src3$$constant & 0x1f);
11816  %}
11817
11818  ins_pipe(ialu_reg_reg_shift);
11819%}
11820
11821instruct XorL_reg_LShift_not_reg(iRegLNoSp dst,
11822                         iRegL src1, iRegL src2,
11823                         immI src3, immL_M1 src4, rFlagsReg cr) %{
11824  match(Set dst (XorL src4 (XorL(LShiftL src2 src3) src1)));
11825  ins_cost(1.9 * INSN_COST);
11826  format %{ "eon  $dst, $src1, $src2, LSL $src3" %}
11827
11828  ins_encode %{
11829    __ eon(as_Register($dst$$reg),
11830              as_Register($src1$$reg),
11831              as_Register($src2$$reg),
11832              Assembler::LSL,
11833              $src3$$constant & 0x3f);
11834  %}
11835
11836  ins_pipe(ialu_reg_reg_shift);
11837%}
11838
11839instruct OrI_reg_URShift_not_reg(iRegINoSp dst,
11840                         iRegIorL2I src1, iRegIorL2I src2,
11841                         immI src3, immI_M1 src4, rFlagsReg cr) %{
11842  match(Set dst (OrI src1 (XorI(URShiftI src2 src3) src4)));
11843  ins_cost(1.9 * INSN_COST);
11844  format %{ "ornw  $dst, $src1, $src2, LSR $src3" %}
11845
11846  ins_encode %{
11847    __ ornw(as_Register($dst$$reg),
11848              as_Register($src1$$reg),
11849              as_Register($src2$$reg),
11850              Assembler::LSR,
11851              $src3$$constant & 0x1f);
11852  %}
11853
11854  ins_pipe(ialu_reg_reg_shift);
11855%}
11856
11857instruct OrL_reg_URShift_not_reg(iRegLNoSp dst,
11858                         iRegL src1, iRegL src2,
11859                         immI src3, immL_M1 src4, rFlagsReg cr) %{
11860  match(Set dst (OrL src1 (XorL(URShiftL src2 src3) src4)));
11861  ins_cost(1.9 * INSN_COST);
11862  format %{ "orn  $dst, $src1, $src2, LSR $src3" %}
11863
11864  ins_encode %{
11865    __ orn(as_Register($dst$$reg),
11866              as_Register($src1$$reg),
11867              as_Register($src2$$reg),
11868              Assembler::LSR,
11869              $src3$$constant & 0x3f);
11870  %}
11871
11872  ins_pipe(ialu_reg_reg_shift);
11873%}
11874
11875instruct OrI_reg_RShift_not_reg(iRegINoSp dst,
11876                         iRegIorL2I src1, iRegIorL2I src2,
11877                         immI src3, immI_M1 src4, rFlagsReg cr) %{
11878  match(Set dst (OrI src1 (XorI(RShiftI src2 src3) src4)));
11879  ins_cost(1.9 * INSN_COST);
11880  format %{ "ornw  $dst, $src1, $src2, ASR $src3" %}
11881
11882  ins_encode %{
11883    __ ornw(as_Register($dst$$reg),
11884              as_Register($src1$$reg),
11885              as_Register($src2$$reg),
11886              Assembler::ASR,
11887              $src3$$constant & 0x1f);
11888  %}
11889
11890  ins_pipe(ialu_reg_reg_shift);
11891%}
11892
11893instruct OrL_reg_RShift_not_reg(iRegLNoSp dst,
11894                         iRegL src1, iRegL src2,
11895                         immI src3, immL_M1 src4, rFlagsReg cr) %{
11896  match(Set dst (OrL src1 (XorL(RShiftL src2 src3) src4)));
11897  ins_cost(1.9 * INSN_COST);
11898  format %{ "orn  $dst, $src1, $src2, ASR $src3" %}
11899
11900  ins_encode %{
11901    __ orn(as_Register($dst$$reg),
11902              as_Register($src1$$reg),
11903              as_Register($src2$$reg),
11904              Assembler::ASR,
11905              $src3$$constant & 0x3f);
11906  %}
11907
11908  ins_pipe(ialu_reg_reg_shift);
11909%}
11910
11911instruct OrI_reg_LShift_not_reg(iRegINoSp dst,
11912                         iRegIorL2I src1, iRegIorL2I src2,
11913                         immI src3, immI_M1 src4, rFlagsReg cr) %{
11914  match(Set dst (OrI src1 (XorI(LShiftI src2 src3) src4)));
11915  ins_cost(1.9 * INSN_COST);
11916  format %{ "ornw  $dst, $src1, $src2, LSL $src3" %}
11917
11918  ins_encode %{
11919    __ ornw(as_Register($dst$$reg),
11920              as_Register($src1$$reg),
11921              as_Register($src2$$reg),
11922              Assembler::LSL,
11923              $src3$$constant & 0x1f);
11924  %}
11925
11926  ins_pipe(ialu_reg_reg_shift);
11927%}
11928
11929instruct OrL_reg_LShift_not_reg(iRegLNoSp dst,
11930                         iRegL src1, iRegL src2,
11931                         immI src3, immL_M1 src4, rFlagsReg cr) %{
11932  match(Set dst (OrL src1 (XorL(LShiftL src2 src3) src4)));
11933  ins_cost(1.9 * INSN_COST);
11934  format %{ "orn  $dst, $src1, $src2, LSL $src3" %}
11935
11936  ins_encode %{
11937    __ orn(as_Register($dst$$reg),
11938              as_Register($src1$$reg),
11939              as_Register($src2$$reg),
11940              Assembler::LSL,
11941              $src3$$constant & 0x3f);
11942  %}
11943
11944  ins_pipe(ialu_reg_reg_shift);
11945%}
11946
11947instruct AndI_reg_URShift_reg(iRegINoSp dst,
11948                         iRegIorL2I src1, iRegIorL2I src2,
11949                         immI src3, rFlagsReg cr) %{
11950  match(Set dst (AndI src1 (URShiftI src2 src3)));
11951
11952  ins_cost(1.9 * INSN_COST);
11953  format %{ "andw  $dst, $src1, $src2, LSR $src3" %}
11954
11955  ins_encode %{
11956    __ andw(as_Register($dst$$reg),
11957              as_Register($src1$$reg),
11958              as_Register($src2$$reg),
11959              Assembler::LSR,
11960              $src3$$constant & 0x1f);
11961  %}
11962
11963  ins_pipe(ialu_reg_reg_shift);
11964%}
11965
11966instruct AndL_reg_URShift_reg(iRegLNoSp dst,
11967                         iRegL src1, iRegL src2,
11968                         immI src3, rFlagsReg cr) %{
11969  match(Set dst (AndL src1 (URShiftL src2 src3)));
11970
11971  ins_cost(1.9 * INSN_COST);
11972  format %{ "andr  $dst, $src1, $src2, LSR $src3" %}
11973
11974  ins_encode %{
11975    __ andr(as_Register($dst$$reg),
11976              as_Register($src1$$reg),
11977              as_Register($src2$$reg),
11978              Assembler::LSR,
11979              $src3$$constant & 0x3f);
11980  %}
11981
11982  ins_pipe(ialu_reg_reg_shift);
11983%}
11984
11985instruct AndI_reg_RShift_reg(iRegINoSp dst,
11986                         iRegIorL2I src1, iRegIorL2I src2,
11987                         immI src3, rFlagsReg cr) %{
11988  match(Set dst (AndI src1 (RShiftI src2 src3)));
11989
11990  ins_cost(1.9 * INSN_COST);
11991  format %{ "andw  $dst, $src1, $src2, ASR $src3" %}
11992
11993  ins_encode %{
11994    __ andw(as_Register($dst$$reg),
11995              as_Register($src1$$reg),
11996              as_Register($src2$$reg),
11997              Assembler::ASR,
11998              $src3$$constant & 0x1f);
11999  %}
12000
12001  ins_pipe(ialu_reg_reg_shift);
12002%}
12003
12004instruct AndL_reg_RShift_reg(iRegLNoSp dst,
12005                         iRegL src1, iRegL src2,
12006                         immI src3, rFlagsReg cr) %{
12007  match(Set dst (AndL src1 (RShiftL src2 src3)));
12008
12009  ins_cost(1.9 * INSN_COST);
12010  format %{ "andr  $dst, $src1, $src2, ASR $src3" %}
12011
12012  ins_encode %{
12013    __ andr(as_Register($dst$$reg),
12014              as_Register($src1$$reg),
12015              as_Register($src2$$reg),
12016              Assembler::ASR,
12017              $src3$$constant & 0x3f);
12018  %}
12019
12020  ins_pipe(ialu_reg_reg_shift);
12021%}
12022
12023instruct AndI_reg_LShift_reg(iRegINoSp dst,
12024                         iRegIorL2I src1, iRegIorL2I src2,
12025                         immI src3, rFlagsReg cr) %{
12026  match(Set dst (AndI src1 (LShiftI src2 src3)));
12027
12028  ins_cost(1.9 * INSN_COST);
12029  format %{ "andw  $dst, $src1, $src2, LSL $src3" %}
12030
12031  ins_encode %{
12032    __ andw(as_Register($dst$$reg),
12033              as_Register($src1$$reg),
12034              as_Register($src2$$reg),
12035              Assembler::LSL,
12036              $src3$$constant & 0x1f);
12037  %}
12038
12039  ins_pipe(ialu_reg_reg_shift);
12040%}
12041
12042instruct AndL_reg_LShift_reg(iRegLNoSp dst,
12043                         iRegL src1, iRegL src2,
12044                         immI src3, rFlagsReg cr) %{
12045  match(Set dst (AndL src1 (LShiftL src2 src3)));
12046
12047  ins_cost(1.9 * INSN_COST);
12048  format %{ "andr  $dst, $src1, $src2, LSL $src3" %}
12049
12050  ins_encode %{
12051    __ andr(as_Register($dst$$reg),
12052              as_Register($src1$$reg),
12053              as_Register($src2$$reg),
12054              Assembler::LSL,
12055              $src3$$constant & 0x3f);
12056  %}
12057
12058  ins_pipe(ialu_reg_reg_shift);
12059%}
12060
12061instruct XorI_reg_URShift_reg(iRegINoSp dst,
12062                         iRegIorL2I src1, iRegIorL2I src2,
12063                         immI src3, rFlagsReg cr) %{
12064  match(Set dst (XorI src1 (URShiftI src2 src3)));
12065
12066  ins_cost(1.9 * INSN_COST);
12067  format %{ "eorw  $dst, $src1, $src2, LSR $src3" %}
12068
12069  ins_encode %{
12070    __ eorw(as_Register($dst$$reg),
12071              as_Register($src1$$reg),
12072              as_Register($src2$$reg),
12073              Assembler::LSR,
12074              $src3$$constant & 0x1f);
12075  %}
12076
12077  ins_pipe(ialu_reg_reg_shift);
12078%}
12079
12080instruct XorL_reg_URShift_reg(iRegLNoSp dst,
12081                         iRegL src1, iRegL src2,
12082                         immI src3, rFlagsReg cr) %{
12083  match(Set dst (XorL src1 (URShiftL src2 src3)));
12084
12085  ins_cost(1.9 * INSN_COST);
12086  format %{ "eor  $dst, $src1, $src2, LSR $src3" %}
12087
12088  ins_encode %{
12089    __ eor(as_Register($dst$$reg),
12090              as_Register($src1$$reg),
12091              as_Register($src2$$reg),
12092              Assembler::LSR,
12093              $src3$$constant & 0x3f);
12094  %}
12095
12096  ins_pipe(ialu_reg_reg_shift);
12097%}
12098
12099instruct XorI_reg_RShift_reg(iRegINoSp dst,
12100                         iRegIorL2I src1, iRegIorL2I src2,
12101                         immI src3, rFlagsReg cr) %{
12102  match(Set dst (XorI src1 (RShiftI src2 src3)));
12103
12104  ins_cost(1.9 * INSN_COST);
12105  format %{ "eorw  $dst, $src1, $src2, ASR $src3" %}
12106
12107  ins_encode %{
12108    __ eorw(as_Register($dst$$reg),
12109              as_Register($src1$$reg),
12110              as_Register($src2$$reg),
12111              Assembler::ASR,
12112              $src3$$constant & 0x1f);
12113  %}
12114
12115  ins_pipe(ialu_reg_reg_shift);
12116%}
12117
12118instruct XorL_reg_RShift_reg(iRegLNoSp dst,
12119                         iRegL src1, iRegL src2,
12120                         immI src3, rFlagsReg cr) %{
12121  match(Set dst (XorL src1 (RShiftL src2 src3)));
12122
12123  ins_cost(1.9 * INSN_COST);
12124  format %{ "eor  $dst, $src1, $src2, ASR $src3" %}
12125
12126  ins_encode %{
12127    __ eor(as_Register($dst$$reg),
12128              as_Register($src1$$reg),
12129              as_Register($src2$$reg),
12130              Assembler::ASR,
12131              $src3$$constant & 0x3f);
12132  %}
12133
12134  ins_pipe(ialu_reg_reg_shift);
12135%}
12136
12137instruct XorI_reg_LShift_reg(iRegINoSp dst,
12138                         iRegIorL2I src1, iRegIorL2I src2,
12139                         immI src3, rFlagsReg cr) %{
12140  match(Set dst (XorI src1 (LShiftI src2 src3)));
12141
12142  ins_cost(1.9 * INSN_COST);
12143  format %{ "eorw  $dst, $src1, $src2, LSL $src3" %}
12144
12145  ins_encode %{
12146    __ eorw(as_Register($dst$$reg),
12147              as_Register($src1$$reg),
12148              as_Register($src2$$reg),
12149              Assembler::LSL,
12150              $src3$$constant & 0x1f);
12151  %}
12152
12153  ins_pipe(ialu_reg_reg_shift);
12154%}
12155
12156instruct XorL_reg_LShift_reg(iRegLNoSp dst,
12157                         iRegL src1, iRegL src2,
12158                         immI src3, rFlagsReg cr) %{
12159  match(Set dst (XorL src1 (LShiftL src2 src3)));
12160
12161  ins_cost(1.9 * INSN_COST);
12162  format %{ "eor  $dst, $src1, $src2, LSL $src3" %}
12163
12164  ins_encode %{
12165    __ eor(as_Register($dst$$reg),
12166              as_Register($src1$$reg),
12167              as_Register($src2$$reg),
12168              Assembler::LSL,
12169              $src3$$constant & 0x3f);
12170  %}
12171
12172  ins_pipe(ialu_reg_reg_shift);
12173%}
12174
12175instruct OrI_reg_URShift_reg(iRegINoSp dst,
12176                         iRegIorL2I src1, iRegIorL2I src2,
12177                         immI src3, rFlagsReg cr) %{
12178  match(Set dst (OrI src1 (URShiftI src2 src3)));
12179
12180  ins_cost(1.9 * INSN_COST);
12181  format %{ "orrw  $dst, $src1, $src2, LSR $src3" %}
12182
12183  ins_encode %{
12184    __ orrw(as_Register($dst$$reg),
12185              as_Register($src1$$reg),
12186              as_Register($src2$$reg),
12187              Assembler::LSR,
12188              $src3$$constant & 0x1f);
12189  %}
12190
12191  ins_pipe(ialu_reg_reg_shift);
12192%}
12193
12194instruct OrL_reg_URShift_reg(iRegLNoSp dst,
12195                         iRegL src1, iRegL src2,
12196                         immI src3, rFlagsReg cr) %{
12197  match(Set dst (OrL src1 (URShiftL src2 src3)));
12198
12199  ins_cost(1.9 * INSN_COST);
12200  format %{ "orr  $dst, $src1, $src2, LSR $src3" %}
12201
12202  ins_encode %{
12203    __ orr(as_Register($dst$$reg),
12204              as_Register($src1$$reg),
12205              as_Register($src2$$reg),
12206              Assembler::LSR,
12207              $src3$$constant & 0x3f);
12208  %}
12209
12210  ins_pipe(ialu_reg_reg_shift);
12211%}
12212
12213instruct OrI_reg_RShift_reg(iRegINoSp dst,
12214                         iRegIorL2I src1, iRegIorL2I src2,
12215                         immI src3, rFlagsReg cr) %{
12216  match(Set dst (OrI src1 (RShiftI src2 src3)));
12217
12218  ins_cost(1.9 * INSN_COST);
12219  format %{ "orrw  $dst, $src1, $src2, ASR $src3" %}
12220
12221  ins_encode %{
12222    __ orrw(as_Register($dst$$reg),
12223              as_Register($src1$$reg),
12224              as_Register($src2$$reg),
12225              Assembler::ASR,
12226              $src3$$constant & 0x1f);
12227  %}
12228
12229  ins_pipe(ialu_reg_reg_shift);
12230%}
12231
12232instruct OrL_reg_RShift_reg(iRegLNoSp dst,
12233                         iRegL src1, iRegL src2,
12234                         immI src3, rFlagsReg cr) %{
12235  match(Set dst (OrL src1 (RShiftL src2 src3)));
12236
12237  ins_cost(1.9 * INSN_COST);
12238  format %{ "orr  $dst, $src1, $src2, ASR $src3" %}
12239
12240  ins_encode %{
12241    __ orr(as_Register($dst$$reg),
12242              as_Register($src1$$reg),
12243              as_Register($src2$$reg),
12244              Assembler::ASR,
12245              $src3$$constant & 0x3f);
12246  %}
12247
12248  ins_pipe(ialu_reg_reg_shift);
12249%}
12250
12251instruct OrI_reg_LShift_reg(iRegINoSp dst,
12252                         iRegIorL2I src1, iRegIorL2I src2,
12253                         immI src3, rFlagsReg cr) %{
12254  match(Set dst (OrI src1 (LShiftI src2 src3)));
12255
12256  ins_cost(1.9 * INSN_COST);
12257  format %{ "orrw  $dst, $src1, $src2, LSL $src3" %}
12258
12259  ins_encode %{
12260    __ orrw(as_Register($dst$$reg),
12261              as_Register($src1$$reg),
12262              as_Register($src2$$reg),
12263              Assembler::LSL,
12264              $src3$$constant & 0x1f);
12265  %}
12266
12267  ins_pipe(ialu_reg_reg_shift);
12268%}
12269
12270instruct OrL_reg_LShift_reg(iRegLNoSp dst,
12271                         iRegL src1, iRegL src2,
12272                         immI src3, rFlagsReg cr) %{
12273  match(Set dst (OrL src1 (LShiftL src2 src3)));
12274
12275  ins_cost(1.9 * INSN_COST);
12276  format %{ "orr  $dst, $src1, $src2, LSL $src3" %}
12277
12278  ins_encode %{
12279    __ orr(as_Register($dst$$reg),
12280              as_Register($src1$$reg),
12281              as_Register($src2$$reg),
12282              Assembler::LSL,
12283              $src3$$constant & 0x3f);
12284  %}
12285
12286  ins_pipe(ialu_reg_reg_shift);
12287%}
12288
12289instruct AddI_reg_URShift_reg(iRegINoSp dst,
12290                         iRegIorL2I src1, iRegIorL2I src2,
12291                         immI src3, rFlagsReg cr) %{
12292  match(Set dst (AddI src1 (URShiftI src2 src3)));
12293
12294  ins_cost(1.9 * INSN_COST);
12295  format %{ "addw  $dst, $src1, $src2, LSR $src3" %}
12296
12297  ins_encode %{
12298    __ addw(as_Register($dst$$reg),
12299              as_Register($src1$$reg),
12300              as_Register($src2$$reg),
12301              Assembler::LSR,
12302              $src3$$constant & 0x1f);
12303  %}
12304
12305  ins_pipe(ialu_reg_reg_shift);
12306%}
12307
12308instruct AddL_reg_URShift_reg(iRegLNoSp dst,
12309                         iRegL src1, iRegL src2,
12310                         immI src3, rFlagsReg cr) %{
12311  match(Set dst (AddL src1 (URShiftL src2 src3)));
12312
12313  ins_cost(1.9 * INSN_COST);
12314  format %{ "add  $dst, $src1, $src2, LSR $src3" %}
12315
12316  ins_encode %{
12317    __ add(as_Register($dst$$reg),
12318              as_Register($src1$$reg),
12319              as_Register($src2$$reg),
12320              Assembler::LSR,
12321              $src3$$constant & 0x3f);
12322  %}
12323
12324  ins_pipe(ialu_reg_reg_shift);
12325%}
12326
12327instruct AddI_reg_RShift_reg(iRegINoSp dst,
12328                         iRegIorL2I src1, iRegIorL2I src2,
12329                         immI src3, rFlagsReg cr) %{
12330  match(Set dst (AddI src1 (RShiftI src2 src3)));
12331
12332  ins_cost(1.9 * INSN_COST);
12333  format %{ "addw  $dst, $src1, $src2, ASR $src3" %}
12334
12335  ins_encode %{
12336    __ addw(as_Register($dst$$reg),
12337              as_Register($src1$$reg),
12338              as_Register($src2$$reg),
12339              Assembler::ASR,
12340              $src3$$constant & 0x1f);
12341  %}
12342
12343  ins_pipe(ialu_reg_reg_shift);
12344%}
12345
12346instruct AddL_reg_RShift_reg(iRegLNoSp dst,
12347                         iRegL src1, iRegL src2,
12348                         immI src3, rFlagsReg cr) %{
12349  match(Set dst (AddL src1 (RShiftL src2 src3)));
12350
12351  ins_cost(1.9 * INSN_COST);
12352  format %{ "add  $dst, $src1, $src2, ASR $src3" %}
12353
12354  ins_encode %{
12355    __ add(as_Register($dst$$reg),
12356              as_Register($src1$$reg),
12357              as_Register($src2$$reg),
12358              Assembler::ASR,
12359              $src3$$constant & 0x3f);
12360  %}
12361
12362  ins_pipe(ialu_reg_reg_shift);
12363%}
12364
12365instruct AddI_reg_LShift_reg(iRegINoSp dst,
12366                         iRegIorL2I src1, iRegIorL2I src2,
12367                         immI src3, rFlagsReg cr) %{
12368  match(Set dst (AddI src1 (LShiftI src2 src3)));
12369
12370  ins_cost(1.9 * INSN_COST);
12371  format %{ "addw  $dst, $src1, $src2, LSL $src3" %}
12372
12373  ins_encode %{
12374    __ addw(as_Register($dst$$reg),
12375              as_Register($src1$$reg),
12376              as_Register($src2$$reg),
12377              Assembler::LSL,
12378              $src3$$constant & 0x1f);
12379  %}
12380
12381  ins_pipe(ialu_reg_reg_shift);
12382%}
12383
12384instruct AddL_reg_LShift_reg(iRegLNoSp dst,
12385                         iRegL src1, iRegL src2,
12386                         immI src3, rFlagsReg cr) %{
12387  match(Set dst (AddL src1 (LShiftL src2 src3)));
12388
12389  ins_cost(1.9 * INSN_COST);
12390  format %{ "add  $dst, $src1, $src2, LSL $src3" %}
12391
12392  ins_encode %{
12393    __ add(as_Register($dst$$reg),
12394              as_Register($src1$$reg),
12395              as_Register($src2$$reg),
12396              Assembler::LSL,
12397              $src3$$constant & 0x3f);
12398  %}
12399
12400  ins_pipe(ialu_reg_reg_shift);
12401%}
12402
12403instruct SubI_reg_URShift_reg(iRegINoSp dst,
12404                         iRegIorL2I src1, iRegIorL2I src2,
12405                         immI src3, rFlagsReg cr) %{
12406  match(Set dst (SubI src1 (URShiftI src2 src3)));
12407
12408  ins_cost(1.9 * INSN_COST);
12409  format %{ "subw  $dst, $src1, $src2, LSR $src3" %}
12410
12411  ins_encode %{
12412    __ subw(as_Register($dst$$reg),
12413              as_Register($src1$$reg),
12414              as_Register($src2$$reg),
12415              Assembler::LSR,
12416              $src3$$constant & 0x1f);
12417  %}
12418
12419  ins_pipe(ialu_reg_reg_shift);
12420%}
12421
12422instruct SubL_reg_URShift_reg(iRegLNoSp dst,
12423                         iRegL src1, iRegL src2,
12424                         immI src3, rFlagsReg cr) %{
12425  match(Set dst (SubL src1 (URShiftL src2 src3)));
12426
12427  ins_cost(1.9 * INSN_COST);
12428  format %{ "sub  $dst, $src1, $src2, LSR $src3" %}
12429
12430  ins_encode %{
12431    __ sub(as_Register($dst$$reg),
12432              as_Register($src1$$reg),
12433              as_Register($src2$$reg),
12434              Assembler::LSR,
12435              $src3$$constant & 0x3f);
12436  %}
12437
12438  ins_pipe(ialu_reg_reg_shift);
12439%}
12440
12441instruct SubI_reg_RShift_reg(iRegINoSp dst,
12442                         iRegIorL2I src1, iRegIorL2I src2,
12443                         immI src3, rFlagsReg cr) %{
12444  match(Set dst (SubI src1 (RShiftI src2 src3)));
12445
12446  ins_cost(1.9 * INSN_COST);
12447  format %{ "subw  $dst, $src1, $src2, ASR $src3" %}
12448
12449  ins_encode %{
12450    __ subw(as_Register($dst$$reg),
12451              as_Register($src1$$reg),
12452              as_Register($src2$$reg),
12453              Assembler::ASR,
12454              $src3$$constant & 0x1f);
12455  %}
12456
12457  ins_pipe(ialu_reg_reg_shift);
12458%}
12459
12460instruct SubL_reg_RShift_reg(iRegLNoSp dst,
12461                         iRegL src1, iRegL src2,
12462                         immI src3, rFlagsReg cr) %{
12463  match(Set dst (SubL src1 (RShiftL src2 src3)));
12464
12465  ins_cost(1.9 * INSN_COST);
12466  format %{ "sub  $dst, $src1, $src2, ASR $src3" %}
12467
12468  ins_encode %{
12469    __ sub(as_Register($dst$$reg),
12470              as_Register($src1$$reg),
12471              as_Register($src2$$reg),
12472              Assembler::ASR,
12473              $src3$$constant & 0x3f);
12474  %}
12475
12476  ins_pipe(ialu_reg_reg_shift);
12477%}
12478
12479instruct SubI_reg_LShift_reg(iRegINoSp dst,
12480                         iRegIorL2I src1, iRegIorL2I src2,
12481                         immI src3, rFlagsReg cr) %{
12482  match(Set dst (SubI src1 (LShiftI src2 src3)));
12483
12484  ins_cost(1.9 * INSN_COST);
12485  format %{ "subw  $dst, $src1, $src2, LSL $src3" %}
12486
12487  ins_encode %{
12488    __ subw(as_Register($dst$$reg),
12489              as_Register($src1$$reg),
12490              as_Register($src2$$reg),
12491              Assembler::LSL,
12492              $src3$$constant & 0x1f);
12493  %}
12494
12495  ins_pipe(ialu_reg_reg_shift);
12496%}
12497
12498instruct SubL_reg_LShift_reg(iRegLNoSp dst,
12499                         iRegL src1, iRegL src2,
12500                         immI src3, rFlagsReg cr) %{
12501  match(Set dst (SubL src1 (LShiftL src2 src3)));
12502
12503  ins_cost(1.9 * INSN_COST);
12504  format %{ "sub  $dst, $src1, $src2, LSL $src3" %}
12505
12506  ins_encode %{
12507    __ sub(as_Register($dst$$reg),
12508              as_Register($src1$$reg),
12509              as_Register($src2$$reg),
12510              Assembler::LSL,
12511              $src3$$constant & 0x3f);
12512  %}
12513
12514  ins_pipe(ialu_reg_reg_shift);
12515%}
12516
12517
12518
12519// Shift Left followed by Shift Right.
12520// This idiom is used by the compiler for the i2b bytecode etc.
12521instruct sbfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
12522%{
12523  match(Set dst (RShiftL (LShiftL src lshift_count) rshift_count));
12524  // Make sure we are not going to exceed what sbfm can do.
12525  predicate((unsigned int)n->in(2)->get_int() <= 63
12526            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);
12527
12528  ins_cost(INSN_COST * 2);
12529  format %{ "sbfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
12530  ins_encode %{
12531    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
12532    int s = 63 - lshift;
12533    int r = (rshift - lshift) & 63;
12534    __ sbfm(as_Register($dst$$reg),
12535            as_Register($src$$reg),
12536            r, s);
12537  %}
12538
12539  ins_pipe(ialu_reg_shift);
12540%}
12541
12542// Shift Left followed by Shift Right.
12543// This idiom is used by the compiler for the i2b bytecode etc.
12544instruct sbfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
12545%{
12546  match(Set dst (RShiftI (LShiftI src lshift_count) rshift_count));
12547  // Make sure we are not going to exceed what sbfmw can do.
12548  predicate((unsigned int)n->in(2)->get_int() <= 31
12549            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);
12550
12551  ins_cost(INSN_COST * 2);
12552  format %{ "sbfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
12553  ins_encode %{
12554    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
12555    int s = 31 - lshift;
12556    int r = (rshift - lshift) & 31;
12557    __ sbfmw(as_Register($dst$$reg),
12558            as_Register($src$$reg),
12559            r, s);
12560  %}
12561
12562  ins_pipe(ialu_reg_shift);
12563%}
12564
12565// Shift Left followed by Shift Right.
12566// This idiom is used by the compiler for the i2b bytecode etc.
12567instruct ubfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
12568%{
12569  match(Set dst (URShiftL (LShiftL src lshift_count) rshift_count));
12570  // Make sure we are not going to exceed what ubfm can do.
12571  predicate((unsigned int)n->in(2)->get_int() <= 63
12572            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);
12573
12574  ins_cost(INSN_COST * 2);
12575  format %{ "ubfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
12576  ins_encode %{
12577    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
12578    int s = 63 - lshift;
12579    int r = (rshift - lshift) & 63;
12580    __ ubfm(as_Register($dst$$reg),
12581            as_Register($src$$reg),
12582            r, s);
12583  %}
12584
12585  ins_pipe(ialu_reg_shift);
12586%}
12587
12588// Shift Left followed by Shift Right.
12589// This idiom is used by the compiler for the i2b bytecode etc.
12590instruct ubfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
12591%{
12592  match(Set dst (URShiftI (LShiftI src lshift_count) rshift_count));
12593  // Make sure we are not going to exceed what ubfmw can do.
12594  predicate((unsigned int)n->in(2)->get_int() <= 31
12595            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);
12596
12597  ins_cost(INSN_COST * 2);
12598  format %{ "ubfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
12599  ins_encode %{
12600    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
12601    int s = 31 - lshift;
12602    int r = (rshift - lshift) & 31;
12603    __ ubfmw(as_Register($dst$$reg),
12604            as_Register($src$$reg),
12605            r, s);
12606  %}
12607
12608  ins_pipe(ialu_reg_shift);
12609%}
12610// Bitfield extract with shift & mask
12611
12612instruct ubfxwI(iRegINoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
12613%{
12614  match(Set dst (AndI (URShiftI src rshift) mask));
12615
12616  ins_cost(INSN_COST);
12617  format %{ "ubfxw $dst, $src, $rshift, $mask" %}
12618  ins_encode %{
12619    int rshift = $rshift$$constant;
12620    long mask = $mask$$constant;
12621    int width = exact_log2(mask+1);
12622    __ ubfxw(as_Register($dst$$reg),
12623            as_Register($src$$reg), rshift, width);
12624  %}
12625  ins_pipe(ialu_reg_shift);
12626%}
12627instruct ubfxL(iRegLNoSp dst, iRegL src, immI rshift, immL_bitmask mask)
12628%{
12629  match(Set dst (AndL (URShiftL src rshift) mask));
12630
12631  ins_cost(INSN_COST);
12632  format %{ "ubfx $dst, $src, $rshift, $mask" %}
12633  ins_encode %{
12634    int rshift = $rshift$$constant;
12635    long mask = $mask$$constant;
12636    int width = exact_log2(mask+1);
12637    __ ubfx(as_Register($dst$$reg),
12638            as_Register($src$$reg), rshift, width);
12639  %}
12640  ins_pipe(ialu_reg_shift);
12641%}
12642
12643// We can use ubfx when extending an And with a mask when we know mask
12644// is positive.  We know that because immI_bitmask guarantees it.
12645instruct ubfxIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
12646%{
12647  match(Set dst (ConvI2L (AndI (URShiftI src rshift) mask)));
12648
12649  ins_cost(INSN_COST * 2);
12650  format %{ "ubfx $dst, $src, $rshift, $mask" %}
12651  ins_encode %{
12652    int rshift = $rshift$$constant;
12653    long mask = $mask$$constant;
12654    int width = exact_log2(mask+1);
12655    __ ubfx(as_Register($dst$$reg),
12656            as_Register($src$$reg), rshift, width);
12657  %}
12658  ins_pipe(ialu_reg_shift);
12659%}
12660
12661// We can use ubfiz when masking by a positive number and then left shifting the result.
12662// We know that the mask is positive because immI_bitmask guarantees it.
12663instruct ubfizwI(iRegINoSp dst, iRegIorL2I src, immI lshift, immI_bitmask mask)
12664%{
12665  match(Set dst (LShiftI (AndI src mask) lshift));
12666  predicate((unsigned int)n->in(2)->get_int() <= 31 &&
12667    (exact_log2(n->in(1)->in(2)->get_int()+1) + (unsigned int)n->in(2)->get_int()) <= (31+1));
12668
12669  ins_cost(INSN_COST);
12670  format %{ "ubfizw $dst, $src, $lshift, $mask" %}
12671  ins_encode %{
12672    int lshift = $lshift$$constant;
12673    long mask = $mask$$constant;
12674    int width = exact_log2(mask+1);
12675    __ ubfizw(as_Register($dst$$reg),
12676          as_Register($src$$reg), lshift, width);
12677  %}
12678  ins_pipe(ialu_reg_shift);
12679%}
12680// We can use ubfiz when masking by a positive number and then left shifting the result.
12681// We know that the mask is positive because immL_bitmask guarantees it.
12682instruct ubfizL(iRegLNoSp dst, iRegL src, immI lshift, immL_bitmask mask)
12683%{
12684  match(Set dst (LShiftL (AndL src mask) lshift));
12685  predicate((unsigned int)n->in(2)->get_int() <= 63 &&
12686    (exact_log2_long(n->in(1)->in(2)->get_long()+1) + (unsigned int)n->in(2)->get_int()) <= (63+1));
12687
12688  ins_cost(INSN_COST);
12689  format %{ "ubfiz $dst, $src, $lshift, $mask" %}
12690  ins_encode %{
12691    int lshift = $lshift$$constant;
12692    long mask = $mask$$constant;
12693    int width = exact_log2(mask+1);
12694    __ ubfiz(as_Register($dst$$reg),
12695          as_Register($src$$reg), lshift, width);
12696  %}
12697  ins_pipe(ialu_reg_shift);
12698%}
12699
12700// If there is a convert I to L block between and AndI and a LShiftL, we can also match ubfiz
12701instruct ubfizIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI lshift, immI_bitmask mask)
12702%{
12703  match(Set dst (LShiftL (ConvI2L(AndI src mask)) lshift));
12704  predicate((unsigned int)n->in(2)->get_int() <= 31 &&
12705    (exact_log2((unsigned int)n->in(1)->in(1)->in(2)->get_int()+1) + (unsigned int)n->in(2)->get_int()) <= 32);
12706
12707  ins_cost(INSN_COST);
12708  format %{ "ubfiz $dst, $src, $lshift, $mask" %}
12709  ins_encode %{
12710    int lshift = $lshift$$constant;
12711    long mask = $mask$$constant;
12712    int width = exact_log2(mask+1);
12713    __ ubfiz(as_Register($dst$$reg),
12714             as_Register($src$$reg), lshift, width);
12715  %}
12716  ins_pipe(ialu_reg_shift);
12717%}
12718
12719// Rotations
12720
12721instruct extrOrL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
12722%{
12723  match(Set dst (OrL (LShiftL src1 lshift) (URShiftL src2 rshift)));
12724  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));
12725
12726  ins_cost(INSN_COST);
12727  format %{ "extr $dst, $src1, $src2, #$rshift" %}
12728
12729  ins_encode %{
12730    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
12731            $rshift$$constant & 63);
12732  %}
12733  ins_pipe(ialu_reg_reg_extr);
12734%}
12735
12736instruct extrOrI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
12737%{
12738  match(Set dst (OrI (LShiftI src1 lshift) (URShiftI src2 rshift)));
12739  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));
12740
12741  ins_cost(INSN_COST);
12742  format %{ "extr $dst, $src1, $src2, #$rshift" %}
12743
12744  ins_encode %{
12745    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
12746            $rshift$$constant & 31);
12747  %}
12748  ins_pipe(ialu_reg_reg_extr);
12749%}
12750
12751instruct extrAddL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
12752%{
12753  match(Set dst (AddL (LShiftL src1 lshift) (URShiftL src2 rshift)));
12754  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));
12755
12756  ins_cost(INSN_COST);
12757  format %{ "extr $dst, $src1, $src2, #$rshift" %}
12758
12759  ins_encode %{
12760    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
12761            $rshift$$constant & 63);
12762  %}
12763  ins_pipe(ialu_reg_reg_extr);
12764%}
12765
12766instruct extrAddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
12767%{
12768  match(Set dst (AddI (LShiftI src1 lshift) (URShiftI src2 rshift)));
12769  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));
12770
12771  ins_cost(INSN_COST);
12772  format %{ "extr $dst, $src1, $src2, #$rshift" %}
12773
12774  ins_encode %{
12775    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
12776            $rshift$$constant & 31);
12777  %}
12778  ins_pipe(ialu_reg_reg_extr);
12779%}
12780
12781
12782// rol expander
12783
12784instruct rolL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
12785%{
12786  effect(DEF dst, USE src, USE shift);
12787
12788  format %{ "rol    $dst, $src, $shift" %}
12789  ins_cost(INSN_COST * 3);
12790  ins_encode %{
12791    __ subw(rscratch1, zr, as_Register($shift$$reg));
12792    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
12793            rscratch1);
12794    %}
12795  ins_pipe(ialu_reg_reg_vshift);
12796%}
12797
12798// rol expander
12799
12800instruct rolI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
12801%{
12802  effect(DEF dst, USE src, USE shift);
12803
12804  format %{ "rol    $dst, $src, $shift" %}
12805  ins_cost(INSN_COST * 3);
12806  ins_encode %{
12807    __ subw(rscratch1, zr, as_Register($shift$$reg));
12808    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
12809            rscratch1);
12810    %}
12811  ins_pipe(ialu_reg_reg_vshift);
12812%}
12813
12814instruct rolL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
12815%{
12816  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c_64 shift))));
12817
12818  expand %{
12819    rolL_rReg(dst, src, shift, cr);
12820  %}
12821%}
12822
12823instruct rolL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
12824%{
12825  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c0 shift))));
12826
12827  expand %{
12828    rolL_rReg(dst, src, shift, cr);
12829  %}
12830%}
12831
12832instruct rolI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
12833%{
12834  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c_32 shift))));
12835
12836  expand %{
12837    rolI_rReg(dst, src, shift, cr);
12838  %}
12839%}
12840
12841instruct rolI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
12842%{
12843  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c0 shift))));
12844
12845  expand %{
12846    rolI_rReg(dst, src, shift, cr);
12847  %}
12848%}
12849
12850// ror expander
12851
12852instruct rorL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
12853%{
12854  effect(DEF dst, USE src, USE shift);
12855
12856  format %{ "ror    $dst, $src, $shift" %}
12857  ins_cost(INSN_COST);
12858  ins_encode %{
12859    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
12860            as_Register($shift$$reg));
12861    %}
12862  ins_pipe(ialu_reg_reg_vshift);
12863%}
12864
12865// ror expander
12866
12867instruct rorI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
12868%{
12869  effect(DEF dst, USE src, USE shift);
12870
12871  format %{ "ror    $dst, $src, $shift" %}
12872  ins_cost(INSN_COST);
12873  ins_encode %{
12874    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
12875            as_Register($shift$$reg));
12876    %}
12877  ins_pipe(ialu_reg_reg_vshift);
12878%}
12879
12880instruct rorL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
12881%{
12882  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c_64 shift))));
12883
12884  expand %{
12885    rorL_rReg(dst, src, shift, cr);
12886  %}
12887%}
12888
12889instruct rorL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
12890%{
12891  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c0 shift))));
12892
12893  expand %{
12894    rorL_rReg(dst, src, shift, cr);
12895  %}
12896%}
12897
12898instruct rorI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
12899%{
12900  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c_32 shift))));
12901
12902  expand %{
12903    rorI_rReg(dst, src, shift, cr);
12904  %}
12905%}
12906
12907instruct rorI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
12908%{
12909  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c0 shift))));
12910
12911  expand %{
12912    rorI_rReg(dst, src, shift, cr);
12913  %}
12914%}
12915
12916// Add/subtract (extended)
12917
12918instruct AddExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
12919%{
12920  match(Set dst (AddL src1 (ConvI2L src2)));
12921  ins_cost(INSN_COST);
12922  format %{ "add  $dst, $src1, $src2, sxtw" %}
12923
12924   ins_encode %{
12925     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
12926            as_Register($src2$$reg), ext::sxtw);
12927   %}
12928  ins_pipe(ialu_reg_reg);
12929%};
12930
12931instruct SubExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
12932%{
12933  match(Set dst (SubL src1 (ConvI2L src2)));
12934  ins_cost(INSN_COST);
12935  format %{ "sub  $dst, $src1, $src2, sxtw" %}
12936
12937   ins_encode %{
12938     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
12939            as_Register($src2$$reg), ext::sxtw);
12940   %}
12941  ins_pipe(ialu_reg_reg);
12942%};
12943
12944
12945instruct AddExtI_sxth(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_16 lshift, immI_16 rshift, rFlagsReg cr)
12946%{
12947  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
12948  ins_cost(INSN_COST);
12949  format %{ "add  $dst, $src1, $src2, sxth" %}
12950
12951   ins_encode %{
12952     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
12953            as_Register($src2$$reg), ext::sxth);
12954   %}
12955  ins_pipe(ialu_reg_reg);
12956%}
12957
12958instruct AddExtI_sxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
12959%{
12960  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
12961  ins_cost(INSN_COST);
12962  format %{ "add  $dst, $src1, $src2, sxtb" %}
12963
12964   ins_encode %{
12965     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
12966            as_Register($src2$$reg), ext::sxtb);
12967   %}
12968  ins_pipe(ialu_reg_reg);
12969%}
12970
12971instruct AddExtI_uxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
12972%{
12973  match(Set dst (AddI src1 (URShiftI (LShiftI src2 lshift) rshift)));
12974  ins_cost(INSN_COST);
12975  format %{ "add  $dst, $src1, $src2, uxtb" %}
12976
12977   ins_encode %{
12978     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
12979            as_Register($src2$$reg), ext::uxtb);
12980   %}
12981  ins_pipe(ialu_reg_reg);
12982%}
12983
12984instruct AddExtL_sxth(iRegLNoSp dst, iRegL src1, iRegL src2, immI_48 lshift, immI_48 rshift, rFlagsReg cr)
12985%{
12986  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
12987  ins_cost(INSN_COST);
12988  format %{ "add  $dst, $src1, $src2, sxth" %}
12989
12990   ins_encode %{
12991     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
12992            as_Register($src2$$reg), ext::sxth);
12993   %}
12994  ins_pipe(ialu_reg_reg);
12995%}
12996
12997instruct AddExtL_sxtw(iRegLNoSp dst, iRegL src1, iRegL src2, immI_32 lshift, immI_32 rshift, rFlagsReg cr)
12998%{
12999  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
13000  ins_cost(INSN_COST);
13001  format %{ "add  $dst, $src1, $src2, sxtw" %}
13002
13003   ins_encode %{
13004     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
13005            as_Register($src2$$reg), ext::sxtw);
13006   %}
13007  ins_pipe(ialu_reg_reg);
13008%}
13009
13010instruct AddExtL_sxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
13011%{
13012  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
13013  ins_cost(INSN_COST);
13014  format %{ "add  $dst, $src1, $src2, sxtb" %}
13015
13016   ins_encode %{
13017     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
13018            as_Register($src2$$reg), ext::sxtb);
13019   %}
13020  ins_pipe(ialu_reg_reg);
13021%}
13022
13023instruct AddExtL_uxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
13024%{
13025  match(Set dst (AddL src1 (URShiftL (LShiftL src2 lshift) rshift)));
13026  ins_cost(INSN_COST);
13027  format %{ "add  $dst, $src1, $src2, uxtb" %}
13028
13029   ins_encode %{
13030     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
13031            as_Register($src2$$reg), ext::uxtb);
13032   %}
13033  ins_pipe(ialu_reg_reg);
13034%}
13035
13036
13037instruct AddExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
13038%{
13039  match(Set dst (AddI src1 (AndI src2 mask)));
13040  ins_cost(INSN_COST);
13041  format %{ "addw  $dst, $src1, $src2, uxtb" %}
13042
13043   ins_encode %{
13044     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
13045            as_Register($src2$$reg), ext::uxtb);
13046   %}
13047  ins_pipe(ialu_reg_reg);
13048%}
13049
13050instruct AddExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
13051%{
13052  match(Set dst (AddI src1 (AndI src2 mask)));
13053  ins_cost(INSN_COST);
13054  format %{ "addw  $dst, $src1, $src2, uxth" %}
13055
13056   ins_encode %{
13057     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
13058            as_Register($src2$$reg), ext::uxth);
13059   %}
13060  ins_pipe(ialu_reg_reg);
13061%}
13062
13063instruct AddExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
13064%{
13065  match(Set dst (AddL src1 (AndL src2 mask)));
13066  ins_cost(INSN_COST);
13067  format %{ "add  $dst, $src1, $src2, uxtb" %}
13068
13069   ins_encode %{
13070     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
13071            as_Register($src2$$reg), ext::uxtb);
13072   %}
13073  ins_pipe(ialu_reg_reg);
13074%}
13075
13076instruct AddExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
13077%{
13078  match(Set dst (AddL src1 (AndL src2 mask)));
13079  ins_cost(INSN_COST);
13080  format %{ "add  $dst, $src1, $src2, uxth" %}
13081
13082   ins_encode %{
13083     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
13084            as_Register($src2$$reg), ext::uxth);
13085   %}
13086  ins_pipe(ialu_reg_reg);
13087%}
13088
13089instruct AddExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
13090%{
13091  match(Set dst (AddL src1 (AndL src2 mask)));
13092  ins_cost(INSN_COST);
13093  format %{ "add  $dst, $src1, $src2, uxtw" %}
13094
13095   ins_encode %{
13096     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
13097            as_Register($src2$$reg), ext::uxtw);
13098   %}
13099  ins_pipe(ialu_reg_reg);
13100%}
13101
13102instruct SubExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
13103%{
13104  match(Set dst (SubI src1 (AndI src2 mask)));
13105  ins_cost(INSN_COST);
13106  format %{ "subw  $dst, $src1, $src2, uxtb" %}
13107
13108   ins_encode %{
13109     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
13110            as_Register($src2$$reg), ext::uxtb);
13111   %}
13112  ins_pipe(ialu_reg_reg);
13113%}
13114
13115instruct SubExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
13116%{
13117  match(Set dst (SubI src1 (AndI src2 mask)));
13118  ins_cost(INSN_COST);
13119  format %{ "subw  $dst, $src1, $src2, uxth" %}
13120
13121   ins_encode %{
13122     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
13123            as_Register($src2$$reg), ext::uxth);
13124   %}
13125  ins_pipe(ialu_reg_reg);
13126%}
13127
13128instruct SubExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
13129%{
13130  match(Set dst (SubL src1 (AndL src2 mask)));
13131  ins_cost(INSN_COST);
13132  format %{ "sub  $dst, $src1, $src2, uxtb" %}
13133
13134   ins_encode %{
13135     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
13136            as_Register($src2$$reg), ext::uxtb);
13137   %}
13138  ins_pipe(ialu_reg_reg);
13139%}
13140
13141instruct SubExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
13142%{
13143  match(Set dst (SubL src1 (AndL src2 mask)));
13144  ins_cost(INSN_COST);
13145  format %{ "sub  $dst, $src1, $src2, uxth" %}
13146
13147   ins_encode %{
13148     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
13149            as_Register($src2$$reg), ext::uxth);
13150   %}
13151  ins_pipe(ialu_reg_reg);
13152%}
13153
13154instruct SubExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
13155%{
13156  match(Set dst (SubL src1 (AndL src2 mask)));
13157  ins_cost(INSN_COST);
13158  format %{ "sub  $dst, $src1, $src2, uxtw" %}
13159
13160   ins_encode %{
13161     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
13162            as_Register($src2$$reg), ext::uxtw);
13163   %}
13164  ins_pipe(ialu_reg_reg);
13165%}
13166
13167
13168instruct AddExtL_sxtb_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_56 lshift1, immI_56 rshift1, rFlagsReg cr)
13169%{
13170  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
13171  ins_cost(1.9 * INSN_COST);
13172  format %{ "add  $dst, $src1, $src2, sxtb #lshift2" %}
13173
13174   ins_encode %{
13175     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
13176            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
13177   %}
13178  ins_pipe(ialu_reg_reg_shift);
13179%}
13180
13181instruct AddExtL_sxth_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_48 lshift1, immI_48 rshift1, rFlagsReg cr)
13182%{
13183  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
13184  ins_cost(1.9 * INSN_COST);
13185  format %{ "add  $dst, $src1, $src2, sxth #lshift2" %}
13186
13187   ins_encode %{
13188     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
13189            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
13190   %}
13191  ins_pipe(ialu_reg_reg_shift);
13192%}
13193
13194instruct AddExtL_sxtw_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_32 lshift1, immI_32 rshift1, rFlagsReg cr)
13195%{
13196  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
13197  ins_cost(1.9 * INSN_COST);
13198  format %{ "add  $dst, $src1, $src2, sxtw #lshift2" %}
13199
13200   ins_encode %{
13201     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
13202            as_Register($src2$$reg), ext::sxtw, ($lshift2$$constant));
13203   %}
13204  ins_pipe(ialu_reg_reg_shift);
13205%}
13206
13207instruct SubExtL_sxtb_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_56 lshift1, immI_56 rshift1, rFlagsReg cr)
13208%{
13209  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
13210  ins_cost(1.9 * INSN_COST);
13211  format %{ "sub  $dst, $src1, $src2, sxtb #lshift2" %}
13212
13213   ins_encode %{
13214     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
13215            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
13216   %}
13217  ins_pipe(ialu_reg_reg_shift);
13218%}
13219
13220instruct SubExtL_sxth_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_48 lshift1, immI_48 rshift1, rFlagsReg cr)
13221%{
13222  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
13223  ins_cost(1.9 * INSN_COST);
13224  format %{ "sub  $dst, $src1, $src2, sxth #lshift2" %}
13225
13226   ins_encode %{
13227     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
13228            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
13229   %}
13230  ins_pipe(ialu_reg_reg_shift);
13231%}
13232
13233instruct SubExtL_sxtw_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_32 lshift1, immI_32 rshift1, rFlagsReg cr)
13234%{
13235  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
13236  ins_cost(1.9 * INSN_COST);
13237  format %{ "sub  $dst, $src1, $src2, sxtw #lshift2" %}
13238
13239   ins_encode %{
13240     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
13241            as_Register($src2$$reg), ext::sxtw, ($lshift2$$constant));
13242   %}
13243  ins_pipe(ialu_reg_reg_shift);
13244%}
13245
13246instruct AddExtI_sxtb_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_24 lshift1, immI_24 rshift1, rFlagsReg cr)
13247%{
13248  match(Set dst (AddI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
13249  ins_cost(1.9 * INSN_COST);
13250  format %{ "addw  $dst, $src1, $src2, sxtb #lshift2" %}
13251
13252   ins_encode %{
13253     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
13254            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
13255   %}
13256  ins_pipe(ialu_reg_reg_shift);
13257%}
13258
13259instruct AddExtI_sxth_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_16 lshift1, immI_16 rshift1, rFlagsReg cr)
13260%{
13261  match(Set dst (AddI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
13262  ins_cost(1.9 * INSN_COST);
13263  format %{ "addw  $dst, $src1, $src2, sxth #lshift2" %}
13264
13265   ins_encode %{
13266     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
13267            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
13268   %}
13269  ins_pipe(ialu_reg_reg_shift);
13270%}
13271
13272instruct SubExtI_sxtb_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_24 lshift1, immI_24 rshift1, rFlagsReg cr)
13273%{
13274  match(Set dst (SubI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
13275  ins_cost(1.9 * INSN_COST);
13276  format %{ "subw  $dst, $src1, $src2, sxtb #lshift2" %}
13277
13278   ins_encode %{
13279     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
13280            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
13281   %}
13282  ins_pipe(ialu_reg_reg_shift);
13283%}
13284
13285instruct SubExtI_sxth_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_16 lshift1, immI_16 rshift1, rFlagsReg cr)
13286%{
13287  match(Set dst (SubI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
13288  ins_cost(1.9 * INSN_COST);
13289  format %{ "subw  $dst, $src1, $src2, sxth #lshift2" %}
13290
13291   ins_encode %{
13292     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
13293            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
13294   %}
13295  ins_pipe(ialu_reg_reg_shift);
13296%}
13297
13298
13299instruct AddExtI_shift(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, immIExt lshift, rFlagsReg cr)
13300%{
13301  match(Set dst (AddL src1 (LShiftL (ConvI2L src2) lshift)));
13302  ins_cost(1.9 * INSN_COST);
13303  format %{ "add  $dst, $src1, $src2, sxtw #lshift" %}
13304
13305   ins_encode %{
13306     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
13307            as_Register($src2$$reg), ext::sxtw, ($lshift$$constant));
13308   %}
13309  ins_pipe(ialu_reg_reg_shift);
13310%};
13311
13312instruct SubExtI_shift(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, immIExt lshift, rFlagsReg cr)
13313%{
13314  match(Set dst (SubL src1 (LShiftL (ConvI2L src2) lshift)));
13315  ins_cost(1.9 * INSN_COST);
13316  format %{ "sub  $dst, $src1, $src2, sxtw #lshift" %}
13317
13318   ins_encode %{
13319     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
13320            as_Register($src2$$reg), ext::sxtw, ($lshift$$constant));
13321   %}
13322  ins_pipe(ialu_reg_reg_shift);
13323%};
13324
13325
13326instruct AddExtL_uxtb_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, immIExt lshift, rFlagsReg cr)
13327%{
13328  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
13329  ins_cost(1.9 * INSN_COST);
13330  format %{ "add  $dst, $src1, $src2, uxtb #lshift" %}
13331
13332   ins_encode %{
13333     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
13334            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
13335   %}
13336  ins_pipe(ialu_reg_reg_shift);
13337%}
13338
13339instruct AddExtL_uxth_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, immIExt lshift, rFlagsReg cr)
13340%{
13341  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
13342  ins_cost(1.9 * INSN_COST);
13343  format %{ "add  $dst, $src1, $src2, uxth #lshift" %}
13344
13345   ins_encode %{
13346     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
13347            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
13348   %}
13349  ins_pipe(ialu_reg_reg_shift);
13350%}
13351
13352instruct AddExtL_uxtw_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, immIExt lshift, rFlagsReg cr)
13353%{
13354  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
13355  ins_cost(1.9 * INSN_COST);
13356  format %{ "add  $dst, $src1, $src2, uxtw #lshift" %}
13357
13358   ins_encode %{
13359     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
13360            as_Register($src2$$reg), ext::uxtw, ($lshift$$constant));
13361   %}
13362  ins_pipe(ialu_reg_reg_shift);
13363%}
13364
13365instruct SubExtL_uxtb_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, immIExt lshift, rFlagsReg cr)
13366%{
13367  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
13368  ins_cost(1.9 * INSN_COST);
13369  format %{ "sub  $dst, $src1, $src2, uxtb #lshift" %}
13370
13371   ins_encode %{
13372     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
13373            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
13374   %}
13375  ins_pipe(ialu_reg_reg_shift);
13376%}
13377
13378instruct SubExtL_uxth_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, immIExt lshift, rFlagsReg cr)
13379%{
13380  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
13381  ins_cost(1.9 * INSN_COST);
13382  format %{ "sub  $dst, $src1, $src2, uxth #lshift" %}
13383
13384   ins_encode %{
13385     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
13386            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
13387   %}
13388  ins_pipe(ialu_reg_reg_shift);
13389%}
13390
13391instruct SubExtL_uxtw_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, immIExt lshift, rFlagsReg cr)
13392%{
13393  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
13394  ins_cost(1.9 * INSN_COST);
13395  format %{ "sub  $dst, $src1, $src2, uxtw #lshift" %}
13396
13397   ins_encode %{
13398     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
13399            as_Register($src2$$reg), ext::uxtw, ($lshift$$constant));
13400   %}
13401  ins_pipe(ialu_reg_reg_shift);
13402%}
13403
13404instruct AddExtI_uxtb_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, immIExt lshift, rFlagsReg cr)
13405%{
13406  match(Set dst (AddI src1 (LShiftI (AndI src2 mask) lshift)));
13407  ins_cost(1.9 * INSN_COST);
13408  format %{ "addw  $dst, $src1, $src2, uxtb #lshift" %}
13409
13410   ins_encode %{
13411     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
13412            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
13413   %}
13414  ins_pipe(ialu_reg_reg_shift);
13415%}
13416
13417instruct AddExtI_uxth_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, immIExt lshift, rFlagsReg cr)
13418%{
13419  match(Set dst (AddI src1 (LShiftI (AndI src2 mask) lshift)));
13420  ins_cost(1.9 * INSN_COST);
13421  format %{ "addw  $dst, $src1, $src2, uxth #lshift" %}
13422
13423   ins_encode %{
13424     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
13425            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
13426   %}
13427  ins_pipe(ialu_reg_reg_shift);
13428%}
13429
13430instruct SubExtI_uxtb_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, immIExt lshift, rFlagsReg cr)
13431%{
13432  match(Set dst (SubI src1 (LShiftI (AndI src2 mask) lshift)));
13433  ins_cost(1.9 * INSN_COST);
13434  format %{ "subw  $dst, $src1, $src2, uxtb #lshift" %}
13435
13436   ins_encode %{
13437     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
13438            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
13439   %}
13440  ins_pipe(ialu_reg_reg_shift);
13441%}
13442
13443instruct SubExtI_uxth_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, immIExt lshift, rFlagsReg cr)
13444%{
13445  match(Set dst (SubI src1 (LShiftI (AndI src2 mask) lshift)));
13446  ins_cost(1.9 * INSN_COST);
13447  format %{ "subw  $dst, $src1, $src2, uxth #lshift" %}
13448
13449   ins_encode %{
13450     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
13451            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
13452   %}
13453  ins_pipe(ialu_reg_reg_shift);
13454%}
13455// END This section of the file is automatically generated. Do not edit --------------
13456
13457// ============================================================================
13458// Floating Point Arithmetic Instructions
13459
13460instruct addF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
13461  match(Set dst (AddF src1 src2));
13462
13463  ins_cost(INSN_COST * 5);
13464  format %{ "fadds   $dst, $src1, $src2" %}
13465
13466  ins_encode %{
13467    __ fadds(as_FloatRegister($dst$$reg),
13468             as_FloatRegister($src1$$reg),
13469             as_FloatRegister($src2$$reg));
13470  %}
13471
13472  ins_pipe(fp_dop_reg_reg_s);
13473%}
13474
13475instruct addD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
13476  match(Set dst (AddD src1 src2));
13477
13478  ins_cost(INSN_COST * 5);
13479  format %{ "faddd   $dst, $src1, $src2" %}
13480
13481  ins_encode %{
13482    __ faddd(as_FloatRegister($dst$$reg),
13483             as_FloatRegister($src1$$reg),
13484             as_FloatRegister($src2$$reg));
13485  %}
13486
13487  ins_pipe(fp_dop_reg_reg_d);
13488%}
13489
13490instruct subF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
13491  match(Set dst (SubF src1 src2));
13492
13493  ins_cost(INSN_COST * 5);
13494  format %{ "fsubs   $dst, $src1, $src2" %}
13495
13496  ins_encode %{
13497    __ fsubs(as_FloatRegister($dst$$reg),
13498             as_FloatRegister($src1$$reg),
13499             as_FloatRegister($src2$$reg));
13500  %}
13501
13502  ins_pipe(fp_dop_reg_reg_s);
13503%}
13504
13505instruct subD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
13506  match(Set dst (SubD src1 src2));
13507
13508  ins_cost(INSN_COST * 5);
13509  format %{ "fsubd   $dst, $src1, $src2" %}
13510
13511  ins_encode %{
13512    __ fsubd(as_FloatRegister($dst$$reg),
13513             as_FloatRegister($src1$$reg),
13514             as_FloatRegister($src2$$reg));
13515  %}
13516
13517  ins_pipe(fp_dop_reg_reg_d);
13518%}
13519
13520instruct mulF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
13521  match(Set dst (MulF src1 src2));
13522
13523  ins_cost(INSN_COST * 6);
13524  format %{ "fmuls   $dst, $src1, $src2" %}
13525
13526  ins_encode %{
13527    __ fmuls(as_FloatRegister($dst$$reg),
13528             as_FloatRegister($src1$$reg),
13529             as_FloatRegister($src2$$reg));
13530  %}
13531
13532  ins_pipe(fp_dop_reg_reg_s);
13533%}
13534
13535instruct mulD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
13536  match(Set dst (MulD src1 src2));
13537
13538  ins_cost(INSN_COST * 6);
13539  format %{ "fmuld   $dst, $src1, $src2" %}
13540
13541  ins_encode %{
13542    __ fmuld(as_FloatRegister($dst$$reg),
13543             as_FloatRegister($src1$$reg),
13544             as_FloatRegister($src2$$reg));
13545  %}
13546
13547  ins_pipe(fp_dop_reg_reg_d);
13548%}
13549
13550// src1 * src2 + src3
13551instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
13552  predicate(UseFMA);
13553  match(Set dst (FmaF src3 (Binary src1 src2)));
13554
13555  format %{ "fmadds   $dst, $src1, $src2, $src3" %}
13556
13557  ins_encode %{
13558    __ fmadds(as_FloatRegister($dst$$reg),
13559             as_FloatRegister($src1$$reg),
13560             as_FloatRegister($src2$$reg),
13561             as_FloatRegister($src3$$reg));
13562  %}
13563
13564  ins_pipe(pipe_class_default);
13565%}
13566
13567// src1 * src2 + src3
13568instruct maddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
13569  predicate(UseFMA);
13570  match(Set dst (FmaD src3 (Binary src1 src2)));
13571
13572  format %{ "fmaddd   $dst, $src1, $src2, $src3" %}
13573
13574  ins_encode %{
13575    __ fmaddd(as_FloatRegister($dst$$reg),
13576             as_FloatRegister($src1$$reg),
13577             as_FloatRegister($src2$$reg),
13578             as_FloatRegister($src3$$reg));
13579  %}
13580
13581  ins_pipe(pipe_class_default);
13582%}
13583
13584// -src1 * src2 + src3
13585instruct msubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
13586  predicate(UseFMA);
13587  match(Set dst (FmaF src3 (Binary (NegF src1) src2)));
13588  match(Set dst (FmaF src3 (Binary src1 (NegF src2))));
13589
13590  format %{ "fmsubs   $dst, $src1, $src2, $src3" %}
13591
13592  ins_encode %{
13593    __ fmsubs(as_FloatRegister($dst$$reg),
13594              as_FloatRegister($src1$$reg),
13595              as_FloatRegister($src2$$reg),
13596              as_FloatRegister($src3$$reg));
13597  %}
13598
13599  ins_pipe(pipe_class_default);
13600%}
13601
13602// -src1 * src2 + src3
13603instruct msubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
13604  predicate(UseFMA);
13605  match(Set dst (FmaD src3 (Binary (NegD src1) src2)));
13606  match(Set dst (FmaD src3 (Binary src1 (NegD src2))));
13607
13608  format %{ "fmsubd   $dst, $src1, $src2, $src3" %}
13609
13610  ins_encode %{
13611    __ fmsubd(as_FloatRegister($dst$$reg),
13612              as_FloatRegister($src1$$reg),
13613              as_FloatRegister($src2$$reg),
13614              as_FloatRegister($src3$$reg));
13615  %}
13616
13617  ins_pipe(pipe_class_default);
13618%}
13619
13620// -src1 * src2 - src3
13621instruct mnaddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
13622  predicate(UseFMA);
13623  match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2)));
13624  match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2))));
13625
13626  format %{ "fnmadds  $dst, $src1, $src2, $src3" %}
13627
13628  ins_encode %{
13629    __ fnmadds(as_FloatRegister($dst$$reg),
13630               as_FloatRegister($src1$$reg),
13631               as_FloatRegister($src2$$reg),
13632               as_FloatRegister($src3$$reg));
13633  %}
13634
13635  ins_pipe(pipe_class_default);
13636%}
13637
13638// -src1 * src2 - src3
13639instruct mnaddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
13640  predicate(UseFMA);
13641  match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2)));
13642  match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2))));
13643
13644  format %{ "fnmaddd   $dst, $src1, $src2, $src3" %}
13645
13646  ins_encode %{
13647    __ fnmaddd(as_FloatRegister($dst$$reg),
13648               as_FloatRegister($src1$$reg),
13649               as_FloatRegister($src2$$reg),
13650               as_FloatRegister($src3$$reg));
13651  %}
13652
13653  ins_pipe(pipe_class_default);
13654%}
13655
13656// src1 * src2 - src3
13657instruct mnsubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3, immF0 zero) %{
13658  predicate(UseFMA);
13659  match(Set dst (FmaF (NegF src3) (Binary src1 src2)));
13660
13661  format %{ "fnmsubs  $dst, $src1, $src2, $src3" %}
13662
13663  ins_encode %{
13664    __ fnmsubs(as_FloatRegister($dst$$reg),
13665               as_FloatRegister($src1$$reg),
13666               as_FloatRegister($src2$$reg),
13667               as_FloatRegister($src3$$reg));
13668  %}
13669
13670  ins_pipe(pipe_class_default);
13671%}
13672
13673// src1 * src2 - src3
13674instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3, immD0 zero) %{
13675  predicate(UseFMA);
13676  match(Set dst (FmaD (NegD src3) (Binary src1 src2)));
13677
13678  format %{ "fnmsubd   $dst, $src1, $src2, $src3" %}
13679
13680  ins_encode %{
13681  // n.b. insn name should be fnmsubd
13682    __ fnmsub(as_FloatRegister($dst$$reg),
13683              as_FloatRegister($src1$$reg),
13684              as_FloatRegister($src2$$reg),
13685              as_FloatRegister($src3$$reg));
13686  %}
13687
13688  ins_pipe(pipe_class_default);
13689%}
13690
13691
13692instruct divF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
13693  match(Set dst (DivF src1  src2));
13694
13695  ins_cost(INSN_COST * 18);
13696  format %{ "fdivs   $dst, $src1, $src2" %}
13697
13698  ins_encode %{
13699    __ fdivs(as_FloatRegister($dst$$reg),
13700             as_FloatRegister($src1$$reg),
13701             as_FloatRegister($src2$$reg));
13702  %}
13703
13704  ins_pipe(fp_div_s);
13705%}
13706
13707instruct divD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
13708  match(Set dst (DivD src1  src2));
13709
13710  ins_cost(INSN_COST * 32);
13711  format %{ "fdivd   $dst, $src1, $src2" %}
13712
13713  ins_encode %{
13714    __ fdivd(as_FloatRegister($dst$$reg),
13715             as_FloatRegister($src1$$reg),
13716             as_FloatRegister($src2$$reg));
13717  %}
13718
13719  ins_pipe(fp_div_d);
13720%}
13721
13722instruct negF_reg_reg(vRegF dst, vRegF src) %{
13723  match(Set dst (NegF src));
13724
13725  ins_cost(INSN_COST * 3);
13726  format %{ "fneg   $dst, $src" %}
13727
13728  ins_encode %{
13729    __ fnegs(as_FloatRegister($dst$$reg),
13730             as_FloatRegister($src$$reg));
13731  %}
13732
13733  ins_pipe(fp_uop_s);
13734%}
13735
13736instruct negD_reg_reg(vRegD dst, vRegD src) %{
13737  match(Set dst (NegD src));
13738
13739  ins_cost(INSN_COST * 3);
13740  format %{ "fnegd   $dst, $src" %}
13741
13742  ins_encode %{
13743    __ fnegd(as_FloatRegister($dst$$reg),
13744             as_FloatRegister($src$$reg));
13745  %}
13746
13747  ins_pipe(fp_uop_d);
13748%}
13749
13750instruct absF_reg(vRegF dst, vRegF src) %{
13751  match(Set dst (AbsF src));
13752
13753  ins_cost(INSN_COST * 3);
13754  format %{ "fabss   $dst, $src" %}
13755  ins_encode %{
13756    __ fabss(as_FloatRegister($dst$$reg),
13757             as_FloatRegister($src$$reg));
13758  %}
13759
13760  ins_pipe(fp_uop_s);
13761%}
13762
13763instruct absD_reg(vRegD dst, vRegD src) %{
13764  match(Set dst (AbsD src));
13765
13766  ins_cost(INSN_COST * 3);
13767  format %{ "fabsd   $dst, $src" %}
13768  ins_encode %{
13769    __ fabsd(as_FloatRegister($dst$$reg),
13770             as_FloatRegister($src$$reg));
13771  %}
13772
13773  ins_pipe(fp_uop_d);
13774%}
13775
13776instruct sqrtD_reg(vRegD dst, vRegD src) %{
13777  match(Set dst (SqrtD src));
13778
13779  ins_cost(INSN_COST * 50);
13780  format %{ "fsqrtd  $dst, $src" %}
13781  ins_encode %{
13782    __ fsqrtd(as_FloatRegister($dst$$reg),
13783             as_FloatRegister($src$$reg));
13784  %}
13785
13786  ins_pipe(fp_div_s);
13787%}
13788
13789instruct sqrtF_reg(vRegF dst, vRegF src) %{
13790  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
13791
13792  ins_cost(INSN_COST * 50);
13793  format %{ "fsqrts  $dst, $src" %}
13794  ins_encode %{
13795    __ fsqrts(as_FloatRegister($dst$$reg),
13796             as_FloatRegister($src$$reg));
13797  %}
13798
13799  ins_pipe(fp_div_d);
13800%}
13801
13802// ============================================================================
13803// Logical Instructions
13804
13805// Integer Logical Instructions
13806
13807// And Instructions
13808
13809
13810instruct andI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, rFlagsReg cr) %{
13811  match(Set dst (AndI src1 src2));
13812
13813  format %{ "andw  $dst, $src1, $src2\t# int" %}
13814
13815  ins_cost(INSN_COST);
13816  ins_encode %{
13817    __ andw(as_Register($dst$$reg),
13818            as_Register($src1$$reg),
13819            as_Register($src2$$reg));
13820  %}
13821
13822  ins_pipe(ialu_reg_reg);
13823%}
13824
13825instruct andI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2, rFlagsReg cr) %{
13826  match(Set dst (AndI src1 src2));
13827
13828  format %{ "andsw  $dst, $src1, $src2\t# int" %}
13829
13830  ins_cost(INSN_COST);
13831  ins_encode %{
13832    __ andw(as_Register($dst$$reg),
13833            as_Register($src1$$reg),
13834            (unsigned long)($src2$$constant));
13835  %}
13836
13837  ins_pipe(ialu_reg_imm);
13838%}
13839
13840// Or Instructions
13841
13842instruct orI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
13843  match(Set dst (OrI src1 src2));
13844
13845  format %{ "orrw  $dst, $src1, $src2\t# int" %}
13846
13847  ins_cost(INSN_COST);
13848  ins_encode %{
13849    __ orrw(as_Register($dst$$reg),
13850            as_Register($src1$$reg),
13851            as_Register($src2$$reg));
13852  %}
13853
13854  ins_pipe(ialu_reg_reg);
13855%}
13856
13857instruct orI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
13858  match(Set dst (OrI src1 src2));
13859
13860  format %{ "orrw  $dst, $src1, $src2\t# int" %}
13861
13862  ins_cost(INSN_COST);
13863  ins_encode %{
13864    __ orrw(as_Register($dst$$reg),
13865            as_Register($src1$$reg),
13866            (unsigned long)($src2$$constant));
13867  %}
13868
13869  ins_pipe(ialu_reg_imm);
13870%}
13871
13872// Xor Instructions
13873
13874instruct xorI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
13875  match(Set dst (XorI src1 src2));
13876
13877  format %{ "eorw  $dst, $src1, $src2\t# int" %}
13878
13879  ins_cost(INSN_COST);
13880  ins_encode %{
13881    __ eorw(as_Register($dst$$reg),
13882            as_Register($src1$$reg),
13883            as_Register($src2$$reg));
13884  %}
13885
13886  ins_pipe(ialu_reg_reg);
13887%}
13888
13889instruct xorI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
13890  match(Set dst (XorI src1 src2));
13891
13892  format %{ "eorw  $dst, $src1, $src2\t# int" %}
13893
13894  ins_cost(INSN_COST);
13895  ins_encode %{
13896    __ eorw(as_Register($dst$$reg),
13897            as_Register($src1$$reg),
13898            (unsigned long)($src2$$constant));
13899  %}
13900
13901  ins_pipe(ialu_reg_imm);
13902%}
13903
13904// Long Logical Instructions
13905// TODO
13906
13907instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr) %{
13908  match(Set dst (AndL src1 src2));
13909
13910  format %{ "and  $dst, $src1, $src2\t# int" %}
13911
13912  ins_cost(INSN_COST);
13913  ins_encode %{
13914    __ andr(as_Register($dst$$reg),
13915            as_Register($src1$$reg),
13916            as_Register($src2$$reg));
13917  %}
13918
13919  ins_pipe(ialu_reg_reg);
13920%}
13921
13922instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2, rFlagsReg cr) %{
13923  match(Set dst (AndL src1 src2));
13924
13925  format %{ "and  $dst, $src1, $src2\t# int" %}
13926
13927  ins_cost(INSN_COST);
13928  ins_encode %{
13929    __ andr(as_Register($dst$$reg),
13930            as_Register($src1$$reg),
13931            (unsigned long)($src2$$constant));
13932  %}
13933
13934  ins_pipe(ialu_reg_imm);
13935%}
13936
13937// Or Instructions
13938
13939instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
13940  match(Set dst (OrL src1 src2));
13941
13942  format %{ "orr  $dst, $src1, $src2\t# int" %}
13943
13944  ins_cost(INSN_COST);
13945  ins_encode %{
13946    __ orr(as_Register($dst$$reg),
13947           as_Register($src1$$reg),
13948           as_Register($src2$$reg));
13949  %}
13950
13951  ins_pipe(ialu_reg_reg);
13952%}
13953
13954instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
13955  match(Set dst (OrL src1 src2));
13956
13957  format %{ "orr  $dst, $src1, $src2\t# int" %}
13958
13959  ins_cost(INSN_COST);
13960  ins_encode %{
13961    __ orr(as_Register($dst$$reg),
13962           as_Register($src1$$reg),
13963           (unsigned long)($src2$$constant));
13964  %}
13965
13966  ins_pipe(ialu_reg_imm);
13967%}
13968
13969// Xor Instructions
13970
13971instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
13972  match(Set dst (XorL src1 src2));
13973
13974  format %{ "eor  $dst, $src1, $src2\t# int" %}
13975
13976  ins_cost(INSN_COST);
13977  ins_encode %{
13978    __ eor(as_Register($dst$$reg),
13979           as_Register($src1$$reg),
13980           as_Register($src2$$reg));
13981  %}
13982
13983  ins_pipe(ialu_reg_reg);
13984%}
13985
13986instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
13987  match(Set dst (XorL src1 src2));
13988
13989  ins_cost(INSN_COST);
13990  format %{ "eor  $dst, $src1, $src2\t# int" %}
13991
13992  ins_encode %{
13993    __ eor(as_Register($dst$$reg),
13994           as_Register($src1$$reg),
13995           (unsigned long)($src2$$constant));
13996  %}
13997
13998  ins_pipe(ialu_reg_imm);
13999%}
14000
14001instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src)
14002%{
14003  match(Set dst (ConvI2L src));
14004
14005  ins_cost(INSN_COST);
14006  format %{ "sxtw  $dst, $src\t# i2l" %}
14007  ins_encode %{
14008    __ sbfm($dst$$Register, $src$$Register, 0, 31);
14009  %}
14010  ins_pipe(ialu_reg_shift);
14011%}
14012
14013// this pattern occurs in bigmath arithmetic
14014instruct convUI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask)
14015%{
14016  match(Set dst (AndL (ConvI2L src) mask));
14017
14018  ins_cost(INSN_COST);
14019  format %{ "ubfm  $dst, $src, 0, 31\t# ui2l" %}
14020  ins_encode %{
14021    __ ubfm($dst$$Register, $src$$Register, 0, 31);
14022  %}
14023
14024  ins_pipe(ialu_reg_shift);
14025%}
14026
14027instruct convL2I_reg(iRegINoSp dst, iRegL src) %{
14028  match(Set dst (ConvL2I src));
14029
14030  ins_cost(INSN_COST);
14031  format %{ "movw  $dst, $src \t// l2i" %}
14032
14033  ins_encode %{
14034    __ movw(as_Register($dst$$reg), as_Register($src$$reg));
14035  %}
14036
14037  ins_pipe(ialu_reg);
14038%}
14039
14040instruct convI2B(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr)
14041%{
14042  match(Set dst (Conv2B src));
14043  effect(KILL cr);
14044
14045  format %{
14046    "cmpw $src, zr\n\t"
14047    "cset $dst, ne"
14048  %}
14049
14050  ins_encode %{
14051    __ cmpw(as_Register($src$$reg), zr);
14052    __ cset(as_Register($dst$$reg), Assembler::NE);
14053  %}
14054
14055  ins_pipe(ialu_reg);
14056%}
14057
14058instruct convP2B(iRegINoSp dst, iRegP src, rFlagsReg cr)
14059%{
14060  match(Set dst (Conv2B src));
14061  effect(KILL cr);
14062
14063  format %{
14064    "cmp  $src, zr\n\t"
14065    "cset $dst, ne"
14066  %}
14067
14068  ins_encode %{
14069    __ cmp(as_Register($src$$reg), zr);
14070    __ cset(as_Register($dst$$reg), Assembler::NE);
14071  %}
14072
14073  ins_pipe(ialu_reg);
14074%}
14075
14076instruct convD2F_reg(vRegF dst, vRegD src) %{
14077  match(Set dst (ConvD2F src));
14078
14079  ins_cost(INSN_COST * 5);
14080  format %{ "fcvtd  $dst, $src \t// d2f" %}
14081
14082  ins_encode %{
14083    __ fcvtd(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
14084  %}
14085
14086  ins_pipe(fp_d2f);
14087%}
14088
14089instruct convF2D_reg(vRegD dst, vRegF src) %{
14090  match(Set dst (ConvF2D src));
14091
14092  ins_cost(INSN_COST * 5);
14093  format %{ "fcvts  $dst, $src \t// f2d" %}
14094
14095  ins_encode %{
14096    __ fcvts(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
14097  %}
14098
14099  ins_pipe(fp_f2d);
14100%}
14101
14102instruct convF2I_reg_reg(iRegINoSp dst, vRegF src) %{
14103  match(Set dst (ConvF2I src));
14104
14105  ins_cost(INSN_COST * 5);
14106  format %{ "fcvtzsw  $dst, $src \t// f2i" %}
14107
14108  ins_encode %{
14109    __ fcvtzsw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
14110  %}
14111
14112  ins_pipe(fp_f2i);
14113%}
14114
14115instruct convF2L_reg_reg(iRegLNoSp dst, vRegF src) %{
14116  match(Set dst (ConvF2L src));
14117
14118  ins_cost(INSN_COST * 5);
14119  format %{ "fcvtzs  $dst, $src \t// f2l" %}
14120
14121  ins_encode %{
14122    __ fcvtzs(as_Register($dst$$reg), as_FloatRegister($src$$reg));
14123  %}
14124
14125  ins_pipe(fp_f2l);
14126%}
14127
14128instruct convI2F_reg_reg(vRegF dst, iRegIorL2I src) %{
14129  match(Set dst (ConvI2F src));
14130
14131  ins_cost(INSN_COST * 5);
14132  format %{ "scvtfws  $dst, $src \t// i2f" %}
14133
14134  ins_encode %{
14135    __ scvtfws(as_FloatRegister($dst$$reg), as_Register($src$$reg));
14136  %}
14137
14138  ins_pipe(fp_i2f);
14139%}
14140
14141instruct convL2F_reg_reg(vRegF dst, iRegL src) %{
14142  match(Set dst (ConvL2F src));
14143
14144  ins_cost(INSN_COST * 5);
14145  format %{ "scvtfs  $dst, $src \t// l2f" %}
14146
14147  ins_encode %{
14148    __ scvtfs(as_FloatRegister($dst$$reg), as_Register($src$$reg));
14149  %}
14150
14151  ins_pipe(fp_l2f);
14152%}
14153
14154instruct convD2I_reg_reg(iRegINoSp dst, vRegD src) %{
14155  match(Set dst (ConvD2I src));
14156
14157  ins_cost(INSN_COST * 5);
14158  format %{ "fcvtzdw  $dst, $src \t// d2i" %}
14159
14160  ins_encode %{
14161    __ fcvtzdw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
14162  %}
14163
14164  ins_pipe(fp_d2i);
14165%}
14166
14167instruct convD2L_reg_reg(iRegLNoSp dst, vRegD src) %{
14168  match(Set dst (ConvD2L src));
14169
14170  ins_cost(INSN_COST * 5);
14171  format %{ "fcvtzd  $dst, $src \t// d2l" %}
14172
14173  ins_encode %{
14174    __ fcvtzd(as_Register($dst$$reg), as_FloatRegister($src$$reg));
14175  %}
14176
14177  ins_pipe(fp_d2l);
14178%}
14179
14180instruct convI2D_reg_reg(vRegD dst, iRegIorL2I src) %{
14181  match(Set dst (ConvI2D src));
14182
14183  ins_cost(INSN_COST * 5);
14184  format %{ "scvtfwd  $dst, $src \t// i2d" %}
14185
14186  ins_encode %{
14187    __ scvtfwd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
14188  %}
14189
14190  ins_pipe(fp_i2d);
14191%}
14192
14193instruct convL2D_reg_reg(vRegD dst, iRegL src) %{
14194  match(Set dst (ConvL2D src));
14195
14196  ins_cost(INSN_COST * 5);
14197  format %{ "scvtfd  $dst, $src \t// l2d" %}
14198
14199  ins_encode %{
14200    __ scvtfd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
14201  %}
14202
14203  ins_pipe(fp_l2d);
14204%}
14205
14206// stack <-> reg and reg <-> reg shuffles with no conversion
14207
14208instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{
14209
14210  match(Set dst (MoveF2I src));
14211
14212  effect(DEF dst, USE src);
14213
14214  ins_cost(4 * INSN_COST);
14215
14216  format %{ "ldrw $dst, $src\t# MoveF2I_stack_reg" %}
14217
14218  ins_encode %{
14219    __ ldrw($dst$$Register, Address(sp, $src$$disp));
14220  %}
14221
14222  ins_pipe(iload_reg_reg);
14223
14224%}
14225
14226instruct MoveI2F_stack_reg(vRegF dst, stackSlotI src) %{
14227
14228  match(Set dst (MoveI2F src));
14229
14230  effect(DEF dst, USE src);
14231
14232  ins_cost(4 * INSN_COST);
14233
14234  format %{ "ldrs $dst, $src\t# MoveI2F_stack_reg" %}
14235
14236  ins_encode %{
14237    __ ldrs(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
14238  %}
14239
14240  ins_pipe(pipe_class_memory);
14241
14242%}
14243
14244instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{
14245
14246  match(Set dst (MoveD2L src));
14247
14248  effect(DEF dst, USE src);
14249
14250  ins_cost(4 * INSN_COST);
14251
14252  format %{ "ldr $dst, $src\t# MoveD2L_stack_reg" %}
14253
14254  ins_encode %{
14255    __ ldr($dst$$Register, Address(sp, $src$$disp));
14256  %}
14257
14258  ins_pipe(iload_reg_reg);
14259
14260%}
14261
14262instruct MoveL2D_stack_reg(vRegD dst, stackSlotL src) %{
14263
14264  match(Set dst (MoveL2D src));
14265
14266  effect(DEF dst, USE src);
14267
14268  ins_cost(4 * INSN_COST);
14269
14270  format %{ "ldrd $dst, $src\t# MoveL2D_stack_reg" %}
14271
14272  ins_encode %{
14273    __ ldrd(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
14274  %}
14275
14276  ins_pipe(pipe_class_memory);
14277
14278%}
14279
14280instruct MoveF2I_reg_stack(stackSlotI dst, vRegF src) %{
14281
14282  match(Set dst (MoveF2I src));
14283
14284  effect(DEF dst, USE src);
14285
14286  ins_cost(INSN_COST);
14287
14288  format %{ "strs $src, $dst\t# MoveF2I_reg_stack" %}
14289
14290  ins_encode %{
14291    __ strs(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
14292  %}
14293
14294  ins_pipe(pipe_class_memory);
14295
14296%}
14297
14298instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{
14299
14300  match(Set dst (MoveI2F src));
14301
14302  effect(DEF dst, USE src);
14303
14304  ins_cost(INSN_COST);
14305
14306  format %{ "strw $src, $dst\t# MoveI2F_reg_stack" %}
14307
14308  ins_encode %{
14309    __ strw($src$$Register, Address(sp, $dst$$disp));
14310  %}
14311
14312  ins_pipe(istore_reg_reg);
14313
14314%}
14315
14316instruct MoveD2L_reg_stack(stackSlotL dst, vRegD src) %{
14317
14318  match(Set dst (MoveD2L src));
14319
14320  effect(DEF dst, USE src);
14321
14322  ins_cost(INSN_COST);
14323
14324  format %{ "strd $dst, $src\t# MoveD2L_reg_stack" %}
14325
14326  ins_encode %{
14327    __ strd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
14328  %}
14329
14330  ins_pipe(pipe_class_memory);
14331
14332%}
14333
14334instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{
14335
14336  match(Set dst (MoveL2D src));
14337
14338  effect(DEF dst, USE src);
14339
14340  ins_cost(INSN_COST);
14341
14342  format %{ "str $src, $dst\t# MoveL2D_reg_stack" %}
14343
14344  ins_encode %{
14345    __ str($src$$Register, Address(sp, $dst$$disp));
14346  %}
14347
14348  ins_pipe(istore_reg_reg);
14349
14350%}
14351
14352instruct MoveF2I_reg_reg(iRegINoSp dst, vRegF src) %{
14353
14354  match(Set dst (MoveF2I src));
14355
14356  effect(DEF dst, USE src);
14357
14358  ins_cost(INSN_COST);
14359
14360  format %{ "fmovs $dst, $src\t# MoveF2I_reg_reg" %}
14361
14362  ins_encode %{
14363    __ fmovs($dst$$Register, as_FloatRegister($src$$reg));
14364  %}
14365
14366  ins_pipe(fp_f2i);
14367
14368%}
14369
14370instruct MoveI2F_reg_reg(vRegF dst, iRegI src) %{
14371
14372  match(Set dst (MoveI2F src));
14373
14374  effect(DEF dst, USE src);
14375
14376  ins_cost(INSN_COST);
14377
14378  format %{ "fmovs $dst, $src\t# MoveI2F_reg_reg" %}
14379
14380  ins_encode %{
14381    __ fmovs(as_FloatRegister($dst$$reg), $src$$Register);
14382  %}
14383
14384  ins_pipe(fp_i2f);
14385
14386%}
14387
14388instruct MoveD2L_reg_reg(iRegLNoSp dst, vRegD src) %{
14389
14390  match(Set dst (MoveD2L src));
14391
14392  effect(DEF dst, USE src);
14393
14394  ins_cost(INSN_COST);
14395
14396  format %{ "fmovd $dst, $src\t# MoveD2L_reg_reg" %}
14397
14398  ins_encode %{
14399    __ fmovd($dst$$Register, as_FloatRegister($src$$reg));
14400  %}
14401
14402  ins_pipe(fp_d2l);
14403
14404%}
14405
14406instruct MoveL2D_reg_reg(vRegD dst, iRegL src) %{
14407
14408  match(Set dst (MoveL2D src));
14409
14410  effect(DEF dst, USE src);
14411
14412  ins_cost(INSN_COST);
14413
14414  format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}
14415
14416  ins_encode %{
14417    __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
14418  %}
14419
14420  ins_pipe(fp_l2d);
14421
14422%}
14423
14424// ============================================================================
14425// clearing of an array
14426
14427instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
14428%{
14429  match(Set dummy (ClearArray cnt base));
14430  effect(USE_KILL cnt, USE_KILL base);
14431
14432  ins_cost(4 * INSN_COST);
14433  format %{ "ClearArray $cnt, $base" %}
14434
14435  ins_encode %{
14436    __ zero_words($base$$Register, $cnt$$Register);
14437  %}
14438
14439  ins_pipe(pipe_class_memory);
14440%}
14441
14442instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
14443%{
14444  predicate((u_int64_t)n->in(2)->get_long()
14445            < (u_int64_t)(BlockZeroingLowLimit >> LogBytesPerWord));
14446  match(Set dummy (ClearArray cnt base));
14447  effect(USE_KILL base);
14448
14449  ins_cost(4 * INSN_COST);
14450  format %{ "ClearArray $cnt, $base" %}
14451
14452  ins_encode %{
14453    __ zero_words($base$$Register, (u_int64_t)$cnt$$constant);
14454  %}
14455
14456  ins_pipe(pipe_class_memory);
14457%}
14458
14459// ============================================================================
14460// Overflow Math Instructions
14461
14462instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
14463%{
14464  match(Set cr (OverflowAddI op1 op2));
14465
14466  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
14467  ins_cost(INSN_COST);
14468  ins_encode %{
14469    __ cmnw($op1$$Register, $op2$$Register);
14470  %}
14471
14472  ins_pipe(icmp_reg_reg);
14473%}
14474
14475instruct overflowAddI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
14476%{
14477  match(Set cr (OverflowAddI op1 op2));
14478
14479  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
14480  ins_cost(INSN_COST);
14481  ins_encode %{
14482    __ cmnw($op1$$Register, $op2$$constant);
14483  %}
14484
14485  ins_pipe(icmp_reg_imm);
14486%}
14487
14488instruct overflowAddL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
14489%{
14490  match(Set cr (OverflowAddL op1 op2));
14491
14492  format %{ "cmn   $op1, $op2\t# overflow check long" %}
14493  ins_cost(INSN_COST);
14494  ins_encode %{
14495    __ cmn($op1$$Register, $op2$$Register);
14496  %}
14497
14498  ins_pipe(icmp_reg_reg);
14499%}
14500
14501instruct overflowAddL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
14502%{
14503  match(Set cr (OverflowAddL op1 op2));
14504
14505  format %{ "cmn   $op1, $op2\t# overflow check long" %}
14506  ins_cost(INSN_COST);
14507  ins_encode %{
14508    __ cmn($op1$$Register, $op2$$constant);
14509  %}
14510
14511  ins_pipe(icmp_reg_imm);
14512%}
14513
14514instruct overflowSubI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
14515%{
14516  match(Set cr (OverflowSubI op1 op2));
14517
14518  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
14519  ins_cost(INSN_COST);
14520  ins_encode %{
14521    __ cmpw($op1$$Register, $op2$$Register);
14522  %}
14523
14524  ins_pipe(icmp_reg_reg);
14525%}
14526
14527instruct overflowSubI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
14528%{
14529  match(Set cr (OverflowSubI op1 op2));
14530
14531  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
14532  ins_cost(INSN_COST);
14533  ins_encode %{
14534    __ cmpw($op1$$Register, $op2$$constant);
14535  %}
14536
14537  ins_pipe(icmp_reg_imm);
14538%}
14539
14540instruct overflowSubL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
14541%{
14542  match(Set cr (OverflowSubL op1 op2));
14543
14544  format %{ "cmp   $op1, $op2\t# overflow check long" %}
14545  ins_cost(INSN_COST);
14546  ins_encode %{
14547    __ cmp($op1$$Register, $op2$$Register);
14548  %}
14549
14550  ins_pipe(icmp_reg_reg);
14551%}
14552
14553instruct overflowSubL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
14554%{
14555  match(Set cr (OverflowSubL op1 op2));
14556
14557  format %{ "cmp   $op1, $op2\t# overflow check long" %}
14558  ins_cost(INSN_COST);
14559  ins_encode %{
14560    __ cmp($op1$$Register, $op2$$constant);
14561  %}
14562
14563  ins_pipe(icmp_reg_imm);
14564%}
14565
14566instruct overflowNegI_reg(rFlagsReg cr, immI0 zero, iRegIorL2I op1)
14567%{
14568  match(Set cr (OverflowSubI zero op1));
14569
14570  format %{ "cmpw  zr, $op1\t# overflow check int" %}
14571  ins_cost(INSN_COST);
14572  ins_encode %{
14573    __ cmpw(zr, $op1$$Register);
14574  %}
14575
14576  ins_pipe(icmp_reg_imm);
14577%}
14578
14579instruct overflowNegL_reg(rFlagsReg cr, immI0 zero, iRegL op1)
14580%{
14581  match(Set cr (OverflowSubL zero op1));
14582
14583  format %{ "cmp   zr, $op1\t# overflow check long" %}
14584  ins_cost(INSN_COST);
14585  ins_encode %{
14586    __ cmp(zr, $op1$$Register);
14587  %}
14588
14589  ins_pipe(icmp_reg_imm);
14590%}
14591
14592instruct overflowMulI_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
14593%{
14594  match(Set cr (OverflowMulI op1 op2));
14595
14596  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
14597            "cmp   rscratch1, rscratch1, sxtw\n\t"
14598            "movw  rscratch1, #0x80000000\n\t"
14599            "cselw rscratch1, rscratch1, zr, NE\n\t"
14600            "cmpw  rscratch1, #1" %}
14601  ins_cost(5 * INSN_COST);
14602  ins_encode %{
14603    __ smull(rscratch1, $op1$$Register, $op2$$Register);
14604    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
14605    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
14606    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
14607    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
14608  %}
14609
14610  ins_pipe(pipe_slow);
14611%}
14612
14613instruct overflowMulI_reg_branch(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, label labl, rFlagsReg cr)
14614%{
14615  match(If cmp (OverflowMulI op1 op2));
14616  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
14617            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
14618  effect(USE labl, KILL cr);
14619
14620  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
14621            "cmp   rscratch1, rscratch1, sxtw\n\t"
14622            "b$cmp   $labl" %}
14623  ins_cost(3 * INSN_COST); // Branch is rare so treat as INSN_COST
14624  ins_encode %{
14625    Label* L = $labl$$label;
14626    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
14627    __ smull(rscratch1, $op1$$Register, $op2$$Register);
14628    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
14629    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
14630  %}
14631
14632  ins_pipe(pipe_serial);
14633%}
14634
14635instruct overflowMulL_reg(rFlagsReg cr, iRegL op1, iRegL op2)
14636%{
14637  match(Set cr (OverflowMulL op1 op2));
14638
14639  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
14640            "smulh rscratch2, $op1, $op2\n\t"
14641            "cmp   rscratch2, rscratch1, ASR #63\n\t"
14642            "movw  rscratch1, #0x80000000\n\t"
14643            "cselw rscratch1, rscratch1, zr, NE\n\t"
14644            "cmpw  rscratch1, #1" %}
14645  ins_cost(6 * INSN_COST);
14646  ins_encode %{
14647    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
14648    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
14649    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
14650    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
14651    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
14652    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
14653  %}
14654
14655  ins_pipe(pipe_slow);
14656%}
14657
14658instruct overflowMulL_reg_branch(cmpOp cmp, iRegL op1, iRegL op2, label labl, rFlagsReg cr)
14659%{
14660  match(If cmp (OverflowMulL op1 op2));
14661  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
14662            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
14663  effect(USE labl, KILL cr);
14664
14665  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
14666            "smulh rscratch2, $op1, $op2\n\t"
14667            "cmp   rscratch2, rscratch1, ASR #63\n\t"
14668            "b$cmp $labl" %}
14669  ins_cost(4 * INSN_COST); // Branch is rare so treat as INSN_COST
14670  ins_encode %{
14671    Label* L = $labl$$label;
14672    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
14673    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
14674    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
14675    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
14676    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
14677  %}
14678
14679  ins_pipe(pipe_serial);
14680%}
14681
14682// ============================================================================
14683// Compare Instructions
14684
14685instruct compI_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
14686%{
14687  match(Set cr (CmpI op1 op2));
14688
14689  effect(DEF cr, USE op1, USE op2);
14690
14691  ins_cost(INSN_COST);
14692  format %{ "cmpw  $op1, $op2" %}
14693
14694  ins_encode(aarch64_enc_cmpw(op1, op2));
14695
14696  ins_pipe(icmp_reg_reg);
14697%}
14698
14699instruct compI_reg_immI0(rFlagsReg cr, iRegI op1, immI0 zero)
14700%{
14701  match(Set cr (CmpI op1 zero));
14702
14703  effect(DEF cr, USE op1);
14704
14705  ins_cost(INSN_COST);
14706  format %{ "cmpw $op1, 0" %}
14707
14708  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));
14709
14710  ins_pipe(icmp_reg_imm);
14711%}
14712
14713instruct compI_reg_immIAddSub(rFlagsReg cr, iRegI op1, immIAddSub op2)
14714%{
14715  match(Set cr (CmpI op1 op2));
14716
14717  effect(DEF cr, USE op1);
14718
14719  ins_cost(INSN_COST);
14720  format %{ "cmpw  $op1, $op2" %}
14721
14722  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));
14723
14724  ins_pipe(icmp_reg_imm);
14725%}
14726
14727instruct compI_reg_immI(rFlagsReg cr, iRegI op1, immI op2)
14728%{
14729  match(Set cr (CmpI op1 op2));
14730
14731  effect(DEF cr, USE op1);
14732
14733  ins_cost(INSN_COST * 2);
14734  format %{ "cmpw  $op1, $op2" %}
14735
14736  ins_encode(aarch64_enc_cmpw_imm(op1, op2));
14737
14738  ins_pipe(icmp_reg_imm);
14739%}
14740
14741// Unsigned compare Instructions; really, same as signed compare
14742// except it should only be used to feed an If or a CMovI which takes a
14743// cmpOpU.
14744
14745instruct compU_reg_reg(rFlagsRegU cr, iRegI op1, iRegI op2)
14746%{
14747  match(Set cr (CmpU op1 op2));
14748
14749  effect(DEF cr, USE op1, USE op2);
14750
14751  ins_cost(INSN_COST);
14752  format %{ "cmpw  $op1, $op2\t# unsigned" %}
14753
14754  ins_encode(aarch64_enc_cmpw(op1, op2));
14755
14756  ins_pipe(icmp_reg_reg);
14757%}
14758
14759instruct compU_reg_immI0(rFlagsRegU cr, iRegI op1, immI0 zero)
14760%{
14761  match(Set cr (CmpU op1 zero));
14762
14763  effect(DEF cr, USE op1);
14764
14765  ins_cost(INSN_COST);
14766  format %{ "cmpw $op1, #0\t# unsigned" %}
14767
14768  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));
14769
14770  ins_pipe(icmp_reg_imm);
14771%}
14772
14773instruct compU_reg_immIAddSub(rFlagsRegU cr, iRegI op1, immIAddSub op2)
14774%{
14775  match(Set cr (CmpU op1 op2));
14776
14777  effect(DEF cr, USE op1);
14778
14779  ins_cost(INSN_COST);
14780  format %{ "cmpw  $op1, $op2\t# unsigned" %}
14781
14782  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));
14783
14784  ins_pipe(icmp_reg_imm);
14785%}
14786
14787instruct compU_reg_immI(rFlagsRegU cr, iRegI op1, immI op2)
14788%{
14789  match(Set cr (CmpU op1 op2));
14790
14791  effect(DEF cr, USE op1);
14792
14793  ins_cost(INSN_COST * 2);
14794  format %{ "cmpw  $op1, $op2\t# unsigned" %}
14795
14796  ins_encode(aarch64_enc_cmpw_imm(op1, op2));
14797
14798  ins_pipe(icmp_reg_imm);
14799%}
14800
14801instruct compL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
14802%{
14803  match(Set cr (CmpL op1 op2));
14804
14805  effect(DEF cr, USE op1, USE op2);
14806
14807  ins_cost(INSN_COST);
14808  format %{ "cmp  $op1, $op2" %}
14809
14810  ins_encode(aarch64_enc_cmp(op1, op2));
14811
14812  ins_pipe(icmp_reg_reg);
14813%}
14814
14815instruct compL_reg_immL0(rFlagsReg cr, iRegL op1, immL0 zero)
14816%{
14817  match(Set cr (CmpL op1 zero));
14818
14819  effect(DEF cr, USE op1);
14820
14821  ins_cost(INSN_COST);
14822  format %{ "tst  $op1" %}
14823
14824  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));
14825
14826  ins_pipe(icmp_reg_imm);
14827%}
14828
14829instruct compL_reg_immLAddSub(rFlagsReg cr, iRegL op1, immLAddSub op2)
14830%{
14831  match(Set cr (CmpL op1 op2));
14832
14833  effect(DEF cr, USE op1);
14834
14835  ins_cost(INSN_COST);
14836  format %{ "cmp  $op1, $op2" %}
14837
14838  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));
14839
14840  ins_pipe(icmp_reg_imm);
14841%}
14842
14843instruct compL_reg_immL(rFlagsReg cr, iRegL op1, immL op2)
14844%{
14845  match(Set cr (CmpL op1 op2));
14846
14847  effect(DEF cr, USE op1);
14848
14849  ins_cost(INSN_COST * 2);
14850  format %{ "cmp  $op1, $op2" %}
14851
14852  ins_encode(aarch64_enc_cmp_imm(op1, op2));
14853
14854  ins_pipe(icmp_reg_imm);
14855%}
14856
14857instruct compUL_reg_reg(rFlagsRegU cr, iRegL op1, iRegL op2)
14858%{
14859  match(Set cr (CmpUL op1 op2));
14860
14861  effect(DEF cr, USE op1, USE op2);
14862
14863  ins_cost(INSN_COST);
14864  format %{ "cmp  $op1, $op2" %}
14865
14866  ins_encode(aarch64_enc_cmp(op1, op2));
14867
14868  ins_pipe(icmp_reg_reg);
14869%}
14870
14871instruct compUL_reg_immL0(rFlagsRegU cr, iRegL op1, immL0 zero)
14872%{
14873  match(Set cr (CmpUL op1 zero));
14874
14875  effect(DEF cr, USE op1);
14876
14877  ins_cost(INSN_COST);
14878  format %{ "tst  $op1" %}
14879
14880  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));
14881
14882  ins_pipe(icmp_reg_imm);
14883%}
14884
14885instruct compUL_reg_immLAddSub(rFlagsRegU cr, iRegL op1, immLAddSub op2)
14886%{
14887  match(Set cr (CmpUL op1 op2));
14888
14889  effect(DEF cr, USE op1);
14890
14891  ins_cost(INSN_COST);
14892  format %{ "cmp  $op1, $op2" %}
14893
14894  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));
14895
14896  ins_pipe(icmp_reg_imm);
14897%}
14898
14899instruct compUL_reg_immL(rFlagsRegU cr, iRegL op1, immL op2)
14900%{
14901  match(Set cr (CmpUL op1 op2));
14902
14903  effect(DEF cr, USE op1);
14904
14905  ins_cost(INSN_COST * 2);
14906  format %{ "cmp  $op1, $op2" %}
14907
14908  ins_encode(aarch64_enc_cmp_imm(op1, op2));
14909
14910  ins_pipe(icmp_reg_imm);
14911%}
14912
14913instruct compP_reg_reg(rFlagsRegU cr, iRegP op1, iRegP op2)
14914%{
14915  match(Set cr (CmpP op1 op2));
14916
14917  effect(DEF cr, USE op1, USE op2);
14918
14919  ins_cost(INSN_COST);
14920  format %{ "cmp  $op1, $op2\t // ptr" %}
14921
14922  ins_encode(aarch64_enc_cmpp(op1, op2));
14923
14924  ins_pipe(icmp_reg_reg);
14925%}
14926
14927instruct compN_reg_reg(rFlagsRegU cr, iRegN op1, iRegN op2)
14928%{
14929  match(Set cr (CmpN op1 op2));
14930
14931  effect(DEF cr, USE op1, USE op2);
14932
14933  ins_cost(INSN_COST);
14934  format %{ "cmp  $op1, $op2\t // compressed ptr" %}
14935
14936  ins_encode(aarch64_enc_cmpn(op1, op2));
14937
14938  ins_pipe(icmp_reg_reg);
14939%}
14940
14941instruct testP_reg(rFlagsRegU cr, iRegP op1, immP0 zero)
14942%{
14943  match(Set cr (CmpP op1 zero));
14944
14945  effect(DEF cr, USE op1, USE zero);
14946
14947  ins_cost(INSN_COST);
14948  format %{ "cmp  $op1, 0\t // ptr" %}
14949
14950  ins_encode(aarch64_enc_testp(op1));
14951
14952  ins_pipe(icmp_reg_imm);
14953%}
14954
14955instruct testN_reg(rFlagsRegU cr, iRegN op1, immN0 zero)
14956%{
14957  match(Set cr (CmpN op1 zero));
14958
14959  effect(DEF cr, USE op1, USE zero);
14960
14961  ins_cost(INSN_COST);
14962  format %{ "cmp  $op1, 0\t // compressed ptr" %}
14963
14964  ins_encode(aarch64_enc_testn(op1));
14965
14966  ins_pipe(icmp_reg_imm);
14967%}
14968
14969// FP comparisons
14970//
14971// n.b. CmpF/CmpD set a normal flags reg which then gets compared
14972// using normal cmpOp. See declaration of rFlagsReg for details.
14973
14974instruct compF_reg_reg(rFlagsReg cr, vRegF src1, vRegF src2)
14975%{
14976  match(Set cr (CmpF src1 src2));
14977
14978  ins_cost(3 * INSN_COST);
14979  format %{ "fcmps $src1, $src2" %}
14980
14981  ins_encode %{
14982    __ fcmps(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
14983  %}
14984
14985  ins_pipe(pipe_class_compare);
14986%}
14987
14988instruct compF_reg_zero(rFlagsReg cr, vRegF src1, immF0 src2)
14989%{
14990  match(Set cr (CmpF src1 src2));
14991
14992  ins_cost(3 * INSN_COST);
14993  format %{ "fcmps $src1, 0.0" %}
14994
14995  ins_encode %{
14996    __ fcmps(as_FloatRegister($src1$$reg), 0.0D);
14997  %}
14998
14999  ins_pipe(pipe_class_compare);
15000%}
15001// FROM HERE
15002
15003instruct compD_reg_reg(rFlagsReg cr, vRegD src1, vRegD src2)
15004%{
15005  match(Set cr (CmpD src1 src2));
15006
15007  ins_cost(3 * INSN_COST);
15008  format %{ "fcmpd $src1, $src2" %}
15009
15010  ins_encode %{
15011    __ fcmpd(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
15012  %}
15013
15014  ins_pipe(pipe_class_compare);
15015%}
15016
15017instruct compD_reg_zero(rFlagsReg cr, vRegD src1, immD0 src2)
15018%{
15019  match(Set cr (CmpD src1 src2));
15020
15021  ins_cost(3 * INSN_COST);
15022  format %{ "fcmpd $src1, 0.0" %}
15023
15024  ins_encode %{
15025    __ fcmpd(as_FloatRegister($src1$$reg), 0.0D);
15026  %}
15027
15028  ins_pipe(pipe_class_compare);
15029%}
15030
15031instruct compF3_reg_reg(iRegINoSp dst, vRegF src1, vRegF src2, rFlagsReg cr)
15032%{
15033  match(Set dst (CmpF3 src1 src2));
15034  effect(KILL cr);
15035
15036  ins_cost(5 * INSN_COST);
15037  format %{ "fcmps $src1, $src2\n\t"
15038            "csinvw($dst, zr, zr, eq\n\t"
15039            "csnegw($dst, $dst, $dst, lt)"
15040  %}
15041
15042  ins_encode %{
15043    Label done;
15044    FloatRegister s1 = as_FloatRegister($src1$$reg);
15045    FloatRegister s2 = as_FloatRegister($src2$$reg);
15046    Register d = as_Register($dst$$reg);
15047    __ fcmps(s1, s2);
15048    // installs 0 if EQ else -1
15049    __ csinvw(d, zr, zr, Assembler::EQ);
15050    // keeps -1 if less or unordered else installs 1
15051    __ csnegw(d, d, d, Assembler::LT);
15052    __ bind(done);
15053  %}
15054
15055  ins_pipe(pipe_class_default);
15056
15057%}
15058
15059instruct compD3_reg_reg(iRegINoSp dst, vRegD src1, vRegD src2, rFlagsReg cr)
15060%{
15061  match(Set dst (CmpD3 src1 src2));
15062  effect(KILL cr);
15063
15064  ins_cost(5 * INSN_COST);
15065  format %{ "fcmpd $src1, $src2\n\t"
15066            "csinvw($dst, zr, zr, eq\n\t"
15067            "csnegw($dst, $dst, $dst, lt)"
15068  %}
15069
15070  ins_encode %{
15071    Label done;
15072    FloatRegister s1 = as_FloatRegister($src1$$reg);
15073    FloatRegister s2 = as_FloatRegister($src2$$reg);
15074    Register d = as_Register($dst$$reg);
15075    __ fcmpd(s1, s2);
15076    // installs 0 if EQ else -1
15077    __ csinvw(d, zr, zr, Assembler::EQ);
15078    // keeps -1 if less or unordered else installs 1
15079    __ csnegw(d, d, d, Assembler::LT);
15080    __ bind(done);
15081  %}
15082  ins_pipe(pipe_class_default);
15083
15084%}
15085
15086instruct compF3_reg_immF0(iRegINoSp dst, vRegF src1, immF0 zero, rFlagsReg cr)
15087%{
15088  match(Set dst (CmpF3 src1 zero));
15089  effect(KILL cr);
15090
15091  ins_cost(5 * INSN_COST);
15092  format %{ "fcmps $src1, 0.0\n\t"
15093            "csinvw($dst, zr, zr, eq\n\t"
15094            "csnegw($dst, $dst, $dst, lt)"
15095  %}
15096
15097  ins_encode %{
15098    Label done;
15099    FloatRegister s1 = as_FloatRegister($src1$$reg);
15100    Register d = as_Register($dst$$reg);
15101    __ fcmps(s1, 0.0D);
15102    // installs 0 if EQ else -1
15103    __ csinvw(d, zr, zr, Assembler::EQ);
15104    // keeps -1 if less or unordered else installs 1
15105    __ csnegw(d, d, d, Assembler::LT);
15106    __ bind(done);
15107  %}
15108
15109  ins_pipe(pipe_class_default);
15110
15111%}
15112
15113instruct compD3_reg_immD0(iRegINoSp dst, vRegD src1, immD0 zero, rFlagsReg cr)
15114%{
15115  match(Set dst (CmpD3 src1 zero));
15116  effect(KILL cr);
15117
15118  ins_cost(5 * INSN_COST);
15119  format %{ "fcmpd $src1, 0.0\n\t"
15120            "csinvw($dst, zr, zr, eq\n\t"
15121            "csnegw($dst, $dst, $dst, lt)"
15122  %}
15123
15124  ins_encode %{
15125    Label done;
15126    FloatRegister s1 = as_FloatRegister($src1$$reg);
15127    Register d = as_Register($dst$$reg);
15128    __ fcmpd(s1, 0.0D);
15129    // installs 0 if EQ else -1
15130    __ csinvw(d, zr, zr, Assembler::EQ);
15131    // keeps -1 if less or unordered else installs 1
15132    __ csnegw(d, d, d, Assembler::LT);
15133    __ bind(done);
15134  %}
15135  ins_pipe(pipe_class_default);
15136
15137%}
15138
15139instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegIorL2I p, iRegIorL2I q, rFlagsReg cr)
15140%{
15141  match(Set dst (CmpLTMask p q));
15142  effect(KILL cr);
15143
15144  ins_cost(3 * INSN_COST);
15145
15146  format %{ "cmpw $p, $q\t# cmpLTMask\n\t"
15147            "csetw $dst, lt\n\t"
15148            "subw $dst, zr, $dst"
15149  %}
15150
15151  ins_encode %{
15152    __ cmpw(as_Register($p$$reg), as_Register($q$$reg));
15153    __ csetw(as_Register($dst$$reg), Assembler::LT);
15154    __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg));
15155  %}
15156
15157  ins_pipe(ialu_reg_reg);
15158%}
15159
15160instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr)
15161%{
15162  match(Set dst (CmpLTMask src zero));
15163  effect(KILL cr);
15164
15165  ins_cost(INSN_COST);
15166
15167  format %{ "asrw $dst, $src, #31\t# cmpLTMask0" %}
15168
15169  ins_encode %{
15170    __ asrw(as_Register($dst$$reg), as_Register($src$$reg), 31);
15171  %}
15172
15173  ins_pipe(ialu_reg_shift);
15174%}
15175
15176// ============================================================================
15177// Max and Min
15178
15179instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
15180%{
15181  match(Set dst (MinI src1 src2));
15182
15183  effect(DEF dst, USE src1, USE src2, KILL cr);
15184  size(8);
15185
15186  ins_cost(INSN_COST * 3);
15187  format %{
15188    "cmpw $src1 $src2\t signed int\n\t"
15189    "cselw $dst, $src1, $src2 lt\t"
15190  %}
15191
15192  ins_encode %{
15193    __ cmpw(as_Register($src1$$reg),
15194            as_Register($src2$$reg));
15195    __ cselw(as_Register($dst$$reg),
15196             as_Register($src1$$reg),
15197             as_Register($src2$$reg),
15198             Assembler::LT);
15199  %}
15200
15201  ins_pipe(ialu_reg_reg);
15202%}
15203// FROM HERE
15204
15205instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
15206%{
15207  match(Set dst (MaxI src1 src2));
15208
15209  effect(DEF dst, USE src1, USE src2, KILL cr);
15210  size(8);
15211
15212  ins_cost(INSN_COST * 3);
15213  format %{
15214    "cmpw $src1 $src2\t signed int\n\t"
15215    "cselw $dst, $src1, $src2 gt\t"
15216  %}
15217
15218  ins_encode %{
15219    __ cmpw(as_Register($src1$$reg),
15220            as_Register($src2$$reg));
15221    __ cselw(as_Register($dst$$reg),
15222             as_Register($src1$$reg),
15223             as_Register($src2$$reg),
15224             Assembler::GT);
15225  %}
15226
15227  ins_pipe(ialu_reg_reg);
15228%}
15229
15230// ============================================================================
15231// Branch Instructions
15232
15233// Direct Branch.
15234instruct branch(label lbl)
15235%{
15236  match(Goto);
15237
15238  effect(USE lbl);
15239
15240  ins_cost(BRANCH_COST);
15241  format %{ "b  $lbl" %}
15242
15243  ins_encode(aarch64_enc_b(lbl));
15244
15245  ins_pipe(pipe_branch);
15246%}
15247
15248// Conditional Near Branch
15249instruct branchCon(cmpOp cmp, rFlagsReg cr, label lbl)
15250%{
15251  // Same match rule as `branchConFar'.
15252  match(If cmp cr);
15253
15254  effect(USE lbl);
15255
15256  ins_cost(BRANCH_COST);
15257  // If set to 1 this indicates that the current instruction is a
15258  // short variant of a long branch. This avoids using this
15259  // instruction in first-pass matching. It will then only be used in
15260  // the `Shorten_branches' pass.
15261  // ins_short_branch(1);
15262  format %{ "b$cmp  $lbl" %}
15263
15264  ins_encode(aarch64_enc_br_con(cmp, lbl));
15265
15266  ins_pipe(pipe_branch_cond);
15267%}
15268
15269// Conditional Near Branch Unsigned
15270instruct branchConU(cmpOpU cmp, rFlagsRegU cr, label lbl)
15271%{
15272  // Same match rule as `branchConFar'.
15273  match(If cmp cr);
15274
15275  effect(USE lbl);
15276
15277  ins_cost(BRANCH_COST);
15278  // If set to 1 this indicates that the current instruction is a
15279  // short variant of a long branch. This avoids using this
15280  // instruction in first-pass matching. It will then only be used in
15281  // the `Shorten_branches' pass.
15282  // ins_short_branch(1);
15283  format %{ "b$cmp  $lbl\t# unsigned" %}
15284
15285  ins_encode(aarch64_enc_br_conU(cmp, lbl));
15286
15287  ins_pipe(pipe_branch_cond);
15288%}
15289
15290// Make use of CBZ and CBNZ.  These instructions, as well as being
15291// shorter than (cmp; branch), have the additional benefit of not
15292// killing the flags.
15293
15294instruct cmpI_imm0_branch(cmpOpEqNe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsReg cr) %{
15295  match(If cmp (CmpI op1 op2));
15296  effect(USE labl);
15297
15298  ins_cost(BRANCH_COST);
15299  format %{ "cbw$cmp   $op1, $labl" %}
15300  ins_encode %{
15301    Label* L = $labl$$label;
15302    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
15303    if (cond == Assembler::EQ)
15304      __ cbzw($op1$$Register, *L);
15305    else
15306      __ cbnzw($op1$$Register, *L);
15307  %}
15308  ins_pipe(pipe_cmp_branch);
15309%}
15310
15311instruct cmpL_imm0_branch(cmpOpEqNe cmp, iRegL op1, immL0 op2, label labl, rFlagsReg cr) %{
15312  match(If cmp (CmpL op1 op2));
15313  effect(USE labl);
15314
15315  ins_cost(BRANCH_COST);
15316  format %{ "cb$cmp   $op1, $labl" %}
15317  ins_encode %{
15318    Label* L = $labl$$label;
15319    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
15320    if (cond == Assembler::EQ)
15321      __ cbz($op1$$Register, *L);
15322    else
15323      __ cbnz($op1$$Register, *L);
15324  %}
15325  ins_pipe(pipe_cmp_branch);
15326%}
15327
15328instruct cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 op2, label labl, rFlagsReg cr) %{
15329  match(If cmp (CmpP op1 op2));
15330  effect(USE labl);
15331
15332  ins_cost(BRANCH_COST);
15333  format %{ "cb$cmp   $op1, $labl" %}
15334  ins_encode %{
15335    Label* L = $labl$$label;
15336    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
15337    if (cond == Assembler::EQ)
15338      __ cbz($op1$$Register, *L);
15339    else
15340      __ cbnz($op1$$Register, *L);
15341  %}
15342  ins_pipe(pipe_cmp_branch);
15343%}
15344
15345instruct cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 op2, label labl, rFlagsReg cr) %{
15346  match(If cmp (CmpN op1 op2));
15347  effect(USE labl);
15348
15349  ins_cost(BRANCH_COST);
15350  format %{ "cbw$cmp   $op1, $labl" %}
15351  ins_encode %{
15352    Label* L = $labl$$label;
15353    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
15354    if (cond == Assembler::EQ)
15355      __ cbzw($op1$$Register, *L);
15356    else
15357      __ cbnzw($op1$$Register, *L);
15358  %}
15359  ins_pipe(pipe_cmp_branch);
15360%}
15361
15362instruct cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN oop, immP0 zero, label labl, rFlagsReg cr) %{
15363  match(If cmp (CmpP (DecodeN oop) zero));
15364  effect(USE labl);
15365
15366  ins_cost(BRANCH_COST);
15367  format %{ "cb$cmp   $oop, $labl" %}
15368  ins_encode %{
15369    Label* L = $labl$$label;
15370    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
15371    if (cond == Assembler::EQ)
15372      __ cbzw($oop$$Register, *L);
15373    else
15374      __ cbnzw($oop$$Register, *L);
15375  %}
15376  ins_pipe(pipe_cmp_branch);
15377%}
15378
15379instruct cmpUI_imm0_branch(cmpOpUEqNeLtGe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsRegU cr) %{
15380  match(If cmp (CmpU op1 op2));
15381  effect(USE labl);
15382
15383  ins_cost(BRANCH_COST);
15384  format %{ "cbw$cmp   $op1, $labl" %}
15385  ins_encode %{
15386    Label* L = $labl$$label;
15387    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
15388    if (cond == Assembler::EQ || cond == Assembler::LS)
15389      __ cbzw($op1$$Register, *L);
15390    else
15391      __ cbnzw($op1$$Register, *L);
15392  %}
15393  ins_pipe(pipe_cmp_branch);
15394%}
15395
15396instruct cmpUL_imm0_branch(cmpOpUEqNeLtGe cmp, iRegL op1, immL0 op2, label labl, rFlagsRegU cr) %{
15397  match(If cmp (CmpUL op1 op2));
15398  effect(USE labl);
15399
15400  ins_cost(BRANCH_COST);
15401  format %{ "cb$cmp   $op1, $labl" %}
15402  ins_encode %{
15403    Label* L = $labl$$label;
15404    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
15405    if (cond == Assembler::EQ || cond == Assembler::LS)
15406      __ cbz($op1$$Register, *L);
15407    else
15408      __ cbnz($op1$$Register, *L);
15409  %}
15410  ins_pipe(pipe_cmp_branch);
15411%}
15412
15413// Test bit and Branch
15414
15415// Patterns for short (< 32KiB) variants
15416instruct cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
15417  match(If cmp (CmpL op1 op2));
15418  effect(USE labl);
15419
15420  ins_cost(BRANCH_COST);
15421  format %{ "cb$cmp   $op1, $labl # long" %}
15422  ins_encode %{
15423    Label* L = $labl$$label;
15424    Assembler::Condition cond =
15425      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
15426    __ tbr(cond, $op1$$Register, 63, *L);
15427  %}
15428  ins_pipe(pipe_cmp_branch);
15429  ins_short_branch(1);
15430%}
15431
15432instruct cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
15433  match(If cmp (CmpI op1 op2));
15434  effect(USE labl);
15435
15436  ins_cost(BRANCH_COST);
15437  format %{ "cb$cmp   $op1, $labl # int" %}
15438  ins_encode %{
15439    Label* L = $labl$$label;
15440    Assembler::Condition cond =
15441      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
15442    __ tbr(cond, $op1$$Register, 31, *L);
15443  %}
15444  ins_pipe(pipe_cmp_branch);
15445  ins_short_branch(1);
15446%}
15447
15448instruct cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
15449  match(If cmp (CmpL (AndL op1 op2) op3));
15450  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
15451  effect(USE labl);
15452
15453  ins_cost(BRANCH_COST);
15454  format %{ "tb$cmp   $op1, $op2, $labl" %}
15455  ins_encode %{
15456    Label* L = $labl$$label;
15457    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
15458    int bit = exact_log2($op2$$constant);
15459    __ tbr(cond, $op1$$Register, bit, *L);
15460  %}
15461  ins_pipe(pipe_cmp_branch);
15462  ins_short_branch(1);
15463%}
15464
15465instruct cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
15466  match(If cmp (CmpI (AndI op1 op2) op3));
15467  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
15468  effect(USE labl);
15469
15470  ins_cost(BRANCH_COST);
15471  format %{ "tb$cmp   $op1, $op2, $labl" %}
15472  ins_encode %{
15473    Label* L = $labl$$label;
15474    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
15475    int bit = exact_log2($op2$$constant);
15476    __ tbr(cond, $op1$$Register, bit, *L);
15477  %}
15478  ins_pipe(pipe_cmp_branch);
15479  ins_short_branch(1);
15480%}
15481
15482// And far variants
15483instruct far_cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
15484  match(If cmp (CmpL op1 op2));
15485  effect(USE labl);
15486
15487  ins_cost(BRANCH_COST);
15488  format %{ "cb$cmp   $op1, $labl # long" %}
15489  ins_encode %{
15490    Label* L = $labl$$label;
15491    Assembler::Condition cond =
15492      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
15493    __ tbr(cond, $op1$$Register, 63, *L, /*far*/true);
15494  %}
15495  ins_pipe(pipe_cmp_branch);
15496%}
15497
15498instruct far_cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
15499  match(If cmp (CmpI op1 op2));
15500  effect(USE labl);
15501
15502  ins_cost(BRANCH_COST);
15503  format %{ "cb$cmp   $op1, $labl # int" %}
15504  ins_encode %{
15505    Label* L = $labl$$label;
15506    Assembler::Condition cond =
15507      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
15508    __ tbr(cond, $op1$$Register, 31, *L, /*far*/true);
15509  %}
15510  ins_pipe(pipe_cmp_branch);
15511%}
15512
15513instruct far_cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
15514  match(If cmp (CmpL (AndL op1 op2) op3));
15515  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
15516  effect(USE labl);
15517
15518  ins_cost(BRANCH_COST);
15519  format %{ "tb$cmp   $op1, $op2, $labl" %}
15520  ins_encode %{
15521    Label* L = $labl$$label;
15522    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
15523    int bit = exact_log2($op2$$constant);
15524    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
15525  %}
15526  ins_pipe(pipe_cmp_branch);
15527%}
15528
15529instruct far_cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
15530  match(If cmp (CmpI (AndI op1 op2) op3));
15531  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
15532  effect(USE labl);
15533
15534  ins_cost(BRANCH_COST);
15535  format %{ "tb$cmp   $op1, $op2, $labl" %}
15536  ins_encode %{
15537    Label* L = $labl$$label;
15538    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
15539    int bit = exact_log2($op2$$constant);
15540    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
15541  %}
15542  ins_pipe(pipe_cmp_branch);
15543%}
15544
15545// Test bits
15546
15547instruct cmpL_and(cmpOp cmp, iRegL op1, immL op2, immL0 op3, rFlagsReg cr) %{
15548  match(Set cr (CmpL (AndL op1 op2) op3));
15549  predicate(Assembler::operand_valid_for_logical_immediate
15550            (/*is_32*/false, n->in(1)->in(2)->get_long()));
15551
15552  ins_cost(INSN_COST);
15553  format %{ "tst $op1, $op2 # long" %}
15554  ins_encode %{
15555    __ tst($op1$$Register, $op2$$constant);
15556  %}
15557  ins_pipe(ialu_reg_reg);
15558%}
15559
15560instruct cmpI_and(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, rFlagsReg cr) %{
15561  match(Set cr (CmpI (AndI op1 op2) op3));
15562  predicate(Assembler::operand_valid_for_logical_immediate
15563            (/*is_32*/true, n->in(1)->in(2)->get_int()));
15564
15565  ins_cost(INSN_COST);
15566  format %{ "tst $op1, $op2 # int" %}
15567  ins_encode %{
15568    __ tstw($op1$$Register, $op2$$constant);
15569  %}
15570  ins_pipe(ialu_reg_reg);
15571%}
15572
15573instruct cmpL_and_reg(cmpOp cmp, iRegL op1, iRegL op2, immL0 op3, rFlagsReg cr) %{
15574  match(Set cr (CmpL (AndL op1 op2) op3));
15575
15576  ins_cost(INSN_COST);
15577  format %{ "tst $op1, $op2 # long" %}
15578  ins_encode %{
15579    __ tst($op1$$Register, $op2$$Register);
15580  %}
15581  ins_pipe(ialu_reg_reg);
15582%}
15583
15584instruct cmpI_and_reg(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, immI0 op3, rFlagsReg cr) %{
15585  match(Set cr (CmpI (AndI op1 op2) op3));
15586
15587  ins_cost(INSN_COST);
15588  format %{ "tstw $op1, $op2 # int" %}
15589  ins_encode %{
15590    __ tstw($op1$$Register, $op2$$Register);
15591  %}
15592  ins_pipe(ialu_reg_reg);
15593%}
15594
15595
15596// Conditional Far Branch
15597// Conditional Far Branch Unsigned
15598// TODO: fixme
15599
15600// counted loop end branch near
15601instruct branchLoopEnd(cmpOp cmp, rFlagsReg cr, label lbl)
15602%{
15603  match(CountedLoopEnd cmp cr);
15604
15605  effect(USE lbl);
15606
15607  ins_cost(BRANCH_COST);
15608  // short variant.
15609  // ins_short_branch(1);
15610  format %{ "b$cmp $lbl \t// counted loop end" %}
15611
15612  ins_encode(aarch64_enc_br_con(cmp, lbl));
15613
15614  ins_pipe(pipe_branch);
15615%}
15616
15617// counted loop end branch near Unsigned
15618instruct branchLoopEndU(cmpOpU cmp, rFlagsRegU cr, label lbl)
15619%{
15620  match(CountedLoopEnd cmp cr);
15621
15622  effect(USE lbl);
15623
15624  ins_cost(BRANCH_COST);
15625  // short variant.
15626  // ins_short_branch(1);
15627  format %{ "b$cmp $lbl \t// counted loop end unsigned" %}
15628
15629  ins_encode(aarch64_enc_br_conU(cmp, lbl));
15630
15631  ins_pipe(pipe_branch);
15632%}
15633
15634// counted loop end branch far
15635// counted loop end branch far unsigned
15636// TODO: fixme
15637
15638// ============================================================================
15639// inlined locking and unlocking
15640
15641instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
15642%{
15643  match(Set cr (FastLock object box));
15644  effect(TEMP tmp, TEMP tmp2);
15645
15646  // TODO
15647  // identify correct cost
15648  ins_cost(5 * INSN_COST);
15649  format %{ "fastlock $object,$box\t! kills $tmp,$tmp2" %}
15650
15651  ins_encode(aarch64_enc_fast_lock(object, box, tmp, tmp2));
15652
15653  ins_pipe(pipe_serial);
15654%}
15655
15656instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
15657%{
15658  match(Set cr (FastUnlock object box));
15659  effect(TEMP tmp, TEMP tmp2);
15660
15661  ins_cost(5 * INSN_COST);
15662  format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2" %}
15663
15664  ins_encode(aarch64_enc_fast_unlock(object, box, tmp, tmp2));
15665
15666  ins_pipe(pipe_serial);
15667%}
15668
15669
15670// ============================================================================
15671// Safepoint Instructions
15672
15673// TODO
15674// provide a near and far version of this code
15675
15676instruct safePoint(iRegP poll)
15677%{
15678  match(SafePoint poll);
15679
15680  format %{
15681    "ldrw zr, [$poll]\t# Safepoint: poll for GC"
15682  %}
15683  ins_encode %{
15684    __ read_polling_page(as_Register($poll$$reg), relocInfo::poll_type);
15685  %}
15686  ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem);
15687%}
15688
15689
15690// ============================================================================
15691// Procedure Call/Return Instructions
15692
15693// Call Java Static Instruction
15694
15695instruct CallStaticJavaDirect(method meth)
15696%{
15697  match(CallStaticJava);
15698
15699  effect(USE meth);
15700
15701  ins_cost(CALL_COST);
15702
15703  format %{ "call,static $meth \t// ==> " %}
15704
15705  ins_encode( aarch64_enc_java_static_call(meth),
15706              aarch64_enc_call_epilog );
15707
15708  ins_pipe(pipe_class_call);
15709%}
15710
15711// TO HERE
15712
15713// Call Java Dynamic Instruction
15714instruct CallDynamicJavaDirect(method meth)
15715%{
15716  match(CallDynamicJava);
15717
15718  effect(USE meth);
15719
15720  ins_cost(CALL_COST);
15721
15722  format %{ "CALL,dynamic $meth \t// ==> " %}
15723
15724  ins_encode( aarch64_enc_java_dynamic_call(meth),
15725               aarch64_enc_call_epilog );
15726
15727  ins_pipe(pipe_class_call);
15728%}
15729
15730// Call Runtime Instruction
15731
15732instruct CallRuntimeDirect(method meth)
15733%{
15734  match(CallRuntime);
15735
15736  effect(USE meth);
15737
15738  ins_cost(CALL_COST);
15739
15740  format %{ "CALL, runtime $meth" %}
15741
15742  ins_encode( aarch64_enc_java_to_runtime(meth) );
15743
15744  ins_pipe(pipe_class_call);
15745%}
15746
15747// Call Runtime Instruction
15748
15749instruct CallLeafDirect(method meth)
15750%{
15751  match(CallLeaf);
15752
15753  effect(USE meth);
15754
15755  ins_cost(CALL_COST);
15756
15757  format %{ "CALL, runtime leaf $meth" %}
15758
15759  ins_encode( aarch64_enc_java_to_runtime(meth) );
15760
15761  ins_pipe(pipe_class_call);
15762%}
15763
15764// Call Runtime Instruction
15765
15766instruct CallLeafNoFPDirect(method meth)
15767%{
15768  match(CallLeafNoFP);
15769
15770  effect(USE meth);
15771
15772  ins_cost(CALL_COST);
15773
15774  format %{ "CALL, runtime leaf nofp $meth" %}
15775
15776  ins_encode( aarch64_enc_java_to_runtime(meth) );
15777
15778  ins_pipe(pipe_class_call);
15779%}
15780
15781// Tail Call; Jump from runtime stub to Java code.
15782// Also known as an 'interprocedural jump'.
15783// Target of jump will eventually return to caller.
15784// TailJump below removes the return address.
15785instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop)
15786%{
15787  match(TailCall jump_target method_oop);
15788
15789  ins_cost(CALL_COST);
15790
15791  format %{ "br $jump_target\t# $method_oop holds method oop" %}
15792
15793  ins_encode(aarch64_enc_tail_call(jump_target));
15794
15795  ins_pipe(pipe_class_call);
15796%}
15797
15798instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R0 ex_oop)
15799%{
15800  match(TailJump jump_target ex_oop);
15801
15802  ins_cost(CALL_COST);
15803
15804  format %{ "br $jump_target\t# $ex_oop holds exception oop" %}
15805
15806  ins_encode(aarch64_enc_tail_jmp(jump_target));
15807
15808  ins_pipe(pipe_class_call);
15809%}
15810
15811// Create exception oop: created by stack-crawling runtime code.
15812// Created exception is now available to this handler, and is setup
15813// just prior to jumping to this handler. No code emitted.
15814// TODO check
15815// should ex_oop be in r0? intel uses rax, ppc cannot use r0 so uses rarg1
15816instruct CreateException(iRegP_R0 ex_oop)
15817%{
15818  match(Set ex_oop (CreateEx));
15819
15820  format %{ " -- \t// exception oop; no code emitted" %}
15821
15822  size(0);
15823
15824  ins_encode( /*empty*/ );
15825
15826  ins_pipe(pipe_class_empty);
15827%}
15828
15829// Rethrow exception: The exception oop will come in the first
15830// argument position. Then JUMP (not call) to the rethrow stub code.
15831instruct RethrowException() %{
15832  match(Rethrow);
15833  ins_cost(CALL_COST);
15834
15835  format %{ "b rethrow_stub" %}
15836
15837  ins_encode( aarch64_enc_rethrow() );
15838
15839  ins_pipe(pipe_class_call);
15840%}
15841
15842
15843// Return Instruction
15844// epilog node loads ret address into lr as part of frame pop
15845instruct Ret()
15846%{
15847  match(Return);
15848
15849  format %{ "ret\t// return register" %}
15850
15851  ins_encode( aarch64_enc_ret() );
15852
15853  ins_pipe(pipe_branch);
15854%}
15855
15856// Die now.
15857instruct ShouldNotReachHere() %{
15858  match(Halt);
15859
15860  ins_cost(CALL_COST);
15861  format %{ "ShouldNotReachHere" %}
15862
15863  ins_encode %{
15864    // +1 so NativeInstruction::is_sigill_zombie_not_entrant() doesn't
15865    // return true
15866    __ dpcs1(0xdead + 1);
15867  %}
15868
15869  ins_pipe(pipe_class_default);
15870%}
15871
15872// ============================================================================
15873// Partial Subtype Check
15874//
15875// superklass array for an instance of the superklass.  Set a hidden
15876// internal cache on a hit (cache is checked with exposed code in
15877// gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
15878// encoding ALSO sets flags.
15879
15880instruct partialSubtypeCheck(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, rFlagsReg cr)
15881%{
15882  match(Set result (PartialSubtypeCheck sub super));
15883  effect(KILL cr, KILL temp);
15884
15885  ins_cost(1100);  // slightly larger than the next version
15886  format %{ "partialSubtypeCheck $result, $sub, $super" %}
15887
15888  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));
15889
15890  opcode(0x1); // Force zero of result reg on hit
15891
15892  ins_pipe(pipe_class_memory);
15893%}
15894
15895instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, immP0 zero, rFlagsReg cr)
15896%{
15897  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
15898  effect(KILL temp, KILL result);
15899
15900  ins_cost(1100);  // slightly larger than the next version
15901  format %{ "partialSubtypeCheck $result, $sub, $super == 0" %}
15902
15903  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));
15904
15905  opcode(0x0); // Don't zero result reg on hit
15906
15907  ins_pipe(pipe_class_memory);
15908%}
15909
15910instruct string_compareU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
15911                        iRegI_R0 result, iRegP_R10 tmp1, rFlagsReg cr)
15912%{
15913  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15914  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15915  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15916
15917  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
15918  ins_encode %{
15919    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
15920    __ string_compare($str1$$Register, $str2$$Register,
15921                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
15922                      $tmp1$$Register,
15923                      fnoreg, fnoreg, StrIntrinsicNode::UU);
15924  %}
15925  ins_pipe(pipe_class_memory);
15926%}
15927
15928instruct string_compareL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
15929                        iRegI_R0 result, iRegP_R10 tmp1, rFlagsReg cr)
15930%{
15931  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15932  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15933  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15934
15935  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
15936  ins_encode %{
15937    __ string_compare($str1$$Register, $str2$$Register,
15938                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
15939                      $tmp1$$Register,
15940                      fnoreg, fnoreg, StrIntrinsicNode::LL);
15941  %}
15942  ins_pipe(pipe_class_memory);
15943%}
15944
15945instruct string_compareUL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
15946                        iRegI_R0 result, vRegD vtmp1, vRegD vtmp2, iRegP_R10 tmp1, rFlagsReg cr)
15947%{
15948  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15949  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15950  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP vtmp1, TEMP vtmp2, KILL cr);
15951
15952  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
15953  ins_encode %{
15954    __ string_compare($str1$$Register, $str2$$Register,
15955                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
15956                      $tmp1$$Register,
15957                      $vtmp1$$FloatRegister, $vtmp2$$FloatRegister, StrIntrinsicNode::UL);
15958  %}
15959  ins_pipe(pipe_class_memory);
15960%}
15961
15962instruct string_compareLU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
15963                        iRegI_R0 result, vRegD vtmp1, vRegD vtmp2, iRegP_R10 tmp1, rFlagsReg cr)
15964%{
15965  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15966  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15967  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP vtmp1, TEMP vtmp2, KILL cr);
15968
15969  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
15970  ins_encode %{
15971    __ string_compare($str1$$Register, $str2$$Register,
15972                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
15973                      $tmp1$$Register,
15974                      $vtmp1$$FloatRegister, $vtmp2$$FloatRegister, StrIntrinsicNode::LU);
15975  %}
15976  ins_pipe(pipe_class_memory);
15977%}
15978
15979instruct string_indexofUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
15980       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
15981%{
15982  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
15983  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15984  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
15985         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
15986  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %}
15987
15988  ins_encode %{
15989    __ string_indexof($str1$$Register, $str2$$Register,
15990                      $cnt1$$Register, $cnt2$$Register,
15991                      $tmp1$$Register, $tmp2$$Register,
15992                      $tmp3$$Register, $tmp4$$Register,
15993                      -1, $result$$Register, StrIntrinsicNode::UU);
15994  %}
15995  ins_pipe(pipe_class_memory);
15996%}
15997
15998instruct string_indexofLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
15999       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
16000%{
16001  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
16002  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
16003  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
16004         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
16005  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %}
16006
16007  ins_encode %{
16008    __ string_indexof($str1$$Register, $str2$$Register,
16009                      $cnt1$$Register, $cnt2$$Register,
16010                      $tmp1$$Register, $tmp2$$Register,
16011                      $tmp3$$Register, $tmp4$$Register,
16012                      -1, $result$$Register, StrIntrinsicNode::LL);
16013  %}
16014  ins_pipe(pipe_class_memory);
16015%}
16016
16017instruct string_indexofUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
16018       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
16019%{
16020  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
16021  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
16022  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
16023         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
16024  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %}
16025
16026  ins_encode %{
16027    __ string_indexof($str1$$Register, $str2$$Register,
16028                      $cnt1$$Register, $cnt2$$Register,
16029                      $tmp1$$Register, $tmp2$$Register,
16030                      $tmp3$$Register, $tmp4$$Register,
16031                      -1, $result$$Register, StrIntrinsicNode::UL);
16032  %}
16033  ins_pipe(pipe_class_memory);
16034%}
16035
16036instruct string_indexofLU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
16037       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
16038%{
16039  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LU);
16040  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
16041  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
16042         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
16043  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LU)" %}
16044
16045  ins_encode %{
16046    __ string_indexof($str1$$Register, $str2$$Register,
16047                      $cnt1$$Register, $cnt2$$Register,
16048                      $tmp1$$Register, $tmp2$$Register,
16049                      $tmp3$$Register, $tmp4$$Register,
16050                      -1, $result$$Register, StrIntrinsicNode::LU);
16051  %}
16052  ins_pipe(pipe_class_memory);
16053%}
16054
16055instruct string_indexof_conUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
16056                 immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
16057                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
16058%{
16059  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
16060  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
16061  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
16062         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
16063  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %}
16064
16065  ins_encode %{
16066    int icnt2 = (int)$int_cnt2$$constant;
16067    __ string_indexof($str1$$Register, $str2$$Register,
16068                      $cnt1$$Register, zr,
16069                      $tmp1$$Register, $tmp2$$Register,
16070                      $tmp3$$Register, $tmp4$$Register,
16071                      icnt2, $result$$Register, StrIntrinsicNode::UU);
16072  %}
16073  ins_pipe(pipe_class_memory);
16074%}
16075
16076instruct string_indexof_conLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
16077                 immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
16078                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
16079%{
16080  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
16081  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
16082  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
16083         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
16084  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %}
16085
16086  ins_encode %{
16087    int icnt2 = (int)$int_cnt2$$constant;
16088    __ string_indexof($str1$$Register, $str2$$Register,
16089                      $cnt1$$Register, zr,
16090                      $tmp1$$Register, $tmp2$$Register,
16091                      $tmp3$$Register, $tmp4$$Register,
16092                      icnt2, $result$$Register, StrIntrinsicNode::LL);
16093  %}
16094  ins_pipe(pipe_class_memory);
16095%}
16096
16097instruct string_indexof_conUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
16098                 immI_1 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
16099                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
16100%{
16101  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
16102  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
16103  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
16104         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
16105  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %}
16106
16107  ins_encode %{
16108    int icnt2 = (int)$int_cnt2$$constant;
16109    __ string_indexof($str1$$Register, $str2$$Register,
16110                      $cnt1$$Register, zr,
16111                      $tmp1$$Register, $tmp2$$Register,
16112                      $tmp3$$Register, $tmp4$$Register,
16113                      icnt2, $result$$Register, StrIntrinsicNode::UL);
16114  %}
16115  ins_pipe(pipe_class_memory);
16116%}
16117
16118instruct string_indexof_conLU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
16119                 immI_1 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
16120                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
16121%{
16122  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LU);
16123  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
16124  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
16125         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
16126  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LU)" %}
16127
16128  ins_encode %{
16129    int icnt2 = (int)$int_cnt2$$constant;
16130    __ string_indexof($str1$$Register, $str2$$Register,
16131                      $cnt1$$Register, zr,
16132                      $tmp1$$Register, $tmp2$$Register,
16133                      $tmp3$$Register, $tmp4$$Register,
16134                      icnt2, $result$$Register, StrIntrinsicNode::LU);
16135  %}
16136  ins_pipe(pipe_class_memory);
16137%}
16138
16139instruct string_indexofU_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
16140                              iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
16141                              iRegINoSp tmp3, rFlagsReg cr)
16142%{
16143  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
16144  effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch,
16145         TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
16146
16147  format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result" %}
16148
16149  ins_encode %{
16150    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register,
16151                           $result$$Register, $tmp1$$Register, $tmp2$$Register,
16152                           $tmp3$$Register);
16153  %}
16154  ins_pipe(pipe_class_memory);
16155%}
16156
16157instruct string_equalsL(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
16158                        iRegI_R0 result, rFlagsReg cr)
16159%{
16160  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
16161  match(Set result (StrEquals (Binary str1 str2) cnt));
16162  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);
16163
16164  format %{ "String Equals $str1,$str2,$cnt -> $result" %}
16165  ins_encode %{
16166    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
16167    __ arrays_equals($str1$$Register, $str2$$Register,
16168                     $result$$Register, $cnt$$Register,
16169                     1, /*is_string*/true);
16170  %}
16171  ins_pipe(pipe_class_memory);
16172%}
16173
16174instruct string_equalsU(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
16175                        iRegI_R0 result, rFlagsReg cr)
16176%{
16177  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU);
16178  match(Set result (StrEquals (Binary str1 str2) cnt));
16179  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);
16180
16181  format %{ "String Equals $str1,$str2,$cnt -> $result" %}
16182  ins_encode %{
16183    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
16184    __ asrw($cnt$$Register, $cnt$$Register, 1);
16185    __ arrays_equals($str1$$Register, $str2$$Register,
16186                     $result$$Register, $cnt$$Register,
16187                     2, /*is_string*/true);
16188  %}
16189  ins_pipe(pipe_class_memory);
16190%}
16191
16192instruct array_equalsB(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
16193                      iRegP_R10 tmp, rFlagsReg cr)
16194%{
16195  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
16196  match(Set result (AryEq ary1 ary2));
16197  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, KILL cr);
16198
16199  format %{ "Array Equals $ary1,ary2 -> $result    // KILL $tmp" %}
16200  ins_encode %{
16201    __ arrays_equals($ary1$$Register, $ary2$$Register,
16202                     $result$$Register, $tmp$$Register,
16203                     1, /*is_string*/false);
16204    %}
16205  ins_pipe(pipe_class_memory);
16206%}
16207
16208instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
16209                      iRegP_R10 tmp, rFlagsReg cr)
16210%{
16211  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
16212  match(Set result (AryEq ary1 ary2));
16213  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, KILL cr);
16214
16215  format %{ "Array Equals $ary1,ary2 -> $result    // KILL $tmp" %}
16216  ins_encode %{
16217    __ arrays_equals($ary1$$Register, $ary2$$Register,
16218                     $result$$Register, $tmp$$Register,
16219                     2, /*is_string*/false);
16220  %}
16221  ins_pipe(pipe_class_memory);
16222%}
16223
16224instruct has_negatives(iRegP_R1 ary1, iRegI_R2 len, iRegI_R0 result, rFlagsReg cr)
16225%{
16226  match(Set result (HasNegatives ary1 len));
16227  effect(USE_KILL ary1, USE_KILL len, KILL cr);
16228  format %{ "has negatives byte[] $ary1,$len -> $result" %}
16229  ins_encode %{
16230    __ has_negatives($ary1$$Register, $len$$Register, $result$$Register);
16231  %}
16232  ins_pipe( pipe_slow );
16233%}
16234
16235// fast char[] to byte[] compression
16236instruct string_compress(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
16237                         vRegD_V0 tmp1, vRegD_V1 tmp2,
16238                         vRegD_V2 tmp3, vRegD_V3 tmp4,
16239                         iRegI_R0 result, rFlagsReg cr)
16240%{
16241  match(Set result (StrCompressedCopy src (Binary dst len)));
16242  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
16243
16244  format %{ "String Compress $src,$dst -> $result    // KILL R1, R2, R3, R4" %}
16245  ins_encode %{
16246    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
16247                           $tmp1$$FloatRegister, $tmp2$$FloatRegister,
16248                           $tmp3$$FloatRegister, $tmp4$$FloatRegister,
16249                           $result$$Register);
16250  %}
16251  ins_pipe( pipe_slow );
16252%}
16253
16254// fast byte[] to char[] inflation
16255instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len,
16256                        vRegD tmp1, vRegD tmp2, vRegD tmp3, iRegP_R3 tmp4, rFlagsReg cr)
16257%{
16258  match(Set dummy (StrInflatedCopy src (Binary dst len)));
16259  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
16260
16261  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
16262  ins_encode %{
16263    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
16264                          $tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp3$$FloatRegister, $tmp4$$Register);
16265  %}
16266  ins_pipe(pipe_class_memory);
16267%}
16268
16269// encode char[] to byte[] in ISO_8859_1
16270instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
16271                          vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,
16272                          vRegD_V2 Vtmp3, vRegD_V3 Vtmp4,
16273                          iRegI_R0 result, rFlagsReg cr)
16274%{
16275  match(Set result (EncodeISOArray src (Binary dst len)));
16276  effect(USE_KILL src, USE_KILL dst, USE_KILL len,
16277         KILL Vtmp1, KILL Vtmp2, KILL Vtmp3, KILL Vtmp4, KILL cr);
16278
16279  format %{ "Encode array $src,$dst,$len -> $result" %}
16280  ins_encode %{
16281    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
16282         $result$$Register, $Vtmp1$$FloatRegister,  $Vtmp2$$FloatRegister,
16283         $Vtmp3$$FloatRegister,  $Vtmp4$$FloatRegister);
16284  %}
16285  ins_pipe( pipe_class_memory );
16286%}
16287
16288// ============================================================================
16289// This name is KNOWN by the ADLC and cannot be changed.
16290// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
16291// for this guy.
16292instruct tlsLoadP(thread_RegP dst)
16293%{
16294  match(Set dst (ThreadLocal));
16295
16296  ins_cost(0);
16297
16298  format %{ " -- \t// $dst=Thread::current(), empty" %}
16299
16300  size(0);
16301
16302  ins_encode( /*empty*/ );
16303
16304  ins_pipe(pipe_class_empty);
16305%}
16306
16307// ====================VECTOR INSTRUCTIONS=====================================
16308
16309// Load vector (32 bits)
16310instruct loadV4(vecD dst, vmem4 mem)
16311%{
16312  predicate(n->as_LoadVector()->memory_size() == 4);
16313  match(Set dst (LoadVector mem));
16314  ins_cost(4 * INSN_COST);
16315  format %{ "ldrs   $dst,$mem\t# vector (32 bits)" %}
16316  ins_encode( aarch64_enc_ldrvS(dst, mem) );
16317  ins_pipe(vload_reg_mem64);
16318%}
16319
16320// Load vector (64 bits)
16321instruct loadV8(vecD dst, vmem8 mem)
16322%{
16323  predicate(n->as_LoadVector()->memory_size() == 8);
16324  match(Set dst (LoadVector mem));
16325  ins_cost(4 * INSN_COST);
16326  format %{ "ldrd   $dst,$mem\t# vector (64 bits)" %}
16327  ins_encode( aarch64_enc_ldrvD(dst, mem) );
16328  ins_pipe(vload_reg_mem64);
16329%}
16330
16331// Load Vector (128 bits)
16332instruct loadV16(vecX dst, vmem16 mem)
16333%{
16334  predicate(n->as_LoadVector()->memory_size() == 16);
16335  match(Set dst (LoadVector mem));
16336  ins_cost(4 * INSN_COST);
16337  format %{ "ldrq   $dst,$mem\t# vector (128 bits)" %}
16338  ins_encode( aarch64_enc_ldrvQ(dst, mem) );
16339  ins_pipe(vload_reg_mem128);
16340%}
16341
16342// Store Vector (32 bits)
16343instruct storeV4(vecD src, vmem4 mem)
16344%{
16345  predicate(n->as_StoreVector()->memory_size() == 4);
16346  match(Set mem (StoreVector mem src));
16347  ins_cost(4 * INSN_COST);
16348  format %{ "strs   $mem,$src\t# vector (32 bits)" %}
16349  ins_encode( aarch64_enc_strvS(src, mem) );
16350  ins_pipe(vstore_reg_mem64);
16351%}
16352
16353// Store Vector (64 bits)
16354instruct storeV8(vecD src, vmem8 mem)
16355%{
16356  predicate(n->as_StoreVector()->memory_size() == 8);
16357  match(Set mem (StoreVector mem src));
16358  ins_cost(4 * INSN_COST);
16359  format %{ "strd   $mem,$src\t# vector (64 bits)" %}
16360  ins_encode( aarch64_enc_strvD(src, mem) );
16361  ins_pipe(vstore_reg_mem64);
16362%}
16363
16364// Store Vector (128 bits)
16365instruct storeV16(vecX src, vmem16 mem)
16366%{
16367  predicate(n->as_StoreVector()->memory_size() == 16);
16368  match(Set mem (StoreVector mem src));
16369  ins_cost(4 * INSN_COST);
16370  format %{ "strq   $mem,$src\t# vector (128 bits)" %}
16371  ins_encode( aarch64_enc_strvQ(src, mem) );
16372  ins_pipe(vstore_reg_mem128);
16373%}
16374
16375instruct replicate8B(vecD dst, iRegIorL2I src)
16376%{
16377  predicate(n->as_Vector()->length() == 4 ||
16378            n->as_Vector()->length() == 8);
16379  match(Set dst (ReplicateB src));
16380  ins_cost(INSN_COST);
16381  format %{ "dup  $dst, $src\t# vector (8B)" %}
16382  ins_encode %{
16383    __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg));
16384  %}
16385  ins_pipe(vdup_reg_reg64);
16386%}
16387
16388instruct replicate16B(vecX dst, iRegIorL2I src)
16389%{
16390  predicate(n->as_Vector()->length() == 16);
16391  match(Set dst (ReplicateB src));
16392  ins_cost(INSN_COST);
16393  format %{ "dup  $dst, $src\t# vector (16B)" %}
16394  ins_encode %{
16395    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg));
16396  %}
16397  ins_pipe(vdup_reg_reg128);
16398%}
16399
16400instruct replicate8B_imm(vecD dst, immI con)
16401%{
16402  predicate(n->as_Vector()->length() == 4 ||
16403            n->as_Vector()->length() == 8);
16404  match(Set dst (ReplicateB con));
16405  ins_cost(INSN_COST);
16406  format %{ "movi  $dst, $con\t# vector(8B)" %}
16407  ins_encode %{
16408    __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant & 0xff);
16409  %}
16410  ins_pipe(vmovi_reg_imm64);
16411%}
16412
16413instruct replicate16B_imm(vecX dst, immI con)
16414%{
16415  predicate(n->as_Vector()->length() == 16);
16416  match(Set dst (ReplicateB con));
16417  ins_cost(INSN_COST);
16418  format %{ "movi  $dst, $con\t# vector(16B)" %}
16419  ins_encode %{
16420    __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff);
16421  %}
16422  ins_pipe(vmovi_reg_imm128);
16423%}
16424
16425instruct replicate4S(vecD dst, iRegIorL2I src)
16426%{
16427  predicate(n->as_Vector()->length() == 2 ||
16428            n->as_Vector()->length() == 4);
16429  match(Set dst (ReplicateS src));
16430  ins_cost(INSN_COST);
16431  format %{ "dup  $dst, $src\t# vector (4S)" %}
16432  ins_encode %{
16433    __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg));
16434  %}
16435  ins_pipe(vdup_reg_reg64);
16436%}
16437
16438instruct replicate8S(vecX dst, iRegIorL2I src)
16439%{
16440  predicate(n->as_Vector()->length() == 8);
16441  match(Set dst (ReplicateS src));
16442  ins_cost(INSN_COST);
16443  format %{ "dup  $dst, $src\t# vector (8S)" %}
16444  ins_encode %{
16445    __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg));
16446  %}
16447  ins_pipe(vdup_reg_reg128);
16448%}
16449
16450instruct replicate4S_imm(vecD dst, immI con)
16451%{
16452  predicate(n->as_Vector()->length() == 2 ||
16453            n->as_Vector()->length() == 4);
16454  match(Set dst (ReplicateS con));
16455  ins_cost(INSN_COST);
16456  format %{ "movi  $dst, $con\t# vector(4H)" %}
16457  ins_encode %{
16458    __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant & 0xffff);
16459  %}
16460  ins_pipe(vmovi_reg_imm64);
16461%}
16462
16463instruct replicate8S_imm(vecX dst, immI con)
16464%{
16465  predicate(n->as_Vector()->length() == 8);
16466  match(Set dst (ReplicateS con));
16467  ins_cost(INSN_COST);
16468  format %{ "movi  $dst, $con\t# vector(8H)" %}
16469  ins_encode %{
16470    __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff);
16471  %}
16472  ins_pipe(vmovi_reg_imm128);
16473%}
16474
16475instruct replicate2I(vecD dst, iRegIorL2I src)
16476%{
16477  predicate(n->as_Vector()->length() == 2);
16478  match(Set dst (ReplicateI src));
16479  ins_cost(INSN_COST);
16480  format %{ "dup  $dst, $src\t# vector (2I)" %}
16481  ins_encode %{
16482    __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg));
16483  %}
16484  ins_pipe(vdup_reg_reg64);
16485%}
16486
16487instruct replicate4I(vecX dst, iRegIorL2I src)
16488%{
16489  predicate(n->as_Vector()->length() == 4);
16490  match(Set dst (ReplicateI src));
16491  ins_cost(INSN_COST);
16492  format %{ "dup  $dst, $src\t# vector (4I)" %}
16493  ins_encode %{
16494    __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg));
16495  %}
16496  ins_pipe(vdup_reg_reg128);
16497%}
16498
16499instruct replicate2I_imm(vecD dst, immI con)
16500%{
16501  predicate(n->as_Vector()->length() == 2);
16502  match(Set dst (ReplicateI con));
16503  ins_cost(INSN_COST);
16504  format %{ "movi  $dst, $con\t# vector(2I)" %}
16505  ins_encode %{
16506    __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant);
16507  %}
16508  ins_pipe(vmovi_reg_imm64);
16509%}
16510
16511instruct replicate4I_imm(vecX dst, immI con)
16512%{
16513  predicate(n->as_Vector()->length() == 4);
16514  match(Set dst (ReplicateI con));
16515  ins_cost(INSN_COST);
16516  format %{ "movi  $dst, $con\t# vector(4I)" %}
16517  ins_encode %{
16518    __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant);
16519  %}
16520  ins_pipe(vmovi_reg_imm128);
16521%}
16522
16523instruct replicate2L(vecX dst, iRegL src)
16524%{
16525  predicate(n->as_Vector()->length() == 2);
16526  match(Set dst (ReplicateL src));
16527  ins_cost(INSN_COST);
16528  format %{ "dup  $dst, $src\t# vector (2L)" %}
16529  ins_encode %{
16530    __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg));
16531  %}
16532  ins_pipe(vdup_reg_reg128);
16533%}
16534
16535instruct replicate2L_zero(vecX dst, immI0 zero)
16536%{
16537  predicate(n->as_Vector()->length() == 2);
16538  match(Set dst (ReplicateI zero));
16539  ins_cost(INSN_COST);
16540  format %{ "movi  $dst, $zero\t# vector(4I)" %}
16541  ins_encode %{
16542    __ eor(as_FloatRegister($dst$$reg), __ T16B,
16543           as_FloatRegister($dst$$reg),
16544           as_FloatRegister($dst$$reg));
16545  %}
16546  ins_pipe(vmovi_reg_imm128);
16547%}
16548
16549instruct replicate2F(vecD dst, vRegF src)
16550%{
16551  predicate(n->as_Vector()->length() == 2);
16552  match(Set dst (ReplicateF src));
16553  ins_cost(INSN_COST);
16554  format %{ "dup  $dst, $src\t# vector (2F)" %}
16555  ins_encode %{
16556    __ dup(as_FloatRegister($dst$$reg), __ T2S,
16557           as_FloatRegister($src$$reg));
16558  %}
16559  ins_pipe(vdup_reg_freg64);
16560%}
16561
16562instruct replicate4F(vecX dst, vRegF src)
16563%{
16564  predicate(n->as_Vector()->length() == 4);
16565  match(Set dst (ReplicateF src));
16566  ins_cost(INSN_COST);
16567  format %{ "dup  $dst, $src\t# vector (4F)" %}
16568  ins_encode %{
16569    __ dup(as_FloatRegister($dst$$reg), __ T4S,
16570           as_FloatRegister($src$$reg));
16571  %}
16572  ins_pipe(vdup_reg_freg128);
16573%}
16574
16575instruct replicate2D(vecX dst, vRegD src)
16576%{
16577  predicate(n->as_Vector()->length() == 2);
16578  match(Set dst (ReplicateD src));
16579  ins_cost(INSN_COST);
16580  format %{ "dup  $dst, $src\t# vector (2D)" %}
16581  ins_encode %{
16582    __ dup(as_FloatRegister($dst$$reg), __ T2D,
16583           as_FloatRegister($src$$reg));
16584  %}
16585  ins_pipe(vdup_reg_dreg128);
16586%}
16587
16588// ====================REDUCTION ARITHMETIC====================================
16589
16590instruct reduce_add2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegINoSp tmp, iRegINoSp tmp2)
16591%{
16592  match(Set dst (AddReductionVI src1 src2));
16593  ins_cost(INSN_COST);
16594  effect(TEMP tmp, TEMP tmp2);
16595  format %{ "umov  $tmp, $src2, S, 0\n\t"
16596            "umov  $tmp2, $src2, S, 1\n\t"
16597            "addw  $dst, $src1, $tmp\n\t"
16598            "addw  $dst, $dst, $tmp2\t add reduction2i"
16599  %}
16600  ins_encode %{
16601    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
16602    __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ S, 1);
16603    __ addw($dst$$Register, $src1$$Register, $tmp$$Register);
16604    __ addw($dst$$Register, $dst$$Register, $tmp2$$Register);
16605  %}
16606  ins_pipe(pipe_class_default);
16607%}
16608
16609instruct reduce_add4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegINoSp tmp2)
16610%{
16611  match(Set dst (AddReductionVI src1 src2));
16612  ins_cost(INSN_COST);
16613  effect(TEMP tmp, TEMP tmp2);
16614  format %{ "addv  $tmp, T4S, $src2\n\t"
16615            "umov  $tmp2, $tmp, S, 0\n\t"
16616            "addw  $dst, $tmp2, $src1\t add reduction4i"
16617  %}
16618  ins_encode %{
16619    __ addv(as_FloatRegister($tmp$$reg), __ T4S,
16620            as_FloatRegister($src2$$reg));
16621    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
16622    __ addw($dst$$Register, $tmp2$$Register, $src1$$Register);
16623  %}
16624  ins_pipe(pipe_class_default);
16625%}
16626
16627instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegINoSp tmp)
16628%{
16629  match(Set dst (MulReductionVI src1 src2));
16630  ins_cost(INSN_COST);
16631  effect(TEMP tmp, TEMP dst);
16632  format %{ "umov  $tmp, $src2, S, 0\n\t"
16633            "mul   $dst, $tmp, $src1\n\t"
16634            "umov  $tmp, $src2, S, 1\n\t"
16635            "mul   $dst, $tmp, $dst\t mul reduction2i\n\t"
16636  %}
16637  ins_encode %{
16638    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
16639    __ mul($dst$$Register, $tmp$$Register, $src1$$Register);
16640    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 1);
16641    __ mul($dst$$Register, $tmp$$Register, $dst$$Register);
16642  %}
16643  ins_pipe(pipe_class_default);
16644%}
16645
16646instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegINoSp tmp2)
16647%{
16648  match(Set dst (MulReductionVI src1 src2));
16649  ins_cost(INSN_COST);
16650  effect(TEMP tmp, TEMP tmp2, TEMP dst);
16651  format %{ "ins   $tmp, $src2, 0, 1\n\t"
16652            "mul   $tmp, $tmp, $src2\n\t"
16653            "umov  $tmp2, $tmp, S, 0\n\t"
16654            "mul   $dst, $tmp2, $src1\n\t"
16655            "umov  $tmp2, $tmp, S, 1\n\t"
16656            "mul   $dst, $tmp2, $dst\t mul reduction4i\n\t"
16657  %}
16658  ins_encode %{
16659    __ ins(as_FloatRegister($tmp$$reg), __ D,
16660           as_FloatRegister($src2$$reg), 0, 1);
16661    __ mulv(as_FloatRegister($tmp$$reg), __ T2S,
16662           as_FloatRegister($tmp$$reg), as_FloatRegister($src2$$reg));
16663    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
16664    __ mul($dst$$Register, $tmp2$$Register, $src1$$Register);
16665    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 1);
16666    __ mul($dst$$Register, $tmp2$$Register, $dst$$Register);
16667  %}
16668  ins_pipe(pipe_class_default);
16669%}
16670
16671instruct reduce_add2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
16672%{
16673  match(Set dst (AddReductionVF src1 src2));
16674  ins_cost(INSN_COST);
16675  effect(TEMP tmp, TEMP dst);
16676  format %{ "fadds $dst, $src1, $src2\n\t"
16677            "ins   $tmp, S, $src2, 0, 1\n\t"
16678            "fadds $dst, $dst, $tmp\t add reduction2f"
16679  %}
16680  ins_encode %{
16681    __ fadds(as_FloatRegister($dst$$reg),
16682             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
16683    __ ins(as_FloatRegister($tmp$$reg), __ S,
16684           as_FloatRegister($src2$$reg), 0, 1);
16685    __ fadds(as_FloatRegister($dst$$reg),
16686             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
16687  %}
16688  ins_pipe(pipe_class_default);
16689%}
16690
16691instruct reduce_add4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
16692%{
16693  match(Set dst (AddReductionVF src1 src2));
16694  ins_cost(INSN_COST);
16695  effect(TEMP tmp, TEMP dst);
16696  format %{ "fadds $dst, $src1, $src2\n\t"
16697            "ins   $tmp, S, $src2, 0, 1\n\t"
16698            "fadds $dst, $dst, $tmp\n\t"
16699            "ins   $tmp, S, $src2, 0, 2\n\t"
16700            "fadds $dst, $dst, $tmp\n\t"
16701            "ins   $tmp, S, $src2, 0, 3\n\t"
16702            "fadds $dst, $dst, $tmp\t add reduction4f"
16703  %}
16704  ins_encode %{
16705    __ fadds(as_FloatRegister($dst$$reg),
16706             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
16707    __ ins(as_FloatRegister($tmp$$reg), __ S,
16708           as_FloatRegister($src2$$reg), 0, 1);
16709    __ fadds(as_FloatRegister($dst$$reg),
16710             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
16711    __ ins(as_FloatRegister($tmp$$reg), __ S,
16712           as_FloatRegister($src2$$reg), 0, 2);
16713    __ fadds(as_FloatRegister($dst$$reg),
16714             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
16715    __ ins(as_FloatRegister($tmp$$reg), __ S,
16716           as_FloatRegister($src2$$reg), 0, 3);
16717    __ fadds(as_FloatRegister($dst$$reg),
16718             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
16719  %}
16720  ins_pipe(pipe_class_default);
16721%}
16722
16723instruct reduce_mul2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
16724%{
16725  match(Set dst (MulReductionVF src1 src2));
16726  ins_cost(INSN_COST);
16727  effect(TEMP tmp, TEMP dst);
16728  format %{ "fmuls $dst, $src1, $src2\n\t"
16729            "ins   $tmp, S, $src2, 0, 1\n\t"
16730            "fmuls $dst, $dst, $tmp\t add reduction4f"
16731  %}
16732  ins_encode %{
16733    __ fmuls(as_FloatRegister($dst$$reg),
16734             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
16735    __ ins(as_FloatRegister($tmp$$reg), __ S,
16736           as_FloatRegister($src2$$reg), 0, 1);
16737    __ fmuls(as_FloatRegister($dst$$reg),
16738             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
16739  %}
16740  ins_pipe(pipe_class_default);
16741%}
16742
16743instruct reduce_mul4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
16744%{
16745  match(Set dst (MulReductionVF src1 src2));
16746  ins_cost(INSN_COST);
16747  effect(TEMP tmp, TEMP dst);
16748  format %{ "fmuls $dst, $src1, $src2\n\t"
16749            "ins   $tmp, S, $src2, 0, 1\n\t"
16750            "fmuls $dst, $dst, $tmp\n\t"
16751            "ins   $tmp, S, $src2, 0, 2\n\t"
16752            "fmuls $dst, $dst, $tmp\n\t"
16753            "ins   $tmp, S, $src2, 0, 3\n\t"
16754            "fmuls $dst, $dst, $tmp\t add reduction4f"
16755  %}
16756  ins_encode %{
16757    __ fmuls(as_FloatRegister($dst$$reg),
16758             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
16759    __ ins(as_FloatRegister($tmp$$reg), __ S,
16760           as_FloatRegister($src2$$reg), 0, 1);
16761    __ fmuls(as_FloatRegister($dst$$reg),
16762             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
16763    __ ins(as_FloatRegister($tmp$$reg), __ S,
16764           as_FloatRegister($src2$$reg), 0, 2);
16765    __ fmuls(as_FloatRegister($dst$$reg),
16766             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
16767    __ ins(as_FloatRegister($tmp$$reg), __ S,
16768           as_FloatRegister($src2$$reg), 0, 3);
16769    __ fmuls(as_FloatRegister($dst$$reg),
16770             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
16771  %}
16772  ins_pipe(pipe_class_default);
16773%}
16774
16775instruct reduce_add2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
16776%{
16777  match(Set dst (AddReductionVD src1 src2));
16778  ins_cost(INSN_COST);
16779  effect(TEMP tmp, TEMP dst);
16780  format %{ "faddd $dst, $src1, $src2\n\t"
16781            "ins   $tmp, D, $src2, 0, 1\n\t"
16782            "faddd $dst, $dst, $tmp\t add reduction2d"
16783  %}
16784  ins_encode %{
16785    __ faddd(as_FloatRegister($dst$$reg),
16786             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
16787    __ ins(as_FloatRegister($tmp$$reg), __ D,
16788           as_FloatRegister($src2$$reg), 0, 1);
16789    __ faddd(as_FloatRegister($dst$$reg),
16790             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
16791  %}
16792  ins_pipe(pipe_class_default);
16793%}
16794
16795instruct reduce_mul2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
16796%{
16797  match(Set dst (MulReductionVD src1 src2));
16798  ins_cost(INSN_COST);
16799  effect(TEMP tmp, TEMP dst);
16800  format %{ "fmuld $dst, $src1, $src2\n\t"
16801            "ins   $tmp, D, $src2, 0, 1\n\t"
16802            "fmuld $dst, $dst, $tmp\t add reduction2d"
16803  %}
16804  ins_encode %{
16805    __ fmuld(as_FloatRegister($dst$$reg),
16806             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
16807    __ ins(as_FloatRegister($tmp$$reg), __ D,
16808           as_FloatRegister($src2$$reg), 0, 1);
16809    __ fmuld(as_FloatRegister($dst$$reg),
16810             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
16811  %}
16812  ins_pipe(pipe_class_default);
16813%}
16814
16815// ====================VECTOR ARITHMETIC=======================================
16816
16817// --------------------------------- ADD --------------------------------------
16818
16819instruct vadd8B(vecD dst, vecD src1, vecD src2)
16820%{
16821  predicate(n->as_Vector()->length() == 4 ||
16822            n->as_Vector()->length() == 8);
16823  match(Set dst (AddVB src1 src2));
16824  ins_cost(INSN_COST);
16825  format %{ "addv  $dst,$src1,$src2\t# vector (8B)" %}
16826  ins_encode %{
16827    __ addv(as_FloatRegister($dst$$reg), __ T8B,
16828            as_FloatRegister($src1$$reg),
16829            as_FloatRegister($src2$$reg));
16830  %}
16831  ins_pipe(vdop64);
16832%}
16833
16834instruct vadd16B(vecX dst, vecX src1, vecX src2)
16835%{
16836  predicate(n->as_Vector()->length() == 16);
16837  match(Set dst (AddVB src1 src2));
16838  ins_cost(INSN_COST);
16839  format %{ "addv  $dst,$src1,$src2\t# vector (16B)" %}
16840  ins_encode %{
16841    __ addv(as_FloatRegister($dst$$reg), __ T16B,
16842            as_FloatRegister($src1$$reg),
16843            as_FloatRegister($src2$$reg));
16844  %}
16845  ins_pipe(vdop128);
16846%}
16847
16848instruct vadd4S(vecD dst, vecD src1, vecD src2)
16849%{
16850  predicate(n->as_Vector()->length() == 2 ||
16851            n->as_Vector()->length() == 4);
16852  match(Set dst (AddVS src1 src2));
16853  ins_cost(INSN_COST);
16854  format %{ "addv  $dst,$src1,$src2\t# vector (4H)" %}
16855  ins_encode %{
16856    __ addv(as_FloatRegister($dst$$reg), __ T4H,
16857            as_FloatRegister($src1$$reg),
16858            as_FloatRegister($src2$$reg));
16859  %}
16860  ins_pipe(vdop64);
16861%}
16862
16863instruct vadd8S(vecX dst, vecX src1, vecX src2)
16864%{
16865  predicate(n->as_Vector()->length() == 8);
16866  match(Set dst (AddVS src1 src2));
16867  ins_cost(INSN_COST);
16868  format %{ "addv  $dst,$src1,$src2\t# vector (8H)" %}
16869  ins_encode %{
16870    __ addv(as_FloatRegister($dst$$reg), __ T8H,
16871            as_FloatRegister($src1$$reg),
16872            as_FloatRegister($src2$$reg));
16873  %}
16874  ins_pipe(vdop128);
16875%}
16876
16877instruct vadd2I(vecD dst, vecD src1, vecD src2)
16878%{
16879  predicate(n->as_Vector()->length() == 2);
16880  match(Set dst (AddVI src1 src2));
16881  ins_cost(INSN_COST);
16882  format %{ "addv  $dst,$src1,$src2\t# vector (2S)" %}
16883  ins_encode %{
16884    __ addv(as_FloatRegister($dst$$reg), __ T2S,
16885            as_FloatRegister($src1$$reg),
16886            as_FloatRegister($src2$$reg));
16887  %}
16888  ins_pipe(vdop64);
16889%}
16890
16891instruct vadd4I(vecX dst, vecX src1, vecX src2)
16892%{
16893  predicate(n->as_Vector()->length() == 4);
16894  match(Set dst (AddVI src1 src2));
16895  ins_cost(INSN_COST);
16896  format %{ "addv  $dst,$src1,$src2\t# vector (4S)" %}
16897  ins_encode %{
16898    __ addv(as_FloatRegister($dst$$reg), __ T4S,
16899            as_FloatRegister($src1$$reg),
16900            as_FloatRegister($src2$$reg));
16901  %}
16902  ins_pipe(vdop128);
16903%}
16904
16905instruct vadd2L(vecX dst, vecX src1, vecX src2)
16906%{
16907  predicate(n->as_Vector()->length() == 2);
16908  match(Set dst (AddVL src1 src2));
16909  ins_cost(INSN_COST);
16910  format %{ "addv  $dst,$src1,$src2\t# vector (2L)" %}
16911  ins_encode %{
16912    __ addv(as_FloatRegister($dst$$reg), __ T2D,
16913            as_FloatRegister($src1$$reg),
16914            as_FloatRegister($src2$$reg));
16915  %}
16916  ins_pipe(vdop128);
16917%}
16918
16919instruct vadd2F(vecD dst, vecD src1, vecD src2)
16920%{
16921  predicate(n->as_Vector()->length() == 2);
16922  match(Set dst (AddVF src1 src2));
16923  ins_cost(INSN_COST);
16924  format %{ "fadd  $dst,$src1,$src2\t# vector (2S)" %}
16925  ins_encode %{
16926    __ fadd(as_FloatRegister($dst$$reg), __ T2S,
16927            as_FloatRegister($src1$$reg),
16928            as_FloatRegister($src2$$reg));
16929  %}
16930  ins_pipe(vdop_fp64);
16931%}
16932
16933instruct vadd4F(vecX dst, vecX src1, vecX src2)
16934%{
16935  predicate(n->as_Vector()->length() == 4);
16936  match(Set dst (AddVF src1 src2));
16937  ins_cost(INSN_COST);
16938  format %{ "fadd  $dst,$src1,$src2\t# vector (4S)" %}
16939  ins_encode %{
16940    __ fadd(as_FloatRegister($dst$$reg), __ T4S,
16941            as_FloatRegister($src1$$reg),
16942            as_FloatRegister($src2$$reg));
16943  %}
16944  ins_pipe(vdop_fp128);
16945%}
16946
16947instruct vadd2D(vecX dst, vecX src1, vecX src2)
16948%{
16949  match(Set dst (AddVD src1 src2));
16950  ins_cost(INSN_COST);
16951  format %{ "fadd  $dst,$src1,$src2\t# vector (2D)" %}
16952  ins_encode %{
16953    __ fadd(as_FloatRegister($dst$$reg), __ T2D,
16954            as_FloatRegister($src1$$reg),
16955            as_FloatRegister($src2$$reg));
16956  %}
16957  ins_pipe(vdop_fp128);
16958%}
16959
16960// --------------------------------- SUB --------------------------------------
16961
16962instruct vsub8B(vecD dst, vecD src1, vecD src2)
16963%{
16964  predicate(n->as_Vector()->length() == 4 ||
16965            n->as_Vector()->length() == 8);
16966  match(Set dst (SubVB src1 src2));
16967  ins_cost(INSN_COST);
16968  format %{ "subv  $dst,$src1,$src2\t# vector (8B)" %}
16969  ins_encode %{
16970    __ subv(as_FloatRegister($dst$$reg), __ T8B,
16971            as_FloatRegister($src1$$reg),
16972            as_FloatRegister($src2$$reg));
16973  %}
16974  ins_pipe(vdop64);
16975%}
16976
16977instruct vsub16B(vecX dst, vecX src1, vecX src2)
16978%{
16979  predicate(n->as_Vector()->length() == 16);
16980  match(Set dst (SubVB src1 src2));
16981  ins_cost(INSN_COST);
16982  format %{ "subv  $dst,$src1,$src2\t# vector (16B)" %}
16983  ins_encode %{
16984    __ subv(as_FloatRegister($dst$$reg), __ T16B,
16985            as_FloatRegister($src1$$reg),
16986            as_FloatRegister($src2$$reg));
16987  %}
16988  ins_pipe(vdop128);
16989%}
16990
16991instruct vsub4S(vecD dst, vecD src1, vecD src2)
16992%{
16993  predicate(n->as_Vector()->length() == 2 ||
16994            n->as_Vector()->length() == 4);
16995  match(Set dst (SubVS src1 src2));
16996  ins_cost(INSN_COST);
16997  format %{ "subv  $dst,$src1,$src2\t# vector (4H)" %}
16998  ins_encode %{
16999    __ subv(as_FloatRegister($dst$$reg), __ T4H,
17000            as_FloatRegister($src1$$reg),
17001            as_FloatRegister($src2$$reg));
17002  %}
17003  ins_pipe(vdop64);
17004%}
17005
17006instruct vsub8S(vecX dst, vecX src1, vecX src2)
17007%{
17008  predicate(n->as_Vector()->length() == 8);
17009  match(Set dst (SubVS src1 src2));
17010  ins_cost(INSN_COST);
17011  format %{ "subv  $dst,$src1,$src2\t# vector (8H)" %}
17012  ins_encode %{
17013    __ subv(as_FloatRegister($dst$$reg), __ T8H,
17014            as_FloatRegister($src1$$reg),
17015            as_FloatRegister($src2$$reg));
17016  %}
17017  ins_pipe(vdop128);
17018%}
17019
17020instruct vsub2I(vecD dst, vecD src1, vecD src2)
17021%{
17022  predicate(n->as_Vector()->length() == 2);
17023  match(Set dst (SubVI src1 src2));
17024  ins_cost(INSN_COST);
17025  format %{ "subv  $dst,$src1,$src2\t# vector (2S)" %}
17026  ins_encode %{
17027    __ subv(as_FloatRegister($dst$$reg), __ T2S,
17028            as_FloatRegister($src1$$reg),
17029            as_FloatRegister($src2$$reg));
17030  %}
17031  ins_pipe(vdop64);
17032%}
17033
17034instruct vsub4I(vecX dst, vecX src1, vecX src2)
17035%{
17036  predicate(n->as_Vector()->length() == 4);
17037  match(Set dst (SubVI src1 src2));
17038  ins_cost(INSN_COST);
17039  format %{ "subv  $dst,$src1,$src2\t# vector (4S)" %}
17040  ins_encode %{
17041    __ subv(as_FloatRegister($dst$$reg), __ T4S,
17042            as_FloatRegister($src1$$reg),
17043            as_FloatRegister($src2$$reg));
17044  %}
17045  ins_pipe(vdop128);
17046%}
17047
17048instruct vsub2L(vecX dst, vecX src1, vecX src2)
17049%{
17050  predicate(n->as_Vector()->length() == 2);
17051  match(Set dst (SubVL src1 src2));
17052  ins_cost(INSN_COST);
17053  format %{ "subv  $dst,$src1,$src2\t# vector (2L)" %}
17054  ins_encode %{
17055    __ subv(as_FloatRegister($dst$$reg), __ T2D,
17056            as_FloatRegister($src1$$reg),
17057            as_FloatRegister($src2$$reg));
17058  %}
17059  ins_pipe(vdop128);
17060%}
17061
17062instruct vsub2F(vecD dst, vecD src1, vecD src2)
17063%{
17064  predicate(n->as_Vector()->length() == 2);
17065  match(Set dst (SubVF src1 src2));
17066  ins_cost(INSN_COST);
17067  format %{ "fsub  $dst,$src1,$src2\t# vector (2S)" %}
17068  ins_encode %{
17069    __ fsub(as_FloatRegister($dst$$reg), __ T2S,
17070            as_FloatRegister($src1$$reg),
17071            as_FloatRegister($src2$$reg));
17072  %}
17073  ins_pipe(vdop_fp64);
17074%}
17075
17076instruct vsub4F(vecX dst, vecX src1, vecX src2)
17077%{
17078  predicate(n->as_Vector()->length() == 4);
17079  match(Set dst (SubVF src1 src2));
17080  ins_cost(INSN_COST);
17081  format %{ "fsub  $dst,$src1,$src2\t# vector (4S)" %}
17082  ins_encode %{
17083    __ fsub(as_FloatRegister($dst$$reg), __ T4S,
17084            as_FloatRegister($src1$$reg),
17085            as_FloatRegister($src2$$reg));
17086  %}
17087  ins_pipe(vdop_fp128);
17088%}
17089
17090instruct vsub2D(vecX dst, vecX src1, vecX src2)
17091%{
17092  predicate(n->as_Vector()->length() == 2);
17093  match(Set dst (SubVD src1 src2));
17094  ins_cost(INSN_COST);
17095  format %{ "fsub  $dst,$src1,$src2\t# vector (2D)" %}
17096  ins_encode %{
17097    __ fsub(as_FloatRegister($dst$$reg), __ T2D,
17098            as_FloatRegister($src1$$reg),
17099            as_FloatRegister($src2$$reg));
17100  %}
17101  ins_pipe(vdop_fp128);
17102%}
17103
17104// --------------------------------- MUL --------------------------------------
17105
17106instruct vmul4S(vecD dst, vecD src1, vecD src2)
17107%{
17108  predicate(n->as_Vector()->length() == 2 ||
17109            n->as_Vector()->length() == 4);
17110  match(Set dst (MulVS src1 src2));
17111  ins_cost(INSN_COST);
17112  format %{ "mulv  $dst,$src1,$src2\t# vector (4H)" %}
17113  ins_encode %{
17114    __ mulv(as_FloatRegister($dst$$reg), __ T4H,
17115            as_FloatRegister($src1$$reg),
17116            as_FloatRegister($src2$$reg));
17117  %}
17118  ins_pipe(vmul64);
17119%}
17120
17121instruct vmul8S(vecX dst, vecX src1, vecX src2)
17122%{
17123  predicate(n->as_Vector()->length() == 8);
17124  match(Set dst (MulVS src1 src2));
17125  ins_cost(INSN_COST);
17126  format %{ "mulv  $dst,$src1,$src2\t# vector (8H)" %}
17127  ins_encode %{
17128    __ mulv(as_FloatRegister($dst$$reg), __ T8H,
17129            as_FloatRegister($src1$$reg),
17130            as_FloatRegister($src2$$reg));
17131  %}
17132  ins_pipe(vmul128);
17133%}
17134
17135instruct vmul2I(vecD dst, vecD src1, vecD src2)
17136%{
17137  predicate(n->as_Vector()->length() == 2);
17138  match(Set dst (MulVI src1 src2));
17139  ins_cost(INSN_COST);
17140  format %{ "mulv  $dst,$src1,$src2\t# vector (2S)" %}
17141  ins_encode %{
17142    __ mulv(as_FloatRegister($dst$$reg), __ T2S,
17143            as_FloatRegister($src1$$reg),
17144            as_FloatRegister($src2$$reg));
17145  %}
17146  ins_pipe(vmul64);
17147%}
17148
17149instruct vmul4I(vecX dst, vecX src1, vecX src2)
17150%{
17151  predicate(n->as_Vector()->length() == 4);
17152  match(Set dst (MulVI src1 src2));
17153  ins_cost(INSN_COST);
17154  format %{ "mulv  $dst,$src1,$src2\t# vector (4S)" %}
17155  ins_encode %{
17156    __ mulv(as_FloatRegister($dst$$reg), __ T4S,
17157            as_FloatRegister($src1$$reg),
17158            as_FloatRegister($src2$$reg));
17159  %}
17160  ins_pipe(vmul128);
17161%}
17162
17163instruct vmul2F(vecD dst, vecD src1, vecD src2)
17164%{
17165  predicate(n->as_Vector()->length() == 2);
17166  match(Set dst (MulVF src1 src2));
17167  ins_cost(INSN_COST);
17168  format %{ "fmul  $dst,$src1,$src2\t# vector (2S)" %}
17169  ins_encode %{
17170    __ fmul(as_FloatRegister($dst$$reg), __ T2S,
17171            as_FloatRegister($src1$$reg),
17172            as_FloatRegister($src2$$reg));
17173  %}
17174  ins_pipe(vmuldiv_fp64);
17175%}
17176
17177instruct vmul4F(vecX dst, vecX src1, vecX src2)
17178%{
17179  predicate(n->as_Vector()->length() == 4);
17180  match(Set dst (MulVF src1 src2));
17181  ins_cost(INSN_COST);
17182  format %{ "fmul  $dst,$src1,$src2\t# vector (4S)" %}
17183  ins_encode %{
17184    __ fmul(as_FloatRegister($dst$$reg), __ T4S,
17185            as_FloatRegister($src1$$reg),
17186            as_FloatRegister($src2$$reg));
17187  %}
17188  ins_pipe(vmuldiv_fp128);
17189%}
17190
17191instruct vmul2D(vecX dst, vecX src1, vecX src2)
17192%{
17193  predicate(n->as_Vector()->length() == 2);
17194  match(Set dst (MulVD src1 src2));
17195  ins_cost(INSN_COST);
17196  format %{ "fmul  $dst,$src1,$src2\t# vector (2D)" %}
17197  ins_encode %{
17198    __ fmul(as_FloatRegister($dst$$reg), __ T2D,
17199            as_FloatRegister($src1$$reg),
17200            as_FloatRegister($src2$$reg));
17201  %}
17202  ins_pipe(vmuldiv_fp128);
17203%}
17204
17205// --------------------------------- MLA --------------------------------------
17206
17207instruct vmla4S(vecD dst, vecD src1, vecD src2)
17208%{
17209  predicate(n->as_Vector()->length() == 2 ||
17210            n->as_Vector()->length() == 4);
17211  match(Set dst (AddVS dst (MulVS src1 src2)));
17212  ins_cost(INSN_COST);
17213  format %{ "mlav  $dst,$src1,$src2\t# vector (4H)" %}
17214  ins_encode %{
17215    __ mlav(as_FloatRegister($dst$$reg), __ T4H,
17216            as_FloatRegister($src1$$reg),
17217            as_FloatRegister($src2$$reg));
17218  %}
17219  ins_pipe(vmla64);
17220%}
17221
17222instruct vmla8S(vecX dst, vecX src1, vecX src2)
17223%{
17224  predicate(n->as_Vector()->length() == 8);
17225  match(Set dst (AddVS dst (MulVS src1 src2)));
17226  ins_cost(INSN_COST);
17227  format %{ "mlav  $dst,$src1,$src2\t# vector (8H)" %}
17228  ins_encode %{
17229    __ mlav(as_FloatRegister($dst$$reg), __ T8H,
17230            as_FloatRegister($src1$$reg),
17231            as_FloatRegister($src2$$reg));
17232  %}
17233  ins_pipe(vmla128);
17234%}
17235
17236instruct vmla2I(vecD dst, vecD src1, vecD src2)
17237%{
17238  predicate(n->as_Vector()->length() == 2);
17239  match(Set dst (AddVI dst (MulVI src1 src2)));
17240  ins_cost(INSN_COST);
17241  format %{ "mlav  $dst,$src1,$src2\t# vector (2S)" %}
17242  ins_encode %{
17243    __ mlav(as_FloatRegister($dst$$reg), __ T2S,
17244            as_FloatRegister($src1$$reg),
17245            as_FloatRegister($src2$$reg));
17246  %}
17247  ins_pipe(vmla64);
17248%}
17249
17250instruct vmla4I(vecX dst, vecX src1, vecX src2)
17251%{
17252  predicate(n->as_Vector()->length() == 4);
17253  match(Set dst (AddVI dst (MulVI src1 src2)));
17254  ins_cost(INSN_COST);
17255  format %{ "mlav  $dst,$src1,$src2\t# vector (4S)" %}
17256  ins_encode %{
17257    __ mlav(as_FloatRegister($dst$$reg), __ T4S,
17258            as_FloatRegister($src1$$reg),
17259            as_FloatRegister($src2$$reg));
17260  %}
17261  ins_pipe(vmla128);
17262%}
17263
17264// dst + src1 * src2
17265instruct vmla2F(vecD dst, vecD src1, vecD src2) %{
17266  predicate(UseFMA && n->as_Vector()->length() == 2);
17267  match(Set dst (FmaVF  dst (Binary src1 src2)));
17268  format %{ "fmla  $dst,$src1,$src2\t# vector (2S)" %}
17269  ins_cost(INSN_COST);
17270  ins_encode %{
17271    __ fmla(as_FloatRegister($dst$$reg), __ T2S,
17272            as_FloatRegister($src1$$reg),
17273            as_FloatRegister($src2$$reg));
17274  %}
17275  ins_pipe(vmuldiv_fp64);
17276%}
17277
17278// dst + src1 * src2
17279instruct vmla4F(vecX dst, vecX src1, vecX src2) %{
17280  predicate(UseFMA && n->as_Vector()->length() == 4);
17281  match(Set dst (FmaVF  dst (Binary src1 src2)));
17282  format %{ "fmla  $dst,$src1,$src2\t# vector (4S)" %}
17283  ins_cost(INSN_COST);
17284  ins_encode %{
17285    __ fmla(as_FloatRegister($dst$$reg), __ T4S,
17286            as_FloatRegister($src1$$reg),
17287            as_FloatRegister($src2$$reg));
17288  %}
17289  ins_pipe(vmuldiv_fp128);
17290%}
17291
17292// dst + src1 * src2
17293instruct vmla2D(vecX dst, vecX src1, vecX src2) %{
17294  predicate(UseFMA && n->as_Vector()->length() == 2);
17295  match(Set dst (FmaVD  dst (Binary src1 src2)));
17296  format %{ "fmla  $dst,$src1,$src2\t# vector (2D)" %}
17297  ins_cost(INSN_COST);
17298  ins_encode %{
17299    __ fmla(as_FloatRegister($dst$$reg), __ T2D,
17300            as_FloatRegister($src1$$reg),
17301            as_FloatRegister($src2$$reg));
17302  %}
17303  ins_pipe(vmuldiv_fp128);
17304%}
17305
17306// --------------------------------- MLS --------------------------------------
17307
17308instruct vmls4S(vecD dst, vecD src1, vecD src2)
17309%{
17310  predicate(n->as_Vector()->length() == 2 ||
17311            n->as_Vector()->length() == 4);
17312  match(Set dst (SubVS dst (MulVS src1 src2)));
17313  ins_cost(INSN_COST);
17314  format %{ "mlsv  $dst,$src1,$src2\t# vector (4H)" %}
17315  ins_encode %{
17316    __ mlsv(as_FloatRegister($dst$$reg), __ T4H,
17317            as_FloatRegister($src1$$reg),
17318            as_FloatRegister($src2$$reg));
17319  %}
17320  ins_pipe(vmla64);
17321%}
17322
17323instruct vmls8S(vecX dst, vecX src1, vecX src2)
17324%{
17325  predicate(n->as_Vector()->length() == 8);
17326  match(Set dst (SubVS dst (MulVS src1 src2)));
17327  ins_cost(INSN_COST);
17328  format %{ "mlsv  $dst,$src1,$src2\t# vector (8H)" %}
17329  ins_encode %{
17330    __ mlsv(as_FloatRegister($dst$$reg), __ T8H,
17331            as_FloatRegister($src1$$reg),
17332            as_FloatRegister($src2$$reg));
17333  %}
17334  ins_pipe(vmla128);
17335%}
17336
17337instruct vmls2I(vecD dst, vecD src1, vecD src2)
17338%{
17339  predicate(n->as_Vector()->length() == 2);
17340  match(Set dst (SubVI dst (MulVI src1 src2)));
17341  ins_cost(INSN_COST);
17342  format %{ "mlsv  $dst,$src1,$src2\t# vector (2S)" %}
17343  ins_encode %{
17344    __ mlsv(as_FloatRegister($dst$$reg), __ T2S,
17345            as_FloatRegister($src1$$reg),
17346            as_FloatRegister($src2$$reg));
17347  %}
17348  ins_pipe(vmla64);
17349%}
17350
17351instruct vmls4I(vecX dst, vecX src1, vecX src2)
17352%{
17353  predicate(n->as_Vector()->length() == 4);
17354  match(Set dst (SubVI dst (MulVI src1 src2)));
17355  ins_cost(INSN_COST);
17356  format %{ "mlsv  $dst,$src1,$src2\t# vector (4S)" %}
17357  ins_encode %{
17358    __ mlsv(as_FloatRegister($dst$$reg), __ T4S,
17359            as_FloatRegister($src1$$reg),
17360            as_FloatRegister($src2$$reg));
17361  %}
17362  ins_pipe(vmla128);
17363%}
17364
17365// dst - src1 * src2
17366instruct vmls2F(vecD dst, vecD src1, vecD src2) %{
17367  predicate(UseFMA && n->as_Vector()->length() == 2);
17368  match(Set dst (FmaVF  dst (Binary (NegVF src1) src2)));
17369  match(Set dst (FmaVF  dst (Binary src1 (NegVF src2))));
17370  format %{ "fmls  $dst,$src1,$src2\t# vector (2S)" %}
17371  ins_cost(INSN_COST);
17372  ins_encode %{
17373    __ fmls(as_FloatRegister($dst$$reg), __ T2S,
17374            as_FloatRegister($src1$$reg),
17375            as_FloatRegister($src2$$reg));
17376  %}
17377  ins_pipe(vmuldiv_fp64);
17378%}
17379
17380// dst - src1 * src2
17381instruct vmls4F(vecX dst, vecX src1, vecX src2) %{
17382  predicate(UseFMA && n->as_Vector()->length() == 4);
17383  match(Set dst (FmaVF  dst (Binary (NegVF src1) src2)));
17384  match(Set dst (FmaVF  dst (Binary src1 (NegVF src2))));
17385  format %{ "fmls  $dst,$src1,$src2\t# vector (4S)" %}
17386  ins_cost(INSN_COST);
17387  ins_encode %{
17388    __ fmls(as_FloatRegister($dst$$reg), __ T4S,
17389            as_FloatRegister($src1$$reg),
17390            as_FloatRegister($src2$$reg));
17391  %}
17392  ins_pipe(vmuldiv_fp128);
17393%}
17394
17395// dst - src1 * src2
17396instruct vmls2D(vecX dst, vecX src1, vecX src2) %{
17397  predicate(UseFMA && n->as_Vector()->length() == 2);
17398  match(Set dst (FmaVD  dst (Binary (NegVD src1) src2)));
17399  match(Set dst (FmaVD  dst (Binary src1 (NegVD src2))));
17400  format %{ "fmls  $dst,$src1,$src2\t# vector (2D)" %}
17401  ins_cost(INSN_COST);
17402  ins_encode %{
17403    __ fmls(as_FloatRegister($dst$$reg), __ T2D,
17404            as_FloatRegister($src1$$reg),
17405            as_FloatRegister($src2$$reg));
17406  %}
17407  ins_pipe(vmuldiv_fp128);
17408%}
17409
17410// --------------------------------- DIV --------------------------------------
17411
17412instruct vdiv2F(vecD dst, vecD src1, vecD src2)
17413%{
17414  predicate(n->as_Vector()->length() == 2);
17415  match(Set dst (DivVF src1 src2));
17416  ins_cost(INSN_COST);
17417  format %{ "fdiv  $dst,$src1,$src2\t# vector (2S)" %}
17418  ins_encode %{
17419    __ fdiv(as_FloatRegister($dst$$reg), __ T2S,
17420            as_FloatRegister($src1$$reg),
17421            as_FloatRegister($src2$$reg));
17422  %}
17423  ins_pipe(vmuldiv_fp64);
17424%}
17425
17426instruct vdiv4F(vecX dst, vecX src1, vecX src2)
17427%{
17428  predicate(n->as_Vector()->length() == 4);
17429  match(Set dst (DivVF src1 src2));
17430  ins_cost(INSN_COST);
17431  format %{ "fdiv  $dst,$src1,$src2\t# vector (4S)" %}
17432  ins_encode %{
17433    __ fdiv(as_FloatRegister($dst$$reg), __ T4S,
17434            as_FloatRegister($src1$$reg),
17435            as_FloatRegister($src2$$reg));
17436  %}
17437  ins_pipe(vmuldiv_fp128);
17438%}
17439
17440instruct vdiv2D(vecX dst, vecX src1, vecX src2)
17441%{
17442  predicate(n->as_Vector()->length() == 2);
17443  match(Set dst (DivVD src1 src2));
17444  ins_cost(INSN_COST);
17445  format %{ "fdiv  $dst,$src1,$src2\t# vector (2D)" %}
17446  ins_encode %{
17447    __ fdiv(as_FloatRegister($dst$$reg), __ T2D,
17448            as_FloatRegister($src1$$reg),
17449            as_FloatRegister($src2$$reg));
17450  %}
17451  ins_pipe(vmuldiv_fp128);
17452%}
17453
17454// --------------------------------- SQRT -------------------------------------
17455
17456instruct vsqrt2D(vecX dst, vecX src)
17457%{
17458  predicate(n->as_Vector()->length() == 2);
17459  match(Set dst (SqrtVD src));
17460  format %{ "fsqrt  $dst, $src\t# vector (2D)" %}
17461  ins_encode %{
17462    __ fsqrt(as_FloatRegister($dst$$reg), __ T2D,
17463             as_FloatRegister($src$$reg));
17464  %}
17465  ins_pipe(vsqrt_fp128);
17466%}
17467
17468// --------------------------------- ABS --------------------------------------
17469
17470instruct vabs2F(vecD dst, vecD src)
17471%{
17472  predicate(n->as_Vector()->length() == 2);
17473  match(Set dst (AbsVF src));
17474  ins_cost(INSN_COST * 3);
17475  format %{ "fabs  $dst,$src\t# vector (2S)" %}
17476  ins_encode %{
17477    __ fabs(as_FloatRegister($dst$$reg), __ T2S,
17478            as_FloatRegister($src$$reg));
17479  %}
17480  ins_pipe(vunop_fp64);
17481%}
17482
17483instruct vabs4F(vecX dst, vecX src)
17484%{
17485  predicate(n->as_Vector()->length() == 4);
17486  match(Set dst (AbsVF src));
17487  ins_cost(INSN_COST * 3);
17488  format %{ "fabs  $dst,$src\t# vector (4S)" %}
17489  ins_encode %{
17490    __ fabs(as_FloatRegister($dst$$reg), __ T4S,
17491            as_FloatRegister($src$$reg));
17492  %}
17493  ins_pipe(vunop_fp128);
17494%}
17495
17496instruct vabs2D(vecX dst, vecX src)
17497%{
17498  predicate(n->as_Vector()->length() == 2);
17499  match(Set dst (AbsVD src));
17500  ins_cost(INSN_COST * 3);
17501  format %{ "fabs  $dst,$src\t# vector (2D)" %}
17502  ins_encode %{
17503    __ fabs(as_FloatRegister($dst$$reg), __ T2D,
17504            as_FloatRegister($src$$reg));
17505  %}
17506  ins_pipe(vunop_fp128);
17507%}
17508
17509// --------------------------------- NEG --------------------------------------
17510
17511instruct vneg2F(vecD dst, vecD src)
17512%{
17513  predicate(n->as_Vector()->length() == 2);
17514  match(Set dst (NegVF src));
17515  ins_cost(INSN_COST * 3);
17516  format %{ "fneg  $dst,$src\t# vector (2S)" %}
17517  ins_encode %{
17518    __ fneg(as_FloatRegister($dst$$reg), __ T2S,
17519            as_FloatRegister($src$$reg));
17520  %}
17521  ins_pipe(vunop_fp64);
17522%}
17523
17524instruct vneg4F(vecX dst, vecX src)
17525%{
17526  predicate(n->as_Vector()->length() == 4);
17527  match(Set dst (NegVF src));
17528  ins_cost(INSN_COST * 3);
17529  format %{ "fneg  $dst,$src\t# vector (4S)" %}
17530  ins_encode %{
17531    __ fneg(as_FloatRegister($dst$$reg), __ T4S,
17532            as_FloatRegister($src$$reg));
17533  %}
17534  ins_pipe(vunop_fp128);
17535%}
17536
17537instruct vneg2D(vecX dst, vecX src)
17538%{
17539  predicate(n->as_Vector()->length() == 2);
17540  match(Set dst (NegVD src));
17541  ins_cost(INSN_COST * 3);
17542  format %{ "fneg  $dst,$src\t# vector (2D)" %}
17543  ins_encode %{
17544    __ fneg(as_FloatRegister($dst$$reg), __ T2D,
17545            as_FloatRegister($src$$reg));
17546  %}
17547  ins_pipe(vunop_fp128);
17548%}
17549
17550// --------------------------------- AND --------------------------------------
17551
17552instruct vand8B(vecD dst, vecD src1, vecD src2)
17553%{
17554  predicate(n->as_Vector()->length_in_bytes() == 4 ||
17555            n->as_Vector()->length_in_bytes() == 8);
17556  match(Set dst (AndV src1 src2));
17557  ins_cost(INSN_COST);
17558  format %{ "and  $dst,$src1,$src2\t# vector (8B)" %}
17559  ins_encode %{
17560    __ andr(as_FloatRegister($dst$$reg), __ T8B,
17561            as_FloatRegister($src1$$reg),
17562            as_FloatRegister($src2$$reg));
17563  %}
17564  ins_pipe(vlogical64);
17565%}
17566
17567instruct vand16B(vecX dst, vecX src1, vecX src2)
17568%{
17569  predicate(n->as_Vector()->length_in_bytes() == 16);
17570  match(Set dst (AndV src1 src2));
17571  ins_cost(INSN_COST);
17572  format %{ "and  $dst,$src1,$src2\t# vector (16B)" %}
17573  ins_encode %{
17574    __ andr(as_FloatRegister($dst$$reg), __ T16B,
17575            as_FloatRegister($src1$$reg),
17576            as_FloatRegister($src2$$reg));
17577  %}
17578  ins_pipe(vlogical128);
17579%}
17580
17581// --------------------------------- OR ---------------------------------------
17582
17583instruct vor8B(vecD dst, vecD src1, vecD src2)
17584%{
17585  predicate(n->as_Vector()->length_in_bytes() == 4 ||
17586            n->as_Vector()->length_in_bytes() == 8);
17587  match(Set dst (OrV src1 src2));
17588  ins_cost(INSN_COST);
17589  format %{ "and  $dst,$src1,$src2\t# vector (8B)" %}
17590  ins_encode %{
17591    __ orr(as_FloatRegister($dst$$reg), __ T8B,
17592            as_FloatRegister($src1$$reg),
17593            as_FloatRegister($src2$$reg));
17594  %}
17595  ins_pipe(vlogical64);
17596%}
17597
17598instruct vor16B(vecX dst, vecX src1, vecX src2)
17599%{
17600  predicate(n->as_Vector()->length_in_bytes() == 16);
17601  match(Set dst (OrV src1 src2));
17602  ins_cost(INSN_COST);
17603  format %{ "orr  $dst,$src1,$src2\t# vector (16B)" %}
17604  ins_encode %{
17605    __ orr(as_FloatRegister($dst$$reg), __ T16B,
17606            as_FloatRegister($src1$$reg),
17607            as_FloatRegister($src2$$reg));
17608  %}
17609  ins_pipe(vlogical128);
17610%}
17611
17612// --------------------------------- XOR --------------------------------------
17613
17614instruct vxor8B(vecD dst, vecD src1, vecD src2)
17615%{
17616  predicate(n->as_Vector()->length_in_bytes() == 4 ||
17617            n->as_Vector()->length_in_bytes() == 8);
17618  match(Set dst (XorV src1 src2));
17619  ins_cost(INSN_COST);
17620  format %{ "xor  $dst,$src1,$src2\t# vector (8B)" %}
17621  ins_encode %{
17622    __ eor(as_FloatRegister($dst$$reg), __ T8B,
17623            as_FloatRegister($src1$$reg),
17624            as_FloatRegister($src2$$reg));
17625  %}
17626  ins_pipe(vlogical64);
17627%}
17628
17629instruct vxor16B(vecX dst, vecX src1, vecX src2)
17630%{
17631  predicate(n->as_Vector()->length_in_bytes() == 16);
17632  match(Set dst (XorV src1 src2));
17633  ins_cost(INSN_COST);
17634  format %{ "xor  $dst,$src1,$src2\t# vector (16B)" %}
17635  ins_encode %{
17636    __ eor(as_FloatRegister($dst$$reg), __ T16B,
17637            as_FloatRegister($src1$$reg),
17638            as_FloatRegister($src2$$reg));
17639  %}
17640  ins_pipe(vlogical128);
17641%}
17642
17643// ------------------------------ Shift ---------------------------------------
17644
17645instruct vshiftcntL(vecX dst, iRegIorL2I cnt) %{
17646  match(Set dst (LShiftCntV cnt));
17647  format %{ "dup  $dst, $cnt\t# shift count (vecX)" %}
17648  ins_encode %{
17649    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
17650  %}
17651  ins_pipe(vdup_reg_reg128);
17652%}
17653
17654// Right shifts on aarch64 SIMD are implemented as left shift by -ve amount
17655instruct vshiftcntR(vecX dst, iRegIorL2I cnt) %{
17656  match(Set dst (RShiftCntV cnt));
17657  format %{ "dup  $dst, $cnt\t# shift count (vecX)\n\tneg  $dst, $dst\t T16B" %}
17658  ins_encode %{
17659    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
17660    __ negr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($dst$$reg));
17661  %}
17662  ins_pipe(vdup_reg_reg128);
17663%}
17664
17665instruct vsll8B(vecD dst, vecD src, vecX shift) %{
17666  predicate(n->as_Vector()->length() == 4 ||
17667            n->as_Vector()->length() == 8);
17668  match(Set dst (LShiftVB src shift));
17669  match(Set dst (RShiftVB src shift));
17670  ins_cost(INSN_COST);
17671  format %{ "sshl  $dst,$src,$shift\t# vector (8B)" %}
17672  ins_encode %{
17673    __ sshl(as_FloatRegister($dst$$reg), __ T8B,
17674            as_FloatRegister($src$$reg),
17675            as_FloatRegister($shift$$reg));
17676  %}
17677  ins_pipe(vshift64);
17678%}
17679
17680instruct vsll16B(vecX dst, vecX src, vecX shift) %{
17681  predicate(n->as_Vector()->length() == 16);
17682  match(Set dst (LShiftVB src shift));
17683  match(Set dst (RShiftVB src shift));
17684  ins_cost(INSN_COST);
17685  format %{ "sshl  $dst,$src,$shift\t# vector (16B)" %}
17686  ins_encode %{
17687    __ sshl(as_FloatRegister($dst$$reg), __ T16B,
17688            as_FloatRegister($src$$reg),
17689            as_FloatRegister($shift$$reg));
17690  %}
17691  ins_pipe(vshift128);
17692%}
17693
17694instruct vsrl8B(vecD dst, vecD src, vecX shift) %{
17695  predicate(n->as_Vector()->length() == 4 ||
17696            n->as_Vector()->length() == 8);
17697  match(Set dst (URShiftVB src shift));
17698  ins_cost(INSN_COST);
17699  format %{ "ushl  $dst,$src,$shift\t# vector (8B)" %}
17700  ins_encode %{
17701    __ ushl(as_FloatRegister($dst$$reg), __ T8B,
17702            as_FloatRegister($src$$reg),
17703            as_FloatRegister($shift$$reg));
17704  %}
17705  ins_pipe(vshift64);
17706%}
17707
17708instruct vsrl16B(vecX dst, vecX src, vecX shift) %{
17709  predicate(n->as_Vector()->length() == 16);
17710  match(Set dst (URShiftVB src shift));
17711  ins_cost(INSN_COST);
17712  format %{ "ushl  $dst,$src,$shift\t# vector (16B)" %}
17713  ins_encode %{
17714    __ ushl(as_FloatRegister($dst$$reg), __ T16B,
17715            as_FloatRegister($src$$reg),
17716            as_FloatRegister($shift$$reg));
17717  %}
17718  ins_pipe(vshift128);
17719%}
17720
17721instruct vsll8B_imm(vecD dst, vecD src, immI shift) %{
17722  predicate(n->as_Vector()->length() == 4 ||
17723            n->as_Vector()->length() == 8);
17724  match(Set dst (LShiftVB src shift));
17725  ins_cost(INSN_COST);
17726  format %{ "shl    $dst, $src, $shift\t# vector (8B)" %}
17727  ins_encode %{
17728    int sh = (int)$shift$$constant & 31;
17729    if (sh >= 8) {
17730      __ eor(as_FloatRegister($dst$$reg), __ T8B,
17731             as_FloatRegister($src$$reg),
17732             as_FloatRegister($src$$reg));
17733    } else {
17734      __ shl(as_FloatRegister($dst$$reg), __ T8B,
17735             as_FloatRegister($src$$reg), sh);
17736    }
17737  %}
17738  ins_pipe(vshift64_imm);
17739%}
17740
17741instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{
17742  predicate(n->as_Vector()->length() == 16);
17743  match(Set dst (LShiftVB src shift));
17744  ins_cost(INSN_COST);
17745  format %{ "shl    $dst, $src, $shift\t# vector (16B)" %}
17746  ins_encode %{
17747    int sh = (int)$shift$$constant & 31;
17748    if (sh >= 8) {
17749      __ eor(as_FloatRegister($dst$$reg), __ T16B,
17750             as_FloatRegister($src$$reg),
17751             as_FloatRegister($src$$reg));
17752    } else {
17753      __ shl(as_FloatRegister($dst$$reg), __ T16B,
17754             as_FloatRegister($src$$reg), sh);
17755    }
17756  %}
17757  ins_pipe(vshift128_imm);
17758%}
17759
17760instruct vsra8B_imm(vecD dst, vecD src, immI shift) %{
17761  predicate(n->as_Vector()->length() == 4 ||
17762            n->as_Vector()->length() == 8);
17763  match(Set dst (RShiftVB src shift));
17764  ins_cost(INSN_COST);
17765  format %{ "sshr    $dst, $src, $shift\t# vector (8B)" %}
17766  ins_encode %{
17767    int sh = (int)$shift$$constant & 31;
17768    if (sh >= 8) sh = 7;
17769    sh = -sh & 7;
17770    __ sshr(as_FloatRegister($dst$$reg), __ T8B,
17771           as_FloatRegister($src$$reg), sh);
17772  %}
17773  ins_pipe(vshift64_imm);
17774%}
17775
17776instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{
17777  predicate(n->as_Vector()->length() == 16);
17778  match(Set dst (RShiftVB src shift));
17779  ins_cost(INSN_COST);
17780  format %{ "sshr    $dst, $src, $shift\t# vector (16B)" %}
17781  ins_encode %{
17782    int sh = (int)$shift$$constant & 31;
17783    if (sh >= 8) sh = 7;
17784    sh = -sh & 7;
17785    __ sshr(as_FloatRegister($dst$$reg), __ T16B,
17786           as_FloatRegister($src$$reg), sh);
17787  %}
17788  ins_pipe(vshift128_imm);
17789%}
17790
17791instruct vsrl8B_imm(vecD dst, vecD src, immI shift) %{
17792  predicate(n->as_Vector()->length() == 4 ||
17793            n->as_Vector()->length() == 8);
17794  match(Set dst (URShiftVB src shift));
17795  ins_cost(INSN_COST);
17796  format %{ "ushr    $dst, $src, $shift\t# vector (8B)" %}
17797  ins_encode %{
17798    int sh = (int)$shift$$constant & 31;
17799    if (sh >= 8) {
17800      __ eor(as_FloatRegister($dst$$reg), __ T8B,
17801             as_FloatRegister($src$$reg),
17802             as_FloatRegister($src$$reg));
17803    } else {
17804      __ ushr(as_FloatRegister($dst$$reg), __ T8B,
17805             as_FloatRegister($src$$reg), -sh & 7);
17806    }
17807  %}
17808  ins_pipe(vshift64_imm);
17809%}
17810
17811instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{
17812  predicate(n->as_Vector()->length() == 16);
17813  match(Set dst (URShiftVB src shift));
17814  ins_cost(INSN_COST);
17815  format %{ "ushr    $dst, $src, $shift\t# vector (16B)" %}
17816  ins_encode %{
17817    int sh = (int)$shift$$constant & 31;
17818    if (sh >= 8) {
17819      __ eor(as_FloatRegister($dst$$reg), __ T16B,
17820             as_FloatRegister($src$$reg),
17821             as_FloatRegister($src$$reg));
17822    } else {
17823      __ ushr(as_FloatRegister($dst$$reg), __ T16B,
17824             as_FloatRegister($src$$reg), -sh & 7);
17825    }
17826  %}
17827  ins_pipe(vshift128_imm);
17828%}
17829
17830instruct vsll4S(vecD dst, vecD src, vecX shift) %{
17831  predicate(n->as_Vector()->length() == 2 ||
17832            n->as_Vector()->length() == 4);
17833  match(Set dst (LShiftVS src shift));
17834  match(Set dst (RShiftVS src shift));
17835  ins_cost(INSN_COST);
17836  format %{ "sshl  $dst,$src,$shift\t# vector (4H)" %}
17837  ins_encode %{
17838    __ sshl(as_FloatRegister($dst$$reg), __ T4H,
17839            as_FloatRegister($src$$reg),
17840            as_FloatRegister($shift$$reg));
17841  %}
17842  ins_pipe(vshift64);
17843%}
17844
17845instruct vsll8S(vecX dst, vecX src, vecX shift) %{
17846  predicate(n->as_Vector()->length() == 8);
17847  match(Set dst (LShiftVS src shift));
17848  match(Set dst (RShiftVS src shift));
17849  ins_cost(INSN_COST);
17850  format %{ "sshl  $dst,$src,$shift\t# vector (8H)" %}
17851  ins_encode %{
17852    __ sshl(as_FloatRegister($dst$$reg), __ T8H,
17853            as_FloatRegister($src$$reg),
17854            as_FloatRegister($shift$$reg));
17855  %}
17856  ins_pipe(vshift128);
17857%}
17858
17859instruct vsrl4S(vecD dst, vecD src, vecX shift) %{
17860  predicate(n->as_Vector()->length() == 2 ||
17861            n->as_Vector()->length() == 4);
17862  match(Set dst (URShiftVS src shift));
17863  ins_cost(INSN_COST);
17864  format %{ "ushl  $dst,$src,$shift\t# vector (4H)" %}
17865  ins_encode %{
17866    __ ushl(as_FloatRegister($dst$$reg), __ T4H,
17867            as_FloatRegister($src$$reg),
17868            as_FloatRegister($shift$$reg));
17869  %}
17870  ins_pipe(vshift64);
17871%}
17872
17873instruct vsrl8S(vecX dst, vecX src, vecX shift) %{
17874  predicate(n->as_Vector()->length() == 8);
17875  match(Set dst (URShiftVS src shift));
17876  ins_cost(INSN_COST);
17877  format %{ "ushl  $dst,$src,$shift\t# vector (8H)" %}
17878  ins_encode %{
17879    __ ushl(as_FloatRegister($dst$$reg), __ T8H,
17880            as_FloatRegister($src$$reg),
17881            as_FloatRegister($shift$$reg));
17882  %}
17883  ins_pipe(vshift128);
17884%}
17885
17886instruct vsll4S_imm(vecD dst, vecD src, immI shift) %{
17887  predicate(n->as_Vector()->length() == 2 ||
17888            n->as_Vector()->length() == 4);
17889  match(Set dst (LShiftVS src shift));
17890  ins_cost(INSN_COST);
17891  format %{ "shl    $dst, $src, $shift\t# vector (4H)" %}
17892  ins_encode %{
17893    int sh = (int)$shift$$constant & 31;
17894    if (sh >= 16) {
17895      __ eor(as_FloatRegister($dst$$reg), __ T8B,
17896             as_FloatRegister($src$$reg),
17897             as_FloatRegister($src$$reg));
17898    } else {
17899      __ shl(as_FloatRegister($dst$$reg), __ T4H,
17900             as_FloatRegister($src$$reg), sh);
17901    }
17902  %}
17903  ins_pipe(vshift64_imm);
17904%}
17905
17906instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{
17907  predicate(n->as_Vector()->length() == 8);
17908  match(Set dst (LShiftVS src shift));
17909  ins_cost(INSN_COST);
17910  format %{ "shl    $dst, $src, $shift\t# vector (8H)" %}
17911  ins_encode %{
17912    int sh = (int)$shift$$constant & 31;
17913    if (sh >= 16) {
17914      __ eor(as_FloatRegister($dst$$reg), __ T16B,
17915             as_FloatRegister($src$$reg),
17916             as_FloatRegister($src$$reg));
17917    } else {
17918      __ shl(as_FloatRegister($dst$$reg), __ T8H,
17919             as_FloatRegister($src$$reg), sh);
17920    }
17921  %}
17922  ins_pipe(vshift128_imm);
17923%}
17924
17925instruct vsra4S_imm(vecD dst, vecD src, immI shift) %{
17926  predicate(n->as_Vector()->length() == 2 ||
17927            n->as_Vector()->length() == 4);
17928  match(Set dst (RShiftVS src shift));
17929  ins_cost(INSN_COST);
17930  format %{ "sshr    $dst, $src, $shift\t# vector (4H)" %}
17931  ins_encode %{
17932    int sh = (int)$shift$$constant & 31;
17933    if (sh >= 16) sh = 15;
17934    sh = -sh & 15;
17935    __ sshr(as_FloatRegister($dst$$reg), __ T4H,
17936           as_FloatRegister($src$$reg), sh);
17937  %}
17938  ins_pipe(vshift64_imm);
17939%}
17940
17941instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{
17942  predicate(n->as_Vector()->length() == 8);
17943  match(Set dst (RShiftVS src shift));
17944  ins_cost(INSN_COST);
17945  format %{ "sshr    $dst, $src, $shift\t# vector (8H)" %}
17946  ins_encode %{
17947    int sh = (int)$shift$$constant & 31;
17948    if (sh >= 16) sh = 15;
17949    sh = -sh & 15;
17950    __ sshr(as_FloatRegister($dst$$reg), __ T8H,
17951           as_FloatRegister($src$$reg), sh);
17952  %}
17953  ins_pipe(vshift128_imm);
17954%}
17955
17956instruct vsrl4S_imm(vecD dst, vecD src, immI shift) %{
17957  predicate(n->as_Vector()->length() == 2 ||
17958            n->as_Vector()->length() == 4);
17959  match(Set dst (URShiftVS src shift));
17960  ins_cost(INSN_COST);
17961  format %{ "ushr    $dst, $src, $shift\t# vector (4H)" %}
17962  ins_encode %{
17963    int sh = (int)$shift$$constant & 31;
17964    if (sh >= 16) {
17965      __ eor(as_FloatRegister($dst$$reg), __ T8B,
17966             as_FloatRegister($src$$reg),
17967             as_FloatRegister($src$$reg));
17968    } else {
17969      __ ushr(as_FloatRegister($dst$$reg), __ T4H,
17970             as_FloatRegister($src$$reg), -sh & 15);
17971    }
17972  %}
17973  ins_pipe(vshift64_imm);
17974%}
17975
17976instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{
17977  predicate(n->as_Vector()->length() == 8);
17978  match(Set dst (URShiftVS src shift));
17979  ins_cost(INSN_COST);
17980  format %{ "ushr    $dst, $src, $shift\t# vector (8H)" %}
17981  ins_encode %{
17982    int sh = (int)$shift$$constant & 31;
17983    if (sh >= 16) {
17984      __ eor(as_FloatRegister($dst$$reg), __ T16B,
17985             as_FloatRegister($src$$reg),
17986             as_FloatRegister($src$$reg));
17987    } else {
17988      __ ushr(as_FloatRegister($dst$$reg), __ T8H,
17989             as_FloatRegister($src$$reg), -sh & 15);
17990    }
17991  %}
17992  ins_pipe(vshift128_imm);
17993%}
17994
17995instruct vsll2I(vecD dst, vecD src, vecX shift) %{
17996  predicate(n->as_Vector()->length() == 2);
17997  match(Set dst (LShiftVI src shift));
17998  match(Set dst (RShiftVI src shift));
17999  ins_cost(INSN_COST);
18000  format %{ "sshl  $dst,$src,$shift\t# vector (2S)" %}
18001  ins_encode %{
18002    __ sshl(as_FloatRegister($dst$$reg), __ T2S,
18003            as_FloatRegister($src$$reg),
18004            as_FloatRegister($shift$$reg));
18005  %}
18006  ins_pipe(vshift64);
18007%}
18008
18009instruct vsll4I(vecX dst, vecX src, vecX shift) %{
18010  predicate(n->as_Vector()->length() == 4);
18011  match(Set dst (LShiftVI src shift));
18012  match(Set dst (RShiftVI src shift));
18013  ins_cost(INSN_COST);
18014  format %{ "sshl  $dst,$src,$shift\t# vector (4S)" %}
18015  ins_encode %{
18016    __ sshl(as_FloatRegister($dst$$reg), __ T4S,
18017            as_FloatRegister($src$$reg),
18018            as_FloatRegister($shift$$reg));
18019  %}
18020  ins_pipe(vshift128);
18021%}
18022
18023instruct vsrl2I(vecD dst, vecD src, vecX shift) %{
18024  predicate(n->as_Vector()->length() == 2);
18025  match(Set dst (URShiftVI src shift));
18026  ins_cost(INSN_COST);
18027  format %{ "ushl  $dst,$src,$shift\t# vector (2S)" %}
18028  ins_encode %{
18029    __ ushl(as_FloatRegister($dst$$reg), __ T2S,
18030            as_FloatRegister($src$$reg),
18031            as_FloatRegister($shift$$reg));
18032  %}
18033  ins_pipe(vshift64);
18034%}
18035
18036instruct vsrl4I(vecX dst, vecX src, vecX shift) %{
18037  predicate(n->as_Vector()->length() == 4);
18038  match(Set dst (URShiftVI src shift));
18039  ins_cost(INSN_COST);
18040  format %{ "ushl  $dst,$src,$shift\t# vector (4S)" %}
18041  ins_encode %{
18042    __ ushl(as_FloatRegister($dst$$reg), __ T4S,
18043            as_FloatRegister($src$$reg),
18044            as_FloatRegister($shift$$reg));
18045  %}
18046  ins_pipe(vshift128);
18047%}
18048
18049instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{
18050  predicate(n->as_Vector()->length() == 2);
18051  match(Set dst (LShiftVI src shift));
18052  ins_cost(INSN_COST);
18053  format %{ "shl    $dst, $src, $shift\t# vector (2S)" %}
18054  ins_encode %{
18055    __ shl(as_FloatRegister($dst$$reg), __ T2S,
18056           as_FloatRegister($src$$reg),
18057           (int)$shift$$constant & 31);
18058  %}
18059  ins_pipe(vshift64_imm);
18060%}
18061
18062instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{
18063  predicate(n->as_Vector()->length() == 4);
18064  match(Set dst (LShiftVI src shift));
18065  ins_cost(INSN_COST);
18066  format %{ "shl    $dst, $src, $shift\t# vector (4S)" %}
18067  ins_encode %{
18068    __ shl(as_FloatRegister($dst$$reg), __ T4S,
18069           as_FloatRegister($src$$reg),
18070           (int)$shift$$constant & 31);
18071  %}
18072  ins_pipe(vshift128_imm);
18073%}
18074
18075instruct vsra2I_imm(vecD dst, vecD src, immI shift) %{
18076  predicate(n->as_Vector()->length() == 2);
18077  match(Set dst (RShiftVI src shift));
18078  ins_cost(INSN_COST);
18079  format %{ "sshr    $dst, $src, $shift\t# vector (2S)" %}
18080  ins_encode %{
18081    __ sshr(as_FloatRegister($dst$$reg), __ T2S,
18082            as_FloatRegister($src$$reg),
18083            -(int)$shift$$constant & 31);
18084  %}
18085  ins_pipe(vshift64_imm);
18086%}
18087
18088instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{
18089  predicate(n->as_Vector()->length() == 4);
18090  match(Set dst (RShiftVI src shift));
18091  ins_cost(INSN_COST);
18092  format %{ "sshr    $dst, $src, $shift\t# vector (4S)" %}
18093  ins_encode %{
18094    __ sshr(as_FloatRegister($dst$$reg), __ T4S,
18095            as_FloatRegister($src$$reg),
18096            -(int)$shift$$constant & 31);
18097  %}
18098  ins_pipe(vshift128_imm);
18099%}
18100
18101instruct vsrl2I_imm(vecD dst, vecD src, immI shift) %{
18102  predicate(n->as_Vector()->length() == 2);
18103  match(Set dst (URShiftVI src shift));
18104  ins_cost(INSN_COST);
18105  format %{ "ushr    $dst, $src, $shift\t# vector (2S)" %}
18106  ins_encode %{
18107    __ ushr(as_FloatRegister($dst$$reg), __ T2S,
18108            as_FloatRegister($src$$reg),
18109            -(int)$shift$$constant & 31);
18110  %}
18111  ins_pipe(vshift64_imm);
18112%}
18113
18114instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{
18115  predicate(n->as_Vector()->length() == 4);
18116  match(Set dst (URShiftVI src shift));
18117  ins_cost(INSN_COST);
18118  format %{ "ushr    $dst, $src, $shift\t# vector (4S)" %}
18119  ins_encode %{
18120    __ ushr(as_FloatRegister($dst$$reg), __ T4S,
18121            as_FloatRegister($src$$reg),
18122            -(int)$shift$$constant & 31);
18123  %}
18124  ins_pipe(vshift128_imm);
18125%}
18126
18127instruct vsll2L(vecX dst, vecX src, vecX shift) %{
18128  predicate(n->as_Vector()->length() == 2);
18129  match(Set dst (LShiftVL src shift));
18130  match(Set dst (RShiftVL src shift));
18131  ins_cost(INSN_COST);
18132  format %{ "sshl  $dst,$src,$shift\t# vector (2D)" %}
18133  ins_encode %{
18134    __ sshl(as_FloatRegister($dst$$reg), __ T2D,
18135            as_FloatRegister($src$$reg),
18136            as_FloatRegister($shift$$reg));
18137  %}
18138  ins_pipe(vshift128);
18139%}
18140
18141instruct vsrl2L(vecX dst, vecX src, vecX shift) %{
18142  predicate(n->as_Vector()->length() == 2);
18143  match(Set dst (URShiftVL src shift));
18144  ins_cost(INSN_COST);
18145  format %{ "ushl  $dst,$src,$shift\t# vector (2D)" %}
18146  ins_encode %{
18147    __ ushl(as_FloatRegister($dst$$reg), __ T2D,
18148            as_FloatRegister($src$$reg),
18149            as_FloatRegister($shift$$reg));
18150  %}
18151  ins_pipe(vshift128);
18152%}
18153
18154instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{
18155  predicate(n->as_Vector()->length() == 2);
18156  match(Set dst (LShiftVL src shift));
18157  ins_cost(INSN_COST);
18158  format %{ "shl    $dst, $src, $shift\t# vector (2D)" %}
18159  ins_encode %{
18160    __ shl(as_FloatRegister($dst$$reg), __ T2D,
18161           as_FloatRegister($src$$reg),
18162           (int)$shift$$constant & 63);
18163  %}
18164  ins_pipe(vshift128_imm);
18165%}
18166
18167instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{
18168  predicate(n->as_Vector()->length() == 2);
18169  match(Set dst (RShiftVL src shift));
18170  ins_cost(INSN_COST);
18171  format %{ "sshr    $dst, $src, $shift\t# vector (2D)" %}
18172  ins_encode %{
18173    __ sshr(as_FloatRegister($dst$$reg), __ T2D,
18174            as_FloatRegister($src$$reg),
18175            -(int)$shift$$constant & 63);
18176  %}
18177  ins_pipe(vshift128_imm);
18178%}
18179
18180instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{
18181  predicate(n->as_Vector()->length() == 2);
18182  match(Set dst (URShiftVL src shift));
18183  ins_cost(INSN_COST);
18184  format %{ "ushr    $dst, $src, $shift\t# vector (2D)" %}
18185  ins_encode %{
18186    __ ushr(as_FloatRegister($dst$$reg), __ T2D,
18187            as_FloatRegister($src$$reg),
18188            -(int)$shift$$constant & 63);
18189  %}
18190  ins_pipe(vshift128_imm);
18191%}
18192
18193//----------PEEPHOLE RULES-----------------------------------------------------
18194// These must follow all instruction definitions as they use the names
18195// defined in the instructions definitions.
18196//
18197// peepmatch ( root_instr_name [preceding_instruction]* );
18198//
18199// peepconstraint %{
18200// (instruction_number.operand_name relational_op instruction_number.operand_name
18201//  [, ...] );
18202// // instruction numbers are zero-based using left to right order in peepmatch
18203//
18204// peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
18205// // provide an instruction_number.operand_name for each operand that appears
18206// // in the replacement instruction's match rule
18207//
18208// ---------VM FLAGS---------------------------------------------------------
18209//
18210// All peephole optimizations can be turned off using -XX:-OptoPeephole
18211//
18212// Each peephole rule is given an identifying number starting with zero and
18213// increasing by one in the order seen by the parser.  An individual peephole
18214// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
18215// on the command-line.
18216//
18217// ---------CURRENT LIMITATIONS----------------------------------------------
18218//
18219// Only match adjacent instructions in same basic block
18220// Only equality constraints
18221// Only constraints between operands, not (0.dest_reg == RAX_enc)
18222// Only one replacement instruction
18223//
18224// ---------EXAMPLE----------------------------------------------------------
18225//
18226// // pertinent parts of existing instructions in architecture description
18227// instruct movI(iRegINoSp dst, iRegI src)
18228// %{
18229//   match(Set dst (CopyI src));
18230// %}
18231//
18232// instruct incI_iReg(iRegINoSp dst, immI1 src, rFlagsReg cr)
18233// %{
18234//   match(Set dst (AddI dst src));
18235//   effect(KILL cr);
18236// %}
18237//
18238// // Change (inc mov) to lea
18239// peephole %{
18240//   // increment preceeded by register-register move
18241//   peepmatch ( incI_iReg movI );
18242//   // require that the destination register of the increment
18243//   // match the destination register of the move
18244//   peepconstraint ( 0.dst == 1.dst );
18245//   // construct a replacement instruction that sets
18246//   // the destination to ( move's source register + one )
18247//   peepreplace ( leaI_iReg_immI( 0.dst 1.src 0.src ) );
18248// %}
18249//
18250
18251// Implementation no longer uses movX instructions since
18252// machine-independent system no longer uses CopyX nodes.
18253//
18254// peephole
18255// %{
18256//   peepmatch (incI_iReg movI);
18257//   peepconstraint (0.dst == 1.dst);
18258//   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
18259// %}
18260
18261// peephole
18262// %{
18263//   peepmatch (decI_iReg movI);
18264//   peepconstraint (0.dst == 1.dst);
18265//   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
18266// %}
18267
18268// peephole
18269// %{
18270//   peepmatch (addI_iReg_imm movI);
18271//   peepconstraint (0.dst == 1.dst);
18272//   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
18273// %}
18274
18275// peephole
18276// %{
18277//   peepmatch (incL_iReg movL);
18278//   peepconstraint (0.dst == 1.dst);
18279//   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
18280// %}
18281
18282// peephole
18283// %{
18284//   peepmatch (decL_iReg movL);
18285//   peepconstraint (0.dst == 1.dst);
18286//   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
18287// %}
18288
18289// peephole
18290// %{
18291//   peepmatch (addL_iReg_imm movL);
18292//   peepconstraint (0.dst == 1.dst);
18293//   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
18294// %}
18295
18296// peephole
18297// %{
18298//   peepmatch (addP_iReg_imm movP);
18299//   peepconstraint (0.dst == 1.dst);
18300//   peepreplace (leaP_iReg_imm(0.dst 1.src 0.src));
18301// %}
18302
18303// // Change load of spilled value to only a spill
18304// instruct storeI(memory mem, iRegI src)
18305// %{
18306//   match(Set mem (StoreI mem src));
18307// %}
18308//
18309// instruct loadI(iRegINoSp dst, memory mem)
18310// %{
18311//   match(Set dst (LoadI mem));
18312// %}
18313//
18314
18315//----------SMARTSPILL RULES---------------------------------------------------
18316// These must follow all instruction definitions as they use the names
18317// defined in the instructions definitions.
18318
18319// Local Variables:
18320// mode: c++
18321// End:
18322