x86.ad revision 3602:da91efe96a93
1//
2// Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
3// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4//
5// This code is free software; you can redistribute it and/or modify it
6// under the terms of the GNU General Public License version 2 only, as
7// published by the Free Software Foundation.
8//
9// This code is distributed in the hope that it will be useful, but WITHOUT
10// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12// version 2 for more details (a copy is included in the LICENSE file that
13// accompanied this code).
14//
15// You should have received a copy of the GNU General Public License version
16// 2 along with this work; if not, write to the Free Software Foundation,
17// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18//
19// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20// or visit www.oracle.com if you need additional information or have any
21// questions.
22//
23//
24
25// X86 Common Architecture Description File
26
27//----------REGISTER DEFINITION BLOCK------------------------------------------
28// This information is used by the matcher and the register allocator to
29// describe individual registers and classes of registers within the target
30// architecture.
31
32register %{
33//----------Architecture Description Register Definitions----------------------
34// General Registers
35// "reg_def"  name ( register save type, C convention save type,
36//                   ideal register type, encoding );
37// Register Save Types:
38//
39// NS  = No-Save:       The register allocator assumes that these registers
40//                      can be used without saving upon entry to the method, &
41//                      that they do not need to be saved at call sites.
42//
43// SOC = Save-On-Call:  The register allocator assumes that these registers
44//                      can be used without saving upon entry to the method,
45//                      but that they must be saved at call sites.
46//
47// SOE = Save-On-Entry: The register allocator assumes that these registers
48//                      must be saved before using them upon entry to the
49//                      method, but they do not need to be saved at call
50//                      sites.
51//
52// AS  = Always-Save:   The register allocator assumes that these registers
53//                      must be saved before using them upon entry to the
54//                      method, & that they must be saved at call sites.
55//
56// Ideal Register Type is used to determine how to save & restore a
57// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
58// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
59//
60// The encoding number is the actual bit-pattern placed into the opcodes.
61
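// As an illustration of the format above, the first definition below (XMM0)
// reads as follows (annotation added here for clarity; it is not part of the
// original definition):
//
//   reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
//                   |    |      |     |   +-- concrete VM register
//                   |    |      |     +------ encoding placed into the opcodes
//                   |    |      +------------ ideal type (spilled as a float)
//                   |    +------------------- C convention save type (save-on-call)
//                   +------------------------ register save type (save-on-call)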
62// XMM registers.  256-bit registers of 8 words each, labeled a-h.
63// Word a in each register holds a float; words a-b hold a double.
64// The whole register is used by SSE4.2 intrinsics,
65// array copy stubs and superword operations (see the UseSSE42Intrinsics,
66// UseXMMForArrayCopy and UseSuperword flags).
67// XMM8-XMM15 must be encoded with REX (VEX for UseAVX).
68// Linux ABI:   no XMM registers are preserved across function calls;
69//              XMM0-XMM7 might hold parameters
70// Windows ABI: XMM6-XMM15 are preserved across function calls;
71//              XMM0-XMM3 might hold parameters
72
73reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
74reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
75reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
76reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
77reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
78reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
79reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
80reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
81
82reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
83reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
84reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
85reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
86reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
87reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
88reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
89reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
90
91reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
92reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
93reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
94reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
95reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
96reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
97reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
98reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
99
100reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
101reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
102reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
103reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
104reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
105reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
106reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
107reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
108
109reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
110reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
111reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
112reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
113reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
114reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
115reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
116reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
117
118reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
119reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
120reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
121reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
122reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
123reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
124reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
125reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
126
127#ifdef _WIN64
128
129reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
130reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1));
131reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(2));
132reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3));
133reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4));
134reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5));
135reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6));
136reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7));
137
138reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
139reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1));
140reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2));
141reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3));
142reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4));
143reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5));
144reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6));
145reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7));
146
147reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
148reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1));
149reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2));
150reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3));
151reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4));
152reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5));
153reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6));
154reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7));
155
156reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
157reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1));
158reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2));
159reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3));
160reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4));
161reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5));
162reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6));
163reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7));
164
165reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
166reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1));
167reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2));
168reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3));
169reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(4));
170reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5));
171reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6));
172reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7));
173
174reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
175reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1));
176reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2));
177reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3));
178reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4));
179reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5));
180reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6));
181reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7));
182
183reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
184reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1));
185reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2));
186reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3));
187reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4));
188reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5));
189reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6));
190reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7));
191
192reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
193reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1));
194reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2));
195reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3));
196reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4));
197reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5));
198reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6));
199reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7));
200
201reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
202reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1));
203reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2));
204reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3));
205reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4));
206reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5));
207reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6));
208reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7));
209
210reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
211reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1));
212reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2));
213reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3));
214reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4));
215reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5));
216reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6));
217reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7));
218
219#else // _WIN64
220
221reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
222reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
223reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
224reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
225reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
226reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
227reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
228reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
229
230reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
231reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
232reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
233reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
234reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
235reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
236reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
237reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
238
239#ifdef _LP64
240
241reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
242reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
243reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
244reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
245reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
246reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
247reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
248reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
249
250reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
251reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
252reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
253reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
254reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
255reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
256reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
257reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
258
259reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
260reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
261reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
262reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
263reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
264reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
265reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
266reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
267
268reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
269reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
270reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
271reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
272reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
273reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
274reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
275reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
276
277reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
278reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
279reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
280reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
281reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
282reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
283reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
284reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
285
286reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
287reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
288reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
289reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
290reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
291reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
292reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
293reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
294
295reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
296reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
297reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
298reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
299reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
300reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
301reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
302reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
303
304reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
305reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
306reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
307reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
308reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
309reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
310reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
311reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
312
313#endif // _LP64
314
315#endif // _WIN64
316
317#ifdef _LP64
318reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
319#else
320reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
321#endif // _LP64
322
323alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
324                   XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
325                   XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
326                   XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
327                   XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
328                   XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
329                   XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
330                   XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
331#ifdef _LP64
332                  ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
333                   XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
334                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
335                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
336                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
337                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
338                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
339                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
340#endif
341                   );
342
343// flags allocation class should be last.
344alloc_class chunk2(RFLAGS);
345
346// Singleton class for condition codes
347reg_class int_flags(RFLAGS);
348
349// Class for all float registers
350reg_class float_reg(XMM0,
351                    XMM1,
352                    XMM2,
353                    XMM3,
354                    XMM4,
355                    XMM5,
356                    XMM6,
357                    XMM7
358#ifdef _LP64
359                   ,XMM8,
360                    XMM9,
361                    XMM10,
362                    XMM11,
363                    XMM12,
364                    XMM13,
365                    XMM14,
366                    XMM15
367#endif
368                    );
369
370// Class for all double registers
371reg_class double_reg(XMM0,  XMM0b,
372                     XMM1,  XMM1b,
373                     XMM2,  XMM2b,
374                     XMM3,  XMM3b,
375                     XMM4,  XMM4b,
376                     XMM5,  XMM5b,
377                     XMM6,  XMM6b,
378                     XMM7,  XMM7b
379#ifdef _LP64
380                    ,XMM8,  XMM8b,
381                     XMM9,  XMM9b,
382                     XMM10, XMM10b,
383                     XMM11, XMM11b,
384                     XMM12, XMM12b,
385                     XMM13, XMM13b,
386                     XMM14, XMM14b,
387                     XMM15, XMM15b
388#endif
389                     );
390
391// Class for all 32bit vector registers
392reg_class vectors_reg(XMM0,
393                      XMM1,
394                      XMM2,
395                      XMM3,
396                      XMM4,
397                      XMM5,
398                      XMM6,
399                      XMM7
400#ifdef _LP64
401                     ,XMM8,
402                      XMM9,
403                      XMM10,
404                      XMM11,
405                      XMM12,
406                      XMM13,
407                      XMM14,
408                      XMM15
409#endif
410                      );
411
412// Class for all 64bit vector registers
413reg_class vectord_reg(XMM0,  XMM0b,
414                      XMM1,  XMM1b,
415                      XMM2,  XMM2b,
416                      XMM3,  XMM3b,
417                      XMM4,  XMM4b,
418                      XMM5,  XMM5b,
419                      XMM6,  XMM6b,
420                      XMM7,  XMM7b
421#ifdef _LP64
422                     ,XMM8,  XMM8b,
423                      XMM9,  XMM9b,
424                      XMM10, XMM10b,
425                      XMM11, XMM11b,
426                      XMM12, XMM12b,
427                      XMM13, XMM13b,
428                      XMM14, XMM14b,
429                      XMM15, XMM15b
430#endif
431                      );
432
433// Class for all 128bit vector registers
434reg_class vectorx_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,
435                      XMM1,  XMM1b,  XMM1c,  XMM1d,
436                      XMM2,  XMM2b,  XMM2c,  XMM2d,
437                      XMM3,  XMM3b,  XMM3c,  XMM3d,
438                      XMM4,  XMM4b,  XMM4c,  XMM4d,
439                      XMM5,  XMM5b,  XMM5c,  XMM5d,
440                      XMM6,  XMM6b,  XMM6c,  XMM6d,
441                      XMM7,  XMM7b,  XMM7c,  XMM7d
442#ifdef _LP64
443                     ,XMM8,  XMM8b,  XMM8c,  XMM8d,
444                      XMM9,  XMM9b,  XMM9c,  XMM9d,
445                      XMM10, XMM10b, XMM10c, XMM10d,
446                      XMM11, XMM11b, XMM11c, XMM11d,
447                      XMM12, XMM12b, XMM12c, XMM12d,
448                      XMM13, XMM13b, XMM13c, XMM13d,
449                      XMM14, XMM14b, XMM14c, XMM14d,
450                      XMM15, XMM15b, XMM15c, XMM15d
451#endif
452                      );
453
454// Class for all 256bit vector registers
455reg_class vectory_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
456                      XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
457                      XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
458                      XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
459                      XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
460                      XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
461                      XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
462                      XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
463#ifdef _LP64
464                     ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
465                      XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
466                      XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
467                      XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
468                      XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
469                      XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
470                      XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
471                      XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
472#endif
473                      );
474
475%}
476
477source %{
478  // Float masks come from different places depending on platform.
479#ifdef _LP64
480  static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
481  static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
482  static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
483  static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
484#else
485  static address float_signmask()  { return (address)float_signmask_pool; }
486  static address float_signflip()  { return (address)float_signflip_pool; }
487  static address double_signmask() { return (address)double_signmask_pool; }
488  static address double_signflip() { return (address)double_signflip_pool; }
489#endif
490
491
492const bool Matcher::match_rule_supported(int opcode) {
493  if (!has_match_rule(opcode))
494    return false;
495
496  switch (opcode) {
497    case Op_PopCountI:
498    case Op_PopCountL:
499      if (!UsePopCountInstruction)
500        return false;
      break;  // do not fall through into the vector check below
501    case Op_MulVI:
502      if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
503        return false;
504    break;
505  }
506
507  return true;  // Per default match rules are supported.
508}
509
510// Max vector size in bytes. 0 if not supported.
511const int Matcher::vector_width_in_bytes(BasicType bt) {
512  assert(is_java_primitive(bt), "only primitive type vectors");
513  if (UseSSE < 2) return 0;
514  // SSE2 supports 128bit vectors for all types.
515  // AVX2 supports 256bit vectors for all types.
516  int size = (UseAVX > 1) ? 32 : 16;
517  // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
518  if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
519    size = 32;
520  // Use flag to limit vector size.
521  size = MIN2(size,(int)MaxVectorSize);
522  // Minimum 2 values in vector (or 4 for bytes).
523  switch (bt) {
524  case T_DOUBLE:
525  case T_LONG:
526    if (size < 16) return 0;
527  case T_FLOAT:
528  case T_INT:
529    if (size < 8) return 0;
530  case T_BOOLEAN:
531  case T_BYTE:
532  case T_CHAR:
533  case T_SHORT:
534    if (size < 4) return 0;
535    break;
536  default:
537    ShouldNotReachHere();
538  }
539  return size;
540}
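// Worked example (illustrative, assuming MaxVectorSize >= 32): with AVX1
// (UseAVX == 1) T_FLOAT and T_DOUBLE vectors can be 32 bytes wide while T_INT
// vectors stay at 16 bytes; with only SSE2 every type is capped at 16 bytes;
// and if MaxVectorSize limits the size to 8 bytes, T_LONG returns 0.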
541
542// Limits on vector size (number of elements) loaded into vector.
543const int Matcher::max_vector_size(const BasicType bt) {
544  return vector_width_in_bytes(bt)/type2aelembytes(bt);
545}
546const int Matcher::min_vector_size(const BasicType bt) {
547  int max_size = max_vector_size(bt);
548  // Min size which can be loaded into vector is 4 bytes.
549  int size = (type2aelembytes(bt) == 1) ? 4 : 2;
550  return MIN2(size,max_size);
551}
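// For example (illustrative): with a 16-byte vector width, T_BYTE vectors hold
// at most 16 and at least 4 elements, while T_INT vectors hold at most 4 and
// at least 2 elements.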
552
553// Vector ideal reg corresponding to specified size in bytes
554const int Matcher::vector_ideal_reg(int size) {
555  assert(MaxVectorSize >= size, "");
556  switch(size) {
557    case  4: return Op_VecS;
558    case  8: return Op_VecD;
559    case 16: return Op_VecX;
560    case 32: return Op_VecY;
561  }
562  ShouldNotReachHere();
563  return 0;
564}
565
566// x86 supports misaligned vector loads and stores.
567const bool Matcher::misaligned_vectors_ok() {
568  return !AlignVector; // can be changed by flag
569}
570
571// Helper methods for MachSpillCopyNode::implementation().
572static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
573                          int src_hi, int dst_hi, uint ireg, outputStream* st) {
574  // In the 64-bit VM, size calculation is very complex, so instructions are
575  // emitted into a scratch buffer to determine the size.
576  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
577  assert(ireg == Op_VecS || // 32bit vector
578         (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
579         (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
580         "no non-adjacent vector moves" );
581  if (cbuf) {
582    MacroAssembler _masm(cbuf);
583    int offset = __ offset();
584    switch (ireg) {
585    case Op_VecS: // copy whole register
586    case Op_VecD:
587    case Op_VecX:
588      __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
589      break;
590    case Op_VecY:
591      __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
592      break;
593    default:
594      ShouldNotReachHere();
595    }
596    int size = __ offset() - offset;
597#ifdef ASSERT
598    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
599    assert(!do_size || size == 4, "incorrect size calculation");
600#endif
601    return size;
602#ifndef PRODUCT
603  } else if (!do_size) {
604    switch (ireg) {
605    case Op_VecS:
606    case Op_VecD:
607    case Op_VecX:
608      st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
609      break;
610    case Op_VecY:
611      st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
612      break;
613    default:
614      ShouldNotReachHere();
615    }
616#endif
617  }
618  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
619  return 4;
620}
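// Note (illustrative): a register-to-register movdqu (F3 0F 6F /r) and the
// corresponding vmovdqu with a 2-byte VEX prefix both encode in 4 bytes, which
// is why the size returned above is a constant 4.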
621
622static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
623                            int stack_offset, int reg, uint ireg, outputStream* st) {
624  // In the 64-bit VM, size calculation is very complex, so instructions are
625  // emitted into a scratch buffer to determine the size.
626  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
627  if (cbuf) {
628    MacroAssembler _masm(cbuf);
629    int offset = __ offset();
630    if (is_load) {
631      switch (ireg) {
632      case Op_VecS:
633        __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
634        break;
635      case Op_VecD:
636        __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
637        break;
638      case Op_VecX:
639        __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
640        break;
641      case Op_VecY:
642        __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
643        break;
644      default:
645        ShouldNotReachHere();
646      }
647    } else { // store
648      switch (ireg) {
649      case Op_VecS:
650        __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
651        break;
652      case Op_VecD:
653        __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
654        break;
655      case Op_VecX:
656        __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
657        break;
658      case Op_VecY:
659        __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
660        break;
661      default:
662        ShouldNotReachHere();
663      }
664    }
665    int size = __ offset() - offset;
666#ifdef ASSERT
667    int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
668    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
669    assert(!do_size || size == (5+offset_size), "incorrect size calculation");
670#endif
671    return size;
672#ifndef PRODUCT
673  } else if (!do_size) {
674    if (is_load) {
675      switch (ireg) {
676      case Op_VecS:
677        st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
678        break;
679      case Op_VecD:
680        st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
681        break;
682       case Op_VecX:
683        st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
684        break;
685      case Op_VecY:
686        st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
687        break;
688      default:
689        ShouldNotReachHere();
690      }
691    } else { // store
692      switch (ireg) {
693      case Op_VecS:
694        st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
695        break;
696      case Op_VecD:
697        st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
698        break;
699       case Op_VecX:
700        st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
701        break;
702      case Op_VecY:
703        st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
704        break;
705      default:
706        ShouldNotReachHere();
707      }
708    }
709#endif
710  }
711  int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
712  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
713  return 5+offset_size;
714}
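// For example (illustrative): a VecX spill such as movdqu xmm0,[rsp+offset] is
// counted as 5 bytes when the offset is 0, 6 bytes for offsets below 0x80 (one
// displacement byte) and 9 bytes otherwise (four displacement bytes), matching
// the 5+offset_size returned above.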
715
716static inline jfloat replicate4_imm(int con, int width) {
717  // Load a constant of "width" (in bytes) and replicate it to fill 32bit.
718  assert(width == 1 || width == 2, "only byte or short types here");
719  int bit_width = width * 8;
720  jint val = con;
721  val &= (1 << bit_width) - 1;  // mask off sign bits
722  while(bit_width < 32) {
723    val |= (val << bit_width);
724    bit_width <<= 1;
725  }
726  jfloat fval = *((jfloat*) &val);  // coerce to float type
727  return fval;
728}
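// For example (illustrative values): replicate4_imm(0x1, 1) produces the bit
// pattern 0x01010101 and replicate4_imm(0x1234, 2) produces 0x12341234, each
// returned reinterpreted as a jfloat.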
729
730static inline jdouble replicate8_imm(int con, int width) {
731  // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
732  assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
733  int bit_width = width * 8;
734  jlong val = con;
735  val &= (((jlong) 1) << bit_width) - 1;  // mask off sign bits
736  while(bit_width < 64) {
737    val |= (val << bit_width);
738    bit_width <<= 1;
739  }
740  jdouble dval = *((jdouble*) &val);  // coerce to double type
741  return dval;
742}
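// Similarly (illustrative values): replicate8_imm(0xFF, 1) produces
// 0xFFFFFFFFFFFFFFFF and replicate8_imm(0x1234, 2) produces
// 0x1234123412341234, returned reinterpreted as a jdouble.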
743
744#ifndef PRODUCT
745  void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
746    st->print("nop \t# %d bytes pad for loops and calls", _count);
747  }
748#endif
749
750  void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
751    MacroAssembler _masm(&cbuf);
752    __ nop(_count);
753  }
754
755  uint MachNopNode::size(PhaseRegAlloc*) const {
756    return _count;
757  }
758
759#ifndef PRODUCT
760  void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
761    st->print("# breakpoint");
762  }
763#endif
764
765  void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
766    MacroAssembler _masm(&cbuf);
767    __ int3();
768  }
769
770  uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
771    return MachNode::size(ra_);
772  }
773
774%}
775
776encode %{
777
778  enc_class preserve_SP %{
779    debug_only(int off0 = cbuf.insts_size());
780    MacroAssembler _masm(&cbuf);
781    // RBP is preserved across all calls, even compiled calls.
782    // Use it to preserve RSP in places where the callee might change the SP.
783    __ movptr(rbp_mh_SP_save, rsp);
784    debug_only(int off1 = cbuf.insts_size());
785    assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
786  %}
787
788  enc_class restore_SP %{
789    MacroAssembler _masm(&cbuf);
790    __ movptr(rsp, rbp_mh_SP_save);
791  %}
792
793  enc_class call_epilog %{
794    if (VerifyStackAtCalls) {
795      // Check that stack depth is unchanged: find majik cookie on stack
796      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
797      MacroAssembler _masm(&cbuf);
798      Label L;
799      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
800      __ jccb(Assembler::equal, L);
801      // Die if stack mismatch
802      __ int3();
803      __ bind(L);
804    }
805  %}
806
807%}
808
809
810//----------OPERANDS-----------------------------------------------------------
811// Operand definitions must precede instruction definitions for correct parsing
812// in the ADLC because operands constitute user-defined types which are used in
813// instruction definitions.
814
815// Vectors
816operand vecS() %{
817  constraint(ALLOC_IN_RC(vectors_reg));
818  match(VecS);
819
820  format %{ %}
821  interface(REG_INTER);
822%}
823
824operand vecD() %{
825  constraint(ALLOC_IN_RC(vectord_reg));
826  match(VecD);
827
828  format %{ %}
829  interface(REG_INTER);
830%}
831
832operand vecX() %{
833  constraint(ALLOC_IN_RC(vectorx_reg));
834  match(VecX);
835
836  format %{ %}
837  interface(REG_INTER);
838%}
839
840operand vecY() %{
841  constraint(ALLOC_IN_RC(vectory_reg));
842  match(VecY);
843
844  format %{ %}
845  interface(REG_INTER);
846%}
847
848
849// INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)
850
851// ============================================================================
852
853instruct ShouldNotReachHere() %{
854  match(Halt);
855  format %{ "int3\t# ShouldNotReachHere" %}
856  ins_encode %{
857    __ int3();
858  %}
859  ins_pipe(pipe_slow);
860%}
861
862// ============================================================================
863
864instruct addF_reg(regF dst, regF src) %{
865  predicate((UseSSE>=1) && (UseAVX == 0));
866  match(Set dst (AddF dst src));
867
868  format %{ "addss   $dst, $src" %}
869  ins_cost(150);
870  ins_encode %{
871    __ addss($dst$$XMMRegister, $src$$XMMRegister);
872  %}
873  ins_pipe(pipe_slow);
874%}
875
876instruct addF_mem(regF dst, memory src) %{
877  predicate((UseSSE>=1) && (UseAVX == 0));
878  match(Set dst (AddF dst (LoadF src)));
879
880  format %{ "addss   $dst, $src" %}
881  ins_cost(150);
882  ins_encode %{
883    __ addss($dst$$XMMRegister, $src$$Address);
884  %}
885  ins_pipe(pipe_slow);
886%}
887
888instruct addF_imm(regF dst, immF con) %{
889  predicate((UseSSE>=1) && (UseAVX == 0));
890  match(Set dst (AddF dst con));
891  format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
892  ins_cost(150);
893  ins_encode %{
894    __ addss($dst$$XMMRegister, $constantaddress($con));
895  %}
896  ins_pipe(pipe_slow);
897%}
898
899instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
900  predicate(UseAVX > 0);
901  match(Set dst (AddF src1 src2));
902
903  format %{ "vaddss  $dst, $src1, $src2" %}
904  ins_cost(150);
905  ins_encode %{
906    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
907  %}
908  ins_pipe(pipe_slow);
909%}
910
911instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
912  predicate(UseAVX > 0);
913  match(Set dst (AddF src1 (LoadF src2)));
914
915  format %{ "vaddss  $dst, $src1, $src2" %}
916  ins_cost(150);
917  ins_encode %{
918    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
919  %}
920  ins_pipe(pipe_slow);
921%}
922
923instruct addF_reg_imm(regF dst, regF src, immF con) %{
924  predicate(UseAVX > 0);
925  match(Set dst (AddF src con));
926
927  format %{ "vaddss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
928  ins_cost(150);
929  ins_encode %{
930    __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
931  %}
932  ins_pipe(pipe_slow);
933%}
934
935instruct addD_reg(regD dst, regD src) %{
936  predicate((UseSSE>=2) && (UseAVX == 0));
937  match(Set dst (AddD dst src));
938
939  format %{ "addsd   $dst, $src" %}
940  ins_cost(150);
941  ins_encode %{
942    __ addsd($dst$$XMMRegister, $src$$XMMRegister);
943  %}
944  ins_pipe(pipe_slow);
945%}
946
947instruct addD_mem(regD dst, memory src) %{
948  predicate((UseSSE>=2) && (UseAVX == 0));
949  match(Set dst (AddD dst (LoadD src)));
950
951  format %{ "addsd   $dst, $src" %}
952  ins_cost(150);
953  ins_encode %{
954    __ addsd($dst$$XMMRegister, $src$$Address);
955  %}
956  ins_pipe(pipe_slow);
957%}
958
959instruct addD_imm(regD dst, immD con) %{
960  predicate((UseSSE>=2) && (UseAVX == 0));
961  match(Set dst (AddD dst con));
962  format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
963  ins_cost(150);
964  ins_encode %{
965    __ addsd($dst$$XMMRegister, $constantaddress($con));
966  %}
967  ins_pipe(pipe_slow);
968%}
969
970instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
971  predicate(UseAVX > 0);
972  match(Set dst (AddD src1 src2));
973
974  format %{ "vaddsd  $dst, $src1, $src2" %}
975  ins_cost(150);
976  ins_encode %{
977    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
978  %}
979  ins_pipe(pipe_slow);
980%}
981
982instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
983  predicate(UseAVX > 0);
984  match(Set dst (AddD src1 (LoadD src2)));
985
986  format %{ "vaddsd  $dst, $src1, $src2" %}
987  ins_cost(150);
988  ins_encode %{
989    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
990  %}
991  ins_pipe(pipe_slow);
992%}
993
994instruct addD_reg_imm(regD dst, regD src, immD con) %{
995  predicate(UseAVX > 0);
996  match(Set dst (AddD src con));
997
998  format %{ "vaddsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
999  ins_cost(150);
1000  ins_encode %{
1001    __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
1002  %}
1003  ins_pipe(pipe_slow);
1004%}
1005
1006instruct subF_reg(regF dst, regF src) %{
1007  predicate((UseSSE>=1) && (UseAVX == 0));
1008  match(Set dst (SubF dst src));
1009
1010  format %{ "subss   $dst, $src" %}
1011  ins_cost(150);
1012  ins_encode %{
1013    __ subss($dst$$XMMRegister, $src$$XMMRegister);
1014  %}
1015  ins_pipe(pipe_slow);
1016%}
1017
1018instruct subF_mem(regF dst, memory src) %{
1019  predicate((UseSSE>=1) && (UseAVX == 0));
1020  match(Set dst (SubF dst (LoadF src)));
1021
1022  format %{ "subss   $dst, $src" %}
1023  ins_cost(150);
1024  ins_encode %{
1025    __ subss($dst$$XMMRegister, $src$$Address);
1026  %}
1027  ins_pipe(pipe_slow);
1028%}
1029
1030instruct subF_imm(regF dst, immF con) %{
1031  predicate((UseSSE>=1) && (UseAVX == 0));
1032  match(Set dst (SubF dst con));
1033  format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
1034  ins_cost(150);
1035  ins_encode %{
1036    __ subss($dst$$XMMRegister, $constantaddress($con));
1037  %}
1038  ins_pipe(pipe_slow);
1039%}
1040
1041instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
1042  predicate(UseAVX > 0);
1043  match(Set dst (SubF src1 src2));
1044
1045  format %{ "vsubss  $dst, $src1, $src2" %}
1046  ins_cost(150);
1047  ins_encode %{
1048    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
1049  %}
1050  ins_pipe(pipe_slow);
1051%}
1052
1053instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
1054  predicate(UseAVX > 0);
1055  match(Set dst (SubF src1 (LoadF src2)));
1056
1057  format %{ "vsubss  $dst, $src1, $src2" %}
1058  ins_cost(150);
1059  ins_encode %{
1060    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
1061  %}
1062  ins_pipe(pipe_slow);
1063%}
1064
1065instruct subF_reg_imm(regF dst, regF src, immF con) %{
1066  predicate(UseAVX > 0);
1067  match(Set dst (SubF src con));
1068
1069  format %{ "vsubss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
1070  ins_cost(150);
1071  ins_encode %{
1072    __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
1073  %}
1074  ins_pipe(pipe_slow);
1075%}
1076
1077instruct subD_reg(regD dst, regD src) %{
1078  predicate((UseSSE>=2) && (UseAVX == 0));
1079  match(Set dst (SubD dst src));
1080
1081  format %{ "subsd   $dst, $src" %}
1082  ins_cost(150);
1083  ins_encode %{
1084    __ subsd($dst$$XMMRegister, $src$$XMMRegister);
1085  %}
1086  ins_pipe(pipe_slow);
1087%}
1088
1089instruct subD_mem(regD dst, memory src) %{
1090  predicate((UseSSE>=2) && (UseAVX == 0));
1091  match(Set dst (SubD dst (LoadD src)));
1092
1093  format %{ "subsd   $dst, $src" %}
1094  ins_cost(150);
1095  ins_encode %{
1096    __ subsd($dst$$XMMRegister, $src$$Address);
1097  %}
1098  ins_pipe(pipe_slow);
1099%}
1100
1101instruct subD_imm(regD dst, immD con) %{
1102  predicate((UseSSE>=2) && (UseAVX == 0));
1103  match(Set dst (SubD dst con));
1104  format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
1105  ins_cost(150);
1106  ins_encode %{
1107    __ subsd($dst$$XMMRegister, $constantaddress($con));
1108  %}
1109  ins_pipe(pipe_slow);
1110%}
1111
1112instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
1113  predicate(UseAVX > 0);
1114  match(Set dst (SubD src1 src2));
1115
1116  format %{ "vsubsd  $dst, $src1, $src2" %}
1117  ins_cost(150);
1118  ins_encode %{
1119    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
1120  %}
1121  ins_pipe(pipe_slow);
1122%}
1123
1124instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
1125  predicate(UseAVX > 0);
1126  match(Set dst (SubD src1 (LoadD src2)));
1127
1128  format %{ "vsubsd  $dst, $src1, $src2" %}
1129  ins_cost(150);
1130  ins_encode %{
1131    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
1132  %}
1133  ins_pipe(pipe_slow);
1134%}
1135
1136instruct subD_reg_imm(regD dst, regD src, immD con) %{
1137  predicate(UseAVX > 0);
1138  match(Set dst (SubD src con));
1139
1140  format %{ "vsubsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
1141  ins_cost(150);
1142  ins_encode %{
1143    __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
1144  %}
1145  ins_pipe(pipe_slow);
1146%}
1147
1148instruct mulF_reg(regF dst, regF src) %{
1149  predicate((UseSSE>=1) && (UseAVX == 0));
1150  match(Set dst (MulF dst src));
1151
1152  format %{ "mulss   $dst, $src" %}
1153  ins_cost(150);
1154  ins_encode %{
1155    __ mulss($dst$$XMMRegister, $src$$XMMRegister);
1156  %}
1157  ins_pipe(pipe_slow);
1158%}
1159
1160instruct mulF_mem(regF dst, memory src) %{
1161  predicate((UseSSE>=1) && (UseAVX == 0));
1162  match(Set dst (MulF dst (LoadF src)));
1163
1164  format %{ "mulss   $dst, $src" %}
1165  ins_cost(150);
1166  ins_encode %{
1167    __ mulss($dst$$XMMRegister, $src$$Address);
1168  %}
1169  ins_pipe(pipe_slow);
1170%}
1171
1172instruct mulF_imm(regF dst, immF con) %{
1173  predicate((UseSSE>=1) && (UseAVX == 0));
1174  match(Set dst (MulF dst con));
1175  format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
1176  ins_cost(150);
1177  ins_encode %{
1178    __ mulss($dst$$XMMRegister, $constantaddress($con));
1179  %}
1180  ins_pipe(pipe_slow);
1181%}
1182
1183instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
1184  predicate(UseAVX > 0);
1185  match(Set dst (MulF src1 src2));
1186
1187  format %{ "vmulss  $dst, $src1, $src2" %}
1188  ins_cost(150);
1189  ins_encode %{
1190    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
1191  %}
1192  ins_pipe(pipe_slow);
1193%}
1194
1195instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
1196  predicate(UseAVX > 0);
1197  match(Set dst (MulF src1 (LoadF src2)));
1198
1199  format %{ "vmulss  $dst, $src1, $src2" %}
1200  ins_cost(150);
1201  ins_encode %{
1202    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
1203  %}
1204  ins_pipe(pipe_slow);
1205%}
1206
1207instruct mulF_reg_imm(regF dst, regF src, immF con) %{
1208  predicate(UseAVX > 0);
1209  match(Set dst (MulF src con));
1210
1211  format %{ "vmulss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
1212  ins_cost(150);
1213  ins_encode %{
1214    __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
1215  %}
1216  ins_pipe(pipe_slow);
1217%}
1218
1219instruct mulD_reg(regD dst, regD src) %{
1220  predicate((UseSSE>=2) && (UseAVX == 0));
1221  match(Set dst (MulD dst src));
1222
1223  format %{ "mulsd   $dst, $src" %}
1224  ins_cost(150);
1225  ins_encode %{
1226    __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
1227  %}
1228  ins_pipe(pipe_slow);
1229%}
1230
1231instruct mulD_mem(regD dst, memory src) %{
1232  predicate((UseSSE>=2) && (UseAVX == 0));
1233  match(Set dst (MulD dst (LoadD src)));
1234
1235  format %{ "mulsd   $dst, $src" %}
1236  ins_cost(150);
1237  ins_encode %{
1238    __ mulsd($dst$$XMMRegister, $src$$Address);
1239  %}
1240  ins_pipe(pipe_slow);
1241%}
1242
1243instruct mulD_imm(regD dst, immD con) %{
1244  predicate((UseSSE>=2) && (UseAVX == 0));
1245  match(Set dst (MulD dst con));
1246  format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
1247  ins_cost(150);
1248  ins_encode %{
1249    __ mulsd($dst$$XMMRegister, $constantaddress($con));
1250  %}
1251  ins_pipe(pipe_slow);
1252%}
1253
1254instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
1255  predicate(UseAVX > 0);
1256  match(Set dst (MulD src1 src2));
1257
1258  format %{ "vmulsd  $dst, $src1, $src2" %}
1259  ins_cost(150);
1260  ins_encode %{
1261    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
1262  %}
1263  ins_pipe(pipe_slow);
1264%}
1265
1266instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
1267  predicate(UseAVX > 0);
1268  match(Set dst (MulD src1 (LoadD src2)));
1269
1270  format %{ "vmulsd  $dst, $src1, $src2" %}
1271  ins_cost(150);
1272  ins_encode %{
1273    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
1274  %}
1275  ins_pipe(pipe_slow);
1276%}
1277
1278instruct mulD_reg_imm(regD dst, regD src, immD con) %{
1279  predicate(UseAVX > 0);
1280  match(Set dst (MulD src con));
1281
1282  format %{ "vmulsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
1283  ins_cost(150);
1284  ins_encode %{
1285    __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
1286  %}
1287  ins_pipe(pipe_slow);
1288%}
1289
1290instruct divF_reg(regF dst, regF src) %{
1291  predicate((UseSSE>=1) && (UseAVX == 0));
1292  match(Set dst (DivF dst src));
1293
1294  format %{ "divss   $dst, $src" %}
1295  ins_cost(150);
1296  ins_encode %{
1297    __ divss($dst$$XMMRegister, $src$$XMMRegister);
1298  %}
1299  ins_pipe(pipe_slow);
1300%}
1301
1302instruct divF_mem(regF dst, memory src) %{
1303  predicate((UseSSE>=1) && (UseAVX == 0));
1304  match(Set dst (DivF dst (LoadF src)));
1305
1306  format %{ "divss   $dst, $src" %}
1307  ins_cost(150);
1308  ins_encode %{
1309    __ divss($dst$$XMMRegister, $src$$Address);
1310  %}
1311  ins_pipe(pipe_slow);
1312%}
1313
1314instruct divF_imm(regF dst, immF con) %{
1315  predicate((UseSSE>=1) && (UseAVX == 0));
1316  match(Set dst (DivF dst con));
1317  format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
1318  ins_cost(150);
1319  ins_encode %{
1320    __ divss($dst$$XMMRegister, $constantaddress($con));
1321  %}
1322  ins_pipe(pipe_slow);
1323%}
1324
1325instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
1326  predicate(UseAVX > 0);
1327  match(Set dst (DivF src1 src2));
1328
1329  format %{ "vdivss  $dst, $src1, $src2" %}
1330  ins_cost(150);
1331  ins_encode %{
1332    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
1333  %}
1334  ins_pipe(pipe_slow);
1335%}
1336
1337instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
1338  predicate(UseAVX > 0);
1339  match(Set dst (DivF src1 (LoadF src2)));
1340
1341  format %{ "vdivss  $dst, $src1, $src2" %}
1342  ins_cost(150);
1343  ins_encode %{
1344    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
1345  %}
1346  ins_pipe(pipe_slow);
1347%}
1348
1349instruct divF_reg_imm(regF dst, regF src, immF con) %{
1350  predicate(UseAVX > 0);
1351  match(Set dst (DivF src con));
1352
1353  format %{ "vdivss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
1354  ins_cost(150);
1355  ins_encode %{
1356    __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
1357  %}
1358  ins_pipe(pipe_slow);
1359%}
1360
1361instruct divD_reg(regD dst, regD src) %{
1362  predicate((UseSSE>=2) && (UseAVX == 0));
1363  match(Set dst (DivD dst src));
1364
1365  format %{ "divsd   $dst, $src" %}
1366  ins_cost(150);
1367  ins_encode %{
1368    __ divsd($dst$$XMMRegister, $src$$XMMRegister);
1369  %}
1370  ins_pipe(pipe_slow);
1371%}
1372
1373instruct divD_mem(regD dst, memory src) %{
1374  predicate((UseSSE>=2) && (UseAVX == 0));
1375  match(Set dst (DivD dst (LoadD src)));
1376
1377  format %{ "divsd   $dst, $src" %}
1378  ins_cost(150);
1379  ins_encode %{
1380    __ divsd($dst$$XMMRegister, $src$$Address);
1381  %}
1382  ins_pipe(pipe_slow);
1383%}
1384
1385instruct divD_imm(regD dst, immD con) %{
1386  predicate((UseSSE>=2) && (UseAVX == 0));
1387  match(Set dst (DivD dst con));
1388  format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
1389  ins_cost(150);
1390  ins_encode %{
1391    __ divsd($dst$$XMMRegister, $constantaddress($con));
1392  %}
1393  ins_pipe(pipe_slow);
1394%}
1395
1396instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
1397  predicate(UseAVX > 0);
1398  match(Set dst (DivD src1 src2));
1399
1400  format %{ "vdivsd  $dst, $src1, $src2" %}
1401  ins_cost(150);
1402  ins_encode %{
1403    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
1404  %}
1405  ins_pipe(pipe_slow);
1406%}
1407
1408instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
1409  predicate(UseAVX > 0);
1410  match(Set dst (DivD src1 (LoadD src2)));
1411
1412  format %{ "vdivsd  $dst, $src1, $src2" %}
1413  ins_cost(150);
1414  ins_encode %{
1415    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
1416  %}
1417  ins_pipe(pipe_slow);
1418%}
1419
1420instruct divD_reg_imm(regD dst, regD src, immD con) %{
1421  predicate(UseAVX > 0);
1422  match(Set dst (DivD src con));
1423
1424  format %{ "vdivsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
1425  ins_cost(150);
1426  ins_encode %{
1427    __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
1428  %}
1429  ins_pipe(pipe_slow);
1430%}
1431
1432instruct absF_reg(regF dst) %{
1433  predicate((UseSSE>=1) && (UseAVX == 0));
1434  match(Set dst (AbsF dst));
1435  ins_cost(150);
1436  format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
1437  ins_encode %{
1438    __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
1439  %}
1440  ins_pipe(pipe_slow);
1441%}
1442
1443instruct absF_reg_reg(regF dst, regF src) %{
1444  predicate(UseAVX > 0);
1445  match(Set dst (AbsF src));
1446  ins_cost(150);
1447  format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
1448  ins_encode %{
1449    bool vector256 = false;
1450    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
1451              ExternalAddress(float_signmask()), vector256);
1452  %}
1453  ins_pipe(pipe_slow);
1454%}
1455
1456instruct absD_reg(regD dst) %{
1457  predicate((UseSSE>=2) && (UseAVX == 0));
1458  match(Set dst (AbsD dst));
1459  ins_cost(150);
1460  format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
1461            "# abs double by sign masking" %}
1462  ins_encode %{
1463    __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
1464  %}
1465  ins_pipe(pipe_slow);
1466%}
1467
1468instruct absD_reg_reg(regD dst, regD src) %{
1469  predicate(UseAVX > 0);
1470  match(Set dst (AbsD src));
1471  ins_cost(150);
1472  format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
1473            "# abs double by sign masking" %}
1474  ins_encode %{
1475    bool vector256 = false;
1476    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
1477              ExternalAddress(double_signmask()), vector256);
1478  %}
1479  ins_pipe(pipe_slow);
1480%}
1481
1482instruct negF_reg(regF dst) %{
1483  predicate((UseSSE>=1) && (UseAVX == 0));
1484  match(Set dst (NegF dst));
1485  ins_cost(150);
1486  format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
1487  ins_encode %{
1488    __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
1489  %}
1490  ins_pipe(pipe_slow);
1491%}
1492
1493instruct negF_reg_reg(regF dst, regF src) %{
1494  predicate(UseAVX > 0);
1495  match(Set dst (NegF src));
1496  ins_cost(150);
1497  format %{ "vxorps  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
1498  ins_encode %{
1499    bool vector256 = false;
1500    __ vxorps($dst$$XMMRegister, $src$$XMMRegister,
1501              ExternalAddress(float_signflip()), vector256);
1502  %}
1503  ins_pipe(pipe_slow);
1504%}
1505
1506instruct negD_reg(regD dst) %{
1507  predicate((UseSSE>=2) && (UseAVX == 0));
1508  match(Set dst (NegD dst));
1509  ins_cost(150);
1510  format %{ "xorpd   $dst, [0x8000000000000000]\t"
1511            "# neg double by sign flipping" %}
1512  ins_encode %{
1513    __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
1514  %}
1515  ins_pipe(pipe_slow);
1516%}
1517
1518instruct negD_reg_reg(regD dst, regD src) %{
1519  predicate(UseAVX > 0);
1520  match(Set dst (NegD src));
1521  ins_cost(150);
1522  format %{ "vxorpd  $dst, $src, [0x8000000000000000]\t"
1523            "# neg double by sign flipping" %}
1524  ins_encode %{
1525    bool vector256 = false;
1526    __ vxorpd($dst$$XMMRegister, $src$$XMMRegister,
1527              ExternalAddress(double_signflip()), vector256);
1528  %}
1529  ins_pipe(pipe_slow);
1530%}
1531
1532instruct sqrtF_reg(regF dst, regF src) %{
1533  predicate(UseSSE>=1);
1534  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
1535
1536  format %{ "sqrtss  $dst, $src" %}
1537  ins_cost(150);
1538  ins_encode %{
1539    __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
1540  %}
1541  ins_pipe(pipe_slow);
1542%}
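
// The sqrtF rules (reg/mem/imm) match the ideal subtree ConvD2F(SqrtD(ConvF2D x)),
// i.e. the shape produced by source such as (float) Math.sqrt((double) f).
// Because the correctly rounded double sqrt of a float, rounded back to float,
// equals the directly rounded single-precision result, the whole chain can be
// collapsed into a single sqrtss.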
1543
1544instruct sqrtF_mem(regF dst, memory src) %{
1545  predicate(UseSSE>=1);
1546  match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
1547
1548  format %{ "sqrtss  $dst, $src" %}
1549  ins_cost(150);
1550  ins_encode %{
1551    __ sqrtss($dst$$XMMRegister, $src$$Address);
1552  %}
1553  ins_pipe(pipe_slow);
1554%}
1555
1556instruct sqrtF_imm(regF dst, immF con) %{
1557  predicate(UseSSE>=1);
1558  match(Set dst (ConvD2F (SqrtD (ConvF2D con))));
1559  format %{ "sqrtss  $dst, [$constantaddress]\t# load from constant table: float=$con" %}
1560  ins_cost(150);
1561  ins_encode %{
1562    __ sqrtss($dst$$XMMRegister, $constantaddress($con));
1563  %}
1564  ins_pipe(pipe_slow);
1565%}
1566
1567instruct sqrtD_reg(regD dst, regD src) %{
1568  predicate(UseSSE>=2);
1569  match(Set dst (SqrtD src));
1570
1571  format %{ "sqrtsd  $dst, $src" %}
1572  ins_cost(150);
1573  ins_encode %{
1574    __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
1575  %}
1576  ins_pipe(pipe_slow);
1577%}
1578
1579instruct sqrtD_mem(regD dst, memory src) %{
1580  predicate(UseSSE>=2);
1581  match(Set dst (SqrtD (LoadD src)));
1582
1583  format %{ "sqrtsd  $dst, $src" %}
1584  ins_cost(150);
1585  ins_encode %{
1586    __ sqrtsd($dst$$XMMRegister, $src$$Address);
1587  %}
1588  ins_pipe(pipe_slow);
1589%}
1590
1591instruct sqrtD_imm(regD dst, immD con) %{
1592  predicate(UseSSE>=2);
1593  match(Set dst (SqrtD con));
1594  format %{ "sqrtsd  $dst, [$constantaddress]\t# load from constant table: double=$con" %}
1595  ins_cost(150);
1596  ins_encode %{
1597    __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
1598  %}
1599  ins_pipe(pipe_slow);
1600%}
1601
1602
1603// ====================VECTOR INSTRUCTIONS=====================================
1604
1605// Load vectors (4 bytes long)
1606instruct loadV4(vecS dst, memory mem) %{
1607  predicate(n->as_LoadVector()->memory_size() == 4);
1608  match(Set dst (LoadVector mem));
1609  ins_cost(125);
1610  format %{ "movd    $dst,$mem\t! load vector (4 bytes)" %}
1611  ins_encode %{
1612    __ movdl($dst$$XMMRegister, $mem$$Address);
1613  %}
1614  ins_pipe( pipe_slow );
1615%}
1616
1617// Load vectors (8 bytes long)
1618instruct loadV8(vecD dst, memory mem) %{
1619  predicate(n->as_LoadVector()->memory_size() == 8);
1620  match(Set dst (LoadVector mem));
1621  ins_cost(125);
1622  format %{ "movq    $dst,$mem\t! load vector (8 bytes)" %}
1623  ins_encode %{
1624    __ movq($dst$$XMMRegister, $mem$$Address);
1625  %}
1626  ins_pipe( pipe_slow );
1627%}
1628
1629// Load vectors (16 bytes long)
1630instruct loadV16(vecX dst, memory mem) %{
1631  predicate(n->as_LoadVector()->memory_size() == 16);
1632  match(Set dst (LoadVector mem));
1633  ins_cost(125);
1634  format %{ "movdqu  $dst,$mem\t! load vector (16 bytes)" %}
1635  ins_encode %{
1636    __ movdqu($dst$$XMMRegister, $mem$$Address);
1637  %}
1638  ins_pipe( pipe_slow );
1639%}
1640
1641// Load vectors (32 bytes long)
1642instruct loadV32(vecY dst, memory mem) %{
1643  predicate(n->as_LoadVector()->memory_size() == 32);
1644  match(Set dst (LoadVector mem));
1645  ins_cost(125);
1646  format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
1647  ins_encode %{
1648    __ vmovdqu($dst$$XMMRegister, $mem$$Address);
1649  %}
1650  ins_pipe( pipe_slow );
1651%}
1652
1653// Store vectors
1654instruct storeV4(memory mem, vecS src) %{
1655  predicate(n->as_StoreVector()->memory_size() == 4);
1656  match(Set mem (StoreVector mem src));
1657  ins_cost(145);
1658  format %{ "movd    $mem,$src\t! store vector (4 bytes)" %}
1659  ins_encode %{
1660    __ movdl($mem$$Address, $src$$XMMRegister);
1661  %}
1662  ins_pipe( pipe_slow );
1663%}
1664
1665instruct storeV8(memory mem, vecD src) %{
1666  predicate(n->as_StoreVector()->memory_size() == 8);
1667  match(Set mem (StoreVector mem src));
1668  ins_cost(145);
1669  format %{ "movq    $mem,$src\t! store vector (8 bytes)" %}
1670  ins_encode %{
1671    __ movq($mem$$Address, $src$$XMMRegister);
1672  %}
1673  ins_pipe( pipe_slow );
1674%}
1675
1676instruct storeV16(memory mem, vecX src) %{
1677  predicate(n->as_StoreVector()->memory_size() == 16);
1678  match(Set mem (StoreVector mem src));
1679  ins_cost(145);
1680  format %{ "movdqu  $mem,$src\t! store vector (16 bytes)" %}
1681  ins_encode %{
1682    __ movdqu($mem$$Address, $src$$XMMRegister);
1683  %}
1684  ins_pipe( pipe_slow );
1685%}
1686
1687instruct storeV32(memory mem, vecY src) %{
1688  predicate(n->as_StoreVector()->memory_size() == 32);
1689  match(Set mem (StoreVector mem src));
1690  ins_cost(145);
1691  format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
1692  ins_encode %{
1693    __ vmovdqu($mem$$Address, $src$$XMMRegister);
1694  %}
1695  ins_pipe( pipe_slow );
1696%}
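
// Size dispatch used by the load/store rules above: LoadVector/StoreVector
// memory_size() selects the operand class and move instruction.
//   4 bytes  -> vecS : movd
//   8 bytes  -> vecD : movq
//   16 bytes -> vecX : movdqu
//   32 bytes -> vecY : vmovdqu (requires AVX)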
1697
1698// Replicate byte scalar to be vector
1699instruct Repl4B(vecS dst, rRegI src) %{
1700  predicate(n->as_Vector()->length() == 4);
1701  match(Set dst (ReplicateB src));
1702  format %{ "movd    $dst,$src\n\t"
1703            "punpcklbw $dst,$dst\n\t"
1704            "pshuflw $dst,$dst,0x00\t! replicate4B" %}
1705  ins_encode %{
1706    __ movdl($dst$$XMMRegister, $src$$Register);
1707    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
1708    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
1709  %}
1710  ins_pipe( pipe_slow );
1711%}
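
// Byte-level sketch of the replicate idiom above (illustrative only, assuming
// the scalar byte value is 0x5A):
//   movd      xmm0, eax        ; xmm0[31:0] = 0x0000005A
//   punpcklbw xmm0, xmm0       ; low bytes interleaved with themselves, word 0 = 0x5A5A
//   pshuflw   xmm0, xmm0, 0x00 ; word 0 copied to the low 4 words -> 8 bytes of 0x5A
// Repl4B needs only the low 4 bytes, Repl8B all 8; the wider variants below
// extend the pattern with punpcklqdq and vinserti128h.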
1712
1713instruct Repl8B(vecD dst, rRegI src) %{
1714  predicate(n->as_Vector()->length() == 8);
1715  match(Set dst (ReplicateB src));
1716  format %{ "movd    $dst,$src\n\t"
1717            "punpcklbw $dst,$dst\n\t"
1718            "pshuflw $dst,$dst,0x00\t! replicate8B" %}
1719  ins_encode %{
1720    __ movdl($dst$$XMMRegister, $src$$Register);
1721    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
1722    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
1723  %}
1724  ins_pipe( pipe_slow );
1725%}
1726
1727instruct Repl16B(vecX dst, rRegI src) %{
1728  predicate(n->as_Vector()->length() == 16);
1729  match(Set dst (ReplicateB src));
1730  format %{ "movd    $dst,$src\n\t"
1731            "punpcklbw $dst,$dst\n\t"
1732            "pshuflw $dst,$dst,0x00\n\t"
1733            "punpcklqdq $dst,$dst\t! replicate16B" %}
1734  ins_encode %{
1735    __ movdl($dst$$XMMRegister, $src$$Register);
1736    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
1737    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
1738    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
1739  %}
1740  ins_pipe( pipe_slow );
1741%}
1742
1743instruct Repl32B(vecY dst, rRegI src) %{
1744  predicate(n->as_Vector()->length() == 32);
1745  match(Set dst (ReplicateB src));
1746  format %{ "movd    $dst,$src\n\t"
1747            "punpcklbw $dst,$dst\n\t"
1748            "pshuflw $dst,$dst,0x00\n\t"
1749            "punpcklqdq $dst,$dst\n\t"
1750            "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
1751  ins_encode %{
1752    __ movdl($dst$$XMMRegister, $src$$Register);
1753    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
1754    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
1755    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
1756    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
1757  %}
1758  ins_pipe( pipe_slow );
1759%}
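
// The 32-byte (vecY) form above finishes with vinserti128h, the MacroAssembler
// helper that copies the low 128-bit lane into the high lane (roughly
// "vinserti128 ymm_dst, ymm_nds, xmm_src, 1"), turning the 128-bit broadcast
// into a full 256-bit one.  The same final step appears in the other 256-bit
// replicate rules below.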
1760
1761// Replicate byte scalar immediate to be vector by loading from const table.
1762instruct Repl4B_imm(vecS dst, immI con) %{
1763  predicate(n->as_Vector()->length() == 4);
1764  match(Set dst (ReplicateB con));
1765  format %{ "movdl   $dst,[$constantaddress]\t! replicate4B($con)" %}
1766  ins_encode %{
1767    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
1768  %}
1769  ins_pipe( pipe_slow );
1770%}
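
// replicate4_imm()/replicate8_imm() are helpers provided elsewhere in this
// file: they repeat the low `width` bytes of the immediate until 32 or 64 bits
// are filled, so the constant-table slot already holds the broadcast pattern.
// Expected behaviour (sketch, not the helpers' actual source):
//   replicate8_imm(0x7F, 1)   -> 0x7F7F7F7F7F7F7F7F   (byte elements)
//   replicate8_imm(0x1234, 2) -> 0x1234123412341234   (short elements)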
1771
1772instruct Repl8B_imm(vecD dst, immI con) %{
1773  predicate(n->as_Vector()->length() == 8);
1774  match(Set dst (ReplicateB con));
1775  format %{ "movq    $dst,[$constantaddress]\t! replicate8B($con)" %}
1776  ins_encode %{
1777    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
1778  %}
1779  ins_pipe( pipe_slow );
1780%}
1781
1782instruct Repl16B_imm(vecX dst, immI con) %{
1783  predicate(n->as_Vector()->length() == 16);
1784  match(Set dst (ReplicateB con));
1785  format %{ "movq    $dst,[$constantaddress]\n\t"
1786            "punpcklqdq $dst,$dst\t! replicate16B($con)" %}
1787  ins_encode %{
1788    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
1789    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
1790  %}
1791  ins_pipe( pipe_slow );
1792%}
1793
1794instruct Repl32B_imm(vecY dst, immI con) %{
1795  predicate(n->as_Vector()->length() == 32);
1796  match(Set dst (ReplicateB con));
1797  format %{ "movq    $dst,[$constantaddress]\n\t"
1798            "punpcklqdq $dst,$dst\n\t"
1799            "vinserti128h $dst,$dst,$dst\t! replicate32B($con)" %}
1800  ins_encode %{
1801    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
1802    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
1803    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
1804  %}
1805  ins_pipe( pipe_slow );
1806%}
1807
1808// Replicate byte scalar zero to be vector
1809instruct Repl4B_zero(vecS dst, immI0 zero) %{
1810  predicate(n->as_Vector()->length() == 4);
1811  match(Set dst (ReplicateB zero));
1812  format %{ "pxor    $dst,$dst\t! replicate4B zero" %}
1813  ins_encode %{
1814    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
1815  %}
1816  ins_pipe( fpu_reg_reg );
1817%}
1818
1819instruct Repl8B_zero(vecD dst, immI0 zero) %{
1820  predicate(n->as_Vector()->length() == 8);
1821  match(Set dst (ReplicateB zero));
1822  format %{ "pxor    $dst,$dst\t! replicate8B zero" %}
1823  ins_encode %{
1824    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
1825  %}
1826  ins_pipe( fpu_reg_reg );
1827%}
1828
1829instruct Repl16B_zero(vecX dst, immI0 zero) %{
1830  predicate(n->as_Vector()->length() == 16);
1831  match(Set dst (ReplicateB zero));
1832  format %{ "pxor    $dst,$dst\t! replicate16B zero" %}
1833  ins_encode %{
1834    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
1835  %}
1836  ins_pipe( fpu_reg_reg );
1837%}
1838
1839instruct Repl32B_zero(vecY dst, immI0 zero) %{
1840  predicate(n->as_Vector()->length() == 32);
1841  match(Set dst (ReplicateB zero));
1842  format %{ "vpxor   $dst,$dst,$dst\t! replicate32B zero" %}
1843  ins_encode %{
1844    // 256-bit vpxor requires AVX2; with plain AVX the MacroAssembler's vpxor falls back to vxorpd.
1845    bool vector256 = true;
1846    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
1847  %}
1848  ins_pipe( fpu_reg_reg );
1849%}
1850
1851// Replicate char/short (2 byte) scalar to be vector
1852instruct Repl2S(vecS dst, rRegI src) %{
1853  predicate(n->as_Vector()->length() == 2);
1854  match(Set dst (ReplicateS src));
1855  format %{ "movd    $dst,$src\n\t"
1856            "pshuflw $dst,$dst,0x00\t! replicate2S" %}
1857  ins_encode %{
1858    __ movdl($dst$$XMMRegister, $src$$Register);
1859    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
1860  %}
1861  ins_pipe( fpu_reg_reg );
1862%}
1863
1864instruct Repl4S(vecD dst, rRegI src) %{
1865  predicate(n->as_Vector()->length() == 4);
1866  match(Set dst (ReplicateS src));
1867  format %{ "movd    $dst,$src\n\t"
1868            "pshuflw $dst,$dst,0x00\t! replicate4S" %}
1869  ins_encode %{
1870    __ movdl($dst$$XMMRegister, $src$$Register);
1871    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
1872  %}
1873  ins_pipe( fpu_reg_reg );
1874%}
1875
1876instruct Repl8S(vecX dst, rRegI src) %{
1877  predicate(n->as_Vector()->length() == 8);
1878  match(Set dst (ReplicateS src));
1879  format %{ "movd    $dst,$src\n\t"
1880            "pshuflw $dst,$dst,0x00\n\t"
1881            "punpcklqdq $dst,$dst\t! replicate8S" %}
1882  ins_encode %{
1883    __ movdl($dst$$XMMRegister, $src$$Register);
1884    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
1885    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
1886  %}
1887  ins_pipe( pipe_slow );
1888%}
1889
1890instruct Repl16S(vecY dst, rRegI src) %{
1891  predicate(n->as_Vector()->length() == 16);
1892  match(Set dst (ReplicateS src));
1893  format %{ "movd    $dst,$src\n\t"
1894            "pshuflw $dst,$dst,0x00\n\t"
1895            "punpcklqdq $dst,$dst\n\t"
1896            "vinserti128h $dst,$dst,$dst\t! replicate16S" %}
1897  ins_encode %{
1898    __ movdl($dst$$XMMRegister, $src$$Register);
1899    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
1900    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
1901    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
1902  %}
1903  ins_pipe( pipe_slow );
1904%}
1905
1906// Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
1907instruct Repl2S_imm(vecS dst, immI con) %{
1908  predicate(n->as_Vector()->length() == 2);
1909  match(Set dst (ReplicateS con));
1910  format %{ "movdl   $dst,[$constantaddress]\t! replicate2S($con)" %}
1911  ins_encode %{
1912    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
1913  %}
1914  ins_pipe( fpu_reg_reg );
1915%}
1916
1917instruct Repl4S_imm(vecD dst, immI con) %{
1918  predicate(n->as_Vector()->length() == 4);
1919  match(Set dst (ReplicateS con));
1920  format %{ "movq    $dst,[$constantaddress]\t! replicate4S($con)" %}
1921  ins_encode %{
1922    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
1923  %}
1924  ins_pipe( fpu_reg_reg );
1925%}
1926
1927instruct Repl8S_imm(vecX dst, immI con) %{
1928  predicate(n->as_Vector()->length() == 8);
1929  match(Set dst (ReplicateS con));
1930  format %{ "movq    $dst,[$constantaddress]\n\t"
1931            "punpcklqdq $dst,$dst\t! replicate8S($con)" %}
1932  ins_encode %{
1933    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
1934    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
1935  %}
1936  ins_pipe( pipe_slow );
1937%}
1938
1939instruct Repl16S_imm(vecY dst, immI con) %{
1940  predicate(n->as_Vector()->length() == 16);
1941  match(Set dst (ReplicateS con));
1942  format %{ "movq    $dst,[$constantaddress]\n\t"
1943            "punpcklqdq $dst,$dst\n\t"
1944            "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %}
1945  ins_encode %{
1946    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
1947    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
1948    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
1949  %}
1950  ins_pipe( pipe_slow );
1951%}
1952
1953// Replicate char/short (2 byte) scalar zero to be vector
1954instruct Repl2S_zero(vecS dst, immI0 zero) %{
1955  predicate(n->as_Vector()->length() == 2);
1956  match(Set dst (ReplicateS zero));
1957  format %{ "pxor    $dst,$dst\t! replicate2S zero" %}
1958  ins_encode %{
1959    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
1960  %}
1961  ins_pipe( fpu_reg_reg );
1962%}
1963
1964instruct Repl4S_zero(vecD dst, immI0 zero) %{
1965  predicate(n->as_Vector()->length() == 4);
1966  match(Set dst (ReplicateS zero));
1967  format %{ "pxor    $dst,$dst\t! replicate4S zero" %}
1968  ins_encode %{
1969    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
1970  %}
1971  ins_pipe( fpu_reg_reg );
1972%}
1973
1974instruct Repl8S_zero(vecX dst, immI0 zero) %{
1975  predicate(n->as_Vector()->length() == 8);
1976  match(Set dst (ReplicateS zero));
1977  format %{ "pxor    $dst,$dst\t! replicate8S zero" %}
1978  ins_encode %{
1979    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
1980  %}
1981  ins_pipe( fpu_reg_reg );
1982%}
1983
1984instruct Repl16S_zero(vecY dst, immI0 zero) %{
1985  predicate(n->as_Vector()->length() == 16);
1986  match(Set dst (ReplicateS zero));
1987  format %{ "vpxor   $dst,$dst,$dst\t! replicate16S zero" %}
1988  ins_encode %{
1989    // 256-bit vpxor requires AVX2; with plain AVX the MacroAssembler's vpxor falls back to vxorpd.
1990    bool vector256 = true;
1991    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
1992  %}
1993  ins_pipe( fpu_reg_reg );
1994%}
1995
1996// Replicate integer (4 byte) scalar to be vector
1997instruct Repl2I(vecD dst, rRegI src) %{
1998  predicate(n->as_Vector()->length() == 2);
1999  match(Set dst (ReplicateI src));
2000  format %{ "movd    $dst,$src\n\t"
2001            "pshufd  $dst,$dst,0x00\t! replicate2I" %}
2002  ins_encode %{
2003    __ movdl($dst$$XMMRegister, $src$$Register);
2004    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2005  %}
2006  ins_pipe( fpu_reg_reg );
2007%}
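
// pshufd with immediate 0x00 selects source dword 0 for all four result
// positions (each 2-bit field of the immediate picks one source dword), so a
// single shuffle broadcasts the 32-bit scalar:
//   movd   xmm0, eax        ; xmm0 = { x, 0, 0, 0 }
//   pshufd xmm0, xmm0, 0x00 ; xmm0 = { x, x, x, x }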
2008
2009instruct Repl4I(vecX dst, rRegI src) %{
2010  predicate(n->as_Vector()->length() == 4);
2011  match(Set dst (ReplicateI src));
2012  format %{ "movd    $dst,$src\n\t"
2013            "pshufd  $dst,$dst,0x00\t! replicate4I" %}
2014  ins_encode %{
2015    __ movdl($dst$$XMMRegister, $src$$Register);
2016    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2017  %}
2018  ins_pipe( pipe_slow );
2019%}
2020
2021instruct Repl8I(vecY dst, rRegI src) %{
2022  predicate(n->as_Vector()->length() == 8);
2023  match(Set dst (ReplicateI src));
2024  format %{ "movd    $dst,$src\n\t"
2025            "pshufd  $dst,$dst,0x00\n\t"
2026            "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
2027  ins_encode %{
2028    __ movdl($dst$$XMMRegister, $src$$Register);
2029    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2030    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2031  %}
2032  ins_pipe( pipe_slow );
2033%}
2034
2035// Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
2036instruct Repl2I_imm(vecD dst, immI con) %{
2037  predicate(n->as_Vector()->length() == 2);
2038  match(Set dst (ReplicateI con));
2039  format %{ "movq    $dst,[$constantaddress]\t! replicate2I($con)" %}
2040  ins_encode %{
2041    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
2042  %}
2043  ins_pipe( fpu_reg_reg );
2044%}
2045
2046instruct Repl4I_imm(vecX dst, immI con) %{
2047  predicate(n->as_Vector()->length() == 4);
2048  match(Set dst (ReplicateI con));
2049  format %{ "movq    $dst,[$constantaddress]\t! replicate4I($con)\n\t"
2050            "punpcklqdq $dst,$dst" %}
2051  ins_encode %{
2052    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
2053    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2054  %}
2055  ins_pipe( pipe_slow );
2056%}
2057
2058instruct Repl8I_imm(vecY dst, immI con) %{
2059  predicate(n->as_Vector()->length() == 8);
2060  match(Set dst (ReplicateI con));
2061  format %{ "movq    $dst,[$constantaddress]\t! replicate8I($con)\n\t"
2062            "punpcklqdq $dst,$dst\n\t"
2063            "vinserti128h $dst,$dst,$dst" %}
2064  ins_encode %{
2065    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
2066    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2067    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2068  %}
2069  ins_pipe( pipe_slow );
2070%}
2071
2072// The integer scalar can also be loaded into the XMM register directly from memory.
2073instruct Repl2I_mem(vecD dst, memory mem) %{
2074  predicate(n->as_Vector()->length() == 2);
2075  match(Set dst (ReplicateI (LoadI mem)));
2076  format %{ "movd    $dst,$mem\n\t"
2077            "pshufd  $dst,$dst,0x00\t! replicate2I" %}
2078  ins_encode %{
2079    __ movdl($dst$$XMMRegister, $mem$$Address);
2080    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2081  %}
2082  ins_pipe( fpu_reg_reg );
2083%}
2084
2085instruct Repl4I_mem(vecX dst, memory mem) %{
2086  predicate(n->as_Vector()->length() == 4);
2087  match(Set dst (ReplicateI (LoadI mem)));
2088  format %{ "movd    $dst,$mem\n\t"
2089            "pshufd  $dst,$dst,0x00\t! replicate4I" %}
2090  ins_encode %{
2091    __ movdl($dst$$XMMRegister, $mem$$Address);
2092    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2093  %}
2094  ins_pipe( pipe_slow );
2095%}
2096
2097instruct Repl8I_mem(vecY dst, memory mem) %{
2098  predicate(n->as_Vector()->length() == 8);
2099  match(Set dst (ReplicateI (LoadI mem)));
2100  format %{ "movd    $dst,$mem\n\t"
2101            "pshufd  $dst,$dst,0x00\n\t"
2102            "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
2103  ins_encode %{
2104    __ movdl($dst$$XMMRegister, $mem$$Address);
2105    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2106    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2107  %}
2108  ins_pipe( pipe_slow );
2109%}
2110
2111// Replicate integer (4 byte) scalar zero to be vector
2112instruct Repl2I_zero(vecD dst, immI0 zero) %{
2113  predicate(n->as_Vector()->length() == 2);
2114  match(Set dst (ReplicateI zero));
2115  format %{ "pxor    $dst,$dst\t! replicate2I zero" %}
2116  ins_encode %{
2117    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
2118  %}
2119  ins_pipe( fpu_reg_reg );
2120%}
2121
2122instruct Repl4I_zero(vecX dst, immI0 zero) %{
2123  predicate(n->as_Vector()->length() == 4);
2124  match(Set dst (ReplicateI zero));
2125  format %{ "pxor    $dst,$dst\t! replicate4I zero" %}
2126  ins_encode %{
2127    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
2128  %}
2129  ins_pipe( fpu_reg_reg );
2130%}
2131
2132instruct Repl8I_zero(vecY dst, immI0 zero) %{
2133  predicate(n->as_Vector()->length() == 8);
2134  match(Set dst (ReplicateI zero));
2135  format %{ "vpxor   $dst,$dst,$dst\t! replicate8I zero" %}
2136  ins_encode %{
2137    // 256-bit vpxor requires AVX2; with plain AVX the MacroAssembler's vpxor falls back to vxorpd.
2138    bool vector256 = true;
2139    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
2140  %}
2141  ins_pipe( fpu_reg_reg );
2142%}
2143
2144// Replicate long (8 byte) scalar to be vector
2145#ifdef _LP64
2146instruct Repl2L(vecX dst, rRegL src) %{
2147  predicate(n->as_Vector()->length() == 2);
2148  match(Set dst (ReplicateL src));
2149  format %{ "movdq   $dst,$src\n\t"
2150            "punpcklqdq $dst,$dst\t! replicate2L" %}
2151  ins_encode %{
2152    __ movdq($dst$$XMMRegister, $src$$Register);
2153    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2154  %}
2155  ins_pipe( pipe_slow );
2156%}
2157
2158instruct Repl4L(vecY dst, rRegL src) %{
2159  predicate(n->as_Vector()->length() == 4);
2160  match(Set dst (ReplicateL src));
2161  format %{ "movdq   $dst,$src\n\t"
2162            "punpcklqdq $dst,$dst\n\t"
2163            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
2164  ins_encode %{
2165    __ movdq($dst$$XMMRegister, $src$$Register);
2166    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2167    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2168  %}
2169  ins_pipe( pipe_slow );
2170%}
2171#else // _LP64
2172instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
2173  predicate(n->as_Vector()->length() == 2);
2174  match(Set dst (ReplicateL src));
2175  effect(TEMP dst, USE src, TEMP tmp);
2176  format %{ "movdl   $dst,$src.lo\n\t"
2177            "movdl   $tmp,$src.hi\n\t"
2178            "punpckldq $dst,$tmp\n\t"
2179            "punpcklqdq $dst,$dst\t! replicate2L" %}
2180  ins_encode %{
2181    __ movdl($dst$$XMMRegister, $src$$Register);
2182    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
2183    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
2184    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2185  %}
2186  ins_pipe( pipe_slow );
2187%}
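
// On the 32-bit VM the long arrives as two 32-bit halves, so the rule above
// first glues them together and then broadcasts (sketch, lo/hi = the halves):
//   movdl      xmm_dst, lo       ; dst = { lo, 0, 0, 0 }
//   movdl      xmm_tmp, hi       ; tmp = { hi, 0, 0, 0 }
//   punpckldq  xmm_dst, xmm_tmp  ; dst = { lo, hi, 0, 0 } = the 64-bit long
//   punpcklqdq xmm_dst, xmm_dst  ; duplicate the low quadword -> replicate2L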
2188
2189instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
2190  predicate(n->as_Vector()->length() == 4);
2191  match(Set dst (ReplicateL src));
2192  effect(TEMP dst, USE src, TEMP tmp);
2193  format %{ "movdl   $dst,$src.lo\n\t"
2194            "movdl   $tmp,$src.hi\n\t"
2195            "punpckldq $dst,$tmp\n\t"
2196            "punpcklqdq $dst,$dst\n\t"
2197            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
2198  ins_encode %{
2199    __ movdl($dst$$XMMRegister, $src$$Register);
2200    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
2201    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
2202    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2203    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2204  %}
2205  ins_pipe( pipe_slow );
2206%}
2207#endif // _LP64
2208
2209// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
2210instruct Repl2L_imm(vecX dst, immL con) %{
2211  predicate(n->as_Vector()->length() == 2);
2212  match(Set dst (ReplicateL con));
2213  format %{ "movq    $dst,[$constantaddress]\n\t"
2214            "punpcklqdq $dst,$dst\t! replicate2L($con)" %}
2215  ins_encode %{
2216    __ movq($dst$$XMMRegister, $constantaddress($con));
2217    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2218  %}
2219  ins_pipe( pipe_slow );
2220%}
2221
2222instruct Repl4L_imm(vecY dst, immL con) %{
2223  predicate(n->as_Vector()->length() == 4);
2224  match(Set dst (ReplicateL con));
2225  format %{ "movq    $dst,[$constantaddress]\n\t"
2226            "punpcklqdq $dst,$dst\n\t"
2227            "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %}
2228  ins_encode %{
2229    __ movq($dst$$XMMRegister, $constantaddress($con));
2230    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2231    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2232  %}
2233  ins_pipe( pipe_slow );
2234%}
2235
2236// The long scalar can also be loaded into the XMM register directly from memory.
2237instruct Repl2L_mem(vecX dst, memory mem) %{
2238  predicate(n->as_Vector()->length() == 2);
2239  match(Set dst (ReplicateL (LoadL mem)));
2240  format %{ "movq    $dst,$mem\n\t"
2241            "punpcklqdq $dst,$dst\t! replicate2L" %}
2242  ins_encode %{
2243    __ movq($dst$$XMMRegister, $mem$$Address);
2244    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2245  %}
2246  ins_pipe( pipe_slow );
2247%}
2248
2249instruct Repl4L_mem(vecY dst, memory mem) %{
2250  predicate(n->as_Vector()->length() == 4);
2251  match(Set dst (ReplicateL (LoadL mem)));
2252  format %{ "movq    $dst,$mem\n\t"
2253            "punpcklqdq $dst,$dst\n\t"
2254            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
2255  ins_encode %{
2256    __ movq($dst$$XMMRegister, $mem$$Address);
2257    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2258    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2259  %}
2260  ins_pipe( pipe_slow );
2261%}
2262
2263// Replicate long (8 byte) scalar zero to be vector
2264instruct Repl2L_zero(vecX dst, immL0 zero) %{
2265  predicate(n->as_Vector()->length() == 2);
2266  match(Set dst (ReplicateL zero));
2267  format %{ "pxor    $dst,$dst\t! replicate2L zero" %}
2268  ins_encode %{
2269    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
2270  %}
2271  ins_pipe( fpu_reg_reg );
2272%}
2273
2274instruct Repl4L_zero(vecY dst, immL0 zero) %{
2275  predicate(n->as_Vector()->length() == 4);
2276  match(Set dst (ReplicateL zero));
2277  format %{ "vpxor   $dst,$dst,$dst\t! replicate4L zero" %}
2278  ins_encode %{
2279    // 256-bit vpxor requires AVX2; with plain AVX the MacroAssembler's vpxor falls back to vxorpd.
2280    bool vector256 = true;
2281    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
2282  %}
2283  ins_pipe( fpu_reg_reg );
2284%}
2285
2286// Replicate float (4 byte) scalar to be vector
2287instruct Repl2F(vecD dst, regF src) %{
2288  predicate(n->as_Vector()->length() == 2);
2289  match(Set dst (ReplicateF src));
2290  format %{ "pshufd  $dst,$src,0x00\t! replicate2F" %}
2291  ins_encode %{
2292    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
2293  %}
2294  ins_pipe( fpu_reg_reg );
2295%}
2296
2297instruct Repl4F(vecX dst, regF src) %{
2298  predicate(n->as_Vector()->length() == 4);
2299  match(Set dst (ReplicateF src));
2300  format %{ "pshufd  $dst,$src,0x00\t! replicate4F" %}
2301  ins_encode %{
2302    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
2303  %}
2304  ins_pipe( pipe_slow );
2305%}
2306
2307instruct Repl8F(vecY dst, regF src) %{
2308  predicate(n->as_Vector()->length() == 8);
2309  match(Set dst (ReplicateF src));
2310  format %{ "pshufd  $dst,$src,0x00\n\t"
2311            "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
2312  ins_encode %{
2313    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
2314    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2315  %}
2316  ins_pipe( pipe_slow );
2317%}
2318
2319// Replicate float (4 byte) scalar zero to be vector
2320instruct Repl2F_zero(vecD dst, immF0 zero) %{
2321  predicate(n->as_Vector()->length() == 2);
2322  match(Set dst (ReplicateF zero));
2323  format %{ "xorps   $dst,$dst\t! replicate2F zero" %}
2324  ins_encode %{
2325    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
2326  %}
2327  ins_pipe( fpu_reg_reg );
2328%}
2329
2330instruct Repl4F_zero(vecX dst, immF0 zero) %{
2331  predicate(n->as_Vector()->length() == 4);
2332  match(Set dst (ReplicateF zero));
2333  format %{ "xorps   $dst,$dst\t! replicate4F zero" %}
2334  ins_encode %{
2335    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
2336  %}
2337  ins_pipe( fpu_reg_reg );
2338%}
2339
2340instruct Repl8F_zero(vecY dst, immF0 zero) %{
2341  predicate(n->as_Vector()->length() == 8);
2342  match(Set dst (ReplicateF zero));
2343  format %{ "vxorps  $dst,$dst,$dst\t! replicate8F zero" %}
2344  ins_encode %{
2345    bool vector256 = true;
2346    __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
2347  %}
2348  ins_pipe( fpu_reg_reg );
2349%}
2350
2351// Replicate double (8 byte) scalar to be vector
2352instruct Repl2D(vecX dst, regD src) %{
2353  predicate(n->as_Vector()->length() == 2);
2354  match(Set dst (ReplicateD src));
2355  format %{ "pshufd  $dst,$src,0x44\t! replicate2D" %}
2356  ins_encode %{
2357    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
2358  %}
2359  ins_pipe( pipe_slow );
2360%}
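
// The 0x44 immediate decodes as dword selectors {0,1,0,1} (0b01_00_01_00, two
// bits per result lane), so pshufd duplicates the low 64-bit double into both
// halves of the register:
//   pshufd xmm_dst, xmm_src, 0x44 ; dst = { src.d0, src.d1, src.d0, src.d1 }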
2361
2362instruct Repl4D(vecY dst, regD src) %{
2363  predicate(n->as_Vector()->length() == 4);
2364  match(Set dst (ReplicateD src));
2365  format %{ "pshufd  $dst,$src,0x44\n\t"
2366            "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
2367  ins_encode %{
2368    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
2369    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2370  %}
2371  ins_pipe( pipe_slow );
2372%}
2373
2374// Replicate double (8 byte) scalar zero to be vector
2375instruct Repl2D_zero(vecX dst, immD0 zero) %{
2376  predicate(n->as_Vector()->length() == 2);
2377  match(Set dst (ReplicateD zero));
2378  format %{ "xorpd   $dst,$dst\t! replicate2D zero" %}
2379  ins_encode %{
2380    __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
2381  %}
2382  ins_pipe( fpu_reg_reg );
2383%}
2384
2385instruct Repl4D_zero(vecY dst, immD0 zero) %{
2386  predicate(n->as_Vector()->length() == 4);
2387  match(Set dst (ReplicateD zero));
2388  format %{ "vxorpd  $dst,$dst,$dst\t! replicate4D zero" %}
2389  ins_encode %{
2390    bool vector256 = true;
2391    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
2392  %}
2393  ins_pipe( fpu_reg_reg );
2394%}
2395
2396// ====================VECTOR ARITHMETIC=======================================
2397
2398// --------------------------------- ADD --------------------------------------
2399
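// Each packed operation in this section comes in two flavours: the SSE form,
// which is destructive (dst doubles as the left operand), and the AVX form,
// which is non-destructive and takes its 128-/256-bit width from the
// vector256 flag passed to the MacroAssembler.  Sketch for the byte add:
//   paddb  xmm0, xmm1         ; SSE : xmm0 = xmm0 + xmm1
//   vpaddb xmm0, xmm1, xmm2   ; AVX : xmm0 = xmm1 + xmm2 (VEX.128)
//   vpaddb ymm0, ymm1, ymm2   ; AVX2: ymm0 = ymm1 + ymm2 (VEX.256)
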
2400// Bytes vector add
2401instruct vadd4B(vecS dst, vecS src) %{
2402  predicate(n->as_Vector()->length() == 4);
2403  match(Set dst (AddVB dst src));
2404  format %{ "paddb   $dst,$src\t! add packed4B" %}
2405  ins_encode %{
2406    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
2407  %}
2408  ins_pipe( pipe_slow );
2409%}
2410
2411instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
2412  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2413  match(Set dst (AddVB src1 src2));
2414  format %{ "vpaddb  $dst,$src1,$src2\t! add packed4B" %}
2415  ins_encode %{
2416    bool vector256 = false;
2417    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2418  %}
2419  ins_pipe( pipe_slow );
2420%}
2421
2422instruct vadd8B(vecD dst, vecD src) %{
2423  predicate(n->as_Vector()->length() == 8);
2424  match(Set dst (AddVB dst src));
2425  format %{ "paddb   $dst,$src\t! add packed8B" %}
2426  ins_encode %{
2427    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
2428  %}
2429  ins_pipe( pipe_slow );
2430%}
2431
2432instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
2433  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
2434  match(Set dst (AddVB src1 src2));
2435  format %{ "vpaddb  $dst,$src1,$src2\t! add packed8B" %}
2436  ins_encode %{
2437    bool vector256 = false;
2438    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2439  %}
2440  ins_pipe( pipe_slow );
2441%}
2442
2443instruct vadd16B(vecX dst, vecX src) %{
2444  predicate(n->as_Vector()->length() == 16);
2445  match(Set dst (AddVB dst src));
2446  format %{ "paddb   $dst,$src\t! add packed16B" %}
2447  ins_encode %{
2448    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
2449  %}
2450  ins_pipe( pipe_slow );
2451%}
2452
2453instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
2454  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
2455  match(Set dst (AddVB src1 src2));
2456  format %{ "vpaddb  $dst,$src1,$src2\t! add packed16B" %}
2457  ins_encode %{
2458    bool vector256 = false;
2459    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2460  %}
2461  ins_pipe( pipe_slow );
2462%}
2463
2464instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{
2465  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
2466  match(Set dst (AddVB src (LoadVector mem)));
2467  format %{ "vpaddb  $dst,$src,$mem\t! add packed16B" %}
2468  ins_encode %{
2469    bool vector256 = false;
2470    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2471  %}
2472  ins_pipe( pipe_slow );
2473%}
2474
2475instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{
2476  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
2477  match(Set dst (AddVB src1 src2));
2478  format %{ "vpaddb  $dst,$src1,$src2\t! add packed32B" %}
2479  ins_encode %{
2480    bool vector256 = true;
2481    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2482  %}
2483  ins_pipe( pipe_slow );
2484%}
2485
2486instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{
2487  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
2488  match(Set dst (AddVB src (LoadVector mem)));
2489  format %{ "vpaddb  $dst,$src,$mem\t! add packed32B" %}
2490  ins_encode %{
2491    bool vector256 = true;
2492    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2493  %}
2494  ins_pipe( pipe_slow );
2495%}
2496
2497// Shorts/Chars vector add
2498instruct vadd2S(vecS dst, vecS src) %{
2499  predicate(n->as_Vector()->length() == 2);
2500  match(Set dst (AddVS dst src));
2501  format %{ "paddw   $dst,$src\t! add packed2S" %}
2502  ins_encode %{
2503    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
2504  %}
2505  ins_pipe( pipe_slow );
2506%}
2507
2508instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{
2509  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2510  match(Set dst (AddVS src1 src2));
2511  format %{ "vpaddw  $dst,$src1,$src2\t! add packed2S" %}
2512  ins_encode %{
2513    bool vector256 = false;
2514    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2515  %}
2516  ins_pipe( pipe_slow );
2517%}
2518
2519instruct vadd4S(vecD dst, vecD src) %{
2520  predicate(n->as_Vector()->length() == 4);
2521  match(Set dst (AddVS dst src));
2522  format %{ "paddw   $dst,$src\t! add packed4S" %}
2523  ins_encode %{
2524    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
2525  %}
2526  ins_pipe( pipe_slow );
2527%}
2528
2529instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
2530  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2531  match(Set dst (AddVS src1 src2));
2532  format %{ "vpaddw  $dst,$src1,$src2\t! add packed4S" %}
2533  ins_encode %{
2534    bool vector256 = false;
2535    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2536  %}
2537  ins_pipe( pipe_slow );
2538%}
2539
2540instruct vadd8S(vecX dst, vecX src) %{
2541  predicate(n->as_Vector()->length() == 8);
2542  match(Set dst (AddVS dst src));
2543  format %{ "paddw   $dst,$src\t! add packed8S" %}
2544  ins_encode %{
2545    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
2546  %}
2547  ins_pipe( pipe_slow );
2548%}
2549
2550instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
2551  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
2552  match(Set dst (AddVS src1 src2));
2553  format %{ "vpaddw  $dst,$src1,$src2\t! add packed8S" %}
2554  ins_encode %{
2555    bool vector256 = false;
2556    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2557  %}
2558  ins_pipe( pipe_slow );
2559%}
2560
2561instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
2562  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
2563  match(Set dst (AddVS src (LoadVector mem)));
2564  format %{ "vpaddw  $dst,$src,$mem\t! add packed8S" %}
2565  ins_encode %{
2566    bool vector256 = false;
2567    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2568  %}
2569  ins_pipe( pipe_slow );
2570%}
2571
2572instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
2573  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
2574  match(Set dst (AddVS src1 src2));
2575  format %{ "vpaddw  $dst,$src1,$src2\t! add packed16S" %}
2576  ins_encode %{
2577    bool vector256 = true;
2578    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2579  %}
2580  ins_pipe( pipe_slow );
2581%}
2582
2583instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
2584  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
2585  match(Set dst (AddVS src (LoadVector mem)));
2586  format %{ "vpaddw  $dst,$src,$mem\t! add packed16S" %}
2587  ins_encode %{
2588    bool vector256 = true;
2589    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2590  %}
2591  ins_pipe( pipe_slow );
2592%}
2593
2594// Integers vector add
2595instruct vadd2I(vecD dst, vecD src) %{
2596  predicate(n->as_Vector()->length() == 2);
2597  match(Set dst (AddVI dst src));
2598  format %{ "paddd   $dst,$src\t! add packed2I" %}
2599  ins_encode %{
2600    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
2601  %}
2602  ins_pipe( pipe_slow );
2603%}
2604
2605instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
2606  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2607  match(Set dst (AddVI src1 src2));
2608  format %{ "vpaddd  $dst,$src1,$src2\t! add packed2I" %}
2609  ins_encode %{
2610    bool vector256 = false;
2611    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2612  %}
2613  ins_pipe( pipe_slow );
2614%}
2615
2616instruct vadd4I(vecX dst, vecX src) %{
2617  predicate(n->as_Vector()->length() == 4);
2618  match(Set dst (AddVI dst src));
2619  format %{ "paddd   $dst,$src\t! add packed4I" %}
2620  ins_encode %{
2621    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
2622  %}
2623  ins_pipe( pipe_slow );
2624%}
2625
2626instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
2627  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2628  match(Set dst (AddVI src1 src2));
2629  format %{ "vpaddd  $dst,$src1,$src2\t! add packed4I" %}
2630  ins_encode %{
2631    bool vector256 = false;
2632    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2633  %}
2634  ins_pipe( pipe_slow );
2635%}
2636
2637instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
2638  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2639  match(Set dst (AddVI src (LoadVector mem)));
2640  format %{ "vpaddd  $dst,$src,$mem\t! add packed4I" %}
2641  ins_encode %{
2642    bool vector256 = false;
2643    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2644  %}
2645  ins_pipe( pipe_slow );
2646%}
2647
2648instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
2649  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
2650  match(Set dst (AddVI src1 src2));
2651  format %{ "vpaddd  $dst,$src1,$src2\t! add packed8I" %}
2652  ins_encode %{
2653    bool vector256 = true;
2654    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2655  %}
2656  ins_pipe( pipe_slow );
2657%}
2658
2659instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
2660  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
2661  match(Set dst (AddVI src (LoadVector mem)));
2662  format %{ "vpaddd  $dst,$src,$mem\t! add packed8I" %}
2663  ins_encode %{
2664    bool vector256 = true;
2665    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2666  %}
2667  ins_pipe( pipe_slow );
2668%}
2669
2670// Longs vector add
2671instruct vadd2L(vecX dst, vecX src) %{
2672  predicate(n->as_Vector()->length() == 2);
2673  match(Set dst (AddVL dst src));
2674  format %{ "paddq   $dst,$src\t! add packed2L" %}
2675  ins_encode %{
2676    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
2677  %}
2678  ins_pipe( pipe_slow );
2679%}
2680
2681instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
2682  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2683  match(Set dst (AddVL src1 src2));
2684  format %{ "vpaddq  $dst,$src1,$src2\t! add packed2L" %}
2685  ins_encode %{
2686    bool vector256 = false;
2687    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2688  %}
2689  ins_pipe( pipe_slow );
2690%}
2691
2692instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
2693  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2694  match(Set dst (AddVL src (LoadVector mem)));
2695  format %{ "vpaddq  $dst,$src,$mem\t! add packed2L" %}
2696  ins_encode %{
2697    bool vector256 = false;
2698    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2699  %}
2700  ins_pipe( pipe_slow );
2701%}
2702
2703instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
2704  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
2705  match(Set dst (AddVL src1 src2));
2706  format %{ "vpaddq  $dst,$src1,$src2\t! add packed4L" %}
2707  ins_encode %{
2708    bool vector256 = true;
2709    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2710  %}
2711  ins_pipe( pipe_slow );
2712%}
2713
2714instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
2715  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
2716  match(Set dst (AddVL src (LoadVector mem)));
2717  format %{ "vpaddq  $dst,$src,$mem\t! add packed4L" %}
2718  ins_encode %{
2719    bool vector256 = true;
2720    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2721  %}
2722  ins_pipe( pipe_slow );
2723%}
2724
2725// Floats vector add
2726instruct vadd2F(vecD dst, vecD src) %{
2727  predicate(n->as_Vector()->length() == 2);
2728  match(Set dst (AddVF dst src));
2729  format %{ "addps   $dst,$src\t! add packed2F" %}
2730  ins_encode %{
2731    __ addps($dst$$XMMRegister, $src$$XMMRegister);
2732  %}
2733  ins_pipe( pipe_slow );
2734%}
2735
2736instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
2737  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2738  match(Set dst (AddVF src1 src2));
2739  format %{ "vaddps  $dst,$src1,$src2\t! add packed2F" %}
2740  ins_encode %{
2741    bool vector256 = false;
2742    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2743  %}
2744  ins_pipe( pipe_slow );
2745%}
2746
2747instruct vadd4F(vecX dst, vecX src) %{
2748  predicate(n->as_Vector()->length() == 4);
2749  match(Set dst (AddVF dst src));
2750  format %{ "addps   $dst,$src\t! add packed4F" %}
2751  ins_encode %{
2752    __ addps($dst$$XMMRegister, $src$$XMMRegister);
2753  %}
2754  ins_pipe( pipe_slow );
2755%}
2756
2757instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
2758  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2759  match(Set dst (AddVF src1 src2));
2760  format %{ "vaddps  $dst,$src1,$src2\t! add packed4F" %}
2761  ins_encode %{
2762    bool vector256 = false;
2763    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2764  %}
2765  ins_pipe( pipe_slow );
2766%}
2767
2768instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
2769  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2770  match(Set dst (AddVF src (LoadVector mem)));
2771  format %{ "vaddps  $dst,$src,$mem\t! add packed4F" %}
2772  ins_encode %{
2773    bool vector256 = false;
2774    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2775  %}
2776  ins_pipe( pipe_slow );
2777%}
2778
2779instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
2780  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
2781  match(Set dst (AddVF src1 src2));
2782  format %{ "vaddps  $dst,$src1,$src2\t! add packed8F" %}
2783  ins_encode %{
2784    bool vector256 = true;
2785    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2786  %}
2787  ins_pipe( pipe_slow );
2788%}
2789
2790instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
2791  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
2792  match(Set dst (AddVF src (LoadVector mem)));
2793  format %{ "vaddps  $dst,$src,$mem\t! add packed8F" %}
2794  ins_encode %{
2795    bool vector256 = true;
2796    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2797  %}
2798  ins_pipe( pipe_slow );
2799%}
2800
2801// Doubles vector add
2802instruct vadd2D(vecX dst, vecX src) %{
2803  predicate(n->as_Vector()->length() == 2);
2804  match(Set dst (AddVD dst src));
2805  format %{ "addpd   $dst,$src\t! add packed2D" %}
2806  ins_encode %{
2807    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
2808  %}
2809  ins_pipe( pipe_slow );
2810%}
2811
2812instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
2813  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2814  match(Set dst (AddVD src1 src2));
2815  format %{ "vaddpd  $dst,$src1,$src2\t! add packed2D" %}
2816  ins_encode %{
2817    bool vector256 = false;
2818    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2819  %}
2820  ins_pipe( pipe_slow );
2821%}
2822
2823instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
2824  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2825  match(Set dst (AddVD src (LoadVector mem)));
2826  format %{ "vaddpd  $dst,$src,$mem\t! add packed2D" %}
2827  ins_encode %{
2828    bool vector256 = false;
2829    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2830  %}
2831  ins_pipe( pipe_slow );
2832%}
2833
2834instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
2835  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2836  match(Set dst (AddVD src1 src2));
2837  format %{ "vaddpd  $dst,$src1,$src2\t! add packed4D" %}
2838  ins_encode %{
2839    bool vector256 = true;
2840    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2841  %}
2842  ins_pipe( pipe_slow );
2843%}
2844
2845instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
2846  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2847  match(Set dst (AddVD src (LoadVector mem)));
2848  format %{ "vaddpd  $dst,$src,$mem\t! add packed4D" %}
2849  ins_encode %{
2850    bool vector256 = true;
2851    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2852  %}
2853  ins_pipe( pipe_slow );
2854%}
2855
2856// --------------------------------- SUB --------------------------------------
2857
2858// Bytes vector sub
2859instruct vsub4B(vecS dst, vecS src) %{
2860  predicate(n->as_Vector()->length() == 4);
2861  match(Set dst (SubVB dst src));
2862  format %{ "psubb   $dst,$src\t! sub packed4B" %}
2863  ins_encode %{
2864    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
2865  %}
2866  ins_pipe( pipe_slow );
2867%}
2868
2869instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
2870  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2871  match(Set dst (SubVB src1 src2));
2872  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed4B" %}
2873  ins_encode %{
2874    bool vector256 = false;
2875    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2876  %}
2877  ins_pipe( pipe_slow );
2878%}
2879
2880instruct vsub8B(vecD dst, vecD src) %{
2881  predicate(n->as_Vector()->length() == 8);
2882  match(Set dst (SubVB dst src));
2883  format %{ "psubb   $dst,$src\t! sub packed8B" %}
2884  ins_encode %{
2885    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
2886  %}
2887  ins_pipe( pipe_slow );
2888%}
2889
2890instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
2891  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
2892  match(Set dst (SubVB src1 src2));
2893  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed8B" %}
2894  ins_encode %{
2895    bool vector256 = false;
2896    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2897  %}
2898  ins_pipe( pipe_slow );
2899%}
2900
2901instruct vsub16B(vecX dst, vecX src) %{
2902  predicate(n->as_Vector()->length() == 16);
2903  match(Set dst (SubVB dst src));
2904  format %{ "psubb   $dst,$src\t! sub packed16B" %}
2905  ins_encode %{
2906    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
2907  %}
2908  ins_pipe( pipe_slow );
2909%}
2910
2911instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
2912  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
2913  match(Set dst (SubVB src1 src2));
2914  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed16B" %}
2915  ins_encode %{
2916    bool vector256 = false;
2917    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2918  %}
2919  ins_pipe( pipe_slow );
2920%}
2921
2922instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
2923  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
2924  match(Set dst (SubVB src (LoadVector mem)));
2925  format %{ "vpsubb  $dst,$src,$mem\t! sub packed16B" %}
2926  ins_encode %{
2927    bool vector256 = false;
2928    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2929  %}
2930  ins_pipe( pipe_slow );
2931%}
2932
2933instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
2934  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
2935  match(Set dst (SubVB src1 src2));
2936  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed32B" %}
2937  ins_encode %{
2938    bool vector256 = true;
2939    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2940  %}
2941  ins_pipe( pipe_slow );
2942%}
2943
2944instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
2945  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
2946  match(Set dst (SubVB src (LoadVector mem)));
2947  format %{ "vpsubb  $dst,$src,$mem\t! sub packed32B" %}
2948  ins_encode %{
2949    bool vector256 = true;
2950    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2951  %}
2952  ins_pipe( pipe_slow );
2953%}
2954
2955// Shorts/Chars vector sub
2956instruct vsub2S(vecS dst, vecS src) %{
2957  predicate(n->as_Vector()->length() == 2);
2958  match(Set dst (SubVS dst src));
2959  format %{ "psubw   $dst,$src\t! sub packed2S" %}
2960  ins_encode %{
2961    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
2962  %}
2963  ins_pipe( pipe_slow );
2964%}
2965
2966instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
2967  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2968  match(Set dst (SubVS src1 src2));
2969  format %{ "vpsubw  $dst,$src1,$src2\t! sub packed2S" %}
2970  ins_encode %{
2971    bool vector256 = false;
2972    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2973  %}
2974  ins_pipe( pipe_slow );
2975%}
2976
2977instruct vsub4S(vecD dst, vecD src) %{
2978  predicate(n->as_Vector()->length() == 4);
2979  match(Set dst (SubVS dst src));
2980  format %{ "psubw   $dst,$src\t! sub packed4S" %}
2981  ins_encode %{
2982    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
2983  %}
2984  ins_pipe( pipe_slow );
2985%}
2986
2987instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
2988  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2989  match(Set dst (SubVS src1 src2));
2990  format %{ "vpsubw  $dst,$src1,$src2\t! sub packed4S" %}
2991  ins_encode %{
2992    bool vector256 = false;
2993    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2994  %}
2995  ins_pipe( pipe_slow );
2996%}
2997
2998instruct vsub8S(vecX dst, vecX src) %{
2999  predicate(n->as_Vector()->length() == 8);
3000  match(Set dst (SubVS dst src));
3001  format %{ "psubw   $dst,$src\t! sub packed8S" %}
3002  ins_encode %{
3003    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
3004  %}
3005  ins_pipe( pipe_slow );
3006%}
3007
3008instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
3009  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3010  match(Set dst (SubVS src1 src2));
3011  format %{ "vpsubw  $dst,$src1,$src2\t! sub packed8S" %}
3012  ins_encode %{
3013    bool vector256 = false;
3014    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3015  %}
3016  ins_pipe( pipe_slow );
3017%}
3018
3019instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
3020  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3021  match(Set dst (SubVS src (LoadVector mem)));
3022  format %{ "vpsubw  $dst,$src,$mem\t! sub packed8S" %}
3023  ins_encode %{
3024    bool vector256 = false;
3025    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3026  %}
3027  ins_pipe( pipe_slow );
3028%}
3029
3030instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
3031  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
3032  match(Set dst (SubVS src1 src2));
3033  format %{ "vpsubw  $dst,$src1,$src2\t! sub packed16S" %}
3034  ins_encode %{
3035    bool vector256 = true;
3036    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3037  %}
3038  ins_pipe( pipe_slow );
3039%}
3040
3041instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
3042  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
3043  match(Set dst (SubVS src (LoadVector mem)));
3044  format %{ "vpsubw  $dst,$src,$mem\t! sub packed16S" %}
3045  ins_encode %{
3046    bool vector256 = true;
3047    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3048  %}
3049  ins_pipe( pipe_slow );
3050%}
3051
3052// Integers vector sub
3053instruct vsub2I(vecD dst, vecD src) %{
3054  predicate(n->as_Vector()->length() == 2);
3055  match(Set dst (SubVI dst src));
3056  format %{ "psubd   $dst,$src\t! sub packed2I" %}
3057  ins_encode %{
3058    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
3059  %}
3060  ins_pipe( pipe_slow );
3061%}
3062
3063instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
3064  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3065  match(Set dst (SubVI src1 src2));
3066  format %{ "vpsubd  $dst,$src1,$src2\t! sub packed2I" %}
3067  ins_encode %{
3068    bool vector256 = false;
3069    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3070  %}
3071  ins_pipe( pipe_slow );
3072%}
3073
3074instruct vsub4I(vecX dst, vecX src) %{
3075  predicate(n->as_Vector()->length() == 4);
3076  match(Set dst (SubVI dst src));
3077  format %{ "psubd   $dst,$src\t! sub packed4I" %}
3078  ins_encode %{
3079    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
3080  %}
3081  ins_pipe( pipe_slow );
3082%}
3083
3084instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
3085  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3086  match(Set dst (SubVI src1 src2));
3087  format %{ "vpsubd  $dst,$src1,$src2\t! sub packed4I" %}
3088  ins_encode %{
3089    bool vector256 = false;
3090    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3091  %}
3092  ins_pipe( pipe_slow );
3093%}
3094
3095instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
3096  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3097  match(Set dst (SubVI src (LoadVector mem)));
3098  format %{ "vpsubd  $dst,$src,$mem\t! sub packed4I" %}
3099  ins_encode %{
3100    bool vector256 = false;
3101    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3102  %}
3103  ins_pipe( pipe_slow );
3104%}
3105
3106instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
3107  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
3108  match(Set dst (SubVI src1 src2));
3109  format %{ "vpsubd  $dst,$src1,$src2\t! sub packed8I" %}
3110  ins_encode %{
3111    bool vector256 = true;
3112    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3113  %}
3114  ins_pipe( pipe_slow );
3115%}
3116
3117instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
3118  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
3119  match(Set dst (SubVI src (LoadVector mem)));
3120  format %{ "vpsubd  $dst,$src,$mem\t! sub packed8I" %}
3121  ins_encode %{
3122    bool vector256 = true;
3123    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3124  %}
3125  ins_pipe( pipe_slow );
3126%}
3127
3128// Longs vector sub
3129instruct vsub2L(vecX dst, vecX src) %{
3130  predicate(n->as_Vector()->length() == 2);
3131  match(Set dst (SubVL dst src));
3132  format %{ "psubq   $dst,$src\t! sub packed2L" %}
3133  ins_encode %{
3134    __ psubq($dst$$XMMRegister, $src$$XMMRegister);
3135  %}
3136  ins_pipe( pipe_slow );
3137%}
3138
3139instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
3140  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3141  match(Set dst (SubVL src1 src2));
3142  format %{ "vpsubq  $dst,$src1,$src2\t! sub packed2L" %}
3143  ins_encode %{
3144    bool vector256 = false;
3145    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3146  %}
3147  ins_pipe( pipe_slow );
3148%}
3149
3150instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
3151  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3152  match(Set dst (SubVL src (LoadVector mem)));
3153  format %{ "vpsubq  $dst,$src,$mem\t! sub packed2L" %}
3154  ins_encode %{
3155    bool vector256 = false;
3156    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3157  %}
3158  ins_pipe( pipe_slow );
3159%}
3160
3161instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
3162  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
3163  match(Set dst (SubVL src1 src2));
3164  format %{ "vpsubq  $dst,$src1,$src2\t! sub packed4L" %}
3165  ins_encode %{
3166    bool vector256 = true;
3167    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3168  %}
3169  ins_pipe( pipe_slow );
3170%}
3171
3172instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
3173  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
3174  match(Set dst (SubVL src (LoadVector mem)));
3175  format %{ "vpsubq  $dst,$src,$mem\t! sub packed4L" %}
3176  ins_encode %{
3177    bool vector256 = true;
3178    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3179  %}
3180  ins_pipe( pipe_slow );
3181%}
3182
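// Note that the 256-bit floating-point forms (vsub8F*, vsub4D* below) are
// predicated only on "UseAVX > 0": AVX1 already provides 256-bit vsubps and
// vsubpd, whereas 256-bit integer arithmetic requires AVX2.
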
3183// Floats vector sub
3184instruct vsub2F(vecD dst, vecD src) %{
3185  predicate(n->as_Vector()->length() == 2);
3186  match(Set dst (SubVF dst src));
3187  format %{ "subps   $dst,$src\t! sub packed2F" %}
3188  ins_encode %{
3189    __ subps($dst$$XMMRegister, $src$$XMMRegister);
3190  %}
3191  ins_pipe( pipe_slow );
3192%}
3193
3194instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
3195  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3196  match(Set dst (SubVF src1 src2));
3197  format %{ "vsubps  $dst,$src1,$src2\t! sub packed2F" %}
3198  ins_encode %{
3199    bool vector256 = false;
3200    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3201  %}
3202  ins_pipe( pipe_slow );
3203%}
3204
3205instruct vsub4F(vecX dst, vecX src) %{
3206  predicate(n->as_Vector()->length() == 4);
3207  match(Set dst (SubVF dst src));
3208  format %{ "subps   $dst,$src\t! sub packed4F" %}
3209  ins_encode %{
3210    __ subps($dst$$XMMRegister, $src$$XMMRegister);
3211  %}
3212  ins_pipe( pipe_slow );
3213%}
3214
3215instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
3216  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3217  match(Set dst (SubVF src1 src2));
3218  format %{ "vsubps  $dst,$src1,$src2\t! sub packed4F" %}
3219  ins_encode %{
3220    bool vector256 = false;
3221    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3222  %}
3223  ins_pipe( pipe_slow );
3224%}
3225
3226instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
3227  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3228  match(Set dst (SubVF src (LoadVector mem)));
3229  format %{ "vsubps  $dst,$src,$mem\t! sub packed4F" %}
3230  ins_encode %{
3231    bool vector256 = false;
3232    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3233  %}
3234  ins_pipe( pipe_slow );
3235%}
3236
3237instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
3238  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3239  match(Set dst (SubVF src1 src2));
3240  format %{ "vsubps  $dst,$src1,$src2\t! sub packed8F" %}
3241  ins_encode %{
3242    bool vector256 = true;
3243    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3244  %}
3245  ins_pipe( pipe_slow );
3246%}
3247
3248instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
3249  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3250  match(Set dst (SubVF src (LoadVector mem)));
3251  format %{ "vsubps  $dst,$src,$mem\t! sub packed8F" %}
3252  ins_encode %{
3253    bool vector256 = true;
3254    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3255  %}
3256  ins_pipe( pipe_slow );
3257%}
3258
3259// Doubles vector sub
3260instruct vsub2D(vecX dst, vecX src) %{
3261  predicate(n->as_Vector()->length() == 2);
3262  match(Set dst (SubVD dst src));
3263  format %{ "subpd   $dst,$src\t! sub packed2D" %}
3264  ins_encode %{
3265    __ subpd($dst$$XMMRegister, $src$$XMMRegister);
3266  %}
3267  ins_pipe( pipe_slow );
3268%}
3269
3270instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
3271  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3272  match(Set dst (SubVD src1 src2));
3273  format %{ "vsubpd  $dst,$src1,$src2\t! sub packed2D" %}
3274  ins_encode %{
3275    bool vector256 = false;
3276    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3277  %}
3278  ins_pipe( pipe_slow );
3279%}
3280
3281instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
3282  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3283  match(Set dst (SubVD src (LoadVector mem)));
3284  format %{ "vsubpd  $dst,$src,$mem\t! sub packed2D" %}
3285  ins_encode %{
3286    bool vector256 = false;
3287    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3288  %}
3289  ins_pipe( pipe_slow );
3290%}
3291
3292instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
3293  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3294  match(Set dst (SubVD src1 src2));
3295  format %{ "vsubpd  $dst,$src1,$src2\t! sub packed4D" %}
3296  ins_encode %{
3297    bool vector256 = true;
3298    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3299  %}
3300  ins_pipe( pipe_slow );
3301%}
3302
3303instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
3304  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3305  match(Set dst (SubVD src (LoadVector mem)));
3306  format %{ "vsubpd  $dst,$src,$mem\t! sub packed4D" %}
3307  ins_encode %{
3308    bool vector256 = true;
3309    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3310  %}
3311  ins_pipe( pipe_slow );
3312%}
3313
3314// --------------------------------- MUL --------------------------------------
3315
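// pmullw/vpmullw keep only the low 16 bits of each product, which is exactly
// what Java requires when the result is truncated back to a short/char array
// element.  Illustrative example (values chosen for demonstration only):
//   short a = 300, b = 300;
//   short c = (short)(a * b);   // 90000 truncated to 16 bits -> 24464
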
3316// Shorts/Chars vector mul
3317instruct vmul2S(vecS dst, vecS src) %{
3318  predicate(n->as_Vector()->length() == 2);
3319  match(Set dst (MulVS dst src));
3320  format %{ "pmullw  $dst,$src\t! mul packed2S" %}
3321  ins_encode %{
3322    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
3323  %}
3324  ins_pipe( pipe_slow );
3325%}
3326
3327instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
3328  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3329  match(Set dst (MulVS src1 src2));
3330  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
3331  ins_encode %{
3332    bool vector256 = false;
3333    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3334  %}
3335  ins_pipe( pipe_slow );
3336%}
3337
3338instruct vmul4S(vecD dst, vecD src) %{
3339  predicate(n->as_Vector()->length() == 4);
3340  match(Set dst (MulVS dst src));
3341  format %{ "pmullw  $dst,$src\t! mul packed4S" %}
3342  ins_encode %{
3343    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
3344  %}
3345  ins_pipe( pipe_slow );
3346%}
3347
3348instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{
3349  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3350  match(Set dst (MulVS src1 src2));
3351  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
3352  ins_encode %{
3353    bool vector256 = false;
3354    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3355  %}
3356  ins_pipe( pipe_slow );
3357%}
3358
3359instruct vmul8S(vecX dst, vecX src) %{
3360  predicate(n->as_Vector()->length() == 8);
3361  match(Set dst (MulVS dst src));
3362  format %{ "pmullw  $dst,$src\t! mul packed8S" %}
3363  ins_encode %{
3364    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
3365  %}
3366  ins_pipe( pipe_slow );
3367%}
3368
3369instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{
3370  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3371  match(Set dst (MulVS src1 src2));
3372  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
3373  ins_encode %{
3374    bool vector256 = false;
3375    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3376  %}
3377  ins_pipe( pipe_slow );
3378%}
3379
3380instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{
3381  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3382  match(Set dst (MulVS src (LoadVector mem)));
3383  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
3384  ins_encode %{
3385    bool vector256 = false;
3386    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3387  %}
3388  ins_pipe( pipe_slow );
3389%}
3390
3391instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{
3392  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
3393  match(Set dst (MulVS src1 src2));
3394  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
3395  ins_encode %{
3396    bool vector256 = true;
3397    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3398  %}
3399  ins_pipe( pipe_slow );
3400%}
3401
3402instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{
3403  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
3404  match(Set dst (MulVS src (LoadVector mem)));
3405  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
3406  ins_encode %{
3407    bool vector256 = true;
3408    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3409  %}
3410  ins_pipe( pipe_slow );
3411%}
3412
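// x86 gained a packed 32x32->32 multiply (pmulld) only with SSE4.1, which is
// why the non-AVX MulVI rules below are guarded with "UseSSE > 3"; on older
// hardware these rules simply do not match.
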
3413// Integers vector mul (requires SSE4.1 for pmulld)
3414instruct vmul2I(vecD dst, vecD src) %{
3415  predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
3416  match(Set dst (MulVI dst src));
3417  format %{ "pmulld  $dst,$src\t! mul packed2I" %}
3418  ins_encode %{
3419    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
3420  %}
3421  ins_pipe( pipe_slow );
3422%}
3423
3424instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
3425  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3426  match(Set dst (MulVI src1 src2));
3427  format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
3428  ins_encode %{
3429    bool vector256 = false;
3430    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3431  %}
3432  ins_pipe( pipe_slow );
3433%}
3434
3435instruct vmul4I(vecX dst, vecX src) %{
3436  predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
3437  match(Set dst (MulVI dst src));
3438  format %{ "pmulld  $dst,$src\t! mul packed4I" %}
3439  ins_encode %{
3440    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
3441  %}
3442  ins_pipe( pipe_slow );
3443%}
3444
3445instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
3446  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3447  match(Set dst (MulVI src1 src2));
3448  format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
3449  ins_encode %{
3450    bool vector256 = false;
3451    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3452  %}
3453  ins_pipe( pipe_slow );
3454%}
3455
3456instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
3457  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3458  match(Set dst (MulVI src (LoadVector mem)));
3459  format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
3460  ins_encode %{
3461    bool vector256 = false;
3462    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3463  %}
3464  ins_pipe( pipe_slow );
3465%}
3466
3467instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
3468  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
3469  match(Set dst (MulVI src1 src2));
3470  format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
3471  ins_encode %{
3472    bool vector256 = true;
3473    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3474  %}
3475  ins_pipe( pipe_slow );
3476%}
3477
3478instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
3479  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
3480  match(Set dst (MulVI src (LoadVector mem)));
3481  format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
3482  ins_encode %{
3483    bool vector256 = true;
3484    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3485  %}
3486  ins_pipe( pipe_slow );
3487%}
3488
3489// Floats vector mul
3490instruct vmul2F(vecD dst, vecD src) %{
3491  predicate(n->as_Vector()->length() == 2);
3492  match(Set dst (MulVF dst src));
3493  format %{ "mulps   $dst,$src\t! mul packed2F" %}
3494  ins_encode %{
3495    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
3496  %}
3497  ins_pipe( pipe_slow );
3498%}
3499
3500instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
3501  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3502  match(Set dst (MulVF src1 src2));
3503  format %{ "vmulps  $dst,$src1,$src2\t! mul packed2F" %}
3504  ins_encode %{
3505    bool vector256 = false;
3506    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3507  %}
3508  ins_pipe( pipe_slow );
3509%}
3510
3511instruct vmul4F(vecX dst, vecX src) %{
3512  predicate(n->as_Vector()->length() == 4);
3513  match(Set dst (MulVF dst src));
3514  format %{ "mulps   $dst,$src\t! mul packed4F" %}
3515  ins_encode %{
3516    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
3517  %}
3518  ins_pipe( pipe_slow );
3519%}
3520
3521instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
3522  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3523  match(Set dst (MulVF src1 src2));
3524  format %{ "vmulps  $dst,$src1,$src2\t! mul packed4F" %}
3525  ins_encode %{
3526    bool vector256 = false;
3527    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3528  %}
3529  ins_pipe( pipe_slow );
3530%}
3531
3532instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
3533  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3534  match(Set dst (MulVF src (LoadVector mem)));
3535  format %{ "vmulps  $dst,$src,$mem\t! mul packed4F" %}
3536  ins_encode %{
3537    bool vector256 = false;
3538    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3539  %}
3540  ins_pipe( pipe_slow );
3541%}
3542
3543instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
3544  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3545  match(Set dst (MulVF src1 src2));
3546  format %{ "vmulps  $dst,$src1,$src2\t! mul packed8F" %}
3547  ins_encode %{
3548    bool vector256 = true;
3549    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3550  %}
3551  ins_pipe( pipe_slow );
3552%}
3553
3554instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
3555  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3556  match(Set dst (MulVF src (LoadVector mem)));
3557  format %{ "vmulps  $dst,$src,$mem\t! mul packed8F" %}
3558  ins_encode %{
3559    bool vector256 = true;
3560    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3561  %}
3562  ins_pipe( pipe_slow );
3563%}
3564
3565// Doubles vector mul
3566instruct vmul2D(vecX dst, vecX src) %{
3567  predicate(n->as_Vector()->length() == 2);
3568  match(Set dst (MulVD dst src));
3569  format %{ "mulpd   $dst,$src\t! mul packed2D" %}
3570  ins_encode %{
3571    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
3572  %}
3573  ins_pipe( pipe_slow );
3574%}
3575
3576instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
3577  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3578  match(Set dst (MulVD src1 src2));
3579  format %{ "vmulpd  $dst,$src1,$src2\t! mul packed2D" %}
3580  ins_encode %{
3581    bool vector256 = false;
3582    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3583  %}
3584  ins_pipe( pipe_slow );
3585%}
3586
3587instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
3588  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3589  match(Set dst (MulVD src (LoadVector mem)));
3590  format %{ "vmulpd  $dst,$src,$mem\t! mul packed2D" %}
3591  ins_encode %{
3592    bool vector256 = false;
3593    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3594  %}
3595  ins_pipe( pipe_slow );
3596%}
3597
3598instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
3599  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3600  match(Set dst (MulVD src1 src2));
3601  format %{ "vmulpd  $dst,$src1,$src2\t! mul packed4D" %}
3602  ins_encode %{
3603    bool vector256 = true;
3604    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3605  %}
3606  ins_pipe( pipe_slow );
3607%}
3608
3609instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
3610  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3611  match(Set dst (MulVD src (LoadVector mem)));
3612  format %{ "vmulpd  $dst,$src,$mem\t! mul packed4D" %}
3613  ins_encode %{
3614    bool vector256 = true;
3615    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3616  %}
3617  ins_pipe( pipe_slow );
3618%}
3619
3620// --------------------------------- DIV --------------------------------------
3621
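// Only floating-point division is vectorized: SSE/AVX provide packed divide
// for floats and doubles (divps/divpd, vdivps/vdivpd) but no packed integer
// divide, so only DivVF/DivVD rules appear here.
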
3622// Floats vector div
3623instruct vdiv2F(vecD dst, vecD src) %{
3624  predicate(n->as_Vector()->length() == 2);
3625  match(Set dst (DivVF dst src));
3626  format %{ "divps   $dst,$src\t! div packed2F" %}
3627  ins_encode %{
3628    __ divps($dst$$XMMRegister, $src$$XMMRegister);
3629  %}
3630  ins_pipe( pipe_slow );
3631%}
3632
3633instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
3634  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3635  match(Set dst (DivVF src1 src2));
3636  format %{ "vdivps  $dst,$src1,$src2\t! div packed2F" %}
3637  ins_encode %{
3638    bool vector256 = false;
3639    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3640  %}
3641  ins_pipe( pipe_slow );
3642%}
3643
3644instruct vdiv4F(vecX dst, vecX src) %{
3645  predicate(n->as_Vector()->length() == 4);
3646  match(Set dst (DivVF dst src));
3647  format %{ "divps   $dst,$src\t! div packed4F" %}
3648  ins_encode %{
3649    __ divps($dst$$XMMRegister, $src$$XMMRegister);
3650  %}
3651  ins_pipe( pipe_slow );
3652%}
3653
3654instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
3655  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3656  match(Set dst (DivVF src1 src2));
3657  format %{ "vdivps  $dst,$src1,$src2\t! div packed4F" %}
3658  ins_encode %{
3659    bool vector256 = false;
3660    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3661  %}
3662  ins_pipe( pipe_slow );
3663%}
3664
3665instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
3666  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3667  match(Set dst (DivVF src (LoadVector mem)));
3668  format %{ "vdivps  $dst,$src,$mem\t! div packed4F" %}
3669  ins_encode %{
3670    bool vector256 = false;
3671    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3672  %}
3673  ins_pipe( pipe_slow );
3674%}
3675
3676instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
3677  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3678  match(Set dst (DivVF src1 src2));
3679  format %{ "vdivps  $dst,$src1,$src2\t! div packed8F" %}
3680  ins_encode %{
3681    bool vector256 = true;
3682    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3683  %}
3684  ins_pipe( pipe_slow );
3685%}
3686
3687instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
3688  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3689  match(Set dst (DivVF src (LoadVector mem)));
3690  format %{ "vdivps  $dst,$src,$mem\t! div packed8F" %}
3691  ins_encode %{
3692    bool vector256 = true;
3693    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3694  %}
3695  ins_pipe( pipe_slow );
3696%}
3697
3698// Doubles vector div
3699instruct vdiv2D(vecX dst, vecX src) %{
3700  predicate(n->as_Vector()->length() == 2);
3701  match(Set dst (DivVD dst src));
3702  format %{ "divpd   $dst,$src\t! div packed2D" %}
3703  ins_encode %{
3704    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
3705  %}
3706  ins_pipe( pipe_slow );
3707%}
3708
3709instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
3710  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3711  match(Set dst (DivVD src1 src2));
3712  format %{ "vdivpd  $dst,$src1,$src2\t! div packed2D" %}
3713  ins_encode %{
3714    bool vector256 = false;
3715    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3716  %}
3717  ins_pipe( pipe_slow );
3718%}
3719
3720instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
3721  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3722  match(Set dst (DivVD src (LoadVector mem)));
3723  format %{ "vdivpd  $dst,$src,$mem\t! div packed2D" %}
3724  ins_encode %{
3725    bool vector256 = false;
3726    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3727  %}
3728  ins_pipe( pipe_slow );
3729%}
3730
3731instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
3732  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3733  match(Set dst (DivVD src1 src2));
3734  format %{ "vdivpd  $dst,$src1,$src2\t! div packed4D" %}
3735  ins_encode %{
3736    bool vector256 = true;
3737    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3738  %}
3739  ins_pipe( pipe_slow );
3740%}
3741
3742instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
3743  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3744  match(Set dst (DivVD src (LoadVector mem)));
3745  format %{ "vdivpd  $dst,$src,$mem\t! div packed4D" %}
3746  ins_encode %{
3747    bool vector256 = true;
3748    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3749  %}
3750  ins_pipe( pipe_slow );
3751%}
3752
3753// ------------------------------ LeftShift -----------------------------------
3754
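// Each shift rule comes in two operand flavors: a variable count held in the
// low 64 bits of an XMM register (the regF operand) and a constant count
// encoded as an 8-bit immediate (immI8).  All lanes are shifted by the same
// count.  A minimal Java sketch of a loop the LShiftVS rules could cover
// (array names are illustrative only):
//   for (int i = 0; i < a.length; i++) {
//     b[i] = (short)(a[i] << 3);   // a, b are short[]
//   }
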
3755// Shorts/Chars vector left shift
3756instruct vsll2S(vecS dst, regF shift) %{
3757  predicate(n->as_Vector()->length() == 2);
3758  match(Set dst (LShiftVS dst shift));
3759  format %{ "psllw   $dst,$shift\t! left shift packed2S" %}
3760  ins_encode %{
3761    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
3762  %}
3763  ins_pipe( pipe_slow );
3764%}
3765
3766instruct vsll2S_imm(vecS dst, immI8 shift) %{
3767  predicate(n->as_Vector()->length() == 2);
3768  match(Set dst (LShiftVS dst shift));
3769  format %{ "psllw   $dst,$shift\t! left shift packed2S" %}
3770  ins_encode %{
3771    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
3772  %}
3773  ins_pipe( pipe_slow );
3774%}
3775
3776instruct vsll2S_reg(vecS dst, vecS src, regF shift) %{
3777  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3778  match(Set dst (LShiftVS src shift));
3779  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
3780  ins_encode %{
3781    bool vector256 = false;
3782    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
3783  %}
3784  ins_pipe( pipe_slow );
3785%}
3786
3787instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
3788  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3789  match(Set dst (LShiftVS src shift));
3790  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
3791  ins_encode %{
3792    bool vector256 = false;
3793    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
3794  %}
3795  ins_pipe( pipe_slow );
3796%}
3797
3798instruct vsll4S(vecD dst, regF shift) %{
3799  predicate(n->as_Vector()->length() == 4);
3800  match(Set dst (LShiftVS dst shift));
3801  format %{ "psllw   $dst,$shift\t! left shift packed4S" %}
3802  ins_encode %{
3803    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
3804  %}
3805  ins_pipe( pipe_slow );
3806%}
3807
3808instruct vsll4S_imm(vecD dst, immI8 shift) %{
3809  predicate(n->as_Vector()->length() == 4);
3810  match(Set dst (LShiftVS dst shift));
3811  format %{ "psllw   $dst,$shift\t! left shift packed4S" %}
3812  ins_encode %{
3813    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
3814  %}
3815  ins_pipe( pipe_slow );
3816%}
3817
3818instruct vsll4S_reg(vecD dst, vecD src, regF shift) %{
3819  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3820  match(Set dst (LShiftVS src shift));
3821  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
3822  ins_encode %{
3823    bool vector256 = false;
3824    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
3825  %}
3826  ins_pipe( pipe_slow );
3827%}
3828
3829instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
3830  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3831  match(Set dst (LShiftVS src shift));
3832  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
3833  ins_encode %{
3834    bool vector256 = false;
3835    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
3836  %}
3837  ins_pipe( pipe_slow );
3838%}
3839
3840instruct vsll8S(vecX dst, regF shift) %{
3841  predicate(n->as_Vector()->length() == 8);
3842  match(Set dst (LShiftVS dst shift));
3843  format %{ "psllw   $dst,$shift\t! left shift packed8S" %}
3844  ins_encode %{
3845    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
3846  %}
3847  ins_pipe( pipe_slow );
3848%}
3849
3850instruct vsll8S_imm(vecX dst, immI8 shift) %{
3851  predicate(n->as_Vector()->length() == 8);
3852  match(Set dst (LShiftVS dst shift));
3853  format %{ "psllw   $dst,$shift\t! left shift packed8S" %}
3854  ins_encode %{
3855    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
3856  %}
3857  ins_pipe( pipe_slow );
3858%}
3859
3860instruct vsll8S_reg(vecX dst, vecX src, regF shift) %{
3861  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3862  match(Set dst (LShiftVS src shift));
3863  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
3864  ins_encode %{
3865    bool vector256 = false;
3866    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
3867  %}
3868  ins_pipe( pipe_slow );
3869%}
3870
3871instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
3872  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3873  match(Set dst (LShiftVS src shift));
3874  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
3875  ins_encode %{
3876    bool vector256 = false;
3877    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
3878  %}
3879  ins_pipe( pipe_slow );
3880%}
3881
3882instruct vsll16S_reg(vecY dst, vecY src, regF shift) %{
3883  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
3884  match(Set dst (LShiftVS src shift));
3885  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
3886  ins_encode %{
3887    bool vector256 = true;
3888    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
3889  %}
3890  ins_pipe( pipe_slow );
3891%}
3892
3893instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
3894  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
3895  match(Set dst (LShiftVS src shift));
3896  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
3897  ins_encode %{
3898    bool vector256 = true;
3899    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
3900  %}
3901  ins_pipe( pipe_slow );
3902%}
3903
3904// Integers vector left shift
3905instruct vsll2I(vecD dst, regF shift) %{
3906  predicate(n->as_Vector()->length() == 2);
3907  match(Set dst (LShiftVI dst shift));
3908  format %{ "pslld   $dst,$shift\t! left shift packed2I" %}
3909  ins_encode %{
3910    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
3911  %}
3912  ins_pipe( pipe_slow );
3913%}
3914
3915instruct vsll2I_imm(vecD dst, immI8 shift) %{
3916  predicate(n->as_Vector()->length() == 2);
3917  match(Set dst (LShiftVI dst shift));
3918  format %{ "pslld   $dst,$shift\t! left shift packed2I" %}
3919  ins_encode %{
3920    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
3921  %}
3922  ins_pipe( pipe_slow );
3923%}
3924
3925instruct vsll2I_reg(vecD dst, vecD src, regF shift) %{
3926  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3927  match(Set dst (LShiftVI src shift));
3928  format %{ "vpslld  $dst,$src,$shift\t! left shift packed2I" %}
3929  ins_encode %{
3930    bool vector256 = false;
3931    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
3932  %}
3933  ins_pipe( pipe_slow );
3934%}
3935
3936instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
3937  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3938  match(Set dst (LShiftVI src shift));
3939  format %{ "vpslld  $dst,$src,$shift\t! left shift packed2I" %}
3940  ins_encode %{
3941    bool vector256 = false;
3942    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
3943  %}
3944  ins_pipe( pipe_slow );
3945%}
3946
3947instruct vsll4I(vecX dst, regF shift) %{
3948  predicate(n->as_Vector()->length() == 4);
3949  match(Set dst (LShiftVI dst shift));
3950  format %{ "pslld   $dst,$shift\t! left shift packed4I" %}
3951  ins_encode %{
3952    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
3953  %}
3954  ins_pipe( pipe_slow );
3955%}
3956
3957instruct vsll4I_imm(vecX dst, immI8 shift) %{
3958  predicate(n->as_Vector()->length() == 4);
3959  match(Set dst (LShiftVI dst shift));
3960  format %{ "pslld   $dst,$shift\t! left shift packed4I" %}
3961  ins_encode %{
3962    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
3963  %}
3964  ins_pipe( pipe_slow );
3965%}
3966
3967instruct vsll4I_reg(vecX dst, vecX src, regF shift) %{
3968  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3969  match(Set dst (LShiftVI src shift));
3970  format %{ "vpslld  $dst,$src,$shift\t! left shift packed4I" %}
3971  ins_encode %{
3972    bool vector256 = false;
3973    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
3974  %}
3975  ins_pipe( pipe_slow );
3976%}
3977
3978instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
3979  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3980  match(Set dst (LShiftVI src shift));
3981  format %{ "vpslld  $dst,$src,$shift\t! left shift packed4I" %}
3982  ins_encode %{
3983    bool vector256 = false;
3984    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
3985  %}
3986  ins_pipe( pipe_slow );
3987%}
3988
3989instruct vsll8I_reg(vecY dst, vecY src, regF shift) %{
3990  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
3991  match(Set dst (LShiftVI src shift));
3992  format %{ "vpslld  $dst,$src,$shift\t! left shift packed8I" %}
3993  ins_encode %{
3994    bool vector256 = true;
3995    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
3996  %}
3997  ins_pipe( pipe_slow );
3998%}
3999
4000instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
4001  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
4002  match(Set dst (LShiftVI src shift));
4003  format %{ "vpslld  $dst,$src,$shift\t! left shift packed8I" %}
4004  ins_encode %{
4005    bool vector256 = true;
4006    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4007  %}
4008  ins_pipe( pipe_slow );
4009%}
4010
4011// Longs vector left shift
4012instruct vsll2L(vecX dst, regF shift) %{
4013  predicate(n->as_Vector()->length() == 2);
4014  match(Set dst (LShiftVL dst shift));
4015  format %{ "psllq   $dst,$shift\t! left shift packed2L" %}
4016  ins_encode %{
4017    __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
4018  %}
4019  ins_pipe( pipe_slow );
4020%}
4021
4022instruct vsll2L_imm(vecX dst, immI8 shift) %{
4023  predicate(n->as_Vector()->length() == 2);
4024  match(Set dst (LShiftVL dst shift));
4025  format %{ "psllq   $dst,$shift\t! left shift packed2L" %}
4026  ins_encode %{
4027    __ psllq($dst$$XMMRegister, (int)$shift$$constant);
4028  %}
4029  ins_pipe( pipe_slow );
4030%}
4031
4032instruct vsll2L_reg(vecX dst, vecX src, regF shift) %{
4033  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4034  match(Set dst (LShiftVL src shift));
4035  format %{ "vpsllq  $dst,$src,$shift\t! left shift packed2L" %}
4036  ins_encode %{
4037    bool vector256 = false;
4038    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4039  %}
4040  ins_pipe( pipe_slow );
4041%}
4042
4043instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
4044  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4045  match(Set dst (LShiftVL src shift));
4046  format %{ "vpsllq  $dst,$src,$shift\t! left shift packed2L" %}
4047  ins_encode %{
4048    bool vector256 = false;
4049    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4050  %}
4051  ins_pipe( pipe_slow );
4052%}
4053
4054instruct vsll4L_reg(vecY dst, vecY src, regF shift) %{
4055  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
4056  match(Set dst (LShiftVL src shift));
4057  format %{ "vpsllq  $dst,$src,$shift\t! left shift packed4L" %}
4058  ins_encode %{
4059    bool vector256 = true;
4060    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4061  %}
4062  ins_pipe( pipe_slow );
4063%}
4064
4065instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
4066  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
4067  match(Set dst (LShiftVL src shift));
4068  format %{ "vpsllq  $dst,$src,$shift\t! left shift packed4L" %}
4069  ins_encode %{
4070    bool vector256 = true;
4071    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4072  %}
4073  ins_pipe( pipe_slow );
4074%}
4075
4076// ----------------------- LogicalRightShift -----------------------------------
4077
4078// Shorts/Chars vector logical right shift produces an incorrect Java result
4079// for negative data because Java code converts the short value into an int
4080// with sign extension before the shift.
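// Illustrative example of the mismatch (values chosen for demonstration only):
//   short s = -1;                // 0xFFFF
//   short r = (short)(s >>> 1);  // int 0xFFFFFFFF >>> 1 = 0x7FFFFFFF,
//                                // truncated to short: -1 (0xFFFF),
//                                // while a packed psrlw by 1 would give
//                                // 0x7FFF = 32767.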
4081
4082// Integers vector logical right shift
4083instruct vsrl2I(vecD dst, regF shift) %{
4084  predicate(n->as_Vector()->length() == 2);
4085  match(Set dst (URShiftVI dst shift));
4086  format %{ "psrld   $dst,$shift\t! logical right shift packed2I" %}
4087  ins_encode %{
4088    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
4089  %}
4090  ins_pipe( pipe_slow );
4091%}
4092
4093instruct vsrl2I_imm(vecD dst, immI8 shift) %{
4094  predicate(n->as_Vector()->length() == 2);
4095  match(Set dst (URShiftVI dst shift));
4096  format %{ "psrld   $dst,$shift\t! logical right shift packed2I" %}
4097  ins_encode %{
4098    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
4099  %}
4100  ins_pipe( pipe_slow );
4101%}
4102
4103instruct vsrl2I_reg(vecD dst, vecD src, regF shift) %{
4104  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4105  match(Set dst (URShiftVI src shift));
4106  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed2I" %}
4107  ins_encode %{
4108    bool vector256 = false;
4109    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4110  %}
4111  ins_pipe( pipe_slow );
4112%}
4113
4114instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
4115  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4116  match(Set dst (URShiftVI src shift));
4117  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed2I" %}
4118  ins_encode %{
4119    bool vector256 = false;
4120    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4121  %}
4122  ins_pipe( pipe_slow );
4123%}
4124
4125instruct vsrl4I(vecX dst, regF shift) %{
4126  predicate(n->as_Vector()->length() == 4);
4127  match(Set dst (URShiftVI dst shift));
4128  format %{ "psrld   $dst,$shift\t! logical right shift packed4I" %}
4129  ins_encode %{
4130    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
4131  %}
4132  ins_pipe( pipe_slow );
4133%}
4134
4135instruct vsrl4I_imm(vecX dst, immI8 shift) %{
4136  predicate(n->as_Vector()->length() == 4);
4137  match(Set dst (URShiftVI dst shift));
4138  format %{ "psrld   $dst,$shift\t! logical right shift packed4I" %}
4139  ins_encode %{
4140    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
4141  %}
4142  ins_pipe( pipe_slow );
4143%}
4144
4145instruct vsrl4I_reg(vecX dst, vecX src, regF shift) %{
4146  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4147  match(Set dst (URShiftVI src shift));
4148  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed4I" %}
4149  ins_encode %{
4150    bool vector256 = false;
4151    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4152  %}
4153  ins_pipe( pipe_slow );
4154%}
4155
4156instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
4157  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4158  match(Set dst (URShiftVI src shift));
4159  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed4I" %}
4160  ins_encode %{
4161    bool vector256 = false;
4162    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4163  %}
4164  ins_pipe( pipe_slow );
4165%}
4166
4167instruct vsrl8I_reg(vecY dst, vecY src, regF shift) %{
4168  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
4169  match(Set dst (URShiftVI src shift));
4170  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed8I" %}
4171  ins_encode %{
4172    bool vector256 = true;
4173    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4174  %}
4175  ins_pipe( pipe_slow );
4176%}
4177
4178instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
4179  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
4180  match(Set dst (URShiftVI src shift));
4181  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed8I" %}
4182  ins_encode %{
4183    bool vector256 = true;
4184    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4185  %}
4186  ins_pipe( pipe_slow );
4187%}
4188
4189// Longs vector logical right shift
4190instruct vsrl2L(vecX dst, regF shift) %{
4191  predicate(n->as_Vector()->length() == 2);
4192  match(Set dst (URShiftVL dst shift));
4193  format %{ "psrlq   $dst,$shift\t! logical right shift packed2L" %}
4194  ins_encode %{
4195    __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
4196  %}
4197  ins_pipe( pipe_slow );
4198%}
4199
4200instruct vsrl2L_imm(vecX dst, immI8 shift) %{
4201  predicate(n->as_Vector()->length() == 2);
4202  match(Set dst (URShiftVL dst shift));
4203  format %{ "psrlq   $dst,$shift\t! logical right shift packed2L" %}
4204  ins_encode %{
4205    __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
4206  %}
4207  ins_pipe( pipe_slow );
4208%}
4209
4210instruct vsrl2L_reg(vecX dst, vecX src, regF shift) %{
4211  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4212  match(Set dst (URShiftVL src shift));
4213  format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed2L" %}
4214  ins_encode %{
4215    bool vector256 = false;
4216    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4217  %}
4218  ins_pipe( pipe_slow );
4219%}
4220
4221instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
4222  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4223  match(Set dst (URShiftVL src shift));
4224  format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed2L" %}
4225  ins_encode %{
4226    bool vector256 = false;
4227    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4228  %}
4229  ins_pipe( pipe_slow );
4230%}
4231
4232instruct vsrl4L_reg(vecY dst, vecY src, regF shift) %{
4233  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
4234  match(Set dst (URShiftVL src shift));
4235  format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed4L" %}
4236  ins_encode %{
4237    bool vector256 = true;
4238    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4239  %}
4240  ins_pipe( pipe_slow );
4241%}
4242
4243instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
4244  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
4245  match(Set dst (URShiftVL src shift));
4246  format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed4L" %}
4247  ins_encode %{
4248    bool vector256 = true;
4249    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4250  %}
4251  ins_pipe( pipe_slow );
4252%}
4253
4254// ------------------- ArithmeticRightShift -----------------------------------
4255
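// Unlike the logical right shift above, the arithmetic right shift of shorts
// is safe to vectorize: sign-extending to int, shifting arithmetically and
// truncating back to 16 bits matches a packed 16-bit psraw, e.g.
// (short)(((short)-2) >> 1) == -1 either way.
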
4256// Shorts/Chars vector arithmetic right shift
4257instruct vsra2S(vecS dst, regF shift) %{
4258  predicate(n->as_Vector()->length() == 2);
4259  match(Set dst (RShiftVS dst shift));
4260  format %{ "psraw   $dst,$shift\t! arithmetic right shift packed2S" %}
4261  ins_encode %{
4262    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
4263  %}
4264  ins_pipe( pipe_slow );
4265%}
4266
4267instruct vsra2S_imm(vecS dst, immI8 shift) %{
4268  predicate(n->as_Vector()->length() == 2);
4269  match(Set dst (RShiftVS dst shift));
4270  format %{ "psraw   $dst,$shift\t! arithmetic right shift packed2S" %}
4271  ins_encode %{
4272    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
4273  %}
4274  ins_pipe( pipe_slow );
4275%}
4276
4277instruct vsra2S_reg(vecS dst, vecS src, regF shift) %{
4278  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4279  match(Set dst (RShiftVS src shift));
4280  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
4281  ins_encode %{
4282    bool vector256 = false;
4283    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4284  %}
4285  ins_pipe( pipe_slow );
4286%}
4287
4288instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
4289  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4290  match(Set dst (RShiftVS src shift));
4291  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
4292  ins_encode %{
4293    bool vector256 = false;
4294    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4295  %}
4296  ins_pipe( pipe_slow );
4297%}
4298
4299instruct vsra4S(vecD dst, regF shift) %{
4300  predicate(n->as_Vector()->length() == 4);
4301  match(Set dst (RShiftVS dst shift));
4302  format %{ "psraw   $dst,$shift\t! arithmetic right shift packed4S" %}
4303  ins_encode %{
4304    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
4305  %}
4306  ins_pipe( pipe_slow );
4307%}
4308
4309instruct vsra4S_imm(vecD dst, immI8 shift) %{
4310  predicate(n->as_Vector()->length() == 4);
4311  match(Set dst (RShiftVS dst shift));
4312  format %{ "psraw   $dst,$shift\t! arithmetic right shift packed4S" %}
4313  ins_encode %{
4314    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
4315  %}
4316  ins_pipe( pipe_slow );
4317%}
4318
4319instruct vsra4S_reg(vecD dst, vecD src, regF shift) %{
4320  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4321  match(Set dst (RShiftVS src shift));
4322  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
4323  ins_encode %{
4324    bool vector256 = false;
4325    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4326  %}
4327  ins_pipe( pipe_slow );
4328%}
4329
4330instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
4331  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4332  match(Set dst (RShiftVS src shift));
4333  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
4334  ins_encode %{
4335    bool vector256 = false;
4336    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4337  %}
4338  ins_pipe( pipe_slow );
4339%}
4340
4341instruct vsra8S(vecX dst, regF shift) %{
4342  predicate(n->as_Vector()->length() == 8);
4343  match(Set dst (RShiftVS dst shift));
4344  format %{ "psraw   $dst,$shift\t! arithmetic right shift packed8S" %}
4345  ins_encode %{
4346    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
4347  %}
4348  ins_pipe( pipe_slow );
4349%}
4350
4351instruct vsra8S_imm(vecX dst, immI8 shift) %{
4352  predicate(n->as_Vector()->length() == 8);
4353  match(Set dst (RShiftVS dst shift));
4354  format %{ "psraw   $dst,$shift\t! arithmetic right shift packed8S" %}
4355  ins_encode %{
4356    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
4357  %}
4358  ins_pipe( pipe_slow );
4359%}
4360
4361instruct vsra8S_reg(vecX dst, vecX src, regF shift) %{
4362  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
4363  match(Set dst (RShiftVS src shift));
4364  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
4365  ins_encode %{
4366    bool vector256 = false;
4367    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4368  %}
4369  ins_pipe( pipe_slow );
4370%}
4371
4372instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
4373  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
4374  match(Set dst (RShiftVS src shift));
4375  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
4376  ins_encode %{
4377    bool vector256 = false;
4378    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4379  %}
4380  ins_pipe( pipe_slow );
4381%}
4382
4383instruct vsra16S_reg(vecY dst, vecY src, regF shift) %{
4384  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
4385  match(Set dst (RShiftVS src shift));
4386  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
4387  ins_encode %{
4388    bool vector256 = true;
4389    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4390  %}
4391  ins_pipe( pipe_slow );
4392%}
4393
4394instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
4395  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
4396  match(Set dst (RShiftVS src shift));
4397  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
4398  ins_encode %{
4399    bool vector256 = true;
4400    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4401  %}
4402  ins_pipe( pipe_slow );
4403%}
4404
4405// Integers vector arithmetic right shift
4406instruct vsra2I(vecD dst, regF shift) %{
4407  predicate(n->as_Vector()->length() == 2);
4408  match(Set dst (RShiftVI dst shift));
4409  format %{ "psrad   $dst,$shift\t! arithmetic right shift packed2I" %}
4410  ins_encode %{
4411    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
4412  %}
4413  ins_pipe( pipe_slow );
4414%}
4415
4416instruct vsra2I_imm(vecD dst, immI8 shift) %{
4417  predicate(n->as_Vector()->length() == 2);
4418  match(Set dst (RShiftVI dst shift));
4419  format %{ "psrad   $dst,$shift\t! arithmetic right shift packed2I" %}
4420  ins_encode %{
4421    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
4422  %}
4423  ins_pipe( pipe_slow );
4424%}
4425
4426instruct vsra2I_reg(vecD dst, vecD src, regF shift) %{
4427  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4428  match(Set dst (RShiftVI src shift));
4429  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed2I" %}
4430  ins_encode %{
4431    bool vector256 = false;
4432    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4433  %}
4434  ins_pipe( pipe_slow );
4435%}
4436
4437instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
4438  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4439  match(Set dst (RShiftVI src shift));
4440  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed2I" %}
4441  ins_encode %{
4442    bool vector256 = false;
4443    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4444  %}
4445  ins_pipe( pipe_slow );
4446%}
4447
4448instruct vsra4I(vecX dst, regF shift) %{
4449  predicate(n->as_Vector()->length() == 4);
4450  match(Set dst (RShiftVI dst shift));
4451  format %{ "psrad   $dst,$shift\t! arithmetic right shift packed4I" %}
4452  ins_encode %{
4453    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
4454  %}
4455  ins_pipe( pipe_slow );
4456%}
4457
4458instruct vsra4I_imm(vecX dst, immI8 shift) %{
4459  predicate(n->as_Vector()->length() == 4);
4460  match(Set dst (RShiftVI dst shift));
4461  format %{ "psrad   $dst,$shift\t! arithmetic right shift packed4I" %}
4462  ins_encode %{
4463    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
4464  %}
4465  ins_pipe( pipe_slow );
4466%}
4467
4468instruct vsra4I_reg(vecX dst, vecX src, regF shift) %{
4469  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4470  match(Set dst (RShiftVI src shift));
4471  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed4I" %}
4472  ins_encode %{
4473    bool vector256 = false;
4474    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4475  %}
4476  ins_pipe( pipe_slow );
4477%}
4478
4479instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
4480  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4481  match(Set dst (RShiftVI src shift));
4482  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed4I" %}
4483  ins_encode %{
4484    bool vector256 = false;
4485    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4486  %}
4487  ins_pipe( pipe_slow );
4488%}
4489
4490instruct vsra8I_reg(vecY dst, vecY src, regF shift) %{
4491  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
4492  match(Set dst (RShiftVI src shift));
4493  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed8I" %}
4494  ins_encode %{
4495    bool vector256 = true;
4496    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4497  %}
4498  ins_pipe( pipe_slow );
4499%}
4500
4501instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
4502  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
4503  match(Set dst (RShiftVI src shift));
4504  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed8I" %}
4505  ins_encode %{
4506    bool vector256 = true;
4507    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4508  %}
4509  ins_pipe( pipe_slow );
4510%}
4511
4512// There is no vector arithmetic right shift instruction for longs.
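// SSE/AVX2 provide psraw and psrad but no 64-bit psraq, so RShiftVL is not
// matched here and such shifts are left to scalar code.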
4513
4514
4515// --------------------------------- AND --------------------------------------
4516
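// The bitwise AND/OR/XOR rules match on length_in_bytes() rather than
// length() because these operations are element-type agnostic: one rule per
// vector width covers byte through long elements.  A minimal Java sketch of a
// masking loop the AndV rules could cover (array names are illustrative only):
//   for (int i = 0; i < a.length; i++) {
//     c[i] = a[i] & b[i];   // a, b, c are int[]
//   }
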
4517instruct vand4B(vecS dst, vecS src) %{
4518  predicate(n->as_Vector()->length_in_bytes() == 4);
4519  match(Set dst (AndV dst src));
4520  format %{ "pand    $dst,$src\t! and vectors (4 bytes)" %}
4521  ins_encode %{
4522    __ pand($dst$$XMMRegister, $src$$XMMRegister);
4523  %}
4524  ins_pipe( pipe_slow );
4525%}
4526
4527instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
4528  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
4529  match(Set dst (AndV src1 src2));
4530  format %{ "vpand   $dst,$src1,$src2\t! and vectors (4 bytes)" %}
4531  ins_encode %{
4532    bool vector256 = false;
4533    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4534  %}
4535  ins_pipe( pipe_slow );
4536%}
4537
4538instruct vand8B(vecD dst, vecD src) %{
4539  predicate(n->as_Vector()->length_in_bytes() == 8);
4540  match(Set dst (AndV dst src));
4541  format %{ "pand    $dst,$src\t! and vectors (8 bytes)" %}
4542  ins_encode %{
4543    __ pand($dst$$XMMRegister, $src$$XMMRegister);
4544  %}
4545  ins_pipe( pipe_slow );
4546%}
4547
4548instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
4549  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
4550  match(Set dst (AndV src1 src2));
4551  format %{ "vpand   $dst,$src1,$src2\t! and vectors (8 bytes)" %}
4552  ins_encode %{
4553    bool vector256 = false;
4554    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4555  %}
4556  ins_pipe( pipe_slow );
4557%}
4558
4559instruct vand16B(vecX dst, vecX src) %{
4560  predicate(n->as_Vector()->length_in_bytes() == 16);
4561  match(Set dst (AndV dst src));
4562  format %{ "pand    $dst,$src\t! and vectors (16 bytes)" %}
4563  ins_encode %{
4564    __ pand($dst$$XMMRegister, $src$$XMMRegister);
4565  %}
4566  ins_pipe( pipe_slow );
4567%}
4568
4569instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
4570  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
4571  match(Set dst (AndV src1 src2));
4572  format %{ "vpand   $dst,$src1,$src2\t! and vectors (16 bytes)" %}
4573  ins_encode %{
4574    bool vector256 = false;
4575    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4576  %}
4577  ins_pipe( pipe_slow );
4578%}
4579
4580instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
4581  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
4582  match(Set dst (AndV src (LoadVector mem)));
4583  format %{ "vpand   $dst,$src,$mem\t! and vectors (16 bytes)" %}
4584  ins_encode %{
4585    bool vector256 = false;
4586    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4587  %}
4588  ins_pipe( pipe_slow );
4589%}
4590
4591instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
4592  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
4593  match(Set dst (AndV src1 src2));
4594  format %{ "vpand   $dst,$src1,$src2\t! and vectors (32 bytes)" %}
4595  ins_encode %{
4596    bool vector256 = true;
4597    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4598  %}
4599  ins_pipe( pipe_slow );
4600%}
4601
4602instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
4603  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
4604  match(Set dst (AndV src (LoadVector mem)));
4605  format %{ "vpand   $dst,$src,$mem\t! and vectors (32 bytes)" %}
4606  ins_encode %{
4607    bool vector256 = true;
4608    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4609  %}
4610  ins_pipe( pipe_slow );
4611%}
4612
4613// --------------------------------- OR ---------------------------------------
4614
4615instruct vor4B(vecS dst, vecS src) %{
4616  predicate(n->as_Vector()->length_in_bytes() == 4);
4617  match(Set dst (OrV dst src));
4618  format %{ "por     $dst,$src\t! or vectors (4 bytes)" %}
4619  ins_encode %{
4620    __ por($dst$$XMMRegister, $src$$XMMRegister);
4621  %}
4622  ins_pipe( pipe_slow );
4623%}
4624
4625instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
4626  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
4627  match(Set dst (OrV src1 src2));
4628  format %{ "vpor    $dst,$src1,$src2\t! or vectors (4 bytes)" %}
4629  ins_encode %{
4630    bool vector256 = false;
4631    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4632  %}
4633  ins_pipe( pipe_slow );
4634%}
4635
4636instruct vor8B(vecD dst, vecD src) %{
4637  predicate(n->as_Vector()->length_in_bytes() == 8);
4638  match(Set dst (OrV dst src));
4639  format %{ "por     $dst,$src\t! or vectors (8 bytes)" %}
4640  ins_encode %{
4641    __ por($dst$$XMMRegister, $src$$XMMRegister);
4642  %}
4643  ins_pipe( pipe_slow );
4644%}
4645
4646instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
4647  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
4648  match(Set dst (OrV src1 src2));
4649  format %{ "vpor    $dst,$src1,$src2\t! or vectors (8 bytes)" %}
4650  ins_encode %{
4651    bool vector256 = false;
4652    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4653  %}
4654  ins_pipe( pipe_slow );
4655%}
4656
4657instruct vor16B(vecX dst, vecX src) %{
4658  predicate(n->as_Vector()->length_in_bytes() == 16);
4659  match(Set dst (OrV dst src));
4660  format %{ "por     $dst,$src\t! or vectors (16 bytes)" %}
4661  ins_encode %{
4662    __ por($dst$$XMMRegister, $src$$XMMRegister);
4663  %}
4664  ins_pipe( pipe_slow );
4665%}
4666
4667instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
4668  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
4669  match(Set dst (OrV src1 src2));
4670  format %{ "vpor    $dst,$src1,$src2\t! or vectors (16 bytes)" %}
4671  ins_encode %{
4672    bool vector256 = false;
4673    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4674  %}
4675  ins_pipe( pipe_slow );
4676%}
4677
4678instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
4679  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
4680  match(Set dst (OrV src (LoadVector mem)));
4681  format %{ "vpor    $dst,$src,$mem\t! or vectors (16 bytes)" %}
4682  ins_encode %{
4683    bool vector256 = false;
4684    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4685  %}
4686  ins_pipe( pipe_slow );
4687%}
4688
4689instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
4690  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
4691  match(Set dst (OrV src1 src2));
4692  format %{ "vpor    $dst,$src1,$src2\t! or vectors (32 bytes)" %}
4693  ins_encode %{
4694    bool vector256 = true;
4695    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4696  %}
4697  ins_pipe( pipe_slow );
4698%}
4699
4700instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
4701  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
4702  match(Set dst (OrV src (LoadVector mem)));
4703  format %{ "vpor    $dst,$src,$mem\t! or vectors (32 bytes)" %}
4704  ins_encode %{
4705    bool vector256 = true;
4706    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4707  %}
4708  ins_pipe( pipe_slow );
4709%}
4710
4711// --------------------------------- XOR --------------------------------------
4712
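// The XOR rules follow the same pattern as AND and OR, using pxor / vpxor.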
4713instruct vxor4B(vecS dst, vecS src) %{
4714  predicate(n->as_Vector()->length_in_bytes() == 4);
4715  match(Set dst (XorV dst src));
4716  format %{ "pxor    $dst,$src\t! xor vectors (4 bytes)" %}
4717  ins_encode %{
4718    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
4719  %}
4720  ins_pipe( pipe_slow );
4721%}
4722
4723instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
4724  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
4725  match(Set dst (XorV src1 src2));
4726  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
4727  ins_encode %{
4728    bool vector256 = false;
4729    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4730  %}
4731  ins_pipe( pipe_slow );
4732%}
4733
4734instruct vxor8B(vecD dst, vecD src) %{
4735  predicate(n->as_Vector()->length_in_bytes() == 8);
4736  match(Set dst (XorV dst src));
4737  format %{ "pxor    $dst,$src\t! xor vectors (8 bytes)" %}
4738  ins_encode %{
4739    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
4740  %}
4741  ins_pipe( pipe_slow );
4742%}
4743
4744instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
4745  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
4746  match(Set dst (XorV src1 src2));
4747  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
4748  ins_encode %{
4749    bool vector256 = false;
4750    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4751  %}
4752  ins_pipe( pipe_slow );
4753%}
4754
4755instruct vxor16B(vecX dst, vecX src) %{
4756  predicate(n->as_Vector()->length_in_bytes() == 16);
4757  match(Set dst (XorV dst src));
4758  format %{ "pxor    $dst,$src\t! xor vectors (16 bytes)" %}
4759  ins_encode %{
4760    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
4761  %}
4762  ins_pipe( pipe_slow );
4763%}
4764
4765instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
4766  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
4767  match(Set dst (XorV src1 src2));
4768  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
4769  ins_encode %{
4770    bool vector256 = false;
4771    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4772  %}
4773  ins_pipe( pipe_slow );
4774%}
4775
4776instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
4777  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
4778  match(Set dst (XorV src (LoadVector mem)));
4779  format %{ "vpxor   $dst,$src,$mem\t! xor vectors (16 bytes)" %}
4780  ins_encode %{
4781    bool vector256 = false;
4782    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4783  %}
4784  ins_pipe( pipe_slow );
4785%}
4786
4787instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
4788  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
4789  match(Set dst (XorV src1 src2));
4790  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
4791  ins_encode %{
4792    bool vector256 = true;
4793    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4794  %}
4795  ins_pipe( pipe_slow );
4796%}
4797
4798instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
4799  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
4800  match(Set dst (XorV src (LoadVector mem)));
4801  format %{ "vpxor   $dst,$src,$mem\t! xor vectors (32 bytes)" %}
4802  ins_encode %{
4803    bool vector256 = true;
4804    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4805  %}
4806  ins_pipe( pipe_slow );
4807%}
4808
4809