x86.ad revision 6760:22b98ab2a69f
1//
2// Copyright (c) 2011, 2014, Oracle and/or its affiliates. All rights reserved.
3// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4//
5// This code is free software; you can redistribute it and/or modify it
6// under the terms of the GNU General Public License version 2 only, as
7// published by the Free Software Foundation.
8//
9// This code is distributed in the hope that it will be useful, but WITHOUT
10// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12// version 2 for more details (a copy is included in the LICENSE file that
13// accompanied this code).
14//
15// You should have received a copy of the GNU General Public License version
16// 2 along with this work; if not, write to the Free Software Foundation,
17// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18//
19// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20// or visit www.oracle.com if you need additional information or have any
21// questions.
22//
23//
24
25// X86 Common Architecture Description File
26
27//----------REGISTER DEFINITION BLOCK------------------------------------------
28// This information is used by the matcher and the register allocator to
29// describe individual registers and classes of registers within the target
30// architecture.
31
32register %{
33//----------Architecture Description Register Definitions----------------------
34// General Registers
35// "reg_def"  name ( register save type, C convention save type,
36//                   ideal register type, encoding );
37// Register Save Types:
38//
39// NS  = No-Save:       The register allocator assumes that these registers
40//                      can be used without saving upon entry to the method, &
41//                      that they do not need to be saved at call sites.
42//
43// SOC = Save-On-Call:  The register allocator assumes that these registers
44//                      can be used without saving upon entry to the method,
45//                      but that they must be saved at call sites.
46//
47// SOE = Save-On-Entry: The register allocator assumes that these registers
48//                      must be saved before using them upon entry to the
49//                      method, but they do not need to be saved at call
50//                      sites.
51//
52// AS  = Always-Save:   The register allocator assumes that these registers
53//                      must be saved before using them upon entry to the
54//                      method, & that they must be saved at call sites.
55//
56// Ideal Register Type is used to determine how to save & restore a
57// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
58// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
59//
60// The encoding number is the actual bit-pattern placed into the opcodes.
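// An illustrative reading of the first definition below:
//   reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
// XMM0 is Save-On-Call for both the allocator and the C calling convention,
// is spilled as a float (Op_RegF), and has hardware encoding 0.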
61
62// XMM registers.  256-bit registers of 8 words each, labeled a-h.
63// Word a of each register holds a Float; words a-b hold a Double.
64// The full registers are used by the SSE4.2 intrinsics, the array copy
65// stubs and superword operations (see the UseSSE42Intrinsics,
66// UseXMMForArrayCopy and UseSuperWord flags).
67// XMM8-XMM15 must be encoded with REX (VEX when UseAVX is enabled).
68// Linux ABI:   no registers are preserved across function calls;
69//              XMM0-XMM7 may hold parameters.
70// Windows ABI: XMM6-XMM15 are preserved across function calls;
71//              XMM0-XMM3 may hold parameters.
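//
// Each reg_def below covers one 32-bit word of the register, so the eight
// entries XMM0, XMM0b, ..., XMM0h together describe the full 256-bit (YMM)
// width of register 0; a Float uses only XMM0 and a Double uses XMM0/XMM0b,
// as noted above.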
72
73reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
74reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
75reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
76reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
77reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
78reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
79reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
80reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
81
82reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
83reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
84reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
85reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
86reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
87reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
88reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
89reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
90
91reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
92reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
93reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
94reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
95reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
96reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
97reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
98reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
99
100reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
101reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
102reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
103reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
104reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
105reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
106reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
107reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
108
109reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
110reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
111reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
112reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
113reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
114reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
115reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
116reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
117
118reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
119reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
120reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
121reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
122reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
123reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
124reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
125reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
126
127#ifdef _WIN64
128
129reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
130reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1));
131reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(2));
132reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3));
133reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4));
134reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5));
135reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6));
136reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7));
137
138reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
139reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1));
140reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2));
141reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3));
142reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4));
143reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5));
144reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6));
145reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7));
146
147reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
148reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1));
149reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2));
150reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3));
151reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4));
152reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5));
153reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6));
154reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7));
155
156reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
157reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1));
158reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2));
159reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3));
160reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4));
161reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5));
162reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6));
163reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7));
164
165reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
166reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1));
167reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2));
168reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3));
169reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(4));
170reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5));
171reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6));
172reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7));
173
174reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
175reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1));
176reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2));
177reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3));
178reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4));
179reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5));
180reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6));
181reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7));
182
183reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
184reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1));
185reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2));
186reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3));
187reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4));
188reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5));
189reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6));
190reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7));
191
192reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
193reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1));
194reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2));
195reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3));
196reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4));
197reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5));
198reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6));
199reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7));
200
201reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
202reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1));
203reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2));
204reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3));
205reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4));
206reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5));
207reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6));
208reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7));
209
210reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
211reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1));
212reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2));
213reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3));
214reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4));
215reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5));
216reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6));
217reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7));
218
219#else // _WIN64
220
221reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
222reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
223reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
224reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
225reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
226reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
227reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
228reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
229
230reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
231reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
232reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
233reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
234reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
235reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
236reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
237reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
238
239#ifdef _LP64
240
241reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
242reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
243reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
244reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
245reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
246reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
247reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
248reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
249
250reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
251reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
252reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
253reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
254reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
255reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
256reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
257reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
258
259reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
260reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
261reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
262reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
263reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
264reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
265reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
266reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
267
268reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
269reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
270reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
271reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
272reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
273reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
274reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
275reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
276
277reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
278reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
279reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
280reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
281reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
282reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
283reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
284reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
285
286reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
287reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
288reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
289reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
290reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
291reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
292reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
293reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
294
295reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
296reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
297reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
298reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
299reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
300reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
301reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
302reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
303
304reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
305reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
306reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
307reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
308reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
309reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
310reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
311reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
312
313#endif // _LP64
314
315#endif // _WIN64
316
317#ifdef _LP64
318reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
319#else
320reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
321#endif // _LP64
322
323alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
324                   XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
325                   XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
326                   XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
327                   XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
328                   XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
329                   XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
330                   XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
331#ifdef _LP64
332                  ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
333                   XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
334                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
335                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
336                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
337                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
338                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
339                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
340#endif
341                   );
342
343// flags allocation class should be last.
344alloc_class chunk2(RFLAGS);
345
346// Singleton class for condition codes
347reg_class int_flags(RFLAGS);
348
349// Class for all float registers
350reg_class float_reg(XMM0,
351                    XMM1,
352                    XMM2,
353                    XMM3,
354                    XMM4,
355                    XMM5,
356                    XMM6,
357                    XMM7
358#ifdef _LP64
359                   ,XMM8,
360                    XMM9,
361                    XMM10,
362                    XMM11,
363                    XMM12,
364                    XMM13,
365                    XMM14,
366                    XMM15
367#endif
368                    );
369
370// Class for all double registers
371reg_class double_reg(XMM0,  XMM0b,
372                     XMM1,  XMM1b,
373                     XMM2,  XMM2b,
374                     XMM3,  XMM3b,
375                     XMM4,  XMM4b,
376                     XMM5,  XMM5b,
377                     XMM6,  XMM6b,
378                     XMM7,  XMM7b
379#ifdef _LP64
380                    ,XMM8,  XMM8b,
381                     XMM9,  XMM9b,
382                     XMM10, XMM10b,
383                     XMM11, XMM11b,
384                     XMM12, XMM12b,
385                     XMM13, XMM13b,
386                     XMM14, XMM14b,
387                     XMM15, XMM15b
388#endif
389                     );
390
391// Class for all 32bit vector registers
392reg_class vectors_reg(XMM0,
393                      XMM1,
394                      XMM2,
395                      XMM3,
396                      XMM4,
397                      XMM5,
398                      XMM6,
399                      XMM7
400#ifdef _LP64
401                     ,XMM8,
402                      XMM9,
403                      XMM10,
404                      XMM11,
405                      XMM12,
406                      XMM13,
407                      XMM14,
408                      XMM15
409#endif
410                      );
411
412// Class for all 64bit vector registers
413reg_class vectord_reg(XMM0,  XMM0b,
414                      XMM1,  XMM1b,
415                      XMM2,  XMM2b,
416                      XMM3,  XMM3b,
417                      XMM4,  XMM4b,
418                      XMM5,  XMM5b,
419                      XMM6,  XMM6b,
420                      XMM7,  XMM7b
421#ifdef _LP64
422                     ,XMM8,  XMM8b,
423                      XMM9,  XMM9b,
424                      XMM10, XMM10b,
425                      XMM11, XMM11b,
426                      XMM12, XMM12b,
427                      XMM13, XMM13b,
428                      XMM14, XMM14b,
429                      XMM15, XMM15b
430#endif
431                      );
432
433// Class for all 128bit vector registers
434reg_class vectorx_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,
435                      XMM1,  XMM1b,  XMM1c,  XMM1d,
436                      XMM2,  XMM2b,  XMM2c,  XMM2d,
437                      XMM3,  XMM3b,  XMM3c,  XMM3d,
438                      XMM4,  XMM4b,  XMM4c,  XMM4d,
439                      XMM5,  XMM5b,  XMM5c,  XMM5d,
440                      XMM6,  XMM6b,  XMM6c,  XMM6d,
441                      XMM7,  XMM7b,  XMM7c,  XMM7d
442#ifdef _LP64
443                     ,XMM8,  XMM8b,  XMM8c,  XMM8d,
444                      XMM9,  XMM9b,  XMM9c,  XMM9d,
445                      XMM10, XMM10b, XMM10c, XMM10d,
446                      XMM11, XMM11b, XMM11c, XMM11d,
447                      XMM12, XMM12b, XMM12c, XMM12d,
448                      XMM13, XMM13b, XMM13c, XMM13d,
449                      XMM14, XMM14b, XMM14c, XMM14d,
450                      XMM15, XMM15b, XMM15c, XMM15d
451#endif
452                      );
453
454// Class for all 256bit vector registers
455reg_class vectory_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
456                      XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
457                      XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
458                      XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
459                      XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
460                      XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
461                      XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
462                      XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
463#ifdef _LP64
464                     ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
465                      XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
466                      XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
467                      XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
468                      XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
469                      XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
470                      XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
471                      XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
472#endif
473                      );
474
475%}
476
477
478//----------SOURCE BLOCK-------------------------------------------------------
479// This is a block of C++ code which provides values, functions, and
480// definitions necessary in the rest of the architecture description
481
482source_hpp %{
483// Header information of the source block.
484// Method declarations/definitions which are used outside
485// the ad-scope can conveniently be defined here.
486//
487// To keep related declarations/definitions/uses close together,
488// we switch between source %{ }% and source_hpp %{ }% freely as needed.
489
490class NativeJump;
491
492class CallStubImpl {
493 
494  //--------------------------------------------------------------
495  //---<  Used for optimization in Compile::shorten_branches  >---
496  //--------------------------------------------------------------
497
498 public:
499  // Size of call trampoline stub.
500  static uint size_call_trampoline() {
501    return 0; // no call trampolines on this platform
502  }
503  
504  // number of relocations needed by a call trampoline stub
505  static uint reloc_call_trampoline() { 
506    return 0; // no call trampolines on this platform
507  }
508};
509
510class HandlerImpl {
511
512 public:
513
514  static int emit_exception_handler(CodeBuffer &cbuf);
515  static int emit_deopt_handler(CodeBuffer& cbuf);
516
517  static uint size_exception_handler() {
518    // NativeCall instruction size is the same as NativeJump.
519    // The exception handler starts out as a jump and can be patched to
520    // a call by deoptimization.  (4932387)
521    // Note that this value is also credited (in output.cpp) to
522    // the size of the code section.
523    return NativeJump::instruction_size;
524  }
525
526#ifdef _LP64
527  static uint size_deopt_handler() {
528    // three 5-byte instructions
529    return 15;
530  }
531#else
532  static uint size_deopt_handler() {
533    // NativeCall instruction size is the same as NativeJump.
534    // The exception handler starts out as a jump and can be patched to
535    // a call by deoptimization.  (4932387)
536    // Note that this value is also credited (in output.cpp) to
537    // the size of the code section.
538    return 5 + NativeJump::instruction_size; // pushl(); jmp;
539  }
540#endif
541};
542
543%} // end source_hpp
544
545source %{
546
547// Emit exception handler code.
548// Stuff framesize into a register and call a VM stub routine.
549int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {
550
551  // Note that the code buffer's insts_mark is always relative to insts.
552  // That's why we must use the macroassembler to generate a handler.
553  MacroAssembler _masm(&cbuf);
554  address base = __ start_a_stub(size_exception_handler());
555  if (base == NULL)  return 0;  // CodeBuffer::expand failed
556  int offset = __ offset();
557  __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
558  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
559  __ end_a_stub();
560  return offset;
561}
562
563// Emit deopt handler code.
564int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
565
566  // Note that the code buffer's insts_mark is always relative to insts.
567  // That's why we must use the macroassembler to generate a handler.
568  MacroAssembler _masm(&cbuf);
569  address base = __ start_a_stub(size_deopt_handler());
570  if (base == NULL)  return 0;  // CodeBuffer::expand failed
571  int offset = __ offset();
572
573#ifdef _LP64
574  address the_pc = (address) __ pc();
575  Label next;
576  // push a "the_pc" on the stack without destroying any registers
577  // as they all may be live.
578
579  // push address of "next"
580  __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
581  __ bind(next);
582  // adjust it so it matches "the_pc"
583  __ subptr(Address(rsp, 0), __ offset() - offset);
584#else
585  InternalAddress here(__ pc());
586  __ pushptr(here.addr());
587#endif
588
589  __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
590  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
591  __ end_a_stub();
592  return offset;
593}
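// Note on sizes (illustrative): the 64-bit path above -- the call to the next
// label, the subptr that rewrites the pushed address to "the_pc", and the jump
// to the unpack blob -- is what size_deopt_handler() budgets 15 bytes for; the
// 32-bit path pushes the PC and jumps, matching 5 + NativeJump::instruction_size.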
594
595
596//=============================================================================
597
598  // Float masks come from different places depending on platform.
599#ifdef _LP64
600  static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
601  static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
602  static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
603  static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
604#else
605  static address float_signmask()  { return (address)float_signmask_pool; }
606  static address float_signflip()  { return (address)float_signflip_pool; }
607  static address double_signmask() { return (address)double_signmask_pool; }
608  static address double_signflip() { return (address)double_signflip_pool; }
609#endif
610
611
612const bool Matcher::match_rule_supported(int opcode) {
613  if (!has_match_rule(opcode))
614    return false;
615
616  switch (opcode) {
617    case Op_PopCountI:
618    case Op_PopCountL:
619      if (!UsePopCountInstruction)
620        return false;
621    break;
622    case Op_MulVI:
623      if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
624        return false;
625    break;
626    case Op_CompareAndSwapL:
627#ifdef _LP64
628    case Op_CompareAndSwapP:
629#endif
630      if (!VM_Version::supports_cx8())
631        return false;
632    break;
633  }
634
635  return true;  // By default, match rules are supported.
636}
637
638// Max vector size in bytes. 0 if not supported.
639const int Matcher::vector_width_in_bytes(BasicType bt) {
640  assert(is_java_primitive(bt), "only primitive type vectors");
641  if (UseSSE < 2) return 0;
642  // SSE2 supports 128bit vectors for all types.
643  // AVX2 supports 256bit vectors for all types.
644  int size = (UseAVX > 1) ? 32 : 16;
645  // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
646  if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
647    size = 32;
648  // Use flag to limit vector size.
649  size = MIN2(size,(int)MaxVectorSize);
650  // Minimum 2 values in vector (or 4 for bytes).
651  switch (bt) {
652  case T_DOUBLE:
653  case T_LONG:
654    if (size < 16) return 0;
    // fall through
655  case T_FLOAT:
656  case T_INT:
657    if (size < 8) return 0;
    // fall through
658  case T_BOOLEAN:
659  case T_BYTE:
660  case T_CHAR:
661  case T_SHORT:
662    if (size < 4) return 0;
663    break;
664  default:
665    ShouldNotReachHere();
666  }
667  return size;
668}
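// Illustrative examples of the rules above: with UseAVX == 2 and
// MaxVectorSize >= 32, vector_width_in_bytes(T_INT) returns 32 (eight ints);
// with plain SSE2 it returns 16; a width that would hold fewer than two
// elements (four for byte-sized types) returns 0, i.e. not vectorizable.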
669
670// Limits on vector size (number of elements) loaded into vector.
671const int Matcher::max_vector_size(const BasicType bt) {
672  return vector_width_in_bytes(bt)/type2aelembytes(bt);
673}
674const int Matcher::min_vector_size(const BasicType bt) {
675  int max_size = max_vector_size(bt);
676  // Min size which can be loaded into vector is 4 bytes.
677  int size = (type2aelembytes(bt) == 1) ? 4 : 2;
678  return MIN2(size,max_size);
679}
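// For example, with a 32-byte vector width max_vector_size(T_BYTE) is
// 32/1 = 32 elements, while min_vector_size(T_BYTE) is 4 and
// min_vector_size(T_INT) is 2 -- both reflecting the 4-byte minimum load.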
680
681// Vector ideal reg corresponding to specified size in bytes
682const int Matcher::vector_ideal_reg(int size) {
683  assert(MaxVectorSize >= size, "");
684  switch(size) {
685    case  4: return Op_VecS;
686    case  8: return Op_VecD;
687    case 16: return Op_VecX;
688    case 32: return Op_VecY;
689  }
690  ShouldNotReachHere();
691  return 0;
692}
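// These ideal types correspond to the vectors_reg/vectord_reg/vectorx_reg/
// vectory_reg classes declared in the register block above and to the
// vecS/vecD/vecX/vecY operands defined later in this file
// (4, 8, 16 and 32 bytes respectively).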
693
694// Only lowest bits of xmm reg are used for vector shift count.
695const int Matcher::vector_shift_count_ideal_reg(int size) {
696  return Op_VecS;
697}
698
699// x86 supports misaligned vector stores/loads.
700const bool Matcher::misaligned_vectors_ok() {
701  return !AlignVector; // can be changed by flag
702}
703
704// x86 AES instructions are compatible with SunJCE expanded
705// keys, hence we do not need to pass the original key to stubs
706const bool Matcher::pass_original_key_for_aes() {
707  return false;
708}
709
710// Helper methods for MachSpillCopyNode::implementation().
711static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
712                          int src_hi, int dst_hi, uint ireg, outputStream* st) {
713  // In the 64-bit VM, size calculation is very complex, so instructions are
714  // emitted into a scratch buffer to determine their size.
715  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
716  assert(ireg == Op_VecS || // 32bit vector
717         (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
718         (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
719         "no non-adjacent vector moves" );
720  if (cbuf) {
721    MacroAssembler _masm(cbuf);
722    int offset = __ offset();
723    switch (ireg) {
724    case Op_VecS: // copy whole register
725    case Op_VecD:
726    case Op_VecX:
727      __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
728      break;
729    case Op_VecY:
730      __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
731      break;
732    default:
733      ShouldNotReachHere();
734    }
735    int size = __ offset() - offset;
736#ifdef ASSERT
737    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
738    assert(!do_size || size == 4, "incorrect size calculation");
739#endif
740    return size;
741#ifndef PRODUCT
742  } else if (!do_size) {
743    switch (ireg) {
744    case Op_VecS:
745    case Op_VecD:
746    case Op_VecX:
747      st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
748      break;
749    case Op_VecY:
750      st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
751      break;
752    default:
753      ShouldNotReachHere();
754    }
755#endif
756  }
757  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
758  return 4;
759}
760
761static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
762                            int stack_offset, int reg, uint ireg, outputStream* st) {
763  // In the 64-bit VM, size calculation is very complex, so instructions are
764  // emitted into a scratch buffer to determine their size.
765  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
766  if (cbuf) {
767    MacroAssembler _masm(cbuf);
768    int offset = __ offset();
769    if (is_load) {
770      switch (ireg) {
771      case Op_VecS:
772        __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
773        break;
774      case Op_VecD:
775        __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
776        break;
777      case Op_VecX:
778        __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
779        break;
780      case Op_VecY:
781        __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
782        break;
783      default:
784        ShouldNotReachHere();
785      }
786    } else { // store
787      switch (ireg) {
788      case Op_VecS:
789        __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
790        break;
791      case Op_VecD:
792        __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
793        break;
794      case Op_VecX:
795        __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
796        break;
797      case Op_VecY:
798        __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
799        break;
800      default:
801        ShouldNotReachHere();
802      }
803    }
804    int size = __ offset() - offset;
805#ifdef ASSERT
806    int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
807    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
808    assert(!do_size || size == (5+offset_size), "incorrect size calculation");
809#endif
810    return size;
811#ifndef PRODUCT
812  } else if (!do_size) {
813    if (is_load) {
814      switch (ireg) {
815      case Op_VecS:
816        st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
817        break;
818      case Op_VecD:
819        st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
820        break;
821       case Op_VecX:
822        st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
823        break;
824      case Op_VecY:
825        st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
826        break;
827      default:
828        ShouldNotReachHere();
829      }
830    } else { // store
831      switch (ireg) {
832      case Op_VecS:
833        st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
834        break;
835      case Op_VecD:
836        st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
837        break;
838       case Op_VecX:
839        st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
840        break;
841      case Op_VecY:
842        st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
843        break;
844      default:
845        ShouldNotReachHere();
846      }
847    }
848#endif
849  }
850  int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
851  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
852  return 5+offset_size;
853}
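// Illustrative size check: spilling a VecY to [rsp + 16] emits
// vmovdqu [rsp + 16], xmmN -- a 2-byte VEX prefix, opcode, ModRM, SIB and a
// 1-byte displacement, i.e. 6 bytes, matching the 5 + offset_size estimate.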
854
855static inline jfloat replicate4_imm(int con, int width) {
856  // Load a constant of "width" (in bytes) and replicate it to fill 32 bits.
857  assert(width == 1 || width == 2, "only byte or short types here");
858  int bit_width = width * 8;
859  jint val = con;
860  val &= (1 << bit_width) - 1;  // mask off sign bits
861  while(bit_width < 32) {
862    val |= (val << bit_width);
863    bit_width <<= 1;
864  }
865  jfloat fval = *((jfloat*) &val);  // coerce to float type
866  return fval;
867}
868
869static inline jdouble replicate8_imm(int con, int width) {
870  // Load a constant of "width" (in bytes) and replicate it to fill 64 bits.
871  assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
872  int bit_width = width * 8;
873  jlong val = con;
874  val &= (((jlong) 1) << bit_width) - 1;  // mask off sign bits
875  while(bit_width < 64) {
876    val |= (val << bit_width);
877    bit_width <<= 1;
878  }
879  jdouble dval = *((jdouble*) &val);  // coerce to double type
880  return dval;
881}
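// Illustrative uses of the helpers above (hypothetical values):
//   jfloat  f = replicate4_imm(0x8, 1);     // 32-bit pattern 0x08080808
//   jdouble d = replicate8_imm(0x1234, 2);  // 64-bit pattern 0x1234123412341234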
882
883#ifndef PRODUCT
884  void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
885    st->print("nop \t# %d bytes pad for loops and calls", _count);
886  }
887#endif
888
889  void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
890    MacroAssembler _masm(&cbuf);
891    __ nop(_count);
892  }
893
894  uint MachNopNode::size(PhaseRegAlloc*) const {
895    return _count;
896  }
897
898#ifndef PRODUCT
899  void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
900    st->print("# breakpoint");
901  }
902#endif
903
904  void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
905    MacroAssembler _masm(&cbuf);
906    __ int3();
907  }
908
909  uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
910    return MachNode::size(ra_);
911  }
912
913%}
914
915encode %{
916
917  enc_class preserve_SP %{
918    debug_only(int off0 = cbuf.insts_size());
919    MacroAssembler _masm(&cbuf);
920    // RBP is preserved across all calls, even compiled calls.
921    // Use it to preserve RSP in places where the callee might change the SP.
922    __ movptr(rbp_mh_SP_save, rsp);
923    debug_only(int off1 = cbuf.insts_size());
924    assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
925  %}
926
927  enc_class restore_SP %{
928    MacroAssembler _masm(&cbuf);
929    __ movptr(rsp, rbp_mh_SP_save);
930  %}
931
932  enc_class call_epilog %{
933    if (VerifyStackAtCalls) {
934      // Check that stack depth is unchanged: find majik cookie on stack
935      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
936      MacroAssembler _masm(&cbuf);
937      Label L;
938      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
939      __ jccb(Assembler::equal, L);
940      // Die if stack mismatch
941      __ int3();
942      __ bind(L);
943    }
944  %}
945
946%}
947
948
949//----------OPERANDS-----------------------------------------------------------
950// Operand definitions must precede instruction definitions for correct parsing
951// in the ADLC because operands constitute user defined types which are used in
952// instruction definitions.
953
954// Vectors
955operand vecS() %{
956  constraint(ALLOC_IN_RC(vectors_reg));
957  match(VecS);
958
959  format %{ %}
960  interface(REG_INTER);
961%}
962
963operand vecD() %{
964  constraint(ALLOC_IN_RC(vectord_reg));
965  match(VecD);
966
967  format %{ %}
968  interface(REG_INTER);
969%}
970
971operand vecX() %{
972  constraint(ALLOC_IN_RC(vectorx_reg));
973  match(VecX);
974
975  format %{ %}
976  interface(REG_INTER);
977%}
978
979operand vecY() %{
980  constraint(ALLOC_IN_RC(vectory_reg));
981  match(VecY);
982
983  format %{ %}
984  interface(REG_INTER);
985%}
986
987
988// INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)
989
990// ============================================================================
991
992instruct ShouldNotReachHere() %{
993  match(Halt);
994  format %{ "int3\t# ShouldNotReachHere" %}
995  ins_encode %{
996    __ int3();
997  %}
998  ins_pipe(pipe_slow);
999%}
1000
1001// ============================================================================
1002
1003instruct addF_reg(regF dst, regF src) %{
1004  predicate((UseSSE>=1) && (UseAVX == 0));
1005  match(Set dst (AddF dst src));
1006
1007  format %{ "addss   $dst, $src" %}
1008  ins_cost(150);
1009  ins_encode %{
1010    __ addss($dst$$XMMRegister, $src$$XMMRegister);
1011  %}
1012  ins_pipe(pipe_slow);
1013%}
1014
1015instruct addF_mem(regF dst, memory src) %{
1016  predicate((UseSSE>=1) && (UseAVX == 0));
1017  match(Set dst (AddF dst (LoadF src)));
1018
1019  format %{ "addss   $dst, $src" %}
1020  ins_cost(150);
1021  ins_encode %{
1022    __ addss($dst$$XMMRegister, $src$$Address);
1023  %}
1024  ins_pipe(pipe_slow);
1025%}
1026
1027instruct addF_imm(regF dst, immF con) %{
1028  predicate((UseSSE>=1) && (UseAVX == 0));
1029  match(Set dst (AddF dst con));
1030  format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
1031  ins_cost(150);
1032  ins_encode %{
1033    __ addss($dst$$XMMRegister, $constantaddress($con));
1034  %}
1035  ins_pipe(pipe_slow);
1036%}
1037
1038instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
1039  predicate(UseAVX > 0);
1040  match(Set dst (AddF src1 src2));
1041
1042  format %{ "vaddss  $dst, $src1, $src2" %}
1043  ins_cost(150);
1044  ins_encode %{
1045    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
1046  %}
1047  ins_pipe(pipe_slow);
1048%}
1049
1050instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
1051  predicate(UseAVX > 0);
1052  match(Set dst (AddF src1 (LoadF src2)));
1053
1054  format %{ "vaddss  $dst, $src1, $src2" %}
1055  ins_cost(150);
1056  ins_encode %{
1057    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
1058  %}
1059  ins_pipe(pipe_slow);
1060%}
1061
1062instruct addF_reg_imm(regF dst, regF src, immF con) %{
1063  predicate(UseAVX > 0);
1064  match(Set dst (AddF src con));
1065
1066  format %{ "vaddss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
1067  ins_cost(150);
1068  ins_encode %{
1069    __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
1070  %}
1071  ins_pipe(pipe_slow);
1072%}
1073
1074instruct addD_reg(regD dst, regD src) %{
1075  predicate((UseSSE>=2) && (UseAVX == 0));
1076  match(Set dst (AddD dst src));
1077
1078  format %{ "addsd   $dst, $src" %}
1079  ins_cost(150);
1080  ins_encode %{
1081    __ addsd($dst$$XMMRegister, $src$$XMMRegister);
1082  %}
1083  ins_pipe(pipe_slow);
1084%}
1085
1086instruct addD_mem(regD dst, memory src) %{
1087  predicate((UseSSE>=2) && (UseAVX == 0));
1088  match(Set dst (AddD dst (LoadD src)));
1089
1090  format %{ "addsd   $dst, $src" %}
1091  ins_cost(150);
1092  ins_encode %{
1093    __ addsd($dst$$XMMRegister, $src$$Address);
1094  %}
1095  ins_pipe(pipe_slow);
1096%}
1097
1098instruct addD_imm(regD dst, immD con) %{
1099  predicate((UseSSE>=2) && (UseAVX == 0));
1100  match(Set dst (AddD dst con));
1101  format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
1102  ins_cost(150);
1103  ins_encode %{
1104    __ addsd($dst$$XMMRegister, $constantaddress($con));
1105  %}
1106  ins_pipe(pipe_slow);
1107%}
1108
1109instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
1110  predicate(UseAVX > 0);
1111  match(Set dst (AddD src1 src2));
1112
1113  format %{ "vaddsd  $dst, $src1, $src2" %}
1114  ins_cost(150);
1115  ins_encode %{
1116    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
1117  %}
1118  ins_pipe(pipe_slow);
1119%}
1120
1121instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
1122  predicate(UseAVX > 0);
1123  match(Set dst (AddD src1 (LoadD src2)));
1124
1125  format %{ "vaddsd  $dst, $src1, $src2" %}
1126  ins_cost(150);
1127  ins_encode %{
1128    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
1129  %}
1130  ins_pipe(pipe_slow);
1131%}
1132
1133instruct addD_reg_imm(regD dst, regD src, immD con) %{
1134  predicate(UseAVX > 0);
1135  match(Set dst (AddD src con));
1136
1137  format %{ "vaddsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
1138  ins_cost(150);
1139  ins_encode %{
1140    __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
1141  %}
1142  ins_pipe(pipe_slow);
1143%}
1144
1145instruct subF_reg(regF dst, regF src) %{
1146  predicate((UseSSE>=1) && (UseAVX == 0));
1147  match(Set dst (SubF dst src));
1148
1149  format %{ "subss   $dst, $src" %}
1150  ins_cost(150);
1151  ins_encode %{
1152    __ subss($dst$$XMMRegister, $src$$XMMRegister);
1153  %}
1154  ins_pipe(pipe_slow);
1155%}
1156
1157instruct subF_mem(regF dst, memory src) %{
1158  predicate((UseSSE>=1) && (UseAVX == 0));
1159  match(Set dst (SubF dst (LoadF src)));
1160
1161  format %{ "subss   $dst, $src" %}
1162  ins_cost(150);
1163  ins_encode %{
1164    __ subss($dst$$XMMRegister, $src$$Address);
1165  %}
1166  ins_pipe(pipe_slow);
1167%}
1168
1169instruct subF_imm(regF dst, immF con) %{
1170  predicate((UseSSE>=1) && (UseAVX == 0));
1171  match(Set dst (SubF dst con));
1172  format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
1173  ins_cost(150);
1174  ins_encode %{
1175    __ subss($dst$$XMMRegister, $constantaddress($con));
1176  %}
1177  ins_pipe(pipe_slow);
1178%}
1179
1180instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
1181  predicate(UseAVX > 0);
1182  match(Set dst (SubF src1 src2));
1183
1184  format %{ "vsubss  $dst, $src1, $src2" %}
1185  ins_cost(150);
1186  ins_encode %{
1187    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
1188  %}
1189  ins_pipe(pipe_slow);
1190%}
1191
1192instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
1193  predicate(UseAVX > 0);
1194  match(Set dst (SubF src1 (LoadF src2)));
1195
1196  format %{ "vsubss  $dst, $src1, $src2" %}
1197  ins_cost(150);
1198  ins_encode %{
1199    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
1200  %}
1201  ins_pipe(pipe_slow);
1202%}
1203
1204instruct subF_reg_imm(regF dst, regF src, immF con) %{
1205  predicate(UseAVX > 0);
1206  match(Set dst (SubF src con));
1207
1208  format %{ "vsubss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
1209  ins_cost(150);
1210  ins_encode %{
1211    __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
1212  %}
1213  ins_pipe(pipe_slow);
1214%}
1215
1216instruct subD_reg(regD dst, regD src) %{
1217  predicate((UseSSE>=2) && (UseAVX == 0));
1218  match(Set dst (SubD dst src));
1219
1220  format %{ "subsd   $dst, $src" %}
1221  ins_cost(150);
1222  ins_encode %{
1223    __ subsd($dst$$XMMRegister, $src$$XMMRegister);
1224  %}
1225  ins_pipe(pipe_slow);
1226%}
1227
1228instruct subD_mem(regD dst, memory src) %{
1229  predicate((UseSSE>=2) && (UseAVX == 0));
1230  match(Set dst (SubD dst (LoadD src)));
1231
1232  format %{ "subsd   $dst, $src" %}
1233  ins_cost(150);
1234  ins_encode %{
1235    __ subsd($dst$$XMMRegister, $src$$Address);
1236  %}
1237  ins_pipe(pipe_slow);
1238%}
1239
1240instruct subD_imm(regD dst, immD con) %{
1241  predicate((UseSSE>=2) && (UseAVX == 0));
1242  match(Set dst (SubD dst con));
1243  format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
1244  ins_cost(150);
1245  ins_encode %{
1246    __ subsd($dst$$XMMRegister, $constantaddress($con));
1247  %}
1248  ins_pipe(pipe_slow);
1249%}
1250
1251instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
1252  predicate(UseAVX > 0);
1253  match(Set dst (SubD src1 src2));
1254
1255  format %{ "vsubsd  $dst, $src1, $src2" %}
1256  ins_cost(150);
1257  ins_encode %{
1258    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
1259  %}
1260  ins_pipe(pipe_slow);
1261%}
1262
1263instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
1264  predicate(UseAVX > 0);
1265  match(Set dst (SubD src1 (LoadD src2)));
1266
1267  format %{ "vsubsd  $dst, $src1, $src2" %}
1268  ins_cost(150);
1269  ins_encode %{
1270    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
1271  %}
1272  ins_pipe(pipe_slow);
1273%}
1274
1275instruct subD_reg_imm(regD dst, regD src, immD con) %{
1276  predicate(UseAVX > 0);
1277  match(Set dst (SubD src con));
1278
1279  format %{ "vsubsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
1280  ins_cost(150);
1281  ins_encode %{
1282    __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
1283  %}
1284  ins_pipe(pipe_slow);
1285%}
1286
1287instruct mulF_reg(regF dst, regF src) %{
1288  predicate((UseSSE>=1) && (UseAVX == 0));
1289  match(Set dst (MulF dst src));
1290
1291  format %{ "mulss   $dst, $src" %}
1292  ins_cost(150);
1293  ins_encode %{
1294    __ mulss($dst$$XMMRegister, $src$$XMMRegister);
1295  %}
1296  ins_pipe(pipe_slow);
1297%}
1298
1299instruct mulF_mem(regF dst, memory src) %{
1300  predicate((UseSSE>=1) && (UseAVX == 0));
1301  match(Set dst (MulF dst (LoadF src)));
1302
1303  format %{ "mulss   $dst, $src" %}
1304  ins_cost(150);
1305  ins_encode %{
1306    __ mulss($dst$$XMMRegister, $src$$Address);
1307  %}
1308  ins_pipe(pipe_slow);
1309%}
1310
1311instruct mulF_imm(regF dst, immF con) %{
1312  predicate((UseSSE>=1) && (UseAVX == 0));
1313  match(Set dst (MulF dst con));
1314  format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
1315  ins_cost(150);
1316  ins_encode %{
1317    __ mulss($dst$$XMMRegister, $constantaddress($con));
1318  %}
1319  ins_pipe(pipe_slow);
1320%}
1321
1322instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
1323  predicate(UseAVX > 0);
1324  match(Set dst (MulF src1 src2));
1325
1326  format %{ "vmulss  $dst, $src1, $src2" %}
1327  ins_cost(150);
1328  ins_encode %{
1329    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
1330  %}
1331  ins_pipe(pipe_slow);
1332%}
1333
1334instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
1335  predicate(UseAVX > 0);
1336  match(Set dst (MulF src1 (LoadF src2)));
1337
1338  format %{ "vmulss  $dst, $src1, $src2" %}
1339  ins_cost(150);
1340  ins_encode %{
1341    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
1342  %}
1343  ins_pipe(pipe_slow);
1344%}
1345
1346instruct mulF_reg_imm(regF dst, regF src, immF con) %{
1347  predicate(UseAVX > 0);
1348  match(Set dst (MulF src con));
1349
1350  format %{ "vmulss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
1351  ins_cost(150);
1352  ins_encode %{
1353    __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
1354  %}
1355  ins_pipe(pipe_slow);
1356%}
1357
1358instruct mulD_reg(regD dst, regD src) %{
1359  predicate((UseSSE>=2) && (UseAVX == 0));
1360  match(Set dst (MulD dst src));
1361
1362  format %{ "mulsd   $dst, $src" %}
1363  ins_cost(150);
1364  ins_encode %{
1365    __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
1366  %}
1367  ins_pipe(pipe_slow);
1368%}
1369
1370instruct mulD_mem(regD dst, memory src) %{
1371  predicate((UseSSE>=2) && (UseAVX == 0));
1372  match(Set dst (MulD dst (LoadD src)));
1373
1374  format %{ "mulsd   $dst, $src" %}
1375  ins_cost(150);
1376  ins_encode %{
1377    __ mulsd($dst$$XMMRegister, $src$$Address);
1378  %}
1379  ins_pipe(pipe_slow);
1380%}
1381
1382instruct mulD_imm(regD dst, immD con) %{
1383  predicate((UseSSE>=2) && (UseAVX == 0));
1384  match(Set dst (MulD dst con));
1385  format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
1386  ins_cost(150);
1387  ins_encode %{
1388    __ mulsd($dst$$XMMRegister, $constantaddress($con));
1389  %}
1390  ins_pipe(pipe_slow);
1391%}
1392
1393instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
1394  predicate(UseAVX > 0);
1395  match(Set dst (MulD src1 src2));
1396
1397  format %{ "vmulsd  $dst, $src1, $src2" %}
1398  ins_cost(150);
1399  ins_encode %{
1400    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
1401  %}
1402  ins_pipe(pipe_slow);
1403%}
1404
1405instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
1406  predicate(UseAVX > 0);
1407  match(Set dst (MulD src1 (LoadD src2)));
1408
1409  format %{ "vmulsd  $dst, $src1, $src2" %}
1410  ins_cost(150);
1411  ins_encode %{
1412    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
1413  %}
1414  ins_pipe(pipe_slow);
1415%}
1416
1417instruct mulD_reg_imm(regD dst, regD src, immD con) %{
1418  predicate(UseAVX > 0);
1419  match(Set dst (MulD src con));
1420
1421  format %{ "vmulsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
1422  ins_cost(150);
1423  ins_encode %{
1424    __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
1425  %}
1426  ins_pipe(pipe_slow);
1427%}
1428
1429instruct divF_reg(regF dst, regF src) %{
1430  predicate((UseSSE>=1) && (UseAVX == 0));
1431  match(Set dst (DivF dst src));
1432
1433  format %{ "divss   $dst, $src" %}
1434  ins_cost(150);
1435  ins_encode %{
1436    __ divss($dst$$XMMRegister, $src$$XMMRegister);
1437  %}
1438  ins_pipe(pipe_slow);
1439%}
1440
1441instruct divF_mem(regF dst, memory src) %{
1442  predicate((UseSSE>=1) && (UseAVX == 0));
1443  match(Set dst (DivF dst (LoadF src)));
1444
1445  format %{ "divss   $dst, $src" %}
1446  ins_cost(150);
1447  ins_encode %{
1448    __ divss($dst$$XMMRegister, $src$$Address);
1449  %}
1450  ins_pipe(pipe_slow);
1451%}
1452
1453instruct divF_imm(regF dst, immF con) %{
1454  predicate((UseSSE>=1) && (UseAVX == 0));
1455  match(Set dst (DivF dst con));
1456  format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
1457  ins_cost(150);
1458  ins_encode %{
1459    __ divss($dst$$XMMRegister, $constantaddress($con));
1460  %}
1461  ins_pipe(pipe_slow);
1462%}
1463
1464instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
1465  predicate(UseAVX > 0);
1466  match(Set dst (DivF src1 src2));
1467
1468  format %{ "vdivss  $dst, $src1, $src2" %}
1469  ins_cost(150);
1470  ins_encode %{
1471    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
1472  %}
1473  ins_pipe(pipe_slow);
1474%}
1475
1476instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
1477  predicate(UseAVX > 0);
1478  match(Set dst (DivF src1 (LoadF src2)));
1479
1480  format %{ "vdivss  $dst, $src1, $src2" %}
1481  ins_cost(150);
1482  ins_encode %{
1483    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
1484  %}
1485  ins_pipe(pipe_slow);
1486%}
1487
1488instruct divF_reg_imm(regF dst, regF src, immF con) %{
1489  predicate(UseAVX > 0);
1490  match(Set dst (DivF src con));
1491
1492  format %{ "vdivss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
1493  ins_cost(150);
1494  ins_encode %{
1495    __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
1496  %}
1497  ins_pipe(pipe_slow);
1498%}
1499
1500instruct divD_reg(regD dst, regD src) %{
1501  predicate((UseSSE>=2) && (UseAVX == 0));
1502  match(Set dst (DivD dst src));
1503
1504  format %{ "divsd   $dst, $src" %}
1505  ins_cost(150);
1506  ins_encode %{
1507    __ divsd($dst$$XMMRegister, $src$$XMMRegister);
1508  %}
1509  ins_pipe(pipe_slow);
1510%}
1511
1512instruct divD_mem(regD dst, memory src) %{
1513  predicate((UseSSE>=2) && (UseAVX == 0));
1514  match(Set dst (DivD dst (LoadD src)));
1515
1516  format %{ "divsd   $dst, $src" %}
1517  ins_cost(150);
1518  ins_encode %{
1519    __ divsd($dst$$XMMRegister, $src$$Address);
1520  %}
1521  ins_pipe(pipe_slow);
1522%}
1523
1524instruct divD_imm(regD dst, immD con) %{
1525  predicate((UseSSE>=2) && (UseAVX == 0));
1526  match(Set dst (DivD dst con));
1527  format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
1528  ins_cost(150);
1529  ins_encode %{
1530    __ divsd($dst$$XMMRegister, $constantaddress($con));
1531  %}
1532  ins_pipe(pipe_slow);
1533%}
1534
1535instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
1536  predicate(UseAVX > 0);
1537  match(Set dst (DivD src1 src2));
1538
1539  format %{ "vdivsd  $dst, $src1, $src2" %}
1540  ins_cost(150);
1541  ins_encode %{
1542    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
1543  %}
1544  ins_pipe(pipe_slow);
1545%}
1546
1547instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
1548  predicate(UseAVX > 0);
1549  match(Set dst (DivD src1 (LoadD src2)));
1550
1551  format %{ "vdivsd  $dst, $src1, $src2" %}
1552  ins_cost(150);
1553  ins_encode %{
1554    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
1555  %}
1556  ins_pipe(pipe_slow);
1557%}
1558
1559instruct divD_reg_imm(regD dst, regD src, immD con) %{
1560  predicate(UseAVX > 0);
1561  match(Set dst (DivD src con));
1562
1563  format %{ "vdivsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
1564  ins_cost(150);
1565  ins_encode %{
1566    __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
1567  %}
1568  ins_pipe(pipe_slow);
1569%}
1570
1571instruct absF_reg(regF dst) %{
1572  predicate((UseSSE>=1) && (UseAVX == 0));
1573  match(Set dst (AbsF dst));
1574  ins_cost(150);
1575  format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
1576  ins_encode %{
1577    __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
1578  %}
1579  ins_pipe(pipe_slow);
1580%}
1581
1582instruct absF_reg_reg(regF dst, regF src) %{
1583  predicate(UseAVX > 0);
1584  match(Set dst (AbsF src));
1585  ins_cost(150);
1586  format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
1587  ins_encode %{
1588    bool vector256 = false;
1589    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
1590              ExternalAddress(float_signmask()), vector256);
1591  %}
1592  ins_pipe(pipe_slow);
1593%}
1594
1595instruct absD_reg(regD dst) %{
1596  predicate((UseSSE>=2) && (UseAVX == 0));
1597  match(Set dst (AbsD dst));
1598  ins_cost(150);
1599  format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
1600            "# abs double by sign masking" %}
1601  ins_encode %{
1602    __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
1603  %}
1604  ins_pipe(pipe_slow);
1605%}
1606
1607instruct absD_reg_reg(regD dst, regD src) %{
1608  predicate(UseAVX > 0);
1609  match(Set dst (AbsD src));
1610  ins_cost(150);
1611  format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
1612            "# abs double by sign masking" %}
1613  ins_encode %{
1614    bool vector256 = false;
1615    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
1616              ExternalAddress(double_signmask()), vector256);
1617  %}
1618  ins_pipe(pipe_slow);
1619%}
1620
1621instruct negF_reg(regF dst) %{
1622  predicate((UseSSE>=1) && (UseAVX == 0));
1623  match(Set dst (NegF dst));
1624  ins_cost(150);
1625  format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
1626  ins_encode %{
1627    __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
1628  %}
1629  ins_pipe(pipe_slow);
1630%}
1631
1632instruct negF_reg_reg(regF dst, regF src) %{
1633  predicate(UseAVX > 0);
1634  match(Set dst (NegF src));
1635  ins_cost(150);
1636  format %{ "vxorps  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
1637  ins_encode %{
1638    bool vector256 = false;
1639    __ vxorps($dst$$XMMRegister, $src$$XMMRegister,
1640              ExternalAddress(float_signflip()), vector256);
1641  %}
1642  ins_pipe(pipe_slow);
1643%}
1644
1645instruct negD_reg(regD dst) %{
1646  predicate((UseSSE>=2) && (UseAVX == 0));
1647  match(Set dst (NegD dst));
1648  ins_cost(150);
1649  format %{ "xorpd   $dst, [0x8000000000000000]\t"
1650            "# neg double by sign flipping" %}
1651  ins_encode %{
1652    __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
1653  %}
1654  ins_pipe(pipe_slow);
1655%}
1656
1657instruct negD_reg_reg(regD dst, regD src) %{
1658  predicate(UseAVX > 0);
1659  match(Set dst (NegD src));
1660  ins_cost(150);
1661  format %{ "vxorpd  $dst, $src, [0x8000000000000000]\t"
1662            "# neg double by sign flipping" %}
1663  ins_encode %{
1664    bool vector256 = false;
1665    __ vxorpd($dst$$XMMRegister, $src$$XMMRegister,
1666              ExternalAddress(double_signflip()), vector256);
1667  %}
1668  ins_pipe(pipe_slow);
1669%}
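// Note on the abs/neg patterns above: Math.abs and negation are done without
// branches by masking or flipping the sign bit (andps/andpd with 0x7fffffff /
// 0x7fffffffffffffff, xorps/xorpd with 0x80000000 / 0x8000000000000000).
// float_signmask(), float_signflip(), double_signmask() and double_signflip() are
// assumed to resolve to statically allocated mask constants defined elsewhere in
// this file's source block.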
1670
1671instruct sqrtF_reg(regF dst, regF src) %{
1672  predicate(UseSSE>=1);
1673  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
1674
1675  format %{ "sqrtss  $dst, $src" %}
1676  ins_cost(150);
1677  ins_encode %{
1678    __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
1679  %}
1680  ins_pipe(pipe_slow);
1681%}
1682
1683instruct sqrtF_mem(regF dst, memory src) %{
1684  predicate(UseSSE>=1);
1685  match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
1686
1687  format %{ "sqrtss  $dst, $src" %}
1688  ins_cost(150);
1689  ins_encode %{
1690    __ sqrtss($dst$$XMMRegister, $src$$Address);
1691  %}
1692  ins_pipe(pipe_slow);
1693%}
1694
1695instruct sqrtF_imm(regF dst, immF con) %{
1696  predicate(UseSSE>=1);
1697  match(Set dst (ConvD2F (SqrtD (ConvF2D con))));
1698  format %{ "sqrtss  $dst, [$constantaddress]\t# load from constant table: float=$con" %}
1699  ins_cost(150);
1700  ins_encode %{
1701    __ sqrtss($dst$$XMMRegister, $constantaddress($con));
1702  %}
1703  ins_pipe(pipe_slow);
1704%}
1705
1706instruct sqrtD_reg(regD dst, regD src) %{
1707  predicate(UseSSE>=2);
1708  match(Set dst (SqrtD src));
1709
1710  format %{ "sqrtsd  $dst, $src" %}
1711  ins_cost(150);
1712  ins_encode %{
1713    __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
1714  %}
1715  ins_pipe(pipe_slow);
1716%}
1717
1718instruct sqrtD_mem(regD dst, memory src) %{
1719  predicate(UseSSE>=2);
1720  match(Set dst (SqrtD (LoadD src)));
1721
1722  format %{ "sqrtsd  $dst, $src" %}
1723  ins_cost(150);
1724  ins_encode %{
1725    __ sqrtsd($dst$$XMMRegister, $src$$Address);
1726  %}
1727  ins_pipe(pipe_slow);
1728%}
1729
1730instruct sqrtD_imm(regD dst, immD con) %{
1731  predicate(UseSSE>=2);
1732  match(Set dst (SqrtD con));
1733  format %{ "sqrtsd  $dst, [$constantaddress]\t# load from constant table: double=$con" %}
1734  ins_cost(150);
1735  ins_encode %{
1736    __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
1737  %}
1738  ins_pipe(pipe_slow);
1739%}
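// The scalar FP patterns above come in two flavors: two-operand SSE forms
// (predicate UseSSE >= 1/2 with UseAVX == 0) that overwrite $dst in place, and
// three-operand AVX forms (UseAVX > 0) that leave both sources untouched.  The
// *_imm variants read their constant operand out of the compiled method's
// constant table via $constantaddress($con) rather than materializing it first.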
1740
1741
1742// ====================VECTOR INSTRUCTIONS=====================================
1743
1744// Load vectors (4 bytes long)
1745instruct loadV4(vecS dst, memory mem) %{
1746  predicate(n->as_LoadVector()->memory_size() == 4);
1747  match(Set dst (LoadVector mem));
1748  ins_cost(125);
1749  format %{ "movd    $dst,$mem\t! load vector (4 bytes)" %}
1750  ins_encode %{
1751    __ movdl($dst$$XMMRegister, $mem$$Address);
1752  %}
1753  ins_pipe( pipe_slow );
1754%}
1755
1756// Load vectors (8 bytes long)
1757instruct loadV8(vecD dst, memory mem) %{
1758  predicate(n->as_LoadVector()->memory_size() == 8);
1759  match(Set dst (LoadVector mem));
1760  ins_cost(125);
1761  format %{ "movq    $dst,$mem\t! load vector (8 bytes)" %}
1762  ins_encode %{
1763    __ movq($dst$$XMMRegister, $mem$$Address);
1764  %}
1765  ins_pipe( pipe_slow );
1766%}
1767
1768// Load vectors (16 bytes long)
1769instruct loadV16(vecX dst, memory mem) %{
1770  predicate(n->as_LoadVector()->memory_size() == 16);
1771  match(Set dst (LoadVector mem));
1772  ins_cost(125);
1773  format %{ "movdqu  $dst,$mem\t! load vector (16 bytes)" %}
1774  ins_encode %{
1775    __ movdqu($dst$$XMMRegister, $mem$$Address);
1776  %}
1777  ins_pipe( pipe_slow );
1778%}
1779
1780// Load vectors (32 bytes long)
1781instruct loadV32(vecY dst, memory mem) %{
1782  predicate(n->as_LoadVector()->memory_size() == 32);
1783  match(Set dst (LoadVector mem));
1784  ins_cost(125);
1785  format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
1786  ins_encode %{
1787    __ vmovdqu($dst$$XMMRegister, $mem$$Address);
1788  %}
1789  ins_pipe( pipe_slow );
1790%}
1791
1792// Store vectors
1793instruct storeV4(memory mem, vecS src) %{
1794  predicate(n->as_StoreVector()->memory_size() == 4);
1795  match(Set mem (StoreVector mem src));
1796  ins_cost(145);
1797  format %{ "movd    $mem,$src\t! store vector (4 bytes)" %}
1798  ins_encode %{
1799    __ movdl($mem$$Address, $src$$XMMRegister);
1800  %}
1801  ins_pipe( pipe_slow );
1802%}
1803
1804instruct storeV8(memory mem, vecD src) %{
1805  predicate(n->as_StoreVector()->memory_size() == 8);
1806  match(Set mem (StoreVector mem src));
1807  ins_cost(145);
1808  format %{ "movq    $mem,$src\t! store vector (8 bytes)" %}
1809  ins_encode %{
1810    __ movq($mem$$Address, $src$$XMMRegister);
1811  %}
1812  ins_pipe( pipe_slow );
1813%}
1814
1815instruct storeV16(memory mem, vecX src) %{
1816  predicate(n->as_StoreVector()->memory_size() == 16);
1817  match(Set mem (StoreVector mem src));
1818  ins_cost(145);
1819  format %{ "movdqu  $mem,$src\t! store vector (16 bytes)" %}
1820  ins_encode %{
1821    __ movdqu($mem$$Address, $src$$XMMRegister);
1822  %}
1823  ins_pipe( pipe_slow );
1824%}
1825
1826instruct storeV32(memory mem, vecY src) %{
1827  predicate(n->as_StoreVector()->memory_size() == 32);
1828  match(Set mem (StoreVector mem src));
1829  ins_cost(145);
1830  format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
1831  ins_encode %{
1832    __ vmovdqu($mem$$Address, $src$$XMMRegister);
1833  %}
1834  ins_pipe( pipe_slow );
1835%}
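// The vecS/vecD/vecX/vecY operand classes used above correspond to 4-, 8-, 16-
// and 32-byte vectors; each load/store picks the narrowest move that covers the
// vector (movd, movq, movdqu, or the VEX-encoded vmovdqu for 32 bytes).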
1836
1837// Replicate byte scalar to be vector
1838instruct Repl4B(vecS dst, rRegI src) %{
1839  predicate(n->as_Vector()->length() == 4);
1840  match(Set dst (ReplicateB src));
1841  format %{ "movd    $dst,$src\n\t"
1842            "punpcklbw $dst,$dst\n\t"
1843            "pshuflw $dst,$dst,0x00\t! replicate4B" %}
1844  ins_encode %{
1845    __ movdl($dst$$XMMRegister, $src$$Register);
1846    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
1847    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
1848  %}
1849  ins_pipe( pipe_slow );
1850%}
1851
1852instruct Repl8B(vecD dst, rRegI src) %{
1853  predicate(n->as_Vector()->length() == 8);
1854  match(Set dst (ReplicateB src));
1855  format %{ "movd    $dst,$src\n\t"
1856            "punpcklbw $dst,$dst\n\t"
1857            "pshuflw $dst,$dst,0x00\t! replicate8B" %}
1858  ins_encode %{
1859    __ movdl($dst$$XMMRegister, $src$$Register);
1860    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
1861    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
1862  %}
1863  ins_pipe( pipe_slow );
1864%}
1865
1866instruct Repl16B(vecX dst, rRegI src) %{
1867  predicate(n->as_Vector()->length() == 16);
1868  match(Set dst (ReplicateB src));
1869  format %{ "movd    $dst,$src\n\t"
1870            "punpcklbw $dst,$dst\n\t"
1871            "pshuflw $dst,$dst,0x00\n\t"
1872            "punpcklqdq $dst,$dst\t! replicate16B" %}
1873  ins_encode %{
1874    __ movdl($dst$$XMMRegister, $src$$Register);
1875    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
1876    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
1877    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
1878  %}
1879  ins_pipe( pipe_slow );
1880%}
1881
1882instruct Repl32B(vecY dst, rRegI src) %{
1883  predicate(n->as_Vector()->length() == 32);
1884  match(Set dst (ReplicateB src));
1885  format %{ "movd    $dst,$src\n\t"
1886            "punpcklbw $dst,$dst\n\t"
1887            "pshuflw $dst,$dst,0x00\n\t"
1888            "punpcklqdq $dst,$dst\n\t"
1889            "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
1890  ins_encode %{
1891    __ movdl($dst$$XMMRegister, $src$$Register);
1892    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
1893    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
1894    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
1895    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
1896  %}
1897  ins_pipe( pipe_slow );
1898%}
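// The byte-replicate sequences above widen the scalar step by step: movdl puts
// the byte in the low dword, punpcklbw doubles it into a 16-bit word, pshuflw
// with 0x00 broadcasts that word across the low 64 bits, punpcklqdq copies the
// low quadword into the high one (16 bytes), and vinserti128h duplicates the low
// 128-bit lane into the high lane for 32-byte vectors.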
1899
1900// Replicate byte scalar immediate to be vector by loading from const table.
1901instruct Repl4B_imm(vecS dst, immI con) %{
1902  predicate(n->as_Vector()->length() == 4);
1903  match(Set dst (ReplicateB con));
1904  format %{ "movdl   $dst,[$constantaddress]\t! replicate4B($con)" %}
1905  ins_encode %{
1906    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
1907  %}
1908  ins_pipe( pipe_slow );
1909%}
1910
1911instruct Repl8B_imm(vecD dst, immI con) %{
1912  predicate(n->as_Vector()->length() == 8);
1913  match(Set dst (ReplicateB con));
1914  format %{ "movq    $dst,[$constantaddress]\t! replicate8B($con)" %}
1915  ins_encode %{
1916    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
1917  %}
1918  ins_pipe( pipe_slow );
1919%}
1920
1921instruct Repl16B_imm(vecX dst, immI con) %{
1922  predicate(n->as_Vector()->length() == 16);
1923  match(Set dst (ReplicateB con));
1924  format %{ "movq    $dst,[$constantaddress]\n\t"
1925            "punpcklqdq $dst,$dst\t! replicate16B($con)" %}
1926  ins_encode %{
1927    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
1928    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
1929  %}
1930  ins_pipe( pipe_slow );
1931%}
1932
1933instruct Repl32B_imm(vecY dst, immI con) %{
1934  predicate(n->as_Vector()->length() == 32);
1935  match(Set dst (ReplicateB con));
1936  format %{ "movq    $dst,[$constantaddress]\n\t"
1937            "punpcklqdq $dst,$dst\n\t"
1938            "vinserti128h $dst,$dst,$dst\t! replicate32B($con)" %}
1939  ins_encode %{
1940    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
1941    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
1942    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
1943  %}
1944  ins_pipe( pipe_slow );
1945%}
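// replicate4_imm/replicate8_imm are assumed to be the helpers defined in this
// file's source block that repeat the low 'width' bytes of the immediate across
// a 32-bit or 64-bit constant-table entry.  For example, a byte immediate of
// 0x1F would be expected to yield 0x1F1F1F1F1F1F1F1F from replicate8_imm(0x1F, 1),
// which movq then loads as eight copies of that byte.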
1946
1947// Replicate byte scalar zero to be vector
1948instruct Repl4B_zero(vecS dst, immI0 zero) %{
1949  predicate(n->as_Vector()->length() == 4);
1950  match(Set dst (ReplicateB zero));
1951  format %{ "pxor    $dst,$dst\t! replicate4B zero" %}
1952  ins_encode %{
1953    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
1954  %}
1955  ins_pipe( fpu_reg_reg );
1956%}
1957
1958instruct Repl8B_zero(vecD dst, immI0 zero) %{
1959  predicate(n->as_Vector()->length() == 8);
1960  match(Set dst (ReplicateB zero));
1961  format %{ "pxor    $dst,$dst\t! replicate8B zero" %}
1962  ins_encode %{
1963    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
1964  %}
1965  ins_pipe( fpu_reg_reg );
1966%}
1967
1968instruct Repl16B_zero(vecX dst, immI0 zero) %{
1969  predicate(n->as_Vector()->length() == 16);
1970  match(Set dst (ReplicateB zero));
1971  format %{ "pxor    $dst,$dst\t! replicate16B zero" %}
1972  ins_encode %{
1973    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
1974  %}
1975  ins_pipe( fpu_reg_reg );
1976%}
1977
1978instruct Repl32B_zero(vecY dst, immI0 zero) %{
1979  predicate(n->as_Vector()->length() == 32);
1980  match(Set dst (ReplicateB zero));
1981  format %{ "vpxor   $dst,$dst,$dst\t! replicate32B zero" %}
1982  ins_encode %{
1983    // 256-bit vpxor needs AVX2; plain AVX only provides 256-bit vxorps/vxorpd.
1984    bool vector256 = true;
1985    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
1986  %}
1987  ins_pipe( fpu_reg_reg );
1988%}
1989
1990// Replicate char/short (2 byte) scalar to be vector
1991instruct Repl2S(vecS dst, rRegI src) %{
1992  predicate(n->as_Vector()->length() == 2);
1993  match(Set dst (ReplicateS src));
1994  format %{ "movd    $dst,$src\n\t"
1995            "pshuflw $dst,$dst,0x00\t! replicate2S" %}
1996  ins_encode %{
1997    __ movdl($dst$$XMMRegister, $src$$Register);
1998    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
1999  %}
2000  ins_pipe( fpu_reg_reg );
2001%}
2002
2003instruct Repl4S(vecD dst, rRegI src) %{
2004  predicate(n->as_Vector()->length() == 4);
2005  match(Set dst (ReplicateS src));
2006  format %{ "movd    $dst,$src\n\t"
2007            "pshuflw $dst,$dst,0x00\t! replicate4S" %}
2008  ins_encode %{
2009    __ movdl($dst$$XMMRegister, $src$$Register);
2010    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2011  %}
2012  ins_pipe( fpu_reg_reg );
2013%}
2014
2015instruct Repl8S(vecX dst, rRegI src) %{
2016  predicate(n->as_Vector()->length() == 8);
2017  match(Set dst (ReplicateS src));
2018  format %{ "movd    $dst,$src\n\t"
2019            "pshuflw $dst,$dst,0x00\n\t"
2020            "punpcklqdq $dst,$dst\t! replicate8S" %}
2021  ins_encode %{
2022    __ movdl($dst$$XMMRegister, $src$$Register);
2023    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2024    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2025  %}
2026  ins_pipe( pipe_slow );
2027%}
2028
2029instruct Repl16S(vecY dst, rRegI src) %{
2030  predicate(n->as_Vector()->length() == 16);
2031  match(Set dst (ReplicateS src));
2032  format %{ "movd    $dst,$src\n\t"
2033            "pshuflw $dst,$dst,0x00\n\t"
2034            "punpcklqdq $dst,$dst\n\t"
2035            "vinserti128h $dst,$dst,$dst\t! replicate16S" %}
2036  ins_encode %{
2037    __ movdl($dst$$XMMRegister, $src$$Register);
2038    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2039    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2040    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2041  %}
2042  ins_pipe( pipe_slow );
2043%}
2044
2045// Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
2046instruct Repl2S_imm(vecS dst, immI con) %{
2047  predicate(n->as_Vector()->length() == 2);
2048  match(Set dst (ReplicateS con));
2049  format %{ "movdl   $dst,[$constantaddress]\t! replicate2S($con)" %}
2050  ins_encode %{
2051    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
2052  %}
2053  ins_pipe( fpu_reg_reg );
2054%}
2055
2056instruct Repl4S_imm(vecD dst, immI con) %{
2057  predicate(n->as_Vector()->length() == 4);
2058  match(Set dst (ReplicateS con));
2059  format %{ "movq    $dst,[$constantaddress]\t! replicate4S($con)" %}
2060  ins_encode %{
2061    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
2062  %}
2063  ins_pipe( fpu_reg_reg );
2064%}
2065
2066instruct Repl8S_imm(vecX dst, immI con) %{
2067  predicate(n->as_Vector()->length() == 8);
2068  match(Set dst (ReplicateS con));
2069  format %{ "movq    $dst,[$constantaddress]\n\t"
2070            "punpcklqdq $dst,$dst\t! replicate8S($con)" %}
2071  ins_encode %{
2072    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
2073    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2074  %}
2075  ins_pipe( pipe_slow );
2076%}
2077
2078instruct Repl16S_imm(vecY dst, immI con) %{
2079  predicate(n->as_Vector()->length() == 16);
2080  match(Set dst (ReplicateS con));
2081  format %{ "movq    $dst,[$constantaddress]\n\t"
2082            "punpcklqdq $dst,$dst\n\t"
2083            "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %}
2084  ins_encode %{
2085    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
2086    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2087    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2088  %}
2089  ins_pipe( pipe_slow );
2090%}
2091
2092// Replicate char/short (2 byte) scalar zero to be vector
2093instruct Repl2S_zero(vecS dst, immI0 zero) %{
2094  predicate(n->as_Vector()->length() == 2);
2095  match(Set dst (ReplicateS zero));
2096  format %{ "pxor    $dst,$dst\t! replicate2S zero" %}
2097  ins_encode %{
2098    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
2099  %}
2100  ins_pipe( fpu_reg_reg );
2101%}
2102
2103instruct Repl4S_zero(vecD dst, immI0 zero) %{
2104  predicate(n->as_Vector()->length() == 4);
2105  match(Set dst (ReplicateS zero));
2106  format %{ "pxor    $dst,$dst\t! replicate4S zero" %}
2107  ins_encode %{
2108    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
2109  %}
2110  ins_pipe( fpu_reg_reg );
2111%}
2112
2113instruct Repl8S_zero(vecX dst, immI0 zero) %{
2114  predicate(n->as_Vector()->length() == 8);
2115  match(Set dst (ReplicateS zero));
2116  format %{ "pxor    $dst,$dst\t! replicate8S zero" %}
2117  ins_encode %{
2118    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
2119  %}
2120  ins_pipe( fpu_reg_reg );
2121%}
2122
2123instruct Repl16S_zero(vecY dst, immI0 zero) %{
2124  predicate(n->as_Vector()->length() == 16);
2125  match(Set dst (ReplicateS zero));
2126  format %{ "vpxor   $dst,$dst,$dst\t! replicate16S zero" %}
2127  ins_encode %{
2128    // 256-bit vpxor needs AVX2; plain AVX only provides 256-bit vxorps/vxorpd.
2129    bool vector256 = true;
2130    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
2131  %}
2132  ins_pipe( fpu_reg_reg );
2133%}
2134
2135// Replicate integer (4 byte) scalar to be vector
2136instruct Repl2I(vecD dst, rRegI src) %{
2137  predicate(n->as_Vector()->length() == 2);
2138  match(Set dst (ReplicateI src));
2139  format %{ "movd    $dst,$src\n\t"
2140            "pshufd  $dst,$dst,0x00\t! replicate2I" %}
2141  ins_encode %{
2142    __ movdl($dst$$XMMRegister, $src$$Register);
2143    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2144  %}
2145  ins_pipe( fpu_reg_reg );
2146%}
2147
2148instruct Repl4I(vecX dst, rRegI src) %{
2149  predicate(n->as_Vector()->length() == 4);
2150  match(Set dst (ReplicateI src));
2151  format %{ "movd    $dst,$src\n\t"
2152            "pshufd  $dst,$dst,0x00\t! replicate4I" %}
2153  ins_encode %{
2154    __ movdl($dst$$XMMRegister, $src$$Register);
2155    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2156  %}
2157  ins_pipe( pipe_slow );
2158%}
2159
2160instruct Repl8I(vecY dst, rRegI src) %{
2161  predicate(n->as_Vector()->length() == 8);
2162  match(Set dst (ReplicateI src));
2163  format %{ "movd    $dst,$src\n\t"
2164            "pshufd  $dst,$dst,0x00\n\t"
2165            "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
2166  ins_encode %{
2167    __ movdl($dst$$XMMRegister, $src$$Register);
2168    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2169    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2170  %}
2171  ins_pipe( pipe_slow );
2172%}
2173
2174// Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
2175instruct Repl2I_imm(vecD dst, immI con) %{
2176  predicate(n->as_Vector()->length() == 2);
2177  match(Set dst (ReplicateI con));
2178  format %{ "movq    $dst,[$constantaddress]\t! replicate2I($con)" %}
2179  ins_encode %{
2180    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
2181  %}
2182  ins_pipe( fpu_reg_reg );
2183%}
2184
2185instruct Repl4I_imm(vecX dst, immI con) %{
2186  predicate(n->as_Vector()->length() == 4);
2187  match(Set dst (ReplicateI con));
2188  format %{ "movq    $dst,[$constantaddress]\t! replicate4I($con)\n\t"
2189            "punpcklqdq $dst,$dst" %}
2190  ins_encode %{
2191    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
2192    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2193  %}
2194  ins_pipe( pipe_slow );
2195%}
2196
2197instruct Repl8I_imm(vecY dst, immI con) %{
2198  predicate(n->as_Vector()->length() == 8);
2199  match(Set dst (ReplicateI con));
2200  format %{ "movq    $dst,[$constantaddress]\t! replicate8I($con)\n\t"
2201            "punpcklqdq $dst,$dst\n\t"
2202            "vinserti128h $dst,$dst,$dst" %}
2203  ins_encode %{
2204    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
2205    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2206    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2207  %}
2208  ins_pipe( pipe_slow );
2209%}
2210
2211// The integer can be loaded into the XMM register directly from memory.
2212instruct Repl2I_mem(vecD dst, memory mem) %{
2213  predicate(n->as_Vector()->length() == 2);
2214  match(Set dst (ReplicateI (LoadI mem)));
2215  format %{ "movd    $dst,$mem\n\t"
2216            "pshufd  $dst,$dst,0x00\t! replicate2I" %}
2217  ins_encode %{
2218    __ movdl($dst$$XMMRegister, $mem$$Address);
2219    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2220  %}
2221  ins_pipe( fpu_reg_reg );
2222%}
2223
2224instruct Repl4I_mem(vecX dst, memory mem) %{
2225  predicate(n->as_Vector()->length() == 4);
2226  match(Set dst (ReplicateI (LoadI mem)));
2227  format %{ "movd    $dst,$mem\n\t"
2228            "pshufd  $dst,$dst,0x00\t! replicate4I" %}
2229  ins_encode %{
2230    __ movdl($dst$$XMMRegister, $mem$$Address);
2231    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2232  %}
2233  ins_pipe( pipe_slow );
2234%}
2235
2236instruct Repl8I_mem(vecY dst, memory mem) %{
2237  predicate(n->as_Vector()->length() == 8);
2238  match(Set dst (ReplicateI (LoadI mem)));
2239  format %{ "movd    $dst,$mem\n\t"
2240            "pshufd  $dst,$dst,0x00\n\t"
2241            "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
2242  ins_encode %{
2243    __ movdl($dst$$XMMRegister, $mem$$Address);
2244    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2245    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2246  %}
2247  ins_pipe( pipe_slow );
2248%}
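// The *_mem replicate forms above avoid the GPR round trip: the scalar is
// movd-loaded straight from memory into the XMM register and then shuffled
// exactly as in the register variants.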
2249
2250// Replicate integer (4 byte) scalar zero to be vector
2251instruct Repl2I_zero(vecD dst, immI0 zero) %{
2252  predicate(n->as_Vector()->length() == 2);
2253  match(Set dst (ReplicateI zero));
2254  format %{ "pxor    $dst,$dst\t! replicate2I zero" %}
2255  ins_encode %{
2256    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
2257  %}
2258  ins_pipe( fpu_reg_reg );
2259%}
2260
2261instruct Repl4I_zero(vecX dst, immI0 zero) %{
2262  predicate(n->as_Vector()->length() == 4);
2263  match(Set dst (ReplicateI zero));
2264  format %{ "pxor    $dst,$dst\t! replicate4I zero" %}
2265  ins_encode %{
2266    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
2267  %}
2268  ins_pipe( fpu_reg_reg );
2269%}
2270
2271instruct Repl8I_zero(vecY dst, immI0 zero) %{
2272  predicate(n->as_Vector()->length() == 8);
2273  match(Set dst (ReplicateI zero));
2274  format %{ "vpxor   $dst,$dst,$dst\t! replicate8I zero" %}
2275  ins_encode %{
2276    // 256-bit vpxor needs AVX2; plain AVX only provides 256-bit vxorps/vxorpd.
2277    bool vector256 = true;
2278    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
2279  %}
2280  ins_pipe( fpu_reg_reg );
2281%}
2282
2283// Replicate long (8 byte) scalar to be vector
2284#ifdef _LP64
2285instruct Repl2L(vecX dst, rRegL src) %{
2286  predicate(n->as_Vector()->length() == 2);
2287  match(Set dst (ReplicateL src));
2288  format %{ "movdq   $dst,$src\n\t"
2289            "punpcklqdq $dst,$dst\t! replicate2L" %}
2290  ins_encode %{
2291    __ movdq($dst$$XMMRegister, $src$$Register);
2292    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2293  %}
2294  ins_pipe( pipe_slow );
2295%}
2296
2297instruct Repl4L(vecY dst, rRegL src) %{
2298  predicate(n->as_Vector()->length() == 4);
2299  match(Set dst (ReplicateL src));
2300  format %{ "movdq   $dst,$src\n\t"
2301            "punpcklqdq $dst,$dst\n\t"
2302            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
2303  ins_encode %{
2304    __ movdq($dst$$XMMRegister, $src$$Register);
2305    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2306    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2307  %}
2308  ins_pipe( pipe_slow );
2309%}
2310#else // _LP64
2311instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
2312  predicate(n->as_Vector()->length() == 2);
2313  match(Set dst (ReplicateL src));
2314  effect(TEMP dst, USE src, TEMP tmp);
2315  format %{ "movdl   $dst,$src.lo\n\t"
2316            "movdl   $tmp,$src.hi\n\t"
2317            "punpckldq $dst,$tmp\n\t"
2318            "punpcklqdq $dst,$dst\t! replicate2L"%}
2319  ins_encode %{
2320    __ movdl($dst$$XMMRegister, $src$$Register);
2321    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
2322    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
2323    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2324  %}
2325  ins_pipe( pipe_slow );
2326%}
2327
2328instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
2329  predicate(n->as_Vector()->length() == 4);
2330  match(Set dst (ReplicateL src));
2331  effect(TEMP dst, USE src, TEMP tmp);
2332  format %{ "movdl   $dst,$src.lo\n\t"
2333            "movdl   $tmp,$src.hi\n\t"
2334            "punpckldq $dst,$tmp\n\t"
2335            "punpcklqdq $dst,$dst\n\t"
2336            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
2337  ins_encode %{
2338    __ movdl($dst$$XMMRegister, $src$$Register);
2339    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
2340    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
2341    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2342    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2343  %}
2344  ins_pipe( pipe_slow );
2345%}
2346#endif // _LP64
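// On 32-bit VMs (!_LP64) there is no single GPR-to-XMM move for a 64-bit value,
// so the long is assembled from its two halves: movdl loads $src.lo and $src.hi
// separately, punpckldq glues them into one 64-bit lane, and the usual
// punpcklqdq/vinserti128h steps broadcast it.  effect(TEMP dst, USE src, TEMP tmp)
// tells the register allocator that dst and tmp are written while src is still live.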
2347
2348// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
2349instruct Repl2L_imm(vecX dst, immL con) %{
2350  predicate(n->as_Vector()->length() == 2);
2351  match(Set dst (ReplicateL con));
2352  format %{ "movq    $dst,[$constantaddress]\n\t"
2353            "punpcklqdq $dst,$dst\t! replicate2L($con)" %}
2354  ins_encode %{
2355    __ movq($dst$$XMMRegister, $constantaddress($con));
2356    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2357  %}
2358  ins_pipe( pipe_slow );
2359%}
2360
2361instruct Repl4L_imm(vecY dst, immL con) %{
2362  predicate(n->as_Vector()->length() == 4);
2363  match(Set dst (ReplicateL con));
2364  format %{ "movq    $dst,[$constantaddress]\n\t"
2365            "punpcklqdq $dst,$dst\n\t"
2366            "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %}
2367  ins_encode %{
2368    __ movq($dst$$XMMRegister, $constantaddress($con));
2369    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2370    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2371  %}
2372  ins_pipe( pipe_slow );
2373%}
2374
2375// The long can be loaded into the XMM register directly from memory.
2376instruct Repl2L_mem(vecX dst, memory mem) %{
2377  predicate(n->as_Vector()->length() == 2);
2378  match(Set dst (ReplicateL (LoadL mem)));
2379  format %{ "movq    $dst,$mem\n\t"
2380            "punpcklqdq $dst,$dst\t! replicate2L" %}
2381  ins_encode %{
2382    __ movq($dst$$XMMRegister, $mem$$Address);
2383    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2384  %}
2385  ins_pipe( pipe_slow );
2386%}
2387
2388instruct Repl4L_mem(vecY dst, memory mem) %{
2389  predicate(n->as_Vector()->length() == 4);
2390  match(Set dst (ReplicateL (LoadL mem)));
2391  format %{ "movq    $dst,$mem\n\t"
2392            "punpcklqdq $dst,$dst\n\t"
2393            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
2394  ins_encode %{
2395    __ movq($dst$$XMMRegister, $mem$$Address);
2396    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2397    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2398  %}
2399  ins_pipe( pipe_slow );
2400%}
2401
2402// Replicate long (8 byte) scalar zero to be vector
2403instruct Repl2L_zero(vecX dst, immL0 zero) %{
2404  predicate(n->as_Vector()->length() == 2);
2405  match(Set dst (ReplicateL zero));
2406  format %{ "pxor    $dst,$dst\t! replicate2L zero" %}
2407  ins_encode %{
2408    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
2409  %}
2410  ins_pipe( fpu_reg_reg );
2411%}
2412
2413instruct Repl4L_zero(vecY dst, immL0 zero) %{
2414  predicate(n->as_Vector()->length() == 4);
2415  match(Set dst (ReplicateL zero));
2416  format %{ "vpxor   $dst,$dst,$dst\t! replicate4L zero" %}
2417  ins_encode %{
2418    // 256-bit vpxor needs AVX2; plain AVX only provides 256-bit vxorps/vxorpd.
2419    bool vector256 = true;
2420    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
2421  %}
2422  ins_pipe( fpu_reg_reg );
2423%}
2424
2425// Replicate float (4 byte) scalar to be vector
2426instruct Repl2F(vecD dst, regF src) %{
2427  predicate(n->as_Vector()->length() == 2);
2428  match(Set dst (ReplicateF src));
2429  format %{ "pshufd  $dst,$src,0x00\t! replicate2F" %}
2430  ins_encode %{
2431    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
2432  %}
2433  ins_pipe( fpu_reg_reg );
2434%}
2435
2436instruct Repl4F(vecX dst, regF src) %{
2437  predicate(n->as_Vector()->length() == 4);
2438  match(Set dst (ReplicateF src));
2439  format %{ "pshufd  $dst,$src,0x00\t! replicate4F" %}
2440  ins_encode %{
2441    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
2442  %}
2443  ins_pipe( pipe_slow );
2444%}
2445
2446instruct Repl8F(vecY dst, regF src) %{
2447  predicate(n->as_Vector()->length() == 8);
2448  match(Set dst (ReplicateF src));
2449  format %{ "pshufd  $dst,$src,0x00\n\t"
2450            "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
2451  ins_encode %{
2452    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
2453    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2454  %}
2455  ins_pipe( pipe_slow );
2456%}
2457
2458// Replicate float (4 byte) scalar zero to be vector
2459instruct Repl2F_zero(vecD dst, immF0 zero) %{
2460  predicate(n->as_Vector()->length() == 2);
2461  match(Set dst (ReplicateF zero));
2462  format %{ "xorps   $dst,$dst\t! replicate2F zero" %}
2463  ins_encode %{
2464    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
2465  %}
2466  ins_pipe( fpu_reg_reg );
2467%}
2468
2469instruct Repl4F_zero(vecX dst, immF0 zero) %{
2470  predicate(n->as_Vector()->length() == 4);
2471  match(Set dst (ReplicateF zero));
2472  format %{ "xorps   $dst,$dst\t! replicate4F zero" %}
2473  ins_encode %{
2474    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
2475  %}
2476  ins_pipe( fpu_reg_reg );
2477%}
2478
2479instruct Repl8F_zero(vecY dst, immF0 zero) %{
2480  predicate(n->as_Vector()->length() == 8);
2481  match(Set dst (ReplicateF zero));
2482  format %{ "vxorps  $dst,$dst,$dst\t! replicate8F zero" %}
2483  ins_encode %{
2484    bool vector256 = true;
2485    __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
2486  %}
2487  ins_pipe( fpu_reg_reg );
2488%}
2489
2490// Replicate double (8 bytes) scalar to be vector
2491instruct Repl2D(vecX dst, regD src) %{
2492  predicate(n->as_Vector()->length() == 2);
2493  match(Set dst (ReplicateD src));
2494  format %{ "pshufd  $dst,$src,0x44\t! replicate2D" %}
2495  ins_encode %{
2496    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
2497  %}
2498  ins_pipe( pipe_slow );
2499%}
2500
2501instruct Repl4D(vecY dst, regD src) %{
2502  predicate(n->as_Vector()->length() == 4);
2503  match(Set dst (ReplicateD src));
2504  format %{ "pshufd  $dst,$src,0x44\n\t"
2505            "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
2506  ins_encode %{
2507    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
2508    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2509  %}
2510  ins_pipe( pipe_slow );
2511%}
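// For float and double replication the scalar already sits in an XMM register,
// so a single pshufd does the broadcast: immediate 0x00 repeats dword 0 into all
// four positions (floats), while 0x44 repeats the low two dwords, i.e. the low
// double, into both 64-bit lanes.  The vecY forms then mirror the low 128 bits
// into the high lane with vinsertf128h.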
2512
2513// Replicate double (8 byte) scalar zero to be vector
2514instruct Repl2D_zero(vecX dst, immD0 zero) %{
2515  predicate(n->as_Vector()->length() == 2);
2516  match(Set dst (ReplicateD zero));
2517  format %{ "xorpd   $dst,$dst\t! replicate2D zero" %}
2518  ins_encode %{
2519    __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
2520  %}
2521  ins_pipe( fpu_reg_reg );
2522%}
2523
2524instruct Repl4D_zero(vecY dst, immD0 zero) %{
2525  predicate(n->as_Vector()->length() == 4);
2526  match(Set dst (ReplicateD zero));
2527  format %{ "vxorpd  $dst,$dst,$dst\t! replicate4D zero" %}
2528  ins_encode %{
2529    bool vector256 = true;
2530    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
2531  %}
2532  ins_pipe( fpu_reg_reg );
2533%}
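// All of the *_zero patterns match a literal zero (immI0/immL0/immF0/immD0) and
// simply xor the destination with itself, which is compact and, on most x86
// cores, also breaks any dependency on the register's previous contents; the
// 256-bit forms pass vector256 = true to get the VEX.256 encoding.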
2534
2535// ====================VECTOR ARITHMETIC=======================================
2536
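// Each vector arithmetic family below follows the same shape: a destructive SSE
// form (dst = dst op src), AVX three-operand register and memory forms gated on
// UseAVX > 0, and 256-bit forms that pass vector256 = true.  Note that the
// 256-bit integer variants (packed32B/16S/8I/4L) require UseAVX > 1 (AVX2),
// while the 256-bit float/double variants only need AVX1.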
2537// --------------------------------- ADD --------------------------------------
2538
2539// Bytes vector add
2540instruct vadd4B(vecS dst, vecS src) %{
2541  predicate(n->as_Vector()->length() == 4);
2542  match(Set dst (AddVB dst src));
2543  format %{ "paddb   $dst,$src\t! add packed4B" %}
2544  ins_encode %{
2545    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
2546  %}
2547  ins_pipe( pipe_slow );
2548%}
2549
2550instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
2551  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2552  match(Set dst (AddVB src1 src2));
2553  format %{ "vpaddb  $dst,$src1,$src2\t! add packed4B" %}
2554  ins_encode %{
2555    bool vector256 = false;
2556    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2557  %}
2558  ins_pipe( pipe_slow );
2559%}
2560
2561instruct vadd8B(vecD dst, vecD src) %{
2562  predicate(n->as_Vector()->length() == 8);
2563  match(Set dst (AddVB dst src));
2564  format %{ "paddb   $dst,$src\t! add packed8B" %}
2565  ins_encode %{
2566    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
2567  %}
2568  ins_pipe( pipe_slow );
2569%}
2570
2571instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
2572  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
2573  match(Set dst (AddVB src1 src2));
2574  format %{ "vpaddb  $dst,$src1,$src2\t! add packed8B" %}
2575  ins_encode %{
2576    bool vector256 = false;
2577    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2578  %}
2579  ins_pipe( pipe_slow );
2580%}
2581
2582instruct vadd16B(vecX dst, vecX src) %{
2583  predicate(n->as_Vector()->length() == 16);
2584  match(Set dst (AddVB dst src));
2585  format %{ "paddb   $dst,$src\t! add packed16B" %}
2586  ins_encode %{
2587    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
2588  %}
2589  ins_pipe( pipe_slow );
2590%}
2591
2592instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
2593  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
2594  match(Set dst (AddVB src1 src2));
2595  format %{ "vpaddb  $dst,$src1,$src2\t! add packed16B" %}
2596  ins_encode %{
2597    bool vector256 = false;
2598    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2599  %}
2600  ins_pipe( pipe_slow );
2601%}
2602
2603instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{
2604  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
2605  match(Set dst (AddVB src (LoadVector mem)));
2606  format %{ "vpaddb  $dst,$src,$mem\t! add packed16B" %}
2607  ins_encode %{
2608    bool vector256 = false;
2609    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2610  %}
2611  ins_pipe( pipe_slow );
2612%}
2613
2614instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{
2615  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
2616  match(Set dst (AddVB src1 src2));
2617  format %{ "vpaddb  $dst,$src1,$src2\t! add packed32B" %}
2618  ins_encode %{
2619    bool vector256 = true;
2620    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2621  %}
2622  ins_pipe( pipe_slow );
2623%}
2624
2625instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{
2626  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
2627  match(Set dst (AddVB src (LoadVector mem)));
2628  format %{ "vpaddb  $dst,$src,$mem\t! add packed32B" %}
2629  ins_encode %{
2630    bool vector256 = true;
2631    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2632  %}
2633  ins_pipe( pipe_slow );
2634%}
2635
2636// Shorts/Chars vector add
2637instruct vadd2S(vecS dst, vecS src) %{
2638  predicate(n->as_Vector()->length() == 2);
2639  match(Set dst (AddVS dst src));
2640  format %{ "paddw   $dst,$src\t! add packed2S" %}
2641  ins_encode %{
2642    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
2643  %}
2644  ins_pipe( pipe_slow );
2645%}
2646
2647instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{
2648  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2649  match(Set dst (AddVS src1 src2));
2650  format %{ "vpaddw  $dst,$src1,$src2\t! add packed2S" %}
2651  ins_encode %{
2652    bool vector256 = false;
2653    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2654  %}
2655  ins_pipe( pipe_slow );
2656%}
2657
2658instruct vadd4S(vecD dst, vecD src) %{
2659  predicate(n->as_Vector()->length() == 4);
2660  match(Set dst (AddVS dst src));
2661  format %{ "paddw   $dst,$src\t! add packed4S" %}
2662  ins_encode %{
2663    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
2664  %}
2665  ins_pipe( pipe_slow );
2666%}
2667
2668instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
2669  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2670  match(Set dst (AddVS src1 src2));
2671  format %{ "vpaddw  $dst,$src1,$src2\t! add packed4S" %}
2672  ins_encode %{
2673    bool vector256 = false;
2674    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2675  %}
2676  ins_pipe( pipe_slow );
2677%}
2678
2679instruct vadd8S(vecX dst, vecX src) %{
2680  predicate(n->as_Vector()->length() == 8);
2681  match(Set dst (AddVS dst src));
2682  format %{ "paddw   $dst,$src\t! add packed8S" %}
2683  ins_encode %{
2684    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
2685  %}
2686  ins_pipe( pipe_slow );
2687%}
2688
2689instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
2690  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
2691  match(Set dst (AddVS src1 src2));
2692  format %{ "vpaddw  $dst,$src1,$src2\t! add packed8S" %}
2693  ins_encode %{
2694    bool vector256 = false;
2695    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2696  %}
2697  ins_pipe( pipe_slow );
2698%}
2699
2700instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
2701  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
2702  match(Set dst (AddVS src (LoadVector mem)));
2703  format %{ "vpaddw  $dst,$src,$mem\t! add packed8S" %}
2704  ins_encode %{
2705    bool vector256 = false;
2706    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2707  %}
2708  ins_pipe( pipe_slow );
2709%}
2710
2711instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
2712  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
2713  match(Set dst (AddVS src1 src2));
2714  format %{ "vpaddw  $dst,$src1,$src2\t! add packed16S" %}
2715  ins_encode %{
2716    bool vector256 = true;
2717    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2718  %}
2719  ins_pipe( pipe_slow );
2720%}
2721
2722instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
2723  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
2724  match(Set dst (AddVS src (LoadVector mem)));
2725  format %{ "vpaddw  $dst,$src,$mem\t! add packed16S" %}
2726  ins_encode %{
2727    bool vector256 = true;
2728    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2729  %}
2730  ins_pipe( pipe_slow );
2731%}
2732
2733// Integers vector add
2734instruct vadd2I(vecD dst, vecD src) %{
2735  predicate(n->as_Vector()->length() == 2);
2736  match(Set dst (AddVI dst src));
2737  format %{ "paddd   $dst,$src\t! add packed2I" %}
2738  ins_encode %{
2739    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
2740  %}
2741  ins_pipe( pipe_slow );
2742%}
2743
2744instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
2745  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2746  match(Set dst (AddVI src1 src2));
2747  format %{ "vpaddd  $dst,$src1,$src2\t! add packed2I" %}
2748  ins_encode %{
2749    bool vector256 = false;
2750    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2751  %}
2752  ins_pipe( pipe_slow );
2753%}
2754
2755instruct vadd4I(vecX dst, vecX src) %{
2756  predicate(n->as_Vector()->length() == 4);
2757  match(Set dst (AddVI dst src));
2758  format %{ "paddd   $dst,$src\t! add packed4I" %}
2759  ins_encode %{
2760    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
2761  %}
2762  ins_pipe( pipe_slow );
2763%}
2764
2765instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
2766  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2767  match(Set dst (AddVI src1 src2));
2768  format %{ "vpaddd  $dst,$src1,$src2\t! add packed4I" %}
2769  ins_encode %{
2770    bool vector256 = false;
2771    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2772  %}
2773  ins_pipe( pipe_slow );
2774%}
2775
2776instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
2777  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2778  match(Set dst (AddVI src (LoadVector mem)));
2779  format %{ "vpaddd  $dst,$src,$mem\t! add packed4I" %}
2780  ins_encode %{
2781    bool vector256 = false;
2782    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2783  %}
2784  ins_pipe( pipe_slow );
2785%}
2786
2787instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
2788  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
2789  match(Set dst (AddVI src1 src2));
2790  format %{ "vpaddd  $dst,$src1,$src2\t! add packed8I" %}
2791  ins_encode %{
2792    bool vector256 = true;
2793    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2794  %}
2795  ins_pipe( pipe_slow );
2796%}
2797
2798instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
2799  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
2800  match(Set dst (AddVI src (LoadVector mem)));
2801  format %{ "vpaddd  $dst,$src,$mem\t! add packed8I" %}
2802  ins_encode %{
2803    bool vector256 = true;
2804    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2805  %}
2806  ins_pipe( pipe_slow );
2807%}
2808
2809// Longs vector add
2810instruct vadd2L(vecX dst, vecX src) %{
2811  predicate(n->as_Vector()->length() == 2);
2812  match(Set dst (AddVL dst src));
2813  format %{ "paddq   $dst,$src\t! add packed2L" %}
2814  ins_encode %{
2815    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
2816  %}
2817  ins_pipe( pipe_slow );
2818%}
2819
2820instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
2821  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2822  match(Set dst (AddVL src1 src2));
2823  format %{ "vpaddq  $dst,$src1,$src2\t! add packed2L" %}
2824  ins_encode %{
2825    bool vector256 = false;
2826    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2827  %}
2828  ins_pipe( pipe_slow );
2829%}
2830
2831instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
2832  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2833  match(Set dst (AddVL src (LoadVector mem)));
2834  format %{ "vpaddq  $dst,$src,$mem\t! add packed2L" %}
2835  ins_encode %{
2836    bool vector256 = false;
2837    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2838  %}
2839  ins_pipe( pipe_slow );
2840%}
2841
2842instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
2843  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
2844  match(Set dst (AddVL src1 src2));
2845  format %{ "vpaddq  $dst,$src1,$src2\t! add packed4L" %}
2846  ins_encode %{
2847    bool vector256 = true;
2848    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2849  %}
2850  ins_pipe( pipe_slow );
2851%}
2852
2853instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
2854  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
2855  match(Set dst (AddVL src (LoadVector mem)));
2856  format %{ "vpaddq  $dst,$src,$mem\t! add packed4L" %}
2857  ins_encode %{
2858    bool vector256 = true;
2859    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2860  %}
2861  ins_pipe( pipe_slow );
2862%}
2863
2864// Floats vector add
2865instruct vadd2F(vecD dst, vecD src) %{
2866  predicate(n->as_Vector()->length() == 2);
2867  match(Set dst (AddVF dst src));
2868  format %{ "addps   $dst,$src\t! add packed2F" %}
2869  ins_encode %{
2870    __ addps($dst$$XMMRegister, $src$$XMMRegister);
2871  %}
2872  ins_pipe( pipe_slow );
2873%}
2874
2875instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
2876  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2877  match(Set dst (AddVF src1 src2));
2878  format %{ "vaddps  $dst,$src1,$src2\t! add packed2F" %}
2879  ins_encode %{
2880    bool vector256 = false;
2881    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2882  %}
2883  ins_pipe( pipe_slow );
2884%}
2885
2886instruct vadd4F(vecX dst, vecX src) %{
2887  predicate(n->as_Vector()->length() == 4);
2888  match(Set dst (AddVF dst src));
2889  format %{ "addps   $dst,$src\t! add packed4F" %}
2890  ins_encode %{
2891    __ addps($dst$$XMMRegister, $src$$XMMRegister);
2892  %}
2893  ins_pipe( pipe_slow );
2894%}
2895
2896instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
2897  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2898  match(Set dst (AddVF src1 src2));
2899  format %{ "vaddps  $dst,$src1,$src2\t! add packed4F" %}
2900  ins_encode %{
2901    bool vector256 = false;
2902    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2903  %}
2904  ins_pipe( pipe_slow );
2905%}
2906
2907instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
2908  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2909  match(Set dst (AddVF src (LoadVector mem)));
2910  format %{ "vaddps  $dst,$src,$mem\t! add packed4F" %}
2911  ins_encode %{
2912    bool vector256 = false;
2913    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2914  %}
2915  ins_pipe( pipe_slow );
2916%}
2917
2918instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
2919  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
2920  match(Set dst (AddVF src1 src2));
2921  format %{ "vaddps  $dst,$src1,$src2\t! add packed8F" %}
2922  ins_encode %{
2923    bool vector256 = true;
2924    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2925  %}
2926  ins_pipe( pipe_slow );
2927%}
2928
2929instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
2930  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
2931  match(Set dst (AddVF src (LoadVector mem)));
2932  format %{ "vaddps  $dst,$src,$mem\t! add packed8F" %}
2933  ins_encode %{
2934    bool vector256 = true;
2935    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2936  %}
2937  ins_pipe( pipe_slow );
2938%}
2939
2940// Doubles vector add
2941instruct vadd2D(vecX dst, vecX src) %{
2942  predicate(n->as_Vector()->length() == 2);
2943  match(Set dst (AddVD dst src));
2944  format %{ "addpd   $dst,$src\t! add packed2D" %}
2945  ins_encode %{
2946    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
2947  %}
2948  ins_pipe( pipe_slow );
2949%}
2950
2951instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
2952  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2953  match(Set dst (AddVD src1 src2));
2954  format %{ "vaddpd  $dst,$src1,$src2\t! add packed2D" %}
2955  ins_encode %{
2956    bool vector256 = false;
2957    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2958  %}
2959  ins_pipe( pipe_slow );
2960%}
2961
2962instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
2963  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2964  match(Set dst (AddVD src (LoadVector mem)));
2965  format %{ "vaddpd  $dst,$src,$mem\t! add packed2D" %}
2966  ins_encode %{
2967    bool vector256 = false;
2968    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2969  %}
2970  ins_pipe( pipe_slow );
2971%}
2972
2973instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
2974  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2975  match(Set dst (AddVD src1 src2));
2976  format %{ "vaddpd  $dst,$src1,$src2\t! add packed4D" %}
2977  ins_encode %{
2978    bool vector256 = true;
2979    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2980  %}
2981  ins_pipe( pipe_slow );
2982%}
2983
2984instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
2985  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2986  match(Set dst (AddVD src (LoadVector mem)));
2987  format %{ "vaddpd  $dst,$src,$mem\t! add packed4D" %}
2988  ins_encode %{
2989    bool vector256 = true;
2990    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2991  %}
2992  ins_pipe( pipe_slow );
2993%}
2994
2995// --------------------------------- SUB --------------------------------------
2996
2997// Bytes vector sub
2998instruct vsub4B(vecS dst, vecS src) %{
2999  predicate(n->as_Vector()->length() == 4);
3000  match(Set dst (SubVB dst src));
3001  format %{ "psubb   $dst,$src\t! sub packed4B" %}
3002  ins_encode %{
3003    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
3004  %}
3005  ins_pipe( pipe_slow );
3006%}
3007
3008instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
3009  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3010  match(Set dst (SubVB src1 src2));
3011  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed4B" %}
3012  ins_encode %{
3013    bool vector256 = false;
3014    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3015  %}
3016  ins_pipe( pipe_slow );
3017%}
3018
3019instruct vsub8B(vecD dst, vecD src) %{
3020  predicate(n->as_Vector()->length() == 8);
3021  match(Set dst (SubVB dst src));
3022  format %{ "psubb   $dst,$src\t! sub packed8B" %}
3023  ins_encode %{
3024    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
3025  %}
3026  ins_pipe( pipe_slow );
3027%}
3028
3029instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
3030  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3031  match(Set dst (SubVB src1 src2));
3032  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed8B" %}
3033  ins_encode %{
3034    bool vector256 = false;
3035    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3036  %}
3037  ins_pipe( pipe_slow );
3038%}
3039
3040instruct vsub16B(vecX dst, vecX src) %{
3041  predicate(n->as_Vector()->length() == 16);
3042  match(Set dst (SubVB dst src));
3043  format %{ "psubb   $dst,$src\t! sub packed16B" %}
3044  ins_encode %{
3045    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
3046  %}
3047  ins_pipe( pipe_slow );
3048%}
3049
3050instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
3051  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
3052  match(Set dst (SubVB src1 src2));
3053  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed16B" %}
3054  ins_encode %{
3055    bool vector256 = false;
3056    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3057  %}
3058  ins_pipe( pipe_slow );
3059%}
3060
3061instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
3062  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
3063  match(Set dst (SubVB src (LoadVector mem)));
3064  format %{ "vpsubb  $dst,$src,$mem\t! sub packed16B" %}
3065  ins_encode %{
3066    bool vector256 = false;
3067    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3068  %}
3069  ins_pipe( pipe_slow );
3070%}
3071
3072instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
3073  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
3074  match(Set dst (SubVB src1 src2));
3075  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed32B" %}
3076  ins_encode %{
3077    bool vector256 = true;
3078    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3079  %}
3080  ins_pipe( pipe_slow );
3081%}
3082
3083instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
3084  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
3085  match(Set dst (SubVB src (LoadVector mem)));
3086  format %{ "vpsubb  $dst,$src,$mem\t! sub packed32B" %}
3087  ins_encode %{
3088    bool vector256 = true;
3089    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3090  %}
3091  ins_pipe( pipe_slow );
3092%}
3093
3094// Shorts/Chars vector sub
3095instruct vsub2S(vecS dst, vecS src) %{
3096  predicate(n->as_Vector()->length() == 2);
3097  match(Set dst (SubVS dst src));
3098  format %{ "psubw   $dst,$src\t! sub packed2S" %}
3099  ins_encode %{
3100    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
3101  %}
3102  ins_pipe( pipe_slow );
3103%}
3104
3105instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
3106  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3107  match(Set dst (SubVS src1 src2));
3108  format %{ "vpsubw  $dst,$src1,$src2\t! sub packed2S" %}
3109  ins_encode %{
3110    bool vector256 = false;
3111    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3112  %}
3113  ins_pipe( pipe_slow );
3114%}
3115
3116instruct vsub4S(vecD dst, vecD src) %{
3117  predicate(n->as_Vector()->length() == 4);
3118  match(Set dst (SubVS dst src));
3119  format %{ "psubw   $dst,$src\t! sub packed4S" %}
3120  ins_encode %{
3121    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
3122  %}
3123  ins_pipe( pipe_slow );
3124%}
3125
3126instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
3127  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3128  match(Set dst (SubVS src1 src2));
3129  format %{ "vpsubw  $dst,$src1,$src2\t! sub packed4S" %}
3130  ins_encode %{
3131    bool vector256 = false;
3132    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3133  %}
3134  ins_pipe( pipe_slow );
3135%}
3136
3137instruct vsub8S(vecX dst, vecX src) %{
3138  predicate(n->as_Vector()->length() == 8);
3139  match(Set dst (SubVS dst src));
3140  format %{ "psubw   $dst,$src\t! sub packed8S" %}
3141  ins_encode %{
3142    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
3143  %}
3144  ins_pipe( pipe_slow );
3145%}
3146
3147instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
3148  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3149  match(Set dst (SubVS src1 src2));
3150  format %{ "vpsubw  $dst,$src1,$src2\t! sub packed8S" %}
3151  ins_encode %{
3152    bool vector256 = false;
3153    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3154  %}
3155  ins_pipe( pipe_slow );
3156%}
3157
3158instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
3159  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3160  match(Set dst (SubVS src (LoadVector mem)));
3161  format %{ "vpsubw  $dst,$src,$mem\t! sub packed8S" %}
3162  ins_encode %{
3163    bool vector256 = false;
3164    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3165  %}
3166  ins_pipe( pipe_slow );
3167%}
3168
3169instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
3170  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
3171  match(Set dst (SubVS src1 src2));
3172  format %{ "vpsubw  $dst,$src1,$src2\t! sub packed16S" %}
3173  ins_encode %{
3174    bool vector256 = true;
3175    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3176  %}
3177  ins_pipe( pipe_slow );
3178%}
3179
3180instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
3181  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
3182  match(Set dst (SubVS src (LoadVector mem)));
3183  format %{ "vpsubw  $dst,$src,$mem\t! sub packed16S" %}
3184  ins_encode %{
3185    bool vector256 = true;
3186    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3187  %}
3188  ins_pipe( pipe_slow );
3189%}
3190
3191// Integers vector sub
3192instruct vsub2I(vecD dst, vecD src) %{
3193  predicate(n->as_Vector()->length() == 2);
3194  match(Set dst (SubVI dst src));
3195  format %{ "psubd   $dst,$src\t! sub packed2I" %}
3196  ins_encode %{
3197    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
3198  %}
3199  ins_pipe( pipe_slow );
3200%}
3201
3202instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
3203  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3204  match(Set dst (SubVI src1 src2));
3205  format %{ "vpsubd  $dst,$src1,$src2\t! sub packed2I" %}
3206  ins_encode %{
3207    bool vector256 = false;
3208    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3209  %}
3210  ins_pipe( pipe_slow );
3211%}
3212
3213instruct vsub4I(vecX dst, vecX src) %{
3214  predicate(n->as_Vector()->length() == 4);
3215  match(Set dst (SubVI dst src));
3216  format %{ "psubd   $dst,$src\t! sub packed4I" %}
3217  ins_encode %{
3218    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
3219  %}
3220  ins_pipe( pipe_slow );
3221%}
3222
3223instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
3224  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3225  match(Set dst (SubVI src1 src2));
3226  format %{ "vpsubd  $dst,$src1,$src2\t! sub packed4I" %}
3227  ins_encode %{
3228    bool vector256 = false;
3229    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3230  %}
3231  ins_pipe( pipe_slow );
3232%}
3233
3234instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
3235  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3236  match(Set dst (SubVI src (LoadVector mem)));
3237  format %{ "vpsubd  $dst,$src,$mem\t! sub packed4I" %}
3238  ins_encode %{
3239    bool vector256 = false;
3240    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3241  %}
3242  ins_pipe( pipe_slow );
3243%}
3244
3245instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
3246  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
3247  match(Set dst (SubVI src1 src2));
3248  format %{ "vpsubd  $dst,$src1,$src2\t! sub packed8I" %}
3249  ins_encode %{
3250    bool vector256 = true;
3251    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3252  %}
3253  ins_pipe( pipe_slow );
3254%}
3255
3256instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
3257  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
3258  match(Set dst (SubVI src (LoadVector mem)));
3259  format %{ "vpsubd  $dst,$src,$mem\t! sub packed8I" %}
3260  ins_encode %{
3261    bool vector256 = true;
3262    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3263  %}
3264  ins_pipe( pipe_slow );
3265%}
3266
3267// Longs vector sub
3268instruct vsub2L(vecX dst, vecX src) %{
3269  predicate(n->as_Vector()->length() == 2);
3270  match(Set dst (SubVL dst src));
3271  format %{ "psubq   $dst,$src\t! sub packed2L" %}
3272  ins_encode %{
3273    __ psubq($dst$$XMMRegister, $src$$XMMRegister);
3274  %}
3275  ins_pipe( pipe_slow );
3276%}
3277
3278instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
3279  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3280  match(Set dst (SubVL src1 src2));
3281  format %{ "vpsubq  $dst,$src1,$src2\t! sub packed2L" %}
3282  ins_encode %{
3283    bool vector256 = false;
3284    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3285  %}
3286  ins_pipe( pipe_slow );
3287%}
3288
3289instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
3290  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3291  match(Set dst (SubVL src (LoadVector mem)));
3292  format %{ "vpsubq  $dst,$src,$mem\t! sub packed2L" %}
3293  ins_encode %{
3294    bool vector256 = false;
3295    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3296  %}
3297  ins_pipe( pipe_slow );
3298%}
3299
3300instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
3301  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
3302  match(Set dst (SubVL src1 src2));
3303  format %{ "vpsubq  $dst,$src1,$src2\t! sub packed4L" %}
3304  ins_encode %{
3305    bool vector256 = true;
3306    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3307  %}
3308  ins_pipe( pipe_slow );
3309%}
3310
3311instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
3312  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
3313  match(Set dst (SubVL src (LoadVector mem)));
3314  format %{ "vpsubq  $dst,$src,$mem\t! sub packed4L" %}
3315  ins_encode %{
3316    bool vector256 = true;
3317    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3318  %}
3319  ins_pipe( pipe_slow );
3320%}
3321
3322// Floats vector sub
3323instruct vsub2F(vecD dst, vecD src) %{
3324  predicate(n->as_Vector()->length() == 2);
3325  match(Set dst (SubVF dst src));
3326  format %{ "subps   $dst,$src\t! sub packed2F" %}
3327  ins_encode %{
3328    __ subps($dst$$XMMRegister, $src$$XMMRegister);
3329  %}
3330  ins_pipe( pipe_slow );
3331%}
3332
3333instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
3334  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3335  match(Set dst (SubVF src1 src2));
3336  format %{ "vsubps  $dst,$src1,$src2\t! sub packed2F" %}
3337  ins_encode %{
3338    bool vector256 = false;
3339    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3340  %}
3341  ins_pipe( pipe_slow );
3342%}
3343
3344instruct vsub4F(vecX dst, vecX src) %{
3345  predicate(n->as_Vector()->length() == 4);
3346  match(Set dst (SubVF dst src));
3347  format %{ "subps   $dst,$src\t! sub packed4F" %}
3348  ins_encode %{
3349    __ subps($dst$$XMMRegister, $src$$XMMRegister);
3350  %}
3351  ins_pipe( pipe_slow );
3352%}
3353
3354instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
3355  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3356  match(Set dst (SubVF src1 src2));
3357  format %{ "vsubps  $dst,$src1,$src2\t! sub packed4F" %}
3358  ins_encode %{
3359    bool vector256 = false;
3360    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3361  %}
3362  ins_pipe( pipe_slow );
3363%}
3364
3365instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
3366  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3367  match(Set dst (SubVF src (LoadVector mem)));
3368  format %{ "vsubps  $dst,$src,$mem\t! sub packed4F" %}
3369  ins_encode %{
3370    bool vector256 = false;
3371    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3372  %}
3373  ins_pipe( pipe_slow );
3374%}
3375
3376instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
3377  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3378  match(Set dst (SubVF src1 src2));
3379  format %{ "vsubps  $dst,$src1,$src2\t! sub packed8F" %}
3380  ins_encode %{
3381    bool vector256 = true;
3382    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3383  %}
3384  ins_pipe( pipe_slow );
3385%}
3386
3387instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
3388  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3389  match(Set dst (SubVF src (LoadVector mem)));
3390  format %{ "vsubps  $dst,$src,$mem\t! sub packed8F" %}
3391  ins_encode %{
3392    bool vector256 = true;
3393    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3394  %}
3395  ins_pipe( pipe_slow );
3396%}
3397
3398// Doubles vector sub
3399instruct vsub2D(vecX dst, vecX src) %{
3400  predicate(n->as_Vector()->length() == 2);
3401  match(Set dst (SubVD dst src));
3402  format %{ "subpd   $dst,$src\t! sub packed2D" %}
3403  ins_encode %{
3404    __ subpd($dst$$XMMRegister, $src$$XMMRegister);
3405  %}
3406  ins_pipe( pipe_slow );
3407%}
3408
3409instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
3410  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3411  match(Set dst (SubVD src1 src2));
3412  format %{ "vsubpd  $dst,$src1,$src2\t! sub packed2D" %}
3413  ins_encode %{
3414    bool vector256 = false;
3415    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3416  %}
3417  ins_pipe( pipe_slow );
3418%}
3419
3420instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
3421  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3422  match(Set dst (SubVD src (LoadVector mem)));
3423  format %{ "vsubpd  $dst,$src,$mem\t! sub packed2D" %}
3424  ins_encode %{
3425    bool vector256 = false;
3426    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3427  %}
3428  ins_pipe( pipe_slow );
3429%}
3430
3431instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
3432  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3433  match(Set dst (SubVD src1 src2));
3434  format %{ "vsubpd  $dst,$src1,$src2\t! sub packed4D" %}
3435  ins_encode %{
3436    bool vector256 = true;
3437    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3438  %}
3439  ins_pipe( pipe_slow );
3440%}
3441
3442instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
3443  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3444  match(Set dst (SubVD src (LoadVector mem)));
3445  format %{ "vsubpd  $dst,$src,$mem\t! sub packed4D" %}
3446  ins_encode %{
3447    bool vector256 = true;
3448    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3449  %}
3450  ins_pipe( pipe_slow );
3451%}
3452
3453// --------------------------------- MUL --------------------------------------
3454
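// Packed short multiplies below use pmullw (available with SSE2), while packed
// int multiplies use pmulld, an SSE4.1 instruction -- hence the UseSSE > 3
// guard on the non-AVX integer rules.  A rough sketch of a loop the SuperWord
// pass may reduce to MulVI nodes matching those rules:
//
//   static void mul(int[] a, int[] b, int[] c) {
//     for (int i = 0; i < c.length; i++) {
//       c[i] = a[i] * b[i];
//     }
//   }
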
3455// Shorts/Chars vector mul
3456instruct vmul2S(vecS dst, vecS src) %{
3457  predicate(n->as_Vector()->length() == 2);
3458  match(Set dst (MulVS dst src));
3459  format %{ "pmullw  $dst,$src\t! mul packed2S" %}
3460  ins_encode %{
3461    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
3462  %}
3463  ins_pipe( pipe_slow );
3464%}
3465
3466instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
3467  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3468  match(Set dst (MulVS src1 src2));
3469  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
3470  ins_encode %{
3471    bool vector256 = false;
3472    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3473  %}
3474  ins_pipe( pipe_slow );
3475%}
3476
3477instruct vmul4S(vecD dst, vecD src) %{
3478  predicate(n->as_Vector()->length() == 4);
3479  match(Set dst (MulVS dst src));
3480  format %{ "pmullw  $dst,$src\t! mul packed4S" %}
3481  ins_encode %{
3482    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
3483  %}
3484  ins_pipe( pipe_slow );
3485%}
3486
3487instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{
3488  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3489  match(Set dst (MulVS src1 src2));
3490  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
3491  ins_encode %{
3492    bool vector256 = false;
3493    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3494  %}
3495  ins_pipe( pipe_slow );
3496%}
3497
3498instruct vmul8S(vecX dst, vecX src) %{
3499  predicate(n->as_Vector()->length() == 8);
3500  match(Set dst (MulVS dst src));
3501  format %{ "pmullw  $dst,$src\t! mul packed8S" %}
3502  ins_encode %{
3503    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
3504  %}
3505  ins_pipe( pipe_slow );
3506%}
3507
3508instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{
3509  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3510  match(Set dst (MulVS src1 src2));
3511  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
3512  ins_encode %{
3513    bool vector256 = false;
3514    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3515  %}
3516  ins_pipe( pipe_slow );
3517%}
3518
3519instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{
3520  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3521  match(Set dst (MulVS src (LoadVector mem)));
3522  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
3523  ins_encode %{
3524    bool vector256 = false;
3525    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3526  %}
3527  ins_pipe( pipe_slow );
3528%}
3529
3530instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{
3531  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
3532  match(Set dst (MulVS src1 src2));
3533  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
3534  ins_encode %{
3535    bool vector256 = true;
3536    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3537  %}
3538  ins_pipe( pipe_slow );
3539%}
3540
3541instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{
3542  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
3543  match(Set dst (MulVS src (LoadVector mem)));
3544  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
3545  ins_encode %{
3546    bool vector256 = true;
3547    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3548  %}
3549  ins_pipe( pipe_slow );
3550%}
3551
3552// Integers vector mul (sse4_1)
3553instruct vmul2I(vecD dst, vecD src) %{
3554  predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
3555  match(Set dst (MulVI dst src));
3556  format %{ "pmulld  $dst,$src\t! mul packed2I" %}
3557  ins_encode %{
3558    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
3559  %}
3560  ins_pipe( pipe_slow );
3561%}
3562
3563instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
3564  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3565  match(Set dst (MulVI src1 src2));
3566  format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
3567  ins_encode %{
3568    bool vector256 = false;
3569    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3570  %}
3571  ins_pipe( pipe_slow );
3572%}
3573
3574instruct vmul4I(vecX dst, vecX src) %{
3575  predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
3576  match(Set dst (MulVI dst src));
3577  format %{ "pmulld  $dst,$src\t! mul packed4I" %}
3578  ins_encode %{
3579    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
3580  %}
3581  ins_pipe( pipe_slow );
3582%}
3583
3584instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
3585  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3586  match(Set dst (MulVI src1 src2));
3587  format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
3588  ins_encode %{
3589    bool vector256 = false;
3590    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3591  %}
3592  ins_pipe( pipe_slow );
3593%}
3594
3595instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
3596  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3597  match(Set dst (MulVI src (LoadVector mem)));
3598  format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
3599  ins_encode %{
3600    bool vector256 = false;
3601    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3602  %}
3603  ins_pipe( pipe_slow );
3604%}
3605
3606instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
3607  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
3608  match(Set dst (MulVI src1 src2));
3609  format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
3610  ins_encode %{
3611    bool vector256 = true;
3612    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3613  %}
3614  ins_pipe( pipe_slow );
3615%}
3616
3617instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
3618  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
3619  match(Set dst (MulVI src (LoadVector mem)));
3620  format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
3621  ins_encode %{
3622    bool vector256 = true;
3623    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3624  %}
3625  ins_pipe( pipe_slow );
3626%}
3627
3628// Floats vector mul
3629instruct vmul2F(vecD dst, vecD src) %{
3630  predicate(n->as_Vector()->length() == 2);
3631  match(Set dst (MulVF dst src));
3632  format %{ "mulps   $dst,$src\t! mul packed2F" %}
3633  ins_encode %{
3634    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
3635  %}
3636  ins_pipe( pipe_slow );
3637%}
3638
3639instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
3640  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3641  match(Set dst (MulVF src1 src2));
3642  format %{ "vmulps  $dst,$src1,$src2\t! mul packed2F" %}
3643  ins_encode %{
3644    bool vector256 = false;
3645    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3646  %}
3647  ins_pipe( pipe_slow );
3648%}
3649
3650instruct vmul4F(vecX dst, vecX src) %{
3651  predicate(n->as_Vector()->length() == 4);
3652  match(Set dst (MulVF dst src));
3653  format %{ "mulps   $dst,$src\t! mul packed4F" %}
3654  ins_encode %{
3655    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
3656  %}
3657  ins_pipe( pipe_slow );
3658%}
3659
3660instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
3661  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3662  match(Set dst (MulVF src1 src2));
3663  format %{ "vmulps  $dst,$src1,$src2\t! mul packed4F" %}
3664  ins_encode %{
3665    bool vector256 = false;
3666    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3667  %}
3668  ins_pipe( pipe_slow );
3669%}
3670
3671instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
3672  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3673  match(Set dst (MulVF src (LoadVector mem)));
3674  format %{ "vmulps  $dst,$src,$mem\t! mul packed4F" %}
3675  ins_encode %{
3676    bool vector256 = false;
3677    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3678  %}
3679  ins_pipe( pipe_slow );
3680%}
3681
3682instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
3683  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3684  match(Set dst (MulVF src1 src2));
3685  format %{ "vmulps  $dst,$src1,$src2\t! mul packed8F" %}
3686  ins_encode %{
3687    bool vector256 = true;
3688    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3689  %}
3690  ins_pipe( pipe_slow );
3691%}
3692
3693instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
3694  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3695  match(Set dst (MulVF src (LoadVector mem)));
3696  format %{ "vmulps  $dst,$src,$mem\t! mul packed8F" %}
3697  ins_encode %{
3698    bool vector256 = true;
3699    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3700  %}
3701  ins_pipe( pipe_slow );
3702%}
3703
3704// Doubles vector mul
3705instruct vmul2D(vecX dst, vecX src) %{
3706  predicate(n->as_Vector()->length() == 2);
3707  match(Set dst (MulVD dst src));
3708  format %{ "mulpd   $dst,$src\t! mul packed2D" %}
3709  ins_encode %{
3710    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
3711  %}
3712  ins_pipe( pipe_slow );
3713%}
3714
3715instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
3716  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3717  match(Set dst (MulVD src1 src2));
3718  format %{ "vmulpd  $dst,$src1,$src2\t! mul packed2D" %}
3719  ins_encode %{
3720    bool vector256 = false;
3721    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3722  %}
3723  ins_pipe( pipe_slow );
3724%}
3725
3726instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
3727  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3728  match(Set dst (MulVD src (LoadVector mem)));
3729  format %{ "vmulpd  $dst,$src,$mem\t! mul packed2D" %}
3730  ins_encode %{
3731    bool vector256 = false;
3732    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3733  %}
3734  ins_pipe( pipe_slow );
3735%}
3736
3737instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
3738  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3739  match(Set dst (MulVD src1 src2));
3740  format %{ "vmulpd  $dst,$src1,$src2\t! mul packed4D" %}
3741  ins_encode %{
3742    bool vector256 = true;
3743    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3744  %}
3745  ins_pipe( pipe_slow );
3746%}
3747
3748instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
3749  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3750  match(Set dst (MulVD src (LoadVector mem)));
3751  format %{ "vmulpd  $dst,$src,$mem\t! mul packed4D" %}
3752  ins_encode %{
3753    bool vector256 = true;
3754    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3755  %}
3756  ins_pipe( pipe_slow );
3757%}
3758
3759// --------------------------------- DIV --------------------------------------
3760
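// Only floating-point division is vectorized: SSE/AVX provide packed divide
// instructions (divps/divpd and their VEX forms) for floats and doubles, but
// no packed integer divide, so integer division is not vectorized here.  A
// rough sketch of a loop the SuperWord pass may reduce to DivVF nodes matching
// the rules below:
//
//   static void div(float[] a, float[] b, float[] c) {
//     for (int i = 0; i < c.length; i++) {
//       c[i] = a[i] / b[i];
//     }
//   }
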
3761// Floats vector div
3762instruct vdiv2F(vecD dst, vecD src) %{
3763  predicate(n->as_Vector()->length() == 2);
3764  match(Set dst (DivVF dst src));
3765  format %{ "divps   $dst,$src\t! div packed2F" %}
3766  ins_encode %{
3767    __ divps($dst$$XMMRegister, $src$$XMMRegister);
3768  %}
3769  ins_pipe( pipe_slow );
3770%}
3771
3772instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
3773  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3774  match(Set dst (DivVF src1 src2));
3775  format %{ "vdivps  $dst,$src1,$src2\t! div packed2F" %}
3776  ins_encode %{
3777    bool vector256 = false;
3778    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3779  %}
3780  ins_pipe( pipe_slow );
3781%}
3782
3783instruct vdiv4F(vecX dst, vecX src) %{
3784  predicate(n->as_Vector()->length() == 4);
3785  match(Set dst (DivVF dst src));
3786  format %{ "divps   $dst,$src\t! div packed4F" %}
3787  ins_encode %{
3788    __ divps($dst$$XMMRegister, $src$$XMMRegister);
3789  %}
3790  ins_pipe( pipe_slow );
3791%}
3792
3793instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
3794  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3795  match(Set dst (DivVF src1 src2));
3796  format %{ "vdivps  $dst,$src1,$src2\t! div packed4F" %}
3797  ins_encode %{
3798    bool vector256 = false;
3799    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3800  %}
3801  ins_pipe( pipe_slow );
3802%}
3803
3804instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
3805  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3806  match(Set dst (DivVF src (LoadVector mem)));
3807  format %{ "vdivps  $dst,$src,$mem\t! div packed4F" %}
3808  ins_encode %{
3809    bool vector256 = false;
3810    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3811  %}
3812  ins_pipe( pipe_slow );
3813%}
3814
3815instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
3816  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3817  match(Set dst (DivVF src1 src2));
3818  format %{ "vdivps  $dst,$src1,$src2\t! div packed8F" %}
3819  ins_encode %{
3820    bool vector256 = true;
3821    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3822  %}
3823  ins_pipe( pipe_slow );
3824%}
3825
3826instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
3827  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3828  match(Set dst (DivVF src (LoadVector mem)));
3829  format %{ "vdivps  $dst,$src,$mem\t! div packed8F" %}
3830  ins_encode %{
3831    bool vector256 = true;
3832    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3833  %}
3834  ins_pipe( pipe_slow );
3835%}
3836
3837// Doubles vector div
3838instruct vdiv2D(vecX dst, vecX src) %{
3839  predicate(n->as_Vector()->length() == 2);
3840  match(Set dst (DivVD dst src));
3841  format %{ "divpd   $dst,$src\t! div packed2D" %}
3842  ins_encode %{
3843    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
3844  %}
3845  ins_pipe( pipe_slow );
3846%}
3847
3848instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
3849  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3850  match(Set dst (DivVD src1 src2));
3851  format %{ "vdivpd  $dst,$src1,$src2\t! div packed2D" %}
3852  ins_encode %{
3853    bool vector256 = false;
3854    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3855  %}
3856  ins_pipe( pipe_slow );
3857%}
3858
3859instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
3860  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3861  match(Set dst (DivVD src (LoadVector mem)));
3862  format %{ "vdivpd  $dst,$src,$mem\t! div packed2D" %}
3863  ins_encode %{
3864    bool vector256 = false;
3865    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3866  %}
3867  ins_pipe( pipe_slow );
3868%}
3869
3870instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
3871  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3872  match(Set dst (DivVD src1 src2));
3873  format %{ "vdivpd  $dst,$src1,$src2\t! div packed4D" %}
3874  ins_encode %{
3875    bool vector256 = true;
3876    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3877  %}
3878  ins_pipe( pipe_slow );
3879%}
3880
3881instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
3882  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3883  match(Set dst (DivVD src (LoadVector mem)));
3884  format %{ "vdivpd  $dst,$src,$mem\t! div packed4D" %}
3885  ins_encode %{
3886    bool vector256 = true;
3887    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3888  %}
3889  ins_pipe( pipe_slow );
3890%}
3891
3892// ------------------------------ Shift ---------------------------------------
3893
3894// Left and right shift count vectors are the same on x86
3895// (only the low 64 bits of the xmm register are used as the count).
3896instruct vshiftcnt(vecS dst, rRegI cnt) %{
3897  match(Set dst (LShiftCntV cnt));
3898  match(Set dst (RShiftCntV cnt));
3899  format %{ "movd    $dst,$cnt\t! load shift count" %}
3900  ins_encode %{
3901    __ movdl($dst$$XMMRegister, $cnt$$Register);
3902  %}
3903  ins_pipe( pipe_slow );
3904%}
3905
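// The vshiftcnt rule above handles a variable (non-constant) shift count: the
// scalar count is moved into the low part of an xmm register once and then fed
// to the packed shift instructions that follow.  A rough sketch of a loop with
// a loop-invariant variable count that the SuperWord pass may reduce to
// LShiftCntV plus LShiftVI nodes matching these rules:
//
//   static void shl(int[] a, int[] c, int s) {
//     for (int i = 0; i < c.length; i++) {
//       c[i] = a[i] << s;
//     }
//   }
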
3906// ------------------------------ LeftShift -----------------------------------
3907
3908// Shorts/Chars vector left shift
3909instruct vsll2S(vecS dst, vecS shift) %{
3910  predicate(n->as_Vector()->length() == 2);
3911  match(Set dst (LShiftVS dst shift));
3912  format %{ "psllw   $dst,$shift\t! left shift packed2S" %}
3913  ins_encode %{
3914    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
3915  %}
3916  ins_pipe( pipe_slow );
3917%}
3918
3919instruct vsll2S_imm(vecS dst, immI8 shift) %{
3920  predicate(n->as_Vector()->length() == 2);
3921  match(Set dst (LShiftVS dst shift));
3922  format %{ "psllw   $dst,$shift\t! left shift packed2S" %}
3923  ins_encode %{
3924    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
3925  %}
3926  ins_pipe( pipe_slow );
3927%}
3928
3929instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{
3930  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3931  match(Set dst (LShiftVS src shift));
3932  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
3933  ins_encode %{
3934    bool vector256 = false;
3935    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
3936  %}
3937  ins_pipe( pipe_slow );
3938%}
3939
3940instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
3941  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3942  match(Set dst (LShiftVS src shift));
3943  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
3944  ins_encode %{
3945    bool vector256 = false;
3946    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
3947  %}
3948  ins_pipe( pipe_slow );
3949%}
3950
3951instruct vsll4S(vecD dst, vecS shift) %{
3952  predicate(n->as_Vector()->length() == 4);
3953  match(Set dst (LShiftVS dst shift));
3954  format %{ "psllw   $dst,$shift\t! left shift packed4S" %}
3955  ins_encode %{
3956    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
3957  %}
3958  ins_pipe( pipe_slow );
3959%}
3960
3961instruct vsll4S_imm(vecD dst, immI8 shift) %{
3962  predicate(n->as_Vector()->length() == 4);
3963  match(Set dst (LShiftVS dst shift));
3964  format %{ "psllw   $dst,$shift\t! left shift packed4S" %}
3965  ins_encode %{
3966    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
3967  %}
3968  ins_pipe( pipe_slow );
3969%}
3970
3971instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{
3972  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3973  match(Set dst (LShiftVS src shift));
3974  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
3975  ins_encode %{
3976    bool vector256 = false;
3977    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
3978  %}
3979  ins_pipe( pipe_slow );
3980%}
3981
3982instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
3983  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3984  match(Set dst (LShiftVS src shift));
3985  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
3986  ins_encode %{
3987    bool vector256 = false;
3988    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
3989  %}
3990  ins_pipe( pipe_slow );
3991%}
3992
3993instruct vsll8S(vecX dst, vecS shift) %{
3994  predicate(n->as_Vector()->length() == 8);
3995  match(Set dst (LShiftVS dst shift));
3996  format %{ "psllw   $dst,$shift\t! left shift packed8S" %}
3997  ins_encode %{
3998    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
3999  %}
4000  ins_pipe( pipe_slow );
4001%}
4002
4003instruct vsll8S_imm(vecX dst, immI8 shift) %{
4004  predicate(n->as_Vector()->length() == 8);
4005  match(Set dst (LShiftVS dst shift));
4006  format %{ "psllw   $dst,$shift\t! left shift packed8S" %}
4007  ins_encode %{
4008    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
4009  %}
4010  ins_pipe( pipe_slow );
4011%}
4012
4013instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{
4014  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
4015  match(Set dst (LShiftVS src shift));
4016  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
4017  ins_encode %{
4018    bool vector256 = false;
4019    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4020  %}
4021  ins_pipe( pipe_slow );
4022%}
4023
4024instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
4025  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
4026  match(Set dst (LShiftVS src shift));
4027  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
4028  ins_encode %{
4029    bool vector256 = false;
4030    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4031  %}
4032  ins_pipe( pipe_slow );
4033%}
4034
4035instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{
4036  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
4037  match(Set dst (LShiftVS src shift));
4038  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
4039  ins_encode %{
4040    bool vector256 = true;
4041    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4042  %}
4043  ins_pipe( pipe_slow );
4044%}
4045
4046instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
4047  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
4048  match(Set dst (LShiftVS src shift));
4049  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
4050  ins_encode %{
4051    bool vector256 = true;
4052    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4053  %}
4054  ins_pipe( pipe_slow );
4055%}
4056
4057// Integers vector left shift
4058instruct vsll2I(vecD dst, vecS shift) %{
4059  predicate(n->as_Vector()->length() == 2);
4060  match(Set dst (LShiftVI dst shift));
4061  format %{ "pslld   $dst,$shift\t! left shift packed2I" %}
4062  ins_encode %{
4063    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
4064  %}
4065  ins_pipe( pipe_slow );
4066%}
4067
4068instruct vsll2I_imm(vecD dst, immI8 shift) %{
4069  predicate(n->as_Vector()->length() == 2);
4070  match(Set dst (LShiftVI dst shift));
4071  format %{ "pslld   $dst,$shift\t! left shift packed2I" %}
4072  ins_encode %{
4073    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
4074  %}
4075  ins_pipe( pipe_slow );
4076%}
4077
4078instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{
4079  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4080  match(Set dst (LShiftVI src shift));
4081  format %{ "vpslld  $dst,$src,$shift\t! left shift packed2I" %}
4082  ins_encode %{
4083    bool vector256 = false;
4084    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4085  %}
4086  ins_pipe( pipe_slow );
4087%}
4088
4089instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
4090  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4091  match(Set dst (LShiftVI src shift));
4092  format %{ "vpslld  $dst,$src,$shift\t! left shift packed2I" %}
4093  ins_encode %{
4094    bool vector256 = false;
4095    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4096  %}
4097  ins_pipe( pipe_slow );
4098%}
4099
4100instruct vsll4I(vecX dst, vecS shift) %{
4101  predicate(n->as_Vector()->length() == 4);
4102  match(Set dst (LShiftVI dst shift));
4103  format %{ "pslld   $dst,$shift\t! left shift packed4I" %}
4104  ins_encode %{
4105    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
4106  %}
4107  ins_pipe( pipe_slow );
4108%}
4109
4110instruct vsll4I_imm(vecX dst, immI8 shift) %{
4111  predicate(n->as_Vector()->length() == 4);
4112  match(Set dst (LShiftVI dst shift));
4113  format %{ "pslld   $dst,$shift\t! left shift packed4I" %}
4114  ins_encode %{
4115    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
4116  %}
4117  ins_pipe( pipe_slow );
4118%}
4119
4120instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{
4121  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4122  match(Set dst (LShiftVI src shift));
4123  format %{ "vpslld  $dst,$src,$shift\t! left shift packed4I" %}
4124  ins_encode %{
4125    bool vector256 = false;
4126    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4127  %}
4128  ins_pipe( pipe_slow );
4129%}
4130
4131instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
4132  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4133  match(Set dst (LShiftVI src shift));
4134  format %{ "vpslld  $dst,$src,$shift\t! left shift packed4I" %}
4135  ins_encode %{
4136    bool vector256 = false;
4137    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4138  %}
4139  ins_pipe( pipe_slow );
4140%}
4141
4142instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{
4143  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
4144  match(Set dst (LShiftVI src shift));
4145  format %{ "vpslld  $dst,$src,$shift\t! left shift packed8I" %}
4146  ins_encode %{
4147    bool vector256 = true;
4148    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4149  %}
4150  ins_pipe( pipe_slow );
4151%}
4152
4153instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
4154  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
4155  match(Set dst (LShiftVI src shift));
4156  format %{ "vpslld  $dst,$src,$shift\t! left shift packed8I" %}
4157  ins_encode %{
4158    bool vector256 = true;
4159    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4160  %}
4161  ins_pipe( pipe_slow );
4162%}
4163
4164// Longs vector left shift
4165instruct vsll2L(vecX dst, vecS shift) %{
4166  predicate(n->as_Vector()->length() == 2);
4167  match(Set dst (LShiftVL dst shift));
4168  format %{ "psllq   $dst,$shift\t! left shift packed2L" %}
4169  ins_encode %{
4170    __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
4171  %}
4172  ins_pipe( pipe_slow );
4173%}
4174
4175instruct vsll2L_imm(vecX dst, immI8 shift) %{
4176  predicate(n->as_Vector()->length() == 2);
4177  match(Set dst (LShiftVL dst shift));
4178  format %{ "psllq   $dst,$shift\t! left shift packed2L" %}
4179  ins_encode %{
4180    __ psllq($dst$$XMMRegister, (int)$shift$$constant);
4181  %}
4182  ins_pipe( pipe_slow );
4183%}
4184
4185instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{
4186  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4187  match(Set dst (LShiftVL src shift));
4188  format %{ "vpsllq  $dst,$src,$shift\t! left shift packed2L" %}
4189  ins_encode %{
4190    bool vector256 = false;
4191    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4192  %}
4193  ins_pipe( pipe_slow );
4194%}
4195
4196instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
4197  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4198  match(Set dst (LShiftVL src shift));
4199  format %{ "vpsllq  $dst,$src,$shift\t! left shift packed2L" %}
4200  ins_encode %{
4201    bool vector256 = false;
4202    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4203  %}
4204  ins_pipe( pipe_slow );
4205%}
4206
4207instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{
4208  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
4209  match(Set dst (LShiftVL src shift));
4210  format %{ "vpsllq  $dst,$src,$shift\t! left shift packed4L" %}
4211  ins_encode %{
4212    bool vector256 = true;
4213    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4214  %}
4215  ins_pipe( pipe_slow );
4216%}
4217
4218instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
4219  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
4220  match(Set dst (LShiftVL src shift));
4221  format %{ "vpsllq  $dst,$src,$shift\t! left shift packed4L" %}
4222  ins_encode %{
4223    bool vector256 = true;
4224    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4225  %}
4226  ins_pipe( pipe_slow );
4227%}
4228
4229// ----------------------- LogicalRightShift -----------------------------------
4230
4231// Shorts vector logical right shift produces an incorrect Java result
4232// for negative data because Java code converts short values to int with
4233// sign extension before the shift.  Char vectors, however, are fine since
4234// chars are unsigned values.
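//
// For example, with a short lane value of -1 (bit pattern 0xFFFF) and a shift
// count of 2, Java promotes the value to the int 0xFFFFFFFF and computes
// 0xFFFFFFFF >>> 2 = 0x3FFFFFFF; narrowed back to short that is 0xFFFF (-1),
// whereas a packed 16-bit logical shift (psrlw) would yield 0x3FFF (16383):
//
//   short s = -1;
//   short jr = (short)(s >>> 2);   // Java result: -1 (via promotion to int)
//                                  // psrlw by 2 on 0xFFFF would give 0x3FFF
//
// For a char the promoted value is 0x0000FFFF and 0x0000FFFF >>> 2 = 0x3FFF,
// which matches the packed result, so char shifts are safe to vectorize.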
4235
4236instruct vsrl2S(vecS dst, vecS shift) %{
4237  predicate(n->as_Vector()->length() == 2);
4238  match(Set dst (URShiftVS dst shift));
4239  format %{ "psrlw   $dst,$shift\t! logical right shift packed2S" %}
4240  ins_encode %{
4241    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
4242  %}
4243  ins_pipe( pipe_slow );
4244%}
4245
4246instruct vsrl2S_imm(vecS dst, immI8 shift) %{
4247  predicate(n->as_Vector()->length() == 2);
4248  match(Set dst (URShiftVS dst shift));
4249  format %{ "psrlw   $dst,$shift\t! logical right shift packed2S" %}
4250  ins_encode %{
4251    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
4252  %}
4253  ins_pipe( pipe_slow );
4254%}
4255
4256instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{
4257  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4258  match(Set dst (URShiftVS src shift));
4259  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
4260  ins_encode %{
4261    bool vector256 = false;
4262    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4263  %}
4264  ins_pipe( pipe_slow );
4265%}
4266
4267instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
4268  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4269  match(Set dst (URShiftVS src shift));
4270  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
4271  ins_encode %{
4272    bool vector256 = false;
4273    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4274  %}
4275  ins_pipe( pipe_slow );
4276%}
4277
4278instruct vsrl4S(vecD dst, vecS shift) %{
4279  predicate(n->as_Vector()->length() == 4);
4280  match(Set dst (URShiftVS dst shift));
4281  format %{ "psrlw   $dst,$shift\t! logical right shift packed4S" %}
4282  ins_encode %{
4283    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
4284  %}
4285  ins_pipe( pipe_slow );
4286%}
4287
4288instruct vsrl4S_imm(vecD dst, immI8 shift) %{
4289  predicate(n->as_Vector()->length() == 4);
4290  match(Set dst (URShiftVS dst shift));
4291  format %{ "psrlw   $dst,$shift\t! logical right shift packed4S" %}
4292  ins_encode %{
4293    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
4294  %}
4295  ins_pipe( pipe_slow );
4296%}
4297
4298instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{
4299  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4300  match(Set dst (URShiftVS src shift));
4301  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
4302  ins_encode %{
4303    bool vector256 = false;
4304    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4305  %}
4306  ins_pipe( pipe_slow );
4307%}
4308
4309instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
4310  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4311  match(Set dst (URShiftVS src shift));
4312  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
4313  ins_encode %{
4314    bool vector256 = false;
4315    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4316  %}
4317  ins_pipe( pipe_slow );
4318%}
4319
4320instruct vsrl8S(vecX dst, vecS shift) %{
4321  predicate(n->as_Vector()->length() == 8);
4322  match(Set dst (URShiftVS dst shift));
4323  format %{ "psrlw   $dst,$shift\t! logical right shift packed8S" %}
4324  ins_encode %{
4325    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
4326  %}
4327  ins_pipe( pipe_slow );
4328%}
4329
4330instruct vsrl8S_imm(vecX dst, immI8 shift) %{
4331  predicate(n->as_Vector()->length() == 8);
4332  match(Set dst (URShiftVS dst shift));
4333  format %{ "psrlw   $dst,$shift\t! logical right shift packed8S" %}
4334  ins_encode %{
4335    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
4336  %}
4337  ins_pipe( pipe_slow );
4338%}
4339
4340instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{
4341  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
4342  match(Set dst (URShiftVS src shift));
4343  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
4344  ins_encode %{
4345    bool vector256 = false;
4346    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4347  %}
4348  ins_pipe( pipe_slow );
4349%}
4350
4351instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
4352  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
4353  match(Set dst (URShiftVS src shift));
4354  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
4355  ins_encode %{
4356    bool vector256 = false;
4357    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4358  %}
4359  ins_pipe( pipe_slow );
4360%}
4361
4362instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{
4363  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
4364  match(Set dst (URShiftVS src shift));
4365  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
4366  ins_encode %{
4367    bool vector256 = true;
4368    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4369  %}
4370  ins_pipe( pipe_slow );
4371%}
4372
4373instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
4374  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
4375  match(Set dst (URShiftVS src shift));
4376  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
4377  ins_encode %{
4378    bool vector256 = true;
4379    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4380  %}
4381  ins_pipe( pipe_slow );
4382%}
4383
4384// Integers vector logical right shift
4385instruct vsrl2I(vecD dst, vecS shift) %{
4386  predicate(n->as_Vector()->length() == 2);
4387  match(Set dst (URShiftVI dst shift));
4388  format %{ "psrld   $dst,$shift\t! logical right shift packed2I" %}
4389  ins_encode %{
4390    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
4391  %}
4392  ins_pipe( pipe_slow );
4393%}
4394
4395instruct vsrl2I_imm(vecD dst, immI8 shift) %{
4396  predicate(n->as_Vector()->length() == 2);
4397  match(Set dst (URShiftVI dst shift));
4398  format %{ "psrld   $dst,$shift\t! logical right shift packed2I" %}
4399  ins_encode %{
4400    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
4401  %}
4402  ins_pipe( pipe_slow );
4403%}
4404
4405instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{
4406  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4407  match(Set dst (URShiftVI src shift));
4408  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed2I" %}
4409  ins_encode %{
4410    bool vector256 = false;
4411    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4412  %}
4413  ins_pipe( pipe_slow );
4414%}
4415
4416instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
4417  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4418  match(Set dst (URShiftVI src shift));
4419  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed2I" %}
4420  ins_encode %{
4421    bool vector256 = false;
4422    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4423  %}
4424  ins_pipe( pipe_slow );
4425%}
4426
4427instruct vsrl4I(vecX dst, vecS shift) %{
4428  predicate(n->as_Vector()->length() == 4);
4429  match(Set dst (URShiftVI dst shift));
4430  format %{ "psrld   $dst,$shift\t! logical right shift packed4I" %}
4431  ins_encode %{
4432    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
4433  %}
4434  ins_pipe( pipe_slow );
4435%}
4436
4437instruct vsrl4I_imm(vecX dst, immI8 shift) %{
4438  predicate(n->as_Vector()->length() == 4);
4439  match(Set dst (URShiftVI dst shift));
4440  format %{ "psrld   $dst,$shift\t! logical right shift packed4I" %}
4441  ins_encode %{
4442    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
4443  %}
4444  ins_pipe( pipe_slow );
4445%}
4446
4447instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{
4448  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4449  match(Set dst (URShiftVI src shift));
4450  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed4I" %}
4451  ins_encode %{
4452    bool vector256 = false;
4453    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4454  %}
4455  ins_pipe( pipe_slow );
4456%}
4457
4458instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
4459  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4460  match(Set dst (URShiftVI src shift));
4461  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed4I" %}
4462  ins_encode %{
4463    bool vector256 = false;
4464    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4465  %}
4466  ins_pipe( pipe_slow );
4467%}
4468
4469instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{
4470  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
4471  match(Set dst (URShiftVI src shift));
4472  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed8I" %}
4473  ins_encode %{
4474    bool vector256 = true;
4475    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4476  %}
4477  ins_pipe( pipe_slow );
4478%}
4479
4480instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
4481  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
4482  match(Set dst (URShiftVI src shift));
4483  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed8I" %}
4484  ins_encode %{
4485    bool vector256 = true;
4486    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4487  %}
4488  ins_pipe( pipe_slow );
4489%}
4490
4491// Longs vector logical right shift
4492instruct vsrl2L(vecX dst, vecS shift) %{
4493  predicate(n->as_Vector()->length() == 2);
4494  match(Set dst (URShiftVL dst shift));
4495  format %{ "psrlq   $dst,$shift\t! logical right shift packed2L" %}
4496  ins_encode %{
4497    __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
4498  %}
4499  ins_pipe( pipe_slow );
4500%}
4501
4502instruct vsrl2L_imm(vecX dst, immI8 shift) %{
4503  predicate(n->as_Vector()->length() == 2);
4504  match(Set dst (URShiftVL dst shift));
4505  format %{ "psrlq   $dst,$shift\t! logical right shift packed2L" %}
4506  ins_encode %{
4507    __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
4508  %}
4509  ins_pipe( pipe_slow );
4510%}
4511
4512instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{
4513  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4514  match(Set dst (URShiftVL src shift));
4515  format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed2L" %}
4516  ins_encode %{
4517    bool vector256 = false;
4518    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4519  %}
4520  ins_pipe( pipe_slow );
4521%}
4522
4523instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
4524  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4525  match(Set dst (URShiftVL src shift));
4526  format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed2L" %}
4527  ins_encode %{
4528    bool vector256 = false;
4529    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4530  %}
4531  ins_pipe( pipe_slow );
4532%}
4533
4534instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{
4535  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
4536  match(Set dst (URShiftVL src shift));
4537  format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed4L" %}
4538  ins_encode %{
4539    bool vector256 = true;
4540    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4541  %}
4542  ins_pipe( pipe_slow );
4543%}
4544
4545instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
4546  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
4547  match(Set dst (URShiftVL src shift));
4548  format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed4L" %}
4549  ins_encode %{
4550    bool vector256 = true;
4551    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4552  %}
4553  ins_pipe( pipe_slow );
4554%}
4555
4556// ------------------- ArithmeticRightShift -----------------------------------
4557
4558// Shorts/Chars vector arithmetic right shift
4559instruct vsra2S(vecS dst, vecS shift) %{
4560  predicate(n->as_Vector()->length() == 2);
4561  match(Set dst (RShiftVS dst shift));
4562  format %{ "psraw   $dst,$shift\t! arithmetic right shift packed2S" %}
4563  ins_encode %{
4564    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
4565  %}
4566  ins_pipe( pipe_slow );
4567%}
4568
4569instruct vsra2S_imm(vecS dst, immI8 shift) %{
4570  predicate(n->as_Vector()->length() == 2);
4571  match(Set dst (RShiftVS dst shift));
4572  format %{ "psraw   $dst,$shift\t! arithmetic right shift packed2S" %}
4573  ins_encode %{
4574    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
4575  %}
4576  ins_pipe( pipe_slow );
4577%}
4578
4579instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{
4580  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4581  match(Set dst (RShiftVS src shift));
4582  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
4583  ins_encode %{
4584    bool vector256 = false;
4585    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4586  %}
4587  ins_pipe( pipe_slow );
4588%}
4589
4590instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
4591  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4592  match(Set dst (RShiftVS src shift));
4593  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
4594  ins_encode %{
4595    bool vector256 = false;
4596    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4597  %}
4598  ins_pipe( pipe_slow );
4599%}

instruct vsra4S(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw   $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw   $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw   $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw   $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}
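
// The packed16S forms above require AVX2 (UseAVX > 1): AVX1 has no 256-bit
// integer shifts, so vector256 = true is only emitted when AVX2 is available.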

// Integers vector arithmetic right shift
instruct vsra2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad   $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad   $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad   $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad   $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}
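
// psrad/vpsrad preserve the sign of each 32-bit lane, unlike the logical
// URShiftV* rules earlier in this file.  For a count of 1, per lane:
//   arithmetic: 0xFFFFFFF8 (-8) >> 1  ->  0xFFFFFFFC (-4)
//   logical:    0xFFFFFFF8      >> 1  ->  0x7FFFFFFC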

// There are no vector arithmetic right shift instructions for longs.
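// (psraw/psrad have no packed 64-bit counterpart in SSE or AVX2, so RShiftVL
// is simply not matched here.)  Purely as an illustration, and not something
// these rules emit, one lane of such a shift could be rebuilt from the
// logical shift by re-extending the sign; src, dst and count are scalar
// stand-ins:
//   uint64_t m = 1ULL << (63 - count);   // position of the original sign bit
//   dst = ((src >> count) ^ m) - m;      // logical shift, then sign-extend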


// --------------------------------- AND --------------------------------------

instruct vand4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV dst src));
  format %{ "pand    $dst,$src\t! and vectors (4 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src1 src2));
  format %{ "vpand   $dst,$src1,$src2\t! and vectors (4 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}
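
// The bitwise rules are element-size agnostic, so their predicates test
// length_in_bytes() rather than length(): one pand/vpand serves vectors of
// bytes, shorts, ints or longs alike (per byte, the result is simply the AND
// of the two input bytes).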

instruct vand8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV dst src));
  format %{ "pand    $dst,$src\t! and vectors (8 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  format %{ "vpand   $dst,$src1,$src2\t! and vectors (8 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV dst src));
  format %{ "pand    $dst,$src\t! and vectors (16 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  format %{ "vpand   $dst,$src1,$src2\t! and vectors (16 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand   $dst,$src,$mem\t! and vectors (16 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
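
// The *_mem variants fold the (LoadVector mem) operand straight into the AVX
// instruction's memory form; they exist only for the non-destructive
// three-operand vpand encoding, not for the two-operand SSE pand.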

instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src1 src2));
  format %{ "vpand   $dst,$src1,$src2\t! and vectors (32 bytes)" %}
  ins_encode %{
    bool vector256 = true;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand   $dst,$src,$mem\t! and vectors (32 bytes)" %}
  ins_encode %{
    bool vector256 = true;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- OR ---------------------------------------

instruct vor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV dst src));
  format %{ "por     $dst,$src\t! or vectors (4 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src1 src2));
  format %{ "vpor    $dst,$src1,$src2\t! or vectors (4 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV dst src));
  format %{ "por     $dst,$src\t! or vectors (8 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  format %{ "vpor    $dst,$src1,$src2\t! or vectors (8 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV dst src));
  format %{ "por     $dst,$src\t! or vectors (16 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  format %{ "vpor    $dst,$src1,$src2\t! or vectors (16 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor    $dst,$src,$mem\t! or vectors (16 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src1 src2));
  format %{ "vpor    $dst,$src1,$src2\t! or vectors (32 bytes)" %}
  ins_encode %{
    bool vector256 = true;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor    $dst,$src,$mem\t! or vectors (32 bytes)" %}
  ins_encode %{
    bool vector256 = true;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
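
// The XOR rules below repeat the same size ladder once more, substituting
// pxor/vpxor for the AND and OR opcodes above.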

// --------------------------------- XOR --------------------------------------

instruct vxor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV dst src));
  format %{ "pxor    $dst,$src\t! xor vectors (4 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV dst src));
  format %{ "pxor    $dst,$src\t! xor vectors (8 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV dst src));
  format %{ "pxor    $dst,$src\t! xor vectors (16 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor   $dst,$src,$mem\t! xor vectors (16 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
  ins_encode %{
    bool vector256 = true;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor   $dst,$src,$mem\t! xor vectors (32 bytes)" %}
  ins_encode %{
    bool vector256 = true;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
