x86.ad revision 3602:da91efe96a93
//
// Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def" name ( register save type, C convention save type,
//                  ideal register type, encoding );
// The definitions in this block pass one further, fifth argument after the
// encoding: the VMReg expression for the slot being defined.
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers. 256-bit registers or 8 words each, labeled (a)-h.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// XMM8-XMM15 must be encoded with REX (VEX for UseAVX).
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM15 preserved across function calls
//              XMM0-XMM3 might hold parameters
//
// Each XMMn below is described by eight reg_def entries: the unsuffixed name
// is word (a) and the suffixes b-h are the following words, expressed as
// xmmN->as_VMReg()->next(1) through next(7). All eight entries of one
// register share the same encoding number.

// XMM0-XMM5: Save-On-Call in both columns on every platform.
reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));

// Under _WIN64, XMM6-XMM15 get SOE as their C convention save type (second
// column), matching the "Windows ABI" note above; the first column stays SOC.
#ifdef _WIN64

reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7));

reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7));

reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7));

reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7));

reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7));

reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7));

reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7));

reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7));

reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7));

reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7));

// Without _WIN64, XMM6 and up are SOC in both columns like XMM0-XMM5.
#else // _WIN64

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));

// XMM8-XMM15 are only defined when _LP64 is set.
#ifdef _LP64

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));

#endif // _LP64

#endif // _WIN64

// Flags register. Its encoding is 16 under _LP64 and 8 otherwise; it has no
// ideal register type (0) and VMRegImpl::Bad() as its VMReg.
#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64

// Allocation chunk holding every XMM slot defined above (XMM8-XMM15 only
// under _LP64).
alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                   XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                   XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                   XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                   XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                   XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                   XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                   XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
#ifdef _LP64
                  ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                   XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                   );

// flags allocation class should be last.
alloc_class chunk2(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for all float registers
// (word (a) of each XMM register only)
reg_class float_reg(XMM0,
                    XMM1,
                    XMM2,
                    XMM3,
                    XMM4,
                    XMM5,
                    XMM6,
                    XMM7
#ifdef _LP64
                   ,XMM8,
                    XMM9,
                    XMM10,
                    XMM11,
                    XMM12,
                    XMM13,
                    XMM14,
                    XMM15
#endif
                    );

// Class for all double registers
// (words (a) and b of each XMM register)
reg_class double_reg(XMM0, XMM0b,
                     XMM1, XMM1b,
                     XMM2, XMM2b,
                     XMM3, XMM3b,
                     XMM4, XMM4b,
                     XMM5, XMM5b,
                     XMM6, XMM6b,
                     XMM7, XMM7b
#ifdef _LP64
                    ,XMM8, XMM8b,
                     XMM9, XMM9b,
                     XMM10, XMM10b,
                     XMM11, XMM11b,
                     XMM12, XMM12b,
                     XMM13, XMM13b,
                     XMM14, XMM14b,
                     XMM15, XMM15b
#endif
                     );

// Class for all 32bit vector registers
reg_class vectors_reg(XMM0,
                      XMM1,
                      XMM2,
                      XMM3,
                      XMM4,
                      XMM5,
                      XMM6,
                      XMM7
#ifdef _LP64
                     ,XMM8,
                      XMM9,
                      XMM10,
                      XMM11,
                      XMM12,
                      XMM13,
                      XMM14,
                      XMM15
#endif
                      );

// Class for all 64bit vector registers
reg_class vectord_reg(XMM0, XMM0b,
                      XMM1, XMM1b,
                      XMM2, XMM2b,
                      XMM3, XMM3b,
                      XMM4, XMM4b,
                      XMM5, XMM5b,
                      XMM6, XMM6b,
                      XMM7, XMM7b
#ifdef _LP64
                     ,XMM8, XMM8b,
                      XMM9, XMM9b,
                      XMM10, XMM10b,
                      XMM11, XMM11b,
                      XMM12, XMM12b,
                      XMM13, XMM13b,
                      XMM14, XMM14b,
                      XMM15, XMM15b
#endif
                      );

// Class for all 128bit vector registers
reg_class vectorx_reg(XMM0, XMM0b, XMM0c, XMM0d,
                      XMM1, XMM1b, XMM1c, XMM1d,
                      XMM2, XMM2b, XMM2c, XMM2d,
                      XMM3, XMM3b, XMM3c, XMM3d,
                      XMM4, XMM4b, XMM4c, XMM4d,
                      XMM5, XMM5b, XMM5c, XMM5d,
                      XMM6, XMM6b, XMM6c, XMM6d,
                      XMM7, XMM7b, XMM7c, XMM7d
#ifdef _LP64
                     ,XMM8, XMM8b, XMM8c, XMM8d,
                      XMM9, XMM9b, XMM9c, XMM9d,
                      XMM10, XMM10b, XMM10c, XMM10d,
                      XMM11, XMM11b, XMM11c, XMM11d,
                      XMM12, XMM12b, XMM12c, XMM12d,
                      XMM13, XMM13b, XMM13c, XMM13d,
                      XMM14, XMM14b, XMM14c, XMM14d,
                      XMM15, XMM15b, XMM15c, XMM15d
#endif
                      );

// Class for all 256bit vector registers
reg_class vectory_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                      XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                      XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                      XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                      XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                      XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                      XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                      XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
#ifdef _LP64
                     ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                      XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                      XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                      XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                      XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                      XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                      XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                      XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                      );

%}

source %{
  // Float masks come from different places depending on platform.
#ifdef _LP64
  // 64-bit VM: the sign mask / sign flip constants are provided by StubRoutines::x86.
  static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
  static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
  static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
  static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
#else
  // 32-bit VM: the constants are the *_pool symbols, returned as addresses.
  static address float_signmask()  { return (address)float_signmask_pool; }
  static address float_signflip()  { return (address)float_signflip_pool; }
  static address double_signmask() { return (address)double_signmask_pool; }
  static address double_signflip() { return (address)double_signflip_pool; }
#endif


// Returns true if the ideal opcode has a match rule AND the flags currently in
// effect allow that rule to be used.
//
// Each case must end in 'break'. The PopCount case previously fell through
// into the MulVI case, so with UsePopCountInstruction enabled PopCountI and
// PopCountL were additionally subjected to the "SSE4.1 or AVX" test that is
// only meant for MulVI, and could be reported as unsupported.
const bool Matcher::match_rule_supported(int opcode) {
  if (!has_match_rule(opcode))
    return false;

  switch (opcode) {
    case Op_PopCountI:
    case Op_PopCountL:
      if (!UsePopCountInstruction)
        return false;
      break;
    case Op_MulVI:
      if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
        return false;
      break;
  }

  return true;  // Per default match rules are supported.
}

// Max vector size in bytes. 0 if not supported.
const int Matcher::vector_width_in_bytes(BasicType bt) {
  assert(is_java_primitive(bt), "only primitive type vectors");
  if (UseSSE < 2) return 0;
  // SSE2 supports 128bit vectors for all types.
  // AVX2 supports 256bit vectors for all types.
  int size = (UseAVX > 1) ? 32 : 16;
  // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
  if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
    size = 32;
  // Use flag to limit vector size.
  size = MIN2(size,(int)MaxVectorSize);
  // Minimum 2 values in vector (or 4 for bytes).
  // The cases below cascade on purpose: a wider element type is checked
  // against its own minimum and then against every smaller minimum as well.
  switch (bt) {
  case T_DOUBLE:
  case T_LONG:
    if (size < 16) return 0;
    // fall through
  case T_FLOAT:
  case T_INT:
    if (size < 8) return 0;
    // fall through
  case T_BOOLEAN:
  case T_BYTE:
  case T_CHAR:
  case T_SHORT:
    if (size < 4) return 0;
    break;
  default:
    ShouldNotReachHere();
  }
  return size;
}

// Limits on vector size (number of elements) loaded into vector.
const int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
const int Matcher::min_vector_size(const BasicType bt) {
  int max_size = max_vector_size(bt);
  // Min size which can be loaded into vector is 4 bytes.
  int size = (type2aelembytes(bt) == 1) ? 4 : 2;
  return MIN2(size,max_size);
}

// Vector ideal reg corresponding to specified size in bytes
const int Matcher::vector_ideal_reg(int size) {
  assert(MaxVectorSize >= size, "");
  switch(size) {
    case  4: return Op_VecS;
    case  8: return Op_VecD;
    case 16: return Op_VecX;
    case 32: return Op_VecY;
  }
  ShouldNotReachHere();
  return 0;
}

// x86 supports misaligned vectors store/load.
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}

// Helper methods for MachSpillCopyNode::implementation().

// Register-to-register vector copy.
//   cbuf != NULL : emit the move into cbuf and return the number of bytes
//                  actually emitted.
//   cbuf == NULL : in non-PRODUCT builds, when !do_size, print the instruction
//                  to st; in every case return the fixed size 4.
// ireg selects the instruction: movdqu for Op_VecS/Op_VecD/Op_VecX, vmovdqu
// for Op_VecY.
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st) {
  // In 64-bit VM size calculation is very complex. Emitting instructions
  // into scratch buffer is used to get size in 64-bit VM.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  // The '&&' group is parenthesized explicitly; the meaning is unchanged
  // ('&&' already binds tighter than '||').
  assert(ireg == Op_VecS || // 32bit vector
         ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
          (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
         "no non-adjacent vector moves" );
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    switch (ireg) {
    case Op_VecS: // copy whole register
    case Op_VecD:
    case Op_VecX:
      __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      break;
    case Op_VecY:
      __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      break;
    default:
      ShouldNotReachHere();
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == 4, "incorrect size calculattion");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    switch (ireg) {
    case Op_VecS:
    case Op_VecD:
    case Op_VecX:
      st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    case Op_VecY:
      st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  return 4;
}

// Vector spill between an XMM register and the stack slot [rsp + stack_offset].
//   is_load  : true  = stack -> register, false = register -> stack.
//   cbuf != NULL : emit the instruction and return the number of bytes emitted.
//   cbuf == NULL : in non-PRODUCT builds, when !do_size, print the instruction
//                  to st; in every case return 5 + offset_size, where
//                  offset_size is 0 for stack_offset == 0, 1 for
//                  stack_offset < 0x80 and 4 otherwise.
// ireg selects the instruction: movdl (VecS), movq (VecD), movdqu (VecX),
// vmovdqu (VecY).
static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st) {
  // In 64-bit VM size calculation is very complex. Emitting instructions
  // into scratch buffer is used to get size in 64-bit VM.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecD:
        __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecX:
        __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecY:
        __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecD:
        __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecX:
        __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecY:
        __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      default:
        ShouldNotReachHere();
      }
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == (5+offset_size), "incorrect size calculattion");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecD:
        st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecX:
        st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecY:
        st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecD:
        st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecX:
        st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecY:
        st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      default:
        ShouldNotReachHere();
      }
    }
#endif
  }
  int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return 5+offset_size;
}

// Builds a 32-bit pattern by repeating the low "width" bytes of con, and
// returns that bit pattern reinterpreted as a jfloat (no numeric conversion).
static inline jfloat replicate4_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 32bit.
  assert(width == 1 || width == 2, "only byte or short types here");
  int bit_width = width * 8;
  jint val = con;
  val &= (1 << bit_width) - 1;  // mask off sign bits
  // Each pass doubles the number of populated bits.
  while(bit_width < 32) {
    val |= (val << bit_width);
    bit_width <<= 1;
  }
  jfloat fval = *((jfloat*) &val);  // coerce to float type
  return fval;
}

// Builds a 64-bit pattern by repeating the low "width" bytes of con, and
// returns that bit pattern reinterpreted as a jdouble (no numeric conversion).
static inline jdouble replicate8_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
  assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
  int bit_width = width * 8;
  jlong val = con;
  val &= (((jlong) 1) << bit_width) - 1;  // mask off sign bits
  // Each pass doubles the number of populated bits.
  while(bit_width < 64) {
    val |= (val << bit_width);
    bit_width <<= 1;
  }
  jdouble dval = *((jdouble*) &val);  // coerce to double type
  return dval;
}

#ifndef PRODUCT
  // Prints the padding nop together with its byte count (_count).
  void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
    st->print("nop \t# %d bytes pad for loops and calls", _count);
  }
#endif

  // Emits the padding by passing _count to MacroAssembler::nop.
  void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
    MacroAssembler _masm(&cbuf);
    __ nop(_count);
  }

  // The node's size is reported as _count.
  uint MachNopNode::size(PhaseRegAlloc*) const {
    return _count;
  }

#ifndef PRODUCT
  void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
    st->print("# breakpoint");
  }
#endif

  // Emits a single int3.
  void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
    MacroAssembler _masm(&cbuf);
    __ int3();
  }

  // Size is delegated to the generic MachNode::size().
  uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
    return MachNode::size(ra_);
  }

%}

encode %{

  // Saves RSP into rbp_mh_SP_save. In debug builds the emitted size is checked
  // against preserve_SP_size().
  enc_class preserve_SP %{
    debug_only(int off0 = cbuf.insts_size());
    MacroAssembler _masm(&cbuf);
    // RBP is preserved across all calls, even compiled calls.
    // Use it to preserve RSP in places where the callee might change the SP.
    __ movptr(rbp_mh_SP_save, rsp);
    debug_only(int off1 = cbuf.insts_size());
    assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
  %}

  // Reloads RSP from rbp_mh_SP_save (the inverse of preserve_SP).
  enc_class restore_SP %{
    MacroAssembler _masm(&cbuf);
    __ movptr(rsp, rbp_mh_SP_save);
  %}

  // With VerifyStackAtCalls set, emits a check that the word at
  // [rsp + framesize] still holds 0xbadb100d and executes int3 if it does not.
  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      MacroAssembler _masm(&cbuf);
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
  %}

%}


//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

// Vectors
// One register operand per vector width; each is allocated from its own
// register class and matches the corresponding ideal vector register type.
operand vecS() %{
  constraint(ALLOC_IN_RC(vectors_reg));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

operand vecD() %{
  constraint(ALLOC_IN_RC(vectord_reg));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

operand vecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

operand vecY() %{
  constraint(ALLOC_IN_RC(vectory_reg));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}


// INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)

// ============================================================================

// A Halt node is encoded as a single int3.
instruct ShouldNotReachHere() %{
  match(Halt);
  format %{ "int3\t# ShouldNotReachHere" %}
  ins_encode %{
    __ int3();
  %}
  ins_pipe(pipe_slow);
%}

// ============================================================================
// Scalar floating-point arithmetic.
//
// Every operation comes in two families selected by predicate:
//   *_reg / *_mem / *_imm             - UseAVX == 0: two-operand SSE form,
//                                       dst is also the first source.
//   *_reg_reg / *_reg_mem / *_reg_imm - UseAVX > 0: three-operand form with a
//                                       separate destination.
// The *_imm variants take their constant operand from the constant table.

// ---- AddF -------------------------------------------------------------------

instruct addF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst src));
  ins_cost(150);
  format %{ "addss $dst, $src" %}
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst (LoadF src)));
  ins_cost(150);
  format %{ "addss $dst, $src" %}
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst con));
  ins_cost(150);
  format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ addss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 src2));
  ins_cost(150);
  format %{ "vaddss $dst, $src1, $src2" %}
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 (LoadF src2)));
  ins_cost(150);
  format %{ "vaddss $dst, $src1, $src2" %}
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src con));
  ins_cost(150);
  format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// ---- AddD -------------------------------------------------------------------

instruct addD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AddD dst src));
  ins_cost(150);
  format %{ "addsd $dst, $src" %}
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AddD dst (LoadD src)));
  ins_cost(150);
  format %{ "addsd $dst, $src" %}
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AddD dst con));
  ins_cost(150);
  format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ addsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 src2));
  ins_cost(150);
  format %{ "vaddsd $dst, $src1, $src2" %}
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 (LoadD src2)));
  ins_cost(150);
  format %{ "vaddsd $dst, $src1, $src2" %}
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src con));
  ins_cost(150);
  format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// ---- SubF -------------------------------------------------------------------

instruct subF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (SubF dst src));
  ins_cost(150);
  format %{ "subss $dst, $src" %}
  ins_encode %{
    __ subss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (SubF dst (LoadF src)));
  ins_cost(150);
  format %{ "subss $dst, $src" %}
  ins_encode %{
    __ subss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (SubF dst con));
  ins_cost(150);
  format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ subss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 src2));
  ins_cost(150);
  format %{ "vsubss $dst, $src1, $src2" %}
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 (LoadF src2)));
  ins_cost(150);
  format %{ "vsubss $dst, $src1, $src2" %}
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src con));
  ins_cost(150);
  format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// ---- SubD -------------------------------------------------------------------

instruct subD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (SubD dst src));
  ins_cost(150);
  format %{ "subsd $dst, $src" %}
  ins_encode %{
    __ subsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (SubD dst (LoadD src)));
  ins_cost(150);
  format %{ "subsd $dst, $src" %}
  ins_encode %{
    __ subsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (SubD dst con));
  ins_cost(150);
  format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ subsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 src2));
  ins_cost(150);
  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 (LoadD src2)));
  ins_cost(150);
  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src con));
  ins_cost(150);
  format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// ---- MulF -------------------------------------------------------------------

instruct mulF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst src));
  ins_cost(150);
  format %{ "mulss $dst, $src" %}
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst (LoadF src)));
  ins_cost(150);
  format %{ "mulss $dst, $src" %}
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst con));
  ins_cost(150);
  format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ mulss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 src2));
  ins_cost(150);
  format %{ "vmulss $dst, $src1, $src2" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 (LoadF src2)));
  ins_cost(150);
  format %{ "vmulss $dst, $src1, $src2" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src con));
  ins_cost(150);
  format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// ---- MulD -------------------------------------------------------------------

instruct mulD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst src));
  ins_cost(150);
  format %{ "mulsd $dst, $src" %}
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst (LoadD src)));
  ins_cost(150);
  format %{ "mulsd $dst, $src" %}
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst con));
  ins_cost(150);
  format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 src2));
  ins_cost(150);
  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 (LoadD src2)));
  ins_cost(150);
  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src con));
  ins_cost(150);
  format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// ---- DivF -------------------------------------------------------------------

instruct divF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst src));
  ins_cost(150);
  format %{ "divss $dst, $src" %}
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst (LoadF src)));
  ins_cost(150);
  format %{ "divss $dst, $src" %}
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst con));
  ins_cost(150);
  format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ divss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 src2));
  ins_cost(150);
  format %{ "vdivss $dst, $src1, $src2" %}
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 (LoadF src2)));
  ins_cost(150);
  format %{ "vdivss $dst, $src1, $src2" %}
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src con));
  ins_cost(150);
  format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// ---- DivD -------------------------------------------------------------------

instruct divD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst src));
  ins_cost(150);
  format %{ "divsd $dst, $src" %}
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst (LoadD src)));
  ins_cost(150);
  format %{ "divsd $dst, $src" %}
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst con));
  ins_cost(150);
  format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ divsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 src2));
  ins_cost(150);
  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 (LoadD src2)));
  ins_cost(150);
  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src con));
  ins_cost(150);
  format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// ---- Abs / Neg --------------------------------------------------------------
// Absolute value ANDs with the sign-mask constant; negation XORs with the
// sign-flip constant. The AVX variants pass vector256 = false.

instruct absF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AbsF dst));
  ins_cost(150);
  format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absF_reg_reg(regF dst, regF src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), /* vector256 */ false);
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AbsD dst));
  ins_cost(150);
  format %{ "andpd $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg_reg(regD dst, regD src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), /* vector256 */ false);
  %}
  ins_pipe(pipe_slow);
%}

instruct negF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (NegF dst));
  ins_cost(150);
  format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negF_reg_reg(regF dst, regF src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegF src));
  ins_cost(150);
  format %{ "vxorps $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ vxorps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signflip()), /* vector256 */ false);
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (NegD dst));
  ins_cost(150);
  format %{ "xorpd $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

// Double negation when UseAVX > 0: XOR the source with the double sign-flip
// constant into a separate destination (vector256 = false).
instruct negD_reg_reg(regD dst, regD src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegD src));
  ins_cost(150);
  format %{ "vxorpd $dst, $src, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ vxorpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signflip()), /* vector256 */ false);
  %}
  ins_pipe(pipe_slow);
%}

// ---- Sqrt -------------------------------------------------------------------
// The float forms match the ideal shape ConvD2F(SqrtD(ConvF2D x)) and encode it
// as one sqrtss; the double forms match SqrtD directly. Each has register,
// memory and constant-table source variants.

instruct sqrtF_reg(regF dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
  ins_cost(150);
  format %{ "sqrtss $dst, $src" %}
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_mem(regF dst, memory src) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
  ins_cost(150);
  format %{ "sqrtss $dst, $src" %}
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_imm(regF dst, immF con) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D con))));
  ins_cost(150);
  format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_reg(regD dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD src));
  ins_cost(150);
  format %{ "sqrtsd $dst, $src" %}
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_mem(regD dst, memory src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD (LoadD src)));
  ins_cost(150);
  format %{ "sqrtsd $dst, $src" %}
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

1591instruct sqrtD_imm(regD dst, immD con) %{ 1592 predicate(UseSSE>=2); 1593 match(Set dst (SqrtD con)); 1594 format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 1595 ins_cost(150); 1596 ins_encode %{ 1597 __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); 1598 %} 1599 ins_pipe(pipe_slow); 1600%} 1601 1602 1603// ====================VECTOR INSTRUCTIONS===================================== 1604 1605// Load vectors (4 bytes long) 1606instruct loadV4(vecS dst, memory mem) %{ 1607 predicate(n->as_LoadVector()->memory_size() == 4); 1608 match(Set dst (LoadVector mem)); 1609 ins_cost(125); 1610 format %{ "movd $dst,$mem\t! load vector (4 bytes)" %} 1611 ins_encode %{ 1612 __ movdl($dst$$XMMRegister, $mem$$Address); 1613 %} 1614 ins_pipe( pipe_slow ); 1615%} 1616 1617// Load vectors (8 bytes long) 1618instruct loadV8(vecD dst, memory mem) %{ 1619 predicate(n->as_LoadVector()->memory_size() == 8); 1620 match(Set dst (LoadVector mem)); 1621 ins_cost(125); 1622 format %{ "movq $dst,$mem\t! load vector (8 bytes)" %} 1623 ins_encode %{ 1624 __ movq($dst$$XMMRegister, $mem$$Address); 1625 %} 1626 ins_pipe( pipe_slow ); 1627%} 1628 1629// Load vectors (16 bytes long) 1630instruct loadV16(vecX dst, memory mem) %{ 1631 predicate(n->as_LoadVector()->memory_size() == 16); 1632 match(Set dst (LoadVector mem)); 1633 ins_cost(125); 1634 format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %} 1635 ins_encode %{ 1636 __ movdqu($dst$$XMMRegister, $mem$$Address); 1637 %} 1638 ins_pipe( pipe_slow ); 1639%} 1640 1641// Load vectors (32 bytes long) 1642instruct loadV32(vecY dst, memory mem) %{ 1643 predicate(n->as_LoadVector()->memory_size() == 32); 1644 match(Set dst (LoadVector mem)); 1645 ins_cost(125); 1646 format %{ "vmovdqu $dst,$mem\t! 
load vector (32 bytes)" %} 1647 ins_encode %{ 1648 __ vmovdqu($dst$$XMMRegister, $mem$$Address); 1649 %} 1650 ins_pipe( pipe_slow ); 1651%} 1652 1653// Store vectors 1654instruct storeV4(memory mem, vecS src) %{ 1655 predicate(n->as_StoreVector()->memory_size() == 4); 1656 match(Set mem (StoreVector mem src)); 1657 ins_cost(145); 1658 format %{ "movd $mem,$src\t! store vector (4 bytes)" %} 1659 ins_encode %{ 1660 __ movdl($mem$$Address, $src$$XMMRegister); 1661 %} 1662 ins_pipe( pipe_slow ); 1663%} 1664 1665instruct storeV8(memory mem, vecD src) %{ 1666 predicate(n->as_StoreVector()->memory_size() == 8); 1667 match(Set mem (StoreVector mem src)); 1668 ins_cost(145); 1669 format %{ "movq $mem,$src\t! store vector (8 bytes)" %} 1670 ins_encode %{ 1671 __ movq($mem$$Address, $src$$XMMRegister); 1672 %} 1673 ins_pipe( pipe_slow ); 1674%} 1675 1676instruct storeV16(memory mem, vecX src) %{ 1677 predicate(n->as_StoreVector()->memory_size() == 16); 1678 match(Set mem (StoreVector mem src)); 1679 ins_cost(145); 1680 format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %} 1681 ins_encode %{ 1682 __ movdqu($mem$$Address, $src$$XMMRegister); 1683 %} 1684 ins_pipe( pipe_slow ); 1685%} 1686 1687instruct storeV32(memory mem, vecY src) %{ 1688 predicate(n->as_StoreVector()->memory_size() == 32); 1689 match(Set mem (StoreVector mem src)); 1690 ins_cost(145); 1691 format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %} 1692 ins_encode %{ 1693 __ vmovdqu($mem$$Address, $src$$XMMRegister); 1694 %} 1695 ins_pipe( pipe_slow ); 1696%} 1697 1698// Replicate byte scalar to be vector 1699instruct Repl4B(vecS dst, rRegI src) %{ 1700 predicate(n->as_Vector()->length() == 4); 1701 match(Set dst (ReplicateB src)); 1702 format %{ "movd $dst,$src\n\t" 1703 "punpcklbw $dst,$dst\n\t" 1704 "pshuflw $dst,$dst,0x00\t! 
replicate4B" %} 1705 ins_encode %{ 1706 __ movdl($dst$$XMMRegister, $src$$Register); 1707 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1708 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1709 %} 1710 ins_pipe( pipe_slow ); 1711%} 1712 1713instruct Repl8B(vecD dst, rRegI src) %{ 1714 predicate(n->as_Vector()->length() == 8); 1715 match(Set dst (ReplicateB src)); 1716 format %{ "movd $dst,$src\n\t" 1717 "punpcklbw $dst,$dst\n\t" 1718 "pshuflw $dst,$dst,0x00\t! replicate8B" %} 1719 ins_encode %{ 1720 __ movdl($dst$$XMMRegister, $src$$Register); 1721 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1722 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1723 %} 1724 ins_pipe( pipe_slow ); 1725%} 1726 1727instruct Repl16B(vecX dst, rRegI src) %{ 1728 predicate(n->as_Vector()->length() == 16); 1729 match(Set dst (ReplicateB src)); 1730 format %{ "movd $dst,$src\n\t" 1731 "punpcklbw $dst,$dst\n\t" 1732 "pshuflw $dst,$dst,0x00\n\t" 1733 "punpcklqdq $dst,$dst\t! replicate16B" %} 1734 ins_encode %{ 1735 __ movdl($dst$$XMMRegister, $src$$Register); 1736 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1737 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1738 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1739 %} 1740 ins_pipe( pipe_slow ); 1741%} 1742 1743instruct Repl32B(vecY dst, rRegI src) %{ 1744 predicate(n->as_Vector()->length() == 32); 1745 match(Set dst (ReplicateB src)); 1746 format %{ "movd $dst,$src\n\t" 1747 "punpcklbw $dst,$dst\n\t" 1748 "pshuflw $dst,$dst,0x00\n\t" 1749 "punpcklqdq $dst,$dst\n\t" 1750 "vinserti128h $dst,$dst,$dst\t! 
replicate32B" %} 1751 ins_encode %{ 1752 __ movdl($dst$$XMMRegister, $src$$Register); 1753 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1754 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1755 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1756 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 1757 %} 1758 ins_pipe( pipe_slow ); 1759%} 1760 1761// Replicate byte scalar immediate to be vector by loading from const table. 1762instruct Repl4B_imm(vecS dst, immI con) %{ 1763 predicate(n->as_Vector()->length() == 4); 1764 match(Set dst (ReplicateB con)); 1765 format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %} 1766 ins_encode %{ 1767 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1))); 1768 %} 1769 ins_pipe( pipe_slow ); 1770%} 1771 1772instruct Repl8B_imm(vecD dst, immI con) %{ 1773 predicate(n->as_Vector()->length() == 8); 1774 match(Set dst (ReplicateB con)); 1775 format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %} 1776 ins_encode %{ 1777 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 1778 %} 1779 ins_pipe( pipe_slow ); 1780%} 1781 1782instruct Repl16B_imm(vecX dst, immI con) %{ 1783 predicate(n->as_Vector()->length() == 16); 1784 match(Set dst (ReplicateB con)); 1785 format %{ "movq $dst,[$constantaddress]\n\t" 1786 "punpcklqdq $dst,$dst\t! replicate16B($con)" %} 1787 ins_encode %{ 1788 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 1789 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1790 %} 1791 ins_pipe( pipe_slow ); 1792%} 1793 1794instruct Repl32B_imm(vecY dst, immI con) %{ 1795 predicate(n->as_Vector()->length() == 32); 1796 match(Set dst (ReplicateB con)); 1797 format %{ "movq $dst,[$constantaddress]\n\t" 1798 "punpcklqdq $dst,$dst\n\t" 1799 "vinserti128h $dst,$dst,$dst\t! 
lreplicate32B($con)" %} 1800 ins_encode %{ 1801 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 1802 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1803 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 1804 %} 1805 ins_pipe( pipe_slow ); 1806%} 1807 1808// Replicate byte scalar zero to be vector 1809instruct Repl4B_zero(vecS dst, immI0 zero) %{ 1810 predicate(n->as_Vector()->length() == 4); 1811 match(Set dst (ReplicateB zero)); 1812 format %{ "pxor $dst,$dst\t! replicate4B zero" %} 1813 ins_encode %{ 1814 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 1815 %} 1816 ins_pipe( fpu_reg_reg ); 1817%} 1818 1819instruct Repl8B_zero(vecD dst, immI0 zero) %{ 1820 predicate(n->as_Vector()->length() == 8); 1821 match(Set dst (ReplicateB zero)); 1822 format %{ "pxor $dst,$dst\t! replicate8B zero" %} 1823 ins_encode %{ 1824 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 1825 %} 1826 ins_pipe( fpu_reg_reg ); 1827%} 1828 1829instruct Repl16B_zero(vecX dst, immI0 zero) %{ 1830 predicate(n->as_Vector()->length() == 16); 1831 match(Set dst (ReplicateB zero)); 1832 format %{ "pxor $dst,$dst\t! replicate16B zero" %} 1833 ins_encode %{ 1834 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 1835 %} 1836 ins_pipe( fpu_reg_reg ); 1837%} 1838 1839instruct Repl32B_zero(vecY dst, immI0 zero) %{ 1840 predicate(n->as_Vector()->length() == 32); 1841 match(Set dst (ReplicateB zero)); 1842 format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %} 1843 ins_encode %{ 1844 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 
1845 bool vector256 = true; 1846 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 1847 %} 1848 ins_pipe( fpu_reg_reg ); 1849%} 1850 1851// Replicate char/short (2 byte) scalar to be vector 1852instruct Repl2S(vecS dst, rRegI src) %{ 1853 predicate(n->as_Vector()->length() == 2); 1854 match(Set dst (ReplicateS src)); 1855 format %{ "movd $dst,$src\n\t" 1856 "pshuflw $dst,$dst,0x00\t! replicate2S" %} 1857 ins_encode %{ 1858 __ movdl($dst$$XMMRegister, $src$$Register); 1859 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1860 %} 1861 ins_pipe( fpu_reg_reg ); 1862%} 1863 1864instruct Repl4S(vecD dst, rRegI src) %{ 1865 predicate(n->as_Vector()->length() == 4); 1866 match(Set dst (ReplicateS src)); 1867 format %{ "movd $dst,$src\n\t" 1868 "pshuflw $dst,$dst,0x00\t! replicate4S" %} 1869 ins_encode %{ 1870 __ movdl($dst$$XMMRegister, $src$$Register); 1871 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1872 %} 1873 ins_pipe( fpu_reg_reg ); 1874%} 1875 1876instruct Repl8S(vecX dst, rRegI src) %{ 1877 predicate(n->as_Vector()->length() == 8); 1878 match(Set dst (ReplicateS src)); 1879 format %{ "movd $dst,$src\n\t" 1880 "pshuflw $dst,$dst,0x00\n\t" 1881 "punpcklqdq $dst,$dst\t! replicate8S" %} 1882 ins_encode %{ 1883 __ movdl($dst$$XMMRegister, $src$$Register); 1884 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1885 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1886 %} 1887 ins_pipe( pipe_slow ); 1888%} 1889 1890instruct Repl16S(vecY dst, rRegI src) %{ 1891 predicate(n->as_Vector()->length() == 16); 1892 match(Set dst (ReplicateS src)); 1893 format %{ "movd $dst,$src\n\t" 1894 "pshuflw $dst,$dst,0x00\n\t" 1895 "punpcklqdq $dst,$dst\n\t" 1896 "vinserti128h $dst,$dst,$dst\t! 
replicate16S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
// The constant-table entry is built by replicate4_imm / replicate8_imm with an
// element width of 2.

// ReplicateS of an int immediate, vector length 2.
instruct Repl2S_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS con));
  format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %}
  ins_encode %{
    XMMRegister vec = $dst$$XMMRegister;
    __ movdl(vec, $constantaddress(replicate4_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// ReplicateS of an int immediate, vector length 4.
instruct Repl4S_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %}
  ins_encode %{
    XMMRegister vec = $dst$$XMMRegister;
    __ movq(vec, $constantaddress(replicate8_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// ReplicateS of an int immediate, vector length 8: load 8 bytes, then
// duplicate the low quadword.
instruct Repl8S_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate8S($con)" %}
  ins_encode %{
    XMMRegister vec = $dst$$XMMRegister;
    __ movq(vec, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ punpcklqdq(vec, vec);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! 
replicate16S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate char/short (2 byte) scalar zero to be vector.
// The destination is cleared by XOR-ing it with itself.

// ReplicateS of constant zero, vector length 2.
instruct Repl2S_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate2S zero" %}
  ins_encode %{
    XMMRegister vec = $dst$$XMMRegister;
    __ pxor(vec, vec);
  %}
  ins_pipe( fpu_reg_reg );
%}

// ReplicateS of constant zero, vector length 4.
instruct Repl4S_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate4S zero" %}
  ins_encode %{
    XMMRegister vec = $dst$$XMMRegister;
    __ pxor(vec, vec);
  %}
  ins_pipe( fpu_reg_reg );
%}

// ReplicateS of constant zero, vector length 8.
instruct Repl8S_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate8S zero" %}
  ins_encode %{
    XMMRegister vec = $dst$$XMMRegister;
    __ pxor(vec, vec);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16S_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
bool vector256 = true;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar to be vector.
// movd puts the GPR into the low dword; pshufd 0x00 copies that dword into
// all four dword lanes.

// ReplicateI from a general register, vector length 2.
instruct Repl2I(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    XMMRegister vec = $dst$$XMMRegister;
    __ movdl(vec, $src$$Register);
    __ pshufd(vec, vec, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// ReplicateI from a general register, vector length 4.
instruct Repl4I(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate4I" %}
  ins_encode %{
    XMMRegister vec = $dst$$XMMRegister;
    __ movdl(vec, $src$$Register);
    __ pshufd(vec, vec, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

// ReplicateI from a general register, vector length 8: the 128-bit result is
// additionally passed through vinserti128h with dst as every operand.
instruct Repl8I(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
  ins_encode %{
    XMMRegister vec = $dst$$XMMRegister;
    __ movdl(vec, $src$$Register);
    __ pshufd(vec, vec, 0x00);
    __ vinserti128h(vec, vec, vec);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
instruct Repl2I_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! 
replicate2I($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// ReplicateI of an int immediate, vector length 4: load the 8-byte constant
// built by replicate8_imm(con, 4), then duplicate the low quadword.
instruct Repl4I_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t"
            "punpcklqdq $dst,$dst" %}
  ins_encode %{
    XMMRegister vec = $dst$$XMMRegister;
    __ movq(vec, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq(vec, vec);
  %}
  ins_pipe( pipe_slow );
%}

// ReplicateI of an int immediate, vector length 8: as the 4-lane form, followed
// by vinserti128h with dst as every operand.
instruct Repl8I_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst" %}
  ins_encode %{
    XMMRegister vec = $dst$$XMMRegister;
    __ movq(vec, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq(vec, vec);
    __ vinserti128h(vec, vec, vec);
  %}
  ins_pipe( pipe_slow );
%}

// Integer could be loaded into xmm register directly from memory.
// These forms match ReplicateI applied to a LoadI, so the load is folded into
// the movd.
instruct Repl2I_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    XMMRegister vec = $dst$$XMMRegister;
    __ movdl(vec, $mem$$Address);
    __ pshufd(vec, vec, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\t! 
replicate4I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

// ReplicateI of a LoadI, vector length 8: load the dword, copy it into all four
// dword lanes, then pass the result through vinserti128h with dst as every operand.
instruct Repl8I_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate integer (4 byte) scalar zero to be vector
// The destination is cleared by XOR-ing it with itself.

// ReplicateI of constant zero, vector length 2.
// The format annotation now says "zero", like the other *_zero forms.
instruct Repl2I_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate2I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// ReplicateI of constant zero, vector length 4.
// The stray ")" has been removed from the format annotation.
instruct Repl4I_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate4I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8I_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
bool vector256 = true;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate long (8 byte) scalar to be vector.
// Under _LP64 the long sits in one general register and is moved with movdq;
// the other branch takes an eRegL pair plus an XMM temporary.
#ifdef _LP64
// ReplicateL from a general register, vector length 2.
instruct Repl2L(vecX dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    XMMRegister vec = $dst$$XMMRegister;
    __ movdq(vec, $src$$Register);
    __ punpcklqdq(vec, vec);
  %}
  ins_pipe( pipe_slow );
%}

// ReplicateL from a general register, vector length 4.
instruct Repl4L(vecY dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
  ins_encode %{
    XMMRegister vec = $dst$$XMMRegister;
    __ movdq(vec, $src$$Register);
    __ punpcklqdq(vec, vec);
    __ vinserti128h(vec, vec, vec);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\t! 
replicate2L"%}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// ReplicateL from an eRegL pair, vector length 4. The low and high halves are
// moved into dst and tmp separately, interleaved with punpckldq to form the
// 64-bit value, and then widened.
instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
  ins_encode %{
    XMMRegister vec  = $dst$$XMMRegister;
    XMMRegister high = $tmp$$XMMRegister;
    __ movdl(vec, $src$$Register);
    __ movdl(high, HIGH_FROM_LOW($src$$Register));
    __ punpckldq(vec, high);
    __ punpcklqdq(vec, vec);
    __ vinserti128h(vec, vec, vec);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
// ReplicateL of a long immediate, vector length 2.
instruct Repl2L_imm(vecX dst, immL con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L($con)" %}
  ins_encode %{
    XMMRegister vec = $dst$$XMMRegister;
    __ movq(vec, $constantaddress($con));
    __ punpcklqdq(vec, vec);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L_imm(vecY dst, immL con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! 
replicate4L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Long could be loaded into xmm register directly from memory.
// These forms match ReplicateL applied to a LoadL, so the load is folded into
// the movq.

// ReplicateL of a LoadL, vector length 2.
instruct Repl2L_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "movq $dst,$mem\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    XMMRegister vec = $dst$$XMMRegister;
    __ movq(vec, $mem$$Address);
    __ punpcklqdq(vec, vec);
  %}
  ins_pipe( pipe_slow );
%}

// ReplicateL of a LoadL, vector length 4.
instruct Repl4L_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "movq $dst,$mem\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
  ins_encode %{
    XMMRegister vec = $dst$$XMMRegister;
    __ movq(vec, $mem$$Address);
    __ punpcklqdq(vec, vec);
    __ vinserti128h(vec, vec, vec);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate long (8 byte) scalar zero to be vector.
// ReplicateL of constant zero, vector length 2.
instruct Repl2L_zero(vecX dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL zero));
  format %{ "pxor $dst,$dst\t! replicate2L zero" %}
  ins_encode %{
    XMMRegister vec = $dst$$XMMRegister;
    __ pxor(vec, vec);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4L_zero(vecY dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
bool vector256 = true;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate float (4 byte) scalar to be vector
// The source is already in an XMM register (regF), so a single pshufd with
// selector 0x00 copies its low dword into every dword lane of dst.

// ReplicateF, vector length 2.
// The format string names $src as the shuffle source, matching the emitted
// pshufd(dst, src, 0x00).
instruct Repl2F(vecD dst, regF src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate2F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// ReplicateF, vector length 4.
// The format string names $src as the shuffle source, matching the emitted
// pshufd(dst, src, 0x00).
instruct Repl4F(vecX dst, regF src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate4F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

// ReplicateF, vector length 8: the shuffled 128-bit value is additionally
// passed through vinsertf128h with dst as every operand.
instruct Repl8F(vecY dst, regF src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate float (4 byte) scalar zero to be vector
// ReplicateF of constant zero, vector length 2.
instruct Repl2F_zero(vecD dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! replicate2F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F_zero(vecX dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! 
replicate4F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// ReplicateF of constant zero, vector length 8 (256-bit vxorps).
instruct Repl8F_zero(vecY dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateF zero));
  format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %}
  ins_encode %{
    XMMRegister vec = $dst$$XMMRegister;
    // Last argument is the vector256 flag.
    __ vxorps(vec, vec, vec, true);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate double (8 bytes) scalar to be vector.
// pshufd selector 0x44 picks dwords {0,1,0,1}, i.e. it copies the low
// quadword of src into both quadwords of dst.

// ReplicateD, vector length 2.
instruct Repl2D(vecX dst, regD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\t! replicate2D" %}
  ins_encode %{
    XMMRegister vec = $dst$$XMMRegister;
    __ pshufd(vec, $src$$XMMRegister, 0x44);
  %}
  ins_pipe( pipe_slow );
%}

// ReplicateD, vector length 4.
instruct Repl4D(vecY dst, regD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
  ins_encode %{
    XMMRegister vec = $dst$$XMMRegister;
    __ pshufd(vec, $src$$XMMRegister, 0x44);
    __ vinsertf128h(vec, vec, vec);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate double (8 byte) scalar zero to be vector.
// ReplicateD of constant zero, vector length 2.
instruct Repl2D_zero(vecX dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD zero));
  format %{ "xorpd $dst,$dst\t! replicate2D zero" %}
  ins_encode %{
    XMMRegister vec = $dst$$XMMRegister;
    __ xorpd(vec, vec);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4D_zero(vecY dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateD zero));
  format %{ "vxorpd $dst,$dst,$dst,vect256\t! 
replicate4D zero" %}
  ins_encode %{
    bool vector256 = true;
    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================VECTOR ARITHMETIC=======================================

// --------------------------------- ADD --------------------------------------

// Bytes vector add.
// Two flavours per length: a two-operand form (dst += src) and, when
// UseAVX > 0, a three-operand form (dst = src1 + src2).

// AddVB, vector length 4, destructive two-operand form.
instruct vadd4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed4B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AddVB, vector length 4, three-operand form.
instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %}
  ins_encode %{
    // Last argument is the vector256 flag.
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, false);
  %}
  ins_pipe( pipe_slow );
%}

// AddVB, vector length 8, destructive two-operand form.
instruct vadd8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed8B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AddVB, vector length 8, three-operand form.
instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
  ins_encode %{
    // Last argument is the vector256 flag.
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, false);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! 
add packed16B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AddVB, vector length 16, three-operand register form.
instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
  ins_encode %{
    // Last argument is the vector256 flag.
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, false);
  %}
  ins_pipe( pipe_slow );
%}

// AddVB, vector length 16, second operand folded from a LoadVector.
instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    // Last argument is the vector256 flag.
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, false);
  %}
  ins_pipe( pipe_slow );
%}

// AddVB, vector length 32; gated on UseAVX > 1.
instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
  ins_encode %{
    // Last argument is the vector256 flag.
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, true);
  %}
  ins_pipe( pipe_slow );
%}

// AddVB, vector length 32, second operand folded from a LoadVector.
instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    // Last argument is the vector256 flag.
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, true);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector add
instruct vadd2S(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! 
add packed2S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AddVS, vector length 2, three-operand form.
instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
  ins_encode %{
    // Last argument is the vector256 flag.
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, false);
  %}
  ins_pipe( pipe_slow );
%}

// AddVS, vector length 4, destructive two-operand form.
instruct vadd4S(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed4S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AddVS, vector length 4, three-operand form.
instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    // Last argument is the vector256 flag.
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, false);
  %}
  ins_pipe( pipe_slow );
%}

// AddVS, vector length 8, destructive two-operand form.
instruct vadd8S(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed8S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! 
add packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AddVS, vector length 8, second operand folded from a LoadVector.
instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    // Last argument is the vector256 flag.
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, false);
  %}
  ins_pipe( pipe_slow );
%}

// AddVS, vector length 16; gated on UseAVX > 1.
instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    // Last argument is the vector256 flag.
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, true);
  %}
  ins_pipe( pipe_slow );
%}

// AddVS, vector length 16, second operand folded from a LoadVector.
instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    // Last argument is the vector256 flag.
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, true);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector add
// AddVI, vector length 2, destructive two-operand form.
instruct vadd2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed2I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! 
add packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AddVI, vector length 4, destructive two-operand form.
instruct vadd4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed4I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AddVI, vector length 4, three-operand register form.
instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %}
  ins_encode %{
    // Last argument is the vector256 flag.
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, false);
  %}
  ins_pipe( pipe_slow );
%}

// AddVI, vector length 4, second operand folded from a LoadVector.
instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %}
  ins_encode %{
    // Last argument is the vector256 flag.
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, false);
  %}
  ins_pipe( pipe_slow );
%}

// AddVI, vector length 8; gated on UseAVX > 1.
instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %}
  ins_encode %{
    // Last argument is the vector256 flag.
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! 
add packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector add
// AddVL, vector length 2, destructive two-operand form.
instruct vadd2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL dst src));
  format %{ "paddq $dst,$src\t! add packed2L" %}
  ins_encode %{
    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AddVL, vector length 2, three-operand register form.
instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %}
  ins_encode %{
    // Last argument is the vector256 flag.
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, false);
  %}
  ins_pipe( pipe_slow );
%}

// AddVL, vector length 2, second operand folded from a LoadVector.
instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %}
  ins_encode %{
    // Last argument is the vector256 flag.
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, false);
  %}
  ins_pipe( pipe_slow );
%}

// AddVL, vector length 4; gated on UseAVX > 1.
instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %}
  ins_encode %{
    // Last argument is the vector256 flag.
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! 
add packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector add
// AddVF, vector length 2, destructive two-operand form.
instruct vadd2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed2F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AddVF, vector length 2, three-operand form.
instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %}
  ins_encode %{
    // Last argument is the vector256 flag.
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, false);
  %}
  ins_pipe( pipe_slow );
%}

// AddVF, vector length 4, destructive two-operand form.
instruct vadd4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed4F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AddVF, vector length 4, three-operand register form.
instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %}
  ins_encode %{
    // Last argument is the vector256 flag.
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, false);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! 
add packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AddVF, vector length 8; gated on UseAVX > 0 (unlike the 256-bit integer adds,
// which require UseAVX > 1).
instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %}
  ins_encode %{
    // Last argument is the vector256 flag.
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, true);
  %}
  ins_pipe( pipe_slow );
%}

// AddVF, vector length 8, second operand folded from a LoadVector.
instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed8F" %}
  ins_encode %{
    // Last argument is the vector256 flag.
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, true);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector add
// AddVD, vector length 2, destructive two-operand form.
instruct vadd2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD dst src));
  format %{ "addpd $dst,$src\t! add packed2D" %}
  ins_encode %{
    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AddVD, vector length 2, three-operand register form.
instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %}
  ins_encode %{
    // Last argument is the vector256 flag.
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, false);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! 
add packed2D" %} 2827 ins_encode %{ 2828 bool vector256 = false; 2829 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 2830 %} 2831 ins_pipe( pipe_slow ); 2832%} 2833 2834instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{ 2835 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 2836 match(Set dst (AddVD src1 src2)); 2837 format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %} 2838 ins_encode %{ 2839 bool vector256 = true; 2840 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2841 %} 2842 ins_pipe( pipe_slow ); 2843%} 2844 2845instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{ 2846 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 2847 match(Set dst (AddVD src (LoadVector mem))); 2848 format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %} 2849 ins_encode %{ 2850 bool vector256 = true; 2851 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 2852 %} 2853 ins_pipe( pipe_slow ); 2854%} 2855 2856// --------------------------------- SUB -------------------------------------- 2857 2858// Bytes vector sub 2859instruct vsub4B(vecS dst, vecS src) %{ 2860 predicate(n->as_Vector()->length() == 4); 2861 match(Set dst (SubVB dst src)); 2862 format %{ "psubb $dst,$src\t! sub packed4B" %} 2863 ins_encode %{ 2864 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 2865 %} 2866 ins_pipe( pipe_slow ); 2867%} 2868 2869instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{ 2870 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 2871 match(Set dst (SubVB src1 src2)); 2872 format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %} 2873 ins_encode %{ 2874 bool vector256 = false; 2875 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2876 %} 2877 ins_pipe( pipe_slow ); 2878%} 2879 2880instruct vsub8B(vecD dst, vecD src) %{ 2881 predicate(n->as_Vector()->length() == 8); 2882 match(Set dst (SubVB dst src)); 2883 format %{ "psubb $dst,$src\t! 
sub packed8B" %} 2884 ins_encode %{ 2885 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 2886 %} 2887 ins_pipe( pipe_slow ); 2888%} 2889 2890instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{ 2891 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 2892 match(Set dst (SubVB src1 src2)); 2893 format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %} 2894 ins_encode %{ 2895 bool vector256 = false; 2896 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2897 %} 2898 ins_pipe( pipe_slow ); 2899%} 2900 2901instruct vsub16B(vecX dst, vecX src) %{ 2902 predicate(n->as_Vector()->length() == 16); 2903 match(Set dst (SubVB dst src)); 2904 format %{ "psubb $dst,$src\t! sub packed16B" %} 2905 ins_encode %{ 2906 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 2907 %} 2908 ins_pipe( pipe_slow ); 2909%} 2910 2911instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{ 2912 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 2913 match(Set dst (SubVB src1 src2)); 2914 format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %} 2915 ins_encode %{ 2916 bool vector256 = false; 2917 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2918 %} 2919 ins_pipe( pipe_slow ); 2920%} 2921 2922instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{ 2923 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 2924 match(Set dst (SubVB src (LoadVector mem))); 2925 format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %} 2926 ins_encode %{ 2927 bool vector256 = false; 2928 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 2929 %} 2930 ins_pipe( pipe_slow ); 2931%} 2932 2933instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{ 2934 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 2935 match(Set dst (SubVB src1 src2)); 2936 format %{ "vpsubb $dst,$src1,$src2\t! 
sub packed32B" %} 2937 ins_encode %{ 2938 bool vector256 = true; 2939 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2940 %} 2941 ins_pipe( pipe_slow ); 2942%} 2943 2944instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{ 2945 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 2946 match(Set dst (SubVB src (LoadVector mem))); 2947 format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %} 2948 ins_encode %{ 2949 bool vector256 = true; 2950 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 2951 %} 2952 ins_pipe( pipe_slow ); 2953%} 2954 2955// Shorts/Chars vector sub 2956instruct vsub2S(vecS dst, vecS src) %{ 2957 predicate(n->as_Vector()->length() == 2); 2958 match(Set dst (SubVS dst src)); 2959 format %{ "psubw $dst,$src\t! sub packed2S" %} 2960 ins_encode %{ 2961 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 2962 %} 2963 ins_pipe( pipe_slow ); 2964%} 2965 2966instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{ 2967 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 2968 match(Set dst (SubVS src1 src2)); 2969 format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} 2970 ins_encode %{ 2971 bool vector256 = false; 2972 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2973 %} 2974 ins_pipe( pipe_slow ); 2975%} 2976 2977instruct vsub4S(vecD dst, vecD src) %{ 2978 predicate(n->as_Vector()->length() == 4); 2979 match(Set dst (SubVS dst src)); 2980 format %{ "psubw $dst,$src\t! sub packed4S" %} 2981 ins_encode %{ 2982 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 2983 %} 2984 ins_pipe( pipe_slow ); 2985%} 2986 2987instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{ 2988 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 2989 match(Set dst (SubVS src1 src2)); 2990 format %{ "vpsubw $dst,$src1,$src2\t! 
sub packed4S" %} 2991 ins_encode %{ 2992 bool vector256 = false; 2993 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2994 %} 2995 ins_pipe( pipe_slow ); 2996%} 2997 2998instruct vsub8S(vecX dst, vecX src) %{ 2999 predicate(n->as_Vector()->length() == 8); 3000 match(Set dst (SubVS dst src)); 3001 format %{ "psubw $dst,$src\t! sub packed8S" %} 3002 ins_encode %{ 3003 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 3004 %} 3005 ins_pipe( pipe_slow ); 3006%} 3007 3008instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{ 3009 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 3010 match(Set dst (SubVS src1 src2)); 3011 format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} 3012 ins_encode %{ 3013 bool vector256 = false; 3014 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3015 %} 3016 ins_pipe( pipe_slow ); 3017%} 3018 3019instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{ 3020 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 3021 match(Set dst (SubVS src (LoadVector mem))); 3022 format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %} 3023 ins_encode %{ 3024 bool vector256 = false; 3025 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3026 %} 3027 ins_pipe( pipe_slow ); 3028%} 3029 3030instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{ 3031 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 3032 match(Set dst (SubVS src1 src2)); 3033 format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} 3034 ins_encode %{ 3035 bool vector256 = true; 3036 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3037 %} 3038 ins_pipe( pipe_slow ); 3039%} 3040 3041instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{ 3042 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 3043 match(Set dst (SubVS src (LoadVector mem))); 3044 format %{ "vpsubw $dst,$src,$mem\t! 
sub packed16S" %} 3045 ins_encode %{ 3046 bool vector256 = true; 3047 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3048 %} 3049 ins_pipe( pipe_slow ); 3050%} 3051 3052// Integers vector sub 3053instruct vsub2I(vecD dst, vecD src) %{ 3054 predicate(n->as_Vector()->length() == 2); 3055 match(Set dst (SubVI dst src)); 3056 format %{ "psubd $dst,$src\t! sub packed2I" %} 3057 ins_encode %{ 3058 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 3059 %} 3060 ins_pipe( pipe_slow ); 3061%} 3062 3063instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{ 3064 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3065 match(Set dst (SubVI src1 src2)); 3066 format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %} 3067 ins_encode %{ 3068 bool vector256 = false; 3069 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3070 %} 3071 ins_pipe( pipe_slow ); 3072%} 3073 3074instruct vsub4I(vecX dst, vecX src) %{ 3075 predicate(n->as_Vector()->length() == 4); 3076 match(Set dst (SubVI dst src)); 3077 format %{ "psubd $dst,$src\t! sub packed4I" %} 3078 ins_encode %{ 3079 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 3080 %} 3081 ins_pipe( pipe_slow ); 3082%} 3083 3084instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{ 3085 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3086 match(Set dst (SubVI src1 src2)); 3087 format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %} 3088 ins_encode %{ 3089 bool vector256 = false; 3090 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3091 %} 3092 ins_pipe( pipe_slow ); 3093%} 3094 3095instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{ 3096 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3097 match(Set dst (SubVI src (LoadVector mem))); 3098 format %{ "vpsubd $dst,$src,$mem\t! 
sub packed4I" %} 3099 ins_encode %{ 3100 bool vector256 = false; 3101 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3102 %} 3103 ins_pipe( pipe_slow ); 3104%} 3105 3106instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{ 3107 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 3108 match(Set dst (SubVI src1 src2)); 3109 format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %} 3110 ins_encode %{ 3111 bool vector256 = true; 3112 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3113 %} 3114 ins_pipe( pipe_slow ); 3115%} 3116 3117instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{ 3118 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 3119 match(Set dst (SubVI src (LoadVector mem))); 3120 format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %} 3121 ins_encode %{ 3122 bool vector256 = true; 3123 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3124 %} 3125 ins_pipe( pipe_slow ); 3126%} 3127 3128// Longs vector sub 3129instruct vsub2L(vecX dst, vecX src) %{ 3130 predicate(n->as_Vector()->length() == 2); 3131 match(Set dst (SubVL dst src)); 3132 format %{ "psubq $dst,$src\t! sub packed2L" %} 3133 ins_encode %{ 3134 __ psubq($dst$$XMMRegister, $src$$XMMRegister); 3135 %} 3136 ins_pipe( pipe_slow ); 3137%} 3138 3139instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{ 3140 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3141 match(Set dst (SubVL src1 src2)); 3142 format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %} 3143 ins_encode %{ 3144 bool vector256 = false; 3145 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3146 %} 3147 ins_pipe( pipe_slow ); 3148%} 3149 3150instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{ 3151 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3152 match(Set dst (SubVL src (LoadVector mem))); 3153 format %{ "vpsubq $dst,$src,$mem\t! 
sub packed2L" %} 3154 ins_encode %{ 3155 bool vector256 = false; 3156 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3157 %} 3158 ins_pipe( pipe_slow ); 3159%} 3160 3161instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{ 3162 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 3163 match(Set dst (SubVL src1 src2)); 3164 format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %} 3165 ins_encode %{ 3166 bool vector256 = true; 3167 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3168 %} 3169 ins_pipe( pipe_slow ); 3170%} 3171 3172instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{ 3173 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 3174 match(Set dst (SubVL src (LoadVector mem))); 3175 format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %} 3176 ins_encode %{ 3177 bool vector256 = true; 3178 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3179 %} 3180 ins_pipe( pipe_slow ); 3181%} 3182 3183// Floats vector sub 3184instruct vsub2F(vecD dst, vecD src) %{ 3185 predicate(n->as_Vector()->length() == 2); 3186 match(Set dst (SubVF dst src)); 3187 format %{ "subps $dst,$src\t! sub packed2F" %} 3188 ins_encode %{ 3189 __ subps($dst$$XMMRegister, $src$$XMMRegister); 3190 %} 3191 ins_pipe( pipe_slow ); 3192%} 3193 3194instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{ 3195 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3196 match(Set dst (SubVF src1 src2)); 3197 format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %} 3198 ins_encode %{ 3199 bool vector256 = false; 3200 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3201 %} 3202 ins_pipe( pipe_slow ); 3203%} 3204 3205instruct vsub4F(vecX dst, vecX src) %{ 3206 predicate(n->as_Vector()->length() == 4); 3207 match(Set dst (SubVF dst src)); 3208 format %{ "subps $dst,$src\t! 
sub packed4F" %} 3209 ins_encode %{ 3210 __ subps($dst$$XMMRegister, $src$$XMMRegister); 3211 %} 3212 ins_pipe( pipe_slow ); 3213%} 3214 3215instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{ 3216 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3217 match(Set dst (SubVF src1 src2)); 3218 format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %} 3219 ins_encode %{ 3220 bool vector256 = false; 3221 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3222 %} 3223 ins_pipe( pipe_slow ); 3224%} 3225 3226instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{ 3227 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3228 match(Set dst (SubVF src (LoadVector mem))); 3229 format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %} 3230 ins_encode %{ 3231 bool vector256 = false; 3232 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3233 %} 3234 ins_pipe( pipe_slow ); 3235%} 3236 3237instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{ 3238 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 3239 match(Set dst (SubVF src1 src2)); 3240 format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %} 3241 ins_encode %{ 3242 bool vector256 = true; 3243 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3244 %} 3245 ins_pipe( pipe_slow ); 3246%} 3247 3248instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{ 3249 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 3250 match(Set dst (SubVF src (LoadVector mem))); 3251 format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %} 3252 ins_encode %{ 3253 bool vector256 = true; 3254 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3255 %} 3256 ins_pipe( pipe_slow ); 3257%} 3258 3259// Doubles vector sub 3260instruct vsub2D(vecX dst, vecX src) %{ 3261 predicate(n->as_Vector()->length() == 2); 3262 match(Set dst (SubVD dst src)); 3263 format %{ "subpd $dst,$src\t! 
sub packed2D" %} 3264 ins_encode %{ 3265 __ subpd($dst$$XMMRegister, $src$$XMMRegister); 3266 %} 3267 ins_pipe( pipe_slow ); 3268%} 3269 3270instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{ 3271 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3272 match(Set dst (SubVD src1 src2)); 3273 format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %} 3274 ins_encode %{ 3275 bool vector256 = false; 3276 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3277 %} 3278 ins_pipe( pipe_slow ); 3279%} 3280 3281instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{ 3282 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3283 match(Set dst (SubVD src (LoadVector mem))); 3284 format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %} 3285 ins_encode %{ 3286 bool vector256 = false; 3287 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3288 %} 3289 ins_pipe( pipe_slow ); 3290%} 3291 3292instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{ 3293 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3294 match(Set dst (SubVD src1 src2)); 3295 format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %} 3296 ins_encode %{ 3297 bool vector256 = true; 3298 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3299 %} 3300 ins_pipe( pipe_slow ); 3301%} 3302 3303instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{ 3304 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3305 match(Set dst (SubVD src (LoadVector mem))); 3306 format %{ "vsubpd $dst,$src,$mem\t! 
sub packed4D" %} 3307 ins_encode %{ 3308 bool vector256 = true; 3309 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3310 %} 3311 ins_pipe( pipe_slow ); 3312%} 3313 3314// --------------------------------- MUL -------------------------------------- 3315 3316// Shorts/Chars vector mul 3317instruct vmul2S(vecS dst, vecS src) %{ 3318 predicate(n->as_Vector()->length() == 2); 3319 match(Set dst (MulVS dst src)); 3320 format %{ "pmullw $dst,$src\t! mul packed2S" %} 3321 ins_encode %{ 3322 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 3323 %} 3324 ins_pipe( pipe_slow ); 3325%} 3326 3327instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{ 3328 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3329 match(Set dst (MulVS src1 src2)); 3330 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} 3331 ins_encode %{ 3332 bool vector256 = false; 3333 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3334 %} 3335 ins_pipe( pipe_slow ); 3336%} 3337 3338instruct vmul4S(vecD dst, vecD src) %{ 3339 predicate(n->as_Vector()->length() == 4); 3340 match(Set dst (MulVS dst src)); 3341 format %{ "pmullw $dst,$src\t! mul packed4S" %} 3342 ins_encode %{ 3343 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 3344 %} 3345 ins_pipe( pipe_slow ); 3346%} 3347 3348instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{ 3349 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3350 match(Set dst (MulVS src1 src2)); 3351 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 3352 ins_encode %{ 3353 bool vector256 = false; 3354 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3355 %} 3356 ins_pipe( pipe_slow ); 3357%} 3358 3359instruct vmul8S(vecX dst, vecX src) %{ 3360 predicate(n->as_Vector()->length() == 8); 3361 match(Set dst (MulVS dst src)); 3362 format %{ "pmullw $dst,$src\t! 
mul packed8S" %} 3363 ins_encode %{ 3364 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 3365 %} 3366 ins_pipe( pipe_slow ); 3367%} 3368 3369instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{ 3370 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 3371 match(Set dst (MulVS src1 src2)); 3372 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} 3373 ins_encode %{ 3374 bool vector256 = false; 3375 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3376 %} 3377 ins_pipe( pipe_slow ); 3378%} 3379 3380instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{ 3381 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 3382 match(Set dst (MulVS src (LoadVector mem))); 3383 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} 3384 ins_encode %{ 3385 bool vector256 = false; 3386 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3387 %} 3388 ins_pipe( pipe_slow ); 3389%} 3390 3391instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{ 3392 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 3393 match(Set dst (MulVS src1 src2)); 3394 format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} 3395 ins_encode %{ 3396 bool vector256 = true; 3397 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3398 %} 3399 ins_pipe( pipe_slow ); 3400%} 3401 3402instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{ 3403 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 3404 match(Set dst (MulVS src (LoadVector mem))); 3405 format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} 3406 ins_encode %{ 3407 bool vector256 = true; 3408 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3409 %} 3410 ins_pipe( pipe_slow ); 3411%} 3412 3413// Integers vector mul (sse4_1) 3414instruct vmul2I(vecD dst, vecD src) %{ 3415 predicate(UseSSE > 3 && n->as_Vector()->length() == 2); 3416 match(Set dst (MulVI dst src)); 3417 format %{ "pmulld $dst,$src\t! 
mul packed2I" %} 3418 ins_encode %{ 3419 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 3420 %} 3421 ins_pipe( pipe_slow ); 3422%} 3423 3424instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{ 3425 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3426 match(Set dst (MulVI src1 src2)); 3427 format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %} 3428 ins_encode %{ 3429 bool vector256 = false; 3430 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3431 %} 3432 ins_pipe( pipe_slow ); 3433%} 3434 3435instruct vmul4I(vecX dst, vecX src) %{ 3436 predicate(UseSSE > 3 && n->as_Vector()->length() == 4); 3437 match(Set dst (MulVI dst src)); 3438 format %{ "pmulld $dst,$src\t! mul packed4I" %} 3439 ins_encode %{ 3440 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 3441 %} 3442 ins_pipe( pipe_slow ); 3443%} 3444 3445instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{ 3446 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3447 match(Set dst (MulVI src1 src2)); 3448 format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %} 3449 ins_encode %{ 3450 bool vector256 = false; 3451 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3452 %} 3453 ins_pipe( pipe_slow ); 3454%} 3455 3456instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{ 3457 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3458 match(Set dst (MulVI src (LoadVector mem))); 3459 format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %} 3460 ins_encode %{ 3461 bool vector256 = false; 3462 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3463 %} 3464 ins_pipe( pipe_slow ); 3465%} 3466 3467instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{ 3468 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 3469 match(Set dst (MulVI src1 src2)); 3470 format %{ "vpmulld $dst,$src1,$src2\t! 
mul packed8I" %} 3471 ins_encode %{ 3472 bool vector256 = true; 3473 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3474 %} 3475 ins_pipe( pipe_slow ); 3476%} 3477 3478instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{ 3479 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 3480 match(Set dst (MulVI src (LoadVector mem))); 3481 format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %} 3482 ins_encode %{ 3483 bool vector256 = true; 3484 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3485 %} 3486 ins_pipe( pipe_slow ); 3487%} 3488 3489// Floats vector mul 3490instruct vmul2F(vecD dst, vecD src) %{ 3491 predicate(n->as_Vector()->length() == 2); 3492 match(Set dst (MulVF dst src)); 3493 format %{ "mulps $dst,$src\t! mul packed2F" %} 3494 ins_encode %{ 3495 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 3496 %} 3497 ins_pipe( pipe_slow ); 3498%} 3499 3500instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{ 3501 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3502 match(Set dst (MulVF src1 src2)); 3503 format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %} 3504 ins_encode %{ 3505 bool vector256 = false; 3506 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3507 %} 3508 ins_pipe( pipe_slow ); 3509%} 3510 3511instruct vmul4F(vecX dst, vecX src) %{ 3512 predicate(n->as_Vector()->length() == 4); 3513 match(Set dst (MulVF dst src)); 3514 format %{ "mulps $dst,$src\t! mul packed4F" %} 3515 ins_encode %{ 3516 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 3517 %} 3518 ins_pipe( pipe_slow ); 3519%} 3520 3521instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{ 3522 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3523 match(Set dst (MulVF src1 src2)); 3524 format %{ "vmulps $dst,$src1,$src2\t! 
mul packed4F" %} 3525 ins_encode %{ 3526 bool vector256 = false; 3527 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3528 %} 3529 ins_pipe( pipe_slow ); 3530%} 3531 3532instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{ 3533 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3534 match(Set dst (MulVF src (LoadVector mem))); 3535 format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %} 3536 ins_encode %{ 3537 bool vector256 = false; 3538 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3539 %} 3540 ins_pipe( pipe_slow ); 3541%} 3542 3543instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{ 3544 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 3545 match(Set dst (MulVF src1 src2)); 3546 format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %} 3547 ins_encode %{ 3548 bool vector256 = true; 3549 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3550 %} 3551 ins_pipe( pipe_slow ); 3552%} 3553 3554instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{ 3555 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 3556 match(Set dst (MulVF src (LoadVector mem))); 3557 format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %} 3558 ins_encode %{ 3559 bool vector256 = true; 3560 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3561 %} 3562 ins_pipe( pipe_slow ); 3563%} 3564 3565// Doubles vector mul 3566instruct vmul2D(vecX dst, vecX src) %{ 3567 predicate(n->as_Vector()->length() == 2); 3568 match(Set dst (MulVD dst src)); 3569 format %{ "mulpd $dst,$src\t! mul packed2D" %} 3570 ins_encode %{ 3571 __ mulpd($dst$$XMMRegister, $src$$XMMRegister); 3572 %} 3573 ins_pipe( pipe_slow ); 3574%} 3575 3576instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{ 3577 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3578 match(Set dst (MulVD src1 src2)); 3579 format %{ "vmulpd $dst,$src1,$src2\t! 
mul packed2D" %} 3580 ins_encode %{ 3581 bool vector256 = false; 3582 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3583 %} 3584 ins_pipe( pipe_slow ); 3585%} 3586 3587instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{ 3588 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3589 match(Set dst (MulVD src (LoadVector mem))); 3590 format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %} 3591 ins_encode %{ 3592 bool vector256 = false; 3593 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3594 %} 3595 ins_pipe( pipe_slow ); 3596%} 3597 3598instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{ 3599 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3600 match(Set dst (MulVD src1 src2)); 3601 format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %} 3602 ins_encode %{ 3603 bool vector256 = true; 3604 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3605 %} 3606 ins_pipe( pipe_slow ); 3607%} 3608 3609instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{ 3610 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3611 match(Set dst (MulVD src (LoadVector mem))); 3612 format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %} 3613 ins_encode %{ 3614 bool vector256 = true; 3615 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3616 %} 3617 ins_pipe( pipe_slow ); 3618%} 3619 3620// --------------------------------- DIV -------------------------------------- 3621 3622// Floats vector div 3623instruct vdiv2F(vecD dst, vecD src) %{ 3624 predicate(n->as_Vector()->length() == 2); 3625 match(Set dst (DivVF dst src)); 3626 format %{ "divps $dst,$src\t! 
div packed2F" %} 3627 ins_encode %{ 3628 __ divps($dst$$XMMRegister, $src$$XMMRegister); 3629 %} 3630 ins_pipe( pipe_slow ); 3631%} 3632 3633instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{ 3634 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3635 match(Set dst (DivVF src1 src2)); 3636 format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %} 3637 ins_encode %{ 3638 bool vector256 = false; 3639 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3640 %} 3641 ins_pipe( pipe_slow ); 3642%} 3643 3644instruct vdiv4F(vecX dst, vecX src) %{ 3645 predicate(n->as_Vector()->length() == 4); 3646 match(Set dst (DivVF dst src)); 3647 format %{ "divps $dst,$src\t! div packed4F" %} 3648 ins_encode %{ 3649 __ divps($dst$$XMMRegister, $src$$XMMRegister); 3650 %} 3651 ins_pipe( pipe_slow ); 3652%} 3653 3654instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{ 3655 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3656 match(Set dst (DivVF src1 src2)); 3657 format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %} 3658 ins_encode %{ 3659 bool vector256 = false; 3660 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3661 %} 3662 ins_pipe( pipe_slow ); 3663%} 3664 3665instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{ 3666 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3667 match(Set dst (DivVF src (LoadVector mem))); 3668 format %{ "vdivps $dst,$src,$mem\t! div packed4F" %} 3669 ins_encode %{ 3670 bool vector256 = false; 3671 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3672 %} 3673 ins_pipe( pipe_slow ); 3674%} 3675 3676instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{ 3677 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 3678 match(Set dst (DivVF src1 src2)); 3679 format %{ "vdivps $dst,$src1,$src2\t! 
div packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Conventions used by the vector instructs below:
//   <name>          two-operand form: dst is both an input and the result.
//   <name>_imm      two-operand form with an immI8 constant operand.
//   <name>_reg      three-operand v* form; predicate requires UseAVX.
//   <name>_reg_imm  three-operand v* form with an immI8 constant operand.
//   <name>_mem      three-operand v* form, second input is (LoadVector mem).
// The trailing bool passed to the v* assembler calls is the vector256 flag.

// 8 floats (vecY), divisor loaded from memory.
instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed8F" %}
  ins_encode %{
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, true);  // vector256=true
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector div

// 2 doubles (vecX)
instruct vdiv2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD dst src));
  format %{ "divpd $dst,$src\t! div packed2D" %}
  ins_encode %{
    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %}
  ins_encode %{
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, false);  // vector256=false
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %}
  ins_encode %{
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, false);  // vector256=false
  %}
  ins_pipe( pipe_slow );
%}

// 4 doubles (vecY)
instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %}
  ins_encode %{
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, true);  // vector256=true
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %}
  ins_encode %{
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, true);  // vector256=true
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ LeftShift -----------------------------------
//
// In every shift instruct the count is either held in a register (regF
// operand) or is an immI8 constant. The vecY variants exist only in the
// three-operand form and their predicates require UseAVX > 1.

// Shorts/Chars vector left shift

// 2 shorts (vecS)
instruct vsll2S(vecS dst, regF shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_imm(vecS dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg(vecS dst, vecS src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, false);  // vector256=false
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, false);  // vector256=false
  %}
  ins_pipe( pipe_slow );
%}

// 4 shorts (vecD)
instruct vsll4S(vecD dst, regF shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg(vecD dst, vecD src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, false);  // vector256=false
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, false);  // vector256=false
  %}
  ins_pipe( pipe_slow );
%}

// 8 shorts (vecX)
instruct vsll8S(vecX dst, regF shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg(vecX dst, vecX src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, false);  // vector256=false
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, false);  // vector256=false
  %}
  ins_pipe( pipe_slow );
%}

// 16 shorts (vecY)
instruct vsll16S_reg(vecY dst, vecY src, regF shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, true);  // vector256=true
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, true);  // vector256=true
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector left shift

// 2 ints (vecD)
instruct vsll2I(vecD dst, regF shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_reg(vecD dst, vecD src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, false);  // vector256=false
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, false);  // vector256=false
  %}
  ins_pipe( pipe_slow );
%}

// 4 ints (vecX)
instruct vsll4I(vecX dst, regF shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg(vecX dst, vecX src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, false);  // vector256=false
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, false);  // vector256=false
  %}
  ins_pipe( pipe_slow );
%}

// 8 ints (vecY)
instruct vsll8I_reg(vecY dst, vecY src, regF shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, true);  // vector256=true
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, true);  // vector256=true
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector left shift

// 2 longs (vecX)
instruct vsll2L(vecX dst, regF shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_reg(vecX dst, vecX src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, false);  // vector256=false
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, false);  // vector256=false
  %}
  ins_pipe( pipe_slow );
%}

// 4 longs (vecY)
instruct vsll4L_reg(vecY dst, vecY src, regF shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, true);  // vector256=true
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, true);  // vector256=true
  %}
  ins_pipe( pipe_slow );
%}

// ----------------------- LogicalRightShift -----------------------------------

// A Shorts/Chars vector logical right shift produces an incorrect Java result
// for negative data, because Java code converts a short value into an int
// with sign extension before the shift.

// Integers vector logical right shift

// 2 ints (vecD)
instruct vsrl2I(vecD dst, regF shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_reg(vecD dst, vecD src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, false);  // vector256=false
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, false);  // vector256=false
  %}
  ins_pipe( pipe_slow );
%}

// 4 ints (vecX)
instruct vsrl4I(vecX dst, regF shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_reg(vecX dst, vecX src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, false);  // vector256=false
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, false);  // vector256=false
  %}
  ins_pipe( pipe_slow );
%}

// 8 ints (vecY)
instruct vsrl8I_reg(vecY dst, vecY src, regF shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, true);  // vector256=true
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, true);  // vector256=true
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector logical right shift

// 2 longs (vecX)
instruct vsrl2L(vecX dst, regF shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg(vecX dst, vecX src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, false);  // vector256=false
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, false);  // vector256=false
  %}
  ins_pipe( pipe_slow );
%}

// 4 longs (vecY)
instruct vsrl4L_reg(vecY dst, vecY src, regF shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, true);  // vector256=true
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, true);  // vector256=true
  %}
  ins_pipe( pipe_slow );
%}

// ------------------- ArithmeticRightShift -----------------------------------

// Shorts/Chars vector arithmetic right shift

// 2 shorts (vecS)
instruct vsra2S(vecS dst, regF shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_imm(vecS dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg(vecS dst, vecS src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, false);  // vector256=false
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, false);  // vector256=false
  %}
  ins_pipe( pipe_slow );
%}

// 4 shorts (vecD)
instruct vsra4S(vecD dst, regF shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg(vecD dst, vecD src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, false);  // vector256=false
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, false);  // vector256=false
  %}
  ins_pipe( pipe_slow );
%}

// 8 shorts (vecX)
instruct vsra8S(vecX dst, regF shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg(vecX dst, vecX src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, false);  // vector256=false
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, false);  // vector256=false
  %}
  ins_pipe( pipe_slow );
%}

// 16 shorts (vecY)
instruct vsra16S_reg(vecY dst, vecY src, regF shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, true);  // vector256=true
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, true);  // vector256=true
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector arithmetic right shift

// 2 ints (vecD)
instruct vsra2I(vecD dst, regF shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg(vecD dst, vecD src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, false);  // vector256=false
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, false);  // vector256=false
  %}
  ins_pipe( pipe_slow );
%}

// 4 ints (vecX)
instruct vsra4I(vecX dst, regF shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg(vecX dst, vecX src, regF shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, false);  // vector256=false
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, false);  // vector256=false
  %}
  ins_pipe( pipe_slow );
%}

// 8 ints (vecY)
instruct vsra8I_reg(vecY dst, vecY src, regF shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, true);  // vector256=true
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, true);  // vector256=true
  %}
  ins_pipe( pipe_slow );
%}

// There are no longs vector arithmetic right shift instructions.
// --------------------------------- AND --------------------------------------
//
// The AND/OR/XOR instructs are selected on the vector size in bytes
// (length_in_bytes()), not on an element count. Naming:
//   <name>      two-operand form: dst is both an input and the result.
//   <name>_reg  three-operand v* form; predicate requires UseAVX.
//   <name>_mem  three-operand v* form, second input is (LoadVector mem);
//               present only for the 16- and 32-byte sizes.
// The 32-byte (vecY) forms require UseAVX > 1. The trailing bool passed to
// the v* assembler calls is the vector256 flag.

// 4 bytes (vecS)
instruct vand4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (4 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %}
  ins_encode %{
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, false);  // vector256=false
  %}
  ins_pipe( pipe_slow );
%}

// 8 bytes (vecD)
instruct vand8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (8 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %}
  ins_encode %{
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, false);  // vector256=false
  %}
  ins_pipe( pipe_slow );
%}

// 16 bytes (vecX)
instruct vand16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (16 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %}
  ins_encode %{
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, false);  // vector256=false
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %}
  ins_encode %{
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, false);  // vector256=false
  %}
  ins_pipe( pipe_slow );
%}

// 32 bytes (vecY)
instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %}
  ins_encode %{
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, true);  // vector256=true
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %}
  ins_encode %{
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, true);  // vector256=true
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- OR ---------------------------------------

// 4 bytes (vecS)
instruct vor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (4 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %}
  ins_encode %{
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, false);  // vector256=false
  %}
  ins_pipe( pipe_slow );
%}

// 8 bytes (vecD)
instruct vor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (8 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %}
  ins_encode %{
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, false);  // vector256=false
  %}
  ins_pipe( pipe_slow );
%}

// 16 bytes (vecX)
instruct vor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (16 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %}
  ins_encode %{
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, false);  // vector256=false
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %}
  ins_encode %{
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, false);  // vector256=false
  %}
  ins_pipe( pipe_slow );
%}

// 32 bytes (vecY)
instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %}
  ins_encode %{
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, true);  // vector256=true
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %}
  ins_encode %{
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, true);  // vector256=true
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- XOR --------------------------------------

// 4 bytes (vecS)
instruct vxor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
  ins_encode %{
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, false);  // vector256=false
  %}
  ins_pipe( pipe_slow );
%}

// 8 bytes (vecD)
instruct vxor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
  ins_encode %{
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, false);  // vector256=false
  %}
  ins_pipe( pipe_slow );
%}

// 16 bytes (vecX)
instruct vxor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
  ins_encode %{
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, false);  // vector256=false
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %}
  ins_encode %{
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, false);  // vector256=false
  %}
  ins_pipe( pipe_slow );
%}

// 32 bytes (vecY)
instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
  ins_encode %{
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, true);  // vector256=true
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %}
  ins_encode %{
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, true);  // vector256=true
  %}
  ins_pipe( pipe_slow );
%}