x86.ad revision 6760:22b98ab2a69f
1// 2// Copyright (c) 2011, 2014, Oracle and/or its affiliates. All rights reserved. 3// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4// 5// This code is free software; you can redistribute it and/or modify it 6// under the terms of the GNU General Public License version 2 only, as 7// published by the Free Software Foundation. 8// 9// This code is distributed in the hope that it will be useful, but WITHOUT 10// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12// version 2 for more details (a copy is included in the LICENSE file that 13// accompanied this code). 14// 15// You should have received a copy of the GNU General Public License version 16// 2 along with this work; if not, write to the Free Software Foundation, 17// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18// 19// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20// or visit www.oracle.com if you need additional information or have any 21// questions. 22// 23// 24 25// X86 Common Architecture Description File 26 27//----------REGISTER DEFINITION BLOCK------------------------------------------ 28// This information is used by the matcher and the register allocator to 29// describe individual registers and classes of registers within the target 30// archtecture. 31 32register %{ 33//----------Architecture Description Register Definitions---------------------- 34// General Registers 35// "reg_def" name ( register save type, C convention save type, 36// ideal register type, encoding ); 37// Register Save Types: 38// 39// NS = No-Save: The register allocator assumes that these registers 40// can be used without saving upon entry to the method, & 41// that they do not need to be saved at call sites. 
42// 43// SOC = Save-On-Call: The register allocator assumes that these registers 44// can be used without saving upon entry to the method, 45// but that they must be saved at call sites. 46// 47// SOE = Save-On-Entry: The register allocator assumes that these registers 48// must be saved before using them upon entry to the 49// method, but they do not need to be saved at call 50// sites. 51// 52// AS = Always-Save: The register allocator assumes that these registers 53// must be saved before using them upon entry to the 54// method, & that they must be saved at call sites. 55// 56// Ideal Register Type is used to determine how to save & restore a 57// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get 58// spilled with LoadP/StoreP. If the register supports both, use Op_RegI. 59// 60// The encoding number is the actual bit-pattern placed into the opcodes. 61 62// XMM registers. 256-bit registers or 8 words each, labeled (a)-h. 63// Word a in each register holds a Float, words ab hold a Double. 64// The whole registers are used in SSE4.2 version intrinsics, 65// array copy stubs and superword operations (see UseSSE42Intrinsics, 66// UseXMMForArrayCopy and UseSuperword flags). 67// XMM8-XMM15 must be encoded with REX (VEX for UseAVX). 
68// Linux ABI: No register preserved across function calls 69// XMM0-XMM7 might hold parameters 70// Windows ABI: XMM6-XMM15 preserved across function calls 71// XMM0-XMM3 might hold parameters 72 73reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()); 74reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1)); 75reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2)); 76reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3)); 77reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4)); 78reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5)); 79reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6)); 80reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7)); 81 82reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()); 83reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1)); 84reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2)); 85reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3)); 86reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4)); 87reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5)); 88reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6)); 89reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7)); 90 91reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()); 92reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1)); 93reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2)); 94reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3)); 95reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4)); 96reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5)); 97reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6)); 98reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7)); 99 100reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()); 101reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1)); 102reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2)); 103reg_def 
XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3)); 104reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4)); 105reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5)); 106reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6)); 107reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7)); 108 109reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()); 110reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1)); 111reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2)); 112reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3)); 113reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4)); 114reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5)); 115reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6)); 116reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7)); 117 118reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()); 119reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1)); 120reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2)); 121reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3)); 122reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4)); 123reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5)); 124reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6)); 125reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7)); 126 127#ifdef _WIN64 128 129reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()); 130reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1)); 131reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(2)); 132reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3)); 133reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4)); 134reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5)); 135reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6)); 136reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7)); 137 138reg_def XMM7 ( SOC, SOE, 
Op_RegF, 7, xmm7->as_VMReg()); 139reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1)); 140reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2)); 141reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3)); 142reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4)); 143reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5)); 144reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6)); 145reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7)); 146 147reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()); 148reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1)); 149reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2)); 150reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3)); 151reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4)); 152reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5)); 153reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6)); 154reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7)); 155 156reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()); 157reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1)); 158reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2)); 159reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3)); 160reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4)); 161reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5)); 162reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6)); 163reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7)); 164 165reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()); 166reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1)); 167reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2)); 168reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3)); 169reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(4)); 170reg_def XMM10f( SOC, SOE, Op_RegF, 10, 
xmm10->as_VMReg()->next(5)); 171reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6)); 172reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7)); 173 174reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()); 175reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1)); 176reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2)); 177reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3)); 178reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4)); 179reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5)); 180reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6)); 181reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7)); 182 183reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()); 184reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1)); 185reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2)); 186reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3)); 187reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4)); 188reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5)); 189reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6)); 190reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7)); 191 192reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()); 193reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1)); 194reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2)); 195reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3)); 196reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4)); 197reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5)); 198reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6)); 199reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7)); 200 201reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()); 202reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1)); 
203reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2)); 204reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3)); 205reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4)); 206reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5)); 207reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6)); 208reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7)); 209 210reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()); 211reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1)); 212reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2)); 213reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3)); 214reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4)); 215reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5)); 216reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6)); 217reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7)); 218 219#else // _WIN64 220 221reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()); 222reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1)); 223reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2)); 224reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3)); 225reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4)); 226reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5)); 227reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6)); 228reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7)); 229 230reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()); 231reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1)); 232reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2)); 233reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3)); 234reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4)); 235reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5)); 236reg_def XMM7g( SOC, SOC, Op_RegF, 7, 
xmm7->as_VMReg()->next(6)); 237reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7)); 238 239#ifdef _LP64 240 241reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()); 242reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1)); 243reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2)); 244reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3)); 245reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4)); 246reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5)); 247reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6)); 248reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7)); 249 250reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()); 251reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1)); 252reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2)); 253reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3)); 254reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4)); 255reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5)); 256reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6)); 257reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7)); 258 259reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()); 260reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1)); 261reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2)); 262reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3)); 263reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4)); 264reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5)); 265reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6)); 266reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7)); 267 268reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()); 269reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1)); 270reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2)); 271reg_def XMM11d( SOC, SOC, 
Op_RegF, 11, xmm11->as_VMReg()->next(3)); 272reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4)); 273reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5)); 274reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6)); 275reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7)); 276 277reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()); 278reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1)); 279reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2)); 280reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3)); 281reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4)); 282reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5)); 283reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6)); 284reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7)); 285 286reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()); 287reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1)); 288reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2)); 289reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3)); 290reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4)); 291reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5)); 292reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6)); 293reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7)); 294 295reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()); 296reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1)); 297reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2)); 298reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3)); 299reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4)); 300reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5)); 301reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6)); 302reg_def XMM14h( SOC, SOC, Op_RegF, 14, 
xmm14->as_VMReg()->next(7)); 303 304reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()); 305reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1)); 306reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2)); 307reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3)); 308reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4)); 309reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5)); 310reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6)); 311reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7)); 312 313#endif // _LP64 314 315#endif // _WIN64 316 317#ifdef _LP64 318reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); 319#else 320reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad()); 321#endif // _LP64 322 323alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 324 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 325 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 326 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 327 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 328 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 329 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 330 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 331#ifdef _LP64 332 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 333 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 334 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 335 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 336 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 337 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 338 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 339 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h 340#endif 341 ); 342 343// flags allocation class should be last. 
344alloc_class chunk2(RFLAGS); 345 346// Singleton class for condition codes 347reg_class int_flags(RFLAGS); 348 349// Class for all float registers 350reg_class float_reg(XMM0, 351 XMM1, 352 XMM2, 353 XMM3, 354 XMM4, 355 XMM5, 356 XMM6, 357 XMM7 358#ifdef _LP64 359 ,XMM8, 360 XMM9, 361 XMM10, 362 XMM11, 363 XMM12, 364 XMM13, 365 XMM14, 366 XMM15 367#endif 368 ); 369 370// Class for all double registers 371reg_class double_reg(XMM0, XMM0b, 372 XMM1, XMM1b, 373 XMM2, XMM2b, 374 XMM3, XMM3b, 375 XMM4, XMM4b, 376 XMM5, XMM5b, 377 XMM6, XMM6b, 378 XMM7, XMM7b 379#ifdef _LP64 380 ,XMM8, XMM8b, 381 XMM9, XMM9b, 382 XMM10, XMM10b, 383 XMM11, XMM11b, 384 XMM12, XMM12b, 385 XMM13, XMM13b, 386 XMM14, XMM14b, 387 XMM15, XMM15b 388#endif 389 ); 390 391// Class for all 32bit vector registers 392reg_class vectors_reg(XMM0, 393 XMM1, 394 XMM2, 395 XMM3, 396 XMM4, 397 XMM5, 398 XMM6, 399 XMM7 400#ifdef _LP64 401 ,XMM8, 402 XMM9, 403 XMM10, 404 XMM11, 405 XMM12, 406 XMM13, 407 XMM14, 408 XMM15 409#endif 410 ); 411 412// Class for all 64bit vector registers 413reg_class vectord_reg(XMM0, XMM0b, 414 XMM1, XMM1b, 415 XMM2, XMM2b, 416 XMM3, XMM3b, 417 XMM4, XMM4b, 418 XMM5, XMM5b, 419 XMM6, XMM6b, 420 XMM7, XMM7b 421#ifdef _LP64 422 ,XMM8, XMM8b, 423 XMM9, XMM9b, 424 XMM10, XMM10b, 425 XMM11, XMM11b, 426 XMM12, XMM12b, 427 XMM13, XMM13b, 428 XMM14, XMM14b, 429 XMM15, XMM15b 430#endif 431 ); 432 433// Class for all 128bit vector registers 434reg_class vectorx_reg(XMM0, XMM0b, XMM0c, XMM0d, 435 XMM1, XMM1b, XMM1c, XMM1d, 436 XMM2, XMM2b, XMM2c, XMM2d, 437 XMM3, XMM3b, XMM3c, XMM3d, 438 XMM4, XMM4b, XMM4c, XMM4d, 439 XMM5, XMM5b, XMM5c, XMM5d, 440 XMM6, XMM6b, XMM6c, XMM6d, 441 XMM7, XMM7b, XMM7c, XMM7d 442#ifdef _LP64 443 ,XMM8, XMM8b, XMM8c, XMM8d, 444 XMM9, XMM9b, XMM9c, XMM9d, 445 XMM10, XMM10b, XMM10c, XMM10d, 446 XMM11, XMM11b, XMM11c, XMM11d, 447 XMM12, XMM12b, XMM12c, XMM12d, 448 XMM13, XMM13b, XMM13c, XMM13d, 449 XMM14, XMM14b, XMM14c, XMM14d, 450 XMM15, XMM15b, XMM15c, XMM15d 
451#endif 452 ); 453 454// Class for all 256bit vector registers 455reg_class vectory_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 456 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 457 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 458 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 459 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 460 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 461 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 462 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 463#ifdef _LP64 464 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 465 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 466 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 467 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 468 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 469 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 470 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 471 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h 472#endif 473 ); 474 475%} 476 477 478//----------SOURCE BLOCK------------------------------------------------------- 479// This is a block of C++ code which provides values, functions, and 480// definitions necessary in the rest of the architecture description 481 482source_hpp %{ 483// Header information of the source block. 484// Method declarations/definitions which are used outside 485// the ad-scope can conveniently be defined here. 486// 487// To keep related declarations/definitions/uses close together, 488// we switch between source %{ }% and source_hpp %{ }% freely as needed. 

// Forward declaration: NativeJump supplies instruction_size for the
// handler-size calculations below.
class NativeJump;

// Platform hooks queried by Compile::shorten_branches; x86 calls/jumps
// have full 32-bit displacements, so no trampoline stubs are needed.
class CallStubImpl {

  //--------------------------------------------------------------
  //---< Used for optimization in Compile::shorten_branches >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};

// Sizes and emitters for the per-nmethod exception and deopt handler
// stubs; the emit_* bodies live in the source %{ %} block below.
class HandlerImpl {

 public:

  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  static uint size_exception_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // exception handler starts out as jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return NativeJump::instruction_size;
  }

#ifdef _LP64
  static uint size_deopt_handler() {
    // three 5 byte instructions
    return 15;
  }
#else
  static uint size_deopt_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // exception handler starts out as jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return 5 + NativeJump::instruction_size; // pushl(); jmp;
  }
#endif
};

%} // end source_hpp

source %{

// Emit exception handler code.
// Stuff framesize into a register and call a VM stub routine.
// Returns the offset of the handler within the stub section
// (0 signals that CodeBuffer expansion failed).
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) return 0; // CodeBuffer::expand failed
  int offset = __ offset();
  __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  // The handler must fit in the size advertised to shorten_branches.
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}

// Emit deopt handler code.
// Pushes the current pc (so the deopt blob knows the deopt site) and
// jumps to the shared deopt blob's unpack entry. Returns the handler's
// offset, or 0 if CodeBuffer expansion failed.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) return 0; // CodeBuffer::expand failed
  int offset = __ offset();

#ifdef _LP64
  address the_pc = (address) __ pc();
  Label next;
  // push a "the_pc" on the stack without destroying any registers
  // as they all may be live.

  // push address of "next"
  __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
  __ bind(next);
  // adjust it so it matches "the_pc"
  __ subptr(Address(rsp, 0), __ offset() - offset);
#else
  // 32-bit: the handler's own address is directly encodable; push it.
  InternalAddress here(__ pc());
  __ pushptr(here.addr());
#endif

  __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
  // Must not exceed the size advertised by size_deopt_handler().
  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
  __ end_a_stub();
  return offset;
}


//=============================================================================

  // Float masks come from different places depending on platform.
#ifdef _LP64
// 64-bit: sign-mask/sign-flip constants live in generated stub routines.
static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
#else
// 32-bit: the constants come from statically allocated pools.
static address float_signmask()  { return (address)float_signmask_pool; }
static address float_signflip()  { return (address)float_signflip_pool; }
static address double_signmask() { return (address)double_signmask_pool; }
static address double_signflip() { return (address)double_signflip_pool; }
#endif


// Returns true when the given ideal opcode may be matched on this CPU,
// taking runtime feature flags (popcount, SSE level, cx8) into account.
const bool Matcher::match_rule_supported(int opcode) {
  if (!has_match_rule(opcode))
    return false;

  switch (opcode) {
    case Op_PopCountI:
    case Op_PopCountL:
      if (!UsePopCountInstruction)
        return false;
      break;
    case Op_MulVI:
      if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
        return false;
      break;
    case Op_CompareAndSwapL:
#ifdef _LP64
    case Op_CompareAndSwapP:
#endif
      // 8-byte compare-and-swap needs cmpxchg8b/cmpxchg16b support.
      if (!VM_Version::supports_cx8())
        return false;
      break;
  }

  return true;  // Per default match rules are supported.
}

// Max vector size in bytes. 0 if not supported.
const int Matcher::vector_width_in_bytes(BasicType bt) {
  assert(is_java_primitive(bt), "only primitive type vectors");
  if (UseSSE < 2) return 0;
  // SSE2 supports 128bit vectors for all types.
  // AVX2 supports 256bit vectors for all types.
  int size = (UseAVX > 1) ? 32 : 16;
  // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
  if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
    size = 32;
  // Use flag to limit vector size.
  size = MIN2(size,(int)MaxVectorSize);
  // Minimum 2 values in vector (or 4 for bytes).
  // NOTE: the cases below fall through intentionally — a wider element
  // type must satisfy every narrower type's minimum size as well.
  switch (bt) {
  case T_DOUBLE:
  case T_LONG:
    if (size < 16) return 0;
    // fallthrough
  case T_FLOAT:
  case T_INT:
    if (size < 8) return 0;
    // fallthrough
  case T_BOOLEAN:
  case T_BYTE:
  case T_CHAR:
  case T_SHORT:
    if (size < 4) return 0;
    break;
  default:
    ShouldNotReachHere();
  }
  return size;
}

// Limits on vector size (number of elements) loaded into vector.
const int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
const int Matcher::min_vector_size(const BasicType bt) {
  int max_size = max_vector_size(bt);
  // Min size which can be loaded into vector is 4 bytes.
  int size = (type2aelembytes(bt) == 1) ? 4 : 2;
  return MIN2(size,max_size);
}

// Vector ideal reg corresponding to specified size in bytes
const int Matcher::vector_ideal_reg(int size) {
  assert(MaxVectorSize >= size, "");
  switch(size) {
  case  4: return Op_VecS;
  case  8: return Op_VecD;
  case 16: return Op_VecX;
  case 32: return Op_VecY;
  }
  ShouldNotReachHere();
  return 0;
}

// Only lowest bits of xmm reg are used for vector shift count.
const int Matcher::vector_shift_count_ideal_reg(int size) {
  return Op_VecS;
}

// x86 supports misaligned vectors store/load.
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}

// x86 AES instructions are compatible with SunJCE expanded
// keys, hence we do not need to pass the original key to stubs
const bool Matcher::pass_original_key_for_aes() {
  return false;
}

// Helper methods for MachSpillCopyNode::implementation().
// Emits (or, with cbuf == NULL, prints/sizes) an xmm-to-xmm vector move.
// Returns the emitted/estimated size in bytes.
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st) {
  // In 64-bit VM size calculation is very complex. Emitting instructions
  // into scratch buffer is used to get size in 64-bit VM.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  // Any vector wider than 32 bits must occupy an adjacent, even-aligned
  // register pair in the regalloc's view.
  assert(ireg == Op_VecS || // 32bit vector
         (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
         (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
         "no non-adjacent vector moves" );
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    switch (ireg) {
    case Op_VecS: // copy whole register
    case Op_VecD:
    case Op_VecX:
      __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      break;
    case Op_VecY:
      __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      break;
    default:
      ShouldNotReachHere();
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == 4, "incorrect size calculattion");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    // No buffer: print the instruction for disassembly listings.
    switch (ireg) {
    case Op_VecS:
    case Op_VecD:
    case Op_VecX:
      st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    case Op_VecY:
      st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  return 4;
}

// Emits (or prints/sizes) a spill move between an xmm register and a
// stack slot at [rsp + stack_offset]; direction selected by is_load.
// Returns the emitted/estimated size in bytes.
static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st) {
  // In 64-bit VM size calculation is very complex. Emitting instructions
  // into scratch buffer is used to get size in 64-bit VM.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecD:
        __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecX:
        __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecY:
        __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecD:
        __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecX:
        __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecY:
        __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      default:
        ShouldNotReachHere();
      }
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    // Displacement encoding: none for 0, imm8 below 0x80, else imm32.
    int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == (5+offset_size), "incorrect size calculattion");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    // No buffer: print the instruction for disassembly listings.
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecD:
        st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecX:
        st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecY:
        st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecD:
        st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecX:
        st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecY:
        st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      default:
        ShouldNotReachHere();
      }
    }
#endif
  }
  int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return 5+offset_size;
}

static inline jfloat replicate4_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 32bit.
  assert(width == 1 || width == 2, "only byte or short types here");
  int bit_width = width * 8;
  jint val = con;
  val &= (1 << bit_width) - 1;  // mask off sign bits
  // Double the replicated pattern until all 32 bits are filled.
  while(bit_width < 32) {
    val |= (val << bit_width);
    bit_width <<= 1;
  }
  // NOTE(review): type punning via pointer cast violates strict aliasing;
  // HotSpot builds with compiler settings that tolerate this, but a
  // memcpy/union (jint_cast-style) spelling would be the portable form.
  jfloat fval = *((jfloat*) &val);  // coerce to float type
  return fval;
}

static inline jdouble replicate8_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
  assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
  int bit_width = width * 8;
  jlong val = con;
  val &= (((jlong) 1) << bit_width) - 1;  // mask off sign bits
  // Double the replicated pattern until all 64 bits are filled.
  while(bit_width < 64) {
    val |= (val << bit_width);
    bit_width <<= 1;
  }
  // NOTE(review): same strict-aliasing caveat as replicate4_imm above.
  jdouble dval = *((jdouble*) &val);  // coerce to double type
  return dval;
}

#ifndef PRODUCT
  // Debug listing for a nop padding node (_count bytes of nop).
  void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
    st->print("nop \t# %d bytes pad for loops and calls", _count);
  }
#endif

  // Emit _count bytes of nop padding.
  void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
    MacroAssembler _masm(&cbuf);
    __ nop(_count);
  }

  // Size is exactly the number of padding bytes requested.
  uint MachNopNode::size(PhaseRegAlloc*) const {
    return _count;
  }

#ifndef PRODUCT
  // Debug listing for a breakpoint node.
  void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
    st->print("# breakpoint");
  }
#endif

  // Emit an int3 trap instruction.
  void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
    MacroAssembler _masm(&cbuf);
    __ int3();
  }

  uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
    return MachNode::size(ra_);
  }

%}

encode %{

  enc_class preserve_SP %{
    debug_only(int off0 = cbuf.insts_size());
    MacroAssembler _masm(&cbuf);
    // RBP is preserved across all calls, even compiled calls.
    // Use it to preserve RSP in places where the callee might change the SP.
    __ movptr(rbp_mh_SP_save, rsp);
    debug_only(int off1 = cbuf.insts_size());
    // The scheduler relies on preserve_SP_size(); verify the emitted length.
    assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
  %}

  // Restore RSP from the RBP-based save slot written by preserve_SP.
  enc_class restore_SP %{
    MacroAssembler _masm(&cbuf);
    __ movptr(rsp, rbp_mh_SP_save);
  %}

  // Post-call check (only under VerifyStackAtCalls): confirm the stack depth
  // is unchanged by looking for the cookie planted below the old SP.
  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      MacroAssembler _masm(&cbuf);
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
  %}

%}


//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

// Vectors
// One operand per machine vector width; each is tied to its own register
// class from the register block above.  No fields, so the format is empty
// and the interface is a plain register.

// 32-bit vector operand (one float / 4 bytes).
operand vecS() %{
  constraint(ALLOC_IN_RC(vectors_reg));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

// 64-bit vector operand (8 bytes).
operand vecD() %{
  constraint(ALLOC_IN_RC(vectord_reg));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

// 128-bit (full XMM) vector operand.
operand vecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

// 256-bit (full YMM, requires AVX) vector operand.
operand vecY() %{
  constraint(ALLOC_IN_RC(vectory_reg));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}


// INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)

// ============================================================================

// Halt node: emit a breakpoint so reaching dead code traps immediately.
instruct ShouldNotReachHere() %{
  match(Halt);
  format %{ "int3\t# ShouldNotReachHere" %}
  ins_encode %{
    __ int3();
  %}
  ins_pipe(pipe_slow);
%}

// ============================================================================

// SSE scalar float add, two-operand in-place form (dst += src).
// The AVX three-operand forms follow when UseAVX > 0.
instruct addF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst src));

  format %{ "addss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// SSE scalar float add with a memory operand folded into the instruction.
instruct addF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst (LoadF src)));

  format %{ "addss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// SSE scalar float add of an immediate, loaded from the constant table.
instruct addF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst con));
  format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct
addF_reg_reg(regF dst, regF src1, regF src2) %{ 1039 predicate(UseAVX > 0); 1040 match(Set dst (AddF src1 src2)); 1041 1042 format %{ "vaddss $dst, $src1, $src2" %} 1043 ins_cost(150); 1044 ins_encode %{ 1045 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1046 %} 1047 ins_pipe(pipe_slow); 1048%} 1049 1050instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 1051 predicate(UseAVX > 0); 1052 match(Set dst (AddF src1 (LoadF src2))); 1053 1054 format %{ "vaddss $dst, $src1, $src2" %} 1055 ins_cost(150); 1056 ins_encode %{ 1057 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1058 %} 1059 ins_pipe(pipe_slow); 1060%} 1061 1062instruct addF_reg_imm(regF dst, regF src, immF con) %{ 1063 predicate(UseAVX > 0); 1064 match(Set dst (AddF src con)); 1065 1066 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 1067 ins_cost(150); 1068 ins_encode %{ 1069 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1070 %} 1071 ins_pipe(pipe_slow); 1072%} 1073 1074instruct addD_reg(regD dst, regD src) %{ 1075 predicate((UseSSE>=2) && (UseAVX == 0)); 1076 match(Set dst (AddD dst src)); 1077 1078 format %{ "addsd $dst, $src" %} 1079 ins_cost(150); 1080 ins_encode %{ 1081 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 1082 %} 1083 ins_pipe(pipe_slow); 1084%} 1085 1086instruct addD_mem(regD dst, memory src) %{ 1087 predicate((UseSSE>=2) && (UseAVX == 0)); 1088 match(Set dst (AddD dst (LoadD src))); 1089 1090 format %{ "addsd $dst, $src" %} 1091 ins_cost(150); 1092 ins_encode %{ 1093 __ addsd($dst$$XMMRegister, $src$$Address); 1094 %} 1095 ins_pipe(pipe_slow); 1096%} 1097 1098instruct addD_imm(regD dst, immD con) %{ 1099 predicate((UseSSE>=2) && (UseAVX == 0)); 1100 match(Set dst (AddD dst con)); 1101 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 1102 ins_cost(150); 1103 ins_encode %{ 1104 __ addsd($dst$$XMMRegister, $constantaddress($con)); 1105 
%} 1106 ins_pipe(pipe_slow); 1107%} 1108 1109instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 1110 predicate(UseAVX > 0); 1111 match(Set dst (AddD src1 src2)); 1112 1113 format %{ "vaddsd $dst, $src1, $src2" %} 1114 ins_cost(150); 1115 ins_encode %{ 1116 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1117 %} 1118 ins_pipe(pipe_slow); 1119%} 1120 1121instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 1122 predicate(UseAVX > 0); 1123 match(Set dst (AddD src1 (LoadD src2))); 1124 1125 format %{ "vaddsd $dst, $src1, $src2" %} 1126 ins_cost(150); 1127 ins_encode %{ 1128 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1129 %} 1130 ins_pipe(pipe_slow); 1131%} 1132 1133instruct addD_reg_imm(regD dst, regD src, immD con) %{ 1134 predicate(UseAVX > 0); 1135 match(Set dst (AddD src con)); 1136 1137 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 1138 ins_cost(150); 1139 ins_encode %{ 1140 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1141 %} 1142 ins_pipe(pipe_slow); 1143%} 1144 1145instruct subF_reg(regF dst, regF src) %{ 1146 predicate((UseSSE>=1) && (UseAVX == 0)); 1147 match(Set dst (SubF dst src)); 1148 1149 format %{ "subss $dst, $src" %} 1150 ins_cost(150); 1151 ins_encode %{ 1152 __ subss($dst$$XMMRegister, $src$$XMMRegister); 1153 %} 1154 ins_pipe(pipe_slow); 1155%} 1156 1157instruct subF_mem(regF dst, memory src) %{ 1158 predicate((UseSSE>=1) && (UseAVX == 0)); 1159 match(Set dst (SubF dst (LoadF src))); 1160 1161 format %{ "subss $dst, $src" %} 1162 ins_cost(150); 1163 ins_encode %{ 1164 __ subss($dst$$XMMRegister, $src$$Address); 1165 %} 1166 ins_pipe(pipe_slow); 1167%} 1168 1169instruct subF_imm(regF dst, immF con) %{ 1170 predicate((UseSSE>=1) && (UseAVX == 0)); 1171 match(Set dst (SubF dst con)); 1172 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 1173 ins_cost(150); 1174 ins_encode %{ 1175 __ 
subss($dst$$XMMRegister, $constantaddress($con)); 1176 %} 1177 ins_pipe(pipe_slow); 1178%} 1179 1180instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 1181 predicate(UseAVX > 0); 1182 match(Set dst (SubF src1 src2)); 1183 1184 format %{ "vsubss $dst, $src1, $src2" %} 1185 ins_cost(150); 1186 ins_encode %{ 1187 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1188 %} 1189 ins_pipe(pipe_slow); 1190%} 1191 1192instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 1193 predicate(UseAVX > 0); 1194 match(Set dst (SubF src1 (LoadF src2))); 1195 1196 format %{ "vsubss $dst, $src1, $src2" %} 1197 ins_cost(150); 1198 ins_encode %{ 1199 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1200 %} 1201 ins_pipe(pipe_slow); 1202%} 1203 1204instruct subF_reg_imm(regF dst, regF src, immF con) %{ 1205 predicate(UseAVX > 0); 1206 match(Set dst (SubF src con)); 1207 1208 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 1209 ins_cost(150); 1210 ins_encode %{ 1211 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1212 %} 1213 ins_pipe(pipe_slow); 1214%} 1215 1216instruct subD_reg(regD dst, regD src) %{ 1217 predicate((UseSSE>=2) && (UseAVX == 0)); 1218 match(Set dst (SubD dst src)); 1219 1220 format %{ "subsd $dst, $src" %} 1221 ins_cost(150); 1222 ins_encode %{ 1223 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 1224 %} 1225 ins_pipe(pipe_slow); 1226%} 1227 1228instruct subD_mem(regD dst, memory src) %{ 1229 predicate((UseSSE>=2) && (UseAVX == 0)); 1230 match(Set dst (SubD dst (LoadD src))); 1231 1232 format %{ "subsd $dst, $src" %} 1233 ins_cost(150); 1234 ins_encode %{ 1235 __ subsd($dst$$XMMRegister, $src$$Address); 1236 %} 1237 ins_pipe(pipe_slow); 1238%} 1239 1240instruct subD_imm(regD dst, immD con) %{ 1241 predicate((UseSSE>=2) && (UseAVX == 0)); 1242 match(Set dst (SubD dst con)); 1243 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: 
double=$con" %} 1244 ins_cost(150); 1245 ins_encode %{ 1246 __ subsd($dst$$XMMRegister, $constantaddress($con)); 1247 %} 1248 ins_pipe(pipe_slow); 1249%} 1250 1251instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 1252 predicate(UseAVX > 0); 1253 match(Set dst (SubD src1 src2)); 1254 1255 format %{ "vsubsd $dst, $src1, $src2" %} 1256 ins_cost(150); 1257 ins_encode %{ 1258 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1259 %} 1260 ins_pipe(pipe_slow); 1261%} 1262 1263instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 1264 predicate(UseAVX > 0); 1265 match(Set dst (SubD src1 (LoadD src2))); 1266 1267 format %{ "vsubsd $dst, $src1, $src2" %} 1268 ins_cost(150); 1269 ins_encode %{ 1270 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1271 %} 1272 ins_pipe(pipe_slow); 1273%} 1274 1275instruct subD_reg_imm(regD dst, regD src, immD con) %{ 1276 predicate(UseAVX > 0); 1277 match(Set dst (SubD src con)); 1278 1279 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 1280 ins_cost(150); 1281 ins_encode %{ 1282 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1283 %} 1284 ins_pipe(pipe_slow); 1285%} 1286 1287instruct mulF_reg(regF dst, regF src) %{ 1288 predicate((UseSSE>=1) && (UseAVX == 0)); 1289 match(Set dst (MulF dst src)); 1290 1291 format %{ "mulss $dst, $src" %} 1292 ins_cost(150); 1293 ins_encode %{ 1294 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 1295 %} 1296 ins_pipe(pipe_slow); 1297%} 1298 1299instruct mulF_mem(regF dst, memory src) %{ 1300 predicate((UseSSE>=1) && (UseAVX == 0)); 1301 match(Set dst (MulF dst (LoadF src))); 1302 1303 format %{ "mulss $dst, $src" %} 1304 ins_cost(150); 1305 ins_encode %{ 1306 __ mulss($dst$$XMMRegister, $src$$Address); 1307 %} 1308 ins_pipe(pipe_slow); 1309%} 1310 1311instruct mulF_imm(regF dst, immF con) %{ 1312 predicate((UseSSE>=1) && (UseAVX == 0)); 1313 match(Set dst (MulF dst con)); 1314 format %{ "mulss 
$dst, [$constantaddress]\t# load from constant table: float=$con" %} 1315 ins_cost(150); 1316 ins_encode %{ 1317 __ mulss($dst$$XMMRegister, $constantaddress($con)); 1318 %} 1319 ins_pipe(pipe_slow); 1320%} 1321 1322instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 1323 predicate(UseAVX > 0); 1324 match(Set dst (MulF src1 src2)); 1325 1326 format %{ "vmulss $dst, $src1, $src2" %} 1327 ins_cost(150); 1328 ins_encode %{ 1329 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1330 %} 1331 ins_pipe(pipe_slow); 1332%} 1333 1334instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 1335 predicate(UseAVX > 0); 1336 match(Set dst (MulF src1 (LoadF src2))); 1337 1338 format %{ "vmulss $dst, $src1, $src2" %} 1339 ins_cost(150); 1340 ins_encode %{ 1341 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1342 %} 1343 ins_pipe(pipe_slow); 1344%} 1345 1346instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 1347 predicate(UseAVX > 0); 1348 match(Set dst (MulF src con)); 1349 1350 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 1351 ins_cost(150); 1352 ins_encode %{ 1353 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1354 %} 1355 ins_pipe(pipe_slow); 1356%} 1357 1358instruct mulD_reg(regD dst, regD src) %{ 1359 predicate((UseSSE>=2) && (UseAVX == 0)); 1360 match(Set dst (MulD dst src)); 1361 1362 format %{ "mulsd $dst, $src" %} 1363 ins_cost(150); 1364 ins_encode %{ 1365 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 1366 %} 1367 ins_pipe(pipe_slow); 1368%} 1369 1370instruct mulD_mem(regD dst, memory src) %{ 1371 predicate((UseSSE>=2) && (UseAVX == 0)); 1372 match(Set dst (MulD dst (LoadD src))); 1373 1374 format %{ "mulsd $dst, $src" %} 1375 ins_cost(150); 1376 ins_encode %{ 1377 __ mulsd($dst$$XMMRegister, $src$$Address); 1378 %} 1379 ins_pipe(pipe_slow); 1380%} 1381 1382instruct mulD_imm(regD dst, immD con) %{ 1383 predicate((UseSSE>=2) && (UseAVX == 0)); 1384 
match(Set dst (MulD dst con)); 1385 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 1386 ins_cost(150); 1387 ins_encode %{ 1388 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 1389 %} 1390 ins_pipe(pipe_slow); 1391%} 1392 1393instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 1394 predicate(UseAVX > 0); 1395 match(Set dst (MulD src1 src2)); 1396 1397 format %{ "vmulsd $dst, $src1, $src2" %} 1398 ins_cost(150); 1399 ins_encode %{ 1400 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1401 %} 1402 ins_pipe(pipe_slow); 1403%} 1404 1405instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 1406 predicate(UseAVX > 0); 1407 match(Set dst (MulD src1 (LoadD src2))); 1408 1409 format %{ "vmulsd $dst, $src1, $src2" %} 1410 ins_cost(150); 1411 ins_encode %{ 1412 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1413 %} 1414 ins_pipe(pipe_slow); 1415%} 1416 1417instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 1418 predicate(UseAVX > 0); 1419 match(Set dst (MulD src con)); 1420 1421 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 1422 ins_cost(150); 1423 ins_encode %{ 1424 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1425 %} 1426 ins_pipe(pipe_slow); 1427%} 1428 1429instruct divF_reg(regF dst, regF src) %{ 1430 predicate((UseSSE>=1) && (UseAVX == 0)); 1431 match(Set dst (DivF dst src)); 1432 1433 format %{ "divss $dst, $src" %} 1434 ins_cost(150); 1435 ins_encode %{ 1436 __ divss($dst$$XMMRegister, $src$$XMMRegister); 1437 %} 1438 ins_pipe(pipe_slow); 1439%} 1440 1441instruct divF_mem(regF dst, memory src) %{ 1442 predicate((UseSSE>=1) && (UseAVX == 0)); 1443 match(Set dst (DivF dst (LoadF src))); 1444 1445 format %{ "divss $dst, $src" %} 1446 ins_cost(150); 1447 ins_encode %{ 1448 __ divss($dst$$XMMRegister, $src$$Address); 1449 %} 1450 ins_pipe(pipe_slow); 1451%} 1452 1453instruct divF_imm(regF dst, immF con) 
%{ 1454 predicate((UseSSE>=1) && (UseAVX == 0)); 1455 match(Set dst (DivF dst con)); 1456 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 1457 ins_cost(150); 1458 ins_encode %{ 1459 __ divss($dst$$XMMRegister, $constantaddress($con)); 1460 %} 1461 ins_pipe(pipe_slow); 1462%} 1463 1464instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 1465 predicate(UseAVX > 0); 1466 match(Set dst (DivF src1 src2)); 1467 1468 format %{ "vdivss $dst, $src1, $src2" %} 1469 ins_cost(150); 1470 ins_encode %{ 1471 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1472 %} 1473 ins_pipe(pipe_slow); 1474%} 1475 1476instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 1477 predicate(UseAVX > 0); 1478 match(Set dst (DivF src1 (LoadF src2))); 1479 1480 format %{ "vdivss $dst, $src1, $src2" %} 1481 ins_cost(150); 1482 ins_encode %{ 1483 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1484 %} 1485 ins_pipe(pipe_slow); 1486%} 1487 1488instruct divF_reg_imm(regF dst, regF src, immF con) %{ 1489 predicate(UseAVX > 0); 1490 match(Set dst (DivF src con)); 1491 1492 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 1493 ins_cost(150); 1494 ins_encode %{ 1495 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1496 %} 1497 ins_pipe(pipe_slow); 1498%} 1499 1500instruct divD_reg(regD dst, regD src) %{ 1501 predicate((UseSSE>=2) && (UseAVX == 0)); 1502 match(Set dst (DivD dst src)); 1503 1504 format %{ "divsd $dst, $src" %} 1505 ins_cost(150); 1506 ins_encode %{ 1507 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 1508 %} 1509 ins_pipe(pipe_slow); 1510%} 1511 1512instruct divD_mem(regD dst, memory src) %{ 1513 predicate((UseSSE>=2) && (UseAVX == 0)); 1514 match(Set dst (DivD dst (LoadD src))); 1515 1516 format %{ "divsd $dst, $src" %} 1517 ins_cost(150); 1518 ins_encode %{ 1519 __ divsd($dst$$XMMRegister, $src$$Address); 1520 %} 1521 ins_pipe(pipe_slow); 
1522%} 1523 1524instruct divD_imm(regD dst, immD con) %{ 1525 predicate((UseSSE>=2) && (UseAVX == 0)); 1526 match(Set dst (DivD dst con)); 1527 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 1528 ins_cost(150); 1529 ins_encode %{ 1530 __ divsd($dst$$XMMRegister, $constantaddress($con)); 1531 %} 1532 ins_pipe(pipe_slow); 1533%} 1534 1535instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 1536 predicate(UseAVX > 0); 1537 match(Set dst (DivD src1 src2)); 1538 1539 format %{ "vdivsd $dst, $src1, $src2" %} 1540 ins_cost(150); 1541 ins_encode %{ 1542 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1543 %} 1544 ins_pipe(pipe_slow); 1545%} 1546 1547instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 1548 predicate(UseAVX > 0); 1549 match(Set dst (DivD src1 (LoadD src2))); 1550 1551 format %{ "vdivsd $dst, $src1, $src2" %} 1552 ins_cost(150); 1553 ins_encode %{ 1554 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1555 %} 1556 ins_pipe(pipe_slow); 1557%} 1558 1559instruct divD_reg_imm(regD dst, regD src, immD con) %{ 1560 predicate(UseAVX > 0); 1561 match(Set dst (DivD src con)); 1562 1563 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 1564 ins_cost(150); 1565 ins_encode %{ 1566 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1567 %} 1568 ins_pipe(pipe_slow); 1569%} 1570 1571instruct absF_reg(regF dst) %{ 1572 predicate((UseSSE>=1) && (UseAVX == 0)); 1573 match(Set dst (AbsF dst)); 1574 ins_cost(150); 1575 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} 1576 ins_encode %{ 1577 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 1578 %} 1579 ins_pipe(pipe_slow); 1580%} 1581 1582instruct absF_reg_reg(regF dst, regF src) %{ 1583 predicate(UseAVX > 0); 1584 match(Set dst (AbsF src)); 1585 ins_cost(150); 1586 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 
1587 ins_encode %{ 1588 bool vector256 = false; 1589 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 1590 ExternalAddress(float_signmask()), vector256); 1591 %} 1592 ins_pipe(pipe_slow); 1593%} 1594 1595instruct absD_reg(regD dst) %{ 1596 predicate((UseSSE>=2) && (UseAVX == 0)); 1597 match(Set dst (AbsD dst)); 1598 ins_cost(150); 1599 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 1600 "# abs double by sign masking" %} 1601 ins_encode %{ 1602 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 1603 %} 1604 ins_pipe(pipe_slow); 1605%} 1606 1607instruct absD_reg_reg(regD dst, regD src) %{ 1608 predicate(UseAVX > 0); 1609 match(Set dst (AbsD src)); 1610 ins_cost(150); 1611 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 1612 "# abs double by sign masking" %} 1613 ins_encode %{ 1614 bool vector256 = false; 1615 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 1616 ExternalAddress(double_signmask()), vector256); 1617 %} 1618 ins_pipe(pipe_slow); 1619%} 1620 1621instruct negF_reg(regF dst) %{ 1622 predicate((UseSSE>=1) && (UseAVX == 0)); 1623 match(Set dst (NegF dst)); 1624 ins_cost(150); 1625 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 1626 ins_encode %{ 1627 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 1628 %} 1629 ins_pipe(pipe_slow); 1630%} 1631 1632instruct negF_reg_reg(regF dst, regF src) %{ 1633 predicate(UseAVX > 0); 1634 match(Set dst (NegF src)); 1635 ins_cost(150); 1636 format %{ "vxorps $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 1637 ins_encode %{ 1638 bool vector256 = false; 1639 __ vxorps($dst$$XMMRegister, $src$$XMMRegister, 1640 ExternalAddress(float_signflip()), vector256); 1641 %} 1642 ins_pipe(pipe_slow); 1643%} 1644 1645instruct negD_reg(regD dst) %{ 1646 predicate((UseSSE>=2) && (UseAVX == 0)); 1647 match(Set dst (NegD dst)); 1648 ins_cost(150); 1649 format %{ "xorpd $dst, [0x8000000000000000]\t" 1650 "# neg double by sign flipping" %} 1651 ins_encode %{ 1652 __ 
xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 1653 %} 1654 ins_pipe(pipe_slow); 1655%} 1656 1657instruct negD_reg_reg(regD dst, regD src) %{ 1658 predicate(UseAVX > 0); 1659 match(Set dst (NegD src)); 1660 ins_cost(150); 1661 format %{ "vxorpd $dst, $src, [0x8000000000000000]\t" 1662 "# neg double by sign flipping" %} 1663 ins_encode %{ 1664 bool vector256 = false; 1665 __ vxorpd($dst$$XMMRegister, $src$$XMMRegister, 1666 ExternalAddress(double_signflip()), vector256); 1667 %} 1668 ins_pipe(pipe_slow); 1669%} 1670 1671instruct sqrtF_reg(regF dst, regF src) %{ 1672 predicate(UseSSE>=1); 1673 match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); 1674 1675 format %{ "sqrtss $dst, $src" %} 1676 ins_cost(150); 1677 ins_encode %{ 1678 __ sqrtss($dst$$XMMRegister, $src$$XMMRegister); 1679 %} 1680 ins_pipe(pipe_slow); 1681%} 1682 1683instruct sqrtF_mem(regF dst, memory src) %{ 1684 predicate(UseSSE>=1); 1685 match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src))))); 1686 1687 format %{ "sqrtss $dst, $src" %} 1688 ins_cost(150); 1689 ins_encode %{ 1690 __ sqrtss($dst$$XMMRegister, $src$$Address); 1691 %} 1692 ins_pipe(pipe_slow); 1693%} 1694 1695instruct sqrtF_imm(regF dst, immF con) %{ 1696 predicate(UseSSE>=1); 1697 match(Set dst (ConvD2F (SqrtD (ConvF2D con)))); 1698 format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 1699 ins_cost(150); 1700 ins_encode %{ 1701 __ sqrtss($dst$$XMMRegister, $constantaddress($con)); 1702 %} 1703 ins_pipe(pipe_slow); 1704%} 1705 1706instruct sqrtD_reg(regD dst, regD src) %{ 1707 predicate(UseSSE>=2); 1708 match(Set dst (SqrtD src)); 1709 1710 format %{ "sqrtsd $dst, $src" %} 1711 ins_cost(150); 1712 ins_encode %{ 1713 __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister); 1714 %} 1715 ins_pipe(pipe_slow); 1716%} 1717 1718instruct sqrtD_mem(regD dst, memory src) %{ 1719 predicate(UseSSE>=2); 1720 match(Set dst (SqrtD (LoadD src))); 1721 1722 format %{ "sqrtsd $dst, $src" %} 1723 ins_cost(150); 1724 
ins_encode %{ 1725 __ sqrtsd($dst$$XMMRegister, $src$$Address); 1726 %} 1727 ins_pipe(pipe_slow); 1728%} 1729 1730instruct sqrtD_imm(regD dst, immD con) %{ 1731 predicate(UseSSE>=2); 1732 match(Set dst (SqrtD con)); 1733 format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 1734 ins_cost(150); 1735 ins_encode %{ 1736 __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); 1737 %} 1738 ins_pipe(pipe_slow); 1739%} 1740 1741 1742// ====================VECTOR INSTRUCTIONS===================================== 1743 1744// Load vectors (4 bytes long) 1745instruct loadV4(vecS dst, memory mem) %{ 1746 predicate(n->as_LoadVector()->memory_size() == 4); 1747 match(Set dst (LoadVector mem)); 1748 ins_cost(125); 1749 format %{ "movd $dst,$mem\t! load vector (4 bytes)" %} 1750 ins_encode %{ 1751 __ movdl($dst$$XMMRegister, $mem$$Address); 1752 %} 1753 ins_pipe( pipe_slow ); 1754%} 1755 1756// Load vectors (8 bytes long) 1757instruct loadV8(vecD dst, memory mem) %{ 1758 predicate(n->as_LoadVector()->memory_size() == 8); 1759 match(Set dst (LoadVector mem)); 1760 ins_cost(125); 1761 format %{ "movq $dst,$mem\t! load vector (8 bytes)" %} 1762 ins_encode %{ 1763 __ movq($dst$$XMMRegister, $mem$$Address); 1764 %} 1765 ins_pipe( pipe_slow ); 1766%} 1767 1768// Load vectors (16 bytes long) 1769instruct loadV16(vecX dst, memory mem) %{ 1770 predicate(n->as_LoadVector()->memory_size() == 16); 1771 match(Set dst (LoadVector mem)); 1772 ins_cost(125); 1773 format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %} 1774 ins_encode %{ 1775 __ movdqu($dst$$XMMRegister, $mem$$Address); 1776 %} 1777 ins_pipe( pipe_slow ); 1778%} 1779 1780// Load vectors (32 bytes long) 1781instruct loadV32(vecY dst, memory mem) %{ 1782 predicate(n->as_LoadVector()->memory_size() == 32); 1783 match(Set dst (LoadVector mem)); 1784 ins_cost(125); 1785 format %{ "vmovdqu $dst,$mem\t! 
load vector (32 bytes)" %} 1786 ins_encode %{ 1787 __ vmovdqu($dst$$XMMRegister, $mem$$Address); 1788 %} 1789 ins_pipe( pipe_slow ); 1790%} 1791 1792// Store vectors 1793instruct storeV4(memory mem, vecS src) %{ 1794 predicate(n->as_StoreVector()->memory_size() == 4); 1795 match(Set mem (StoreVector mem src)); 1796 ins_cost(145); 1797 format %{ "movd $mem,$src\t! store vector (4 bytes)" %} 1798 ins_encode %{ 1799 __ movdl($mem$$Address, $src$$XMMRegister); 1800 %} 1801 ins_pipe( pipe_slow ); 1802%} 1803 1804instruct storeV8(memory mem, vecD src) %{ 1805 predicate(n->as_StoreVector()->memory_size() == 8); 1806 match(Set mem (StoreVector mem src)); 1807 ins_cost(145); 1808 format %{ "movq $mem,$src\t! store vector (8 bytes)" %} 1809 ins_encode %{ 1810 __ movq($mem$$Address, $src$$XMMRegister); 1811 %} 1812 ins_pipe( pipe_slow ); 1813%} 1814 1815instruct storeV16(memory mem, vecX src) %{ 1816 predicate(n->as_StoreVector()->memory_size() == 16); 1817 match(Set mem (StoreVector mem src)); 1818 ins_cost(145); 1819 format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %} 1820 ins_encode %{ 1821 __ movdqu($mem$$Address, $src$$XMMRegister); 1822 %} 1823 ins_pipe( pipe_slow ); 1824%} 1825 1826instruct storeV32(memory mem, vecY src) %{ 1827 predicate(n->as_StoreVector()->memory_size() == 32); 1828 match(Set mem (StoreVector mem src)); 1829 ins_cost(145); 1830 format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %} 1831 ins_encode %{ 1832 __ vmovdqu($mem$$Address, $src$$XMMRegister); 1833 %} 1834 ins_pipe( pipe_slow ); 1835%} 1836 1837// Replicate byte scalar to be vector 1838instruct Repl4B(vecS dst, rRegI src) %{ 1839 predicate(n->as_Vector()->length() == 4); 1840 match(Set dst (ReplicateB src)); 1841 format %{ "movd $dst,$src\n\t" 1842 "punpcklbw $dst,$dst\n\t" 1843 "pshuflw $dst,$dst,0x00\t! 
replicate4B" %} 1844 ins_encode %{ 1845 __ movdl($dst$$XMMRegister, $src$$Register); 1846 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1847 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1848 %} 1849 ins_pipe( pipe_slow ); 1850%} 1851 1852instruct Repl8B(vecD dst, rRegI src) %{ 1853 predicate(n->as_Vector()->length() == 8); 1854 match(Set dst (ReplicateB src)); 1855 format %{ "movd $dst,$src\n\t" 1856 "punpcklbw $dst,$dst\n\t" 1857 "pshuflw $dst,$dst,0x00\t! replicate8B" %} 1858 ins_encode %{ 1859 __ movdl($dst$$XMMRegister, $src$$Register); 1860 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1861 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1862 %} 1863 ins_pipe( pipe_slow ); 1864%} 1865 1866instruct Repl16B(vecX dst, rRegI src) %{ 1867 predicate(n->as_Vector()->length() == 16); 1868 match(Set dst (ReplicateB src)); 1869 format %{ "movd $dst,$src\n\t" 1870 "punpcklbw $dst,$dst\n\t" 1871 "pshuflw $dst,$dst,0x00\n\t" 1872 "punpcklqdq $dst,$dst\t! replicate16B" %} 1873 ins_encode %{ 1874 __ movdl($dst$$XMMRegister, $src$$Register); 1875 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1876 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1877 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1878 %} 1879 ins_pipe( pipe_slow ); 1880%} 1881 1882instruct Repl32B(vecY dst, rRegI src) %{ 1883 predicate(n->as_Vector()->length() == 32); 1884 match(Set dst (ReplicateB src)); 1885 format %{ "movd $dst,$src\n\t" 1886 "punpcklbw $dst,$dst\n\t" 1887 "pshuflw $dst,$dst,0x00\n\t" 1888 "punpcklqdq $dst,$dst\n\t" 1889 "vinserti128h $dst,$dst,$dst\t! 
replicate32B" %} 1890 ins_encode %{ 1891 __ movdl($dst$$XMMRegister, $src$$Register); 1892 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1893 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1894 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1895 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 1896 %} 1897 ins_pipe( pipe_slow ); 1898%} 1899 1900// Replicate byte scalar immediate to be vector by loading from const table. 1901instruct Repl4B_imm(vecS dst, immI con) %{ 1902 predicate(n->as_Vector()->length() == 4); 1903 match(Set dst (ReplicateB con)); 1904 format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %} 1905 ins_encode %{ 1906 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1))); 1907 %} 1908 ins_pipe( pipe_slow ); 1909%} 1910 1911instruct Repl8B_imm(vecD dst, immI con) %{ 1912 predicate(n->as_Vector()->length() == 8); 1913 match(Set dst (ReplicateB con)); 1914 format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %} 1915 ins_encode %{ 1916 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 1917 %} 1918 ins_pipe( pipe_slow ); 1919%} 1920 1921instruct Repl16B_imm(vecX dst, immI con) %{ 1922 predicate(n->as_Vector()->length() == 16); 1923 match(Set dst (ReplicateB con)); 1924 format %{ "movq $dst,[$constantaddress]\n\t" 1925 "punpcklqdq $dst,$dst\t! replicate16B($con)" %} 1926 ins_encode %{ 1927 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 1928 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1929 %} 1930 ins_pipe( pipe_slow ); 1931%} 1932 1933instruct Repl32B_imm(vecY dst, immI con) %{ 1934 predicate(n->as_Vector()->length() == 32); 1935 match(Set dst (ReplicateB con)); 1936 format %{ "movq $dst,[$constantaddress]\n\t" 1937 "punpcklqdq $dst,$dst\n\t" 1938 "vinserti128h $dst,$dst,$dst\t! 
replicate32B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate byte scalar zero to be vector
instruct Repl4B_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB zero));
  format %{ "pxor    $dst,$dst\t! replicate4B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8B_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB zero));
  format %{ "pxor    $dst,$dst\t! replicate8B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16B_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB zero));
  format %{ "pxor    $dst,$dst\t! replicate16B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl32B_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB zero));
  format %{ "vpxor   $dst,$dst,$dst\t! replicate32B zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
1984 bool vector256 = true; 1985 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 1986 %} 1987 ins_pipe( fpu_reg_reg ); 1988%} 1989 1990// Replicate char/short (2 byte) scalar to be vector 1991instruct Repl2S(vecS dst, rRegI src) %{ 1992 predicate(n->as_Vector()->length() == 2); 1993 match(Set dst (ReplicateS src)); 1994 format %{ "movd $dst,$src\n\t" 1995 "pshuflw $dst,$dst,0x00\t! replicate2S" %} 1996 ins_encode %{ 1997 __ movdl($dst$$XMMRegister, $src$$Register); 1998 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1999 %} 2000 ins_pipe( fpu_reg_reg ); 2001%} 2002 2003instruct Repl4S(vecD dst, rRegI src) %{ 2004 predicate(n->as_Vector()->length() == 4); 2005 match(Set dst (ReplicateS src)); 2006 format %{ "movd $dst,$src\n\t" 2007 "pshuflw $dst,$dst,0x00\t! replicate4S" %} 2008 ins_encode %{ 2009 __ movdl($dst$$XMMRegister, $src$$Register); 2010 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2011 %} 2012 ins_pipe( fpu_reg_reg ); 2013%} 2014 2015instruct Repl8S(vecX dst, rRegI src) %{ 2016 predicate(n->as_Vector()->length() == 8); 2017 match(Set dst (ReplicateS src)); 2018 format %{ "movd $dst,$src\n\t" 2019 "pshuflw $dst,$dst,0x00\n\t" 2020 "punpcklqdq $dst,$dst\t! replicate8S" %} 2021 ins_encode %{ 2022 __ movdl($dst$$XMMRegister, $src$$Register); 2023 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2024 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2025 %} 2026 ins_pipe( pipe_slow ); 2027%} 2028 2029instruct Repl16S(vecY dst, rRegI src) %{ 2030 predicate(n->as_Vector()->length() == 16); 2031 match(Set dst (ReplicateS src)); 2032 format %{ "movd $dst,$src\n\t" 2033 "pshuflw $dst,$dst,0x00\n\t" 2034 "punpcklqdq $dst,$dst\n\t" 2035 "vinserti128h $dst,$dst,$dst\t! 
replicate16S" %} 2036 ins_encode %{ 2037 __ movdl($dst$$XMMRegister, $src$$Register); 2038 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2039 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2040 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2041 %} 2042 ins_pipe( pipe_slow ); 2043%} 2044 2045// Replicate char/short (2 byte) scalar immediate to be vector by loading from const table. 2046instruct Repl2S_imm(vecS dst, immI con) %{ 2047 predicate(n->as_Vector()->length() == 2); 2048 match(Set dst (ReplicateS con)); 2049 format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %} 2050 ins_encode %{ 2051 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2))); 2052 %} 2053 ins_pipe( fpu_reg_reg ); 2054%} 2055 2056instruct Repl4S_imm(vecD dst, immI con) %{ 2057 predicate(n->as_Vector()->length() == 4); 2058 match(Set dst (ReplicateS con)); 2059 format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %} 2060 ins_encode %{ 2061 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 2062 %} 2063 ins_pipe( fpu_reg_reg ); 2064%} 2065 2066instruct Repl8S_imm(vecX dst, immI con) %{ 2067 predicate(n->as_Vector()->length() == 8); 2068 match(Set dst (ReplicateS con)); 2069 format %{ "movq $dst,[$constantaddress]\n\t" 2070 "punpcklqdq $dst,$dst\t! replicate8S($con)" %} 2071 ins_encode %{ 2072 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 2073 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2074 %} 2075 ins_pipe( pipe_slow ); 2076%} 2077 2078instruct Repl16S_imm(vecY dst, immI con) %{ 2079 predicate(n->as_Vector()->length() == 16); 2080 match(Set dst (ReplicateS con)); 2081 format %{ "movq $dst,[$constantaddress]\n\t" 2082 "punpcklqdq $dst,$dst\n\t" 2083 "vinserti128h $dst,$dst,$dst\t! 
replicate16S($con)" %} 2084 ins_encode %{ 2085 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 2086 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2087 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2088 %} 2089 ins_pipe( pipe_slow ); 2090%} 2091 2092// Replicate char/short (2 byte) scalar zero to be vector 2093instruct Repl2S_zero(vecS dst, immI0 zero) %{ 2094 predicate(n->as_Vector()->length() == 2); 2095 match(Set dst (ReplicateS zero)); 2096 format %{ "pxor $dst,$dst\t! replicate2S zero" %} 2097 ins_encode %{ 2098 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 2099 %} 2100 ins_pipe( fpu_reg_reg ); 2101%} 2102 2103instruct Repl4S_zero(vecD dst, immI0 zero) %{ 2104 predicate(n->as_Vector()->length() == 4); 2105 match(Set dst (ReplicateS zero)); 2106 format %{ "pxor $dst,$dst\t! replicate4S zero" %} 2107 ins_encode %{ 2108 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 2109 %} 2110 ins_pipe( fpu_reg_reg ); 2111%} 2112 2113instruct Repl8S_zero(vecX dst, immI0 zero) %{ 2114 predicate(n->as_Vector()->length() == 8); 2115 match(Set dst (ReplicateS zero)); 2116 format %{ "pxor $dst,$dst\t! replicate8S zero" %} 2117 ins_encode %{ 2118 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 2119 %} 2120 ins_pipe( fpu_reg_reg ); 2121%} 2122 2123instruct Repl16S_zero(vecY dst, immI0 zero) %{ 2124 predicate(n->as_Vector()->length() == 16); 2125 match(Set dst (ReplicateS zero)); 2126 format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %} 2127 ins_encode %{ 2128 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 
2129 bool vector256 = true; 2130 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 2131 %} 2132 ins_pipe( fpu_reg_reg ); 2133%} 2134 2135// Replicate integer (4 byte) scalar to be vector 2136instruct Repl2I(vecD dst, rRegI src) %{ 2137 predicate(n->as_Vector()->length() == 2); 2138 match(Set dst (ReplicateI src)); 2139 format %{ "movd $dst,$src\n\t" 2140 "pshufd $dst,$dst,0x00\t! replicate2I" %} 2141 ins_encode %{ 2142 __ movdl($dst$$XMMRegister, $src$$Register); 2143 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2144 %} 2145 ins_pipe( fpu_reg_reg ); 2146%} 2147 2148instruct Repl4I(vecX dst, rRegI src) %{ 2149 predicate(n->as_Vector()->length() == 4); 2150 match(Set dst (ReplicateI src)); 2151 format %{ "movd $dst,$src\n\t" 2152 "pshufd $dst,$dst,0x00\t! replicate4I" %} 2153 ins_encode %{ 2154 __ movdl($dst$$XMMRegister, $src$$Register); 2155 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2156 %} 2157 ins_pipe( pipe_slow ); 2158%} 2159 2160instruct Repl8I(vecY dst, rRegI src) %{ 2161 predicate(n->as_Vector()->length() == 8); 2162 match(Set dst (ReplicateI src)); 2163 format %{ "movd $dst,$src\n\t" 2164 "pshufd $dst,$dst,0x00\n\t" 2165 "vinserti128h $dst,$dst,$dst\t! replicate8I" %} 2166 ins_encode %{ 2167 __ movdl($dst$$XMMRegister, $src$$Register); 2168 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2169 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2170 %} 2171 ins_pipe( pipe_slow ); 2172%} 2173 2174// Replicate integer (4 byte) scalar immediate to be vector by loading from const table. 2175instruct Repl2I_imm(vecD dst, immI con) %{ 2176 predicate(n->as_Vector()->length() == 2); 2177 match(Set dst (ReplicateI con)); 2178 format %{ "movq $dst,[$constantaddress]\t! 
replicate2I($con)" %} 2179 ins_encode %{ 2180 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 2181 %} 2182 ins_pipe( fpu_reg_reg ); 2183%} 2184 2185instruct Repl4I_imm(vecX dst, immI con) %{ 2186 predicate(n->as_Vector()->length() == 4); 2187 match(Set dst (ReplicateI con)); 2188 format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t" 2189 "punpcklqdq $dst,$dst" %} 2190 ins_encode %{ 2191 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 2192 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2193 %} 2194 ins_pipe( pipe_slow ); 2195%} 2196 2197instruct Repl8I_imm(vecY dst, immI con) %{ 2198 predicate(n->as_Vector()->length() == 8); 2199 match(Set dst (ReplicateI con)); 2200 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 2201 "punpcklqdq $dst,$dst\n\t" 2202 "vinserti128h $dst,$dst,$dst" %} 2203 ins_encode %{ 2204 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 2205 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2206 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2207 %} 2208 ins_pipe( pipe_slow ); 2209%} 2210 2211// Integer could be loaded into xmm register directly from memory. 2212instruct Repl2I_mem(vecD dst, memory mem) %{ 2213 predicate(n->as_Vector()->length() == 2); 2214 match(Set dst (ReplicateI (LoadI mem))); 2215 format %{ "movd $dst,$mem\n\t" 2216 "pshufd $dst,$dst,0x00\t! replicate2I" %} 2217 ins_encode %{ 2218 __ movdl($dst$$XMMRegister, $mem$$Address); 2219 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2220 %} 2221 ins_pipe( fpu_reg_reg ); 2222%} 2223 2224instruct Repl4I_mem(vecX dst, memory mem) %{ 2225 predicate(n->as_Vector()->length() == 4); 2226 match(Set dst (ReplicateI (LoadI mem))); 2227 format %{ "movd $dst,$mem\n\t" 2228 "pshufd $dst,$dst,0x00\t! 
replicate4I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd    $dst,$mem\n\t"
            "pshufd  $dst,$dst,0x00\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate integer (4 byte) scalar zero to be vector
instruct Repl2I_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  format %{ "pxor    $dst,$dst\t! replicate2I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI zero));
  format %{ "pxor    $dst,$dst\t! replicate4I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8I_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor   $dst,$dst,$dst\t! replicate8I zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
2277 bool vector256 = true; 2278 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 2279 %} 2280 ins_pipe( fpu_reg_reg ); 2281%} 2282 2283// Replicate long (8 byte) scalar to be vector 2284#ifdef _LP64 2285instruct Repl2L(vecX dst, rRegL src) %{ 2286 predicate(n->as_Vector()->length() == 2); 2287 match(Set dst (ReplicateL src)); 2288 format %{ "movdq $dst,$src\n\t" 2289 "punpcklqdq $dst,$dst\t! replicate2L" %} 2290 ins_encode %{ 2291 __ movdq($dst$$XMMRegister, $src$$Register); 2292 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2293 %} 2294 ins_pipe( pipe_slow ); 2295%} 2296 2297instruct Repl4L(vecY dst, rRegL src) %{ 2298 predicate(n->as_Vector()->length() == 4); 2299 match(Set dst (ReplicateL src)); 2300 format %{ "movdq $dst,$src\n\t" 2301 "punpcklqdq $dst,$dst\n\t" 2302 "vinserti128h $dst,$dst,$dst\t! replicate4L" %} 2303 ins_encode %{ 2304 __ movdq($dst$$XMMRegister, $src$$Register); 2305 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2306 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2307 %} 2308 ins_pipe( pipe_slow ); 2309%} 2310#else // _LP64 2311instruct Repl2L(vecX dst, eRegL src, regD tmp) %{ 2312 predicate(n->as_Vector()->length() == 2); 2313 match(Set dst (ReplicateL src)); 2314 effect(TEMP dst, USE src, TEMP tmp); 2315 format %{ "movdl $dst,$src.lo\n\t" 2316 "movdl $tmp,$src.hi\n\t" 2317 "punpckldq $dst,$tmp\n\t" 2318 "punpcklqdq $dst,$dst\t! 
replicate2L"%} 2319 ins_encode %{ 2320 __ movdl($dst$$XMMRegister, $src$$Register); 2321 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 2322 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 2323 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2324 %} 2325 ins_pipe( pipe_slow ); 2326%} 2327 2328instruct Repl4L(vecY dst, eRegL src, regD tmp) %{ 2329 predicate(n->as_Vector()->length() == 4); 2330 match(Set dst (ReplicateL src)); 2331 effect(TEMP dst, USE src, TEMP tmp); 2332 format %{ "movdl $dst,$src.lo\n\t" 2333 "movdl $tmp,$src.hi\n\t" 2334 "punpckldq $dst,$tmp\n\t" 2335 "punpcklqdq $dst,$dst\n\t" 2336 "vinserti128h $dst,$dst,$dst\t! replicate4L" %} 2337 ins_encode %{ 2338 __ movdl($dst$$XMMRegister, $src$$Register); 2339 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 2340 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 2341 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2342 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2343 %} 2344 ins_pipe( pipe_slow ); 2345%} 2346#endif // _LP64 2347 2348// Replicate long (8 byte) scalar immediate to be vector by loading from const table. 2349instruct Repl2L_imm(vecX dst, immL con) %{ 2350 predicate(n->as_Vector()->length() == 2); 2351 match(Set dst (ReplicateL con)); 2352 format %{ "movq $dst,[$constantaddress]\n\t" 2353 "punpcklqdq $dst,$dst\t! replicate2L($con)" %} 2354 ins_encode %{ 2355 __ movq($dst$$XMMRegister, $constantaddress($con)); 2356 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2357 %} 2358 ins_pipe( pipe_slow ); 2359%} 2360 2361instruct Repl4L_imm(vecY dst, immL con) %{ 2362 predicate(n->as_Vector()->length() == 4); 2363 match(Set dst (ReplicateL con)); 2364 format %{ "movq $dst,[$constantaddress]\n\t" 2365 "punpcklqdq $dst,$dst\n\t" 2366 "vinserti128h $dst,$dst,$dst\t! 
replicate4L($con)" %} 2367 ins_encode %{ 2368 __ movq($dst$$XMMRegister, $constantaddress($con)); 2369 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2370 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2371 %} 2372 ins_pipe( pipe_slow ); 2373%} 2374 2375// Long could be loaded into xmm register directly from memory. 2376instruct Repl2L_mem(vecX dst, memory mem) %{ 2377 predicate(n->as_Vector()->length() == 2); 2378 match(Set dst (ReplicateL (LoadL mem))); 2379 format %{ "movq $dst,$mem\n\t" 2380 "punpcklqdq $dst,$dst\t! replicate2L" %} 2381 ins_encode %{ 2382 __ movq($dst$$XMMRegister, $mem$$Address); 2383 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2384 %} 2385 ins_pipe( pipe_slow ); 2386%} 2387 2388instruct Repl4L_mem(vecY dst, memory mem) %{ 2389 predicate(n->as_Vector()->length() == 4); 2390 match(Set dst (ReplicateL (LoadL mem))); 2391 format %{ "movq $dst,$mem\n\t" 2392 "punpcklqdq $dst,$dst\n\t" 2393 "vinserti128h $dst,$dst,$dst\t! replicate4L" %} 2394 ins_encode %{ 2395 __ movq($dst$$XMMRegister, $mem$$Address); 2396 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2397 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2398 %} 2399 ins_pipe( pipe_slow ); 2400%} 2401 2402// Replicate long (8 byte) scalar zero to be vector 2403instruct Repl2L_zero(vecX dst, immL0 zero) %{ 2404 predicate(n->as_Vector()->length() == 2); 2405 match(Set dst (ReplicateL zero)); 2406 format %{ "pxor $dst,$dst\t! replicate2L zero" %} 2407 ins_encode %{ 2408 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 2409 %} 2410 ins_pipe( fpu_reg_reg ); 2411%} 2412 2413instruct Repl4L_zero(vecY dst, immL0 zero) %{ 2414 predicate(n->as_Vector()->length() == 4); 2415 match(Set dst (ReplicateL zero)); 2416 format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %} 2417 ins_encode %{ 2418 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 
2419 bool vector256 = true; 2420 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 2421 %} 2422 ins_pipe( fpu_reg_reg ); 2423%} 2424 2425// Replicate float (4 byte) scalar to be vector 2426instruct Repl2F(vecD dst, regF src) %{ 2427 predicate(n->as_Vector()->length() == 2); 2428 match(Set dst (ReplicateF src)); 2429 format %{ "pshufd $dst,$dst,0x00\t! replicate2F" %} 2430 ins_encode %{ 2431 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 2432 %} 2433 ins_pipe( fpu_reg_reg ); 2434%} 2435 2436instruct Repl4F(vecX dst, regF src) %{ 2437 predicate(n->as_Vector()->length() == 4); 2438 match(Set dst (ReplicateF src)); 2439 format %{ "pshufd $dst,$dst,0x00\t! replicate4F" %} 2440 ins_encode %{ 2441 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 2442 %} 2443 ins_pipe( pipe_slow ); 2444%} 2445 2446instruct Repl8F(vecY dst, regF src) %{ 2447 predicate(n->as_Vector()->length() == 8); 2448 match(Set dst (ReplicateF src)); 2449 format %{ "pshufd $dst,$src,0x00\n\t" 2450 "vinsertf128h $dst,$dst,$dst\t! replicate8F" %} 2451 ins_encode %{ 2452 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 2453 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2454 %} 2455 ins_pipe( pipe_slow ); 2456%} 2457 2458// Replicate float (4 byte) scalar zero to be vector 2459instruct Repl2F_zero(vecD dst, immF0 zero) %{ 2460 predicate(n->as_Vector()->length() == 2); 2461 match(Set dst (ReplicateF zero)); 2462 format %{ "xorps $dst,$dst\t! replicate2F zero" %} 2463 ins_encode %{ 2464 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 2465 %} 2466 ins_pipe( fpu_reg_reg ); 2467%} 2468 2469instruct Repl4F_zero(vecX dst, immF0 zero) %{ 2470 predicate(n->as_Vector()->length() == 4); 2471 match(Set dst (ReplicateF zero)); 2472 format %{ "xorps $dst,$dst\t! 
replicate4F zero" %} 2473 ins_encode %{ 2474 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 2475 %} 2476 ins_pipe( fpu_reg_reg ); 2477%} 2478 2479instruct Repl8F_zero(vecY dst, immF0 zero) %{ 2480 predicate(n->as_Vector()->length() == 8); 2481 match(Set dst (ReplicateF zero)); 2482 format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %} 2483 ins_encode %{ 2484 bool vector256 = true; 2485 __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 2486 %} 2487 ins_pipe( fpu_reg_reg ); 2488%} 2489 2490// Replicate double (8 bytes) scalar to be vector 2491instruct Repl2D(vecX dst, regD src) %{ 2492 predicate(n->as_Vector()->length() == 2); 2493 match(Set dst (ReplicateD src)); 2494 format %{ "pshufd $dst,$src,0x44\t! replicate2D" %} 2495 ins_encode %{ 2496 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 2497 %} 2498 ins_pipe( pipe_slow ); 2499%} 2500 2501instruct Repl4D(vecY dst, regD src) %{ 2502 predicate(n->as_Vector()->length() == 4); 2503 match(Set dst (ReplicateD src)); 2504 format %{ "pshufd $dst,$src,0x44\n\t" 2505 "vinsertf128h $dst,$dst,$dst\t! replicate4D" %} 2506 ins_encode %{ 2507 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 2508 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2509 %} 2510 ins_pipe( pipe_slow ); 2511%} 2512 2513// Replicate double (8 byte) scalar zero to be vector 2514instruct Repl2D_zero(vecX dst, immD0 zero) %{ 2515 predicate(n->as_Vector()->length() == 2); 2516 match(Set dst (ReplicateD zero)); 2517 format %{ "xorpd $dst,$dst\t! replicate2D zero" %} 2518 ins_encode %{ 2519 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister); 2520 %} 2521 ins_pipe( fpu_reg_reg ); 2522%} 2523 2524instruct Repl4D_zero(vecY dst, immD0 zero) %{ 2525 predicate(n->as_Vector()->length() == 4); 2526 match(Set dst (ReplicateD zero)); 2527 format %{ "vxorpd $dst,$dst,$dst,vect256\t! 
replicate4D zero" %} 2528 ins_encode %{ 2529 bool vector256 = true; 2530 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 2531 %} 2532 ins_pipe( fpu_reg_reg ); 2533%} 2534 2535// ====================VECTOR ARITHMETIC======================================= 2536 2537// --------------------------------- ADD -------------------------------------- 2538 2539// Bytes vector add 2540instruct vadd4B(vecS dst, vecS src) %{ 2541 predicate(n->as_Vector()->length() == 4); 2542 match(Set dst (AddVB dst src)); 2543 format %{ "paddb $dst,$src\t! add packed4B" %} 2544 ins_encode %{ 2545 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 2546 %} 2547 ins_pipe( pipe_slow ); 2548%} 2549 2550instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{ 2551 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 2552 match(Set dst (AddVB src1 src2)); 2553 format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %} 2554 ins_encode %{ 2555 bool vector256 = false; 2556 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2557 %} 2558 ins_pipe( pipe_slow ); 2559%} 2560 2561instruct vadd8B(vecD dst, vecD src) %{ 2562 predicate(n->as_Vector()->length() == 8); 2563 match(Set dst (AddVB dst src)); 2564 format %{ "paddb $dst,$src\t! add packed8B" %} 2565 ins_encode %{ 2566 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 2567 %} 2568 ins_pipe( pipe_slow ); 2569%} 2570 2571instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{ 2572 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 2573 match(Set dst (AddVB src1 src2)); 2574 format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %} 2575 ins_encode %{ 2576 bool vector256 = false; 2577 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2578 %} 2579 ins_pipe( pipe_slow ); 2580%} 2581 2582instruct vadd16B(vecX dst, vecX src) %{ 2583 predicate(n->as_Vector()->length() == 16); 2584 match(Set dst (AddVB dst src)); 2585 format %{ "paddb $dst,$src\t! 
add packed16B" %} 2586 ins_encode %{ 2587 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 2588 %} 2589 ins_pipe( pipe_slow ); 2590%} 2591 2592instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{ 2593 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 2594 match(Set dst (AddVB src1 src2)); 2595 format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %} 2596 ins_encode %{ 2597 bool vector256 = false; 2598 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2599 %} 2600 ins_pipe( pipe_slow ); 2601%} 2602 2603instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{ 2604 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 2605 match(Set dst (AddVB src (LoadVector mem))); 2606 format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %} 2607 ins_encode %{ 2608 bool vector256 = false; 2609 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 2610 %} 2611 ins_pipe( pipe_slow ); 2612%} 2613 2614instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{ 2615 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 2616 match(Set dst (AddVB src1 src2)); 2617 format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %} 2618 ins_encode %{ 2619 bool vector256 = true; 2620 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2621 %} 2622 ins_pipe( pipe_slow ); 2623%} 2624 2625instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{ 2626 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 2627 match(Set dst (AddVB src (LoadVector mem))); 2628 format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %} 2629 ins_encode %{ 2630 bool vector256 = true; 2631 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 2632 %} 2633 ins_pipe( pipe_slow ); 2634%} 2635 2636// Shorts/Chars vector add 2637instruct vadd2S(vecS dst, vecS src) %{ 2638 predicate(n->as_Vector()->length() == 2); 2639 match(Set dst (AddVS dst src)); 2640 format %{ "paddw $dst,$src\t! 
add packed2S" %} 2641 ins_encode %{ 2642 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 2643 %} 2644 ins_pipe( pipe_slow ); 2645%} 2646 2647instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{ 2648 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 2649 match(Set dst (AddVS src1 src2)); 2650 format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %} 2651 ins_encode %{ 2652 bool vector256 = false; 2653 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2654 %} 2655 ins_pipe( pipe_slow ); 2656%} 2657 2658instruct vadd4S(vecD dst, vecD src) %{ 2659 predicate(n->as_Vector()->length() == 4); 2660 match(Set dst (AddVS dst src)); 2661 format %{ "paddw $dst,$src\t! add packed4S" %} 2662 ins_encode %{ 2663 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 2664 %} 2665 ins_pipe( pipe_slow ); 2666%} 2667 2668instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{ 2669 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 2670 match(Set dst (AddVS src1 src2)); 2671 format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %} 2672 ins_encode %{ 2673 bool vector256 = false; 2674 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2675 %} 2676 ins_pipe( pipe_slow ); 2677%} 2678 2679instruct vadd8S(vecX dst, vecX src) %{ 2680 predicate(n->as_Vector()->length() == 8); 2681 match(Set dst (AddVS dst src)); 2682 format %{ "paddw $dst,$src\t! add packed8S" %} 2683 ins_encode %{ 2684 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 2685 %} 2686 ins_pipe( pipe_slow ); 2687%} 2688 2689instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{ 2690 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 2691 match(Set dst (AddVS src1 src2)); 2692 format %{ "vpaddw $dst,$src1,$src2\t! 
add packed8S" %} 2693 ins_encode %{ 2694 bool vector256 = false; 2695 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2696 %} 2697 ins_pipe( pipe_slow ); 2698%} 2699 2700instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{ 2701 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 2702 match(Set dst (AddVS src (LoadVector mem))); 2703 format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %} 2704 ins_encode %{ 2705 bool vector256 = false; 2706 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 2707 %} 2708 ins_pipe( pipe_slow ); 2709%} 2710 2711instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{ 2712 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 2713 match(Set dst (AddVS src1 src2)); 2714 format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %} 2715 ins_encode %{ 2716 bool vector256 = true; 2717 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2718 %} 2719 ins_pipe( pipe_slow ); 2720%} 2721 2722instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{ 2723 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 2724 match(Set dst (AddVS src (LoadVector mem))); 2725 format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %} 2726 ins_encode %{ 2727 bool vector256 = true; 2728 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 2729 %} 2730 ins_pipe( pipe_slow ); 2731%} 2732 2733// Integers vector add 2734instruct vadd2I(vecD dst, vecD src) %{ 2735 predicate(n->as_Vector()->length() == 2); 2736 match(Set dst (AddVI dst src)); 2737 format %{ "paddd $dst,$src\t! add packed2I" %} 2738 ins_encode %{ 2739 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 2740 %} 2741 ins_pipe( pipe_slow ); 2742%} 2743 2744instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{ 2745 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 2746 match(Set dst (AddVI src1 src2)); 2747 format %{ "vpaddd $dst,$src1,$src2\t! 
add packed2I" %} 2748 ins_encode %{ 2749 bool vector256 = false; 2750 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2751 %} 2752 ins_pipe( pipe_slow ); 2753%} 2754 2755instruct vadd4I(vecX dst, vecX src) %{ 2756 predicate(n->as_Vector()->length() == 4); 2757 match(Set dst (AddVI dst src)); 2758 format %{ "paddd $dst,$src\t! add packed4I" %} 2759 ins_encode %{ 2760 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 2761 %} 2762 ins_pipe( pipe_slow ); 2763%} 2764 2765instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{ 2766 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 2767 match(Set dst (AddVI src1 src2)); 2768 format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %} 2769 ins_encode %{ 2770 bool vector256 = false; 2771 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2772 %} 2773 ins_pipe( pipe_slow ); 2774%} 2775 2776instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{ 2777 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 2778 match(Set dst (AddVI src (LoadVector mem))); 2779 format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %} 2780 ins_encode %{ 2781 bool vector256 = false; 2782 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 2783 %} 2784 ins_pipe( pipe_slow ); 2785%} 2786 2787instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{ 2788 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 2789 match(Set dst (AddVI src1 src2)); 2790 format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %} 2791 ins_encode %{ 2792 bool vector256 = true; 2793 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2794 %} 2795 ins_pipe( pipe_slow ); 2796%} 2797 2798instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{ 2799 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 2800 match(Set dst (AddVI src (LoadVector mem))); 2801 format %{ "vpaddd $dst,$src,$mem\t! 
add packed8I" %} 2802 ins_encode %{ 2803 bool vector256 = true; 2804 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 2805 %} 2806 ins_pipe( pipe_slow ); 2807%} 2808 2809// Longs vector add 2810instruct vadd2L(vecX dst, vecX src) %{ 2811 predicate(n->as_Vector()->length() == 2); 2812 match(Set dst (AddVL dst src)); 2813 format %{ "paddq $dst,$src\t! add packed2L" %} 2814 ins_encode %{ 2815 __ paddq($dst$$XMMRegister, $src$$XMMRegister); 2816 %} 2817 ins_pipe( pipe_slow ); 2818%} 2819 2820instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{ 2821 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 2822 match(Set dst (AddVL src1 src2)); 2823 format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %} 2824 ins_encode %{ 2825 bool vector256 = false; 2826 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2827 %} 2828 ins_pipe( pipe_slow ); 2829%} 2830 2831instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{ 2832 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 2833 match(Set dst (AddVL src (LoadVector mem))); 2834 format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %} 2835 ins_encode %{ 2836 bool vector256 = false; 2837 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 2838 %} 2839 ins_pipe( pipe_slow ); 2840%} 2841 2842instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{ 2843 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 2844 match(Set dst (AddVL src1 src2)); 2845 format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %} 2846 ins_encode %{ 2847 bool vector256 = true; 2848 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2849 %} 2850 ins_pipe( pipe_slow ); 2851%} 2852 2853instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{ 2854 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 2855 match(Set dst (AddVL src (LoadVector mem))); 2856 format %{ "vpaddq $dst,$src,$mem\t! 
add packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector add
// 64-bit (vecD) and 128-bit (vecX) SSE forms are two-address; AVX forms are
// three-operand. FP 256-bit forms need only UseAVX > 0 (AVX1 provides 256-bit
// FP arithmetic).
instruct vadd2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed2F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %}
  ins_encode %{
    bool vector256 = false;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed4F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector add
instruct vadd2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD dst src));
  format %{ "addpd $dst,$src\t! add packed2D" %}
  ins_encode %{
    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! 
add packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- SUB --------------------------------------

// Bytes vector sub
// vecS = 32-bit (4 bytes), vecD = 64-bit (8 bytes), vecX = 128-bit (16 bytes).
instruct vsub4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed4B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed8B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed16B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit integer subtract: gated on UseAVX > 1.
instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! 
sub packed32B" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector sub
instruct vsub2S(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed2S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed4S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed8S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! 
sub packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector sub
instruct vsub2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed2I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed4I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector sub
instruct vsub2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL dst src));
  format %{ "psubq $dst,$src\t! sub packed2L" %}
  ins_encode %{
    __ psubq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! 
sub packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector sub
instruct vsub2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed2F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %}
  ins_encode %{
    bool vector256 = false;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed4F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector sub
instruct vsub2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD dst src));
  format %{ "subpd $dst,$src\t! 
sub packed2D" %}
  ins_encode %{
    __ subpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- MUL --------------------------------------

// Shorts/Chars vector mul
// pmullw keeps the low 16 bits of each 16x16 product.
instruct vmul2S(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed2S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed4S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! 
mul packed8S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector mul (sse4_1)
// NOTE: unlike the other non-AVX forms in this section, pmulld needs
// SSE4.1, hence the extra UseSSE > 3 gate.
instruct vmul2I(vecD dst, vecD src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed2I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I(vecX dst, vecX src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed4I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! 
mul packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector mul
instruct vmul2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed2F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %}
  ins_encode %{
    bool vector256 = false;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed4F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector mul
instruct vmul2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD dst src));
  format %{ "mulpd $dst,$src\t! mul packed2D" %}
  ins_encode %{
    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! 
mul packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- DIV --------------------------------------

// Floats vector div
// Division exists only for FP vectors here; x86 has no packed integer divide.
instruct vdiv2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed2F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %}
  ins_encode %{
    bool vector256 = false;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed4F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! 
div packed8F" %} 3819 ins_encode %{ 3820 bool vector256 = true; 3821 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3822 %} 3823 ins_pipe( pipe_slow ); 3824%} 3825 3826instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{ 3827 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 3828 match(Set dst (DivVF src (LoadVector mem))); 3829 format %{ "vdivps $dst,$src,$mem\t! div packed8F" %} 3830 ins_encode %{ 3831 bool vector256 = true; 3832 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3833 %} 3834 ins_pipe( pipe_slow ); 3835%} 3836 3837// Doubles vector div 3838instruct vdiv2D(vecX dst, vecX src) %{ 3839 predicate(n->as_Vector()->length() == 2); 3840 match(Set dst (DivVD dst src)); 3841 format %{ "divpd $dst,$src\t! div packed2D" %} 3842 ins_encode %{ 3843 __ divpd($dst$$XMMRegister, $src$$XMMRegister); 3844 %} 3845 ins_pipe( pipe_slow ); 3846%} 3847 3848instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{ 3849 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3850 match(Set dst (DivVD src1 src2)); 3851 format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %} 3852 ins_encode %{ 3853 bool vector256 = false; 3854 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3855 %} 3856 ins_pipe( pipe_slow ); 3857%} 3858 3859instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{ 3860 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3861 match(Set dst (DivVD src (LoadVector mem))); 3862 format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %} 3863 ins_encode %{ 3864 bool vector256 = false; 3865 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3866 %} 3867 ins_pipe( pipe_slow ); 3868%} 3869 3870instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{ 3871 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3872 match(Set dst (DivVD src1 src2)); 3873 format %{ "vdivpd $dst,$src1,$src2\t! 
div packed4D" %} 3874 ins_encode %{ 3875 bool vector256 = true; 3876 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3877 %} 3878 ins_pipe( pipe_slow ); 3879%} 3880 3881instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{ 3882 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3883 match(Set dst (DivVD src (LoadVector mem))); 3884 format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %} 3885 ins_encode %{ 3886 bool vector256 = true; 3887 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3888 %} 3889 ins_pipe( pipe_slow ); 3890%} 3891 3892// ------------------------------ Shift --------------------------------------- 3893 3894// Left and right shift count vectors are the same on x86 3895// (only lowest bits of xmm reg are used for count). 3896instruct vshiftcnt(vecS dst, rRegI cnt) %{ 3897 match(Set dst (LShiftCntV cnt)); 3898 match(Set dst (RShiftCntV cnt)); 3899 format %{ "movd $dst,$cnt\t! load shift count" %} 3900 ins_encode %{ 3901 __ movdl($dst$$XMMRegister, $cnt$$Register); 3902 %} 3903 ins_pipe( pipe_slow ); 3904%} 3905 3906// ------------------------------ LeftShift ----------------------------------- 3907 3908// Shorts/Chars vector left shift 3909instruct vsll2S(vecS dst, vecS shift) %{ 3910 predicate(n->as_Vector()->length() == 2); 3911 match(Set dst (LShiftVS dst shift)); 3912 format %{ "psllw $dst,$shift\t! left shift packed2S" %} 3913 ins_encode %{ 3914 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 3915 %} 3916 ins_pipe( pipe_slow ); 3917%} 3918 3919instruct vsll2S_imm(vecS dst, immI8 shift) %{ 3920 predicate(n->as_Vector()->length() == 2); 3921 match(Set dst (LShiftVS dst shift)); 3922 format %{ "psllw $dst,$shift\t! 
left shift packed2S" %} 3923 ins_encode %{ 3924 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 3925 %} 3926 ins_pipe( pipe_slow ); 3927%} 3928 3929instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{ 3930 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3931 match(Set dst (LShiftVS src shift)); 3932 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 3933 ins_encode %{ 3934 bool vector256 = false; 3935 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 3936 %} 3937 ins_pipe( pipe_slow ); 3938%} 3939 3940instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ 3941 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3942 match(Set dst (LShiftVS src shift)); 3943 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 3944 ins_encode %{ 3945 bool vector256 = false; 3946 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 3947 %} 3948 ins_pipe( pipe_slow ); 3949%} 3950 3951instruct vsll4S(vecD dst, vecS shift) %{ 3952 predicate(n->as_Vector()->length() == 4); 3953 match(Set dst (LShiftVS dst shift)); 3954 format %{ "psllw $dst,$shift\t! left shift packed4S" %} 3955 ins_encode %{ 3956 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 3957 %} 3958 ins_pipe( pipe_slow ); 3959%} 3960 3961instruct vsll4S_imm(vecD dst, immI8 shift) %{ 3962 predicate(n->as_Vector()->length() == 4); 3963 match(Set dst (LShiftVS dst shift)); 3964 format %{ "psllw $dst,$shift\t! left shift packed4S" %} 3965 ins_encode %{ 3966 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 3967 %} 3968 ins_pipe( pipe_slow ); 3969%} 3970 3971instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{ 3972 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3973 match(Set dst (LShiftVS src shift)); 3974 format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed4S" %} 3975 ins_encode %{ 3976 bool vector256 = false; 3977 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 3978 %} 3979 ins_pipe( pipe_slow ); 3980%} 3981 3982instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ 3983 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3984 match(Set dst (LShiftVS src shift)); 3985 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 3986 ins_encode %{ 3987 bool vector256 = false; 3988 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 3989 %} 3990 ins_pipe( pipe_slow ); 3991%} 3992 3993instruct vsll8S(vecX dst, vecS shift) %{ 3994 predicate(n->as_Vector()->length() == 8); 3995 match(Set dst (LShiftVS dst shift)); 3996 format %{ "psllw $dst,$shift\t! left shift packed8S" %} 3997 ins_encode %{ 3998 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 3999 %} 4000 ins_pipe( pipe_slow ); 4001%} 4002 4003instruct vsll8S_imm(vecX dst, immI8 shift) %{ 4004 predicate(n->as_Vector()->length() == 8); 4005 match(Set dst (LShiftVS dst shift)); 4006 format %{ "psllw $dst,$shift\t! left shift packed8S" %} 4007 ins_encode %{ 4008 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 4009 %} 4010 ins_pipe( pipe_slow ); 4011%} 4012 4013instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{ 4014 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 4015 match(Set dst (LShiftVS src shift)); 4016 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 4017 ins_encode %{ 4018 bool vector256 = false; 4019 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4020 %} 4021 ins_pipe( pipe_slow ); 4022%} 4023 4024instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ 4025 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 4026 match(Set dst (LShiftVS src shift)); 4027 format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed8S" %} 4028 ins_encode %{ 4029 bool vector256 = false; 4030 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4031 %} 4032 ins_pipe( pipe_slow ); 4033%} 4034 4035instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{ 4036 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 4037 match(Set dst (LShiftVS src shift)); 4038 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 4039 ins_encode %{ 4040 bool vector256 = true; 4041 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4042 %} 4043 ins_pipe( pipe_slow ); 4044%} 4045 4046instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ 4047 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 4048 match(Set dst (LShiftVS src shift)); 4049 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 4050 ins_encode %{ 4051 bool vector256 = true; 4052 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4053 %} 4054 ins_pipe( pipe_slow ); 4055%} 4056 4057// Integers vector left shift 4058instruct vsll2I(vecD dst, vecS shift) %{ 4059 predicate(n->as_Vector()->length() == 2); 4060 match(Set dst (LShiftVI dst shift)); 4061 format %{ "pslld $dst,$shift\t! left shift packed2I" %} 4062 ins_encode %{ 4063 __ pslld($dst$$XMMRegister, $shift$$XMMRegister); 4064 %} 4065 ins_pipe( pipe_slow ); 4066%} 4067 4068instruct vsll2I_imm(vecD dst, immI8 shift) %{ 4069 predicate(n->as_Vector()->length() == 2); 4070 match(Set dst (LShiftVI dst shift)); 4071 format %{ "pslld $dst,$shift\t! left shift packed2I" %} 4072 ins_encode %{ 4073 __ pslld($dst$$XMMRegister, (int)$shift$$constant); 4074 %} 4075 ins_pipe( pipe_slow ); 4076%} 4077 4078instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{ 4079 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4080 match(Set dst (LShiftVI src shift)); 4081 format %{ "vpslld $dst,$src,$shift\t! 
left shift packed2I" %} 4082 ins_encode %{ 4083 bool vector256 = false; 4084 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4085 %} 4086 ins_pipe( pipe_slow ); 4087%} 4088 4089instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 4090 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4091 match(Set dst (LShiftVI src shift)); 4092 format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %} 4093 ins_encode %{ 4094 bool vector256 = false; 4095 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4096 %} 4097 ins_pipe( pipe_slow ); 4098%} 4099 4100instruct vsll4I(vecX dst, vecS shift) %{ 4101 predicate(n->as_Vector()->length() == 4); 4102 match(Set dst (LShiftVI dst shift)); 4103 format %{ "pslld $dst,$shift\t! left shift packed4I" %} 4104 ins_encode %{ 4105 __ pslld($dst$$XMMRegister, $shift$$XMMRegister); 4106 %} 4107 ins_pipe( pipe_slow ); 4108%} 4109 4110instruct vsll4I_imm(vecX dst, immI8 shift) %{ 4111 predicate(n->as_Vector()->length() == 4); 4112 match(Set dst (LShiftVI dst shift)); 4113 format %{ "pslld $dst,$shift\t! left shift packed4I" %} 4114 ins_encode %{ 4115 __ pslld($dst$$XMMRegister, (int)$shift$$constant); 4116 %} 4117 ins_pipe( pipe_slow ); 4118%} 4119 4120instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{ 4121 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4122 match(Set dst (LShiftVI src shift)); 4123 format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %} 4124 ins_encode %{ 4125 bool vector256 = false; 4126 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4127 %} 4128 ins_pipe( pipe_slow ); 4129%} 4130 4131instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 4132 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4133 match(Set dst (LShiftVI src shift)); 4134 format %{ "vpslld $dst,$src,$shift\t! 
left shift packed4I" %} 4135 ins_encode %{ 4136 bool vector256 = false; 4137 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4138 %} 4139 ins_pipe( pipe_slow ); 4140%} 4141 4142instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{ 4143 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 4144 match(Set dst (LShiftVI src shift)); 4145 format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} 4146 ins_encode %{ 4147 bool vector256 = true; 4148 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4149 %} 4150 ins_pipe( pipe_slow ); 4151%} 4152 4153instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 4154 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 4155 match(Set dst (LShiftVI src shift)); 4156 format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} 4157 ins_encode %{ 4158 bool vector256 = true; 4159 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4160 %} 4161 ins_pipe( pipe_slow ); 4162%} 4163 4164// Longs vector left shift 4165instruct vsll2L(vecX dst, vecS shift) %{ 4166 predicate(n->as_Vector()->length() == 2); 4167 match(Set dst (LShiftVL dst shift)); 4168 format %{ "psllq $dst,$shift\t! left shift packed2L" %} 4169 ins_encode %{ 4170 __ psllq($dst$$XMMRegister, $shift$$XMMRegister); 4171 %} 4172 ins_pipe( pipe_slow ); 4173%} 4174 4175instruct vsll2L_imm(vecX dst, immI8 shift) %{ 4176 predicate(n->as_Vector()->length() == 2); 4177 match(Set dst (LShiftVL dst shift)); 4178 format %{ "psllq $dst,$shift\t! left shift packed2L" %} 4179 ins_encode %{ 4180 __ psllq($dst$$XMMRegister, (int)$shift$$constant); 4181 %} 4182 ins_pipe( pipe_slow ); 4183%} 4184 4185instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{ 4186 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4187 match(Set dst (LShiftVL src shift)); 4188 format %{ "vpsllq $dst,$src,$shift\t! 
left shift packed2L" %} 4189 ins_encode %{ 4190 bool vector256 = false; 4191 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4192 %} 4193 ins_pipe( pipe_slow ); 4194%} 4195 4196instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ 4197 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4198 match(Set dst (LShiftVL src shift)); 4199 format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %} 4200 ins_encode %{ 4201 bool vector256 = false; 4202 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4203 %} 4204 ins_pipe( pipe_slow ); 4205%} 4206 4207instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{ 4208 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 4209 match(Set dst (LShiftVL src shift)); 4210 format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} 4211 ins_encode %{ 4212 bool vector256 = true; 4213 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4214 %} 4215 ins_pipe( pipe_slow ); 4216%} 4217 4218instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ 4219 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 4220 match(Set dst (LShiftVL src shift)); 4221 format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} 4222 ins_encode %{ 4223 bool vector256 = true; 4224 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4225 %} 4226 ins_pipe( pipe_slow ); 4227%} 4228 4229// ----------------------- LogicalRightShift ----------------------------------- 4230 4231// Shorts vector logical right shift produces incorrect Java result 4232// for negative data because java code convert short value into int with 4233// sign extension before a shift. But char vectors are fine since chars are 4234// unsigned values. 4235 4236instruct vsrl2S(vecS dst, vecS shift) %{ 4237 predicate(n->as_Vector()->length() == 2); 4238 match(Set dst (URShiftVS dst shift)); 4239 format %{ "psrlw $dst,$shift\t! 
logical right shift packed2S" %} 4240 ins_encode %{ 4241 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); 4242 %} 4243 ins_pipe( pipe_slow ); 4244%} 4245 4246instruct vsrl2S_imm(vecS dst, immI8 shift) %{ 4247 predicate(n->as_Vector()->length() == 2); 4248 match(Set dst (URShiftVS dst shift)); 4249 format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %} 4250 ins_encode %{ 4251 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); 4252 %} 4253 ins_pipe( pipe_slow ); 4254%} 4255 4256instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{ 4257 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4258 match(Set dst (URShiftVS src shift)); 4259 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} 4260 ins_encode %{ 4261 bool vector256 = false; 4262 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4263 %} 4264 ins_pipe( pipe_slow ); 4265%} 4266 4267instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ 4268 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4269 match(Set dst (URShiftVS src shift)); 4270 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} 4271 ins_encode %{ 4272 bool vector256 = false; 4273 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4274 %} 4275 ins_pipe( pipe_slow ); 4276%} 4277 4278instruct vsrl4S(vecD dst, vecS shift) %{ 4279 predicate(n->as_Vector()->length() == 4); 4280 match(Set dst (URShiftVS dst shift)); 4281 format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} 4282 ins_encode %{ 4283 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); 4284 %} 4285 ins_pipe( pipe_slow ); 4286%} 4287 4288instruct vsrl4S_imm(vecD dst, immI8 shift) %{ 4289 predicate(n->as_Vector()->length() == 4); 4290 match(Set dst (URShiftVS dst shift)); 4291 format %{ "psrlw $dst,$shift\t! 
logical right shift packed4S" %} 4292 ins_encode %{ 4293 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); 4294 %} 4295 ins_pipe( pipe_slow ); 4296%} 4297 4298instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{ 4299 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4300 match(Set dst (URShiftVS src shift)); 4301 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 4302 ins_encode %{ 4303 bool vector256 = false; 4304 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4305 %} 4306 ins_pipe( pipe_slow ); 4307%} 4308 4309instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ 4310 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4311 match(Set dst (URShiftVS src shift)); 4312 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 4313 ins_encode %{ 4314 bool vector256 = false; 4315 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4316 %} 4317 ins_pipe( pipe_slow ); 4318%} 4319 4320instruct vsrl8S(vecX dst, vecS shift) %{ 4321 predicate(n->as_Vector()->length() == 8); 4322 match(Set dst (URShiftVS dst shift)); 4323 format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} 4324 ins_encode %{ 4325 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); 4326 %} 4327 ins_pipe( pipe_slow ); 4328%} 4329 4330instruct vsrl8S_imm(vecX dst, immI8 shift) %{ 4331 predicate(n->as_Vector()->length() == 8); 4332 match(Set dst (URShiftVS dst shift)); 4333 format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} 4334 ins_encode %{ 4335 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); 4336 %} 4337 ins_pipe( pipe_slow ); 4338%} 4339 4340instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{ 4341 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 4342 match(Set dst (URShiftVS src shift)); 4343 format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed8S" %} 4344 ins_encode %{ 4345 bool vector256 = false; 4346 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4347 %} 4348 ins_pipe( pipe_slow ); 4349%} 4350 4351instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ 4352 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 4353 match(Set dst (URShiftVS src shift)); 4354 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 4355 ins_encode %{ 4356 bool vector256 = false; 4357 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4358 %} 4359 ins_pipe( pipe_slow ); 4360%} 4361 4362instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{ 4363 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 4364 match(Set dst (URShiftVS src shift)); 4365 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 4366 ins_encode %{ 4367 bool vector256 = true; 4368 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4369 %} 4370 ins_pipe( pipe_slow ); 4371%} 4372 4373instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ 4374 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 4375 match(Set dst (URShiftVS src shift)); 4376 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 4377 ins_encode %{ 4378 bool vector256 = true; 4379 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4380 %} 4381 ins_pipe( pipe_slow ); 4382%} 4383 4384// Integers vector logical right shift 4385instruct vsrl2I(vecD dst, vecS shift) %{ 4386 predicate(n->as_Vector()->length() == 2); 4387 match(Set dst (URShiftVI dst shift)); 4388 format %{ "psrld $dst,$shift\t! 
logical right shift packed2I" %} 4389 ins_encode %{ 4390 __ psrld($dst$$XMMRegister, $shift$$XMMRegister); 4391 %} 4392 ins_pipe( pipe_slow ); 4393%} 4394 4395instruct vsrl2I_imm(vecD dst, immI8 shift) %{ 4396 predicate(n->as_Vector()->length() == 2); 4397 match(Set dst (URShiftVI dst shift)); 4398 format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} 4399 ins_encode %{ 4400 __ psrld($dst$$XMMRegister, (int)$shift$$constant); 4401 %} 4402 ins_pipe( pipe_slow ); 4403%} 4404 4405instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{ 4406 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4407 match(Set dst (URShiftVI src shift)); 4408 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} 4409 ins_encode %{ 4410 bool vector256 = false; 4411 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4412 %} 4413 ins_pipe( pipe_slow ); 4414%} 4415 4416instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 4417 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4418 match(Set dst (URShiftVI src shift)); 4419 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} 4420 ins_encode %{ 4421 bool vector256 = false; 4422 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4423 %} 4424 ins_pipe( pipe_slow ); 4425%} 4426 4427instruct vsrl4I(vecX dst, vecS shift) %{ 4428 predicate(n->as_Vector()->length() == 4); 4429 match(Set dst (URShiftVI dst shift)); 4430 format %{ "psrld $dst,$shift\t! logical right shift packed4I" %} 4431 ins_encode %{ 4432 __ psrld($dst$$XMMRegister, $shift$$XMMRegister); 4433 %} 4434 ins_pipe( pipe_slow ); 4435%} 4436 4437instruct vsrl4I_imm(vecX dst, immI8 shift) %{ 4438 predicate(n->as_Vector()->length() == 4); 4439 match(Set dst (URShiftVI dst shift)); 4440 format %{ "psrld $dst,$shift\t! 
logical right shift packed4I" %} 4441 ins_encode %{ 4442 __ psrld($dst$$XMMRegister, (int)$shift$$constant); 4443 %} 4444 ins_pipe( pipe_slow ); 4445%} 4446 4447instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{ 4448 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4449 match(Set dst (URShiftVI src shift)); 4450 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %} 4451 ins_encode %{ 4452 bool vector256 = false; 4453 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4454 %} 4455 ins_pipe( pipe_slow ); 4456%} 4457 4458instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 4459 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4460 match(Set dst (URShiftVI src shift)); 4461 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %} 4462 ins_encode %{ 4463 bool vector256 = false; 4464 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4465 %} 4466 ins_pipe( pipe_slow ); 4467%} 4468 4469instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{ 4470 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 4471 match(Set dst (URShiftVI src shift)); 4472 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} 4473 ins_encode %{ 4474 bool vector256 = true; 4475 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4476 %} 4477 ins_pipe( pipe_slow ); 4478%} 4479 4480instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 4481 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 4482 match(Set dst (URShiftVI src shift)); 4483 format %{ "vpsrld $dst,$src,$shift\t! 
logical right shift packed8I" %} 4484 ins_encode %{ 4485 bool vector256 = true; 4486 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4487 %} 4488 ins_pipe( pipe_slow ); 4489%} 4490 4491// Longs vector logical right shift 4492instruct vsrl2L(vecX dst, vecS shift) %{ 4493 predicate(n->as_Vector()->length() == 2); 4494 match(Set dst (URShiftVL dst shift)); 4495 format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %} 4496 ins_encode %{ 4497 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); 4498 %} 4499 ins_pipe( pipe_slow ); 4500%} 4501 4502instruct vsrl2L_imm(vecX dst, immI8 shift) %{ 4503 predicate(n->as_Vector()->length() == 2); 4504 match(Set dst (URShiftVL dst shift)); 4505 format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %} 4506 ins_encode %{ 4507 __ psrlq($dst$$XMMRegister, (int)$shift$$constant); 4508 %} 4509 ins_pipe( pipe_slow ); 4510%} 4511 4512instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{ 4513 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4514 match(Set dst (URShiftVL src shift)); 4515 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %} 4516 ins_encode %{ 4517 bool vector256 = false; 4518 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4519 %} 4520 ins_pipe( pipe_slow ); 4521%} 4522 4523instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ 4524 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4525 match(Set dst (URShiftVL src shift)); 4526 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %} 4527 ins_encode %{ 4528 bool vector256 = false; 4529 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4530 %} 4531 ins_pipe( pipe_slow ); 4532%} 4533 4534instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{ 4535 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 4536 match(Set dst (URShiftVL src shift)); 4537 format %{ "vpsrlq $dst,$src,$shift\t! 
logical right shift packed4L" %} 4538 ins_encode %{ 4539 bool vector256 = true; 4540 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4541 %} 4542 ins_pipe( pipe_slow ); 4543%} 4544 4545instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ 4546 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 4547 match(Set dst (URShiftVL src shift)); 4548 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %} 4549 ins_encode %{ 4550 bool vector256 = true; 4551 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4552 %} 4553 ins_pipe( pipe_slow ); 4554%} 4555 4556// ------------------- ArithmeticRightShift ----------------------------------- 4557 4558// Shorts/Chars vector arithmetic right shift 4559instruct vsra2S(vecS dst, vecS shift) %{ 4560 predicate(n->as_Vector()->length() == 2); 4561 match(Set dst (RShiftVS dst shift)); 4562 format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %} 4563 ins_encode %{ 4564 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); 4565 %} 4566 ins_pipe( pipe_slow ); 4567%} 4568 4569instruct vsra2S_imm(vecS dst, immI8 shift) %{ 4570 predicate(n->as_Vector()->length() == 2); 4571 match(Set dst (RShiftVS dst shift)); 4572 format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %} 4573 ins_encode %{ 4574 __ psraw($dst$$XMMRegister, (int)$shift$$constant); 4575 %} 4576 ins_pipe( pipe_slow ); 4577%} 4578 4579instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{ 4580 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4581 match(Set dst (RShiftVS src shift)); 4582 format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed2S" %} 4583 ins_encode %{ 4584 bool vector256 = false; 4585 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4586 %} 4587 ins_pipe( pipe_slow ); 4588%} 4589 4590instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ 4591 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4592 match(Set dst (RShiftVS src shift)); 4593 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} 4594 ins_encode %{ 4595 bool vector256 = false; 4596 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4597 %} 4598 ins_pipe( pipe_slow ); 4599%} 4600 4601instruct vsra4S(vecD dst, vecS shift) %{ 4602 predicate(n->as_Vector()->length() == 4); 4603 match(Set dst (RShiftVS dst shift)); 4604 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} 4605 ins_encode %{ 4606 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); 4607 %} 4608 ins_pipe( pipe_slow ); 4609%} 4610 4611instruct vsra4S_imm(vecD dst, immI8 shift) %{ 4612 predicate(n->as_Vector()->length() == 4); 4613 match(Set dst (RShiftVS dst shift)); 4614 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} 4615 ins_encode %{ 4616 __ psraw($dst$$XMMRegister, (int)$shift$$constant); 4617 %} 4618 ins_pipe( pipe_slow ); 4619%} 4620 4621instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{ 4622 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4623 match(Set dst (RShiftVS src shift)); 4624 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 4625 ins_encode %{ 4626 bool vector256 = false; 4627 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4628 %} 4629 ins_pipe( pipe_slow ); 4630%} 4631 4632instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ 4633 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4634 match(Set dst (RShiftVS src shift)); 4635 format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector arithmetic right shift (continued).
//
// Variant naming convention used throughout this section:
//   vOPn          - SSE two-operand form; dst is also the left input
//                   (matches "Set dst (Op dst shift)"), so the encoding is
//                   destructive on dst.
//   vOPn_imm      - SSE form taking an 8-bit immediate shift count (immI8).
//   vOPn_reg      - AVX three-operand VEX form (dst = src OP shift),
//                   non-destructive; guarded by UseAVX > 0.
//   vOPn_reg_imm  - AVX three-operand form with an immediate shift count.
//
// The local 'vector256' flag selects the 256-bit (YMM) VEX encoding. It is
// true only for vecY operands, whose predicates additionally require
// UseAVX > 1 (i.e. AVX2 for 256-bit integer operations); all 128-bit-or-
// smaller forms pass false.

instruct vsra8S(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit (16 short) forms: AVX2 only (UseAVX > 1), YMM encoding.
instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector arithmetic right shift
// Same variant scheme as above, using psrad/vpsrad on packed 32-bit ints.
instruct vsra2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit (8 int) forms: AVX2 only (UseAVX > 1), YMM encoding.
instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// There are no longs vector arithmetic right shift instructions.


// --------------------------------- AND --------------------------------------
// Bitwise operations are element-size agnostic, so these match on
// length_in_bytes() rather than element count. The SSE forms are
// destructive (dst &= src); the _reg/_mem AVX forms are three-operand,
// with _mem folding a LoadVector directly into the instruction.

instruct vand4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (4 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (8 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (16 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %}
  ins_encode %{
    bool vector256 = true;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %}
  ins_encode %{
    bool vector256 = true;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- OR ---------------------------------------
// Same structure as the AND section, using por/vpor.

instruct vor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (4 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (8 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (16 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %}
  ins_encode %{
    bool vector256 = true;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %}
  ins_encode %{
    bool vector256 = true;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- XOR --------------------------------------
// Same structure as the AND section, using pxor/vpxor.

instruct vxor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
  ins_encode %{
    bool vector256 = true;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %}
  ins_encode %{
    bool vector256 = true;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}
