x86_32.ad revision 9801:80f8be586fae
1// 2// Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. 3// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4// 5// This code is free software; you can redistribute it and/or modify it 6// under the terms of the GNU General Public License version 2 only, as 7// published by the Free Software Foundation. 8// 9// This code is distributed in the hope that it will be useful, but WITHOUT 10// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12// version 2 for more details (a copy is included in the LICENSE file that 13// accompanied this code). 14// 15// You should have received a copy of the GNU General Public License version 16// 2 along with this work; if not, write to the Free Software Foundation, 17// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18// 19// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20// or visit www.oracle.com if you need additional information or have any 21// questions. 22// 23// 24 25// X86 Architecture Description File 26 27//----------REGISTER DEFINITION BLOCK------------------------------------------ 28// This information is used by the matcher and the register allocator to 29// describe individual registers and classes of registers within the target 30// archtecture. 31 32register %{ 33//----------Architecture Description Register Definitions---------------------- 34// General Registers 35// "reg_def" name ( register save type, C convention save type, 36// ideal register type, encoding ); 37// Register Save Types: 38// 39// NS = No-Save: The register allocator assumes that these registers 40// can be used without saving upon entry to the method, & 41// that they do not need to be saved at call sites. 42// 43// SOC = Save-On-Call: The register allocator assumes that these registers 44// can be used without saving upon entry to the method, 45// but that they must be saved at call sites. 46// 47// SOE = Save-On-Entry: The register allocator assumes that these registers 48// must be saved before using them upon entry to the 49// method, but they do not need to be saved at call 50// sites. 51// 52// AS = Always-Save: The register allocator assumes that these registers 53// must be saved before using them upon entry to the 54// method, & that they must be saved at call sites. 55// 56// Ideal Register Type is used to determine how to save & restore a 57// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get 58// spilled with LoadP/StoreP. If the register supports both, use Op_RegI. 59// 60// The encoding number is the actual bit-pattern placed into the opcodes. 61 62// General Registers 63// Previously set EBX, ESI, and EDI as save-on-entry for java code 64// Turn off SOE in java-code due to frequent use of uncommon-traps. 65// Now that allocator is better, turn on ESI and EDI as SOE registers. 66 67reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg()); 68reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg()); 69reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg()); 70reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg()); 71// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code 72reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg()); 73reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg()); 74reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg()); 75reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg()); 76 77// Float registers. We treat TOS/FPR0 special. It is invisible to the 78// allocator, and only shows up in the encodings. 79reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad()); 80reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad()); 81// Ok so here's the trick FPR1 is really st(0) except in the midst 82// of emission of assembly for a machnode. During the emission the fpu stack 83// is pushed making FPR1 == st(1) temporarily. However at any safepoint 84// the stack will not have this element so FPR1 == st(0) from the 85// oopMap viewpoint. This same weirdness with numbering causes 86// instruction encoding to have to play games with the register 87// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation 88// where it does flt->flt moves to see an example 89// 90reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()); 91reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next()); 92reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()); 93reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next()); 94reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()); 95reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next()); 96reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()); 97reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next()); 98reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()); 99reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next()); 100reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()); 101reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next()); 102reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()); 103reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next()); 104// 105// Empty fill registers, which are never used, but supply alignment to xmm regs 106// 107reg_def FILL0( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(2)); 108reg_def FILL1( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(3)); 109reg_def FILL2( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(4)); 110reg_def FILL3( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(5)); 111reg_def FILL4( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(6)); 112reg_def FILL5( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(7)); 113reg_def FILL6( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(8)); 114reg_def FILL7( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(9)); 115 116// Specify priority of register selection within phases of register 117// allocation. Highest priority is first. A useful heuristic is to 118// give registers a low priority when they are required by machine 119// instructions, like EAX and EDX. Registers which are used as 120// pairs must fall on an even boundary (witness the FPR#L's in this list). 121// For the Intel integer registers, the equivalent Long pairs are 122// EDX:EAX, EBX:ECX, and EDI:EBP. 123alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP, 124 FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H, 125 FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H, 126 FPR6L, FPR6H, FPR7L, FPR7H, 127 FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7); 128 129 130//----------Architecture Description Register Classes-------------------------- 131// Several register classes are automatically defined based upon information in 132// this architecture description. 133// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ ) 134// 2) reg_class compiler_method_oop_reg ( /* as def'd in frame section */ ) 135// 2) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ ) 136// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) 137// 138// Class for no registers (empty set). 139reg_class no_reg(); 140 141// Class for all registers 142reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP); 143// Class for all registers (excluding EBP) 144reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP); 145// Dynamic register class that selects at runtime between register classes 146// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer). 147// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg; 148reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %}); 149 150// Class for general registers 151reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX); 152// Class for general registers (excluding EBP). 153// This register class can be used for implicit null checks on win95. 154// It is also safe for use by tailjumps (we don't want to allocate in ebp). 155// Used also if the PreserveFramePointer flag is true. 156reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX); 157// Dynamic register class that selects between int_reg and int_reg_no_ebp. 158reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %}); 159 160// Class of "X" registers 161reg_class int_x_reg(EBX, ECX, EDX, EAX); 162 163// Class of registers that can appear in an address with no offset. 164// EBP and ESP require an extra instruction byte for zero offset. 165// Used in fast-unlock 166reg_class p_reg(EDX, EDI, ESI, EBX); 167 168// Class for general registers excluding ECX 169reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX); 170// Class for general registers excluding ECX (and EBP) 171reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX); 172// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp. 173reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %}); 174 175// Class for general registers excluding EAX 176reg_class nax_reg(EDX, EDI, ESI, ECX, EBX); 177 178// Class for general registers excluding EAX and EBX. 179reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP); 180// Class for general registers excluding EAX and EBX (and EBP) 181reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX); 182// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp. 183reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %}); 184 185// Class of EAX (for multiply and divide operations) 186reg_class eax_reg(EAX); 187 188// Class of EBX (for atomic add) 189reg_class ebx_reg(EBX); 190 191// Class of ECX (for shift and JCXZ operations and cmpLTMask) 192reg_class ecx_reg(ECX); 193 194// Class of EDX (for multiply and divide operations) 195reg_class edx_reg(EDX); 196 197// Class of EDI (for synchronization) 198reg_class edi_reg(EDI); 199 200// Class of ESI (for synchronization) 201reg_class esi_reg(ESI); 202 203// Singleton class for stack pointer 204reg_class sp_reg(ESP); 205 206// Singleton class for instruction pointer 207// reg_class ip_reg(EIP); 208 209// Class of integer register pairs 210reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI ); 211// Class of integer register pairs (excluding EBP and EDI); 212reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX ); 213// Dynamic register class that selects between long_reg and long_reg_no_ebp. 214reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %}); 215 216// Class of integer register pairs that aligns with calling convention 217reg_class eadx_reg( EAX,EDX ); 218reg_class ebcx_reg( ECX,EBX ); 219 220// Not AX or DX, used in divides 221reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP); 222// Not AX or DX (and neither EBP), used in divides 223reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI); 224// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp. 225reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %}); 226 227// Floating point registers. Notice FPR0 is not a choice. 228// FPR0 is not ever allocated; we use clever encodings to fake 229// a 2-address instructions out of Intels FP stack. 230reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L ); 231 232reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H, 233 FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H, 234 FPR7L,FPR7H ); 235 236reg_class fp_flt_reg0( FPR1L ); 237reg_class fp_dbl_reg0( FPR1L,FPR1H ); 238reg_class fp_dbl_reg1( FPR2L,FPR2H ); 239reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H, 240 FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H ); 241 242%} 243 244 245//----------SOURCE BLOCK------------------------------------------------------- 246// This is a block of C++ code which provides values, functions, and 247// definitions necessary in the rest of the architecture description 248source_hpp %{ 249// Must be visible to the DFA in dfa_x86_32.cpp 250extern bool is_operand_hi32_zero(Node* n); 251%} 252 253source %{ 254#define RELOC_IMM32 Assembler::imm_operand 255#define RELOC_DISP32 Assembler::disp32_operand 256 257#define __ _masm. 258 259// How to find the high register of a Long pair, given the low register 260#define HIGH_FROM_LOW(x) ((x)+2) 261 262// These masks are used to provide 128-bit aligned bitmasks to the XMM 263// instructions, to allow sign-masking or sign-bit flipping. They allow 264// fast versions of NegF/NegD and AbsF/AbsD. 265 266// Note: 'double' and 'long long' have 32-bits alignment on x86. 267static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) { 268 // Use the expression (adr)&(~0xF) to provide 128-bits aligned address 269 // of 128-bits operands for SSE instructions. 270 jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF))); 271 // Store the value to a 128-bits operand. 272 operand[0] = lo; 273 operand[1] = hi; 274 return operand; 275} 276 277// Buffer for 128-bits masks used by SSE instructions. 278static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment) 279 280// Static initialization during VM startup. 281static jlong *float_signmask_pool = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF)); 282static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF)); 283static jlong *float_signflip_pool = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000)); 284static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000)); 285 286// Offset hacking within calls. 287static int pre_call_resets_size() { 288 int size = 0; 289 Compile* C = Compile::current(); 290 if (C->in_24_bit_fp_mode()) { 291 size += 6; // fldcw 292 } 293 if (C->max_vector_size() > 16) { 294 size += 3; // vzeroupper 295 } 296 return size; 297} 298 299// !!!!! Special hack to get all type of calls to specify the byte offset 300// from the start of the call to the point where the return address 301// will point. 302int MachCallStaticJavaNode::ret_addr_offset() { 303 return 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points 304} 305 306int MachCallDynamicJavaNode::ret_addr_offset() { 307 return 10 + pre_call_resets_size(); // 10 bytes from start of call to where return address points 308} 309 310static int sizeof_FFree_Float_Stack_All = -1; 311 312int MachCallRuntimeNode::ret_addr_offset() { 313 assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already"); 314 return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size(); 315} 316 317// Indicate if the safepoint node needs the polling page as an input. 318// Since x86 does have absolute addressing, it doesn't. 319bool SafePointNode::needs_polling_address_input() { 320 return false; 321} 322 323// 324// Compute padding required for nodes which need alignment 325// 326 327// The address of the call instruction needs to be 4-byte aligned to 328// ensure that it does not span a cache line so that it can be patched. 329int CallStaticJavaDirectNode::compute_padding(int current_offset) const { 330 current_offset += pre_call_resets_size(); // skip fldcw, if any 331 current_offset += 1; // skip call opcode byte 332 return round_to(current_offset, alignment_required()) - current_offset; 333} 334 335// The address of the call instruction needs to be 4-byte aligned to 336// ensure that it does not span a cache line so that it can be patched. 337int CallDynamicJavaDirectNode::compute_padding(int current_offset) const { 338 current_offset += pre_call_resets_size(); // skip fldcw, if any 339 current_offset += 5; // skip MOV instruction 340 current_offset += 1; // skip call opcode byte 341 return round_to(current_offset, alignment_required()) - current_offset; 342} 343 344// EMIT_RM() 345void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) { 346 unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3); 347 cbuf.insts()->emit_int8(c); 348} 349 350// EMIT_CC() 351void emit_cc(CodeBuffer &cbuf, int f1, int f2) { 352 unsigned char c = (unsigned char)( f1 | f2 ); 353 cbuf.insts()->emit_int8(c); 354} 355 356// EMIT_OPCODE() 357void emit_opcode(CodeBuffer &cbuf, int code) { 358 cbuf.insts()->emit_int8((unsigned char) code); 359} 360 361// EMIT_OPCODE() w/ relocation information 362void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) { 363 cbuf.relocate(cbuf.insts_mark() + offset, reloc); 364 emit_opcode(cbuf, code); 365} 366 367// EMIT_D8() 368void emit_d8(CodeBuffer &cbuf, int d8) { 369 cbuf.insts()->emit_int8((unsigned char) d8); 370} 371 372// EMIT_D16() 373void emit_d16(CodeBuffer &cbuf, int d16) { 374 cbuf.insts()->emit_int16(d16); 375} 376 377// EMIT_D32() 378void emit_d32(CodeBuffer &cbuf, int d32) { 379 cbuf.insts()->emit_int32(d32); 380} 381 382// emit 32 bit value and construct relocation entry from relocInfo::relocType 383void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc, 384 int format) { 385 cbuf.relocate(cbuf.insts_mark(), reloc, format); 386 cbuf.insts()->emit_int32(d32); 387} 388 389// emit 32 bit value and construct relocation entry from RelocationHolder 390void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec, 391 int format) { 392#ifdef ASSERT 393 if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) { 394 assert(cast_to_oop(d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code"); 395 } 396#endif 397 cbuf.relocate(cbuf.insts_mark(), rspec, format); 398 cbuf.insts()->emit_int32(d32); 399} 400 401// Access stack slot for load or store 402void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) { 403 emit_opcode( cbuf, opcode ); // (e.g., FILD [ESP+src]) 404 if( -128 <= disp && disp <= 127 ) { 405 emit_rm( cbuf, 0x01, rm_field, ESP_enc ); // R/M byte 406 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte 407 emit_d8 (cbuf, disp); // Displacement // R/M byte 408 } else { 409 emit_rm( cbuf, 0x02, rm_field, ESP_enc ); // R/M byte 410 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte 411 emit_d32(cbuf, disp); // Displacement // R/M byte 412 } 413} 414 415 // rRegI ereg, memory mem) %{ // emit_reg_mem 416void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) { 417 // There is no index & no scale, use form without SIB byte 418 if ((index == 0x4) && 419 (scale == 0) && (base != ESP_enc)) { 420 // If no displacement, mode is 0x0; unless base is [EBP] 421 if ( (displace == 0) && (base != EBP_enc) ) { 422 emit_rm(cbuf, 0x0, reg_encoding, base); 423 } 424 else { // If 8-bit displacement, mode 0x1 425 if ((displace >= -128) && (displace <= 127) 426 && (disp_reloc == relocInfo::none) ) { 427 emit_rm(cbuf, 0x1, reg_encoding, base); 428 emit_d8(cbuf, displace); 429 } 430 else { // If 32-bit displacement 431 if (base == -1) { // Special flag for absolute address 432 emit_rm(cbuf, 0x0, reg_encoding, 0x5); 433 // (manual lies; no SIB needed here) 434 if ( disp_reloc != relocInfo::none ) { 435 emit_d32_reloc(cbuf, displace, disp_reloc, 1); 436 } else { 437 emit_d32 (cbuf, displace); 438 } 439 } 440 else { // Normal base + offset 441 emit_rm(cbuf, 0x2, reg_encoding, base); 442 if ( disp_reloc != relocInfo::none ) { 443 emit_d32_reloc(cbuf, displace, disp_reloc, 1); 444 } else { 445 emit_d32 (cbuf, displace); 446 } 447 } 448 } 449 } 450 } 451 else { // Else, encode with the SIB byte 452 // If no displacement, mode is 0x0; unless base is [EBP] 453 if (displace == 0 && (base != EBP_enc)) { // If no displacement 454 emit_rm(cbuf, 0x0, reg_encoding, 0x4); 455 emit_rm(cbuf, scale, index, base); 456 } 457 else { // If 8-bit displacement, mode 0x1 458 if ((displace >= -128) && (displace <= 127) 459 && (disp_reloc == relocInfo::none) ) { 460 emit_rm(cbuf, 0x1, reg_encoding, 0x4); 461 emit_rm(cbuf, scale, index, base); 462 emit_d8(cbuf, displace); 463 } 464 else { // If 32-bit displacement 465 if (base == 0x04 ) { 466 emit_rm(cbuf, 0x2, reg_encoding, 0x4); 467 emit_rm(cbuf, scale, index, 0x04); 468 } else { 469 emit_rm(cbuf, 0x2, reg_encoding, 0x4); 470 emit_rm(cbuf, scale, index, base); 471 } 472 if ( disp_reloc != relocInfo::none ) { 473 emit_d32_reloc(cbuf, displace, disp_reloc, 1); 474 } else { 475 emit_d32 (cbuf, displace); 476 } 477 } 478 } 479 } 480} 481 482 483void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) { 484 if( dst_encoding == src_encoding ) { 485 // reg-reg copy, use an empty encoding 486 } else { 487 emit_opcode( cbuf, 0x8B ); 488 emit_rm(cbuf, 0x3, dst_encoding, src_encoding ); 489 } 490} 491 492void emit_cmpfp_fixup(MacroAssembler& _masm) { 493 Label exit; 494 __ jccb(Assembler::noParity, exit); 495 __ pushf(); 496 // 497 // comiss/ucomiss instructions set ZF,PF,CF flags and 498 // zero OF,AF,SF for NaN values. 499 // Fixup flags by zeroing ZF,PF so that compare of NaN 500 // values returns 'less than' result (CF is set). 501 // Leave the rest of flags unchanged. 502 // 503 // 7 6 5 4 3 2 1 0 504 // |S|Z|r|A|r|P|r|C| (r - reserved bit) 505 // 0 0 1 0 1 0 1 1 (0x2B) 506 // 507 __ andl(Address(rsp, 0), 0xffffff2b); 508 __ popf(); 509 __ bind(exit); 510} 511 512void emit_cmpfp3(MacroAssembler& _masm, Register dst) { 513 Label done; 514 __ movl(dst, -1); 515 __ jcc(Assembler::parity, done); 516 __ jcc(Assembler::below, done); 517 __ setb(Assembler::notEqual, dst); 518 __ movzbl(dst, dst); 519 __ bind(done); 520} 521 522 523//============================================================================= 524const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty; 525 526int Compile::ConstantTable::calculate_table_base_offset() const { 527 return 0; // absolute addressing, no offset 528} 529 530bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } 531void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) { 532 ShouldNotReachHere(); 533} 534 535void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { 536 // Empty encoding 537} 538 539uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { 540 return 0; 541} 542 543#ifndef PRODUCT 544void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { 545 st->print("# MachConstantBaseNode (empty encoding)"); 546} 547#endif 548 549 550//============================================================================= 551#ifndef PRODUCT 552void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const { 553 Compile* C = ra_->C; 554 555 int framesize = C->frame_size_in_bytes(); 556 int bangsize = C->bang_size_in_bytes(); 557 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); 558 // Remove wordSize for return addr which is already pushed. 559 framesize -= wordSize; 560 561 if (C->need_stack_bang(bangsize)) { 562 framesize -= wordSize; 563 st->print("# stack bang (%d bytes)", bangsize); 564 st->print("\n\t"); 565 st->print("PUSH EBP\t# Save EBP"); 566 if (PreserveFramePointer) { 567 st->print("\n\t"); 568 st->print("MOV EBP, ESP\t# Save the caller's SP into EBP"); 569 } 570 if (framesize) { 571 st->print("\n\t"); 572 st->print("SUB ESP, #%d\t# Create frame",framesize); 573 } 574 } else { 575 st->print("SUB ESP, #%d\t# Create frame",framesize); 576 st->print("\n\t"); 577 framesize -= wordSize; 578 st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize); 579 if (PreserveFramePointer) { 580 st->print("\n\t"); 581 st->print("MOV EBP, ESP\t# Save the caller's SP into EBP"); 582 if (framesize > 0) { 583 st->print("\n\t"); 584 st->print("ADD EBP, #%d", framesize); 585 } 586 } 587 } 588 589 if (VerifyStackAtCalls) { 590 st->print("\n\t"); 591 framesize -= wordSize; 592 st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize); 593 } 594 595 if( C->in_24_bit_fp_mode() ) { 596 st->print("\n\t"); 597 st->print("FLDCW \t# load 24 bit fpu control word"); 598 } 599 if (UseSSE >= 2 && VerifyFPU) { 600 st->print("\n\t"); 601 st->print("# verify FPU stack (must be clean on entry)"); 602 } 603 604#ifdef ASSERT 605 if (VerifyStackAtCalls) { 606 st->print("\n\t"); 607 st->print("# stack alignment check"); 608 } 609#endif 610 st->cr(); 611} 612#endif 613 614 615void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 616 Compile* C = ra_->C; 617 MacroAssembler _masm(&cbuf); 618 619 int framesize = C->frame_size_in_bytes(); 620 int bangsize = C->bang_size_in_bytes(); 621 622 __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode()); 623 624 C->set_frame_complete(cbuf.insts_size()); 625 626 if (C->has_mach_constant_base_node()) { 627 // NOTE: We set the table base offset here because users might be 628 // emitted before MachConstantBaseNode. 629 Compile::ConstantTable& constant_table = C->constant_table(); 630 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); 631 } 632} 633 634uint MachPrologNode::size(PhaseRegAlloc *ra_) const { 635 return MachNode::size(ra_); // too many variables; just compute it the hard way 636} 637 638int MachPrologNode::reloc() const { 639 return 0; // a large enough number 640} 641 642//============================================================================= 643#ifndef PRODUCT 644void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 645 Compile *C = ra_->C; 646 int framesize = C->frame_size_in_bytes(); 647 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); 648 // Remove two words for return addr and rbp, 649 framesize -= 2*wordSize; 650 651 if (C->max_vector_size() > 16) { 652 st->print("VZEROUPPER"); 653 st->cr(); st->print("\t"); 654 } 655 if (C->in_24_bit_fp_mode()) { 656 st->print("FLDCW standard control word"); 657 st->cr(); st->print("\t"); 658 } 659 if (framesize) { 660 st->print("ADD ESP,%d\t# Destroy frame",framesize); 661 st->cr(); st->print("\t"); 662 } 663 st->print_cr("POPL EBP"); st->print("\t"); 664 if (do_polling() && C->is_method_compilation()) { 665 st->print("TEST PollPage,EAX\t! Poll Safepoint"); 666 st->cr(); st->print("\t"); 667 } 668} 669#endif 670 671void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 672 Compile *C = ra_->C; 673 MacroAssembler _masm(&cbuf); 674 675 if (C->max_vector_size() > 16) { 676 // Clear upper bits of YMM registers when current compiled code uses 677 // wide vectors to avoid AVX <-> SSE transition penalty during call. 678 _masm.vzeroupper(); 679 } 680 // If method set FPU control word, restore to standard control word 681 if (C->in_24_bit_fp_mode()) { 682 _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); 683 } 684 685 int framesize = C->frame_size_in_bytes(); 686 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); 687 // Remove two words for return addr and rbp, 688 framesize -= 2*wordSize; 689 690 // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here 691 692 if (framesize >= 128) { 693 emit_opcode(cbuf, 0x81); // add SP, #framesize 694 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 695 emit_d32(cbuf, framesize); 696 } else if (framesize) { 697 emit_opcode(cbuf, 0x83); // add SP, #framesize 698 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 699 emit_d8(cbuf, framesize); 700 } 701 702 emit_opcode(cbuf, 0x58 | EBP_enc); 703 704 if (StackReservedPages > 0 && C->has_reserved_stack_access()) { 705 __ reserved_stack_check(); 706 } 707 708 if (do_polling() && C->is_method_compilation()) { 709 cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0); 710 emit_opcode(cbuf,0x85); 711 emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX 712 emit_d32(cbuf, (intptr_t)os::get_polling_page()); 713 } 714} 715 716uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { 717 Compile *C = ra_->C; 718 // If method set FPU control word, restore to standard control word 719 int size = C->in_24_bit_fp_mode() ? 6 : 0; 720 if (C->max_vector_size() > 16) size += 3; // vzeroupper 721 if (do_polling() && C->is_method_compilation()) size += 6; 722 723 int framesize = C->frame_size_in_bytes(); 724 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); 725 // Remove two words for return addr and rbp, 726 framesize -= 2*wordSize; 727 728 size++; // popl rbp, 729 730 if (framesize >= 128) { 731 size += 6; 732 } else { 733 size += framesize ? 3 : 0; 734 } 735 size += 64; // added to support ReservedStackAccess 736 return size; 737} 738 739int MachEpilogNode::reloc() const { 740 return 0; // a large enough number 741} 742 743const Pipeline * MachEpilogNode::pipeline() const { 744 return MachNode::pipeline_class(); 745} 746 747int MachEpilogNode::safepoint_offset() const { return 0; } 748 749//============================================================================= 750 751enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack }; 752static enum RC rc_class( OptoReg::Name reg ) { 753 754 if( !OptoReg::is_valid(reg) ) return rc_bad; 755 if (OptoReg::is_stack(reg)) return rc_stack; 756 757 VMReg r = OptoReg::as_VMReg(reg); 758 if (r->is_Register()) return rc_int; 759 if (r->is_FloatRegister()) { 760 assert(UseSSE < 2, "shouldn't be used in SSE2+ mode"); 761 return rc_float; 762 } 763 assert(r->is_XMMRegister(), "must be"); 764 return rc_xmm; 765} 766 767static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg, 768 int opcode, const char *op_str, int size, outputStream* st ) { 769 if( cbuf ) { 770 emit_opcode (*cbuf, opcode ); 771 encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none); 772#ifndef PRODUCT 773 } else if( !do_size ) { 774 if( size != 0 ) st->print("\n\t"); 775 if( opcode == 0x8B || opcode == 0x89 ) { // MOV 776 if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset); 777 else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]); 778 } else { // FLD, FST, PUSH, POP 779 st->print("%s [ESP + #%d]",op_str,offset); 780 } 781#endif 782 } 783 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 784 return size+3+offset_size; 785} 786 787// Helper for XMM registers. Extra opcode bits, limited syntax. 788static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load, 789 int offset, int reg_lo, int reg_hi, int size, outputStream* st ) { 790 int in_size_in_bits = Assembler::EVEX_32bit; 791 int evex_encoding = 0; 792 if (reg_lo+1 == reg_hi) { 793 in_size_in_bits = Assembler::EVEX_64bit; 794 evex_encoding = Assembler::VEX_W; 795 } 796 if (cbuf) { 797 MacroAssembler _masm(cbuf); 798 if (reg_lo+1 == reg_hi) { // double move? 799 if (is_load) { 800 __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset)); 801 } else { 802 __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo])); 803 } 804 } else { 805 if (is_load) { 806 __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset)); 807 } else { 808 __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo])); 809 } 810 } 811#ifndef PRODUCT 812 } else if (!do_size) { 813 if (size != 0) st->print("\n\t"); 814 if (reg_lo+1 == reg_hi) { // double move? 815 if (is_load) st->print("%s %s,[ESP + #%d]", 816 UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD", 817 Matcher::regName[reg_lo], offset); 818 else st->print("MOVSD [ESP + #%d],%s", 819 offset, Matcher::regName[reg_lo]); 820 } else { 821 if (is_load) st->print("MOVSS %s,[ESP + #%d]", 822 Matcher::regName[reg_lo], offset); 823 else st->print("MOVSS [ESP + #%d],%s", 824 offset, Matcher::regName[reg_lo]); 825 } 826#endif 827 } 828 bool is_single_byte = false; 829 if ((UseAVX > 2) && (offset != 0)) { 830 is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding); 831 } 832 int offset_size = 0; 833 if (UseAVX > 2 ) { 834 offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); 835 } else { 836 offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 837 } 838 size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX 839 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 840 return size+5+offset_size; 841} 842 843 844static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 845 int src_hi, int dst_hi, int size, outputStream* st ) { 846 if (cbuf) { 847 MacroAssembler _masm(cbuf); 848 if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move? 849 __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]), 850 as_XMMRegister(Matcher::_regEncode[src_lo])); 851 } else { 852 __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]), 853 as_XMMRegister(Matcher::_regEncode[src_lo])); 854 } 855#ifndef PRODUCT 856 } else if (!do_size) { 857 if (size != 0) st->print("\n\t"); 858 if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers 859 if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move? 860 st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 861 } else { 862 st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 863 } 864 } else { 865 if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move? 866 st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 867 } else { 868 st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 869 } 870 } 871#endif 872 } 873 // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix. 874 // Only MOVAPS SSE prefix uses 1 byte. EVEX uses an additional 2 bytes. 875 int sz = (UseAVX > 2) ? 6 : 4; 876 if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) && 877 UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3; 878 return size + sz; 879} 880 881static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 882 int src_hi, int dst_hi, int size, outputStream* st ) { 883 // 32-bit 884 if (cbuf) { 885 MacroAssembler _masm(cbuf); 886 __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]), 887 as_Register(Matcher::_regEncode[src_lo])); 888#ifndef PRODUCT 889 } else if (!do_size) { 890 st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); 891#endif 892 } 893 return (UseAVX> 2) ? 6 : 4; 894} 895 896 897static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 898 int src_hi, int dst_hi, int size, outputStream* st ) { 899 // 32-bit 900 if (cbuf) { 901 MacroAssembler _masm(cbuf); 902 __ movdl(as_Register(Matcher::_regEncode[dst_lo]), 903 as_XMMRegister(Matcher::_regEncode[src_lo])); 904#ifndef PRODUCT 905 } else if (!do_size) { 906 st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); 907#endif 908 } 909 return (UseAVX> 2) ? 6 : 4; 910} 911 912static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) { 913 if( cbuf ) { 914 emit_opcode(*cbuf, 0x8B ); 915 emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] ); 916#ifndef PRODUCT 917 } else if( !do_size ) { 918 if( size != 0 ) st->print("\n\t"); 919 st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]); 920#endif 921 } 922 return size+2; 923} 924 925static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi, 926 int offset, int size, outputStream* st ) { 927 if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there 928 if( cbuf ) { 929 emit_opcode( *cbuf, 0xD9 ); // FLD (i.e., push it) 930 emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] ); 931#ifndef PRODUCT 932 } else if( !do_size ) { 933 if( size != 0 ) st->print("\n\t"); 934 st->print("FLD %s",Matcher::regName[src_lo]); 935#endif 936 } 937 size += 2; 938 } 939 940 int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/; 941 const char *op_str; 942 int op; 943 if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store? 944 op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D "; 945 op = 0xDD; 946 } else { // 32-bit store 947 op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S "; 948 op = 0xD9; 949 assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" ); 950 } 951 952 return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st); 953} 954 955// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad. 956static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 957 int src_hi, int dst_hi, uint ireg, outputStream* st); 958 959static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 960 int stack_offset, int reg, uint ireg, outputStream* st); 961 962static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset, 963 int dst_offset, uint ireg, outputStream* st) { 964 int calc_size = 0; 965 int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4); 966 int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4); 967 switch (ireg) { 968 case Op_VecS: 969 calc_size = 3+src_offset_size + 3+dst_offset_size; 970 break; 971 case Op_VecD: 972 calc_size = 3+src_offset_size + 3+dst_offset_size; 973 src_offset += 4; 974 dst_offset += 4; 975 src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4); 976 dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4); 977 calc_size += 3+src_offset_size + 3+dst_offset_size; 978 break; 979 case Op_VecX: 980 case Op_VecY: 981 case Op_VecZ: 982 calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size; 983 break; 984 default: 985 ShouldNotReachHere(); 986 } 987 if (cbuf) { 988 MacroAssembler _masm(cbuf); 989 int offset = __ offset(); 990 switch (ireg) { 991 case Op_VecS: 992 __ pushl(Address(rsp, src_offset)); 993 __ popl (Address(rsp, dst_offset)); 994 break; 995 case Op_VecD: 996 __ pushl(Address(rsp, src_offset)); 997 __ popl (Address(rsp, dst_offset)); 998 __ pushl(Address(rsp, src_offset+4)); 999 __ popl (Address(rsp, dst_offset+4)); 1000 break; 1001 case Op_VecX: 1002 __ movdqu(Address(rsp, -16), xmm0); 1003 __ movdqu(xmm0, Address(rsp, src_offset)); 1004 __ movdqu(Address(rsp, dst_offset), xmm0); 1005 __ movdqu(xmm0, Address(rsp, -16)); 1006 break; 1007 case Op_VecY: 1008 __ vmovdqu(Address(rsp, -32), xmm0); 1009 __ vmovdqu(xmm0, Address(rsp, src_offset)); 1010 __ vmovdqu(Address(rsp, dst_offset), xmm0); 1011 __ vmovdqu(xmm0, Address(rsp, -32)); 1012 case Op_VecZ: 1013 __ evmovdqul(Address(rsp, -64), xmm0, 2); 1014 __ evmovdqul(xmm0, Address(rsp, src_offset), 2); 1015 __ evmovdqul(Address(rsp, dst_offset), xmm0, 2); 1016 __ evmovdqul(xmm0, Address(rsp, -64), 2); 1017 break; 1018 default: 1019 ShouldNotReachHere(); 1020 } 1021 int size = __ offset() - offset; 1022 assert(size == calc_size, "incorrect size calculattion"); 1023 return size; 1024#ifndef PRODUCT 1025 } else if (!do_size) { 1026 switch (ireg) { 1027 case Op_VecS: 1028 st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t" 1029 "popl [rsp + #%d]", 1030 src_offset, dst_offset); 1031 break; 1032 case Op_VecD: 1033 st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t" 1034 "popq [rsp + #%d]\n\t" 1035 "pushl [rsp + #%d]\n\t" 1036 "popq [rsp + #%d]", 1037 src_offset, dst_offset, src_offset+4, dst_offset+4); 1038 break; 1039 case Op_VecX: 1040 st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t" 1041 "movdqu xmm0, [rsp + #%d]\n\t" 1042 "movdqu [rsp + #%d], xmm0\n\t" 1043 "movdqu xmm0, [rsp - #16]", 1044 src_offset, dst_offset); 1045 break; 1046 case Op_VecY: 1047 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t" 1048 "vmovdqu xmm0, [rsp + #%d]\n\t" 1049 "vmovdqu [rsp + #%d], xmm0\n\t" 1050 "vmovdqu xmm0, [rsp - #32]", 1051 src_offset, dst_offset); 1052 case Op_VecZ: 1053 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t" 1054 "vmovdqu xmm0, [rsp + #%d]\n\t" 1055 "vmovdqu [rsp + #%d], xmm0\n\t" 1056 "vmovdqu xmm0, [rsp - #64]", 1057 src_offset, dst_offset); 1058 break; 1059 default: 1060 ShouldNotReachHere(); 1061 } 1062#endif 1063 } 1064 return calc_size; 1065} 1066 1067uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { 1068 // Get registers to move 1069 OptoReg::Name src_second = ra_->get_reg_second(in(1)); 1070 OptoReg::Name src_first = ra_->get_reg_first(in(1)); 1071 OptoReg::Name dst_second = ra_->get_reg_second(this ); 1072 OptoReg::Name dst_first = ra_->get_reg_first(this ); 1073 1074 enum RC src_second_rc = rc_class(src_second); 1075 enum RC src_first_rc = rc_class(src_first); 1076 enum RC dst_second_rc = rc_class(dst_second); 1077 enum RC dst_first_rc = rc_class(dst_first); 1078 1079 assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); 1080 1081 // Generate spill code! 1082 int size = 0; 1083 1084 if( src_first == dst_first && src_second == dst_second ) 1085 return size; // Self copy, no move 1086 1087 if (bottom_type()->isa_vect() != NULL) { 1088 uint ireg = ideal_reg(); 1089 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity"); 1090 assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity"); 1091 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity"); 1092 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1093 // mem -> mem 1094 int src_offset = ra_->reg2offset(src_first); 1095 int dst_offset = ra_->reg2offset(dst_first); 1096 return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st); 1097 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1098 return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st); 1099 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1100 int stack_offset = ra_->reg2offset(dst_first); 1101 return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st); 1102 } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) { 1103 int stack_offset = ra_->reg2offset(src_first); 1104 return vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st); 1105 } else { 1106 ShouldNotReachHere(); 1107 } 1108 } 1109 1110 // -------------------------------------- 1111 // Check for mem-mem move. push/pop to move. 1112 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1113 if( src_second == dst_first ) { // overlapping stack copy ranges 1114 assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" ); 1115 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1116 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1117 src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits 1118 } 1119 // move low bits 1120 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st); 1121 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st); 1122 if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits 1123 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1124 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1125 } 1126 return size; 1127 } 1128 1129 // -------------------------------------- 1130 // Check for integer reg-reg copy 1131 if( src_first_rc == rc_int && dst_first_rc == rc_int ) 1132 size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st); 1133 1134 // Check for integer store 1135 if( src_first_rc == rc_int && dst_first_rc == rc_stack ) 1136 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st); 1137 1138 // Check for integer load 1139 if( dst_first_rc == rc_int && src_first_rc == rc_stack ) 1140 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st); 1141 1142 // Check for integer reg-xmm reg copy 1143 if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) { 1144 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1145 "no 64 bit integer-float reg moves" ); 1146 return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1147 } 1148 // -------------------------------------- 1149 // Check for float reg-reg copy 1150 if( src_first_rc == rc_float && dst_first_rc == rc_float ) { 1151 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1152 (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" ); 1153 if( cbuf ) { 1154 1155 // Note the mucking with the register encode to compensate for the 0/1 1156 // indexing issue mentioned in a comment in the reg_def sections 1157 // for FPR registers many lines above here. 1158 1159 if( src_first != FPR1L_num ) { 1160 emit_opcode (*cbuf, 0xD9 ); // FLD ST(i) 1161 emit_d8 (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 ); 1162 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) 1163 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); 1164 } else { 1165 emit_opcode (*cbuf, 0xDD ); // FST ST(i) 1166 emit_d8 (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 ); 1167 } 1168#ifndef PRODUCT 1169 } else if( !do_size ) { 1170 if( size != 0 ) st->print("\n\t"); 1171 if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]); 1172 else st->print( "FST %s", Matcher::regName[dst_first]); 1173#endif 1174 } 1175 return size + ((src_first != FPR1L_num) ? 2+2 : 2); 1176 } 1177 1178 // Check for float store 1179 if( src_first_rc == rc_float && dst_first_rc == rc_stack ) { 1180 return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st); 1181 } 1182 1183 // Check for float load 1184 if( dst_first_rc == rc_float && src_first_rc == rc_stack ) { 1185 int offset = ra_->reg2offset(src_first); 1186 const char *op_str; 1187 int op; 1188 if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load? 1189 op_str = "FLD_D"; 1190 op = 0xDD; 1191 } else { // 32-bit load 1192 op_str = "FLD_S"; 1193 op = 0xD9; 1194 assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" ); 1195 } 1196 if( cbuf ) { 1197 emit_opcode (*cbuf, op ); 1198 encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none); 1199 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) 1200 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); 1201#ifndef PRODUCT 1202 } else if( !do_size ) { 1203 if( size != 0 ) st->print("\n\t"); 1204 st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]); 1205#endif 1206 } 1207 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 1208 return size + 3+offset_size+2; 1209 } 1210 1211 // Check for xmm reg-reg copy 1212 if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1213 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1214 (src_first+1 == src_second && dst_first+1 == dst_second), 1215 "no non-adjacent float-moves" ); 1216 return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1217 } 1218 1219 // Check for xmm reg-integer reg copy 1220 if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) { 1221 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1222 "no 64 bit float-integer reg moves" ); 1223 return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1224 } 1225 1226 // Check for xmm store 1227 if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1228 return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st); 1229 } 1230 1231 // Check for float xmm load 1232 if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) { 1233 return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st); 1234 } 1235 1236 // Copy from float reg to xmm reg 1237 if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) { 1238 // copy to the top of stack from floating point reg 1239 // and use LEA to preserve flags 1240 if( cbuf ) { 1241 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP-8] 1242 emit_rm(*cbuf, 0x1, ESP_enc, 0x04); 1243 emit_rm(*cbuf, 0x0, 0x04, ESP_enc); 1244 emit_d8(*cbuf,0xF8); 1245#ifndef PRODUCT 1246 } else if( !do_size ) { 1247 if( size != 0 ) st->print("\n\t"); 1248 st->print("LEA ESP,[ESP-8]"); 1249#endif 1250 } 1251 size += 4; 1252 1253 size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st); 1254 1255 // Copy from the temp memory to the xmm reg. 1256 size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st); 1257 1258 if( cbuf ) { 1259 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8] 1260 emit_rm(*cbuf, 0x1, ESP_enc, 0x04); 1261 emit_rm(*cbuf, 0x0, 0x04, ESP_enc); 1262 emit_d8(*cbuf,0x08); 1263#ifndef PRODUCT 1264 } else if( !do_size ) { 1265 if( size != 0 ) st->print("\n\t"); 1266 st->print("LEA ESP,[ESP+8]"); 1267#endif 1268 } 1269 size += 4; 1270 return size; 1271 } 1272 1273 assert( size > 0, "missed a case" ); 1274 1275 // -------------------------------------------------------------------- 1276 // Check for second bits still needing moving. 1277 if( src_second == dst_second ) 1278 return size; // Self copy; no move 1279 assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" ); 1280 1281 // Check for second word int-int move 1282 if( src_second_rc == rc_int && dst_second_rc == rc_int ) 1283 return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st); 1284 1285 // Check for second word integer store 1286 if( src_second_rc == rc_int && dst_second_rc == rc_stack ) 1287 return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st); 1288 1289 // Check for second word integer load 1290 if( dst_second_rc == rc_int && src_second_rc == rc_stack ) 1291 return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st); 1292 1293 1294 Unimplemented(); 1295 return 0; // Mute compiler 1296} 1297 1298#ifndef PRODUCT 1299void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const { 1300 implementation( NULL, ra_, false, st ); 1301} 1302#endif 1303 1304void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1305 implementation( &cbuf, ra_, false, NULL ); 1306} 1307 1308uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { 1309 return implementation( NULL, ra_, true, NULL ); 1310} 1311 1312 1313//============================================================================= 1314#ifndef PRODUCT 1315void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1316 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1317 int reg = ra_->get_reg_first(this); 1318 st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset); 1319} 1320#endif 1321 1322void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1323 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1324 int reg = ra_->get_encode(this); 1325 if( offset >= 128 ) { 1326 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] 1327 emit_rm(cbuf, 0x2, reg, 0x04); 1328 emit_rm(cbuf, 0x0, 0x04, ESP_enc); 1329 emit_d32(cbuf, offset); 1330 } 1331 else { 1332 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] 1333 emit_rm(cbuf, 0x1, reg, 0x04); 1334 emit_rm(cbuf, 0x0, 0x04, ESP_enc); 1335 emit_d8(cbuf, offset); 1336 } 1337} 1338 1339uint BoxLockNode::size(PhaseRegAlloc *ra_) const { 1340 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1341 if( offset >= 128 ) { 1342 return 7; 1343 } 1344 else { 1345 return 4; 1346 } 1347} 1348 1349//============================================================================= 1350#ifndef PRODUCT 1351void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1352 st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check"); 1353 st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub"); 1354 st->print_cr("\tNOP"); 1355 st->print_cr("\tNOP"); 1356 if( !OptoBreakpoint ) 1357 st->print_cr("\tNOP"); 1358} 1359#endif 1360 1361void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1362 MacroAssembler masm(&cbuf); 1363#ifdef ASSERT 1364 uint insts_size = cbuf.insts_size(); 1365#endif 1366 masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes())); 1367 masm.jump_cc(Assembler::notEqual, 1368 RuntimeAddress(SharedRuntime::get_ic_miss_stub())); 1369 /* WARNING these NOPs are critical so that verified entry point is properly 1370 aligned for patching by NativeJump::patch_verified_entry() */ 1371 int nops_cnt = 2; 1372 if( !OptoBreakpoint ) // Leave space for int3 1373 nops_cnt += 1; 1374 masm.nop(nops_cnt); 1375 1376 assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node"); 1377} 1378 1379uint MachUEPNode::size(PhaseRegAlloc *ra_) const { 1380 return OptoBreakpoint ? 11 : 12; 1381} 1382 1383 1384//============================================================================= 1385 1386int Matcher::regnum_to_fpu_offset(int regnum) { 1387 return regnum - 32; // The FP registers are in the second chunk 1388} 1389 1390// This is UltraSparc specific, true just means we have fast l2f conversion 1391const bool Matcher::convL2FSupported(void) { 1392 return true; 1393} 1394 1395// Is this branch offset short enough that a short branch can be used? 1396// 1397// NOTE: If the platform does not provide any short branch variants, then 1398// this method should return false for offset 0. 1399bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { 1400 // The passed offset is relative to address of the branch. 1401 // On 86 a branch displacement is calculated relative to address 1402 // of a next instruction. 1403 offset -= br_size; 1404 1405 // the short version of jmpConUCF2 contains multiple branches, 1406 // making the reach slightly less 1407 if (rule == jmpConUCF2_rule) 1408 return (-126 <= offset && offset <= 125); 1409 return (-128 <= offset && offset <= 127); 1410} 1411 1412const bool Matcher::isSimpleConstant64(jlong value) { 1413 // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. 1414 return false; 1415} 1416 1417// The ecx parameter to rep stos for the ClearArray node is in dwords. 1418const bool Matcher::init_array_count_is_in_bytes = false; 1419 1420// Threshold size for cleararray. 1421const int Matcher::init_array_short_size = 8 * BytesPerLong; 1422 1423// Needs 2 CMOV's for longs. 1424const int Matcher::long_cmove_cost() { return 1; } 1425 1426// No CMOVF/CMOVD with SSE/SSE2 1427const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; } 1428 1429// Does the CPU require late expand (see block.cpp for description of late expand)? 1430const bool Matcher::require_postalloc_expand = false; 1431 1432// Should the Matcher clone shifts on addressing modes, expecting them to 1433// be subsumed into complex addressing expressions or compute them into 1434// registers? True for Intel but false for most RISCs 1435const bool Matcher::clone_shift_expressions = true; 1436 1437// Do we need to mask the count passed to shift instructions or does 1438// the cpu only look at the lower 5/6 bits anyway? 1439const bool Matcher::need_masked_shift_count = false; 1440 1441bool Matcher::narrow_oop_use_complex_address() { 1442 ShouldNotCallThis(); 1443 return true; 1444} 1445 1446bool Matcher::narrow_klass_use_complex_address() { 1447 ShouldNotCallThis(); 1448 return true; 1449} 1450 1451 1452// Is it better to copy float constants, or load them directly from memory? 1453// Intel can load a float constant from a direct address, requiring no 1454// extra registers. Most RISCs will have to materialize an address into a 1455// register first, so they would do better to copy the constant from stack. 1456const bool Matcher::rematerialize_float_constants = true; 1457 1458// If CPU can load and store mis-aligned doubles directly then no fixup is 1459// needed. Else we split the double into 2 integer pieces and move it 1460// piece-by-piece. Only happens when passing doubles into C code as the 1461// Java calling convention forces doubles to be aligned. 1462const bool Matcher::misaligned_doubles_ok = true; 1463 1464 1465void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) { 1466 // Get the memory operand from the node 1467 uint numopnds = node->num_opnds(); // Virtual call for number of operands 1468 uint skipped = node->oper_input_base(); // Sum of leaves skipped so far 1469 assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" ); 1470 uint opcnt = 1; // First operand 1471 uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand 1472 while( idx >= skipped+num_edges ) { 1473 skipped += num_edges; 1474 opcnt++; // Bump operand count 1475 assert( opcnt < numopnds, "Accessing non-existent operand" ); 1476 num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand 1477 } 1478 1479 MachOper *memory = node->_opnds[opcnt]; 1480 MachOper *new_memory = NULL; 1481 switch (memory->opcode()) { 1482 case DIRECT: 1483 case INDOFFSET32X: 1484 // No transformation necessary. 1485 return; 1486 case INDIRECT: 1487 new_memory = new indirect_win95_safeOper( ); 1488 break; 1489 case INDOFFSET8: 1490 new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0)); 1491 break; 1492 case INDOFFSET32: 1493 new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0)); 1494 break; 1495 case INDINDEXOFFSET: 1496 new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0)); 1497 break; 1498 case INDINDEXSCALE: 1499 new_memory = new indIndexScale_win95_safeOper(memory->scale()); 1500 break; 1501 case INDINDEXSCALEOFFSET: 1502 new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0)); 1503 break; 1504 case LOAD_LONG_INDIRECT: 1505 case LOAD_LONG_INDOFFSET32: 1506 // Does not use EBP as address register, use { EDX, EBX, EDI, ESI} 1507 return; 1508 default: 1509 assert(false, "unexpected memory operand in pd_implicit_null_fixup()"); 1510 return; 1511 } 1512 node->_opnds[opcnt] = new_memory; 1513} 1514 1515// Advertise here if the CPU requires explicit rounding operations 1516// to implement the UseStrictFP mode. 1517const bool Matcher::strict_fp_requires_explicit_rounding = true; 1518 1519// Are floats conerted to double when stored to stack during deoptimization? 1520// On x32 it is stored with convertion only when FPU is used for floats. 1521bool Matcher::float_in_double() { return (UseSSE == 0); } 1522 1523// Do ints take an entire long register or just half? 1524const bool Matcher::int_in_long = false; 1525 1526// Return whether or not this register is ever used as an argument. This 1527// function is used on startup to build the trampoline stubs in generateOptoStub. 1528// Registers not mentioned will be killed by the VM call in the trampoline, and 1529// arguments in those registers not be available to the callee. 1530bool Matcher::can_be_java_arg( int reg ) { 1531 if( reg == ECX_num || reg == EDX_num ) return true; 1532 if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true; 1533 if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true; 1534 return false; 1535} 1536 1537bool Matcher::is_spillable_arg( int reg ) { 1538 return can_be_java_arg(reg); 1539} 1540 1541bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { 1542 // Use hardware integer DIV instruction when 1543 // it is faster than a code which use multiply. 1544 // Only when constant divisor fits into 32 bit 1545 // (min_jint is excluded to get only correct 1546 // positive 32 bit values from negative). 1547 return VM_Version::has_fast_idiv() && 1548 (divisor == (int)divisor && divisor != min_jint); 1549} 1550 1551// Register for DIVI projection of divmodI 1552RegMask Matcher::divI_proj_mask() { 1553 return EAX_REG_mask(); 1554} 1555 1556// Register for MODI projection of divmodI 1557RegMask Matcher::modI_proj_mask() { 1558 return EDX_REG_mask(); 1559} 1560 1561// Register for DIVL projection of divmodL 1562RegMask Matcher::divL_proj_mask() { 1563 ShouldNotReachHere(); 1564 return RegMask(); 1565} 1566 1567// Register for MODL projection of divmodL 1568RegMask Matcher::modL_proj_mask() { 1569 ShouldNotReachHere(); 1570 return RegMask(); 1571} 1572 1573const RegMask Matcher::method_handle_invoke_SP_save_mask() { 1574 return NO_REG_mask(); 1575} 1576 1577// Returns true if the high 32 bits of the value is known to be zero. 1578bool is_operand_hi32_zero(Node* n) { 1579 int opc = n->Opcode(); 1580 if (opc == Op_AndL) { 1581 Node* o2 = n->in(2); 1582 if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) { 1583 return true; 1584 } 1585 } 1586 if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) { 1587 return true; 1588 } 1589 return false; 1590} 1591 1592%} 1593 1594//----------ENCODING BLOCK----------------------------------------------------- 1595// This block specifies the encoding classes used by the compiler to output 1596// byte streams. Encoding classes generate functions which are called by 1597// Machine Instruction Nodes in order to generate the bit encoding of the 1598// instruction. Operands specify their base encoding interface with the 1599// interface keyword. There are currently supported four interfaces, 1600// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an 1601// operand to generate a function which returns its register number when 1602// queried. CONST_INTER causes an operand to generate a function which 1603// returns the value of the constant when queried. MEMORY_INTER causes an 1604// operand to generate four functions which return the Base Register, the 1605// Index Register, the Scale Value, and the Offset Value of the operand when 1606// queried. COND_INTER causes an operand to generate six functions which 1607// return the encoding code (ie - encoding bits for the instruction) 1608// associated with each basic boolean condition for a conditional instruction. 1609// Instructions specify two basic values for encoding. They use the 1610// ins_encode keyword to specify their encoding class (which must be one of 1611// the class names specified in the encoding block), and they use the 1612// opcode keyword to specify, in order, their primary, secondary, and 1613// tertiary opcode. Only the opcode sections which a particular instruction 1614// needs for encoding need to be specified. 1615encode %{ 1616 // Build emit functions for each basic byte or larger field in the intel 1617 // encoding scheme (opcode, rm, sib, immediate), and call them from C++ 1618 // code in the enc_class source block. Emit functions will live in the 1619 // main source block for now. In future, we can generalize this by 1620 // adding a syntax that specifies the sizes of fields in an order, 1621 // so that the adlc can build the emit functions automagically 1622 1623 // Emit primary opcode 1624 enc_class OpcP %{ 1625 emit_opcode(cbuf, $primary); 1626 %} 1627 1628 // Emit secondary opcode 1629 enc_class OpcS %{ 1630 emit_opcode(cbuf, $secondary); 1631 %} 1632 1633 // Emit opcode directly 1634 enc_class Opcode(immI d8) %{ 1635 emit_opcode(cbuf, $d8$$constant); 1636 %} 1637 1638 enc_class SizePrefix %{ 1639 emit_opcode(cbuf,0x66); 1640 %} 1641 1642 enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many) 1643 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1644 %} 1645 1646 enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{ // OpcRegReg(Many) 1647 emit_opcode(cbuf,$opcode$$constant); 1648 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1649 %} 1650 1651 enc_class mov_r32_imm0( rRegI dst ) %{ 1652 emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32 1653 emit_d32 ( cbuf, 0x0 ); // imm32==0x0 1654 %} 1655 1656 enc_class cdq_enc %{ 1657 // Full implementation of Java idiv and irem; checks for 1658 // special case as described in JVM spec., p.243 & p.271. 1659 // 1660 // normal case special case 1661 // 1662 // input : rax,: dividend min_int 1663 // reg: divisor -1 1664 // 1665 // output: rax,: quotient (= rax, idiv reg) min_int 1666 // rdx: remainder (= rax, irem reg) 0 1667 // 1668 // Code sequnce: 1669 // 1670 // 81 F8 00 00 00 80 cmp rax,80000000h 1671 // 0F 85 0B 00 00 00 jne normal_case 1672 // 33 D2 xor rdx,edx 1673 // 83 F9 FF cmp rcx,0FFh 1674 // 0F 84 03 00 00 00 je done 1675 // normal_case: 1676 // 99 cdq 1677 // F7 F9 idiv rax,ecx 1678 // done: 1679 // 1680 emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8); 1681 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); 1682 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80); // cmp rax,80000000h 1683 emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85); 1684 emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00); 1685 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // jne normal_case 1686 emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2); // xor rdx,edx 1687 emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh 1688 emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84); 1689 emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00); 1690 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // je done 1691 // normal_case: 1692 emit_opcode(cbuf,0x99); // cdq 1693 // idiv (note: must be emitted by the user of this rule) 1694 // normal: 1695 %} 1696 1697 // Dense encoding for older common ops 1698 enc_class Opc_plus(immI opcode, rRegI reg) %{ 1699 emit_opcode(cbuf, $opcode$$constant + $reg$$reg); 1700 %} 1701 1702 1703 // Opcde enc_class for 8/32 bit immediate instructions with sign-extension 1704 enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit 1705 // Check for 8-bit immediate, and set sign extend bit in opcode 1706 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1707 emit_opcode(cbuf, $primary | 0x02); 1708 } 1709 else { // If 32-bit immediate 1710 emit_opcode(cbuf, $primary); 1711 } 1712 %} 1713 1714 enc_class OpcSErm (rRegI dst, immI imm) %{ // OpcSEr/m 1715 // Emit primary opcode and set sign-extend bit 1716 // Check for 8-bit immediate, and set sign extend bit in opcode 1717 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1718 emit_opcode(cbuf, $primary | 0x02); } 1719 else { // If 32-bit immediate 1720 emit_opcode(cbuf, $primary); 1721 } 1722 // Emit r/m byte with secondary opcode, after primary opcode. 1723 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1724 %} 1725 1726 enc_class Con8or32 (immI imm) %{ // Con8or32(storeImmI), 8 or 32 bits 1727 // Check for 8-bit immediate, and set sign extend bit in opcode 1728 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1729 $$$emit8$imm$$constant; 1730 } 1731 else { // If 32-bit immediate 1732 // Output immediate 1733 $$$emit32$imm$$constant; 1734 } 1735 %} 1736 1737 enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{ 1738 // Emit primary opcode and set sign-extend bit 1739 // Check for 8-bit immediate, and set sign extend bit in opcode 1740 int con = (int)$imm$$constant; // Throw away top bits 1741 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1742 // Emit r/m byte with secondary opcode, after primary opcode. 1743 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1744 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1745 else emit_d32(cbuf,con); 1746 %} 1747 1748 enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{ 1749 // Emit primary opcode and set sign-extend bit 1750 // Check for 8-bit immediate, and set sign extend bit in opcode 1751 int con = (int)($imm$$constant >> 32); // Throw away bottom bits 1752 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1753 // Emit r/m byte with tertiary opcode, after primary opcode. 1754 emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg)); 1755 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1756 else emit_d32(cbuf,con); 1757 %} 1758 1759 enc_class OpcSReg (rRegI dst) %{ // BSWAP 1760 emit_cc(cbuf, $secondary, $dst$$reg ); 1761 %} 1762 1763 enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP 1764 int destlo = $dst$$reg; 1765 int desthi = HIGH_FROM_LOW(destlo); 1766 // bswap lo 1767 emit_opcode(cbuf, 0x0F); 1768 emit_cc(cbuf, 0xC8, destlo); 1769 // bswap hi 1770 emit_opcode(cbuf, 0x0F); 1771 emit_cc(cbuf, 0xC8, desthi); 1772 // xchg lo and hi 1773 emit_opcode(cbuf, 0x87); 1774 emit_rm(cbuf, 0x3, destlo, desthi); 1775 %} 1776 1777 enc_class RegOpc (rRegI div) %{ // IDIV, IMOD, JMP indirect, ... 1778 emit_rm(cbuf, 0x3, $secondary, $div$$reg ); 1779 %} 1780 1781 enc_class enc_cmov(cmpOp cop ) %{ // CMOV 1782 $$$emit8$primary; 1783 emit_cc(cbuf, $secondary, $cop$$cmpcode); 1784 %} 1785 1786 enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV 1787 int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1); 1788 emit_d8(cbuf, op >> 8 ); 1789 emit_d8(cbuf, op & 255); 1790 %} 1791 1792 // emulate a CMOV with a conditional branch around a MOV 1793 enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV 1794 // Invert sense of branch from sense of CMOV 1795 emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) ); 1796 emit_d8( cbuf, $brOffs$$constant ); 1797 %} 1798 1799 enc_class enc_PartialSubtypeCheck( ) %{ 1800 Register Redi = as_Register(EDI_enc); // result register 1801 Register Reax = as_Register(EAX_enc); // super class 1802 Register Recx = as_Register(ECX_enc); // killed 1803 Register Resi = as_Register(ESI_enc); // sub class 1804 Label miss; 1805 1806 MacroAssembler _masm(&cbuf); 1807 __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi, 1808 NULL, &miss, 1809 /*set_cond_codes:*/ true); 1810 if ($primary) { 1811 __ xorptr(Redi, Redi); 1812 } 1813 __ bind(miss); 1814 %} 1815 1816 enc_class FFree_Float_Stack_All %{ // Free_Float_Stack_All 1817 MacroAssembler masm(&cbuf); 1818 int start = masm.offset(); 1819 if (UseSSE >= 2) { 1820 if (VerifyFPU) { 1821 masm.verify_FPU(0, "must be empty in SSE2+ mode"); 1822 } 1823 } else { 1824 // External c_calling_convention expects the FPU stack to be 'clean'. 1825 // Compiled code leaves it dirty. Do cleanup now. 1826 masm.empty_FPU_stack(); 1827 } 1828 if (sizeof_FFree_Float_Stack_All == -1) { 1829 sizeof_FFree_Float_Stack_All = masm.offset() - start; 1830 } else { 1831 assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size"); 1832 } 1833 %} 1834 1835 enc_class Verify_FPU_For_Leaf %{ 1836 if( VerifyFPU ) { 1837 MacroAssembler masm(&cbuf); 1838 masm.verify_FPU( -3, "Returning from Runtime Leaf call"); 1839 } 1840 %} 1841 1842 enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf 1843 // This is the instruction starting address for relocation info. 1844 cbuf.set_insts_mark(); 1845 $$$emit8$primary; 1846 // CALL directly to the runtime 1847 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1848 runtime_call_Relocation::spec(), RELOC_IMM32 ); 1849 1850 if (UseSSE >= 2) { 1851 MacroAssembler _masm(&cbuf); 1852 BasicType rt = tf()->return_type(); 1853 1854 if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) { 1855 // A C runtime call where the return value is unused. In SSE2+ 1856 // mode the result needs to be removed from the FPU stack. It's 1857 // likely that this function call could be removed by the 1858 // optimizer if the C function is a pure function. 1859 __ ffree(0); 1860 } else if (rt == T_FLOAT) { 1861 __ lea(rsp, Address(rsp, -4)); 1862 __ fstp_s(Address(rsp, 0)); 1863 __ movflt(xmm0, Address(rsp, 0)); 1864 __ lea(rsp, Address(rsp, 4)); 1865 } else if (rt == T_DOUBLE) { 1866 __ lea(rsp, Address(rsp, -8)); 1867 __ fstp_d(Address(rsp, 0)); 1868 __ movdbl(xmm0, Address(rsp, 0)); 1869 __ lea(rsp, Address(rsp, 8)); 1870 } 1871 } 1872 %} 1873 1874 1875 enc_class pre_call_resets %{ 1876 // If method sets FPU control word restore it here 1877 debug_only(int off0 = cbuf.insts_size()); 1878 if (ra_->C->in_24_bit_fp_mode()) { 1879 MacroAssembler _masm(&cbuf); 1880 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); 1881 } 1882 if (ra_->C->max_vector_size() > 16) { 1883 // Clear upper bits of YMM registers when current compiled code uses 1884 // wide vectors to avoid AVX <-> SSE transition penalty during call. 1885 MacroAssembler _masm(&cbuf); 1886 __ vzeroupper(); 1887 } 1888 debug_only(int off1 = cbuf.insts_size()); 1889 assert(off1 - off0 == pre_call_resets_size(), "correct size prediction"); 1890 %} 1891 1892 enc_class post_call_FPU %{ 1893 // If method sets FPU control word do it here also 1894 if (Compile::current()->in_24_bit_fp_mode()) { 1895 MacroAssembler masm(&cbuf); 1896 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); 1897 } 1898 %} 1899 1900 enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL 1901 // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine 1902 // who we intended to call. 1903 cbuf.set_insts_mark(); 1904 $$$emit8$primary; 1905 1906 if (!_method) { 1907 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1908 runtime_call_Relocation::spec(), 1909 RELOC_IMM32); 1910 } else { 1911 int method_index = resolved_method_index(cbuf); 1912 RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index) 1913 : static_call_Relocation::spec(method_index); 1914 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1915 rspec, RELOC_DISP32); 1916 // Emit stubs for static call. 1917 address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); 1918 if (stub == NULL) { 1919 ciEnv::current()->record_failure("CodeCache is full"); 1920 return; 1921 } 1922 } 1923 %} 1924 1925 enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL 1926 MacroAssembler _masm(&cbuf); 1927 __ ic_call((address)$meth$$method, resolved_method_index(cbuf)); 1928 %} 1929 1930 enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL 1931 int disp = in_bytes(Method::from_compiled_offset()); 1932 assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small"); 1933 1934 // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())] 1935 cbuf.set_insts_mark(); 1936 $$$emit8$primary; 1937 emit_rm(cbuf, 0x01, $secondary, EAX_enc ); // R/M byte 1938 emit_d8(cbuf, disp); // Displacement 1939 1940 %} 1941 1942// Following encoding is no longer used, but may be restored if calling 1943// convention changes significantly. 1944// Became: Xor_Reg(EBP), Java_To_Runtime( labl ) 1945// 1946// enc_class Java_Interpreter_Call (label labl) %{ // JAVA INTERPRETER CALL 1947// // int ic_reg = Matcher::inline_cache_reg(); 1948// // int ic_encode = Matcher::_regEncode[ic_reg]; 1949// // int imo_reg = Matcher::interpreter_method_oop_reg(); 1950// // int imo_encode = Matcher::_regEncode[imo_reg]; 1951// 1952// // // Interpreter expects method_oop in EBX, currently a callee-saved register, 1953// // // so we load it immediately before the call 1954// // emit_opcode(cbuf, 0x8B); // MOV imo_reg,ic_reg # method_oop 1955// // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte 1956// 1957// // xor rbp,ebp 1958// emit_opcode(cbuf, 0x33); 1959// emit_rm(cbuf, 0x3, EBP_enc, EBP_enc); 1960// 1961// // CALL to interpreter. 1962// cbuf.set_insts_mark(); 1963// $$$emit8$primary; 1964// emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4), 1965// runtime_call_Relocation::spec(), RELOC_IMM32 ); 1966// %} 1967 1968 enc_class RegOpcImm (rRegI dst, immI8 shift) %{ // SHL, SAR, SHR 1969 $$$emit8$primary; 1970 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1971 $$$emit8$shift$$constant; 1972 %} 1973 1974 enc_class LdImmI (rRegI dst, immI src) %{ // Load Immediate 1975 // Load immediate does not have a zero or sign extended version 1976 // for 8-bit immediates 1977 emit_opcode(cbuf, 0xB8 + $dst$$reg); 1978 $$$emit32$src$$constant; 1979 %} 1980 1981 enc_class LdImmP (rRegI dst, immI src) %{ // Load Immediate 1982 // Load immediate does not have a zero or sign extended version 1983 // for 8-bit immediates 1984 emit_opcode(cbuf, $primary + $dst$$reg); 1985 $$$emit32$src$$constant; 1986 %} 1987 1988 enc_class LdImmL_Lo( eRegL dst, immL src) %{ // Load Immediate 1989 // Load immediate does not have a zero or sign extended version 1990 // for 8-bit immediates 1991 int dst_enc = $dst$$reg; 1992 int src_con = $src$$constant & 0x0FFFFFFFFL; 1993 if (src_con == 0) { 1994 // xor dst, dst 1995 emit_opcode(cbuf, 0x33); 1996 emit_rm(cbuf, 0x3, dst_enc, dst_enc); 1997 } else { 1998 emit_opcode(cbuf, $primary + dst_enc); 1999 emit_d32(cbuf, src_con); 2000 } 2001 %} 2002 2003 enc_class LdImmL_Hi( eRegL dst, immL src) %{ // Load Immediate 2004 // Load immediate does not have a zero or sign extended version 2005 // for 8-bit immediates 2006 int dst_enc = $dst$$reg + 2; 2007 int src_con = ((julong)($src$$constant)) >> 32; 2008 if (src_con == 0) { 2009 // xor dst, dst 2010 emit_opcode(cbuf, 0x33); 2011 emit_rm(cbuf, 0x3, dst_enc, dst_enc); 2012 } else { 2013 emit_opcode(cbuf, $primary + dst_enc); 2014 emit_d32(cbuf, src_con); 2015 } 2016 %} 2017 2018 2019 // Encode a reg-reg copy. If it is useless, then empty encoding. 2020 enc_class enc_Copy( rRegI dst, rRegI src ) %{ 2021 encode_Copy( cbuf, $dst$$reg, $src$$reg ); 2022 %} 2023 2024 enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{ 2025 encode_Copy( cbuf, $dst$$reg, $src$$reg ); 2026 %} 2027 2028 enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many) 2029 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2030 %} 2031 2032 enc_class RegReg_Lo(eRegL dst, eRegL src) %{ // RegReg(Many) 2033 $$$emit8$primary; 2034 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2035 %} 2036 2037 enc_class RegReg_Hi(eRegL dst, eRegL src) %{ // RegReg(Many) 2038 $$$emit8$secondary; 2039 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg)); 2040 %} 2041 2042 enc_class RegReg_Lo2(eRegL dst, eRegL src) %{ // RegReg(Many) 2043 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2044 %} 2045 2046 enc_class RegReg_Hi2(eRegL dst, eRegL src) %{ // RegReg(Many) 2047 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg)); 2048 %} 2049 2050 enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{ 2051 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg)); 2052 %} 2053 2054 enc_class Con32 (immI src) %{ // Con32(storeImmI) 2055 // Output immediate 2056 $$$emit32$src$$constant; 2057 %} 2058 2059 enc_class Con32FPR_as_bits(immFPR src) %{ // storeF_imm 2060 // Output Float immediate bits 2061 jfloat jf = $src$$constant; 2062 int jf_as_bits = jint_cast( jf ); 2063 emit_d32(cbuf, jf_as_bits); 2064 %} 2065 2066 enc_class Con32F_as_bits(immF src) %{ // storeX_imm 2067 // Output Float immediate bits 2068 jfloat jf = $src$$constant; 2069 int jf_as_bits = jint_cast( jf ); 2070 emit_d32(cbuf, jf_as_bits); 2071 %} 2072 2073 enc_class Con16 (immI src) %{ // Con16(storeImmI) 2074 // Output immediate 2075 $$$emit16$src$$constant; 2076 %} 2077 2078 enc_class Con_d32(immI src) %{ 2079 emit_d32(cbuf,$src$$constant); 2080 %} 2081 2082 enc_class conmemref (eRegP t1) %{ // Con32(storeImmI) 2083 // Output immediate memory reference 2084 emit_rm(cbuf, 0x00, $t1$$reg, 0x05 ); 2085 emit_d32(cbuf, 0x00); 2086 %} 2087 2088 enc_class lock_prefix( ) %{ 2089 if( os::is_MP() ) 2090 emit_opcode(cbuf,0xF0); // [Lock] 2091 %} 2092 2093 // Cmp-xchg long value. 2094 // Note: we need to swap rbx, and rcx before and after the 2095 // cmpxchg8 instruction because the instruction uses 2096 // rcx as the high order word of the new value to store but 2097 // our register encoding uses rbx,. 2098 enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{ 2099 2100 // XCHG rbx,ecx 2101 emit_opcode(cbuf,0x87); 2102 emit_opcode(cbuf,0xD9); 2103 // [Lock] 2104 if( os::is_MP() ) 2105 emit_opcode(cbuf,0xF0); 2106 // CMPXCHG8 [Eptr] 2107 emit_opcode(cbuf,0x0F); 2108 emit_opcode(cbuf,0xC7); 2109 emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg ); 2110 // XCHG rbx,ecx 2111 emit_opcode(cbuf,0x87); 2112 emit_opcode(cbuf,0xD9); 2113 %} 2114 2115 enc_class enc_cmpxchg(eSIRegP mem_ptr) %{ 2116 // [Lock] 2117 if( os::is_MP() ) 2118 emit_opcode(cbuf,0xF0); 2119 2120 // CMPXCHG [Eptr] 2121 emit_opcode(cbuf,0x0F); 2122 emit_opcode(cbuf,0xB1); 2123 emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg ); 2124 %} 2125 2126 enc_class enc_flags_ne_to_boolean( iRegI res ) %{ 2127 int res_encoding = $res$$reg; 2128 2129 // MOV res,0 2130 emit_opcode( cbuf, 0xB8 + res_encoding); 2131 emit_d32( cbuf, 0 ); 2132 // JNE,s fail 2133 emit_opcode(cbuf,0x75); 2134 emit_d8(cbuf, 5 ); 2135 // MOV res,1 2136 emit_opcode( cbuf, 0xB8 + res_encoding); 2137 emit_d32( cbuf, 1 ); 2138 // fail: 2139 %} 2140 2141 enc_class set_instruction_start( ) %{ 2142 cbuf.set_insts_mark(); // Mark start of opcode for reloc info in mem operand 2143 %} 2144 2145 enc_class RegMem (rRegI ereg, memory mem) %{ // emit_reg_mem 2146 int reg_encoding = $ereg$$reg; 2147 int base = $mem$$base; 2148 int index = $mem$$index; 2149 int scale = $mem$$scale; 2150 int displace = $mem$$disp; 2151 relocInfo::relocType disp_reloc = $mem->disp_reloc(); 2152 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc); 2153 %} 2154 2155 enc_class RegMem_Hi(eRegL ereg, memory mem) %{ // emit_reg_mem 2156 int reg_encoding = HIGH_FROM_LOW($ereg$$reg); // Hi register of pair, computed from lo 2157 int base = $mem$$base; 2158 int index = $mem$$index; 2159 int scale = $mem$$scale; 2160 int displace = $mem$$disp + 4; // Offset is 4 further in memory 2161 assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" ); 2162 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none); 2163 %} 2164 2165 enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{ 2166 int r1, r2; 2167 if( $tertiary == 0xA4 ) { r1 = $dst$$reg; r2 = HIGH_FROM_LOW($dst$$reg); } 2168 else { r2 = $dst$$reg; r1 = HIGH_FROM_LOW($dst$$reg); } 2169 emit_opcode(cbuf,0x0F); 2170 emit_opcode(cbuf,$tertiary); 2171 emit_rm(cbuf, 0x3, r1, r2); 2172 emit_d8(cbuf,$cnt$$constant); 2173 emit_d8(cbuf,$primary); 2174 emit_rm(cbuf, 0x3, $secondary, r1); 2175 emit_d8(cbuf,$cnt$$constant); 2176 %} 2177 2178 enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{ 2179 emit_opcode( cbuf, 0x8B ); // Move 2180 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg)); 2181 if( $cnt$$constant > 32 ) { // Shift, if not by zero 2182 emit_d8(cbuf,$primary); 2183 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 2184 emit_d8(cbuf,$cnt$$constant-32); 2185 } 2186 emit_d8(cbuf,$primary); 2187 emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg)); 2188 emit_d8(cbuf,31); 2189 %} 2190 2191 enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{ 2192 int r1, r2; 2193 if( $secondary == 0x5 ) { r1 = $dst$$reg; r2 = HIGH_FROM_LOW($dst$$reg); } 2194 else { r2 = $dst$$reg; r1 = HIGH_FROM_LOW($dst$$reg); } 2195 2196 emit_opcode( cbuf, 0x8B ); // Move r1,r2 2197 emit_rm(cbuf, 0x3, r1, r2); 2198 if( $cnt$$constant > 32 ) { // Shift, if not by zero 2199 emit_opcode(cbuf,$primary); 2200 emit_rm(cbuf, 0x3, $secondary, r1); 2201 emit_d8(cbuf,$cnt$$constant-32); 2202 } 2203 emit_opcode(cbuf,0x33); // XOR r2,r2 2204 emit_rm(cbuf, 0x3, r2, r2); 2205 %} 2206 2207 // Clone of RegMem but accepts an extra parameter to access each 2208 // half of a double in memory; it never needs relocation info. 2209 enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{ 2210 emit_opcode(cbuf,$opcode$$constant); 2211 int reg_encoding = $rm_reg$$reg; 2212 int base = $mem$$base; 2213 int index = $mem$$index; 2214 int scale = $mem$$scale; 2215 int displace = $mem$$disp + $disp_for_half$$constant; 2216 relocInfo::relocType disp_reloc = relocInfo::none; 2217 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc); 2218 %} 2219 2220 // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!! 2221 // 2222 // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant 2223 // and it never needs relocation information. 2224 // Frequently used to move data between FPU's Stack Top and memory. 2225 enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{ 2226 int rm_byte_opcode = $rm_opcode$$constant; 2227 int base = $mem$$base; 2228 int index = $mem$$index; 2229 int scale = $mem$$scale; 2230 int displace = $mem$$disp; 2231 assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" ); 2232 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none); 2233 %} 2234 2235 enc_class RMopc_Mem (immI rm_opcode, memory mem) %{ 2236 int rm_byte_opcode = $rm_opcode$$constant; 2237 int base = $mem$$base; 2238 int index = $mem$$index; 2239 int scale = $mem$$scale; 2240 int displace = $mem$$disp; 2241 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 2242 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 2243 %} 2244 2245 enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{ // emit_reg_lea 2246 int reg_encoding = $dst$$reg; 2247 int base = $src0$$reg; // 0xFFFFFFFF indicates no base 2248 int index = 0x04; // 0x04 indicates no index 2249 int scale = 0x00; // 0x00 indicates no scale 2250 int displace = $src1$$constant; // 0x00 indicates no displacement 2251 relocInfo::relocType disp_reloc = relocInfo::none; 2252 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc); 2253 %} 2254 2255 enc_class min_enc (rRegI dst, rRegI src) %{ // MIN 2256 // Compare dst,src 2257 emit_opcode(cbuf,0x3B); 2258 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2259 // jmp dst < src around move 2260 emit_opcode(cbuf,0x7C); 2261 emit_d8(cbuf,2); 2262 // move dst,src 2263 emit_opcode(cbuf,0x8B); 2264 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2265 %} 2266 2267 enc_class max_enc (rRegI dst, rRegI src) %{ // MAX 2268 // Compare dst,src 2269 emit_opcode(cbuf,0x3B); 2270 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2271 // jmp dst > src around move 2272 emit_opcode(cbuf,0x7F); 2273 emit_d8(cbuf,2); 2274 // move dst,src 2275 emit_opcode(cbuf,0x8B); 2276 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2277 %} 2278 2279 enc_class enc_FPR_store(memory mem, regDPR src) %{ 2280 // If src is FPR1, we can just FST to store it. 2281 // Else we need to FLD it to FPR1, then FSTP to store/pop it. 2282 int reg_encoding = 0x2; // Just store 2283 int base = $mem$$base; 2284 int index = $mem$$index; 2285 int scale = $mem$$scale; 2286 int displace = $mem$$disp; 2287 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 2288 if( $src$$reg != FPR1L_enc ) { 2289 reg_encoding = 0x3; // Store & pop 2290 emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it) 2291 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2292 } 2293 cbuf.set_insts_mark(); // Mark start of opcode for reloc info in mem operand 2294 emit_opcode(cbuf,$primary); 2295 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc); 2296 %} 2297 2298 enc_class neg_reg(rRegI dst) %{ 2299 // NEG $dst 2300 emit_opcode(cbuf,0xF7); 2301 emit_rm(cbuf, 0x3, 0x03, $dst$$reg ); 2302 %} 2303 2304 enc_class setLT_reg(eCXRegI dst) %{ 2305 // SETLT $dst 2306 emit_opcode(cbuf,0x0F); 2307 emit_opcode(cbuf,0x9C); 2308 emit_rm( cbuf, 0x3, 0x4, $dst$$reg ); 2309 %} 2310 2311 enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{ // cadd_cmpLT 2312 int tmpReg = $tmp$$reg; 2313 2314 // SUB $p,$q 2315 emit_opcode(cbuf,0x2B); 2316 emit_rm(cbuf, 0x3, $p$$reg, $q$$reg); 2317 // SBB $tmp,$tmp 2318 emit_opcode(cbuf,0x1B); 2319 emit_rm(cbuf, 0x3, tmpReg, tmpReg); 2320 // AND $tmp,$y 2321 emit_opcode(cbuf,0x23); 2322 emit_rm(cbuf, 0x3, tmpReg, $y$$reg); 2323 // ADD $p,$tmp 2324 emit_opcode(cbuf,0x03); 2325 emit_rm(cbuf, 0x3, $p$$reg, tmpReg); 2326 %} 2327 2328 enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{ 2329 // TEST shift,32 2330 emit_opcode(cbuf,0xF7); 2331 emit_rm(cbuf, 0x3, 0, ECX_enc); 2332 emit_d32(cbuf,0x20); 2333 // JEQ,s small 2334 emit_opcode(cbuf, 0x74); 2335 emit_d8(cbuf, 0x04); 2336 // MOV $dst.hi,$dst.lo 2337 emit_opcode( cbuf, 0x8B ); 2338 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg ); 2339 // CLR $dst.lo 2340 emit_opcode(cbuf, 0x33); 2341 emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg); 2342// small: 2343 // SHLD $dst.hi,$dst.lo,$shift 2344 emit_opcode(cbuf,0x0F); 2345 emit_opcode(cbuf,0xA5); 2346 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg)); 2347 // SHL $dst.lo,$shift" 2348 emit_opcode(cbuf,0xD3); 2349 emit_rm(cbuf, 0x3, 0x4, $dst$$reg ); 2350 %} 2351 2352 enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{ 2353 // TEST shift,32 2354 emit_opcode(cbuf,0xF7); 2355 emit_rm(cbuf, 0x3, 0, ECX_enc); 2356 emit_d32(cbuf,0x20); 2357 // JEQ,s small 2358 emit_opcode(cbuf, 0x74); 2359 emit_d8(cbuf, 0x04); 2360 // MOV $dst.lo,$dst.hi 2361 emit_opcode( cbuf, 0x8B ); 2362 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) ); 2363 // CLR $dst.hi 2364 emit_opcode(cbuf, 0x33); 2365 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg)); 2366// small: 2367 // SHRD $dst.lo,$dst.hi,$shift 2368 emit_opcode(cbuf,0x0F); 2369 emit_opcode(cbuf,0xAD); 2370 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg); 2371 // SHR $dst.hi,$shift" 2372 emit_opcode(cbuf,0xD3); 2373 emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) ); 2374 %} 2375 2376 enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{ 2377 // TEST shift,32 2378 emit_opcode(cbuf,0xF7); 2379 emit_rm(cbuf, 0x3, 0, ECX_enc); 2380 emit_d32(cbuf,0x20); 2381 // JEQ,s small 2382 emit_opcode(cbuf, 0x74); 2383 emit_d8(cbuf, 0x05); 2384 // MOV $dst.lo,$dst.hi 2385 emit_opcode( cbuf, 0x8B ); 2386 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) ); 2387 // SAR $dst.hi,31 2388 emit_opcode(cbuf, 0xC1); 2389 emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) ); 2390 emit_d8(cbuf, 0x1F ); 2391// small: 2392 // SHRD $dst.lo,$dst.hi,$shift 2393 emit_opcode(cbuf,0x0F); 2394 emit_opcode(cbuf,0xAD); 2395 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg); 2396 // SAR $dst.hi,$shift" 2397 emit_opcode(cbuf,0xD3); 2398 emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) ); 2399 %} 2400 2401 2402 // ----------------- Encodings for floating point unit ----------------- 2403 // May leave result in FPU-TOS or FPU reg depending on opcodes 2404 enc_class OpcReg_FPR(regFPR src) %{ // FMUL, FDIV 2405 $$$emit8$primary; 2406 emit_rm(cbuf, 0x3, $secondary, $src$$reg ); 2407 %} 2408 2409 // Pop argument in FPR0 with FSTP ST(0) 2410 enc_class PopFPU() %{ 2411 emit_opcode( cbuf, 0xDD ); 2412 emit_d8( cbuf, 0xD8 ); 2413 %} 2414 2415 // !!!!! equivalent to Pop_Reg_F 2416 enc_class Pop_Reg_DPR( regDPR dst ) %{ 2417 emit_opcode( cbuf, 0xDD ); // FSTP ST(i) 2418 emit_d8( cbuf, 0xD8+$dst$$reg ); 2419 %} 2420 2421 enc_class Push_Reg_DPR( regDPR dst ) %{ 2422 emit_opcode( cbuf, 0xD9 ); 2423 emit_d8( cbuf, 0xC0-1+$dst$$reg ); // FLD ST(i-1) 2424 %} 2425 2426 enc_class strictfp_bias1( regDPR dst ) %{ 2427 emit_opcode( cbuf, 0xDB ); // FLD m80real 2428 emit_opcode( cbuf, 0x2D ); 2429 emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() ); 2430 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0 2431 emit_opcode( cbuf, 0xC8+$dst$$reg ); 2432 %} 2433 2434 enc_class strictfp_bias2( regDPR dst ) %{ 2435 emit_opcode( cbuf, 0xDB ); // FLD m80real 2436 emit_opcode( cbuf, 0x2D ); 2437 emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() ); 2438 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0 2439 emit_opcode( cbuf, 0xC8+$dst$$reg ); 2440 %} 2441 2442 // Special case for moving an integer register to a stack slot. 2443 enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS 2444 store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp ); 2445 %} 2446 2447 // Special case for moving a register to a stack slot. 2448 enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS 2449 // Opcode already emitted 2450 emit_rm( cbuf, 0x02, $src$$reg, ESP_enc ); // R/M byte 2451 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte 2452 emit_d32(cbuf, $dst$$disp); // Displacement 2453 %} 2454 2455 // Push the integer in stackSlot 'src' onto FP-stack 2456 enc_class Push_Mem_I( memory src ) %{ // FILD [ESP+src] 2457 store_to_stackslot( cbuf, $primary, $secondary, $src$$disp ); 2458 %} 2459 2460 // Push FPU's TOS float to a stack-slot, and pop FPU-stack 2461 enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst] 2462 store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp ); 2463 %} 2464 2465 // Same as Pop_Mem_F except for opcode 2466 // Push FPU's TOS double to a stack-slot, and pop FPU-stack 2467 enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst] 2468 store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp ); 2469 %} 2470 2471 enc_class Pop_Reg_FPR( regFPR dst ) %{ 2472 emit_opcode( cbuf, 0xDD ); // FSTP ST(i) 2473 emit_d8( cbuf, 0xD8+$dst$$reg ); 2474 %} 2475 2476 enc_class Push_Reg_FPR( regFPR dst ) %{ 2477 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2478 emit_d8( cbuf, 0xC0-1+$dst$$reg ); 2479 %} 2480 2481 // Push FPU's float to a stack-slot, and pop FPU-stack 2482 enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{ 2483 int pop = 0x02; 2484 if ($src$$reg != FPR1L_enc) { 2485 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2486 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2487 pop = 0x03; 2488 } 2489 store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S [ESP+dst] 2490 %} 2491 2492 // Push FPU's double to a stack-slot, and pop FPU-stack 2493 enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{ 2494 int pop = 0x02; 2495 if ($src$$reg != FPR1L_enc) { 2496 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2497 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2498 pop = 0x03; 2499 } 2500 store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D [ESP+dst] 2501 %} 2502 2503 // Push FPU's double to a FPU-stack-slot, and pop FPU-stack 2504 enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{ 2505 int pop = 0xD0 - 1; // -1 since we skip FLD 2506 if ($src$$reg != FPR1L_enc) { 2507 emit_opcode( cbuf, 0xD9 ); // FLD ST(src-1) 2508 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2509 pop = 0xD8; 2510 } 2511 emit_opcode( cbuf, 0xDD ); 2512 emit_d8( cbuf, pop+$dst$$reg ); // FST<P> ST(i) 2513 %} 2514 2515 2516 enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{ 2517 // load dst in FPR0 2518 emit_opcode( cbuf, 0xD9 ); 2519 emit_d8( cbuf, 0xC0-1+$dst$$reg ); 2520 if ($src$$reg != FPR1L_enc) { 2521 // fincstp 2522 emit_opcode (cbuf, 0xD9); 2523 emit_opcode (cbuf, 0xF7); 2524 // swap src with FPR1: 2525 // FXCH FPR1 with src 2526 emit_opcode(cbuf, 0xD9); 2527 emit_d8(cbuf, 0xC8-1+$src$$reg ); 2528 // fdecstp 2529 emit_opcode (cbuf, 0xD9); 2530 emit_opcode (cbuf, 0xF6); 2531 } 2532 %} 2533 2534 enc_class Push_ModD_encoding(regD src0, regD src1) %{ 2535 MacroAssembler _masm(&cbuf); 2536 __ subptr(rsp, 8); 2537 __ movdbl(Address(rsp, 0), $src1$$XMMRegister); 2538 __ fld_d(Address(rsp, 0)); 2539 __ movdbl(Address(rsp, 0), $src0$$XMMRegister); 2540 __ fld_d(Address(rsp, 0)); 2541 %} 2542 2543 enc_class Push_ModF_encoding(regF src0, regF src1) %{ 2544 MacroAssembler _masm(&cbuf); 2545 __ subptr(rsp, 4); 2546 __ movflt(Address(rsp, 0), $src1$$XMMRegister); 2547 __ fld_s(Address(rsp, 0)); 2548 __ movflt(Address(rsp, 0), $src0$$XMMRegister); 2549 __ fld_s(Address(rsp, 0)); 2550 %} 2551 2552 enc_class Push_ResultD(regD dst) %{ 2553 MacroAssembler _masm(&cbuf); 2554 __ fstp_d(Address(rsp, 0)); 2555 __ movdbl($dst$$XMMRegister, Address(rsp, 0)); 2556 __ addptr(rsp, 8); 2557 %} 2558 2559 enc_class Push_ResultF(regF dst, immI d8) %{ 2560 MacroAssembler _masm(&cbuf); 2561 __ fstp_s(Address(rsp, 0)); 2562 __ movflt($dst$$XMMRegister, Address(rsp, 0)); 2563 __ addptr(rsp, $d8$$constant); 2564 %} 2565 2566 enc_class Push_SrcD(regD src) %{ 2567 MacroAssembler _masm(&cbuf); 2568 __ subptr(rsp, 8); 2569 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 2570 __ fld_d(Address(rsp, 0)); 2571 %} 2572 2573 enc_class push_stack_temp_qword() %{ 2574 MacroAssembler _masm(&cbuf); 2575 __ subptr(rsp, 8); 2576 %} 2577 2578 enc_class pop_stack_temp_qword() %{ 2579 MacroAssembler _masm(&cbuf); 2580 __ addptr(rsp, 8); 2581 %} 2582 2583 enc_class push_xmm_to_fpr1(regD src) %{ 2584 MacroAssembler _masm(&cbuf); 2585 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 2586 __ fld_d(Address(rsp, 0)); 2587 %} 2588 2589 enc_class Push_Result_Mod_DPR( regDPR src) %{ 2590 if ($src$$reg != FPR1L_enc) { 2591 // fincstp 2592 emit_opcode (cbuf, 0xD9); 2593 emit_opcode (cbuf, 0xF7); 2594 // FXCH FPR1 with src 2595 emit_opcode(cbuf, 0xD9); 2596 emit_d8(cbuf, 0xC8-1+$src$$reg ); 2597 // fdecstp 2598 emit_opcode (cbuf, 0xD9); 2599 emit_opcode (cbuf, 0xF6); 2600 } 2601 // // following asm replaced with Pop_Reg_F or Pop_Mem_F 2602 // // FSTP FPR$dst$$reg 2603 // emit_opcode( cbuf, 0xDD ); 2604 // emit_d8( cbuf, 0xD8+$dst$$reg ); 2605 %} 2606 2607 enc_class fnstsw_sahf_skip_parity() %{ 2608 // fnstsw ax 2609 emit_opcode( cbuf, 0xDF ); 2610 emit_opcode( cbuf, 0xE0 ); 2611 // sahf 2612 emit_opcode( cbuf, 0x9E ); 2613 // jnp ::skip 2614 emit_opcode( cbuf, 0x7B ); 2615 emit_opcode( cbuf, 0x05 ); 2616 %} 2617 2618 enc_class emitModDPR() %{ 2619 // fprem must be iterative 2620 // :: loop 2621 // fprem 2622 emit_opcode( cbuf, 0xD9 ); 2623 emit_opcode( cbuf, 0xF8 ); 2624 // wait 2625 emit_opcode( cbuf, 0x9b ); 2626 // fnstsw ax 2627 emit_opcode( cbuf, 0xDF ); 2628 emit_opcode( cbuf, 0xE0 ); 2629 // sahf 2630 emit_opcode( cbuf, 0x9E ); 2631 // jp ::loop 2632 emit_opcode( cbuf, 0x0F ); 2633 emit_opcode( cbuf, 0x8A ); 2634 emit_opcode( cbuf, 0xF4 ); 2635 emit_opcode( cbuf, 0xFF ); 2636 emit_opcode( cbuf, 0xFF ); 2637 emit_opcode( cbuf, 0xFF ); 2638 %} 2639 2640 enc_class fpu_flags() %{ 2641 // fnstsw_ax 2642 emit_opcode( cbuf, 0xDF); 2643 emit_opcode( cbuf, 0xE0); 2644 // test ax,0x0400 2645 emit_opcode( cbuf, 0x66 ); // operand-size prefix for 16-bit immediate 2646 emit_opcode( cbuf, 0xA9 ); 2647 emit_d16 ( cbuf, 0x0400 ); 2648 // // // This sequence works, but stalls for 12-16 cycles on PPro 2649 // // test rax,0x0400 2650 // emit_opcode( cbuf, 0xA9 ); 2651 // emit_d32 ( cbuf, 0x00000400 ); 2652 // 2653 // jz exit (no unordered comparison) 2654 emit_opcode( cbuf, 0x74 ); 2655 emit_d8 ( cbuf, 0x02 ); 2656 // mov ah,1 - treat as LT case (set carry flag) 2657 emit_opcode( cbuf, 0xB4 ); 2658 emit_d8 ( cbuf, 0x01 ); 2659 // sahf 2660 emit_opcode( cbuf, 0x9E); 2661 %} 2662 2663 enc_class cmpF_P6_fixup() %{ 2664 // Fixup the integer flags in case comparison involved a NaN 2665 // 2666 // JNP exit (no unordered comparison, P-flag is set by NaN) 2667 emit_opcode( cbuf, 0x7B ); 2668 emit_d8 ( cbuf, 0x03 ); 2669 // MOV AH,1 - treat as LT case (set carry flag) 2670 emit_opcode( cbuf, 0xB4 ); 2671 emit_d8 ( cbuf, 0x01 ); 2672 // SAHF 2673 emit_opcode( cbuf, 0x9E); 2674 // NOP // target for branch to avoid branch to branch 2675 emit_opcode( cbuf, 0x90); 2676 %} 2677 2678// fnstsw_ax(); 2679// sahf(); 2680// movl(dst, nan_result); 2681// jcc(Assembler::parity, exit); 2682// movl(dst, less_result); 2683// jcc(Assembler::below, exit); 2684// movl(dst, equal_result); 2685// jcc(Assembler::equal, exit); 2686// movl(dst, greater_result); 2687 2688// less_result = 1; 2689// greater_result = -1; 2690// equal_result = 0; 2691// nan_result = -1; 2692 2693 enc_class CmpF_Result(rRegI dst) %{ 2694 // fnstsw_ax(); 2695 emit_opcode( cbuf, 0xDF); 2696 emit_opcode( cbuf, 0xE0); 2697 // sahf 2698 emit_opcode( cbuf, 0x9E); 2699 // movl(dst, nan_result); 2700 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2701 emit_d32( cbuf, -1 ); 2702 // jcc(Assembler::parity, exit); 2703 emit_opcode( cbuf, 0x7A ); 2704 emit_d8 ( cbuf, 0x13 ); 2705 // movl(dst, less_result); 2706 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2707 emit_d32( cbuf, -1 ); 2708 // jcc(Assembler::below, exit); 2709 emit_opcode( cbuf, 0x72 ); 2710 emit_d8 ( cbuf, 0x0C ); 2711 // movl(dst, equal_result); 2712 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2713 emit_d32( cbuf, 0 ); 2714 // jcc(Assembler::equal, exit); 2715 emit_opcode( cbuf, 0x74 ); 2716 emit_d8 ( cbuf, 0x05 ); 2717 // movl(dst, greater_result); 2718 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2719 emit_d32( cbuf, 1 ); 2720 %} 2721 2722 2723 // Compare the longs and set flags 2724 // BROKEN! Do Not use as-is 2725 enc_class cmpl_test( eRegL src1, eRegL src2 ) %{ 2726 // CMP $src1.hi,$src2.hi 2727 emit_opcode( cbuf, 0x3B ); 2728 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) ); 2729 // JNE,s done 2730 emit_opcode(cbuf,0x75); 2731 emit_d8(cbuf, 2 ); 2732 // CMP $src1.lo,$src2.lo 2733 emit_opcode( cbuf, 0x3B ); 2734 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2735// done: 2736 %} 2737 2738 enc_class convert_int_long( regL dst, rRegI src ) %{ 2739 // mov $dst.lo,$src 2740 int dst_encoding = $dst$$reg; 2741 int src_encoding = $src$$reg; 2742 encode_Copy( cbuf, dst_encoding , src_encoding ); 2743 // mov $dst.hi,$src 2744 encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding ); 2745 // sar $dst.hi,31 2746 emit_opcode( cbuf, 0xC1 ); 2747 emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) ); 2748 emit_d8(cbuf, 0x1F ); 2749 %} 2750 2751 enc_class convert_long_double( eRegL src ) %{ 2752 // push $src.hi 2753 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg)); 2754 // push $src.lo 2755 emit_opcode(cbuf, 0x50+$src$$reg ); 2756 // fild 64-bits at [SP] 2757 emit_opcode(cbuf,0xdf); 2758 emit_d8(cbuf, 0x6C); 2759 emit_d8(cbuf, 0x24); 2760 emit_d8(cbuf, 0x00); 2761 // pop stack 2762 emit_opcode(cbuf, 0x83); // add SP, #8 2763 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2764 emit_d8(cbuf, 0x8); 2765 %} 2766 2767 enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{ 2768 // IMUL EDX:EAX,$src1 2769 emit_opcode( cbuf, 0xF7 ); 2770 emit_rm( cbuf, 0x3, 0x5, $src1$$reg ); 2771 // SAR EDX,$cnt-32 2772 int shift_count = ((int)$cnt$$constant) - 32; 2773 if (shift_count > 0) { 2774 emit_opcode(cbuf, 0xC1); 2775 emit_rm(cbuf, 0x3, 7, $dst$$reg ); 2776 emit_d8(cbuf, shift_count); 2777 } 2778 %} 2779 2780 // this version doesn't have add sp, 8 2781 enc_class convert_long_double2( eRegL src ) %{ 2782 // push $src.hi 2783 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg)); 2784 // push $src.lo 2785 emit_opcode(cbuf, 0x50+$src$$reg ); 2786 // fild 64-bits at [SP] 2787 emit_opcode(cbuf,0xdf); 2788 emit_d8(cbuf, 0x6C); 2789 emit_d8(cbuf, 0x24); 2790 emit_d8(cbuf, 0x00); 2791 %} 2792 2793 enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{ 2794 // Basic idea: long = (long)int * (long)int 2795 // IMUL EDX:EAX, src 2796 emit_opcode( cbuf, 0xF7 ); 2797 emit_rm( cbuf, 0x3, 0x5, $src$$reg); 2798 %} 2799 2800 enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{ 2801 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) 2802 // MUL EDX:EAX, src 2803 emit_opcode( cbuf, 0xF7 ); 2804 emit_rm( cbuf, 0x3, 0x4, $src$$reg); 2805 %} 2806 2807 enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{ 2808 // Basic idea: lo(result) = lo(x_lo * y_lo) 2809 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 2810 // MOV $tmp,$src.lo 2811 encode_Copy( cbuf, $tmp$$reg, $src$$reg ); 2812 // IMUL $tmp,EDX 2813 emit_opcode( cbuf, 0x0F ); 2814 emit_opcode( cbuf, 0xAF ); 2815 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2816 // MOV EDX,$src.hi 2817 encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) ); 2818 // IMUL EDX,EAX 2819 emit_opcode( cbuf, 0x0F ); 2820 emit_opcode( cbuf, 0xAF ); 2821 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg ); 2822 // ADD $tmp,EDX 2823 emit_opcode( cbuf, 0x03 ); 2824 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2825 // MUL EDX:EAX,$src.lo 2826 emit_opcode( cbuf, 0xF7 ); 2827 emit_rm( cbuf, 0x3, 0x4, $src$$reg ); 2828 // ADD EDX,ESI 2829 emit_opcode( cbuf, 0x03 ); 2830 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg ); 2831 %} 2832 2833 enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{ 2834 // Basic idea: lo(result) = lo(src * y_lo) 2835 // hi(result) = hi(src * y_lo) + lo(src * y_hi) 2836 // IMUL $tmp,EDX,$src 2837 emit_opcode( cbuf, 0x6B ); 2838 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2839 emit_d8( cbuf, (int)$src$$constant ); 2840 // MOV EDX,$src 2841 emit_opcode(cbuf, 0xB8 + EDX_enc); 2842 emit_d32( cbuf, (int)$src$$constant ); 2843 // MUL EDX:EAX,EDX 2844 emit_opcode( cbuf, 0xF7 ); 2845 emit_rm( cbuf, 0x3, 0x4, EDX_enc ); 2846 // ADD EDX,ESI 2847 emit_opcode( cbuf, 0x03 ); 2848 emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg ); 2849 %} 2850 2851 enc_class long_div( eRegL src1, eRegL src2 ) %{ 2852 // PUSH src1.hi 2853 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); 2854 // PUSH src1.lo 2855 emit_opcode(cbuf, 0x50+$src1$$reg ); 2856 // PUSH src2.hi 2857 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); 2858 // PUSH src2.lo 2859 emit_opcode(cbuf, 0x50+$src2$$reg ); 2860 // CALL directly to the runtime 2861 cbuf.set_insts_mark(); 2862 emit_opcode(cbuf,0xE8); // Call into runtime 2863 emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2864 // Restore stack 2865 emit_opcode(cbuf, 0x83); // add SP, #framesize 2866 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2867 emit_d8(cbuf, 4*4); 2868 %} 2869 2870 enc_class long_mod( eRegL src1, eRegL src2 ) %{ 2871 // PUSH src1.hi 2872 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); 2873 // PUSH src1.lo 2874 emit_opcode(cbuf, 0x50+$src1$$reg ); 2875 // PUSH src2.hi 2876 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); 2877 // PUSH src2.lo 2878 emit_opcode(cbuf, 0x50+$src2$$reg ); 2879 // CALL directly to the runtime 2880 cbuf.set_insts_mark(); 2881 emit_opcode(cbuf,0xE8); // Call into runtime 2882 emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2883 // Restore stack 2884 emit_opcode(cbuf, 0x83); // add SP, #framesize 2885 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2886 emit_d8(cbuf, 4*4); 2887 %} 2888 2889 enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{ 2890 // MOV $tmp,$src.lo 2891 emit_opcode(cbuf, 0x8B); 2892 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg); 2893 // OR $tmp,$src.hi 2894 emit_opcode(cbuf, 0x0B); 2895 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg)); 2896 %} 2897 2898 enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{ 2899 // CMP $src1.lo,$src2.lo 2900 emit_opcode( cbuf, 0x3B ); 2901 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2902 // JNE,s skip 2903 emit_cc(cbuf, 0x70, 0x5); 2904 emit_d8(cbuf,2); 2905 // CMP $src1.hi,$src2.hi 2906 emit_opcode( cbuf, 0x3B ); 2907 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) ); 2908 %} 2909 2910 enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{ 2911 // CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits 2912 emit_opcode( cbuf, 0x3B ); 2913 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2914 // MOV $tmp,$src1.hi 2915 emit_opcode( cbuf, 0x8B ); 2916 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) ); 2917 // SBB $tmp,$src2.hi\t! Compute flags for long compare 2918 emit_opcode( cbuf, 0x1B ); 2919 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) ); 2920 %} 2921 2922 enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{ 2923 // XOR $tmp,$tmp 2924 emit_opcode(cbuf,0x33); // XOR 2925 emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg); 2926 // CMP $tmp,$src.lo 2927 emit_opcode( cbuf, 0x3B ); 2928 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg ); 2929 // SBB $tmp,$src.hi 2930 emit_opcode( cbuf, 0x1B ); 2931 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) ); 2932 %} 2933 2934 // Sniff, sniff... smells like Gnu Superoptimizer 2935 enc_class neg_long( eRegL dst ) %{ 2936 emit_opcode(cbuf,0xF7); // NEG hi 2937 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg)); 2938 emit_opcode(cbuf,0xF7); // NEG lo 2939 emit_rm (cbuf,0x3, 0x3, $dst$$reg ); 2940 emit_opcode(cbuf,0x83); // SBB hi,0 2941 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg)); 2942 emit_d8 (cbuf,0 ); 2943 %} 2944 2945 enc_class enc_pop_rdx() %{ 2946 emit_opcode(cbuf,0x5A); 2947 %} 2948 2949 enc_class enc_rethrow() %{ 2950 cbuf.set_insts_mark(); 2951 emit_opcode(cbuf, 0xE9); // jmp entry 2952 emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4, 2953 runtime_call_Relocation::spec(), RELOC_IMM32 ); 2954 %} 2955 2956 2957 // Convert a double to an int. Java semantics require we do complex 2958 // manglelations in the corner cases. So we set the rounding mode to 2959 // 'zero', store the darned double down as an int, and reset the 2960 // rounding mode to 'nearest'. The hardware throws an exception which 2961 // patches up the correct value directly to the stack. 2962 enc_class DPR2I_encoding( regDPR src ) %{ 2963 // Flip to round-to-zero mode. We attempted to allow invalid-op 2964 // exceptions here, so that a NAN or other corner-case value will 2965 // thrown an exception (but normal values get converted at full speed). 2966 // However, I2C adapters and other float-stack manglers leave pending 2967 // invalid-op exceptions hanging. We would have to clear them before 2968 // enabling them and that is more expensive than just testing for the 2969 // invalid value Intel stores down in the corner cases. 2970 emit_opcode(cbuf,0xD9); // FLDCW trunc 2971 emit_opcode(cbuf,0x2D); 2972 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); 2973 // Allocate a word 2974 emit_opcode(cbuf,0x83); // SUB ESP,4 2975 emit_opcode(cbuf,0xEC); 2976 emit_d8(cbuf,0x04); 2977 // Encoding assumes a double has been pushed into FPR0. 2978 // Store down the double as an int, popping the FPU stack 2979 emit_opcode(cbuf,0xDB); // FISTP [ESP] 2980 emit_opcode(cbuf,0x1C); 2981 emit_d8(cbuf,0x24); 2982 // Restore the rounding mode; mask the exception 2983 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 2984 emit_opcode(cbuf,0x2D); 2985 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 2986 ? (int)StubRoutines::addr_fpu_cntrl_wrd_24() 2987 : (int)StubRoutines::addr_fpu_cntrl_wrd_std()); 2988 2989 // Load the converted int; adjust CPU stack 2990 emit_opcode(cbuf,0x58); // POP EAX 2991 emit_opcode(cbuf,0x3D); // CMP EAX,imm 2992 emit_d32 (cbuf,0x80000000); // 0x80000000 2993 emit_opcode(cbuf,0x75); // JNE around_slow_call 2994 emit_d8 (cbuf,0x07); // Size of slow_call 2995 // Push src onto stack slow-path 2996 emit_opcode(cbuf,0xD9 ); // FLD ST(i) 2997 emit_d8 (cbuf,0xC0-1+$src$$reg ); 2998 // CALL directly to the runtime 2999 cbuf.set_insts_mark(); 3000 emit_opcode(cbuf,0xE8); // Call into runtime 3001 emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 3002 // Carry on here... 3003 %} 3004 3005 enc_class DPR2L_encoding( regDPR src ) %{ 3006 emit_opcode(cbuf,0xD9); // FLDCW trunc 3007 emit_opcode(cbuf,0x2D); 3008 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); 3009 // Allocate a word 3010 emit_opcode(cbuf,0x83); // SUB ESP,8 3011 emit_opcode(cbuf,0xEC); 3012 emit_d8(cbuf,0x08); 3013 // Encoding assumes a double has been pushed into FPR0. 3014 // Store down the double as a long, popping the FPU stack 3015 emit_opcode(cbuf,0xDF); // FISTP [ESP] 3016 emit_opcode(cbuf,0x3C); 3017 emit_d8(cbuf,0x24); 3018 // Restore the rounding mode; mask the exception 3019 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 3020 emit_opcode(cbuf,0x2D); 3021 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 3022 ? (int)StubRoutines::addr_fpu_cntrl_wrd_24() 3023 : (int)StubRoutines::addr_fpu_cntrl_wrd_std()); 3024 3025 // Load the converted int; adjust CPU stack 3026 emit_opcode(cbuf,0x58); // POP EAX 3027 emit_opcode(cbuf,0x5A); // POP EDX 3028 emit_opcode(cbuf,0x81); // CMP EDX,imm 3029 emit_d8 (cbuf,0xFA); // rdx 3030 emit_d32 (cbuf,0x80000000); // 0x80000000 3031 emit_opcode(cbuf,0x75); // JNE around_slow_call 3032 emit_d8 (cbuf,0x07+4); // Size of slow_call 3033 emit_opcode(cbuf,0x85); // TEST EAX,EAX 3034 emit_opcode(cbuf,0xC0); // 2/rax,/rax, 3035 emit_opcode(cbuf,0x75); // JNE around_slow_call 3036 emit_d8 (cbuf,0x07); // Size of slow_call 3037 // Push src onto stack slow-path 3038 emit_opcode(cbuf,0xD9 ); // FLD ST(i) 3039 emit_d8 (cbuf,0xC0-1+$src$$reg ); 3040 // CALL directly to the runtime 3041 cbuf.set_insts_mark(); 3042 emit_opcode(cbuf,0xE8); // Call into runtime 3043 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 3044 // Carry on here... 3045 %} 3046 3047 enc_class FMul_ST_reg( eRegFPR src1 ) %{ 3048 // Operand was loaded from memory into fp ST (stack top) 3049 // FMUL ST,$src /* D8 C8+i */ 3050 emit_opcode(cbuf, 0xD8); 3051 emit_opcode(cbuf, 0xC8 + $src1$$reg); 3052 %} 3053 3054 enc_class FAdd_ST_reg( eRegFPR src2 ) %{ 3055 // FADDP ST,src2 /* D8 C0+i */ 3056 emit_opcode(cbuf, 0xD8); 3057 emit_opcode(cbuf, 0xC0 + $src2$$reg); 3058 //could use FADDP src2,fpST /* DE C0+i */ 3059 %} 3060 3061 enc_class FAddP_reg_ST( eRegFPR src2 ) %{ 3062 // FADDP src2,ST /* DE C0+i */ 3063 emit_opcode(cbuf, 0xDE); 3064 emit_opcode(cbuf, 0xC0 + $src2$$reg); 3065 %} 3066 3067 enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{ 3068 // Operand has been loaded into fp ST (stack top) 3069 // FSUB ST,$src1 3070 emit_opcode(cbuf, 0xD8); 3071 emit_opcode(cbuf, 0xE0 + $src1$$reg); 3072 3073 // FDIV 3074 emit_opcode(cbuf, 0xD8); 3075 emit_opcode(cbuf, 0xF0 + $src2$$reg); 3076 %} 3077 3078 enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{ 3079 // Operand was loaded from memory into fp ST (stack top) 3080 // FADD ST,$src /* D8 C0+i */ 3081 emit_opcode(cbuf, 0xD8); 3082 emit_opcode(cbuf, 0xC0 + $src1$$reg); 3083 3084 // FMUL ST,src2 /* D8 C*+i */ 3085 emit_opcode(cbuf, 0xD8); 3086 emit_opcode(cbuf, 0xC8 + $src2$$reg); 3087 %} 3088 3089 3090 enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{ 3091 // Operand was loaded from memory into fp ST (stack top) 3092 // FADD ST,$src /* D8 C0+i */ 3093 emit_opcode(cbuf, 0xD8); 3094 emit_opcode(cbuf, 0xC0 + $src1$$reg); 3095 3096 // FMULP src2,ST /* DE C8+i */ 3097 emit_opcode(cbuf, 0xDE); 3098 emit_opcode(cbuf, 0xC8 + $src2$$reg); 3099 %} 3100 3101 // Atomically load the volatile long 3102 enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{ 3103 emit_opcode(cbuf,0xDF); 3104 int rm_byte_opcode = 0x05; 3105 int base = $mem$$base; 3106 int index = $mem$$index; 3107 int scale = $mem$$scale; 3108 int displace = $mem$$disp; 3109 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 3110 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 3111 store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp ); 3112 %} 3113 3114 // Volatile Store Long. Must be atomic, so move it into 3115 // the FP TOS and then do a 64-bit FIST. Has to probe the 3116 // target address before the store (for null-ptr checks) 3117 // so the memory operand is used twice in the encoding. 3118 enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{ 3119 store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp ); 3120 cbuf.set_insts_mark(); // Mark start of FIST in case $mem has an oop 3121 emit_opcode(cbuf,0xDF); 3122 int rm_byte_opcode = 0x07; 3123 int base = $mem$$base; 3124 int index = $mem$$index; 3125 int scale = $mem$$scale; 3126 int displace = $mem$$disp; 3127 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 3128 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 3129 %} 3130 3131 // Safepoint Poll. This polls the safepoint page, and causes an 3132 // exception if it is not readable. Unfortunately, it kills the condition code 3133 // in the process 3134 // We current use TESTL [spp],EDI 3135 // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0 3136 3137 enc_class Safepoint_Poll() %{ 3138 cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0); 3139 emit_opcode(cbuf,0x85); 3140 emit_rm (cbuf, 0x0, 0x7, 0x5); 3141 emit_d32(cbuf, (intptr_t)os::get_polling_page()); 3142 %} 3143%} 3144 3145 3146//----------FRAME-------------------------------------------------------------- 3147// Definition of frame structure and management information. 3148// 3149// S T A C K L A Y O U T Allocators stack-slot number 3150// | (to get allocators register number 3151// G Owned by | | v add OptoReg::stack0()) 3152// r CALLER | | 3153// o | +--------+ pad to even-align allocators stack-slot 3154// w V | pad0 | numbers; owned by CALLER 3155// t -----------+--------+----> Matcher::_in_arg_limit, unaligned 3156// h ^ | in | 5 3157// | | args | 4 Holes in incoming args owned by SELF 3158// | | | | 3 3159// | | +--------+ 3160// V | | old out| Empty on Intel, window on Sparc 3161// | old |preserve| Must be even aligned. 3162// | SP-+--------+----> Matcher::_old_SP, even aligned 3163// | | in | 3 area for Intel ret address 3164// Owned by |preserve| Empty on Sparc. 3165// SELF +--------+ 3166// | | pad2 | 2 pad to align old SP 3167// | +--------+ 1 3168// | | locks | 0 3169// | +--------+----> OptoReg::stack0(), even aligned 3170// | | pad1 | 11 pad to align new SP 3171// | +--------+ 3172// | | | 10 3173// | | spills | 9 spills 3174// V | | 8 (pad0 slot for callee) 3175// -----------+--------+----> Matcher::_out_arg_limit, unaligned 3176// ^ | out | 7 3177// | | args | 6 Holes in outgoing args owned by CALLEE 3178// Owned by +--------+ 3179// CALLEE | new out| 6 Empty on Intel, window on Sparc 3180// | new |preserve| Must be even-aligned. 3181// | SP-+--------+----> Matcher::_new_SP, even aligned 3182// | | | 3183// 3184// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is 3185// known from SELF's arguments and the Java calling convention. 3186// Region 6-7 is determined per call site. 3187// Note 2: If the calling convention leaves holes in the incoming argument 3188// area, those holes are owned by SELF. Holes in the outgoing area 3189// are owned by the CALLEE. Holes should not be nessecary in the 3190// incoming area, as the Java calling convention is completely under 3191// the control of the AD file. Doubles can be sorted and packed to 3192// avoid holes. Holes in the outgoing arguments may be nessecary for 3193// varargs C calling conventions. 3194// Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is 3195// even aligned with pad0 as needed. 3196// Region 6 is even aligned. Region 6-7 is NOT even aligned; 3197// region 6-11 is even aligned; it may be padded out more so that 3198// the region from SP to FP meets the minimum stack alignment. 3199 3200frame %{ 3201 // What direction does stack grow in (assumed to be same for C & Java) 3202 stack_direction(TOWARDS_LOW); 3203 3204 // These three registers define part of the calling convention 3205 // between compiled code and the interpreter. 3206 inline_cache_reg(EAX); // Inline Cache Register 3207 interpreter_method_oop_reg(EBX); // Method Oop Register when calling interpreter 3208 3209 // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] 3210 cisc_spilling_operand_name(indOffset32); 3211 3212 // Number of stack slots consumed by locking an object 3213 sync_stack_slots(1); 3214 3215 // Compiled code's Frame Pointer 3216 frame_pointer(ESP); 3217 // Interpreter stores its frame pointer in a register which is 3218 // stored to the stack by I2CAdaptors. 3219 // I2CAdaptors convert from interpreted java to compiled java. 3220 interpreter_frame_pointer(EBP); 3221 3222 // Stack alignment requirement 3223 // Alignment size in bytes (128-bit -> 16 bytes) 3224 stack_alignment(StackAlignmentInBytes); 3225 3226 // Number of stack slots between incoming argument block and the start of 3227 // a new frame. The PROLOG must add this many slots to the stack. The 3228 // EPILOG must remove this many slots. Intel needs one slot for 3229 // return address and one for rbp, (must save rbp) 3230 in_preserve_stack_slots(2+VerifyStackAtCalls); 3231 3232 // Number of outgoing stack slots killed above the out_preserve_stack_slots 3233 // for calls to C. Supports the var-args backing area for register parms. 3234 varargs_C_out_slots_killed(0); 3235 3236 // The after-PROLOG location of the return address. Location of 3237 // return address specifies a type (REG or STACK) and a number 3238 // representing the register number (i.e. - use a register name) or 3239 // stack slot. 3240 // Ret Addr is on stack in slot 0 if no locks or verification or alignment. 3241 // Otherwise, it is above the locks and verification slot and alignment word 3242 return_addr(STACK - 1 + 3243 round_to((Compile::current()->in_preserve_stack_slots() + 3244 Compile::current()->fixed_slots()), 3245 stack_alignment_in_slots())); 3246 3247 // Body of function which returns an integer array locating 3248 // arguments either in registers or in stack slots. Passed an array 3249 // of ideal registers called "sig" and a "length" count. Stack-slot 3250 // offsets are based on outgoing arguments, i.e. a CALLER setting up 3251 // arguments for a CALLEE. Incoming stack arguments are 3252 // automatically biased by the preserve_stack_slots field above. 3253 calling_convention %{ 3254 // No difference between ingoing/outgoing just pass false 3255 SharedRuntime::java_calling_convention(sig_bt, regs, length, false); 3256 %} 3257 3258 3259 // Body of function which returns an integer array locating 3260 // arguments either in registers or in stack slots. Passed an array 3261 // of ideal registers called "sig" and a "length" count. Stack-slot 3262 // offsets are based on outgoing arguments, i.e. a CALLER setting up 3263 // arguments for a CALLEE. Incoming stack arguments are 3264 // automatically biased by the preserve_stack_slots field above. 3265 c_calling_convention %{ 3266 // This is obviously always outgoing 3267 (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length); 3268 %} 3269 3270 // Location of C & interpreter return values 3271 c_return_value %{ 3272 assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); 3273 static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num }; 3274 static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num }; 3275 3276 // in SSE2+ mode we want to keep the FPU stack clean so pretend 3277 // that C functions return float and double results in XMM0. 3278 if( ideal_reg == Op_RegD && UseSSE>=2 ) 3279 return OptoRegPair(XMM0b_num,XMM0_num); 3280 if( ideal_reg == Op_RegF && UseSSE>=2 ) 3281 return OptoRegPair(OptoReg::Bad,XMM0_num); 3282 3283 return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); 3284 %} 3285 3286 // Location of return values 3287 return_value %{ 3288 assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); 3289 static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num }; 3290 static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num }; 3291 if( ideal_reg == Op_RegD && UseSSE>=2 ) 3292 return OptoRegPair(XMM0b_num,XMM0_num); 3293 if( ideal_reg == Op_RegF && UseSSE>=1 ) 3294 return OptoRegPair(OptoReg::Bad,XMM0_num); 3295 return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); 3296 %} 3297 3298%} 3299 3300//----------ATTRIBUTES--------------------------------------------------------- 3301//----------Operand Attributes------------------------------------------------- 3302op_attrib op_cost(0); // Required cost attribute 3303 3304//----------Instruction Attributes--------------------------------------------- 3305ins_attrib ins_cost(100); // Required cost attribute 3306ins_attrib ins_size(8); // Required size attribute (in bits) 3307ins_attrib ins_short_branch(0); // Required flag: is this instruction a 3308 // non-matching short branch variant of some 3309 // long branch? 3310ins_attrib ins_alignment(1); // Required alignment attribute (must be a power of 2) 3311 // specifies the alignment that some part of the instruction (not 3312 // necessarily the start) requires. If > 1, a compute_padding() 3313 // function must be provided for the instruction 3314 3315//----------OPERANDS----------------------------------------------------------- 3316// Operand definitions must precede instruction definitions for correct parsing 3317// in the ADLC because operands constitute user defined types which are used in 3318// instruction definitions. 3319 3320//----------Simple Operands---------------------------------------------------- 3321// Immediate Operands 3322// Integer Immediate 3323operand immI() %{ 3324 match(ConI); 3325 3326 op_cost(10); 3327 format %{ %} 3328 interface(CONST_INTER); 3329%} 3330 3331// Constant for test vs zero 3332operand immI0() %{ 3333 predicate(n->get_int() == 0); 3334 match(ConI); 3335 3336 op_cost(0); 3337 format %{ %} 3338 interface(CONST_INTER); 3339%} 3340 3341// Constant for increment 3342operand immI1() %{ 3343 predicate(n->get_int() == 1); 3344 match(ConI); 3345 3346 op_cost(0); 3347 format %{ %} 3348 interface(CONST_INTER); 3349%} 3350 3351// Constant for decrement 3352operand immI_M1() %{ 3353 predicate(n->get_int() == -1); 3354 match(ConI); 3355 3356 op_cost(0); 3357 format %{ %} 3358 interface(CONST_INTER); 3359%} 3360 3361// Valid scale values for addressing modes 3362operand immI2() %{ 3363 predicate(0 <= n->get_int() && (n->get_int() <= 3)); 3364 match(ConI); 3365 3366 format %{ %} 3367 interface(CONST_INTER); 3368%} 3369 3370operand immI8() %{ 3371 predicate((-128 <= n->get_int()) && (n->get_int() <= 127)); 3372 match(ConI); 3373 3374 op_cost(5); 3375 format %{ %} 3376 interface(CONST_INTER); 3377%} 3378 3379operand immI16() %{ 3380 predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767)); 3381 match(ConI); 3382 3383 op_cost(10); 3384 format %{ %} 3385 interface(CONST_INTER); 3386%} 3387 3388// Int Immediate non-negative 3389operand immU31() 3390%{ 3391 predicate(n->get_int() >= 0); 3392 match(ConI); 3393 3394 op_cost(0); 3395 format %{ %} 3396 interface(CONST_INTER); 3397%} 3398 3399// Constant for long shifts 3400operand immI_32() %{ 3401 predicate( n->get_int() == 32 ); 3402 match(ConI); 3403 3404 op_cost(0); 3405 format %{ %} 3406 interface(CONST_INTER); 3407%} 3408 3409operand immI_1_31() %{ 3410 predicate( n->get_int() >= 1 && n->get_int() <= 31 ); 3411 match(ConI); 3412 3413 op_cost(0); 3414 format %{ %} 3415 interface(CONST_INTER); 3416%} 3417 3418operand immI_32_63() %{ 3419 predicate( n->get_int() >= 32 && n->get_int() <= 63 ); 3420 match(ConI); 3421 op_cost(0); 3422 3423 format %{ %} 3424 interface(CONST_INTER); 3425%} 3426 3427operand immI_1() %{ 3428 predicate( n->get_int() == 1 ); 3429 match(ConI); 3430 3431 op_cost(0); 3432 format %{ %} 3433 interface(CONST_INTER); 3434%} 3435 3436operand immI_2() %{ 3437 predicate( n->get_int() == 2 ); 3438 match(ConI); 3439 3440 op_cost(0); 3441 format %{ %} 3442 interface(CONST_INTER); 3443%} 3444 3445operand immI_3() %{ 3446 predicate( n->get_int() == 3 ); 3447 match(ConI); 3448 3449 op_cost(0); 3450 format %{ %} 3451 interface(CONST_INTER); 3452%} 3453 3454// Pointer Immediate 3455operand immP() %{ 3456 match(ConP); 3457 3458 op_cost(10); 3459 format %{ %} 3460 interface(CONST_INTER); 3461%} 3462 3463// NULL Pointer Immediate 3464operand immP0() %{ 3465 predicate( n->get_ptr() == 0 ); 3466 match(ConP); 3467 op_cost(0); 3468 3469 format %{ %} 3470 interface(CONST_INTER); 3471%} 3472 3473// Long Immediate 3474operand immL() %{ 3475 match(ConL); 3476 3477 op_cost(20); 3478 format %{ %} 3479 interface(CONST_INTER); 3480%} 3481 3482// Long Immediate zero 3483operand immL0() %{ 3484 predicate( n->get_long() == 0L ); 3485 match(ConL); 3486 op_cost(0); 3487 3488 format %{ %} 3489 interface(CONST_INTER); 3490%} 3491 3492// Long Immediate zero 3493operand immL_M1() %{ 3494 predicate( n->get_long() == -1L ); 3495 match(ConL); 3496 op_cost(0); 3497 3498 format %{ %} 3499 interface(CONST_INTER); 3500%} 3501 3502// Long immediate from 0 to 127. 3503// Used for a shorter form of long mul by 10. 3504operand immL_127() %{ 3505 predicate((0 <= n->get_long()) && (n->get_long() <= 127)); 3506 match(ConL); 3507 op_cost(0); 3508 3509 format %{ %} 3510 interface(CONST_INTER); 3511%} 3512 3513// Long Immediate: low 32-bit mask 3514operand immL_32bits() %{ 3515 predicate(n->get_long() == 0xFFFFFFFFL); 3516 match(ConL); 3517 op_cost(0); 3518 3519 format %{ %} 3520 interface(CONST_INTER); 3521%} 3522 3523// Long Immediate: low 32-bit mask 3524operand immL32() %{ 3525 predicate(n->get_long() == (int)(n->get_long())); 3526 match(ConL); 3527 op_cost(20); 3528 3529 format %{ %} 3530 interface(CONST_INTER); 3531%} 3532 3533//Double Immediate zero 3534operand immDPR0() %{ 3535 // Do additional (and counter-intuitive) test against NaN to work around VC++ 3536 // bug that generates code such that NaNs compare equal to 0.0 3537 predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) ); 3538 match(ConD); 3539 3540 op_cost(5); 3541 format %{ %} 3542 interface(CONST_INTER); 3543%} 3544 3545// Double Immediate one 3546operand immDPR1() %{ 3547 predicate( UseSSE<=1 && n->getd() == 1.0 ); 3548 match(ConD); 3549 3550 op_cost(5); 3551 format %{ %} 3552 interface(CONST_INTER); 3553%} 3554 3555// Double Immediate 3556operand immDPR() %{ 3557 predicate(UseSSE<=1); 3558 match(ConD); 3559 3560 op_cost(5); 3561 format %{ %} 3562 interface(CONST_INTER); 3563%} 3564 3565operand immD() %{ 3566 predicate(UseSSE>=2); 3567 match(ConD); 3568 3569 op_cost(5); 3570 format %{ %} 3571 interface(CONST_INTER); 3572%} 3573 3574// Double Immediate zero 3575operand immD0() %{ 3576 // Do additional (and counter-intuitive) test against NaN to work around VC++ 3577 // bug that generates code such that NaNs compare equal to 0.0 AND do not 3578 // compare equal to -0.0. 3579 predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 ); 3580 match(ConD); 3581 3582 format %{ %} 3583 interface(CONST_INTER); 3584%} 3585 3586// Float Immediate zero 3587operand immFPR0() %{ 3588 predicate(UseSSE == 0 && n->getf() == 0.0F); 3589 match(ConF); 3590 3591 op_cost(5); 3592 format %{ %} 3593 interface(CONST_INTER); 3594%} 3595 3596// Float Immediate one 3597operand immFPR1() %{ 3598 predicate(UseSSE == 0 && n->getf() == 1.0F); 3599 match(ConF); 3600 3601 op_cost(5); 3602 format %{ %} 3603 interface(CONST_INTER); 3604%} 3605 3606// Float Immediate 3607operand immFPR() %{ 3608 predicate( UseSSE == 0 ); 3609 match(ConF); 3610 3611 op_cost(5); 3612 format %{ %} 3613 interface(CONST_INTER); 3614%} 3615 3616// Float Immediate 3617operand immF() %{ 3618 predicate(UseSSE >= 1); 3619 match(ConF); 3620 3621 op_cost(5); 3622 format %{ %} 3623 interface(CONST_INTER); 3624%} 3625 3626// Float Immediate zero. Zero and not -0.0 3627operand immF0() %{ 3628 predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 ); 3629 match(ConF); 3630 3631 op_cost(5); 3632 format %{ %} 3633 interface(CONST_INTER); 3634%} 3635 3636// Immediates for special shifts (sign extend) 3637 3638// Constants for increment 3639operand immI_16() %{ 3640 predicate( n->get_int() == 16 ); 3641 match(ConI); 3642 3643 format %{ %} 3644 interface(CONST_INTER); 3645%} 3646 3647operand immI_24() %{ 3648 predicate( n->get_int() == 24 ); 3649 match(ConI); 3650 3651 format %{ %} 3652 interface(CONST_INTER); 3653%} 3654 3655// Constant for byte-wide masking 3656operand immI_255() %{ 3657 predicate( n->get_int() == 255 ); 3658 match(ConI); 3659 3660 format %{ %} 3661 interface(CONST_INTER); 3662%} 3663 3664// Constant for short-wide masking 3665operand immI_65535() %{ 3666 predicate(n->get_int() == 65535); 3667 match(ConI); 3668 3669 format %{ %} 3670 interface(CONST_INTER); 3671%} 3672 3673// Register Operands 3674// Integer Register 3675operand rRegI() %{ 3676 constraint(ALLOC_IN_RC(int_reg)); 3677 match(RegI); 3678 match(xRegI); 3679 match(eAXRegI); 3680 match(eBXRegI); 3681 match(eCXRegI); 3682 match(eDXRegI); 3683 match(eDIRegI); 3684 match(eSIRegI); 3685 3686 format %{ %} 3687 interface(REG_INTER); 3688%} 3689 3690// Subset of Integer Register 3691operand xRegI(rRegI reg) %{ 3692 constraint(ALLOC_IN_RC(int_x_reg)); 3693 match(reg); 3694 match(eAXRegI); 3695 match(eBXRegI); 3696 match(eCXRegI); 3697 match(eDXRegI); 3698 3699 format %{ %} 3700 interface(REG_INTER); 3701%} 3702 3703// Special Registers 3704operand eAXRegI(xRegI reg) %{ 3705 constraint(ALLOC_IN_RC(eax_reg)); 3706 match(reg); 3707 match(rRegI); 3708 3709 format %{ "EAX" %} 3710 interface(REG_INTER); 3711%} 3712 3713// Special Registers 3714operand eBXRegI(xRegI reg) %{ 3715 constraint(ALLOC_IN_RC(ebx_reg)); 3716 match(reg); 3717 match(rRegI); 3718 3719 format %{ "EBX" %} 3720 interface(REG_INTER); 3721%} 3722 3723operand eCXRegI(xRegI reg) %{ 3724 constraint(ALLOC_IN_RC(ecx_reg)); 3725 match(reg); 3726 match(rRegI); 3727 3728 format %{ "ECX" %} 3729 interface(REG_INTER); 3730%} 3731 3732operand eDXRegI(xRegI reg) %{ 3733 constraint(ALLOC_IN_RC(edx_reg)); 3734 match(reg); 3735 match(rRegI); 3736 3737 format %{ "EDX" %} 3738 interface(REG_INTER); 3739%} 3740 3741operand eDIRegI(xRegI reg) %{ 3742 constraint(ALLOC_IN_RC(edi_reg)); 3743 match(reg); 3744 match(rRegI); 3745 3746 format %{ "EDI" %} 3747 interface(REG_INTER); 3748%} 3749 3750operand naxRegI() %{ 3751 constraint(ALLOC_IN_RC(nax_reg)); 3752 match(RegI); 3753 match(eCXRegI); 3754 match(eDXRegI); 3755 match(eSIRegI); 3756 match(eDIRegI); 3757 3758 format %{ %} 3759 interface(REG_INTER); 3760%} 3761 3762operand nadxRegI() %{ 3763 constraint(ALLOC_IN_RC(nadx_reg)); 3764 match(RegI); 3765 match(eBXRegI); 3766 match(eCXRegI); 3767 match(eSIRegI); 3768 match(eDIRegI); 3769 3770 format %{ %} 3771 interface(REG_INTER); 3772%} 3773 3774operand ncxRegI() %{ 3775 constraint(ALLOC_IN_RC(ncx_reg)); 3776 match(RegI); 3777 match(eAXRegI); 3778 match(eDXRegI); 3779 match(eSIRegI); 3780 match(eDIRegI); 3781 3782 format %{ %} 3783 interface(REG_INTER); 3784%} 3785 3786// // This operand was used by cmpFastUnlock, but conflicted with 'object' reg 3787// // 3788operand eSIRegI(xRegI reg) %{ 3789 constraint(ALLOC_IN_RC(esi_reg)); 3790 match(reg); 3791 match(rRegI); 3792 3793 format %{ "ESI" %} 3794 interface(REG_INTER); 3795%} 3796 3797// Pointer Register 3798operand anyRegP() %{ 3799 constraint(ALLOC_IN_RC(any_reg)); 3800 match(RegP); 3801 match(eAXRegP); 3802 match(eBXRegP); 3803 match(eCXRegP); 3804 match(eDIRegP); 3805 match(eRegP); 3806 3807 format %{ %} 3808 interface(REG_INTER); 3809%} 3810 3811operand eRegP() %{ 3812 constraint(ALLOC_IN_RC(int_reg)); 3813 match(RegP); 3814 match(eAXRegP); 3815 match(eBXRegP); 3816 match(eCXRegP); 3817 match(eDIRegP); 3818 3819 format %{ %} 3820 interface(REG_INTER); 3821%} 3822 3823// On windows95, EBP is not safe to use for implicit null tests. 3824operand eRegP_no_EBP() %{ 3825 constraint(ALLOC_IN_RC(int_reg_no_ebp)); 3826 match(RegP); 3827 match(eAXRegP); 3828 match(eBXRegP); 3829 match(eCXRegP); 3830 match(eDIRegP); 3831 3832 op_cost(100); 3833 format %{ %} 3834 interface(REG_INTER); 3835%} 3836 3837operand naxRegP() %{ 3838 constraint(ALLOC_IN_RC(nax_reg)); 3839 match(RegP); 3840 match(eBXRegP); 3841 match(eDXRegP); 3842 match(eCXRegP); 3843 match(eSIRegP); 3844 match(eDIRegP); 3845 3846 format %{ %} 3847 interface(REG_INTER); 3848%} 3849 3850operand nabxRegP() %{ 3851 constraint(ALLOC_IN_RC(nabx_reg)); 3852 match(RegP); 3853 match(eCXRegP); 3854 match(eDXRegP); 3855 match(eSIRegP); 3856 match(eDIRegP); 3857 3858 format %{ %} 3859 interface(REG_INTER); 3860%} 3861 3862operand pRegP() %{ 3863 constraint(ALLOC_IN_RC(p_reg)); 3864 match(RegP); 3865 match(eBXRegP); 3866 match(eDXRegP); 3867 match(eSIRegP); 3868 match(eDIRegP); 3869 3870 format %{ %} 3871 interface(REG_INTER); 3872%} 3873 3874// Special Registers 3875// Return a pointer value 3876operand eAXRegP(eRegP reg) %{ 3877 constraint(ALLOC_IN_RC(eax_reg)); 3878 match(reg); 3879 format %{ "EAX" %} 3880 interface(REG_INTER); 3881%} 3882 3883// Used in AtomicAdd 3884operand eBXRegP(eRegP reg) %{ 3885 constraint(ALLOC_IN_RC(ebx_reg)); 3886 match(reg); 3887 format %{ "EBX" %} 3888 interface(REG_INTER); 3889%} 3890 3891// Tail-call (interprocedural jump) to interpreter 3892operand eCXRegP(eRegP reg) %{ 3893 constraint(ALLOC_IN_RC(ecx_reg)); 3894 match(reg); 3895 format %{ "ECX" %} 3896 interface(REG_INTER); 3897%} 3898 3899operand eSIRegP(eRegP reg) %{ 3900 constraint(ALLOC_IN_RC(esi_reg)); 3901 match(reg); 3902 format %{ "ESI" %} 3903 interface(REG_INTER); 3904%} 3905 3906// Used in rep stosw 3907operand eDIRegP(eRegP reg) %{ 3908 constraint(ALLOC_IN_RC(edi_reg)); 3909 match(reg); 3910 format %{ "EDI" %} 3911 interface(REG_INTER); 3912%} 3913 3914operand eRegL() %{ 3915 constraint(ALLOC_IN_RC(long_reg)); 3916 match(RegL); 3917 match(eADXRegL); 3918 3919 format %{ %} 3920 interface(REG_INTER); 3921%} 3922 3923operand eADXRegL( eRegL reg ) %{ 3924 constraint(ALLOC_IN_RC(eadx_reg)); 3925 match(reg); 3926 3927 format %{ "EDX:EAX" %} 3928 interface(REG_INTER); 3929%} 3930 3931operand eBCXRegL( eRegL reg ) %{ 3932 constraint(ALLOC_IN_RC(ebcx_reg)); 3933 match(reg); 3934 3935 format %{ "EBX:ECX" %} 3936 interface(REG_INTER); 3937%} 3938 3939// Special case for integer high multiply 3940operand eADXRegL_low_only() %{ 3941 constraint(ALLOC_IN_RC(eadx_reg)); 3942 match(RegL); 3943 3944 format %{ "EAX" %} 3945 interface(REG_INTER); 3946%} 3947 3948// Flags register, used as output of compare instructions 3949operand eFlagsReg() %{ 3950 constraint(ALLOC_IN_RC(int_flags)); 3951 match(RegFlags); 3952 3953 format %{ "EFLAGS" %} 3954 interface(REG_INTER); 3955%} 3956 3957// Flags register, used as output of FLOATING POINT compare instructions 3958operand eFlagsRegU() %{ 3959 constraint(ALLOC_IN_RC(int_flags)); 3960 match(RegFlags); 3961 3962 format %{ "EFLAGS_U" %} 3963 interface(REG_INTER); 3964%} 3965 3966operand eFlagsRegUCF() %{ 3967 constraint(ALLOC_IN_RC(int_flags)); 3968 match(RegFlags); 3969 predicate(false); 3970 3971 format %{ "EFLAGS_U_CF" %} 3972 interface(REG_INTER); 3973%} 3974 3975// Condition Code Register used by long compare 3976operand flagsReg_long_LTGE() %{ 3977 constraint(ALLOC_IN_RC(int_flags)); 3978 match(RegFlags); 3979 format %{ "FLAGS_LTGE" %} 3980 interface(REG_INTER); 3981%} 3982operand flagsReg_long_EQNE() %{ 3983 constraint(ALLOC_IN_RC(int_flags)); 3984 match(RegFlags); 3985 format %{ "FLAGS_EQNE" %} 3986 interface(REG_INTER); 3987%} 3988operand flagsReg_long_LEGT() %{ 3989 constraint(ALLOC_IN_RC(int_flags)); 3990 match(RegFlags); 3991 format %{ "FLAGS_LEGT" %} 3992 interface(REG_INTER); 3993%} 3994 3995// Float register operands 3996operand regDPR() %{ 3997 predicate( UseSSE < 2 ); 3998 constraint(ALLOC_IN_RC(fp_dbl_reg)); 3999 match(RegD); 4000 match(regDPR1); 4001 match(regDPR2); 4002 format %{ %} 4003 interface(REG_INTER); 4004%} 4005 4006operand regDPR1(regDPR reg) %{ 4007 predicate( UseSSE < 2 ); 4008 constraint(ALLOC_IN_RC(fp_dbl_reg0)); 4009 match(reg); 4010 format %{ "FPR1" %} 4011 interface(REG_INTER); 4012%} 4013 4014operand regDPR2(regDPR reg) %{ 4015 predicate( UseSSE < 2 ); 4016 constraint(ALLOC_IN_RC(fp_dbl_reg1)); 4017 match(reg); 4018 format %{ "FPR2" %} 4019 interface(REG_INTER); 4020%} 4021 4022operand regnotDPR1(regDPR reg) %{ 4023 predicate( UseSSE < 2 ); 4024 constraint(ALLOC_IN_RC(fp_dbl_notreg0)); 4025 match(reg); 4026 format %{ %} 4027 interface(REG_INTER); 4028%} 4029 4030// Float register operands 4031operand regFPR() %{ 4032 predicate( UseSSE < 2 ); 4033 constraint(ALLOC_IN_RC(fp_flt_reg)); 4034 match(RegF); 4035 match(regFPR1); 4036 format %{ %} 4037 interface(REG_INTER); 4038%} 4039 4040// Float register operands 4041operand regFPR1(regFPR reg) %{ 4042 predicate( UseSSE < 2 ); 4043 constraint(ALLOC_IN_RC(fp_flt_reg0)); 4044 match(reg); 4045 format %{ "FPR1" %} 4046 interface(REG_INTER); 4047%} 4048 4049// XMM Float register operands 4050operand regF() %{ 4051 predicate( UseSSE>=1 ); 4052 constraint(ALLOC_IN_RC(float_reg_legacy)); 4053 match(RegF); 4054 format %{ %} 4055 interface(REG_INTER); 4056%} 4057 4058// XMM Double register operands 4059operand regD() %{ 4060 predicate( UseSSE>=2 ); 4061 constraint(ALLOC_IN_RC(double_reg_legacy)); 4062 match(RegD); 4063 format %{ %} 4064 interface(REG_INTER); 4065%} 4066 4067// Vectors : note, we use legacy registers to avoid extra (unneeded in 32-bit VM) 4068// runtime code generation via reg_class_dynamic. 4069operand vecS() %{ 4070 constraint(ALLOC_IN_RC(vectors_reg_legacy)); 4071 match(VecS); 4072 4073 format %{ %} 4074 interface(REG_INTER); 4075%} 4076 4077operand vecD() %{ 4078 constraint(ALLOC_IN_RC(vectord_reg_legacy)); 4079 match(VecD); 4080 4081 format %{ %} 4082 interface(REG_INTER); 4083%} 4084 4085operand vecX() %{ 4086 constraint(ALLOC_IN_RC(vectorx_reg_legacy)); 4087 match(VecX); 4088 4089 format %{ %} 4090 interface(REG_INTER); 4091%} 4092 4093operand vecY() %{ 4094 constraint(ALLOC_IN_RC(vectory_reg_legacy)); 4095 match(VecY); 4096 4097 format %{ %} 4098 interface(REG_INTER); 4099%} 4100 4101//----------Memory Operands---------------------------------------------------- 4102// Direct Memory Operand 4103operand direct(immP addr) %{ 4104 match(addr); 4105 4106 format %{ "[$addr]" %} 4107 interface(MEMORY_INTER) %{ 4108 base(0xFFFFFFFF); 4109 index(0x4); 4110 scale(0x0); 4111 disp($addr); 4112 %} 4113%} 4114 4115// Indirect Memory Operand 4116operand indirect(eRegP reg) %{ 4117 constraint(ALLOC_IN_RC(int_reg)); 4118 match(reg); 4119 4120 format %{ "[$reg]" %} 4121 interface(MEMORY_INTER) %{ 4122 base($reg); 4123 index(0x4); 4124 scale(0x0); 4125 disp(0x0); 4126 %} 4127%} 4128 4129// Indirect Memory Plus Short Offset Operand 4130operand indOffset8(eRegP reg, immI8 off) %{ 4131 match(AddP reg off); 4132 4133 format %{ "[$reg + $off]" %} 4134 interface(MEMORY_INTER) %{ 4135 base($reg); 4136 index(0x4); 4137 scale(0x0); 4138 disp($off); 4139 %} 4140%} 4141 4142// Indirect Memory Plus Long Offset Operand 4143operand indOffset32(eRegP reg, immI off) %{ 4144 match(AddP reg off); 4145 4146 format %{ "[$reg + $off]" %} 4147 interface(MEMORY_INTER) %{ 4148 base($reg); 4149 index(0x4); 4150 scale(0x0); 4151 disp($off); 4152 %} 4153%} 4154 4155// Indirect Memory Plus Long Offset Operand 4156operand indOffset32X(rRegI reg, immP off) %{ 4157 match(AddP off reg); 4158 4159 format %{ "[$reg + $off]" %} 4160 interface(MEMORY_INTER) %{ 4161 base($reg); 4162 index(0x4); 4163 scale(0x0); 4164 disp($off); 4165 %} 4166%} 4167 4168// Indirect Memory Plus Index Register Plus Offset Operand 4169operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{ 4170 match(AddP (AddP reg ireg) off); 4171 4172 op_cost(10); 4173 format %{"[$reg + $off + $ireg]" %} 4174 interface(MEMORY_INTER) %{ 4175 base($reg); 4176 index($ireg); 4177 scale(0x0); 4178 disp($off); 4179 %} 4180%} 4181 4182// Indirect Memory Plus Index Register Plus Offset Operand 4183operand indIndex(eRegP reg, rRegI ireg) %{ 4184 match(AddP reg ireg); 4185 4186 op_cost(10); 4187 format %{"[$reg + $ireg]" %} 4188 interface(MEMORY_INTER) %{ 4189 base($reg); 4190 index($ireg); 4191 scale(0x0); 4192 disp(0x0); 4193 %} 4194%} 4195 4196// // ------------------------------------------------------------------------- 4197// // 486 architecture doesn't support "scale * index + offset" with out a base 4198// // ------------------------------------------------------------------------- 4199// // Scaled Memory Operands 4200// // Indirect Memory Times Scale Plus Offset Operand 4201// operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{ 4202// match(AddP off (LShiftI ireg scale)); 4203// 4204// op_cost(10); 4205// format %{"[$off + $ireg << $scale]" %} 4206// interface(MEMORY_INTER) %{ 4207// base(0x4); 4208// index($ireg); 4209// scale($scale); 4210// disp($off); 4211// %} 4212// %} 4213 4214// Indirect Memory Times Scale Plus Index Register 4215operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{ 4216 match(AddP reg (LShiftI ireg scale)); 4217 4218 op_cost(10); 4219 format %{"[$reg + $ireg << $scale]" %} 4220 interface(MEMORY_INTER) %{ 4221 base($reg); 4222 index($ireg); 4223 scale($scale); 4224 disp(0x0); 4225 %} 4226%} 4227 4228// Indirect Memory Times Scale Plus Index Register Plus Offset Operand 4229operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{ 4230 match(AddP (AddP reg (LShiftI ireg scale)) off); 4231 4232 op_cost(10); 4233 format %{"[$reg + $off + $ireg << $scale]" %} 4234 interface(MEMORY_INTER) %{ 4235 base($reg); 4236 index($ireg); 4237 scale($scale); 4238 disp($off); 4239 %} 4240%} 4241 4242//----------Load Long Memory Operands------------------------------------------ 4243// The load-long idiom will use it's address expression again after loading 4244// the first word of the long. If the load-long destination overlaps with 4245// registers used in the addressing expression, the 2nd half will be loaded 4246// from a clobbered address. Fix this by requiring that load-long use 4247// address registers that do not overlap with the load-long target. 4248 4249// load-long support 4250operand load_long_RegP() %{ 4251 constraint(ALLOC_IN_RC(esi_reg)); 4252 match(RegP); 4253 match(eSIRegP); 4254 op_cost(100); 4255 format %{ %} 4256 interface(REG_INTER); 4257%} 4258 4259// Indirect Memory Operand Long 4260operand load_long_indirect(load_long_RegP reg) %{ 4261 constraint(ALLOC_IN_RC(esi_reg)); 4262 match(reg); 4263 4264 format %{ "[$reg]" %} 4265 interface(MEMORY_INTER) %{ 4266 base($reg); 4267 index(0x4); 4268 scale(0x0); 4269 disp(0x0); 4270 %} 4271%} 4272 4273// Indirect Memory Plus Long Offset Operand 4274operand load_long_indOffset32(load_long_RegP reg, immI off) %{ 4275 match(AddP reg off); 4276 4277 format %{ "[$reg + $off]" %} 4278 interface(MEMORY_INTER) %{ 4279 base($reg); 4280 index(0x4); 4281 scale(0x0); 4282 disp($off); 4283 %} 4284%} 4285 4286opclass load_long_memory(load_long_indirect, load_long_indOffset32); 4287 4288 4289//----------Special Memory Operands-------------------------------------------- 4290// Stack Slot Operand - This operand is used for loading and storing temporary 4291// values on the stack where a match requires a value to 4292// flow through memory. 4293operand stackSlotP(sRegP reg) %{ 4294 constraint(ALLOC_IN_RC(stack_slots)); 4295 // No match rule because this operand is only generated in matching 4296 format %{ "[$reg]" %} 4297 interface(MEMORY_INTER) %{ 4298 base(0x4); // ESP 4299 index(0x4); // No Index 4300 scale(0x0); // No Scale 4301 disp($reg); // Stack Offset 4302 %} 4303%} 4304 4305operand stackSlotI(sRegI reg) %{ 4306 constraint(ALLOC_IN_RC(stack_slots)); 4307 // No match rule because this operand is only generated in matching 4308 format %{ "[$reg]" %} 4309 interface(MEMORY_INTER) %{ 4310 base(0x4); // ESP 4311 index(0x4); // No Index 4312 scale(0x0); // No Scale 4313 disp($reg); // Stack Offset 4314 %} 4315%} 4316 4317operand stackSlotF(sRegF reg) %{ 4318 constraint(ALLOC_IN_RC(stack_slots)); 4319 // No match rule because this operand is only generated in matching 4320 format %{ "[$reg]" %} 4321 interface(MEMORY_INTER) %{ 4322 base(0x4); // ESP 4323 index(0x4); // No Index 4324 scale(0x0); // No Scale 4325 disp($reg); // Stack Offset 4326 %} 4327%} 4328 4329operand stackSlotD(sRegD reg) %{ 4330 constraint(ALLOC_IN_RC(stack_slots)); 4331 // No match rule because this operand is only generated in matching 4332 format %{ "[$reg]" %} 4333 interface(MEMORY_INTER) %{ 4334 base(0x4); // ESP 4335 index(0x4); // No Index 4336 scale(0x0); // No Scale 4337 disp($reg); // Stack Offset 4338 %} 4339%} 4340 4341operand stackSlotL(sRegL reg) %{ 4342 constraint(ALLOC_IN_RC(stack_slots)); 4343 // No match rule because this operand is only generated in matching 4344 format %{ "[$reg]" %} 4345 interface(MEMORY_INTER) %{ 4346 base(0x4); // ESP 4347 index(0x4); // No Index 4348 scale(0x0); // No Scale 4349 disp($reg); // Stack Offset 4350 %} 4351%} 4352 4353//----------Memory Operands - Win95 Implicit Null Variants---------------- 4354// Indirect Memory Operand 4355operand indirect_win95_safe(eRegP_no_EBP reg) 4356%{ 4357 constraint(ALLOC_IN_RC(int_reg)); 4358 match(reg); 4359 4360 op_cost(100); 4361 format %{ "[$reg]" %} 4362 interface(MEMORY_INTER) %{ 4363 base($reg); 4364 index(0x4); 4365 scale(0x0); 4366 disp(0x0); 4367 %} 4368%} 4369 4370// Indirect Memory Plus Short Offset Operand 4371operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off) 4372%{ 4373 match(AddP reg off); 4374 4375 op_cost(100); 4376 format %{ "[$reg + $off]" %} 4377 interface(MEMORY_INTER) %{ 4378 base($reg); 4379 index(0x4); 4380 scale(0x0); 4381 disp($off); 4382 %} 4383%} 4384 4385// Indirect Memory Plus Long Offset Operand 4386operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off) 4387%{ 4388 match(AddP reg off); 4389 4390 op_cost(100); 4391 format %{ "[$reg + $off]" %} 4392 interface(MEMORY_INTER) %{ 4393 base($reg); 4394 index(0x4); 4395 scale(0x0); 4396 disp($off); 4397 %} 4398%} 4399 4400// Indirect Memory Plus Index Register Plus Offset Operand 4401operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off) 4402%{ 4403 match(AddP (AddP reg ireg) off); 4404 4405 op_cost(100); 4406 format %{"[$reg + $off + $ireg]" %} 4407 interface(MEMORY_INTER) %{ 4408 base($reg); 4409 index($ireg); 4410 scale(0x0); 4411 disp($off); 4412 %} 4413%} 4414 4415// Indirect Memory Times Scale Plus Index Register 4416operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale) 4417%{ 4418 match(AddP reg (LShiftI ireg scale)); 4419 4420 op_cost(100); 4421 format %{"[$reg + $ireg << $scale]" %} 4422 interface(MEMORY_INTER) %{ 4423 base($reg); 4424 index($ireg); 4425 scale($scale); 4426 disp(0x0); 4427 %} 4428%} 4429 4430// Indirect Memory Times Scale Plus Index Register Plus Offset Operand 4431operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale) 4432%{ 4433 match(AddP (AddP reg (LShiftI ireg scale)) off); 4434 4435 op_cost(100); 4436 format %{"[$reg + $off + $ireg << $scale]" %} 4437 interface(MEMORY_INTER) %{ 4438 base($reg); 4439 index($ireg); 4440 scale($scale); 4441 disp($off); 4442 %} 4443%} 4444 4445//----------Conditional Branch Operands---------------------------------------- 4446// Comparison Op - This is the operation of the comparison, and is limited to 4447// the following set of codes: 4448// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) 4449// 4450// Other attributes of the comparison, such as unsignedness, are specified 4451// by the comparison instruction that sets a condition code flags register. 4452// That result is represented by a flags operand whose subtype is appropriate 4453// to the unsignedness (etc.) of the comparison. 4454// 4455// Later, the instruction which matches both the Comparison Op (a Bool) and 4456// the flags (produced by the Cmp) specifies the coding of the comparison op 4457// by matching a specific subtype of Bool operand below, such as cmpOpU. 4458 4459// Comparision Code 4460operand cmpOp() %{ 4461 match(Bool); 4462 4463 format %{ "" %} 4464 interface(COND_INTER) %{ 4465 equal(0x4, "e"); 4466 not_equal(0x5, "ne"); 4467 less(0xC, "l"); 4468 greater_equal(0xD, "ge"); 4469 less_equal(0xE, "le"); 4470 greater(0xF, "g"); 4471 overflow(0x0, "o"); 4472 no_overflow(0x1, "no"); 4473 %} 4474%} 4475 4476// Comparison Code, unsigned compare. Used by FP also, with 4477// C2 (unordered) turned into GT or LT already. The other bits 4478// C0 and C3 are turned into Carry & Zero flags. 4479operand cmpOpU() %{ 4480 match(Bool); 4481 4482 format %{ "" %} 4483 interface(COND_INTER) %{ 4484 equal(0x4, "e"); 4485 not_equal(0x5, "ne"); 4486 less(0x2, "b"); 4487 greater_equal(0x3, "nb"); 4488 less_equal(0x6, "be"); 4489 greater(0x7, "nbe"); 4490 overflow(0x0, "o"); 4491 no_overflow(0x1, "no"); 4492 %} 4493%} 4494 4495// Floating comparisons that don't require any fixup for the unordered case 4496operand cmpOpUCF() %{ 4497 match(Bool); 4498 predicate(n->as_Bool()->_test._test == BoolTest::lt || 4499 n->as_Bool()->_test._test == BoolTest::ge || 4500 n->as_Bool()->_test._test == BoolTest::le || 4501 n->as_Bool()->_test._test == BoolTest::gt); 4502 format %{ "" %} 4503 interface(COND_INTER) %{ 4504 equal(0x4, "e"); 4505 not_equal(0x5, "ne"); 4506 less(0x2, "b"); 4507 greater_equal(0x3, "nb"); 4508 less_equal(0x6, "be"); 4509 greater(0x7, "nbe"); 4510 overflow(0x0, "o"); 4511 no_overflow(0x1, "no"); 4512 %} 4513%} 4514 4515 4516// Floating comparisons that can be fixed up with extra conditional jumps 4517operand cmpOpUCF2() %{ 4518 match(Bool); 4519 predicate(n->as_Bool()->_test._test == BoolTest::ne || 4520 n->as_Bool()->_test._test == BoolTest::eq); 4521 format %{ "" %} 4522 interface(COND_INTER) %{ 4523 equal(0x4, "e"); 4524 not_equal(0x5, "ne"); 4525 less(0x2, "b"); 4526 greater_equal(0x3, "nb"); 4527 less_equal(0x6, "be"); 4528 greater(0x7, "nbe"); 4529 overflow(0x0, "o"); 4530 no_overflow(0x1, "no"); 4531 %} 4532%} 4533 4534// Comparison Code for FP conditional move 4535operand cmpOp_fcmov() %{ 4536 match(Bool); 4537 4538 predicate(n->as_Bool()->_test._test != BoolTest::overflow && 4539 n->as_Bool()->_test._test != BoolTest::no_overflow); 4540 format %{ "" %} 4541 interface(COND_INTER) %{ 4542 equal (0x0C8); 4543 not_equal (0x1C8); 4544 less (0x0C0); 4545 greater_equal(0x1C0); 4546 less_equal (0x0D0); 4547 greater (0x1D0); 4548 overflow(0x0, "o"); // not really supported by the instruction 4549 no_overflow(0x1, "no"); // not really supported by the instruction 4550 %} 4551%} 4552 4553// Comparision Code used in long compares 4554operand cmpOp_commute() %{ 4555 match(Bool); 4556 4557 format %{ "" %} 4558 interface(COND_INTER) %{ 4559 equal(0x4, "e"); 4560 not_equal(0x5, "ne"); 4561 less(0xF, "g"); 4562 greater_equal(0xE, "le"); 4563 less_equal(0xD, "ge"); 4564 greater(0xC, "l"); 4565 overflow(0x0, "o"); 4566 no_overflow(0x1, "no"); 4567 %} 4568%} 4569 4570//----------OPERAND CLASSES---------------------------------------------------- 4571// Operand Classes are groups of operands that are used as to simplify 4572// instruction definitions by not requiring the AD writer to specify separate 4573// instructions for every form of operand when the instruction accepts 4574// multiple operand types with the same basic encoding and format. The classic 4575// case of this is memory operands. 4576 4577opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset, 4578 indIndex, indIndexScale, indIndexScaleOffset); 4579 4580// Long memory operations are encoded in 2 instructions and a +4 offset. 4581// This means some kind of offset is always required and you cannot use 4582// an oop as the offset (done when working on static globals). 4583opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset, 4584 indIndex, indIndexScale, indIndexScaleOffset); 4585 4586 4587//----------PIPELINE----------------------------------------------------------- 4588// Rules which define the behavior of the target architectures pipeline. 4589pipeline %{ 4590 4591//----------ATTRIBUTES--------------------------------------------------------- 4592attributes %{ 4593 variable_size_instructions; // Fixed size instructions 4594 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle 4595 instruction_unit_size = 1; // An instruction is 1 bytes long 4596 instruction_fetch_unit_size = 16; // The processor fetches one line 4597 instruction_fetch_units = 1; // of 16 bytes 4598 4599 // List of nop instructions 4600 nops( MachNop ); 4601%} 4602 4603//----------RESOURCES---------------------------------------------------------- 4604// Resources are the functional units available to the machine 4605 4606// Generic P2/P3 pipeline 4607// 3 decoders, only D0 handles big operands; a "bundle" is the limit of 4608// 3 instructions decoded per cycle. 4609// 2 load/store ops per cycle, 1 branch, 1 FPU, 4610// 2 ALU op, only ALU0 handles mul/div instructions. 4611resources( D0, D1, D2, DECODE = D0 | D1 | D2, 4612 MS0, MS1, MEM = MS0 | MS1, 4613 BR, FPU, 4614 ALU0, ALU1, ALU = ALU0 | ALU1 ); 4615 4616//----------PIPELINE DESCRIPTION----------------------------------------------- 4617// Pipeline Description specifies the stages in the machine's pipeline 4618 4619// Generic P2/P3 pipeline 4620pipe_desc(S0, S1, S2, S3, S4, S5); 4621 4622//----------PIPELINE CLASSES--------------------------------------------------- 4623// Pipeline Classes describe the stages in which input and output are 4624// referenced by the hardware pipeline. 4625 4626// Naming convention: ialu or fpu 4627// Then: _reg 4628// Then: _reg if there is a 2nd register 4629// Then: _long if it's a pair of instructions implementing a long 4630// Then: _fat if it requires the big decoder 4631// Or: _mem if it requires the big decoder and a memory unit. 4632 4633// Integer ALU reg operation 4634pipe_class ialu_reg(rRegI dst) %{ 4635 single_instruction; 4636 dst : S4(write); 4637 dst : S3(read); 4638 DECODE : S0; // any decoder 4639 ALU : S3; // any alu 4640%} 4641 4642// Long ALU reg operation 4643pipe_class ialu_reg_long(eRegL dst) %{ 4644 instruction_count(2); 4645 dst : S4(write); 4646 dst : S3(read); 4647 DECODE : S0(2); // any 2 decoders 4648 ALU : S3(2); // both alus 4649%} 4650 4651// Integer ALU reg operation using big decoder 4652pipe_class ialu_reg_fat(rRegI dst) %{ 4653 single_instruction; 4654 dst : S4(write); 4655 dst : S3(read); 4656 D0 : S0; // big decoder only 4657 ALU : S3; // any alu 4658%} 4659 4660// Long ALU reg operation using big decoder 4661pipe_class ialu_reg_long_fat(eRegL dst) %{ 4662 instruction_count(2); 4663 dst : S4(write); 4664 dst : S3(read); 4665 D0 : S0(2); // big decoder only; twice 4666 ALU : S3(2); // any 2 alus 4667%} 4668 4669// Integer ALU reg-reg operation 4670pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{ 4671 single_instruction; 4672 dst : S4(write); 4673 src : S3(read); 4674 DECODE : S0; // any decoder 4675 ALU : S3; // any alu 4676%} 4677 4678// Long ALU reg-reg operation 4679pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{ 4680 instruction_count(2); 4681 dst : S4(write); 4682 src : S3(read); 4683 DECODE : S0(2); // any 2 decoders 4684 ALU : S3(2); // both alus 4685%} 4686 4687// Integer ALU reg-reg operation 4688pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{ 4689 single_instruction; 4690 dst : S4(write); 4691 src : S3(read); 4692 D0 : S0; // big decoder only 4693 ALU : S3; // any alu 4694%} 4695 4696// Long ALU reg-reg operation 4697pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{ 4698 instruction_count(2); 4699 dst : S4(write); 4700 src : S3(read); 4701 D0 : S0(2); // big decoder only; twice 4702 ALU : S3(2); // both alus 4703%} 4704 4705// Integer ALU reg-mem operation 4706pipe_class ialu_reg_mem(rRegI dst, memory mem) %{ 4707 single_instruction; 4708 dst : S5(write); 4709 mem : S3(read); 4710 D0 : S0; // big decoder only 4711 ALU : S4; // any alu 4712 MEM : S3; // any mem 4713%} 4714 4715// Long ALU reg-mem operation 4716pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{ 4717 instruction_count(2); 4718 dst : S5(write); 4719 mem : S3(read); 4720 D0 : S0(2); // big decoder only; twice 4721 ALU : S4(2); // any 2 alus 4722 MEM : S3(2); // both mems 4723%} 4724 4725// Integer mem operation (prefetch) 4726pipe_class ialu_mem(memory mem) 4727%{ 4728 single_instruction; 4729 mem : S3(read); 4730 D0 : S0; // big decoder only 4731 MEM : S3; // any mem 4732%} 4733 4734// Integer Store to Memory 4735pipe_class ialu_mem_reg(memory mem, rRegI src) %{ 4736 single_instruction; 4737 mem : S3(read); 4738 src : S5(read); 4739 D0 : S0; // big decoder only 4740 ALU : S4; // any alu 4741 MEM : S3; 4742%} 4743 4744// Long Store to Memory 4745pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{ 4746 instruction_count(2); 4747 mem : S3(read); 4748 src : S5(read); 4749 D0 : S0(2); // big decoder only; twice 4750 ALU : S4(2); // any 2 alus 4751 MEM : S3(2); // Both mems 4752%} 4753 4754// Integer Store to Memory 4755pipe_class ialu_mem_imm(memory mem) %{ 4756 single_instruction; 4757 mem : S3(read); 4758 D0 : S0; // big decoder only 4759 ALU : S4; // any alu 4760 MEM : S3; 4761%} 4762 4763// Integer ALU0 reg-reg operation 4764pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{ 4765 single_instruction; 4766 dst : S4(write); 4767 src : S3(read); 4768 D0 : S0; // Big decoder only 4769 ALU0 : S3; // only alu0 4770%} 4771 4772// Integer ALU0 reg-mem operation 4773pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{ 4774 single_instruction; 4775 dst : S5(write); 4776 mem : S3(read); 4777 D0 : S0; // big decoder only 4778 ALU0 : S4; // ALU0 only 4779 MEM : S3; // any mem 4780%} 4781 4782// Integer ALU reg-reg operation 4783pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{ 4784 single_instruction; 4785 cr : S4(write); 4786 src1 : S3(read); 4787 src2 : S3(read); 4788 DECODE : S0; // any decoder 4789 ALU : S3; // any alu 4790%} 4791 4792// Integer ALU reg-imm operation 4793pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{ 4794 single_instruction; 4795 cr : S4(write); 4796 src1 : S3(read); 4797 DECODE : S0; // any decoder 4798 ALU : S3; // any alu 4799%} 4800 4801// Integer ALU reg-mem operation 4802pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{ 4803 single_instruction; 4804 cr : S4(write); 4805 src1 : S3(read); 4806 src2 : S3(read); 4807 D0 : S0; // big decoder only 4808 ALU : S4; // any alu 4809 MEM : S3; 4810%} 4811 4812// Conditional move reg-reg 4813pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{ 4814 instruction_count(4); 4815 y : S4(read); 4816 q : S3(read); 4817 p : S3(read); 4818 DECODE : S0(4); // any decoder 4819%} 4820 4821// Conditional move reg-reg 4822pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{ 4823 single_instruction; 4824 dst : S4(write); 4825 src : S3(read); 4826 cr : S3(read); 4827 DECODE : S0; // any decoder 4828%} 4829 4830// Conditional move reg-mem 4831pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{ 4832 single_instruction; 4833 dst : S4(write); 4834 src : S3(read); 4835 cr : S3(read); 4836 DECODE : S0; // any decoder 4837 MEM : S3; 4838%} 4839 4840// Conditional move reg-reg long 4841pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{ 4842 single_instruction; 4843 dst : S4(write); 4844 src : S3(read); 4845 cr : S3(read); 4846 DECODE : S0(2); // any 2 decoders 4847%} 4848 4849// Conditional move double reg-reg 4850pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{ 4851 single_instruction; 4852 dst : S4(write); 4853 src : S3(read); 4854 cr : S3(read); 4855 DECODE : S0; // any decoder 4856%} 4857 4858// Float reg-reg operation 4859pipe_class fpu_reg(regDPR dst) %{ 4860 instruction_count(2); 4861 dst : S3(read); 4862 DECODE : S0(2); // any 2 decoders 4863 FPU : S3; 4864%} 4865 4866// Float reg-reg operation 4867pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{ 4868 instruction_count(2); 4869 dst : S4(write); 4870 src : S3(read); 4871 DECODE : S0(2); // any 2 decoders 4872 FPU : S3; 4873%} 4874 4875// Float reg-reg operation 4876pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{ 4877 instruction_count(3); 4878 dst : S4(write); 4879 src1 : S3(read); 4880 src2 : S3(read); 4881 DECODE : S0(3); // any 3 decoders 4882 FPU : S3(2); 4883%} 4884 4885// Float reg-reg operation 4886pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{ 4887 instruction_count(4); 4888 dst : S4(write); 4889 src1 : S3(read); 4890 src2 : S3(read); 4891 src3 : S3(read); 4892 DECODE : S0(4); // any 3 decoders 4893 FPU : S3(2); 4894%} 4895 4896// Float reg-reg operation 4897pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{ 4898 instruction_count(4); 4899 dst : S4(write); 4900 src1 : S3(read); 4901 src2 : S3(read); 4902 src3 : S3(read); 4903 DECODE : S1(3); // any 3 decoders 4904 D0 : S0; // Big decoder only 4905 FPU : S3(2); 4906 MEM : S3; 4907%} 4908 4909// Float reg-mem operation 4910pipe_class fpu_reg_mem(regDPR dst, memory mem) %{ 4911 instruction_count(2); 4912 dst : S5(write); 4913 mem : S3(read); 4914 D0 : S0; // big decoder only 4915 DECODE : S1; // any decoder for FPU POP 4916 FPU : S4; 4917 MEM : S3; // any mem 4918%} 4919 4920// Float reg-mem operation 4921pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{ 4922 instruction_count(3); 4923 dst : S5(write); 4924 src1 : S3(read); 4925 mem : S3(read); 4926 D0 : S0; // big decoder only 4927 DECODE : S1(2); // any decoder for FPU POP 4928 FPU : S4; 4929 MEM : S3; // any mem 4930%} 4931 4932// Float mem-reg operation 4933pipe_class fpu_mem_reg(memory mem, regDPR src) %{ 4934 instruction_count(2); 4935 src : S5(read); 4936 mem : S3(read); 4937 DECODE : S0; // any decoder for FPU PUSH 4938 D0 : S1; // big decoder only 4939 FPU : S4; 4940 MEM : S3; // any mem 4941%} 4942 4943pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{ 4944 instruction_count(3); 4945 src1 : S3(read); 4946 src2 : S3(read); 4947 mem : S3(read); 4948 DECODE : S0(2); // any decoder for FPU PUSH 4949 D0 : S1; // big decoder only 4950 FPU : S4; 4951 MEM : S3; // any mem 4952%} 4953 4954pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{ 4955 instruction_count(3); 4956 src1 : S3(read); 4957 src2 : S3(read); 4958 mem : S4(read); 4959 DECODE : S0; // any decoder for FPU PUSH 4960 D0 : S0(2); // big decoder only 4961 FPU : S4; 4962 MEM : S3(2); // any mem 4963%} 4964 4965pipe_class fpu_mem_mem(memory dst, memory src1) %{ 4966 instruction_count(2); 4967 src1 : S3(read); 4968 dst : S4(read); 4969 D0 : S0(2); // big decoder only 4970 MEM : S3(2); // any mem 4971%} 4972 4973pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{ 4974 instruction_count(3); 4975 src1 : S3(read); 4976 src2 : S3(read); 4977 dst : S4(read); 4978 D0 : S0(3); // big decoder only 4979 FPU : S4; 4980 MEM : S3(3); // any mem 4981%} 4982 4983pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{ 4984 instruction_count(3); 4985 src1 : S4(read); 4986 mem : S4(read); 4987 DECODE : S0; // any decoder for FPU PUSH 4988 D0 : S0(2); // big decoder only 4989 FPU : S4; 4990 MEM : S3(2); // any mem 4991%} 4992 4993// Float load constant 4994pipe_class fpu_reg_con(regDPR dst) %{ 4995 instruction_count(2); 4996 dst : S5(write); 4997 D0 : S0; // big decoder only for the load 4998 DECODE : S1; // any decoder for FPU POP 4999 FPU : S4; 5000 MEM : S3; // any mem 5001%} 5002 5003// Float load constant 5004pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{ 5005 instruction_count(3); 5006 dst : S5(write); 5007 src : S3(read); 5008 D0 : S0; // big decoder only for the load 5009 DECODE : S1(2); // any decoder for FPU POP 5010 FPU : S4; 5011 MEM : S3; // any mem 5012%} 5013 5014// UnConditional branch 5015pipe_class pipe_jmp( label labl ) %{ 5016 single_instruction; 5017 BR : S3; 5018%} 5019 5020// Conditional branch 5021pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{ 5022 single_instruction; 5023 cr : S1(read); 5024 BR : S3; 5025%} 5026 5027// Allocation idiom 5028pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{ 5029 instruction_count(1); force_serialization; 5030 fixed_latency(6); 5031 heap_ptr : S3(read); 5032 DECODE : S0(3); 5033 D0 : S2; 5034 MEM : S3; 5035 ALU : S3(2); 5036 dst : S5(write); 5037 BR : S5; 5038%} 5039 5040// Generic big/slow expanded idiom 5041pipe_class pipe_slow( ) %{ 5042 instruction_count(10); multiple_bundles; force_serialization; 5043 fixed_latency(100); 5044 D0 : S0(2); 5045 MEM : S3(2); 5046%} 5047 5048// The real do-nothing guy 5049pipe_class empty( ) %{ 5050 instruction_count(0); 5051%} 5052 5053// Define the class for the Nop node 5054define %{ 5055 MachNop = empty; 5056%} 5057 5058%} 5059 5060//----------INSTRUCTIONS------------------------------------------------------- 5061// 5062// match -- States which machine-independent subtree may be replaced 5063// by this instruction. 5064// ins_cost -- The estimated cost of this instruction is used by instruction 5065// selection to identify a minimum cost tree of machine 5066// instructions that matches a tree of machine-independent 5067// instructions. 5068// format -- A string providing the disassembly for this instruction. 5069// The value of an instruction's operand may be inserted 5070// by referring to it with a '$' prefix. 5071// opcode -- Three instruction opcodes may be provided. These are referred 5072// to within an encode class as $primary, $secondary, and $tertiary 5073// respectively. The primary opcode is commonly used to 5074// indicate the type of machine instruction, while secondary 5075// and tertiary are often used for prefix options or addressing 5076// modes. 5077// ins_encode -- A list of encode classes with parameters. The encode class 5078// name must have been defined in an 'enc_class' specification 5079// in the encode section of the architecture description. 5080 5081//----------BSWAP-Instruction-------------------------------------------------- 5082instruct bytes_reverse_int(rRegI dst) %{ 5083 match(Set dst (ReverseBytesI dst)); 5084 5085 format %{ "BSWAP $dst" %} 5086 opcode(0x0F, 0xC8); 5087 ins_encode( OpcP, OpcSReg(dst) ); 5088 ins_pipe( ialu_reg ); 5089%} 5090 5091instruct bytes_reverse_long(eRegL dst) %{ 5092 match(Set dst (ReverseBytesL dst)); 5093 5094 format %{ "BSWAP $dst.lo\n\t" 5095 "BSWAP $dst.hi\n\t" 5096 "XCHG $dst.lo $dst.hi" %} 5097 5098 ins_cost(125); 5099 ins_encode( bswap_long_bytes(dst) ); 5100 ins_pipe( ialu_reg_reg); 5101%} 5102 5103instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{ 5104 match(Set dst (ReverseBytesUS dst)); 5105 effect(KILL cr); 5106 5107 format %{ "BSWAP $dst\n\t" 5108 "SHR $dst,16\n\t" %} 5109 ins_encode %{ 5110 __ bswapl($dst$$Register); 5111 __ shrl($dst$$Register, 16); 5112 %} 5113 ins_pipe( ialu_reg ); 5114%} 5115 5116instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{ 5117 match(Set dst (ReverseBytesS dst)); 5118 effect(KILL cr); 5119 5120 format %{ "BSWAP $dst\n\t" 5121 "SAR $dst,16\n\t" %} 5122 ins_encode %{ 5123 __ bswapl($dst$$Register); 5124 __ sarl($dst$$Register, 16); 5125 %} 5126 ins_pipe( ialu_reg ); 5127%} 5128 5129 5130//---------- Zeros Count Instructions ------------------------------------------ 5131 5132instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5133 predicate(UseCountLeadingZerosInstruction); 5134 match(Set dst (CountLeadingZerosI src)); 5135 effect(KILL cr); 5136 5137 format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %} 5138 ins_encode %{ 5139 __ lzcntl($dst$$Register, $src$$Register); 5140 %} 5141 ins_pipe(ialu_reg); 5142%} 5143 5144instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{ 5145 predicate(!UseCountLeadingZerosInstruction); 5146 match(Set dst (CountLeadingZerosI src)); 5147 effect(KILL cr); 5148 5149 format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t" 5150 "JNZ skip\n\t" 5151 "MOV $dst, -1\n" 5152 "skip:\n\t" 5153 "NEG $dst\n\t" 5154 "ADD $dst, 31" %} 5155 ins_encode %{ 5156 Register Rdst = $dst$$Register; 5157 Register Rsrc = $src$$Register; 5158 Label skip; 5159 __ bsrl(Rdst, Rsrc); 5160 __ jccb(Assembler::notZero, skip); 5161 __ movl(Rdst, -1); 5162 __ bind(skip); 5163 __ negl(Rdst); 5164 __ addl(Rdst, BitsPerInt - 1); 5165 %} 5166 ins_pipe(ialu_reg); 5167%} 5168 5169instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5170 predicate(UseCountLeadingZerosInstruction); 5171 match(Set dst (CountLeadingZerosL src)); 5172 effect(TEMP dst, KILL cr); 5173 5174 format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t" 5175 "JNC done\n\t" 5176 "LZCNT $dst, $src.lo\n\t" 5177 "ADD $dst, 32\n" 5178 "done:" %} 5179 ins_encode %{ 5180 Register Rdst = $dst$$Register; 5181 Register Rsrc = $src$$Register; 5182 Label done; 5183 __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5184 __ jccb(Assembler::carryClear, done); 5185 __ lzcntl(Rdst, Rsrc); 5186 __ addl(Rdst, BitsPerInt); 5187 __ bind(done); 5188 %} 5189 ins_pipe(ialu_reg); 5190%} 5191 5192instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{ 5193 predicate(!UseCountLeadingZerosInstruction); 5194 match(Set dst (CountLeadingZerosL src)); 5195 effect(TEMP dst, KILL cr); 5196 5197 format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t" 5198 "JZ msw_is_zero\n\t" 5199 "ADD $dst, 32\n\t" 5200 "JMP not_zero\n" 5201 "msw_is_zero:\n\t" 5202 "BSR $dst, $src.lo\n\t" 5203 "JNZ not_zero\n\t" 5204 "MOV $dst, -1\n" 5205 "not_zero:\n\t" 5206 "NEG $dst\n\t" 5207 "ADD $dst, 63\n" %} 5208 ins_encode %{ 5209 Register Rdst = $dst$$Register; 5210 Register Rsrc = $src$$Register; 5211 Label msw_is_zero; 5212 Label not_zero; 5213 __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc)); 5214 __ jccb(Assembler::zero, msw_is_zero); 5215 __ addl(Rdst, BitsPerInt); 5216 __ jmpb(not_zero); 5217 __ bind(msw_is_zero); 5218 __ bsrl(Rdst, Rsrc); 5219 __ jccb(Assembler::notZero, not_zero); 5220 __ movl(Rdst, -1); 5221 __ bind(not_zero); 5222 __ negl(Rdst); 5223 __ addl(Rdst, BitsPerLong - 1); 5224 %} 5225 ins_pipe(ialu_reg); 5226%} 5227 5228instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5229 predicate(UseCountTrailingZerosInstruction); 5230 match(Set dst (CountTrailingZerosI src)); 5231 effect(KILL cr); 5232 5233 format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %} 5234 ins_encode %{ 5235 __ tzcntl($dst$$Register, $src$$Register); 5236 %} 5237 ins_pipe(ialu_reg); 5238%} 5239 5240instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{ 5241 predicate(!UseCountTrailingZerosInstruction); 5242 match(Set dst (CountTrailingZerosI src)); 5243 effect(KILL cr); 5244 5245 format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t" 5246 "JNZ done\n\t" 5247 "MOV $dst, 32\n" 5248 "done:" %} 5249 ins_encode %{ 5250 Register Rdst = $dst$$Register; 5251 Label done; 5252 __ bsfl(Rdst, $src$$Register); 5253 __ jccb(Assembler::notZero, done); 5254 __ movl(Rdst, BitsPerInt); 5255 __ bind(done); 5256 %} 5257 ins_pipe(ialu_reg); 5258%} 5259 5260instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5261 predicate(UseCountTrailingZerosInstruction); 5262 match(Set dst (CountTrailingZerosL src)); 5263 effect(TEMP dst, KILL cr); 5264 5265 format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t" 5266 "JNC done\n\t" 5267 "TZCNT $dst, $src.hi\n\t" 5268 "ADD $dst, 32\n" 5269 "done:" %} 5270 ins_encode %{ 5271 Register Rdst = $dst$$Register; 5272 Register Rsrc = $src$$Register; 5273 Label done; 5274 __ tzcntl(Rdst, Rsrc); 5275 __ jccb(Assembler::carryClear, done); 5276 __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5277 __ addl(Rdst, BitsPerInt); 5278 __ bind(done); 5279 %} 5280 ins_pipe(ialu_reg); 5281%} 5282 5283instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{ 5284 predicate(!UseCountTrailingZerosInstruction); 5285 match(Set dst (CountTrailingZerosL src)); 5286 effect(TEMP dst, KILL cr); 5287 5288 format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t" 5289 "JNZ done\n\t" 5290 "BSF $dst, $src.hi\n\t" 5291 "JNZ msw_not_zero\n\t" 5292 "MOV $dst, 32\n" 5293 "msw_not_zero:\n\t" 5294 "ADD $dst, 32\n" 5295 "done:" %} 5296 ins_encode %{ 5297 Register Rdst = $dst$$Register; 5298 Register Rsrc = $src$$Register; 5299 Label msw_not_zero; 5300 Label done; 5301 __ bsfl(Rdst, Rsrc); 5302 __ jccb(Assembler::notZero, done); 5303 __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc)); 5304 __ jccb(Assembler::notZero, msw_not_zero); 5305 __ movl(Rdst, BitsPerInt); 5306 __ bind(msw_not_zero); 5307 __ addl(Rdst, BitsPerInt); 5308 __ bind(done); 5309 %} 5310 ins_pipe(ialu_reg); 5311%} 5312 5313 5314//---------- Population Count Instructions ------------------------------------- 5315 5316instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5317 predicate(UsePopCountInstruction); 5318 match(Set dst (PopCountI src)); 5319 effect(KILL cr); 5320 5321 format %{ "POPCNT $dst, $src" %} 5322 ins_encode %{ 5323 __ popcntl($dst$$Register, $src$$Register); 5324 %} 5325 ins_pipe(ialu_reg); 5326%} 5327 5328instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{ 5329 predicate(UsePopCountInstruction); 5330 match(Set dst (PopCountI (LoadI mem))); 5331 effect(KILL cr); 5332 5333 format %{ "POPCNT $dst, $mem" %} 5334 ins_encode %{ 5335 __ popcntl($dst$$Register, $mem$$Address); 5336 %} 5337 ins_pipe(ialu_reg); 5338%} 5339 5340// Note: Long.bitCount(long) returns an int. 5341instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 5342 predicate(UsePopCountInstruction); 5343 match(Set dst (PopCountL src)); 5344 effect(KILL cr, TEMP tmp, TEMP dst); 5345 5346 format %{ "POPCNT $dst, $src.lo\n\t" 5347 "POPCNT $tmp, $src.hi\n\t" 5348 "ADD $dst, $tmp" %} 5349 ins_encode %{ 5350 __ popcntl($dst$$Register, $src$$Register); 5351 __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register)); 5352 __ addl($dst$$Register, $tmp$$Register); 5353 %} 5354 ins_pipe(ialu_reg); 5355%} 5356 5357// Note: Long.bitCount(long) returns an int. 5358instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{ 5359 predicate(UsePopCountInstruction); 5360 match(Set dst (PopCountL (LoadL mem))); 5361 effect(KILL cr, TEMP tmp, TEMP dst); 5362 5363 format %{ "POPCNT $dst, $mem\n\t" 5364 "POPCNT $tmp, $mem+4\n\t" 5365 "ADD $dst, $tmp" %} 5366 ins_encode %{ 5367 //__ popcntl($dst$$Register, $mem$$Address$$first); 5368 //__ popcntl($tmp$$Register, $mem$$Address$$second); 5369 __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none)); 5370 __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none)); 5371 __ addl($dst$$Register, $tmp$$Register); 5372 %} 5373 ins_pipe(ialu_reg); 5374%} 5375 5376 5377//----------Load/Store/Move Instructions--------------------------------------- 5378//----------Load Instructions-------------------------------------------------- 5379// Load Byte (8bit signed) 5380instruct loadB(xRegI dst, memory mem) %{ 5381 match(Set dst (LoadB mem)); 5382 5383 ins_cost(125); 5384 format %{ "MOVSX8 $dst,$mem\t# byte" %} 5385 5386 ins_encode %{ 5387 __ movsbl($dst$$Register, $mem$$Address); 5388 %} 5389 5390 ins_pipe(ialu_reg_mem); 5391%} 5392 5393// Load Byte (8bit signed) into Long Register 5394instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5395 match(Set dst (ConvI2L (LoadB mem))); 5396 effect(KILL cr); 5397 5398 ins_cost(375); 5399 format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t" 5400 "MOV $dst.hi,$dst.lo\n\t" 5401 "SAR $dst.hi,7" %} 5402 5403 ins_encode %{ 5404 __ movsbl($dst$$Register, $mem$$Address); 5405 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5406 __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended. 5407 %} 5408 5409 ins_pipe(ialu_reg_mem); 5410%} 5411 5412// Load Unsigned Byte (8bit UNsigned) 5413instruct loadUB(xRegI dst, memory mem) %{ 5414 match(Set dst (LoadUB mem)); 5415 5416 ins_cost(125); 5417 format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %} 5418 5419 ins_encode %{ 5420 __ movzbl($dst$$Register, $mem$$Address); 5421 %} 5422 5423 ins_pipe(ialu_reg_mem); 5424%} 5425 5426// Load Unsigned Byte (8 bit UNsigned) into Long Register 5427instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5428 match(Set dst (ConvI2L (LoadUB mem))); 5429 effect(KILL cr); 5430 5431 ins_cost(250); 5432 format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t" 5433 "XOR $dst.hi,$dst.hi" %} 5434 5435 ins_encode %{ 5436 Register Rdst = $dst$$Register; 5437 __ movzbl(Rdst, $mem$$Address); 5438 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5439 %} 5440 5441 ins_pipe(ialu_reg_mem); 5442%} 5443 5444// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register 5445instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{ 5446 match(Set dst (ConvI2L (AndI (LoadUB mem) mask))); 5447 effect(KILL cr); 5448 5449 format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t" 5450 "XOR $dst.hi,$dst.hi\n\t" 5451 "AND $dst.lo,right_n_bits($mask, 8)" %} 5452 ins_encode %{ 5453 Register Rdst = $dst$$Register; 5454 __ movzbl(Rdst, $mem$$Address); 5455 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5456 __ andl(Rdst, $mask$$constant & right_n_bits(8)); 5457 %} 5458 ins_pipe(ialu_reg_mem); 5459%} 5460 5461// Load Short (16bit signed) 5462instruct loadS(rRegI dst, memory mem) %{ 5463 match(Set dst (LoadS mem)); 5464 5465 ins_cost(125); 5466 format %{ "MOVSX $dst,$mem\t# short" %} 5467 5468 ins_encode %{ 5469 __ movswl($dst$$Register, $mem$$Address); 5470 %} 5471 5472 ins_pipe(ialu_reg_mem); 5473%} 5474 5475// Load Short (16 bit signed) to Byte (8 bit signed) 5476instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5477 match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); 5478 5479 ins_cost(125); 5480 format %{ "MOVSX $dst, $mem\t# short -> byte" %} 5481 ins_encode %{ 5482 __ movsbl($dst$$Register, $mem$$Address); 5483 %} 5484 ins_pipe(ialu_reg_mem); 5485%} 5486 5487// Load Short (16bit signed) into Long Register 5488instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5489 match(Set dst (ConvI2L (LoadS mem))); 5490 effect(KILL cr); 5491 5492 ins_cost(375); 5493 format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t" 5494 "MOV $dst.hi,$dst.lo\n\t" 5495 "SAR $dst.hi,15" %} 5496 5497 ins_encode %{ 5498 __ movswl($dst$$Register, $mem$$Address); 5499 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5500 __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended. 5501 %} 5502 5503 ins_pipe(ialu_reg_mem); 5504%} 5505 5506// Load Unsigned Short/Char (16bit unsigned) 5507instruct loadUS(rRegI dst, memory mem) %{ 5508 match(Set dst (LoadUS mem)); 5509 5510 ins_cost(125); 5511 format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %} 5512 5513 ins_encode %{ 5514 __ movzwl($dst$$Register, $mem$$Address); 5515 %} 5516 5517 ins_pipe(ialu_reg_mem); 5518%} 5519 5520// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed) 5521instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5522 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour)); 5523 5524 ins_cost(125); 5525 format %{ "MOVSX $dst, $mem\t# ushort -> byte" %} 5526 ins_encode %{ 5527 __ movsbl($dst$$Register, $mem$$Address); 5528 %} 5529 ins_pipe(ialu_reg_mem); 5530%} 5531 5532// Load Unsigned Short/Char (16 bit UNsigned) into Long Register 5533instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5534 match(Set dst (ConvI2L (LoadUS mem))); 5535 effect(KILL cr); 5536 5537 ins_cost(250); 5538 format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t" 5539 "XOR $dst.hi,$dst.hi" %} 5540 5541 ins_encode %{ 5542 __ movzwl($dst$$Register, $mem$$Address); 5543 __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); 5544 %} 5545 5546 ins_pipe(ialu_reg_mem); 5547%} 5548 5549// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register 5550instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{ 5551 match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); 5552 effect(KILL cr); 5553 5554 format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t" 5555 "XOR $dst.hi,$dst.hi" %} 5556 ins_encode %{ 5557 Register Rdst = $dst$$Register; 5558 __ movzbl(Rdst, $mem$$Address); 5559 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5560 %} 5561 ins_pipe(ialu_reg_mem); 5562%} 5563 5564// Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register 5565instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{ 5566 match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); 5567 effect(KILL cr); 5568 5569 format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t" 5570 "XOR $dst.hi,$dst.hi\n\t" 5571 "AND $dst.lo,right_n_bits($mask, 16)" %} 5572 ins_encode %{ 5573 Register Rdst = $dst$$Register; 5574 __ movzwl(Rdst, $mem$$Address); 5575 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5576 __ andl(Rdst, $mask$$constant & right_n_bits(16)); 5577 %} 5578 ins_pipe(ialu_reg_mem); 5579%} 5580 5581// Load Integer 5582instruct loadI(rRegI dst, memory mem) %{ 5583 match(Set dst (LoadI mem)); 5584 5585 ins_cost(125); 5586 format %{ "MOV $dst,$mem\t# int" %} 5587 5588 ins_encode %{ 5589 __ movl($dst$$Register, $mem$$Address); 5590 %} 5591 5592 ins_pipe(ialu_reg_mem); 5593%} 5594 5595// Load Integer (32 bit signed) to Byte (8 bit signed) 5596instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5597 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); 5598 5599 ins_cost(125); 5600 format %{ "MOVSX $dst, $mem\t# int -> byte" %} 5601 ins_encode %{ 5602 __ movsbl($dst$$Register, $mem$$Address); 5603 %} 5604 ins_pipe(ialu_reg_mem); 5605%} 5606 5607// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) 5608instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{ 5609 match(Set dst (AndI (LoadI mem) mask)); 5610 5611 ins_cost(125); 5612 format %{ "MOVZX $dst, $mem\t# int -> ubyte" %} 5613 ins_encode %{ 5614 __ movzbl($dst$$Register, $mem$$Address); 5615 %} 5616 ins_pipe(ialu_reg_mem); 5617%} 5618 5619// Load Integer (32 bit signed) to Short (16 bit signed) 5620instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{ 5621 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); 5622 5623 ins_cost(125); 5624 format %{ "MOVSX $dst, $mem\t# int -> short" %} 5625 ins_encode %{ 5626 __ movswl($dst$$Register, $mem$$Address); 5627 %} 5628 ins_pipe(ialu_reg_mem); 5629%} 5630 5631// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) 5632instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{ 5633 match(Set dst (AndI (LoadI mem) mask)); 5634 5635 ins_cost(125); 5636 format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %} 5637 ins_encode %{ 5638 __ movzwl($dst$$Register, $mem$$Address); 5639 %} 5640 ins_pipe(ialu_reg_mem); 5641%} 5642 5643// Load Integer into Long Register 5644instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5645 match(Set dst (ConvI2L (LoadI mem))); 5646 effect(KILL cr); 5647 5648 ins_cost(375); 5649 format %{ "MOV $dst.lo,$mem\t# int -> long\n\t" 5650 "MOV $dst.hi,$dst.lo\n\t" 5651 "SAR $dst.hi,31" %} 5652 5653 ins_encode %{ 5654 __ movl($dst$$Register, $mem$$Address); 5655 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5656 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); 5657 %} 5658 5659 ins_pipe(ialu_reg_mem); 5660%} 5661 5662// Load Integer with mask 0xFF into Long Register 5663instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{ 5664 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5665 effect(KILL cr); 5666 5667 format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t" 5668 "XOR $dst.hi,$dst.hi" %} 5669 ins_encode %{ 5670 Register Rdst = $dst$$Register; 5671 __ movzbl(Rdst, $mem$$Address); 5672 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5673 %} 5674 ins_pipe(ialu_reg_mem); 5675%} 5676 5677// Load Integer with mask 0xFFFF into Long Register 5678instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{ 5679 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5680 effect(KILL cr); 5681 5682 format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t" 5683 "XOR $dst.hi,$dst.hi" %} 5684 ins_encode %{ 5685 Register Rdst = $dst$$Register; 5686 __ movzwl(Rdst, $mem$$Address); 5687 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5688 %} 5689 ins_pipe(ialu_reg_mem); 5690%} 5691 5692// Load Integer with 31-bit mask into Long Register 5693instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{ 5694 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5695 effect(KILL cr); 5696 5697 format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t" 5698 "XOR $dst.hi,$dst.hi\n\t" 5699 "AND $dst.lo,$mask" %} 5700 ins_encode %{ 5701 Register Rdst = $dst$$Register; 5702 __ movl(Rdst, $mem$$Address); 5703 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5704 __ andl(Rdst, $mask$$constant); 5705 %} 5706 ins_pipe(ialu_reg_mem); 5707%} 5708 5709// Load Unsigned Integer into Long Register 5710instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{ 5711 match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); 5712 effect(KILL cr); 5713 5714 ins_cost(250); 5715 format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t" 5716 "XOR $dst.hi,$dst.hi" %} 5717 5718 ins_encode %{ 5719 __ movl($dst$$Register, $mem$$Address); 5720 __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); 5721 %} 5722 5723 ins_pipe(ialu_reg_mem); 5724%} 5725 5726// Load Long. Cannot clobber address while loading, so restrict address 5727// register to ESI 5728instruct loadL(eRegL dst, load_long_memory mem) %{ 5729 predicate(!((LoadLNode*)n)->require_atomic_access()); 5730 match(Set dst (LoadL mem)); 5731 5732 ins_cost(250); 5733 format %{ "MOV $dst.lo,$mem\t# long\n\t" 5734 "MOV $dst.hi,$mem+4" %} 5735 5736 ins_encode %{ 5737 Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); 5738 Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none); 5739 __ movl($dst$$Register, Amemlo); 5740 __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi); 5741 %} 5742 5743 ins_pipe(ialu_reg_long_mem); 5744%} 5745 5746// Volatile Load Long. Must be atomic, so do 64-bit FILD 5747// then store it down to the stack and reload on the int 5748// side. 5749instruct loadL_volatile(stackSlotL dst, memory mem) %{ 5750 predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access()); 5751 match(Set dst (LoadL mem)); 5752 5753 ins_cost(200); 5754 format %{ "FILD $mem\t# Atomic volatile long load\n\t" 5755 "FISTp $dst" %} 5756 ins_encode(enc_loadL_volatile(mem,dst)); 5757 ins_pipe( fpu_reg_mem ); 5758%} 5759 5760instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{ 5761 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); 5762 match(Set dst (LoadL mem)); 5763 effect(TEMP tmp); 5764 ins_cost(180); 5765 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" 5766 "MOVSD $dst,$tmp" %} 5767 ins_encode %{ 5768 __ movdbl($tmp$$XMMRegister, $mem$$Address); 5769 __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister); 5770 %} 5771 ins_pipe( pipe_slow ); 5772%} 5773 5774instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{ 5775 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); 5776 match(Set dst (LoadL mem)); 5777 effect(TEMP tmp); 5778 ins_cost(160); 5779 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" 5780 "MOVD $dst.lo,$tmp\n\t" 5781 "PSRLQ $tmp,32\n\t" 5782 "MOVD $dst.hi,$tmp" %} 5783 ins_encode %{ 5784 __ movdbl($tmp$$XMMRegister, $mem$$Address); 5785 __ movdl($dst$$Register, $tmp$$XMMRegister); 5786 __ psrlq($tmp$$XMMRegister, 32); 5787 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); 5788 %} 5789 ins_pipe( pipe_slow ); 5790%} 5791 5792// Load Range 5793instruct loadRange(rRegI dst, memory mem) %{ 5794 match(Set dst (LoadRange mem)); 5795 5796 ins_cost(125); 5797 format %{ "MOV $dst,$mem" %} 5798 opcode(0x8B); 5799 ins_encode( OpcP, RegMem(dst,mem)); 5800 ins_pipe( ialu_reg_mem ); 5801%} 5802 5803 5804// Load Pointer 5805instruct loadP(eRegP dst, memory mem) %{ 5806 match(Set dst (LoadP mem)); 5807 5808 ins_cost(125); 5809 format %{ "MOV $dst,$mem" %} 5810 opcode(0x8B); 5811 ins_encode( OpcP, RegMem(dst,mem)); 5812 ins_pipe( ialu_reg_mem ); 5813%} 5814 5815// Load Klass Pointer 5816instruct loadKlass(eRegP dst, memory mem) %{ 5817 match(Set dst (LoadKlass mem)); 5818 5819 ins_cost(125); 5820 format %{ "MOV $dst,$mem" %} 5821 opcode(0x8B); 5822 ins_encode( OpcP, RegMem(dst,mem)); 5823 ins_pipe( ialu_reg_mem ); 5824%} 5825 5826// Load Double 5827instruct loadDPR(regDPR dst, memory mem) %{ 5828 predicate(UseSSE<=1); 5829 match(Set dst (LoadD mem)); 5830 5831 ins_cost(150); 5832 format %{ "FLD_D ST,$mem\n\t" 5833 "FSTP $dst" %} 5834 opcode(0xDD); /* DD /0 */ 5835 ins_encode( OpcP, RMopc_Mem(0x00,mem), 5836 Pop_Reg_DPR(dst) ); 5837 ins_pipe( fpu_reg_mem ); 5838%} 5839 5840// Load Double to XMM 5841instruct loadD(regD dst, memory mem) %{ 5842 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper); 5843 match(Set dst (LoadD mem)); 5844 ins_cost(145); 5845 format %{ "MOVSD $dst,$mem" %} 5846 ins_encode %{ 5847 __ movdbl ($dst$$XMMRegister, $mem$$Address); 5848 %} 5849 ins_pipe( pipe_slow ); 5850%} 5851 5852instruct loadD_partial(regD dst, memory mem) %{ 5853 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); 5854 match(Set dst (LoadD mem)); 5855 ins_cost(145); 5856 format %{ "MOVLPD $dst,$mem" %} 5857 ins_encode %{ 5858 __ movdbl ($dst$$XMMRegister, $mem$$Address); 5859 %} 5860 ins_pipe( pipe_slow ); 5861%} 5862 5863// Load to XMM register (single-precision floating point) 5864// MOVSS instruction 5865instruct loadF(regF dst, memory mem) %{ 5866 predicate(UseSSE>=1); 5867 match(Set dst (LoadF mem)); 5868 ins_cost(145); 5869 format %{ "MOVSS $dst,$mem" %} 5870 ins_encode %{ 5871 __ movflt ($dst$$XMMRegister, $mem$$Address); 5872 %} 5873 ins_pipe( pipe_slow ); 5874%} 5875 5876// Load Float 5877instruct loadFPR(regFPR dst, memory mem) %{ 5878 predicate(UseSSE==0); 5879 match(Set dst (LoadF mem)); 5880 5881 ins_cost(150); 5882 format %{ "FLD_S ST,$mem\n\t" 5883 "FSTP $dst" %} 5884 opcode(0xD9); /* D9 /0 */ 5885 ins_encode( OpcP, RMopc_Mem(0x00,mem), 5886 Pop_Reg_FPR(dst) ); 5887 ins_pipe( fpu_reg_mem ); 5888%} 5889 5890// Load Effective Address 5891instruct leaP8(eRegP dst, indOffset8 mem) %{ 5892 match(Set dst mem); 5893 5894 ins_cost(110); 5895 format %{ "LEA $dst,$mem" %} 5896 opcode(0x8D); 5897 ins_encode( OpcP, RegMem(dst,mem)); 5898 ins_pipe( ialu_reg_reg_fat ); 5899%} 5900 5901instruct leaP32(eRegP dst, indOffset32 mem) %{ 5902 match(Set dst mem); 5903 5904 ins_cost(110); 5905 format %{ "LEA $dst,$mem" %} 5906 opcode(0x8D); 5907 ins_encode( OpcP, RegMem(dst,mem)); 5908 ins_pipe( ialu_reg_reg_fat ); 5909%} 5910 5911instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{ 5912 match(Set dst mem); 5913 5914 ins_cost(110); 5915 format %{ "LEA $dst,$mem" %} 5916 opcode(0x8D); 5917 ins_encode( OpcP, RegMem(dst,mem)); 5918 ins_pipe( ialu_reg_reg_fat ); 5919%} 5920 5921instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{ 5922 match(Set dst mem); 5923 5924 ins_cost(110); 5925 format %{ "LEA $dst,$mem" %} 5926 opcode(0x8D); 5927 ins_encode( OpcP, RegMem(dst,mem)); 5928 ins_pipe( ialu_reg_reg_fat ); 5929%} 5930 5931instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{ 5932 match(Set dst mem); 5933 5934 ins_cost(110); 5935 format %{ "LEA $dst,$mem" %} 5936 opcode(0x8D); 5937 ins_encode( OpcP, RegMem(dst,mem)); 5938 ins_pipe( ialu_reg_reg_fat ); 5939%} 5940 5941// Load Constant 5942instruct loadConI(rRegI dst, immI src) %{ 5943 match(Set dst src); 5944 5945 format %{ "MOV $dst,$src" %} 5946 ins_encode( LdImmI(dst, src) ); 5947 ins_pipe( ialu_reg_fat ); 5948%} 5949 5950// Load Constant zero 5951instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{ 5952 match(Set dst src); 5953 effect(KILL cr); 5954 5955 ins_cost(50); 5956 format %{ "XOR $dst,$dst" %} 5957 opcode(0x33); /* + rd */ 5958 ins_encode( OpcP, RegReg( dst, dst ) ); 5959 ins_pipe( ialu_reg ); 5960%} 5961 5962instruct loadConP(eRegP dst, immP src) %{ 5963 match(Set dst src); 5964 5965 format %{ "MOV $dst,$src" %} 5966 opcode(0xB8); /* + rd */ 5967 ins_encode( LdImmP(dst, src) ); 5968 ins_pipe( ialu_reg_fat ); 5969%} 5970 5971instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{ 5972 match(Set dst src); 5973 effect(KILL cr); 5974 ins_cost(200); 5975 format %{ "MOV $dst.lo,$src.lo\n\t" 5976 "MOV $dst.hi,$src.hi" %} 5977 opcode(0xB8); 5978 ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) ); 5979 ins_pipe( ialu_reg_long_fat ); 5980%} 5981 5982instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{ 5983 match(Set dst src); 5984 effect(KILL cr); 5985 ins_cost(150); 5986 format %{ "XOR $dst.lo,$dst.lo\n\t" 5987 "XOR $dst.hi,$dst.hi" %} 5988 opcode(0x33,0x33); 5989 ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) ); 5990 ins_pipe( ialu_reg_long ); 5991%} 5992 5993// The instruction usage is guarded by predicate in operand immFPR(). 5994instruct loadConFPR(regFPR dst, immFPR con) %{ 5995 match(Set dst con); 5996 ins_cost(125); 5997 format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t" 5998 "FSTP $dst" %} 5999 ins_encode %{ 6000 __ fld_s($constantaddress($con)); 6001 __ fstp_d($dst$$reg); 6002 %} 6003 ins_pipe(fpu_reg_con); 6004%} 6005 6006// The instruction usage is guarded by predicate in operand immFPR0(). 6007instruct loadConFPR0(regFPR dst, immFPR0 con) %{ 6008 match(Set dst con); 6009 ins_cost(125); 6010 format %{ "FLDZ ST\n\t" 6011 "FSTP $dst" %} 6012 ins_encode %{ 6013 __ fldz(); 6014 __ fstp_d($dst$$reg); 6015 %} 6016 ins_pipe(fpu_reg_con); 6017%} 6018 6019// The instruction usage is guarded by predicate in operand immFPR1(). 6020instruct loadConFPR1(regFPR dst, immFPR1 con) %{ 6021 match(Set dst con); 6022 ins_cost(125); 6023 format %{ "FLD1 ST\n\t" 6024 "FSTP $dst" %} 6025 ins_encode %{ 6026 __ fld1(); 6027 __ fstp_d($dst$$reg); 6028 %} 6029 ins_pipe(fpu_reg_con); 6030%} 6031 6032// The instruction usage is guarded by predicate in operand immF(). 6033instruct loadConF(regF dst, immF con) %{ 6034 match(Set dst con); 6035 ins_cost(125); 6036 format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %} 6037 ins_encode %{ 6038 __ movflt($dst$$XMMRegister, $constantaddress($con)); 6039 %} 6040 ins_pipe(pipe_slow); 6041%} 6042 6043// The instruction usage is guarded by predicate in operand immF0(). 6044instruct loadConF0(regF dst, immF0 src) %{ 6045 match(Set dst src); 6046 ins_cost(100); 6047 format %{ "XORPS $dst,$dst\t# float 0.0" %} 6048 ins_encode %{ 6049 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 6050 %} 6051 ins_pipe(pipe_slow); 6052%} 6053 6054// The instruction usage is guarded by predicate in operand immDPR(). 6055instruct loadConDPR(regDPR dst, immDPR con) %{ 6056 match(Set dst con); 6057 ins_cost(125); 6058 6059 format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t" 6060 "FSTP $dst" %} 6061 ins_encode %{ 6062 __ fld_d($constantaddress($con)); 6063 __ fstp_d($dst$$reg); 6064 %} 6065 ins_pipe(fpu_reg_con); 6066%} 6067 6068// The instruction usage is guarded by predicate in operand immDPR0(). 6069instruct loadConDPR0(regDPR dst, immDPR0 con) %{ 6070 match(Set dst con); 6071 ins_cost(125); 6072 6073 format %{ "FLDZ ST\n\t" 6074 "FSTP $dst" %} 6075 ins_encode %{ 6076 __ fldz(); 6077 __ fstp_d($dst$$reg); 6078 %} 6079 ins_pipe(fpu_reg_con); 6080%} 6081 6082// The instruction usage is guarded by predicate in operand immDPR1(). 6083instruct loadConDPR1(regDPR dst, immDPR1 con) %{ 6084 match(Set dst con); 6085 ins_cost(125); 6086 6087 format %{ "FLD1 ST\n\t" 6088 "FSTP $dst" %} 6089 ins_encode %{ 6090 __ fld1(); 6091 __ fstp_d($dst$$reg); 6092 %} 6093 ins_pipe(fpu_reg_con); 6094%} 6095 6096// The instruction usage is guarded by predicate in operand immD(). 6097instruct loadConD(regD dst, immD con) %{ 6098 match(Set dst con); 6099 ins_cost(125); 6100 format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %} 6101 ins_encode %{ 6102 __ movdbl($dst$$XMMRegister, $constantaddress($con)); 6103 %} 6104 ins_pipe(pipe_slow); 6105%} 6106 6107// The instruction usage is guarded by predicate in operand immD0(). 6108instruct loadConD0(regD dst, immD0 src) %{ 6109 match(Set dst src); 6110 ins_cost(100); 6111 format %{ "XORPD $dst,$dst\t# double 0.0" %} 6112 ins_encode %{ 6113 __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister); 6114 %} 6115 ins_pipe( pipe_slow ); 6116%} 6117 6118// Load Stack Slot 6119instruct loadSSI(rRegI dst, stackSlotI src) %{ 6120 match(Set dst src); 6121 ins_cost(125); 6122 6123 format %{ "MOV $dst,$src" %} 6124 opcode(0x8B); 6125 ins_encode( OpcP, RegMem(dst,src)); 6126 ins_pipe( ialu_reg_mem ); 6127%} 6128 6129instruct loadSSL(eRegL dst, stackSlotL src) %{ 6130 match(Set dst src); 6131 6132 ins_cost(200); 6133 format %{ "MOV $dst,$src.lo\n\t" 6134 "MOV $dst+4,$src.hi" %} 6135 opcode(0x8B, 0x8B); 6136 ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) ); 6137 ins_pipe( ialu_mem_long_reg ); 6138%} 6139 6140// Load Stack Slot 6141instruct loadSSP(eRegP dst, stackSlotP src) %{ 6142 match(Set dst src); 6143 ins_cost(125); 6144 6145 format %{ "MOV $dst,$src" %} 6146 opcode(0x8B); 6147 ins_encode( OpcP, RegMem(dst,src)); 6148 ins_pipe( ialu_reg_mem ); 6149%} 6150 6151// Load Stack Slot 6152instruct loadSSF(regFPR dst, stackSlotF src) %{ 6153 match(Set dst src); 6154 ins_cost(125); 6155 6156 format %{ "FLD_S $src\n\t" 6157 "FSTP $dst" %} 6158 opcode(0xD9); /* D9 /0, FLD m32real */ 6159 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 6160 Pop_Reg_FPR(dst) ); 6161 ins_pipe( fpu_reg_mem ); 6162%} 6163 6164// Load Stack Slot 6165instruct loadSSD(regDPR dst, stackSlotD src) %{ 6166 match(Set dst src); 6167 ins_cost(125); 6168 6169 format %{ "FLD_D $src\n\t" 6170 "FSTP $dst" %} 6171 opcode(0xDD); /* DD /0, FLD m64real */ 6172 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 6173 Pop_Reg_DPR(dst) ); 6174 ins_pipe( fpu_reg_mem ); 6175%} 6176 6177// Prefetch instructions for allocation. 6178// Must be safe to execute with invalid address (cannot fault). 6179 6180instruct prefetchAlloc0( memory mem ) %{ 6181 predicate(UseSSE==0 && AllocatePrefetchInstr!=3); 6182 match(PrefetchAllocation mem); 6183 ins_cost(0); 6184 size(0); 6185 format %{ "Prefetch allocation (non-SSE is empty encoding)" %} 6186 ins_encode(); 6187 ins_pipe(empty); 6188%} 6189 6190instruct prefetchAlloc( memory mem ) %{ 6191 predicate(AllocatePrefetchInstr==3); 6192 match( PrefetchAllocation mem ); 6193 ins_cost(100); 6194 6195 format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %} 6196 ins_encode %{ 6197 __ prefetchw($mem$$Address); 6198 %} 6199 ins_pipe(ialu_mem); 6200%} 6201 6202instruct prefetchAllocNTA( memory mem ) %{ 6203 predicate(UseSSE>=1 && AllocatePrefetchInstr==0); 6204 match(PrefetchAllocation mem); 6205 ins_cost(100); 6206 6207 format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %} 6208 ins_encode %{ 6209 __ prefetchnta($mem$$Address); 6210 %} 6211 ins_pipe(ialu_mem); 6212%} 6213 6214instruct prefetchAllocT0( memory mem ) %{ 6215 predicate(UseSSE>=1 && AllocatePrefetchInstr==1); 6216 match(PrefetchAllocation mem); 6217 ins_cost(100); 6218 6219 format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %} 6220 ins_encode %{ 6221 __ prefetcht0($mem$$Address); 6222 %} 6223 ins_pipe(ialu_mem); 6224%} 6225 6226instruct prefetchAllocT2( memory mem ) %{ 6227 predicate(UseSSE>=1 && AllocatePrefetchInstr==2); 6228 match(PrefetchAllocation mem); 6229 ins_cost(100); 6230 6231 format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %} 6232 ins_encode %{ 6233 __ prefetcht2($mem$$Address); 6234 %} 6235 ins_pipe(ialu_mem); 6236%} 6237 6238//----------Store Instructions------------------------------------------------- 6239 6240// Store Byte 6241instruct storeB(memory mem, xRegI src) %{ 6242 match(Set mem (StoreB mem src)); 6243 6244 ins_cost(125); 6245 format %{ "MOV8 $mem,$src" %} 6246 opcode(0x88); 6247 ins_encode( OpcP, RegMem( src, mem ) ); 6248 ins_pipe( ialu_mem_reg ); 6249%} 6250 6251// Store Char/Short 6252instruct storeC(memory mem, rRegI src) %{ 6253 match(Set mem (StoreC mem src)); 6254 6255 ins_cost(125); 6256 format %{ "MOV16 $mem,$src" %} 6257 opcode(0x89, 0x66); 6258 ins_encode( OpcS, OpcP, RegMem( src, mem ) ); 6259 ins_pipe( ialu_mem_reg ); 6260%} 6261 6262// Store Integer 6263instruct storeI(memory mem, rRegI src) %{ 6264 match(Set mem (StoreI mem src)); 6265 6266 ins_cost(125); 6267 format %{ "MOV $mem,$src" %} 6268 opcode(0x89); 6269 ins_encode( OpcP, RegMem( src, mem ) ); 6270 ins_pipe( ialu_mem_reg ); 6271%} 6272 6273// Store Long 6274instruct storeL(long_memory mem, eRegL src) %{ 6275 predicate(!((StoreLNode*)n)->require_atomic_access()); 6276 match(Set mem (StoreL mem src)); 6277 6278 ins_cost(200); 6279 format %{ "MOV $mem,$src.lo\n\t" 6280 "MOV $mem+4,$src.hi" %} 6281 opcode(0x89, 0x89); 6282 ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) ); 6283 ins_pipe( ialu_mem_long_reg ); 6284%} 6285 6286// Store Long to Integer 6287instruct storeL2I(memory mem, eRegL src) %{ 6288 match(Set mem (StoreI mem (ConvL2I src))); 6289 6290 format %{ "MOV $mem,$src.lo\t# long -> int" %} 6291 ins_encode %{ 6292 __ movl($mem$$Address, $src$$Register); 6293 %} 6294 ins_pipe(ialu_mem_reg); 6295%} 6296 6297// Volatile Store Long. Must be atomic, so move it into 6298// the FP TOS and then do a 64-bit FIST. Has to probe the 6299// target address before the store (for null-ptr checks) 6300// so the memory operand is used twice in the encoding. 6301instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{ 6302 predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access()); 6303 match(Set mem (StoreL mem src)); 6304 effect( KILL cr ); 6305 ins_cost(400); 6306 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" 6307 "FILD $src\n\t" 6308 "FISTp $mem\t # 64-bit atomic volatile long store" %} 6309 opcode(0x3B); 6310 ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src)); 6311 ins_pipe( fpu_reg_mem ); 6312%} 6313 6314instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{ 6315 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access()); 6316 match(Set mem (StoreL mem src)); 6317 effect( TEMP tmp, KILL cr ); 6318 ins_cost(380); 6319 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" 6320 "MOVSD $tmp,$src\n\t" 6321 "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %} 6322 ins_encode %{ 6323 __ cmpl(rax, $mem$$Address); 6324 __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp)); 6325 __ movdbl($mem$$Address, $tmp$$XMMRegister); 6326 %} 6327 ins_pipe( pipe_slow ); 6328%} 6329 6330instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{ 6331 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access()); 6332 match(Set mem (StoreL mem src)); 6333 effect( TEMP tmp2 , TEMP tmp, KILL cr ); 6334 ins_cost(360); 6335 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" 6336 "MOVD $tmp,$src.lo\n\t" 6337 "MOVD $tmp2,$src.hi\n\t" 6338 "PUNPCKLDQ $tmp,$tmp2\n\t" 6339 "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %} 6340 ins_encode %{ 6341 __ cmpl(rax, $mem$$Address); 6342 __ movdl($tmp$$XMMRegister, $src$$Register); 6343 __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 6344 __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister); 6345 __ movdbl($mem$$Address, $tmp$$XMMRegister); 6346 %} 6347 ins_pipe( pipe_slow ); 6348%} 6349 6350// Store Pointer; for storing unknown oops and raw pointers 6351instruct storeP(memory mem, anyRegP src) %{ 6352 match(Set mem (StoreP mem src)); 6353 6354 ins_cost(125); 6355 format %{ "MOV $mem,$src" %} 6356 opcode(0x89); 6357 ins_encode( OpcP, RegMem( src, mem ) ); 6358 ins_pipe( ialu_mem_reg ); 6359%} 6360 6361// Store Integer Immediate 6362instruct storeImmI(memory mem, immI src) %{ 6363 match(Set mem (StoreI mem src)); 6364 6365 ins_cost(150); 6366 format %{ "MOV $mem,$src" %} 6367 opcode(0xC7); /* C7 /0 */ 6368 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src )); 6369 ins_pipe( ialu_mem_imm ); 6370%} 6371 6372// Store Short/Char Immediate 6373instruct storeImmI16(memory mem, immI16 src) %{ 6374 predicate(UseStoreImmI16); 6375 match(Set mem (StoreC mem src)); 6376 6377 ins_cost(150); 6378 format %{ "MOV16 $mem,$src" %} 6379 opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */ 6380 ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src )); 6381 ins_pipe( ialu_mem_imm ); 6382%} 6383 6384// Store Pointer Immediate; null pointers or constant oops that do not 6385// need card-mark barriers. 6386instruct storeImmP(memory mem, immP src) %{ 6387 match(Set mem (StoreP mem src)); 6388 6389 ins_cost(150); 6390 format %{ "MOV $mem,$src" %} 6391 opcode(0xC7); /* C7 /0 */ 6392 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src )); 6393 ins_pipe( ialu_mem_imm ); 6394%} 6395 6396// Store Byte Immediate 6397instruct storeImmB(memory mem, immI8 src) %{ 6398 match(Set mem (StoreB mem src)); 6399 6400 ins_cost(150); 6401 format %{ "MOV8 $mem,$src" %} 6402 opcode(0xC6); /* C6 /0 */ 6403 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src )); 6404 ins_pipe( ialu_mem_imm ); 6405%} 6406 6407// Store CMS card-mark Immediate 6408instruct storeImmCM(memory mem, immI8 src) %{ 6409 match(Set mem (StoreCM mem src)); 6410 6411 ins_cost(150); 6412 format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %} 6413 opcode(0xC6); /* C6 /0 */ 6414 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src )); 6415 ins_pipe( ialu_mem_imm ); 6416%} 6417 6418// Store Double 6419instruct storeDPR( memory mem, regDPR1 src) %{ 6420 predicate(UseSSE<=1); 6421 match(Set mem (StoreD mem src)); 6422 6423 ins_cost(100); 6424 format %{ "FST_D $mem,$src" %} 6425 opcode(0xDD); /* DD /2 */ 6426 ins_encode( enc_FPR_store(mem,src) ); 6427 ins_pipe( fpu_mem_reg ); 6428%} 6429 6430// Store double does rounding on x86 6431instruct storeDPR_rounded( memory mem, regDPR1 src) %{ 6432 predicate(UseSSE<=1); 6433 match(Set mem (StoreD mem (RoundDouble src))); 6434 6435 ins_cost(100); 6436 format %{ "FST_D $mem,$src\t# round" %} 6437 opcode(0xDD); /* DD /2 */ 6438 ins_encode( enc_FPR_store(mem,src) ); 6439 ins_pipe( fpu_mem_reg ); 6440%} 6441 6442// Store XMM register to memory (double-precision floating points) 6443// MOVSD instruction 6444instruct storeD(memory mem, regD src) %{ 6445 predicate(UseSSE>=2); 6446 match(Set mem (StoreD mem src)); 6447 ins_cost(95); 6448 format %{ "MOVSD $mem,$src" %} 6449 ins_encode %{ 6450 __ movdbl($mem$$Address, $src$$XMMRegister); 6451 %} 6452 ins_pipe( pipe_slow ); 6453%} 6454 6455// Store XMM register to memory (single-precision floating point) 6456// MOVSS instruction 6457instruct storeF(memory mem, regF src) %{ 6458 predicate(UseSSE>=1); 6459 match(Set mem (StoreF mem src)); 6460 ins_cost(95); 6461 format %{ "MOVSS $mem,$src" %} 6462 ins_encode %{ 6463 __ movflt($mem$$Address, $src$$XMMRegister); 6464 %} 6465 ins_pipe( pipe_slow ); 6466%} 6467 6468// Store Float 6469instruct storeFPR( memory mem, regFPR1 src) %{ 6470 predicate(UseSSE==0); 6471 match(Set mem (StoreF mem src)); 6472 6473 ins_cost(100); 6474 format %{ "FST_S $mem,$src" %} 6475 opcode(0xD9); /* D9 /2 */ 6476 ins_encode( enc_FPR_store(mem,src) ); 6477 ins_pipe( fpu_mem_reg ); 6478%} 6479 6480// Store Float does rounding on x86 6481instruct storeFPR_rounded( memory mem, regFPR1 src) %{ 6482 predicate(UseSSE==0); 6483 match(Set mem (StoreF mem (RoundFloat src))); 6484 6485 ins_cost(100); 6486 format %{ "FST_S $mem,$src\t# round" %} 6487 opcode(0xD9); /* D9 /2 */ 6488 ins_encode( enc_FPR_store(mem,src) ); 6489 ins_pipe( fpu_mem_reg ); 6490%} 6491 6492// Store Float does rounding on x86 6493instruct storeFPR_Drounded( memory mem, regDPR1 src) %{ 6494 predicate(UseSSE<=1); 6495 match(Set mem (StoreF mem (ConvD2F src))); 6496 6497 ins_cost(100); 6498 format %{ "FST_S $mem,$src\t# D-round" %} 6499 opcode(0xD9); /* D9 /2 */ 6500 ins_encode( enc_FPR_store(mem,src) ); 6501 ins_pipe( fpu_mem_reg ); 6502%} 6503 6504// Store immediate Float value (it is faster than store from FPU register) 6505// The instruction usage is guarded by predicate in operand immFPR(). 6506instruct storeFPR_imm( memory mem, immFPR src) %{ 6507 match(Set mem (StoreF mem src)); 6508 6509 ins_cost(50); 6510 format %{ "MOV $mem,$src\t# store float" %} 6511 opcode(0xC7); /* C7 /0 */ 6512 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src )); 6513 ins_pipe( ialu_mem_imm ); 6514%} 6515 6516// Store immediate Float value (it is faster than store from XMM register) 6517// The instruction usage is guarded by predicate in operand immF(). 6518instruct storeF_imm( memory mem, immF src) %{ 6519 match(Set mem (StoreF mem src)); 6520 6521 ins_cost(50); 6522 format %{ "MOV $mem,$src\t# store float" %} 6523 opcode(0xC7); /* C7 /0 */ 6524 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src )); 6525 ins_pipe( ialu_mem_imm ); 6526%} 6527 6528// Store Integer to stack slot 6529instruct storeSSI(stackSlotI dst, rRegI src) %{ 6530 match(Set dst src); 6531 6532 ins_cost(100); 6533 format %{ "MOV $dst,$src" %} 6534 opcode(0x89); 6535 ins_encode( OpcPRegSS( dst, src ) ); 6536 ins_pipe( ialu_mem_reg ); 6537%} 6538 6539// Store Integer to stack slot 6540instruct storeSSP(stackSlotP dst, eRegP src) %{ 6541 match(Set dst src); 6542 6543 ins_cost(100); 6544 format %{ "MOV $dst,$src" %} 6545 opcode(0x89); 6546 ins_encode( OpcPRegSS( dst, src ) ); 6547 ins_pipe( ialu_mem_reg ); 6548%} 6549 6550// Store Long to stack slot 6551instruct storeSSL(stackSlotL dst, eRegL src) %{ 6552 match(Set dst src); 6553 6554 ins_cost(200); 6555 format %{ "MOV $dst,$src.lo\n\t" 6556 "MOV $dst+4,$src.hi" %} 6557 opcode(0x89, 0x89); 6558 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) ); 6559 ins_pipe( ialu_mem_long_reg ); 6560%} 6561 6562//----------MemBar Instructions----------------------------------------------- 6563// Memory barrier flavors 6564 6565instruct membar_acquire() %{ 6566 match(MemBarAcquire); 6567 match(LoadFence); 6568 ins_cost(400); 6569 6570 size(0); 6571 format %{ "MEMBAR-acquire ! (empty encoding)" %} 6572 ins_encode(); 6573 ins_pipe(empty); 6574%} 6575 6576instruct membar_acquire_lock() %{ 6577 match(MemBarAcquireLock); 6578 ins_cost(0); 6579 6580 size(0); 6581 format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %} 6582 ins_encode( ); 6583 ins_pipe(empty); 6584%} 6585 6586instruct membar_release() %{ 6587 match(MemBarRelease); 6588 match(StoreFence); 6589 ins_cost(400); 6590 6591 size(0); 6592 format %{ "MEMBAR-release ! (empty encoding)" %} 6593 ins_encode( ); 6594 ins_pipe(empty); 6595%} 6596 6597instruct membar_release_lock() %{ 6598 match(MemBarReleaseLock); 6599 ins_cost(0); 6600 6601 size(0); 6602 format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %} 6603 ins_encode( ); 6604 ins_pipe(empty); 6605%} 6606 6607instruct membar_volatile(eFlagsReg cr) %{ 6608 match(MemBarVolatile); 6609 effect(KILL cr); 6610 ins_cost(400); 6611 6612 format %{ 6613 $$template 6614 if (os::is_MP()) { 6615 $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile" 6616 } else { 6617 $$emit$$"MEMBAR-volatile ! (empty encoding)" 6618 } 6619 %} 6620 ins_encode %{ 6621 __ membar(Assembler::StoreLoad); 6622 %} 6623 ins_pipe(pipe_slow); 6624%} 6625 6626instruct unnecessary_membar_volatile() %{ 6627 match(MemBarVolatile); 6628 predicate(Matcher::post_store_load_barrier(n)); 6629 ins_cost(0); 6630 6631 size(0); 6632 format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %} 6633 ins_encode( ); 6634 ins_pipe(empty); 6635%} 6636 6637instruct membar_storestore() %{ 6638 match(MemBarStoreStore); 6639 ins_cost(0); 6640 6641 size(0); 6642 format %{ "MEMBAR-storestore (empty encoding)" %} 6643 ins_encode( ); 6644 ins_pipe(empty); 6645%} 6646 6647//----------Move Instructions-------------------------------------------------- 6648instruct castX2P(eAXRegP dst, eAXRegI src) %{ 6649 match(Set dst (CastX2P src)); 6650 format %{ "# X2P $dst, $src" %} 6651 ins_encode( /*empty encoding*/ ); 6652 ins_cost(0); 6653 ins_pipe(empty); 6654%} 6655 6656instruct castP2X(rRegI dst, eRegP src ) %{ 6657 match(Set dst (CastP2X src)); 6658 ins_cost(50); 6659 format %{ "MOV $dst, $src\t# CastP2X" %} 6660 ins_encode( enc_Copy( dst, src) ); 6661 ins_pipe( ialu_reg_reg ); 6662%} 6663 6664//----------Conditional Move--------------------------------------------------- 6665// Conditional move 6666instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{ 6667 predicate(!VM_Version::supports_cmov() ); 6668 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6669 ins_cost(200); 6670 format %{ "J$cop,us skip\t# signed cmove\n\t" 6671 "MOV $dst,$src\n" 6672 "skip:" %} 6673 ins_encode %{ 6674 Label Lskip; 6675 // Invert sense of branch from sense of CMOV 6676 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); 6677 __ movl($dst$$Register, $src$$Register); 6678 __ bind(Lskip); 6679 %} 6680 ins_pipe( pipe_cmov_reg ); 6681%} 6682 6683instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{ 6684 predicate(!VM_Version::supports_cmov() ); 6685 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6686 ins_cost(200); 6687 format %{ "J$cop,us skip\t# unsigned cmove\n\t" 6688 "MOV $dst,$src\n" 6689 "skip:" %} 6690 ins_encode %{ 6691 Label Lskip; 6692 // Invert sense of branch from sense of CMOV 6693 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); 6694 __ movl($dst$$Register, $src$$Register); 6695 __ bind(Lskip); 6696 %} 6697 ins_pipe( pipe_cmov_reg ); 6698%} 6699 6700instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{ 6701 predicate(VM_Version::supports_cmov() ); 6702 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6703 ins_cost(200); 6704 format %{ "CMOV$cop $dst,$src" %} 6705 opcode(0x0F,0x40); 6706 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 6707 ins_pipe( pipe_cmov_reg ); 6708%} 6709 6710instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{ 6711 predicate(VM_Version::supports_cmov() ); 6712 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6713 ins_cost(200); 6714 format %{ "CMOV$cop $dst,$src" %} 6715 opcode(0x0F,0x40); 6716 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 6717 ins_pipe( pipe_cmov_reg ); 6718%} 6719 6720instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{ 6721 predicate(VM_Version::supports_cmov() ); 6722 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6723 ins_cost(200); 6724 expand %{ 6725 cmovI_regU(cop, cr, dst, src); 6726 %} 6727%} 6728 6729// Conditional move 6730instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{ 6731 predicate(VM_Version::supports_cmov() ); 6732 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); 6733 ins_cost(250); 6734 format %{ "CMOV$cop $dst,$src" %} 6735 opcode(0x0F,0x40); 6736 ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 6737 ins_pipe( pipe_cmov_mem ); 6738%} 6739 6740// Conditional move 6741instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{ 6742 predicate(VM_Version::supports_cmov() ); 6743 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); 6744 ins_cost(250); 6745 format %{ "CMOV$cop $dst,$src" %} 6746 opcode(0x0F,0x40); 6747 ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 6748 ins_pipe( pipe_cmov_mem ); 6749%} 6750 6751instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{ 6752 predicate(VM_Version::supports_cmov() ); 6753 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); 6754 ins_cost(250); 6755 expand %{ 6756 cmovI_memU(cop, cr, dst, src); 6757 %} 6758%} 6759 6760// Conditional move 6761instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{ 6762 predicate(VM_Version::supports_cmov() ); 6763 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 6764 ins_cost(200); 6765 format %{ "CMOV$cop $dst,$src\t# ptr" %} 6766 opcode(0x0F,0x40); 6767 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 6768 ins_pipe( pipe_cmov_reg ); 6769%} 6770 6771// Conditional move (non-P6 version) 6772// Note: a CMoveP is generated for stubs and native wrappers 6773// regardless of whether we are on a P6, so we 6774// emulate a cmov here 6775instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{ 6776 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 6777 ins_cost(300); 6778 format %{ "Jn$cop skip\n\t" 6779 "MOV $dst,$src\t# pointer\n" 6780 "skip:" %} 6781 opcode(0x8b); 6782 ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src)); 6783 ins_pipe( pipe_cmov_reg ); 6784%} 6785 6786// Conditional move 6787instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{ 6788 predicate(VM_Version::supports_cmov() ); 6789 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 6790 ins_cost(200); 6791 format %{ "CMOV$cop $dst,$src\t# ptr" %} 6792 opcode(0x0F,0x40); 6793 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 6794 ins_pipe( pipe_cmov_reg ); 6795%} 6796 6797instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{ 6798 predicate(VM_Version::supports_cmov() ); 6799 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 6800 ins_cost(200); 6801 expand %{ 6802 cmovP_regU(cop, cr, dst, src); 6803 %} 6804%} 6805 6806// DISABLED: Requires the ADLC to emit a bottom_type call that 6807// correctly meets the two pointer arguments; one is an incoming 6808// register but the other is a memory operand. ALSO appears to 6809// be buggy with implicit null checks. 6810// 6811//// Conditional move 6812//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{ 6813// predicate(VM_Version::supports_cmov() ); 6814// match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src)))); 6815// ins_cost(250); 6816// format %{ "CMOV$cop $dst,$src\t# ptr" %} 6817// opcode(0x0F,0x40); 6818// ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 6819// ins_pipe( pipe_cmov_mem ); 6820//%} 6821// 6822//// Conditional move 6823//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{ 6824// predicate(VM_Version::supports_cmov() ); 6825// match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src)))); 6826// ins_cost(250); 6827// format %{ "CMOV$cop $dst,$src\t# ptr" %} 6828// opcode(0x0F,0x40); 6829// ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 6830// ins_pipe( pipe_cmov_mem ); 6831//%} 6832 6833// Conditional move 6834instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{ 6835 predicate(UseSSE<=1); 6836 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6837 ins_cost(200); 6838 format %{ "FCMOV$cop $dst,$src\t# double" %} 6839 opcode(0xDA); 6840 ins_encode( enc_cmov_dpr(cop,src) ); 6841 ins_pipe( pipe_cmovDPR_reg ); 6842%} 6843 6844// Conditional move 6845instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{ 6846 predicate(UseSSE==0); 6847 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6848 ins_cost(200); 6849 format %{ "FCMOV$cop $dst,$src\t# float" %} 6850 opcode(0xDA); 6851 ins_encode( enc_cmov_dpr(cop,src) ); 6852 ins_pipe( pipe_cmovDPR_reg ); 6853%} 6854 6855// Float CMOV on Intel doesn't handle *signed* compares, only unsigned. 6856instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{ 6857 predicate(UseSSE<=1); 6858 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6859 ins_cost(200); 6860 format %{ "Jn$cop skip\n\t" 6861 "MOV $dst,$src\t# double\n" 6862 "skip:" %} 6863 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */ 6864 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) ); 6865 ins_pipe( pipe_cmovDPR_reg ); 6866%} 6867 6868// Float CMOV on Intel doesn't handle *signed* compares, only unsigned. 6869instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{ 6870 predicate(UseSSE==0); 6871 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6872 ins_cost(200); 6873 format %{ "Jn$cop skip\n\t" 6874 "MOV $dst,$src\t# float\n" 6875 "skip:" %} 6876 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */ 6877 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) ); 6878 ins_pipe( pipe_cmovDPR_reg ); 6879%} 6880 6881// No CMOVE with SSE/SSE2 6882instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{ 6883 predicate (UseSSE>=1); 6884 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6885 ins_cost(200); 6886 format %{ "Jn$cop skip\n\t" 6887 "MOVSS $dst,$src\t# float\n" 6888 "skip:" %} 6889 ins_encode %{ 6890 Label skip; 6891 // Invert sense of branch from sense of CMOV 6892 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 6893 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 6894 __ bind(skip); 6895 %} 6896 ins_pipe( pipe_slow ); 6897%} 6898 6899// No CMOVE with SSE/SSE2 6900instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{ 6901 predicate (UseSSE>=2); 6902 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6903 ins_cost(200); 6904 format %{ "Jn$cop skip\n\t" 6905 "MOVSD $dst,$src\t# float\n" 6906 "skip:" %} 6907 ins_encode %{ 6908 Label skip; 6909 // Invert sense of branch from sense of CMOV 6910 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 6911 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6912 __ bind(skip); 6913 %} 6914 ins_pipe( pipe_slow ); 6915%} 6916 6917// unsigned version 6918instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{ 6919 predicate (UseSSE>=1); 6920 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6921 ins_cost(200); 6922 format %{ "Jn$cop skip\n\t" 6923 "MOVSS $dst,$src\t# float\n" 6924 "skip:" %} 6925 ins_encode %{ 6926 Label skip; 6927 // Invert sense of branch from sense of CMOV 6928 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 6929 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 6930 __ bind(skip); 6931 %} 6932 ins_pipe( pipe_slow ); 6933%} 6934 6935instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{ 6936 predicate (UseSSE>=1); 6937 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6938 ins_cost(200); 6939 expand %{ 6940 fcmovF_regU(cop, cr, dst, src); 6941 %} 6942%} 6943 6944// unsigned version 6945instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{ 6946 predicate (UseSSE>=2); 6947 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6948 ins_cost(200); 6949 format %{ "Jn$cop skip\n\t" 6950 "MOVSD $dst,$src\t# float\n" 6951 "skip:" %} 6952 ins_encode %{ 6953 Label skip; 6954 // Invert sense of branch from sense of CMOV 6955 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 6956 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6957 __ bind(skip); 6958 %} 6959 ins_pipe( pipe_slow ); 6960%} 6961 6962instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{ 6963 predicate (UseSSE>=2); 6964 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6965 ins_cost(200); 6966 expand %{ 6967 fcmovD_regU(cop, cr, dst, src); 6968 %} 6969%} 6970 6971instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{ 6972 predicate(VM_Version::supports_cmov() ); 6973 match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); 6974 ins_cost(200); 6975 format %{ "CMOV$cop $dst.lo,$src.lo\n\t" 6976 "CMOV$cop $dst.hi,$src.hi" %} 6977 opcode(0x0F,0x40); 6978 ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) ); 6979 ins_pipe( pipe_cmov_reg_long ); 6980%} 6981 6982instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{ 6983 predicate(VM_Version::supports_cmov() ); 6984 match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); 6985 ins_cost(200); 6986 format %{ "CMOV$cop $dst.lo,$src.lo\n\t" 6987 "CMOV$cop $dst.hi,$src.hi" %} 6988 opcode(0x0F,0x40); 6989 ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) ); 6990 ins_pipe( pipe_cmov_reg_long ); 6991%} 6992 6993instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{ 6994 predicate(VM_Version::supports_cmov() ); 6995 match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); 6996 ins_cost(200); 6997 expand %{ 6998 cmovL_regU(cop, cr, dst, src); 6999 %} 7000%} 7001 7002//----------Arithmetic Instructions-------------------------------------------- 7003//----------Addition Instructions---------------------------------------------- 7004 7005// Integer Addition Instructions 7006instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7007 match(Set dst (AddI dst src)); 7008 effect(KILL cr); 7009 7010 size(2); 7011 format %{ "ADD $dst,$src" %} 7012 opcode(0x03); 7013 ins_encode( OpcP, RegReg( dst, src) ); 7014 ins_pipe( ialu_reg_reg ); 7015%} 7016 7017instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 7018 match(Set dst (AddI dst src)); 7019 effect(KILL cr); 7020 7021 format %{ "ADD $dst,$src" %} 7022 opcode(0x81, 0x00); /* /0 id */ 7023 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 7024 ins_pipe( ialu_reg ); 7025%} 7026 7027instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{ 7028 predicate(UseIncDec); 7029 match(Set dst (AddI dst src)); 7030 effect(KILL cr); 7031 7032 size(1); 7033 format %{ "INC $dst" %} 7034 opcode(0x40); /* */ 7035 ins_encode( Opc_plus( primary, dst ) ); 7036 ins_pipe( ialu_reg ); 7037%} 7038 7039instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{ 7040 match(Set dst (AddI src0 src1)); 7041 ins_cost(110); 7042 7043 format %{ "LEA $dst,[$src0 + $src1]" %} 7044 opcode(0x8D); /* 0x8D /r */ 7045 ins_encode( OpcP, RegLea( dst, src0, src1 ) ); 7046 ins_pipe( ialu_reg_reg ); 7047%} 7048 7049instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{ 7050 match(Set dst (AddP src0 src1)); 7051 ins_cost(110); 7052 7053 format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %} 7054 opcode(0x8D); /* 0x8D /r */ 7055 ins_encode( OpcP, RegLea( dst, src0, src1 ) ); 7056 ins_pipe( ialu_reg_reg ); 7057%} 7058 7059instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{ 7060 predicate(UseIncDec); 7061 match(Set dst (AddI dst src)); 7062 effect(KILL cr); 7063 7064 size(1); 7065 format %{ "DEC $dst" %} 7066 opcode(0x48); /* */ 7067 ins_encode( Opc_plus( primary, dst ) ); 7068 ins_pipe( ialu_reg ); 7069%} 7070 7071instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{ 7072 match(Set dst (AddP dst src)); 7073 effect(KILL cr); 7074 7075 size(2); 7076 format %{ "ADD $dst,$src" %} 7077 opcode(0x03); 7078 ins_encode( OpcP, RegReg( dst, src) ); 7079 ins_pipe( ialu_reg_reg ); 7080%} 7081 7082instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{ 7083 match(Set dst (AddP dst src)); 7084 effect(KILL cr); 7085 7086 format %{ "ADD $dst,$src" %} 7087 opcode(0x81,0x00); /* Opcode 81 /0 id */ 7088 // ins_encode( RegImm( dst, src) ); 7089 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 7090 ins_pipe( ialu_reg ); 7091%} 7092 7093instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 7094 match(Set dst (AddI dst (LoadI src))); 7095 effect(KILL cr); 7096 7097 ins_cost(125); 7098 format %{ "ADD $dst,$src" %} 7099 opcode(0x03); 7100 ins_encode( OpcP, RegMem( dst, src) ); 7101 ins_pipe( ialu_reg_mem ); 7102%} 7103 7104instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 7105 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7106 effect(KILL cr); 7107 7108 ins_cost(150); 7109 format %{ "ADD $dst,$src" %} 7110 opcode(0x01); /* Opcode 01 /r */ 7111 ins_encode( OpcP, RegMem( src, dst ) ); 7112 ins_pipe( ialu_mem_reg ); 7113%} 7114 7115// Add Memory with Immediate 7116instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 7117 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7118 effect(KILL cr); 7119 7120 ins_cost(125); 7121 format %{ "ADD $dst,$src" %} 7122 opcode(0x81); /* Opcode 81 /0 id */ 7123 ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) ); 7124 ins_pipe( ialu_mem_imm ); 7125%} 7126 7127instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{ 7128 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7129 effect(KILL cr); 7130 7131 ins_cost(125); 7132 format %{ "INC $dst" %} 7133 opcode(0xFF); /* Opcode FF /0 */ 7134 ins_encode( OpcP, RMopc_Mem(0x00,dst)); 7135 ins_pipe( ialu_mem_imm ); 7136%} 7137 7138instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{ 7139 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7140 effect(KILL cr); 7141 7142 ins_cost(125); 7143 format %{ "DEC $dst" %} 7144 opcode(0xFF); /* Opcode FF /1 */ 7145 ins_encode( OpcP, RMopc_Mem(0x01,dst)); 7146 ins_pipe( ialu_mem_imm ); 7147%} 7148 7149 7150instruct checkCastPP( eRegP dst ) %{ 7151 match(Set dst (CheckCastPP dst)); 7152 7153 size(0); 7154 format %{ "#checkcastPP of $dst" %} 7155 ins_encode( /*empty encoding*/ ); 7156 ins_pipe( empty ); 7157%} 7158 7159instruct castPP( eRegP dst ) %{ 7160 match(Set dst (CastPP dst)); 7161 format %{ "#castPP of $dst" %} 7162 ins_encode( /*empty encoding*/ ); 7163 ins_pipe( empty ); 7164%} 7165 7166instruct castII( rRegI dst ) %{ 7167 match(Set dst (CastII dst)); 7168 format %{ "#castII of $dst" %} 7169 ins_encode( /*empty encoding*/ ); 7170 ins_cost(0); 7171 ins_pipe( empty ); 7172%} 7173 7174 7175// Load-locked - same as a regular pointer load when used with compare-swap 7176instruct loadPLocked(eRegP dst, memory mem) %{ 7177 match(Set dst (LoadPLocked mem)); 7178 7179 ins_cost(125); 7180 format %{ "MOV $dst,$mem\t# Load ptr. locked" %} 7181 opcode(0x8B); 7182 ins_encode( OpcP, RegMem(dst,mem)); 7183 ins_pipe( ialu_reg_mem ); 7184%} 7185 7186// Conditional-store of the updated heap-top. 7187// Used during allocation of the shared heap. 7188// Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel. 7189instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{ 7190 match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); 7191 // EAX is killed if there is contention, but then it's also unused. 7192 // In the common case of no contention, EAX holds the new oop address. 7193 format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %} 7194 ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) ); 7195 ins_pipe( pipe_cmpxchg ); 7196%} 7197 7198// Conditional-store of an int value. 7199// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel. 7200instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{ 7201 match(Set cr (StoreIConditional mem (Binary oldval newval))); 7202 effect(KILL oldval); 7203 format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %} 7204 ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) ); 7205 ins_pipe( pipe_cmpxchg ); 7206%} 7207 7208// Conditional-store of a long value. 7209// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel. 7210instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ 7211 match(Set cr (StoreLConditional mem (Binary oldval newval))); 7212 effect(KILL oldval); 7213 format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t" 7214 "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t" 7215 "XCHG EBX,ECX" 7216 %} 7217 ins_encode %{ 7218 // Note: we need to swap rbx, and rcx before and after the 7219 // cmpxchg8 instruction because the instruction uses 7220 // rcx as the high order word of the new value to store but 7221 // our register encoding uses rbx. 7222 __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc)); 7223 if( os::is_MP() ) 7224 __ lock(); 7225 __ cmpxchg8($mem$$Address); 7226 __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc)); 7227 %} 7228 ins_pipe( pipe_cmpxchg ); 7229%} 7230 7231// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them 7232 7233instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ 7234 predicate(VM_Version::supports_cx8()); 7235 match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); 7236 effect(KILL cr, KILL oldval); 7237 format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7238 "MOV $res,0\n\t" 7239 "JNE,s fail\n\t" 7240 "MOV $res,1\n" 7241 "fail:" %} 7242 ins_encode( enc_cmpxchg8(mem_ptr), 7243 enc_flags_ne_to_boolean(res) ); 7244 ins_pipe( pipe_cmpxchg ); 7245%} 7246 7247instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{ 7248 match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); 7249 effect(KILL cr, KILL oldval); 7250 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7251 "MOV $res,0\n\t" 7252 "JNE,s fail\n\t" 7253 "MOV $res,1\n" 7254 "fail:" %} 7255 ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) ); 7256 ins_pipe( pipe_cmpxchg ); 7257%} 7258 7259instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7260 match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); 7261 effect(KILL cr, KILL oldval); 7262 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7263 "MOV $res,0\n\t" 7264 "JNE,s fail\n\t" 7265 "MOV $res,1\n" 7266 "fail:" %} 7267 ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) ); 7268 ins_pipe( pipe_cmpxchg ); 7269%} 7270 7271instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ 7272 predicate(n->as_LoadStore()->result_not_used()); 7273 match(Set dummy (GetAndAddI mem add)); 7274 effect(KILL cr); 7275 format %{ "ADDL [$mem],$add" %} 7276 ins_encode %{ 7277 if (os::is_MP()) { __ lock(); } 7278 __ addl($mem$$Address, $add$$constant); 7279 %} 7280 ins_pipe( pipe_cmpxchg ); 7281%} 7282 7283instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{ 7284 match(Set newval (GetAndAddI mem newval)); 7285 effect(KILL cr); 7286 format %{ "XADDL [$mem],$newval" %} 7287 ins_encode %{ 7288 if (os::is_MP()) { __ lock(); } 7289 __ xaddl($mem$$Address, $newval$$Register); 7290 %} 7291 ins_pipe( pipe_cmpxchg ); 7292%} 7293 7294instruct xchgI( memory mem, rRegI newval) %{ 7295 match(Set newval (GetAndSetI mem newval)); 7296 format %{ "XCHGL $newval,[$mem]" %} 7297 ins_encode %{ 7298 __ xchgl($newval$$Register, $mem$$Address); 7299 %} 7300 ins_pipe( pipe_cmpxchg ); 7301%} 7302 7303instruct xchgP( memory mem, pRegP newval) %{ 7304 match(Set newval (GetAndSetP mem newval)); 7305 format %{ "XCHGL $newval,[$mem]" %} 7306 ins_encode %{ 7307 __ xchgl($newval$$Register, $mem$$Address); 7308 %} 7309 ins_pipe( pipe_cmpxchg ); 7310%} 7311 7312//----------Subtraction Instructions------------------------------------------- 7313 7314// Integer Subtraction Instructions 7315instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7316 match(Set dst (SubI dst src)); 7317 effect(KILL cr); 7318 7319 size(2); 7320 format %{ "SUB $dst,$src" %} 7321 opcode(0x2B); 7322 ins_encode( OpcP, RegReg( dst, src) ); 7323 ins_pipe( ialu_reg_reg ); 7324%} 7325 7326instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 7327 match(Set dst (SubI dst src)); 7328 effect(KILL cr); 7329 7330 format %{ "SUB $dst,$src" %} 7331 opcode(0x81,0x05); /* Opcode 81 /5 */ 7332 // ins_encode( RegImm( dst, src) ); 7333 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 7334 ins_pipe( ialu_reg ); 7335%} 7336 7337instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 7338 match(Set dst (SubI dst (LoadI src))); 7339 effect(KILL cr); 7340 7341 ins_cost(125); 7342 format %{ "SUB $dst,$src" %} 7343 opcode(0x2B); 7344 ins_encode( OpcP, RegMem( dst, src) ); 7345 ins_pipe( ialu_reg_mem ); 7346%} 7347 7348instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 7349 match(Set dst (StoreI dst (SubI (LoadI dst) src))); 7350 effect(KILL cr); 7351 7352 ins_cost(150); 7353 format %{ "SUB $dst,$src" %} 7354 opcode(0x29); /* Opcode 29 /r */ 7355 ins_encode( OpcP, RegMem( src, dst ) ); 7356 ins_pipe( ialu_mem_reg ); 7357%} 7358 7359// Subtract from a pointer 7360instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{ 7361 match(Set dst (AddP dst (SubI zero src))); 7362 effect(KILL cr); 7363 7364 size(2); 7365 format %{ "SUB $dst,$src" %} 7366 opcode(0x2B); 7367 ins_encode( OpcP, RegReg( dst, src) ); 7368 ins_pipe( ialu_reg_reg ); 7369%} 7370 7371instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{ 7372 match(Set dst (SubI zero dst)); 7373 effect(KILL cr); 7374 7375 size(2); 7376 format %{ "NEG $dst" %} 7377 opcode(0xF7,0x03); // Opcode F7 /3 7378 ins_encode( OpcP, RegOpc( dst ) ); 7379 ins_pipe( ialu_reg ); 7380%} 7381 7382//----------Multiplication/Division Instructions------------------------------- 7383// Integer Multiplication Instructions 7384// Multiply Register 7385instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7386 match(Set dst (MulI dst src)); 7387 effect(KILL cr); 7388 7389 size(3); 7390 ins_cost(300); 7391 format %{ "IMUL $dst,$src" %} 7392 opcode(0xAF, 0x0F); 7393 ins_encode( OpcS, OpcP, RegReg( dst, src) ); 7394 ins_pipe( ialu_reg_reg_alu0 ); 7395%} 7396 7397// Multiply 32-bit Immediate 7398instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{ 7399 match(Set dst (MulI src imm)); 7400 effect(KILL cr); 7401 7402 ins_cost(300); 7403 format %{ "IMUL $dst,$src,$imm" %} 7404 opcode(0x69); /* 69 /r id */ 7405 ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) ); 7406 ins_pipe( ialu_reg_reg_alu0 ); 7407%} 7408 7409instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{ 7410 match(Set dst src); 7411 effect(KILL cr); 7412 7413 // Note that this is artificially increased to make it more expensive than loadConL 7414 ins_cost(250); 7415 format %{ "MOV EAX,$src\t// low word only" %} 7416 opcode(0xB8); 7417 ins_encode( LdImmL_Lo(dst, src) ); 7418 ins_pipe( ialu_reg_fat ); 7419%} 7420 7421// Multiply by 32-bit Immediate, taking the shifted high order results 7422// (special case for shift by 32) 7423instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{ 7424 match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); 7425 predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && 7426 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && 7427 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); 7428 effect(USE src1, KILL cr); 7429 7430 // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only 7431 ins_cost(0*100 + 1*400 - 150); 7432 format %{ "IMUL EDX:EAX,$src1" %} 7433 ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); 7434 ins_pipe( pipe_slow ); 7435%} 7436 7437// Multiply by 32-bit Immediate, taking the shifted high order results 7438instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{ 7439 match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); 7440 predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && 7441 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && 7442 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); 7443 effect(USE src1, KILL cr); 7444 7445 // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only 7446 ins_cost(1*100 + 1*400 - 150); 7447 format %{ "IMUL EDX:EAX,$src1\n\t" 7448 "SAR EDX,$cnt-32" %} 7449 ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); 7450 ins_pipe( pipe_slow ); 7451%} 7452 7453// Multiply Memory 32-bit Immediate 7454instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{ 7455 match(Set dst (MulI (LoadI src) imm)); 7456 effect(KILL cr); 7457 7458 ins_cost(300); 7459 format %{ "IMUL $dst,$src,$imm" %} 7460 opcode(0x69); /* 69 /r id */ 7461 ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) ); 7462 ins_pipe( ialu_reg_mem_alu0 ); 7463%} 7464 7465// Multiply Memory 7466instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{ 7467 match(Set dst (MulI dst (LoadI src))); 7468 effect(KILL cr); 7469 7470 ins_cost(350); 7471 format %{ "IMUL $dst,$src" %} 7472 opcode(0xAF, 0x0F); 7473 ins_encode( OpcS, OpcP, RegMem( dst, src) ); 7474 ins_pipe( ialu_reg_mem_alu0 ); 7475%} 7476 7477// Multiply Register Int to Long 7478instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{ 7479 // Basic Idea: long = (long)int * (long)int 7480 match(Set dst (MulL (ConvI2L src) (ConvI2L src1))); 7481 effect(DEF dst, USE src, USE src1, KILL flags); 7482 7483 ins_cost(300); 7484 format %{ "IMUL $dst,$src1" %} 7485 7486 ins_encode( long_int_multiply( dst, src1 ) ); 7487 ins_pipe( ialu_reg_reg_alu0 ); 7488%} 7489 7490instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{ 7491 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) 7492 match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask))); 7493 effect(KILL flags); 7494 7495 ins_cost(300); 7496 format %{ "MUL $dst,$src1" %} 7497 7498 ins_encode( long_uint_multiply(dst, src1) ); 7499 ins_pipe( ialu_reg_reg_alu0 ); 7500%} 7501 7502// Multiply Register Long 7503instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7504 match(Set dst (MulL dst src)); 7505 effect(KILL cr, TEMP tmp); 7506 ins_cost(4*100+3*400); 7507// Basic idea: lo(result) = lo(x_lo * y_lo) 7508// hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 7509 format %{ "MOV $tmp,$src.lo\n\t" 7510 "IMUL $tmp,EDX\n\t" 7511 "MOV EDX,$src.hi\n\t" 7512 "IMUL EDX,EAX\n\t" 7513 "ADD $tmp,EDX\n\t" 7514 "MUL EDX:EAX,$src.lo\n\t" 7515 "ADD EDX,$tmp" %} 7516 ins_encode( long_multiply( dst, src, tmp ) ); 7517 ins_pipe( pipe_slow ); 7518%} 7519 7520// Multiply Register Long where the left operand's high 32 bits are zero 7521instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7522 predicate(is_operand_hi32_zero(n->in(1))); 7523 match(Set dst (MulL dst src)); 7524 effect(KILL cr, TEMP tmp); 7525 ins_cost(2*100+2*400); 7526// Basic idea: lo(result) = lo(x_lo * y_lo) 7527// hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0 7528 format %{ "MOV $tmp,$src.hi\n\t" 7529 "IMUL $tmp,EAX\n\t" 7530 "MUL EDX:EAX,$src.lo\n\t" 7531 "ADD EDX,$tmp" %} 7532 ins_encode %{ 7533 __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register)); 7534 __ imull($tmp$$Register, rax); 7535 __ mull($src$$Register); 7536 __ addl(rdx, $tmp$$Register); 7537 %} 7538 ins_pipe( pipe_slow ); 7539%} 7540 7541// Multiply Register Long where the right operand's high 32 bits are zero 7542instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7543 predicate(is_operand_hi32_zero(n->in(2))); 7544 match(Set dst (MulL dst src)); 7545 effect(KILL cr, TEMP tmp); 7546 ins_cost(2*100+2*400); 7547// Basic idea: lo(result) = lo(x_lo * y_lo) 7548// hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0 7549 format %{ "MOV $tmp,$src.lo\n\t" 7550 "IMUL $tmp,EDX\n\t" 7551 "MUL EDX:EAX,$src.lo\n\t" 7552 "ADD EDX,$tmp" %} 7553 ins_encode %{ 7554 __ movl($tmp$$Register, $src$$Register); 7555 __ imull($tmp$$Register, rdx); 7556 __ mull($src$$Register); 7557 __ addl(rdx, $tmp$$Register); 7558 %} 7559 ins_pipe( pipe_slow ); 7560%} 7561 7562// Multiply Register Long where the left and the right operands' high 32 bits are zero 7563instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{ 7564 predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2))); 7565 match(Set dst (MulL dst src)); 7566 effect(KILL cr); 7567 ins_cost(1*400); 7568// Basic idea: lo(result) = lo(x_lo * y_lo) 7569// hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0 7570 format %{ "MUL EDX:EAX,$src.lo\n\t" %} 7571 ins_encode %{ 7572 __ mull($src$$Register); 7573 %} 7574 ins_pipe( pipe_slow ); 7575%} 7576 7577// Multiply Register Long by small constant 7578instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{ 7579 match(Set dst (MulL dst src)); 7580 effect(KILL cr, TEMP tmp); 7581 ins_cost(2*100+2*400); 7582 size(12); 7583// Basic idea: lo(result) = lo(src * EAX) 7584// hi(result) = hi(src * EAX) + lo(src * EDX) 7585 format %{ "IMUL $tmp,EDX,$src\n\t" 7586 "MOV EDX,$src\n\t" 7587 "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t" 7588 "ADD EDX,$tmp" %} 7589 ins_encode( long_multiply_con( dst, src, tmp ) ); 7590 ins_pipe( pipe_slow ); 7591%} 7592 7593// Integer DIV with Register 7594instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{ 7595 match(Set rax (DivI rax div)); 7596 effect(KILL rdx, KILL cr); 7597 size(26); 7598 ins_cost(30*100+10*100); 7599 format %{ "CMP EAX,0x80000000\n\t" 7600 "JNE,s normal\n\t" 7601 "XOR EDX,EDX\n\t" 7602 "CMP ECX,-1\n\t" 7603 "JE,s done\n" 7604 "normal: CDQ\n\t" 7605 "IDIV $div\n\t" 7606 "done:" %} 7607 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7608 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7609 ins_pipe( ialu_reg_reg_alu0 ); 7610%} 7611 7612// Divide Register Long 7613instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{ 7614 match(Set dst (DivL src1 src2)); 7615 effect( KILL cr, KILL cx, KILL bx ); 7616 ins_cost(10000); 7617 format %{ "PUSH $src1.hi\n\t" 7618 "PUSH $src1.lo\n\t" 7619 "PUSH $src2.hi\n\t" 7620 "PUSH $src2.lo\n\t" 7621 "CALL SharedRuntime::ldiv\n\t" 7622 "ADD ESP,16" %} 7623 ins_encode( long_div(src1,src2) ); 7624 ins_pipe( pipe_slow ); 7625%} 7626 7627// Integer DIVMOD with Register, both quotient and mod results 7628instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{ 7629 match(DivModI rax div); 7630 effect(KILL cr); 7631 size(26); 7632 ins_cost(30*100+10*100); 7633 format %{ "CMP EAX,0x80000000\n\t" 7634 "JNE,s normal\n\t" 7635 "XOR EDX,EDX\n\t" 7636 "CMP ECX,-1\n\t" 7637 "JE,s done\n" 7638 "normal: CDQ\n\t" 7639 "IDIV $div\n\t" 7640 "done:" %} 7641 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7642 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7643 ins_pipe( pipe_slow ); 7644%} 7645 7646// Integer MOD with Register 7647instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{ 7648 match(Set rdx (ModI rax div)); 7649 effect(KILL rax, KILL cr); 7650 7651 size(26); 7652 ins_cost(300); 7653 format %{ "CDQ\n\t" 7654 "IDIV $div" %} 7655 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7656 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7657 ins_pipe( ialu_reg_reg_alu0 ); 7658%} 7659 7660// Remainder Register Long 7661instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{ 7662 match(Set dst (ModL src1 src2)); 7663 effect( KILL cr, KILL cx, KILL bx ); 7664 ins_cost(10000); 7665 format %{ "PUSH $src1.hi\n\t" 7666 "PUSH $src1.lo\n\t" 7667 "PUSH $src2.hi\n\t" 7668 "PUSH $src2.lo\n\t" 7669 "CALL SharedRuntime::lrem\n\t" 7670 "ADD ESP,16" %} 7671 ins_encode( long_mod(src1,src2) ); 7672 ins_pipe( pipe_slow ); 7673%} 7674 7675// Divide Register Long (no special case since divisor != -1) 7676instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{ 7677 match(Set dst (DivL dst imm)); 7678 effect( TEMP tmp, TEMP tmp2, KILL cr ); 7679 ins_cost(1000); 7680 format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t" 7681 "XOR $tmp2,$tmp2\n\t" 7682 "CMP $tmp,EDX\n\t" 7683 "JA,s fast\n\t" 7684 "MOV $tmp2,EAX\n\t" 7685 "MOV EAX,EDX\n\t" 7686 "MOV EDX,0\n\t" 7687 "JLE,s pos\n\t" 7688 "LNEG EAX : $tmp2\n\t" 7689 "DIV $tmp # unsigned division\n\t" 7690 "XCHG EAX,$tmp2\n\t" 7691 "DIV $tmp\n\t" 7692 "LNEG $tmp2 : EAX\n\t" 7693 "JMP,s done\n" 7694 "pos:\n\t" 7695 "DIV $tmp\n\t" 7696 "XCHG EAX,$tmp2\n" 7697 "fast:\n\t" 7698 "DIV $tmp\n" 7699 "done:\n\t" 7700 "MOV EDX,$tmp2\n\t" 7701 "NEG EDX:EAX # if $imm < 0" %} 7702 ins_encode %{ 7703 int con = (int)$imm$$constant; 7704 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor"); 7705 int pcon = (con > 0) ? con : -con; 7706 Label Lfast, Lpos, Ldone; 7707 7708 __ movl($tmp$$Register, pcon); 7709 __ xorl($tmp2$$Register,$tmp2$$Register); 7710 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); 7711 __ jccb(Assembler::above, Lfast); // result fits into 32 bit 7712 7713 __ movl($tmp2$$Register, $dst$$Register); // save 7714 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 7715 __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags 7716 __ jccb(Assembler::lessEqual, Lpos); // result is positive 7717 7718 // Negative dividend. 7719 // convert value to positive to use unsigned division 7720 __ lneg($dst$$Register, $tmp2$$Register); 7721 __ divl($tmp$$Register); 7722 __ xchgl($dst$$Register, $tmp2$$Register); 7723 __ divl($tmp$$Register); 7724 // revert result back to negative 7725 __ lneg($tmp2$$Register, $dst$$Register); 7726 __ jmpb(Ldone); 7727 7728 __ bind(Lpos); 7729 __ divl($tmp$$Register); // Use unsigned division 7730 __ xchgl($dst$$Register, $tmp2$$Register); 7731 // Fallthrow for final divide, tmp2 has 32 bit hi result 7732 7733 __ bind(Lfast); 7734 // fast path: src is positive 7735 __ divl($tmp$$Register); // Use unsigned division 7736 7737 __ bind(Ldone); 7738 __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register); 7739 if (con < 0) { 7740 __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register); 7741 } 7742 %} 7743 ins_pipe( pipe_slow ); 7744%} 7745 7746// Remainder Register Long (remainder fit into 32 bits) 7747instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{ 7748 match(Set dst (ModL dst imm)); 7749 effect( TEMP tmp, TEMP tmp2, KILL cr ); 7750 ins_cost(1000); 7751 format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t" 7752 "CMP $tmp,EDX\n\t" 7753 "JA,s fast\n\t" 7754 "MOV $tmp2,EAX\n\t" 7755 "MOV EAX,EDX\n\t" 7756 "MOV EDX,0\n\t" 7757 "JLE,s pos\n\t" 7758 "LNEG EAX : $tmp2\n\t" 7759 "DIV $tmp # unsigned division\n\t" 7760 "MOV EAX,$tmp2\n\t" 7761 "DIV $tmp\n\t" 7762 "NEG EDX\n\t" 7763 "JMP,s done\n" 7764 "pos:\n\t" 7765 "DIV $tmp\n\t" 7766 "MOV EAX,$tmp2\n" 7767 "fast:\n\t" 7768 "DIV $tmp\n" 7769 "done:\n\t" 7770 "MOV EAX,EDX\n\t" 7771 "SAR EDX,31\n\t" %} 7772 ins_encode %{ 7773 int con = (int)$imm$$constant; 7774 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor"); 7775 int pcon = (con > 0) ? con : -con; 7776 Label Lfast, Lpos, Ldone; 7777 7778 __ movl($tmp$$Register, pcon); 7779 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); 7780 __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit 7781 7782 __ movl($tmp2$$Register, $dst$$Register); // save 7783 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 7784 __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags 7785 __ jccb(Assembler::lessEqual, Lpos); // result is positive 7786 7787 // Negative dividend. 7788 // convert value to positive to use unsigned division 7789 __ lneg($dst$$Register, $tmp2$$Register); 7790 __ divl($tmp$$Register); 7791 __ movl($dst$$Register, $tmp2$$Register); 7792 __ divl($tmp$$Register); 7793 // revert remainder back to negative 7794 __ negl(HIGH_FROM_LOW($dst$$Register)); 7795 __ jmpb(Ldone); 7796 7797 __ bind(Lpos); 7798 __ divl($tmp$$Register); 7799 __ movl($dst$$Register, $tmp2$$Register); 7800 7801 __ bind(Lfast); 7802 // fast path: src is positive 7803 __ divl($tmp$$Register); 7804 7805 __ bind(Ldone); 7806 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 7807 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign 7808 7809 %} 7810 ins_pipe( pipe_slow ); 7811%} 7812 7813// Integer Shift Instructions 7814// Shift Left by one 7815instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 7816 match(Set dst (LShiftI dst shift)); 7817 effect(KILL cr); 7818 7819 size(2); 7820 format %{ "SHL $dst,$shift" %} 7821 opcode(0xD1, 0x4); /* D1 /4 */ 7822 ins_encode( OpcP, RegOpc( dst ) ); 7823 ins_pipe( ialu_reg ); 7824%} 7825 7826// Shift Left by 8-bit immediate 7827instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 7828 match(Set dst (LShiftI dst shift)); 7829 effect(KILL cr); 7830 7831 size(3); 7832 format %{ "SHL $dst,$shift" %} 7833 opcode(0xC1, 0x4); /* C1 /4 ib */ 7834 ins_encode( RegOpcImm( dst, shift) ); 7835 ins_pipe( ialu_reg ); 7836%} 7837 7838// Shift Left by variable 7839instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 7840 match(Set dst (LShiftI dst shift)); 7841 effect(KILL cr); 7842 7843 size(2); 7844 format %{ "SHL $dst,$shift" %} 7845 opcode(0xD3, 0x4); /* D3 /4 */ 7846 ins_encode( OpcP, RegOpc( dst ) ); 7847 ins_pipe( ialu_reg_reg ); 7848%} 7849 7850// Arithmetic shift right by one 7851instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 7852 match(Set dst (RShiftI dst shift)); 7853 effect(KILL cr); 7854 7855 size(2); 7856 format %{ "SAR $dst,$shift" %} 7857 opcode(0xD1, 0x7); /* D1 /7 */ 7858 ins_encode( OpcP, RegOpc( dst ) ); 7859 ins_pipe( ialu_reg ); 7860%} 7861 7862// Arithmetic shift right by one 7863instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{ 7864 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); 7865 effect(KILL cr); 7866 format %{ "SAR $dst,$shift" %} 7867 opcode(0xD1, 0x7); /* D1 /7 */ 7868 ins_encode( OpcP, RMopc_Mem(secondary,dst) ); 7869 ins_pipe( ialu_mem_imm ); 7870%} 7871 7872// Arithmetic Shift Right by 8-bit immediate 7873instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 7874 match(Set dst (RShiftI dst shift)); 7875 effect(KILL cr); 7876 7877 size(3); 7878 format %{ "SAR $dst,$shift" %} 7879 opcode(0xC1, 0x7); /* C1 /7 ib */ 7880 ins_encode( RegOpcImm( dst, shift ) ); 7881 ins_pipe( ialu_mem_imm ); 7882%} 7883 7884// Arithmetic Shift Right by 8-bit immediate 7885instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{ 7886 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); 7887 effect(KILL cr); 7888 7889 format %{ "SAR $dst,$shift" %} 7890 opcode(0xC1, 0x7); /* C1 /7 ib */ 7891 ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) ); 7892 ins_pipe( ialu_mem_imm ); 7893%} 7894 7895// Arithmetic Shift Right by variable 7896instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 7897 match(Set dst (RShiftI dst shift)); 7898 effect(KILL cr); 7899 7900 size(2); 7901 format %{ "SAR $dst,$shift" %} 7902 opcode(0xD3, 0x7); /* D3 /7 */ 7903 ins_encode( OpcP, RegOpc( dst ) ); 7904 ins_pipe( ialu_reg_reg ); 7905%} 7906 7907// Logical shift right by one 7908instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 7909 match(Set dst (URShiftI dst shift)); 7910 effect(KILL cr); 7911 7912 size(2); 7913 format %{ "SHR $dst,$shift" %} 7914 opcode(0xD1, 0x5); /* D1 /5 */ 7915 ins_encode( OpcP, RegOpc( dst ) ); 7916 ins_pipe( ialu_reg ); 7917%} 7918 7919// Logical Shift Right by 8-bit immediate 7920instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 7921 match(Set dst (URShiftI dst shift)); 7922 effect(KILL cr); 7923 7924 size(3); 7925 format %{ "SHR $dst,$shift" %} 7926 opcode(0xC1, 0x5); /* C1 /5 ib */ 7927 ins_encode( RegOpcImm( dst, shift) ); 7928 ins_pipe( ialu_reg ); 7929%} 7930 7931 7932// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24. 7933// This idiom is used by the compiler for the i2b bytecode. 7934instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{ 7935 match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); 7936 7937 size(3); 7938 format %{ "MOVSX $dst,$src :8" %} 7939 ins_encode %{ 7940 __ movsbl($dst$$Register, $src$$Register); 7941 %} 7942 ins_pipe(ialu_reg_reg); 7943%} 7944 7945// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16. 7946// This idiom is used by the compiler the i2s bytecode. 7947instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{ 7948 match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); 7949 7950 size(3); 7951 format %{ "MOVSX $dst,$src :16" %} 7952 ins_encode %{ 7953 __ movswl($dst$$Register, $src$$Register); 7954 %} 7955 ins_pipe(ialu_reg_reg); 7956%} 7957 7958 7959// Logical Shift Right by variable 7960instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 7961 match(Set dst (URShiftI dst shift)); 7962 effect(KILL cr); 7963 7964 size(2); 7965 format %{ "SHR $dst,$shift" %} 7966 opcode(0xD3, 0x5); /* D3 /5 */ 7967 ins_encode( OpcP, RegOpc( dst ) ); 7968 ins_pipe( ialu_reg_reg ); 7969%} 7970 7971 7972//----------Logical Instructions----------------------------------------------- 7973//----------Integer Logical Instructions--------------------------------------- 7974// And Instructions 7975// And Register with Register 7976instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7977 match(Set dst (AndI dst src)); 7978 effect(KILL cr); 7979 7980 size(2); 7981 format %{ "AND $dst,$src" %} 7982 opcode(0x23); 7983 ins_encode( OpcP, RegReg( dst, src) ); 7984 ins_pipe( ialu_reg_reg ); 7985%} 7986 7987// And Register with Immediate 7988instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 7989 match(Set dst (AndI dst src)); 7990 effect(KILL cr); 7991 7992 format %{ "AND $dst,$src" %} 7993 opcode(0x81,0x04); /* Opcode 81 /4 */ 7994 // ins_encode( RegImm( dst, src) ); 7995 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 7996 ins_pipe( ialu_reg ); 7997%} 7998 7999// And Register with Memory 8000instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8001 match(Set dst (AndI dst (LoadI src))); 8002 effect(KILL cr); 8003 8004 ins_cost(125); 8005 format %{ "AND $dst,$src" %} 8006 opcode(0x23); 8007 ins_encode( OpcP, RegMem( dst, src) ); 8008 ins_pipe( ialu_reg_mem ); 8009%} 8010 8011// And Memory with Register 8012instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8013 match(Set dst (StoreI dst (AndI (LoadI dst) src))); 8014 effect(KILL cr); 8015 8016 ins_cost(150); 8017 format %{ "AND $dst,$src" %} 8018 opcode(0x21); /* Opcode 21 /r */ 8019 ins_encode( OpcP, RegMem( src, dst ) ); 8020 ins_pipe( ialu_mem_reg ); 8021%} 8022 8023// And Memory with Immediate 8024instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8025 match(Set dst (StoreI dst (AndI (LoadI dst) src))); 8026 effect(KILL cr); 8027 8028 ins_cost(125); 8029 format %{ "AND $dst,$src" %} 8030 opcode(0x81, 0x4); /* Opcode 81 /4 id */ 8031 // ins_encode( MemImm( dst, src) ); 8032 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8033 ins_pipe( ialu_mem_imm ); 8034%} 8035 8036// BMI1 instructions 8037instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{ 8038 match(Set dst (AndI (XorI src1 minus_1) src2)); 8039 predicate(UseBMI1Instructions); 8040 effect(KILL cr); 8041 8042 format %{ "ANDNL $dst, $src1, $src2" %} 8043 8044 ins_encode %{ 8045 __ andnl($dst$$Register, $src1$$Register, $src2$$Register); 8046 %} 8047 ins_pipe(ialu_reg); 8048%} 8049 8050instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{ 8051 match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) )); 8052 predicate(UseBMI1Instructions); 8053 effect(KILL cr); 8054 8055 ins_cost(125); 8056 format %{ "ANDNL $dst, $src1, $src2" %} 8057 8058 ins_encode %{ 8059 __ andnl($dst$$Register, $src1$$Register, $src2$$Address); 8060 %} 8061 ins_pipe(ialu_reg_mem); 8062%} 8063 8064instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{ 8065 match(Set dst (AndI (SubI imm_zero src) src)); 8066 predicate(UseBMI1Instructions); 8067 effect(KILL cr); 8068 8069 format %{ "BLSIL $dst, $src" %} 8070 8071 ins_encode %{ 8072 __ blsil($dst$$Register, $src$$Register); 8073 %} 8074 ins_pipe(ialu_reg); 8075%} 8076 8077instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{ 8078 match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) )); 8079 predicate(UseBMI1Instructions); 8080 effect(KILL cr); 8081 8082 ins_cost(125); 8083 format %{ "BLSIL $dst, $src" %} 8084 8085 ins_encode %{ 8086 __ blsil($dst$$Register, $src$$Address); 8087 %} 8088 ins_pipe(ialu_reg_mem); 8089%} 8090 8091instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr) 8092%{ 8093 match(Set dst (XorI (AddI src minus_1) src)); 8094 predicate(UseBMI1Instructions); 8095 effect(KILL cr); 8096 8097 format %{ "BLSMSKL $dst, $src" %} 8098 8099 ins_encode %{ 8100 __ blsmskl($dst$$Register, $src$$Register); 8101 %} 8102 8103 ins_pipe(ialu_reg); 8104%} 8105 8106instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr) 8107%{ 8108 match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) )); 8109 predicate(UseBMI1Instructions); 8110 effect(KILL cr); 8111 8112 ins_cost(125); 8113 format %{ "BLSMSKL $dst, $src" %} 8114 8115 ins_encode %{ 8116 __ blsmskl($dst$$Register, $src$$Address); 8117 %} 8118 8119 ins_pipe(ialu_reg_mem); 8120%} 8121 8122instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr) 8123%{ 8124 match(Set dst (AndI (AddI src minus_1) src) ); 8125 predicate(UseBMI1Instructions); 8126 effect(KILL cr); 8127 8128 format %{ "BLSRL $dst, $src" %} 8129 8130 ins_encode %{ 8131 __ blsrl($dst$$Register, $src$$Register); 8132 %} 8133 8134 ins_pipe(ialu_reg); 8135%} 8136 8137instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr) 8138%{ 8139 match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) )); 8140 predicate(UseBMI1Instructions); 8141 effect(KILL cr); 8142 8143 ins_cost(125); 8144 format %{ "BLSRL $dst, $src" %} 8145 8146 ins_encode %{ 8147 __ blsrl($dst$$Register, $src$$Address); 8148 %} 8149 8150 ins_pipe(ialu_reg_mem); 8151%} 8152 8153// Or Instructions 8154// Or Register with Register 8155instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 8156 match(Set dst (OrI dst src)); 8157 effect(KILL cr); 8158 8159 size(2); 8160 format %{ "OR $dst,$src" %} 8161 opcode(0x0B); 8162 ins_encode( OpcP, RegReg( dst, src) ); 8163 ins_pipe( ialu_reg_reg ); 8164%} 8165 8166instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{ 8167 match(Set dst (OrI dst (CastP2X src))); 8168 effect(KILL cr); 8169 8170 size(2); 8171 format %{ "OR $dst,$src" %} 8172 opcode(0x0B); 8173 ins_encode( OpcP, RegReg( dst, src) ); 8174 ins_pipe( ialu_reg_reg ); 8175%} 8176 8177 8178// Or Register with Immediate 8179instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8180 match(Set dst (OrI dst src)); 8181 effect(KILL cr); 8182 8183 format %{ "OR $dst,$src" %} 8184 opcode(0x81,0x01); /* Opcode 81 /1 id */ 8185 // ins_encode( RegImm( dst, src) ); 8186 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8187 ins_pipe( ialu_reg ); 8188%} 8189 8190// Or Register with Memory 8191instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8192 match(Set dst (OrI dst (LoadI src))); 8193 effect(KILL cr); 8194 8195 ins_cost(125); 8196 format %{ "OR $dst,$src" %} 8197 opcode(0x0B); 8198 ins_encode( OpcP, RegMem( dst, src) ); 8199 ins_pipe( ialu_reg_mem ); 8200%} 8201 8202// Or Memory with Register 8203instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8204 match(Set dst (StoreI dst (OrI (LoadI dst) src))); 8205 effect(KILL cr); 8206 8207 ins_cost(150); 8208 format %{ "OR $dst,$src" %} 8209 opcode(0x09); /* Opcode 09 /r */ 8210 ins_encode( OpcP, RegMem( src, dst ) ); 8211 ins_pipe( ialu_mem_reg ); 8212%} 8213 8214// Or Memory with Immediate 8215instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8216 match(Set dst (StoreI dst (OrI (LoadI dst) src))); 8217 effect(KILL cr); 8218 8219 ins_cost(125); 8220 format %{ "OR $dst,$src" %} 8221 opcode(0x81,0x1); /* Opcode 81 /1 id */ 8222 // ins_encode( MemImm( dst, src) ); 8223 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8224 ins_pipe( ialu_mem_imm ); 8225%} 8226 8227// ROL/ROR 8228// ROL expand 8229instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 8230 effect(USE_DEF dst, USE shift, KILL cr); 8231 8232 format %{ "ROL $dst, $shift" %} 8233 opcode(0xD1, 0x0); /* Opcode D1 /0 */ 8234 ins_encode( OpcP, RegOpc( dst )); 8235 ins_pipe( ialu_reg ); 8236%} 8237 8238instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8239 effect(USE_DEF dst, USE shift, KILL cr); 8240 8241 format %{ "ROL $dst, $shift" %} 8242 opcode(0xC1, 0x0); /*Opcode /C1 /0 */ 8243 ins_encode( RegOpcImm(dst, shift) ); 8244 ins_pipe(ialu_reg); 8245%} 8246 8247instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{ 8248 effect(USE_DEF dst, USE shift, KILL cr); 8249 8250 format %{ "ROL $dst, $shift" %} 8251 opcode(0xD3, 0x0); /* Opcode D3 /0 */ 8252 ins_encode(OpcP, RegOpc(dst)); 8253 ins_pipe( ialu_reg_reg ); 8254%} 8255// end of ROL expand 8256 8257// ROL 32bit by one once 8258instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{ 8259 match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift))); 8260 8261 expand %{ 8262 rolI_eReg_imm1(dst, lshift, cr); 8263 %} 8264%} 8265 8266// ROL 32bit var by imm8 once 8267instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{ 8268 predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); 8269 match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift))); 8270 8271 expand %{ 8272 rolI_eReg_imm8(dst, lshift, cr); 8273 %} 8274%} 8275 8276// ROL 32bit var by var once 8277instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{ 8278 match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift)))); 8279 8280 expand %{ 8281 rolI_eReg_CL(dst, shift, cr); 8282 %} 8283%} 8284 8285// ROL 32bit var by var once 8286instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{ 8287 match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift)))); 8288 8289 expand %{ 8290 rolI_eReg_CL(dst, shift, cr); 8291 %} 8292%} 8293 8294// ROR expand 8295instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 8296 effect(USE_DEF dst, USE shift, KILL cr); 8297 8298 format %{ "ROR $dst, $shift" %} 8299 opcode(0xD1,0x1); /* Opcode D1 /1 */ 8300 ins_encode( OpcP, RegOpc( dst ) ); 8301 ins_pipe( ialu_reg ); 8302%} 8303 8304instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8305 effect (USE_DEF dst, USE shift, KILL cr); 8306 8307 format %{ "ROR $dst, $shift" %} 8308 opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */ 8309 ins_encode( RegOpcImm(dst, shift) ); 8310 ins_pipe( ialu_reg ); 8311%} 8312 8313instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{ 8314 effect(USE_DEF dst, USE shift, KILL cr); 8315 8316 format %{ "ROR $dst, $shift" %} 8317 opcode(0xD3, 0x1); /* Opcode D3 /1 */ 8318 ins_encode(OpcP, RegOpc(dst)); 8319 ins_pipe( ialu_reg_reg ); 8320%} 8321// end of ROR expand 8322 8323// ROR right once 8324instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{ 8325 match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); 8326 8327 expand %{ 8328 rorI_eReg_imm1(dst, rshift, cr); 8329 %} 8330%} 8331 8332// ROR 32bit by immI8 once 8333instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{ 8334 predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); 8335 match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); 8336 8337 expand %{ 8338 rorI_eReg_imm8(dst, rshift, cr); 8339 %} 8340%} 8341 8342// ROR 32bit var by var once 8343instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{ 8344 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift)))); 8345 8346 expand %{ 8347 rorI_eReg_CL(dst, shift, cr); 8348 %} 8349%} 8350 8351// ROR 32bit var by var once 8352instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{ 8353 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift)))); 8354 8355 expand %{ 8356 rorI_eReg_CL(dst, shift, cr); 8357 %} 8358%} 8359 8360// Xor Instructions 8361// Xor Register with Register 8362instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 8363 match(Set dst (XorI dst src)); 8364 effect(KILL cr); 8365 8366 size(2); 8367 format %{ "XOR $dst,$src" %} 8368 opcode(0x33); 8369 ins_encode( OpcP, RegReg( dst, src) ); 8370 ins_pipe( ialu_reg_reg ); 8371%} 8372 8373// Xor Register with Immediate -1 8374instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{ 8375 match(Set dst (XorI dst imm)); 8376 8377 size(2); 8378 format %{ "NOT $dst" %} 8379 ins_encode %{ 8380 __ notl($dst$$Register); 8381 %} 8382 ins_pipe( ialu_reg ); 8383%} 8384 8385// Xor Register with Immediate 8386instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8387 match(Set dst (XorI dst src)); 8388 effect(KILL cr); 8389 8390 format %{ "XOR $dst,$src" %} 8391 opcode(0x81,0x06); /* Opcode 81 /6 id */ 8392 // ins_encode( RegImm( dst, src) ); 8393 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8394 ins_pipe( ialu_reg ); 8395%} 8396 8397// Xor Register with Memory 8398instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8399 match(Set dst (XorI dst (LoadI src))); 8400 effect(KILL cr); 8401 8402 ins_cost(125); 8403 format %{ "XOR $dst,$src" %} 8404 opcode(0x33); 8405 ins_encode( OpcP, RegMem(dst, src) ); 8406 ins_pipe( ialu_reg_mem ); 8407%} 8408 8409// Xor Memory with Register 8410instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8411 match(Set dst (StoreI dst (XorI (LoadI dst) src))); 8412 effect(KILL cr); 8413 8414 ins_cost(150); 8415 format %{ "XOR $dst,$src" %} 8416 opcode(0x31); /* Opcode 31 /r */ 8417 ins_encode( OpcP, RegMem( src, dst ) ); 8418 ins_pipe( ialu_mem_reg ); 8419%} 8420 8421// Xor Memory with Immediate 8422instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8423 match(Set dst (StoreI dst (XorI (LoadI dst) src))); 8424 effect(KILL cr); 8425 8426 ins_cost(125); 8427 format %{ "XOR $dst,$src" %} 8428 opcode(0x81,0x6); /* Opcode 81 /6 id */ 8429 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8430 ins_pipe( ialu_mem_imm ); 8431%} 8432 8433//----------Convert Int to Boolean--------------------------------------------- 8434 8435instruct movI_nocopy(rRegI dst, rRegI src) %{ 8436 effect( DEF dst, USE src ); 8437 format %{ "MOV $dst,$src" %} 8438 ins_encode( enc_Copy( dst, src) ); 8439 ins_pipe( ialu_reg_reg ); 8440%} 8441 8442instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{ 8443 effect( USE_DEF dst, USE src, KILL cr ); 8444 8445 size(4); 8446 format %{ "NEG $dst\n\t" 8447 "ADC $dst,$src" %} 8448 ins_encode( neg_reg(dst), 8449 OpcRegReg(0x13,dst,src) ); 8450 ins_pipe( ialu_reg_reg_long ); 8451%} 8452 8453instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{ 8454 match(Set dst (Conv2B src)); 8455 8456 expand %{ 8457 movI_nocopy(dst,src); 8458 ci2b(dst,src,cr); 8459 %} 8460%} 8461 8462instruct movP_nocopy(rRegI dst, eRegP src) %{ 8463 effect( DEF dst, USE src ); 8464 format %{ "MOV $dst,$src" %} 8465 ins_encode( enc_Copy( dst, src) ); 8466 ins_pipe( ialu_reg_reg ); 8467%} 8468 8469instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{ 8470 effect( USE_DEF dst, USE src, KILL cr ); 8471 format %{ "NEG $dst\n\t" 8472 "ADC $dst,$src" %} 8473 ins_encode( neg_reg(dst), 8474 OpcRegReg(0x13,dst,src) ); 8475 ins_pipe( ialu_reg_reg_long ); 8476%} 8477 8478instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{ 8479 match(Set dst (Conv2B src)); 8480 8481 expand %{ 8482 movP_nocopy(dst,src); 8483 cp2b(dst,src,cr); 8484 %} 8485%} 8486 8487instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{ 8488 match(Set dst (CmpLTMask p q)); 8489 effect(KILL cr); 8490 ins_cost(400); 8491 8492 // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination 8493 format %{ "XOR $dst,$dst\n\t" 8494 "CMP $p,$q\n\t" 8495 "SETlt $dst\n\t" 8496 "NEG $dst" %} 8497 ins_encode %{ 8498 Register Rp = $p$$Register; 8499 Register Rq = $q$$Register; 8500 Register Rd = $dst$$Register; 8501 Label done; 8502 __ xorl(Rd, Rd); 8503 __ cmpl(Rp, Rq); 8504 __ setb(Assembler::less, Rd); 8505 __ negl(Rd); 8506 %} 8507 8508 ins_pipe(pipe_slow); 8509%} 8510 8511instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{ 8512 match(Set dst (CmpLTMask dst zero)); 8513 effect(DEF dst, KILL cr); 8514 ins_cost(100); 8515 8516 format %{ "SAR $dst,31\t# cmpLTMask0" %} 8517 ins_encode %{ 8518 __ sarl($dst$$Register, 31); 8519 %} 8520 ins_pipe(ialu_reg); 8521%} 8522 8523/* better to save a register than avoid a branch */ 8524instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{ 8525 match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q))); 8526 effect(KILL cr); 8527 ins_cost(400); 8528 format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t" 8529 "JGE done\n\t" 8530 "ADD $p,$y\n" 8531 "done: " %} 8532 ins_encode %{ 8533 Register Rp = $p$$Register; 8534 Register Rq = $q$$Register; 8535 Register Ry = $y$$Register; 8536 Label done; 8537 __ subl(Rp, Rq); 8538 __ jccb(Assembler::greaterEqual, done); 8539 __ addl(Rp, Ry); 8540 __ bind(done); 8541 %} 8542 8543 ins_pipe(pipe_cmplt); 8544%} 8545 8546/* better to save a register than avoid a branch */ 8547instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{ 8548 match(Set y (AndI (CmpLTMask p q) y)); 8549 effect(KILL cr); 8550 8551 ins_cost(300); 8552 8553 format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t" 8554 "JLT done\n\t" 8555 "XORL $y, $y\n" 8556 "done: " %} 8557 ins_encode %{ 8558 Register Rp = $p$$Register; 8559 Register Rq = $q$$Register; 8560 Register Ry = $y$$Register; 8561 Label done; 8562 __ cmpl(Rp, Rq); 8563 __ jccb(Assembler::less, done); 8564 __ xorl(Ry, Ry); 8565 __ bind(done); 8566 %} 8567 8568 ins_pipe(pipe_cmplt); 8569%} 8570 8571/* If I enable this, I encourage spilling in the inner loop of compress. 8572instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{ 8573 match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q))); 8574*/ 8575//----------Overflow Math Instructions----------------------------------------- 8576 8577instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2) 8578%{ 8579 match(Set cr (OverflowAddI op1 op2)); 8580 effect(DEF cr, USE_KILL op1, USE op2); 8581 8582 format %{ "ADD $op1, $op2\t# overflow check int" %} 8583 8584 ins_encode %{ 8585 __ addl($op1$$Register, $op2$$Register); 8586 %} 8587 ins_pipe(ialu_reg_reg); 8588%} 8589 8590instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2) 8591%{ 8592 match(Set cr (OverflowAddI op1 op2)); 8593 effect(DEF cr, USE_KILL op1, USE op2); 8594 8595 format %{ "ADD $op1, $op2\t# overflow check int" %} 8596 8597 ins_encode %{ 8598 __ addl($op1$$Register, $op2$$constant); 8599 %} 8600 ins_pipe(ialu_reg_reg); 8601%} 8602 8603instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2) 8604%{ 8605 match(Set cr (OverflowSubI op1 op2)); 8606 8607 format %{ "CMP $op1, $op2\t# overflow check int" %} 8608 ins_encode %{ 8609 __ cmpl($op1$$Register, $op2$$Register); 8610 %} 8611 ins_pipe(ialu_reg_reg); 8612%} 8613 8614instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2) 8615%{ 8616 match(Set cr (OverflowSubI op1 op2)); 8617 8618 format %{ "CMP $op1, $op2\t# overflow check int" %} 8619 ins_encode %{ 8620 __ cmpl($op1$$Register, $op2$$constant); 8621 %} 8622 ins_pipe(ialu_reg_reg); 8623%} 8624 8625instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2) 8626%{ 8627 match(Set cr (OverflowSubI zero op2)); 8628 effect(DEF cr, USE_KILL op2); 8629 8630 format %{ "NEG $op2\t# overflow check int" %} 8631 ins_encode %{ 8632 __ negl($op2$$Register); 8633 %} 8634 ins_pipe(ialu_reg_reg); 8635%} 8636 8637instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2) 8638%{ 8639 match(Set cr (OverflowMulI op1 op2)); 8640 effect(DEF cr, USE_KILL op1, USE op2); 8641 8642 format %{ "IMUL $op1, $op2\t# overflow check int" %} 8643 ins_encode %{ 8644 __ imull($op1$$Register, $op2$$Register); 8645 %} 8646 ins_pipe(ialu_reg_reg_alu0); 8647%} 8648 8649instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp) 8650%{ 8651 match(Set cr (OverflowMulI op1 op2)); 8652 effect(DEF cr, TEMP tmp, USE op1, USE op2); 8653 8654 format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %} 8655 ins_encode %{ 8656 __ imull($tmp$$Register, $op1$$Register, $op2$$constant); 8657 %} 8658 ins_pipe(ialu_reg_reg_alu0); 8659%} 8660 8661//----------Long Instructions------------------------------------------------ 8662// Add Long Register with Register 8663instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8664 match(Set dst (AddL dst src)); 8665 effect(KILL cr); 8666 ins_cost(200); 8667 format %{ "ADD $dst.lo,$src.lo\n\t" 8668 "ADC $dst.hi,$src.hi" %} 8669 opcode(0x03, 0x13); 8670 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) ); 8671 ins_pipe( ialu_reg_reg_long ); 8672%} 8673 8674// Add Long Register with Immediate 8675instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8676 match(Set dst (AddL dst src)); 8677 effect(KILL cr); 8678 format %{ "ADD $dst.lo,$src.lo\n\t" 8679 "ADC $dst.hi,$src.hi" %} 8680 opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 */ 8681 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8682 ins_pipe( ialu_reg_long ); 8683%} 8684 8685// Add Long Register with Memory 8686instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8687 match(Set dst (AddL dst (LoadL mem))); 8688 effect(KILL cr); 8689 ins_cost(125); 8690 format %{ "ADD $dst.lo,$mem\n\t" 8691 "ADC $dst.hi,$mem+4" %} 8692 opcode(0x03, 0x13); 8693 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8694 ins_pipe( ialu_reg_long_mem ); 8695%} 8696 8697// Subtract Long Register with Register. 8698instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8699 match(Set dst (SubL dst src)); 8700 effect(KILL cr); 8701 ins_cost(200); 8702 format %{ "SUB $dst.lo,$src.lo\n\t" 8703 "SBB $dst.hi,$src.hi" %} 8704 opcode(0x2B, 0x1B); 8705 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) ); 8706 ins_pipe( ialu_reg_reg_long ); 8707%} 8708 8709// Subtract Long Register with Immediate 8710instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8711 match(Set dst (SubL dst src)); 8712 effect(KILL cr); 8713 format %{ "SUB $dst.lo,$src.lo\n\t" 8714 "SBB $dst.hi,$src.hi" %} 8715 opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */ 8716 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8717 ins_pipe( ialu_reg_long ); 8718%} 8719 8720// Subtract Long Register with Memory 8721instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8722 match(Set dst (SubL dst (LoadL mem))); 8723 effect(KILL cr); 8724 ins_cost(125); 8725 format %{ "SUB $dst.lo,$mem\n\t" 8726 "SBB $dst.hi,$mem+4" %} 8727 opcode(0x2B, 0x1B); 8728 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8729 ins_pipe( ialu_reg_long_mem ); 8730%} 8731 8732instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{ 8733 match(Set dst (SubL zero dst)); 8734 effect(KILL cr); 8735 ins_cost(300); 8736 format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %} 8737 ins_encode( neg_long(dst) ); 8738 ins_pipe( ialu_reg_reg_long ); 8739%} 8740 8741// And Long Register with Register 8742instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8743 match(Set dst (AndL dst src)); 8744 effect(KILL cr); 8745 format %{ "AND $dst.lo,$src.lo\n\t" 8746 "AND $dst.hi,$src.hi" %} 8747 opcode(0x23,0x23); 8748 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 8749 ins_pipe( ialu_reg_reg_long ); 8750%} 8751 8752// And Long Register with Immediate 8753instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8754 match(Set dst (AndL dst src)); 8755 effect(KILL cr); 8756 format %{ "AND $dst.lo,$src.lo\n\t" 8757 "AND $dst.hi,$src.hi" %} 8758 opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */ 8759 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8760 ins_pipe( ialu_reg_long ); 8761%} 8762 8763// And Long Register with Memory 8764instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8765 match(Set dst (AndL dst (LoadL mem))); 8766 effect(KILL cr); 8767 ins_cost(125); 8768 format %{ "AND $dst.lo,$mem\n\t" 8769 "AND $dst.hi,$mem+4" %} 8770 opcode(0x23, 0x23); 8771 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8772 ins_pipe( ialu_reg_long_mem ); 8773%} 8774 8775// BMI1 instructions 8776instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{ 8777 match(Set dst (AndL (XorL src1 minus_1) src2)); 8778 predicate(UseBMI1Instructions); 8779 effect(KILL cr, TEMP dst); 8780 8781 format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t" 8782 "ANDNL $dst.hi, $src1.hi, $src2.hi" 8783 %} 8784 8785 ins_encode %{ 8786 Register Rdst = $dst$$Register; 8787 Register Rsrc1 = $src1$$Register; 8788 Register Rsrc2 = $src2$$Register; 8789 __ andnl(Rdst, Rsrc1, Rsrc2); 8790 __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2)); 8791 %} 8792 ins_pipe(ialu_reg_reg_long); 8793%} 8794 8795instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{ 8796 match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) )); 8797 predicate(UseBMI1Instructions); 8798 effect(KILL cr, TEMP dst); 8799 8800 ins_cost(125); 8801 format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t" 8802 "ANDNL $dst.hi, $src1.hi, $src2+4" 8803 %} 8804 8805 ins_encode %{ 8806 Register Rdst = $dst$$Register; 8807 Register Rsrc1 = $src1$$Register; 8808 Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none); 8809 8810 __ andnl(Rdst, Rsrc1, $src2$$Address); 8811 __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi); 8812 %} 8813 ins_pipe(ialu_reg_mem); 8814%} 8815 8816instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{ 8817 match(Set dst (AndL (SubL imm_zero src) src)); 8818 predicate(UseBMI1Instructions); 8819 effect(KILL cr, TEMP dst); 8820 8821 format %{ "MOVL $dst.hi, 0\n\t" 8822 "BLSIL $dst.lo, $src.lo\n\t" 8823 "JNZ done\n\t" 8824 "BLSIL $dst.hi, $src.hi\n" 8825 "done:" 8826 %} 8827 8828 ins_encode %{ 8829 Label done; 8830 Register Rdst = $dst$$Register; 8831 Register Rsrc = $src$$Register; 8832 __ movl(HIGH_FROM_LOW(Rdst), 0); 8833 __ blsil(Rdst, Rsrc); 8834 __ jccb(Assembler::notZero, done); 8835 __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 8836 __ bind(done); 8837 %} 8838 ins_pipe(ialu_reg); 8839%} 8840 8841instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{ 8842 match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) )); 8843 predicate(UseBMI1Instructions); 8844 effect(KILL cr, TEMP dst); 8845 8846 ins_cost(125); 8847 format %{ "MOVL $dst.hi, 0\n\t" 8848 "BLSIL $dst.lo, $src\n\t" 8849 "JNZ done\n\t" 8850 "BLSIL $dst.hi, $src+4\n" 8851 "done:" 8852 %} 8853 8854 ins_encode %{ 8855 Label done; 8856 Register Rdst = $dst$$Register; 8857 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 8858 8859 __ movl(HIGH_FROM_LOW(Rdst), 0); 8860 __ blsil(Rdst, $src$$Address); 8861 __ jccb(Assembler::notZero, done); 8862 __ blsil(HIGH_FROM_LOW(Rdst), src_hi); 8863 __ bind(done); 8864 %} 8865 ins_pipe(ialu_reg_mem); 8866%} 8867 8868instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 8869%{ 8870 match(Set dst (XorL (AddL src minus_1) src)); 8871 predicate(UseBMI1Instructions); 8872 effect(KILL cr, TEMP dst); 8873 8874 format %{ "MOVL $dst.hi, 0\n\t" 8875 "BLSMSKL $dst.lo, $src.lo\n\t" 8876 "JNC done\n\t" 8877 "BLSMSKL $dst.hi, $src.hi\n" 8878 "done:" 8879 %} 8880 8881 ins_encode %{ 8882 Label done; 8883 Register Rdst = $dst$$Register; 8884 Register Rsrc = $src$$Register; 8885 __ movl(HIGH_FROM_LOW(Rdst), 0); 8886 __ blsmskl(Rdst, Rsrc); 8887 __ jccb(Assembler::carryClear, done); 8888 __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 8889 __ bind(done); 8890 %} 8891 8892 ins_pipe(ialu_reg); 8893%} 8894 8895instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 8896%{ 8897 match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) )); 8898 predicate(UseBMI1Instructions); 8899 effect(KILL cr, TEMP dst); 8900 8901 ins_cost(125); 8902 format %{ "MOVL $dst.hi, 0\n\t" 8903 "BLSMSKL $dst.lo, $src\n\t" 8904 "JNC done\n\t" 8905 "BLSMSKL $dst.hi, $src+4\n" 8906 "done:" 8907 %} 8908 8909 ins_encode %{ 8910 Label done; 8911 Register Rdst = $dst$$Register; 8912 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 8913 8914 __ movl(HIGH_FROM_LOW(Rdst), 0); 8915 __ blsmskl(Rdst, $src$$Address); 8916 __ jccb(Assembler::carryClear, done); 8917 __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi); 8918 __ bind(done); 8919 %} 8920 8921 ins_pipe(ialu_reg_mem); 8922%} 8923 8924instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 8925%{ 8926 match(Set dst (AndL (AddL src minus_1) src) ); 8927 predicate(UseBMI1Instructions); 8928 effect(KILL cr, TEMP dst); 8929 8930 format %{ "MOVL $dst.hi, $src.hi\n\t" 8931 "BLSRL $dst.lo, $src.lo\n\t" 8932 "JNC done\n\t" 8933 "BLSRL $dst.hi, $src.hi\n" 8934 "done:" 8935 %} 8936 8937 ins_encode %{ 8938 Label done; 8939 Register Rdst = $dst$$Register; 8940 Register Rsrc = $src$$Register; 8941 __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 8942 __ blsrl(Rdst, Rsrc); 8943 __ jccb(Assembler::carryClear, done); 8944 __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 8945 __ bind(done); 8946 %} 8947 8948 ins_pipe(ialu_reg); 8949%} 8950 8951instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 8952%{ 8953 match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) )); 8954 predicate(UseBMI1Instructions); 8955 effect(KILL cr, TEMP dst); 8956 8957 ins_cost(125); 8958 format %{ "MOVL $dst.hi, $src+4\n\t" 8959 "BLSRL $dst.lo, $src\n\t" 8960 "JNC done\n\t" 8961 "BLSRL $dst.hi, $src+4\n" 8962 "done:" 8963 %} 8964 8965 ins_encode %{ 8966 Label done; 8967 Register Rdst = $dst$$Register; 8968 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 8969 __ movl(HIGH_FROM_LOW(Rdst), src_hi); 8970 __ blsrl(Rdst, $src$$Address); 8971 __ jccb(Assembler::carryClear, done); 8972 __ blsrl(HIGH_FROM_LOW(Rdst), src_hi); 8973 __ bind(done); 8974 %} 8975 8976 ins_pipe(ialu_reg_mem); 8977%} 8978 8979// Or Long Register with Register 8980instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8981 match(Set dst (OrL dst src)); 8982 effect(KILL cr); 8983 format %{ "OR $dst.lo,$src.lo\n\t" 8984 "OR $dst.hi,$src.hi" %} 8985 opcode(0x0B,0x0B); 8986 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 8987 ins_pipe( ialu_reg_reg_long ); 8988%} 8989 8990// Or Long Register with Immediate 8991instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8992 match(Set dst (OrL dst src)); 8993 effect(KILL cr); 8994 format %{ "OR $dst.lo,$src.lo\n\t" 8995 "OR $dst.hi,$src.hi" %} 8996 opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */ 8997 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8998 ins_pipe( ialu_reg_long ); 8999%} 9000 9001// Or Long Register with Memory 9002instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9003 match(Set dst (OrL dst (LoadL mem))); 9004 effect(KILL cr); 9005 ins_cost(125); 9006 format %{ "OR $dst.lo,$mem\n\t" 9007 "OR $dst.hi,$mem+4" %} 9008 opcode(0x0B,0x0B); 9009 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9010 ins_pipe( ialu_reg_long_mem ); 9011%} 9012 9013// Xor Long Register with Register 9014instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9015 match(Set dst (XorL dst src)); 9016 effect(KILL cr); 9017 format %{ "XOR $dst.lo,$src.lo\n\t" 9018 "XOR $dst.hi,$src.hi" %} 9019 opcode(0x33,0x33); 9020 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9021 ins_pipe( ialu_reg_reg_long ); 9022%} 9023 9024// Xor Long Register with Immediate -1 9025instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{ 9026 match(Set dst (XorL dst imm)); 9027 format %{ "NOT $dst.lo\n\t" 9028 "NOT $dst.hi" %} 9029 ins_encode %{ 9030 __ notl($dst$$Register); 9031 __ notl(HIGH_FROM_LOW($dst$$Register)); 9032 %} 9033 ins_pipe( ialu_reg_long ); 9034%} 9035 9036// Xor Long Register with Immediate 9037instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9038 match(Set dst (XorL dst src)); 9039 effect(KILL cr); 9040 format %{ "XOR $dst.lo,$src.lo\n\t" 9041 "XOR $dst.hi,$src.hi" %} 9042 opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */ 9043 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 9044 ins_pipe( ialu_reg_long ); 9045%} 9046 9047// Xor Long Register with Memory 9048instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9049 match(Set dst (XorL dst (LoadL mem))); 9050 effect(KILL cr); 9051 ins_cost(125); 9052 format %{ "XOR $dst.lo,$mem\n\t" 9053 "XOR $dst.hi,$mem+4" %} 9054 opcode(0x33,0x33); 9055 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9056 ins_pipe( ialu_reg_long_mem ); 9057%} 9058 9059// Shift Left Long by 1 9060instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{ 9061 predicate(UseNewLongLShift); 9062 match(Set dst (LShiftL dst cnt)); 9063 effect(KILL cr); 9064 ins_cost(100); 9065 format %{ "ADD $dst.lo,$dst.lo\n\t" 9066 "ADC $dst.hi,$dst.hi" %} 9067 ins_encode %{ 9068 __ addl($dst$$Register,$dst$$Register); 9069 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9070 %} 9071 ins_pipe( ialu_reg_long ); 9072%} 9073 9074// Shift Left Long by 2 9075instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{ 9076 predicate(UseNewLongLShift); 9077 match(Set dst (LShiftL dst cnt)); 9078 effect(KILL cr); 9079 ins_cost(100); 9080 format %{ "ADD $dst.lo,$dst.lo\n\t" 9081 "ADC $dst.hi,$dst.hi\n\t" 9082 "ADD $dst.lo,$dst.lo\n\t" 9083 "ADC $dst.hi,$dst.hi" %} 9084 ins_encode %{ 9085 __ addl($dst$$Register,$dst$$Register); 9086 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9087 __ addl($dst$$Register,$dst$$Register); 9088 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9089 %} 9090 ins_pipe( ialu_reg_long ); 9091%} 9092 9093// Shift Left Long by 3 9094instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{ 9095 predicate(UseNewLongLShift); 9096 match(Set dst (LShiftL dst cnt)); 9097 effect(KILL cr); 9098 ins_cost(100); 9099 format %{ "ADD $dst.lo,$dst.lo\n\t" 9100 "ADC $dst.hi,$dst.hi\n\t" 9101 "ADD $dst.lo,$dst.lo\n\t" 9102 "ADC $dst.hi,$dst.hi\n\t" 9103 "ADD $dst.lo,$dst.lo\n\t" 9104 "ADC $dst.hi,$dst.hi" %} 9105 ins_encode %{ 9106 __ addl($dst$$Register,$dst$$Register); 9107 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9108 __ addl($dst$$Register,$dst$$Register); 9109 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9110 __ addl($dst$$Register,$dst$$Register); 9111 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9112 %} 9113 ins_pipe( ialu_reg_long ); 9114%} 9115 9116// Shift Left Long by 1-31 9117instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9118 match(Set dst (LShiftL dst cnt)); 9119 effect(KILL cr); 9120 ins_cost(200); 9121 format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t" 9122 "SHL $dst.lo,$cnt" %} 9123 opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */ 9124 ins_encode( move_long_small_shift(dst,cnt) ); 9125 ins_pipe( ialu_reg_long ); 9126%} 9127 9128// Shift Left Long by 32-63 9129instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9130 match(Set dst (LShiftL dst cnt)); 9131 effect(KILL cr); 9132 ins_cost(300); 9133 format %{ "MOV $dst.hi,$dst.lo\n" 9134 "\tSHL $dst.hi,$cnt-32\n" 9135 "\tXOR $dst.lo,$dst.lo" %} 9136 opcode(0xC1, 0x4); /* C1 /4 ib */ 9137 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9138 ins_pipe( ialu_reg_long ); 9139%} 9140 9141// Shift Left Long by variable 9142instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9143 match(Set dst (LShiftL dst shift)); 9144 effect(KILL cr); 9145 ins_cost(500+200); 9146 size(17); 9147 format %{ "TEST $shift,32\n\t" 9148 "JEQ,s small\n\t" 9149 "MOV $dst.hi,$dst.lo\n\t" 9150 "XOR $dst.lo,$dst.lo\n" 9151 "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t" 9152 "SHL $dst.lo,$shift" %} 9153 ins_encode( shift_left_long( dst, shift ) ); 9154 ins_pipe( pipe_slow ); 9155%} 9156 9157// Shift Right Long by 1-31 9158instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9159 match(Set dst (URShiftL dst cnt)); 9160 effect(KILL cr); 9161 ins_cost(200); 9162 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9163 "SHR $dst.hi,$cnt" %} 9164 opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */ 9165 ins_encode( move_long_small_shift(dst,cnt) ); 9166 ins_pipe( ialu_reg_long ); 9167%} 9168 9169// Shift Right Long by 32-63 9170instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9171 match(Set dst (URShiftL dst cnt)); 9172 effect(KILL cr); 9173 ins_cost(300); 9174 format %{ "MOV $dst.lo,$dst.hi\n" 9175 "\tSHR $dst.lo,$cnt-32\n" 9176 "\tXOR $dst.hi,$dst.hi" %} 9177 opcode(0xC1, 0x5); /* C1 /5 ib */ 9178 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9179 ins_pipe( ialu_reg_long ); 9180%} 9181 9182// Shift Right Long by variable 9183instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9184 match(Set dst (URShiftL dst shift)); 9185 effect(KILL cr); 9186 ins_cost(600); 9187 size(17); 9188 format %{ "TEST $shift,32\n\t" 9189 "JEQ,s small\n\t" 9190 "MOV $dst.lo,$dst.hi\n\t" 9191 "XOR $dst.hi,$dst.hi\n" 9192 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9193 "SHR $dst.hi,$shift" %} 9194 ins_encode( shift_right_long( dst, shift ) ); 9195 ins_pipe( pipe_slow ); 9196%} 9197 9198// Shift Right Long by 1-31 9199instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9200 match(Set dst (RShiftL dst cnt)); 9201 effect(KILL cr); 9202 ins_cost(200); 9203 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9204 "SAR $dst.hi,$cnt" %} 9205 opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */ 9206 ins_encode( move_long_small_shift(dst,cnt) ); 9207 ins_pipe( ialu_reg_long ); 9208%} 9209 9210// Shift Right Long by 32-63 9211instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9212 match(Set dst (RShiftL dst cnt)); 9213 effect(KILL cr); 9214 ins_cost(300); 9215 format %{ "MOV $dst.lo,$dst.hi\n" 9216 "\tSAR $dst.lo,$cnt-32\n" 9217 "\tSAR $dst.hi,31" %} 9218 opcode(0xC1, 0x7); /* C1 /7 ib */ 9219 ins_encode( move_long_big_shift_sign(dst,cnt) ); 9220 ins_pipe( ialu_reg_long ); 9221%} 9222 9223// Shift Right arithmetic Long by variable 9224instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9225 match(Set dst (RShiftL dst shift)); 9226 effect(KILL cr); 9227 ins_cost(600); 9228 size(18); 9229 format %{ "TEST $shift,32\n\t" 9230 "JEQ,s small\n\t" 9231 "MOV $dst.lo,$dst.hi\n\t" 9232 "SAR $dst.hi,31\n" 9233 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9234 "SAR $dst.hi,$shift" %} 9235 ins_encode( shift_right_arith_long( dst, shift ) ); 9236 ins_pipe( pipe_slow ); 9237%} 9238 9239 9240//----------Double Instructions------------------------------------------------ 9241// Double Math 9242 9243// Compare & branch 9244 9245// P6 version of float compare, sets condition codes in EFLAGS 9246instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9247 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9248 match(Set cr (CmpD src1 src2)); 9249 effect(KILL rax); 9250 ins_cost(150); 9251 format %{ "FLD $src1\n\t" 9252 "FUCOMIP ST,$src2 // P6 instruction\n\t" 9253 "JNP exit\n\t" 9254 "MOV ah,1 // saw a NaN, set CF\n\t" 9255 "SAHF\n" 9256 "exit:\tNOP // avoid branch to branch" %} 9257 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9258 ins_encode( Push_Reg_DPR(src1), 9259 OpcP, RegOpc(src2), 9260 cmpF_P6_fixup ); 9261 ins_pipe( pipe_slow ); 9262%} 9263 9264instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{ 9265 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9266 match(Set cr (CmpD src1 src2)); 9267 ins_cost(150); 9268 format %{ "FLD $src1\n\t" 9269 "FUCOMIP ST,$src2 // P6 instruction" %} 9270 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9271 ins_encode( Push_Reg_DPR(src1), 9272 OpcP, RegOpc(src2)); 9273 ins_pipe( pipe_slow ); 9274%} 9275 9276// Compare & branch 9277instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9278 predicate(UseSSE<=1); 9279 match(Set cr (CmpD src1 src2)); 9280 effect(KILL rax); 9281 ins_cost(200); 9282 format %{ "FLD $src1\n\t" 9283 "FCOMp $src2\n\t" 9284 "FNSTSW AX\n\t" 9285 "TEST AX,0x400\n\t" 9286 "JZ,s flags\n\t" 9287 "MOV AH,1\t# unordered treat as LT\n" 9288 "flags:\tSAHF" %} 9289 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9290 ins_encode( Push_Reg_DPR(src1), 9291 OpcP, RegOpc(src2), 9292 fpu_flags); 9293 ins_pipe( pipe_slow ); 9294%} 9295 9296// Compare vs zero into -1,0,1 9297instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 9298 predicate(UseSSE<=1); 9299 match(Set dst (CmpD3 src1 zero)); 9300 effect(KILL cr, KILL rax); 9301 ins_cost(280); 9302 format %{ "FTSTD $dst,$src1" %} 9303 opcode(0xE4, 0xD9); 9304 ins_encode( Push_Reg_DPR(src1), 9305 OpcS, OpcP, PopFPU, 9306 CmpF_Result(dst)); 9307 ins_pipe( pipe_slow ); 9308%} 9309 9310// Compare into -1,0,1 9311instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{ 9312 predicate(UseSSE<=1); 9313 match(Set dst (CmpD3 src1 src2)); 9314 effect(KILL cr, KILL rax); 9315 ins_cost(300); 9316 format %{ "FCMPD $dst,$src1,$src2" %} 9317 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9318 ins_encode( Push_Reg_DPR(src1), 9319 OpcP, RegOpc(src2), 9320 CmpF_Result(dst)); 9321 ins_pipe( pipe_slow ); 9322%} 9323 9324// float compare and set condition codes in EFLAGS by XMM regs 9325instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{ 9326 predicate(UseSSE>=2); 9327 match(Set cr (CmpD src1 src2)); 9328 ins_cost(145); 9329 format %{ "UCOMISD $src1,$src2\n\t" 9330 "JNP,s exit\n\t" 9331 "PUSHF\t# saw NaN, set CF\n\t" 9332 "AND [rsp], #0xffffff2b\n\t" 9333 "POPF\n" 9334 "exit:" %} 9335 ins_encode %{ 9336 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9337 emit_cmpfp_fixup(_masm); 9338 %} 9339 ins_pipe( pipe_slow ); 9340%} 9341 9342instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{ 9343 predicate(UseSSE>=2); 9344 match(Set cr (CmpD src1 src2)); 9345 ins_cost(100); 9346 format %{ "UCOMISD $src1,$src2" %} 9347 ins_encode %{ 9348 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9349 %} 9350 ins_pipe( pipe_slow ); 9351%} 9352 9353// float compare and set condition codes in EFLAGS by XMM regs 9354instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{ 9355 predicate(UseSSE>=2); 9356 match(Set cr (CmpD src1 (LoadD src2))); 9357 ins_cost(145); 9358 format %{ "UCOMISD $src1,$src2\n\t" 9359 "JNP,s exit\n\t" 9360 "PUSHF\t# saw NaN, set CF\n\t" 9361 "AND [rsp], #0xffffff2b\n\t" 9362 "POPF\n" 9363 "exit:" %} 9364 ins_encode %{ 9365 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9366 emit_cmpfp_fixup(_masm); 9367 %} 9368 ins_pipe( pipe_slow ); 9369%} 9370 9371instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{ 9372 predicate(UseSSE>=2); 9373 match(Set cr (CmpD src1 (LoadD src2))); 9374 ins_cost(100); 9375 format %{ "UCOMISD $src1,$src2" %} 9376 ins_encode %{ 9377 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9378 %} 9379 ins_pipe( pipe_slow ); 9380%} 9381 9382// Compare into -1,0,1 in XMM 9383instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{ 9384 predicate(UseSSE>=2); 9385 match(Set dst (CmpD3 src1 src2)); 9386 effect(KILL cr); 9387 ins_cost(255); 9388 format %{ "UCOMISD $src1, $src2\n\t" 9389 "MOV $dst, #-1\n\t" 9390 "JP,s done\n\t" 9391 "JB,s done\n\t" 9392 "SETNE $dst\n\t" 9393 "MOVZB $dst, $dst\n" 9394 "done:" %} 9395 ins_encode %{ 9396 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9397 emit_cmpfp3(_masm, $dst$$Register); 9398 %} 9399 ins_pipe( pipe_slow ); 9400%} 9401 9402// Compare into -1,0,1 in XMM and memory 9403instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{ 9404 predicate(UseSSE>=2); 9405 match(Set dst (CmpD3 src1 (LoadD src2))); 9406 effect(KILL cr); 9407 ins_cost(275); 9408 format %{ "UCOMISD $src1, $src2\n\t" 9409 "MOV $dst, #-1\n\t" 9410 "JP,s done\n\t" 9411 "JB,s done\n\t" 9412 "SETNE $dst\n\t" 9413 "MOVZB $dst, $dst\n" 9414 "done:" %} 9415 ins_encode %{ 9416 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9417 emit_cmpfp3(_masm, $dst$$Register); 9418 %} 9419 ins_pipe( pipe_slow ); 9420%} 9421 9422 9423instruct subDPR_reg(regDPR dst, regDPR src) %{ 9424 predicate (UseSSE <=1); 9425 match(Set dst (SubD dst src)); 9426 9427 format %{ "FLD $src\n\t" 9428 "DSUBp $dst,ST" %} 9429 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 9430 ins_cost(150); 9431 ins_encode( Push_Reg_DPR(src), 9432 OpcP, RegOpc(dst) ); 9433 ins_pipe( fpu_reg_reg ); 9434%} 9435 9436instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9437 predicate (UseSSE <=1); 9438 match(Set dst (RoundDouble (SubD src1 src2))); 9439 ins_cost(250); 9440 9441 format %{ "FLD $src2\n\t" 9442 "DSUB ST,$src1\n\t" 9443 "FSTP_D $dst\t# D-round" %} 9444 opcode(0xD8, 0x5); 9445 ins_encode( Push_Reg_DPR(src2), 9446 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9447 ins_pipe( fpu_mem_reg_reg ); 9448%} 9449 9450 9451instruct subDPR_reg_mem(regDPR dst, memory src) %{ 9452 predicate (UseSSE <=1); 9453 match(Set dst (SubD dst (LoadD src))); 9454 ins_cost(150); 9455 9456 format %{ "FLD $src\n\t" 9457 "DSUBp $dst,ST" %} 9458 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9459 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9460 OpcP, RegOpc(dst) ); 9461 ins_pipe( fpu_reg_mem ); 9462%} 9463 9464instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{ 9465 predicate (UseSSE<=1); 9466 match(Set dst (AbsD src)); 9467 ins_cost(100); 9468 format %{ "FABS" %} 9469 opcode(0xE1, 0xD9); 9470 ins_encode( OpcS, OpcP ); 9471 ins_pipe( fpu_reg_reg ); 9472%} 9473 9474instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{ 9475 predicate(UseSSE<=1); 9476 match(Set dst (NegD src)); 9477 ins_cost(100); 9478 format %{ "FCHS" %} 9479 opcode(0xE0, 0xD9); 9480 ins_encode( OpcS, OpcP ); 9481 ins_pipe( fpu_reg_reg ); 9482%} 9483 9484instruct addDPR_reg(regDPR dst, regDPR src) %{ 9485 predicate(UseSSE<=1); 9486 match(Set dst (AddD dst src)); 9487 format %{ "FLD $src\n\t" 9488 "DADD $dst,ST" %} 9489 size(4); 9490 ins_cost(150); 9491 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 9492 ins_encode( Push_Reg_DPR(src), 9493 OpcP, RegOpc(dst) ); 9494 ins_pipe( fpu_reg_reg ); 9495%} 9496 9497 9498instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9499 predicate(UseSSE<=1); 9500 match(Set dst (RoundDouble (AddD src1 src2))); 9501 ins_cost(250); 9502 9503 format %{ "FLD $src2\n\t" 9504 "DADD ST,$src1\n\t" 9505 "FSTP_D $dst\t# D-round" %} 9506 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ 9507 ins_encode( Push_Reg_DPR(src2), 9508 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9509 ins_pipe( fpu_mem_reg_reg ); 9510%} 9511 9512 9513instruct addDPR_reg_mem(regDPR dst, memory src) %{ 9514 predicate(UseSSE<=1); 9515 match(Set dst (AddD dst (LoadD src))); 9516 ins_cost(150); 9517 9518 format %{ "FLD $src\n\t" 9519 "DADDp $dst,ST" %} 9520 opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9521 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9522 OpcP, RegOpc(dst) ); 9523 ins_pipe( fpu_reg_mem ); 9524%} 9525 9526// add-to-memory 9527instruct addDPR_mem_reg(memory dst, regDPR src) %{ 9528 predicate(UseSSE<=1); 9529 match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src)))); 9530 ins_cost(150); 9531 9532 format %{ "FLD_D $dst\n\t" 9533 "DADD ST,$src\n\t" 9534 "FST_D $dst" %} 9535 opcode(0xDD, 0x0); 9536 ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst), 9537 Opcode(0xD8), RegOpc(src), 9538 set_instruction_start, 9539 Opcode(0xDD), RMopc_Mem(0x03,dst) ); 9540 ins_pipe( fpu_reg_mem ); 9541%} 9542 9543instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{ 9544 predicate(UseSSE<=1); 9545 match(Set dst (AddD dst con)); 9546 ins_cost(125); 9547 format %{ "FLD1\n\t" 9548 "DADDp $dst,ST" %} 9549 ins_encode %{ 9550 __ fld1(); 9551 __ faddp($dst$$reg); 9552 %} 9553 ins_pipe(fpu_reg); 9554%} 9555 9556instruct addDPR_reg_imm(regDPR dst, immDPR con) %{ 9557 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9558 match(Set dst (AddD dst con)); 9559 ins_cost(200); 9560 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9561 "DADDp $dst,ST" %} 9562 ins_encode %{ 9563 __ fld_d($constantaddress($con)); 9564 __ faddp($dst$$reg); 9565 %} 9566 ins_pipe(fpu_reg_mem); 9567%} 9568 9569instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{ 9570 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); 9571 match(Set dst (RoundDouble (AddD src con))); 9572 ins_cost(200); 9573 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9574 "DADD ST,$src\n\t" 9575 "FSTP_D $dst\t# D-round" %} 9576 ins_encode %{ 9577 __ fld_d($constantaddress($con)); 9578 __ fadd($src$$reg); 9579 __ fstp_d(Address(rsp, $dst$$disp)); 9580 %} 9581 ins_pipe(fpu_mem_reg_con); 9582%} 9583 9584instruct mulDPR_reg(regDPR dst, regDPR src) %{ 9585 predicate(UseSSE<=1); 9586 match(Set dst (MulD dst src)); 9587 format %{ "FLD $src\n\t" 9588 "DMULp $dst,ST" %} 9589 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9590 ins_cost(150); 9591 ins_encode( Push_Reg_DPR(src), 9592 OpcP, RegOpc(dst) ); 9593 ins_pipe( fpu_reg_reg ); 9594%} 9595 9596// Strict FP instruction biases argument before multiply then 9597// biases result to avoid double rounding of subnormals. 9598// 9599// scale arg1 by multiplying arg1 by 2^(-15360) 9600// load arg2 9601// multiply scaled arg1 by arg2 9602// rescale product by 2^(15360) 9603// 9604instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9605 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 9606 match(Set dst (MulD dst src)); 9607 ins_cost(1); // Select this instruction for all strict FP double multiplies 9608 9609 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" 9610 "DMULp $dst,ST\n\t" 9611 "FLD $src\n\t" 9612 "DMULp $dst,ST\n\t" 9613 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" 9614 "DMULp $dst,ST\n\t" %} 9615 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9616 ins_encode( strictfp_bias1(dst), 9617 Push_Reg_DPR(src), 9618 OpcP, RegOpc(dst), 9619 strictfp_bias2(dst) ); 9620 ins_pipe( fpu_reg_reg ); 9621%} 9622 9623instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{ 9624 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9625 match(Set dst (MulD dst con)); 9626 ins_cost(200); 9627 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9628 "DMULp $dst,ST" %} 9629 ins_encode %{ 9630 __ fld_d($constantaddress($con)); 9631 __ fmulp($dst$$reg); 9632 %} 9633 ins_pipe(fpu_reg_mem); 9634%} 9635 9636 9637instruct mulDPR_reg_mem(regDPR dst, memory src) %{ 9638 predicate( UseSSE<=1 ); 9639 match(Set dst (MulD dst (LoadD src))); 9640 ins_cost(200); 9641 format %{ "FLD_D $src\n\t" 9642 "DMULp $dst,ST" %} 9643 opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */ 9644 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9645 OpcP, RegOpc(dst) ); 9646 ins_pipe( fpu_reg_mem ); 9647%} 9648 9649// 9650// Cisc-alternate to reg-reg multiply 9651instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{ 9652 predicate( UseSSE<=1 ); 9653 match(Set dst (MulD src (LoadD mem))); 9654 ins_cost(250); 9655 format %{ "FLD_D $mem\n\t" 9656 "DMUL ST,$src\n\t" 9657 "FSTP_D $dst" %} 9658 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */ 9659 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem), 9660 OpcReg_FPR(src), 9661 Pop_Reg_DPR(dst) ); 9662 ins_pipe( fpu_reg_reg_mem ); 9663%} 9664 9665 9666// MACRO3 -- addDPR a mulDPR 9667// This instruction is a '2-address' instruction in that the result goes 9668// back to src2. This eliminates a move from the macro; possibly the 9669// register allocator will have to add it back (and maybe not). 9670instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9671 predicate( UseSSE<=1 ); 9672 match(Set src2 (AddD (MulD src0 src1) src2)); 9673 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9674 "DMUL ST,$src1\n\t" 9675 "DADDp $src2,ST" %} 9676 ins_cost(250); 9677 opcode(0xDD); /* LoadD DD /0 */ 9678 ins_encode( Push_Reg_FPR(src0), 9679 FMul_ST_reg(src1), 9680 FAddP_reg_ST(src2) ); 9681 ins_pipe( fpu_reg_reg_reg ); 9682%} 9683 9684 9685// MACRO3 -- subDPR a mulDPR 9686instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9687 predicate( UseSSE<=1 ); 9688 match(Set src2 (SubD (MulD src0 src1) src2)); 9689 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9690 "DMUL ST,$src1\n\t" 9691 "DSUBRp $src2,ST" %} 9692 ins_cost(250); 9693 ins_encode( Push_Reg_FPR(src0), 9694 FMul_ST_reg(src1), 9695 Opcode(0xDE), Opc_plus(0xE0,src2)); 9696 ins_pipe( fpu_reg_reg_reg ); 9697%} 9698 9699 9700instruct divDPR_reg(regDPR dst, regDPR src) %{ 9701 predicate( UseSSE<=1 ); 9702 match(Set dst (DivD dst src)); 9703 9704 format %{ "FLD $src\n\t" 9705 "FDIVp $dst,ST" %} 9706 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 9707 ins_cost(150); 9708 ins_encode( Push_Reg_DPR(src), 9709 OpcP, RegOpc(dst) ); 9710 ins_pipe( fpu_reg_reg ); 9711%} 9712 9713// Strict FP instruction biases argument before division then 9714// biases result, to avoid double rounding of subnormals. 9715// 9716// scale dividend by multiplying dividend by 2^(-15360) 9717// load divisor 9718// divide scaled dividend by divisor 9719// rescale quotient by 2^(15360) 9720// 9721instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9722 predicate (UseSSE<=1); 9723 match(Set dst (DivD dst src)); 9724 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 9725 ins_cost(01); 9726 9727 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" 9728 "DMULp $dst,ST\n\t" 9729 "FLD $src\n\t" 9730 "FDIVp $dst,ST\n\t" 9731 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" 9732 "DMULp $dst,ST\n\t" %} 9733 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 9734 ins_encode( strictfp_bias1(dst), 9735 Push_Reg_DPR(src), 9736 OpcP, RegOpc(dst), 9737 strictfp_bias2(dst) ); 9738 ins_pipe( fpu_reg_reg ); 9739%} 9740 9741instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9742 predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) ); 9743 match(Set dst (RoundDouble (DivD src1 src2))); 9744 9745 format %{ "FLD $src1\n\t" 9746 "FDIV ST,$src2\n\t" 9747 "FSTP_D $dst\t# D-round" %} 9748 opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */ 9749 ins_encode( Push_Reg_DPR(src1), 9750 OpcP, RegOpc(src2), Pop_Mem_DPR(dst) ); 9751 ins_pipe( fpu_mem_reg_reg ); 9752%} 9753 9754 9755instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{ 9756 predicate(UseSSE<=1); 9757 match(Set dst (ModD dst src)); 9758 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 9759 9760 format %{ "DMOD $dst,$src" %} 9761 ins_cost(250); 9762 ins_encode(Push_Reg_Mod_DPR(dst, src), 9763 emitModDPR(), 9764 Push_Result_Mod_DPR(src), 9765 Pop_Reg_DPR(dst)); 9766 ins_pipe( pipe_slow ); 9767%} 9768 9769instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{ 9770 predicate(UseSSE>=2); 9771 match(Set dst (ModD src0 src1)); 9772 effect(KILL rax, KILL cr); 9773 9774 format %{ "SUB ESP,8\t # DMOD\n" 9775 "\tMOVSD [ESP+0],$src1\n" 9776 "\tFLD_D [ESP+0]\n" 9777 "\tMOVSD [ESP+0],$src0\n" 9778 "\tFLD_D [ESP+0]\n" 9779 "loop:\tFPREM\n" 9780 "\tFWAIT\n" 9781 "\tFNSTSW AX\n" 9782 "\tSAHF\n" 9783 "\tJP loop\n" 9784 "\tFSTP_D [ESP+0]\n" 9785 "\tMOVSD $dst,[ESP+0]\n" 9786 "\tADD ESP,8\n" 9787 "\tFSTP ST0\t # Restore FPU Stack" 9788 %} 9789 ins_cost(250); 9790 ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU); 9791 ins_pipe( pipe_slow ); 9792%} 9793 9794instruct sinDPR_reg(regDPR1 dst, regDPR1 src) %{ 9795 predicate (UseSSE<=1); 9796 match(Set dst (SinD src)); 9797 ins_cost(1800); 9798 format %{ "DSIN $dst" %} 9799 opcode(0xD9, 0xFE); 9800 ins_encode( OpcP, OpcS ); 9801 ins_pipe( pipe_slow ); 9802%} 9803 9804instruct sinD_reg(regD dst, eFlagsReg cr) %{ 9805 predicate (UseSSE>=2); 9806 match(Set dst (SinD dst)); 9807 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9808 ins_cost(1800); 9809 format %{ "DSIN $dst" %} 9810 opcode(0xD9, 0xFE); 9811 ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) ); 9812 ins_pipe( pipe_slow ); 9813%} 9814 9815instruct cosDPR_reg(regDPR1 dst, regDPR1 src) %{ 9816 predicate (UseSSE<=1); 9817 match(Set dst (CosD src)); 9818 ins_cost(1800); 9819 format %{ "DCOS $dst" %} 9820 opcode(0xD9, 0xFF); 9821 ins_encode( OpcP, OpcS ); 9822 ins_pipe( pipe_slow ); 9823%} 9824 9825instruct cosD_reg(regD dst, eFlagsReg cr) %{ 9826 predicate (UseSSE>=2); 9827 match(Set dst (CosD dst)); 9828 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9829 ins_cost(1800); 9830 format %{ "DCOS $dst" %} 9831 opcode(0xD9, 0xFF); 9832 ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) ); 9833 ins_pipe( pipe_slow ); 9834%} 9835 9836instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{ 9837 predicate (UseSSE<=1); 9838 match(Set dst(TanD src)); 9839 format %{ "DTAN $dst" %} 9840 ins_encode( Opcode(0xD9), Opcode(0xF2), // fptan 9841 Opcode(0xDD), Opcode(0xD8)); // fstp st 9842 ins_pipe( pipe_slow ); 9843%} 9844 9845instruct tanD_reg(regD dst, eFlagsReg cr) %{ 9846 predicate (UseSSE>=2); 9847 match(Set dst(TanD dst)); 9848 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9849 format %{ "DTAN $dst" %} 9850 ins_encode( Push_SrcD(dst), 9851 Opcode(0xD9), Opcode(0xF2), // fptan 9852 Opcode(0xDD), Opcode(0xD8), // fstp st 9853 Push_ResultD(dst) ); 9854 ins_pipe( pipe_slow ); 9855%} 9856 9857instruct atanDPR_reg(regDPR dst, regDPR src) %{ 9858 predicate (UseSSE<=1); 9859 match(Set dst(AtanD dst src)); 9860 format %{ "DATA $dst,$src" %} 9861 opcode(0xD9, 0xF3); 9862 ins_encode( Push_Reg_DPR(src), 9863 OpcP, OpcS, RegOpc(dst) ); 9864 ins_pipe( pipe_slow ); 9865%} 9866 9867instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{ 9868 predicate (UseSSE>=2); 9869 match(Set dst(AtanD dst src)); 9870 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9871 format %{ "DATA $dst,$src" %} 9872 opcode(0xD9, 0xF3); 9873 ins_encode( Push_SrcD(src), 9874 OpcP, OpcS, Push_ResultD(dst) ); 9875 ins_pipe( pipe_slow ); 9876%} 9877 9878instruct sqrtDPR_reg(regDPR dst, regDPR src) %{ 9879 predicate (UseSSE<=1); 9880 match(Set dst (SqrtD src)); 9881 format %{ "DSQRT $dst,$src" %} 9882 opcode(0xFA, 0xD9); 9883 ins_encode( Push_Reg_DPR(src), 9884 OpcS, OpcP, Pop_Reg_DPR(dst) ); 9885 ins_pipe( pipe_slow ); 9886%} 9887 9888instruct powDPR_reg(regDPR X, regDPR1 Y, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{ 9889 predicate (UseSSE<=1); 9890 match(Set Y (PowD X Y)); // Raise X to the Yth power 9891 effect(KILL rax, KILL rdx, KILL rcx, KILL cr); 9892 format %{ "fast_pow $X $Y -> $Y // KILL $rax, $rcx, $rdx" %} 9893 ins_encode %{ 9894 __ subptr(rsp, 8); 9895 __ fld_s($X$$reg - 1); 9896 __ fast_pow(); 9897 __ addptr(rsp, 8); 9898 %} 9899 ins_pipe( pipe_slow ); 9900%} 9901 9902instruct powD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{ 9903 predicate (UseSSE>=2); 9904 match(Set dst (PowD src0 src1)); // Raise src0 to the src1'th power 9905 effect(KILL rax, KILL rdx, KILL rcx, KILL cr); 9906 format %{ "fast_pow $src0 $src1 -> $dst // KILL $rax, $rcx, $rdx" %} 9907 ins_encode %{ 9908 __ subptr(rsp, 8); 9909 __ movdbl(Address(rsp, 0), $src1$$XMMRegister); 9910 __ fld_d(Address(rsp, 0)); 9911 __ movdbl(Address(rsp, 0), $src0$$XMMRegister); 9912 __ fld_d(Address(rsp, 0)); 9913 __ fast_pow(); 9914 __ fstp_d(Address(rsp, 0)); 9915 __ movdbl($dst$$XMMRegister, Address(rsp, 0)); 9916 __ addptr(rsp, 8); 9917 %} 9918 ins_pipe( pipe_slow ); 9919%} 9920 9921instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{ 9922 predicate (UseSSE<=1); 9923 // The source Double operand on FPU stack 9924 match(Set dst (Log10D src)); 9925 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number 9926 // fxch ; swap ST(0) with ST(1) 9927 // fyl2x ; compute log_10(2) * log_2(x) 9928 format %{ "FLDLG2 \t\t\t#Log10\n\t" 9929 "FXCH \n\t" 9930 "FYL2X \t\t\t# Q=Log10*Log_2(x)" 9931 %} 9932 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2 9933 Opcode(0xD9), Opcode(0xC9), // fxch 9934 Opcode(0xD9), Opcode(0xF1)); // fyl2x 9935 9936 ins_pipe( pipe_slow ); 9937%} 9938 9939instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{ 9940 predicate (UseSSE>=2); 9941 effect(KILL cr); 9942 match(Set dst (Log10D src)); 9943 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number 9944 // fyl2x ; compute log_10(2) * log_2(x) 9945 format %{ "FLDLG2 \t\t\t#Log10\n\t" 9946 "FYL2X \t\t\t# Q=Log10*Log_2(x)" 9947 %} 9948 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2 9949 Push_SrcD(src), 9950 Opcode(0xD9), Opcode(0xF1), // fyl2x 9951 Push_ResultD(dst)); 9952 9953 ins_pipe( pipe_slow ); 9954%} 9955 9956//-------------Float Instructions------------------------------- 9957// Float Math 9958 9959// Code for float compare: 9960// fcompp(); 9961// fwait(); fnstsw_ax(); 9962// sahf(); 9963// movl(dst, unordered_result); 9964// jcc(Assembler::parity, exit); 9965// movl(dst, less_result); 9966// jcc(Assembler::below, exit); 9967// movl(dst, equal_result); 9968// jcc(Assembler::equal, exit); 9969// movl(dst, greater_result); 9970// exit: 9971 9972// P6 version of float compare, sets condition codes in EFLAGS 9973instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 9974 predicate(VM_Version::supports_cmov() && UseSSE == 0); 9975 match(Set cr (CmpF src1 src2)); 9976 effect(KILL rax); 9977 ins_cost(150); 9978 format %{ "FLD $src1\n\t" 9979 "FUCOMIP ST,$src2 // P6 instruction\n\t" 9980 "JNP exit\n\t" 9981 "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t" 9982 "SAHF\n" 9983 "exit:\tNOP // avoid branch to branch" %} 9984 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9985 ins_encode( Push_Reg_DPR(src1), 9986 OpcP, RegOpc(src2), 9987 cmpF_P6_fixup ); 9988 ins_pipe( pipe_slow ); 9989%} 9990 9991instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{ 9992 predicate(VM_Version::supports_cmov() && UseSSE == 0); 9993 match(Set cr (CmpF src1 src2)); 9994 ins_cost(100); 9995 format %{ "FLD $src1\n\t" 9996 "FUCOMIP ST,$src2 // P6 instruction" %} 9997 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9998 ins_encode( Push_Reg_DPR(src1), 9999 OpcP, RegOpc(src2)); 10000 ins_pipe( pipe_slow ); 10001%} 10002 10003 10004// Compare & branch 10005instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 10006 predicate(UseSSE == 0); 10007 match(Set cr (CmpF src1 src2)); 10008 effect(KILL rax); 10009 ins_cost(200); 10010 format %{ "FLD $src1\n\t" 10011 "FCOMp $src2\n\t" 10012 "FNSTSW AX\n\t" 10013 "TEST AX,0x400\n\t" 10014 "JZ,s flags\n\t" 10015 "MOV AH,1\t# unordered treat as LT\n" 10016 "flags:\tSAHF" %} 10017 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 10018 ins_encode( Push_Reg_DPR(src1), 10019 OpcP, RegOpc(src2), 10020 fpu_flags); 10021 ins_pipe( pipe_slow ); 10022%} 10023 10024// Compare vs zero into -1,0,1 10025instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 10026 predicate(UseSSE == 0); 10027 match(Set dst (CmpF3 src1 zero)); 10028 effect(KILL cr, KILL rax); 10029 ins_cost(280); 10030 format %{ "FTSTF $dst,$src1" %} 10031 opcode(0xE4, 0xD9); 10032 ins_encode( Push_Reg_DPR(src1), 10033 OpcS, OpcP, PopFPU, 10034 CmpF_Result(dst)); 10035 ins_pipe( pipe_slow ); 10036%} 10037 10038// Compare into -1,0,1 10039instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ 10040 predicate(UseSSE == 0); 10041 match(Set dst (CmpF3 src1 src2)); 10042 effect(KILL cr, KILL rax); 10043 ins_cost(300); 10044 format %{ "FCMPF $dst,$src1,$src2" %} 10045 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 10046 ins_encode( Push_Reg_DPR(src1), 10047 OpcP, RegOpc(src2), 10048 CmpF_Result(dst)); 10049 ins_pipe( pipe_slow ); 10050%} 10051 10052// float compare and set condition codes in EFLAGS by XMM regs 10053instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{ 10054 predicate(UseSSE>=1); 10055 match(Set cr (CmpF src1 src2)); 10056 ins_cost(145); 10057 format %{ "UCOMISS $src1,$src2\n\t" 10058 "JNP,s exit\n\t" 10059 "PUSHF\t# saw NaN, set CF\n\t" 10060 "AND [rsp], #0xffffff2b\n\t" 10061 "POPF\n" 10062 "exit:" %} 10063 ins_encode %{ 10064 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10065 emit_cmpfp_fixup(_masm); 10066 %} 10067 ins_pipe( pipe_slow ); 10068%} 10069 10070instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{ 10071 predicate(UseSSE>=1); 10072 match(Set cr (CmpF src1 src2)); 10073 ins_cost(100); 10074 format %{ "UCOMISS $src1,$src2" %} 10075 ins_encode %{ 10076 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10077 %} 10078 ins_pipe( pipe_slow ); 10079%} 10080 10081// float compare and set condition codes in EFLAGS by XMM regs 10082instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{ 10083 predicate(UseSSE>=1); 10084 match(Set cr (CmpF src1 (LoadF src2))); 10085 ins_cost(165); 10086 format %{ "UCOMISS $src1,$src2\n\t" 10087 "JNP,s exit\n\t" 10088 "PUSHF\t# saw NaN, set CF\n\t" 10089 "AND [rsp], #0xffffff2b\n\t" 10090 "POPF\n" 10091 "exit:" %} 10092 ins_encode %{ 10093 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10094 emit_cmpfp_fixup(_masm); 10095 %} 10096 ins_pipe( pipe_slow ); 10097%} 10098 10099instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{ 10100 predicate(UseSSE>=1); 10101 match(Set cr (CmpF src1 (LoadF src2))); 10102 ins_cost(100); 10103 format %{ "UCOMISS $src1,$src2" %} 10104 ins_encode %{ 10105 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10106 %} 10107 ins_pipe( pipe_slow ); 10108%} 10109 10110// Compare into -1,0,1 in XMM 10111instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{ 10112 predicate(UseSSE>=1); 10113 match(Set dst (CmpF3 src1 src2)); 10114 effect(KILL cr); 10115 ins_cost(255); 10116 format %{ "UCOMISS $src1, $src2\n\t" 10117 "MOV $dst, #-1\n\t" 10118 "JP,s done\n\t" 10119 "JB,s done\n\t" 10120 "SETNE $dst\n\t" 10121 "MOVZB $dst, $dst\n" 10122 "done:" %} 10123 ins_encode %{ 10124 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10125 emit_cmpfp3(_masm, $dst$$Register); 10126 %} 10127 ins_pipe( pipe_slow ); 10128%} 10129 10130// Compare into -1,0,1 in XMM and memory 10131instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{ 10132 predicate(UseSSE>=1); 10133 match(Set dst (CmpF3 src1 (LoadF src2))); 10134 effect(KILL cr); 10135 ins_cost(275); 10136 format %{ "UCOMISS $src1, $src2\n\t" 10137 "MOV $dst, #-1\n\t" 10138 "JP,s done\n\t" 10139 "JB,s done\n\t" 10140 "SETNE $dst\n\t" 10141 "MOVZB $dst, $dst\n" 10142 "done:" %} 10143 ins_encode %{ 10144 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10145 emit_cmpfp3(_masm, $dst$$Register); 10146 %} 10147 ins_pipe( pipe_slow ); 10148%} 10149 10150// Spill to obtain 24-bit precision 10151instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10152 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10153 match(Set dst (SubF src1 src2)); 10154 10155 format %{ "FSUB $dst,$src1 - $src2" %} 10156 opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */ 10157 ins_encode( Push_Reg_FPR(src1), 10158 OpcReg_FPR(src2), 10159 Pop_Mem_FPR(dst) ); 10160 ins_pipe( fpu_mem_reg_reg ); 10161%} 10162// 10163// This instruction does not round to 24-bits 10164instruct subFPR_reg(regFPR dst, regFPR src) %{ 10165 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10166 match(Set dst (SubF dst src)); 10167 10168 format %{ "FSUB $dst,$src" %} 10169 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 10170 ins_encode( Push_Reg_FPR(src), 10171 OpcP, RegOpc(dst) ); 10172 ins_pipe( fpu_reg_reg ); 10173%} 10174 10175// Spill to obtain 24-bit precision 10176instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10177 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10178 match(Set dst (AddF src1 src2)); 10179 10180 format %{ "FADD $dst,$src1,$src2" %} 10181 opcode(0xD8, 0x0); /* D8 C0+i */ 10182 ins_encode( Push_Reg_FPR(src2), 10183 OpcReg_FPR(src1), 10184 Pop_Mem_FPR(dst) ); 10185 ins_pipe( fpu_mem_reg_reg ); 10186%} 10187// 10188// This instruction does not round to 24-bits 10189instruct addFPR_reg(regFPR dst, regFPR src) %{ 10190 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10191 match(Set dst (AddF dst src)); 10192 10193 format %{ "FLD $src\n\t" 10194 "FADDp $dst,ST" %} 10195 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 10196 ins_encode( Push_Reg_FPR(src), 10197 OpcP, RegOpc(dst) ); 10198 ins_pipe( fpu_reg_reg ); 10199%} 10200 10201instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{ 10202 predicate(UseSSE==0); 10203 match(Set dst (AbsF src)); 10204 ins_cost(100); 10205 format %{ "FABS" %} 10206 opcode(0xE1, 0xD9); 10207 ins_encode( OpcS, OpcP ); 10208 ins_pipe( fpu_reg_reg ); 10209%} 10210 10211instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{ 10212 predicate(UseSSE==0); 10213 match(Set dst (NegF src)); 10214 ins_cost(100); 10215 format %{ "FCHS" %} 10216 opcode(0xE0, 0xD9); 10217 ins_encode( OpcS, OpcP ); 10218 ins_pipe( fpu_reg_reg ); 10219%} 10220 10221// Cisc-alternate to addFPR_reg 10222// Spill to obtain 24-bit precision 10223instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ 10224 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10225 match(Set dst (AddF src1 (LoadF src2))); 10226 10227 format %{ "FLD $src2\n\t" 10228 "FADD ST,$src1\n\t" 10229 "FSTP_S $dst" %} 10230 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10231 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10232 OpcReg_FPR(src1), 10233 Pop_Mem_FPR(dst) ); 10234 ins_pipe( fpu_mem_reg_mem ); 10235%} 10236// 10237// Cisc-alternate to addFPR_reg 10238// This instruction does not round to 24-bits 10239instruct addFPR_reg_mem(regFPR dst, memory src) %{ 10240 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10241 match(Set dst (AddF dst (LoadF src))); 10242 10243 format %{ "FADD $dst,$src" %} 10244 opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */ 10245 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 10246 OpcP, RegOpc(dst) ); 10247 ins_pipe( fpu_reg_mem ); 10248%} 10249 10250// // Following two instructions for _222_mpegaudio 10251// Spill to obtain 24-bit precision 10252instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{ 10253 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10254 match(Set dst (AddF src1 src2)); 10255 10256 format %{ "FADD $dst,$src1,$src2" %} 10257 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10258 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1), 10259 OpcReg_FPR(src2), 10260 Pop_Mem_FPR(dst) ); 10261 ins_pipe( fpu_mem_reg_mem ); 10262%} 10263 10264// Cisc-spill variant 10265// Spill to obtain 24-bit precision 10266instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{ 10267 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10268 match(Set dst (AddF src1 (LoadF src2))); 10269 10270 format %{ "FADD $dst,$src1,$src2 cisc" %} 10271 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10272 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10273 set_instruction_start, 10274 OpcP, RMopc_Mem(secondary,src1), 10275 Pop_Mem_FPR(dst) ); 10276 ins_pipe( fpu_mem_mem_mem ); 10277%} 10278 10279// Spill to obtain 24-bit precision 10280instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 10281 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10282 match(Set dst (AddF src1 src2)); 10283 10284 format %{ "FADD $dst,$src1,$src2" %} 10285 opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */ 10286 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10287 set_instruction_start, 10288 OpcP, RMopc_Mem(secondary,src1), 10289 Pop_Mem_FPR(dst) ); 10290 ins_pipe( fpu_mem_mem_mem ); 10291%} 10292 10293 10294// Spill to obtain 24-bit precision 10295instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ 10296 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10297 match(Set dst (AddF src con)); 10298 format %{ "FLD $src\n\t" 10299 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10300 "FSTP_S $dst" %} 10301 ins_encode %{ 10302 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10303 __ fadd_s($constantaddress($con)); 10304 __ fstp_s(Address(rsp, $dst$$disp)); 10305 %} 10306 ins_pipe(fpu_mem_reg_con); 10307%} 10308// 10309// This instruction does not round to 24-bits 10310instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ 10311 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10312 match(Set dst (AddF src con)); 10313 format %{ "FLD $src\n\t" 10314 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10315 "FSTP $dst" %} 10316 ins_encode %{ 10317 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10318 __ fadd_s($constantaddress($con)); 10319 __ fstp_d($dst$$reg); 10320 %} 10321 ins_pipe(fpu_reg_reg_con); 10322%} 10323 10324// Spill to obtain 24-bit precision 10325instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10326 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10327 match(Set dst (MulF src1 src2)); 10328 10329 format %{ "FLD $src1\n\t" 10330 "FMUL $src2\n\t" 10331 "FSTP_S $dst" %} 10332 opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */ 10333 ins_encode( Push_Reg_FPR(src1), 10334 OpcReg_FPR(src2), 10335 Pop_Mem_FPR(dst) ); 10336 ins_pipe( fpu_mem_reg_reg ); 10337%} 10338// 10339// This instruction does not round to 24-bits 10340instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{ 10341 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10342 match(Set dst (MulF src1 src2)); 10343 10344 format %{ "FLD $src1\n\t" 10345 "FMUL $src2\n\t" 10346 "FSTP_S $dst" %} 10347 opcode(0xD8, 0x1); /* D8 C8+i */ 10348 ins_encode( Push_Reg_FPR(src2), 10349 OpcReg_FPR(src1), 10350 Pop_Reg_FPR(dst) ); 10351 ins_pipe( fpu_reg_reg_reg ); 10352%} 10353 10354 10355// Spill to obtain 24-bit precision 10356// Cisc-alternate to reg-reg multiply 10357instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ 10358 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10359 match(Set dst (MulF src1 (LoadF src2))); 10360 10361 format %{ "FLD_S $src2\n\t" 10362 "FMUL $src1\n\t" 10363 "FSTP_S $dst" %} 10364 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */ 10365 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10366 OpcReg_FPR(src1), 10367 Pop_Mem_FPR(dst) ); 10368 ins_pipe( fpu_mem_reg_mem ); 10369%} 10370// 10371// This instruction does not round to 24-bits 10372// Cisc-alternate to reg-reg multiply 10373instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{ 10374 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10375 match(Set dst (MulF src1 (LoadF src2))); 10376 10377 format %{ "FMUL $dst,$src1,$src2" %} 10378 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */ 10379 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10380 OpcReg_FPR(src1), 10381 Pop_Reg_FPR(dst) ); 10382 ins_pipe( fpu_reg_reg_mem ); 10383%} 10384 10385// Spill to obtain 24-bit precision 10386instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 10387 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10388 match(Set dst (MulF src1 src2)); 10389 10390 format %{ "FMUL $dst,$src1,$src2" %} 10391 opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */ 10392 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10393 set_instruction_start, 10394 OpcP, RMopc_Mem(secondary,src1), 10395 Pop_Mem_FPR(dst) ); 10396 ins_pipe( fpu_mem_mem_mem ); 10397%} 10398 10399// Spill to obtain 24-bit precision 10400instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ 10401 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10402 match(Set dst (MulF src con)); 10403 10404 format %{ "FLD $src\n\t" 10405 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10406 "FSTP_S $dst" %} 10407 ins_encode %{ 10408 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10409 __ fmul_s($constantaddress($con)); 10410 __ fstp_s(Address(rsp, $dst$$disp)); 10411 %} 10412 ins_pipe(fpu_mem_reg_con); 10413%} 10414// 10415// This instruction does not round to 24-bits 10416instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ 10417 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10418 match(Set dst (MulF src con)); 10419 10420 format %{ "FLD $src\n\t" 10421 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10422 "FSTP $dst" %} 10423 ins_encode %{ 10424 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10425 __ fmul_s($constantaddress($con)); 10426 __ fstp_d($dst$$reg); 10427 %} 10428 ins_pipe(fpu_reg_reg_con); 10429%} 10430 10431 10432// 10433// MACRO1 -- subsume unshared load into mulFPR 10434// This instruction does not round to 24-bits 10435instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{ 10436 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10437 match(Set dst (MulF (LoadF mem1) src)); 10438 10439 format %{ "FLD $mem1 ===MACRO1===\n\t" 10440 "FMUL ST,$src\n\t" 10441 "FSTP $dst" %} 10442 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */ 10443 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1), 10444 OpcReg_FPR(src), 10445 Pop_Reg_FPR(dst) ); 10446 ins_pipe( fpu_reg_reg_mem ); 10447%} 10448// 10449// MACRO2 -- addFPR a mulFPR which subsumed an unshared load 10450// This instruction does not round to 24-bits 10451instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{ 10452 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10453 match(Set dst (AddF (MulF (LoadF mem1) src1) src2)); 10454 ins_cost(95); 10455 10456 format %{ "FLD $mem1 ===MACRO2===\n\t" 10457 "FMUL ST,$src1 subsume mulFPR left load\n\t" 10458 "FADD ST,$src2\n\t" 10459 "FSTP $dst" %} 10460 opcode(0xD9); /* LoadF D9 /0 */ 10461 ins_encode( OpcP, RMopc_Mem(0x00,mem1), 10462 FMul_ST_reg(src1), 10463 FAdd_ST_reg(src2), 10464 Pop_Reg_FPR(dst) ); 10465 ins_pipe( fpu_reg_mem_reg_reg ); 10466%} 10467 10468// MACRO3 -- addFPR a mulFPR 10469// This instruction does not round to 24-bits. It is a '2-address' 10470// instruction in that the result goes back to src2. This eliminates 10471// a move from the macro; possibly the register allocator will have 10472// to add it back (and maybe not). 10473instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{ 10474 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10475 match(Set src2 (AddF (MulF src0 src1) src2)); 10476 10477 format %{ "FLD $src0 ===MACRO3===\n\t" 10478 "FMUL ST,$src1\n\t" 10479 "FADDP $src2,ST" %} 10480 opcode(0xD9); /* LoadF D9 /0 */ 10481 ins_encode( Push_Reg_FPR(src0), 10482 FMul_ST_reg(src1), 10483 FAddP_reg_ST(src2) ); 10484 ins_pipe( fpu_reg_reg_reg ); 10485%} 10486 10487// MACRO4 -- divFPR subFPR 10488// This instruction does not round to 24-bits 10489instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{ 10490 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10491 match(Set dst (DivF (SubF src2 src1) src3)); 10492 10493 format %{ "FLD $src2 ===MACRO4===\n\t" 10494 "FSUB ST,$src1\n\t" 10495 "FDIV ST,$src3\n\t" 10496 "FSTP $dst" %} 10497 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 10498 ins_encode( Push_Reg_FPR(src2), 10499 subFPR_divFPR_encode(src1,src3), 10500 Pop_Reg_FPR(dst) ); 10501 ins_pipe( fpu_reg_reg_reg_reg ); 10502%} 10503 10504// Spill to obtain 24-bit precision 10505instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10506 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10507 match(Set dst (DivF src1 src2)); 10508 10509 format %{ "FDIV $dst,$src1,$src2" %} 10510 opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/ 10511 ins_encode( Push_Reg_FPR(src1), 10512 OpcReg_FPR(src2), 10513 Pop_Mem_FPR(dst) ); 10514 ins_pipe( fpu_mem_reg_reg ); 10515%} 10516// 10517// This instruction does not round to 24-bits 10518instruct divFPR_reg(regFPR dst, regFPR src) %{ 10519 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10520 match(Set dst (DivF dst src)); 10521 10522 format %{ "FDIV $dst,$src" %} 10523 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 10524 ins_encode( Push_Reg_FPR(src), 10525 OpcP, RegOpc(dst) ); 10526 ins_pipe( fpu_reg_reg ); 10527%} 10528 10529 10530// Spill to obtain 24-bit precision 10531instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ 10532 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 10533 match(Set dst (ModF src1 src2)); 10534 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 10535 10536 format %{ "FMOD $dst,$src1,$src2" %} 10537 ins_encode( Push_Reg_Mod_DPR(src1, src2), 10538 emitModDPR(), 10539 Push_Result_Mod_DPR(src2), 10540 Pop_Mem_FPR(dst)); 10541 ins_pipe( pipe_slow ); 10542%} 10543// 10544// This instruction does not round to 24-bits 10545instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{ 10546 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10547 match(Set dst (ModF dst src)); 10548 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 10549 10550 format %{ "FMOD $dst,$src" %} 10551 ins_encode(Push_Reg_Mod_DPR(dst, src), 10552 emitModDPR(), 10553 Push_Result_Mod_DPR(src), 10554 Pop_Reg_FPR(dst)); 10555 ins_pipe( pipe_slow ); 10556%} 10557 10558instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{ 10559 predicate(UseSSE>=1); 10560 match(Set dst (ModF src0 src1)); 10561 effect(KILL rax, KILL cr); 10562 format %{ "SUB ESP,4\t # FMOD\n" 10563 "\tMOVSS [ESP+0],$src1\n" 10564 "\tFLD_S [ESP+0]\n" 10565 "\tMOVSS [ESP+0],$src0\n" 10566 "\tFLD_S [ESP+0]\n" 10567 "loop:\tFPREM\n" 10568 "\tFWAIT\n" 10569 "\tFNSTSW AX\n" 10570 "\tSAHF\n" 10571 "\tJP loop\n" 10572 "\tFSTP_S [ESP+0]\n" 10573 "\tMOVSS $dst,[ESP+0]\n" 10574 "\tADD ESP,4\n" 10575 "\tFSTP ST0\t # Restore FPU Stack" 10576 %} 10577 ins_cost(250); 10578 ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU); 10579 ins_pipe( pipe_slow ); 10580%} 10581 10582 10583//----------Arithmetic Conversion Instructions--------------------------------- 10584// The conversions operations are all Alpha sorted. Please keep it that way! 10585 10586instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{ 10587 predicate(UseSSE==0); 10588 match(Set dst (RoundFloat src)); 10589 ins_cost(125); 10590 format %{ "FST_S $dst,$src\t# F-round" %} 10591 ins_encode( Pop_Mem_Reg_FPR(dst, src) ); 10592 ins_pipe( fpu_mem_reg ); 10593%} 10594 10595instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{ 10596 predicate(UseSSE<=1); 10597 match(Set dst (RoundDouble src)); 10598 ins_cost(125); 10599 format %{ "FST_D $dst,$src\t# D-round" %} 10600 ins_encode( Pop_Mem_Reg_DPR(dst, src) ); 10601 ins_pipe( fpu_mem_reg ); 10602%} 10603 10604// Force rounding to 24-bit precision and 6-bit exponent 10605instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{ 10606 predicate(UseSSE==0); 10607 match(Set dst (ConvD2F src)); 10608 format %{ "FST_S $dst,$src\t# F-round" %} 10609 expand %{ 10610 roundFloat_mem_reg(dst,src); 10611 %} 10612%} 10613 10614// Force rounding to 24-bit precision and 6-bit exponent 10615instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{ 10616 predicate(UseSSE==1); 10617 match(Set dst (ConvD2F src)); 10618 effect( KILL cr ); 10619 format %{ "SUB ESP,4\n\t" 10620 "FST_S [ESP],$src\t# F-round\n\t" 10621 "MOVSS $dst,[ESP]\n\t" 10622 "ADD ESP,4" %} 10623 ins_encode %{ 10624 __ subptr(rsp, 4); 10625 if ($src$$reg != FPR1L_enc) { 10626 __ fld_s($src$$reg-1); 10627 __ fstp_s(Address(rsp, 0)); 10628 } else { 10629 __ fst_s(Address(rsp, 0)); 10630 } 10631 __ movflt($dst$$XMMRegister, Address(rsp, 0)); 10632 __ addptr(rsp, 4); 10633 %} 10634 ins_pipe( pipe_slow ); 10635%} 10636 10637// Force rounding double precision to single precision 10638instruct convD2F_reg(regF dst, regD src) %{ 10639 predicate(UseSSE>=2); 10640 match(Set dst (ConvD2F src)); 10641 format %{ "CVTSD2SS $dst,$src\t# F-round" %} 10642 ins_encode %{ 10643 __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister); 10644 %} 10645 ins_pipe( pipe_slow ); 10646%} 10647 10648instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{ 10649 predicate(UseSSE==0); 10650 match(Set dst (ConvF2D src)); 10651 format %{ "FST_S $dst,$src\t# D-round" %} 10652 ins_encode( Pop_Reg_Reg_DPR(dst, src)); 10653 ins_pipe( fpu_reg_reg ); 10654%} 10655 10656instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{ 10657 predicate(UseSSE==1); 10658 match(Set dst (ConvF2D src)); 10659 format %{ "FST_D $dst,$src\t# D-round" %} 10660 expand %{ 10661 roundDouble_mem_reg(dst,src); 10662 %} 10663%} 10664 10665instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{ 10666 predicate(UseSSE==1); 10667 match(Set dst (ConvF2D src)); 10668 effect( KILL cr ); 10669 format %{ "SUB ESP,4\n\t" 10670 "MOVSS [ESP] $src\n\t" 10671 "FLD_S [ESP]\n\t" 10672 "ADD ESP,4\n\t" 10673 "FSTP $dst\t# D-round" %} 10674 ins_encode %{ 10675 __ subptr(rsp, 4); 10676 __ movflt(Address(rsp, 0), $src$$XMMRegister); 10677 __ fld_s(Address(rsp, 0)); 10678 __ addptr(rsp, 4); 10679 __ fstp_d($dst$$reg); 10680 %} 10681 ins_pipe( pipe_slow ); 10682%} 10683 10684instruct convF2D_reg(regD dst, regF src) %{ 10685 predicate(UseSSE>=2); 10686 match(Set dst (ConvF2D src)); 10687 format %{ "CVTSS2SD $dst,$src\t# D-round" %} 10688 ins_encode %{ 10689 __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister); 10690 %} 10691 ins_pipe( pipe_slow ); 10692%} 10693 10694// Convert a double to an int. If the double is a NAN, stuff a zero in instead. 10695instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{ 10696 predicate(UseSSE<=1); 10697 match(Set dst (ConvD2I src)); 10698 effect( KILL tmp, KILL cr ); 10699 format %{ "FLD $src\t# Convert double to int \n\t" 10700 "FLDCW trunc mode\n\t" 10701 "SUB ESP,4\n\t" 10702 "FISTp [ESP + #0]\n\t" 10703 "FLDCW std/24-bit mode\n\t" 10704 "POP EAX\n\t" 10705 "CMP EAX,0x80000000\n\t" 10706 "JNE,s fast\n\t" 10707 "FLD_D $src\n\t" 10708 "CALL d2i_wrapper\n" 10709 "fast:" %} 10710 ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) ); 10711 ins_pipe( pipe_slow ); 10712%} 10713 10714// Convert a double to an int. If the double is a NAN, stuff a zero in instead. 10715instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{ 10716 predicate(UseSSE>=2); 10717 match(Set dst (ConvD2I src)); 10718 effect( KILL tmp, KILL cr ); 10719 format %{ "CVTTSD2SI $dst, $src\n\t" 10720 "CMP $dst,0x80000000\n\t" 10721 "JNE,s fast\n\t" 10722 "SUB ESP, 8\n\t" 10723 "MOVSD [ESP], $src\n\t" 10724 "FLD_D [ESP]\n\t" 10725 "ADD ESP, 8\n\t" 10726 "CALL d2i_wrapper\n" 10727 "fast:" %} 10728 ins_encode %{ 10729 Label fast; 10730 __ cvttsd2sil($dst$$Register, $src$$XMMRegister); 10731 __ cmpl($dst$$Register, 0x80000000); 10732 __ jccb(Assembler::notEqual, fast); 10733 __ subptr(rsp, 8); 10734 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 10735 __ fld_d(Address(rsp, 0)); 10736 __ addptr(rsp, 8); 10737 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper()))); 10738 __ bind(fast); 10739 %} 10740 ins_pipe( pipe_slow ); 10741%} 10742 10743instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{ 10744 predicate(UseSSE<=1); 10745 match(Set dst (ConvD2L src)); 10746 effect( KILL cr ); 10747 format %{ "FLD $src\t# Convert double to long\n\t" 10748 "FLDCW trunc mode\n\t" 10749 "SUB ESP,8\n\t" 10750 "FISTp [ESP + #0]\n\t" 10751 "FLDCW std/24-bit mode\n\t" 10752 "POP EAX\n\t" 10753 "POP EDX\n\t" 10754 "CMP EDX,0x80000000\n\t" 10755 "JNE,s fast\n\t" 10756 "TEST EAX,EAX\n\t" 10757 "JNE,s fast\n\t" 10758 "FLD $src\n\t" 10759 "CALL d2l_wrapper\n" 10760 "fast:" %} 10761 ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) ); 10762 ins_pipe( pipe_slow ); 10763%} 10764 10765// XMM lacks a float/double->long conversion, so use the old FPU stack. 10766instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{ 10767 predicate (UseSSE>=2); 10768 match(Set dst (ConvD2L src)); 10769 effect( KILL cr ); 10770 format %{ "SUB ESP,8\t# Convert double to long\n\t" 10771 "MOVSD [ESP],$src\n\t" 10772 "FLD_D [ESP]\n\t" 10773 "FLDCW trunc mode\n\t" 10774 "FISTp [ESP + #0]\n\t" 10775 "FLDCW std/24-bit mode\n\t" 10776 "POP EAX\n\t" 10777 "POP EDX\n\t" 10778 "CMP EDX,0x80000000\n\t" 10779 "JNE,s fast\n\t" 10780 "TEST EAX,EAX\n\t" 10781 "JNE,s fast\n\t" 10782 "SUB ESP,8\n\t" 10783 "MOVSD [ESP],$src\n\t" 10784 "FLD_D [ESP]\n\t" 10785 "ADD ESP,8\n\t" 10786 "CALL d2l_wrapper\n" 10787 "fast:" %} 10788 ins_encode %{ 10789 Label fast; 10790 __ subptr(rsp, 8); 10791 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 10792 __ fld_d(Address(rsp, 0)); 10793 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc())); 10794 __ fistp_d(Address(rsp, 0)); 10795 // Restore the rounding mode, mask the exception 10796 if (Compile::current()->in_24_bit_fp_mode()) { 10797 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); 10798 } else { 10799 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); 10800 } 10801 // Load the converted long, adjust CPU stack 10802 __ pop(rax); 10803 __ pop(rdx); 10804 __ cmpl(rdx, 0x80000000); 10805 __ jccb(Assembler::notEqual, fast); 10806 __ testl(rax, rax); 10807 __ jccb(Assembler::notEqual, fast); 10808 __ subptr(rsp, 8); 10809 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 10810 __ fld_d(Address(rsp, 0)); 10811 __ addptr(rsp, 8); 10812 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper()))); 10813 __ bind(fast); 10814 %} 10815 ins_pipe( pipe_slow ); 10816%} 10817 10818// Convert a double to an int. Java semantics require we do complex 10819// manglations in the corner cases. So we set the rounding mode to 10820// 'zero', store the darned double down as an int, and reset the 10821// rounding mode to 'nearest'. The hardware stores a flag value down 10822// if we would overflow or converted a NAN; we check for this and 10823// and go the slow path if needed. 10824instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{ 10825 predicate(UseSSE==0); 10826 match(Set dst (ConvF2I src)); 10827 effect( KILL tmp, KILL cr ); 10828 format %{ "FLD $src\t# Convert float to int \n\t" 10829 "FLDCW trunc mode\n\t" 10830 "SUB ESP,4\n\t" 10831 "FISTp [ESP + #0]\n\t" 10832 "FLDCW std/24-bit mode\n\t" 10833 "POP EAX\n\t" 10834 "CMP EAX,0x80000000\n\t" 10835 "JNE,s fast\n\t" 10836 "FLD $src\n\t" 10837 "CALL d2i_wrapper\n" 10838 "fast:" %} 10839 // DPR2I_encoding works for FPR2I 10840 ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) ); 10841 ins_pipe( pipe_slow ); 10842%} 10843 10844// Convert a float in xmm to an int reg. 10845instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{ 10846 predicate(UseSSE>=1); 10847 match(Set dst (ConvF2I src)); 10848 effect( KILL tmp, KILL cr ); 10849 format %{ "CVTTSS2SI $dst, $src\n\t" 10850 "CMP $dst,0x80000000\n\t" 10851 "JNE,s fast\n\t" 10852 "SUB ESP, 4\n\t" 10853 "MOVSS [ESP], $src\n\t" 10854 "FLD [ESP]\n\t" 10855 "ADD ESP, 4\n\t" 10856 "CALL d2i_wrapper\n" 10857 "fast:" %} 10858 ins_encode %{ 10859 Label fast; 10860 __ cvttss2sil($dst$$Register, $src$$XMMRegister); 10861 __ cmpl($dst$$Register, 0x80000000); 10862 __ jccb(Assembler::notEqual, fast); 10863 __ subptr(rsp, 4); 10864 __ movflt(Address(rsp, 0), $src$$XMMRegister); 10865 __ fld_s(Address(rsp, 0)); 10866 __ addptr(rsp, 4); 10867 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper()))); 10868 __ bind(fast); 10869 %} 10870 ins_pipe( pipe_slow ); 10871%} 10872 10873instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{ 10874 predicate(UseSSE==0); 10875 match(Set dst (ConvF2L src)); 10876 effect( KILL cr ); 10877 format %{ "FLD $src\t# Convert float to long\n\t" 10878 "FLDCW trunc mode\n\t" 10879 "SUB ESP,8\n\t" 10880 "FISTp [ESP + #0]\n\t" 10881 "FLDCW std/24-bit mode\n\t" 10882 "POP EAX\n\t" 10883 "POP EDX\n\t" 10884 "CMP EDX,0x80000000\n\t" 10885 "JNE,s fast\n\t" 10886 "TEST EAX,EAX\n\t" 10887 "JNE,s fast\n\t" 10888 "FLD $src\n\t" 10889 "CALL d2l_wrapper\n" 10890 "fast:" %} 10891 // DPR2L_encoding works for FPR2L 10892 ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) ); 10893 ins_pipe( pipe_slow ); 10894%} 10895 10896// XMM lacks a float/double->long conversion, so use the old FPU stack. 10897instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{ 10898 predicate (UseSSE>=1); 10899 match(Set dst (ConvF2L src)); 10900 effect( KILL cr ); 10901 format %{ "SUB ESP,8\t# Convert float to long\n\t" 10902 "MOVSS [ESP],$src\n\t" 10903 "FLD_S [ESP]\n\t" 10904 "FLDCW trunc mode\n\t" 10905 "FISTp [ESP + #0]\n\t" 10906 "FLDCW std/24-bit mode\n\t" 10907 "POP EAX\n\t" 10908 "POP EDX\n\t" 10909 "CMP EDX,0x80000000\n\t" 10910 "JNE,s fast\n\t" 10911 "TEST EAX,EAX\n\t" 10912 "JNE,s fast\n\t" 10913 "SUB ESP,4\t# Convert float to long\n\t" 10914 "MOVSS [ESP],$src\n\t" 10915 "FLD_S [ESP]\n\t" 10916 "ADD ESP,4\n\t" 10917 "CALL d2l_wrapper\n" 10918 "fast:" %} 10919 ins_encode %{ 10920 Label fast; 10921 __ subptr(rsp, 8); 10922 __ movflt(Address(rsp, 0), $src$$XMMRegister); 10923 __ fld_s(Address(rsp, 0)); 10924 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc())); 10925 __ fistp_d(Address(rsp, 0)); 10926 // Restore the rounding mode, mask the exception 10927 if (Compile::current()->in_24_bit_fp_mode()) { 10928 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); 10929 } else { 10930 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); 10931 } 10932 // Load the converted long, adjust CPU stack 10933 __ pop(rax); 10934 __ pop(rdx); 10935 __ cmpl(rdx, 0x80000000); 10936 __ jccb(Assembler::notEqual, fast); 10937 __ testl(rax, rax); 10938 __ jccb(Assembler::notEqual, fast); 10939 __ subptr(rsp, 4); 10940 __ movflt(Address(rsp, 0), $src$$XMMRegister); 10941 __ fld_s(Address(rsp, 0)); 10942 __ addptr(rsp, 4); 10943 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper()))); 10944 __ bind(fast); 10945 %} 10946 ins_pipe( pipe_slow ); 10947%} 10948 10949instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{ 10950 predicate( UseSSE<=1 ); 10951 match(Set dst (ConvI2D src)); 10952 format %{ "FILD $src\n\t" 10953 "FSTP $dst" %} 10954 opcode(0xDB, 0x0); /* DB /0 */ 10955 ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst)); 10956 ins_pipe( fpu_reg_mem ); 10957%} 10958 10959instruct convI2D_reg(regD dst, rRegI src) %{ 10960 predicate( UseSSE>=2 && !UseXmmI2D ); 10961 match(Set dst (ConvI2D src)); 10962 format %{ "CVTSI2SD $dst,$src" %} 10963 ins_encode %{ 10964 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register); 10965 %} 10966 ins_pipe( pipe_slow ); 10967%} 10968 10969instruct convI2D_mem(regD dst, memory mem) %{ 10970 predicate( UseSSE>=2 ); 10971 match(Set dst (ConvI2D (LoadI mem))); 10972 format %{ "CVTSI2SD $dst,$mem" %} 10973 ins_encode %{ 10974 __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address); 10975 %} 10976 ins_pipe( pipe_slow ); 10977%} 10978 10979instruct convXI2D_reg(regD dst, rRegI src) 10980%{ 10981 predicate( UseSSE>=2 && UseXmmI2D ); 10982 match(Set dst (ConvI2D src)); 10983 10984 format %{ "MOVD $dst,$src\n\t" 10985 "CVTDQ2PD $dst,$dst\t# i2d" %} 10986 ins_encode %{ 10987 __ movdl($dst$$XMMRegister, $src$$Register); 10988 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister); 10989 %} 10990 ins_pipe(pipe_slow); // XXX 10991%} 10992 10993instruct convI2DPR_mem(regDPR dst, memory mem) %{ 10994 predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr()); 10995 match(Set dst (ConvI2D (LoadI mem))); 10996 format %{ "FILD $mem\n\t" 10997 "FSTP $dst" %} 10998 opcode(0xDB); /* DB /0 */ 10999 ins_encode( OpcP, RMopc_Mem(0x00,mem), 11000 Pop_Reg_DPR(dst)); 11001 ins_pipe( fpu_reg_mem ); 11002%} 11003 11004// Convert a byte to a float; no rounding step needed. 11005instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{ 11006 predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 ); 11007 match(Set dst (ConvI2F src)); 11008 format %{ "FILD $src\n\t" 11009 "FSTP $dst" %} 11010 11011 opcode(0xDB, 0x0); /* DB /0 */ 11012 ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst)); 11013 ins_pipe( fpu_reg_mem ); 11014%} 11015 11016// In 24-bit mode, force exponent rounding by storing back out 11017instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{ 11018 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 11019 match(Set dst (ConvI2F src)); 11020 ins_cost(200); 11021 format %{ "FILD $src\n\t" 11022 "FSTP_S $dst" %} 11023 opcode(0xDB, 0x0); /* DB /0 */ 11024 ins_encode( Push_Mem_I(src), 11025 Pop_Mem_FPR(dst)); 11026 ins_pipe( fpu_mem_mem ); 11027%} 11028 11029// In 24-bit mode, force exponent rounding by storing back out 11030instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{ 11031 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 11032 match(Set dst (ConvI2F (LoadI mem))); 11033 ins_cost(200); 11034 format %{ "FILD $mem\n\t" 11035 "FSTP_S $dst" %} 11036 opcode(0xDB); /* DB /0 */ 11037 ins_encode( OpcP, RMopc_Mem(0x00,mem), 11038 Pop_Mem_FPR(dst)); 11039 ins_pipe( fpu_mem_mem ); 11040%} 11041 11042// This instruction does not round to 24-bits 11043instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{ 11044 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); 11045 match(Set dst (ConvI2F src)); 11046 format %{ "FILD $src\n\t" 11047 "FSTP $dst" %} 11048 opcode(0xDB, 0x0); /* DB /0 */ 11049 ins_encode( Push_Mem_I(src), 11050 Pop_Reg_FPR(dst)); 11051 ins_pipe( fpu_reg_mem ); 11052%} 11053 11054// This instruction does not round to 24-bits 11055instruct convI2FPR_mem(regFPR dst, memory mem) %{ 11056 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); 11057 match(Set dst (ConvI2F (LoadI mem))); 11058 format %{ "FILD $mem\n\t" 11059 "FSTP $dst" %} 11060 opcode(0xDB); /* DB /0 */ 11061 ins_encode( OpcP, RMopc_Mem(0x00,mem), 11062 Pop_Reg_FPR(dst)); 11063 ins_pipe( fpu_reg_mem ); 11064%} 11065 11066// Convert an int to a float in xmm; no rounding step needed. 11067instruct convI2F_reg(regF dst, rRegI src) %{ 11068 predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F ); 11069 match(Set dst (ConvI2F src)); 11070 format %{ "CVTSI2SS $dst, $src" %} 11071 ins_encode %{ 11072 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register); 11073 %} 11074 ins_pipe( pipe_slow ); 11075%} 11076 11077 instruct convXI2F_reg(regF dst, rRegI src) 11078%{ 11079 predicate( UseSSE>=2 && UseXmmI2F ); 11080 match(Set dst (ConvI2F src)); 11081 11082 format %{ "MOVD $dst,$src\n\t" 11083 "CVTDQ2PS $dst,$dst\t# i2f" %} 11084 ins_encode %{ 11085 __ movdl($dst$$XMMRegister, $src$$Register); 11086 __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister); 11087 %} 11088 ins_pipe(pipe_slow); // XXX 11089%} 11090 11091instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{ 11092 match(Set dst (ConvI2L src)); 11093 effect(KILL cr); 11094 ins_cost(375); 11095 format %{ "MOV $dst.lo,$src\n\t" 11096 "MOV $dst.hi,$src\n\t" 11097 "SAR $dst.hi,31" %} 11098 ins_encode(convert_int_long(dst,src)); 11099 ins_pipe( ialu_reg_reg_long ); 11100%} 11101 11102// Zero-extend convert int to long 11103instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{ 11104 match(Set dst (AndL (ConvI2L src) mask) ); 11105 effect( KILL flags ); 11106 ins_cost(250); 11107 format %{ "MOV $dst.lo,$src\n\t" 11108 "XOR $dst.hi,$dst.hi" %} 11109 opcode(0x33); // XOR 11110 ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) ); 11111 ins_pipe( ialu_reg_reg_long ); 11112%} 11113 11114// Zero-extend long 11115instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{ 11116 match(Set dst (AndL src mask) ); 11117 effect( KILL flags ); 11118 ins_cost(250); 11119 format %{ "MOV $dst.lo,$src.lo\n\t" 11120 "XOR $dst.hi,$dst.hi\n\t" %} 11121 opcode(0x33); // XOR 11122 ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) ); 11123 ins_pipe( ialu_reg_reg_long ); 11124%} 11125 11126instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{ 11127 predicate (UseSSE<=1); 11128 match(Set dst (ConvL2D src)); 11129 effect( KILL cr ); 11130 format %{ "PUSH $src.hi\t# Convert long to double\n\t" 11131 "PUSH $src.lo\n\t" 11132 "FILD ST,[ESP + #0]\n\t" 11133 "ADD ESP,8\n\t" 11134 "FSTP_D $dst\t# D-round" %} 11135 opcode(0xDF, 0x5); /* DF /5 */ 11136 ins_encode(convert_long_double(src), Pop_Mem_DPR(dst)); 11137 ins_pipe( pipe_slow ); 11138%} 11139 11140instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{ 11141 predicate (UseSSE>=2); 11142 match(Set dst (ConvL2D src)); 11143 effect( KILL cr ); 11144 format %{ "PUSH $src.hi\t# Convert long to double\n\t" 11145 "PUSH $src.lo\n\t" 11146 "FILD_D [ESP]\n\t" 11147 "FSTP_D [ESP]\n\t" 11148 "MOVSD $dst,[ESP]\n\t" 11149 "ADD ESP,8" %} 11150 opcode(0xDF, 0x5); /* DF /5 */ 11151 ins_encode(convert_long_double2(src), Push_ResultD(dst)); 11152 ins_pipe( pipe_slow ); 11153%} 11154 11155instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{ 11156 predicate (UseSSE>=1); 11157 match(Set dst (ConvL2F src)); 11158 effect( KILL cr ); 11159 format %{ "PUSH $src.hi\t# Convert long to single float\n\t" 11160 "PUSH $src.lo\n\t" 11161 "FILD_D [ESP]\n\t" 11162 "FSTP_S [ESP]\n\t" 11163 "MOVSS $dst,[ESP]\n\t" 11164 "ADD ESP,8" %} 11165 opcode(0xDF, 0x5); /* DF /5 */ 11166 ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8)); 11167 ins_pipe( pipe_slow ); 11168%} 11169 11170instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{ 11171 match(Set dst (ConvL2F src)); 11172 effect( KILL cr ); 11173 format %{ "PUSH $src.hi\t# Convert long to single float\n\t" 11174 "PUSH $src.lo\n\t" 11175 "FILD ST,[ESP + #0]\n\t" 11176 "ADD ESP,8\n\t" 11177 "FSTP_S $dst\t# F-round" %} 11178 opcode(0xDF, 0x5); /* DF /5 */ 11179 ins_encode(convert_long_double(src), Pop_Mem_FPR(dst)); 11180 ins_pipe( pipe_slow ); 11181%} 11182 11183instruct convL2I_reg( rRegI dst, eRegL src ) %{ 11184 match(Set dst (ConvL2I src)); 11185 effect( DEF dst, USE src ); 11186 format %{ "MOV $dst,$src.lo" %} 11187 ins_encode(enc_CopyL_Lo(dst,src)); 11188 ins_pipe( ialu_reg_reg ); 11189%} 11190 11191instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{ 11192 match(Set dst (MoveF2I src)); 11193 effect( DEF dst, USE src ); 11194 ins_cost(100); 11195 format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %} 11196 ins_encode %{ 11197 __ movl($dst$$Register, Address(rsp, $src$$disp)); 11198 %} 11199 ins_pipe( ialu_reg_mem ); 11200%} 11201 11202instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{ 11203 predicate(UseSSE==0); 11204 match(Set dst (MoveF2I src)); 11205 effect( DEF dst, USE src ); 11206 11207 ins_cost(125); 11208 format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %} 11209 ins_encode( Pop_Mem_Reg_FPR(dst, src) ); 11210 ins_pipe( fpu_mem_reg ); 11211%} 11212 11213instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{ 11214 predicate(UseSSE>=1); 11215 match(Set dst (MoveF2I src)); 11216 effect( DEF dst, USE src ); 11217 11218 ins_cost(95); 11219 format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %} 11220 ins_encode %{ 11221 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister); 11222 %} 11223 ins_pipe( pipe_slow ); 11224%} 11225 11226instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{ 11227 predicate(UseSSE>=2); 11228 match(Set dst (MoveF2I src)); 11229 effect( DEF dst, USE src ); 11230 ins_cost(85); 11231 format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %} 11232 ins_encode %{ 11233 __ movdl($dst$$Register, $src$$XMMRegister); 11234 %} 11235 ins_pipe( pipe_slow ); 11236%} 11237 11238instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{ 11239 match(Set dst (MoveI2F src)); 11240 effect( DEF dst, USE src ); 11241 11242 ins_cost(100); 11243 format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %} 11244 ins_encode %{ 11245 __ movl(Address(rsp, $dst$$disp), $src$$Register); 11246 %} 11247 ins_pipe( ialu_mem_reg ); 11248%} 11249 11250 11251instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{ 11252 predicate(UseSSE==0); 11253 match(Set dst (MoveI2F src)); 11254 effect(DEF dst, USE src); 11255 11256 ins_cost(125); 11257 format %{ "FLD_S $src\n\t" 11258 "FSTP $dst\t# MoveI2F_stack_reg" %} 11259 opcode(0xD9); /* D9 /0, FLD m32real */ 11260 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 11261 Pop_Reg_FPR(dst) ); 11262 ins_pipe( fpu_reg_mem ); 11263%} 11264 11265instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{ 11266 predicate(UseSSE>=1); 11267 match(Set dst (MoveI2F src)); 11268 effect( DEF dst, USE src ); 11269 11270 ins_cost(95); 11271 format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %} 11272 ins_encode %{ 11273 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp)); 11274 %} 11275 ins_pipe( pipe_slow ); 11276%} 11277 11278instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{ 11279 predicate(UseSSE>=2); 11280 match(Set dst (MoveI2F src)); 11281 effect( DEF dst, USE src ); 11282 11283 ins_cost(85); 11284 format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %} 11285 ins_encode %{ 11286 __ movdl($dst$$XMMRegister, $src$$Register); 11287 %} 11288 ins_pipe( pipe_slow ); 11289%} 11290 11291instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{ 11292 match(Set dst (MoveD2L src)); 11293 effect(DEF dst, USE src); 11294 11295 ins_cost(250); 11296 format %{ "MOV $dst.lo,$src\n\t" 11297 "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %} 11298 opcode(0x8B, 0x8B); 11299 ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src)); 11300 ins_pipe( ialu_mem_long_reg ); 11301%} 11302 11303instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{ 11304 predicate(UseSSE<=1); 11305 match(Set dst (MoveD2L src)); 11306 effect(DEF dst, USE src); 11307 11308 ins_cost(125); 11309 format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %} 11310 ins_encode( Pop_Mem_Reg_DPR(dst, src) ); 11311 ins_pipe( fpu_mem_reg ); 11312%} 11313 11314instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{ 11315 predicate(UseSSE>=2); 11316 match(Set dst (MoveD2L src)); 11317 effect(DEF dst, USE src); 11318 ins_cost(95); 11319 format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %} 11320 ins_encode %{ 11321 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister); 11322 %} 11323 ins_pipe( pipe_slow ); 11324%} 11325 11326instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{ 11327 predicate(UseSSE>=2); 11328 match(Set dst (MoveD2L src)); 11329 effect(DEF dst, USE src, TEMP tmp); 11330 ins_cost(85); 11331 format %{ "MOVD $dst.lo,$src\n\t" 11332 "PSHUFLW $tmp,$src,0x4E\n\t" 11333 "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %} 11334 ins_encode %{ 11335 __ movdl($dst$$Register, $src$$XMMRegister); 11336 __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e); 11337 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); 11338 %} 11339 ins_pipe( pipe_slow ); 11340%} 11341 11342instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{ 11343 match(Set dst (MoveL2D src)); 11344 effect(DEF dst, USE src); 11345 11346 ins_cost(200); 11347 format %{ "MOV $dst,$src.lo\n\t" 11348 "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %} 11349 opcode(0x89, 0x89); 11350 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) ); 11351 ins_pipe( ialu_mem_long_reg ); 11352%} 11353 11354 11355instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{ 11356 predicate(UseSSE<=1); 11357 match(Set dst (MoveL2D src)); 11358 effect(DEF dst, USE src); 11359 ins_cost(125); 11360 11361 format %{ "FLD_D $src\n\t" 11362 "FSTP $dst\t# MoveL2D_stack_reg" %} 11363 opcode(0xDD); /* DD /0, FLD m64real */ 11364 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 11365 Pop_Reg_DPR(dst) ); 11366 ins_pipe( fpu_reg_mem ); 11367%} 11368 11369 11370instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{ 11371 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper); 11372 match(Set dst (MoveL2D src)); 11373 effect(DEF dst, USE src); 11374 11375 ins_cost(95); 11376 format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11377 ins_encode %{ 11378 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11379 %} 11380 ins_pipe( pipe_slow ); 11381%} 11382 11383instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{ 11384 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); 11385 match(Set dst (MoveL2D src)); 11386 effect(DEF dst, USE src); 11387 11388 ins_cost(95); 11389 format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11390 ins_encode %{ 11391 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11392 %} 11393 ins_pipe( pipe_slow ); 11394%} 11395 11396instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{ 11397 predicate(UseSSE>=2); 11398 match(Set dst (MoveL2D src)); 11399 effect(TEMP dst, USE src, TEMP tmp); 11400 ins_cost(85); 11401 format %{ "MOVD $dst,$src.lo\n\t" 11402 "MOVD $tmp,$src.hi\n\t" 11403 "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %} 11404 ins_encode %{ 11405 __ movdl($dst$$XMMRegister, $src$$Register); 11406 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 11407 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 11408 %} 11409 ins_pipe( pipe_slow ); 11410%} 11411 11412 11413// ======================================================================= 11414// fast clearing of an array 11415instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11416 predicate(!UseFastStosb); 11417 match(Set dummy (ClearArray cnt base)); 11418 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); 11419 format %{ "XOR EAX,EAX\t# ClearArray:\n\t" 11420 "SHL ECX,1\t# Convert doublewords to words\n\t" 11421 "REP STOS\t# store EAX into [EDI++] while ECX--" %} 11422 ins_encode %{ 11423 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register); 11424 %} 11425 ins_pipe( pipe_slow ); 11426%} 11427 11428instruct rep_fast_stosb(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11429 predicate(UseFastStosb); 11430 match(Set dummy (ClearArray cnt base)); 11431 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); 11432 format %{ "XOR EAX,EAX\t# ClearArray:\n\t" 11433 "SHL ECX,3\t# Convert doublewords to bytes\n\t" 11434 "REP STOSB\t# store EAX into [EDI++] while ECX--" %} 11435 ins_encode %{ 11436 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register); 11437 %} 11438 ins_pipe( pipe_slow ); 11439%} 11440 11441instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11442 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11443 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); 11444 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11445 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11446 11447 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11448 ins_encode %{ 11449 __ string_compare($str1$$Register, $str2$$Register, 11450 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11451 $tmp1$$XMMRegister, StrIntrinsicNode::LL); 11452 %} 11453 ins_pipe( pipe_slow ); 11454%} 11455 11456instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11457 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11458 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); 11459 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11460 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11461 11462 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11463 ins_encode %{ 11464 __ string_compare($str1$$Register, $str2$$Register, 11465 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11466 $tmp1$$XMMRegister, StrIntrinsicNode::UU); 11467 %} 11468 ins_pipe( pipe_slow ); 11469%} 11470 11471instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11472 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11473 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); 11474 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11475 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11476 11477 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11478 ins_encode %{ 11479 __ string_compare($str1$$Register, $str2$$Register, 11480 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11481 $tmp1$$XMMRegister, StrIntrinsicNode::LU); 11482 %} 11483 ins_pipe( pipe_slow ); 11484%} 11485 11486instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2, 11487 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11488 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); 11489 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11490 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11491 11492 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11493 ins_encode %{ 11494 __ string_compare($str2$$Register, $str1$$Register, 11495 $cnt2$$Register, $cnt1$$Register, $result$$Register, 11496 $tmp1$$XMMRegister, StrIntrinsicNode::UL); 11497 %} 11498 ins_pipe( pipe_slow ); 11499%} 11500 11501// fast string equals 11502instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result, 11503 regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{ 11504 match(Set result (StrEquals (Binary str1 str2) cnt)); 11505 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr); 11506 11507 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %} 11508 ins_encode %{ 11509 __ arrays_equals(false, $str1$$Register, $str2$$Register, 11510 $cnt$$Register, $result$$Register, $tmp3$$Register, 11511 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */); 11512 %} 11513 11514 ins_pipe( pipe_slow ); 11515%} 11516 11517// fast search of substring with known size. 11518instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, 11519 eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ 11520 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL)); 11521 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); 11522 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); 11523 11524 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %} 11525 ins_encode %{ 11526 int icnt2 = (int)$int_cnt2$$constant; 11527 if (icnt2 >= 16) { 11528 // IndexOf for constant substrings with size >= 16 elements 11529 // which don't need to be loaded through stack. 11530 __ string_indexofC8($str1$$Register, $str2$$Register, 11531 $cnt1$$Register, $cnt2$$Register, 11532 icnt2, $result$$Register, 11533 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL); 11534 } else { 11535 // Small strings are loaded through stack if they cross page boundary. 11536 __ string_indexof($str1$$Register, $str2$$Register, 11537 $cnt1$$Register, $cnt2$$Register, 11538 icnt2, $result$$Register, 11539 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL); 11540 } 11541 %} 11542 ins_pipe( pipe_slow ); 11543%} 11544 11545// fast search of substring with known size. 11546instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, 11547 eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ 11548 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU)); 11549 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); 11550 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); 11551 11552 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %} 11553 ins_encode %{ 11554 int icnt2 = (int)$int_cnt2$$constant; 11555 if (icnt2 >= 8) { 11556 // IndexOf for constant substrings with size >= 8 elements 11557 // which don't need to be loaded through stack. 11558 __ string_indexofC8($str1$$Register, $str2$$Register, 11559 $cnt1$$Register, $cnt2$$Register, 11560 icnt2, $result$$Register, 11561 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU); 11562 } else { 11563 // Small strings are loaded through stack if they cross page boundary. 11564 __ string_indexof($str1$$Register, $str2$$Register, 11565 $cnt1$$Register, $cnt2$$Register, 11566 icnt2, $result$$Register, 11567 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU); 11568 } 11569 %} 11570 ins_pipe( pipe_slow ); 11571%} 11572 11573// fast search of substring with known size. 11574instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, 11575 eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ 11576 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL)); 11577 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); 11578 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); 11579 11580 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %} 11581 ins_encode %{ 11582 int icnt2 = (int)$int_cnt2$$constant; 11583 if (icnt2 >= 8) { 11584 // IndexOf for constant substrings with size >= 8 elements 11585 // which don't need to be loaded through stack. 11586 __ string_indexofC8($str1$$Register, $str2$$Register, 11587 $cnt1$$Register, $cnt2$$Register, 11588 icnt2, $result$$Register, 11589 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL); 11590 } else { 11591 // Small strings are loaded through stack if they cross page boundary. 11592 __ string_indexof($str1$$Register, $str2$$Register, 11593 $cnt1$$Register, $cnt2$$Register, 11594 icnt2, $result$$Register, 11595 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL); 11596 } 11597 %} 11598 ins_pipe( pipe_slow ); 11599%} 11600 11601instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, 11602 eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{ 11603 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL)); 11604 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); 11605 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); 11606 11607 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} 11608 ins_encode %{ 11609 __ string_indexof($str1$$Register, $str2$$Register, 11610 $cnt1$$Register, $cnt2$$Register, 11611 (-1), $result$$Register, 11612 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL); 11613 %} 11614 ins_pipe( pipe_slow ); 11615%} 11616 11617instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, 11618 eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{ 11619 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU)); 11620 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); 11621 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); 11622 11623 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} 11624 ins_encode %{ 11625 __ string_indexof($str1$$Register, $str2$$Register, 11626 $cnt1$$Register, $cnt2$$Register, 11627 (-1), $result$$Register, 11628 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU); 11629 %} 11630 ins_pipe( pipe_slow ); 11631%} 11632 11633instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, 11634 eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{ 11635 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL)); 11636 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); 11637 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); 11638 11639 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} 11640 ins_encode %{ 11641 __ string_indexof($str1$$Register, $str2$$Register, 11642 $cnt1$$Register, $cnt2$$Register, 11643 (-1), $result$$Register, 11644 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL); 11645 %} 11646 ins_pipe( pipe_slow ); 11647%} 11648 11649instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch, 11650 eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{ 11651 predicate(UseSSE42Intrinsics); 11652 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); 11653 effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr); 11654 format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} 11655 ins_encode %{ 11656 __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register, 11657 $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register); 11658 %} 11659 ins_pipe( pipe_slow ); 11660%} 11661 11662// fast array equals 11663instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result, 11664 regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) 11665%{ 11666 predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); 11667 match(Set result (AryEq ary1 ary2)); 11668 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); 11669 //ins_cost(300); 11670 11671 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} 11672 ins_encode %{ 11673 __ arrays_equals(true, $ary1$$Register, $ary2$$Register, 11674 $tmp3$$Register, $result$$Register, $tmp4$$Register, 11675 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */); 11676 %} 11677 ins_pipe( pipe_slow ); 11678%} 11679 11680instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result, 11681 regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) 11682%{ 11683 predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); 11684 match(Set result (AryEq ary1 ary2)); 11685 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); 11686 //ins_cost(300); 11687 11688 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} 11689 ins_encode %{ 11690 __ arrays_equals(true, $ary1$$Register, $ary2$$Register, 11691 $tmp3$$Register, $result$$Register, $tmp4$$Register, 11692 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */); 11693 %} 11694 ins_pipe( pipe_slow ); 11695%} 11696 11697instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result, 11698 regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) 11699%{ 11700 match(Set result (HasNegatives ary1 len)); 11701 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr); 11702 11703 format %{ "has negatives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %} 11704 ins_encode %{ 11705 __ has_negatives($ary1$$Register, $len$$Register, 11706 $result$$Register, $tmp3$$Register, 11707 $tmp1$$XMMRegister, $tmp2$$XMMRegister); 11708 %} 11709 ins_pipe( pipe_slow ); 11710%} 11711 11712// fast char[] to byte[] compression 11713instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4, 11714 eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ 11715 match(Set result (StrCompressedCopy src (Binary dst len))); 11716 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); 11717 11718 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %} 11719 ins_encode %{ 11720 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register, 11721 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, 11722 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register); 11723 %} 11724 ins_pipe( pipe_slow ); 11725%} 11726 11727// fast byte[] to char[] inflation 11728instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len, 11729 regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{ 11730 match(Set dummy (StrInflatedCopy src (Binary dst len))); 11731 effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr); 11732 11733 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %} 11734 ins_encode %{ 11735 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register, 11736 $tmp1$$XMMRegister, $tmp2$$Register); 11737 %} 11738 ins_pipe( pipe_slow ); 11739%} 11740 11741// encode char[] to byte[] in ISO_8859_1 11742instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len, 11743 regD tmp1, regD tmp2, regD tmp3, regD tmp4, 11744 eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ 11745 match(Set result (EncodeISOArray src (Binary dst len))); 11746 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); 11747 11748 format %{ "Encode array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %} 11749 ins_encode %{ 11750 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, 11751 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, 11752 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register); 11753 %} 11754 ins_pipe( pipe_slow ); 11755%} 11756 11757 11758//----------Control Flow Instructions------------------------------------------ 11759// Signed compare Instructions 11760instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{ 11761 match(Set cr (CmpI op1 op2)); 11762 effect( DEF cr, USE op1, USE op2 ); 11763 format %{ "CMP $op1,$op2" %} 11764 opcode(0x3B); /* Opcode 3B /r */ 11765 ins_encode( OpcP, RegReg( op1, op2) ); 11766 ins_pipe( ialu_cr_reg_reg ); 11767%} 11768 11769instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{ 11770 match(Set cr (CmpI op1 op2)); 11771 effect( DEF cr, USE op1 ); 11772 format %{ "CMP $op1,$op2" %} 11773 opcode(0x81,0x07); /* Opcode 81 /7 */ 11774 // ins_encode( RegImm( op1, op2) ); /* Was CmpImm */ 11775 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) ); 11776 ins_pipe( ialu_cr_reg_imm ); 11777%} 11778 11779// Cisc-spilled version of cmpI_eReg 11780instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{ 11781 match(Set cr (CmpI op1 (LoadI op2))); 11782 11783 format %{ "CMP $op1,$op2" %} 11784 ins_cost(500); 11785 opcode(0x3B); /* Opcode 3B /r */ 11786 ins_encode( OpcP, RegMem( op1, op2) ); 11787 ins_pipe( ialu_cr_reg_mem ); 11788%} 11789 11790instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{ 11791 match(Set cr (CmpI src zero)); 11792 effect( DEF cr, USE src ); 11793 11794 format %{ "TEST $src,$src" %} 11795 opcode(0x85); 11796 ins_encode( OpcP, RegReg( src, src ) ); 11797 ins_pipe( ialu_cr_reg_imm ); 11798%} 11799 11800instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{ 11801 match(Set cr (CmpI (AndI src con) zero)); 11802 11803 format %{ "TEST $src,$con" %} 11804 opcode(0xF7,0x00); 11805 ins_encode( OpcP, RegOpc(src), Con32(con) ); 11806 ins_pipe( ialu_cr_reg_imm ); 11807%} 11808 11809instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{ 11810 match(Set cr (CmpI (AndI src mem) zero)); 11811 11812 format %{ "TEST $src,$mem" %} 11813 opcode(0x85); 11814 ins_encode( OpcP, RegMem( src, mem ) ); 11815 ins_pipe( ialu_cr_reg_mem ); 11816%} 11817 11818// Unsigned compare Instructions; really, same as signed except they 11819// produce an eFlagsRegU instead of eFlagsReg. 11820instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{ 11821 match(Set cr (CmpU op1 op2)); 11822 11823 format %{ "CMPu $op1,$op2" %} 11824 opcode(0x3B); /* Opcode 3B /r */ 11825 ins_encode( OpcP, RegReg( op1, op2) ); 11826 ins_pipe( ialu_cr_reg_reg ); 11827%} 11828 11829instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{ 11830 match(Set cr (CmpU op1 op2)); 11831 11832 format %{ "CMPu $op1,$op2" %} 11833 opcode(0x81,0x07); /* Opcode 81 /7 */ 11834 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) ); 11835 ins_pipe( ialu_cr_reg_imm ); 11836%} 11837 11838// // Cisc-spilled version of cmpU_eReg 11839instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{ 11840 match(Set cr (CmpU op1 (LoadI op2))); 11841 11842 format %{ "CMPu $op1,$op2" %} 11843 ins_cost(500); 11844 opcode(0x3B); /* Opcode 3B /r */ 11845 ins_encode( OpcP, RegMem( op1, op2) ); 11846 ins_pipe( ialu_cr_reg_mem ); 11847%} 11848 11849// // Cisc-spilled version of cmpU_eReg 11850//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{ 11851// match(Set cr (CmpU (LoadI op1) op2)); 11852// 11853// format %{ "CMPu $op1,$op2" %} 11854// ins_cost(500); 11855// opcode(0x39); /* Opcode 39 /r */ 11856// ins_encode( OpcP, RegMem( op1, op2) ); 11857//%} 11858 11859instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{ 11860 match(Set cr (CmpU src zero)); 11861 11862 format %{ "TESTu $src,$src" %} 11863 opcode(0x85); 11864 ins_encode( OpcP, RegReg( src, src ) ); 11865 ins_pipe( ialu_cr_reg_imm ); 11866%} 11867 11868// Unsigned pointer compare Instructions 11869instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{ 11870 match(Set cr (CmpP op1 op2)); 11871 11872 format %{ "CMPu $op1,$op2" %} 11873 opcode(0x3B); /* Opcode 3B /r */ 11874 ins_encode( OpcP, RegReg( op1, op2) ); 11875 ins_pipe( ialu_cr_reg_reg ); 11876%} 11877 11878instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{ 11879 match(Set cr (CmpP op1 op2)); 11880 11881 format %{ "CMPu $op1,$op2" %} 11882 opcode(0x81,0x07); /* Opcode 81 /7 */ 11883 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) ); 11884 ins_pipe( ialu_cr_reg_imm ); 11885%} 11886 11887// // Cisc-spilled version of cmpP_eReg 11888instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{ 11889 match(Set cr (CmpP op1 (LoadP op2))); 11890 11891 format %{ "CMPu $op1,$op2" %} 11892 ins_cost(500); 11893 opcode(0x3B); /* Opcode 3B /r */ 11894 ins_encode( OpcP, RegMem( op1, op2) ); 11895 ins_pipe( ialu_cr_reg_mem ); 11896%} 11897 11898// // Cisc-spilled version of cmpP_eReg 11899//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{ 11900// match(Set cr (CmpP (LoadP op1) op2)); 11901// 11902// format %{ "CMPu $op1,$op2" %} 11903// ins_cost(500); 11904// opcode(0x39); /* Opcode 39 /r */ 11905// ins_encode( OpcP, RegMem( op1, op2) ); 11906//%} 11907 11908// Compare raw pointer (used in out-of-heap check). 11909// Only works because non-oop pointers must be raw pointers 11910// and raw pointers have no anti-dependencies. 11911instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{ 11912 predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none ); 11913 match(Set cr (CmpP op1 (LoadP op2))); 11914 11915 format %{ "CMPu $op1,$op2" %} 11916 opcode(0x3B); /* Opcode 3B /r */ 11917 ins_encode( OpcP, RegMem( op1, op2) ); 11918 ins_pipe( ialu_cr_reg_mem ); 11919%} 11920 11921// 11922// This will generate a signed flags result. This should be ok 11923// since any compare to a zero should be eq/neq. 11924instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{ 11925 match(Set cr (CmpP src zero)); 11926 11927 format %{ "TEST $src,$src" %} 11928 opcode(0x85); 11929 ins_encode( OpcP, RegReg( src, src ) ); 11930 ins_pipe( ialu_cr_reg_imm ); 11931%} 11932 11933// Cisc-spilled version of testP_reg 11934// This will generate a signed flags result. This should be ok 11935// since any compare to a zero should be eq/neq. 11936instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{ 11937 match(Set cr (CmpP (LoadP op) zero)); 11938 11939 format %{ "TEST $op,0xFFFFFFFF" %} 11940 ins_cost(500); 11941 opcode(0xF7); /* Opcode F7 /0 */ 11942 ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) ); 11943 ins_pipe( ialu_cr_reg_imm ); 11944%} 11945 11946// Yanked all unsigned pointer compare operations. 11947// Pointer compares are done with CmpP which is already unsigned. 11948 11949//----------Max and Min-------------------------------------------------------- 11950// Min Instructions 11951//// 11952// *** Min and Max using the conditional move are slower than the 11953// *** branch version on a Pentium III. 11954// // Conditional move for min 11955//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{ 11956// effect( USE_DEF op2, USE op1, USE cr ); 11957// format %{ "CMOVlt $op2,$op1\t! min" %} 11958// opcode(0x4C,0x0F); 11959// ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); 11960// ins_pipe( pipe_cmov_reg ); 11961//%} 11962// 11963//// Min Register with Register (P6 version) 11964//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{ 11965// predicate(VM_Version::supports_cmov() ); 11966// match(Set op2 (MinI op1 op2)); 11967// ins_cost(200); 11968// expand %{ 11969// eFlagsReg cr; 11970// compI_eReg(cr,op1,op2); 11971// cmovI_reg_lt(op2,op1,cr); 11972// %} 11973//%} 11974 11975// Min Register with Register (generic version) 11976instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{ 11977 match(Set dst (MinI dst src)); 11978 effect(KILL flags); 11979 ins_cost(300); 11980 11981 format %{ "MIN $dst,$src" %} 11982 opcode(0xCC); 11983 ins_encode( min_enc(dst,src) ); 11984 ins_pipe( pipe_slow ); 11985%} 11986 11987// Max Register with Register 11988// *** Min and Max using the conditional move are slower than the 11989// *** branch version on a Pentium III. 11990// // Conditional move for max 11991//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{ 11992// effect( USE_DEF op2, USE op1, USE cr ); 11993// format %{ "CMOVgt $op2,$op1\t! max" %} 11994// opcode(0x4F,0x0F); 11995// ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); 11996// ins_pipe( pipe_cmov_reg ); 11997//%} 11998// 11999// // Max Register with Register (P6 version) 12000//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{ 12001// predicate(VM_Version::supports_cmov() ); 12002// match(Set op2 (MaxI op1 op2)); 12003// ins_cost(200); 12004// expand %{ 12005// eFlagsReg cr; 12006// compI_eReg(cr,op1,op2); 12007// cmovI_reg_gt(op2,op1,cr); 12008// %} 12009//%} 12010 12011// Max Register with Register (generic version) 12012instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{ 12013 match(Set dst (MaxI dst src)); 12014 effect(KILL flags); 12015 ins_cost(300); 12016 12017 format %{ "MAX $dst,$src" %} 12018 opcode(0xCC); 12019 ins_encode( max_enc(dst,src) ); 12020 ins_pipe( pipe_slow ); 12021%} 12022 12023// ============================================================================ 12024// Counted Loop limit node which represents exact final iterator value. 12025// Note: the resulting value should fit into integer range since 12026// counted loops have limit check on overflow. 12027instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{ 12028 match(Set limit (LoopLimit (Binary init limit) stride)); 12029 effect(TEMP limit_hi, TEMP tmp, KILL flags); 12030 ins_cost(300); 12031 12032 format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %} 12033 ins_encode %{ 12034 int strd = (int)$stride$$constant; 12035 assert(strd != 1 && strd != -1, "sanity"); 12036 int m1 = (strd > 0) ? 1 : -1; 12037 // Convert limit to long (EAX:EDX) 12038 __ cdql(); 12039 // Convert init to long (init:tmp) 12040 __ movl($tmp$$Register, $init$$Register); 12041 __ sarl($tmp$$Register, 31); 12042 // $limit - $init 12043 __ subl($limit$$Register, $init$$Register); 12044 __ sbbl($limit_hi$$Register, $tmp$$Register); 12045 // + ($stride - 1) 12046 if (strd > 0) { 12047 __ addl($limit$$Register, (strd - 1)); 12048 __ adcl($limit_hi$$Register, 0); 12049 __ movl($tmp$$Register, strd); 12050 } else { 12051 __ addl($limit$$Register, (strd + 1)); 12052 __ adcl($limit_hi$$Register, -1); 12053 __ lneg($limit_hi$$Register, $limit$$Register); 12054 __ movl($tmp$$Register, -strd); 12055 } 12056 // signed devision: (EAX:EDX) / pos_stride 12057 __ idivl($tmp$$Register); 12058 if (strd < 0) { 12059 // restore sign 12060 __ negl($tmp$$Register); 12061 } 12062 // (EAX) * stride 12063 __ mull($tmp$$Register); 12064 // + init (ignore upper bits) 12065 __ addl($limit$$Register, $init$$Register); 12066 %} 12067 ins_pipe( pipe_slow ); 12068%} 12069 12070// ============================================================================ 12071// Branch Instructions 12072// Jump Table 12073instruct jumpXtnd(rRegI switch_val) %{ 12074 match(Jump switch_val); 12075 ins_cost(350); 12076 format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %} 12077 ins_encode %{ 12078 // Jump to Address(table_base + switch_reg) 12079 Address index(noreg, $switch_val$$Register, Address::times_1); 12080 __ jump(ArrayAddress($constantaddress, index)); 12081 %} 12082 ins_pipe(pipe_jmp); 12083%} 12084 12085// Jump Direct - Label defines a relative address from JMP+1 12086instruct jmpDir(label labl) %{ 12087 match(Goto); 12088 effect(USE labl); 12089 12090 ins_cost(300); 12091 format %{ "JMP $labl" %} 12092 size(5); 12093 ins_encode %{ 12094 Label* L = $labl$$label; 12095 __ jmp(*L, false); // Always long jump 12096 %} 12097 ins_pipe( pipe_jmp ); 12098%} 12099 12100// Jump Direct Conditional - Label defines a relative address from Jcc+1 12101instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{ 12102 match(If cop cr); 12103 effect(USE labl); 12104 12105 ins_cost(300); 12106 format %{ "J$cop $labl" %} 12107 size(6); 12108 ins_encode %{ 12109 Label* L = $labl$$label; 12110 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12111 %} 12112 ins_pipe( pipe_jcc ); 12113%} 12114 12115// Jump Direct Conditional - Label defines a relative address from Jcc+1 12116instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{ 12117 match(CountedLoopEnd cop cr); 12118 effect(USE labl); 12119 12120 ins_cost(300); 12121 format %{ "J$cop $labl\t# Loop end" %} 12122 size(6); 12123 ins_encode %{ 12124 Label* L = $labl$$label; 12125 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12126 %} 12127 ins_pipe( pipe_jcc ); 12128%} 12129 12130// Jump Direct Conditional - Label defines a relative address from Jcc+1 12131instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{ 12132 match(CountedLoopEnd cop cmp); 12133 effect(USE labl); 12134 12135 ins_cost(300); 12136 format %{ "J$cop,u $labl\t# Loop end" %} 12137 size(6); 12138 ins_encode %{ 12139 Label* L = $labl$$label; 12140 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12141 %} 12142 ins_pipe( pipe_jcc ); 12143%} 12144 12145instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{ 12146 match(CountedLoopEnd cop cmp); 12147 effect(USE labl); 12148 12149 ins_cost(200); 12150 format %{ "J$cop,u $labl\t# Loop end" %} 12151 size(6); 12152 ins_encode %{ 12153 Label* L = $labl$$label; 12154 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12155 %} 12156 ins_pipe( pipe_jcc ); 12157%} 12158 12159// Jump Direct Conditional - using unsigned comparison 12160instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{ 12161 match(If cop cmp); 12162 effect(USE labl); 12163 12164 ins_cost(300); 12165 format %{ "J$cop,u $labl" %} 12166 size(6); 12167 ins_encode %{ 12168 Label* L = $labl$$label; 12169 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12170 %} 12171 ins_pipe(pipe_jcc); 12172%} 12173 12174instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{ 12175 match(If cop cmp); 12176 effect(USE labl); 12177 12178 ins_cost(200); 12179 format %{ "J$cop,u $labl" %} 12180 size(6); 12181 ins_encode %{ 12182 Label* L = $labl$$label; 12183 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12184 %} 12185 ins_pipe(pipe_jcc); 12186%} 12187 12188instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{ 12189 match(If cop cmp); 12190 effect(USE labl); 12191 12192 ins_cost(200); 12193 format %{ $$template 12194 if ($cop$$cmpcode == Assembler::notEqual) { 12195 $$emit$$"JP,u $labl\n\t" 12196 $$emit$$"J$cop,u $labl" 12197 } else { 12198 $$emit$$"JP,u done\n\t" 12199 $$emit$$"J$cop,u $labl\n\t" 12200 $$emit$$"done:" 12201 } 12202 %} 12203 ins_encode %{ 12204 Label* l = $labl$$label; 12205 if ($cop$$cmpcode == Assembler::notEqual) { 12206 __ jcc(Assembler::parity, *l, false); 12207 __ jcc(Assembler::notEqual, *l, false); 12208 } else if ($cop$$cmpcode == Assembler::equal) { 12209 Label done; 12210 __ jccb(Assembler::parity, done); 12211 __ jcc(Assembler::equal, *l, false); 12212 __ bind(done); 12213 } else { 12214 ShouldNotReachHere(); 12215 } 12216 %} 12217 ins_pipe(pipe_jcc); 12218%} 12219 12220// ============================================================================ 12221// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass 12222// array for an instance of the superklass. Set a hidden internal cache on a 12223// hit (cache is checked with exposed code in gen_subtype_check()). Return 12224// NZ for a miss or zero for a hit. The encoding ALSO sets flags. 12225instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{ 12226 match(Set result (PartialSubtypeCheck sub super)); 12227 effect( KILL rcx, KILL cr ); 12228 12229 ins_cost(1100); // slightly larger than the next version 12230 format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t" 12231 "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t" 12232 "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t" 12233 "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t" 12234 "JNE,s miss\t\t# Missed: EDI not-zero\n\t" 12235 "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t" 12236 "XOR $result,$result\t\t Hit: EDI zero\n\t" 12237 "miss:\t" %} 12238 12239 opcode(0x1); // Force a XOR of EDI 12240 ins_encode( enc_PartialSubtypeCheck() ); 12241 ins_pipe( pipe_slow ); 12242%} 12243 12244instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{ 12245 match(Set cr (CmpP (PartialSubtypeCheck sub super) zero)); 12246 effect( KILL rcx, KILL result ); 12247 12248 ins_cost(1000); 12249 format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t" 12250 "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t" 12251 "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t" 12252 "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t" 12253 "JNE,s miss\t\t# Missed: flags NZ\n\t" 12254 "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t" 12255 "miss:\t" %} 12256 12257 opcode(0x0); // No need to XOR EDI 12258 ins_encode( enc_PartialSubtypeCheck() ); 12259 ins_pipe( pipe_slow ); 12260%} 12261 12262// ============================================================================ 12263// Branch Instructions -- short offset versions 12264// 12265// These instructions are used to replace jumps of a long offset (the default 12266// match) with jumps of a shorter offset. These instructions are all tagged 12267// with the ins_short_branch attribute, which causes the ADLC to suppress the 12268// match rules in general matching. Instead, the ADLC generates a conversion 12269// method in the MachNode which can be used to do in-place replacement of the 12270// long variant with the shorter variant. The compiler will determine if a 12271// branch can be taken by the is_short_branch_offset() predicate in the machine 12272// specific code section of the file. 12273 12274// Jump Direct - Label defines a relative address from JMP+1 12275instruct jmpDir_short(label labl) %{ 12276 match(Goto); 12277 effect(USE labl); 12278 12279 ins_cost(300); 12280 format %{ "JMP,s $labl" %} 12281 size(2); 12282 ins_encode %{ 12283 Label* L = $labl$$label; 12284 __ jmpb(*L); 12285 %} 12286 ins_pipe( pipe_jmp ); 12287 ins_short_branch(1); 12288%} 12289 12290// Jump Direct Conditional - Label defines a relative address from Jcc+1 12291instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{ 12292 match(If cop cr); 12293 effect(USE labl); 12294 12295 ins_cost(300); 12296 format %{ "J$cop,s $labl" %} 12297 size(2); 12298 ins_encode %{ 12299 Label* L = $labl$$label; 12300 __ jccb((Assembler::Condition)($cop$$cmpcode), *L); 12301 %} 12302 ins_pipe( pipe_jcc ); 12303 ins_short_branch(1); 12304%} 12305 12306// Jump Direct Conditional - Label defines a relative address from Jcc+1 12307instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{ 12308 match(CountedLoopEnd cop cr); 12309 effect(USE labl); 12310 12311 ins_cost(300); 12312 format %{ "J$cop,s $labl\t# Loop end" %} 12313 size(2); 12314 ins_encode %{ 12315 Label* L = $labl$$label; 12316 __ jccb((Assembler::Condition)($cop$$cmpcode), *L); 12317 %} 12318 ins_pipe( pipe_jcc ); 12319 ins_short_branch(1); 12320%} 12321 12322// Jump Direct Conditional - Label defines a relative address from Jcc+1 12323instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{ 12324 match(CountedLoopEnd cop cmp); 12325 effect(USE labl); 12326 12327 ins_cost(300); 12328 format %{ "J$cop,us $labl\t# Loop end" %} 12329 size(2); 12330 ins_encode %{ 12331 Label* L = $labl$$label; 12332 __ jccb((Assembler::Condition)($cop$$cmpcode), *L); 12333 %} 12334 ins_pipe( pipe_jcc ); 12335 ins_short_branch(1); 12336%} 12337 12338instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{ 12339 match(CountedLoopEnd cop cmp); 12340 effect(USE labl); 12341 12342 ins_cost(300); 12343 format %{ "J$cop,us $labl\t# Loop end" %} 12344 size(2); 12345 ins_encode %{ 12346 Label* L = $labl$$label; 12347 __ jccb((Assembler::Condition)($cop$$cmpcode), *L); 12348 %} 12349 ins_pipe( pipe_jcc ); 12350 ins_short_branch(1); 12351%} 12352 12353// Jump Direct Conditional - using unsigned comparison 12354instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{ 12355 match(If cop cmp); 12356 effect(USE labl); 12357 12358 ins_cost(300); 12359 format %{ "J$cop,us $labl" %} 12360 size(2); 12361 ins_encode %{ 12362 Label* L = $labl$$label; 12363 __ jccb((Assembler::Condition)($cop$$cmpcode), *L); 12364 %} 12365 ins_pipe( pipe_jcc ); 12366 ins_short_branch(1); 12367%} 12368 12369instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{ 12370 match(If cop cmp); 12371 effect(USE labl); 12372 12373 ins_cost(300); 12374 format %{ "J$cop,us $labl" %} 12375 size(2); 12376 ins_encode %{ 12377 Label* L = $labl$$label; 12378 __ jccb((Assembler::Condition)($cop$$cmpcode), *L); 12379 %} 12380 ins_pipe( pipe_jcc ); 12381 ins_short_branch(1); 12382%} 12383 12384instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{ 12385 match(If cop cmp); 12386 effect(USE labl); 12387 12388 ins_cost(300); 12389 format %{ $$template 12390 if ($cop$$cmpcode == Assembler::notEqual) { 12391 $$emit$$"JP,u,s $labl\n\t" 12392 $$emit$$"J$cop,u,s $labl" 12393 } else { 12394 $$emit$$"JP,u,s done\n\t" 12395 $$emit$$"J$cop,u,s $labl\n\t" 12396 $$emit$$"done:" 12397 } 12398 %} 12399 size(4); 12400 ins_encode %{ 12401 Label* l = $labl$$label; 12402 if ($cop$$cmpcode == Assembler::notEqual) { 12403 __ jccb(Assembler::parity, *l); 12404 __ jccb(Assembler::notEqual, *l); 12405 } else if ($cop$$cmpcode == Assembler::equal) { 12406 Label done; 12407 __ jccb(Assembler::parity, done); 12408 __ jccb(Assembler::equal, *l); 12409 __ bind(done); 12410 } else { 12411 ShouldNotReachHere(); 12412 } 12413 %} 12414 ins_pipe(pipe_jcc); 12415 ins_short_branch(1); 12416%} 12417 12418// ============================================================================ 12419// Long Compare 12420// 12421// Currently we hold longs in 2 registers. Comparing such values efficiently 12422// is tricky. The flavor of compare used depends on whether we are testing 12423// for LT, LE, or EQ. For a simple LT test we can check just the sign bit. 12424// The GE test is the negated LT test. The LE test can be had by commuting 12425// the operands (yielding a GE test) and then negating; negate again for the 12426// GT test. The EQ test is done by ORcc'ing the high and low halves, and the 12427// NE test is negated from that. 12428 12429// Due to a shortcoming in the ADLC, it mixes up expressions like: 12430// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the 12431// difference between 'Y' and '0L'. The tree-matches for the CmpI sections 12432// are collapsed internally in the ADLC's dfa-gen code. The match for 12433// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the 12434// foo match ends up with the wrong leaf. One fix is to not match both 12435// reg-reg and reg-zero forms of long-compare. This is unfortunate because 12436// both forms beat the trinary form of long-compare and both are very useful 12437// on Intel which has so few registers. 12438 12439// Manifest a CmpL result in an integer register. Very painful. 12440// This is the test to avoid. 12441instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{ 12442 match(Set dst (CmpL3 src1 src2)); 12443 effect( KILL flags ); 12444 ins_cost(1000); 12445 format %{ "XOR $dst,$dst\n\t" 12446 "CMP $src1.hi,$src2.hi\n\t" 12447 "JLT,s m_one\n\t" 12448 "JGT,s p_one\n\t" 12449 "CMP $src1.lo,$src2.lo\n\t" 12450 "JB,s m_one\n\t" 12451 "JEQ,s done\n" 12452 "p_one:\tINC $dst\n\t" 12453 "JMP,s done\n" 12454 "m_one:\tDEC $dst\n" 12455 "done:" %} 12456 ins_encode %{ 12457 Label p_one, m_one, done; 12458 __ xorptr($dst$$Register, $dst$$Register); 12459 __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register)); 12460 __ jccb(Assembler::less, m_one); 12461 __ jccb(Assembler::greater, p_one); 12462 __ cmpl($src1$$Register, $src2$$Register); 12463 __ jccb(Assembler::below, m_one); 12464 __ jccb(Assembler::equal, done); 12465 __ bind(p_one); 12466 __ incrementl($dst$$Register); 12467 __ jmpb(done); 12468 __ bind(m_one); 12469 __ decrementl($dst$$Register); 12470 __ bind(done); 12471 %} 12472 ins_pipe( pipe_slow ); 12473%} 12474 12475//====== 12476// Manifest a CmpL result in the normal flags. Only good for LT or GE 12477// compares. Can be used for LE or GT compares by reversing arguments. 12478// NOT GOOD FOR EQ/NE tests. 12479instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{ 12480 match( Set flags (CmpL src zero )); 12481 ins_cost(100); 12482 format %{ "TEST $src.hi,$src.hi" %} 12483 opcode(0x85); 12484 ins_encode( OpcP, RegReg_Hi2( src, src ) ); 12485 ins_pipe( ialu_cr_reg_reg ); 12486%} 12487 12488// Manifest a CmpL result in the normal flags. Only good for LT or GE 12489// compares. Can be used for LE or GT compares by reversing arguments. 12490// NOT GOOD FOR EQ/NE tests. 12491instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{ 12492 match( Set flags (CmpL src1 src2 )); 12493 effect( TEMP tmp ); 12494 ins_cost(300); 12495 format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t" 12496 "MOV $tmp,$src1.hi\n\t" 12497 "SBB $tmp,$src2.hi\t! Compute flags for long compare" %} 12498 ins_encode( long_cmp_flags2( src1, src2, tmp ) ); 12499 ins_pipe( ialu_cr_reg_reg ); 12500%} 12501 12502// Long compares reg < zero/req OR reg >= zero/req. 12503// Just a wrapper for a normal branch, plus the predicate test. 12504instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{ 12505 match(If cmp flags); 12506 effect(USE labl); 12507 predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); 12508 expand %{ 12509 jmpCon(cmp,flags,labl); // JLT or JGE... 12510 %} 12511%} 12512 12513// Compare 2 longs and CMOVE longs. 12514instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{ 12515 match(Set dst (CMoveL (Binary cmp flags) (Binary dst src))); 12516 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); 12517 ins_cost(400); 12518 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 12519 "CMOV$cmp $dst.hi,$src.hi" %} 12520 opcode(0x0F,0x40); 12521 ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) ); 12522 ins_pipe( pipe_cmov_reg_long ); 12523%} 12524 12525instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{ 12526 match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src)))); 12527 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); 12528 ins_cost(500); 12529 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 12530 "CMOV$cmp $dst.hi,$src.hi" %} 12531 opcode(0x0F,0x40); 12532 ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) ); 12533 ins_pipe( pipe_cmov_reg_long ); 12534%} 12535 12536// Compare 2 longs and CMOVE ints. 12537instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{ 12538 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); 12539 match(Set dst (CMoveI (Binary cmp flags) (Binary dst src))); 12540 ins_cost(200); 12541 format %{ "CMOV$cmp $dst,$src" %} 12542 opcode(0x0F,0x40); 12543 ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); 12544 ins_pipe( pipe_cmov_reg ); 12545%} 12546 12547instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{ 12548 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); 12549 match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src)))); 12550 ins_cost(250); 12551 format %{ "CMOV$cmp $dst,$src" %} 12552 opcode(0x0F,0x40); 12553 ins_encode( enc_cmov(cmp), RegMem( dst, src ) ); 12554 ins_pipe( pipe_cmov_mem ); 12555%} 12556 12557// Compare 2 longs and CMOVE ints. 12558instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{ 12559 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); 12560 match(Set dst (CMoveP (Binary cmp flags) (Binary dst src))); 12561 ins_cost(200); 12562 format %{ "CMOV$cmp $dst,$src" %} 12563 opcode(0x0F,0x40); 12564 ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); 12565 ins_pipe( pipe_cmov_reg ); 12566%} 12567 12568// Compare 2 longs and CMOVE doubles 12569instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{ 12570 predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); 12571 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); 12572 ins_cost(200); 12573 expand %{ 12574 fcmovDPR_regS(cmp,flags,dst,src); 12575 %} 12576%} 12577 12578// Compare 2 longs and CMOVE doubles 12579instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{ 12580 predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); 12581 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); 12582 ins_cost(200); 12583 expand %{ 12584 fcmovD_regS(cmp,flags,dst,src); 12585 %} 12586%} 12587 12588instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{ 12589 predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); 12590 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 12591 ins_cost(200); 12592 expand %{ 12593 fcmovFPR_regS(cmp,flags,dst,src); 12594 %} 12595%} 12596 12597instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{ 12598 predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); 12599 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 12600 ins_cost(200); 12601 expand %{ 12602 fcmovF_regS(cmp,flags,dst,src); 12603 %} 12604%} 12605 12606//====== 12607// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares. 12608instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{ 12609 match( Set flags (CmpL src zero )); 12610 effect(TEMP tmp); 12611 ins_cost(200); 12612 format %{ "MOV $tmp,$src.lo\n\t" 12613 "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %} 12614 ins_encode( long_cmp_flags0( src, tmp ) ); 12615 ins_pipe( ialu_reg_reg_long ); 12616%} 12617 12618// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares. 12619instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{ 12620 match( Set flags (CmpL src1 src2 )); 12621 ins_cost(200+300); 12622 format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t" 12623 "JNE,s skip\n\t" 12624 "CMP $src1.hi,$src2.hi\n\t" 12625 "skip:\t" %} 12626 ins_encode( long_cmp_flags1( src1, src2 ) ); 12627 ins_pipe( ialu_cr_reg_reg ); 12628%} 12629 12630// Long compare reg == zero/reg OR reg != zero/reg 12631// Just a wrapper for a normal branch, plus the predicate test. 12632instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{ 12633 match(If cmp flags); 12634 effect(USE labl); 12635 predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); 12636 expand %{ 12637 jmpCon(cmp,flags,labl); // JEQ or JNE... 12638 %} 12639%} 12640 12641// Compare 2 longs and CMOVE longs. 12642instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{ 12643 match(Set dst (CMoveL (Binary cmp flags) (Binary dst src))); 12644 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 12645 ins_cost(400); 12646 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 12647 "CMOV$cmp $dst.hi,$src.hi" %} 12648 opcode(0x0F,0x40); 12649 ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) ); 12650 ins_pipe( pipe_cmov_reg_long ); 12651%} 12652 12653instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{ 12654 match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src)))); 12655 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 12656 ins_cost(500); 12657 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 12658 "CMOV$cmp $dst.hi,$src.hi" %} 12659 opcode(0x0F,0x40); 12660 ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) ); 12661 ins_pipe( pipe_cmov_reg_long ); 12662%} 12663 12664// Compare 2 longs and CMOVE ints. 12665instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{ 12666 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 12667 match(Set dst (CMoveI (Binary cmp flags) (Binary dst src))); 12668 ins_cost(200); 12669 format %{ "CMOV$cmp $dst,$src" %} 12670 opcode(0x0F,0x40); 12671 ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); 12672 ins_pipe( pipe_cmov_reg ); 12673%} 12674 12675instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{ 12676 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 12677 match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src)))); 12678 ins_cost(250); 12679 format %{ "CMOV$cmp $dst,$src" %} 12680 opcode(0x0F,0x40); 12681 ins_encode( enc_cmov(cmp), RegMem( dst, src ) ); 12682 ins_pipe( pipe_cmov_mem ); 12683%} 12684 12685// Compare 2 longs and CMOVE ints. 12686instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{ 12687 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 12688 match(Set dst (CMoveP (Binary cmp flags) (Binary dst src))); 12689 ins_cost(200); 12690 format %{ "CMOV$cmp $dst,$src" %} 12691 opcode(0x0F,0x40); 12692 ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); 12693 ins_pipe( pipe_cmov_reg ); 12694%} 12695 12696// Compare 2 longs and CMOVE doubles 12697instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{ 12698 predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); 12699 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); 12700 ins_cost(200); 12701 expand %{ 12702 fcmovDPR_regS(cmp,flags,dst,src); 12703 %} 12704%} 12705 12706// Compare 2 longs and CMOVE doubles 12707instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{ 12708 predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); 12709 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); 12710 ins_cost(200); 12711 expand %{ 12712 fcmovD_regS(cmp,flags,dst,src); 12713 %} 12714%} 12715 12716instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{ 12717 predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); 12718 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 12719 ins_cost(200); 12720 expand %{ 12721 fcmovFPR_regS(cmp,flags,dst,src); 12722 %} 12723%} 12724 12725instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{ 12726 predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); 12727 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 12728 ins_cost(200); 12729 expand %{ 12730 fcmovF_regS(cmp,flags,dst,src); 12731 %} 12732%} 12733 12734//====== 12735// Manifest a CmpL result in the normal flags. Only good for LE or GT compares. 12736// Same as cmpL_reg_flags_LEGT except must negate src 12737instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{ 12738 match( Set flags (CmpL src zero )); 12739 effect( TEMP tmp ); 12740 ins_cost(300); 12741 format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t" 12742 "CMP $tmp,$src.lo\n\t" 12743 "SBB $tmp,$src.hi\n\t" %} 12744 ins_encode( long_cmp_flags3(src, tmp) ); 12745 ins_pipe( ialu_reg_reg_long ); 12746%} 12747 12748// Manifest a CmpL result in the normal flags. Only good for LE or GT compares. 12749// Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands 12750// requires a commuted test to get the same result. 12751instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{ 12752 match( Set flags (CmpL src1 src2 )); 12753 effect( TEMP tmp ); 12754 ins_cost(300); 12755 format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t" 12756 "MOV $tmp,$src2.hi\n\t" 12757 "SBB $tmp,$src1.hi\t! Compute flags for long compare" %} 12758 ins_encode( long_cmp_flags2( src2, src1, tmp ) ); 12759 ins_pipe( ialu_cr_reg_reg ); 12760%} 12761 12762// Long compares reg < zero/req OR reg >= zero/req. 12763// Just a wrapper for a normal branch, plus the predicate test 12764instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{ 12765 match(If cmp flags); 12766 effect(USE labl); 12767 predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ); 12768 ins_cost(300); 12769 expand %{ 12770 jmpCon(cmp,flags,labl); // JGT or JLE... 12771 %} 12772%} 12773 12774// Compare 2 longs and CMOVE longs. 12775instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{ 12776 match(Set dst (CMoveL (Binary cmp flags) (Binary dst src))); 12777 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); 12778 ins_cost(400); 12779 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 12780 "CMOV$cmp $dst.hi,$src.hi" %} 12781 opcode(0x0F,0x40); 12782 ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) ); 12783 ins_pipe( pipe_cmov_reg_long ); 12784%} 12785 12786instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{ 12787 match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src)))); 12788 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); 12789 ins_cost(500); 12790 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 12791 "CMOV$cmp $dst.hi,$src.hi+4" %} 12792 opcode(0x0F,0x40); 12793 ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) ); 12794 ins_pipe( pipe_cmov_reg_long ); 12795%} 12796 12797// Compare 2 longs and CMOVE ints. 12798instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{ 12799 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); 12800 match(Set dst (CMoveI (Binary cmp flags) (Binary dst src))); 12801 ins_cost(200); 12802 format %{ "CMOV$cmp $dst,$src" %} 12803 opcode(0x0F,0x40); 12804 ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); 12805 ins_pipe( pipe_cmov_reg ); 12806%} 12807 12808instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{ 12809 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); 12810 match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src)))); 12811 ins_cost(250); 12812 format %{ "CMOV$cmp $dst,$src" %} 12813 opcode(0x0F,0x40); 12814 ins_encode( enc_cmov(cmp), RegMem( dst, src ) ); 12815 ins_pipe( pipe_cmov_mem ); 12816%} 12817 12818// Compare 2 longs and CMOVE ptrs. 12819instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{ 12820 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); 12821 match(Set dst (CMoveP (Binary cmp flags) (Binary dst src))); 12822 ins_cost(200); 12823 format %{ "CMOV$cmp $dst,$src" %} 12824 opcode(0x0F,0x40); 12825 ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); 12826 ins_pipe( pipe_cmov_reg ); 12827%} 12828 12829// Compare 2 longs and CMOVE doubles 12830instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{ 12831 predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); 12832 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); 12833 ins_cost(200); 12834 expand %{ 12835 fcmovDPR_regS(cmp,flags,dst,src); 12836 %} 12837%} 12838 12839// Compare 2 longs and CMOVE doubles 12840instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{ 12841 predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); 12842 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); 12843 ins_cost(200); 12844 expand %{ 12845 fcmovD_regS(cmp,flags,dst,src); 12846 %} 12847%} 12848 12849instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{ 12850 predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); 12851 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 12852 ins_cost(200); 12853 expand %{ 12854 fcmovFPR_regS(cmp,flags,dst,src); 12855 %} 12856%} 12857 12858 12859instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{ 12860 predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); 12861 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 12862 ins_cost(200); 12863 expand %{ 12864 fcmovF_regS(cmp,flags,dst,src); 12865 %} 12866%} 12867 12868 12869// ============================================================================ 12870// Procedure Call/Return Instructions 12871// Call Java Static Instruction 12872// Note: If this code changes, the corresponding ret_addr_offset() and 12873// compute_padding() functions will have to be adjusted. 12874instruct CallStaticJavaDirect(method meth) %{ 12875 match(CallStaticJava); 12876 effect(USE meth); 12877 12878 ins_cost(300); 12879 format %{ "CALL,static " %} 12880 opcode(0xE8); /* E8 cd */ 12881 ins_encode( pre_call_resets, 12882 Java_Static_Call( meth ), 12883 call_epilog, 12884 post_call_FPU ); 12885 ins_pipe( pipe_slow ); 12886 ins_alignment(4); 12887%} 12888 12889// Call Java Dynamic Instruction 12890// Note: If this code changes, the corresponding ret_addr_offset() and 12891// compute_padding() functions will have to be adjusted. 12892instruct CallDynamicJavaDirect(method meth) %{ 12893 match(CallDynamicJava); 12894 effect(USE meth); 12895 12896 ins_cost(300); 12897 format %{ "MOV EAX,(oop)-1\n\t" 12898 "CALL,dynamic" %} 12899 opcode(0xE8); /* E8 cd */ 12900 ins_encode( pre_call_resets, 12901 Java_Dynamic_Call( meth ), 12902 call_epilog, 12903 post_call_FPU ); 12904 ins_pipe( pipe_slow ); 12905 ins_alignment(4); 12906%} 12907 12908// Call Runtime Instruction 12909instruct CallRuntimeDirect(method meth) %{ 12910 match(CallRuntime ); 12911 effect(USE meth); 12912 12913 ins_cost(300); 12914 format %{ "CALL,runtime " %} 12915 opcode(0xE8); /* E8 cd */ 12916 // Use FFREEs to clear entries in float stack 12917 ins_encode( pre_call_resets, 12918 FFree_Float_Stack_All, 12919 Java_To_Runtime( meth ), 12920 post_call_FPU ); 12921 ins_pipe( pipe_slow ); 12922%} 12923 12924// Call runtime without safepoint 12925instruct CallLeafDirect(method meth) %{ 12926 match(CallLeaf); 12927 effect(USE meth); 12928 12929 ins_cost(300); 12930 format %{ "CALL_LEAF,runtime " %} 12931 opcode(0xE8); /* E8 cd */ 12932 ins_encode( pre_call_resets, 12933 FFree_Float_Stack_All, 12934 Java_To_Runtime( meth ), 12935 Verify_FPU_For_Leaf, post_call_FPU ); 12936 ins_pipe( pipe_slow ); 12937%} 12938 12939instruct CallLeafNoFPDirect(method meth) %{ 12940 match(CallLeafNoFP); 12941 effect(USE meth); 12942 12943 ins_cost(300); 12944 format %{ "CALL_LEAF_NOFP,runtime " %} 12945 opcode(0xE8); /* E8 cd */ 12946 ins_encode(Java_To_Runtime(meth)); 12947 ins_pipe( pipe_slow ); 12948%} 12949 12950 12951// Return Instruction 12952// Remove the return address & jump to it. 12953instruct Ret() %{ 12954 match(Return); 12955 format %{ "RET" %} 12956 opcode(0xC3); 12957 ins_encode(OpcP); 12958 ins_pipe( pipe_jmp ); 12959%} 12960 12961// Tail Call; Jump from runtime stub to Java code. 12962// Also known as an 'interprocedural jump'. 12963// Target of jump will eventually return to caller. 12964// TailJump below removes the return address. 12965instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{ 12966 match(TailCall jump_target method_oop ); 12967 ins_cost(300); 12968 format %{ "JMP $jump_target \t# EBX holds method oop" %} 12969 opcode(0xFF, 0x4); /* Opcode FF /4 */ 12970 ins_encode( OpcP, RegOpc(jump_target) ); 12971 ins_pipe( pipe_jmp ); 12972%} 12973 12974 12975// Tail Jump; remove the return address; jump to target. 12976// TailCall above leaves the return address around. 12977instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{ 12978 match( TailJump jump_target ex_oop ); 12979 ins_cost(300); 12980 format %{ "POP EDX\t# pop return address into dummy\n\t" 12981 "JMP $jump_target " %} 12982 opcode(0xFF, 0x4); /* Opcode FF /4 */ 12983 ins_encode( enc_pop_rdx, 12984 OpcP, RegOpc(jump_target) ); 12985 ins_pipe( pipe_jmp ); 12986%} 12987 12988// Create exception oop: created by stack-crawling runtime code. 12989// Created exception is now available to this handler, and is setup 12990// just prior to jumping to this handler. No code emitted. 12991instruct CreateException( eAXRegP ex_oop ) 12992%{ 12993 match(Set ex_oop (CreateEx)); 12994 12995 size(0); 12996 // use the following format syntax 12997 format %{ "# exception oop is in EAX; no code emitted" %} 12998 ins_encode(); 12999 ins_pipe( empty ); 13000%} 13001 13002 13003// Rethrow exception: 13004// The exception oop will come in the first argument position. 13005// Then JUMP (not call) to the rethrow stub code. 13006instruct RethrowException() 13007%{ 13008 match(Rethrow); 13009 13010 // use the following format syntax 13011 format %{ "JMP rethrow_stub" %} 13012 ins_encode(enc_rethrow); 13013 ins_pipe( pipe_jmp ); 13014%} 13015 13016// inlined locking and unlocking 13017 13018instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{ 13019 predicate(Compile::current()->use_rtm()); 13020 match(Set cr (FastLock object box)); 13021 effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box); 13022 ins_cost(300); 13023 format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %} 13024 ins_encode %{ 13025 __ fast_lock($object$$Register, $box$$Register, $tmp$$Register, 13026 $scr$$Register, $cx1$$Register, $cx2$$Register, 13027 _counters, _rtm_counters, _stack_rtm_counters, 13028 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(), 13029 true, ra_->C->profile_rtm()); 13030 %} 13031 ins_pipe(pipe_slow); 13032%} 13033 13034instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{ 13035 predicate(!Compile::current()->use_rtm()); 13036 match(Set cr (FastLock object box)); 13037 effect(TEMP tmp, TEMP scr, USE_KILL box); 13038 ins_cost(300); 13039 format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %} 13040 ins_encode %{ 13041 __ fast_lock($object$$Register, $box$$Register, $tmp$$Register, 13042 $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false); 13043 %} 13044 ins_pipe(pipe_slow); 13045%} 13046 13047instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{ 13048 match(Set cr (FastUnlock object box)); 13049 effect(TEMP tmp, USE_KILL box); 13050 ins_cost(300); 13051 format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %} 13052 ins_encode %{ 13053 __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm()); 13054 %} 13055 ins_pipe(pipe_slow); 13056%} 13057 13058 13059 13060// ============================================================================ 13061// Safepoint Instruction 13062instruct safePoint_poll(eFlagsReg cr) %{ 13063 match(SafePoint); 13064 effect(KILL cr); 13065 13066 // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page. 13067 // On SPARC that might be acceptable as we can generate the address with 13068 // just a sethi, saving an or. By polling at offset 0 we can end up 13069 // putting additional pressure on the index-0 in the D$. Because of 13070 // alignment (just like the situation at hand) the lower indices tend 13071 // to see more traffic. It'd be better to change the polling address 13072 // to offset 0 of the last $line in the polling page. 13073 13074 format %{ "TSTL #polladdr,EAX\t! Safepoint: poll for GC" %} 13075 ins_cost(125); 13076 size(6) ; 13077 ins_encode( Safepoint_Poll() ); 13078 ins_pipe( ialu_reg_mem ); 13079%} 13080 13081 13082// ============================================================================ 13083// This name is KNOWN by the ADLC and cannot be changed. 13084// The ADLC forces a 'TypeRawPtr::BOTTOM' output type 13085// for this guy. 13086instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{ 13087 match(Set dst (ThreadLocal)); 13088 effect(DEF dst, KILL cr); 13089 13090 format %{ "MOV $dst, Thread::current()" %} 13091 ins_encode %{ 13092 Register dstReg = as_Register($dst$$reg); 13093 __ get_thread(dstReg); 13094 %} 13095 ins_pipe( ialu_reg_fat ); 13096%} 13097 13098 13099 13100//----------PEEPHOLE RULES----------------------------------------------------- 13101// These must follow all instruction definitions as they use the names 13102// defined in the instructions definitions. 13103// 13104// peepmatch ( root_instr_name [preceding_instruction]* ); 13105// 13106// peepconstraint %{ 13107// (instruction_number.operand_name relational_op instruction_number.operand_name 13108// [, ...] ); 13109// // instruction numbers are zero-based using left to right order in peepmatch 13110// 13111// peepreplace ( instr_name ( [instruction_number.operand_name]* ) ); 13112// // provide an instruction_number.operand_name for each operand that appears 13113// // in the replacement instruction's match rule 13114// 13115// ---------VM FLAGS--------------------------------------------------------- 13116// 13117// All peephole optimizations can be turned off using -XX:-OptoPeephole 13118// 13119// Each peephole rule is given an identifying number starting with zero and 13120// increasing by one in the order seen by the parser. An individual peephole 13121// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=# 13122// on the command-line. 13123// 13124// ---------CURRENT LIMITATIONS---------------------------------------------- 13125// 13126// Only match adjacent instructions in same basic block 13127// Only equality constraints 13128// Only constraints between operands, not (0.dest_reg == EAX_enc) 13129// Only one replacement instruction 13130// 13131// ---------EXAMPLE---------------------------------------------------------- 13132// 13133// // pertinent parts of existing instructions in architecture description 13134// instruct movI(rRegI dst, rRegI src) %{ 13135// match(Set dst (CopyI src)); 13136// %} 13137// 13138// instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{ 13139// match(Set dst (AddI dst src)); 13140// effect(KILL cr); 13141// %} 13142// 13143// // Change (inc mov) to lea 13144// peephole %{ 13145// // increment preceeded by register-register move 13146// peepmatch ( incI_eReg movI ); 13147// // require that the destination register of the increment 13148// // match the destination register of the move 13149// peepconstraint ( 0.dst == 1.dst ); 13150// // construct a replacement instruction that sets 13151// // the destination to ( move's source register + one ) 13152// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); 13153// %} 13154// 13155// Implementation no longer uses movX instructions since 13156// machine-independent system no longer uses CopyX nodes. 13157// 13158// peephole %{ 13159// peepmatch ( incI_eReg movI ); 13160// peepconstraint ( 0.dst == 1.dst ); 13161// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); 13162// %} 13163// 13164// peephole %{ 13165// peepmatch ( decI_eReg movI ); 13166// peepconstraint ( 0.dst == 1.dst ); 13167// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); 13168// %} 13169// 13170// peephole %{ 13171// peepmatch ( addI_eReg_imm movI ); 13172// peepconstraint ( 0.dst == 1.dst ); 13173// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); 13174// %} 13175// 13176// peephole %{ 13177// peepmatch ( addP_eReg_imm movP ); 13178// peepconstraint ( 0.dst == 1.dst ); 13179// peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) ); 13180// %} 13181 13182// // Change load of spilled value to only a spill 13183// instruct storeI(memory mem, rRegI src) %{ 13184// match(Set mem (StoreI mem src)); 13185// %} 13186// 13187// instruct loadI(rRegI dst, memory mem) %{ 13188// match(Set dst (LoadI mem)); 13189// %} 13190// 13191peephole %{ 13192 peepmatch ( loadI storeI ); 13193 peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem ); 13194 peepreplace ( storeI( 1.mem 1.mem 1.src ) ); 13195%} 13196 13197//----------SMARTSPILL RULES--------------------------------------------------- 13198// These must follow all instruction definitions as they use the names 13199// defined in the instructions definitions. 13200