x86_32.ad revision 9737:e286c9ccd58d
1// 2// Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. 3// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4// 5// This code is free software; you can redistribute it and/or modify it 6// under the terms of the GNU General Public License version 2 only, as 7// published by the Free Software Foundation. 8// 9// This code is distributed in the hope that it will be useful, but WITHOUT 10// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12// version 2 for more details (a copy is included in the LICENSE file that 13// accompanied this code). 14// 15// You should have received a copy of the GNU General Public License version 16// 2 along with this work; if not, write to the Free Software Foundation, 17// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18// 19// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20// or visit www.oracle.com if you need additional information or have any 21// questions. 22// 23// 24 25// X86 Architecture Description File 26 27//----------REGISTER DEFINITION BLOCK------------------------------------------ 28// This information is used by the matcher and the register allocator to 29// describe individual registers and classes of registers within the target 30// archtecture. 31 32register %{ 33//----------Architecture Description Register Definitions---------------------- 34// General Registers 35// "reg_def" name ( register save type, C convention save type, 36// ideal register type, encoding ); 37// Register Save Types: 38// 39// NS = No-Save: The register allocator assumes that these registers 40// can be used without saving upon entry to the method, & 41// that they do not need to be saved at call sites. 42// 43// SOC = Save-On-Call: The register allocator assumes that these registers 44// can be used without saving upon entry to the method, 45// but that they must be saved at call sites. 46// 47// SOE = Save-On-Entry: The register allocator assumes that these registers 48// must be saved before using them upon entry to the 49// method, but they do not need to be saved at call 50// sites. 51// 52// AS = Always-Save: The register allocator assumes that these registers 53// must be saved before using them upon entry to the 54// method, & that they must be saved at call sites. 55// 56// Ideal Register Type is used to determine how to save & restore a 57// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get 58// spilled with LoadP/StoreP. If the register supports both, use Op_RegI. 59// 60// The encoding number is the actual bit-pattern placed into the opcodes. 61 62// General Registers 63// Previously set EBX, ESI, and EDI as save-on-entry for java code 64// Turn off SOE in java-code due to frequent use of uncommon-traps. 65// Now that allocator is better, turn on ESI and EDI as SOE registers. 66 67reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg()); 68reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg()); 69reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg()); 70reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg()); 71// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code 72reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg()); 73reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg()); 74reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg()); 75reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg()); 76 77// Float registers. We treat TOS/FPR0 special. It is invisible to the 78// allocator, and only shows up in the encodings. 79reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad()); 80reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad()); 81// Ok so here's the trick FPR1 is really st(0) except in the midst 82// of emission of assembly for a machnode. During the emission the fpu stack 83// is pushed making FPR1 == st(1) temporarily. However at any safepoint 84// the stack will not have this element so FPR1 == st(0) from the 85// oopMap viewpoint. This same weirdness with numbering causes 86// instruction encoding to have to play games with the register 87// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation 88// where it does flt->flt moves to see an example 89// 90reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()); 91reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next()); 92reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()); 93reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next()); 94reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()); 95reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next()); 96reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()); 97reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next()); 98reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()); 99reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next()); 100reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()); 101reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next()); 102reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()); 103reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next()); 104// 105// Empty fill registers, which are never used, but supply alignment to xmm regs 106// 107reg_def FILL0( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(2)); 108reg_def FILL1( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(3)); 109reg_def FILL2( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(4)); 110reg_def FILL3( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(5)); 111reg_def FILL4( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(6)); 112reg_def FILL5( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(7)); 113reg_def FILL6( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(8)); 114reg_def FILL7( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(9)); 115 116// Specify priority of register selection within phases of register 117// allocation. Highest priority is first. A useful heuristic is to 118// give registers a low priority when they are required by machine 119// instructions, like EAX and EDX. Registers which are used as 120// pairs must fall on an even boundary (witness the FPR#L's in this list). 121// For the Intel integer registers, the equivalent Long pairs are 122// EDX:EAX, EBX:ECX, and EDI:EBP. 123alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP, 124 FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H, 125 FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H, 126 FPR6L, FPR6H, FPR7L, FPR7H, 127 FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7); 128 129 130//----------Architecture Description Register Classes-------------------------- 131// Several register classes are automatically defined based upon information in 132// this architecture description. 133// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ ) 134// 2) reg_class compiler_method_oop_reg ( /* as def'd in frame section */ ) 135// 2) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ ) 136// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) 137// 138// Class for no registers (empty set). 139reg_class no_reg(); 140 141// Class for all registers 142reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP); 143// Class for all registers (excluding EBP) 144reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP); 145// Dynamic register class that selects at runtime between register classes 146// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer). 147// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg; 148reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %}); 149 150// Class for general registers 151reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX); 152// Class for general registers (excluding EBP). 153// This register class can be used for implicit null checks on win95. 154// It is also safe for use by tailjumps (we don't want to allocate in ebp). 155// Used also if the PreserveFramePointer flag is true. 156reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX); 157// Dynamic register class that selects between int_reg and int_reg_no_ebp. 158reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %}); 159 160// Class of "X" registers 161reg_class int_x_reg(EBX, ECX, EDX, EAX); 162 163// Class of registers that can appear in an address with no offset. 164// EBP and ESP require an extra instruction byte for zero offset. 165// Used in fast-unlock 166reg_class p_reg(EDX, EDI, ESI, EBX); 167 168// Class for general registers excluding ECX 169reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX); 170// Class for general registers excluding ECX (and EBP) 171reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX); 172// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp. 173reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %}); 174 175// Class for general registers excluding EAX 176reg_class nax_reg(EDX, EDI, ESI, ECX, EBX); 177 178// Class for general registers excluding EAX and EBX. 179reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP); 180// Class for general registers excluding EAX and EBX (and EBP) 181reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX); 182// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp. 183reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %}); 184 185// Class of EAX (for multiply and divide operations) 186reg_class eax_reg(EAX); 187 188// Class of EBX (for atomic add) 189reg_class ebx_reg(EBX); 190 191// Class of ECX (for shift and JCXZ operations and cmpLTMask) 192reg_class ecx_reg(ECX); 193 194// Class of EDX (for multiply and divide operations) 195reg_class edx_reg(EDX); 196 197// Class of EDI (for synchronization) 198reg_class edi_reg(EDI); 199 200// Class of ESI (for synchronization) 201reg_class esi_reg(ESI); 202 203// Singleton class for stack pointer 204reg_class sp_reg(ESP); 205 206// Singleton class for instruction pointer 207// reg_class ip_reg(EIP); 208 209// Class of integer register pairs 210reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI ); 211// Class of integer register pairs (excluding EBP and EDI); 212reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX ); 213// Dynamic register class that selects between long_reg and long_reg_no_ebp. 214reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %}); 215 216// Class of integer register pairs that aligns with calling convention 217reg_class eadx_reg( EAX,EDX ); 218reg_class ebcx_reg( ECX,EBX ); 219 220// Not AX or DX, used in divides 221reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP); 222// Not AX or DX (and neither EBP), used in divides 223reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI); 224// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp. 225reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %}); 226 227// Floating point registers. Notice FPR0 is not a choice. 228// FPR0 is not ever allocated; we use clever encodings to fake 229// a 2-address instructions out of Intels FP stack. 230reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L ); 231 232reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H, 233 FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H, 234 FPR7L,FPR7H ); 235 236reg_class fp_flt_reg0( FPR1L ); 237reg_class fp_dbl_reg0( FPR1L,FPR1H ); 238reg_class fp_dbl_reg1( FPR2L,FPR2H ); 239reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H, 240 FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H ); 241 242%} 243 244 245//----------SOURCE BLOCK------------------------------------------------------- 246// This is a block of C++ code which provides values, functions, and 247// definitions necessary in the rest of the architecture description 248source_hpp %{ 249// Must be visible to the DFA in dfa_x86_32.cpp 250extern bool is_operand_hi32_zero(Node* n); 251%} 252 253source %{ 254#define RELOC_IMM32 Assembler::imm_operand 255#define RELOC_DISP32 Assembler::disp32_operand 256 257#define __ _masm. 258 259// How to find the high register of a Long pair, given the low register 260#define HIGH_FROM_LOW(x) ((x)+2) 261 262// These masks are used to provide 128-bit aligned bitmasks to the XMM 263// instructions, to allow sign-masking or sign-bit flipping. They allow 264// fast versions of NegF/NegD and AbsF/AbsD. 265 266// Note: 'double' and 'long long' have 32-bits alignment on x86. 267static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) { 268 // Use the expression (adr)&(~0xF) to provide 128-bits aligned address 269 // of 128-bits operands for SSE instructions. 270 jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF))); 271 // Store the value to a 128-bits operand. 272 operand[0] = lo; 273 operand[1] = hi; 274 return operand; 275} 276 277// Buffer for 128-bits masks used by SSE instructions. 278static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment) 279 280// Static initialization during VM startup. 281static jlong *float_signmask_pool = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF)); 282static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF)); 283static jlong *float_signflip_pool = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000)); 284static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000)); 285 286// Offset hacking within calls. 287static int pre_call_resets_size() { 288 int size = 0; 289 Compile* C = Compile::current(); 290 if (C->in_24_bit_fp_mode()) { 291 size += 6; // fldcw 292 } 293 if (C->max_vector_size() > 16) { 294 size += 3; // vzeroupper 295 } 296 return size; 297} 298 299// !!!!! Special hack to get all type of calls to specify the byte offset 300// from the start of the call to the point where the return address 301// will point. 302int MachCallStaticJavaNode::ret_addr_offset() { 303 return 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points 304} 305 306int MachCallDynamicJavaNode::ret_addr_offset() { 307 return 10 + pre_call_resets_size(); // 10 bytes from start of call to where return address points 308} 309 310static int sizeof_FFree_Float_Stack_All = -1; 311 312int MachCallRuntimeNode::ret_addr_offset() { 313 assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already"); 314 return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size(); 315} 316 317// Indicate if the safepoint node needs the polling page as an input. 318// Since x86 does have absolute addressing, it doesn't. 319bool SafePointNode::needs_polling_address_input() { 320 return false; 321} 322 323// 324// Compute padding required for nodes which need alignment 325// 326 327// The address of the call instruction needs to be 4-byte aligned to 328// ensure that it does not span a cache line so that it can be patched. 329int CallStaticJavaDirectNode::compute_padding(int current_offset) const { 330 current_offset += pre_call_resets_size(); // skip fldcw, if any 331 current_offset += 1; // skip call opcode byte 332 return round_to(current_offset, alignment_required()) - current_offset; 333} 334 335// The address of the call instruction needs to be 4-byte aligned to 336// ensure that it does not span a cache line so that it can be patched. 337int CallDynamicJavaDirectNode::compute_padding(int current_offset) const { 338 current_offset += pre_call_resets_size(); // skip fldcw, if any 339 current_offset += 5; // skip MOV instruction 340 current_offset += 1; // skip call opcode byte 341 return round_to(current_offset, alignment_required()) - current_offset; 342} 343 344// EMIT_RM() 345void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) { 346 unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3); 347 cbuf.insts()->emit_int8(c); 348} 349 350// EMIT_CC() 351void emit_cc(CodeBuffer &cbuf, int f1, int f2) { 352 unsigned char c = (unsigned char)( f1 | f2 ); 353 cbuf.insts()->emit_int8(c); 354} 355 356// EMIT_OPCODE() 357void emit_opcode(CodeBuffer &cbuf, int code) { 358 cbuf.insts()->emit_int8((unsigned char) code); 359} 360 361// EMIT_OPCODE() w/ relocation information 362void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) { 363 cbuf.relocate(cbuf.insts_mark() + offset, reloc); 364 emit_opcode(cbuf, code); 365} 366 367// EMIT_D8() 368void emit_d8(CodeBuffer &cbuf, int d8) { 369 cbuf.insts()->emit_int8((unsigned char) d8); 370} 371 372// EMIT_D16() 373void emit_d16(CodeBuffer &cbuf, int d16) { 374 cbuf.insts()->emit_int16(d16); 375} 376 377// EMIT_D32() 378void emit_d32(CodeBuffer &cbuf, int d32) { 379 cbuf.insts()->emit_int32(d32); 380} 381 382// emit 32 bit value and construct relocation entry from relocInfo::relocType 383void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc, 384 int format) { 385 cbuf.relocate(cbuf.insts_mark(), reloc, format); 386 cbuf.insts()->emit_int32(d32); 387} 388 389// emit 32 bit value and construct relocation entry from RelocationHolder 390void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec, 391 int format) { 392#ifdef ASSERT 393 if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) { 394 assert(cast_to_oop(d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code"); 395 } 396#endif 397 cbuf.relocate(cbuf.insts_mark(), rspec, format); 398 cbuf.insts()->emit_int32(d32); 399} 400 401// Access stack slot for load or store 402void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) { 403 emit_opcode( cbuf, opcode ); // (e.g., FILD [ESP+src]) 404 if( -128 <= disp && disp <= 127 ) { 405 emit_rm( cbuf, 0x01, rm_field, ESP_enc ); // R/M byte 406 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte 407 emit_d8 (cbuf, disp); // Displacement // R/M byte 408 } else { 409 emit_rm( cbuf, 0x02, rm_field, ESP_enc ); // R/M byte 410 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte 411 emit_d32(cbuf, disp); // Displacement // R/M byte 412 } 413} 414 415 // rRegI ereg, memory mem) %{ // emit_reg_mem 416void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) { 417 // There is no index & no scale, use form without SIB byte 418 if ((index == 0x4) && 419 (scale == 0) && (base != ESP_enc)) { 420 // If no displacement, mode is 0x0; unless base is [EBP] 421 if ( (displace == 0) && (base != EBP_enc) ) { 422 emit_rm(cbuf, 0x0, reg_encoding, base); 423 } 424 else { // If 8-bit displacement, mode 0x1 425 if ((displace >= -128) && (displace <= 127) 426 && (disp_reloc == relocInfo::none) ) { 427 emit_rm(cbuf, 0x1, reg_encoding, base); 428 emit_d8(cbuf, displace); 429 } 430 else { // If 32-bit displacement 431 if (base == -1) { // Special flag for absolute address 432 emit_rm(cbuf, 0x0, reg_encoding, 0x5); 433 // (manual lies; no SIB needed here) 434 if ( disp_reloc != relocInfo::none ) { 435 emit_d32_reloc(cbuf, displace, disp_reloc, 1); 436 } else { 437 emit_d32 (cbuf, displace); 438 } 439 } 440 else { // Normal base + offset 441 emit_rm(cbuf, 0x2, reg_encoding, base); 442 if ( disp_reloc != relocInfo::none ) { 443 emit_d32_reloc(cbuf, displace, disp_reloc, 1); 444 } else { 445 emit_d32 (cbuf, displace); 446 } 447 } 448 } 449 } 450 } 451 else { // Else, encode with the SIB byte 452 // If no displacement, mode is 0x0; unless base is [EBP] 453 if (displace == 0 && (base != EBP_enc)) { // If no displacement 454 emit_rm(cbuf, 0x0, reg_encoding, 0x4); 455 emit_rm(cbuf, scale, index, base); 456 } 457 else { // If 8-bit displacement, mode 0x1 458 if ((displace >= -128) && (displace <= 127) 459 && (disp_reloc == relocInfo::none) ) { 460 emit_rm(cbuf, 0x1, reg_encoding, 0x4); 461 emit_rm(cbuf, scale, index, base); 462 emit_d8(cbuf, displace); 463 } 464 else { // If 32-bit displacement 465 if (base == 0x04 ) { 466 emit_rm(cbuf, 0x2, reg_encoding, 0x4); 467 emit_rm(cbuf, scale, index, 0x04); 468 } else { 469 emit_rm(cbuf, 0x2, reg_encoding, 0x4); 470 emit_rm(cbuf, scale, index, base); 471 } 472 if ( disp_reloc != relocInfo::none ) { 473 emit_d32_reloc(cbuf, displace, disp_reloc, 1); 474 } else { 475 emit_d32 (cbuf, displace); 476 } 477 } 478 } 479 } 480} 481 482 483void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) { 484 if( dst_encoding == src_encoding ) { 485 // reg-reg copy, use an empty encoding 486 } else { 487 emit_opcode( cbuf, 0x8B ); 488 emit_rm(cbuf, 0x3, dst_encoding, src_encoding ); 489 } 490} 491 492void emit_cmpfp_fixup(MacroAssembler& _masm) { 493 Label exit; 494 __ jccb(Assembler::noParity, exit); 495 __ pushf(); 496 // 497 // comiss/ucomiss instructions set ZF,PF,CF flags and 498 // zero OF,AF,SF for NaN values. 499 // Fixup flags by zeroing ZF,PF so that compare of NaN 500 // values returns 'less than' result (CF is set). 501 // Leave the rest of flags unchanged. 502 // 503 // 7 6 5 4 3 2 1 0 504 // |S|Z|r|A|r|P|r|C| (r - reserved bit) 505 // 0 0 1 0 1 0 1 1 (0x2B) 506 // 507 __ andl(Address(rsp, 0), 0xffffff2b); 508 __ popf(); 509 __ bind(exit); 510} 511 512void emit_cmpfp3(MacroAssembler& _masm, Register dst) { 513 Label done; 514 __ movl(dst, -1); 515 __ jcc(Assembler::parity, done); 516 __ jcc(Assembler::below, done); 517 __ setb(Assembler::notEqual, dst); 518 __ movzbl(dst, dst); 519 __ bind(done); 520} 521 522 523//============================================================================= 524const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty; 525 526int Compile::ConstantTable::calculate_table_base_offset() const { 527 return 0; // absolute addressing, no offset 528} 529 530bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } 531void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) { 532 ShouldNotReachHere(); 533} 534 535void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { 536 // Empty encoding 537} 538 539uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { 540 return 0; 541} 542 543#ifndef PRODUCT 544void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { 545 st->print("# MachConstantBaseNode (empty encoding)"); 546} 547#endif 548 549 550//============================================================================= 551#ifndef PRODUCT 552void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const { 553 Compile* C = ra_->C; 554 555 int framesize = C->frame_size_in_bytes(); 556 int bangsize = C->bang_size_in_bytes(); 557 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); 558 // Remove wordSize for return addr which is already pushed. 559 framesize -= wordSize; 560 561 if (C->need_stack_bang(bangsize)) { 562 framesize -= wordSize; 563 st->print("# stack bang (%d bytes)", bangsize); 564 st->print("\n\t"); 565 st->print("PUSH EBP\t# Save EBP"); 566 if (PreserveFramePointer) { 567 st->print("\n\t"); 568 st->print("MOV EBP, ESP\t# Save the caller's SP into EBP"); 569 } 570 if (framesize) { 571 st->print("\n\t"); 572 st->print("SUB ESP, #%d\t# Create frame",framesize); 573 } 574 } else { 575 st->print("SUB ESP, #%d\t# Create frame",framesize); 576 st->print("\n\t"); 577 framesize -= wordSize; 578 st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize); 579 if (PreserveFramePointer) { 580 st->print("\n\t"); 581 st->print("MOV EBP, ESP\t# Save the caller's SP into EBP"); 582 if (framesize > 0) { 583 st->print("\n\t"); 584 st->print("ADD EBP, #%d", framesize); 585 } 586 } 587 } 588 589 if (VerifyStackAtCalls) { 590 st->print("\n\t"); 591 framesize -= wordSize; 592 st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize); 593 } 594 595 if( C->in_24_bit_fp_mode() ) { 596 st->print("\n\t"); 597 st->print("FLDCW \t# load 24 bit fpu control word"); 598 } 599 if (UseSSE >= 2 && VerifyFPU) { 600 st->print("\n\t"); 601 st->print("# verify FPU stack (must be clean on entry)"); 602 } 603 604#ifdef ASSERT 605 if (VerifyStackAtCalls) { 606 st->print("\n\t"); 607 st->print("# stack alignment check"); 608 } 609#endif 610 st->cr(); 611} 612#endif 613 614 615void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 616 Compile* C = ra_->C; 617 MacroAssembler _masm(&cbuf); 618 619 int framesize = C->frame_size_in_bytes(); 620 int bangsize = C->bang_size_in_bytes(); 621 622 __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode()); 623 624 C->set_frame_complete(cbuf.insts_size()); 625 626 if (C->has_mach_constant_base_node()) { 627 // NOTE: We set the table base offset here because users might be 628 // emitted before MachConstantBaseNode. 629 Compile::ConstantTable& constant_table = C->constant_table(); 630 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); 631 } 632} 633 634uint MachPrologNode::size(PhaseRegAlloc *ra_) const { 635 return MachNode::size(ra_); // too many variables; just compute it the hard way 636} 637 638int MachPrologNode::reloc() const { 639 return 0; // a large enough number 640} 641 642//============================================================================= 643#ifndef PRODUCT 644void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 645 Compile *C = ra_->C; 646 int framesize = C->frame_size_in_bytes(); 647 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); 648 // Remove two words for return addr and rbp, 649 framesize -= 2*wordSize; 650 651 if (C->max_vector_size() > 16) { 652 st->print("VZEROUPPER"); 653 st->cr(); st->print("\t"); 654 } 655 if (C->in_24_bit_fp_mode()) { 656 st->print("FLDCW standard control word"); 657 st->cr(); st->print("\t"); 658 } 659 if (framesize) { 660 st->print("ADD ESP,%d\t# Destroy frame",framesize); 661 st->cr(); st->print("\t"); 662 } 663 st->print_cr("POPL EBP"); st->print("\t"); 664 if (do_polling() && C->is_method_compilation()) { 665 st->print("TEST PollPage,EAX\t! Poll Safepoint"); 666 st->cr(); st->print("\t"); 667 } 668} 669#endif 670 671void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 672 Compile *C = ra_->C; 673 MacroAssembler _masm(&cbuf); 674 675 if (C->max_vector_size() > 16) { 676 // Clear upper bits of YMM registers when current compiled code uses 677 // wide vectors to avoid AVX <-> SSE transition penalty during call. 678 _masm.vzeroupper(); 679 } 680 // If method set FPU control word, restore to standard control word 681 if (C->in_24_bit_fp_mode()) { 682 _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); 683 } 684 685 int framesize = C->frame_size_in_bytes(); 686 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); 687 // Remove two words for return addr and rbp, 688 framesize -= 2*wordSize; 689 690 // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here 691 692 if (framesize >= 128) { 693 emit_opcode(cbuf, 0x81); // add SP, #framesize 694 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 695 emit_d32(cbuf, framesize); 696 } else if (framesize) { 697 emit_opcode(cbuf, 0x83); // add SP, #framesize 698 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 699 emit_d8(cbuf, framesize); 700 } 701 702 emit_opcode(cbuf, 0x58 | EBP_enc); 703 704 if (StackReservedPages > 0 && C->has_reserved_stack_access()) { 705 __ reserved_stack_check(); 706 } 707 708 if (do_polling() && C->is_method_compilation()) { 709 cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0); 710 emit_opcode(cbuf,0x85); 711 emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX 712 emit_d32(cbuf, (intptr_t)os::get_polling_page()); 713 } 714} 715 716uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { 717 Compile *C = ra_->C; 718 // If method set FPU control word, restore to standard control word 719 int size = C->in_24_bit_fp_mode() ? 6 : 0; 720 if (C->max_vector_size() > 16) size += 3; // vzeroupper 721 if (do_polling() && C->is_method_compilation()) size += 6; 722 723 int framesize = C->frame_size_in_bytes(); 724 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); 725 // Remove two words for return addr and rbp, 726 framesize -= 2*wordSize; 727 728 size++; // popl rbp, 729 730 if (framesize >= 128) { 731 size += 6; 732 } else { 733 size += framesize ? 3 : 0; 734 } 735 size += 64; // added to support ReservedStackAccess 736 return size; 737} 738 739int MachEpilogNode::reloc() const { 740 return 0; // a large enough number 741} 742 743const Pipeline * MachEpilogNode::pipeline() const { 744 return MachNode::pipeline_class(); 745} 746 747int MachEpilogNode::safepoint_offset() const { return 0; } 748 749//============================================================================= 750 751enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack }; 752static enum RC rc_class( OptoReg::Name reg ) { 753 754 if( !OptoReg::is_valid(reg) ) return rc_bad; 755 if (OptoReg::is_stack(reg)) return rc_stack; 756 757 VMReg r = OptoReg::as_VMReg(reg); 758 if (r->is_Register()) return rc_int; 759 if (r->is_FloatRegister()) { 760 assert(UseSSE < 2, "shouldn't be used in SSE2+ mode"); 761 return rc_float; 762 } 763 assert(r->is_XMMRegister(), "must be"); 764 return rc_xmm; 765} 766 767static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg, 768 int opcode, const char *op_str, int size, outputStream* st ) { 769 if( cbuf ) { 770 emit_opcode (*cbuf, opcode ); 771 encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none); 772#ifndef PRODUCT 773 } else if( !do_size ) { 774 if( size != 0 ) st->print("\n\t"); 775 if( opcode == 0x8B || opcode == 0x89 ) { // MOV 776 if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset); 777 else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]); 778 } else { // FLD, FST, PUSH, POP 779 st->print("%s [ESP + #%d]",op_str,offset); 780 } 781#endif 782 } 783 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 784 return size+3+offset_size; 785} 786 787// Helper for XMM registers. Extra opcode bits, limited syntax. 788static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load, 789 int offset, int reg_lo, int reg_hi, int size, outputStream* st ) { 790 int in_size_in_bits = Assembler::EVEX_32bit; 791 int evex_encoding = 0; 792 if (reg_lo+1 == reg_hi) { 793 in_size_in_bits = Assembler::EVEX_64bit; 794 evex_encoding = Assembler::VEX_W; 795 } 796 if (cbuf) { 797 MacroAssembler _masm(cbuf); 798 if (reg_lo+1 == reg_hi) { // double move? 799 if (is_load) { 800 __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset)); 801 } else { 802 __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo])); 803 } 804 } else { 805 if (is_load) { 806 __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset)); 807 } else { 808 __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo])); 809 } 810 } 811#ifndef PRODUCT 812 } else if (!do_size) { 813 if (size != 0) st->print("\n\t"); 814 if (reg_lo+1 == reg_hi) { // double move? 815 if (is_load) st->print("%s %s,[ESP + #%d]", 816 UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD", 817 Matcher::regName[reg_lo], offset); 818 else st->print("MOVSD [ESP + #%d],%s", 819 offset, Matcher::regName[reg_lo]); 820 } else { 821 if (is_load) st->print("MOVSS %s,[ESP + #%d]", 822 Matcher::regName[reg_lo], offset); 823 else st->print("MOVSS [ESP + #%d],%s", 824 offset, Matcher::regName[reg_lo]); 825 } 826#endif 827 } 828 bool is_single_byte = false; 829 if ((UseAVX > 2) && (offset != 0)) { 830 is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding); 831 } 832 int offset_size = 0; 833 if (UseAVX > 2 ) { 834 offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); 835 } else { 836 offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 837 } 838 size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX 839 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 840 return size+5+offset_size; 841} 842 843 844static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 845 int src_hi, int dst_hi, int size, outputStream* st ) { 846 if (cbuf) { 847 MacroAssembler _masm(cbuf); 848 if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move? 849 __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]), 850 as_XMMRegister(Matcher::_regEncode[src_lo])); 851 } else { 852 __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]), 853 as_XMMRegister(Matcher::_regEncode[src_lo])); 854 } 855#ifndef PRODUCT 856 } else if (!do_size) { 857 if (size != 0) st->print("\n\t"); 858 if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers 859 if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move? 860 st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 861 } else { 862 st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 863 } 864 } else { 865 if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move? 866 st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 867 } else { 868 st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 869 } 870 } 871#endif 872 } 873 // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix. 874 // Only MOVAPS SSE prefix uses 1 byte. EVEX uses an additional 2 bytes. 875 int sz = (UseAVX > 2) ? 6 : 4; 876 if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) && 877 UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3; 878 return size + sz; 879} 880 881static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 882 int src_hi, int dst_hi, int size, outputStream* st ) { 883 // 32-bit 884 if (cbuf) { 885 MacroAssembler _masm(cbuf); 886 __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]), 887 as_Register(Matcher::_regEncode[src_lo])); 888#ifndef PRODUCT 889 } else if (!do_size) { 890 st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); 891#endif 892 } 893 return (UseAVX> 2) ? 6 : 4; 894} 895 896 897static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 898 int src_hi, int dst_hi, int size, outputStream* st ) { 899 // 32-bit 900 if (cbuf) { 901 MacroAssembler _masm(cbuf); 902 __ movdl(as_Register(Matcher::_regEncode[dst_lo]), 903 as_XMMRegister(Matcher::_regEncode[src_lo])); 904#ifndef PRODUCT 905 } else if (!do_size) { 906 st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); 907#endif 908 } 909 return (UseAVX> 2) ? 6 : 4; 910} 911 912static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) { 913 if( cbuf ) { 914 emit_opcode(*cbuf, 0x8B ); 915 emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] ); 916#ifndef PRODUCT 917 } else if( !do_size ) { 918 if( size != 0 ) st->print("\n\t"); 919 st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]); 920#endif 921 } 922 return size+2; 923} 924 925static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi, 926 int offset, int size, outputStream* st ) { 927 if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there 928 if( cbuf ) { 929 emit_opcode( *cbuf, 0xD9 ); // FLD (i.e., push it) 930 emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] ); 931#ifndef PRODUCT 932 } else if( !do_size ) { 933 if( size != 0 ) st->print("\n\t"); 934 st->print("FLD %s",Matcher::regName[src_lo]); 935#endif 936 } 937 size += 2; 938 } 939 940 int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/; 941 const char *op_str; 942 int op; 943 if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store? 944 op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D "; 945 op = 0xDD; 946 } else { // 32-bit store 947 op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S "; 948 op = 0xD9; 949 assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" ); 950 } 951 952 return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st); 953} 954 955// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad. 956static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 957 int src_hi, int dst_hi, uint ireg, outputStream* st); 958 959static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 960 int stack_offset, int reg, uint ireg, outputStream* st); 961 962static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset, 963 int dst_offset, uint ireg, outputStream* st) { 964 int calc_size = 0; 965 int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4); 966 int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4); 967 switch (ireg) { 968 case Op_VecS: 969 calc_size = 3+src_offset_size + 3+dst_offset_size; 970 break; 971 case Op_VecD: 972 calc_size = 3+src_offset_size + 3+dst_offset_size; 973 src_offset += 4; 974 dst_offset += 4; 975 src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4); 976 dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4); 977 calc_size += 3+src_offset_size + 3+dst_offset_size; 978 break; 979 case Op_VecX: 980 case Op_VecY: 981 case Op_VecZ: 982 calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size; 983 break; 984 default: 985 ShouldNotReachHere(); 986 } 987 if (cbuf) { 988 MacroAssembler _masm(cbuf); 989 int offset = __ offset(); 990 switch (ireg) { 991 case Op_VecS: 992 __ pushl(Address(rsp, src_offset)); 993 __ popl (Address(rsp, dst_offset)); 994 break; 995 case Op_VecD: 996 __ pushl(Address(rsp, src_offset)); 997 __ popl (Address(rsp, dst_offset)); 998 __ pushl(Address(rsp, src_offset+4)); 999 __ popl (Address(rsp, dst_offset+4)); 1000 break; 1001 case Op_VecX: 1002 __ movdqu(Address(rsp, -16), xmm0); 1003 __ movdqu(xmm0, Address(rsp, src_offset)); 1004 __ movdqu(Address(rsp, dst_offset), xmm0); 1005 __ movdqu(xmm0, Address(rsp, -16)); 1006 break; 1007 case Op_VecY: 1008 __ vmovdqu(Address(rsp, -32), xmm0); 1009 __ vmovdqu(xmm0, Address(rsp, src_offset)); 1010 __ vmovdqu(Address(rsp, dst_offset), xmm0); 1011 __ vmovdqu(xmm0, Address(rsp, -32)); 1012 case Op_VecZ: 1013 __ evmovdqul(Address(rsp, -64), xmm0, 2); 1014 __ evmovdqul(xmm0, Address(rsp, src_offset), 2); 1015 __ evmovdqul(Address(rsp, dst_offset), xmm0, 2); 1016 __ evmovdqul(xmm0, Address(rsp, -64), 2); 1017 break; 1018 default: 1019 ShouldNotReachHere(); 1020 } 1021 int size = __ offset() - offset; 1022 assert(size == calc_size, "incorrect size calculattion"); 1023 return size; 1024#ifndef PRODUCT 1025 } else if (!do_size) { 1026 switch (ireg) { 1027 case Op_VecS: 1028 st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t" 1029 "popl [rsp + #%d]", 1030 src_offset, dst_offset); 1031 break; 1032 case Op_VecD: 1033 st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t" 1034 "popq [rsp + #%d]\n\t" 1035 "pushl [rsp + #%d]\n\t" 1036 "popq [rsp + #%d]", 1037 src_offset, dst_offset, src_offset+4, dst_offset+4); 1038 break; 1039 case Op_VecX: 1040 st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t" 1041 "movdqu xmm0, [rsp + #%d]\n\t" 1042 "movdqu [rsp + #%d], xmm0\n\t" 1043 "movdqu xmm0, [rsp - #16]", 1044 src_offset, dst_offset); 1045 break; 1046 case Op_VecY: 1047 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t" 1048 "vmovdqu xmm0, [rsp + #%d]\n\t" 1049 "vmovdqu [rsp + #%d], xmm0\n\t" 1050 "vmovdqu xmm0, [rsp - #32]", 1051 src_offset, dst_offset); 1052 case Op_VecZ: 1053 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t" 1054 "vmovdqu xmm0, [rsp + #%d]\n\t" 1055 "vmovdqu [rsp + #%d], xmm0\n\t" 1056 "vmovdqu xmm0, [rsp - #64]", 1057 src_offset, dst_offset); 1058 break; 1059 default: 1060 ShouldNotReachHere(); 1061 } 1062#endif 1063 } 1064 return calc_size; 1065} 1066 1067uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { 1068 // Get registers to move 1069 OptoReg::Name src_second = ra_->get_reg_second(in(1)); 1070 OptoReg::Name src_first = ra_->get_reg_first(in(1)); 1071 OptoReg::Name dst_second = ra_->get_reg_second(this ); 1072 OptoReg::Name dst_first = ra_->get_reg_first(this ); 1073 1074 enum RC src_second_rc = rc_class(src_second); 1075 enum RC src_first_rc = rc_class(src_first); 1076 enum RC dst_second_rc = rc_class(dst_second); 1077 enum RC dst_first_rc = rc_class(dst_first); 1078 1079 assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); 1080 1081 // Generate spill code! 1082 int size = 0; 1083 1084 if( src_first == dst_first && src_second == dst_second ) 1085 return size; // Self copy, no move 1086 1087 if (bottom_type()->isa_vect() != NULL) { 1088 uint ireg = ideal_reg(); 1089 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity"); 1090 assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity"); 1091 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity"); 1092 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1093 // mem -> mem 1094 int src_offset = ra_->reg2offset(src_first); 1095 int dst_offset = ra_->reg2offset(dst_first); 1096 return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st); 1097 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1098 return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st); 1099 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1100 int stack_offset = ra_->reg2offset(dst_first); 1101 return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st); 1102 } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) { 1103 int stack_offset = ra_->reg2offset(src_first); 1104 return vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st); 1105 } else { 1106 ShouldNotReachHere(); 1107 } 1108 } 1109 1110 // -------------------------------------- 1111 // Check for mem-mem move. push/pop to move. 1112 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1113 if( src_second == dst_first ) { // overlapping stack copy ranges 1114 assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" ); 1115 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1116 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1117 src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits 1118 } 1119 // move low bits 1120 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st); 1121 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st); 1122 if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits 1123 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1124 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1125 } 1126 return size; 1127 } 1128 1129 // -------------------------------------- 1130 // Check for integer reg-reg copy 1131 if( src_first_rc == rc_int && dst_first_rc == rc_int ) 1132 size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st); 1133 1134 // Check for integer store 1135 if( src_first_rc == rc_int && dst_first_rc == rc_stack ) 1136 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st); 1137 1138 // Check for integer load 1139 if( dst_first_rc == rc_int && src_first_rc == rc_stack ) 1140 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st); 1141 1142 // Check for integer reg-xmm reg copy 1143 if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) { 1144 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1145 "no 64 bit integer-float reg moves" ); 1146 return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1147 } 1148 // -------------------------------------- 1149 // Check for float reg-reg copy 1150 if( src_first_rc == rc_float && dst_first_rc == rc_float ) { 1151 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1152 (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" ); 1153 if( cbuf ) { 1154 1155 // Note the mucking with the register encode to compensate for the 0/1 1156 // indexing issue mentioned in a comment in the reg_def sections 1157 // for FPR registers many lines above here. 1158 1159 if( src_first != FPR1L_num ) { 1160 emit_opcode (*cbuf, 0xD9 ); // FLD ST(i) 1161 emit_d8 (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 ); 1162 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) 1163 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); 1164 } else { 1165 emit_opcode (*cbuf, 0xDD ); // FST ST(i) 1166 emit_d8 (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 ); 1167 } 1168#ifndef PRODUCT 1169 } else if( !do_size ) { 1170 if( size != 0 ) st->print("\n\t"); 1171 if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]); 1172 else st->print( "FST %s", Matcher::regName[dst_first]); 1173#endif 1174 } 1175 return size + ((src_first != FPR1L_num) ? 2+2 : 2); 1176 } 1177 1178 // Check for float store 1179 if( src_first_rc == rc_float && dst_first_rc == rc_stack ) { 1180 return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st); 1181 } 1182 1183 // Check for float load 1184 if( dst_first_rc == rc_float && src_first_rc == rc_stack ) { 1185 int offset = ra_->reg2offset(src_first); 1186 const char *op_str; 1187 int op; 1188 if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load? 1189 op_str = "FLD_D"; 1190 op = 0xDD; 1191 } else { // 32-bit load 1192 op_str = "FLD_S"; 1193 op = 0xD9; 1194 assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" ); 1195 } 1196 if( cbuf ) { 1197 emit_opcode (*cbuf, op ); 1198 encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none); 1199 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) 1200 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); 1201#ifndef PRODUCT 1202 } else if( !do_size ) { 1203 if( size != 0 ) st->print("\n\t"); 1204 st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]); 1205#endif 1206 } 1207 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 1208 return size + 3+offset_size+2; 1209 } 1210 1211 // Check for xmm reg-reg copy 1212 if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1213 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1214 (src_first+1 == src_second && dst_first+1 == dst_second), 1215 "no non-adjacent float-moves" ); 1216 return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1217 } 1218 1219 // Check for xmm reg-integer reg copy 1220 if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) { 1221 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1222 "no 64 bit float-integer reg moves" ); 1223 return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1224 } 1225 1226 // Check for xmm store 1227 if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1228 return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st); 1229 } 1230 1231 // Check for float xmm load 1232 if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) { 1233 return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st); 1234 } 1235 1236 // Copy from float reg to xmm reg 1237 if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) { 1238 // copy to the top of stack from floating point reg 1239 // and use LEA to preserve flags 1240 if( cbuf ) { 1241 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP-8] 1242 emit_rm(*cbuf, 0x1, ESP_enc, 0x04); 1243 emit_rm(*cbuf, 0x0, 0x04, ESP_enc); 1244 emit_d8(*cbuf,0xF8); 1245#ifndef PRODUCT 1246 } else if( !do_size ) { 1247 if( size != 0 ) st->print("\n\t"); 1248 st->print("LEA ESP,[ESP-8]"); 1249#endif 1250 } 1251 size += 4; 1252 1253 size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st); 1254 1255 // Copy from the temp memory to the xmm reg. 1256 size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st); 1257 1258 if( cbuf ) { 1259 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8] 1260 emit_rm(*cbuf, 0x1, ESP_enc, 0x04); 1261 emit_rm(*cbuf, 0x0, 0x04, ESP_enc); 1262 emit_d8(*cbuf,0x08); 1263#ifndef PRODUCT 1264 } else if( !do_size ) { 1265 if( size != 0 ) st->print("\n\t"); 1266 st->print("LEA ESP,[ESP+8]"); 1267#endif 1268 } 1269 size += 4; 1270 return size; 1271 } 1272 1273 assert( size > 0, "missed a case" ); 1274 1275 // -------------------------------------------------------------------- 1276 // Check for second bits still needing moving. 1277 if( src_second == dst_second ) 1278 return size; // Self copy; no move 1279 assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" ); 1280 1281 // Check for second word int-int move 1282 if( src_second_rc == rc_int && dst_second_rc == rc_int ) 1283 return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st); 1284 1285 // Check for second word integer store 1286 if( src_second_rc == rc_int && dst_second_rc == rc_stack ) 1287 return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st); 1288 1289 // Check for second word integer load 1290 if( dst_second_rc == rc_int && src_second_rc == rc_stack ) 1291 return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st); 1292 1293 1294 Unimplemented(); 1295 return 0; // Mute compiler 1296} 1297 1298#ifndef PRODUCT 1299void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const { 1300 implementation( NULL, ra_, false, st ); 1301} 1302#endif 1303 1304void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1305 implementation( &cbuf, ra_, false, NULL ); 1306} 1307 1308uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { 1309 return implementation( NULL, ra_, true, NULL ); 1310} 1311 1312 1313//============================================================================= 1314#ifndef PRODUCT 1315void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1316 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1317 int reg = ra_->get_reg_first(this); 1318 st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset); 1319} 1320#endif 1321 1322void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1323 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1324 int reg = ra_->get_encode(this); 1325 if( offset >= 128 ) { 1326 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] 1327 emit_rm(cbuf, 0x2, reg, 0x04); 1328 emit_rm(cbuf, 0x0, 0x04, ESP_enc); 1329 emit_d32(cbuf, offset); 1330 } 1331 else { 1332 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] 1333 emit_rm(cbuf, 0x1, reg, 0x04); 1334 emit_rm(cbuf, 0x0, 0x04, ESP_enc); 1335 emit_d8(cbuf, offset); 1336 } 1337} 1338 1339uint BoxLockNode::size(PhaseRegAlloc *ra_) const { 1340 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1341 if( offset >= 128 ) { 1342 return 7; 1343 } 1344 else { 1345 return 4; 1346 } 1347} 1348 1349//============================================================================= 1350#ifndef PRODUCT 1351void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1352 st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check"); 1353 st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub"); 1354 st->print_cr("\tNOP"); 1355 st->print_cr("\tNOP"); 1356 if( !OptoBreakpoint ) 1357 st->print_cr("\tNOP"); 1358} 1359#endif 1360 1361void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1362 MacroAssembler masm(&cbuf); 1363#ifdef ASSERT 1364 uint insts_size = cbuf.insts_size(); 1365#endif 1366 masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes())); 1367 masm.jump_cc(Assembler::notEqual, 1368 RuntimeAddress(SharedRuntime::get_ic_miss_stub())); 1369 /* WARNING these NOPs are critical so that verified entry point is properly 1370 aligned for patching by NativeJump::patch_verified_entry() */ 1371 int nops_cnt = 2; 1372 if( !OptoBreakpoint ) // Leave space for int3 1373 nops_cnt += 1; 1374 masm.nop(nops_cnt); 1375 1376 assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node"); 1377} 1378 1379uint MachUEPNode::size(PhaseRegAlloc *ra_) const { 1380 return OptoBreakpoint ? 11 : 12; 1381} 1382 1383 1384//============================================================================= 1385 1386int Matcher::regnum_to_fpu_offset(int regnum) { 1387 return regnum - 32; // The FP registers are in the second chunk 1388} 1389 1390// This is UltraSparc specific, true just means we have fast l2f conversion 1391const bool Matcher::convL2FSupported(void) { 1392 return true; 1393} 1394 1395// Is this branch offset short enough that a short branch can be used? 1396// 1397// NOTE: If the platform does not provide any short branch variants, then 1398// this method should return false for offset 0. 1399bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { 1400 // The passed offset is relative to address of the branch. 1401 // On 86 a branch displacement is calculated relative to address 1402 // of a next instruction. 1403 offset -= br_size; 1404 1405 // the short version of jmpConUCF2 contains multiple branches, 1406 // making the reach slightly less 1407 if (rule == jmpConUCF2_rule) 1408 return (-126 <= offset && offset <= 125); 1409 return (-128 <= offset && offset <= 127); 1410} 1411 1412const bool Matcher::isSimpleConstant64(jlong value) { 1413 // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. 1414 return false; 1415} 1416 1417// The ecx parameter to rep stos for the ClearArray node is in dwords. 1418const bool Matcher::init_array_count_is_in_bytes = false; 1419 1420// Threshold size for cleararray. 1421const int Matcher::init_array_short_size = 8 * BytesPerLong; 1422 1423// Needs 2 CMOV's for longs. 1424const int Matcher::long_cmove_cost() { return 1; } 1425 1426// No CMOVF/CMOVD with SSE/SSE2 1427const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; } 1428 1429// Does the CPU require late expand (see block.cpp for description of late expand)? 1430const bool Matcher::require_postalloc_expand = false; 1431 1432// Should the Matcher clone shifts on addressing modes, expecting them to 1433// be subsumed into complex addressing expressions or compute them into 1434// registers? True for Intel but false for most RISCs 1435const bool Matcher::clone_shift_expressions = true; 1436 1437// Do we need to mask the count passed to shift instructions or does 1438// the cpu only look at the lower 5/6 bits anyway? 1439const bool Matcher::need_masked_shift_count = false; 1440 1441bool Matcher::narrow_oop_use_complex_address() { 1442 ShouldNotCallThis(); 1443 return true; 1444} 1445 1446bool Matcher::narrow_klass_use_complex_address() { 1447 ShouldNotCallThis(); 1448 return true; 1449} 1450 1451 1452// Is it better to copy float constants, or load them directly from memory? 1453// Intel can load a float constant from a direct address, requiring no 1454// extra registers. Most RISCs will have to materialize an address into a 1455// register first, so they would do better to copy the constant from stack. 1456const bool Matcher::rematerialize_float_constants = true; 1457 1458// If CPU can load and store mis-aligned doubles directly then no fixup is 1459// needed. Else we split the double into 2 integer pieces and move it 1460// piece-by-piece. Only happens when passing doubles into C code as the 1461// Java calling convention forces doubles to be aligned. 1462const bool Matcher::misaligned_doubles_ok = true; 1463 1464 1465void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) { 1466 // Get the memory operand from the node 1467 uint numopnds = node->num_opnds(); // Virtual call for number of operands 1468 uint skipped = node->oper_input_base(); // Sum of leaves skipped so far 1469 assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" ); 1470 uint opcnt = 1; // First operand 1471 uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand 1472 while( idx >= skipped+num_edges ) { 1473 skipped += num_edges; 1474 opcnt++; // Bump operand count 1475 assert( opcnt < numopnds, "Accessing non-existent operand" ); 1476 num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand 1477 } 1478 1479 MachOper *memory = node->_opnds[opcnt]; 1480 MachOper *new_memory = NULL; 1481 switch (memory->opcode()) { 1482 case DIRECT: 1483 case INDOFFSET32X: 1484 // No transformation necessary. 1485 return; 1486 case INDIRECT: 1487 new_memory = new indirect_win95_safeOper( ); 1488 break; 1489 case INDOFFSET8: 1490 new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0)); 1491 break; 1492 case INDOFFSET32: 1493 new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0)); 1494 break; 1495 case INDINDEXOFFSET: 1496 new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0)); 1497 break; 1498 case INDINDEXSCALE: 1499 new_memory = new indIndexScale_win95_safeOper(memory->scale()); 1500 break; 1501 case INDINDEXSCALEOFFSET: 1502 new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0)); 1503 break; 1504 case LOAD_LONG_INDIRECT: 1505 case LOAD_LONG_INDOFFSET32: 1506 // Does not use EBP as address register, use { EDX, EBX, EDI, ESI} 1507 return; 1508 default: 1509 assert(false, "unexpected memory operand in pd_implicit_null_fixup()"); 1510 return; 1511 } 1512 node->_opnds[opcnt] = new_memory; 1513} 1514 1515// Advertise here if the CPU requires explicit rounding operations 1516// to implement the UseStrictFP mode. 1517const bool Matcher::strict_fp_requires_explicit_rounding = true; 1518 1519// Are floats conerted to double when stored to stack during deoptimization? 1520// On x32 it is stored with convertion only when FPU is used for floats. 1521bool Matcher::float_in_double() { return (UseSSE == 0); } 1522 1523// Do ints take an entire long register or just half? 1524const bool Matcher::int_in_long = false; 1525 1526// Return whether or not this register is ever used as an argument. This 1527// function is used on startup to build the trampoline stubs in generateOptoStub. 1528// Registers not mentioned will be killed by the VM call in the trampoline, and 1529// arguments in those registers not be available to the callee. 1530bool Matcher::can_be_java_arg( int reg ) { 1531 if( reg == ECX_num || reg == EDX_num ) return true; 1532 if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true; 1533 if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true; 1534 return false; 1535} 1536 1537bool Matcher::is_spillable_arg( int reg ) { 1538 return can_be_java_arg(reg); 1539} 1540 1541bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { 1542 // Use hardware integer DIV instruction when 1543 // it is faster than a code which use multiply. 1544 // Only when constant divisor fits into 32 bit 1545 // (min_jint is excluded to get only correct 1546 // positive 32 bit values from negative). 1547 return VM_Version::has_fast_idiv() && 1548 (divisor == (int)divisor && divisor != min_jint); 1549} 1550 1551// Register for DIVI projection of divmodI 1552RegMask Matcher::divI_proj_mask() { 1553 return EAX_REG_mask(); 1554} 1555 1556// Register for MODI projection of divmodI 1557RegMask Matcher::modI_proj_mask() { 1558 return EDX_REG_mask(); 1559} 1560 1561// Register for DIVL projection of divmodL 1562RegMask Matcher::divL_proj_mask() { 1563 ShouldNotReachHere(); 1564 return RegMask(); 1565} 1566 1567// Register for MODL projection of divmodL 1568RegMask Matcher::modL_proj_mask() { 1569 ShouldNotReachHere(); 1570 return RegMask(); 1571} 1572 1573const RegMask Matcher::method_handle_invoke_SP_save_mask() { 1574 return NO_REG_mask(); 1575} 1576 1577// Returns true if the high 32 bits of the value is known to be zero. 1578bool is_operand_hi32_zero(Node* n) { 1579 int opc = n->Opcode(); 1580 if (opc == Op_AndL) { 1581 Node* o2 = n->in(2); 1582 if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) { 1583 return true; 1584 } 1585 } 1586 if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) { 1587 return true; 1588 } 1589 return false; 1590} 1591 1592%} 1593 1594//----------ENCODING BLOCK----------------------------------------------------- 1595// This block specifies the encoding classes used by the compiler to output 1596// byte streams. Encoding classes generate functions which are called by 1597// Machine Instruction Nodes in order to generate the bit encoding of the 1598// instruction. Operands specify their base encoding interface with the 1599// interface keyword. There are currently supported four interfaces, 1600// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an 1601// operand to generate a function which returns its register number when 1602// queried. CONST_INTER causes an operand to generate a function which 1603// returns the value of the constant when queried. MEMORY_INTER causes an 1604// operand to generate four functions which return the Base Register, the 1605// Index Register, the Scale Value, and the Offset Value of the operand when 1606// queried. COND_INTER causes an operand to generate six functions which 1607// return the encoding code (ie - encoding bits for the instruction) 1608// associated with each basic boolean condition for a conditional instruction. 1609// Instructions specify two basic values for encoding. They use the 1610// ins_encode keyword to specify their encoding class (which must be one of 1611// the class names specified in the encoding block), and they use the 1612// opcode keyword to specify, in order, their primary, secondary, and 1613// tertiary opcode. Only the opcode sections which a particular instruction 1614// needs for encoding need to be specified. 1615encode %{ 1616 // Build emit functions for each basic byte or larger field in the intel 1617 // encoding scheme (opcode, rm, sib, immediate), and call them from C++ 1618 // code in the enc_class source block. Emit functions will live in the 1619 // main source block for now. In future, we can generalize this by 1620 // adding a syntax that specifies the sizes of fields in an order, 1621 // so that the adlc can build the emit functions automagically 1622 1623 // Emit primary opcode 1624 enc_class OpcP %{ 1625 emit_opcode(cbuf, $primary); 1626 %} 1627 1628 // Emit secondary opcode 1629 enc_class OpcS %{ 1630 emit_opcode(cbuf, $secondary); 1631 %} 1632 1633 // Emit opcode directly 1634 enc_class Opcode(immI d8) %{ 1635 emit_opcode(cbuf, $d8$$constant); 1636 %} 1637 1638 enc_class SizePrefix %{ 1639 emit_opcode(cbuf,0x66); 1640 %} 1641 1642 enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many) 1643 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1644 %} 1645 1646 enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{ // OpcRegReg(Many) 1647 emit_opcode(cbuf,$opcode$$constant); 1648 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1649 %} 1650 1651 enc_class mov_r32_imm0( rRegI dst ) %{ 1652 emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32 1653 emit_d32 ( cbuf, 0x0 ); // imm32==0x0 1654 %} 1655 1656 enc_class cdq_enc %{ 1657 // Full implementation of Java idiv and irem; checks for 1658 // special case as described in JVM spec., p.243 & p.271. 1659 // 1660 // normal case special case 1661 // 1662 // input : rax,: dividend min_int 1663 // reg: divisor -1 1664 // 1665 // output: rax,: quotient (= rax, idiv reg) min_int 1666 // rdx: remainder (= rax, irem reg) 0 1667 // 1668 // Code sequnce: 1669 // 1670 // 81 F8 00 00 00 80 cmp rax,80000000h 1671 // 0F 85 0B 00 00 00 jne normal_case 1672 // 33 D2 xor rdx,edx 1673 // 83 F9 FF cmp rcx,0FFh 1674 // 0F 84 03 00 00 00 je done 1675 // normal_case: 1676 // 99 cdq 1677 // F7 F9 idiv rax,ecx 1678 // done: 1679 // 1680 emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8); 1681 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); 1682 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80); // cmp rax,80000000h 1683 emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85); 1684 emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00); 1685 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // jne normal_case 1686 emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2); // xor rdx,edx 1687 emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh 1688 emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84); 1689 emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00); 1690 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // je done 1691 // normal_case: 1692 emit_opcode(cbuf,0x99); // cdq 1693 // idiv (note: must be emitted by the user of this rule) 1694 // normal: 1695 %} 1696 1697 // Dense encoding for older common ops 1698 enc_class Opc_plus(immI opcode, rRegI reg) %{ 1699 emit_opcode(cbuf, $opcode$$constant + $reg$$reg); 1700 %} 1701 1702 1703 // Opcde enc_class for 8/32 bit immediate instructions with sign-extension 1704 enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit 1705 // Check for 8-bit immediate, and set sign extend bit in opcode 1706 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1707 emit_opcode(cbuf, $primary | 0x02); 1708 } 1709 else { // If 32-bit immediate 1710 emit_opcode(cbuf, $primary); 1711 } 1712 %} 1713 1714 enc_class OpcSErm (rRegI dst, immI imm) %{ // OpcSEr/m 1715 // Emit primary opcode and set sign-extend bit 1716 // Check for 8-bit immediate, and set sign extend bit in opcode 1717 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1718 emit_opcode(cbuf, $primary | 0x02); } 1719 else { // If 32-bit immediate 1720 emit_opcode(cbuf, $primary); 1721 } 1722 // Emit r/m byte with secondary opcode, after primary opcode. 1723 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1724 %} 1725 1726 enc_class Con8or32 (immI imm) %{ // Con8or32(storeImmI), 8 or 32 bits 1727 // Check for 8-bit immediate, and set sign extend bit in opcode 1728 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1729 $$$emit8$imm$$constant; 1730 } 1731 else { // If 32-bit immediate 1732 // Output immediate 1733 $$$emit32$imm$$constant; 1734 } 1735 %} 1736 1737 enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{ 1738 // Emit primary opcode and set sign-extend bit 1739 // Check for 8-bit immediate, and set sign extend bit in opcode 1740 int con = (int)$imm$$constant; // Throw away top bits 1741 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1742 // Emit r/m byte with secondary opcode, after primary opcode. 1743 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1744 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1745 else emit_d32(cbuf,con); 1746 %} 1747 1748 enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{ 1749 // Emit primary opcode and set sign-extend bit 1750 // Check for 8-bit immediate, and set sign extend bit in opcode 1751 int con = (int)($imm$$constant >> 32); // Throw away bottom bits 1752 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1753 // Emit r/m byte with tertiary opcode, after primary opcode. 1754 emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg)); 1755 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1756 else emit_d32(cbuf,con); 1757 %} 1758 1759 enc_class OpcSReg (rRegI dst) %{ // BSWAP 1760 emit_cc(cbuf, $secondary, $dst$$reg ); 1761 %} 1762 1763 enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP 1764 int destlo = $dst$$reg; 1765 int desthi = HIGH_FROM_LOW(destlo); 1766 // bswap lo 1767 emit_opcode(cbuf, 0x0F); 1768 emit_cc(cbuf, 0xC8, destlo); 1769 // bswap hi 1770 emit_opcode(cbuf, 0x0F); 1771 emit_cc(cbuf, 0xC8, desthi); 1772 // xchg lo and hi 1773 emit_opcode(cbuf, 0x87); 1774 emit_rm(cbuf, 0x3, destlo, desthi); 1775 %} 1776 1777 enc_class RegOpc (rRegI div) %{ // IDIV, IMOD, JMP indirect, ... 1778 emit_rm(cbuf, 0x3, $secondary, $div$$reg ); 1779 %} 1780 1781 enc_class enc_cmov(cmpOp cop ) %{ // CMOV 1782 $$$emit8$primary; 1783 emit_cc(cbuf, $secondary, $cop$$cmpcode); 1784 %} 1785 1786 enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV 1787 int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1); 1788 emit_d8(cbuf, op >> 8 ); 1789 emit_d8(cbuf, op & 255); 1790 %} 1791 1792 // emulate a CMOV with a conditional branch around a MOV 1793 enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV 1794 // Invert sense of branch from sense of CMOV 1795 emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) ); 1796 emit_d8( cbuf, $brOffs$$constant ); 1797 %} 1798 1799 enc_class enc_PartialSubtypeCheck( ) %{ 1800 Register Redi = as_Register(EDI_enc); // result register 1801 Register Reax = as_Register(EAX_enc); // super class 1802 Register Recx = as_Register(ECX_enc); // killed 1803 Register Resi = as_Register(ESI_enc); // sub class 1804 Label miss; 1805 1806 MacroAssembler _masm(&cbuf); 1807 __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi, 1808 NULL, &miss, 1809 /*set_cond_codes:*/ true); 1810 if ($primary) { 1811 __ xorptr(Redi, Redi); 1812 } 1813 __ bind(miss); 1814 %} 1815 1816 enc_class FFree_Float_Stack_All %{ // Free_Float_Stack_All 1817 MacroAssembler masm(&cbuf); 1818 int start = masm.offset(); 1819 if (UseSSE >= 2) { 1820 if (VerifyFPU) { 1821 masm.verify_FPU(0, "must be empty in SSE2+ mode"); 1822 } 1823 } else { 1824 // External c_calling_convention expects the FPU stack to be 'clean'. 1825 // Compiled code leaves it dirty. Do cleanup now. 1826 masm.empty_FPU_stack(); 1827 } 1828 if (sizeof_FFree_Float_Stack_All == -1) { 1829 sizeof_FFree_Float_Stack_All = masm.offset() - start; 1830 } else { 1831 assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size"); 1832 } 1833 %} 1834 1835 enc_class Verify_FPU_For_Leaf %{ 1836 if( VerifyFPU ) { 1837 MacroAssembler masm(&cbuf); 1838 masm.verify_FPU( -3, "Returning from Runtime Leaf call"); 1839 } 1840 %} 1841 1842 enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf 1843 // This is the instruction starting address for relocation info. 1844 cbuf.set_insts_mark(); 1845 $$$emit8$primary; 1846 // CALL directly to the runtime 1847 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1848 runtime_call_Relocation::spec(), RELOC_IMM32 ); 1849 1850 if (UseSSE >= 2) { 1851 MacroAssembler _masm(&cbuf); 1852 BasicType rt = tf()->return_type(); 1853 1854 if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) { 1855 // A C runtime call where the return value is unused. In SSE2+ 1856 // mode the result needs to be removed from the FPU stack. It's 1857 // likely that this function call could be removed by the 1858 // optimizer if the C function is a pure function. 1859 __ ffree(0); 1860 } else if (rt == T_FLOAT) { 1861 __ lea(rsp, Address(rsp, -4)); 1862 __ fstp_s(Address(rsp, 0)); 1863 __ movflt(xmm0, Address(rsp, 0)); 1864 __ lea(rsp, Address(rsp, 4)); 1865 } else if (rt == T_DOUBLE) { 1866 __ lea(rsp, Address(rsp, -8)); 1867 __ fstp_d(Address(rsp, 0)); 1868 __ movdbl(xmm0, Address(rsp, 0)); 1869 __ lea(rsp, Address(rsp, 8)); 1870 } 1871 } 1872 %} 1873 1874 1875 enc_class pre_call_resets %{ 1876 // If method sets FPU control word restore it here 1877 debug_only(int off0 = cbuf.insts_size()); 1878 if (ra_->C->in_24_bit_fp_mode()) { 1879 MacroAssembler _masm(&cbuf); 1880 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); 1881 } 1882 if (ra_->C->max_vector_size() > 16) { 1883 // Clear upper bits of YMM registers when current compiled code uses 1884 // wide vectors to avoid AVX <-> SSE transition penalty during call. 1885 MacroAssembler _masm(&cbuf); 1886 __ vzeroupper(); 1887 } 1888 debug_only(int off1 = cbuf.insts_size()); 1889 assert(off1 - off0 == pre_call_resets_size(), "correct size prediction"); 1890 %} 1891 1892 enc_class post_call_FPU %{ 1893 // If method sets FPU control word do it here also 1894 if (Compile::current()->in_24_bit_fp_mode()) { 1895 MacroAssembler masm(&cbuf); 1896 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); 1897 } 1898 %} 1899 1900 enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL 1901 // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine 1902 // who we intended to call. 1903 cbuf.set_insts_mark(); 1904 $$$emit8$primary; 1905 if (!_method) { 1906 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1907 runtime_call_Relocation::spec(), RELOC_IMM32 ); 1908 } else if (_optimized_virtual) { 1909 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1910 opt_virtual_call_Relocation::spec(), RELOC_IMM32 ); 1911 } else { 1912 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1913 static_call_Relocation::spec(), RELOC_IMM32 ); 1914 } 1915 if (_method) { // Emit stub for static call. 1916 address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); 1917 if (stub == NULL) { 1918 ciEnv::current()->record_failure("CodeCache is full"); 1919 return; 1920 } 1921 } 1922 %} 1923 1924 enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL 1925 MacroAssembler _masm(&cbuf); 1926 __ ic_call((address)$meth$$method); 1927 %} 1928 1929 enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL 1930 int disp = in_bytes(Method::from_compiled_offset()); 1931 assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small"); 1932 1933 // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())] 1934 cbuf.set_insts_mark(); 1935 $$$emit8$primary; 1936 emit_rm(cbuf, 0x01, $secondary, EAX_enc ); // R/M byte 1937 emit_d8(cbuf, disp); // Displacement 1938 1939 %} 1940 1941// Following encoding is no longer used, but may be restored if calling 1942// convention changes significantly. 1943// Became: Xor_Reg(EBP), Java_To_Runtime( labl ) 1944// 1945// enc_class Java_Interpreter_Call (label labl) %{ // JAVA INTERPRETER CALL 1946// // int ic_reg = Matcher::inline_cache_reg(); 1947// // int ic_encode = Matcher::_regEncode[ic_reg]; 1948// // int imo_reg = Matcher::interpreter_method_oop_reg(); 1949// // int imo_encode = Matcher::_regEncode[imo_reg]; 1950// 1951// // // Interpreter expects method_oop in EBX, currently a callee-saved register, 1952// // // so we load it immediately before the call 1953// // emit_opcode(cbuf, 0x8B); // MOV imo_reg,ic_reg # method_oop 1954// // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte 1955// 1956// // xor rbp,ebp 1957// emit_opcode(cbuf, 0x33); 1958// emit_rm(cbuf, 0x3, EBP_enc, EBP_enc); 1959// 1960// // CALL to interpreter. 1961// cbuf.set_insts_mark(); 1962// $$$emit8$primary; 1963// emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4), 1964// runtime_call_Relocation::spec(), RELOC_IMM32 ); 1965// %} 1966 1967 enc_class RegOpcImm (rRegI dst, immI8 shift) %{ // SHL, SAR, SHR 1968 $$$emit8$primary; 1969 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1970 $$$emit8$shift$$constant; 1971 %} 1972 1973 enc_class LdImmI (rRegI dst, immI src) %{ // Load Immediate 1974 // Load immediate does not have a zero or sign extended version 1975 // for 8-bit immediates 1976 emit_opcode(cbuf, 0xB8 + $dst$$reg); 1977 $$$emit32$src$$constant; 1978 %} 1979 1980 enc_class LdImmP (rRegI dst, immI src) %{ // Load Immediate 1981 // Load immediate does not have a zero or sign extended version 1982 // for 8-bit immediates 1983 emit_opcode(cbuf, $primary + $dst$$reg); 1984 $$$emit32$src$$constant; 1985 %} 1986 1987 enc_class LdImmL_Lo( eRegL dst, immL src) %{ // Load Immediate 1988 // Load immediate does not have a zero or sign extended version 1989 // for 8-bit immediates 1990 int dst_enc = $dst$$reg; 1991 int src_con = $src$$constant & 0x0FFFFFFFFL; 1992 if (src_con == 0) { 1993 // xor dst, dst 1994 emit_opcode(cbuf, 0x33); 1995 emit_rm(cbuf, 0x3, dst_enc, dst_enc); 1996 } else { 1997 emit_opcode(cbuf, $primary + dst_enc); 1998 emit_d32(cbuf, src_con); 1999 } 2000 %} 2001 2002 enc_class LdImmL_Hi( eRegL dst, immL src) %{ // Load Immediate 2003 // Load immediate does not have a zero or sign extended version 2004 // for 8-bit immediates 2005 int dst_enc = $dst$$reg + 2; 2006 int src_con = ((julong)($src$$constant)) >> 32; 2007 if (src_con == 0) { 2008 // xor dst, dst 2009 emit_opcode(cbuf, 0x33); 2010 emit_rm(cbuf, 0x3, dst_enc, dst_enc); 2011 } else { 2012 emit_opcode(cbuf, $primary + dst_enc); 2013 emit_d32(cbuf, src_con); 2014 } 2015 %} 2016 2017 2018 // Encode a reg-reg copy. If it is useless, then empty encoding. 2019 enc_class enc_Copy( rRegI dst, rRegI src ) %{ 2020 encode_Copy( cbuf, $dst$$reg, $src$$reg ); 2021 %} 2022 2023 enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{ 2024 encode_Copy( cbuf, $dst$$reg, $src$$reg ); 2025 %} 2026 2027 enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many) 2028 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2029 %} 2030 2031 enc_class RegReg_Lo(eRegL dst, eRegL src) %{ // RegReg(Many) 2032 $$$emit8$primary; 2033 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2034 %} 2035 2036 enc_class RegReg_Hi(eRegL dst, eRegL src) %{ // RegReg(Many) 2037 $$$emit8$secondary; 2038 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg)); 2039 %} 2040 2041 enc_class RegReg_Lo2(eRegL dst, eRegL src) %{ // RegReg(Many) 2042 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2043 %} 2044 2045 enc_class RegReg_Hi2(eRegL dst, eRegL src) %{ // RegReg(Many) 2046 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg)); 2047 %} 2048 2049 enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{ 2050 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg)); 2051 %} 2052 2053 enc_class Con32 (immI src) %{ // Con32(storeImmI) 2054 // Output immediate 2055 $$$emit32$src$$constant; 2056 %} 2057 2058 enc_class Con32FPR_as_bits(immFPR src) %{ // storeF_imm 2059 // Output Float immediate bits 2060 jfloat jf = $src$$constant; 2061 int jf_as_bits = jint_cast( jf ); 2062 emit_d32(cbuf, jf_as_bits); 2063 %} 2064 2065 enc_class Con32F_as_bits(immF src) %{ // storeX_imm 2066 // Output Float immediate bits 2067 jfloat jf = $src$$constant; 2068 int jf_as_bits = jint_cast( jf ); 2069 emit_d32(cbuf, jf_as_bits); 2070 %} 2071 2072 enc_class Con16 (immI src) %{ // Con16(storeImmI) 2073 // Output immediate 2074 $$$emit16$src$$constant; 2075 %} 2076 2077 enc_class Con_d32(immI src) %{ 2078 emit_d32(cbuf,$src$$constant); 2079 %} 2080 2081 enc_class conmemref (eRegP t1) %{ // Con32(storeImmI) 2082 // Output immediate memory reference 2083 emit_rm(cbuf, 0x00, $t1$$reg, 0x05 ); 2084 emit_d32(cbuf, 0x00); 2085 %} 2086 2087 enc_class lock_prefix( ) %{ 2088 if( os::is_MP() ) 2089 emit_opcode(cbuf,0xF0); // [Lock] 2090 %} 2091 2092 // Cmp-xchg long value. 2093 // Note: we need to swap rbx, and rcx before and after the 2094 // cmpxchg8 instruction because the instruction uses 2095 // rcx as the high order word of the new value to store but 2096 // our register encoding uses rbx,. 2097 enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{ 2098 2099 // XCHG rbx,ecx 2100 emit_opcode(cbuf,0x87); 2101 emit_opcode(cbuf,0xD9); 2102 // [Lock] 2103 if( os::is_MP() ) 2104 emit_opcode(cbuf,0xF0); 2105 // CMPXCHG8 [Eptr] 2106 emit_opcode(cbuf,0x0F); 2107 emit_opcode(cbuf,0xC7); 2108 emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg ); 2109 // XCHG rbx,ecx 2110 emit_opcode(cbuf,0x87); 2111 emit_opcode(cbuf,0xD9); 2112 %} 2113 2114 enc_class enc_cmpxchg(eSIRegP mem_ptr) %{ 2115 // [Lock] 2116 if( os::is_MP() ) 2117 emit_opcode(cbuf,0xF0); 2118 2119 // CMPXCHG [Eptr] 2120 emit_opcode(cbuf,0x0F); 2121 emit_opcode(cbuf,0xB1); 2122 emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg ); 2123 %} 2124 2125 enc_class enc_flags_ne_to_boolean( iRegI res ) %{ 2126 int res_encoding = $res$$reg; 2127 2128 // MOV res,0 2129 emit_opcode( cbuf, 0xB8 + res_encoding); 2130 emit_d32( cbuf, 0 ); 2131 // JNE,s fail 2132 emit_opcode(cbuf,0x75); 2133 emit_d8(cbuf, 5 ); 2134 // MOV res,1 2135 emit_opcode( cbuf, 0xB8 + res_encoding); 2136 emit_d32( cbuf, 1 ); 2137 // fail: 2138 %} 2139 2140 enc_class set_instruction_start( ) %{ 2141 cbuf.set_insts_mark(); // Mark start of opcode for reloc info in mem operand 2142 %} 2143 2144 enc_class RegMem (rRegI ereg, memory mem) %{ // emit_reg_mem 2145 int reg_encoding = $ereg$$reg; 2146 int base = $mem$$base; 2147 int index = $mem$$index; 2148 int scale = $mem$$scale; 2149 int displace = $mem$$disp; 2150 relocInfo::relocType disp_reloc = $mem->disp_reloc(); 2151 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc); 2152 %} 2153 2154 enc_class RegMem_Hi(eRegL ereg, memory mem) %{ // emit_reg_mem 2155 int reg_encoding = HIGH_FROM_LOW($ereg$$reg); // Hi register of pair, computed from lo 2156 int base = $mem$$base; 2157 int index = $mem$$index; 2158 int scale = $mem$$scale; 2159 int displace = $mem$$disp + 4; // Offset is 4 further in memory 2160 assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" ); 2161 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none); 2162 %} 2163 2164 enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{ 2165 int r1, r2; 2166 if( $tertiary == 0xA4 ) { r1 = $dst$$reg; r2 = HIGH_FROM_LOW($dst$$reg); } 2167 else { r2 = $dst$$reg; r1 = HIGH_FROM_LOW($dst$$reg); } 2168 emit_opcode(cbuf,0x0F); 2169 emit_opcode(cbuf,$tertiary); 2170 emit_rm(cbuf, 0x3, r1, r2); 2171 emit_d8(cbuf,$cnt$$constant); 2172 emit_d8(cbuf,$primary); 2173 emit_rm(cbuf, 0x3, $secondary, r1); 2174 emit_d8(cbuf,$cnt$$constant); 2175 %} 2176 2177 enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{ 2178 emit_opcode( cbuf, 0x8B ); // Move 2179 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg)); 2180 if( $cnt$$constant > 32 ) { // Shift, if not by zero 2181 emit_d8(cbuf,$primary); 2182 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 2183 emit_d8(cbuf,$cnt$$constant-32); 2184 } 2185 emit_d8(cbuf,$primary); 2186 emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg)); 2187 emit_d8(cbuf,31); 2188 %} 2189 2190 enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{ 2191 int r1, r2; 2192 if( $secondary == 0x5 ) { r1 = $dst$$reg; r2 = HIGH_FROM_LOW($dst$$reg); } 2193 else { r2 = $dst$$reg; r1 = HIGH_FROM_LOW($dst$$reg); } 2194 2195 emit_opcode( cbuf, 0x8B ); // Move r1,r2 2196 emit_rm(cbuf, 0x3, r1, r2); 2197 if( $cnt$$constant > 32 ) { // Shift, if not by zero 2198 emit_opcode(cbuf,$primary); 2199 emit_rm(cbuf, 0x3, $secondary, r1); 2200 emit_d8(cbuf,$cnt$$constant-32); 2201 } 2202 emit_opcode(cbuf,0x33); // XOR r2,r2 2203 emit_rm(cbuf, 0x3, r2, r2); 2204 %} 2205 2206 // Clone of RegMem but accepts an extra parameter to access each 2207 // half of a double in memory; it never needs relocation info. 2208 enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{ 2209 emit_opcode(cbuf,$opcode$$constant); 2210 int reg_encoding = $rm_reg$$reg; 2211 int base = $mem$$base; 2212 int index = $mem$$index; 2213 int scale = $mem$$scale; 2214 int displace = $mem$$disp + $disp_for_half$$constant; 2215 relocInfo::relocType disp_reloc = relocInfo::none; 2216 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc); 2217 %} 2218 2219 // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!! 2220 // 2221 // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant 2222 // and it never needs relocation information. 2223 // Frequently used to move data between FPU's Stack Top and memory. 2224 enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{ 2225 int rm_byte_opcode = $rm_opcode$$constant; 2226 int base = $mem$$base; 2227 int index = $mem$$index; 2228 int scale = $mem$$scale; 2229 int displace = $mem$$disp; 2230 assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" ); 2231 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none); 2232 %} 2233 2234 enc_class RMopc_Mem (immI rm_opcode, memory mem) %{ 2235 int rm_byte_opcode = $rm_opcode$$constant; 2236 int base = $mem$$base; 2237 int index = $mem$$index; 2238 int scale = $mem$$scale; 2239 int displace = $mem$$disp; 2240 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 2241 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 2242 %} 2243 2244 enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{ // emit_reg_lea 2245 int reg_encoding = $dst$$reg; 2246 int base = $src0$$reg; // 0xFFFFFFFF indicates no base 2247 int index = 0x04; // 0x04 indicates no index 2248 int scale = 0x00; // 0x00 indicates no scale 2249 int displace = $src1$$constant; // 0x00 indicates no displacement 2250 relocInfo::relocType disp_reloc = relocInfo::none; 2251 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc); 2252 %} 2253 2254 enc_class min_enc (rRegI dst, rRegI src) %{ // MIN 2255 // Compare dst,src 2256 emit_opcode(cbuf,0x3B); 2257 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2258 // jmp dst < src around move 2259 emit_opcode(cbuf,0x7C); 2260 emit_d8(cbuf,2); 2261 // move dst,src 2262 emit_opcode(cbuf,0x8B); 2263 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2264 %} 2265 2266 enc_class max_enc (rRegI dst, rRegI src) %{ // MAX 2267 // Compare dst,src 2268 emit_opcode(cbuf,0x3B); 2269 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2270 // jmp dst > src around move 2271 emit_opcode(cbuf,0x7F); 2272 emit_d8(cbuf,2); 2273 // move dst,src 2274 emit_opcode(cbuf,0x8B); 2275 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2276 %} 2277 2278 enc_class enc_FPR_store(memory mem, regDPR src) %{ 2279 // If src is FPR1, we can just FST to store it. 2280 // Else we need to FLD it to FPR1, then FSTP to store/pop it. 2281 int reg_encoding = 0x2; // Just store 2282 int base = $mem$$base; 2283 int index = $mem$$index; 2284 int scale = $mem$$scale; 2285 int displace = $mem$$disp; 2286 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 2287 if( $src$$reg != FPR1L_enc ) { 2288 reg_encoding = 0x3; // Store & pop 2289 emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it) 2290 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2291 } 2292 cbuf.set_insts_mark(); // Mark start of opcode for reloc info in mem operand 2293 emit_opcode(cbuf,$primary); 2294 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc); 2295 %} 2296 2297 enc_class neg_reg(rRegI dst) %{ 2298 // NEG $dst 2299 emit_opcode(cbuf,0xF7); 2300 emit_rm(cbuf, 0x3, 0x03, $dst$$reg ); 2301 %} 2302 2303 enc_class setLT_reg(eCXRegI dst) %{ 2304 // SETLT $dst 2305 emit_opcode(cbuf,0x0F); 2306 emit_opcode(cbuf,0x9C); 2307 emit_rm( cbuf, 0x3, 0x4, $dst$$reg ); 2308 %} 2309 2310 enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{ // cadd_cmpLT 2311 int tmpReg = $tmp$$reg; 2312 2313 // SUB $p,$q 2314 emit_opcode(cbuf,0x2B); 2315 emit_rm(cbuf, 0x3, $p$$reg, $q$$reg); 2316 // SBB $tmp,$tmp 2317 emit_opcode(cbuf,0x1B); 2318 emit_rm(cbuf, 0x3, tmpReg, tmpReg); 2319 // AND $tmp,$y 2320 emit_opcode(cbuf,0x23); 2321 emit_rm(cbuf, 0x3, tmpReg, $y$$reg); 2322 // ADD $p,$tmp 2323 emit_opcode(cbuf,0x03); 2324 emit_rm(cbuf, 0x3, $p$$reg, tmpReg); 2325 %} 2326 2327 enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{ 2328 // TEST shift,32 2329 emit_opcode(cbuf,0xF7); 2330 emit_rm(cbuf, 0x3, 0, ECX_enc); 2331 emit_d32(cbuf,0x20); 2332 // JEQ,s small 2333 emit_opcode(cbuf, 0x74); 2334 emit_d8(cbuf, 0x04); 2335 // MOV $dst.hi,$dst.lo 2336 emit_opcode( cbuf, 0x8B ); 2337 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg ); 2338 // CLR $dst.lo 2339 emit_opcode(cbuf, 0x33); 2340 emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg); 2341// small: 2342 // SHLD $dst.hi,$dst.lo,$shift 2343 emit_opcode(cbuf,0x0F); 2344 emit_opcode(cbuf,0xA5); 2345 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg)); 2346 // SHL $dst.lo,$shift" 2347 emit_opcode(cbuf,0xD3); 2348 emit_rm(cbuf, 0x3, 0x4, $dst$$reg ); 2349 %} 2350 2351 enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{ 2352 // TEST shift,32 2353 emit_opcode(cbuf,0xF7); 2354 emit_rm(cbuf, 0x3, 0, ECX_enc); 2355 emit_d32(cbuf,0x20); 2356 // JEQ,s small 2357 emit_opcode(cbuf, 0x74); 2358 emit_d8(cbuf, 0x04); 2359 // MOV $dst.lo,$dst.hi 2360 emit_opcode( cbuf, 0x8B ); 2361 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) ); 2362 // CLR $dst.hi 2363 emit_opcode(cbuf, 0x33); 2364 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg)); 2365// small: 2366 // SHRD $dst.lo,$dst.hi,$shift 2367 emit_opcode(cbuf,0x0F); 2368 emit_opcode(cbuf,0xAD); 2369 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg); 2370 // SHR $dst.hi,$shift" 2371 emit_opcode(cbuf,0xD3); 2372 emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) ); 2373 %} 2374 2375 enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{ 2376 // TEST shift,32 2377 emit_opcode(cbuf,0xF7); 2378 emit_rm(cbuf, 0x3, 0, ECX_enc); 2379 emit_d32(cbuf,0x20); 2380 // JEQ,s small 2381 emit_opcode(cbuf, 0x74); 2382 emit_d8(cbuf, 0x05); 2383 // MOV $dst.lo,$dst.hi 2384 emit_opcode( cbuf, 0x8B ); 2385 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) ); 2386 // SAR $dst.hi,31 2387 emit_opcode(cbuf, 0xC1); 2388 emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) ); 2389 emit_d8(cbuf, 0x1F ); 2390// small: 2391 // SHRD $dst.lo,$dst.hi,$shift 2392 emit_opcode(cbuf,0x0F); 2393 emit_opcode(cbuf,0xAD); 2394 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg); 2395 // SAR $dst.hi,$shift" 2396 emit_opcode(cbuf,0xD3); 2397 emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) ); 2398 %} 2399 2400 2401 // ----------------- Encodings for floating point unit ----------------- 2402 // May leave result in FPU-TOS or FPU reg depending on opcodes 2403 enc_class OpcReg_FPR(regFPR src) %{ // FMUL, FDIV 2404 $$$emit8$primary; 2405 emit_rm(cbuf, 0x3, $secondary, $src$$reg ); 2406 %} 2407 2408 // Pop argument in FPR0 with FSTP ST(0) 2409 enc_class PopFPU() %{ 2410 emit_opcode( cbuf, 0xDD ); 2411 emit_d8( cbuf, 0xD8 ); 2412 %} 2413 2414 // !!!!! equivalent to Pop_Reg_F 2415 enc_class Pop_Reg_DPR( regDPR dst ) %{ 2416 emit_opcode( cbuf, 0xDD ); // FSTP ST(i) 2417 emit_d8( cbuf, 0xD8+$dst$$reg ); 2418 %} 2419 2420 enc_class Push_Reg_DPR( regDPR dst ) %{ 2421 emit_opcode( cbuf, 0xD9 ); 2422 emit_d8( cbuf, 0xC0-1+$dst$$reg ); // FLD ST(i-1) 2423 %} 2424 2425 enc_class strictfp_bias1( regDPR dst ) %{ 2426 emit_opcode( cbuf, 0xDB ); // FLD m80real 2427 emit_opcode( cbuf, 0x2D ); 2428 emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() ); 2429 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0 2430 emit_opcode( cbuf, 0xC8+$dst$$reg ); 2431 %} 2432 2433 enc_class strictfp_bias2( regDPR dst ) %{ 2434 emit_opcode( cbuf, 0xDB ); // FLD m80real 2435 emit_opcode( cbuf, 0x2D ); 2436 emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() ); 2437 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0 2438 emit_opcode( cbuf, 0xC8+$dst$$reg ); 2439 %} 2440 2441 // Special case for moving an integer register to a stack slot. 2442 enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS 2443 store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp ); 2444 %} 2445 2446 // Special case for moving a register to a stack slot. 2447 enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS 2448 // Opcode already emitted 2449 emit_rm( cbuf, 0x02, $src$$reg, ESP_enc ); // R/M byte 2450 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte 2451 emit_d32(cbuf, $dst$$disp); // Displacement 2452 %} 2453 2454 // Push the integer in stackSlot 'src' onto FP-stack 2455 enc_class Push_Mem_I( memory src ) %{ // FILD [ESP+src] 2456 store_to_stackslot( cbuf, $primary, $secondary, $src$$disp ); 2457 %} 2458 2459 // Push FPU's TOS float to a stack-slot, and pop FPU-stack 2460 enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst] 2461 store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp ); 2462 %} 2463 2464 // Same as Pop_Mem_F except for opcode 2465 // Push FPU's TOS double to a stack-slot, and pop FPU-stack 2466 enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst] 2467 store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp ); 2468 %} 2469 2470 enc_class Pop_Reg_FPR( regFPR dst ) %{ 2471 emit_opcode( cbuf, 0xDD ); // FSTP ST(i) 2472 emit_d8( cbuf, 0xD8+$dst$$reg ); 2473 %} 2474 2475 enc_class Push_Reg_FPR( regFPR dst ) %{ 2476 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2477 emit_d8( cbuf, 0xC0-1+$dst$$reg ); 2478 %} 2479 2480 // Push FPU's float to a stack-slot, and pop FPU-stack 2481 enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{ 2482 int pop = 0x02; 2483 if ($src$$reg != FPR1L_enc) { 2484 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2485 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2486 pop = 0x03; 2487 } 2488 store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S [ESP+dst] 2489 %} 2490 2491 // Push FPU's double to a stack-slot, and pop FPU-stack 2492 enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{ 2493 int pop = 0x02; 2494 if ($src$$reg != FPR1L_enc) { 2495 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2496 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2497 pop = 0x03; 2498 } 2499 store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D [ESP+dst] 2500 %} 2501 2502 // Push FPU's double to a FPU-stack-slot, and pop FPU-stack 2503 enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{ 2504 int pop = 0xD0 - 1; // -1 since we skip FLD 2505 if ($src$$reg != FPR1L_enc) { 2506 emit_opcode( cbuf, 0xD9 ); // FLD ST(src-1) 2507 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2508 pop = 0xD8; 2509 } 2510 emit_opcode( cbuf, 0xDD ); 2511 emit_d8( cbuf, pop+$dst$$reg ); // FST<P> ST(i) 2512 %} 2513 2514 2515 enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{ 2516 // load dst in FPR0 2517 emit_opcode( cbuf, 0xD9 ); 2518 emit_d8( cbuf, 0xC0-1+$dst$$reg ); 2519 if ($src$$reg != FPR1L_enc) { 2520 // fincstp 2521 emit_opcode (cbuf, 0xD9); 2522 emit_opcode (cbuf, 0xF7); 2523 // swap src with FPR1: 2524 // FXCH FPR1 with src 2525 emit_opcode(cbuf, 0xD9); 2526 emit_d8(cbuf, 0xC8-1+$src$$reg ); 2527 // fdecstp 2528 emit_opcode (cbuf, 0xD9); 2529 emit_opcode (cbuf, 0xF6); 2530 } 2531 %} 2532 2533 enc_class Push_ModD_encoding(regD src0, regD src1) %{ 2534 MacroAssembler _masm(&cbuf); 2535 __ subptr(rsp, 8); 2536 __ movdbl(Address(rsp, 0), $src1$$XMMRegister); 2537 __ fld_d(Address(rsp, 0)); 2538 __ movdbl(Address(rsp, 0), $src0$$XMMRegister); 2539 __ fld_d(Address(rsp, 0)); 2540 %} 2541 2542 enc_class Push_ModF_encoding(regF src0, regF src1) %{ 2543 MacroAssembler _masm(&cbuf); 2544 __ subptr(rsp, 4); 2545 __ movflt(Address(rsp, 0), $src1$$XMMRegister); 2546 __ fld_s(Address(rsp, 0)); 2547 __ movflt(Address(rsp, 0), $src0$$XMMRegister); 2548 __ fld_s(Address(rsp, 0)); 2549 %} 2550 2551 enc_class Push_ResultD(regD dst) %{ 2552 MacroAssembler _masm(&cbuf); 2553 __ fstp_d(Address(rsp, 0)); 2554 __ movdbl($dst$$XMMRegister, Address(rsp, 0)); 2555 __ addptr(rsp, 8); 2556 %} 2557 2558 enc_class Push_ResultF(regF dst, immI d8) %{ 2559 MacroAssembler _masm(&cbuf); 2560 __ fstp_s(Address(rsp, 0)); 2561 __ movflt($dst$$XMMRegister, Address(rsp, 0)); 2562 __ addptr(rsp, $d8$$constant); 2563 %} 2564 2565 enc_class Push_SrcD(regD src) %{ 2566 MacroAssembler _masm(&cbuf); 2567 __ subptr(rsp, 8); 2568 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 2569 __ fld_d(Address(rsp, 0)); 2570 %} 2571 2572 enc_class push_stack_temp_qword() %{ 2573 MacroAssembler _masm(&cbuf); 2574 __ subptr(rsp, 8); 2575 %} 2576 2577 enc_class pop_stack_temp_qword() %{ 2578 MacroAssembler _masm(&cbuf); 2579 __ addptr(rsp, 8); 2580 %} 2581 2582 enc_class push_xmm_to_fpr1(regD src) %{ 2583 MacroAssembler _masm(&cbuf); 2584 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 2585 __ fld_d(Address(rsp, 0)); 2586 %} 2587 2588 enc_class Push_Result_Mod_DPR( regDPR src) %{ 2589 if ($src$$reg != FPR1L_enc) { 2590 // fincstp 2591 emit_opcode (cbuf, 0xD9); 2592 emit_opcode (cbuf, 0xF7); 2593 // FXCH FPR1 with src 2594 emit_opcode(cbuf, 0xD9); 2595 emit_d8(cbuf, 0xC8-1+$src$$reg ); 2596 // fdecstp 2597 emit_opcode (cbuf, 0xD9); 2598 emit_opcode (cbuf, 0xF6); 2599 } 2600 // // following asm replaced with Pop_Reg_F or Pop_Mem_F 2601 // // FSTP FPR$dst$$reg 2602 // emit_opcode( cbuf, 0xDD ); 2603 // emit_d8( cbuf, 0xD8+$dst$$reg ); 2604 %} 2605 2606 enc_class fnstsw_sahf_skip_parity() %{ 2607 // fnstsw ax 2608 emit_opcode( cbuf, 0xDF ); 2609 emit_opcode( cbuf, 0xE0 ); 2610 // sahf 2611 emit_opcode( cbuf, 0x9E ); 2612 // jnp ::skip 2613 emit_opcode( cbuf, 0x7B ); 2614 emit_opcode( cbuf, 0x05 ); 2615 %} 2616 2617 enc_class emitModDPR() %{ 2618 // fprem must be iterative 2619 // :: loop 2620 // fprem 2621 emit_opcode( cbuf, 0xD9 ); 2622 emit_opcode( cbuf, 0xF8 ); 2623 // wait 2624 emit_opcode( cbuf, 0x9b ); 2625 // fnstsw ax 2626 emit_opcode( cbuf, 0xDF ); 2627 emit_opcode( cbuf, 0xE0 ); 2628 // sahf 2629 emit_opcode( cbuf, 0x9E ); 2630 // jp ::loop 2631 emit_opcode( cbuf, 0x0F ); 2632 emit_opcode( cbuf, 0x8A ); 2633 emit_opcode( cbuf, 0xF4 ); 2634 emit_opcode( cbuf, 0xFF ); 2635 emit_opcode( cbuf, 0xFF ); 2636 emit_opcode( cbuf, 0xFF ); 2637 %} 2638 2639 enc_class fpu_flags() %{ 2640 // fnstsw_ax 2641 emit_opcode( cbuf, 0xDF); 2642 emit_opcode( cbuf, 0xE0); 2643 // test ax,0x0400 2644 emit_opcode( cbuf, 0x66 ); // operand-size prefix for 16-bit immediate 2645 emit_opcode( cbuf, 0xA9 ); 2646 emit_d16 ( cbuf, 0x0400 ); 2647 // // // This sequence works, but stalls for 12-16 cycles on PPro 2648 // // test rax,0x0400 2649 // emit_opcode( cbuf, 0xA9 ); 2650 // emit_d32 ( cbuf, 0x00000400 ); 2651 // 2652 // jz exit (no unordered comparison) 2653 emit_opcode( cbuf, 0x74 ); 2654 emit_d8 ( cbuf, 0x02 ); 2655 // mov ah,1 - treat as LT case (set carry flag) 2656 emit_opcode( cbuf, 0xB4 ); 2657 emit_d8 ( cbuf, 0x01 ); 2658 // sahf 2659 emit_opcode( cbuf, 0x9E); 2660 %} 2661 2662 enc_class cmpF_P6_fixup() %{ 2663 // Fixup the integer flags in case comparison involved a NaN 2664 // 2665 // JNP exit (no unordered comparison, P-flag is set by NaN) 2666 emit_opcode( cbuf, 0x7B ); 2667 emit_d8 ( cbuf, 0x03 ); 2668 // MOV AH,1 - treat as LT case (set carry flag) 2669 emit_opcode( cbuf, 0xB4 ); 2670 emit_d8 ( cbuf, 0x01 ); 2671 // SAHF 2672 emit_opcode( cbuf, 0x9E); 2673 // NOP // target for branch to avoid branch to branch 2674 emit_opcode( cbuf, 0x90); 2675 %} 2676 2677// fnstsw_ax(); 2678// sahf(); 2679// movl(dst, nan_result); 2680// jcc(Assembler::parity, exit); 2681// movl(dst, less_result); 2682// jcc(Assembler::below, exit); 2683// movl(dst, equal_result); 2684// jcc(Assembler::equal, exit); 2685// movl(dst, greater_result); 2686 2687// less_result = 1; 2688// greater_result = -1; 2689// equal_result = 0; 2690// nan_result = -1; 2691 2692 enc_class CmpF_Result(rRegI dst) %{ 2693 // fnstsw_ax(); 2694 emit_opcode( cbuf, 0xDF); 2695 emit_opcode( cbuf, 0xE0); 2696 // sahf 2697 emit_opcode( cbuf, 0x9E); 2698 // movl(dst, nan_result); 2699 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2700 emit_d32( cbuf, -1 ); 2701 // jcc(Assembler::parity, exit); 2702 emit_opcode( cbuf, 0x7A ); 2703 emit_d8 ( cbuf, 0x13 ); 2704 // movl(dst, less_result); 2705 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2706 emit_d32( cbuf, -1 ); 2707 // jcc(Assembler::below, exit); 2708 emit_opcode( cbuf, 0x72 ); 2709 emit_d8 ( cbuf, 0x0C ); 2710 // movl(dst, equal_result); 2711 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2712 emit_d32( cbuf, 0 ); 2713 // jcc(Assembler::equal, exit); 2714 emit_opcode( cbuf, 0x74 ); 2715 emit_d8 ( cbuf, 0x05 ); 2716 // movl(dst, greater_result); 2717 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2718 emit_d32( cbuf, 1 ); 2719 %} 2720 2721 2722 // Compare the longs and set flags 2723 // BROKEN! Do Not use as-is 2724 enc_class cmpl_test( eRegL src1, eRegL src2 ) %{ 2725 // CMP $src1.hi,$src2.hi 2726 emit_opcode( cbuf, 0x3B ); 2727 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) ); 2728 // JNE,s done 2729 emit_opcode(cbuf,0x75); 2730 emit_d8(cbuf, 2 ); 2731 // CMP $src1.lo,$src2.lo 2732 emit_opcode( cbuf, 0x3B ); 2733 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2734// done: 2735 %} 2736 2737 enc_class convert_int_long( regL dst, rRegI src ) %{ 2738 // mov $dst.lo,$src 2739 int dst_encoding = $dst$$reg; 2740 int src_encoding = $src$$reg; 2741 encode_Copy( cbuf, dst_encoding , src_encoding ); 2742 // mov $dst.hi,$src 2743 encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding ); 2744 // sar $dst.hi,31 2745 emit_opcode( cbuf, 0xC1 ); 2746 emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) ); 2747 emit_d8(cbuf, 0x1F ); 2748 %} 2749 2750 enc_class convert_long_double( eRegL src ) %{ 2751 // push $src.hi 2752 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg)); 2753 // push $src.lo 2754 emit_opcode(cbuf, 0x50+$src$$reg ); 2755 // fild 64-bits at [SP] 2756 emit_opcode(cbuf,0xdf); 2757 emit_d8(cbuf, 0x6C); 2758 emit_d8(cbuf, 0x24); 2759 emit_d8(cbuf, 0x00); 2760 // pop stack 2761 emit_opcode(cbuf, 0x83); // add SP, #8 2762 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2763 emit_d8(cbuf, 0x8); 2764 %} 2765 2766 enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{ 2767 // IMUL EDX:EAX,$src1 2768 emit_opcode( cbuf, 0xF7 ); 2769 emit_rm( cbuf, 0x3, 0x5, $src1$$reg ); 2770 // SAR EDX,$cnt-32 2771 int shift_count = ((int)$cnt$$constant) - 32; 2772 if (shift_count > 0) { 2773 emit_opcode(cbuf, 0xC1); 2774 emit_rm(cbuf, 0x3, 7, $dst$$reg ); 2775 emit_d8(cbuf, shift_count); 2776 } 2777 %} 2778 2779 // this version doesn't have add sp, 8 2780 enc_class convert_long_double2( eRegL src ) %{ 2781 // push $src.hi 2782 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg)); 2783 // push $src.lo 2784 emit_opcode(cbuf, 0x50+$src$$reg ); 2785 // fild 64-bits at [SP] 2786 emit_opcode(cbuf,0xdf); 2787 emit_d8(cbuf, 0x6C); 2788 emit_d8(cbuf, 0x24); 2789 emit_d8(cbuf, 0x00); 2790 %} 2791 2792 enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{ 2793 // Basic idea: long = (long)int * (long)int 2794 // IMUL EDX:EAX, src 2795 emit_opcode( cbuf, 0xF7 ); 2796 emit_rm( cbuf, 0x3, 0x5, $src$$reg); 2797 %} 2798 2799 enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{ 2800 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) 2801 // MUL EDX:EAX, src 2802 emit_opcode( cbuf, 0xF7 ); 2803 emit_rm( cbuf, 0x3, 0x4, $src$$reg); 2804 %} 2805 2806 enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{ 2807 // Basic idea: lo(result) = lo(x_lo * y_lo) 2808 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 2809 // MOV $tmp,$src.lo 2810 encode_Copy( cbuf, $tmp$$reg, $src$$reg ); 2811 // IMUL $tmp,EDX 2812 emit_opcode( cbuf, 0x0F ); 2813 emit_opcode( cbuf, 0xAF ); 2814 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2815 // MOV EDX,$src.hi 2816 encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) ); 2817 // IMUL EDX,EAX 2818 emit_opcode( cbuf, 0x0F ); 2819 emit_opcode( cbuf, 0xAF ); 2820 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg ); 2821 // ADD $tmp,EDX 2822 emit_opcode( cbuf, 0x03 ); 2823 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2824 // MUL EDX:EAX,$src.lo 2825 emit_opcode( cbuf, 0xF7 ); 2826 emit_rm( cbuf, 0x3, 0x4, $src$$reg ); 2827 // ADD EDX,ESI 2828 emit_opcode( cbuf, 0x03 ); 2829 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg ); 2830 %} 2831 2832 enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{ 2833 // Basic idea: lo(result) = lo(src * y_lo) 2834 // hi(result) = hi(src * y_lo) + lo(src * y_hi) 2835 // IMUL $tmp,EDX,$src 2836 emit_opcode( cbuf, 0x6B ); 2837 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2838 emit_d8( cbuf, (int)$src$$constant ); 2839 // MOV EDX,$src 2840 emit_opcode(cbuf, 0xB8 + EDX_enc); 2841 emit_d32( cbuf, (int)$src$$constant ); 2842 // MUL EDX:EAX,EDX 2843 emit_opcode( cbuf, 0xF7 ); 2844 emit_rm( cbuf, 0x3, 0x4, EDX_enc ); 2845 // ADD EDX,ESI 2846 emit_opcode( cbuf, 0x03 ); 2847 emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg ); 2848 %} 2849 2850 enc_class long_div( eRegL src1, eRegL src2 ) %{ 2851 // PUSH src1.hi 2852 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); 2853 // PUSH src1.lo 2854 emit_opcode(cbuf, 0x50+$src1$$reg ); 2855 // PUSH src2.hi 2856 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); 2857 // PUSH src2.lo 2858 emit_opcode(cbuf, 0x50+$src2$$reg ); 2859 // CALL directly to the runtime 2860 cbuf.set_insts_mark(); 2861 emit_opcode(cbuf,0xE8); // Call into runtime 2862 emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2863 // Restore stack 2864 emit_opcode(cbuf, 0x83); // add SP, #framesize 2865 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2866 emit_d8(cbuf, 4*4); 2867 %} 2868 2869 enc_class long_mod( eRegL src1, eRegL src2 ) %{ 2870 // PUSH src1.hi 2871 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); 2872 // PUSH src1.lo 2873 emit_opcode(cbuf, 0x50+$src1$$reg ); 2874 // PUSH src2.hi 2875 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); 2876 // PUSH src2.lo 2877 emit_opcode(cbuf, 0x50+$src2$$reg ); 2878 // CALL directly to the runtime 2879 cbuf.set_insts_mark(); 2880 emit_opcode(cbuf,0xE8); // Call into runtime 2881 emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2882 // Restore stack 2883 emit_opcode(cbuf, 0x83); // add SP, #framesize 2884 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2885 emit_d8(cbuf, 4*4); 2886 %} 2887 2888 enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{ 2889 // MOV $tmp,$src.lo 2890 emit_opcode(cbuf, 0x8B); 2891 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg); 2892 // OR $tmp,$src.hi 2893 emit_opcode(cbuf, 0x0B); 2894 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg)); 2895 %} 2896 2897 enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{ 2898 // CMP $src1.lo,$src2.lo 2899 emit_opcode( cbuf, 0x3B ); 2900 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2901 // JNE,s skip 2902 emit_cc(cbuf, 0x70, 0x5); 2903 emit_d8(cbuf,2); 2904 // CMP $src1.hi,$src2.hi 2905 emit_opcode( cbuf, 0x3B ); 2906 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) ); 2907 %} 2908 2909 enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{ 2910 // CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits 2911 emit_opcode( cbuf, 0x3B ); 2912 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2913 // MOV $tmp,$src1.hi 2914 emit_opcode( cbuf, 0x8B ); 2915 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) ); 2916 // SBB $tmp,$src2.hi\t! Compute flags for long compare 2917 emit_opcode( cbuf, 0x1B ); 2918 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) ); 2919 %} 2920 2921 enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{ 2922 // XOR $tmp,$tmp 2923 emit_opcode(cbuf,0x33); // XOR 2924 emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg); 2925 // CMP $tmp,$src.lo 2926 emit_opcode( cbuf, 0x3B ); 2927 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg ); 2928 // SBB $tmp,$src.hi 2929 emit_opcode( cbuf, 0x1B ); 2930 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) ); 2931 %} 2932 2933 // Sniff, sniff... smells like Gnu Superoptimizer 2934 enc_class neg_long( eRegL dst ) %{ 2935 emit_opcode(cbuf,0xF7); // NEG hi 2936 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg)); 2937 emit_opcode(cbuf,0xF7); // NEG lo 2938 emit_rm (cbuf,0x3, 0x3, $dst$$reg ); 2939 emit_opcode(cbuf,0x83); // SBB hi,0 2940 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg)); 2941 emit_d8 (cbuf,0 ); 2942 %} 2943 2944 enc_class enc_pop_rdx() %{ 2945 emit_opcode(cbuf,0x5A); 2946 %} 2947 2948 enc_class enc_rethrow() %{ 2949 cbuf.set_insts_mark(); 2950 emit_opcode(cbuf, 0xE9); // jmp entry 2951 emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4, 2952 runtime_call_Relocation::spec(), RELOC_IMM32 ); 2953 %} 2954 2955 2956 // Convert a double to an int. Java semantics require we do complex 2957 // manglelations in the corner cases. So we set the rounding mode to 2958 // 'zero', store the darned double down as an int, and reset the 2959 // rounding mode to 'nearest'. The hardware throws an exception which 2960 // patches up the correct value directly to the stack. 2961 enc_class DPR2I_encoding( regDPR src ) %{ 2962 // Flip to round-to-zero mode. We attempted to allow invalid-op 2963 // exceptions here, so that a NAN or other corner-case value will 2964 // thrown an exception (but normal values get converted at full speed). 2965 // However, I2C adapters and other float-stack manglers leave pending 2966 // invalid-op exceptions hanging. We would have to clear them before 2967 // enabling them and that is more expensive than just testing for the 2968 // invalid value Intel stores down in the corner cases. 2969 emit_opcode(cbuf,0xD9); // FLDCW trunc 2970 emit_opcode(cbuf,0x2D); 2971 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); 2972 // Allocate a word 2973 emit_opcode(cbuf,0x83); // SUB ESP,4 2974 emit_opcode(cbuf,0xEC); 2975 emit_d8(cbuf,0x04); 2976 // Encoding assumes a double has been pushed into FPR0. 2977 // Store down the double as an int, popping the FPU stack 2978 emit_opcode(cbuf,0xDB); // FISTP [ESP] 2979 emit_opcode(cbuf,0x1C); 2980 emit_d8(cbuf,0x24); 2981 // Restore the rounding mode; mask the exception 2982 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 2983 emit_opcode(cbuf,0x2D); 2984 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 2985 ? (int)StubRoutines::addr_fpu_cntrl_wrd_24() 2986 : (int)StubRoutines::addr_fpu_cntrl_wrd_std()); 2987 2988 // Load the converted int; adjust CPU stack 2989 emit_opcode(cbuf,0x58); // POP EAX 2990 emit_opcode(cbuf,0x3D); // CMP EAX,imm 2991 emit_d32 (cbuf,0x80000000); // 0x80000000 2992 emit_opcode(cbuf,0x75); // JNE around_slow_call 2993 emit_d8 (cbuf,0x07); // Size of slow_call 2994 // Push src onto stack slow-path 2995 emit_opcode(cbuf,0xD9 ); // FLD ST(i) 2996 emit_d8 (cbuf,0xC0-1+$src$$reg ); 2997 // CALL directly to the runtime 2998 cbuf.set_insts_mark(); 2999 emit_opcode(cbuf,0xE8); // Call into runtime 3000 emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 3001 // Carry on here... 3002 %} 3003 3004 enc_class DPR2L_encoding( regDPR src ) %{ 3005 emit_opcode(cbuf,0xD9); // FLDCW trunc 3006 emit_opcode(cbuf,0x2D); 3007 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); 3008 // Allocate a word 3009 emit_opcode(cbuf,0x83); // SUB ESP,8 3010 emit_opcode(cbuf,0xEC); 3011 emit_d8(cbuf,0x08); 3012 // Encoding assumes a double has been pushed into FPR0. 3013 // Store down the double as a long, popping the FPU stack 3014 emit_opcode(cbuf,0xDF); // FISTP [ESP] 3015 emit_opcode(cbuf,0x3C); 3016 emit_d8(cbuf,0x24); 3017 // Restore the rounding mode; mask the exception 3018 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 3019 emit_opcode(cbuf,0x2D); 3020 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 3021 ? (int)StubRoutines::addr_fpu_cntrl_wrd_24() 3022 : (int)StubRoutines::addr_fpu_cntrl_wrd_std()); 3023 3024 // Load the converted int; adjust CPU stack 3025 emit_opcode(cbuf,0x58); // POP EAX 3026 emit_opcode(cbuf,0x5A); // POP EDX 3027 emit_opcode(cbuf,0x81); // CMP EDX,imm 3028 emit_d8 (cbuf,0xFA); // rdx 3029 emit_d32 (cbuf,0x80000000); // 0x80000000 3030 emit_opcode(cbuf,0x75); // JNE around_slow_call 3031 emit_d8 (cbuf,0x07+4); // Size of slow_call 3032 emit_opcode(cbuf,0x85); // TEST EAX,EAX 3033 emit_opcode(cbuf,0xC0); // 2/rax,/rax, 3034 emit_opcode(cbuf,0x75); // JNE around_slow_call 3035 emit_d8 (cbuf,0x07); // Size of slow_call 3036 // Push src onto stack slow-path 3037 emit_opcode(cbuf,0xD9 ); // FLD ST(i) 3038 emit_d8 (cbuf,0xC0-1+$src$$reg ); 3039 // CALL directly to the runtime 3040 cbuf.set_insts_mark(); 3041 emit_opcode(cbuf,0xE8); // Call into runtime 3042 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 3043 // Carry on here... 3044 %} 3045 3046 enc_class FMul_ST_reg( eRegFPR src1 ) %{ 3047 // Operand was loaded from memory into fp ST (stack top) 3048 // FMUL ST,$src /* D8 C8+i */ 3049 emit_opcode(cbuf, 0xD8); 3050 emit_opcode(cbuf, 0xC8 + $src1$$reg); 3051 %} 3052 3053 enc_class FAdd_ST_reg( eRegFPR src2 ) %{ 3054 // FADDP ST,src2 /* D8 C0+i */ 3055 emit_opcode(cbuf, 0xD8); 3056 emit_opcode(cbuf, 0xC0 + $src2$$reg); 3057 //could use FADDP src2,fpST /* DE C0+i */ 3058 %} 3059 3060 enc_class FAddP_reg_ST( eRegFPR src2 ) %{ 3061 // FADDP src2,ST /* DE C0+i */ 3062 emit_opcode(cbuf, 0xDE); 3063 emit_opcode(cbuf, 0xC0 + $src2$$reg); 3064 %} 3065 3066 enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{ 3067 // Operand has been loaded into fp ST (stack top) 3068 // FSUB ST,$src1 3069 emit_opcode(cbuf, 0xD8); 3070 emit_opcode(cbuf, 0xE0 + $src1$$reg); 3071 3072 // FDIV 3073 emit_opcode(cbuf, 0xD8); 3074 emit_opcode(cbuf, 0xF0 + $src2$$reg); 3075 %} 3076 3077 enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{ 3078 // Operand was loaded from memory into fp ST (stack top) 3079 // FADD ST,$src /* D8 C0+i */ 3080 emit_opcode(cbuf, 0xD8); 3081 emit_opcode(cbuf, 0xC0 + $src1$$reg); 3082 3083 // FMUL ST,src2 /* D8 C*+i */ 3084 emit_opcode(cbuf, 0xD8); 3085 emit_opcode(cbuf, 0xC8 + $src2$$reg); 3086 %} 3087 3088 3089 enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{ 3090 // Operand was loaded from memory into fp ST (stack top) 3091 // FADD ST,$src /* D8 C0+i */ 3092 emit_opcode(cbuf, 0xD8); 3093 emit_opcode(cbuf, 0xC0 + $src1$$reg); 3094 3095 // FMULP src2,ST /* DE C8+i */ 3096 emit_opcode(cbuf, 0xDE); 3097 emit_opcode(cbuf, 0xC8 + $src2$$reg); 3098 %} 3099 3100 // Atomically load the volatile long 3101 enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{ 3102 emit_opcode(cbuf,0xDF); 3103 int rm_byte_opcode = 0x05; 3104 int base = $mem$$base; 3105 int index = $mem$$index; 3106 int scale = $mem$$scale; 3107 int displace = $mem$$disp; 3108 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 3109 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 3110 store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp ); 3111 %} 3112 3113 // Volatile Store Long. Must be atomic, so move it into 3114 // the FP TOS and then do a 64-bit FIST. Has to probe the 3115 // target address before the store (for null-ptr checks) 3116 // so the memory operand is used twice in the encoding. 3117 enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{ 3118 store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp ); 3119 cbuf.set_insts_mark(); // Mark start of FIST in case $mem has an oop 3120 emit_opcode(cbuf,0xDF); 3121 int rm_byte_opcode = 0x07; 3122 int base = $mem$$base; 3123 int index = $mem$$index; 3124 int scale = $mem$$scale; 3125 int displace = $mem$$disp; 3126 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 3127 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 3128 %} 3129 3130 // Safepoint Poll. This polls the safepoint page, and causes an 3131 // exception if it is not readable. Unfortunately, it kills the condition code 3132 // in the process 3133 // We current use TESTL [spp],EDI 3134 // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0 3135 3136 enc_class Safepoint_Poll() %{ 3137 cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0); 3138 emit_opcode(cbuf,0x85); 3139 emit_rm (cbuf, 0x0, 0x7, 0x5); 3140 emit_d32(cbuf, (intptr_t)os::get_polling_page()); 3141 %} 3142%} 3143 3144 3145//----------FRAME-------------------------------------------------------------- 3146// Definition of frame structure and management information. 3147// 3148// S T A C K L A Y O U T Allocators stack-slot number 3149// | (to get allocators register number 3150// G Owned by | | v add OptoReg::stack0()) 3151// r CALLER | | 3152// o | +--------+ pad to even-align allocators stack-slot 3153// w V | pad0 | numbers; owned by CALLER 3154// t -----------+--------+----> Matcher::_in_arg_limit, unaligned 3155// h ^ | in | 5 3156// | | args | 4 Holes in incoming args owned by SELF 3157// | | | | 3 3158// | | +--------+ 3159// V | | old out| Empty on Intel, window on Sparc 3160// | old |preserve| Must be even aligned. 3161// | SP-+--------+----> Matcher::_old_SP, even aligned 3162// | | in | 3 area for Intel ret address 3163// Owned by |preserve| Empty on Sparc. 3164// SELF +--------+ 3165// | | pad2 | 2 pad to align old SP 3166// | +--------+ 1 3167// | | locks | 0 3168// | +--------+----> OptoReg::stack0(), even aligned 3169// | | pad1 | 11 pad to align new SP 3170// | +--------+ 3171// | | | 10 3172// | | spills | 9 spills 3173// V | | 8 (pad0 slot for callee) 3174// -----------+--------+----> Matcher::_out_arg_limit, unaligned 3175// ^ | out | 7 3176// | | args | 6 Holes in outgoing args owned by CALLEE 3177// Owned by +--------+ 3178// CALLEE | new out| 6 Empty on Intel, window on Sparc 3179// | new |preserve| Must be even-aligned. 3180// | SP-+--------+----> Matcher::_new_SP, even aligned 3181// | | | 3182// 3183// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is 3184// known from SELF's arguments and the Java calling convention. 3185// Region 6-7 is determined per call site. 3186// Note 2: If the calling convention leaves holes in the incoming argument 3187// area, those holes are owned by SELF. Holes in the outgoing area 3188// are owned by the CALLEE. Holes should not be nessecary in the 3189// incoming area, as the Java calling convention is completely under 3190// the control of the AD file. Doubles can be sorted and packed to 3191// avoid holes. Holes in the outgoing arguments may be nessecary for 3192// varargs C calling conventions. 3193// Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is 3194// even aligned with pad0 as needed. 3195// Region 6 is even aligned. Region 6-7 is NOT even aligned; 3196// region 6-11 is even aligned; it may be padded out more so that 3197// the region from SP to FP meets the minimum stack alignment. 3198 3199frame %{ 3200 // What direction does stack grow in (assumed to be same for C & Java) 3201 stack_direction(TOWARDS_LOW); 3202 3203 // These three registers define part of the calling convention 3204 // between compiled code and the interpreter. 3205 inline_cache_reg(EAX); // Inline Cache Register 3206 interpreter_method_oop_reg(EBX); // Method Oop Register when calling interpreter 3207 3208 // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] 3209 cisc_spilling_operand_name(indOffset32); 3210 3211 // Number of stack slots consumed by locking an object 3212 sync_stack_slots(1); 3213 3214 // Compiled code's Frame Pointer 3215 frame_pointer(ESP); 3216 // Interpreter stores its frame pointer in a register which is 3217 // stored to the stack by I2CAdaptors. 3218 // I2CAdaptors convert from interpreted java to compiled java. 3219 interpreter_frame_pointer(EBP); 3220 3221 // Stack alignment requirement 3222 // Alignment size in bytes (128-bit -> 16 bytes) 3223 stack_alignment(StackAlignmentInBytes); 3224 3225 // Number of stack slots between incoming argument block and the start of 3226 // a new frame. The PROLOG must add this many slots to the stack. The 3227 // EPILOG must remove this many slots. Intel needs one slot for 3228 // return address and one for rbp, (must save rbp) 3229 in_preserve_stack_slots(2+VerifyStackAtCalls); 3230 3231 // Number of outgoing stack slots killed above the out_preserve_stack_slots 3232 // for calls to C. Supports the var-args backing area for register parms. 3233 varargs_C_out_slots_killed(0); 3234 3235 // The after-PROLOG location of the return address. Location of 3236 // return address specifies a type (REG or STACK) and a number 3237 // representing the register number (i.e. - use a register name) or 3238 // stack slot. 3239 // Ret Addr is on stack in slot 0 if no locks or verification or alignment. 3240 // Otherwise, it is above the locks and verification slot and alignment word 3241 return_addr(STACK - 1 + 3242 round_to((Compile::current()->in_preserve_stack_slots() + 3243 Compile::current()->fixed_slots()), 3244 stack_alignment_in_slots())); 3245 3246 // Body of function which returns an integer array locating 3247 // arguments either in registers or in stack slots. Passed an array 3248 // of ideal registers called "sig" and a "length" count. Stack-slot 3249 // offsets are based on outgoing arguments, i.e. a CALLER setting up 3250 // arguments for a CALLEE. Incoming stack arguments are 3251 // automatically biased by the preserve_stack_slots field above. 3252 calling_convention %{ 3253 // No difference between ingoing/outgoing just pass false 3254 SharedRuntime::java_calling_convention(sig_bt, regs, length, false); 3255 %} 3256 3257 3258 // Body of function which returns an integer array locating 3259 // arguments either in registers or in stack slots. Passed an array 3260 // of ideal registers called "sig" and a "length" count. Stack-slot 3261 // offsets are based on outgoing arguments, i.e. a CALLER setting up 3262 // arguments for a CALLEE. Incoming stack arguments are 3263 // automatically biased by the preserve_stack_slots field above. 3264 c_calling_convention %{ 3265 // This is obviously always outgoing 3266 (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length); 3267 %} 3268 3269 // Location of C & interpreter return values 3270 c_return_value %{ 3271 assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); 3272 static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num }; 3273 static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num }; 3274 3275 // in SSE2+ mode we want to keep the FPU stack clean so pretend 3276 // that C functions return float and double results in XMM0. 3277 if( ideal_reg == Op_RegD && UseSSE>=2 ) 3278 return OptoRegPair(XMM0b_num,XMM0_num); 3279 if( ideal_reg == Op_RegF && UseSSE>=2 ) 3280 return OptoRegPair(OptoReg::Bad,XMM0_num); 3281 3282 return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); 3283 %} 3284 3285 // Location of return values 3286 return_value %{ 3287 assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); 3288 static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num }; 3289 static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num }; 3290 if( ideal_reg == Op_RegD && UseSSE>=2 ) 3291 return OptoRegPair(XMM0b_num,XMM0_num); 3292 if( ideal_reg == Op_RegF && UseSSE>=1 ) 3293 return OptoRegPair(OptoReg::Bad,XMM0_num); 3294 return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); 3295 %} 3296 3297%} 3298 3299//----------ATTRIBUTES--------------------------------------------------------- 3300//----------Operand Attributes------------------------------------------------- 3301op_attrib op_cost(0); // Required cost attribute 3302 3303//----------Instruction Attributes--------------------------------------------- 3304ins_attrib ins_cost(100); // Required cost attribute 3305ins_attrib ins_size(8); // Required size attribute (in bits) 3306ins_attrib ins_short_branch(0); // Required flag: is this instruction a 3307 // non-matching short branch variant of some 3308 // long branch? 3309ins_attrib ins_alignment(1); // Required alignment attribute (must be a power of 2) 3310 // specifies the alignment that some part of the instruction (not 3311 // necessarily the start) requires. If > 1, a compute_padding() 3312 // function must be provided for the instruction 3313 3314//----------OPERANDS----------------------------------------------------------- 3315// Operand definitions must precede instruction definitions for correct parsing 3316// in the ADLC because operands constitute user defined types which are used in 3317// instruction definitions. 3318 3319//----------Simple Operands---------------------------------------------------- 3320// Immediate Operands 3321// Integer Immediate 3322operand immI() %{ 3323 match(ConI); 3324 3325 op_cost(10); 3326 format %{ %} 3327 interface(CONST_INTER); 3328%} 3329 3330// Constant for test vs zero 3331operand immI0() %{ 3332 predicate(n->get_int() == 0); 3333 match(ConI); 3334 3335 op_cost(0); 3336 format %{ %} 3337 interface(CONST_INTER); 3338%} 3339 3340// Constant for increment 3341operand immI1() %{ 3342 predicate(n->get_int() == 1); 3343 match(ConI); 3344 3345 op_cost(0); 3346 format %{ %} 3347 interface(CONST_INTER); 3348%} 3349 3350// Constant for decrement 3351operand immI_M1() %{ 3352 predicate(n->get_int() == -1); 3353 match(ConI); 3354 3355 op_cost(0); 3356 format %{ %} 3357 interface(CONST_INTER); 3358%} 3359 3360// Valid scale values for addressing modes 3361operand immI2() %{ 3362 predicate(0 <= n->get_int() && (n->get_int() <= 3)); 3363 match(ConI); 3364 3365 format %{ %} 3366 interface(CONST_INTER); 3367%} 3368 3369operand immI8() %{ 3370 predicate((-128 <= n->get_int()) && (n->get_int() <= 127)); 3371 match(ConI); 3372 3373 op_cost(5); 3374 format %{ %} 3375 interface(CONST_INTER); 3376%} 3377 3378operand immI16() %{ 3379 predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767)); 3380 match(ConI); 3381 3382 op_cost(10); 3383 format %{ %} 3384 interface(CONST_INTER); 3385%} 3386 3387// Int Immediate non-negative 3388operand immU31() 3389%{ 3390 predicate(n->get_int() >= 0); 3391 match(ConI); 3392 3393 op_cost(0); 3394 format %{ %} 3395 interface(CONST_INTER); 3396%} 3397 3398// Constant for long shifts 3399operand immI_32() %{ 3400 predicate( n->get_int() == 32 ); 3401 match(ConI); 3402 3403 op_cost(0); 3404 format %{ %} 3405 interface(CONST_INTER); 3406%} 3407 3408operand immI_1_31() %{ 3409 predicate( n->get_int() >= 1 && n->get_int() <= 31 ); 3410 match(ConI); 3411 3412 op_cost(0); 3413 format %{ %} 3414 interface(CONST_INTER); 3415%} 3416 3417operand immI_32_63() %{ 3418 predicate( n->get_int() >= 32 && n->get_int() <= 63 ); 3419 match(ConI); 3420 op_cost(0); 3421 3422 format %{ %} 3423 interface(CONST_INTER); 3424%} 3425 3426operand immI_1() %{ 3427 predicate( n->get_int() == 1 ); 3428 match(ConI); 3429 3430 op_cost(0); 3431 format %{ %} 3432 interface(CONST_INTER); 3433%} 3434 3435operand immI_2() %{ 3436 predicate( n->get_int() == 2 ); 3437 match(ConI); 3438 3439 op_cost(0); 3440 format %{ %} 3441 interface(CONST_INTER); 3442%} 3443 3444operand immI_3() %{ 3445 predicate( n->get_int() == 3 ); 3446 match(ConI); 3447 3448 op_cost(0); 3449 format %{ %} 3450 interface(CONST_INTER); 3451%} 3452 3453// Pointer Immediate 3454operand immP() %{ 3455 match(ConP); 3456 3457 op_cost(10); 3458 format %{ %} 3459 interface(CONST_INTER); 3460%} 3461 3462// NULL Pointer Immediate 3463operand immP0() %{ 3464 predicate( n->get_ptr() == 0 ); 3465 match(ConP); 3466 op_cost(0); 3467 3468 format %{ %} 3469 interface(CONST_INTER); 3470%} 3471 3472// Long Immediate 3473operand immL() %{ 3474 match(ConL); 3475 3476 op_cost(20); 3477 format %{ %} 3478 interface(CONST_INTER); 3479%} 3480 3481// Long Immediate zero 3482operand immL0() %{ 3483 predicate( n->get_long() == 0L ); 3484 match(ConL); 3485 op_cost(0); 3486 3487 format %{ %} 3488 interface(CONST_INTER); 3489%} 3490 3491// Long Immediate zero 3492operand immL_M1() %{ 3493 predicate( n->get_long() == -1L ); 3494 match(ConL); 3495 op_cost(0); 3496 3497 format %{ %} 3498 interface(CONST_INTER); 3499%} 3500 3501// Long immediate from 0 to 127. 3502// Used for a shorter form of long mul by 10. 3503operand immL_127() %{ 3504 predicate((0 <= n->get_long()) && (n->get_long() <= 127)); 3505 match(ConL); 3506 op_cost(0); 3507 3508 format %{ %} 3509 interface(CONST_INTER); 3510%} 3511 3512// Long Immediate: low 32-bit mask 3513operand immL_32bits() %{ 3514 predicate(n->get_long() == 0xFFFFFFFFL); 3515 match(ConL); 3516 op_cost(0); 3517 3518 format %{ %} 3519 interface(CONST_INTER); 3520%} 3521 3522// Long Immediate: low 32-bit mask 3523operand immL32() %{ 3524 predicate(n->get_long() == (int)(n->get_long())); 3525 match(ConL); 3526 op_cost(20); 3527 3528 format %{ %} 3529 interface(CONST_INTER); 3530%} 3531 3532//Double Immediate zero 3533operand immDPR0() %{ 3534 // Do additional (and counter-intuitive) test against NaN to work around VC++ 3535 // bug that generates code such that NaNs compare equal to 0.0 3536 predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) ); 3537 match(ConD); 3538 3539 op_cost(5); 3540 format %{ %} 3541 interface(CONST_INTER); 3542%} 3543 3544// Double Immediate one 3545operand immDPR1() %{ 3546 predicate( UseSSE<=1 && n->getd() == 1.0 ); 3547 match(ConD); 3548 3549 op_cost(5); 3550 format %{ %} 3551 interface(CONST_INTER); 3552%} 3553 3554// Double Immediate 3555operand immDPR() %{ 3556 predicate(UseSSE<=1); 3557 match(ConD); 3558 3559 op_cost(5); 3560 format %{ %} 3561 interface(CONST_INTER); 3562%} 3563 3564operand immD() %{ 3565 predicate(UseSSE>=2); 3566 match(ConD); 3567 3568 op_cost(5); 3569 format %{ %} 3570 interface(CONST_INTER); 3571%} 3572 3573// Double Immediate zero 3574operand immD0() %{ 3575 // Do additional (and counter-intuitive) test against NaN to work around VC++ 3576 // bug that generates code such that NaNs compare equal to 0.0 AND do not 3577 // compare equal to -0.0. 3578 predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 ); 3579 match(ConD); 3580 3581 format %{ %} 3582 interface(CONST_INTER); 3583%} 3584 3585// Float Immediate zero 3586operand immFPR0() %{ 3587 predicate(UseSSE == 0 && n->getf() == 0.0F); 3588 match(ConF); 3589 3590 op_cost(5); 3591 format %{ %} 3592 interface(CONST_INTER); 3593%} 3594 3595// Float Immediate one 3596operand immFPR1() %{ 3597 predicate(UseSSE == 0 && n->getf() == 1.0F); 3598 match(ConF); 3599 3600 op_cost(5); 3601 format %{ %} 3602 interface(CONST_INTER); 3603%} 3604 3605// Float Immediate 3606operand immFPR() %{ 3607 predicate( UseSSE == 0 ); 3608 match(ConF); 3609 3610 op_cost(5); 3611 format %{ %} 3612 interface(CONST_INTER); 3613%} 3614 3615// Float Immediate 3616operand immF() %{ 3617 predicate(UseSSE >= 1); 3618 match(ConF); 3619 3620 op_cost(5); 3621 format %{ %} 3622 interface(CONST_INTER); 3623%} 3624 3625// Float Immediate zero. Zero and not -0.0 3626operand immF0() %{ 3627 predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 ); 3628 match(ConF); 3629 3630 op_cost(5); 3631 format %{ %} 3632 interface(CONST_INTER); 3633%} 3634 3635// Immediates for special shifts (sign extend) 3636 3637// Constants for increment 3638operand immI_16() %{ 3639 predicate( n->get_int() == 16 ); 3640 match(ConI); 3641 3642 format %{ %} 3643 interface(CONST_INTER); 3644%} 3645 3646operand immI_24() %{ 3647 predicate( n->get_int() == 24 ); 3648 match(ConI); 3649 3650 format %{ %} 3651 interface(CONST_INTER); 3652%} 3653 3654// Constant for byte-wide masking 3655operand immI_255() %{ 3656 predicate( n->get_int() == 255 ); 3657 match(ConI); 3658 3659 format %{ %} 3660 interface(CONST_INTER); 3661%} 3662 3663// Constant for short-wide masking 3664operand immI_65535() %{ 3665 predicate(n->get_int() == 65535); 3666 match(ConI); 3667 3668 format %{ %} 3669 interface(CONST_INTER); 3670%} 3671 3672// Register Operands 3673// Integer Register 3674operand rRegI() %{ 3675 constraint(ALLOC_IN_RC(int_reg)); 3676 match(RegI); 3677 match(xRegI); 3678 match(eAXRegI); 3679 match(eBXRegI); 3680 match(eCXRegI); 3681 match(eDXRegI); 3682 match(eDIRegI); 3683 match(eSIRegI); 3684 3685 format %{ %} 3686 interface(REG_INTER); 3687%} 3688 3689// Subset of Integer Register 3690operand xRegI(rRegI reg) %{ 3691 constraint(ALLOC_IN_RC(int_x_reg)); 3692 match(reg); 3693 match(eAXRegI); 3694 match(eBXRegI); 3695 match(eCXRegI); 3696 match(eDXRegI); 3697 3698 format %{ %} 3699 interface(REG_INTER); 3700%} 3701 3702// Special Registers 3703operand eAXRegI(xRegI reg) %{ 3704 constraint(ALLOC_IN_RC(eax_reg)); 3705 match(reg); 3706 match(rRegI); 3707 3708 format %{ "EAX" %} 3709 interface(REG_INTER); 3710%} 3711 3712// Special Registers 3713operand eBXRegI(xRegI reg) %{ 3714 constraint(ALLOC_IN_RC(ebx_reg)); 3715 match(reg); 3716 match(rRegI); 3717 3718 format %{ "EBX" %} 3719 interface(REG_INTER); 3720%} 3721 3722operand eCXRegI(xRegI reg) %{ 3723 constraint(ALLOC_IN_RC(ecx_reg)); 3724 match(reg); 3725 match(rRegI); 3726 3727 format %{ "ECX" %} 3728 interface(REG_INTER); 3729%} 3730 3731operand eDXRegI(xRegI reg) %{ 3732 constraint(ALLOC_IN_RC(edx_reg)); 3733 match(reg); 3734 match(rRegI); 3735 3736 format %{ "EDX" %} 3737 interface(REG_INTER); 3738%} 3739 3740operand eDIRegI(xRegI reg) %{ 3741 constraint(ALLOC_IN_RC(edi_reg)); 3742 match(reg); 3743 match(rRegI); 3744 3745 format %{ "EDI" %} 3746 interface(REG_INTER); 3747%} 3748 3749operand naxRegI() %{ 3750 constraint(ALLOC_IN_RC(nax_reg)); 3751 match(RegI); 3752 match(eCXRegI); 3753 match(eDXRegI); 3754 match(eSIRegI); 3755 match(eDIRegI); 3756 3757 format %{ %} 3758 interface(REG_INTER); 3759%} 3760 3761operand nadxRegI() %{ 3762 constraint(ALLOC_IN_RC(nadx_reg)); 3763 match(RegI); 3764 match(eBXRegI); 3765 match(eCXRegI); 3766 match(eSIRegI); 3767 match(eDIRegI); 3768 3769 format %{ %} 3770 interface(REG_INTER); 3771%} 3772 3773operand ncxRegI() %{ 3774 constraint(ALLOC_IN_RC(ncx_reg)); 3775 match(RegI); 3776 match(eAXRegI); 3777 match(eDXRegI); 3778 match(eSIRegI); 3779 match(eDIRegI); 3780 3781 format %{ %} 3782 interface(REG_INTER); 3783%} 3784 3785// // This operand was used by cmpFastUnlock, but conflicted with 'object' reg 3786// // 3787operand eSIRegI(xRegI reg) %{ 3788 constraint(ALLOC_IN_RC(esi_reg)); 3789 match(reg); 3790 match(rRegI); 3791 3792 format %{ "ESI" %} 3793 interface(REG_INTER); 3794%} 3795 3796// Pointer Register 3797operand anyRegP() %{ 3798 constraint(ALLOC_IN_RC(any_reg)); 3799 match(RegP); 3800 match(eAXRegP); 3801 match(eBXRegP); 3802 match(eCXRegP); 3803 match(eDIRegP); 3804 match(eRegP); 3805 3806 format %{ %} 3807 interface(REG_INTER); 3808%} 3809 3810operand eRegP() %{ 3811 constraint(ALLOC_IN_RC(int_reg)); 3812 match(RegP); 3813 match(eAXRegP); 3814 match(eBXRegP); 3815 match(eCXRegP); 3816 match(eDIRegP); 3817 3818 format %{ %} 3819 interface(REG_INTER); 3820%} 3821 3822// On windows95, EBP is not safe to use for implicit null tests. 3823operand eRegP_no_EBP() %{ 3824 constraint(ALLOC_IN_RC(int_reg_no_ebp)); 3825 match(RegP); 3826 match(eAXRegP); 3827 match(eBXRegP); 3828 match(eCXRegP); 3829 match(eDIRegP); 3830 3831 op_cost(100); 3832 format %{ %} 3833 interface(REG_INTER); 3834%} 3835 3836operand naxRegP() %{ 3837 constraint(ALLOC_IN_RC(nax_reg)); 3838 match(RegP); 3839 match(eBXRegP); 3840 match(eDXRegP); 3841 match(eCXRegP); 3842 match(eSIRegP); 3843 match(eDIRegP); 3844 3845 format %{ %} 3846 interface(REG_INTER); 3847%} 3848 3849operand nabxRegP() %{ 3850 constraint(ALLOC_IN_RC(nabx_reg)); 3851 match(RegP); 3852 match(eCXRegP); 3853 match(eDXRegP); 3854 match(eSIRegP); 3855 match(eDIRegP); 3856 3857 format %{ %} 3858 interface(REG_INTER); 3859%} 3860 3861operand pRegP() %{ 3862 constraint(ALLOC_IN_RC(p_reg)); 3863 match(RegP); 3864 match(eBXRegP); 3865 match(eDXRegP); 3866 match(eSIRegP); 3867 match(eDIRegP); 3868 3869 format %{ %} 3870 interface(REG_INTER); 3871%} 3872 3873// Special Registers 3874// Return a pointer value 3875operand eAXRegP(eRegP reg) %{ 3876 constraint(ALLOC_IN_RC(eax_reg)); 3877 match(reg); 3878 format %{ "EAX" %} 3879 interface(REG_INTER); 3880%} 3881 3882// Used in AtomicAdd 3883operand eBXRegP(eRegP reg) %{ 3884 constraint(ALLOC_IN_RC(ebx_reg)); 3885 match(reg); 3886 format %{ "EBX" %} 3887 interface(REG_INTER); 3888%} 3889 3890// Tail-call (interprocedural jump) to interpreter 3891operand eCXRegP(eRegP reg) %{ 3892 constraint(ALLOC_IN_RC(ecx_reg)); 3893 match(reg); 3894 format %{ "ECX" %} 3895 interface(REG_INTER); 3896%} 3897 3898operand eSIRegP(eRegP reg) %{ 3899 constraint(ALLOC_IN_RC(esi_reg)); 3900 match(reg); 3901 format %{ "ESI" %} 3902 interface(REG_INTER); 3903%} 3904 3905// Used in rep stosw 3906operand eDIRegP(eRegP reg) %{ 3907 constraint(ALLOC_IN_RC(edi_reg)); 3908 match(reg); 3909 format %{ "EDI" %} 3910 interface(REG_INTER); 3911%} 3912 3913operand eRegL() %{ 3914 constraint(ALLOC_IN_RC(long_reg)); 3915 match(RegL); 3916 match(eADXRegL); 3917 3918 format %{ %} 3919 interface(REG_INTER); 3920%} 3921 3922operand eADXRegL( eRegL reg ) %{ 3923 constraint(ALLOC_IN_RC(eadx_reg)); 3924 match(reg); 3925 3926 format %{ "EDX:EAX" %} 3927 interface(REG_INTER); 3928%} 3929 3930operand eBCXRegL( eRegL reg ) %{ 3931 constraint(ALLOC_IN_RC(ebcx_reg)); 3932 match(reg); 3933 3934 format %{ "EBX:ECX" %} 3935 interface(REG_INTER); 3936%} 3937 3938// Special case for integer high multiply 3939operand eADXRegL_low_only() %{ 3940 constraint(ALLOC_IN_RC(eadx_reg)); 3941 match(RegL); 3942 3943 format %{ "EAX" %} 3944 interface(REG_INTER); 3945%} 3946 3947// Flags register, used as output of compare instructions 3948operand eFlagsReg() %{ 3949 constraint(ALLOC_IN_RC(int_flags)); 3950 match(RegFlags); 3951 3952 format %{ "EFLAGS" %} 3953 interface(REG_INTER); 3954%} 3955 3956// Flags register, used as output of FLOATING POINT compare instructions 3957operand eFlagsRegU() %{ 3958 constraint(ALLOC_IN_RC(int_flags)); 3959 match(RegFlags); 3960 3961 format %{ "EFLAGS_U" %} 3962 interface(REG_INTER); 3963%} 3964 3965operand eFlagsRegUCF() %{ 3966 constraint(ALLOC_IN_RC(int_flags)); 3967 match(RegFlags); 3968 predicate(false); 3969 3970 format %{ "EFLAGS_U_CF" %} 3971 interface(REG_INTER); 3972%} 3973 3974// Condition Code Register used by long compare 3975operand flagsReg_long_LTGE() %{ 3976 constraint(ALLOC_IN_RC(int_flags)); 3977 match(RegFlags); 3978 format %{ "FLAGS_LTGE" %} 3979 interface(REG_INTER); 3980%} 3981operand flagsReg_long_EQNE() %{ 3982 constraint(ALLOC_IN_RC(int_flags)); 3983 match(RegFlags); 3984 format %{ "FLAGS_EQNE" %} 3985 interface(REG_INTER); 3986%} 3987operand flagsReg_long_LEGT() %{ 3988 constraint(ALLOC_IN_RC(int_flags)); 3989 match(RegFlags); 3990 format %{ "FLAGS_LEGT" %} 3991 interface(REG_INTER); 3992%} 3993 3994// Float register operands 3995operand regDPR() %{ 3996 predicate( UseSSE < 2 ); 3997 constraint(ALLOC_IN_RC(fp_dbl_reg)); 3998 match(RegD); 3999 match(regDPR1); 4000 match(regDPR2); 4001 format %{ %} 4002 interface(REG_INTER); 4003%} 4004 4005operand regDPR1(regDPR reg) %{ 4006 predicate( UseSSE < 2 ); 4007 constraint(ALLOC_IN_RC(fp_dbl_reg0)); 4008 match(reg); 4009 format %{ "FPR1" %} 4010 interface(REG_INTER); 4011%} 4012 4013operand regDPR2(regDPR reg) %{ 4014 predicate( UseSSE < 2 ); 4015 constraint(ALLOC_IN_RC(fp_dbl_reg1)); 4016 match(reg); 4017 format %{ "FPR2" %} 4018 interface(REG_INTER); 4019%} 4020 4021operand regnotDPR1(regDPR reg) %{ 4022 predicate( UseSSE < 2 ); 4023 constraint(ALLOC_IN_RC(fp_dbl_notreg0)); 4024 match(reg); 4025 format %{ %} 4026 interface(REG_INTER); 4027%} 4028 4029// Float register operands 4030operand regFPR() %{ 4031 predicate( UseSSE < 2 ); 4032 constraint(ALLOC_IN_RC(fp_flt_reg)); 4033 match(RegF); 4034 match(regFPR1); 4035 format %{ %} 4036 interface(REG_INTER); 4037%} 4038 4039// Float register operands 4040operand regFPR1(regFPR reg) %{ 4041 predicate( UseSSE < 2 ); 4042 constraint(ALLOC_IN_RC(fp_flt_reg0)); 4043 match(reg); 4044 format %{ "FPR1" %} 4045 interface(REG_INTER); 4046%} 4047 4048// XMM Float register operands 4049operand regF() %{ 4050 predicate( UseSSE>=1 ); 4051 constraint(ALLOC_IN_RC(float_reg_legacy)); 4052 match(RegF); 4053 format %{ %} 4054 interface(REG_INTER); 4055%} 4056 4057// XMM Double register operands 4058operand regD() %{ 4059 predicate( UseSSE>=2 ); 4060 constraint(ALLOC_IN_RC(double_reg_legacy)); 4061 match(RegD); 4062 format %{ %} 4063 interface(REG_INTER); 4064%} 4065 4066// Vectors : note, we use legacy registers to avoid extra (unneeded in 32-bit VM) 4067// runtime code generation via reg_class_dynamic. 4068operand vecS() %{ 4069 constraint(ALLOC_IN_RC(vectors_reg_legacy)); 4070 match(VecS); 4071 4072 format %{ %} 4073 interface(REG_INTER); 4074%} 4075 4076operand vecD() %{ 4077 constraint(ALLOC_IN_RC(vectord_reg_legacy)); 4078 match(VecD); 4079 4080 format %{ %} 4081 interface(REG_INTER); 4082%} 4083 4084operand vecX() %{ 4085 constraint(ALLOC_IN_RC(vectorx_reg_legacy)); 4086 match(VecX); 4087 4088 format %{ %} 4089 interface(REG_INTER); 4090%} 4091 4092operand vecY() %{ 4093 constraint(ALLOC_IN_RC(vectory_reg_legacy)); 4094 match(VecY); 4095 4096 format %{ %} 4097 interface(REG_INTER); 4098%} 4099 4100//----------Memory Operands---------------------------------------------------- 4101// Direct Memory Operand 4102operand direct(immP addr) %{ 4103 match(addr); 4104 4105 format %{ "[$addr]" %} 4106 interface(MEMORY_INTER) %{ 4107 base(0xFFFFFFFF); 4108 index(0x4); 4109 scale(0x0); 4110 disp($addr); 4111 %} 4112%} 4113 4114// Indirect Memory Operand 4115operand indirect(eRegP reg) %{ 4116 constraint(ALLOC_IN_RC(int_reg)); 4117 match(reg); 4118 4119 format %{ "[$reg]" %} 4120 interface(MEMORY_INTER) %{ 4121 base($reg); 4122 index(0x4); 4123 scale(0x0); 4124 disp(0x0); 4125 %} 4126%} 4127 4128// Indirect Memory Plus Short Offset Operand 4129operand indOffset8(eRegP reg, immI8 off) %{ 4130 match(AddP reg off); 4131 4132 format %{ "[$reg + $off]" %} 4133 interface(MEMORY_INTER) %{ 4134 base($reg); 4135 index(0x4); 4136 scale(0x0); 4137 disp($off); 4138 %} 4139%} 4140 4141// Indirect Memory Plus Long Offset Operand 4142operand indOffset32(eRegP reg, immI off) %{ 4143 match(AddP reg off); 4144 4145 format %{ "[$reg + $off]" %} 4146 interface(MEMORY_INTER) %{ 4147 base($reg); 4148 index(0x4); 4149 scale(0x0); 4150 disp($off); 4151 %} 4152%} 4153 4154// Indirect Memory Plus Long Offset Operand 4155operand indOffset32X(rRegI reg, immP off) %{ 4156 match(AddP off reg); 4157 4158 format %{ "[$reg + $off]" %} 4159 interface(MEMORY_INTER) %{ 4160 base($reg); 4161 index(0x4); 4162 scale(0x0); 4163 disp($off); 4164 %} 4165%} 4166 4167// Indirect Memory Plus Index Register Plus Offset Operand 4168operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{ 4169 match(AddP (AddP reg ireg) off); 4170 4171 op_cost(10); 4172 format %{"[$reg + $off + $ireg]" %} 4173 interface(MEMORY_INTER) %{ 4174 base($reg); 4175 index($ireg); 4176 scale(0x0); 4177 disp($off); 4178 %} 4179%} 4180 4181// Indirect Memory Plus Index Register Plus Offset Operand 4182operand indIndex(eRegP reg, rRegI ireg) %{ 4183 match(AddP reg ireg); 4184 4185 op_cost(10); 4186 format %{"[$reg + $ireg]" %} 4187 interface(MEMORY_INTER) %{ 4188 base($reg); 4189 index($ireg); 4190 scale(0x0); 4191 disp(0x0); 4192 %} 4193%} 4194 4195// // ------------------------------------------------------------------------- 4196// // 486 architecture doesn't support "scale * index + offset" with out a base 4197// // ------------------------------------------------------------------------- 4198// // Scaled Memory Operands 4199// // Indirect Memory Times Scale Plus Offset Operand 4200// operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{ 4201// match(AddP off (LShiftI ireg scale)); 4202// 4203// op_cost(10); 4204// format %{"[$off + $ireg << $scale]" %} 4205// interface(MEMORY_INTER) %{ 4206// base(0x4); 4207// index($ireg); 4208// scale($scale); 4209// disp($off); 4210// %} 4211// %} 4212 4213// Indirect Memory Times Scale Plus Index Register 4214operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{ 4215 match(AddP reg (LShiftI ireg scale)); 4216 4217 op_cost(10); 4218 format %{"[$reg + $ireg << $scale]" %} 4219 interface(MEMORY_INTER) %{ 4220 base($reg); 4221 index($ireg); 4222 scale($scale); 4223 disp(0x0); 4224 %} 4225%} 4226 4227// Indirect Memory Times Scale Plus Index Register Plus Offset Operand 4228operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{ 4229 match(AddP (AddP reg (LShiftI ireg scale)) off); 4230 4231 op_cost(10); 4232 format %{"[$reg + $off + $ireg << $scale]" %} 4233 interface(MEMORY_INTER) %{ 4234 base($reg); 4235 index($ireg); 4236 scale($scale); 4237 disp($off); 4238 %} 4239%} 4240 4241//----------Load Long Memory Operands------------------------------------------ 4242// The load-long idiom will use it's address expression again after loading 4243// the first word of the long. If the load-long destination overlaps with 4244// registers used in the addressing expression, the 2nd half will be loaded 4245// from a clobbered address. Fix this by requiring that load-long use 4246// address registers that do not overlap with the load-long target. 4247 4248// load-long support 4249operand load_long_RegP() %{ 4250 constraint(ALLOC_IN_RC(esi_reg)); 4251 match(RegP); 4252 match(eSIRegP); 4253 op_cost(100); 4254 format %{ %} 4255 interface(REG_INTER); 4256%} 4257 4258// Indirect Memory Operand Long 4259operand load_long_indirect(load_long_RegP reg) %{ 4260 constraint(ALLOC_IN_RC(esi_reg)); 4261 match(reg); 4262 4263 format %{ "[$reg]" %} 4264 interface(MEMORY_INTER) %{ 4265 base($reg); 4266 index(0x4); 4267 scale(0x0); 4268 disp(0x0); 4269 %} 4270%} 4271 4272// Indirect Memory Plus Long Offset Operand 4273operand load_long_indOffset32(load_long_RegP reg, immI off) %{ 4274 match(AddP reg off); 4275 4276 format %{ "[$reg + $off]" %} 4277 interface(MEMORY_INTER) %{ 4278 base($reg); 4279 index(0x4); 4280 scale(0x0); 4281 disp($off); 4282 %} 4283%} 4284 4285opclass load_long_memory(load_long_indirect, load_long_indOffset32); 4286 4287 4288//----------Special Memory Operands-------------------------------------------- 4289// Stack Slot Operand - This operand is used for loading and storing temporary 4290// values on the stack where a match requires a value to 4291// flow through memory. 4292operand stackSlotP(sRegP reg) %{ 4293 constraint(ALLOC_IN_RC(stack_slots)); 4294 // No match rule because this operand is only generated in matching 4295 format %{ "[$reg]" %} 4296 interface(MEMORY_INTER) %{ 4297 base(0x4); // ESP 4298 index(0x4); // No Index 4299 scale(0x0); // No Scale 4300 disp($reg); // Stack Offset 4301 %} 4302%} 4303 4304operand stackSlotI(sRegI reg) %{ 4305 constraint(ALLOC_IN_RC(stack_slots)); 4306 // No match rule because this operand is only generated in matching 4307 format %{ "[$reg]" %} 4308 interface(MEMORY_INTER) %{ 4309 base(0x4); // ESP 4310 index(0x4); // No Index 4311 scale(0x0); // No Scale 4312 disp($reg); // Stack Offset 4313 %} 4314%} 4315 4316operand stackSlotF(sRegF reg) %{ 4317 constraint(ALLOC_IN_RC(stack_slots)); 4318 // No match rule because this operand is only generated in matching 4319 format %{ "[$reg]" %} 4320 interface(MEMORY_INTER) %{ 4321 base(0x4); // ESP 4322 index(0x4); // No Index 4323 scale(0x0); // No Scale 4324 disp($reg); // Stack Offset 4325 %} 4326%} 4327 4328operand stackSlotD(sRegD reg) %{ 4329 constraint(ALLOC_IN_RC(stack_slots)); 4330 // No match rule because this operand is only generated in matching 4331 format %{ "[$reg]" %} 4332 interface(MEMORY_INTER) %{ 4333 base(0x4); // ESP 4334 index(0x4); // No Index 4335 scale(0x0); // No Scale 4336 disp($reg); // Stack Offset 4337 %} 4338%} 4339 4340operand stackSlotL(sRegL reg) %{ 4341 constraint(ALLOC_IN_RC(stack_slots)); 4342 // No match rule because this operand is only generated in matching 4343 format %{ "[$reg]" %} 4344 interface(MEMORY_INTER) %{ 4345 base(0x4); // ESP 4346 index(0x4); // No Index 4347 scale(0x0); // No Scale 4348 disp($reg); // Stack Offset 4349 %} 4350%} 4351 4352//----------Memory Operands - Win95 Implicit Null Variants---------------- 4353// Indirect Memory Operand 4354operand indirect_win95_safe(eRegP_no_EBP reg) 4355%{ 4356 constraint(ALLOC_IN_RC(int_reg)); 4357 match(reg); 4358 4359 op_cost(100); 4360 format %{ "[$reg]" %} 4361 interface(MEMORY_INTER) %{ 4362 base($reg); 4363 index(0x4); 4364 scale(0x0); 4365 disp(0x0); 4366 %} 4367%} 4368 4369// Indirect Memory Plus Short Offset Operand 4370operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off) 4371%{ 4372 match(AddP reg off); 4373 4374 op_cost(100); 4375 format %{ "[$reg + $off]" %} 4376 interface(MEMORY_INTER) %{ 4377 base($reg); 4378 index(0x4); 4379 scale(0x0); 4380 disp($off); 4381 %} 4382%} 4383 4384// Indirect Memory Plus Long Offset Operand 4385operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off) 4386%{ 4387 match(AddP reg off); 4388 4389 op_cost(100); 4390 format %{ "[$reg + $off]" %} 4391 interface(MEMORY_INTER) %{ 4392 base($reg); 4393 index(0x4); 4394 scale(0x0); 4395 disp($off); 4396 %} 4397%} 4398 4399// Indirect Memory Plus Index Register Plus Offset Operand 4400operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off) 4401%{ 4402 match(AddP (AddP reg ireg) off); 4403 4404 op_cost(100); 4405 format %{"[$reg + $off + $ireg]" %} 4406 interface(MEMORY_INTER) %{ 4407 base($reg); 4408 index($ireg); 4409 scale(0x0); 4410 disp($off); 4411 %} 4412%} 4413 4414// Indirect Memory Times Scale Plus Index Register 4415operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale) 4416%{ 4417 match(AddP reg (LShiftI ireg scale)); 4418 4419 op_cost(100); 4420 format %{"[$reg + $ireg << $scale]" %} 4421 interface(MEMORY_INTER) %{ 4422 base($reg); 4423 index($ireg); 4424 scale($scale); 4425 disp(0x0); 4426 %} 4427%} 4428 4429// Indirect Memory Times Scale Plus Index Register Plus Offset Operand 4430operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale) 4431%{ 4432 match(AddP (AddP reg (LShiftI ireg scale)) off); 4433 4434 op_cost(100); 4435 format %{"[$reg + $off + $ireg << $scale]" %} 4436 interface(MEMORY_INTER) %{ 4437 base($reg); 4438 index($ireg); 4439 scale($scale); 4440 disp($off); 4441 %} 4442%} 4443 4444//----------Conditional Branch Operands---------------------------------------- 4445// Comparison Op - This is the operation of the comparison, and is limited to 4446// the following set of codes: 4447// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) 4448// 4449// Other attributes of the comparison, such as unsignedness, are specified 4450// by the comparison instruction that sets a condition code flags register. 4451// That result is represented by a flags operand whose subtype is appropriate 4452// to the unsignedness (etc.) of the comparison. 4453// 4454// Later, the instruction which matches both the Comparison Op (a Bool) and 4455// the flags (produced by the Cmp) specifies the coding of the comparison op 4456// by matching a specific subtype of Bool operand below, such as cmpOpU. 4457 4458// Comparision Code 4459operand cmpOp() %{ 4460 match(Bool); 4461 4462 format %{ "" %} 4463 interface(COND_INTER) %{ 4464 equal(0x4, "e"); 4465 not_equal(0x5, "ne"); 4466 less(0xC, "l"); 4467 greater_equal(0xD, "ge"); 4468 less_equal(0xE, "le"); 4469 greater(0xF, "g"); 4470 overflow(0x0, "o"); 4471 no_overflow(0x1, "no"); 4472 %} 4473%} 4474 4475// Comparison Code, unsigned compare. Used by FP also, with 4476// C2 (unordered) turned into GT or LT already. The other bits 4477// C0 and C3 are turned into Carry & Zero flags. 4478operand cmpOpU() %{ 4479 match(Bool); 4480 4481 format %{ "" %} 4482 interface(COND_INTER) %{ 4483 equal(0x4, "e"); 4484 not_equal(0x5, "ne"); 4485 less(0x2, "b"); 4486 greater_equal(0x3, "nb"); 4487 less_equal(0x6, "be"); 4488 greater(0x7, "nbe"); 4489 overflow(0x0, "o"); 4490 no_overflow(0x1, "no"); 4491 %} 4492%} 4493 4494// Floating comparisons that don't require any fixup for the unordered case 4495operand cmpOpUCF() %{ 4496 match(Bool); 4497 predicate(n->as_Bool()->_test._test == BoolTest::lt || 4498 n->as_Bool()->_test._test == BoolTest::ge || 4499 n->as_Bool()->_test._test == BoolTest::le || 4500 n->as_Bool()->_test._test == BoolTest::gt); 4501 format %{ "" %} 4502 interface(COND_INTER) %{ 4503 equal(0x4, "e"); 4504 not_equal(0x5, "ne"); 4505 less(0x2, "b"); 4506 greater_equal(0x3, "nb"); 4507 less_equal(0x6, "be"); 4508 greater(0x7, "nbe"); 4509 overflow(0x0, "o"); 4510 no_overflow(0x1, "no"); 4511 %} 4512%} 4513 4514 4515// Floating comparisons that can be fixed up with extra conditional jumps 4516operand cmpOpUCF2() %{ 4517 match(Bool); 4518 predicate(n->as_Bool()->_test._test == BoolTest::ne || 4519 n->as_Bool()->_test._test == BoolTest::eq); 4520 format %{ "" %} 4521 interface(COND_INTER) %{ 4522 equal(0x4, "e"); 4523 not_equal(0x5, "ne"); 4524 less(0x2, "b"); 4525 greater_equal(0x3, "nb"); 4526 less_equal(0x6, "be"); 4527 greater(0x7, "nbe"); 4528 overflow(0x0, "o"); 4529 no_overflow(0x1, "no"); 4530 %} 4531%} 4532 4533// Comparison Code for FP conditional move 4534operand cmpOp_fcmov() %{ 4535 match(Bool); 4536 4537 predicate(n->as_Bool()->_test._test != BoolTest::overflow && 4538 n->as_Bool()->_test._test != BoolTest::no_overflow); 4539 format %{ "" %} 4540 interface(COND_INTER) %{ 4541 equal (0x0C8); 4542 not_equal (0x1C8); 4543 less (0x0C0); 4544 greater_equal(0x1C0); 4545 less_equal (0x0D0); 4546 greater (0x1D0); 4547 overflow(0x0, "o"); // not really supported by the instruction 4548 no_overflow(0x1, "no"); // not really supported by the instruction 4549 %} 4550%} 4551 4552// Comparision Code used in long compares 4553operand cmpOp_commute() %{ 4554 match(Bool); 4555 4556 format %{ "" %} 4557 interface(COND_INTER) %{ 4558 equal(0x4, "e"); 4559 not_equal(0x5, "ne"); 4560 less(0xF, "g"); 4561 greater_equal(0xE, "le"); 4562 less_equal(0xD, "ge"); 4563 greater(0xC, "l"); 4564 overflow(0x0, "o"); 4565 no_overflow(0x1, "no"); 4566 %} 4567%} 4568 4569//----------OPERAND CLASSES---------------------------------------------------- 4570// Operand Classes are groups of operands that are used as to simplify 4571// instruction definitions by not requiring the AD writer to specify separate 4572// instructions for every form of operand when the instruction accepts 4573// multiple operand types with the same basic encoding and format. The classic 4574// case of this is memory operands. 4575 4576opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset, 4577 indIndex, indIndexScale, indIndexScaleOffset); 4578 4579// Long memory operations are encoded in 2 instructions and a +4 offset. 4580// This means some kind of offset is always required and you cannot use 4581// an oop as the offset (done when working on static globals). 4582opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset, 4583 indIndex, indIndexScale, indIndexScaleOffset); 4584 4585 4586//----------PIPELINE----------------------------------------------------------- 4587// Rules which define the behavior of the target architectures pipeline. 4588pipeline %{ 4589 4590//----------ATTRIBUTES--------------------------------------------------------- 4591attributes %{ 4592 variable_size_instructions; // Fixed size instructions 4593 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle 4594 instruction_unit_size = 1; // An instruction is 1 bytes long 4595 instruction_fetch_unit_size = 16; // The processor fetches one line 4596 instruction_fetch_units = 1; // of 16 bytes 4597 4598 // List of nop instructions 4599 nops( MachNop ); 4600%} 4601 4602//----------RESOURCES---------------------------------------------------------- 4603// Resources are the functional units available to the machine 4604 4605// Generic P2/P3 pipeline 4606// 3 decoders, only D0 handles big operands; a "bundle" is the limit of 4607// 3 instructions decoded per cycle. 4608// 2 load/store ops per cycle, 1 branch, 1 FPU, 4609// 2 ALU op, only ALU0 handles mul/div instructions. 4610resources( D0, D1, D2, DECODE = D0 | D1 | D2, 4611 MS0, MS1, MEM = MS0 | MS1, 4612 BR, FPU, 4613 ALU0, ALU1, ALU = ALU0 | ALU1 ); 4614 4615//----------PIPELINE DESCRIPTION----------------------------------------------- 4616// Pipeline Description specifies the stages in the machine's pipeline 4617 4618// Generic P2/P3 pipeline 4619pipe_desc(S0, S1, S2, S3, S4, S5); 4620 4621//----------PIPELINE CLASSES--------------------------------------------------- 4622// Pipeline Classes describe the stages in which input and output are 4623// referenced by the hardware pipeline. 4624 4625// Naming convention: ialu or fpu 4626// Then: _reg 4627// Then: _reg if there is a 2nd register 4628// Then: _long if it's a pair of instructions implementing a long 4629// Then: _fat if it requires the big decoder 4630// Or: _mem if it requires the big decoder and a memory unit. 4631 4632// Integer ALU reg operation 4633pipe_class ialu_reg(rRegI dst) %{ 4634 single_instruction; 4635 dst : S4(write); 4636 dst : S3(read); 4637 DECODE : S0; // any decoder 4638 ALU : S3; // any alu 4639%} 4640 4641// Long ALU reg operation 4642pipe_class ialu_reg_long(eRegL dst) %{ 4643 instruction_count(2); 4644 dst : S4(write); 4645 dst : S3(read); 4646 DECODE : S0(2); // any 2 decoders 4647 ALU : S3(2); // both alus 4648%} 4649 4650// Integer ALU reg operation using big decoder 4651pipe_class ialu_reg_fat(rRegI dst) %{ 4652 single_instruction; 4653 dst : S4(write); 4654 dst : S3(read); 4655 D0 : S0; // big decoder only 4656 ALU : S3; // any alu 4657%} 4658 4659// Long ALU reg operation using big decoder 4660pipe_class ialu_reg_long_fat(eRegL dst) %{ 4661 instruction_count(2); 4662 dst : S4(write); 4663 dst : S3(read); 4664 D0 : S0(2); // big decoder only; twice 4665 ALU : S3(2); // any 2 alus 4666%} 4667 4668// Integer ALU reg-reg operation 4669pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{ 4670 single_instruction; 4671 dst : S4(write); 4672 src : S3(read); 4673 DECODE : S0; // any decoder 4674 ALU : S3; // any alu 4675%} 4676 4677// Long ALU reg-reg operation 4678pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{ 4679 instruction_count(2); 4680 dst : S4(write); 4681 src : S3(read); 4682 DECODE : S0(2); // any 2 decoders 4683 ALU : S3(2); // both alus 4684%} 4685 4686// Integer ALU reg-reg operation 4687pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{ 4688 single_instruction; 4689 dst : S4(write); 4690 src : S3(read); 4691 D0 : S0; // big decoder only 4692 ALU : S3; // any alu 4693%} 4694 4695// Long ALU reg-reg operation 4696pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{ 4697 instruction_count(2); 4698 dst : S4(write); 4699 src : S3(read); 4700 D0 : S0(2); // big decoder only; twice 4701 ALU : S3(2); // both alus 4702%} 4703 4704// Integer ALU reg-mem operation 4705pipe_class ialu_reg_mem(rRegI dst, memory mem) %{ 4706 single_instruction; 4707 dst : S5(write); 4708 mem : S3(read); 4709 D0 : S0; // big decoder only 4710 ALU : S4; // any alu 4711 MEM : S3; // any mem 4712%} 4713 4714// Long ALU reg-mem operation 4715pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{ 4716 instruction_count(2); 4717 dst : S5(write); 4718 mem : S3(read); 4719 D0 : S0(2); // big decoder only; twice 4720 ALU : S4(2); // any 2 alus 4721 MEM : S3(2); // both mems 4722%} 4723 4724// Integer mem operation (prefetch) 4725pipe_class ialu_mem(memory mem) 4726%{ 4727 single_instruction; 4728 mem : S3(read); 4729 D0 : S0; // big decoder only 4730 MEM : S3; // any mem 4731%} 4732 4733// Integer Store to Memory 4734pipe_class ialu_mem_reg(memory mem, rRegI src) %{ 4735 single_instruction; 4736 mem : S3(read); 4737 src : S5(read); 4738 D0 : S0; // big decoder only 4739 ALU : S4; // any alu 4740 MEM : S3; 4741%} 4742 4743// Long Store to Memory 4744pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{ 4745 instruction_count(2); 4746 mem : S3(read); 4747 src : S5(read); 4748 D0 : S0(2); // big decoder only; twice 4749 ALU : S4(2); // any 2 alus 4750 MEM : S3(2); // Both mems 4751%} 4752 4753// Integer Store to Memory 4754pipe_class ialu_mem_imm(memory mem) %{ 4755 single_instruction; 4756 mem : S3(read); 4757 D0 : S0; // big decoder only 4758 ALU : S4; // any alu 4759 MEM : S3; 4760%} 4761 4762// Integer ALU0 reg-reg operation 4763pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{ 4764 single_instruction; 4765 dst : S4(write); 4766 src : S3(read); 4767 D0 : S0; // Big decoder only 4768 ALU0 : S3; // only alu0 4769%} 4770 4771// Integer ALU0 reg-mem operation 4772pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{ 4773 single_instruction; 4774 dst : S5(write); 4775 mem : S3(read); 4776 D0 : S0; // big decoder only 4777 ALU0 : S4; // ALU0 only 4778 MEM : S3; // any mem 4779%} 4780 4781// Integer ALU reg-reg operation 4782pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{ 4783 single_instruction; 4784 cr : S4(write); 4785 src1 : S3(read); 4786 src2 : S3(read); 4787 DECODE : S0; // any decoder 4788 ALU : S3; // any alu 4789%} 4790 4791// Integer ALU reg-imm operation 4792pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{ 4793 single_instruction; 4794 cr : S4(write); 4795 src1 : S3(read); 4796 DECODE : S0; // any decoder 4797 ALU : S3; // any alu 4798%} 4799 4800// Integer ALU reg-mem operation 4801pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{ 4802 single_instruction; 4803 cr : S4(write); 4804 src1 : S3(read); 4805 src2 : S3(read); 4806 D0 : S0; // big decoder only 4807 ALU : S4; // any alu 4808 MEM : S3; 4809%} 4810 4811// Conditional move reg-reg 4812pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{ 4813 instruction_count(4); 4814 y : S4(read); 4815 q : S3(read); 4816 p : S3(read); 4817 DECODE : S0(4); // any decoder 4818%} 4819 4820// Conditional move reg-reg 4821pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{ 4822 single_instruction; 4823 dst : S4(write); 4824 src : S3(read); 4825 cr : S3(read); 4826 DECODE : S0; // any decoder 4827%} 4828 4829// Conditional move reg-mem 4830pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{ 4831 single_instruction; 4832 dst : S4(write); 4833 src : S3(read); 4834 cr : S3(read); 4835 DECODE : S0; // any decoder 4836 MEM : S3; 4837%} 4838 4839// Conditional move reg-reg long 4840pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{ 4841 single_instruction; 4842 dst : S4(write); 4843 src : S3(read); 4844 cr : S3(read); 4845 DECODE : S0(2); // any 2 decoders 4846%} 4847 4848// Conditional move double reg-reg 4849pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{ 4850 single_instruction; 4851 dst : S4(write); 4852 src : S3(read); 4853 cr : S3(read); 4854 DECODE : S0; // any decoder 4855%} 4856 4857// Float reg-reg operation 4858pipe_class fpu_reg(regDPR dst) %{ 4859 instruction_count(2); 4860 dst : S3(read); 4861 DECODE : S0(2); // any 2 decoders 4862 FPU : S3; 4863%} 4864 4865// Float reg-reg operation 4866pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{ 4867 instruction_count(2); 4868 dst : S4(write); 4869 src : S3(read); 4870 DECODE : S0(2); // any 2 decoders 4871 FPU : S3; 4872%} 4873 4874// Float reg-reg operation 4875pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{ 4876 instruction_count(3); 4877 dst : S4(write); 4878 src1 : S3(read); 4879 src2 : S3(read); 4880 DECODE : S0(3); // any 3 decoders 4881 FPU : S3(2); 4882%} 4883 4884// Float reg-reg operation 4885pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{ 4886 instruction_count(4); 4887 dst : S4(write); 4888 src1 : S3(read); 4889 src2 : S3(read); 4890 src3 : S3(read); 4891 DECODE : S0(4); // any 3 decoders 4892 FPU : S3(2); 4893%} 4894 4895// Float reg-reg operation 4896pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{ 4897 instruction_count(4); 4898 dst : S4(write); 4899 src1 : S3(read); 4900 src2 : S3(read); 4901 src3 : S3(read); 4902 DECODE : S1(3); // any 3 decoders 4903 D0 : S0; // Big decoder only 4904 FPU : S3(2); 4905 MEM : S3; 4906%} 4907 4908// Float reg-mem operation 4909pipe_class fpu_reg_mem(regDPR dst, memory mem) %{ 4910 instruction_count(2); 4911 dst : S5(write); 4912 mem : S3(read); 4913 D0 : S0; // big decoder only 4914 DECODE : S1; // any decoder for FPU POP 4915 FPU : S4; 4916 MEM : S3; // any mem 4917%} 4918 4919// Float reg-mem operation 4920pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{ 4921 instruction_count(3); 4922 dst : S5(write); 4923 src1 : S3(read); 4924 mem : S3(read); 4925 D0 : S0; // big decoder only 4926 DECODE : S1(2); // any decoder for FPU POP 4927 FPU : S4; 4928 MEM : S3; // any mem 4929%} 4930 4931// Float mem-reg operation 4932pipe_class fpu_mem_reg(memory mem, regDPR src) %{ 4933 instruction_count(2); 4934 src : S5(read); 4935 mem : S3(read); 4936 DECODE : S0; // any decoder for FPU PUSH 4937 D0 : S1; // big decoder only 4938 FPU : S4; 4939 MEM : S3; // any mem 4940%} 4941 4942pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{ 4943 instruction_count(3); 4944 src1 : S3(read); 4945 src2 : S3(read); 4946 mem : S3(read); 4947 DECODE : S0(2); // any decoder for FPU PUSH 4948 D0 : S1; // big decoder only 4949 FPU : S4; 4950 MEM : S3; // any mem 4951%} 4952 4953pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{ 4954 instruction_count(3); 4955 src1 : S3(read); 4956 src2 : S3(read); 4957 mem : S4(read); 4958 DECODE : S0; // any decoder for FPU PUSH 4959 D0 : S0(2); // big decoder only 4960 FPU : S4; 4961 MEM : S3(2); // any mem 4962%} 4963 4964pipe_class fpu_mem_mem(memory dst, memory src1) %{ 4965 instruction_count(2); 4966 src1 : S3(read); 4967 dst : S4(read); 4968 D0 : S0(2); // big decoder only 4969 MEM : S3(2); // any mem 4970%} 4971 4972pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{ 4973 instruction_count(3); 4974 src1 : S3(read); 4975 src2 : S3(read); 4976 dst : S4(read); 4977 D0 : S0(3); // big decoder only 4978 FPU : S4; 4979 MEM : S3(3); // any mem 4980%} 4981 4982pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{ 4983 instruction_count(3); 4984 src1 : S4(read); 4985 mem : S4(read); 4986 DECODE : S0; // any decoder for FPU PUSH 4987 D0 : S0(2); // big decoder only 4988 FPU : S4; 4989 MEM : S3(2); // any mem 4990%} 4991 4992// Float load constant 4993pipe_class fpu_reg_con(regDPR dst) %{ 4994 instruction_count(2); 4995 dst : S5(write); 4996 D0 : S0; // big decoder only for the load 4997 DECODE : S1; // any decoder for FPU POP 4998 FPU : S4; 4999 MEM : S3; // any mem 5000%} 5001 5002// Float load constant 5003pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{ 5004 instruction_count(3); 5005 dst : S5(write); 5006 src : S3(read); 5007 D0 : S0; // big decoder only for the load 5008 DECODE : S1(2); // any decoder for FPU POP 5009 FPU : S4; 5010 MEM : S3; // any mem 5011%} 5012 5013// UnConditional branch 5014pipe_class pipe_jmp( label labl ) %{ 5015 single_instruction; 5016 BR : S3; 5017%} 5018 5019// Conditional branch 5020pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{ 5021 single_instruction; 5022 cr : S1(read); 5023 BR : S3; 5024%} 5025 5026// Allocation idiom 5027pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{ 5028 instruction_count(1); force_serialization; 5029 fixed_latency(6); 5030 heap_ptr : S3(read); 5031 DECODE : S0(3); 5032 D0 : S2; 5033 MEM : S3; 5034 ALU : S3(2); 5035 dst : S5(write); 5036 BR : S5; 5037%} 5038 5039// Generic big/slow expanded idiom 5040pipe_class pipe_slow( ) %{ 5041 instruction_count(10); multiple_bundles; force_serialization; 5042 fixed_latency(100); 5043 D0 : S0(2); 5044 MEM : S3(2); 5045%} 5046 5047// The real do-nothing guy 5048pipe_class empty( ) %{ 5049 instruction_count(0); 5050%} 5051 5052// Define the class for the Nop node 5053define %{ 5054 MachNop = empty; 5055%} 5056 5057%} 5058 5059//----------INSTRUCTIONS------------------------------------------------------- 5060// 5061// match -- States which machine-independent subtree may be replaced 5062// by this instruction. 5063// ins_cost -- The estimated cost of this instruction is used by instruction 5064// selection to identify a minimum cost tree of machine 5065// instructions that matches a tree of machine-independent 5066// instructions. 5067// format -- A string providing the disassembly for this instruction. 5068// The value of an instruction's operand may be inserted 5069// by referring to it with a '$' prefix. 5070// opcode -- Three instruction opcodes may be provided. These are referred 5071// to within an encode class as $primary, $secondary, and $tertiary 5072// respectively. The primary opcode is commonly used to 5073// indicate the type of machine instruction, while secondary 5074// and tertiary are often used for prefix options or addressing 5075// modes. 5076// ins_encode -- A list of encode classes with parameters. The encode class 5077// name must have been defined in an 'enc_class' specification 5078// in the encode section of the architecture description. 5079 5080//----------BSWAP-Instruction-------------------------------------------------- 5081instruct bytes_reverse_int(rRegI dst) %{ 5082 match(Set dst (ReverseBytesI dst)); 5083 5084 format %{ "BSWAP $dst" %} 5085 opcode(0x0F, 0xC8); 5086 ins_encode( OpcP, OpcSReg(dst) ); 5087 ins_pipe( ialu_reg ); 5088%} 5089 5090instruct bytes_reverse_long(eRegL dst) %{ 5091 match(Set dst (ReverseBytesL dst)); 5092 5093 format %{ "BSWAP $dst.lo\n\t" 5094 "BSWAP $dst.hi\n\t" 5095 "XCHG $dst.lo $dst.hi" %} 5096 5097 ins_cost(125); 5098 ins_encode( bswap_long_bytes(dst) ); 5099 ins_pipe( ialu_reg_reg); 5100%} 5101 5102instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{ 5103 match(Set dst (ReverseBytesUS dst)); 5104 effect(KILL cr); 5105 5106 format %{ "BSWAP $dst\n\t" 5107 "SHR $dst,16\n\t" %} 5108 ins_encode %{ 5109 __ bswapl($dst$$Register); 5110 __ shrl($dst$$Register, 16); 5111 %} 5112 ins_pipe( ialu_reg ); 5113%} 5114 5115instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{ 5116 match(Set dst (ReverseBytesS dst)); 5117 effect(KILL cr); 5118 5119 format %{ "BSWAP $dst\n\t" 5120 "SAR $dst,16\n\t" %} 5121 ins_encode %{ 5122 __ bswapl($dst$$Register); 5123 __ sarl($dst$$Register, 16); 5124 %} 5125 ins_pipe( ialu_reg ); 5126%} 5127 5128 5129//---------- Zeros Count Instructions ------------------------------------------ 5130 5131instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5132 predicate(UseCountLeadingZerosInstruction); 5133 match(Set dst (CountLeadingZerosI src)); 5134 effect(KILL cr); 5135 5136 format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %} 5137 ins_encode %{ 5138 __ lzcntl($dst$$Register, $src$$Register); 5139 %} 5140 ins_pipe(ialu_reg); 5141%} 5142 5143instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{ 5144 predicate(!UseCountLeadingZerosInstruction); 5145 match(Set dst (CountLeadingZerosI src)); 5146 effect(KILL cr); 5147 5148 format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t" 5149 "JNZ skip\n\t" 5150 "MOV $dst, -1\n" 5151 "skip:\n\t" 5152 "NEG $dst\n\t" 5153 "ADD $dst, 31" %} 5154 ins_encode %{ 5155 Register Rdst = $dst$$Register; 5156 Register Rsrc = $src$$Register; 5157 Label skip; 5158 __ bsrl(Rdst, Rsrc); 5159 __ jccb(Assembler::notZero, skip); 5160 __ movl(Rdst, -1); 5161 __ bind(skip); 5162 __ negl(Rdst); 5163 __ addl(Rdst, BitsPerInt - 1); 5164 %} 5165 ins_pipe(ialu_reg); 5166%} 5167 5168instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5169 predicate(UseCountLeadingZerosInstruction); 5170 match(Set dst (CountLeadingZerosL src)); 5171 effect(TEMP dst, KILL cr); 5172 5173 format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t" 5174 "JNC done\n\t" 5175 "LZCNT $dst, $src.lo\n\t" 5176 "ADD $dst, 32\n" 5177 "done:" %} 5178 ins_encode %{ 5179 Register Rdst = $dst$$Register; 5180 Register Rsrc = $src$$Register; 5181 Label done; 5182 __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5183 __ jccb(Assembler::carryClear, done); 5184 __ lzcntl(Rdst, Rsrc); 5185 __ addl(Rdst, BitsPerInt); 5186 __ bind(done); 5187 %} 5188 ins_pipe(ialu_reg); 5189%} 5190 5191instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{ 5192 predicate(!UseCountLeadingZerosInstruction); 5193 match(Set dst (CountLeadingZerosL src)); 5194 effect(TEMP dst, KILL cr); 5195 5196 format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t" 5197 "JZ msw_is_zero\n\t" 5198 "ADD $dst, 32\n\t" 5199 "JMP not_zero\n" 5200 "msw_is_zero:\n\t" 5201 "BSR $dst, $src.lo\n\t" 5202 "JNZ not_zero\n\t" 5203 "MOV $dst, -1\n" 5204 "not_zero:\n\t" 5205 "NEG $dst\n\t" 5206 "ADD $dst, 63\n" %} 5207 ins_encode %{ 5208 Register Rdst = $dst$$Register; 5209 Register Rsrc = $src$$Register; 5210 Label msw_is_zero; 5211 Label not_zero; 5212 __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc)); 5213 __ jccb(Assembler::zero, msw_is_zero); 5214 __ addl(Rdst, BitsPerInt); 5215 __ jmpb(not_zero); 5216 __ bind(msw_is_zero); 5217 __ bsrl(Rdst, Rsrc); 5218 __ jccb(Assembler::notZero, not_zero); 5219 __ movl(Rdst, -1); 5220 __ bind(not_zero); 5221 __ negl(Rdst); 5222 __ addl(Rdst, BitsPerLong - 1); 5223 %} 5224 ins_pipe(ialu_reg); 5225%} 5226 5227instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5228 predicate(UseCountTrailingZerosInstruction); 5229 match(Set dst (CountTrailingZerosI src)); 5230 effect(KILL cr); 5231 5232 format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %} 5233 ins_encode %{ 5234 __ tzcntl($dst$$Register, $src$$Register); 5235 %} 5236 ins_pipe(ialu_reg); 5237%} 5238 5239instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{ 5240 predicate(!UseCountTrailingZerosInstruction); 5241 match(Set dst (CountTrailingZerosI src)); 5242 effect(KILL cr); 5243 5244 format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t" 5245 "JNZ done\n\t" 5246 "MOV $dst, 32\n" 5247 "done:" %} 5248 ins_encode %{ 5249 Register Rdst = $dst$$Register; 5250 Label done; 5251 __ bsfl(Rdst, $src$$Register); 5252 __ jccb(Assembler::notZero, done); 5253 __ movl(Rdst, BitsPerInt); 5254 __ bind(done); 5255 %} 5256 ins_pipe(ialu_reg); 5257%} 5258 5259instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5260 predicate(UseCountTrailingZerosInstruction); 5261 match(Set dst (CountTrailingZerosL src)); 5262 effect(TEMP dst, KILL cr); 5263 5264 format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t" 5265 "JNC done\n\t" 5266 "TZCNT $dst, $src.hi\n\t" 5267 "ADD $dst, 32\n" 5268 "done:" %} 5269 ins_encode %{ 5270 Register Rdst = $dst$$Register; 5271 Register Rsrc = $src$$Register; 5272 Label done; 5273 __ tzcntl(Rdst, Rsrc); 5274 __ jccb(Assembler::carryClear, done); 5275 __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5276 __ addl(Rdst, BitsPerInt); 5277 __ bind(done); 5278 %} 5279 ins_pipe(ialu_reg); 5280%} 5281 5282instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{ 5283 predicate(!UseCountTrailingZerosInstruction); 5284 match(Set dst (CountTrailingZerosL src)); 5285 effect(TEMP dst, KILL cr); 5286 5287 format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t" 5288 "JNZ done\n\t" 5289 "BSF $dst, $src.hi\n\t" 5290 "JNZ msw_not_zero\n\t" 5291 "MOV $dst, 32\n" 5292 "msw_not_zero:\n\t" 5293 "ADD $dst, 32\n" 5294 "done:" %} 5295 ins_encode %{ 5296 Register Rdst = $dst$$Register; 5297 Register Rsrc = $src$$Register; 5298 Label msw_not_zero; 5299 Label done; 5300 __ bsfl(Rdst, Rsrc); 5301 __ jccb(Assembler::notZero, done); 5302 __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc)); 5303 __ jccb(Assembler::notZero, msw_not_zero); 5304 __ movl(Rdst, BitsPerInt); 5305 __ bind(msw_not_zero); 5306 __ addl(Rdst, BitsPerInt); 5307 __ bind(done); 5308 %} 5309 ins_pipe(ialu_reg); 5310%} 5311 5312 5313//---------- Population Count Instructions ------------------------------------- 5314 5315instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5316 predicate(UsePopCountInstruction); 5317 match(Set dst (PopCountI src)); 5318 effect(KILL cr); 5319 5320 format %{ "POPCNT $dst, $src" %} 5321 ins_encode %{ 5322 __ popcntl($dst$$Register, $src$$Register); 5323 %} 5324 ins_pipe(ialu_reg); 5325%} 5326 5327instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{ 5328 predicate(UsePopCountInstruction); 5329 match(Set dst (PopCountI (LoadI mem))); 5330 effect(KILL cr); 5331 5332 format %{ "POPCNT $dst, $mem" %} 5333 ins_encode %{ 5334 __ popcntl($dst$$Register, $mem$$Address); 5335 %} 5336 ins_pipe(ialu_reg); 5337%} 5338 5339// Note: Long.bitCount(long) returns an int. 5340instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 5341 predicate(UsePopCountInstruction); 5342 match(Set dst (PopCountL src)); 5343 effect(KILL cr, TEMP tmp, TEMP dst); 5344 5345 format %{ "POPCNT $dst, $src.lo\n\t" 5346 "POPCNT $tmp, $src.hi\n\t" 5347 "ADD $dst, $tmp" %} 5348 ins_encode %{ 5349 __ popcntl($dst$$Register, $src$$Register); 5350 __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register)); 5351 __ addl($dst$$Register, $tmp$$Register); 5352 %} 5353 ins_pipe(ialu_reg); 5354%} 5355 5356// Note: Long.bitCount(long) returns an int. 5357instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{ 5358 predicate(UsePopCountInstruction); 5359 match(Set dst (PopCountL (LoadL mem))); 5360 effect(KILL cr, TEMP tmp, TEMP dst); 5361 5362 format %{ "POPCNT $dst, $mem\n\t" 5363 "POPCNT $tmp, $mem+4\n\t" 5364 "ADD $dst, $tmp" %} 5365 ins_encode %{ 5366 //__ popcntl($dst$$Register, $mem$$Address$$first); 5367 //__ popcntl($tmp$$Register, $mem$$Address$$second); 5368 __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none)); 5369 __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none)); 5370 __ addl($dst$$Register, $tmp$$Register); 5371 %} 5372 ins_pipe(ialu_reg); 5373%} 5374 5375 5376//----------Load/Store/Move Instructions--------------------------------------- 5377//----------Load Instructions-------------------------------------------------- 5378// Load Byte (8bit signed) 5379instruct loadB(xRegI dst, memory mem) %{ 5380 match(Set dst (LoadB mem)); 5381 5382 ins_cost(125); 5383 format %{ "MOVSX8 $dst,$mem\t# byte" %} 5384 5385 ins_encode %{ 5386 __ movsbl($dst$$Register, $mem$$Address); 5387 %} 5388 5389 ins_pipe(ialu_reg_mem); 5390%} 5391 5392// Load Byte (8bit signed) into Long Register 5393instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5394 match(Set dst (ConvI2L (LoadB mem))); 5395 effect(KILL cr); 5396 5397 ins_cost(375); 5398 format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t" 5399 "MOV $dst.hi,$dst.lo\n\t" 5400 "SAR $dst.hi,7" %} 5401 5402 ins_encode %{ 5403 __ movsbl($dst$$Register, $mem$$Address); 5404 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5405 __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended. 5406 %} 5407 5408 ins_pipe(ialu_reg_mem); 5409%} 5410 5411// Load Unsigned Byte (8bit UNsigned) 5412instruct loadUB(xRegI dst, memory mem) %{ 5413 match(Set dst (LoadUB mem)); 5414 5415 ins_cost(125); 5416 format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %} 5417 5418 ins_encode %{ 5419 __ movzbl($dst$$Register, $mem$$Address); 5420 %} 5421 5422 ins_pipe(ialu_reg_mem); 5423%} 5424 5425// Load Unsigned Byte (8 bit UNsigned) into Long Register 5426instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5427 match(Set dst (ConvI2L (LoadUB mem))); 5428 effect(KILL cr); 5429 5430 ins_cost(250); 5431 format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t" 5432 "XOR $dst.hi,$dst.hi" %} 5433 5434 ins_encode %{ 5435 Register Rdst = $dst$$Register; 5436 __ movzbl(Rdst, $mem$$Address); 5437 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5438 %} 5439 5440 ins_pipe(ialu_reg_mem); 5441%} 5442 5443// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register 5444instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{ 5445 match(Set dst (ConvI2L (AndI (LoadUB mem) mask))); 5446 effect(KILL cr); 5447 5448 format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t" 5449 "XOR $dst.hi,$dst.hi\n\t" 5450 "AND $dst.lo,right_n_bits($mask, 8)" %} 5451 ins_encode %{ 5452 Register Rdst = $dst$$Register; 5453 __ movzbl(Rdst, $mem$$Address); 5454 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5455 __ andl(Rdst, $mask$$constant & right_n_bits(8)); 5456 %} 5457 ins_pipe(ialu_reg_mem); 5458%} 5459 5460// Load Short (16bit signed) 5461instruct loadS(rRegI dst, memory mem) %{ 5462 match(Set dst (LoadS mem)); 5463 5464 ins_cost(125); 5465 format %{ "MOVSX $dst,$mem\t# short" %} 5466 5467 ins_encode %{ 5468 __ movswl($dst$$Register, $mem$$Address); 5469 %} 5470 5471 ins_pipe(ialu_reg_mem); 5472%} 5473 5474// Load Short (16 bit signed) to Byte (8 bit signed) 5475instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5476 match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); 5477 5478 ins_cost(125); 5479 format %{ "MOVSX $dst, $mem\t# short -> byte" %} 5480 ins_encode %{ 5481 __ movsbl($dst$$Register, $mem$$Address); 5482 %} 5483 ins_pipe(ialu_reg_mem); 5484%} 5485 5486// Load Short (16bit signed) into Long Register 5487instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5488 match(Set dst (ConvI2L (LoadS mem))); 5489 effect(KILL cr); 5490 5491 ins_cost(375); 5492 format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t" 5493 "MOV $dst.hi,$dst.lo\n\t" 5494 "SAR $dst.hi,15" %} 5495 5496 ins_encode %{ 5497 __ movswl($dst$$Register, $mem$$Address); 5498 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5499 __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended. 5500 %} 5501 5502 ins_pipe(ialu_reg_mem); 5503%} 5504 5505// Load Unsigned Short/Char (16bit unsigned) 5506instruct loadUS(rRegI dst, memory mem) %{ 5507 match(Set dst (LoadUS mem)); 5508 5509 ins_cost(125); 5510 format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %} 5511 5512 ins_encode %{ 5513 __ movzwl($dst$$Register, $mem$$Address); 5514 %} 5515 5516 ins_pipe(ialu_reg_mem); 5517%} 5518 5519// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed) 5520instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5521 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour)); 5522 5523 ins_cost(125); 5524 format %{ "MOVSX $dst, $mem\t# ushort -> byte" %} 5525 ins_encode %{ 5526 __ movsbl($dst$$Register, $mem$$Address); 5527 %} 5528 ins_pipe(ialu_reg_mem); 5529%} 5530 5531// Load Unsigned Short/Char (16 bit UNsigned) into Long Register 5532instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5533 match(Set dst (ConvI2L (LoadUS mem))); 5534 effect(KILL cr); 5535 5536 ins_cost(250); 5537 format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t" 5538 "XOR $dst.hi,$dst.hi" %} 5539 5540 ins_encode %{ 5541 __ movzwl($dst$$Register, $mem$$Address); 5542 __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); 5543 %} 5544 5545 ins_pipe(ialu_reg_mem); 5546%} 5547 5548// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register 5549instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{ 5550 match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); 5551 effect(KILL cr); 5552 5553 format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t" 5554 "XOR $dst.hi,$dst.hi" %} 5555 ins_encode %{ 5556 Register Rdst = $dst$$Register; 5557 __ movzbl(Rdst, $mem$$Address); 5558 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5559 %} 5560 ins_pipe(ialu_reg_mem); 5561%} 5562 5563// Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register 5564instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{ 5565 match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); 5566 effect(KILL cr); 5567 5568 format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t" 5569 "XOR $dst.hi,$dst.hi\n\t" 5570 "AND $dst.lo,right_n_bits($mask, 16)" %} 5571 ins_encode %{ 5572 Register Rdst = $dst$$Register; 5573 __ movzwl(Rdst, $mem$$Address); 5574 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5575 __ andl(Rdst, $mask$$constant & right_n_bits(16)); 5576 %} 5577 ins_pipe(ialu_reg_mem); 5578%} 5579 5580// Load Integer 5581instruct loadI(rRegI dst, memory mem) %{ 5582 match(Set dst (LoadI mem)); 5583 5584 ins_cost(125); 5585 format %{ "MOV $dst,$mem\t# int" %} 5586 5587 ins_encode %{ 5588 __ movl($dst$$Register, $mem$$Address); 5589 %} 5590 5591 ins_pipe(ialu_reg_mem); 5592%} 5593 5594// Load Integer (32 bit signed) to Byte (8 bit signed) 5595instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5596 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); 5597 5598 ins_cost(125); 5599 format %{ "MOVSX $dst, $mem\t# int -> byte" %} 5600 ins_encode %{ 5601 __ movsbl($dst$$Register, $mem$$Address); 5602 %} 5603 ins_pipe(ialu_reg_mem); 5604%} 5605 5606// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) 5607instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{ 5608 match(Set dst (AndI (LoadI mem) mask)); 5609 5610 ins_cost(125); 5611 format %{ "MOVZX $dst, $mem\t# int -> ubyte" %} 5612 ins_encode %{ 5613 __ movzbl($dst$$Register, $mem$$Address); 5614 %} 5615 ins_pipe(ialu_reg_mem); 5616%} 5617 5618// Load Integer (32 bit signed) to Short (16 bit signed) 5619instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{ 5620 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); 5621 5622 ins_cost(125); 5623 format %{ "MOVSX $dst, $mem\t# int -> short" %} 5624 ins_encode %{ 5625 __ movswl($dst$$Register, $mem$$Address); 5626 %} 5627 ins_pipe(ialu_reg_mem); 5628%} 5629 5630// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) 5631instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{ 5632 match(Set dst (AndI (LoadI mem) mask)); 5633 5634 ins_cost(125); 5635 format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %} 5636 ins_encode %{ 5637 __ movzwl($dst$$Register, $mem$$Address); 5638 %} 5639 ins_pipe(ialu_reg_mem); 5640%} 5641 5642// Load Integer into Long Register 5643instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5644 match(Set dst (ConvI2L (LoadI mem))); 5645 effect(KILL cr); 5646 5647 ins_cost(375); 5648 format %{ "MOV $dst.lo,$mem\t# int -> long\n\t" 5649 "MOV $dst.hi,$dst.lo\n\t" 5650 "SAR $dst.hi,31" %} 5651 5652 ins_encode %{ 5653 __ movl($dst$$Register, $mem$$Address); 5654 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5655 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); 5656 %} 5657 5658 ins_pipe(ialu_reg_mem); 5659%} 5660 5661// Load Integer with mask 0xFF into Long Register 5662instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{ 5663 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5664 effect(KILL cr); 5665 5666 format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t" 5667 "XOR $dst.hi,$dst.hi" %} 5668 ins_encode %{ 5669 Register Rdst = $dst$$Register; 5670 __ movzbl(Rdst, $mem$$Address); 5671 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5672 %} 5673 ins_pipe(ialu_reg_mem); 5674%} 5675 5676// Load Integer with mask 0xFFFF into Long Register 5677instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{ 5678 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5679 effect(KILL cr); 5680 5681 format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t" 5682 "XOR $dst.hi,$dst.hi" %} 5683 ins_encode %{ 5684 Register Rdst = $dst$$Register; 5685 __ movzwl(Rdst, $mem$$Address); 5686 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5687 %} 5688 ins_pipe(ialu_reg_mem); 5689%} 5690 5691// Load Integer with 31-bit mask into Long Register 5692instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{ 5693 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5694 effect(KILL cr); 5695 5696 format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t" 5697 "XOR $dst.hi,$dst.hi\n\t" 5698 "AND $dst.lo,$mask" %} 5699 ins_encode %{ 5700 Register Rdst = $dst$$Register; 5701 __ movl(Rdst, $mem$$Address); 5702 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5703 __ andl(Rdst, $mask$$constant); 5704 %} 5705 ins_pipe(ialu_reg_mem); 5706%} 5707 5708// Load Unsigned Integer into Long Register 5709instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{ 5710 match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); 5711 effect(KILL cr); 5712 5713 ins_cost(250); 5714 format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t" 5715 "XOR $dst.hi,$dst.hi" %} 5716 5717 ins_encode %{ 5718 __ movl($dst$$Register, $mem$$Address); 5719 __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); 5720 %} 5721 5722 ins_pipe(ialu_reg_mem); 5723%} 5724 5725// Load Long. Cannot clobber address while loading, so restrict address 5726// register to ESI 5727instruct loadL(eRegL dst, load_long_memory mem) %{ 5728 predicate(!((LoadLNode*)n)->require_atomic_access()); 5729 match(Set dst (LoadL mem)); 5730 5731 ins_cost(250); 5732 format %{ "MOV $dst.lo,$mem\t# long\n\t" 5733 "MOV $dst.hi,$mem+4" %} 5734 5735 ins_encode %{ 5736 Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); 5737 Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none); 5738 __ movl($dst$$Register, Amemlo); 5739 __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi); 5740 %} 5741 5742 ins_pipe(ialu_reg_long_mem); 5743%} 5744 5745// Volatile Load Long. Must be atomic, so do 64-bit FILD 5746// then store it down to the stack and reload on the int 5747// side. 5748instruct loadL_volatile(stackSlotL dst, memory mem) %{ 5749 predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access()); 5750 match(Set dst (LoadL mem)); 5751 5752 ins_cost(200); 5753 format %{ "FILD $mem\t# Atomic volatile long load\n\t" 5754 "FISTp $dst" %} 5755 ins_encode(enc_loadL_volatile(mem,dst)); 5756 ins_pipe( fpu_reg_mem ); 5757%} 5758 5759instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{ 5760 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); 5761 match(Set dst (LoadL mem)); 5762 effect(TEMP tmp); 5763 ins_cost(180); 5764 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" 5765 "MOVSD $dst,$tmp" %} 5766 ins_encode %{ 5767 __ movdbl($tmp$$XMMRegister, $mem$$Address); 5768 __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister); 5769 %} 5770 ins_pipe( pipe_slow ); 5771%} 5772 5773instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{ 5774 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); 5775 match(Set dst (LoadL mem)); 5776 effect(TEMP tmp); 5777 ins_cost(160); 5778 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" 5779 "MOVD $dst.lo,$tmp\n\t" 5780 "PSRLQ $tmp,32\n\t" 5781 "MOVD $dst.hi,$tmp" %} 5782 ins_encode %{ 5783 __ movdbl($tmp$$XMMRegister, $mem$$Address); 5784 __ movdl($dst$$Register, $tmp$$XMMRegister); 5785 __ psrlq($tmp$$XMMRegister, 32); 5786 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); 5787 %} 5788 ins_pipe( pipe_slow ); 5789%} 5790 5791// Load Range 5792instruct loadRange(rRegI dst, memory mem) %{ 5793 match(Set dst (LoadRange mem)); 5794 5795 ins_cost(125); 5796 format %{ "MOV $dst,$mem" %} 5797 opcode(0x8B); 5798 ins_encode( OpcP, RegMem(dst,mem)); 5799 ins_pipe( ialu_reg_mem ); 5800%} 5801 5802 5803// Load Pointer 5804instruct loadP(eRegP dst, memory mem) %{ 5805 match(Set dst (LoadP mem)); 5806 5807 ins_cost(125); 5808 format %{ "MOV $dst,$mem" %} 5809 opcode(0x8B); 5810 ins_encode( OpcP, RegMem(dst,mem)); 5811 ins_pipe( ialu_reg_mem ); 5812%} 5813 5814// Load Klass Pointer 5815instruct loadKlass(eRegP dst, memory mem) %{ 5816 match(Set dst (LoadKlass mem)); 5817 5818 ins_cost(125); 5819 format %{ "MOV $dst,$mem" %} 5820 opcode(0x8B); 5821 ins_encode( OpcP, RegMem(dst,mem)); 5822 ins_pipe( ialu_reg_mem ); 5823%} 5824 5825// Load Double 5826instruct loadDPR(regDPR dst, memory mem) %{ 5827 predicate(UseSSE<=1); 5828 match(Set dst (LoadD mem)); 5829 5830 ins_cost(150); 5831 format %{ "FLD_D ST,$mem\n\t" 5832 "FSTP $dst" %} 5833 opcode(0xDD); /* DD /0 */ 5834 ins_encode( OpcP, RMopc_Mem(0x00,mem), 5835 Pop_Reg_DPR(dst) ); 5836 ins_pipe( fpu_reg_mem ); 5837%} 5838 5839// Load Double to XMM 5840instruct loadD(regD dst, memory mem) %{ 5841 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper); 5842 match(Set dst (LoadD mem)); 5843 ins_cost(145); 5844 format %{ "MOVSD $dst,$mem" %} 5845 ins_encode %{ 5846 __ movdbl ($dst$$XMMRegister, $mem$$Address); 5847 %} 5848 ins_pipe( pipe_slow ); 5849%} 5850 5851instruct loadD_partial(regD dst, memory mem) %{ 5852 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); 5853 match(Set dst (LoadD mem)); 5854 ins_cost(145); 5855 format %{ "MOVLPD $dst,$mem" %} 5856 ins_encode %{ 5857 __ movdbl ($dst$$XMMRegister, $mem$$Address); 5858 %} 5859 ins_pipe( pipe_slow ); 5860%} 5861 5862// Load to XMM register (single-precision floating point) 5863// MOVSS instruction 5864instruct loadF(regF dst, memory mem) %{ 5865 predicate(UseSSE>=1); 5866 match(Set dst (LoadF mem)); 5867 ins_cost(145); 5868 format %{ "MOVSS $dst,$mem" %} 5869 ins_encode %{ 5870 __ movflt ($dst$$XMMRegister, $mem$$Address); 5871 %} 5872 ins_pipe( pipe_slow ); 5873%} 5874 5875// Load Float 5876instruct loadFPR(regFPR dst, memory mem) %{ 5877 predicate(UseSSE==0); 5878 match(Set dst (LoadF mem)); 5879 5880 ins_cost(150); 5881 format %{ "FLD_S ST,$mem\n\t" 5882 "FSTP $dst" %} 5883 opcode(0xD9); /* D9 /0 */ 5884 ins_encode( OpcP, RMopc_Mem(0x00,mem), 5885 Pop_Reg_FPR(dst) ); 5886 ins_pipe( fpu_reg_mem ); 5887%} 5888 5889// Load Effective Address 5890instruct leaP8(eRegP dst, indOffset8 mem) %{ 5891 match(Set dst mem); 5892 5893 ins_cost(110); 5894 format %{ "LEA $dst,$mem" %} 5895 opcode(0x8D); 5896 ins_encode( OpcP, RegMem(dst,mem)); 5897 ins_pipe( ialu_reg_reg_fat ); 5898%} 5899 5900instruct leaP32(eRegP dst, indOffset32 mem) %{ 5901 match(Set dst mem); 5902 5903 ins_cost(110); 5904 format %{ "LEA $dst,$mem" %} 5905 opcode(0x8D); 5906 ins_encode( OpcP, RegMem(dst,mem)); 5907 ins_pipe( ialu_reg_reg_fat ); 5908%} 5909 5910instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{ 5911 match(Set dst mem); 5912 5913 ins_cost(110); 5914 format %{ "LEA $dst,$mem" %} 5915 opcode(0x8D); 5916 ins_encode( OpcP, RegMem(dst,mem)); 5917 ins_pipe( ialu_reg_reg_fat ); 5918%} 5919 5920instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{ 5921 match(Set dst mem); 5922 5923 ins_cost(110); 5924 format %{ "LEA $dst,$mem" %} 5925 opcode(0x8D); 5926 ins_encode( OpcP, RegMem(dst,mem)); 5927 ins_pipe( ialu_reg_reg_fat ); 5928%} 5929 5930instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{ 5931 match(Set dst mem); 5932 5933 ins_cost(110); 5934 format %{ "LEA $dst,$mem" %} 5935 opcode(0x8D); 5936 ins_encode( OpcP, RegMem(dst,mem)); 5937 ins_pipe( ialu_reg_reg_fat ); 5938%} 5939 5940// Load Constant 5941instruct loadConI(rRegI dst, immI src) %{ 5942 match(Set dst src); 5943 5944 format %{ "MOV $dst,$src" %} 5945 ins_encode( LdImmI(dst, src) ); 5946 ins_pipe( ialu_reg_fat ); 5947%} 5948 5949// Load Constant zero 5950instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{ 5951 match(Set dst src); 5952 effect(KILL cr); 5953 5954 ins_cost(50); 5955 format %{ "XOR $dst,$dst" %} 5956 opcode(0x33); /* + rd */ 5957 ins_encode( OpcP, RegReg( dst, dst ) ); 5958 ins_pipe( ialu_reg ); 5959%} 5960 5961instruct loadConP(eRegP dst, immP src) %{ 5962 match(Set dst src); 5963 5964 format %{ "MOV $dst,$src" %} 5965 opcode(0xB8); /* + rd */ 5966 ins_encode( LdImmP(dst, src) ); 5967 ins_pipe( ialu_reg_fat ); 5968%} 5969 5970instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{ 5971 match(Set dst src); 5972 effect(KILL cr); 5973 ins_cost(200); 5974 format %{ "MOV $dst.lo,$src.lo\n\t" 5975 "MOV $dst.hi,$src.hi" %} 5976 opcode(0xB8); 5977 ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) ); 5978 ins_pipe( ialu_reg_long_fat ); 5979%} 5980 5981instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{ 5982 match(Set dst src); 5983 effect(KILL cr); 5984 ins_cost(150); 5985 format %{ "XOR $dst.lo,$dst.lo\n\t" 5986 "XOR $dst.hi,$dst.hi" %} 5987 opcode(0x33,0x33); 5988 ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) ); 5989 ins_pipe( ialu_reg_long ); 5990%} 5991 5992// The instruction usage is guarded by predicate in operand immFPR(). 5993instruct loadConFPR(regFPR dst, immFPR con) %{ 5994 match(Set dst con); 5995 ins_cost(125); 5996 format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t" 5997 "FSTP $dst" %} 5998 ins_encode %{ 5999 __ fld_s($constantaddress($con)); 6000 __ fstp_d($dst$$reg); 6001 %} 6002 ins_pipe(fpu_reg_con); 6003%} 6004 6005// The instruction usage is guarded by predicate in operand immFPR0(). 6006instruct loadConFPR0(regFPR dst, immFPR0 con) %{ 6007 match(Set dst con); 6008 ins_cost(125); 6009 format %{ "FLDZ ST\n\t" 6010 "FSTP $dst" %} 6011 ins_encode %{ 6012 __ fldz(); 6013 __ fstp_d($dst$$reg); 6014 %} 6015 ins_pipe(fpu_reg_con); 6016%} 6017 6018// The instruction usage is guarded by predicate in operand immFPR1(). 6019instruct loadConFPR1(regFPR dst, immFPR1 con) %{ 6020 match(Set dst con); 6021 ins_cost(125); 6022 format %{ "FLD1 ST\n\t" 6023 "FSTP $dst" %} 6024 ins_encode %{ 6025 __ fld1(); 6026 __ fstp_d($dst$$reg); 6027 %} 6028 ins_pipe(fpu_reg_con); 6029%} 6030 6031// The instruction usage is guarded by predicate in operand immF(). 6032instruct loadConF(regF dst, immF con) %{ 6033 match(Set dst con); 6034 ins_cost(125); 6035 format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %} 6036 ins_encode %{ 6037 __ movflt($dst$$XMMRegister, $constantaddress($con)); 6038 %} 6039 ins_pipe(pipe_slow); 6040%} 6041 6042// The instruction usage is guarded by predicate in operand immF0(). 6043instruct loadConF0(regF dst, immF0 src) %{ 6044 match(Set dst src); 6045 ins_cost(100); 6046 format %{ "XORPS $dst,$dst\t# float 0.0" %} 6047 ins_encode %{ 6048 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 6049 %} 6050 ins_pipe(pipe_slow); 6051%} 6052 6053// The instruction usage is guarded by predicate in operand immDPR(). 6054instruct loadConDPR(regDPR dst, immDPR con) %{ 6055 match(Set dst con); 6056 ins_cost(125); 6057 6058 format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t" 6059 "FSTP $dst" %} 6060 ins_encode %{ 6061 __ fld_d($constantaddress($con)); 6062 __ fstp_d($dst$$reg); 6063 %} 6064 ins_pipe(fpu_reg_con); 6065%} 6066 6067// The instruction usage is guarded by predicate in operand immDPR0(). 6068instruct loadConDPR0(regDPR dst, immDPR0 con) %{ 6069 match(Set dst con); 6070 ins_cost(125); 6071 6072 format %{ "FLDZ ST\n\t" 6073 "FSTP $dst" %} 6074 ins_encode %{ 6075 __ fldz(); 6076 __ fstp_d($dst$$reg); 6077 %} 6078 ins_pipe(fpu_reg_con); 6079%} 6080 6081// The instruction usage is guarded by predicate in operand immDPR1(). 6082instruct loadConDPR1(regDPR dst, immDPR1 con) %{ 6083 match(Set dst con); 6084 ins_cost(125); 6085 6086 format %{ "FLD1 ST\n\t" 6087 "FSTP $dst" %} 6088 ins_encode %{ 6089 __ fld1(); 6090 __ fstp_d($dst$$reg); 6091 %} 6092 ins_pipe(fpu_reg_con); 6093%} 6094 6095// The instruction usage is guarded by predicate in operand immD(). 6096instruct loadConD(regD dst, immD con) %{ 6097 match(Set dst con); 6098 ins_cost(125); 6099 format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %} 6100 ins_encode %{ 6101 __ movdbl($dst$$XMMRegister, $constantaddress($con)); 6102 %} 6103 ins_pipe(pipe_slow); 6104%} 6105 6106// The instruction usage is guarded by predicate in operand immD0(). 6107instruct loadConD0(regD dst, immD0 src) %{ 6108 match(Set dst src); 6109 ins_cost(100); 6110 format %{ "XORPD $dst,$dst\t# double 0.0" %} 6111 ins_encode %{ 6112 __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister); 6113 %} 6114 ins_pipe( pipe_slow ); 6115%} 6116 6117// Load Stack Slot 6118instruct loadSSI(rRegI dst, stackSlotI src) %{ 6119 match(Set dst src); 6120 ins_cost(125); 6121 6122 format %{ "MOV $dst,$src" %} 6123 opcode(0x8B); 6124 ins_encode( OpcP, RegMem(dst,src)); 6125 ins_pipe( ialu_reg_mem ); 6126%} 6127 6128instruct loadSSL(eRegL dst, stackSlotL src) %{ 6129 match(Set dst src); 6130 6131 ins_cost(200); 6132 format %{ "MOV $dst,$src.lo\n\t" 6133 "MOV $dst+4,$src.hi" %} 6134 opcode(0x8B, 0x8B); 6135 ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) ); 6136 ins_pipe( ialu_mem_long_reg ); 6137%} 6138 6139// Load Stack Slot 6140instruct loadSSP(eRegP dst, stackSlotP src) %{ 6141 match(Set dst src); 6142 ins_cost(125); 6143 6144 format %{ "MOV $dst,$src" %} 6145 opcode(0x8B); 6146 ins_encode( OpcP, RegMem(dst,src)); 6147 ins_pipe( ialu_reg_mem ); 6148%} 6149 6150// Load Stack Slot 6151instruct loadSSF(regFPR dst, stackSlotF src) %{ 6152 match(Set dst src); 6153 ins_cost(125); 6154 6155 format %{ "FLD_S $src\n\t" 6156 "FSTP $dst" %} 6157 opcode(0xD9); /* D9 /0, FLD m32real */ 6158 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 6159 Pop_Reg_FPR(dst) ); 6160 ins_pipe( fpu_reg_mem ); 6161%} 6162 6163// Load Stack Slot 6164instruct loadSSD(regDPR dst, stackSlotD src) %{ 6165 match(Set dst src); 6166 ins_cost(125); 6167 6168 format %{ "FLD_D $src\n\t" 6169 "FSTP $dst" %} 6170 opcode(0xDD); /* DD /0, FLD m64real */ 6171 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 6172 Pop_Reg_DPR(dst) ); 6173 ins_pipe( fpu_reg_mem ); 6174%} 6175 6176// Prefetch instructions for allocation. 6177// Must be safe to execute with invalid address (cannot fault). 6178 6179instruct prefetchAlloc0( memory mem ) %{ 6180 predicate(UseSSE==0 && AllocatePrefetchInstr!=3); 6181 match(PrefetchAllocation mem); 6182 ins_cost(0); 6183 size(0); 6184 format %{ "Prefetch allocation (non-SSE is empty encoding)" %} 6185 ins_encode(); 6186 ins_pipe(empty); 6187%} 6188 6189instruct prefetchAlloc( memory mem ) %{ 6190 predicate(AllocatePrefetchInstr==3); 6191 match( PrefetchAllocation mem ); 6192 ins_cost(100); 6193 6194 format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %} 6195 ins_encode %{ 6196 __ prefetchw($mem$$Address); 6197 %} 6198 ins_pipe(ialu_mem); 6199%} 6200 6201instruct prefetchAllocNTA( memory mem ) %{ 6202 predicate(UseSSE>=1 && AllocatePrefetchInstr==0); 6203 match(PrefetchAllocation mem); 6204 ins_cost(100); 6205 6206 format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %} 6207 ins_encode %{ 6208 __ prefetchnta($mem$$Address); 6209 %} 6210 ins_pipe(ialu_mem); 6211%} 6212 6213instruct prefetchAllocT0( memory mem ) %{ 6214 predicate(UseSSE>=1 && AllocatePrefetchInstr==1); 6215 match(PrefetchAllocation mem); 6216 ins_cost(100); 6217 6218 format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %} 6219 ins_encode %{ 6220 __ prefetcht0($mem$$Address); 6221 %} 6222 ins_pipe(ialu_mem); 6223%} 6224 6225instruct prefetchAllocT2( memory mem ) %{ 6226 predicate(UseSSE>=1 && AllocatePrefetchInstr==2); 6227 match(PrefetchAllocation mem); 6228 ins_cost(100); 6229 6230 format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %} 6231 ins_encode %{ 6232 __ prefetcht2($mem$$Address); 6233 %} 6234 ins_pipe(ialu_mem); 6235%} 6236 6237//----------Store Instructions------------------------------------------------- 6238 6239// Store Byte 6240instruct storeB(memory mem, xRegI src) %{ 6241 match(Set mem (StoreB mem src)); 6242 6243 ins_cost(125); 6244 format %{ "MOV8 $mem,$src" %} 6245 opcode(0x88); 6246 ins_encode( OpcP, RegMem( src, mem ) ); 6247 ins_pipe( ialu_mem_reg ); 6248%} 6249 6250// Store Char/Short 6251instruct storeC(memory mem, rRegI src) %{ 6252 match(Set mem (StoreC mem src)); 6253 6254 ins_cost(125); 6255 format %{ "MOV16 $mem,$src" %} 6256 opcode(0x89, 0x66); 6257 ins_encode( OpcS, OpcP, RegMem( src, mem ) ); 6258 ins_pipe( ialu_mem_reg ); 6259%} 6260 6261// Store Integer 6262instruct storeI(memory mem, rRegI src) %{ 6263 match(Set mem (StoreI mem src)); 6264 6265 ins_cost(125); 6266 format %{ "MOV $mem,$src" %} 6267 opcode(0x89); 6268 ins_encode( OpcP, RegMem( src, mem ) ); 6269 ins_pipe( ialu_mem_reg ); 6270%} 6271 6272// Store Long 6273instruct storeL(long_memory mem, eRegL src) %{ 6274 predicate(!((StoreLNode*)n)->require_atomic_access()); 6275 match(Set mem (StoreL mem src)); 6276 6277 ins_cost(200); 6278 format %{ "MOV $mem,$src.lo\n\t" 6279 "MOV $mem+4,$src.hi" %} 6280 opcode(0x89, 0x89); 6281 ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) ); 6282 ins_pipe( ialu_mem_long_reg ); 6283%} 6284 6285// Store Long to Integer 6286instruct storeL2I(memory mem, eRegL src) %{ 6287 match(Set mem (StoreI mem (ConvL2I src))); 6288 6289 format %{ "MOV $mem,$src.lo\t# long -> int" %} 6290 ins_encode %{ 6291 __ movl($mem$$Address, $src$$Register); 6292 %} 6293 ins_pipe(ialu_mem_reg); 6294%} 6295 6296// Volatile Store Long. Must be atomic, so move it into 6297// the FP TOS and then do a 64-bit FIST. Has to probe the 6298// target address before the store (for null-ptr checks) 6299// so the memory operand is used twice in the encoding. 6300instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{ 6301 predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access()); 6302 match(Set mem (StoreL mem src)); 6303 effect( KILL cr ); 6304 ins_cost(400); 6305 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" 6306 "FILD $src\n\t" 6307 "FISTp $mem\t # 64-bit atomic volatile long store" %} 6308 opcode(0x3B); 6309 ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src)); 6310 ins_pipe( fpu_reg_mem ); 6311%} 6312 6313instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{ 6314 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access()); 6315 match(Set mem (StoreL mem src)); 6316 effect( TEMP tmp, KILL cr ); 6317 ins_cost(380); 6318 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" 6319 "MOVSD $tmp,$src\n\t" 6320 "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %} 6321 ins_encode %{ 6322 __ cmpl(rax, $mem$$Address); 6323 __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp)); 6324 __ movdbl($mem$$Address, $tmp$$XMMRegister); 6325 %} 6326 ins_pipe( pipe_slow ); 6327%} 6328 6329instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{ 6330 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access()); 6331 match(Set mem (StoreL mem src)); 6332 effect( TEMP tmp2 , TEMP tmp, KILL cr ); 6333 ins_cost(360); 6334 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" 6335 "MOVD $tmp,$src.lo\n\t" 6336 "MOVD $tmp2,$src.hi\n\t" 6337 "PUNPCKLDQ $tmp,$tmp2\n\t" 6338 "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %} 6339 ins_encode %{ 6340 __ cmpl(rax, $mem$$Address); 6341 __ movdl($tmp$$XMMRegister, $src$$Register); 6342 __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 6343 __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister); 6344 __ movdbl($mem$$Address, $tmp$$XMMRegister); 6345 %} 6346 ins_pipe( pipe_slow ); 6347%} 6348 6349// Store Pointer; for storing unknown oops and raw pointers 6350instruct storeP(memory mem, anyRegP src) %{ 6351 match(Set mem (StoreP mem src)); 6352 6353 ins_cost(125); 6354 format %{ "MOV $mem,$src" %} 6355 opcode(0x89); 6356 ins_encode( OpcP, RegMem( src, mem ) ); 6357 ins_pipe( ialu_mem_reg ); 6358%} 6359 6360// Store Integer Immediate 6361instruct storeImmI(memory mem, immI src) %{ 6362 match(Set mem (StoreI mem src)); 6363 6364 ins_cost(150); 6365 format %{ "MOV $mem,$src" %} 6366 opcode(0xC7); /* C7 /0 */ 6367 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src )); 6368 ins_pipe( ialu_mem_imm ); 6369%} 6370 6371// Store Short/Char Immediate 6372instruct storeImmI16(memory mem, immI16 src) %{ 6373 predicate(UseStoreImmI16); 6374 match(Set mem (StoreC mem src)); 6375 6376 ins_cost(150); 6377 format %{ "MOV16 $mem,$src" %} 6378 opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */ 6379 ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src )); 6380 ins_pipe( ialu_mem_imm ); 6381%} 6382 6383// Store Pointer Immediate; null pointers or constant oops that do not 6384// need card-mark barriers. 6385instruct storeImmP(memory mem, immP src) %{ 6386 match(Set mem (StoreP mem src)); 6387 6388 ins_cost(150); 6389 format %{ "MOV $mem,$src" %} 6390 opcode(0xC7); /* C7 /0 */ 6391 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src )); 6392 ins_pipe( ialu_mem_imm ); 6393%} 6394 6395// Store Byte Immediate 6396instruct storeImmB(memory mem, immI8 src) %{ 6397 match(Set mem (StoreB mem src)); 6398 6399 ins_cost(150); 6400 format %{ "MOV8 $mem,$src" %} 6401 opcode(0xC6); /* C6 /0 */ 6402 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src )); 6403 ins_pipe( ialu_mem_imm ); 6404%} 6405 6406// Store CMS card-mark Immediate 6407instruct storeImmCM(memory mem, immI8 src) %{ 6408 match(Set mem (StoreCM mem src)); 6409 6410 ins_cost(150); 6411 format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %} 6412 opcode(0xC6); /* C6 /0 */ 6413 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src )); 6414 ins_pipe( ialu_mem_imm ); 6415%} 6416 6417// Store Double 6418instruct storeDPR( memory mem, regDPR1 src) %{ 6419 predicate(UseSSE<=1); 6420 match(Set mem (StoreD mem src)); 6421 6422 ins_cost(100); 6423 format %{ "FST_D $mem,$src" %} 6424 opcode(0xDD); /* DD /2 */ 6425 ins_encode( enc_FPR_store(mem,src) ); 6426 ins_pipe( fpu_mem_reg ); 6427%} 6428 6429// Store double does rounding on x86 6430instruct storeDPR_rounded( memory mem, regDPR1 src) %{ 6431 predicate(UseSSE<=1); 6432 match(Set mem (StoreD mem (RoundDouble src))); 6433 6434 ins_cost(100); 6435 format %{ "FST_D $mem,$src\t# round" %} 6436 opcode(0xDD); /* DD /2 */ 6437 ins_encode( enc_FPR_store(mem,src) ); 6438 ins_pipe( fpu_mem_reg ); 6439%} 6440 6441// Store XMM register to memory (double-precision floating points) 6442// MOVSD instruction 6443instruct storeD(memory mem, regD src) %{ 6444 predicate(UseSSE>=2); 6445 match(Set mem (StoreD mem src)); 6446 ins_cost(95); 6447 format %{ "MOVSD $mem,$src" %} 6448 ins_encode %{ 6449 __ movdbl($mem$$Address, $src$$XMMRegister); 6450 %} 6451 ins_pipe( pipe_slow ); 6452%} 6453 6454// Store XMM register to memory (single-precision floating point) 6455// MOVSS instruction 6456instruct storeF(memory mem, regF src) %{ 6457 predicate(UseSSE>=1); 6458 match(Set mem (StoreF mem src)); 6459 ins_cost(95); 6460 format %{ "MOVSS $mem,$src" %} 6461 ins_encode %{ 6462 __ movflt($mem$$Address, $src$$XMMRegister); 6463 %} 6464 ins_pipe( pipe_slow ); 6465%} 6466 6467// Store Float 6468instruct storeFPR( memory mem, regFPR1 src) %{ 6469 predicate(UseSSE==0); 6470 match(Set mem (StoreF mem src)); 6471 6472 ins_cost(100); 6473 format %{ "FST_S $mem,$src" %} 6474 opcode(0xD9); /* D9 /2 */ 6475 ins_encode( enc_FPR_store(mem,src) ); 6476 ins_pipe( fpu_mem_reg ); 6477%} 6478 6479// Store Float does rounding on x86 6480instruct storeFPR_rounded( memory mem, regFPR1 src) %{ 6481 predicate(UseSSE==0); 6482 match(Set mem (StoreF mem (RoundFloat src))); 6483 6484 ins_cost(100); 6485 format %{ "FST_S $mem,$src\t# round" %} 6486 opcode(0xD9); /* D9 /2 */ 6487 ins_encode( enc_FPR_store(mem,src) ); 6488 ins_pipe( fpu_mem_reg ); 6489%} 6490 6491// Store Float does rounding on x86 6492instruct storeFPR_Drounded( memory mem, regDPR1 src) %{ 6493 predicate(UseSSE<=1); 6494 match(Set mem (StoreF mem (ConvD2F src))); 6495 6496 ins_cost(100); 6497 format %{ "FST_S $mem,$src\t# D-round" %} 6498 opcode(0xD9); /* D9 /2 */ 6499 ins_encode( enc_FPR_store(mem,src) ); 6500 ins_pipe( fpu_mem_reg ); 6501%} 6502 6503// Store immediate Float value (it is faster than store from FPU register) 6504// The instruction usage is guarded by predicate in operand immFPR(). 6505instruct storeFPR_imm( memory mem, immFPR src) %{ 6506 match(Set mem (StoreF mem src)); 6507 6508 ins_cost(50); 6509 format %{ "MOV $mem,$src\t# store float" %} 6510 opcode(0xC7); /* C7 /0 */ 6511 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src )); 6512 ins_pipe( ialu_mem_imm ); 6513%} 6514 6515// Store immediate Float value (it is faster than store from XMM register) 6516// The instruction usage is guarded by predicate in operand immF(). 6517instruct storeF_imm( memory mem, immF src) %{ 6518 match(Set mem (StoreF mem src)); 6519 6520 ins_cost(50); 6521 format %{ "MOV $mem,$src\t# store float" %} 6522 opcode(0xC7); /* C7 /0 */ 6523 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src )); 6524 ins_pipe( ialu_mem_imm ); 6525%} 6526 6527// Store Integer to stack slot 6528instruct storeSSI(stackSlotI dst, rRegI src) %{ 6529 match(Set dst src); 6530 6531 ins_cost(100); 6532 format %{ "MOV $dst,$src" %} 6533 opcode(0x89); 6534 ins_encode( OpcPRegSS( dst, src ) ); 6535 ins_pipe( ialu_mem_reg ); 6536%} 6537 6538// Store Integer to stack slot 6539instruct storeSSP(stackSlotP dst, eRegP src) %{ 6540 match(Set dst src); 6541 6542 ins_cost(100); 6543 format %{ "MOV $dst,$src" %} 6544 opcode(0x89); 6545 ins_encode( OpcPRegSS( dst, src ) ); 6546 ins_pipe( ialu_mem_reg ); 6547%} 6548 6549// Store Long to stack slot 6550instruct storeSSL(stackSlotL dst, eRegL src) %{ 6551 match(Set dst src); 6552 6553 ins_cost(200); 6554 format %{ "MOV $dst,$src.lo\n\t" 6555 "MOV $dst+4,$src.hi" %} 6556 opcode(0x89, 0x89); 6557 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) ); 6558 ins_pipe( ialu_mem_long_reg ); 6559%} 6560 6561//----------MemBar Instructions----------------------------------------------- 6562// Memory barrier flavors 6563 6564instruct membar_acquire() %{ 6565 match(MemBarAcquire); 6566 match(LoadFence); 6567 ins_cost(400); 6568 6569 size(0); 6570 format %{ "MEMBAR-acquire ! (empty encoding)" %} 6571 ins_encode(); 6572 ins_pipe(empty); 6573%} 6574 6575instruct membar_acquire_lock() %{ 6576 match(MemBarAcquireLock); 6577 ins_cost(0); 6578 6579 size(0); 6580 format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %} 6581 ins_encode( ); 6582 ins_pipe(empty); 6583%} 6584 6585instruct membar_release() %{ 6586 match(MemBarRelease); 6587 match(StoreFence); 6588 ins_cost(400); 6589 6590 size(0); 6591 format %{ "MEMBAR-release ! (empty encoding)" %} 6592 ins_encode( ); 6593 ins_pipe(empty); 6594%} 6595 6596instruct membar_release_lock() %{ 6597 match(MemBarReleaseLock); 6598 ins_cost(0); 6599 6600 size(0); 6601 format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %} 6602 ins_encode( ); 6603 ins_pipe(empty); 6604%} 6605 6606instruct membar_volatile(eFlagsReg cr) %{ 6607 match(MemBarVolatile); 6608 effect(KILL cr); 6609 ins_cost(400); 6610 6611 format %{ 6612 $$template 6613 if (os::is_MP()) { 6614 $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile" 6615 } else { 6616 $$emit$$"MEMBAR-volatile ! (empty encoding)" 6617 } 6618 %} 6619 ins_encode %{ 6620 __ membar(Assembler::StoreLoad); 6621 %} 6622 ins_pipe(pipe_slow); 6623%} 6624 6625instruct unnecessary_membar_volatile() %{ 6626 match(MemBarVolatile); 6627 predicate(Matcher::post_store_load_barrier(n)); 6628 ins_cost(0); 6629 6630 size(0); 6631 format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %} 6632 ins_encode( ); 6633 ins_pipe(empty); 6634%} 6635 6636instruct membar_storestore() %{ 6637 match(MemBarStoreStore); 6638 ins_cost(0); 6639 6640 size(0); 6641 format %{ "MEMBAR-storestore (empty encoding)" %} 6642 ins_encode( ); 6643 ins_pipe(empty); 6644%} 6645 6646//----------Move Instructions-------------------------------------------------- 6647instruct castX2P(eAXRegP dst, eAXRegI src) %{ 6648 match(Set dst (CastX2P src)); 6649 format %{ "# X2P $dst, $src" %} 6650 ins_encode( /*empty encoding*/ ); 6651 ins_cost(0); 6652 ins_pipe(empty); 6653%} 6654 6655instruct castP2X(rRegI dst, eRegP src ) %{ 6656 match(Set dst (CastP2X src)); 6657 ins_cost(50); 6658 format %{ "MOV $dst, $src\t# CastP2X" %} 6659 ins_encode( enc_Copy( dst, src) ); 6660 ins_pipe( ialu_reg_reg ); 6661%} 6662 6663//----------Conditional Move--------------------------------------------------- 6664// Conditional move 6665instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{ 6666 predicate(!VM_Version::supports_cmov() ); 6667 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6668 ins_cost(200); 6669 format %{ "J$cop,us skip\t# signed cmove\n\t" 6670 "MOV $dst,$src\n" 6671 "skip:" %} 6672 ins_encode %{ 6673 Label Lskip; 6674 // Invert sense of branch from sense of CMOV 6675 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); 6676 __ movl($dst$$Register, $src$$Register); 6677 __ bind(Lskip); 6678 %} 6679 ins_pipe( pipe_cmov_reg ); 6680%} 6681 6682instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{ 6683 predicate(!VM_Version::supports_cmov() ); 6684 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6685 ins_cost(200); 6686 format %{ "J$cop,us skip\t# unsigned cmove\n\t" 6687 "MOV $dst,$src\n" 6688 "skip:" %} 6689 ins_encode %{ 6690 Label Lskip; 6691 // Invert sense of branch from sense of CMOV 6692 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); 6693 __ movl($dst$$Register, $src$$Register); 6694 __ bind(Lskip); 6695 %} 6696 ins_pipe( pipe_cmov_reg ); 6697%} 6698 6699instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{ 6700 predicate(VM_Version::supports_cmov() ); 6701 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6702 ins_cost(200); 6703 format %{ "CMOV$cop $dst,$src" %} 6704 opcode(0x0F,0x40); 6705 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 6706 ins_pipe( pipe_cmov_reg ); 6707%} 6708 6709instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{ 6710 predicate(VM_Version::supports_cmov() ); 6711 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6712 ins_cost(200); 6713 format %{ "CMOV$cop $dst,$src" %} 6714 opcode(0x0F,0x40); 6715 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 6716 ins_pipe( pipe_cmov_reg ); 6717%} 6718 6719instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{ 6720 predicate(VM_Version::supports_cmov() ); 6721 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6722 ins_cost(200); 6723 expand %{ 6724 cmovI_regU(cop, cr, dst, src); 6725 %} 6726%} 6727 6728// Conditional move 6729instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{ 6730 predicate(VM_Version::supports_cmov() ); 6731 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); 6732 ins_cost(250); 6733 format %{ "CMOV$cop $dst,$src" %} 6734 opcode(0x0F,0x40); 6735 ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 6736 ins_pipe( pipe_cmov_mem ); 6737%} 6738 6739// Conditional move 6740instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{ 6741 predicate(VM_Version::supports_cmov() ); 6742 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); 6743 ins_cost(250); 6744 format %{ "CMOV$cop $dst,$src" %} 6745 opcode(0x0F,0x40); 6746 ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 6747 ins_pipe( pipe_cmov_mem ); 6748%} 6749 6750instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{ 6751 predicate(VM_Version::supports_cmov() ); 6752 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); 6753 ins_cost(250); 6754 expand %{ 6755 cmovI_memU(cop, cr, dst, src); 6756 %} 6757%} 6758 6759// Conditional move 6760instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{ 6761 predicate(VM_Version::supports_cmov() ); 6762 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 6763 ins_cost(200); 6764 format %{ "CMOV$cop $dst,$src\t# ptr" %} 6765 opcode(0x0F,0x40); 6766 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 6767 ins_pipe( pipe_cmov_reg ); 6768%} 6769 6770// Conditional move (non-P6 version) 6771// Note: a CMoveP is generated for stubs and native wrappers 6772// regardless of whether we are on a P6, so we 6773// emulate a cmov here 6774instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{ 6775 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 6776 ins_cost(300); 6777 format %{ "Jn$cop skip\n\t" 6778 "MOV $dst,$src\t# pointer\n" 6779 "skip:" %} 6780 opcode(0x8b); 6781 ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src)); 6782 ins_pipe( pipe_cmov_reg ); 6783%} 6784 6785// Conditional move 6786instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{ 6787 predicate(VM_Version::supports_cmov() ); 6788 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 6789 ins_cost(200); 6790 format %{ "CMOV$cop $dst,$src\t# ptr" %} 6791 opcode(0x0F,0x40); 6792 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 6793 ins_pipe( pipe_cmov_reg ); 6794%} 6795 6796instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{ 6797 predicate(VM_Version::supports_cmov() ); 6798 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 6799 ins_cost(200); 6800 expand %{ 6801 cmovP_regU(cop, cr, dst, src); 6802 %} 6803%} 6804 6805// DISABLED: Requires the ADLC to emit a bottom_type call that 6806// correctly meets the two pointer arguments; one is an incoming 6807// register but the other is a memory operand. ALSO appears to 6808// be buggy with implicit null checks. 6809// 6810//// Conditional move 6811//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{ 6812// predicate(VM_Version::supports_cmov() ); 6813// match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src)))); 6814// ins_cost(250); 6815// format %{ "CMOV$cop $dst,$src\t# ptr" %} 6816// opcode(0x0F,0x40); 6817// ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 6818// ins_pipe( pipe_cmov_mem ); 6819//%} 6820// 6821//// Conditional move 6822//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{ 6823// predicate(VM_Version::supports_cmov() ); 6824// match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src)))); 6825// ins_cost(250); 6826// format %{ "CMOV$cop $dst,$src\t# ptr" %} 6827// opcode(0x0F,0x40); 6828// ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 6829// ins_pipe( pipe_cmov_mem ); 6830//%} 6831 6832// Conditional move 6833instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{ 6834 predicate(UseSSE<=1); 6835 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6836 ins_cost(200); 6837 format %{ "FCMOV$cop $dst,$src\t# double" %} 6838 opcode(0xDA); 6839 ins_encode( enc_cmov_dpr(cop,src) ); 6840 ins_pipe( pipe_cmovDPR_reg ); 6841%} 6842 6843// Conditional move 6844instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{ 6845 predicate(UseSSE==0); 6846 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6847 ins_cost(200); 6848 format %{ "FCMOV$cop $dst,$src\t# float" %} 6849 opcode(0xDA); 6850 ins_encode( enc_cmov_dpr(cop,src) ); 6851 ins_pipe( pipe_cmovDPR_reg ); 6852%} 6853 6854// Float CMOV on Intel doesn't handle *signed* compares, only unsigned. 6855instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{ 6856 predicate(UseSSE<=1); 6857 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6858 ins_cost(200); 6859 format %{ "Jn$cop skip\n\t" 6860 "MOV $dst,$src\t# double\n" 6861 "skip:" %} 6862 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */ 6863 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) ); 6864 ins_pipe( pipe_cmovDPR_reg ); 6865%} 6866 6867// Float CMOV on Intel doesn't handle *signed* compares, only unsigned. 6868instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{ 6869 predicate(UseSSE==0); 6870 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6871 ins_cost(200); 6872 format %{ "Jn$cop skip\n\t" 6873 "MOV $dst,$src\t# float\n" 6874 "skip:" %} 6875 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */ 6876 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) ); 6877 ins_pipe( pipe_cmovDPR_reg ); 6878%} 6879 6880// No CMOVE with SSE/SSE2 6881instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{ 6882 predicate (UseSSE>=1); 6883 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6884 ins_cost(200); 6885 format %{ "Jn$cop skip\n\t" 6886 "MOVSS $dst,$src\t# float\n" 6887 "skip:" %} 6888 ins_encode %{ 6889 Label skip; 6890 // Invert sense of branch from sense of CMOV 6891 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 6892 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 6893 __ bind(skip); 6894 %} 6895 ins_pipe( pipe_slow ); 6896%} 6897 6898// No CMOVE with SSE/SSE2 6899instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{ 6900 predicate (UseSSE>=2); 6901 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6902 ins_cost(200); 6903 format %{ "Jn$cop skip\n\t" 6904 "MOVSD $dst,$src\t# float\n" 6905 "skip:" %} 6906 ins_encode %{ 6907 Label skip; 6908 // Invert sense of branch from sense of CMOV 6909 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 6910 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6911 __ bind(skip); 6912 %} 6913 ins_pipe( pipe_slow ); 6914%} 6915 6916// unsigned version 6917instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{ 6918 predicate (UseSSE>=1); 6919 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6920 ins_cost(200); 6921 format %{ "Jn$cop skip\n\t" 6922 "MOVSS $dst,$src\t# float\n" 6923 "skip:" %} 6924 ins_encode %{ 6925 Label skip; 6926 // Invert sense of branch from sense of CMOV 6927 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 6928 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 6929 __ bind(skip); 6930 %} 6931 ins_pipe( pipe_slow ); 6932%} 6933 6934instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{ 6935 predicate (UseSSE>=1); 6936 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6937 ins_cost(200); 6938 expand %{ 6939 fcmovF_regU(cop, cr, dst, src); 6940 %} 6941%} 6942 6943// unsigned version 6944instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{ 6945 predicate (UseSSE>=2); 6946 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6947 ins_cost(200); 6948 format %{ "Jn$cop skip\n\t" 6949 "MOVSD $dst,$src\t# float\n" 6950 "skip:" %} 6951 ins_encode %{ 6952 Label skip; 6953 // Invert sense of branch from sense of CMOV 6954 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 6955 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6956 __ bind(skip); 6957 %} 6958 ins_pipe( pipe_slow ); 6959%} 6960 6961instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{ 6962 predicate (UseSSE>=2); 6963 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6964 ins_cost(200); 6965 expand %{ 6966 fcmovD_regU(cop, cr, dst, src); 6967 %} 6968%} 6969 6970instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{ 6971 predicate(VM_Version::supports_cmov() ); 6972 match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); 6973 ins_cost(200); 6974 format %{ "CMOV$cop $dst.lo,$src.lo\n\t" 6975 "CMOV$cop $dst.hi,$src.hi" %} 6976 opcode(0x0F,0x40); 6977 ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) ); 6978 ins_pipe( pipe_cmov_reg_long ); 6979%} 6980 6981instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{ 6982 predicate(VM_Version::supports_cmov() ); 6983 match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); 6984 ins_cost(200); 6985 format %{ "CMOV$cop $dst.lo,$src.lo\n\t" 6986 "CMOV$cop $dst.hi,$src.hi" %} 6987 opcode(0x0F,0x40); 6988 ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) ); 6989 ins_pipe( pipe_cmov_reg_long ); 6990%} 6991 6992instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{ 6993 predicate(VM_Version::supports_cmov() ); 6994 match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); 6995 ins_cost(200); 6996 expand %{ 6997 cmovL_regU(cop, cr, dst, src); 6998 %} 6999%} 7000 7001//----------Arithmetic Instructions-------------------------------------------- 7002//----------Addition Instructions---------------------------------------------- 7003 7004// Integer Addition Instructions 7005instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7006 match(Set dst (AddI dst src)); 7007 effect(KILL cr); 7008 7009 size(2); 7010 format %{ "ADD $dst,$src" %} 7011 opcode(0x03); 7012 ins_encode( OpcP, RegReg( dst, src) ); 7013 ins_pipe( ialu_reg_reg ); 7014%} 7015 7016instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 7017 match(Set dst (AddI dst src)); 7018 effect(KILL cr); 7019 7020 format %{ "ADD $dst,$src" %} 7021 opcode(0x81, 0x00); /* /0 id */ 7022 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 7023 ins_pipe( ialu_reg ); 7024%} 7025 7026instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{ 7027 predicate(UseIncDec); 7028 match(Set dst (AddI dst src)); 7029 effect(KILL cr); 7030 7031 size(1); 7032 format %{ "INC $dst" %} 7033 opcode(0x40); /* */ 7034 ins_encode( Opc_plus( primary, dst ) ); 7035 ins_pipe( ialu_reg ); 7036%} 7037 7038instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{ 7039 match(Set dst (AddI src0 src1)); 7040 ins_cost(110); 7041 7042 format %{ "LEA $dst,[$src0 + $src1]" %} 7043 opcode(0x8D); /* 0x8D /r */ 7044 ins_encode( OpcP, RegLea( dst, src0, src1 ) ); 7045 ins_pipe( ialu_reg_reg ); 7046%} 7047 7048instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{ 7049 match(Set dst (AddP src0 src1)); 7050 ins_cost(110); 7051 7052 format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %} 7053 opcode(0x8D); /* 0x8D /r */ 7054 ins_encode( OpcP, RegLea( dst, src0, src1 ) ); 7055 ins_pipe( ialu_reg_reg ); 7056%} 7057 7058instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{ 7059 predicate(UseIncDec); 7060 match(Set dst (AddI dst src)); 7061 effect(KILL cr); 7062 7063 size(1); 7064 format %{ "DEC $dst" %} 7065 opcode(0x48); /* */ 7066 ins_encode( Opc_plus( primary, dst ) ); 7067 ins_pipe( ialu_reg ); 7068%} 7069 7070instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{ 7071 match(Set dst (AddP dst src)); 7072 effect(KILL cr); 7073 7074 size(2); 7075 format %{ "ADD $dst,$src" %} 7076 opcode(0x03); 7077 ins_encode( OpcP, RegReg( dst, src) ); 7078 ins_pipe( ialu_reg_reg ); 7079%} 7080 7081instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{ 7082 match(Set dst (AddP dst src)); 7083 effect(KILL cr); 7084 7085 format %{ "ADD $dst,$src" %} 7086 opcode(0x81,0x00); /* Opcode 81 /0 id */ 7087 // ins_encode( RegImm( dst, src) ); 7088 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 7089 ins_pipe( ialu_reg ); 7090%} 7091 7092instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 7093 match(Set dst (AddI dst (LoadI src))); 7094 effect(KILL cr); 7095 7096 ins_cost(125); 7097 format %{ "ADD $dst,$src" %} 7098 opcode(0x03); 7099 ins_encode( OpcP, RegMem( dst, src) ); 7100 ins_pipe( ialu_reg_mem ); 7101%} 7102 7103instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 7104 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7105 effect(KILL cr); 7106 7107 ins_cost(150); 7108 format %{ "ADD $dst,$src" %} 7109 opcode(0x01); /* Opcode 01 /r */ 7110 ins_encode( OpcP, RegMem( src, dst ) ); 7111 ins_pipe( ialu_mem_reg ); 7112%} 7113 7114// Add Memory with Immediate 7115instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 7116 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7117 effect(KILL cr); 7118 7119 ins_cost(125); 7120 format %{ "ADD $dst,$src" %} 7121 opcode(0x81); /* Opcode 81 /0 id */ 7122 ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) ); 7123 ins_pipe( ialu_mem_imm ); 7124%} 7125 7126instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{ 7127 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7128 effect(KILL cr); 7129 7130 ins_cost(125); 7131 format %{ "INC $dst" %} 7132 opcode(0xFF); /* Opcode FF /0 */ 7133 ins_encode( OpcP, RMopc_Mem(0x00,dst)); 7134 ins_pipe( ialu_mem_imm ); 7135%} 7136 7137instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{ 7138 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7139 effect(KILL cr); 7140 7141 ins_cost(125); 7142 format %{ "DEC $dst" %} 7143 opcode(0xFF); /* Opcode FF /1 */ 7144 ins_encode( OpcP, RMopc_Mem(0x01,dst)); 7145 ins_pipe( ialu_mem_imm ); 7146%} 7147 7148 7149instruct checkCastPP( eRegP dst ) %{ 7150 match(Set dst (CheckCastPP dst)); 7151 7152 size(0); 7153 format %{ "#checkcastPP of $dst" %} 7154 ins_encode( /*empty encoding*/ ); 7155 ins_pipe( empty ); 7156%} 7157 7158instruct castPP( eRegP dst ) %{ 7159 match(Set dst (CastPP dst)); 7160 format %{ "#castPP of $dst" %} 7161 ins_encode( /*empty encoding*/ ); 7162 ins_pipe( empty ); 7163%} 7164 7165instruct castII( rRegI dst ) %{ 7166 match(Set dst (CastII dst)); 7167 format %{ "#castII of $dst" %} 7168 ins_encode( /*empty encoding*/ ); 7169 ins_cost(0); 7170 ins_pipe( empty ); 7171%} 7172 7173 7174// Load-locked - same as a regular pointer load when used with compare-swap 7175instruct loadPLocked(eRegP dst, memory mem) %{ 7176 match(Set dst (LoadPLocked mem)); 7177 7178 ins_cost(125); 7179 format %{ "MOV $dst,$mem\t# Load ptr. locked" %} 7180 opcode(0x8B); 7181 ins_encode( OpcP, RegMem(dst,mem)); 7182 ins_pipe( ialu_reg_mem ); 7183%} 7184 7185// Conditional-store of the updated heap-top. 7186// Used during allocation of the shared heap. 7187// Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel. 7188instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{ 7189 match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); 7190 // EAX is killed if there is contention, but then it's also unused. 7191 // In the common case of no contention, EAX holds the new oop address. 7192 format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %} 7193 ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) ); 7194 ins_pipe( pipe_cmpxchg ); 7195%} 7196 7197// Conditional-store of an int value. 7198// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel. 7199instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{ 7200 match(Set cr (StoreIConditional mem (Binary oldval newval))); 7201 effect(KILL oldval); 7202 format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %} 7203 ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) ); 7204 ins_pipe( pipe_cmpxchg ); 7205%} 7206 7207// Conditional-store of a long value. 7208// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel. 7209instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ 7210 match(Set cr (StoreLConditional mem (Binary oldval newval))); 7211 effect(KILL oldval); 7212 format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t" 7213 "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t" 7214 "XCHG EBX,ECX" 7215 %} 7216 ins_encode %{ 7217 // Note: we need to swap rbx, and rcx before and after the 7218 // cmpxchg8 instruction because the instruction uses 7219 // rcx as the high order word of the new value to store but 7220 // our register encoding uses rbx. 7221 __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc)); 7222 if( os::is_MP() ) 7223 __ lock(); 7224 __ cmpxchg8($mem$$Address); 7225 __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc)); 7226 %} 7227 ins_pipe( pipe_cmpxchg ); 7228%} 7229 7230// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them 7231 7232instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ 7233 predicate(VM_Version::supports_cx8()); 7234 match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); 7235 effect(KILL cr, KILL oldval); 7236 format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7237 "MOV $res,0\n\t" 7238 "JNE,s fail\n\t" 7239 "MOV $res,1\n" 7240 "fail:" %} 7241 ins_encode( enc_cmpxchg8(mem_ptr), 7242 enc_flags_ne_to_boolean(res) ); 7243 ins_pipe( pipe_cmpxchg ); 7244%} 7245 7246instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{ 7247 match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); 7248 effect(KILL cr, KILL oldval); 7249 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7250 "MOV $res,0\n\t" 7251 "JNE,s fail\n\t" 7252 "MOV $res,1\n" 7253 "fail:" %} 7254 ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) ); 7255 ins_pipe( pipe_cmpxchg ); 7256%} 7257 7258instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7259 match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); 7260 effect(KILL cr, KILL oldval); 7261 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7262 "MOV $res,0\n\t" 7263 "JNE,s fail\n\t" 7264 "MOV $res,1\n" 7265 "fail:" %} 7266 ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) ); 7267 ins_pipe( pipe_cmpxchg ); 7268%} 7269 7270instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ 7271 predicate(n->as_LoadStore()->result_not_used()); 7272 match(Set dummy (GetAndAddI mem add)); 7273 effect(KILL cr); 7274 format %{ "ADDL [$mem],$add" %} 7275 ins_encode %{ 7276 if (os::is_MP()) { __ lock(); } 7277 __ addl($mem$$Address, $add$$constant); 7278 %} 7279 ins_pipe( pipe_cmpxchg ); 7280%} 7281 7282instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{ 7283 match(Set newval (GetAndAddI mem newval)); 7284 effect(KILL cr); 7285 format %{ "XADDL [$mem],$newval" %} 7286 ins_encode %{ 7287 if (os::is_MP()) { __ lock(); } 7288 __ xaddl($mem$$Address, $newval$$Register); 7289 %} 7290 ins_pipe( pipe_cmpxchg ); 7291%} 7292 7293instruct xchgI( memory mem, rRegI newval) %{ 7294 match(Set newval (GetAndSetI mem newval)); 7295 format %{ "XCHGL $newval,[$mem]" %} 7296 ins_encode %{ 7297 __ xchgl($newval$$Register, $mem$$Address); 7298 %} 7299 ins_pipe( pipe_cmpxchg ); 7300%} 7301 7302instruct xchgP( memory mem, pRegP newval) %{ 7303 match(Set newval (GetAndSetP mem newval)); 7304 format %{ "XCHGL $newval,[$mem]" %} 7305 ins_encode %{ 7306 __ xchgl($newval$$Register, $mem$$Address); 7307 %} 7308 ins_pipe( pipe_cmpxchg ); 7309%} 7310 7311//----------Subtraction Instructions------------------------------------------- 7312 7313// Integer Subtraction Instructions 7314instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7315 match(Set dst (SubI dst src)); 7316 effect(KILL cr); 7317 7318 size(2); 7319 format %{ "SUB $dst,$src" %} 7320 opcode(0x2B); 7321 ins_encode( OpcP, RegReg( dst, src) ); 7322 ins_pipe( ialu_reg_reg ); 7323%} 7324 7325instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 7326 match(Set dst (SubI dst src)); 7327 effect(KILL cr); 7328 7329 format %{ "SUB $dst,$src" %} 7330 opcode(0x81,0x05); /* Opcode 81 /5 */ 7331 // ins_encode( RegImm( dst, src) ); 7332 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 7333 ins_pipe( ialu_reg ); 7334%} 7335 7336instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 7337 match(Set dst (SubI dst (LoadI src))); 7338 effect(KILL cr); 7339 7340 ins_cost(125); 7341 format %{ "SUB $dst,$src" %} 7342 opcode(0x2B); 7343 ins_encode( OpcP, RegMem( dst, src) ); 7344 ins_pipe( ialu_reg_mem ); 7345%} 7346 7347instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 7348 match(Set dst (StoreI dst (SubI (LoadI dst) src))); 7349 effect(KILL cr); 7350 7351 ins_cost(150); 7352 format %{ "SUB $dst,$src" %} 7353 opcode(0x29); /* Opcode 29 /r */ 7354 ins_encode( OpcP, RegMem( src, dst ) ); 7355 ins_pipe( ialu_mem_reg ); 7356%} 7357 7358// Subtract from a pointer 7359instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{ 7360 match(Set dst (AddP dst (SubI zero src))); 7361 effect(KILL cr); 7362 7363 size(2); 7364 format %{ "SUB $dst,$src" %} 7365 opcode(0x2B); 7366 ins_encode( OpcP, RegReg( dst, src) ); 7367 ins_pipe( ialu_reg_reg ); 7368%} 7369 7370instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{ 7371 match(Set dst (SubI zero dst)); 7372 effect(KILL cr); 7373 7374 size(2); 7375 format %{ "NEG $dst" %} 7376 opcode(0xF7,0x03); // Opcode F7 /3 7377 ins_encode( OpcP, RegOpc( dst ) ); 7378 ins_pipe( ialu_reg ); 7379%} 7380 7381//----------Multiplication/Division Instructions------------------------------- 7382// Integer Multiplication Instructions 7383// Multiply Register 7384instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7385 match(Set dst (MulI dst src)); 7386 effect(KILL cr); 7387 7388 size(3); 7389 ins_cost(300); 7390 format %{ "IMUL $dst,$src" %} 7391 opcode(0xAF, 0x0F); 7392 ins_encode( OpcS, OpcP, RegReg( dst, src) ); 7393 ins_pipe( ialu_reg_reg_alu0 ); 7394%} 7395 7396// Multiply 32-bit Immediate 7397instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{ 7398 match(Set dst (MulI src imm)); 7399 effect(KILL cr); 7400 7401 ins_cost(300); 7402 format %{ "IMUL $dst,$src,$imm" %} 7403 opcode(0x69); /* 69 /r id */ 7404 ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) ); 7405 ins_pipe( ialu_reg_reg_alu0 ); 7406%} 7407 7408instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{ 7409 match(Set dst src); 7410 effect(KILL cr); 7411 7412 // Note that this is artificially increased to make it more expensive than loadConL 7413 ins_cost(250); 7414 format %{ "MOV EAX,$src\t// low word only" %} 7415 opcode(0xB8); 7416 ins_encode( LdImmL_Lo(dst, src) ); 7417 ins_pipe( ialu_reg_fat ); 7418%} 7419 7420// Multiply by 32-bit Immediate, taking the shifted high order results 7421// (special case for shift by 32) 7422instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{ 7423 match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); 7424 predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && 7425 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && 7426 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); 7427 effect(USE src1, KILL cr); 7428 7429 // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only 7430 ins_cost(0*100 + 1*400 - 150); 7431 format %{ "IMUL EDX:EAX,$src1" %} 7432 ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); 7433 ins_pipe( pipe_slow ); 7434%} 7435 7436// Multiply by 32-bit Immediate, taking the shifted high order results 7437instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{ 7438 match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); 7439 predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && 7440 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && 7441 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); 7442 effect(USE src1, KILL cr); 7443 7444 // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only 7445 ins_cost(1*100 + 1*400 - 150); 7446 format %{ "IMUL EDX:EAX,$src1\n\t" 7447 "SAR EDX,$cnt-32" %} 7448 ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); 7449 ins_pipe( pipe_slow ); 7450%} 7451 7452// Multiply Memory 32-bit Immediate 7453instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{ 7454 match(Set dst (MulI (LoadI src) imm)); 7455 effect(KILL cr); 7456 7457 ins_cost(300); 7458 format %{ "IMUL $dst,$src,$imm" %} 7459 opcode(0x69); /* 69 /r id */ 7460 ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) ); 7461 ins_pipe( ialu_reg_mem_alu0 ); 7462%} 7463 7464// Multiply Memory 7465instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{ 7466 match(Set dst (MulI dst (LoadI src))); 7467 effect(KILL cr); 7468 7469 ins_cost(350); 7470 format %{ "IMUL $dst,$src" %} 7471 opcode(0xAF, 0x0F); 7472 ins_encode( OpcS, OpcP, RegMem( dst, src) ); 7473 ins_pipe( ialu_reg_mem_alu0 ); 7474%} 7475 7476// Multiply Register Int to Long 7477instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{ 7478 // Basic Idea: long = (long)int * (long)int 7479 match(Set dst (MulL (ConvI2L src) (ConvI2L src1))); 7480 effect(DEF dst, USE src, USE src1, KILL flags); 7481 7482 ins_cost(300); 7483 format %{ "IMUL $dst,$src1" %} 7484 7485 ins_encode( long_int_multiply( dst, src1 ) ); 7486 ins_pipe( ialu_reg_reg_alu0 ); 7487%} 7488 7489instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{ 7490 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) 7491 match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask))); 7492 effect(KILL flags); 7493 7494 ins_cost(300); 7495 format %{ "MUL $dst,$src1" %} 7496 7497 ins_encode( long_uint_multiply(dst, src1) ); 7498 ins_pipe( ialu_reg_reg_alu0 ); 7499%} 7500 7501// Multiply Register Long 7502instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7503 match(Set dst (MulL dst src)); 7504 effect(KILL cr, TEMP tmp); 7505 ins_cost(4*100+3*400); 7506// Basic idea: lo(result) = lo(x_lo * y_lo) 7507// hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 7508 format %{ "MOV $tmp,$src.lo\n\t" 7509 "IMUL $tmp,EDX\n\t" 7510 "MOV EDX,$src.hi\n\t" 7511 "IMUL EDX,EAX\n\t" 7512 "ADD $tmp,EDX\n\t" 7513 "MUL EDX:EAX,$src.lo\n\t" 7514 "ADD EDX,$tmp" %} 7515 ins_encode( long_multiply( dst, src, tmp ) ); 7516 ins_pipe( pipe_slow ); 7517%} 7518 7519// Multiply Register Long where the left operand's high 32 bits are zero 7520instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7521 predicate(is_operand_hi32_zero(n->in(1))); 7522 match(Set dst (MulL dst src)); 7523 effect(KILL cr, TEMP tmp); 7524 ins_cost(2*100+2*400); 7525// Basic idea: lo(result) = lo(x_lo * y_lo) 7526// hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0 7527 format %{ "MOV $tmp,$src.hi\n\t" 7528 "IMUL $tmp,EAX\n\t" 7529 "MUL EDX:EAX,$src.lo\n\t" 7530 "ADD EDX,$tmp" %} 7531 ins_encode %{ 7532 __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register)); 7533 __ imull($tmp$$Register, rax); 7534 __ mull($src$$Register); 7535 __ addl(rdx, $tmp$$Register); 7536 %} 7537 ins_pipe( pipe_slow ); 7538%} 7539 7540// Multiply Register Long where the right operand's high 32 bits are zero 7541instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7542 predicate(is_operand_hi32_zero(n->in(2))); 7543 match(Set dst (MulL dst src)); 7544 effect(KILL cr, TEMP tmp); 7545 ins_cost(2*100+2*400); 7546// Basic idea: lo(result) = lo(x_lo * y_lo) 7547// hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0 7548 format %{ "MOV $tmp,$src.lo\n\t" 7549 "IMUL $tmp,EDX\n\t" 7550 "MUL EDX:EAX,$src.lo\n\t" 7551 "ADD EDX,$tmp" %} 7552 ins_encode %{ 7553 __ movl($tmp$$Register, $src$$Register); 7554 __ imull($tmp$$Register, rdx); 7555 __ mull($src$$Register); 7556 __ addl(rdx, $tmp$$Register); 7557 %} 7558 ins_pipe( pipe_slow ); 7559%} 7560 7561// Multiply Register Long where the left and the right operands' high 32 bits are zero 7562instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{ 7563 predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2))); 7564 match(Set dst (MulL dst src)); 7565 effect(KILL cr); 7566 ins_cost(1*400); 7567// Basic idea: lo(result) = lo(x_lo * y_lo) 7568// hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0 7569 format %{ "MUL EDX:EAX,$src.lo\n\t" %} 7570 ins_encode %{ 7571 __ mull($src$$Register); 7572 %} 7573 ins_pipe( pipe_slow ); 7574%} 7575 7576// Multiply Register Long by small constant 7577instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{ 7578 match(Set dst (MulL dst src)); 7579 effect(KILL cr, TEMP tmp); 7580 ins_cost(2*100+2*400); 7581 size(12); 7582// Basic idea: lo(result) = lo(src * EAX) 7583// hi(result) = hi(src * EAX) + lo(src * EDX) 7584 format %{ "IMUL $tmp,EDX,$src\n\t" 7585 "MOV EDX,$src\n\t" 7586 "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t" 7587 "ADD EDX,$tmp" %} 7588 ins_encode( long_multiply_con( dst, src, tmp ) ); 7589 ins_pipe( pipe_slow ); 7590%} 7591 7592// Integer DIV with Register 7593instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{ 7594 match(Set rax (DivI rax div)); 7595 effect(KILL rdx, KILL cr); 7596 size(26); 7597 ins_cost(30*100+10*100); 7598 format %{ "CMP EAX,0x80000000\n\t" 7599 "JNE,s normal\n\t" 7600 "XOR EDX,EDX\n\t" 7601 "CMP ECX,-1\n\t" 7602 "JE,s done\n" 7603 "normal: CDQ\n\t" 7604 "IDIV $div\n\t" 7605 "done:" %} 7606 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7607 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7608 ins_pipe( ialu_reg_reg_alu0 ); 7609%} 7610 7611// Divide Register Long 7612instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{ 7613 match(Set dst (DivL src1 src2)); 7614 effect( KILL cr, KILL cx, KILL bx ); 7615 ins_cost(10000); 7616 format %{ "PUSH $src1.hi\n\t" 7617 "PUSH $src1.lo\n\t" 7618 "PUSH $src2.hi\n\t" 7619 "PUSH $src2.lo\n\t" 7620 "CALL SharedRuntime::ldiv\n\t" 7621 "ADD ESP,16" %} 7622 ins_encode( long_div(src1,src2) ); 7623 ins_pipe( pipe_slow ); 7624%} 7625 7626// Integer DIVMOD with Register, both quotient and mod results 7627instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{ 7628 match(DivModI rax div); 7629 effect(KILL cr); 7630 size(26); 7631 ins_cost(30*100+10*100); 7632 format %{ "CMP EAX,0x80000000\n\t" 7633 "JNE,s normal\n\t" 7634 "XOR EDX,EDX\n\t" 7635 "CMP ECX,-1\n\t" 7636 "JE,s done\n" 7637 "normal: CDQ\n\t" 7638 "IDIV $div\n\t" 7639 "done:" %} 7640 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7641 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7642 ins_pipe( pipe_slow ); 7643%} 7644 7645// Integer MOD with Register 7646instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{ 7647 match(Set rdx (ModI rax div)); 7648 effect(KILL rax, KILL cr); 7649 7650 size(26); 7651 ins_cost(300); 7652 format %{ "CDQ\n\t" 7653 "IDIV $div" %} 7654 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7655 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7656 ins_pipe( ialu_reg_reg_alu0 ); 7657%} 7658 7659// Remainder Register Long 7660instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{ 7661 match(Set dst (ModL src1 src2)); 7662 effect( KILL cr, KILL cx, KILL bx ); 7663 ins_cost(10000); 7664 format %{ "PUSH $src1.hi\n\t" 7665 "PUSH $src1.lo\n\t" 7666 "PUSH $src2.hi\n\t" 7667 "PUSH $src2.lo\n\t" 7668 "CALL SharedRuntime::lrem\n\t" 7669 "ADD ESP,16" %} 7670 ins_encode( long_mod(src1,src2) ); 7671 ins_pipe( pipe_slow ); 7672%} 7673 7674// Divide Register Long (no special case since divisor != -1) 7675instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{ 7676 match(Set dst (DivL dst imm)); 7677 effect( TEMP tmp, TEMP tmp2, KILL cr ); 7678 ins_cost(1000); 7679 format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t" 7680 "XOR $tmp2,$tmp2\n\t" 7681 "CMP $tmp,EDX\n\t" 7682 "JA,s fast\n\t" 7683 "MOV $tmp2,EAX\n\t" 7684 "MOV EAX,EDX\n\t" 7685 "MOV EDX,0\n\t" 7686 "JLE,s pos\n\t" 7687 "LNEG EAX : $tmp2\n\t" 7688 "DIV $tmp # unsigned division\n\t" 7689 "XCHG EAX,$tmp2\n\t" 7690 "DIV $tmp\n\t" 7691 "LNEG $tmp2 : EAX\n\t" 7692 "JMP,s done\n" 7693 "pos:\n\t" 7694 "DIV $tmp\n\t" 7695 "XCHG EAX,$tmp2\n" 7696 "fast:\n\t" 7697 "DIV $tmp\n" 7698 "done:\n\t" 7699 "MOV EDX,$tmp2\n\t" 7700 "NEG EDX:EAX # if $imm < 0" %} 7701 ins_encode %{ 7702 int con = (int)$imm$$constant; 7703 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor"); 7704 int pcon = (con > 0) ? con : -con; 7705 Label Lfast, Lpos, Ldone; 7706 7707 __ movl($tmp$$Register, pcon); 7708 __ xorl($tmp2$$Register,$tmp2$$Register); 7709 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); 7710 __ jccb(Assembler::above, Lfast); // result fits into 32 bit 7711 7712 __ movl($tmp2$$Register, $dst$$Register); // save 7713 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 7714 __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags 7715 __ jccb(Assembler::lessEqual, Lpos); // result is positive 7716 7717 // Negative dividend. 7718 // convert value to positive to use unsigned division 7719 __ lneg($dst$$Register, $tmp2$$Register); 7720 __ divl($tmp$$Register); 7721 __ xchgl($dst$$Register, $tmp2$$Register); 7722 __ divl($tmp$$Register); 7723 // revert result back to negative 7724 __ lneg($tmp2$$Register, $dst$$Register); 7725 __ jmpb(Ldone); 7726 7727 __ bind(Lpos); 7728 __ divl($tmp$$Register); // Use unsigned division 7729 __ xchgl($dst$$Register, $tmp2$$Register); 7730 // Fallthrow for final divide, tmp2 has 32 bit hi result 7731 7732 __ bind(Lfast); 7733 // fast path: src is positive 7734 __ divl($tmp$$Register); // Use unsigned division 7735 7736 __ bind(Ldone); 7737 __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register); 7738 if (con < 0) { 7739 __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register); 7740 } 7741 %} 7742 ins_pipe( pipe_slow ); 7743%} 7744 7745// Remainder Register Long (remainder fit into 32 bits) 7746instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{ 7747 match(Set dst (ModL dst imm)); 7748 effect( TEMP tmp, TEMP tmp2, KILL cr ); 7749 ins_cost(1000); 7750 format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t" 7751 "CMP $tmp,EDX\n\t" 7752 "JA,s fast\n\t" 7753 "MOV $tmp2,EAX\n\t" 7754 "MOV EAX,EDX\n\t" 7755 "MOV EDX,0\n\t" 7756 "JLE,s pos\n\t" 7757 "LNEG EAX : $tmp2\n\t" 7758 "DIV $tmp # unsigned division\n\t" 7759 "MOV EAX,$tmp2\n\t" 7760 "DIV $tmp\n\t" 7761 "NEG EDX\n\t" 7762 "JMP,s done\n" 7763 "pos:\n\t" 7764 "DIV $tmp\n\t" 7765 "MOV EAX,$tmp2\n" 7766 "fast:\n\t" 7767 "DIV $tmp\n" 7768 "done:\n\t" 7769 "MOV EAX,EDX\n\t" 7770 "SAR EDX,31\n\t" %} 7771 ins_encode %{ 7772 int con = (int)$imm$$constant; 7773 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor"); 7774 int pcon = (con > 0) ? con : -con; 7775 Label Lfast, Lpos, Ldone; 7776 7777 __ movl($tmp$$Register, pcon); 7778 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); 7779 __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit 7780 7781 __ movl($tmp2$$Register, $dst$$Register); // save 7782 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 7783 __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags 7784 __ jccb(Assembler::lessEqual, Lpos); // result is positive 7785 7786 // Negative dividend. 7787 // convert value to positive to use unsigned division 7788 __ lneg($dst$$Register, $tmp2$$Register); 7789 __ divl($tmp$$Register); 7790 __ movl($dst$$Register, $tmp2$$Register); 7791 __ divl($tmp$$Register); 7792 // revert remainder back to negative 7793 __ negl(HIGH_FROM_LOW($dst$$Register)); 7794 __ jmpb(Ldone); 7795 7796 __ bind(Lpos); 7797 __ divl($tmp$$Register); 7798 __ movl($dst$$Register, $tmp2$$Register); 7799 7800 __ bind(Lfast); 7801 // fast path: src is positive 7802 __ divl($tmp$$Register); 7803 7804 __ bind(Ldone); 7805 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 7806 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign 7807 7808 %} 7809 ins_pipe( pipe_slow ); 7810%} 7811 7812// Integer Shift Instructions 7813// Shift Left by one 7814instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 7815 match(Set dst (LShiftI dst shift)); 7816 effect(KILL cr); 7817 7818 size(2); 7819 format %{ "SHL $dst,$shift" %} 7820 opcode(0xD1, 0x4); /* D1 /4 */ 7821 ins_encode( OpcP, RegOpc( dst ) ); 7822 ins_pipe( ialu_reg ); 7823%} 7824 7825// Shift Left by 8-bit immediate 7826instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 7827 match(Set dst (LShiftI dst shift)); 7828 effect(KILL cr); 7829 7830 size(3); 7831 format %{ "SHL $dst,$shift" %} 7832 opcode(0xC1, 0x4); /* C1 /4 ib */ 7833 ins_encode( RegOpcImm( dst, shift) ); 7834 ins_pipe( ialu_reg ); 7835%} 7836 7837// Shift Left by variable 7838instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 7839 match(Set dst (LShiftI dst shift)); 7840 effect(KILL cr); 7841 7842 size(2); 7843 format %{ "SHL $dst,$shift" %} 7844 opcode(0xD3, 0x4); /* D3 /4 */ 7845 ins_encode( OpcP, RegOpc( dst ) ); 7846 ins_pipe( ialu_reg_reg ); 7847%} 7848 7849// Arithmetic shift right by one 7850instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 7851 match(Set dst (RShiftI dst shift)); 7852 effect(KILL cr); 7853 7854 size(2); 7855 format %{ "SAR $dst,$shift" %} 7856 opcode(0xD1, 0x7); /* D1 /7 */ 7857 ins_encode( OpcP, RegOpc( dst ) ); 7858 ins_pipe( ialu_reg ); 7859%} 7860 7861// Arithmetic shift right by one 7862instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{ 7863 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); 7864 effect(KILL cr); 7865 format %{ "SAR $dst,$shift" %} 7866 opcode(0xD1, 0x7); /* D1 /7 */ 7867 ins_encode( OpcP, RMopc_Mem(secondary,dst) ); 7868 ins_pipe( ialu_mem_imm ); 7869%} 7870 7871// Arithmetic Shift Right by 8-bit immediate 7872instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 7873 match(Set dst (RShiftI dst shift)); 7874 effect(KILL cr); 7875 7876 size(3); 7877 format %{ "SAR $dst,$shift" %} 7878 opcode(0xC1, 0x7); /* C1 /7 ib */ 7879 ins_encode( RegOpcImm( dst, shift ) ); 7880 ins_pipe( ialu_mem_imm ); 7881%} 7882 7883// Arithmetic Shift Right by 8-bit immediate 7884instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{ 7885 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); 7886 effect(KILL cr); 7887 7888 format %{ "SAR $dst,$shift" %} 7889 opcode(0xC1, 0x7); /* C1 /7 ib */ 7890 ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) ); 7891 ins_pipe( ialu_mem_imm ); 7892%} 7893 7894// Arithmetic Shift Right by variable 7895instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 7896 match(Set dst (RShiftI dst shift)); 7897 effect(KILL cr); 7898 7899 size(2); 7900 format %{ "SAR $dst,$shift" %} 7901 opcode(0xD3, 0x7); /* D3 /7 */ 7902 ins_encode( OpcP, RegOpc( dst ) ); 7903 ins_pipe( ialu_reg_reg ); 7904%} 7905 7906// Logical shift right by one 7907instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 7908 match(Set dst (URShiftI dst shift)); 7909 effect(KILL cr); 7910 7911 size(2); 7912 format %{ "SHR $dst,$shift" %} 7913 opcode(0xD1, 0x5); /* D1 /5 */ 7914 ins_encode( OpcP, RegOpc( dst ) ); 7915 ins_pipe( ialu_reg ); 7916%} 7917 7918// Logical Shift Right by 8-bit immediate 7919instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 7920 match(Set dst (URShiftI dst shift)); 7921 effect(KILL cr); 7922 7923 size(3); 7924 format %{ "SHR $dst,$shift" %} 7925 opcode(0xC1, 0x5); /* C1 /5 ib */ 7926 ins_encode( RegOpcImm( dst, shift) ); 7927 ins_pipe( ialu_reg ); 7928%} 7929 7930 7931// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24. 7932// This idiom is used by the compiler for the i2b bytecode. 7933instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{ 7934 match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); 7935 7936 size(3); 7937 format %{ "MOVSX $dst,$src :8" %} 7938 ins_encode %{ 7939 __ movsbl($dst$$Register, $src$$Register); 7940 %} 7941 ins_pipe(ialu_reg_reg); 7942%} 7943 7944// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16. 7945// This idiom is used by the compiler the i2s bytecode. 7946instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{ 7947 match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); 7948 7949 size(3); 7950 format %{ "MOVSX $dst,$src :16" %} 7951 ins_encode %{ 7952 __ movswl($dst$$Register, $src$$Register); 7953 %} 7954 ins_pipe(ialu_reg_reg); 7955%} 7956 7957 7958// Logical Shift Right by variable 7959instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 7960 match(Set dst (URShiftI dst shift)); 7961 effect(KILL cr); 7962 7963 size(2); 7964 format %{ "SHR $dst,$shift" %} 7965 opcode(0xD3, 0x5); /* D3 /5 */ 7966 ins_encode( OpcP, RegOpc( dst ) ); 7967 ins_pipe( ialu_reg_reg ); 7968%} 7969 7970 7971//----------Logical Instructions----------------------------------------------- 7972//----------Integer Logical Instructions--------------------------------------- 7973// And Instructions 7974// And Register with Register 7975instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7976 match(Set dst (AndI dst src)); 7977 effect(KILL cr); 7978 7979 size(2); 7980 format %{ "AND $dst,$src" %} 7981 opcode(0x23); 7982 ins_encode( OpcP, RegReg( dst, src) ); 7983 ins_pipe( ialu_reg_reg ); 7984%} 7985 7986// And Register with Immediate 7987instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 7988 match(Set dst (AndI dst src)); 7989 effect(KILL cr); 7990 7991 format %{ "AND $dst,$src" %} 7992 opcode(0x81,0x04); /* Opcode 81 /4 */ 7993 // ins_encode( RegImm( dst, src) ); 7994 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 7995 ins_pipe( ialu_reg ); 7996%} 7997 7998// And Register with Memory 7999instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8000 match(Set dst (AndI dst (LoadI src))); 8001 effect(KILL cr); 8002 8003 ins_cost(125); 8004 format %{ "AND $dst,$src" %} 8005 opcode(0x23); 8006 ins_encode( OpcP, RegMem( dst, src) ); 8007 ins_pipe( ialu_reg_mem ); 8008%} 8009 8010// And Memory with Register 8011instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8012 match(Set dst (StoreI dst (AndI (LoadI dst) src))); 8013 effect(KILL cr); 8014 8015 ins_cost(150); 8016 format %{ "AND $dst,$src" %} 8017 opcode(0x21); /* Opcode 21 /r */ 8018 ins_encode( OpcP, RegMem( src, dst ) ); 8019 ins_pipe( ialu_mem_reg ); 8020%} 8021 8022// And Memory with Immediate 8023instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8024 match(Set dst (StoreI dst (AndI (LoadI dst) src))); 8025 effect(KILL cr); 8026 8027 ins_cost(125); 8028 format %{ "AND $dst,$src" %} 8029 opcode(0x81, 0x4); /* Opcode 81 /4 id */ 8030 // ins_encode( MemImm( dst, src) ); 8031 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8032 ins_pipe( ialu_mem_imm ); 8033%} 8034 8035// BMI1 instructions 8036instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{ 8037 match(Set dst (AndI (XorI src1 minus_1) src2)); 8038 predicate(UseBMI1Instructions); 8039 effect(KILL cr); 8040 8041 format %{ "ANDNL $dst, $src1, $src2" %} 8042 8043 ins_encode %{ 8044 __ andnl($dst$$Register, $src1$$Register, $src2$$Register); 8045 %} 8046 ins_pipe(ialu_reg); 8047%} 8048 8049instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{ 8050 match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) )); 8051 predicate(UseBMI1Instructions); 8052 effect(KILL cr); 8053 8054 ins_cost(125); 8055 format %{ "ANDNL $dst, $src1, $src2" %} 8056 8057 ins_encode %{ 8058 __ andnl($dst$$Register, $src1$$Register, $src2$$Address); 8059 %} 8060 ins_pipe(ialu_reg_mem); 8061%} 8062 8063instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{ 8064 match(Set dst (AndI (SubI imm_zero src) src)); 8065 predicate(UseBMI1Instructions); 8066 effect(KILL cr); 8067 8068 format %{ "BLSIL $dst, $src" %} 8069 8070 ins_encode %{ 8071 __ blsil($dst$$Register, $src$$Register); 8072 %} 8073 ins_pipe(ialu_reg); 8074%} 8075 8076instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{ 8077 match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) )); 8078 predicate(UseBMI1Instructions); 8079 effect(KILL cr); 8080 8081 ins_cost(125); 8082 format %{ "BLSIL $dst, $src" %} 8083 8084 ins_encode %{ 8085 __ blsil($dst$$Register, $src$$Address); 8086 %} 8087 ins_pipe(ialu_reg_mem); 8088%} 8089 8090instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr) 8091%{ 8092 match(Set dst (XorI (AddI src minus_1) src)); 8093 predicate(UseBMI1Instructions); 8094 effect(KILL cr); 8095 8096 format %{ "BLSMSKL $dst, $src" %} 8097 8098 ins_encode %{ 8099 __ blsmskl($dst$$Register, $src$$Register); 8100 %} 8101 8102 ins_pipe(ialu_reg); 8103%} 8104 8105instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr) 8106%{ 8107 match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) )); 8108 predicate(UseBMI1Instructions); 8109 effect(KILL cr); 8110 8111 ins_cost(125); 8112 format %{ "BLSMSKL $dst, $src" %} 8113 8114 ins_encode %{ 8115 __ blsmskl($dst$$Register, $src$$Address); 8116 %} 8117 8118 ins_pipe(ialu_reg_mem); 8119%} 8120 8121instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr) 8122%{ 8123 match(Set dst (AndI (AddI src minus_1) src) ); 8124 predicate(UseBMI1Instructions); 8125 effect(KILL cr); 8126 8127 format %{ "BLSRL $dst, $src" %} 8128 8129 ins_encode %{ 8130 __ blsrl($dst$$Register, $src$$Register); 8131 %} 8132 8133 ins_pipe(ialu_reg); 8134%} 8135 8136instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr) 8137%{ 8138 match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) )); 8139 predicate(UseBMI1Instructions); 8140 effect(KILL cr); 8141 8142 ins_cost(125); 8143 format %{ "BLSRL $dst, $src" %} 8144 8145 ins_encode %{ 8146 __ blsrl($dst$$Register, $src$$Address); 8147 %} 8148 8149 ins_pipe(ialu_reg_mem); 8150%} 8151 8152// Or Instructions 8153// Or Register with Register 8154instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 8155 match(Set dst (OrI dst src)); 8156 effect(KILL cr); 8157 8158 size(2); 8159 format %{ "OR $dst,$src" %} 8160 opcode(0x0B); 8161 ins_encode( OpcP, RegReg( dst, src) ); 8162 ins_pipe( ialu_reg_reg ); 8163%} 8164 8165instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{ 8166 match(Set dst (OrI dst (CastP2X src))); 8167 effect(KILL cr); 8168 8169 size(2); 8170 format %{ "OR $dst,$src" %} 8171 opcode(0x0B); 8172 ins_encode( OpcP, RegReg( dst, src) ); 8173 ins_pipe( ialu_reg_reg ); 8174%} 8175 8176 8177// Or Register with Immediate 8178instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8179 match(Set dst (OrI dst src)); 8180 effect(KILL cr); 8181 8182 format %{ "OR $dst,$src" %} 8183 opcode(0x81,0x01); /* Opcode 81 /1 id */ 8184 // ins_encode( RegImm( dst, src) ); 8185 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8186 ins_pipe( ialu_reg ); 8187%} 8188 8189// Or Register with Memory 8190instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8191 match(Set dst (OrI dst (LoadI src))); 8192 effect(KILL cr); 8193 8194 ins_cost(125); 8195 format %{ "OR $dst,$src" %} 8196 opcode(0x0B); 8197 ins_encode( OpcP, RegMem( dst, src) ); 8198 ins_pipe( ialu_reg_mem ); 8199%} 8200 8201// Or Memory with Register 8202instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8203 match(Set dst (StoreI dst (OrI (LoadI dst) src))); 8204 effect(KILL cr); 8205 8206 ins_cost(150); 8207 format %{ "OR $dst,$src" %} 8208 opcode(0x09); /* Opcode 09 /r */ 8209 ins_encode( OpcP, RegMem( src, dst ) ); 8210 ins_pipe( ialu_mem_reg ); 8211%} 8212 8213// Or Memory with Immediate 8214instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8215 match(Set dst (StoreI dst (OrI (LoadI dst) src))); 8216 effect(KILL cr); 8217 8218 ins_cost(125); 8219 format %{ "OR $dst,$src" %} 8220 opcode(0x81,0x1); /* Opcode 81 /1 id */ 8221 // ins_encode( MemImm( dst, src) ); 8222 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8223 ins_pipe( ialu_mem_imm ); 8224%} 8225 8226// ROL/ROR 8227// ROL expand 8228instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 8229 effect(USE_DEF dst, USE shift, KILL cr); 8230 8231 format %{ "ROL $dst, $shift" %} 8232 opcode(0xD1, 0x0); /* Opcode D1 /0 */ 8233 ins_encode( OpcP, RegOpc( dst )); 8234 ins_pipe( ialu_reg ); 8235%} 8236 8237instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8238 effect(USE_DEF dst, USE shift, KILL cr); 8239 8240 format %{ "ROL $dst, $shift" %} 8241 opcode(0xC1, 0x0); /*Opcode /C1 /0 */ 8242 ins_encode( RegOpcImm(dst, shift) ); 8243 ins_pipe(ialu_reg); 8244%} 8245 8246instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{ 8247 effect(USE_DEF dst, USE shift, KILL cr); 8248 8249 format %{ "ROL $dst, $shift" %} 8250 opcode(0xD3, 0x0); /* Opcode D3 /0 */ 8251 ins_encode(OpcP, RegOpc(dst)); 8252 ins_pipe( ialu_reg_reg ); 8253%} 8254// end of ROL expand 8255 8256// ROL 32bit by one once 8257instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{ 8258 match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift))); 8259 8260 expand %{ 8261 rolI_eReg_imm1(dst, lshift, cr); 8262 %} 8263%} 8264 8265// ROL 32bit var by imm8 once 8266instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{ 8267 predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); 8268 match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift))); 8269 8270 expand %{ 8271 rolI_eReg_imm8(dst, lshift, cr); 8272 %} 8273%} 8274 8275// ROL 32bit var by var once 8276instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{ 8277 match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift)))); 8278 8279 expand %{ 8280 rolI_eReg_CL(dst, shift, cr); 8281 %} 8282%} 8283 8284// ROL 32bit var by var once 8285instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{ 8286 match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift)))); 8287 8288 expand %{ 8289 rolI_eReg_CL(dst, shift, cr); 8290 %} 8291%} 8292 8293// ROR expand 8294instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 8295 effect(USE_DEF dst, USE shift, KILL cr); 8296 8297 format %{ "ROR $dst, $shift" %} 8298 opcode(0xD1,0x1); /* Opcode D1 /1 */ 8299 ins_encode( OpcP, RegOpc( dst ) ); 8300 ins_pipe( ialu_reg ); 8301%} 8302 8303instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8304 effect (USE_DEF dst, USE shift, KILL cr); 8305 8306 format %{ "ROR $dst, $shift" %} 8307 opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */ 8308 ins_encode( RegOpcImm(dst, shift) ); 8309 ins_pipe( ialu_reg ); 8310%} 8311 8312instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{ 8313 effect(USE_DEF dst, USE shift, KILL cr); 8314 8315 format %{ "ROR $dst, $shift" %} 8316 opcode(0xD3, 0x1); /* Opcode D3 /1 */ 8317 ins_encode(OpcP, RegOpc(dst)); 8318 ins_pipe( ialu_reg_reg ); 8319%} 8320// end of ROR expand 8321 8322// ROR right once 8323instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{ 8324 match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); 8325 8326 expand %{ 8327 rorI_eReg_imm1(dst, rshift, cr); 8328 %} 8329%} 8330 8331// ROR 32bit by immI8 once 8332instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{ 8333 predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); 8334 match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); 8335 8336 expand %{ 8337 rorI_eReg_imm8(dst, rshift, cr); 8338 %} 8339%} 8340 8341// ROR 32bit var by var once 8342instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{ 8343 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift)))); 8344 8345 expand %{ 8346 rorI_eReg_CL(dst, shift, cr); 8347 %} 8348%} 8349 8350// ROR 32bit var by var once 8351instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{ 8352 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift)))); 8353 8354 expand %{ 8355 rorI_eReg_CL(dst, shift, cr); 8356 %} 8357%} 8358 8359// Xor Instructions 8360// Xor Register with Register 8361instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 8362 match(Set dst (XorI dst src)); 8363 effect(KILL cr); 8364 8365 size(2); 8366 format %{ "XOR $dst,$src" %} 8367 opcode(0x33); 8368 ins_encode( OpcP, RegReg( dst, src) ); 8369 ins_pipe( ialu_reg_reg ); 8370%} 8371 8372// Xor Register with Immediate -1 8373instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{ 8374 match(Set dst (XorI dst imm)); 8375 8376 size(2); 8377 format %{ "NOT $dst" %} 8378 ins_encode %{ 8379 __ notl($dst$$Register); 8380 %} 8381 ins_pipe( ialu_reg ); 8382%} 8383 8384// Xor Register with Immediate 8385instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8386 match(Set dst (XorI dst src)); 8387 effect(KILL cr); 8388 8389 format %{ "XOR $dst,$src" %} 8390 opcode(0x81,0x06); /* Opcode 81 /6 id */ 8391 // ins_encode( RegImm( dst, src) ); 8392 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8393 ins_pipe( ialu_reg ); 8394%} 8395 8396// Xor Register with Memory 8397instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8398 match(Set dst (XorI dst (LoadI src))); 8399 effect(KILL cr); 8400 8401 ins_cost(125); 8402 format %{ "XOR $dst,$src" %} 8403 opcode(0x33); 8404 ins_encode( OpcP, RegMem(dst, src) ); 8405 ins_pipe( ialu_reg_mem ); 8406%} 8407 8408// Xor Memory with Register 8409instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8410 match(Set dst (StoreI dst (XorI (LoadI dst) src))); 8411 effect(KILL cr); 8412 8413 ins_cost(150); 8414 format %{ "XOR $dst,$src" %} 8415 opcode(0x31); /* Opcode 31 /r */ 8416 ins_encode( OpcP, RegMem( src, dst ) ); 8417 ins_pipe( ialu_mem_reg ); 8418%} 8419 8420// Xor Memory with Immediate 8421instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8422 match(Set dst (StoreI dst (XorI (LoadI dst) src))); 8423 effect(KILL cr); 8424 8425 ins_cost(125); 8426 format %{ "XOR $dst,$src" %} 8427 opcode(0x81,0x6); /* Opcode 81 /6 id */ 8428 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8429 ins_pipe( ialu_mem_imm ); 8430%} 8431 8432//----------Convert Int to Boolean--------------------------------------------- 8433 8434instruct movI_nocopy(rRegI dst, rRegI src) %{ 8435 effect( DEF dst, USE src ); 8436 format %{ "MOV $dst,$src" %} 8437 ins_encode( enc_Copy( dst, src) ); 8438 ins_pipe( ialu_reg_reg ); 8439%} 8440 8441instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{ 8442 effect( USE_DEF dst, USE src, KILL cr ); 8443 8444 size(4); 8445 format %{ "NEG $dst\n\t" 8446 "ADC $dst,$src" %} 8447 ins_encode( neg_reg(dst), 8448 OpcRegReg(0x13,dst,src) ); 8449 ins_pipe( ialu_reg_reg_long ); 8450%} 8451 8452instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{ 8453 match(Set dst (Conv2B src)); 8454 8455 expand %{ 8456 movI_nocopy(dst,src); 8457 ci2b(dst,src,cr); 8458 %} 8459%} 8460 8461instruct movP_nocopy(rRegI dst, eRegP src) %{ 8462 effect( DEF dst, USE src ); 8463 format %{ "MOV $dst,$src" %} 8464 ins_encode( enc_Copy( dst, src) ); 8465 ins_pipe( ialu_reg_reg ); 8466%} 8467 8468instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{ 8469 effect( USE_DEF dst, USE src, KILL cr ); 8470 format %{ "NEG $dst\n\t" 8471 "ADC $dst,$src" %} 8472 ins_encode( neg_reg(dst), 8473 OpcRegReg(0x13,dst,src) ); 8474 ins_pipe( ialu_reg_reg_long ); 8475%} 8476 8477instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{ 8478 match(Set dst (Conv2B src)); 8479 8480 expand %{ 8481 movP_nocopy(dst,src); 8482 cp2b(dst,src,cr); 8483 %} 8484%} 8485 8486instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{ 8487 match(Set dst (CmpLTMask p q)); 8488 effect(KILL cr); 8489 ins_cost(400); 8490 8491 // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination 8492 format %{ "XOR $dst,$dst\n\t" 8493 "CMP $p,$q\n\t" 8494 "SETlt $dst\n\t" 8495 "NEG $dst" %} 8496 ins_encode %{ 8497 Register Rp = $p$$Register; 8498 Register Rq = $q$$Register; 8499 Register Rd = $dst$$Register; 8500 Label done; 8501 __ xorl(Rd, Rd); 8502 __ cmpl(Rp, Rq); 8503 __ setb(Assembler::less, Rd); 8504 __ negl(Rd); 8505 %} 8506 8507 ins_pipe(pipe_slow); 8508%} 8509 8510instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{ 8511 match(Set dst (CmpLTMask dst zero)); 8512 effect(DEF dst, KILL cr); 8513 ins_cost(100); 8514 8515 format %{ "SAR $dst,31\t# cmpLTMask0" %} 8516 ins_encode %{ 8517 __ sarl($dst$$Register, 31); 8518 %} 8519 ins_pipe(ialu_reg); 8520%} 8521 8522/* better to save a register than avoid a branch */ 8523instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{ 8524 match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q))); 8525 effect(KILL cr); 8526 ins_cost(400); 8527 format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t" 8528 "JGE done\n\t" 8529 "ADD $p,$y\n" 8530 "done: " %} 8531 ins_encode %{ 8532 Register Rp = $p$$Register; 8533 Register Rq = $q$$Register; 8534 Register Ry = $y$$Register; 8535 Label done; 8536 __ subl(Rp, Rq); 8537 __ jccb(Assembler::greaterEqual, done); 8538 __ addl(Rp, Ry); 8539 __ bind(done); 8540 %} 8541 8542 ins_pipe(pipe_cmplt); 8543%} 8544 8545/* better to save a register than avoid a branch */ 8546instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{ 8547 match(Set y (AndI (CmpLTMask p q) y)); 8548 effect(KILL cr); 8549 8550 ins_cost(300); 8551 8552 format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t" 8553 "JLT done\n\t" 8554 "XORL $y, $y\n" 8555 "done: " %} 8556 ins_encode %{ 8557 Register Rp = $p$$Register; 8558 Register Rq = $q$$Register; 8559 Register Ry = $y$$Register; 8560 Label done; 8561 __ cmpl(Rp, Rq); 8562 __ jccb(Assembler::less, done); 8563 __ xorl(Ry, Ry); 8564 __ bind(done); 8565 %} 8566 8567 ins_pipe(pipe_cmplt); 8568%} 8569 8570/* If I enable this, I encourage spilling in the inner loop of compress. 8571instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{ 8572 match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q))); 8573*/ 8574//----------Overflow Math Instructions----------------------------------------- 8575 8576instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2) 8577%{ 8578 match(Set cr (OverflowAddI op1 op2)); 8579 effect(DEF cr, USE_KILL op1, USE op2); 8580 8581 format %{ "ADD $op1, $op2\t# overflow check int" %} 8582 8583 ins_encode %{ 8584 __ addl($op1$$Register, $op2$$Register); 8585 %} 8586 ins_pipe(ialu_reg_reg); 8587%} 8588 8589instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2) 8590%{ 8591 match(Set cr (OverflowAddI op1 op2)); 8592 effect(DEF cr, USE_KILL op1, USE op2); 8593 8594 format %{ "ADD $op1, $op2\t# overflow check int" %} 8595 8596 ins_encode %{ 8597 __ addl($op1$$Register, $op2$$constant); 8598 %} 8599 ins_pipe(ialu_reg_reg); 8600%} 8601 8602instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2) 8603%{ 8604 match(Set cr (OverflowSubI op1 op2)); 8605 8606 format %{ "CMP $op1, $op2\t# overflow check int" %} 8607 ins_encode %{ 8608 __ cmpl($op1$$Register, $op2$$Register); 8609 %} 8610 ins_pipe(ialu_reg_reg); 8611%} 8612 8613instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2) 8614%{ 8615 match(Set cr (OverflowSubI op1 op2)); 8616 8617 format %{ "CMP $op1, $op2\t# overflow check int" %} 8618 ins_encode %{ 8619 __ cmpl($op1$$Register, $op2$$constant); 8620 %} 8621 ins_pipe(ialu_reg_reg); 8622%} 8623 8624instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2) 8625%{ 8626 match(Set cr (OverflowSubI zero op2)); 8627 effect(DEF cr, USE_KILL op2); 8628 8629 format %{ "NEG $op2\t# overflow check int" %} 8630 ins_encode %{ 8631 __ negl($op2$$Register); 8632 %} 8633 ins_pipe(ialu_reg_reg); 8634%} 8635 8636instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2) 8637%{ 8638 match(Set cr (OverflowMulI op1 op2)); 8639 effect(DEF cr, USE_KILL op1, USE op2); 8640 8641 format %{ "IMUL $op1, $op2\t# overflow check int" %} 8642 ins_encode %{ 8643 __ imull($op1$$Register, $op2$$Register); 8644 %} 8645 ins_pipe(ialu_reg_reg_alu0); 8646%} 8647 8648instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp) 8649%{ 8650 match(Set cr (OverflowMulI op1 op2)); 8651 effect(DEF cr, TEMP tmp, USE op1, USE op2); 8652 8653 format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %} 8654 ins_encode %{ 8655 __ imull($tmp$$Register, $op1$$Register, $op2$$constant); 8656 %} 8657 ins_pipe(ialu_reg_reg_alu0); 8658%} 8659 8660//----------Long Instructions------------------------------------------------ 8661// Add Long Register with Register 8662instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8663 match(Set dst (AddL dst src)); 8664 effect(KILL cr); 8665 ins_cost(200); 8666 format %{ "ADD $dst.lo,$src.lo\n\t" 8667 "ADC $dst.hi,$src.hi" %} 8668 opcode(0x03, 0x13); 8669 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) ); 8670 ins_pipe( ialu_reg_reg_long ); 8671%} 8672 8673// Add Long Register with Immediate 8674instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8675 match(Set dst (AddL dst src)); 8676 effect(KILL cr); 8677 format %{ "ADD $dst.lo,$src.lo\n\t" 8678 "ADC $dst.hi,$src.hi" %} 8679 opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 */ 8680 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8681 ins_pipe( ialu_reg_long ); 8682%} 8683 8684// Add Long Register with Memory 8685instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8686 match(Set dst (AddL dst (LoadL mem))); 8687 effect(KILL cr); 8688 ins_cost(125); 8689 format %{ "ADD $dst.lo,$mem\n\t" 8690 "ADC $dst.hi,$mem+4" %} 8691 opcode(0x03, 0x13); 8692 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8693 ins_pipe( ialu_reg_long_mem ); 8694%} 8695 8696// Subtract Long Register with Register. 8697instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8698 match(Set dst (SubL dst src)); 8699 effect(KILL cr); 8700 ins_cost(200); 8701 format %{ "SUB $dst.lo,$src.lo\n\t" 8702 "SBB $dst.hi,$src.hi" %} 8703 opcode(0x2B, 0x1B); 8704 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) ); 8705 ins_pipe( ialu_reg_reg_long ); 8706%} 8707 8708// Subtract Long Register with Immediate 8709instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8710 match(Set dst (SubL dst src)); 8711 effect(KILL cr); 8712 format %{ "SUB $dst.lo,$src.lo\n\t" 8713 "SBB $dst.hi,$src.hi" %} 8714 opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */ 8715 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8716 ins_pipe( ialu_reg_long ); 8717%} 8718 8719// Subtract Long Register with Memory 8720instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8721 match(Set dst (SubL dst (LoadL mem))); 8722 effect(KILL cr); 8723 ins_cost(125); 8724 format %{ "SUB $dst.lo,$mem\n\t" 8725 "SBB $dst.hi,$mem+4" %} 8726 opcode(0x2B, 0x1B); 8727 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8728 ins_pipe( ialu_reg_long_mem ); 8729%} 8730 8731instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{ 8732 match(Set dst (SubL zero dst)); 8733 effect(KILL cr); 8734 ins_cost(300); 8735 format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %} 8736 ins_encode( neg_long(dst) ); 8737 ins_pipe( ialu_reg_reg_long ); 8738%} 8739 8740// And Long Register with Register 8741instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8742 match(Set dst (AndL dst src)); 8743 effect(KILL cr); 8744 format %{ "AND $dst.lo,$src.lo\n\t" 8745 "AND $dst.hi,$src.hi" %} 8746 opcode(0x23,0x23); 8747 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 8748 ins_pipe( ialu_reg_reg_long ); 8749%} 8750 8751// And Long Register with Immediate 8752instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8753 match(Set dst (AndL dst src)); 8754 effect(KILL cr); 8755 format %{ "AND $dst.lo,$src.lo\n\t" 8756 "AND $dst.hi,$src.hi" %} 8757 opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */ 8758 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8759 ins_pipe( ialu_reg_long ); 8760%} 8761 8762// And Long Register with Memory 8763instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8764 match(Set dst (AndL dst (LoadL mem))); 8765 effect(KILL cr); 8766 ins_cost(125); 8767 format %{ "AND $dst.lo,$mem\n\t" 8768 "AND $dst.hi,$mem+4" %} 8769 opcode(0x23, 0x23); 8770 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8771 ins_pipe( ialu_reg_long_mem ); 8772%} 8773 8774// BMI1 instructions 8775instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{ 8776 match(Set dst (AndL (XorL src1 minus_1) src2)); 8777 predicate(UseBMI1Instructions); 8778 effect(KILL cr, TEMP dst); 8779 8780 format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t" 8781 "ANDNL $dst.hi, $src1.hi, $src2.hi" 8782 %} 8783 8784 ins_encode %{ 8785 Register Rdst = $dst$$Register; 8786 Register Rsrc1 = $src1$$Register; 8787 Register Rsrc2 = $src2$$Register; 8788 __ andnl(Rdst, Rsrc1, Rsrc2); 8789 __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2)); 8790 %} 8791 ins_pipe(ialu_reg_reg_long); 8792%} 8793 8794instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{ 8795 match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) )); 8796 predicate(UseBMI1Instructions); 8797 effect(KILL cr, TEMP dst); 8798 8799 ins_cost(125); 8800 format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t" 8801 "ANDNL $dst.hi, $src1.hi, $src2+4" 8802 %} 8803 8804 ins_encode %{ 8805 Register Rdst = $dst$$Register; 8806 Register Rsrc1 = $src1$$Register; 8807 Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none); 8808 8809 __ andnl(Rdst, Rsrc1, $src2$$Address); 8810 __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi); 8811 %} 8812 ins_pipe(ialu_reg_mem); 8813%} 8814 8815instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{ 8816 match(Set dst (AndL (SubL imm_zero src) src)); 8817 predicate(UseBMI1Instructions); 8818 effect(KILL cr, TEMP dst); 8819 8820 format %{ "MOVL $dst.hi, 0\n\t" 8821 "BLSIL $dst.lo, $src.lo\n\t" 8822 "JNZ done\n\t" 8823 "BLSIL $dst.hi, $src.hi\n" 8824 "done:" 8825 %} 8826 8827 ins_encode %{ 8828 Label done; 8829 Register Rdst = $dst$$Register; 8830 Register Rsrc = $src$$Register; 8831 __ movl(HIGH_FROM_LOW(Rdst), 0); 8832 __ blsil(Rdst, Rsrc); 8833 __ jccb(Assembler::notZero, done); 8834 __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 8835 __ bind(done); 8836 %} 8837 ins_pipe(ialu_reg); 8838%} 8839 8840instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{ 8841 match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) )); 8842 predicate(UseBMI1Instructions); 8843 effect(KILL cr, TEMP dst); 8844 8845 ins_cost(125); 8846 format %{ "MOVL $dst.hi, 0\n\t" 8847 "BLSIL $dst.lo, $src\n\t" 8848 "JNZ done\n\t" 8849 "BLSIL $dst.hi, $src+4\n" 8850 "done:" 8851 %} 8852 8853 ins_encode %{ 8854 Label done; 8855 Register Rdst = $dst$$Register; 8856 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 8857 8858 __ movl(HIGH_FROM_LOW(Rdst), 0); 8859 __ blsil(Rdst, $src$$Address); 8860 __ jccb(Assembler::notZero, done); 8861 __ blsil(HIGH_FROM_LOW(Rdst), src_hi); 8862 __ bind(done); 8863 %} 8864 ins_pipe(ialu_reg_mem); 8865%} 8866 8867instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 8868%{ 8869 match(Set dst (XorL (AddL src minus_1) src)); 8870 predicate(UseBMI1Instructions); 8871 effect(KILL cr, TEMP dst); 8872 8873 format %{ "MOVL $dst.hi, 0\n\t" 8874 "BLSMSKL $dst.lo, $src.lo\n\t" 8875 "JNC done\n\t" 8876 "BLSMSKL $dst.hi, $src.hi\n" 8877 "done:" 8878 %} 8879 8880 ins_encode %{ 8881 Label done; 8882 Register Rdst = $dst$$Register; 8883 Register Rsrc = $src$$Register; 8884 __ movl(HIGH_FROM_LOW(Rdst), 0); 8885 __ blsmskl(Rdst, Rsrc); 8886 __ jccb(Assembler::carryClear, done); 8887 __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 8888 __ bind(done); 8889 %} 8890 8891 ins_pipe(ialu_reg); 8892%} 8893 8894instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 8895%{ 8896 match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) )); 8897 predicate(UseBMI1Instructions); 8898 effect(KILL cr, TEMP dst); 8899 8900 ins_cost(125); 8901 format %{ "MOVL $dst.hi, 0\n\t" 8902 "BLSMSKL $dst.lo, $src\n\t" 8903 "JNC done\n\t" 8904 "BLSMSKL $dst.hi, $src+4\n" 8905 "done:" 8906 %} 8907 8908 ins_encode %{ 8909 Label done; 8910 Register Rdst = $dst$$Register; 8911 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 8912 8913 __ movl(HIGH_FROM_LOW(Rdst), 0); 8914 __ blsmskl(Rdst, $src$$Address); 8915 __ jccb(Assembler::carryClear, done); 8916 __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi); 8917 __ bind(done); 8918 %} 8919 8920 ins_pipe(ialu_reg_mem); 8921%} 8922 8923instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 8924%{ 8925 match(Set dst (AndL (AddL src minus_1) src) ); 8926 predicate(UseBMI1Instructions); 8927 effect(KILL cr, TEMP dst); 8928 8929 format %{ "MOVL $dst.hi, $src.hi\n\t" 8930 "BLSRL $dst.lo, $src.lo\n\t" 8931 "JNC done\n\t" 8932 "BLSRL $dst.hi, $src.hi\n" 8933 "done:" 8934 %} 8935 8936 ins_encode %{ 8937 Label done; 8938 Register Rdst = $dst$$Register; 8939 Register Rsrc = $src$$Register; 8940 __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 8941 __ blsrl(Rdst, Rsrc); 8942 __ jccb(Assembler::carryClear, done); 8943 __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 8944 __ bind(done); 8945 %} 8946 8947 ins_pipe(ialu_reg); 8948%} 8949 8950instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 8951%{ 8952 match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) )); 8953 predicate(UseBMI1Instructions); 8954 effect(KILL cr, TEMP dst); 8955 8956 ins_cost(125); 8957 format %{ "MOVL $dst.hi, $src+4\n\t" 8958 "BLSRL $dst.lo, $src\n\t" 8959 "JNC done\n\t" 8960 "BLSRL $dst.hi, $src+4\n" 8961 "done:" 8962 %} 8963 8964 ins_encode %{ 8965 Label done; 8966 Register Rdst = $dst$$Register; 8967 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 8968 __ movl(HIGH_FROM_LOW(Rdst), src_hi); 8969 __ blsrl(Rdst, $src$$Address); 8970 __ jccb(Assembler::carryClear, done); 8971 __ blsrl(HIGH_FROM_LOW(Rdst), src_hi); 8972 __ bind(done); 8973 %} 8974 8975 ins_pipe(ialu_reg_mem); 8976%} 8977 8978// Or Long Register with Register 8979instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8980 match(Set dst (OrL dst src)); 8981 effect(KILL cr); 8982 format %{ "OR $dst.lo,$src.lo\n\t" 8983 "OR $dst.hi,$src.hi" %} 8984 opcode(0x0B,0x0B); 8985 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 8986 ins_pipe( ialu_reg_reg_long ); 8987%} 8988 8989// Or Long Register with Immediate 8990instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8991 match(Set dst (OrL dst src)); 8992 effect(KILL cr); 8993 format %{ "OR $dst.lo,$src.lo\n\t" 8994 "OR $dst.hi,$src.hi" %} 8995 opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */ 8996 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8997 ins_pipe( ialu_reg_long ); 8998%} 8999 9000// Or Long Register with Memory 9001instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9002 match(Set dst (OrL dst (LoadL mem))); 9003 effect(KILL cr); 9004 ins_cost(125); 9005 format %{ "OR $dst.lo,$mem\n\t" 9006 "OR $dst.hi,$mem+4" %} 9007 opcode(0x0B,0x0B); 9008 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9009 ins_pipe( ialu_reg_long_mem ); 9010%} 9011 9012// Xor Long Register with Register 9013instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9014 match(Set dst (XorL dst src)); 9015 effect(KILL cr); 9016 format %{ "XOR $dst.lo,$src.lo\n\t" 9017 "XOR $dst.hi,$src.hi" %} 9018 opcode(0x33,0x33); 9019 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9020 ins_pipe( ialu_reg_reg_long ); 9021%} 9022 9023// Xor Long Register with Immediate -1 9024instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{ 9025 match(Set dst (XorL dst imm)); 9026 format %{ "NOT $dst.lo\n\t" 9027 "NOT $dst.hi" %} 9028 ins_encode %{ 9029 __ notl($dst$$Register); 9030 __ notl(HIGH_FROM_LOW($dst$$Register)); 9031 %} 9032 ins_pipe( ialu_reg_long ); 9033%} 9034 9035// Xor Long Register with Immediate 9036instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9037 match(Set dst (XorL dst src)); 9038 effect(KILL cr); 9039 format %{ "XOR $dst.lo,$src.lo\n\t" 9040 "XOR $dst.hi,$src.hi" %} 9041 opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */ 9042 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 9043 ins_pipe( ialu_reg_long ); 9044%} 9045 9046// Xor Long Register with Memory 9047instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9048 match(Set dst (XorL dst (LoadL mem))); 9049 effect(KILL cr); 9050 ins_cost(125); 9051 format %{ "XOR $dst.lo,$mem\n\t" 9052 "XOR $dst.hi,$mem+4" %} 9053 opcode(0x33,0x33); 9054 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9055 ins_pipe( ialu_reg_long_mem ); 9056%} 9057 9058// Shift Left Long by 1 9059instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{ 9060 predicate(UseNewLongLShift); 9061 match(Set dst (LShiftL dst cnt)); 9062 effect(KILL cr); 9063 ins_cost(100); 9064 format %{ "ADD $dst.lo,$dst.lo\n\t" 9065 "ADC $dst.hi,$dst.hi" %} 9066 ins_encode %{ 9067 __ addl($dst$$Register,$dst$$Register); 9068 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9069 %} 9070 ins_pipe( ialu_reg_long ); 9071%} 9072 9073// Shift Left Long by 2 9074instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{ 9075 predicate(UseNewLongLShift); 9076 match(Set dst (LShiftL dst cnt)); 9077 effect(KILL cr); 9078 ins_cost(100); 9079 format %{ "ADD $dst.lo,$dst.lo\n\t" 9080 "ADC $dst.hi,$dst.hi\n\t" 9081 "ADD $dst.lo,$dst.lo\n\t" 9082 "ADC $dst.hi,$dst.hi" %} 9083 ins_encode %{ 9084 __ addl($dst$$Register,$dst$$Register); 9085 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9086 __ addl($dst$$Register,$dst$$Register); 9087 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9088 %} 9089 ins_pipe( ialu_reg_long ); 9090%} 9091 9092// Shift Left Long by 3 9093instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{ 9094 predicate(UseNewLongLShift); 9095 match(Set dst (LShiftL dst cnt)); 9096 effect(KILL cr); 9097 ins_cost(100); 9098 format %{ "ADD $dst.lo,$dst.lo\n\t" 9099 "ADC $dst.hi,$dst.hi\n\t" 9100 "ADD $dst.lo,$dst.lo\n\t" 9101 "ADC $dst.hi,$dst.hi\n\t" 9102 "ADD $dst.lo,$dst.lo\n\t" 9103 "ADC $dst.hi,$dst.hi" %} 9104 ins_encode %{ 9105 __ addl($dst$$Register,$dst$$Register); 9106 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9107 __ addl($dst$$Register,$dst$$Register); 9108 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9109 __ addl($dst$$Register,$dst$$Register); 9110 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9111 %} 9112 ins_pipe( ialu_reg_long ); 9113%} 9114 9115// Shift Left Long by 1-31 9116instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9117 match(Set dst (LShiftL dst cnt)); 9118 effect(KILL cr); 9119 ins_cost(200); 9120 format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t" 9121 "SHL $dst.lo,$cnt" %} 9122 opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */ 9123 ins_encode( move_long_small_shift(dst,cnt) ); 9124 ins_pipe( ialu_reg_long ); 9125%} 9126 9127// Shift Left Long by 32-63 9128instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9129 match(Set dst (LShiftL dst cnt)); 9130 effect(KILL cr); 9131 ins_cost(300); 9132 format %{ "MOV $dst.hi,$dst.lo\n" 9133 "\tSHL $dst.hi,$cnt-32\n" 9134 "\tXOR $dst.lo,$dst.lo" %} 9135 opcode(0xC1, 0x4); /* C1 /4 ib */ 9136 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9137 ins_pipe( ialu_reg_long ); 9138%} 9139 9140// Shift Left Long by variable 9141instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9142 match(Set dst (LShiftL dst shift)); 9143 effect(KILL cr); 9144 ins_cost(500+200); 9145 size(17); 9146 format %{ "TEST $shift,32\n\t" 9147 "JEQ,s small\n\t" 9148 "MOV $dst.hi,$dst.lo\n\t" 9149 "XOR $dst.lo,$dst.lo\n" 9150 "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t" 9151 "SHL $dst.lo,$shift" %} 9152 ins_encode( shift_left_long( dst, shift ) ); 9153 ins_pipe( pipe_slow ); 9154%} 9155 9156// Shift Right Long by 1-31 9157instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9158 match(Set dst (URShiftL dst cnt)); 9159 effect(KILL cr); 9160 ins_cost(200); 9161 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9162 "SHR $dst.hi,$cnt" %} 9163 opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */ 9164 ins_encode( move_long_small_shift(dst,cnt) ); 9165 ins_pipe( ialu_reg_long ); 9166%} 9167 9168// Shift Right Long by 32-63 9169instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9170 match(Set dst (URShiftL dst cnt)); 9171 effect(KILL cr); 9172 ins_cost(300); 9173 format %{ "MOV $dst.lo,$dst.hi\n" 9174 "\tSHR $dst.lo,$cnt-32\n" 9175 "\tXOR $dst.hi,$dst.hi" %} 9176 opcode(0xC1, 0x5); /* C1 /5 ib */ 9177 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9178 ins_pipe( ialu_reg_long ); 9179%} 9180 9181// Shift Right Long by variable 9182instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9183 match(Set dst (URShiftL dst shift)); 9184 effect(KILL cr); 9185 ins_cost(600); 9186 size(17); 9187 format %{ "TEST $shift,32\n\t" 9188 "JEQ,s small\n\t" 9189 "MOV $dst.lo,$dst.hi\n\t" 9190 "XOR $dst.hi,$dst.hi\n" 9191 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9192 "SHR $dst.hi,$shift" %} 9193 ins_encode( shift_right_long( dst, shift ) ); 9194 ins_pipe( pipe_slow ); 9195%} 9196 9197// Shift Right Long by 1-31 9198instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9199 match(Set dst (RShiftL dst cnt)); 9200 effect(KILL cr); 9201 ins_cost(200); 9202 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9203 "SAR $dst.hi,$cnt" %} 9204 opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */ 9205 ins_encode( move_long_small_shift(dst,cnt) ); 9206 ins_pipe( ialu_reg_long ); 9207%} 9208 9209// Shift Right Long by 32-63 9210instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9211 match(Set dst (RShiftL dst cnt)); 9212 effect(KILL cr); 9213 ins_cost(300); 9214 format %{ "MOV $dst.lo,$dst.hi\n" 9215 "\tSAR $dst.lo,$cnt-32\n" 9216 "\tSAR $dst.hi,31" %} 9217 opcode(0xC1, 0x7); /* C1 /7 ib */ 9218 ins_encode( move_long_big_shift_sign(dst,cnt) ); 9219 ins_pipe( ialu_reg_long ); 9220%} 9221 9222// Shift Right arithmetic Long by variable 9223instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9224 match(Set dst (RShiftL dst shift)); 9225 effect(KILL cr); 9226 ins_cost(600); 9227 size(18); 9228 format %{ "TEST $shift,32\n\t" 9229 "JEQ,s small\n\t" 9230 "MOV $dst.lo,$dst.hi\n\t" 9231 "SAR $dst.hi,31\n" 9232 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9233 "SAR $dst.hi,$shift" %} 9234 ins_encode( shift_right_arith_long( dst, shift ) ); 9235 ins_pipe( pipe_slow ); 9236%} 9237 9238 9239//----------Double Instructions------------------------------------------------ 9240// Double Math 9241 9242// Compare & branch 9243 9244// P6 version of float compare, sets condition codes in EFLAGS 9245instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9246 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9247 match(Set cr (CmpD src1 src2)); 9248 effect(KILL rax); 9249 ins_cost(150); 9250 format %{ "FLD $src1\n\t" 9251 "FUCOMIP ST,$src2 // P6 instruction\n\t" 9252 "JNP exit\n\t" 9253 "MOV ah,1 // saw a NaN, set CF\n\t" 9254 "SAHF\n" 9255 "exit:\tNOP // avoid branch to branch" %} 9256 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9257 ins_encode( Push_Reg_DPR(src1), 9258 OpcP, RegOpc(src2), 9259 cmpF_P6_fixup ); 9260 ins_pipe( pipe_slow ); 9261%} 9262 9263instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{ 9264 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9265 match(Set cr (CmpD src1 src2)); 9266 ins_cost(150); 9267 format %{ "FLD $src1\n\t" 9268 "FUCOMIP ST,$src2 // P6 instruction" %} 9269 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9270 ins_encode( Push_Reg_DPR(src1), 9271 OpcP, RegOpc(src2)); 9272 ins_pipe( pipe_slow ); 9273%} 9274 9275// Compare & branch 9276instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9277 predicate(UseSSE<=1); 9278 match(Set cr (CmpD src1 src2)); 9279 effect(KILL rax); 9280 ins_cost(200); 9281 format %{ "FLD $src1\n\t" 9282 "FCOMp $src2\n\t" 9283 "FNSTSW AX\n\t" 9284 "TEST AX,0x400\n\t" 9285 "JZ,s flags\n\t" 9286 "MOV AH,1\t# unordered treat as LT\n" 9287 "flags:\tSAHF" %} 9288 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9289 ins_encode( Push_Reg_DPR(src1), 9290 OpcP, RegOpc(src2), 9291 fpu_flags); 9292 ins_pipe( pipe_slow ); 9293%} 9294 9295// Compare vs zero into -1,0,1 9296instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 9297 predicate(UseSSE<=1); 9298 match(Set dst (CmpD3 src1 zero)); 9299 effect(KILL cr, KILL rax); 9300 ins_cost(280); 9301 format %{ "FTSTD $dst,$src1" %} 9302 opcode(0xE4, 0xD9); 9303 ins_encode( Push_Reg_DPR(src1), 9304 OpcS, OpcP, PopFPU, 9305 CmpF_Result(dst)); 9306 ins_pipe( pipe_slow ); 9307%} 9308 9309// Compare into -1,0,1 9310instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{ 9311 predicate(UseSSE<=1); 9312 match(Set dst (CmpD3 src1 src2)); 9313 effect(KILL cr, KILL rax); 9314 ins_cost(300); 9315 format %{ "FCMPD $dst,$src1,$src2" %} 9316 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9317 ins_encode( Push_Reg_DPR(src1), 9318 OpcP, RegOpc(src2), 9319 CmpF_Result(dst)); 9320 ins_pipe( pipe_slow ); 9321%} 9322 9323// float compare and set condition codes in EFLAGS by XMM regs 9324instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{ 9325 predicate(UseSSE>=2); 9326 match(Set cr (CmpD src1 src2)); 9327 ins_cost(145); 9328 format %{ "UCOMISD $src1,$src2\n\t" 9329 "JNP,s exit\n\t" 9330 "PUSHF\t# saw NaN, set CF\n\t" 9331 "AND [rsp], #0xffffff2b\n\t" 9332 "POPF\n" 9333 "exit:" %} 9334 ins_encode %{ 9335 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9336 emit_cmpfp_fixup(_masm); 9337 %} 9338 ins_pipe( pipe_slow ); 9339%} 9340 9341instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{ 9342 predicate(UseSSE>=2); 9343 match(Set cr (CmpD src1 src2)); 9344 ins_cost(100); 9345 format %{ "UCOMISD $src1,$src2" %} 9346 ins_encode %{ 9347 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9348 %} 9349 ins_pipe( pipe_slow ); 9350%} 9351 9352// float compare and set condition codes in EFLAGS by XMM regs 9353instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{ 9354 predicate(UseSSE>=2); 9355 match(Set cr (CmpD src1 (LoadD src2))); 9356 ins_cost(145); 9357 format %{ "UCOMISD $src1,$src2\n\t" 9358 "JNP,s exit\n\t" 9359 "PUSHF\t# saw NaN, set CF\n\t" 9360 "AND [rsp], #0xffffff2b\n\t" 9361 "POPF\n" 9362 "exit:" %} 9363 ins_encode %{ 9364 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9365 emit_cmpfp_fixup(_masm); 9366 %} 9367 ins_pipe( pipe_slow ); 9368%} 9369 9370instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{ 9371 predicate(UseSSE>=2); 9372 match(Set cr (CmpD src1 (LoadD src2))); 9373 ins_cost(100); 9374 format %{ "UCOMISD $src1,$src2" %} 9375 ins_encode %{ 9376 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9377 %} 9378 ins_pipe( pipe_slow ); 9379%} 9380 9381// Compare into -1,0,1 in XMM 9382instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{ 9383 predicate(UseSSE>=2); 9384 match(Set dst (CmpD3 src1 src2)); 9385 effect(KILL cr); 9386 ins_cost(255); 9387 format %{ "UCOMISD $src1, $src2\n\t" 9388 "MOV $dst, #-1\n\t" 9389 "JP,s done\n\t" 9390 "JB,s done\n\t" 9391 "SETNE $dst\n\t" 9392 "MOVZB $dst, $dst\n" 9393 "done:" %} 9394 ins_encode %{ 9395 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9396 emit_cmpfp3(_masm, $dst$$Register); 9397 %} 9398 ins_pipe( pipe_slow ); 9399%} 9400 9401// Compare into -1,0,1 in XMM and memory 9402instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{ 9403 predicate(UseSSE>=2); 9404 match(Set dst (CmpD3 src1 (LoadD src2))); 9405 effect(KILL cr); 9406 ins_cost(275); 9407 format %{ "UCOMISD $src1, $src2\n\t" 9408 "MOV $dst, #-1\n\t" 9409 "JP,s done\n\t" 9410 "JB,s done\n\t" 9411 "SETNE $dst\n\t" 9412 "MOVZB $dst, $dst\n" 9413 "done:" %} 9414 ins_encode %{ 9415 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9416 emit_cmpfp3(_masm, $dst$$Register); 9417 %} 9418 ins_pipe( pipe_slow ); 9419%} 9420 9421 9422instruct subDPR_reg(regDPR dst, regDPR src) %{ 9423 predicate (UseSSE <=1); 9424 match(Set dst (SubD dst src)); 9425 9426 format %{ "FLD $src\n\t" 9427 "DSUBp $dst,ST" %} 9428 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 9429 ins_cost(150); 9430 ins_encode( Push_Reg_DPR(src), 9431 OpcP, RegOpc(dst) ); 9432 ins_pipe( fpu_reg_reg ); 9433%} 9434 9435instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9436 predicate (UseSSE <=1); 9437 match(Set dst (RoundDouble (SubD src1 src2))); 9438 ins_cost(250); 9439 9440 format %{ "FLD $src2\n\t" 9441 "DSUB ST,$src1\n\t" 9442 "FSTP_D $dst\t# D-round" %} 9443 opcode(0xD8, 0x5); 9444 ins_encode( Push_Reg_DPR(src2), 9445 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9446 ins_pipe( fpu_mem_reg_reg ); 9447%} 9448 9449 9450instruct subDPR_reg_mem(regDPR dst, memory src) %{ 9451 predicate (UseSSE <=1); 9452 match(Set dst (SubD dst (LoadD src))); 9453 ins_cost(150); 9454 9455 format %{ "FLD $src\n\t" 9456 "DSUBp $dst,ST" %} 9457 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9458 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9459 OpcP, RegOpc(dst) ); 9460 ins_pipe( fpu_reg_mem ); 9461%} 9462 9463instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{ 9464 predicate (UseSSE<=1); 9465 match(Set dst (AbsD src)); 9466 ins_cost(100); 9467 format %{ "FABS" %} 9468 opcode(0xE1, 0xD9); 9469 ins_encode( OpcS, OpcP ); 9470 ins_pipe( fpu_reg_reg ); 9471%} 9472 9473instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{ 9474 predicate(UseSSE<=1); 9475 match(Set dst (NegD src)); 9476 ins_cost(100); 9477 format %{ "FCHS" %} 9478 opcode(0xE0, 0xD9); 9479 ins_encode( OpcS, OpcP ); 9480 ins_pipe( fpu_reg_reg ); 9481%} 9482 9483instruct addDPR_reg(regDPR dst, regDPR src) %{ 9484 predicate(UseSSE<=1); 9485 match(Set dst (AddD dst src)); 9486 format %{ "FLD $src\n\t" 9487 "DADD $dst,ST" %} 9488 size(4); 9489 ins_cost(150); 9490 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 9491 ins_encode( Push_Reg_DPR(src), 9492 OpcP, RegOpc(dst) ); 9493 ins_pipe( fpu_reg_reg ); 9494%} 9495 9496 9497instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9498 predicate(UseSSE<=1); 9499 match(Set dst (RoundDouble (AddD src1 src2))); 9500 ins_cost(250); 9501 9502 format %{ "FLD $src2\n\t" 9503 "DADD ST,$src1\n\t" 9504 "FSTP_D $dst\t# D-round" %} 9505 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ 9506 ins_encode( Push_Reg_DPR(src2), 9507 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9508 ins_pipe( fpu_mem_reg_reg ); 9509%} 9510 9511 9512instruct addDPR_reg_mem(regDPR dst, memory src) %{ 9513 predicate(UseSSE<=1); 9514 match(Set dst (AddD dst (LoadD src))); 9515 ins_cost(150); 9516 9517 format %{ "FLD $src\n\t" 9518 "DADDp $dst,ST" %} 9519 opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9520 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9521 OpcP, RegOpc(dst) ); 9522 ins_pipe( fpu_reg_mem ); 9523%} 9524 9525// add-to-memory 9526instruct addDPR_mem_reg(memory dst, regDPR src) %{ 9527 predicate(UseSSE<=1); 9528 match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src)))); 9529 ins_cost(150); 9530 9531 format %{ "FLD_D $dst\n\t" 9532 "DADD ST,$src\n\t" 9533 "FST_D $dst" %} 9534 opcode(0xDD, 0x0); 9535 ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst), 9536 Opcode(0xD8), RegOpc(src), 9537 set_instruction_start, 9538 Opcode(0xDD), RMopc_Mem(0x03,dst) ); 9539 ins_pipe( fpu_reg_mem ); 9540%} 9541 9542instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{ 9543 predicate(UseSSE<=1); 9544 match(Set dst (AddD dst con)); 9545 ins_cost(125); 9546 format %{ "FLD1\n\t" 9547 "DADDp $dst,ST" %} 9548 ins_encode %{ 9549 __ fld1(); 9550 __ faddp($dst$$reg); 9551 %} 9552 ins_pipe(fpu_reg); 9553%} 9554 9555instruct addDPR_reg_imm(regDPR dst, immDPR con) %{ 9556 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9557 match(Set dst (AddD dst con)); 9558 ins_cost(200); 9559 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9560 "DADDp $dst,ST" %} 9561 ins_encode %{ 9562 __ fld_d($constantaddress($con)); 9563 __ faddp($dst$$reg); 9564 %} 9565 ins_pipe(fpu_reg_mem); 9566%} 9567 9568instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{ 9569 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); 9570 match(Set dst (RoundDouble (AddD src con))); 9571 ins_cost(200); 9572 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9573 "DADD ST,$src\n\t" 9574 "FSTP_D $dst\t# D-round" %} 9575 ins_encode %{ 9576 __ fld_d($constantaddress($con)); 9577 __ fadd($src$$reg); 9578 __ fstp_d(Address(rsp, $dst$$disp)); 9579 %} 9580 ins_pipe(fpu_mem_reg_con); 9581%} 9582 9583instruct mulDPR_reg(regDPR dst, regDPR src) %{ 9584 predicate(UseSSE<=1); 9585 match(Set dst (MulD dst src)); 9586 format %{ "FLD $src\n\t" 9587 "DMULp $dst,ST" %} 9588 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9589 ins_cost(150); 9590 ins_encode( Push_Reg_DPR(src), 9591 OpcP, RegOpc(dst) ); 9592 ins_pipe( fpu_reg_reg ); 9593%} 9594 9595// Strict FP instruction biases argument before multiply then 9596// biases result to avoid double rounding of subnormals. 9597// 9598// scale arg1 by multiplying arg1 by 2^(-15360) 9599// load arg2 9600// multiply scaled arg1 by arg2 9601// rescale product by 2^(15360) 9602// 9603instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9604 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 9605 match(Set dst (MulD dst src)); 9606 ins_cost(1); // Select this instruction for all strict FP double multiplies 9607 9608 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" 9609 "DMULp $dst,ST\n\t" 9610 "FLD $src\n\t" 9611 "DMULp $dst,ST\n\t" 9612 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" 9613 "DMULp $dst,ST\n\t" %} 9614 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9615 ins_encode( strictfp_bias1(dst), 9616 Push_Reg_DPR(src), 9617 OpcP, RegOpc(dst), 9618 strictfp_bias2(dst) ); 9619 ins_pipe( fpu_reg_reg ); 9620%} 9621 9622instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{ 9623 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9624 match(Set dst (MulD dst con)); 9625 ins_cost(200); 9626 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9627 "DMULp $dst,ST" %} 9628 ins_encode %{ 9629 __ fld_d($constantaddress($con)); 9630 __ fmulp($dst$$reg); 9631 %} 9632 ins_pipe(fpu_reg_mem); 9633%} 9634 9635 9636instruct mulDPR_reg_mem(regDPR dst, memory src) %{ 9637 predicate( UseSSE<=1 ); 9638 match(Set dst (MulD dst (LoadD src))); 9639 ins_cost(200); 9640 format %{ "FLD_D $src\n\t" 9641 "DMULp $dst,ST" %} 9642 opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */ 9643 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9644 OpcP, RegOpc(dst) ); 9645 ins_pipe( fpu_reg_mem ); 9646%} 9647 9648// 9649// Cisc-alternate to reg-reg multiply 9650instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{ 9651 predicate( UseSSE<=1 ); 9652 match(Set dst (MulD src (LoadD mem))); 9653 ins_cost(250); 9654 format %{ "FLD_D $mem\n\t" 9655 "DMUL ST,$src\n\t" 9656 "FSTP_D $dst" %} 9657 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */ 9658 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem), 9659 OpcReg_FPR(src), 9660 Pop_Reg_DPR(dst) ); 9661 ins_pipe( fpu_reg_reg_mem ); 9662%} 9663 9664 9665// MACRO3 -- addDPR a mulDPR 9666// This instruction is a '2-address' instruction in that the result goes 9667// back to src2. This eliminates a move from the macro; possibly the 9668// register allocator will have to add it back (and maybe not). 9669instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9670 predicate( UseSSE<=1 ); 9671 match(Set src2 (AddD (MulD src0 src1) src2)); 9672 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9673 "DMUL ST,$src1\n\t" 9674 "DADDp $src2,ST" %} 9675 ins_cost(250); 9676 opcode(0xDD); /* LoadD DD /0 */ 9677 ins_encode( Push_Reg_FPR(src0), 9678 FMul_ST_reg(src1), 9679 FAddP_reg_ST(src2) ); 9680 ins_pipe( fpu_reg_reg_reg ); 9681%} 9682 9683 9684// MACRO3 -- subDPR a mulDPR 9685instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9686 predicate( UseSSE<=1 ); 9687 match(Set src2 (SubD (MulD src0 src1) src2)); 9688 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9689 "DMUL ST,$src1\n\t" 9690 "DSUBRp $src2,ST" %} 9691 ins_cost(250); 9692 ins_encode( Push_Reg_FPR(src0), 9693 FMul_ST_reg(src1), 9694 Opcode(0xDE), Opc_plus(0xE0,src2)); 9695 ins_pipe( fpu_reg_reg_reg ); 9696%} 9697 9698 9699instruct divDPR_reg(regDPR dst, regDPR src) %{ 9700 predicate( UseSSE<=1 ); 9701 match(Set dst (DivD dst src)); 9702 9703 format %{ "FLD $src\n\t" 9704 "FDIVp $dst,ST" %} 9705 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 9706 ins_cost(150); 9707 ins_encode( Push_Reg_DPR(src), 9708 OpcP, RegOpc(dst) ); 9709 ins_pipe( fpu_reg_reg ); 9710%} 9711 9712// Strict FP instruction biases argument before division then 9713// biases result, to avoid double rounding of subnormals. 9714// 9715// scale dividend by multiplying dividend by 2^(-15360) 9716// load divisor 9717// divide scaled dividend by divisor 9718// rescale quotient by 2^(15360) 9719// 9720instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9721 predicate (UseSSE<=1); 9722 match(Set dst (DivD dst src)); 9723 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 9724 ins_cost(01); 9725 9726 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" 9727 "DMULp $dst,ST\n\t" 9728 "FLD $src\n\t" 9729 "FDIVp $dst,ST\n\t" 9730 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" 9731 "DMULp $dst,ST\n\t" %} 9732 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 9733 ins_encode( strictfp_bias1(dst), 9734 Push_Reg_DPR(src), 9735 OpcP, RegOpc(dst), 9736 strictfp_bias2(dst) ); 9737 ins_pipe( fpu_reg_reg ); 9738%} 9739 9740instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9741 predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) ); 9742 match(Set dst (RoundDouble (DivD src1 src2))); 9743 9744 format %{ "FLD $src1\n\t" 9745 "FDIV ST,$src2\n\t" 9746 "FSTP_D $dst\t# D-round" %} 9747 opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */ 9748 ins_encode( Push_Reg_DPR(src1), 9749 OpcP, RegOpc(src2), Pop_Mem_DPR(dst) ); 9750 ins_pipe( fpu_mem_reg_reg ); 9751%} 9752 9753 9754instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{ 9755 predicate(UseSSE<=1); 9756 match(Set dst (ModD dst src)); 9757 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 9758 9759 format %{ "DMOD $dst,$src" %} 9760 ins_cost(250); 9761 ins_encode(Push_Reg_Mod_DPR(dst, src), 9762 emitModDPR(), 9763 Push_Result_Mod_DPR(src), 9764 Pop_Reg_DPR(dst)); 9765 ins_pipe( pipe_slow ); 9766%} 9767 9768instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{ 9769 predicate(UseSSE>=2); 9770 match(Set dst (ModD src0 src1)); 9771 effect(KILL rax, KILL cr); 9772 9773 format %{ "SUB ESP,8\t # DMOD\n" 9774 "\tMOVSD [ESP+0],$src1\n" 9775 "\tFLD_D [ESP+0]\n" 9776 "\tMOVSD [ESP+0],$src0\n" 9777 "\tFLD_D [ESP+0]\n" 9778 "loop:\tFPREM\n" 9779 "\tFWAIT\n" 9780 "\tFNSTSW AX\n" 9781 "\tSAHF\n" 9782 "\tJP loop\n" 9783 "\tFSTP_D [ESP+0]\n" 9784 "\tMOVSD $dst,[ESP+0]\n" 9785 "\tADD ESP,8\n" 9786 "\tFSTP ST0\t # Restore FPU Stack" 9787 %} 9788 ins_cost(250); 9789 ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU); 9790 ins_pipe( pipe_slow ); 9791%} 9792 9793instruct sinDPR_reg(regDPR1 dst, regDPR1 src) %{ 9794 predicate (UseSSE<=1); 9795 match(Set dst (SinD src)); 9796 ins_cost(1800); 9797 format %{ "DSIN $dst" %} 9798 opcode(0xD9, 0xFE); 9799 ins_encode( OpcP, OpcS ); 9800 ins_pipe( pipe_slow ); 9801%} 9802 9803instruct sinD_reg(regD dst, eFlagsReg cr) %{ 9804 predicate (UseSSE>=2); 9805 match(Set dst (SinD dst)); 9806 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9807 ins_cost(1800); 9808 format %{ "DSIN $dst" %} 9809 opcode(0xD9, 0xFE); 9810 ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) ); 9811 ins_pipe( pipe_slow ); 9812%} 9813 9814instruct cosDPR_reg(regDPR1 dst, regDPR1 src) %{ 9815 predicate (UseSSE<=1); 9816 match(Set dst (CosD src)); 9817 ins_cost(1800); 9818 format %{ "DCOS $dst" %} 9819 opcode(0xD9, 0xFF); 9820 ins_encode( OpcP, OpcS ); 9821 ins_pipe( pipe_slow ); 9822%} 9823 9824instruct cosD_reg(regD dst, eFlagsReg cr) %{ 9825 predicate (UseSSE>=2); 9826 match(Set dst (CosD dst)); 9827 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9828 ins_cost(1800); 9829 format %{ "DCOS $dst" %} 9830 opcode(0xD9, 0xFF); 9831 ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) ); 9832 ins_pipe( pipe_slow ); 9833%} 9834 9835instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{ 9836 predicate (UseSSE<=1); 9837 match(Set dst(TanD src)); 9838 format %{ "DTAN $dst" %} 9839 ins_encode( Opcode(0xD9), Opcode(0xF2), // fptan 9840 Opcode(0xDD), Opcode(0xD8)); // fstp st 9841 ins_pipe( pipe_slow ); 9842%} 9843 9844instruct tanD_reg(regD dst, eFlagsReg cr) %{ 9845 predicate (UseSSE>=2); 9846 match(Set dst(TanD dst)); 9847 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9848 format %{ "DTAN $dst" %} 9849 ins_encode( Push_SrcD(dst), 9850 Opcode(0xD9), Opcode(0xF2), // fptan 9851 Opcode(0xDD), Opcode(0xD8), // fstp st 9852 Push_ResultD(dst) ); 9853 ins_pipe( pipe_slow ); 9854%} 9855 9856instruct atanDPR_reg(regDPR dst, regDPR src) %{ 9857 predicate (UseSSE<=1); 9858 match(Set dst(AtanD dst src)); 9859 format %{ "DATA $dst,$src" %} 9860 opcode(0xD9, 0xF3); 9861 ins_encode( Push_Reg_DPR(src), 9862 OpcP, OpcS, RegOpc(dst) ); 9863 ins_pipe( pipe_slow ); 9864%} 9865 9866instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{ 9867 predicate (UseSSE>=2); 9868 match(Set dst(AtanD dst src)); 9869 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9870 format %{ "DATA $dst,$src" %} 9871 opcode(0xD9, 0xF3); 9872 ins_encode( Push_SrcD(src), 9873 OpcP, OpcS, Push_ResultD(dst) ); 9874 ins_pipe( pipe_slow ); 9875%} 9876 9877instruct sqrtDPR_reg(regDPR dst, regDPR src) %{ 9878 predicate (UseSSE<=1); 9879 match(Set dst (SqrtD src)); 9880 format %{ "DSQRT $dst,$src" %} 9881 opcode(0xFA, 0xD9); 9882 ins_encode( Push_Reg_DPR(src), 9883 OpcS, OpcP, Pop_Reg_DPR(dst) ); 9884 ins_pipe( pipe_slow ); 9885%} 9886 9887instruct powDPR_reg(regDPR X, regDPR1 Y, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{ 9888 predicate (UseSSE<=1); 9889 match(Set Y (PowD X Y)); // Raise X to the Yth power 9890 effect(KILL rax, KILL rdx, KILL rcx, KILL cr); 9891 format %{ "fast_pow $X $Y -> $Y // KILL $rax, $rcx, $rdx" %} 9892 ins_encode %{ 9893 __ subptr(rsp, 8); 9894 __ fld_s($X$$reg - 1); 9895 __ fast_pow(); 9896 __ addptr(rsp, 8); 9897 %} 9898 ins_pipe( pipe_slow ); 9899%} 9900 9901instruct powD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{ 9902 predicate (UseSSE>=2); 9903 match(Set dst (PowD src0 src1)); // Raise src0 to the src1'th power 9904 effect(KILL rax, KILL rdx, KILL rcx, KILL cr); 9905 format %{ "fast_pow $src0 $src1 -> $dst // KILL $rax, $rcx, $rdx" %} 9906 ins_encode %{ 9907 __ subptr(rsp, 8); 9908 __ movdbl(Address(rsp, 0), $src1$$XMMRegister); 9909 __ fld_d(Address(rsp, 0)); 9910 __ movdbl(Address(rsp, 0), $src0$$XMMRegister); 9911 __ fld_d(Address(rsp, 0)); 9912 __ fast_pow(); 9913 __ fstp_d(Address(rsp, 0)); 9914 __ movdbl($dst$$XMMRegister, Address(rsp, 0)); 9915 __ addptr(rsp, 8); 9916 %} 9917 ins_pipe( pipe_slow ); 9918%} 9919 9920instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{ 9921 predicate (UseSSE<=1); 9922 // The source Double operand on FPU stack 9923 match(Set dst (Log10D src)); 9924 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number 9925 // fxch ; swap ST(0) with ST(1) 9926 // fyl2x ; compute log_10(2) * log_2(x) 9927 format %{ "FLDLG2 \t\t\t#Log10\n\t" 9928 "FXCH \n\t" 9929 "FYL2X \t\t\t# Q=Log10*Log_2(x)" 9930 %} 9931 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2 9932 Opcode(0xD9), Opcode(0xC9), // fxch 9933 Opcode(0xD9), Opcode(0xF1)); // fyl2x 9934 9935 ins_pipe( pipe_slow ); 9936%} 9937 9938instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{ 9939 predicate (UseSSE>=2); 9940 effect(KILL cr); 9941 match(Set dst (Log10D src)); 9942 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number 9943 // fyl2x ; compute log_10(2) * log_2(x) 9944 format %{ "FLDLG2 \t\t\t#Log10\n\t" 9945 "FYL2X \t\t\t# Q=Log10*Log_2(x)" 9946 %} 9947 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2 9948 Push_SrcD(src), 9949 Opcode(0xD9), Opcode(0xF1), // fyl2x 9950 Push_ResultD(dst)); 9951 9952 ins_pipe( pipe_slow ); 9953%} 9954 9955//-------------Float Instructions------------------------------- 9956// Float Math 9957 9958// Code for float compare: 9959// fcompp(); 9960// fwait(); fnstsw_ax(); 9961// sahf(); 9962// movl(dst, unordered_result); 9963// jcc(Assembler::parity, exit); 9964// movl(dst, less_result); 9965// jcc(Assembler::below, exit); 9966// movl(dst, equal_result); 9967// jcc(Assembler::equal, exit); 9968// movl(dst, greater_result); 9969// exit: 9970 9971// P6 version of float compare, sets condition codes in EFLAGS 9972instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 9973 predicate(VM_Version::supports_cmov() && UseSSE == 0); 9974 match(Set cr (CmpF src1 src2)); 9975 effect(KILL rax); 9976 ins_cost(150); 9977 format %{ "FLD $src1\n\t" 9978 "FUCOMIP ST,$src2 // P6 instruction\n\t" 9979 "JNP exit\n\t" 9980 "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t" 9981 "SAHF\n" 9982 "exit:\tNOP // avoid branch to branch" %} 9983 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9984 ins_encode( Push_Reg_DPR(src1), 9985 OpcP, RegOpc(src2), 9986 cmpF_P6_fixup ); 9987 ins_pipe( pipe_slow ); 9988%} 9989 9990instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{ 9991 predicate(VM_Version::supports_cmov() && UseSSE == 0); 9992 match(Set cr (CmpF src1 src2)); 9993 ins_cost(100); 9994 format %{ "FLD $src1\n\t" 9995 "FUCOMIP ST,$src2 // P6 instruction" %} 9996 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9997 ins_encode( Push_Reg_DPR(src1), 9998 OpcP, RegOpc(src2)); 9999 ins_pipe( pipe_slow ); 10000%} 10001 10002 10003// Compare & branch 10004instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 10005 predicate(UseSSE == 0); 10006 match(Set cr (CmpF src1 src2)); 10007 effect(KILL rax); 10008 ins_cost(200); 10009 format %{ "FLD $src1\n\t" 10010 "FCOMp $src2\n\t" 10011 "FNSTSW AX\n\t" 10012 "TEST AX,0x400\n\t" 10013 "JZ,s flags\n\t" 10014 "MOV AH,1\t# unordered treat as LT\n" 10015 "flags:\tSAHF" %} 10016 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 10017 ins_encode( Push_Reg_DPR(src1), 10018 OpcP, RegOpc(src2), 10019 fpu_flags); 10020 ins_pipe( pipe_slow ); 10021%} 10022 10023// Compare vs zero into -1,0,1 10024instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 10025 predicate(UseSSE == 0); 10026 match(Set dst (CmpF3 src1 zero)); 10027 effect(KILL cr, KILL rax); 10028 ins_cost(280); 10029 format %{ "FTSTF $dst,$src1" %} 10030 opcode(0xE4, 0xD9); 10031 ins_encode( Push_Reg_DPR(src1), 10032 OpcS, OpcP, PopFPU, 10033 CmpF_Result(dst)); 10034 ins_pipe( pipe_slow ); 10035%} 10036 10037// Compare into -1,0,1 10038instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ 10039 predicate(UseSSE == 0); 10040 match(Set dst (CmpF3 src1 src2)); 10041 effect(KILL cr, KILL rax); 10042 ins_cost(300); 10043 format %{ "FCMPF $dst,$src1,$src2" %} 10044 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 10045 ins_encode( Push_Reg_DPR(src1), 10046 OpcP, RegOpc(src2), 10047 CmpF_Result(dst)); 10048 ins_pipe( pipe_slow ); 10049%} 10050 10051// float compare and set condition codes in EFLAGS by XMM regs 10052instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{ 10053 predicate(UseSSE>=1); 10054 match(Set cr (CmpF src1 src2)); 10055 ins_cost(145); 10056 format %{ "UCOMISS $src1,$src2\n\t" 10057 "JNP,s exit\n\t" 10058 "PUSHF\t# saw NaN, set CF\n\t" 10059 "AND [rsp], #0xffffff2b\n\t" 10060 "POPF\n" 10061 "exit:" %} 10062 ins_encode %{ 10063 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10064 emit_cmpfp_fixup(_masm); 10065 %} 10066 ins_pipe( pipe_slow ); 10067%} 10068 10069instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{ 10070 predicate(UseSSE>=1); 10071 match(Set cr (CmpF src1 src2)); 10072 ins_cost(100); 10073 format %{ "UCOMISS $src1,$src2" %} 10074 ins_encode %{ 10075 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10076 %} 10077 ins_pipe( pipe_slow ); 10078%} 10079 10080// float compare and set condition codes in EFLAGS by XMM regs 10081instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{ 10082 predicate(UseSSE>=1); 10083 match(Set cr (CmpF src1 (LoadF src2))); 10084 ins_cost(165); 10085 format %{ "UCOMISS $src1,$src2\n\t" 10086 "JNP,s exit\n\t" 10087 "PUSHF\t# saw NaN, set CF\n\t" 10088 "AND [rsp], #0xffffff2b\n\t" 10089 "POPF\n" 10090 "exit:" %} 10091 ins_encode %{ 10092 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10093 emit_cmpfp_fixup(_masm); 10094 %} 10095 ins_pipe( pipe_slow ); 10096%} 10097 10098instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{ 10099 predicate(UseSSE>=1); 10100 match(Set cr (CmpF src1 (LoadF src2))); 10101 ins_cost(100); 10102 format %{ "UCOMISS $src1,$src2" %} 10103 ins_encode %{ 10104 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10105 %} 10106 ins_pipe( pipe_slow ); 10107%} 10108 10109// Compare into -1,0,1 in XMM 10110instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{ 10111 predicate(UseSSE>=1); 10112 match(Set dst (CmpF3 src1 src2)); 10113 effect(KILL cr); 10114 ins_cost(255); 10115 format %{ "UCOMISS $src1, $src2\n\t" 10116 "MOV $dst, #-1\n\t" 10117 "JP,s done\n\t" 10118 "JB,s done\n\t" 10119 "SETNE $dst\n\t" 10120 "MOVZB $dst, $dst\n" 10121 "done:" %} 10122 ins_encode %{ 10123 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10124 emit_cmpfp3(_masm, $dst$$Register); 10125 %} 10126 ins_pipe( pipe_slow ); 10127%} 10128 10129// Compare into -1,0,1 in XMM and memory 10130instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{ 10131 predicate(UseSSE>=1); 10132 match(Set dst (CmpF3 src1 (LoadF src2))); 10133 effect(KILL cr); 10134 ins_cost(275); 10135 format %{ "UCOMISS $src1, $src2\n\t" 10136 "MOV $dst, #-1\n\t" 10137 "JP,s done\n\t" 10138 "JB,s done\n\t" 10139 "SETNE $dst\n\t" 10140 "MOVZB $dst, $dst\n" 10141 "done:" %} 10142 ins_encode %{ 10143 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10144 emit_cmpfp3(_masm, $dst$$Register); 10145 %} 10146 ins_pipe( pipe_slow ); 10147%} 10148 10149// Spill to obtain 24-bit precision 10150instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10151 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10152 match(Set dst (SubF src1 src2)); 10153 10154 format %{ "FSUB $dst,$src1 - $src2" %} 10155 opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */ 10156 ins_encode( Push_Reg_FPR(src1), 10157 OpcReg_FPR(src2), 10158 Pop_Mem_FPR(dst) ); 10159 ins_pipe( fpu_mem_reg_reg ); 10160%} 10161// 10162// This instruction does not round to 24-bits 10163instruct subFPR_reg(regFPR dst, regFPR src) %{ 10164 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10165 match(Set dst (SubF dst src)); 10166 10167 format %{ "FSUB $dst,$src" %} 10168 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 10169 ins_encode( Push_Reg_FPR(src), 10170 OpcP, RegOpc(dst) ); 10171 ins_pipe( fpu_reg_reg ); 10172%} 10173 10174// Spill to obtain 24-bit precision 10175instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10176 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10177 match(Set dst (AddF src1 src2)); 10178 10179 format %{ "FADD $dst,$src1,$src2" %} 10180 opcode(0xD8, 0x0); /* D8 C0+i */ 10181 ins_encode( Push_Reg_FPR(src2), 10182 OpcReg_FPR(src1), 10183 Pop_Mem_FPR(dst) ); 10184 ins_pipe( fpu_mem_reg_reg ); 10185%} 10186// 10187// This instruction does not round to 24-bits 10188instruct addFPR_reg(regFPR dst, regFPR src) %{ 10189 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10190 match(Set dst (AddF dst src)); 10191 10192 format %{ "FLD $src\n\t" 10193 "FADDp $dst,ST" %} 10194 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 10195 ins_encode( Push_Reg_FPR(src), 10196 OpcP, RegOpc(dst) ); 10197 ins_pipe( fpu_reg_reg ); 10198%} 10199 10200instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{ 10201 predicate(UseSSE==0); 10202 match(Set dst (AbsF src)); 10203 ins_cost(100); 10204 format %{ "FABS" %} 10205 opcode(0xE1, 0xD9); 10206 ins_encode( OpcS, OpcP ); 10207 ins_pipe( fpu_reg_reg ); 10208%} 10209 10210instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{ 10211 predicate(UseSSE==0); 10212 match(Set dst (NegF src)); 10213 ins_cost(100); 10214 format %{ "FCHS" %} 10215 opcode(0xE0, 0xD9); 10216 ins_encode( OpcS, OpcP ); 10217 ins_pipe( fpu_reg_reg ); 10218%} 10219 10220// Cisc-alternate to addFPR_reg 10221// Spill to obtain 24-bit precision 10222instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ 10223 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10224 match(Set dst (AddF src1 (LoadF src2))); 10225 10226 format %{ "FLD $src2\n\t" 10227 "FADD ST,$src1\n\t" 10228 "FSTP_S $dst" %} 10229 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10230 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10231 OpcReg_FPR(src1), 10232 Pop_Mem_FPR(dst) ); 10233 ins_pipe( fpu_mem_reg_mem ); 10234%} 10235// 10236// Cisc-alternate to addFPR_reg 10237// This instruction does not round to 24-bits 10238instruct addFPR_reg_mem(regFPR dst, memory src) %{ 10239 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10240 match(Set dst (AddF dst (LoadF src))); 10241 10242 format %{ "FADD $dst,$src" %} 10243 opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */ 10244 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 10245 OpcP, RegOpc(dst) ); 10246 ins_pipe( fpu_reg_mem ); 10247%} 10248 10249// // Following two instructions for _222_mpegaudio 10250// Spill to obtain 24-bit precision 10251instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{ 10252 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10253 match(Set dst (AddF src1 src2)); 10254 10255 format %{ "FADD $dst,$src1,$src2" %} 10256 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10257 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1), 10258 OpcReg_FPR(src2), 10259 Pop_Mem_FPR(dst) ); 10260 ins_pipe( fpu_mem_reg_mem ); 10261%} 10262 10263// Cisc-spill variant 10264// Spill to obtain 24-bit precision 10265instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{ 10266 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10267 match(Set dst (AddF src1 (LoadF src2))); 10268 10269 format %{ "FADD $dst,$src1,$src2 cisc" %} 10270 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10271 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10272 set_instruction_start, 10273 OpcP, RMopc_Mem(secondary,src1), 10274 Pop_Mem_FPR(dst) ); 10275 ins_pipe( fpu_mem_mem_mem ); 10276%} 10277 10278// Spill to obtain 24-bit precision 10279instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 10280 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10281 match(Set dst (AddF src1 src2)); 10282 10283 format %{ "FADD $dst,$src1,$src2" %} 10284 opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */ 10285 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10286 set_instruction_start, 10287 OpcP, RMopc_Mem(secondary,src1), 10288 Pop_Mem_FPR(dst) ); 10289 ins_pipe( fpu_mem_mem_mem ); 10290%} 10291 10292 10293// Spill to obtain 24-bit precision 10294instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ 10295 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10296 match(Set dst (AddF src con)); 10297 format %{ "FLD $src\n\t" 10298 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10299 "FSTP_S $dst" %} 10300 ins_encode %{ 10301 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10302 __ fadd_s($constantaddress($con)); 10303 __ fstp_s(Address(rsp, $dst$$disp)); 10304 %} 10305 ins_pipe(fpu_mem_reg_con); 10306%} 10307// 10308// This instruction does not round to 24-bits 10309instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ 10310 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10311 match(Set dst (AddF src con)); 10312 format %{ "FLD $src\n\t" 10313 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10314 "FSTP $dst" %} 10315 ins_encode %{ 10316 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10317 __ fadd_s($constantaddress($con)); 10318 __ fstp_d($dst$$reg); 10319 %} 10320 ins_pipe(fpu_reg_reg_con); 10321%} 10322 10323// Spill to obtain 24-bit precision 10324instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10325 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10326 match(Set dst (MulF src1 src2)); 10327 10328 format %{ "FLD $src1\n\t" 10329 "FMUL $src2\n\t" 10330 "FSTP_S $dst" %} 10331 opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */ 10332 ins_encode( Push_Reg_FPR(src1), 10333 OpcReg_FPR(src2), 10334 Pop_Mem_FPR(dst) ); 10335 ins_pipe( fpu_mem_reg_reg ); 10336%} 10337// 10338// This instruction does not round to 24-bits 10339instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{ 10340 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10341 match(Set dst (MulF src1 src2)); 10342 10343 format %{ "FLD $src1\n\t" 10344 "FMUL $src2\n\t" 10345 "FSTP_S $dst" %} 10346 opcode(0xD8, 0x1); /* D8 C8+i */ 10347 ins_encode( Push_Reg_FPR(src2), 10348 OpcReg_FPR(src1), 10349 Pop_Reg_FPR(dst) ); 10350 ins_pipe( fpu_reg_reg_reg ); 10351%} 10352 10353 10354// Spill to obtain 24-bit precision 10355// Cisc-alternate to reg-reg multiply 10356instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ 10357 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10358 match(Set dst (MulF src1 (LoadF src2))); 10359 10360 format %{ "FLD_S $src2\n\t" 10361 "FMUL $src1\n\t" 10362 "FSTP_S $dst" %} 10363 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */ 10364 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10365 OpcReg_FPR(src1), 10366 Pop_Mem_FPR(dst) ); 10367 ins_pipe( fpu_mem_reg_mem ); 10368%} 10369// 10370// This instruction does not round to 24-bits 10371// Cisc-alternate to reg-reg multiply 10372instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{ 10373 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10374 match(Set dst (MulF src1 (LoadF src2))); 10375 10376 format %{ "FMUL $dst,$src1,$src2" %} 10377 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */ 10378 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10379 OpcReg_FPR(src1), 10380 Pop_Reg_FPR(dst) ); 10381 ins_pipe( fpu_reg_reg_mem ); 10382%} 10383 10384// Spill to obtain 24-bit precision 10385instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 10386 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10387 match(Set dst (MulF src1 src2)); 10388 10389 format %{ "FMUL $dst,$src1,$src2" %} 10390 opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */ 10391 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10392 set_instruction_start, 10393 OpcP, RMopc_Mem(secondary,src1), 10394 Pop_Mem_FPR(dst) ); 10395 ins_pipe( fpu_mem_mem_mem ); 10396%} 10397 10398// Spill to obtain 24-bit precision 10399instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ 10400 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10401 match(Set dst (MulF src con)); 10402 10403 format %{ "FLD $src\n\t" 10404 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10405 "FSTP_S $dst" %} 10406 ins_encode %{ 10407 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10408 __ fmul_s($constantaddress($con)); 10409 __ fstp_s(Address(rsp, $dst$$disp)); 10410 %} 10411 ins_pipe(fpu_mem_reg_con); 10412%} 10413// 10414// This instruction does not round to 24-bits 10415instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ 10416 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10417 match(Set dst (MulF src con)); 10418 10419 format %{ "FLD $src\n\t" 10420 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10421 "FSTP $dst" %} 10422 ins_encode %{ 10423 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10424 __ fmul_s($constantaddress($con)); 10425 __ fstp_d($dst$$reg); 10426 %} 10427 ins_pipe(fpu_reg_reg_con); 10428%} 10429 10430 10431// 10432// MACRO1 -- subsume unshared load into mulFPR 10433// This instruction does not round to 24-bits 10434instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{ 10435 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10436 match(Set dst (MulF (LoadF mem1) src)); 10437 10438 format %{ "FLD $mem1 ===MACRO1===\n\t" 10439 "FMUL ST,$src\n\t" 10440 "FSTP $dst" %} 10441 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */ 10442 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1), 10443 OpcReg_FPR(src), 10444 Pop_Reg_FPR(dst) ); 10445 ins_pipe( fpu_reg_reg_mem ); 10446%} 10447// 10448// MACRO2 -- addFPR a mulFPR which subsumed an unshared load 10449// This instruction does not round to 24-bits 10450instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{ 10451 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10452 match(Set dst (AddF (MulF (LoadF mem1) src1) src2)); 10453 ins_cost(95); 10454 10455 format %{ "FLD $mem1 ===MACRO2===\n\t" 10456 "FMUL ST,$src1 subsume mulFPR left load\n\t" 10457 "FADD ST,$src2\n\t" 10458 "FSTP $dst" %} 10459 opcode(0xD9); /* LoadF D9 /0 */ 10460 ins_encode( OpcP, RMopc_Mem(0x00,mem1), 10461 FMul_ST_reg(src1), 10462 FAdd_ST_reg(src2), 10463 Pop_Reg_FPR(dst) ); 10464 ins_pipe( fpu_reg_mem_reg_reg ); 10465%} 10466 10467// MACRO3 -- addFPR a mulFPR 10468// This instruction does not round to 24-bits. It is a '2-address' 10469// instruction in that the result goes back to src2. This eliminates 10470// a move from the macro; possibly the register allocator will have 10471// to add it back (and maybe not). 10472instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{ 10473 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10474 match(Set src2 (AddF (MulF src0 src1) src2)); 10475 10476 format %{ "FLD $src0 ===MACRO3===\n\t" 10477 "FMUL ST,$src1\n\t" 10478 "FADDP $src2,ST" %} 10479 opcode(0xD9); /* LoadF D9 /0 */ 10480 ins_encode( Push_Reg_FPR(src0), 10481 FMul_ST_reg(src1), 10482 FAddP_reg_ST(src2) ); 10483 ins_pipe( fpu_reg_reg_reg ); 10484%} 10485 10486// MACRO4 -- divFPR subFPR 10487// This instruction does not round to 24-bits 10488instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{ 10489 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10490 match(Set dst (DivF (SubF src2 src1) src3)); 10491 10492 format %{ "FLD $src2 ===MACRO4===\n\t" 10493 "FSUB ST,$src1\n\t" 10494 "FDIV ST,$src3\n\t" 10495 "FSTP $dst" %} 10496 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 10497 ins_encode( Push_Reg_FPR(src2), 10498 subFPR_divFPR_encode(src1,src3), 10499 Pop_Reg_FPR(dst) ); 10500 ins_pipe( fpu_reg_reg_reg_reg ); 10501%} 10502 10503// Spill to obtain 24-bit precision 10504instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10505 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10506 match(Set dst (DivF src1 src2)); 10507 10508 format %{ "FDIV $dst,$src1,$src2" %} 10509 opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/ 10510 ins_encode( Push_Reg_FPR(src1), 10511 OpcReg_FPR(src2), 10512 Pop_Mem_FPR(dst) ); 10513 ins_pipe( fpu_mem_reg_reg ); 10514%} 10515// 10516// This instruction does not round to 24-bits 10517instruct divFPR_reg(regFPR dst, regFPR src) %{ 10518 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10519 match(Set dst (DivF dst src)); 10520 10521 format %{ "FDIV $dst,$src" %} 10522 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 10523 ins_encode( Push_Reg_FPR(src), 10524 OpcP, RegOpc(dst) ); 10525 ins_pipe( fpu_reg_reg ); 10526%} 10527 10528 10529// Spill to obtain 24-bit precision 10530instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ 10531 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 10532 match(Set dst (ModF src1 src2)); 10533 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 10534 10535 format %{ "FMOD $dst,$src1,$src2" %} 10536 ins_encode( Push_Reg_Mod_DPR(src1, src2), 10537 emitModDPR(), 10538 Push_Result_Mod_DPR(src2), 10539 Pop_Mem_FPR(dst)); 10540 ins_pipe( pipe_slow ); 10541%} 10542// 10543// This instruction does not round to 24-bits 10544instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{ 10545 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10546 match(Set dst (ModF dst src)); 10547 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 10548 10549 format %{ "FMOD $dst,$src" %} 10550 ins_encode(Push_Reg_Mod_DPR(dst, src), 10551 emitModDPR(), 10552 Push_Result_Mod_DPR(src), 10553 Pop_Reg_FPR(dst)); 10554 ins_pipe( pipe_slow ); 10555%} 10556 10557instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{ 10558 predicate(UseSSE>=1); 10559 match(Set dst (ModF src0 src1)); 10560 effect(KILL rax, KILL cr); 10561 format %{ "SUB ESP,4\t # FMOD\n" 10562 "\tMOVSS [ESP+0],$src1\n" 10563 "\tFLD_S [ESP+0]\n" 10564 "\tMOVSS [ESP+0],$src0\n" 10565 "\tFLD_S [ESP+0]\n" 10566 "loop:\tFPREM\n" 10567 "\tFWAIT\n" 10568 "\tFNSTSW AX\n" 10569 "\tSAHF\n" 10570 "\tJP loop\n" 10571 "\tFSTP_S [ESP+0]\n" 10572 "\tMOVSS $dst,[ESP+0]\n" 10573 "\tADD ESP,4\n" 10574 "\tFSTP ST0\t # Restore FPU Stack" 10575 %} 10576 ins_cost(250); 10577 ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU); 10578 ins_pipe( pipe_slow ); 10579%} 10580 10581 10582//----------Arithmetic Conversion Instructions--------------------------------- 10583// The conversions operations are all Alpha sorted. Please keep it that way! 10584 10585instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{ 10586 predicate(UseSSE==0); 10587 match(Set dst (RoundFloat src)); 10588 ins_cost(125); 10589 format %{ "FST_S $dst,$src\t# F-round" %} 10590 ins_encode( Pop_Mem_Reg_FPR(dst, src) ); 10591 ins_pipe( fpu_mem_reg ); 10592%} 10593 10594instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{ 10595 predicate(UseSSE<=1); 10596 match(Set dst (RoundDouble src)); 10597 ins_cost(125); 10598 format %{ "FST_D $dst,$src\t# D-round" %} 10599 ins_encode( Pop_Mem_Reg_DPR(dst, src) ); 10600 ins_pipe( fpu_mem_reg ); 10601%} 10602 10603// Force rounding to 24-bit precision and 6-bit exponent 10604instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{ 10605 predicate(UseSSE==0); 10606 match(Set dst (ConvD2F src)); 10607 format %{ "FST_S $dst,$src\t# F-round" %} 10608 expand %{ 10609 roundFloat_mem_reg(dst,src); 10610 %} 10611%} 10612 10613// Force rounding to 24-bit precision and 6-bit exponent 10614instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{ 10615 predicate(UseSSE==1); 10616 match(Set dst (ConvD2F src)); 10617 effect( KILL cr ); 10618 format %{ "SUB ESP,4\n\t" 10619 "FST_S [ESP],$src\t# F-round\n\t" 10620 "MOVSS $dst,[ESP]\n\t" 10621 "ADD ESP,4" %} 10622 ins_encode %{ 10623 __ subptr(rsp, 4); 10624 if ($src$$reg != FPR1L_enc) { 10625 __ fld_s($src$$reg-1); 10626 __ fstp_s(Address(rsp, 0)); 10627 } else { 10628 __ fst_s(Address(rsp, 0)); 10629 } 10630 __ movflt($dst$$XMMRegister, Address(rsp, 0)); 10631 __ addptr(rsp, 4); 10632 %} 10633 ins_pipe( pipe_slow ); 10634%} 10635 10636// Force rounding double precision to single precision 10637instruct convD2F_reg(regF dst, regD src) %{ 10638 predicate(UseSSE>=2); 10639 match(Set dst (ConvD2F src)); 10640 format %{ "CVTSD2SS $dst,$src\t# F-round" %} 10641 ins_encode %{ 10642 __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister); 10643 %} 10644 ins_pipe( pipe_slow ); 10645%} 10646 10647instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{ 10648 predicate(UseSSE==0); 10649 match(Set dst (ConvF2D src)); 10650 format %{ "FST_S $dst,$src\t# D-round" %} 10651 ins_encode( Pop_Reg_Reg_DPR(dst, src)); 10652 ins_pipe( fpu_reg_reg ); 10653%} 10654 10655instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{ 10656 predicate(UseSSE==1); 10657 match(Set dst (ConvF2D src)); 10658 format %{ "FST_D $dst,$src\t# D-round" %} 10659 expand %{ 10660 roundDouble_mem_reg(dst,src); 10661 %} 10662%} 10663 10664instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{ 10665 predicate(UseSSE==1); 10666 match(Set dst (ConvF2D src)); 10667 effect( KILL cr ); 10668 format %{ "SUB ESP,4\n\t" 10669 "MOVSS [ESP] $src\n\t" 10670 "FLD_S [ESP]\n\t" 10671 "ADD ESP,4\n\t" 10672 "FSTP $dst\t# D-round" %} 10673 ins_encode %{ 10674 __ subptr(rsp, 4); 10675 __ movflt(Address(rsp, 0), $src$$XMMRegister); 10676 __ fld_s(Address(rsp, 0)); 10677 __ addptr(rsp, 4); 10678 __ fstp_d($dst$$reg); 10679 %} 10680 ins_pipe( pipe_slow ); 10681%} 10682 10683instruct convF2D_reg(regD dst, regF src) %{ 10684 predicate(UseSSE>=2); 10685 match(Set dst (ConvF2D src)); 10686 format %{ "CVTSS2SD $dst,$src\t# D-round" %} 10687 ins_encode %{ 10688 __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister); 10689 %} 10690 ins_pipe( pipe_slow ); 10691%} 10692 10693// Convert a double to an int. If the double is a NAN, stuff a zero in instead. 10694instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{ 10695 predicate(UseSSE<=1); 10696 match(Set dst (ConvD2I src)); 10697 effect( KILL tmp, KILL cr ); 10698 format %{ "FLD $src\t# Convert double to int \n\t" 10699 "FLDCW trunc mode\n\t" 10700 "SUB ESP,4\n\t" 10701 "FISTp [ESP + #0]\n\t" 10702 "FLDCW std/24-bit mode\n\t" 10703 "POP EAX\n\t" 10704 "CMP EAX,0x80000000\n\t" 10705 "JNE,s fast\n\t" 10706 "FLD_D $src\n\t" 10707 "CALL d2i_wrapper\n" 10708 "fast:" %} 10709 ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) ); 10710 ins_pipe( pipe_slow ); 10711%} 10712 10713// Convert a double to an int. If the double is a NAN, stuff a zero in instead. 10714instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{ 10715 predicate(UseSSE>=2); 10716 match(Set dst (ConvD2I src)); 10717 effect( KILL tmp, KILL cr ); 10718 format %{ "CVTTSD2SI $dst, $src\n\t" 10719 "CMP $dst,0x80000000\n\t" 10720 "JNE,s fast\n\t" 10721 "SUB ESP, 8\n\t" 10722 "MOVSD [ESP], $src\n\t" 10723 "FLD_D [ESP]\n\t" 10724 "ADD ESP, 8\n\t" 10725 "CALL d2i_wrapper\n" 10726 "fast:" %} 10727 ins_encode %{ 10728 Label fast; 10729 __ cvttsd2sil($dst$$Register, $src$$XMMRegister); 10730 __ cmpl($dst$$Register, 0x80000000); 10731 __ jccb(Assembler::notEqual, fast); 10732 __ subptr(rsp, 8); 10733 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 10734 __ fld_d(Address(rsp, 0)); 10735 __ addptr(rsp, 8); 10736 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper()))); 10737 __ bind(fast); 10738 %} 10739 ins_pipe( pipe_slow ); 10740%} 10741 10742instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{ 10743 predicate(UseSSE<=1); 10744 match(Set dst (ConvD2L src)); 10745 effect( KILL cr ); 10746 format %{ "FLD $src\t# Convert double to long\n\t" 10747 "FLDCW trunc mode\n\t" 10748 "SUB ESP,8\n\t" 10749 "FISTp [ESP + #0]\n\t" 10750 "FLDCW std/24-bit mode\n\t" 10751 "POP EAX\n\t" 10752 "POP EDX\n\t" 10753 "CMP EDX,0x80000000\n\t" 10754 "JNE,s fast\n\t" 10755 "TEST EAX,EAX\n\t" 10756 "JNE,s fast\n\t" 10757 "FLD $src\n\t" 10758 "CALL d2l_wrapper\n" 10759 "fast:" %} 10760 ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) ); 10761 ins_pipe( pipe_slow ); 10762%} 10763 10764// XMM lacks a float/double->long conversion, so use the old FPU stack. 10765instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{ 10766 predicate (UseSSE>=2); 10767 match(Set dst (ConvD2L src)); 10768 effect( KILL cr ); 10769 format %{ "SUB ESP,8\t# Convert double to long\n\t" 10770 "MOVSD [ESP],$src\n\t" 10771 "FLD_D [ESP]\n\t" 10772 "FLDCW trunc mode\n\t" 10773 "FISTp [ESP + #0]\n\t" 10774 "FLDCW std/24-bit mode\n\t" 10775 "POP EAX\n\t" 10776 "POP EDX\n\t" 10777 "CMP EDX,0x80000000\n\t" 10778 "JNE,s fast\n\t" 10779 "TEST EAX,EAX\n\t" 10780 "JNE,s fast\n\t" 10781 "SUB ESP,8\n\t" 10782 "MOVSD [ESP],$src\n\t" 10783 "FLD_D [ESP]\n\t" 10784 "ADD ESP,8\n\t" 10785 "CALL d2l_wrapper\n" 10786 "fast:" %} 10787 ins_encode %{ 10788 Label fast; 10789 __ subptr(rsp, 8); 10790 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 10791 __ fld_d(Address(rsp, 0)); 10792 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc())); 10793 __ fistp_d(Address(rsp, 0)); 10794 // Restore the rounding mode, mask the exception 10795 if (Compile::current()->in_24_bit_fp_mode()) { 10796 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); 10797 } else { 10798 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); 10799 } 10800 // Load the converted long, adjust CPU stack 10801 __ pop(rax); 10802 __ pop(rdx); 10803 __ cmpl(rdx, 0x80000000); 10804 __ jccb(Assembler::notEqual, fast); 10805 __ testl(rax, rax); 10806 __ jccb(Assembler::notEqual, fast); 10807 __ subptr(rsp, 8); 10808 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 10809 __ fld_d(Address(rsp, 0)); 10810 __ addptr(rsp, 8); 10811 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper()))); 10812 __ bind(fast); 10813 %} 10814 ins_pipe( pipe_slow ); 10815%} 10816 10817// Convert a double to an int. Java semantics require we do complex 10818// manglations in the corner cases. So we set the rounding mode to 10819// 'zero', store the darned double down as an int, and reset the 10820// rounding mode to 'nearest'. The hardware stores a flag value down 10821// if we would overflow or converted a NAN; we check for this and 10822// and go the slow path if needed. 10823instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{ 10824 predicate(UseSSE==0); 10825 match(Set dst (ConvF2I src)); 10826 effect( KILL tmp, KILL cr ); 10827 format %{ "FLD $src\t# Convert float to int \n\t" 10828 "FLDCW trunc mode\n\t" 10829 "SUB ESP,4\n\t" 10830 "FISTp [ESP + #0]\n\t" 10831 "FLDCW std/24-bit mode\n\t" 10832 "POP EAX\n\t" 10833 "CMP EAX,0x80000000\n\t" 10834 "JNE,s fast\n\t" 10835 "FLD $src\n\t" 10836 "CALL d2i_wrapper\n" 10837 "fast:" %} 10838 // DPR2I_encoding works for FPR2I 10839 ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) ); 10840 ins_pipe( pipe_slow ); 10841%} 10842 10843// Convert a float in xmm to an int reg. 10844instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{ 10845 predicate(UseSSE>=1); 10846 match(Set dst (ConvF2I src)); 10847 effect( KILL tmp, KILL cr ); 10848 format %{ "CVTTSS2SI $dst, $src\n\t" 10849 "CMP $dst,0x80000000\n\t" 10850 "JNE,s fast\n\t" 10851 "SUB ESP, 4\n\t" 10852 "MOVSS [ESP], $src\n\t" 10853 "FLD [ESP]\n\t" 10854 "ADD ESP, 4\n\t" 10855 "CALL d2i_wrapper\n" 10856 "fast:" %} 10857 ins_encode %{ 10858 Label fast; 10859 __ cvttss2sil($dst$$Register, $src$$XMMRegister); 10860 __ cmpl($dst$$Register, 0x80000000); 10861 __ jccb(Assembler::notEqual, fast); 10862 __ subptr(rsp, 4); 10863 __ movflt(Address(rsp, 0), $src$$XMMRegister); 10864 __ fld_s(Address(rsp, 0)); 10865 __ addptr(rsp, 4); 10866 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper()))); 10867 __ bind(fast); 10868 %} 10869 ins_pipe( pipe_slow ); 10870%} 10871 10872instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{ 10873 predicate(UseSSE==0); 10874 match(Set dst (ConvF2L src)); 10875 effect( KILL cr ); 10876 format %{ "FLD $src\t# Convert float to long\n\t" 10877 "FLDCW trunc mode\n\t" 10878 "SUB ESP,8\n\t" 10879 "FISTp [ESP + #0]\n\t" 10880 "FLDCW std/24-bit mode\n\t" 10881 "POP EAX\n\t" 10882 "POP EDX\n\t" 10883 "CMP EDX,0x80000000\n\t" 10884 "JNE,s fast\n\t" 10885 "TEST EAX,EAX\n\t" 10886 "JNE,s fast\n\t" 10887 "FLD $src\n\t" 10888 "CALL d2l_wrapper\n" 10889 "fast:" %} 10890 // DPR2L_encoding works for FPR2L 10891 ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) ); 10892 ins_pipe( pipe_slow ); 10893%} 10894 10895// XMM lacks a float/double->long conversion, so use the old FPU stack. 10896instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{ 10897 predicate (UseSSE>=1); 10898 match(Set dst (ConvF2L src)); 10899 effect( KILL cr ); 10900 format %{ "SUB ESP,8\t# Convert float to long\n\t" 10901 "MOVSS [ESP],$src\n\t" 10902 "FLD_S [ESP]\n\t" 10903 "FLDCW trunc mode\n\t" 10904 "FISTp [ESP + #0]\n\t" 10905 "FLDCW std/24-bit mode\n\t" 10906 "POP EAX\n\t" 10907 "POP EDX\n\t" 10908 "CMP EDX,0x80000000\n\t" 10909 "JNE,s fast\n\t" 10910 "TEST EAX,EAX\n\t" 10911 "JNE,s fast\n\t" 10912 "SUB ESP,4\t# Convert float to long\n\t" 10913 "MOVSS [ESP],$src\n\t" 10914 "FLD_S [ESP]\n\t" 10915 "ADD ESP,4\n\t" 10916 "CALL d2l_wrapper\n" 10917 "fast:" %} 10918 ins_encode %{ 10919 Label fast; 10920 __ subptr(rsp, 8); 10921 __ movflt(Address(rsp, 0), $src$$XMMRegister); 10922 __ fld_s(Address(rsp, 0)); 10923 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc())); 10924 __ fistp_d(Address(rsp, 0)); 10925 // Restore the rounding mode, mask the exception 10926 if (Compile::current()->in_24_bit_fp_mode()) { 10927 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); 10928 } else { 10929 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); 10930 } 10931 // Load the converted long, adjust CPU stack 10932 __ pop(rax); 10933 __ pop(rdx); 10934 __ cmpl(rdx, 0x80000000); 10935 __ jccb(Assembler::notEqual, fast); 10936 __ testl(rax, rax); 10937 __ jccb(Assembler::notEqual, fast); 10938 __ subptr(rsp, 4); 10939 __ movflt(Address(rsp, 0), $src$$XMMRegister); 10940 __ fld_s(Address(rsp, 0)); 10941 __ addptr(rsp, 4); 10942 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper()))); 10943 __ bind(fast); 10944 %} 10945 ins_pipe( pipe_slow ); 10946%} 10947 10948instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{ 10949 predicate( UseSSE<=1 ); 10950 match(Set dst (ConvI2D src)); 10951 format %{ "FILD $src\n\t" 10952 "FSTP $dst" %} 10953 opcode(0xDB, 0x0); /* DB /0 */ 10954 ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst)); 10955 ins_pipe( fpu_reg_mem ); 10956%} 10957 10958instruct convI2D_reg(regD dst, rRegI src) %{ 10959 predicate( UseSSE>=2 && !UseXmmI2D ); 10960 match(Set dst (ConvI2D src)); 10961 format %{ "CVTSI2SD $dst,$src" %} 10962 ins_encode %{ 10963 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register); 10964 %} 10965 ins_pipe( pipe_slow ); 10966%} 10967 10968instruct convI2D_mem(regD dst, memory mem) %{ 10969 predicate( UseSSE>=2 ); 10970 match(Set dst (ConvI2D (LoadI mem))); 10971 format %{ "CVTSI2SD $dst,$mem" %} 10972 ins_encode %{ 10973 __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address); 10974 %} 10975 ins_pipe( pipe_slow ); 10976%} 10977 10978instruct convXI2D_reg(regD dst, rRegI src) 10979%{ 10980 predicate( UseSSE>=2 && UseXmmI2D ); 10981 match(Set dst (ConvI2D src)); 10982 10983 format %{ "MOVD $dst,$src\n\t" 10984 "CVTDQ2PD $dst,$dst\t# i2d" %} 10985 ins_encode %{ 10986 __ movdl($dst$$XMMRegister, $src$$Register); 10987 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister); 10988 %} 10989 ins_pipe(pipe_slow); // XXX 10990%} 10991 10992instruct convI2DPR_mem(regDPR dst, memory mem) %{ 10993 predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr()); 10994 match(Set dst (ConvI2D (LoadI mem))); 10995 format %{ "FILD $mem\n\t" 10996 "FSTP $dst" %} 10997 opcode(0xDB); /* DB /0 */ 10998 ins_encode( OpcP, RMopc_Mem(0x00,mem), 10999 Pop_Reg_DPR(dst)); 11000 ins_pipe( fpu_reg_mem ); 11001%} 11002 11003// Convert a byte to a float; no rounding step needed. 11004instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{ 11005 predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 ); 11006 match(Set dst (ConvI2F src)); 11007 format %{ "FILD $src\n\t" 11008 "FSTP $dst" %} 11009 11010 opcode(0xDB, 0x0); /* DB /0 */ 11011 ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst)); 11012 ins_pipe( fpu_reg_mem ); 11013%} 11014 11015// In 24-bit mode, force exponent rounding by storing back out 11016instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{ 11017 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 11018 match(Set dst (ConvI2F src)); 11019 ins_cost(200); 11020 format %{ "FILD $src\n\t" 11021 "FSTP_S $dst" %} 11022 opcode(0xDB, 0x0); /* DB /0 */ 11023 ins_encode( Push_Mem_I(src), 11024 Pop_Mem_FPR(dst)); 11025 ins_pipe( fpu_mem_mem ); 11026%} 11027 11028// In 24-bit mode, force exponent rounding by storing back out 11029instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{ 11030 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 11031 match(Set dst (ConvI2F (LoadI mem))); 11032 ins_cost(200); 11033 format %{ "FILD $mem\n\t" 11034 "FSTP_S $dst" %} 11035 opcode(0xDB); /* DB /0 */ 11036 ins_encode( OpcP, RMopc_Mem(0x00,mem), 11037 Pop_Mem_FPR(dst)); 11038 ins_pipe( fpu_mem_mem ); 11039%} 11040 11041// This instruction does not round to 24-bits 11042instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{ 11043 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); 11044 match(Set dst (ConvI2F src)); 11045 format %{ "FILD $src\n\t" 11046 "FSTP $dst" %} 11047 opcode(0xDB, 0x0); /* DB /0 */ 11048 ins_encode( Push_Mem_I(src), 11049 Pop_Reg_FPR(dst)); 11050 ins_pipe( fpu_reg_mem ); 11051%} 11052 11053// This instruction does not round to 24-bits 11054instruct convI2FPR_mem(regFPR dst, memory mem) %{ 11055 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); 11056 match(Set dst (ConvI2F (LoadI mem))); 11057 format %{ "FILD $mem\n\t" 11058 "FSTP $dst" %} 11059 opcode(0xDB); /* DB /0 */ 11060 ins_encode( OpcP, RMopc_Mem(0x00,mem), 11061 Pop_Reg_FPR(dst)); 11062 ins_pipe( fpu_reg_mem ); 11063%} 11064 11065// Convert an int to a float in xmm; no rounding step needed. 11066instruct convI2F_reg(regF dst, rRegI src) %{ 11067 predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F ); 11068 match(Set dst (ConvI2F src)); 11069 format %{ "CVTSI2SS $dst, $src" %} 11070 ins_encode %{ 11071 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register); 11072 %} 11073 ins_pipe( pipe_slow ); 11074%} 11075 11076 instruct convXI2F_reg(regF dst, rRegI src) 11077%{ 11078 predicate( UseSSE>=2 && UseXmmI2F ); 11079 match(Set dst (ConvI2F src)); 11080 11081 format %{ "MOVD $dst,$src\n\t" 11082 "CVTDQ2PS $dst,$dst\t# i2f" %} 11083 ins_encode %{ 11084 __ movdl($dst$$XMMRegister, $src$$Register); 11085 __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister); 11086 %} 11087 ins_pipe(pipe_slow); // XXX 11088%} 11089 11090instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{ 11091 match(Set dst (ConvI2L src)); 11092 effect(KILL cr); 11093 ins_cost(375); 11094 format %{ "MOV $dst.lo,$src\n\t" 11095 "MOV $dst.hi,$src\n\t" 11096 "SAR $dst.hi,31" %} 11097 ins_encode(convert_int_long(dst,src)); 11098 ins_pipe( ialu_reg_reg_long ); 11099%} 11100 11101// Zero-extend convert int to long 11102instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{ 11103 match(Set dst (AndL (ConvI2L src) mask) ); 11104 effect( KILL flags ); 11105 ins_cost(250); 11106 format %{ "MOV $dst.lo,$src\n\t" 11107 "XOR $dst.hi,$dst.hi" %} 11108 opcode(0x33); // XOR 11109 ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) ); 11110 ins_pipe( ialu_reg_reg_long ); 11111%} 11112 11113// Zero-extend long 11114instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{ 11115 match(Set dst (AndL src mask) ); 11116 effect( KILL flags ); 11117 ins_cost(250); 11118 format %{ "MOV $dst.lo,$src.lo\n\t" 11119 "XOR $dst.hi,$dst.hi\n\t" %} 11120 opcode(0x33); // XOR 11121 ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) ); 11122 ins_pipe( ialu_reg_reg_long ); 11123%} 11124 11125instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{ 11126 predicate (UseSSE<=1); 11127 match(Set dst (ConvL2D src)); 11128 effect( KILL cr ); 11129 format %{ "PUSH $src.hi\t# Convert long to double\n\t" 11130 "PUSH $src.lo\n\t" 11131 "FILD ST,[ESP + #0]\n\t" 11132 "ADD ESP,8\n\t" 11133 "FSTP_D $dst\t# D-round" %} 11134 opcode(0xDF, 0x5); /* DF /5 */ 11135 ins_encode(convert_long_double(src), Pop_Mem_DPR(dst)); 11136 ins_pipe( pipe_slow ); 11137%} 11138 11139instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{ 11140 predicate (UseSSE>=2); 11141 match(Set dst (ConvL2D src)); 11142 effect( KILL cr ); 11143 format %{ "PUSH $src.hi\t# Convert long to double\n\t" 11144 "PUSH $src.lo\n\t" 11145 "FILD_D [ESP]\n\t" 11146 "FSTP_D [ESP]\n\t" 11147 "MOVSD $dst,[ESP]\n\t" 11148 "ADD ESP,8" %} 11149 opcode(0xDF, 0x5); /* DF /5 */ 11150 ins_encode(convert_long_double2(src), Push_ResultD(dst)); 11151 ins_pipe( pipe_slow ); 11152%} 11153 11154instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{ 11155 predicate (UseSSE>=1); 11156 match(Set dst (ConvL2F src)); 11157 effect( KILL cr ); 11158 format %{ "PUSH $src.hi\t# Convert long to single float\n\t" 11159 "PUSH $src.lo\n\t" 11160 "FILD_D [ESP]\n\t" 11161 "FSTP_S [ESP]\n\t" 11162 "MOVSS $dst,[ESP]\n\t" 11163 "ADD ESP,8" %} 11164 opcode(0xDF, 0x5); /* DF /5 */ 11165 ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8)); 11166 ins_pipe( pipe_slow ); 11167%} 11168 11169instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{ 11170 match(Set dst (ConvL2F src)); 11171 effect( KILL cr ); 11172 format %{ "PUSH $src.hi\t# Convert long to single float\n\t" 11173 "PUSH $src.lo\n\t" 11174 "FILD ST,[ESP + #0]\n\t" 11175 "ADD ESP,8\n\t" 11176 "FSTP_S $dst\t# F-round" %} 11177 opcode(0xDF, 0x5); /* DF /5 */ 11178 ins_encode(convert_long_double(src), Pop_Mem_FPR(dst)); 11179 ins_pipe( pipe_slow ); 11180%} 11181 11182instruct convL2I_reg( rRegI dst, eRegL src ) %{ 11183 match(Set dst (ConvL2I src)); 11184 effect( DEF dst, USE src ); 11185 format %{ "MOV $dst,$src.lo" %} 11186 ins_encode(enc_CopyL_Lo(dst,src)); 11187 ins_pipe( ialu_reg_reg ); 11188%} 11189 11190instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{ 11191 match(Set dst (MoveF2I src)); 11192 effect( DEF dst, USE src ); 11193 ins_cost(100); 11194 format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %} 11195 ins_encode %{ 11196 __ movl($dst$$Register, Address(rsp, $src$$disp)); 11197 %} 11198 ins_pipe( ialu_reg_mem ); 11199%} 11200 11201instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{ 11202 predicate(UseSSE==0); 11203 match(Set dst (MoveF2I src)); 11204 effect( DEF dst, USE src ); 11205 11206 ins_cost(125); 11207 format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %} 11208 ins_encode( Pop_Mem_Reg_FPR(dst, src) ); 11209 ins_pipe( fpu_mem_reg ); 11210%} 11211 11212instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{ 11213 predicate(UseSSE>=1); 11214 match(Set dst (MoveF2I src)); 11215 effect( DEF dst, USE src ); 11216 11217 ins_cost(95); 11218 format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %} 11219 ins_encode %{ 11220 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister); 11221 %} 11222 ins_pipe( pipe_slow ); 11223%} 11224 11225instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{ 11226 predicate(UseSSE>=2); 11227 match(Set dst (MoveF2I src)); 11228 effect( DEF dst, USE src ); 11229 ins_cost(85); 11230 format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %} 11231 ins_encode %{ 11232 __ movdl($dst$$Register, $src$$XMMRegister); 11233 %} 11234 ins_pipe( pipe_slow ); 11235%} 11236 11237instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{ 11238 match(Set dst (MoveI2F src)); 11239 effect( DEF dst, USE src ); 11240 11241 ins_cost(100); 11242 format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %} 11243 ins_encode %{ 11244 __ movl(Address(rsp, $dst$$disp), $src$$Register); 11245 %} 11246 ins_pipe( ialu_mem_reg ); 11247%} 11248 11249 11250instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{ 11251 predicate(UseSSE==0); 11252 match(Set dst (MoveI2F src)); 11253 effect(DEF dst, USE src); 11254 11255 ins_cost(125); 11256 format %{ "FLD_S $src\n\t" 11257 "FSTP $dst\t# MoveI2F_stack_reg" %} 11258 opcode(0xD9); /* D9 /0, FLD m32real */ 11259 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 11260 Pop_Reg_FPR(dst) ); 11261 ins_pipe( fpu_reg_mem ); 11262%} 11263 11264instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{ 11265 predicate(UseSSE>=1); 11266 match(Set dst (MoveI2F src)); 11267 effect( DEF dst, USE src ); 11268 11269 ins_cost(95); 11270 format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %} 11271 ins_encode %{ 11272 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp)); 11273 %} 11274 ins_pipe( pipe_slow ); 11275%} 11276 11277instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{ 11278 predicate(UseSSE>=2); 11279 match(Set dst (MoveI2F src)); 11280 effect( DEF dst, USE src ); 11281 11282 ins_cost(85); 11283 format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %} 11284 ins_encode %{ 11285 __ movdl($dst$$XMMRegister, $src$$Register); 11286 %} 11287 ins_pipe( pipe_slow ); 11288%} 11289 11290instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{ 11291 match(Set dst (MoveD2L src)); 11292 effect(DEF dst, USE src); 11293 11294 ins_cost(250); 11295 format %{ "MOV $dst.lo,$src\n\t" 11296 "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %} 11297 opcode(0x8B, 0x8B); 11298 ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src)); 11299 ins_pipe( ialu_mem_long_reg ); 11300%} 11301 11302instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{ 11303 predicate(UseSSE<=1); 11304 match(Set dst (MoveD2L src)); 11305 effect(DEF dst, USE src); 11306 11307 ins_cost(125); 11308 format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %} 11309 ins_encode( Pop_Mem_Reg_DPR(dst, src) ); 11310 ins_pipe( fpu_mem_reg ); 11311%} 11312 11313instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{ 11314 predicate(UseSSE>=2); 11315 match(Set dst (MoveD2L src)); 11316 effect(DEF dst, USE src); 11317 ins_cost(95); 11318 format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %} 11319 ins_encode %{ 11320 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister); 11321 %} 11322 ins_pipe( pipe_slow ); 11323%} 11324 11325instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{ 11326 predicate(UseSSE>=2); 11327 match(Set dst (MoveD2L src)); 11328 effect(DEF dst, USE src, TEMP tmp); 11329 ins_cost(85); 11330 format %{ "MOVD $dst.lo,$src\n\t" 11331 "PSHUFLW $tmp,$src,0x4E\n\t" 11332 "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %} 11333 ins_encode %{ 11334 __ movdl($dst$$Register, $src$$XMMRegister); 11335 __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e); 11336 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); 11337 %} 11338 ins_pipe( pipe_slow ); 11339%} 11340 11341instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{ 11342 match(Set dst (MoveL2D src)); 11343 effect(DEF dst, USE src); 11344 11345 ins_cost(200); 11346 format %{ "MOV $dst,$src.lo\n\t" 11347 "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %} 11348 opcode(0x89, 0x89); 11349 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) ); 11350 ins_pipe( ialu_mem_long_reg ); 11351%} 11352 11353 11354instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{ 11355 predicate(UseSSE<=1); 11356 match(Set dst (MoveL2D src)); 11357 effect(DEF dst, USE src); 11358 ins_cost(125); 11359 11360 format %{ "FLD_D $src\n\t" 11361 "FSTP $dst\t# MoveL2D_stack_reg" %} 11362 opcode(0xDD); /* DD /0, FLD m64real */ 11363 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 11364 Pop_Reg_DPR(dst) ); 11365 ins_pipe( fpu_reg_mem ); 11366%} 11367 11368 11369instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{ 11370 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper); 11371 match(Set dst (MoveL2D src)); 11372 effect(DEF dst, USE src); 11373 11374 ins_cost(95); 11375 format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11376 ins_encode %{ 11377 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11378 %} 11379 ins_pipe( pipe_slow ); 11380%} 11381 11382instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{ 11383 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); 11384 match(Set dst (MoveL2D src)); 11385 effect(DEF dst, USE src); 11386 11387 ins_cost(95); 11388 format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11389 ins_encode %{ 11390 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11391 %} 11392 ins_pipe( pipe_slow ); 11393%} 11394 11395instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{ 11396 predicate(UseSSE>=2); 11397 match(Set dst (MoveL2D src)); 11398 effect(TEMP dst, USE src, TEMP tmp); 11399 ins_cost(85); 11400 format %{ "MOVD $dst,$src.lo\n\t" 11401 "MOVD $tmp,$src.hi\n\t" 11402 "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %} 11403 ins_encode %{ 11404 __ movdl($dst$$XMMRegister, $src$$Register); 11405 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 11406 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 11407 %} 11408 ins_pipe( pipe_slow ); 11409%} 11410 11411 11412// ======================================================================= 11413// fast clearing of an array 11414instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11415 predicate(!UseFastStosb); 11416 match(Set dummy (ClearArray cnt base)); 11417 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); 11418 format %{ "XOR EAX,EAX\t# ClearArray:\n\t" 11419 "SHL ECX,1\t# Convert doublewords to words\n\t" 11420 "REP STOS\t# store EAX into [EDI++] while ECX--" %} 11421 ins_encode %{ 11422 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register); 11423 %} 11424 ins_pipe( pipe_slow ); 11425%} 11426 11427instruct rep_fast_stosb(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11428 predicate(UseFastStosb); 11429 match(Set dummy (ClearArray cnt base)); 11430 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); 11431 format %{ "XOR EAX,EAX\t# ClearArray:\n\t" 11432 "SHL ECX,3\t# Convert doublewords to bytes\n\t" 11433 "REP STOSB\t# store EAX into [EDI++] while ECX--" %} 11434 ins_encode %{ 11435 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register); 11436 %} 11437 ins_pipe( pipe_slow ); 11438%} 11439 11440instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11441 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11442 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); 11443 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11444 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11445 11446 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11447 ins_encode %{ 11448 __ string_compare($str1$$Register, $str2$$Register, 11449 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11450 $tmp1$$XMMRegister, StrIntrinsicNode::LL); 11451 %} 11452 ins_pipe( pipe_slow ); 11453%} 11454 11455instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11456 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11457 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); 11458 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11459 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11460 11461 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11462 ins_encode %{ 11463 __ string_compare($str1$$Register, $str2$$Register, 11464 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11465 $tmp1$$XMMRegister, StrIntrinsicNode::UU); 11466 %} 11467 ins_pipe( pipe_slow ); 11468%} 11469 11470instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11471 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11472 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); 11473 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11474 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11475 11476 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11477 ins_encode %{ 11478 __ string_compare($str1$$Register, $str2$$Register, 11479 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11480 $tmp1$$XMMRegister, StrIntrinsicNode::LU); 11481 %} 11482 ins_pipe( pipe_slow ); 11483%} 11484 11485instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2, 11486 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11487 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); 11488 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11489 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11490 11491 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11492 ins_encode %{ 11493 __ string_compare($str2$$Register, $str1$$Register, 11494 $cnt2$$Register, $cnt1$$Register, $result$$Register, 11495 $tmp1$$XMMRegister, StrIntrinsicNode::UL); 11496 %} 11497 ins_pipe( pipe_slow ); 11498%} 11499 11500// fast string equals 11501instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result, 11502 regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{ 11503 match(Set result (StrEquals (Binary str1 str2) cnt)); 11504 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr); 11505 11506 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %} 11507 ins_encode %{ 11508 __ arrays_equals(false, $str1$$Register, $str2$$Register, 11509 $cnt$$Register, $result$$Register, $tmp3$$Register, 11510 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */); 11511 %} 11512 11513 ins_pipe( pipe_slow ); 11514%} 11515 11516// fast search of substring with known size. 11517instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, 11518 eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ 11519 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL)); 11520 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); 11521 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); 11522 11523 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %} 11524 ins_encode %{ 11525 int icnt2 = (int)$int_cnt2$$constant; 11526 if (icnt2 >= 16) { 11527 // IndexOf for constant substrings with size >= 16 elements 11528 // which don't need to be loaded through stack. 11529 __ string_indexofC8($str1$$Register, $str2$$Register, 11530 $cnt1$$Register, $cnt2$$Register, 11531 icnt2, $result$$Register, 11532 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL); 11533 } else { 11534 // Small strings are loaded through stack if they cross page boundary. 11535 __ string_indexof($str1$$Register, $str2$$Register, 11536 $cnt1$$Register, $cnt2$$Register, 11537 icnt2, $result$$Register, 11538 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL); 11539 } 11540 %} 11541 ins_pipe( pipe_slow ); 11542%} 11543 11544// fast search of substring with known size. 11545instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, 11546 eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ 11547 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU)); 11548 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); 11549 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); 11550 11551 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %} 11552 ins_encode %{ 11553 int icnt2 = (int)$int_cnt2$$constant; 11554 if (icnt2 >= 8) { 11555 // IndexOf for constant substrings with size >= 8 elements 11556 // which don't need to be loaded through stack. 11557 __ string_indexofC8($str1$$Register, $str2$$Register, 11558 $cnt1$$Register, $cnt2$$Register, 11559 icnt2, $result$$Register, 11560 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU); 11561 } else { 11562 // Small strings are loaded through stack if they cross page boundary. 11563 __ string_indexof($str1$$Register, $str2$$Register, 11564 $cnt1$$Register, $cnt2$$Register, 11565 icnt2, $result$$Register, 11566 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU); 11567 } 11568 %} 11569 ins_pipe( pipe_slow ); 11570%} 11571 11572// fast search of substring with known size. 11573instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, 11574 eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ 11575 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL)); 11576 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); 11577 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); 11578 11579 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %} 11580 ins_encode %{ 11581 int icnt2 = (int)$int_cnt2$$constant; 11582 if (icnt2 >= 8) { 11583 // IndexOf for constant substrings with size >= 8 elements 11584 // which don't need to be loaded through stack. 11585 __ string_indexofC8($str1$$Register, $str2$$Register, 11586 $cnt1$$Register, $cnt2$$Register, 11587 icnt2, $result$$Register, 11588 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL); 11589 } else { 11590 // Small strings are loaded through stack if they cross page boundary. 11591 __ string_indexof($str1$$Register, $str2$$Register, 11592 $cnt1$$Register, $cnt2$$Register, 11593 icnt2, $result$$Register, 11594 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL); 11595 } 11596 %} 11597 ins_pipe( pipe_slow ); 11598%} 11599 11600instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, 11601 eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{ 11602 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL)); 11603 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); 11604 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); 11605 11606 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} 11607 ins_encode %{ 11608 __ string_indexof($str1$$Register, $str2$$Register, 11609 $cnt1$$Register, $cnt2$$Register, 11610 (-1), $result$$Register, 11611 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL); 11612 %} 11613 ins_pipe( pipe_slow ); 11614%} 11615 11616instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, 11617 eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{ 11618 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU)); 11619 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); 11620 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); 11621 11622 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} 11623 ins_encode %{ 11624 __ string_indexof($str1$$Register, $str2$$Register, 11625 $cnt1$$Register, $cnt2$$Register, 11626 (-1), $result$$Register, 11627 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU); 11628 %} 11629 ins_pipe( pipe_slow ); 11630%} 11631 11632instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, 11633 eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{ 11634 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL)); 11635 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); 11636 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); 11637 11638 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} 11639 ins_encode %{ 11640 __ string_indexof($str1$$Register, $str2$$Register, 11641 $cnt1$$Register, $cnt2$$Register, 11642 (-1), $result$$Register, 11643 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL); 11644 %} 11645 ins_pipe( pipe_slow ); 11646%} 11647 11648instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch, 11649 eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{ 11650 predicate(UseSSE42Intrinsics); 11651 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); 11652 effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr); 11653 format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} 11654 ins_encode %{ 11655 __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register, 11656 $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register); 11657 %} 11658 ins_pipe( pipe_slow ); 11659%} 11660 11661// fast array equals 11662instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result, 11663 regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) 11664%{ 11665 predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); 11666 match(Set result (AryEq ary1 ary2)); 11667 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); 11668 //ins_cost(300); 11669 11670 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} 11671 ins_encode %{ 11672 __ arrays_equals(true, $ary1$$Register, $ary2$$Register, 11673 $tmp3$$Register, $result$$Register, $tmp4$$Register, 11674 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */); 11675 %} 11676 ins_pipe( pipe_slow ); 11677%} 11678 11679instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result, 11680 regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) 11681%{ 11682 predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); 11683 match(Set result (AryEq ary1 ary2)); 11684 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); 11685 //ins_cost(300); 11686 11687 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} 11688 ins_encode %{ 11689 __ arrays_equals(true, $ary1$$Register, $ary2$$Register, 11690 $tmp3$$Register, $result$$Register, $tmp4$$Register, 11691 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */); 11692 %} 11693 ins_pipe( pipe_slow ); 11694%} 11695 11696instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result, 11697 regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) 11698%{ 11699 match(Set result (HasNegatives ary1 len)); 11700 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr); 11701 11702 format %{ "has negatives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %} 11703 ins_encode %{ 11704 __ has_negatives($ary1$$Register, $len$$Register, 11705 $result$$Register, $tmp3$$Register, 11706 $tmp1$$XMMRegister, $tmp2$$XMMRegister); 11707 %} 11708 ins_pipe( pipe_slow ); 11709%} 11710 11711// fast char[] to byte[] compression 11712instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4, 11713 eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ 11714 match(Set result (StrCompressedCopy src (Binary dst len))); 11715 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); 11716 11717 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %} 11718 ins_encode %{ 11719 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register, 11720 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, 11721 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register); 11722 %} 11723 ins_pipe( pipe_slow ); 11724%} 11725 11726// fast byte[] to char[] inflation 11727instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len, 11728 regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{ 11729 match(Set dummy (StrInflatedCopy src (Binary dst len))); 11730 effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr); 11731 11732 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %} 11733 ins_encode %{ 11734 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register, 11735 $tmp1$$XMMRegister, $tmp2$$Register); 11736 %} 11737 ins_pipe( pipe_slow ); 11738%} 11739 11740// encode char[] to byte[] in ISO_8859_1 11741instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len, 11742 regD tmp1, regD tmp2, regD tmp3, regD tmp4, 11743 eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ 11744 match(Set result (EncodeISOArray src (Binary dst len))); 11745 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); 11746 11747 format %{ "Encode array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %} 11748 ins_encode %{ 11749 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, 11750 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, 11751 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register); 11752 %} 11753 ins_pipe( pipe_slow ); 11754%} 11755 11756 11757//----------Control Flow Instructions------------------------------------------ 11758// Signed compare Instructions 11759instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{ 11760 match(Set cr (CmpI op1 op2)); 11761 effect( DEF cr, USE op1, USE op2 ); 11762 format %{ "CMP $op1,$op2" %} 11763 opcode(0x3B); /* Opcode 3B /r */ 11764 ins_encode( OpcP, RegReg( op1, op2) ); 11765 ins_pipe( ialu_cr_reg_reg ); 11766%} 11767 11768instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{ 11769 match(Set cr (CmpI op1 op2)); 11770 effect( DEF cr, USE op1 ); 11771 format %{ "CMP $op1,$op2" %} 11772 opcode(0x81,0x07); /* Opcode 81 /7 */ 11773 // ins_encode( RegImm( op1, op2) ); /* Was CmpImm */ 11774 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) ); 11775 ins_pipe( ialu_cr_reg_imm ); 11776%} 11777 11778// Cisc-spilled version of cmpI_eReg 11779instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{ 11780 match(Set cr (CmpI op1 (LoadI op2))); 11781 11782 format %{ "CMP $op1,$op2" %} 11783 ins_cost(500); 11784 opcode(0x3B); /* Opcode 3B /r */ 11785 ins_encode( OpcP, RegMem( op1, op2) ); 11786 ins_pipe( ialu_cr_reg_mem ); 11787%} 11788 11789instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{ 11790 match(Set cr (CmpI src zero)); 11791 effect( DEF cr, USE src ); 11792 11793 format %{ "TEST $src,$src" %} 11794 opcode(0x85); 11795 ins_encode( OpcP, RegReg( src, src ) ); 11796 ins_pipe( ialu_cr_reg_imm ); 11797%} 11798 11799instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{ 11800 match(Set cr (CmpI (AndI src con) zero)); 11801 11802 format %{ "TEST $src,$con" %} 11803 opcode(0xF7,0x00); 11804 ins_encode( OpcP, RegOpc(src), Con32(con) ); 11805 ins_pipe( ialu_cr_reg_imm ); 11806%} 11807 11808instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{ 11809 match(Set cr (CmpI (AndI src mem) zero)); 11810 11811 format %{ "TEST $src,$mem" %} 11812 opcode(0x85); 11813 ins_encode( OpcP, RegMem( src, mem ) ); 11814 ins_pipe( ialu_cr_reg_mem ); 11815%} 11816 11817// Unsigned compare Instructions; really, same as signed except they 11818// produce an eFlagsRegU instead of eFlagsReg. 11819instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{ 11820 match(Set cr (CmpU op1 op2)); 11821 11822 format %{ "CMPu $op1,$op2" %} 11823 opcode(0x3B); /* Opcode 3B /r */ 11824 ins_encode( OpcP, RegReg( op1, op2) ); 11825 ins_pipe( ialu_cr_reg_reg ); 11826%} 11827 11828instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{ 11829 match(Set cr (CmpU op1 op2)); 11830 11831 format %{ "CMPu $op1,$op2" %} 11832 opcode(0x81,0x07); /* Opcode 81 /7 */ 11833 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) ); 11834 ins_pipe( ialu_cr_reg_imm ); 11835%} 11836 11837// // Cisc-spilled version of cmpU_eReg 11838instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{ 11839 match(Set cr (CmpU op1 (LoadI op2))); 11840 11841 format %{ "CMPu $op1,$op2" %} 11842 ins_cost(500); 11843 opcode(0x3B); /* Opcode 3B /r */ 11844 ins_encode( OpcP, RegMem( op1, op2) ); 11845 ins_pipe( ialu_cr_reg_mem ); 11846%} 11847 11848// // Cisc-spilled version of cmpU_eReg 11849//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{ 11850// match(Set cr (CmpU (LoadI op1) op2)); 11851// 11852// format %{ "CMPu $op1,$op2" %} 11853// ins_cost(500); 11854// opcode(0x39); /* Opcode 39 /r */ 11855// ins_encode( OpcP, RegMem( op1, op2) ); 11856//%} 11857 11858instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{ 11859 match(Set cr (CmpU src zero)); 11860 11861 format %{ "TESTu $src,$src" %} 11862 opcode(0x85); 11863 ins_encode( OpcP, RegReg( src, src ) ); 11864 ins_pipe( ialu_cr_reg_imm ); 11865%} 11866 11867// Unsigned pointer compare Instructions 11868instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{ 11869 match(Set cr (CmpP op1 op2)); 11870 11871 format %{ "CMPu $op1,$op2" %} 11872 opcode(0x3B); /* Opcode 3B /r */ 11873 ins_encode( OpcP, RegReg( op1, op2) ); 11874 ins_pipe( ialu_cr_reg_reg ); 11875%} 11876 11877instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{ 11878 match(Set cr (CmpP op1 op2)); 11879 11880 format %{ "CMPu $op1,$op2" %} 11881 opcode(0x81,0x07); /* Opcode 81 /7 */ 11882 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) ); 11883 ins_pipe( ialu_cr_reg_imm ); 11884%} 11885 11886// // Cisc-spilled version of cmpP_eReg 11887instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{ 11888 match(Set cr (CmpP op1 (LoadP op2))); 11889 11890 format %{ "CMPu $op1,$op2" %} 11891 ins_cost(500); 11892 opcode(0x3B); /* Opcode 3B /r */ 11893 ins_encode( OpcP, RegMem( op1, op2) ); 11894 ins_pipe( ialu_cr_reg_mem ); 11895%} 11896 11897// // Cisc-spilled version of cmpP_eReg 11898//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{ 11899// match(Set cr (CmpP (LoadP op1) op2)); 11900// 11901// format %{ "CMPu $op1,$op2" %} 11902// ins_cost(500); 11903// opcode(0x39); /* Opcode 39 /r */ 11904// ins_encode( OpcP, RegMem( op1, op2) ); 11905//%} 11906 11907// Compare raw pointer (used in out-of-heap check). 11908// Only works because non-oop pointers must be raw pointers 11909// and raw pointers have no anti-dependencies. 11910instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{ 11911 predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none ); 11912 match(Set cr (CmpP op1 (LoadP op2))); 11913 11914 format %{ "CMPu $op1,$op2" %} 11915 opcode(0x3B); /* Opcode 3B /r */ 11916 ins_encode( OpcP, RegMem( op1, op2) ); 11917 ins_pipe( ialu_cr_reg_mem ); 11918%} 11919 11920// 11921// This will generate a signed flags result. This should be ok 11922// since any compare to a zero should be eq/neq. 11923instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{ 11924 match(Set cr (CmpP src zero)); 11925 11926 format %{ "TEST $src,$src" %} 11927 opcode(0x85); 11928 ins_encode( OpcP, RegReg( src, src ) ); 11929 ins_pipe( ialu_cr_reg_imm ); 11930%} 11931 11932// Cisc-spilled version of testP_reg 11933// This will generate a signed flags result. This should be ok 11934// since any compare to a zero should be eq/neq. 11935instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{ 11936 match(Set cr (CmpP (LoadP op) zero)); 11937 11938 format %{ "TEST $op,0xFFFFFFFF" %} 11939 ins_cost(500); 11940 opcode(0xF7); /* Opcode F7 /0 */ 11941 ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) ); 11942 ins_pipe( ialu_cr_reg_imm ); 11943%} 11944 11945// Yanked all unsigned pointer compare operations. 11946// Pointer compares are done with CmpP which is already unsigned. 11947 11948//----------Max and Min-------------------------------------------------------- 11949// Min Instructions 11950//// 11951// *** Min and Max using the conditional move are slower than the 11952// *** branch version on a Pentium III. 11953// // Conditional move for min 11954//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{ 11955// effect( USE_DEF op2, USE op1, USE cr ); 11956// format %{ "CMOVlt $op2,$op1\t! min" %} 11957// opcode(0x4C,0x0F); 11958// ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); 11959// ins_pipe( pipe_cmov_reg ); 11960//%} 11961// 11962//// Min Register with Register (P6 version) 11963//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{ 11964// predicate(VM_Version::supports_cmov() ); 11965// match(Set op2 (MinI op1 op2)); 11966// ins_cost(200); 11967// expand %{ 11968// eFlagsReg cr; 11969// compI_eReg(cr,op1,op2); 11970// cmovI_reg_lt(op2,op1,cr); 11971// %} 11972//%} 11973 11974// Min Register with Register (generic version) 11975instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{ 11976 match(Set dst (MinI dst src)); 11977 effect(KILL flags); 11978 ins_cost(300); 11979 11980 format %{ "MIN $dst,$src" %} 11981 opcode(0xCC); 11982 ins_encode( min_enc(dst,src) ); 11983 ins_pipe( pipe_slow ); 11984%} 11985 11986// Max Register with Register 11987// *** Min and Max using the conditional move are slower than the 11988// *** branch version on a Pentium III. 11989// // Conditional move for max 11990//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{ 11991// effect( USE_DEF op2, USE op1, USE cr ); 11992// format %{ "CMOVgt $op2,$op1\t! max" %} 11993// opcode(0x4F,0x0F); 11994// ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); 11995// ins_pipe( pipe_cmov_reg ); 11996//%} 11997// 11998// // Max Register with Register (P6 version) 11999//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{ 12000// predicate(VM_Version::supports_cmov() ); 12001// match(Set op2 (MaxI op1 op2)); 12002// ins_cost(200); 12003// expand %{ 12004// eFlagsReg cr; 12005// compI_eReg(cr,op1,op2); 12006// cmovI_reg_gt(op2,op1,cr); 12007// %} 12008//%} 12009 12010// Max Register with Register (generic version) 12011instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{ 12012 match(Set dst (MaxI dst src)); 12013 effect(KILL flags); 12014 ins_cost(300); 12015 12016 format %{ "MAX $dst,$src" %} 12017 opcode(0xCC); 12018 ins_encode( max_enc(dst,src) ); 12019 ins_pipe( pipe_slow ); 12020%} 12021 12022// ============================================================================ 12023// Counted Loop limit node which represents exact final iterator value. 12024// Note: the resulting value should fit into integer range since 12025// counted loops have limit check on overflow. 12026instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{ 12027 match(Set limit (LoopLimit (Binary init limit) stride)); 12028 effect(TEMP limit_hi, TEMP tmp, KILL flags); 12029 ins_cost(300); 12030 12031 format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %} 12032 ins_encode %{ 12033 int strd = (int)$stride$$constant; 12034 assert(strd != 1 && strd != -1, "sanity"); 12035 int m1 = (strd > 0) ? 1 : -1; 12036 // Convert limit to long (EAX:EDX) 12037 __ cdql(); 12038 // Convert init to long (init:tmp) 12039 __ movl($tmp$$Register, $init$$Register); 12040 __ sarl($tmp$$Register, 31); 12041 // $limit - $init 12042 __ subl($limit$$Register, $init$$Register); 12043 __ sbbl($limit_hi$$Register, $tmp$$Register); 12044 // + ($stride - 1) 12045 if (strd > 0) { 12046 __ addl($limit$$Register, (strd - 1)); 12047 __ adcl($limit_hi$$Register, 0); 12048 __ movl($tmp$$Register, strd); 12049 } else { 12050 __ addl($limit$$Register, (strd + 1)); 12051 __ adcl($limit_hi$$Register, -1); 12052 __ lneg($limit_hi$$Register, $limit$$Register); 12053 __ movl($tmp$$Register, -strd); 12054 } 12055 // signed devision: (EAX:EDX) / pos_stride 12056 __ idivl($tmp$$Register); 12057 if (strd < 0) { 12058 // restore sign 12059 __ negl($tmp$$Register); 12060 } 12061 // (EAX) * stride 12062 __ mull($tmp$$Register); 12063 // + init (ignore upper bits) 12064 __ addl($limit$$Register, $init$$Register); 12065 %} 12066 ins_pipe( pipe_slow ); 12067%} 12068 12069// ============================================================================ 12070// Branch Instructions 12071// Jump Table 12072instruct jumpXtnd(rRegI switch_val) %{ 12073 match(Jump switch_val); 12074 ins_cost(350); 12075 format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %} 12076 ins_encode %{ 12077 // Jump to Address(table_base + switch_reg) 12078 Address index(noreg, $switch_val$$Register, Address::times_1); 12079 __ jump(ArrayAddress($constantaddress, index)); 12080 %} 12081 ins_pipe(pipe_jmp); 12082%} 12083 12084// Jump Direct - Label defines a relative address from JMP+1 12085instruct jmpDir(label labl) %{ 12086 match(Goto); 12087 effect(USE labl); 12088 12089 ins_cost(300); 12090 format %{ "JMP $labl" %} 12091 size(5); 12092 ins_encode %{ 12093 Label* L = $labl$$label; 12094 __ jmp(*L, false); // Always long jump 12095 %} 12096 ins_pipe( pipe_jmp ); 12097%} 12098 12099// Jump Direct Conditional - Label defines a relative address from Jcc+1 12100instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{ 12101 match(If cop cr); 12102 effect(USE labl); 12103 12104 ins_cost(300); 12105 format %{ "J$cop $labl" %} 12106 size(6); 12107 ins_encode %{ 12108 Label* L = $labl$$label; 12109 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12110 %} 12111 ins_pipe( pipe_jcc ); 12112%} 12113 12114// Jump Direct Conditional - Label defines a relative address from Jcc+1 12115instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{ 12116 match(CountedLoopEnd cop cr); 12117 effect(USE labl); 12118 12119 ins_cost(300); 12120 format %{ "J$cop $labl\t# Loop end" %} 12121 size(6); 12122 ins_encode %{ 12123 Label* L = $labl$$label; 12124 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12125 %} 12126 ins_pipe( pipe_jcc ); 12127%} 12128 12129// Jump Direct Conditional - Label defines a relative address from Jcc+1 12130instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{ 12131 match(CountedLoopEnd cop cmp); 12132 effect(USE labl); 12133 12134 ins_cost(300); 12135 format %{ "J$cop,u $labl\t# Loop end" %} 12136 size(6); 12137 ins_encode %{ 12138 Label* L = $labl$$label; 12139 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12140 %} 12141 ins_pipe( pipe_jcc ); 12142%} 12143 12144instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{ 12145 match(CountedLoopEnd cop cmp); 12146 effect(USE labl); 12147 12148 ins_cost(200); 12149 format %{ "J$cop,u $labl\t# Loop end" %} 12150 size(6); 12151 ins_encode %{ 12152 Label* L = $labl$$label; 12153 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12154 %} 12155 ins_pipe( pipe_jcc ); 12156%} 12157 12158// Jump Direct Conditional - using unsigned comparison 12159instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{ 12160 match(If cop cmp); 12161 effect(USE labl); 12162 12163 ins_cost(300); 12164 format %{ "J$cop,u $labl" %} 12165 size(6); 12166 ins_encode %{ 12167 Label* L = $labl$$label; 12168 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12169 %} 12170 ins_pipe(pipe_jcc); 12171%} 12172 12173instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{ 12174 match(If cop cmp); 12175 effect(USE labl); 12176 12177 ins_cost(200); 12178 format %{ "J$cop,u $labl" %} 12179 size(6); 12180 ins_encode %{ 12181 Label* L = $labl$$label; 12182 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12183 %} 12184 ins_pipe(pipe_jcc); 12185%} 12186 12187instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{ 12188 match(If cop cmp); 12189 effect(USE labl); 12190 12191 ins_cost(200); 12192 format %{ $$template 12193 if ($cop$$cmpcode == Assembler::notEqual) { 12194 $$emit$$"JP,u $labl\n\t" 12195 $$emit$$"J$cop,u $labl" 12196 } else { 12197 $$emit$$"JP,u done\n\t" 12198 $$emit$$"J$cop,u $labl\n\t" 12199 $$emit$$"done:" 12200 } 12201 %} 12202 ins_encode %{ 12203 Label* l = $labl$$label; 12204 if ($cop$$cmpcode == Assembler::notEqual) { 12205 __ jcc(Assembler::parity, *l, false); 12206 __ jcc(Assembler::notEqual, *l, false); 12207 } else if ($cop$$cmpcode == Assembler::equal) { 12208 Label done; 12209 __ jccb(Assembler::parity, done); 12210 __ jcc(Assembler::equal, *l, false); 12211 __ bind(done); 12212 } else { 12213 ShouldNotReachHere(); 12214 } 12215 %} 12216 ins_pipe(pipe_jcc); 12217%} 12218 12219// ============================================================================ 12220// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass 12221// array for an instance of the superklass. Set a hidden internal cache on a 12222// hit (cache is checked with exposed code in gen_subtype_check()). Return 12223// NZ for a miss or zero for a hit. The encoding ALSO sets flags. 12224instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{ 12225 match(Set result (PartialSubtypeCheck sub super)); 12226 effect( KILL rcx, KILL cr ); 12227 12228 ins_cost(1100); // slightly larger than the next version 12229 format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t" 12230 "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t" 12231 "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t" 12232 "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t" 12233 "JNE,s miss\t\t# Missed: EDI not-zero\n\t" 12234 "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t" 12235 "XOR $result,$result\t\t Hit: EDI zero\n\t" 12236 "miss:\t" %} 12237 12238 opcode(0x1); // Force a XOR of EDI 12239 ins_encode( enc_PartialSubtypeCheck() ); 12240 ins_pipe( pipe_slow ); 12241%} 12242 12243instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{ 12244 match(Set cr (CmpP (PartialSubtypeCheck sub super) zero)); 12245 effect( KILL rcx, KILL result ); 12246 12247 ins_cost(1000); 12248 format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t" 12249 "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t" 12250 "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t" 12251 "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t" 12252 "JNE,s miss\t\t# Missed: flags NZ\n\t" 12253 "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t" 12254 "miss:\t" %} 12255 12256 opcode(0x0); // No need to XOR EDI 12257 ins_encode( enc_PartialSubtypeCheck() ); 12258 ins_pipe( pipe_slow ); 12259%} 12260 12261// ============================================================================ 12262// Branch Instructions -- short offset versions 12263// 12264// These instructions are used to replace jumps of a long offset (the default 12265// match) with jumps of a shorter offset. These instructions are all tagged 12266// with the ins_short_branch attribute, which causes the ADLC to suppress the 12267// match rules in general matching. Instead, the ADLC generates a conversion 12268// method in the MachNode which can be used to do in-place replacement of the 12269// long variant with the shorter variant. The compiler will determine if a 12270// branch can be taken by the is_short_branch_offset() predicate in the machine 12271// specific code section of the file. 12272 12273// Jump Direct - Label defines a relative address from JMP+1 12274instruct jmpDir_short(label labl) %{ 12275 match(Goto); 12276 effect(USE labl); 12277 12278 ins_cost(300); 12279 format %{ "JMP,s $labl" %} 12280 size(2); 12281 ins_encode %{ 12282 Label* L = $labl$$label; 12283 __ jmpb(*L); 12284 %} 12285 ins_pipe( pipe_jmp ); 12286 ins_short_branch(1); 12287%} 12288 12289// Jump Direct Conditional - Label defines a relative address from Jcc+1 12290instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{ 12291 match(If cop cr); 12292 effect(USE labl); 12293 12294 ins_cost(300); 12295 format %{ "J$cop,s $labl" %} 12296 size(2); 12297 ins_encode %{ 12298 Label* L = $labl$$label; 12299 __ jccb((Assembler::Condition)($cop$$cmpcode), *L); 12300 %} 12301 ins_pipe( pipe_jcc ); 12302 ins_short_branch(1); 12303%} 12304 12305// Jump Direct Conditional - Label defines a relative address from Jcc+1 12306instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{ 12307 match(CountedLoopEnd cop cr); 12308 effect(USE labl); 12309 12310 ins_cost(300); 12311 format %{ "J$cop,s $labl\t# Loop end" %} 12312 size(2); 12313 ins_encode %{ 12314 Label* L = $labl$$label; 12315 __ jccb((Assembler::Condition)($cop$$cmpcode), *L); 12316 %} 12317 ins_pipe( pipe_jcc ); 12318 ins_short_branch(1); 12319%} 12320 12321// Jump Direct Conditional - Label defines a relative address from Jcc+1 12322instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{ 12323 match(CountedLoopEnd cop cmp); 12324 effect(USE labl); 12325 12326 ins_cost(300); 12327 format %{ "J$cop,us $labl\t# Loop end" %} 12328 size(2); 12329 ins_encode %{ 12330 Label* L = $labl$$label; 12331 __ jccb((Assembler::Condition)($cop$$cmpcode), *L); 12332 %} 12333 ins_pipe( pipe_jcc ); 12334 ins_short_branch(1); 12335%} 12336 12337instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{ 12338 match(CountedLoopEnd cop cmp); 12339 effect(USE labl); 12340 12341 ins_cost(300); 12342 format %{ "J$cop,us $labl\t# Loop end" %} 12343 size(2); 12344 ins_encode %{ 12345 Label* L = $labl$$label; 12346 __ jccb((Assembler::Condition)($cop$$cmpcode), *L); 12347 %} 12348 ins_pipe( pipe_jcc ); 12349 ins_short_branch(1); 12350%} 12351 12352// Jump Direct Conditional - using unsigned comparison 12353instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{ 12354 match(If cop cmp); 12355 effect(USE labl); 12356 12357 ins_cost(300); 12358 format %{ "J$cop,us $labl" %} 12359 size(2); 12360 ins_encode %{ 12361 Label* L = $labl$$label; 12362 __ jccb((Assembler::Condition)($cop$$cmpcode), *L); 12363 %} 12364 ins_pipe( pipe_jcc ); 12365 ins_short_branch(1); 12366%} 12367 12368instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{ 12369 match(If cop cmp); 12370 effect(USE labl); 12371 12372 ins_cost(300); 12373 format %{ "J$cop,us $labl" %} 12374 size(2); 12375 ins_encode %{ 12376 Label* L = $labl$$label; 12377 __ jccb((Assembler::Condition)($cop$$cmpcode), *L); 12378 %} 12379 ins_pipe( pipe_jcc ); 12380 ins_short_branch(1); 12381%} 12382 12383instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{ 12384 match(If cop cmp); 12385 effect(USE labl); 12386 12387 ins_cost(300); 12388 format %{ $$template 12389 if ($cop$$cmpcode == Assembler::notEqual) { 12390 $$emit$$"JP,u,s $labl\n\t" 12391 $$emit$$"J$cop,u,s $labl" 12392 } else { 12393 $$emit$$"JP,u,s done\n\t" 12394 $$emit$$"J$cop,u,s $labl\n\t" 12395 $$emit$$"done:" 12396 } 12397 %} 12398 size(4); 12399 ins_encode %{ 12400 Label* l = $labl$$label; 12401 if ($cop$$cmpcode == Assembler::notEqual) { 12402 __ jccb(Assembler::parity, *l); 12403 __ jccb(Assembler::notEqual, *l); 12404 } else if ($cop$$cmpcode == Assembler::equal) { 12405 Label done; 12406 __ jccb(Assembler::parity, done); 12407 __ jccb(Assembler::equal, *l); 12408 __ bind(done); 12409 } else { 12410 ShouldNotReachHere(); 12411 } 12412 %} 12413 ins_pipe(pipe_jcc); 12414 ins_short_branch(1); 12415%} 12416 12417// ============================================================================ 12418// Long Compare 12419// 12420// Currently we hold longs in 2 registers. Comparing such values efficiently 12421// is tricky. The flavor of compare used depends on whether we are testing 12422// for LT, LE, or EQ. For a simple LT test we can check just the sign bit. 12423// The GE test is the negated LT test. The LE test can be had by commuting 12424// the operands (yielding a GE test) and then negating; negate again for the 12425// GT test. The EQ test is done by ORcc'ing the high and low halves, and the 12426// NE test is negated from that. 12427 12428// Due to a shortcoming in the ADLC, it mixes up expressions like: 12429// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the 12430// difference between 'Y' and '0L'. The tree-matches for the CmpI sections 12431// are collapsed internally in the ADLC's dfa-gen code. The match for 12432// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the 12433// foo match ends up with the wrong leaf. One fix is to not match both 12434// reg-reg and reg-zero forms of long-compare. This is unfortunate because 12435// both forms beat the trinary form of long-compare and both are very useful 12436// on Intel which has so few registers. 12437 12438// Manifest a CmpL result in an integer register. Very painful. 12439// This is the test to avoid. 12440instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{ 12441 match(Set dst (CmpL3 src1 src2)); 12442 effect( KILL flags ); 12443 ins_cost(1000); 12444 format %{ "XOR $dst,$dst\n\t" 12445 "CMP $src1.hi,$src2.hi\n\t" 12446 "JLT,s m_one\n\t" 12447 "JGT,s p_one\n\t" 12448 "CMP $src1.lo,$src2.lo\n\t" 12449 "JB,s m_one\n\t" 12450 "JEQ,s done\n" 12451 "p_one:\tINC $dst\n\t" 12452 "JMP,s done\n" 12453 "m_one:\tDEC $dst\n" 12454 "done:" %} 12455 ins_encode %{ 12456 Label p_one, m_one, done; 12457 __ xorptr($dst$$Register, $dst$$Register); 12458 __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register)); 12459 __ jccb(Assembler::less, m_one); 12460 __ jccb(Assembler::greater, p_one); 12461 __ cmpl($src1$$Register, $src2$$Register); 12462 __ jccb(Assembler::below, m_one); 12463 __ jccb(Assembler::equal, done); 12464 __ bind(p_one); 12465 __ incrementl($dst$$Register); 12466 __ jmpb(done); 12467 __ bind(m_one); 12468 __ decrementl($dst$$Register); 12469 __ bind(done); 12470 %} 12471 ins_pipe( pipe_slow ); 12472%} 12473 12474//====== 12475// Manifest a CmpL result in the normal flags. Only good for LT or GE 12476// compares. Can be used for LE or GT compares by reversing arguments. 12477// NOT GOOD FOR EQ/NE tests. 12478instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{ 12479 match( Set flags (CmpL src zero )); 12480 ins_cost(100); 12481 format %{ "TEST $src.hi,$src.hi" %} 12482 opcode(0x85); 12483 ins_encode( OpcP, RegReg_Hi2( src, src ) ); 12484 ins_pipe( ialu_cr_reg_reg ); 12485%} 12486 12487// Manifest a CmpL result in the normal flags. Only good for LT or GE 12488// compares. Can be used for LE or GT compares by reversing arguments. 12489// NOT GOOD FOR EQ/NE tests. 12490instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{ 12491 match( Set flags (CmpL src1 src2 )); 12492 effect( TEMP tmp ); 12493 ins_cost(300); 12494 format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t" 12495 "MOV $tmp,$src1.hi\n\t" 12496 "SBB $tmp,$src2.hi\t! Compute flags for long compare" %} 12497 ins_encode( long_cmp_flags2( src1, src2, tmp ) ); 12498 ins_pipe( ialu_cr_reg_reg ); 12499%} 12500 12501// Long compares reg < zero/req OR reg >= zero/req. 12502// Just a wrapper for a normal branch, plus the predicate test. 12503instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{ 12504 match(If cmp flags); 12505 effect(USE labl); 12506 predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); 12507 expand %{ 12508 jmpCon(cmp,flags,labl); // JLT or JGE... 12509 %} 12510%} 12511 12512// Compare 2 longs and CMOVE longs. 12513instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{ 12514 match(Set dst (CMoveL (Binary cmp flags) (Binary dst src))); 12515 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); 12516 ins_cost(400); 12517 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 12518 "CMOV$cmp $dst.hi,$src.hi" %} 12519 opcode(0x0F,0x40); 12520 ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) ); 12521 ins_pipe( pipe_cmov_reg_long ); 12522%} 12523 12524instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{ 12525 match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src)))); 12526 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); 12527 ins_cost(500); 12528 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 12529 "CMOV$cmp $dst.hi,$src.hi" %} 12530 opcode(0x0F,0x40); 12531 ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) ); 12532 ins_pipe( pipe_cmov_reg_long ); 12533%} 12534 12535// Compare 2 longs and CMOVE ints. 12536instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{ 12537 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); 12538 match(Set dst (CMoveI (Binary cmp flags) (Binary dst src))); 12539 ins_cost(200); 12540 format %{ "CMOV$cmp $dst,$src" %} 12541 opcode(0x0F,0x40); 12542 ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); 12543 ins_pipe( pipe_cmov_reg ); 12544%} 12545 12546instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{ 12547 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); 12548 match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src)))); 12549 ins_cost(250); 12550 format %{ "CMOV$cmp $dst,$src" %} 12551 opcode(0x0F,0x40); 12552 ins_encode( enc_cmov(cmp), RegMem( dst, src ) ); 12553 ins_pipe( pipe_cmov_mem ); 12554%} 12555 12556// Compare 2 longs and CMOVE ints. 12557instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{ 12558 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); 12559 match(Set dst (CMoveP (Binary cmp flags) (Binary dst src))); 12560 ins_cost(200); 12561 format %{ "CMOV$cmp $dst,$src" %} 12562 opcode(0x0F,0x40); 12563 ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); 12564 ins_pipe( pipe_cmov_reg ); 12565%} 12566 12567// Compare 2 longs and CMOVE doubles 12568instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{ 12569 predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); 12570 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); 12571 ins_cost(200); 12572 expand %{ 12573 fcmovDPR_regS(cmp,flags,dst,src); 12574 %} 12575%} 12576 12577// Compare 2 longs and CMOVE doubles 12578instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{ 12579 predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); 12580 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); 12581 ins_cost(200); 12582 expand %{ 12583 fcmovD_regS(cmp,flags,dst,src); 12584 %} 12585%} 12586 12587instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{ 12588 predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); 12589 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 12590 ins_cost(200); 12591 expand %{ 12592 fcmovFPR_regS(cmp,flags,dst,src); 12593 %} 12594%} 12595 12596instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{ 12597 predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); 12598 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 12599 ins_cost(200); 12600 expand %{ 12601 fcmovF_regS(cmp,flags,dst,src); 12602 %} 12603%} 12604 12605//====== 12606// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares. 12607instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{ 12608 match( Set flags (CmpL src zero )); 12609 effect(TEMP tmp); 12610 ins_cost(200); 12611 format %{ "MOV $tmp,$src.lo\n\t" 12612 "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %} 12613 ins_encode( long_cmp_flags0( src, tmp ) ); 12614 ins_pipe( ialu_reg_reg_long ); 12615%} 12616 12617// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares. 12618instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{ 12619 match( Set flags (CmpL src1 src2 )); 12620 ins_cost(200+300); 12621 format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t" 12622 "JNE,s skip\n\t" 12623 "CMP $src1.hi,$src2.hi\n\t" 12624 "skip:\t" %} 12625 ins_encode( long_cmp_flags1( src1, src2 ) ); 12626 ins_pipe( ialu_cr_reg_reg ); 12627%} 12628 12629// Long compare reg == zero/reg OR reg != zero/reg 12630// Just a wrapper for a normal branch, plus the predicate test. 12631instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{ 12632 match(If cmp flags); 12633 effect(USE labl); 12634 predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); 12635 expand %{ 12636 jmpCon(cmp,flags,labl); // JEQ or JNE... 12637 %} 12638%} 12639 12640// Compare 2 longs and CMOVE longs. 12641instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{ 12642 match(Set dst (CMoveL (Binary cmp flags) (Binary dst src))); 12643 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 12644 ins_cost(400); 12645 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 12646 "CMOV$cmp $dst.hi,$src.hi" %} 12647 opcode(0x0F,0x40); 12648 ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) ); 12649 ins_pipe( pipe_cmov_reg_long ); 12650%} 12651 12652instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{ 12653 match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src)))); 12654 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 12655 ins_cost(500); 12656 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 12657 "CMOV$cmp $dst.hi,$src.hi" %} 12658 opcode(0x0F,0x40); 12659 ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) ); 12660 ins_pipe( pipe_cmov_reg_long ); 12661%} 12662 12663// Compare 2 longs and CMOVE ints. 12664instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{ 12665 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 12666 match(Set dst (CMoveI (Binary cmp flags) (Binary dst src))); 12667 ins_cost(200); 12668 format %{ "CMOV$cmp $dst,$src" %} 12669 opcode(0x0F,0x40); 12670 ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); 12671 ins_pipe( pipe_cmov_reg ); 12672%} 12673 12674instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{ 12675 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 12676 match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src)))); 12677 ins_cost(250); 12678 format %{ "CMOV$cmp $dst,$src" %} 12679 opcode(0x0F,0x40); 12680 ins_encode( enc_cmov(cmp), RegMem( dst, src ) ); 12681 ins_pipe( pipe_cmov_mem ); 12682%} 12683 12684// Compare 2 longs and CMOVE ints. 12685instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{ 12686 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 12687 match(Set dst (CMoveP (Binary cmp flags) (Binary dst src))); 12688 ins_cost(200); 12689 format %{ "CMOV$cmp $dst,$src" %} 12690 opcode(0x0F,0x40); 12691 ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); 12692 ins_pipe( pipe_cmov_reg ); 12693%} 12694 12695// Compare 2 longs and CMOVE doubles 12696instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{ 12697 predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); 12698 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); 12699 ins_cost(200); 12700 expand %{ 12701 fcmovDPR_regS(cmp,flags,dst,src); 12702 %} 12703%} 12704 12705// Compare 2 longs and CMOVE doubles 12706instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{ 12707 predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); 12708 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); 12709 ins_cost(200); 12710 expand %{ 12711 fcmovD_regS(cmp,flags,dst,src); 12712 %} 12713%} 12714 12715instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{ 12716 predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); 12717 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 12718 ins_cost(200); 12719 expand %{ 12720 fcmovFPR_regS(cmp,flags,dst,src); 12721 %} 12722%} 12723 12724instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{ 12725 predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); 12726 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 12727 ins_cost(200); 12728 expand %{ 12729 fcmovF_regS(cmp,flags,dst,src); 12730 %} 12731%} 12732 12733//====== 12734// Manifest a CmpL result in the normal flags. Only good for LE or GT compares. 12735// Same as cmpL_reg_flags_LEGT except must negate src 12736instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{ 12737 match( Set flags (CmpL src zero )); 12738 effect( TEMP tmp ); 12739 ins_cost(300); 12740 format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t" 12741 "CMP $tmp,$src.lo\n\t" 12742 "SBB $tmp,$src.hi\n\t" %} 12743 ins_encode( long_cmp_flags3(src, tmp) ); 12744 ins_pipe( ialu_reg_reg_long ); 12745%} 12746 12747// Manifest a CmpL result in the normal flags. Only good for LE or GT compares. 12748// Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands 12749// requires a commuted test to get the same result. 12750instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{ 12751 match( Set flags (CmpL src1 src2 )); 12752 effect( TEMP tmp ); 12753 ins_cost(300); 12754 format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t" 12755 "MOV $tmp,$src2.hi\n\t" 12756 "SBB $tmp,$src1.hi\t! Compute flags for long compare" %} 12757 ins_encode( long_cmp_flags2( src2, src1, tmp ) ); 12758 ins_pipe( ialu_cr_reg_reg ); 12759%} 12760 12761// Long compares reg < zero/req OR reg >= zero/req. 12762// Just a wrapper for a normal branch, plus the predicate test 12763instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{ 12764 match(If cmp flags); 12765 effect(USE labl); 12766 predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ); 12767 ins_cost(300); 12768 expand %{ 12769 jmpCon(cmp,flags,labl); // JGT or JLE... 12770 %} 12771%} 12772 12773// Compare 2 longs and CMOVE longs. 12774instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{ 12775 match(Set dst (CMoveL (Binary cmp flags) (Binary dst src))); 12776 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); 12777 ins_cost(400); 12778 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 12779 "CMOV$cmp $dst.hi,$src.hi" %} 12780 opcode(0x0F,0x40); 12781 ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) ); 12782 ins_pipe( pipe_cmov_reg_long ); 12783%} 12784 12785instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{ 12786 match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src)))); 12787 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); 12788 ins_cost(500); 12789 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 12790 "CMOV$cmp $dst.hi,$src.hi+4" %} 12791 opcode(0x0F,0x40); 12792 ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) ); 12793 ins_pipe( pipe_cmov_reg_long ); 12794%} 12795 12796// Compare 2 longs and CMOVE ints. 12797instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{ 12798 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); 12799 match(Set dst (CMoveI (Binary cmp flags) (Binary dst src))); 12800 ins_cost(200); 12801 format %{ "CMOV$cmp $dst,$src" %} 12802 opcode(0x0F,0x40); 12803 ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); 12804 ins_pipe( pipe_cmov_reg ); 12805%} 12806 12807instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{ 12808 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); 12809 match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src)))); 12810 ins_cost(250); 12811 format %{ "CMOV$cmp $dst,$src" %} 12812 opcode(0x0F,0x40); 12813 ins_encode( enc_cmov(cmp), RegMem( dst, src ) ); 12814 ins_pipe( pipe_cmov_mem ); 12815%} 12816 12817// Compare 2 longs and CMOVE ptrs. 12818instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{ 12819 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); 12820 match(Set dst (CMoveP (Binary cmp flags) (Binary dst src))); 12821 ins_cost(200); 12822 format %{ "CMOV$cmp $dst,$src" %} 12823 opcode(0x0F,0x40); 12824 ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); 12825 ins_pipe( pipe_cmov_reg ); 12826%} 12827 12828// Compare 2 longs and CMOVE doubles 12829instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{ 12830 predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); 12831 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); 12832 ins_cost(200); 12833 expand %{ 12834 fcmovDPR_regS(cmp,flags,dst,src); 12835 %} 12836%} 12837 12838// Compare 2 longs and CMOVE doubles 12839instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{ 12840 predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); 12841 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); 12842 ins_cost(200); 12843 expand %{ 12844 fcmovD_regS(cmp,flags,dst,src); 12845 %} 12846%} 12847 12848instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{ 12849 predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); 12850 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 12851 ins_cost(200); 12852 expand %{ 12853 fcmovFPR_regS(cmp,flags,dst,src); 12854 %} 12855%} 12856 12857 12858instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{ 12859 predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); 12860 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 12861 ins_cost(200); 12862 expand %{ 12863 fcmovF_regS(cmp,flags,dst,src); 12864 %} 12865%} 12866 12867 12868// ============================================================================ 12869// Procedure Call/Return Instructions 12870// Call Java Static Instruction 12871// Note: If this code changes, the corresponding ret_addr_offset() and 12872// compute_padding() functions will have to be adjusted. 12873instruct CallStaticJavaDirect(method meth) %{ 12874 match(CallStaticJava); 12875 effect(USE meth); 12876 12877 ins_cost(300); 12878 format %{ "CALL,static " %} 12879 opcode(0xE8); /* E8 cd */ 12880 ins_encode( pre_call_resets, 12881 Java_Static_Call( meth ), 12882 call_epilog, 12883 post_call_FPU ); 12884 ins_pipe( pipe_slow ); 12885 ins_alignment(4); 12886%} 12887 12888// Call Java Dynamic Instruction 12889// Note: If this code changes, the corresponding ret_addr_offset() and 12890// compute_padding() functions will have to be adjusted. 12891instruct CallDynamicJavaDirect(method meth) %{ 12892 match(CallDynamicJava); 12893 effect(USE meth); 12894 12895 ins_cost(300); 12896 format %{ "MOV EAX,(oop)-1\n\t" 12897 "CALL,dynamic" %} 12898 opcode(0xE8); /* E8 cd */ 12899 ins_encode( pre_call_resets, 12900 Java_Dynamic_Call( meth ), 12901 call_epilog, 12902 post_call_FPU ); 12903 ins_pipe( pipe_slow ); 12904 ins_alignment(4); 12905%} 12906 12907// Call Runtime Instruction 12908instruct CallRuntimeDirect(method meth) %{ 12909 match(CallRuntime ); 12910 effect(USE meth); 12911 12912 ins_cost(300); 12913 format %{ "CALL,runtime " %} 12914 opcode(0xE8); /* E8 cd */ 12915 // Use FFREEs to clear entries in float stack 12916 ins_encode( pre_call_resets, 12917 FFree_Float_Stack_All, 12918 Java_To_Runtime( meth ), 12919 post_call_FPU ); 12920 ins_pipe( pipe_slow ); 12921%} 12922 12923// Call runtime without safepoint 12924instruct CallLeafDirect(method meth) %{ 12925 match(CallLeaf); 12926 effect(USE meth); 12927 12928 ins_cost(300); 12929 format %{ "CALL_LEAF,runtime " %} 12930 opcode(0xE8); /* E8 cd */ 12931 ins_encode( pre_call_resets, 12932 FFree_Float_Stack_All, 12933 Java_To_Runtime( meth ), 12934 Verify_FPU_For_Leaf, post_call_FPU ); 12935 ins_pipe( pipe_slow ); 12936%} 12937 12938instruct CallLeafNoFPDirect(method meth) %{ 12939 match(CallLeafNoFP); 12940 effect(USE meth); 12941 12942 ins_cost(300); 12943 format %{ "CALL_LEAF_NOFP,runtime " %} 12944 opcode(0xE8); /* E8 cd */ 12945 ins_encode(Java_To_Runtime(meth)); 12946 ins_pipe( pipe_slow ); 12947%} 12948 12949 12950// Return Instruction 12951// Remove the return address & jump to it. 12952instruct Ret() %{ 12953 match(Return); 12954 format %{ "RET" %} 12955 opcode(0xC3); 12956 ins_encode(OpcP); 12957 ins_pipe( pipe_jmp ); 12958%} 12959 12960// Tail Call; Jump from runtime stub to Java code. 12961// Also known as an 'interprocedural jump'. 12962// Target of jump will eventually return to caller. 12963// TailJump below removes the return address. 12964instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{ 12965 match(TailCall jump_target method_oop ); 12966 ins_cost(300); 12967 format %{ "JMP $jump_target \t# EBX holds method oop" %} 12968 opcode(0xFF, 0x4); /* Opcode FF /4 */ 12969 ins_encode( OpcP, RegOpc(jump_target) ); 12970 ins_pipe( pipe_jmp ); 12971%} 12972 12973 12974// Tail Jump; remove the return address; jump to target. 12975// TailCall above leaves the return address around. 12976instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{ 12977 match( TailJump jump_target ex_oop ); 12978 ins_cost(300); 12979 format %{ "POP EDX\t# pop return address into dummy\n\t" 12980 "JMP $jump_target " %} 12981 opcode(0xFF, 0x4); /* Opcode FF /4 */ 12982 ins_encode( enc_pop_rdx, 12983 OpcP, RegOpc(jump_target) ); 12984 ins_pipe( pipe_jmp ); 12985%} 12986 12987// Create exception oop: created by stack-crawling runtime code. 12988// Created exception is now available to this handler, and is setup 12989// just prior to jumping to this handler. No code emitted. 12990instruct CreateException( eAXRegP ex_oop ) 12991%{ 12992 match(Set ex_oop (CreateEx)); 12993 12994 size(0); 12995 // use the following format syntax 12996 format %{ "# exception oop is in EAX; no code emitted" %} 12997 ins_encode(); 12998 ins_pipe( empty ); 12999%} 13000 13001 13002// Rethrow exception: 13003// The exception oop will come in the first argument position. 13004// Then JUMP (not call) to the rethrow stub code. 13005instruct RethrowException() 13006%{ 13007 match(Rethrow); 13008 13009 // use the following format syntax 13010 format %{ "JMP rethrow_stub" %} 13011 ins_encode(enc_rethrow); 13012 ins_pipe( pipe_jmp ); 13013%} 13014 13015// inlined locking and unlocking 13016 13017instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{ 13018 predicate(Compile::current()->use_rtm()); 13019 match(Set cr (FastLock object box)); 13020 effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box); 13021 ins_cost(300); 13022 format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %} 13023 ins_encode %{ 13024 __ fast_lock($object$$Register, $box$$Register, $tmp$$Register, 13025 $scr$$Register, $cx1$$Register, $cx2$$Register, 13026 _counters, _rtm_counters, _stack_rtm_counters, 13027 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(), 13028 true, ra_->C->profile_rtm()); 13029 %} 13030 ins_pipe(pipe_slow); 13031%} 13032 13033instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{ 13034 predicate(!Compile::current()->use_rtm()); 13035 match(Set cr (FastLock object box)); 13036 effect(TEMP tmp, TEMP scr, USE_KILL box); 13037 ins_cost(300); 13038 format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %} 13039 ins_encode %{ 13040 __ fast_lock($object$$Register, $box$$Register, $tmp$$Register, 13041 $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false); 13042 %} 13043 ins_pipe(pipe_slow); 13044%} 13045 13046instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{ 13047 match(Set cr (FastUnlock object box)); 13048 effect(TEMP tmp, USE_KILL box); 13049 ins_cost(300); 13050 format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %} 13051 ins_encode %{ 13052 __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm()); 13053 %} 13054 ins_pipe(pipe_slow); 13055%} 13056 13057 13058 13059// ============================================================================ 13060// Safepoint Instruction 13061instruct safePoint_poll(eFlagsReg cr) %{ 13062 match(SafePoint); 13063 effect(KILL cr); 13064 13065 // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page. 13066 // On SPARC that might be acceptable as we can generate the address with 13067 // just a sethi, saving an or. By polling at offset 0 we can end up 13068 // putting additional pressure on the index-0 in the D$. Because of 13069 // alignment (just like the situation at hand) the lower indices tend 13070 // to see more traffic. It'd be better to change the polling address 13071 // to offset 0 of the last $line in the polling page. 13072 13073 format %{ "TSTL #polladdr,EAX\t! Safepoint: poll for GC" %} 13074 ins_cost(125); 13075 size(6) ; 13076 ins_encode( Safepoint_Poll() ); 13077 ins_pipe( ialu_reg_mem ); 13078%} 13079 13080 13081// ============================================================================ 13082// This name is KNOWN by the ADLC and cannot be changed. 13083// The ADLC forces a 'TypeRawPtr::BOTTOM' output type 13084// for this guy. 13085instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{ 13086 match(Set dst (ThreadLocal)); 13087 effect(DEF dst, KILL cr); 13088 13089 format %{ "MOV $dst, Thread::current()" %} 13090 ins_encode %{ 13091 Register dstReg = as_Register($dst$$reg); 13092 __ get_thread(dstReg); 13093 %} 13094 ins_pipe( ialu_reg_fat ); 13095%} 13096 13097 13098 13099//----------PEEPHOLE RULES----------------------------------------------------- 13100// These must follow all instruction definitions as they use the names 13101// defined in the instructions definitions. 13102// 13103// peepmatch ( root_instr_name [preceding_instruction]* ); 13104// 13105// peepconstraint %{ 13106// (instruction_number.operand_name relational_op instruction_number.operand_name 13107// [, ...] ); 13108// // instruction numbers are zero-based using left to right order in peepmatch 13109// 13110// peepreplace ( instr_name ( [instruction_number.operand_name]* ) ); 13111// // provide an instruction_number.operand_name for each operand that appears 13112// // in the replacement instruction's match rule 13113// 13114// ---------VM FLAGS--------------------------------------------------------- 13115// 13116// All peephole optimizations can be turned off using -XX:-OptoPeephole 13117// 13118// Each peephole rule is given an identifying number starting with zero and 13119// increasing by one in the order seen by the parser. An individual peephole 13120// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=# 13121// on the command-line. 13122// 13123// ---------CURRENT LIMITATIONS---------------------------------------------- 13124// 13125// Only match adjacent instructions in same basic block 13126// Only equality constraints 13127// Only constraints between operands, not (0.dest_reg == EAX_enc) 13128// Only one replacement instruction 13129// 13130// ---------EXAMPLE---------------------------------------------------------- 13131// 13132// // pertinent parts of existing instructions in architecture description 13133// instruct movI(rRegI dst, rRegI src) %{ 13134// match(Set dst (CopyI src)); 13135// %} 13136// 13137// instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{ 13138// match(Set dst (AddI dst src)); 13139// effect(KILL cr); 13140// %} 13141// 13142// // Change (inc mov) to lea 13143// peephole %{ 13144// // increment preceeded by register-register move 13145// peepmatch ( incI_eReg movI ); 13146// // require that the destination register of the increment 13147// // match the destination register of the move 13148// peepconstraint ( 0.dst == 1.dst ); 13149// // construct a replacement instruction that sets 13150// // the destination to ( move's source register + one ) 13151// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); 13152// %} 13153// 13154// Implementation no longer uses movX instructions since 13155// machine-independent system no longer uses CopyX nodes. 13156// 13157// peephole %{ 13158// peepmatch ( incI_eReg movI ); 13159// peepconstraint ( 0.dst == 1.dst ); 13160// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); 13161// %} 13162// 13163// peephole %{ 13164// peepmatch ( decI_eReg movI ); 13165// peepconstraint ( 0.dst == 1.dst ); 13166// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); 13167// %} 13168// 13169// peephole %{ 13170// peepmatch ( addI_eReg_imm movI ); 13171// peepconstraint ( 0.dst == 1.dst ); 13172// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); 13173// %} 13174// 13175// peephole %{ 13176// peepmatch ( addP_eReg_imm movP ); 13177// peepconstraint ( 0.dst == 1.dst ); 13178// peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) ); 13179// %} 13180 13181// // Change load of spilled value to only a spill 13182// instruct storeI(memory mem, rRegI src) %{ 13183// match(Set mem (StoreI mem src)); 13184// %} 13185// 13186// instruct loadI(rRegI dst, memory mem) %{ 13187// match(Set dst (LoadI mem)); 13188// %} 13189// 13190peephole %{ 13191 peepmatch ( loadI storeI ); 13192 peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem ); 13193 peepreplace ( storeI( 1.mem 1.mem 1.src ) ); 13194%} 13195 13196//----------SMARTSPILL RULES--------------------------------------------------- 13197// These must follow all instruction definitions as they use the names 13198// defined in the instructions definitions. 13199