x86_32.ad revision 1668:3e8fbc61cee8
1// 2// Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. 3// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4// 5// This code is free software; you can redistribute it and/or modify it 6// under the terms of the GNU General Public License version 2 only, as 7// published by the Free Software Foundation. 8// 9// This code is distributed in the hope that it will be useful, but WITHOUT 10// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12// version 2 for more details (a copy is included in the LICENSE file that 13// accompanied this code). 14// 15// You should have received a copy of the GNU General Public License version 16// 2 along with this work; if not, write to the Free Software Foundation, 17// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18// 19// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20// or visit www.oracle.com if you need additional information or have any 21// questions. 22// 23// 24 25// X86 Architecture Description File 26 27//----------REGISTER DEFINITION BLOCK------------------------------------------ 28// This information is used by the matcher and the register allocator to 29// describe individual registers and classes of registers within the target 30// archtecture. 31 32register %{ 33//----------Architecture Description Register Definitions---------------------- 34// General Registers 35// "reg_def" name ( register save type, C convention save type, 36// ideal register type, encoding ); 37// Register Save Types: 38// 39// NS = No-Save: The register allocator assumes that these registers 40// can be used without saving upon entry to the method, & 41// that they do not need to be saved at call sites. 
//
// SOC = Save-On-Call: The register allocator assumes that these registers
// can be used without saving upon entry to the method,
// but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
// must be saved before using them upon entry to the
// method, but they do not need to be saved at call
// sites.
//
// AS = Always-Save: The register allocator assumes that these registers
// must be saved before using them upon entry to the
// method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.
//
// Each reg_def below also passes a fifth argument: a VMReg expression
// (e.g. rbx->as_VMReg(); VMRegImpl::Bad() for EFLAGS).

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.
// NOTE(review): in the definitions below ESI and EDI carry SOC in the first
// field and SOE only in the second.  If the first field is the register save
// type used for Java code and the second the C convention save type (as the
// reg_def field list in this file's header comment reads), the last sentence
// of the history note above is stale -- confirm before relying on it.

reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg());

// Special Registers
// EFLAGS has ideal register type 0 and no backing VMReg (VMRegImpl::Bad()).
reg_def EFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());

// Float registers. We treat TOS/FPR0 special. It is invisible to the
// allocator, and only shows up in the encodings.
// FPR0L/FPR0H: encoding 0, no backing VMReg (VMRegImpl::Bad()).
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick: FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example.
//
// Consequently FPRn below has encoding n but is backed by
// as_FloatRegister(n-1).  The L and H halves share one encoding; the H half's
// VMReg is the L half's VMReg ->next().
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());

// XMM registers. 128-bit registers or 4 words each, labeled a-d.
// Word a in each register holds a Float, words ab hold a Double.
// We currently do not use the SIMD capabilities, so words c and d
// are unused at the moment (only the a and b words are defined below).
// The b word's VMReg is the a word's VMReg ->next(); both share one encoding.
reg_def XMM0a( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next());
reg_def XMM1a( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next());
reg_def XMM2a( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next());
reg_def XMM3a( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next());
reg_def XMM4a( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next());
reg_def XMM5a( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next());
reg_def XMM6a( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next());
reg_def XMM7a( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next());

// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX. Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
// chunk0: the integer registers followed by the x87 FPR halves.  The integer
// list places ECX,EBX / EBP,EDI / EAX,EDX next to each other, and every
// FPRnL is immediately followed by its FPRnH.
alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H );

// chunk1: the XMM a/b words, with EFLAGS listed last.
alloc_class chunk1( XMM0a, XMM0b,
                    XMM1a, XMM1b,
                    XMM2a, XMM2b,
                    XMM3a, XMM3b,
                    XMM4a, XMM4b,
                    XMM5a, XMM5b,
                    XMM6a, XMM6b,
                    XMM7a, XMM7b, EFLAGS);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for all registers
reg_class any_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for general registers
reg_class e_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers which may be used for implicit null checks on win95
// Also safe for use by tailjump. We don't want to allocate in rbp.
reg_class e_reg_no_rbp(EAX, EDX, EDI, ESI, ECX, EBX);
// Class of "X" registers
reg_class x_reg(EBX, ECX, EDX, EAX);
// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);
// Class for general registers not including ECX
reg_class ncx_reg(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers not including EAX
// (as listed, this class also leaves out EBP)
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
// Class for general registers not including EAX or EBX.
reg_class nabx_reg(EDX, EDI, ESI, ECX, EBP);  // no EAX, no EBX
// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);
// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);
// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);
// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);
// Class of EDI (for synchronization)
reg_class edi_reg(EDI);
// Class of ESI (for synchronization)
reg_class esi_reg(ESI);
// Singleton class for interpreter's stack pointer
reg_class ebp_reg(EBP);
// Singleton class for stack pointer
reg_class sp_reg(ESP);
// Singleton class for instruction pointer (disabled: no EIP reg_def exists here)
// reg_class ip_reg(EIP);
// Singleton class for condition codes
reg_class int_flags(EFLAGS);
// Class of integer register pairs; members are listed pairwise:
// EAX,EDX / ECX,EBX / EBP,EDI
reg_class long_reg( EAX,EDX, ECX,EBX, EBP,EDI );
// Classes of integer register pairs that align with the calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );
// Not AX or DX, used in divides
reg_class nadx_reg( EBX,ECX,ESI,EDI,EBP );

// Floating point registers. Notice FPR0 is not a choice.
// FPR0 is never allocated; we use clever encodings to fake
// a 2-address instruction out of Intel's FP stack.
209reg_class flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L ); 210 211// make a register class for SSE registers 212reg_class xmm_reg(XMM0a, XMM1a, XMM2a, XMM3a, XMM4a, XMM5a, XMM6a, XMM7a); 213 214// make a double register class for SSE2 registers 215reg_class xdb_reg(XMM0a,XMM0b, XMM1a,XMM1b, XMM2a,XMM2b, XMM3a,XMM3b, 216 XMM4a,XMM4b, XMM5a,XMM5b, XMM6a,XMM6b, XMM7a,XMM7b ); 217 218reg_class dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H, 219 FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H, 220 FPR7L,FPR7H ); 221 222reg_class flt_reg0( FPR1L ); 223reg_class dbl_reg0( FPR1L,FPR1H ); 224reg_class dbl_reg1( FPR2L,FPR2H ); 225reg_class dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H, 226 FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H ); 227 228// XMM6 and XMM7 could be used as temporary registers for long, float and 229// double values for SSE2. 230reg_class xdb_reg6( XMM6a,XMM6b ); 231reg_class xdb_reg7( XMM7a,XMM7b ); 232%} 233 234 235//----------SOURCE BLOCK------------------------------------------------------- 236// This is a block of C++ code which provides values, functions, and 237// definitions necessary in the rest of the architecture description 238source_hpp %{ 239// Must be visible to the DFA in dfa_x86_32.cpp 240extern bool is_operand_hi32_zero(Node* n); 241%} 242 243source %{ 244#define RELOC_IMM32 Assembler::imm_operand 245#define RELOC_DISP32 Assembler::disp32_operand 246 247#define __ _masm. 248 249// How to find the high register of a Long pair, given the low register 250#define HIGH_FROM_LOW(x) ((x)+2) 251 252// These masks are used to provide 128-bit aligned bitmasks to the XMM 253// instructions, to allow sign-masking or sign-bit flipping. They allow 254// fast versions of NegF/NegD and AbsF/AbsD. 255 256// Note: 'double' and 'long long' have 32-bits alignment on x86. 257static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) { 258 // Use the expression (adr)&(~0xF) to provide 128-bits aligned address 259 // of 128-bits operands for SSE instructions. 
260 jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF))); 261 // Store the value to a 128-bits operand. 262 operand[0] = lo; 263 operand[1] = hi; 264 return operand; 265} 266 267// Buffer for 128-bits masks used by SSE instructions. 268static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment) 269 270// Static initialization during VM startup. 271static jlong *float_signmask_pool = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF)); 272static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF)); 273static jlong *float_signflip_pool = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000)); 274static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000)); 275 276// Offset hacking within calls. 277static int pre_call_FPU_size() { 278 if (Compile::current()->in_24_bit_fp_mode()) 279 return 6; // fldcw 280 return 0; 281} 282 283static int preserve_SP_size() { 284 return LP64_ONLY(1 +) 2; // [rex,] op, rm(reg/reg) 285} 286 287// !!!!! Special hack to get all type of calls to specify the byte offset 288// from the start of the call to the point where the return address 289// will point. 
290int MachCallStaticJavaNode::ret_addr_offset() { 291 int offset = 5 + pre_call_FPU_size(); // 5 bytes from start of call to where return address points 292 if (_method_handle_invoke) 293 offset += preserve_SP_size(); 294 return offset; 295} 296 297int MachCallDynamicJavaNode::ret_addr_offset() { 298 return 10 + pre_call_FPU_size(); // 10 bytes from start of call to where return address points 299} 300 301static int sizeof_FFree_Float_Stack_All = -1; 302 303int MachCallRuntimeNode::ret_addr_offset() { 304 assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already"); 305 return sizeof_FFree_Float_Stack_All + 5 + pre_call_FPU_size(); 306} 307 308// Indicate if the safepoint node needs the polling page as an input. 309// Since x86 does have absolute addressing, it doesn't. 310bool SafePointNode::needs_polling_address_input() { 311 return false; 312} 313 314// 315// Compute padding required for nodes which need alignment 316// 317 318// The address of the call instruction needs to be 4-byte aligned to 319// ensure that it does not span a cache line so that it can be patched. 320int CallStaticJavaDirectNode::compute_padding(int current_offset) const { 321 current_offset += pre_call_FPU_size(); // skip fldcw, if any 322 current_offset += 1; // skip call opcode byte 323 return round_to(current_offset, alignment_required()) - current_offset; 324} 325 326// The address of the call instruction needs to be 4-byte aligned to 327// ensure that it does not span a cache line so that it can be patched. 
328int CallStaticJavaHandleNode::compute_padding(int current_offset) const { 329 current_offset += pre_call_FPU_size(); // skip fldcw, if any 330 current_offset += preserve_SP_size(); // skip mov rbp, rsp 331 current_offset += 1; // skip call opcode byte 332 return round_to(current_offset, alignment_required()) - current_offset; 333} 334 335// The address of the call instruction needs to be 4-byte aligned to 336// ensure that it does not span a cache line so that it can be patched. 337int CallDynamicJavaDirectNode::compute_padding(int current_offset) const { 338 current_offset += pre_call_FPU_size(); // skip fldcw, if any 339 current_offset += 5; // skip MOV instruction 340 current_offset += 1; // skip call opcode byte 341 return round_to(current_offset, alignment_required()) - current_offset; 342} 343 344#ifndef PRODUCT 345void MachBreakpointNode::format( PhaseRegAlloc *, outputStream* st ) const { 346 st->print("INT3"); 347} 348#endif 349 350// EMIT_RM() 351void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) { 352 unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3); 353 cbuf.insts()->emit_int8(c); 354} 355 356// EMIT_CC() 357void emit_cc(CodeBuffer &cbuf, int f1, int f2) { 358 unsigned char c = (unsigned char)( f1 | f2 ); 359 cbuf.insts()->emit_int8(c); 360} 361 362// EMIT_OPCODE() 363void emit_opcode(CodeBuffer &cbuf, int code) { 364 cbuf.insts()->emit_int8((unsigned char) code); 365} 366 367// EMIT_OPCODE() w/ relocation information 368void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) { 369 cbuf.relocate(cbuf.insts_mark() + offset, reloc); 370 emit_opcode(cbuf, code); 371} 372 373// EMIT_D8() 374void emit_d8(CodeBuffer &cbuf, int d8) { 375 cbuf.insts()->emit_int8((unsigned char) d8); 376} 377 378// EMIT_D16() 379void emit_d16(CodeBuffer &cbuf, int d16) { 380 cbuf.insts()->emit_int16(d16); 381} 382 383// EMIT_D32() 384void emit_d32(CodeBuffer &cbuf, int d32) { 385 cbuf.insts()->emit_int32(d32); 386} 387 388// 
emit 32 bit value and construct relocation entry from relocInfo::relocType 389void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc, 390 int format) { 391 cbuf.relocate(cbuf.insts_mark(), reloc, format); 392 cbuf.insts()->emit_int32(d32); 393} 394 395// emit 32 bit value and construct relocation entry from RelocationHolder 396void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec, 397 int format) { 398#ifdef ASSERT 399 if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) { 400 assert(oop(d32)->is_oop() && (ScavengeRootsInCode || !oop(d32)->is_scavengable()), "cannot embed scavengable oops in code"); 401 } 402#endif 403 cbuf.relocate(cbuf.insts_mark(), rspec, format); 404 cbuf.insts()->emit_int32(d32); 405} 406 407// Access stack slot for load or store 408void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) { 409 emit_opcode( cbuf, opcode ); // (e.g., FILD [ESP+src]) 410 if( -128 <= disp && disp <= 127 ) { 411 emit_rm( cbuf, 0x01, rm_field, ESP_enc ); // R/M byte 412 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte 413 emit_d8 (cbuf, disp); // Displacement // R/M byte 414 } else { 415 emit_rm( cbuf, 0x02, rm_field, ESP_enc ); // R/M byte 416 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte 417 emit_d32(cbuf, disp); // Displacement // R/M byte 418 } 419} 420 421 // eRegI ereg, memory mem) %{ // emit_reg_mem 422void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, bool displace_is_oop ) { 423 // There is no index & no scale, use form without SIB byte 424 if ((index == 0x4) && 425 (scale == 0) && (base != ESP_enc)) { 426 // If no displacement, mode is 0x0; unless base is [EBP] 427 if ( (displace == 0) && (base != EBP_enc) ) { 428 emit_rm(cbuf, 0x0, reg_encoding, base); 429 } 430 else { // If 8-bit displacement, mode 0x1 431 if ((displace >= -128) && (displace <= 127) 432 && !(displace_is_oop) ) { 433 
emit_rm(cbuf, 0x1, reg_encoding, base); 434 emit_d8(cbuf, displace); 435 } 436 else { // If 32-bit displacement 437 if (base == -1) { // Special flag for absolute address 438 emit_rm(cbuf, 0x0, reg_encoding, 0x5); 439 // (manual lies; no SIB needed here) 440 if ( displace_is_oop ) { 441 emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1); 442 } else { 443 emit_d32 (cbuf, displace); 444 } 445 } 446 else { // Normal base + offset 447 emit_rm(cbuf, 0x2, reg_encoding, base); 448 if ( displace_is_oop ) { 449 emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1); 450 } else { 451 emit_d32 (cbuf, displace); 452 } 453 } 454 } 455 } 456 } 457 else { // Else, encode with the SIB byte 458 // If no displacement, mode is 0x0; unless base is [EBP] 459 if (displace == 0 && (base != EBP_enc)) { // If no displacement 460 emit_rm(cbuf, 0x0, reg_encoding, 0x4); 461 emit_rm(cbuf, scale, index, base); 462 } 463 else { // If 8-bit displacement, mode 0x1 464 if ((displace >= -128) && (displace <= 127) 465 && !(displace_is_oop) ) { 466 emit_rm(cbuf, 0x1, reg_encoding, 0x4); 467 emit_rm(cbuf, scale, index, base); 468 emit_d8(cbuf, displace); 469 } 470 else { // If 32-bit displacement 471 if (base == 0x04 ) { 472 emit_rm(cbuf, 0x2, reg_encoding, 0x4); 473 emit_rm(cbuf, scale, index, 0x04); 474 } else { 475 emit_rm(cbuf, 0x2, reg_encoding, 0x4); 476 emit_rm(cbuf, scale, index, base); 477 } 478 if ( displace_is_oop ) { 479 emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1); 480 } else { 481 emit_d32 (cbuf, displace); 482 } 483 } 484 } 485 } 486} 487 488 489void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) { 490 if( dst_encoding == src_encoding ) { 491 // reg-reg copy, use an empty encoding 492 } else { 493 emit_opcode( cbuf, 0x8B ); 494 emit_rm(cbuf, 0x3, dst_encoding, src_encoding ); 495 } 496} 497 498void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) { 499 if( dst_encoding == src_encoding ) { 500 // reg-reg copy, use an empty encoding 
501 } else { 502 MacroAssembler _masm(&cbuf); 503 504 __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding)); 505 } 506} 507 508 509//============================================================================= 510#ifndef PRODUCT 511void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 512 Compile* C = ra_->C; 513 if( C->in_24_bit_fp_mode() ) { 514 st->print("FLDCW 24 bit fpu control word"); 515 st->print_cr(""); st->print("\t"); 516 } 517 518 int framesize = C->frame_slots() << LogBytesPerInt; 519 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); 520 // Remove two words for return addr and rbp, 521 framesize -= 2*wordSize; 522 523 // Calls to C2R adapters often do not accept exceptional returns. 524 // We require that their callers must bang for them. But be careful, because 525 // some VM calls (such as call site linkage) can use several kilobytes of 526 // stack. But the stack safety zone should account for that. 527 // See bugs 4446381, 4468289, 4497237. 
528 if (C->need_stack_bang(framesize)) { 529 st->print_cr("# stack bang"); st->print("\t"); 530 } 531 st->print_cr("PUSHL EBP"); st->print("\t"); 532 533 if( VerifyStackAtCalls ) { // Majik cookie to verify stack depth 534 st->print("PUSH 0xBADB100D\t# Majik cookie for stack depth check"); 535 st->print_cr(""); st->print("\t"); 536 framesize -= wordSize; 537 } 538 539 if ((C->in_24_bit_fp_mode() || VerifyStackAtCalls ) && framesize < 128 ) { 540 if (framesize) { 541 st->print("SUB ESP,%d\t# Create frame",framesize); 542 } 543 } else { 544 st->print("SUB ESP,%d\t# Create frame",framesize); 545 } 546} 547#endif 548 549 550void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 551 Compile* C = ra_->C; 552 553 if (UseSSE >= 2 && VerifyFPU) { 554 MacroAssembler masm(&cbuf); 555 masm.verify_FPU(0, "FPU stack must be clean on entry"); 556 } 557 558 // WARNING: Initial instruction MUST be 5 bytes or longer so that 559 // NativeJump::patch_verified_entry will be able to patch out the entry 560 // code safely. The fldcw is ok at 6 bytes, the push to verify stack 561 // depth is ok at 5 bytes, the frame allocation can be either 3 or 562 // 6 bytes. So if we don't do the fldcw or the push then we must 563 // use the 6 byte frame allocation even if we have no frame. :-( 564 // If method sets FPU control word do it now 565 if( C->in_24_bit_fp_mode() ) { 566 MacroAssembler masm(&cbuf); 567 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); 568 } 569 570 int framesize = C->frame_slots() << LogBytesPerInt; 571 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); 572 // Remove two words for return addr and rbp, 573 framesize -= 2*wordSize; 574 575 // Calls to C2R adapters often do not accept exceptional returns. 576 // We require that their callers must bang for them. But be careful, because 577 // some VM calls (such as call site linkage) can use several kilobytes of 578 // stack. 
But the stack safety zone should account for that. 579 // See bugs 4446381, 4468289, 4497237. 580 if (C->need_stack_bang(framesize)) { 581 MacroAssembler masm(&cbuf); 582 masm.generate_stack_overflow_check(framesize); 583 } 584 585 // We always push rbp, so that on return to interpreter rbp, will be 586 // restored correctly and we can correct the stack. 587 emit_opcode(cbuf, 0x50 | EBP_enc); 588 589 if( VerifyStackAtCalls ) { // Majik cookie to verify stack depth 590 emit_opcode(cbuf, 0x68); // push 0xbadb100d 591 emit_d32(cbuf, 0xbadb100d); 592 framesize -= wordSize; 593 } 594 595 if ((C->in_24_bit_fp_mode() || VerifyStackAtCalls ) && framesize < 128 ) { 596 if (framesize) { 597 emit_opcode(cbuf, 0x83); // sub SP,#framesize 598 emit_rm(cbuf, 0x3, 0x05, ESP_enc); 599 emit_d8(cbuf, framesize); 600 } 601 } else { 602 emit_opcode(cbuf, 0x81); // sub SP,#framesize 603 emit_rm(cbuf, 0x3, 0x05, ESP_enc); 604 emit_d32(cbuf, framesize); 605 } 606 C->set_frame_complete(cbuf.insts_size()); 607 608#ifdef ASSERT 609 if (VerifyStackAtCalls) { 610 Label L; 611 MacroAssembler masm(&cbuf); 612 masm.push(rax); 613 masm.mov(rax, rsp); 614 masm.andptr(rax, StackAlignmentInBytes-1); 615 masm.cmpptr(rax, StackAlignmentInBytes-wordSize); 616 masm.pop(rax); 617 masm.jcc(Assembler::equal, L); 618 masm.stop("Stack is not properly aligned!"); 619 masm.bind(L); 620 } 621#endif 622 623} 624 625uint MachPrologNode::size(PhaseRegAlloc *ra_) const { 626 return MachNode::size(ra_); // too many variables; just compute it the hard way 627} 628 629int MachPrologNode::reloc() const { 630 return 0; // a large enough number 631} 632 633//============================================================================= 634#ifndef PRODUCT 635void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 636 Compile *C = ra_->C; 637 int framesize = C->frame_slots() << LogBytesPerInt; 638 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); 639 // Remove two words for 
return addr and rbp, 640 framesize -= 2*wordSize; 641 642 if( C->in_24_bit_fp_mode() ) { 643 st->print("FLDCW standard control word"); 644 st->cr(); st->print("\t"); 645 } 646 if( framesize ) { 647 st->print("ADD ESP,%d\t# Destroy frame",framesize); 648 st->cr(); st->print("\t"); 649 } 650 st->print_cr("POPL EBP"); st->print("\t"); 651 if( do_polling() && C->is_method_compilation() ) { 652 st->print("TEST PollPage,EAX\t! Poll Safepoint"); 653 st->cr(); st->print("\t"); 654 } 655} 656#endif 657 658void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 659 Compile *C = ra_->C; 660 661 // If method set FPU control word, restore to standard control word 662 if( C->in_24_bit_fp_mode() ) { 663 MacroAssembler masm(&cbuf); 664 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); 665 } 666 667 int framesize = C->frame_slots() << LogBytesPerInt; 668 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); 669 // Remove two words for return addr and rbp, 670 framesize -= 2*wordSize; 671 672 // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here 673 674 if( framesize >= 128 ) { 675 emit_opcode(cbuf, 0x81); // add SP, #framesize 676 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 677 emit_d32(cbuf, framesize); 678 } 679 else if( framesize ) { 680 emit_opcode(cbuf, 0x83); // add SP, #framesize 681 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 682 emit_d8(cbuf, framesize); 683 } 684 685 emit_opcode(cbuf, 0x58 | EBP_enc); 686 687 if( do_polling() && C->is_method_compilation() ) { 688 cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0); 689 emit_opcode(cbuf,0x85); 690 emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX 691 emit_d32(cbuf, (intptr_t)os::get_polling_page()); 692 } 693} 694 695uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { 696 Compile *C = ra_->C; 697 // If method set FPU control word, restore to standard control word 698 int size = C->in_24_bit_fp_mode() ? 
6 : 0; 699 if( do_polling() && C->is_method_compilation() ) size += 6; 700 701 int framesize = C->frame_slots() << LogBytesPerInt; 702 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); 703 // Remove two words for return addr and rbp, 704 framesize -= 2*wordSize; 705 706 size++; // popl rbp, 707 708 if( framesize >= 128 ) { 709 size += 6; 710 } else { 711 size += framesize ? 3 : 0; 712 } 713 return size; 714} 715 716int MachEpilogNode::reloc() const { 717 return 0; // a large enough number 718} 719 720const Pipeline * MachEpilogNode::pipeline() const { 721 return MachNode::pipeline_class(); 722} 723 724int MachEpilogNode::safepoint_offset() const { return 0; } 725 726//============================================================================= 727 728enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack }; 729static enum RC rc_class( OptoReg::Name reg ) { 730 731 if( !OptoReg::is_valid(reg) ) return rc_bad; 732 if (OptoReg::is_stack(reg)) return rc_stack; 733 734 VMReg r = OptoReg::as_VMReg(reg); 735 if (r->is_Register()) return rc_int; 736 if (r->is_FloatRegister()) { 737 assert(UseSSE < 2, "shouldn't be used in SSE2+ mode"); 738 return rc_float; 739 } 740 assert(r->is_XMMRegister(), "must be"); 741 return rc_xmm; 742} 743 744static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg, 745 int opcode, const char *op_str, int size, outputStream* st ) { 746 if( cbuf ) { 747 emit_opcode (*cbuf, opcode ); 748 encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, false); 749#ifndef PRODUCT 750 } else if( !do_size ) { 751 if( size != 0 ) st->print("\n\t"); 752 if( opcode == 0x8B || opcode == 0x89 ) { // MOV 753 if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset); 754 else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]); 755 } else { // FLD, FST, PUSH, POP 756 st->print("%s [ESP + #%d]",op_str,offset); 757 } 758#endif 759 } 760 int offset_size = 
(offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 761 return size+3+offset_size; 762} 763 764// Helper for XMM registers. Extra opcode bits, limited syntax. 765static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load, 766 int offset, int reg_lo, int reg_hi, int size, outputStream* st ) { 767 if( cbuf ) { 768 if( reg_lo+1 == reg_hi ) { // double move? 769 if( is_load && !UseXmmLoadAndClearUpper ) 770 emit_opcode(*cbuf, 0x66 ); // use 'movlpd' for load 771 else 772 emit_opcode(*cbuf, 0xF2 ); // use 'movsd' otherwise 773 } else { 774 emit_opcode(*cbuf, 0xF3 ); 775 } 776 emit_opcode(*cbuf, 0x0F ); 777 if( reg_lo+1 == reg_hi && is_load && !UseXmmLoadAndClearUpper ) 778 emit_opcode(*cbuf, 0x12 ); // use 'movlpd' for load 779 else 780 emit_opcode(*cbuf, is_load ? 0x10 : 0x11 ); 781 encode_RegMem(*cbuf, Matcher::_regEncode[reg_lo], ESP_enc, 0x4, 0, offset, false); 782#ifndef PRODUCT 783 } else if( !do_size ) { 784 if( size != 0 ) st->print("\n\t"); 785 if( reg_lo+1 == reg_hi ) { // double move? 786 if( is_load ) st->print("%s %s,[ESP + #%d]", 787 UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD", 788 Matcher::regName[reg_lo], offset); 789 else st->print("MOVSD [ESP + #%d],%s", 790 offset, Matcher::regName[reg_lo]); 791 } else { 792 if( is_load ) st->print("MOVSS %s,[ESP + #%d]", 793 Matcher::regName[reg_lo], offset); 794 else st->print("MOVSS [ESP + #%d],%s", 795 offset, Matcher::regName[reg_lo]); 796 } 797#endif 798 } 799 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 
1 : 4); 800 return size+5+offset_size; 801} 802 803 804static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 805 int src_hi, int dst_hi, int size, outputStream* st ) { 806 if( UseXmmRegToRegMoveAll ) {//Use movaps,movapd to move between xmm registers 807 if( cbuf ) { 808 if( (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ) { 809 emit_opcode(*cbuf, 0x66 ); 810 } 811 emit_opcode(*cbuf, 0x0F ); 812 emit_opcode(*cbuf, 0x28 ); 813 emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] ); 814#ifndef PRODUCT 815 } else if( !do_size ) { 816 if( size != 0 ) st->print("\n\t"); 817 if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move? 818 st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 819 } else { 820 st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 821 } 822#endif 823 } 824 return size + ((src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 4 : 3); 825 } else { 826 if( cbuf ) { 827 emit_opcode(*cbuf, (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 0xF2 : 0xF3 ); 828 emit_opcode(*cbuf, 0x0F ); 829 emit_opcode(*cbuf, 0x10 ); 830 emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] ); 831#ifndef PRODUCT 832 } else if( !do_size ) { 833 if( size != 0 ) st->print("\n\t"); 834 if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move? 
835 st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 836 } else { 837 st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 838 } 839#endif 840 } 841 return size+4; 842 } 843} 844 845static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 846 int src_hi, int dst_hi, int size, outputStream* st ) { 847 // 32-bit 848 if (cbuf) { 849 emit_opcode(*cbuf, 0x66); 850 emit_opcode(*cbuf, 0x0F); 851 emit_opcode(*cbuf, 0x6E); 852 emit_rm(*cbuf, 0x3, Matcher::_regEncode[dst_lo] & 7, Matcher::_regEncode[src_lo] & 7); 853#ifndef PRODUCT 854 } else if (!do_size) { 855 st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); 856#endif 857 } 858 return 4; 859} 860 861 862static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 863 int src_hi, int dst_hi, int size, outputStream* st ) { 864 // 32-bit 865 if (cbuf) { 866 emit_opcode(*cbuf, 0x66); 867 emit_opcode(*cbuf, 0x0F); 868 emit_opcode(*cbuf, 0x7E); 869 emit_rm(*cbuf, 0x3, Matcher::_regEncode[src_lo] & 7, Matcher::_regEncode[dst_lo] & 7); 870#ifndef PRODUCT 871 } else if (!do_size) { 872 st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); 873#endif 874 } 875 return 4; 876} 877 878static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) { 879 if( cbuf ) { 880 emit_opcode(*cbuf, 0x8B ); 881 emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] ); 882#ifndef PRODUCT 883 } else if( !do_size ) { 884 if( size != 0 ) st->print("\n\t"); 885 st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]); 886#endif 887 } 888 return size+2; 889} 890 891static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi, 892 int offset, int size, outputStream* st ) { 893 if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already 
there 894 if( cbuf ) { 895 emit_opcode( *cbuf, 0xD9 ); // FLD (i.e., push it) 896 emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] ); 897#ifndef PRODUCT 898 } else if( !do_size ) { 899 if( size != 0 ) st->print("\n\t"); 900 st->print("FLD %s",Matcher::regName[src_lo]); 901#endif 902 } 903 size += 2; 904 } 905 906 int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/; 907 const char *op_str; 908 int op; 909 if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store? 910 op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D "; 911 op = 0xDD; 912 } else { // 32-bit store 913 op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S "; 914 op = 0xD9; 915 assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" ); 916 } 917 918 return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st); 919} 920 921uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { 922 // Get registers to move 923 OptoReg::Name src_second = ra_->get_reg_second(in(1)); 924 OptoReg::Name src_first = ra_->get_reg_first(in(1)); 925 OptoReg::Name dst_second = ra_->get_reg_second(this ); 926 OptoReg::Name dst_first = ra_->get_reg_first(this ); 927 928 enum RC src_second_rc = rc_class(src_second); 929 enum RC src_first_rc = rc_class(src_first); 930 enum RC dst_second_rc = rc_class(dst_second); 931 enum RC dst_first_rc = rc_class(dst_first); 932 933 assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); 934 935 // Generate spill code! 936 int size = 0; 937 938 if( src_first == dst_first && src_second == dst_second ) 939 return size; // Self copy, no move 940 941 // -------------------------------------- 942 // Check for mem-mem move. push/pop to move. 
943 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 944 if( src_second == dst_first ) { // overlapping stack copy ranges 945 assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" ); 946 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 947 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 948 src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits 949 } 950 // move low bits 951 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st); 952 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st); 953 if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits 954 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 955 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 956 } 957 return size; 958 } 959 960 // -------------------------------------- 961 // Check for integer reg-reg copy 962 if( src_first_rc == rc_int && dst_first_rc == rc_int ) 963 size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st); 964 965 // Check for integer store 966 if( src_first_rc == rc_int && dst_first_rc == rc_stack ) 967 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st); 968 969 // Check for integer load 970 if( dst_first_rc == rc_int && src_first_rc == rc_stack ) 971 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st); 972 973 // Check for integer reg-xmm reg copy 974 if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) { 975 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 976 "no 64 bit integer-float reg moves" ); 977 return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, 
st); 978 } 979 // -------------------------------------- 980 // Check for float reg-reg copy 981 if( src_first_rc == rc_float && dst_first_rc == rc_float ) { 982 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 983 (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" ); 984 if( cbuf ) { 985 986 // Note the mucking with the register encode to compensate for the 0/1 987 // indexing issue mentioned in a comment in the reg_def sections 988 // for FPR registers many lines above here. 989 990 if( src_first != FPR1L_num ) { 991 emit_opcode (*cbuf, 0xD9 ); // FLD ST(i) 992 emit_d8 (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 ); 993 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) 994 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); 995 } else { 996 emit_opcode (*cbuf, 0xDD ); // FST ST(i) 997 emit_d8 (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 ); 998 } 999#ifndef PRODUCT 1000 } else if( !do_size ) { 1001 if( size != 0 ) st->print("\n\t"); 1002 if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]); 1003 else st->print( "FST %s", Matcher::regName[dst_first]); 1004#endif 1005 } 1006 return size + ((src_first != FPR1L_num) ? 2+2 : 2); 1007 } 1008 1009 // Check for float store 1010 if( src_first_rc == rc_float && dst_first_rc == rc_stack ) { 1011 return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st); 1012 } 1013 1014 // Check for float load 1015 if( dst_first_rc == rc_float && src_first_rc == rc_stack ) { 1016 int offset = ra_->reg2offset(src_first); 1017 const char *op_str; 1018 int op; 1019 if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load? 
1020 op_str = "FLD_D"; 1021 op = 0xDD; 1022 } else { // 32-bit load 1023 op_str = "FLD_S"; 1024 op = 0xD9; 1025 assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" ); 1026 } 1027 if( cbuf ) { 1028 emit_opcode (*cbuf, op ); 1029 encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, false); 1030 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) 1031 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); 1032#ifndef PRODUCT 1033 } else if( !do_size ) { 1034 if( size != 0 ) st->print("\n\t"); 1035 st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]); 1036#endif 1037 } 1038 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 1039 return size + 3+offset_size+2; 1040 } 1041 1042 // Check for xmm reg-reg copy 1043 if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1044 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1045 (src_first+1 == src_second && dst_first+1 == dst_second), 1046 "no non-adjacent float-moves" ); 1047 return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1048 } 1049 1050 // Check for xmm reg-integer reg copy 1051 if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) { 1052 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1053 "no 64 bit float-integer reg moves" ); 1054 return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1055 } 1056 1057 // Check for xmm store 1058 if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1059 return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st); 1060 } 1061 1062 // Check for float xmm load 1063 if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) { 1064 return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st); 1065 } 1066 1067 // Copy from float reg to xmm reg 1068 if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) { 1069 // copy to 
the top of stack from floating point reg 1070 // and use LEA to preserve flags 1071 if( cbuf ) { 1072 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP-8] 1073 emit_rm(*cbuf, 0x1, ESP_enc, 0x04); 1074 emit_rm(*cbuf, 0x0, 0x04, ESP_enc); 1075 emit_d8(*cbuf,0xF8); 1076#ifndef PRODUCT 1077 } else if( !do_size ) { 1078 if( size != 0 ) st->print("\n\t"); 1079 st->print("LEA ESP,[ESP-8]"); 1080#endif 1081 } 1082 size += 4; 1083 1084 size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st); 1085 1086 // Copy from the temp memory to the xmm reg. 1087 size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st); 1088 1089 if( cbuf ) { 1090 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8] 1091 emit_rm(*cbuf, 0x1, ESP_enc, 0x04); 1092 emit_rm(*cbuf, 0x0, 0x04, ESP_enc); 1093 emit_d8(*cbuf,0x08); 1094#ifndef PRODUCT 1095 } else if( !do_size ) { 1096 if( size != 0 ) st->print("\n\t"); 1097 st->print("LEA ESP,[ESP+8]"); 1098#endif 1099 } 1100 size += 4; 1101 return size; 1102 } 1103 1104 assert( size > 0, "missed a case" ); 1105 1106 // -------------------------------------------------------------------- 1107 // Check for second bits still needing moving. 
1108 if( src_second == dst_second ) 1109 return size; // Self copy; no move 1110 assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" ); 1111 1112 // Check for second word int-int move 1113 if( src_second_rc == rc_int && dst_second_rc == rc_int ) 1114 return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st); 1115 1116 // Check for second word integer store 1117 if( src_second_rc == rc_int && dst_second_rc == rc_stack ) 1118 return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st); 1119 1120 // Check for second word integer load 1121 if( dst_second_rc == rc_int && src_second_rc == rc_stack ) 1122 return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st); 1123 1124 1125 Unimplemented(); 1126} 1127 1128#ifndef PRODUCT 1129void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1130 implementation( NULL, ra_, false, st ); 1131} 1132#endif 1133 1134void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1135 implementation( &cbuf, ra_, false, NULL ); 1136} 1137 1138uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { 1139 return implementation( NULL, ra_, true, NULL ); 1140} 1141 1142//============================================================================= 1143#ifndef PRODUCT 1144void MachNopNode::format( PhaseRegAlloc *, outputStream* st ) const { 1145 st->print("NOP \t# %d bytes pad for loops and calls", _count); 1146} 1147#endif 1148 1149void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const { 1150 MacroAssembler _masm(&cbuf); 1151 __ nop(_count); 1152} 1153 1154uint MachNopNode::size(PhaseRegAlloc *) const { 1155 return _count; 1156} 1157 1158 1159//============================================================================= 1160#ifndef PRODUCT 1161void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1162 int offset = 
ra_->reg2offset(in_RegMask(0).find_first_elem()); 1163 int reg = ra_->get_reg_first(this); 1164 st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset); 1165} 1166#endif 1167 1168void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1169 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1170 int reg = ra_->get_encode(this); 1171 if( offset >= 128 ) { 1172 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] 1173 emit_rm(cbuf, 0x2, reg, 0x04); 1174 emit_rm(cbuf, 0x0, 0x04, ESP_enc); 1175 emit_d32(cbuf, offset); 1176 } 1177 else { 1178 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] 1179 emit_rm(cbuf, 0x1, reg, 0x04); 1180 emit_rm(cbuf, 0x0, 0x04, ESP_enc); 1181 emit_d8(cbuf, offset); 1182 } 1183} 1184 1185uint BoxLockNode::size(PhaseRegAlloc *ra_) const { 1186 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1187 if( offset >= 128 ) { 1188 return 7; 1189 } 1190 else { 1191 return 4; 1192 } 1193} 1194 1195//============================================================================= 1196 1197// emit call stub, compiled java to interpreter 1198void emit_java_to_interp(CodeBuffer &cbuf ) { 1199 // Stub is fixed up when the corresponding call is converted from calling 1200 // compiled code to calling interpreted code. 1201 // mov rbx,0 1202 // jmp -1 1203 1204 address mark = cbuf.insts_mark(); // get mark within main instrs section 1205 1206 // Note that the code buffer's insts_mark is always relative to insts. 1207 // That's why we must use the macroassembler to generate a stub. 1208 MacroAssembler _masm(&cbuf); 1209 1210 address base = 1211 __ start_a_stub(Compile::MAX_stubs_size); 1212 if (base == NULL) return; // CodeBuffer::expand failed 1213 // static stub relocation stores the instruction address of the call 1214 __ relocate(static_stub_Relocation::spec(mark), RELOC_IMM32); 1215 // static stub relocation also tags the methodOop in the code-stream. 
1216 __ movoop(rbx, (jobject)NULL); // method is zapped till fixup time 1217 // This is recognized as unresolved by relocs/nativeInst/ic code 1218 __ jump(RuntimeAddress(__ pc())); 1219 1220 __ end_a_stub(); 1221 // Update current stubs pointer and restore insts_end. 1222} 1223// size of call stub, compiled java to interpretor 1224uint size_java_to_interp() { 1225 return 10; // movl; jmp 1226} 1227// relocation entries for call stub, compiled java to interpretor 1228uint reloc_java_to_interp() { 1229 return 4; // 3 in emit_java_to_interp + 1 in Java_Static_Call 1230} 1231 1232//============================================================================= 1233#ifndef PRODUCT 1234void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1235 st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check"); 1236 st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub"); 1237 st->print_cr("\tNOP"); 1238 st->print_cr("\tNOP"); 1239 if( !OptoBreakpoint ) 1240 st->print_cr("\tNOP"); 1241} 1242#endif 1243 1244void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1245 MacroAssembler masm(&cbuf); 1246#ifdef ASSERT 1247 uint insts_size = cbuf.insts_size(); 1248#endif 1249 masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes())); 1250 masm.jump_cc(Assembler::notEqual, 1251 RuntimeAddress(SharedRuntime::get_ic_miss_stub())); 1252 /* WARNING these NOPs are critical so that verified entry point is properly 1253 aligned for patching by NativeJump::patch_verified_entry() */ 1254 int nops_cnt = 2; 1255 if( !OptoBreakpoint ) // Leave space for int3 1256 nops_cnt += 1; 1257 masm.nop(nops_cnt); 1258 1259 assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node"); 1260} 1261 1262uint MachUEPNode::size(PhaseRegAlloc *ra_) const { 1263 return OptoBreakpoint ? 
11 : 12; 1264} 1265 1266 1267//============================================================================= 1268uint size_exception_handler() { 1269 // NativeCall instruction size is the same as NativeJump. 1270 // exception handler starts out as jump and can be patched to 1271 // a call be deoptimization. (4932387) 1272 // Note that this value is also credited (in output.cpp) to 1273 // the size of the code section. 1274 return NativeJump::instruction_size; 1275} 1276 1277// Emit exception handler code. Stuff framesize into a register 1278// and call a VM stub routine. 1279int emit_exception_handler(CodeBuffer& cbuf) { 1280 1281 // Note that the code buffer's insts_mark is always relative to insts. 1282 // That's why we must use the macroassembler to generate a handler. 1283 MacroAssembler _masm(&cbuf); 1284 address base = 1285 __ start_a_stub(size_exception_handler()); 1286 if (base == NULL) return 0; // CodeBuffer::expand failed 1287 int offset = __ offset(); 1288 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); 1289 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); 1290 __ end_a_stub(); 1291 return offset; 1292} 1293 1294uint size_deopt_handler() { 1295 // NativeCall instruction size is the same as NativeJump. 1296 // exception handler starts out as jump and can be patched to 1297 // a call be deoptimization. (4932387) 1298 // Note that this value is also credited (in output.cpp) to 1299 // the size of the code section. 1300 return 5 + NativeJump::instruction_size; // pushl(); jmp; 1301} 1302 1303// Emit deopt handler code. 1304int emit_deopt_handler(CodeBuffer& cbuf) { 1305 1306 // Note that the code buffer's insts_mark is always relative to insts. 1307 // That's why we must use the macroassembler to generate a handler. 
1308 MacroAssembler _masm(&cbuf); 1309 address base = 1310 __ start_a_stub(size_exception_handler()); 1311 if (base == NULL) return 0; // CodeBuffer::expand failed 1312 int offset = __ offset(); 1313 InternalAddress here(__ pc()); 1314 __ pushptr(here.addr()); 1315 1316 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); 1317 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); 1318 __ end_a_stub(); 1319 return offset; 1320} 1321 1322 1323static void emit_double_constant(CodeBuffer& cbuf, double x) { 1324 int mark = cbuf.insts()->mark_off(); 1325 MacroAssembler _masm(&cbuf); 1326 address double_address = __ double_constant(x); 1327 cbuf.insts()->set_mark_off(mark); // preserve mark across masm shift 1328 emit_d32_reloc(cbuf, 1329 (int)double_address, 1330 internal_word_Relocation::spec(double_address), 1331 RELOC_DISP32); 1332} 1333 1334static void emit_float_constant(CodeBuffer& cbuf, float x) { 1335 int mark = cbuf.insts()->mark_off(); 1336 MacroAssembler _masm(&cbuf); 1337 address float_address = __ float_constant(x); 1338 cbuf.insts()->set_mark_off(mark); // preserve mark across masm shift 1339 emit_d32_reloc(cbuf, 1340 (int)float_address, 1341 internal_word_Relocation::spec(float_address), 1342 RELOC_DISP32); 1343} 1344 1345 1346const bool Matcher::match_rule_supported(int opcode) { 1347 if (!has_match_rule(opcode)) 1348 return false; 1349 1350 return true; // Per default match rules are supported. 
1351} 1352 1353int Matcher::regnum_to_fpu_offset(int regnum) { 1354 return regnum - 32; // The FP registers are in the second chunk 1355} 1356 1357bool is_positive_zero_float(jfloat f) { 1358 return jint_cast(f) == jint_cast(0.0F); 1359} 1360 1361bool is_positive_one_float(jfloat f) { 1362 return jint_cast(f) == jint_cast(1.0F); 1363} 1364 1365bool is_positive_zero_double(jdouble d) { 1366 return jlong_cast(d) == jlong_cast(0.0); 1367} 1368 1369bool is_positive_one_double(jdouble d) { 1370 return jlong_cast(d) == jlong_cast(1.0); 1371} 1372 1373// This is UltraSparc specific, true just means we have fast l2f conversion 1374const bool Matcher::convL2FSupported(void) { 1375 return true; 1376} 1377 1378// Vector width in bytes 1379const uint Matcher::vector_width_in_bytes(void) { 1380 return UseSSE >= 2 ? 8 : 0; 1381} 1382 1383// Vector ideal reg 1384const uint Matcher::vector_ideal_reg(void) { 1385 return Op_RegD; 1386} 1387 1388// Is this branch offset short enough that a short branch can be used? 1389// 1390// NOTE: If the platform does not provide any short branch variants, then 1391// this method should return false for offset 0. 1392bool Matcher::is_short_branch_offset(int rule, int offset) { 1393 // the short version of jmpConUCF2 contains multiple branches, 1394 // making the reach slightly less 1395 if (rule == jmpConUCF2_rule) 1396 return (-126 <= offset && offset <= 125); 1397 return (-128 <= offset && offset <= 127); 1398} 1399 1400const bool Matcher::isSimpleConstant64(jlong value) { 1401 // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. 1402 return false; 1403} 1404 1405// The ecx parameter to rep stos for the ClearArray node is in dwords. 1406const bool Matcher::init_array_count_is_in_bytes = false; 1407 1408// Threshold size for cleararray. 
const int Matcher::init_array_short_size = 8 * BytesPerLong;

// Should the Matcher clone shifts on addressing modes, expecting them to
// be subsumed into complex addressing expressions or compute them into
// registers?  True for Intel but false for most RISCs
const bool Matcher::clone_shift_expressions = true;

// Calls ShouldNotCallThis() before returning, i.e. this query is not expected
// to be made in this file's configuration.
// NOTE(review): presumably because narrow oops do not apply to this 32-bit
// port -- confirm against the callers.
bool Matcher::narrow_oop_use_complex_address() {
  ShouldNotCallThis();
  return true;
}


// Is it better to copy float constants, or load them directly from memory?
// Intel can load a float constant from a direct address, requiring no
// extra registers.  Most RISCs will have to materialize an address into a
// register first, so they would do better to copy the constant from stack.
const bool Matcher::rematerialize_float_constants = true;

// If CPU can load and store mis-aligned doubles directly then no fixup is
// needed.  Else we split the double into 2 integer pieces and move it
// piece-by-piece.  Only happens when passing doubles into C code as the
// Java calling convention forces doubles to be aligned.
1432const bool Matcher::misaligned_doubles_ok = true; 1433 1434 1435void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) { 1436 // Get the memory operand from the node 1437 uint numopnds = node->num_opnds(); // Virtual call for number of operands 1438 uint skipped = node->oper_input_base(); // Sum of leaves skipped so far 1439 assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" ); 1440 uint opcnt = 1; // First operand 1441 uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand 1442 while( idx >= skipped+num_edges ) { 1443 skipped += num_edges; 1444 opcnt++; // Bump operand count 1445 assert( opcnt < numopnds, "Accessing non-existent operand" ); 1446 num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand 1447 } 1448 1449 MachOper *memory = node->_opnds[opcnt]; 1450 MachOper *new_memory = NULL; 1451 switch (memory->opcode()) { 1452 case DIRECT: 1453 case INDOFFSET32X: 1454 // No transformation necessary. 1455 return; 1456 case INDIRECT: 1457 new_memory = new (C) indirect_win95_safeOper( ); 1458 break; 1459 case INDOFFSET8: 1460 new_memory = new (C) indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0)); 1461 break; 1462 case INDOFFSET32: 1463 new_memory = new (C) indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0)); 1464 break; 1465 case INDINDEXOFFSET: 1466 new_memory = new (C) indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0)); 1467 break; 1468 case INDINDEXSCALE: 1469 new_memory = new (C) indIndexScale_win95_safeOper(memory->scale()); 1470 break; 1471 case INDINDEXSCALEOFFSET: 1472 new_memory = new (C) indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0)); 1473 break; 1474 case LOAD_LONG_INDIRECT: 1475 case LOAD_LONG_INDOFFSET32: 1476 // Does not use EBP as address register, use { EDX, EBX, EDI, ESI} 1477 return; 1478 default: 1479 assert(false, "unexpected memory operand in pd_implicit_null_fixup()"); 1480 return; 1481 } 1482 node->_opnds[opcnt] = new_memory; 1483} 
1484 1485// Advertise here if the CPU requires explicit rounding operations 1486// to implement the UseStrictFP mode. 1487const bool Matcher::strict_fp_requires_explicit_rounding = true; 1488 1489// Are floats conerted to double when stored to stack during deoptimization? 1490// On x32 it is stored with convertion only when FPU is used for floats. 1491bool Matcher::float_in_double() { return (UseSSE == 0); } 1492 1493// Do ints take an entire long register or just half? 1494const bool Matcher::int_in_long = false; 1495 1496// Return whether or not this register is ever used as an argument. This 1497// function is used on startup to build the trampoline stubs in generateOptoStub. 1498// Registers not mentioned will be killed by the VM call in the trampoline, and 1499// arguments in those registers not be available to the callee. 1500bool Matcher::can_be_java_arg( int reg ) { 1501 if( reg == ECX_num || reg == EDX_num ) return true; 1502 if( (reg == XMM0a_num || reg == XMM1a_num) && UseSSE>=1 ) return true; 1503 if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true; 1504 return false; 1505} 1506 1507bool Matcher::is_spillable_arg( int reg ) { 1508 return can_be_java_arg(reg); 1509} 1510 1511// Register for DIVI projection of divmodI 1512RegMask Matcher::divI_proj_mask() { 1513 return EAX_REG_mask; 1514} 1515 1516// Register for MODI projection of divmodI 1517RegMask Matcher::modI_proj_mask() { 1518 return EDX_REG_mask; 1519} 1520 1521// Register for DIVL projection of divmodL 1522RegMask Matcher::divL_proj_mask() { 1523 ShouldNotReachHere(); 1524 return RegMask(); 1525} 1526 1527// Register for MODL projection of divmodL 1528RegMask Matcher::modL_proj_mask() { 1529 ShouldNotReachHere(); 1530 return RegMask(); 1531} 1532 1533const RegMask Matcher::method_handle_invoke_SP_save_mask() { 1534 return EBP_REG_mask; 1535} 1536 1537// Returns true if the high 32 bits of the value is known to be zero. 
1538bool is_operand_hi32_zero(Node* n) { 1539 int opc = n->Opcode(); 1540 if (opc == Op_LoadUI2L) { 1541 return true; 1542 } 1543 if (opc == Op_AndL) { 1544 Node* o2 = n->in(2); 1545 if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) { 1546 return true; 1547 } 1548 } 1549 return false; 1550} 1551 1552%} 1553 1554//----------ENCODING BLOCK----------------------------------------------------- 1555// This block specifies the encoding classes used by the compiler to output 1556// byte streams. Encoding classes generate functions which are called by 1557// Machine Instruction Nodes in order to generate the bit encoding of the 1558// instruction. Operands specify their base encoding interface with the 1559// interface keyword. There are currently supported four interfaces, 1560// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an 1561// operand to generate a function which returns its register number when 1562// queried. CONST_INTER causes an operand to generate a function which 1563// returns the value of the constant when queried. MEMORY_INTER causes an 1564// operand to generate four functions which return the Base Register, the 1565// Index Register, the Scale Value, and the Offset Value of the operand when 1566// queried. COND_INTER causes an operand to generate six functions which 1567// return the encoding code (ie - encoding bits for the instruction) 1568// associated with each basic boolean condition for a conditional instruction. 1569// Instructions specify two basic values for encoding. They use the 1570// ins_encode keyword to specify their encoding class (which must be one of 1571// the class names specified in the encoding block), and they use the 1572// opcode keyword to specify, in order, their primary, secondary, and 1573// tertiary opcode. Only the opcode sections which a particular instruction 1574// needs for encoding need to be specified. 
1575encode %{ 1576 // Build emit functions for each basic byte or larger field in the intel 1577 // encoding scheme (opcode, rm, sib, immediate), and call them from C++ 1578 // code in the enc_class source block. Emit functions will live in the 1579 // main source block for now. In future, we can generalize this by 1580 // adding a syntax that specifies the sizes of fields in an order, 1581 // so that the adlc can build the emit functions automagically 1582 1583 // Emit primary opcode 1584 enc_class OpcP %{ 1585 emit_opcode(cbuf, $primary); 1586 %} 1587 1588 // Emit secondary opcode 1589 enc_class OpcS %{ 1590 emit_opcode(cbuf, $secondary); 1591 %} 1592 1593 // Emit opcode directly 1594 enc_class Opcode(immI d8) %{ 1595 emit_opcode(cbuf, $d8$$constant); 1596 %} 1597 1598 enc_class SizePrefix %{ 1599 emit_opcode(cbuf,0x66); 1600 %} 1601 1602 enc_class RegReg (eRegI dst, eRegI src) %{ // RegReg(Many) 1603 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1604 %} 1605 1606 enc_class OpcRegReg (immI opcode, eRegI dst, eRegI src) %{ // OpcRegReg(Many) 1607 emit_opcode(cbuf,$opcode$$constant); 1608 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1609 %} 1610 1611 enc_class mov_r32_imm0( eRegI dst ) %{ 1612 emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32 1613 emit_d32 ( cbuf, 0x0 ); // imm32==0x0 1614 %} 1615 1616 enc_class cdq_enc %{ 1617 // Full implementation of Java idiv and irem; checks for 1618 // special case as described in JVM spec., p.243 & p.271. 
    //
    //         normal case                           special case
    //
    // input : rax,: dividend                         min_int
    //         reg: divisor                          -1
    //
    // output: rax,: quotient  (= rax, idiv reg)       min_int
    //         rdx: remainder (= rax, irem reg)       0
    //
    //  Code sequence:
    //
    //  81 F8 00 00 00 80    cmp         rax,80000000h
    //  0F 85 0B 00 00 00    jne         normal_case
    //  33 D2                xor         rdx,edx
    //  83 F9 FF             cmp         rcx,0FFh
    //  0F 84 03 00 00 00    je          done
    //                  normal_case:
    //  99                   cdq
    //  F7 F9                idiv        rax,ecx
    //                  done:
    //
    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
    // normal_case:
    emit_opcode(cbuf,0x99);                                         // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}

  // Dense encoding for older common ops:
  // emits one opcode byte equal to the opcode constant plus the register encoding.
  enc_class Opc_plus(immI opcode, eRegI reg) %{
    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
  %}


  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension.
  // Emits only the opcode byte; the immediate itself is emitted elsewhere.
  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else { // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
  %}

  // Same opcode selection as OpcSE, followed by a register-direct r/m byte
  // whose reg field carries the secondary opcode.
  enc_class OpcSErm (eRegI dst, immI imm) %{ // OpcSEr/m
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02); }
    else { // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
  %}

  // Emits the immediate operand itself, using the same size test as OpcSE/OpcSErm.
  enc_class Con8or32 (immI imm) %{ // Con8or32(storeImmI), 8 or 32 bits
    // Emit the immediate as 8 bits when it fits in a signed byte
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      $$$emit8$imm$$constant;
    }
    else { // If 32-bit immediate
      // Output immediate
      $$$emit32$imm$$constant;
    }
  %}

  // Immediate operation on the low register of a long pair, using the low
  // 32 bits of the long constant: opcode, r/m byte, then the 8- or 32-bit immediate.
  enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)$imm$$constant; // Throw away top bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else                               emit_d32(cbuf,con);
  %}

  // Counterpart of Long_OpcSErm_Lo for the high register of the pair: uses the
  // upper 32 bits of the constant and the tertiary opcode in the r/m byte.
  enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)($imm$$constant >> 32); // Throw away bottom bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with tertiary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else                               emit_d32(cbuf,con);
  %}

  // Emits a 32-bit displacement to the label, measured from the end of the
  // 4-byte displacement field; emits 0 when the label pointer is NULL.
  enc_class Lbl (label labl) %{ // JMP, CALL
    Label *l = $labl$$label;
    emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.insts_size()+4)) : 0);
  %}

  // Emits an 8-bit displacement to the label, measured from the end of the
  // 1-byte displacement field; asserts that it fits in a signed byte.
  enc_class LblShort (label labl) %{ // JMP, CALL
    Label *l = $labl$$label;
    int disp = l ? (l->loc_pos() - (cbuf.insts_size()+1)) : 0;
    assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
    emit_d8(cbuf, disp);
  %}

  // Emits the secondary opcode combined with the register encoding via emit_cc.
  enc_class OpcSReg (eRegI dst) %{ // BSWAP
    emit_cc(cbuf, $secondary, $dst$$reg );
  %}

  // Byte-reverses a long held in a register pair: BSWAP each half, then
  // exchange the two halves.
  enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
    int destlo = $dst$$reg;
    int desthi = HIGH_FROM_LOW(destlo);
    // bswap lo
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, destlo);
    // bswap hi
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, desthi);
    // xchg lo and hi
    emit_opcode(cbuf, 0x87);
    emit_rm(cbuf, 0x3, destlo, desthi);
  %}

  // Emits a register-direct r/m byte with the secondary opcode in the reg field.
  enc_class RegOpc (eRegI div) %{ // IDIV, IMOD, JMP indirect, ...
    emit_rm(cbuf, 0x3, $secondary, $div$$reg );
  %}

  // Long conditional jump: primary byte, secondary opcode combined with the
  // condition code, then a 32-bit displacement (0 when the label is NULL).
  enc_class Jcc (cmpOp cop, label labl) %{ // JCC
    Label *l = $labl$$label;
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, $cop$$cmpcode);
    emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.insts_size()+4)) : 0);
  %}

  // Short conditional jump: primary opcode combined with the condition code,
  // then an 8-bit displacement that must fit in a signed byte.
  enc_class JccShort (cmpOp cop, label labl) %{ // JCC
    Label *l = $labl$$label;
    emit_cc(cbuf, $primary, $cop$$cmpcode);
    int disp = l ? (l->loc_pos() - (cbuf.insts_size()+1)) : 0;
    assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
    emit_d8(cbuf, disp);
  %}

  // Emits the two opcode bytes of a CMOV: primary, then secondary combined
  // with the condition code. The r/m byte is emitted separately.
  enc_class enc_cmov(cmpOp cop ) %{ // CMOV
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, $cop$$cmpcode);
  %}

  // Emits a two-byte opcode built from 0xDA00, the condition code and the
  // source register number minus one, high byte first.
  enc_class enc_cmov_d(cmpOp cop, regD src ) %{ // CMOV
    int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
    emit_d8(cbuf, op >> 8 );
    emit_d8(cbuf, op & 255);
  %}

  // emulate a CMOV with a conditional branch around a MOV
  enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
    // Invert sense of branch from sense of CMOV
    emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
    emit_d8( cbuf, $brOffs$$constant );
  %}

  // Emits the slow-path subtype check using fixed registers. When $primary is
  // non-zero, EDI is cleared on the path that does not branch to 'miss'.
  // NOTE(review): presumably that fall-through path is the "is a subtype" case;
  // confirm against MacroAssembler::check_klass_subtype_slow_path.
  enc_class enc_PartialSubtypeCheck( ) %{
    Register Redi = as_Register(EDI_enc); // result register
    Register Reax = as_Register(EAX_enc); // super class
    Register Recx = as_Register(ECX_enc); // killed
    Register Resi = as_Register(ESI_enc); // sub class
    Label miss;

    MacroAssembler _masm(&cbuf);
    __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
    if ($primary) {
      __ xorptr(Redi, Redi);
    }
    __ bind(miss);
  %}

  // Empties the FPU stack when not in SSE2+ mode (or verifies it is empty when
  // VerifyFPU is set in SSE2+ mode). The emitted size is recorded in
  // sizeof_FFree_Float_Stack_All on first use and asserted on later uses.
  enc_class FFree_Float_Stack_All %{ // Free_Float_Stack_All
    MacroAssembler masm(&cbuf);
    int start = masm.offset();
    if (UseSSE >= 2) {
      if (VerifyFPU) {
        masm.verify_FPU(0, "must be empty in SSE2+ mode");
      }
    } else {
      // External c_calling_convention expects the FPU stack to be 'clean'.
      // Compiled code leaves it dirty. Do cleanup now.
      masm.empty_FPU_stack();
    }
    if (sizeof_FFree_Float_Stack_All == -1) {
      sizeof_FFree_Float_Stack_All = masm.offset() - start;
    } else {
      assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
    }
  %}

  // Emits an FPU stack verification after a runtime leaf call, only when
  // VerifyFPU is enabled.
  enc_class Verify_FPU_For_Leaf %{
    if( VerifyFPU ) {
      MacroAssembler masm(&cbuf);
      masm.verify_FPU( -3, "Returning from Runtime Leaf call");
    }
  %}

  // Emits a direct call to a runtime entry with a runtime_call relocation.
  // In SSE2+ mode a float/double result is then either discarded from the FPU
  // stack (when unused) or moved from the FPU stack to xmm0 through a
  // temporary stack slot.
  enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf
    // This is the instruction starting address for relocation info.
    cbuf.set_insts_mark();
    $$$emit8$primary;
    // CALL directly to the runtime
    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                   runtime_call_Relocation::spec(), RELOC_IMM32 );

    if (UseSSE >= 2) {
      MacroAssembler _masm(&cbuf);
      BasicType rt = tf()->return_type();

      if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
        // A C runtime call where the return value is unused. In SSE2+
        // mode the result needs to be removed from the FPU stack. It's
        // likely that this function call could be removed by the
        // optimizer if the C function is a pure function.
        __ ffree(0);
      } else if (rt == T_FLOAT) {
        __ lea(rsp, Address(rsp, -4));
        __ fstp_s(Address(rsp, 0));
        __ movflt(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp, 4));
      } else if (rt == T_DOUBLE) {
        __ lea(rsp, Address(rsp, -8));
        __ fstp_d(Address(rsp, 0));
        __ movdbl(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp, 8));
      }
    }
  %}


  // Loads the standard FPU control word before a call when the compilation is
  // in 24-bit FP mode; asserts the emitted size equals pre_call_FPU_size().
  enc_class pre_call_FPU %{
    // If method sets FPU control word restore it here
    debug_only(int off0 = cbuf.insts_size());
    if( Compile::current()->in_24_bit_fp_mode() ) {
      MacroAssembler masm(&cbuf);
      masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    debug_only(int off1 = cbuf.insts_size());
    assert(off1 - off0 == pre_call_FPU_size(), "correct size prediction");
  %}

  // Reloads the 24-bit FPU control word after a call when the compilation is
  // in 24-bit FP mode.
  enc_class post_call_FPU %{
    // If method sets FPU control word do it here also
    if( Compile::current()->in_24_bit_fp_mode() ) {
      MacroAssembler masm(&cbuf);
      masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    }
  %}

  // Copies the stack pointer into rbp_mh_SP_save; asserts the emitted size
  // equals preserve_SP_size().
  enc_class preserve_SP %{
    debug_only(int off0 = cbuf.insts_size());
    MacroAssembler _masm(&cbuf);
    // RBP is preserved across all calls, even compiled calls.
    // Use it to preserve RSP in places where the callee might change the SP.
    __ movptr(rbp_mh_SP_save, rsp);
    debug_only(int off1 = cbuf.insts_size());
    assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
  %}

  // Restores the stack pointer from rbp_mh_SP_save (inverse of preserve_SP).
  enc_class restore_SP %{
    MacroAssembler _masm(&cbuf);
    __ movptr(rsp, rbp_mh_SP_save);
  %}

  // Emits a call whose relocation type depends on the call kind: runtime call
  // when there is no _method, opt-virtual when _optimized_virtual, otherwise
  // static. A java-to-interpreter stub is emitted whenever _method is set.
  enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL
    // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine
    // who we intended to call.
    cbuf.set_insts_mark();
    $$$emit8$primary;
    if ( !_method ) {
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     runtime_call_Relocation::spec(), RELOC_IMM32 );
    } else if(_optimized_virtual) {
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     opt_virtual_call_Relocation::spec(), RELOC_IMM32 );
    } else {
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     static_call_Relocation::spec(), RELOC_IMM32 );
    }
    if( _method ) { // Emit stub for static call
      emit_java_to_interp(cbuf);
    }
  %}

  // Emits "MOV EAX, <non-oop word>" as a relocatable placeholder, then the call
  // itself with a virtual_call relocation that refers back to the address of
  // that MOV.
  enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL
    // !!!!!
    // Generate "Mov EAX,0x00", placeholder instruction to load oop-info
    // emit_call_dynamic_prologue( cbuf );
    cbuf.set_insts_mark();
    emit_opcode(cbuf, 0xB8 + EAX_enc); // mov EAX,-1
    emit_d32_reloc(cbuf, (int)Universe::non_oop_word(), oop_Relocation::spec_for_immediate(), RELOC_IMM32);
    address virtual_call_oop_addr = cbuf.insts_mark();
    // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine
    // who we intended to call.
    cbuf.set_insts_mark();
    $$$emit8$primary;
    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                   virtual_call_Relocation::spec(virtual_call_oop_addr), RELOC_IMM32 );
  %}

  // Emits an indirect call through [EAX + disp8], where disp8 is
  // methodOopDesc::from_compiled_offset() and must fit in a signed byte.
  enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL
    int disp = in_bytes(methodOopDesc::from_compiled_offset());
    assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");

    // CALL *[EAX+in_bytes(methodOopDesc::from_compiled_code_entry_point_offset())]
    cbuf.set_insts_mark();
    $$$emit8$primary;
    emit_rm(cbuf, 0x01, $secondary, EAX_enc ); // R/M byte
    emit_d8(cbuf, disp); // Displacement

  %}

  // Emits XOR dst,dst (opcode 0x33 with a register-direct r/m byte).
  enc_class Xor_Reg (eRegI dst) %{
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
  %}

// Following encoding is no longer used, but may be restored if calling
// convention changes significantly.
// Became: Xor_Reg(EBP), Java_To_Runtime( labl )
//
// enc_class Java_Interpreter_Call (label labl) %{ // JAVA INTERPRETER CALL
// // int ic_reg = Matcher::inline_cache_reg();
// // int ic_encode = Matcher::_regEncode[ic_reg];
// // int imo_reg = Matcher::interpreter_method_oop_reg();
// // int imo_encode = Matcher::_regEncode[imo_reg];
//
// // // Interpreter expects method_oop in EBX, currently a callee-saved register,
// // // so we load it immediately before the call
// // emit_opcode(cbuf, 0x8B); // MOV imo_reg,ic_reg # method_oop
// // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
//
// // xor rbp,ebp
// emit_opcode(cbuf, 0x33);
// emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
//
// // CALL to interpreter.
// cbuf.set_insts_mark();
// $$$emit8$primary;
// emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
// runtime_call_Relocation::spec(), RELOC_IMM32 );
// %}

  // Emits primary opcode, a register-direct r/m byte carrying the secondary
  // opcode, and the shift count as an 8-bit immediate.
  enc_class RegOpcImm (eRegI dst, immI8 shift) %{ // SHL, SAR, SHR
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    $$$emit8$shift$$constant;
  %}

  // Emits MOV reg,imm32 using the fixed opcode base 0xB8.
  enc_class LdImmI (eRegI dst, immI src) %{ // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, 0xB8 + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  // Same shape as LdImmI, but the opcode base comes from $primary.
  enc_class LdImmP (eRegI dst, immI src) %{ // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, $primary + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  // Loads the low 32 bits of a long constant into the low register of the
  // pair; a zero half is loaded with XOR reg,reg instead of a MOV.
  enc_class LdImmL_Lo( eRegL dst, immL src) %{ // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg;
    int src_con = $src$$constant & 0x0FFFFFFFFL;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}

  // Loads the high 32 bits of a long constant into the register whose
  // encoding is the low register's encoding plus 2; a zero half uses XOR.
  enc_class LdImmL_Hi( eRegL dst, immL src) %{ // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg + 2;
    int src_con = ((julong)($src$$constant)) >> 32;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}


  // Pushes a double constant on the FPU stack: FLDZ for +0.0, FLD1 for +1.0,
  // otherwise a load (opcode 0xDD) from an emitted double constant.
  enc_class LdImmD (immD src) %{ // Load Immediate
    if( is_positive_zero_double($src$$constant)) {
      // FLDZ
      emit_opcode(cbuf,0xD9);
      emit_opcode(cbuf,0xEE);
    } else if( is_positive_one_double($src$$constant)) {
      // FLD1
      emit_opcode(cbuf,0xD9);
      emit_opcode(cbuf,0xE8);
    } else {
      emit_opcode(cbuf,0xDD);
      emit_rm(cbuf, 0x0, 0x0, 0x5);
      emit_double_constant(cbuf, $src$$constant);
    }
  %}


  // Float counterpart of LdImmD: 0xD9 0xEE for +0.0f, 0xD9 0xE8 for +1.0f,
  // otherwise the primary opcode with an emitted float constant as operand.
  enc_class LdImmF (immF src) %{ // Load Immediate
    if( is_positive_zero_float($src$$constant)) {
      emit_opcode(cbuf,0xD9);
      emit_opcode(cbuf,0xEE);
    } else if( is_positive_one_float($src$$constant)) {
      emit_opcode(cbuf,0xD9);
      emit_opcode(cbuf,0xE8);
    } else {
      $$$emit8$primary;
      // Load immediate does not have a zero or sign extended version
      // for 8-bit immediates
      // First load to TOS, then move to dst
      emit_rm(cbuf, 0x0, 0x0, 0x5);
      emit_float_constant(cbuf, $src$$constant);
    }
  %}

  // Emits only the r/m byte (mod=0, r/m=5) and the float constant operand;
  // no opcode bytes are emitted here.
  enc_class LdImmX (regX dst, immXF con) %{ // Load Immediate
    emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
    emit_float_constant(cbuf, $con$$constant);
  %}

  // Double counterpart of LdImmX: r/m byte plus double constant operand only.
  enc_class LdImmXD (regXD dst, immXD con) %{ // Load Immediate
    emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
    emit_double_constant(cbuf, $con$$constant);
  %}

  // Full load of a double constant into an XMM register; the instruction is
  // chosen by the UseXmmLoadAndClearUpper flag.
  enc_class load_conXD (regXD dst, immXD con) %{ // Load double constant
    // UseXmmLoadAndClearUpper ? movsd(dst, con) : movlpd(dst, con)
    emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
    emit_opcode(cbuf, 0x0F);
    emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
    emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
    emit_double_constant(cbuf, $con$$constant);
  %}

  // Primary opcode with a float-constant memory operand; the secondary opcode
  // goes in the reg field of the r/m byte.
  enc_class Opc_MemImm_F(immF src) %{
    cbuf.set_insts_mark();
    $$$emit8$primary;
    emit_rm(cbuf, 0x0, $secondary, 0x5);
    emit_float_constant(cbuf, $src$$constant);
  %}


  // MOVD from a general register to an XMM register (66 0F 6E /r).
  enc_class MovI2X_reg(regX dst, eRegI src) %{
    emit_opcode(cbuf, 0x66 ); // MOVD dst,src
    emit_opcode(cbuf, 0x0F );
    emit_opcode(cbuf, 0x6E );
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // MOVD from an XMM register to a general register (66 0F 7E /r); note the
  // XMM source goes in the reg field of the r/m byte.
  enc_class MovX2I_reg(eRegI dst, regX src) %{
    emit_opcode(cbuf, 0x66 ); // MOVD dst,src
    emit_opcode(cbuf, 0x0F );
    emit_opcode(cbuf, 0x7E );
    emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg);
  %}

  // Moves a long register pair into an XMM register: each half is MOVD'ed into
  // dst and tmp respectively, then the two are interleaved with PUNPCKLDQ.
  enc_class MovL2XD_reg(regXD dst, eRegL src, regXD tmp) %{
    { // MOVD $dst,$src.lo
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x6E);
      emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    }
    { // MOVD $tmp,$src.hi
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x6E);
      emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
    }
    { // PUNPCKLDQ $dst,$tmp
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x62);
      emit_rm(cbuf, 0x3, $dst$$reg, $tmp$$reg);
    }
  %}

  // Moves an XMM register into a long register pair: the low half comes
  // directly from src, the high half via a PSHUFLW shuffle into tmp.
  enc_class MovXD2L_reg(eRegL dst, regXD src, regXD tmp) %{
    { // MOVD $dst.lo,$src
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x7E);
      emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg);
    }
    { // PSHUFLW $tmp,$src,0x4E (01001110b)
      emit_opcode(cbuf,0xF2);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x70);
      emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
      emit_d8(cbuf, 0x4E);
    }
    { // MOVD $dst.hi,$tmp
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x7E);
      emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg));
    }
  %}


  // Encode a reg-reg copy. If it is useless, then empty encoding.
  enc_class enc_Copy( eRegI dst, eRegI src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  // Same as enc_Copy, but the source operand is a long register pair; only the
  // pair's base (low) register encoding is passed to encode_Copy.
  enc_class enc_CopyL_Lo( eRegI dst, eRegL src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  // Encode xmm reg-reg copy. If it is useless, then empty encoding.
  enc_class enc_CopyXD( RegXD dst, RegXD src ) %{
    encode_CopyXD( cbuf, $dst$$reg, $src$$reg );
  %}

  // Emits only a register-direct r/m byte (dst in the reg field, src in r/m).
  enc_class RegReg (eRegI dst, eRegI src) %{ // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Primary opcode plus r/m byte for the low registers of two long pairs.
  enc_class RegReg_Lo(eRegL dst, eRegL src) %{ // RegReg(Many)
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Secondary opcode plus r/m byte for the high registers of two long pairs.
  enc_class RegReg_Hi(eRegL dst, eRegL src) %{ // RegReg(Many)
    $$$emit8$secondary;
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}

  // Like RegReg_Lo but emits only the r/m byte, no opcode.
  enc_class RegReg_Lo2(eRegL dst, eRegL src) %{ // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Like RegReg_Hi but emits only the r/m byte, no opcode.
  enc_class RegReg_Hi2(eRegL dst, eRegL src) %{ // RegReg(Many)
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}

  // r/m byte pairing an int register (reg field) with the high register of a
  // long pair (r/m field).
  enc_class RegReg_HiLo( eRegL src, eRegI dst ) %{
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
  %}

  // Emits the constant as a 32-bit immediate.
  enc_class Con32 (immI src) %{ // Con32(storeImmI)
    // Output immediate
    $$$emit32$src$$constant;
  %}

  // Emits the raw 32-bit pattern of a float constant.
  enc_class Con32F_as_bits(immF src) %{ // storeF_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  // Same as Con32F_as_bits, for the XMM float immediate operand type.
  enc_class Con32XF_as_bits(immXF src) %{ // storeX_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  // Emits the constant as a 16-bit immediate.
  enc_class Con16 (immI src) %{ // Con16(storeImmI)
    // Output immediate
    $$$emit16$src$$constant;
  %}

  // Emits the constant as a 32-bit value through emit_d32.
  enc_class Con_d32(immI src) %{
    emit_d32(cbuf,$src$$constant);
  %}

  // Emits an r/m byte with mod=0, r/m=5 followed by a zero 32-bit displacement.
  enc_class conmemref (eRegP t1) %{ // Con32(storeImmI)
    // Output immediate memory reference
    emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
    emit_d32(cbuf, 0x00);
  %}

  // Emits the LOCK prefix byte, only on multiprocessor systems.
  enc_class lock_prefix( ) %{
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0); // [Lock]
  %}

  // Cmp-xchg long value.
  // Note: we need to swap EBX and ECX before and after the
  // cmpxchg8 instruction because the instruction uses
  // ECX as the high order word of the new value to store but
  // our register encoding uses EBX.
  enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

    // XCHG ebx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);
    // CMPXCHG8 [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xC7);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
    // XCHG ebx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
  %}

  // 32-bit compare-and-exchange on [mem_ptr], LOCK-prefixed on multiprocessor
  // systems. The reg field of the r/m byte is the constant 1 (ECX's encoding
  // per the register definitions).
  enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);

    // CMPXCHG [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB1);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  // Materializes the flags as 0/1 in res: res is set to 0, and the following
  // 5-byte "MOV res,1" is skipped when the NE condition holds.
  enc_class enc_flags_ne_to_boolean( iRegI res ) %{
    int res_encoding = $res$$reg;

    // MOV res,0
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 0 );
    // JNE,s fail
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 5 );
    // MOV res,1
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 1 );
    // fail:
  %}

  enc_class set_instruction_start( ) %{
    cbuf.set_insts_mark(); // Mark start of opcode for reloc info in mem operand
  %}

  // Emits the r/m (and any SIB/displacement) bytes for a register + memory
  // operand pair by delegating to encode_RegMem.
  enc_class RegMem (eRegI ereg, memory mem) %{ // emit_reg_mem
    int reg_encoding = $ereg$$reg;
    int base = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    bool disp_is_oop = $mem->disp_is_oop();
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
  %}

  // Variant of RegMem for the high half of a long: uses the high register of
  // the pair and a displacement 4 bytes further. Oop displacements are not
  // allowed because the displacement is adjusted.
  enc_class RegMem_Hi(eRegL ereg, memory mem) %{ // emit_reg_mem
    int reg_encoding = HIGH_FROM_LOW($ereg$$reg); // Hi register of pair, computed from lo
    int base = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp + 4; // Offset is 4 further in memory
    assert( !$mem->disp_is_oop(), "Cannot add 4 to oop" );
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, false/*disp_is_oop*/);
  %}

  // Long shift by a constant 1..31: a double-precision shift (0F <tertiary>)
  // between the two halves, then a plain shift (<primary> /<secondary>) of r1.
  // Which half is r1 depends on whether the tertiary opcode is 0xA4.
  enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
    int r1, r2;
    if( $tertiary == 0xA4 ) { r1 = $dst$$reg; r2 = HIGH_FROM_LOW($dst$$reg); }
    else { r2 = $dst$$reg; r1 = HIGH_FROM_LOW($dst$$reg); }
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,$tertiary);
    emit_rm(cbuf, 0x3, r1, r2);
    emit_d8(cbuf,$cnt$$constant);
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, r1);
    emit_d8(cbuf,$cnt$$constant);
  %}

  // Long shift by a constant 32..63 with sign fill: copy hi into lo, shift lo
  // by (cnt-32), then shift hi by 31.
  enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
    emit_opcode( cbuf, 0x8B ); // Move
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    emit_d8(cbuf,$cnt$$constant-32);
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
    emit_d8(cbuf,31);
  %}

  // Long shift by a constant 32..63 with zero fill: move one half into the
  // other, shift it by (cnt-32) when that is non-zero, and clear the source
  // half. The direction is selected by whether the secondary opcode is 0x5.
  enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
    int r1, r2;
    if( $secondary == 0x5 ) { r1 = $dst$$reg; r2 = HIGH_FROM_LOW($dst$$reg); }
    else { r2 = $dst$$reg; r1 = HIGH_FROM_LOW($dst$$reg); }

    emit_opcode( cbuf, 0x8B ); // Move r1,r2
    emit_rm(cbuf, 0x3, r1, r2);
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_opcode(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, r1);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_opcode(cbuf,0x33); // XOR r2,r2
    emit_rm(cbuf, 0x3, r2, r2);
  %}

  // Clone of RegMem but accepts an extra parameter to access each
  // half of a double in memory; it never needs relocation info.
  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, eRegI rm_reg) %{
    emit_opcode(cbuf,$opcode$$constant);
    int reg_encoding = $rm_reg$$reg;
    int base = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp + $disp_for_half$$constant;
    bool disp_is_oop = false;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
  %}

  // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
  //
  // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
  // and it never needs relocation information.
  // Frequently used to move data between FPU's Stack Top and memory.
  // Memory-operand encoding whose reg/opcode field is the constant rm_opcode.
  // Asserts that the displacement is not an oop, since no relocation is emitted.
  enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    assert( !$mem->disp_is_oop(), "No oops here because no relo info allowed" );
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, false);
  %}

  // Same as RMopc_Mem_no_oop, but passes the operand's disp_is_oop flag on to
  // encode_RegMem instead of asserting it is false.
  enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
  %}

  // Encodes the operand bytes for "dst, [src0 + src1]" (base register plus
  // constant displacement, no index or scale). The opcode is emitted elsewhere.
  enc_class RegLea (eRegI dst, eRegI src0, immI src1 ) %{ // emit_reg_lea
    int reg_encoding = $dst$$reg;
    int base = $src0$$reg; // 0xFFFFFFFF indicates no base
    int index = 0x04; // 0x04 indicates no index
    int scale = 0x00; // 0x00 indicates no scale
    int displace = $src1$$constant; // 0x00 indicates no displacement
    bool disp_is_oop = false;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
  %}

  // dst = min(dst, src): compare, then skip the 2-byte move when dst < src.
  enc_class min_enc (eRegI dst, eRegI src) %{ // MIN
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst < src around move
    emit_opcode(cbuf,0x7C);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // dst = max(dst, src): compare, then skip the 2-byte move when dst > src.
  enc_class max_enc (eRegI dst, eRegI src) %{ // MAX
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst > src around move
    emit_opcode(cbuf,0x7F);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Stores an FPU register to memory using the primary opcode.
  enc_class enc_FP_store(memory mem, regD src) %{
    // If src is FPR1, we can just FST to store it.
    // Else we need to FLD it to FPR1, then FSTP to store/pop it.
    int reg_encoding = 0x2; // Just store
    int base = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
    if( $src$$reg != FPR1L_enc ) {
      reg_encoding = 0x3; // Store & pop
      emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
    }
    cbuf.set_insts_mark(); // Mark start of opcode for reloc info in mem operand
    emit_opcode(cbuf,$primary);
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
  %}

  // Emits opcode 0xF7 with /3 in the r/m byte (NEG) on dst.
  enc_class neg_reg(eRegI dst) %{
    // NEG $dst
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
  %}

  // Emits 0F 9C (SETL) with a register-direct r/m byte for dst.
  enc_class setLT_reg(eCXRegI dst) %{
    // SETLT $dst
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0x9C);
    emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // Branch-free conditional add: p -= q; tmp = (borrow ? -1 : 0); tmp &= y;
  // p += tmp.
  enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{ // cadd_cmpLT
    int tmpReg = $tmp$$reg;

    // SUB $p,$q
    emit_opcode(cbuf,0x2B);
    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
    // SBB $tmp,$tmp
    emit_opcode(cbuf,0x1B);
    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
    // AND $tmp,$y
    emit_opcode(cbuf,0x23);
    emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
    // ADD $p,$tmp
    emit_opcode(cbuf,0x03);
    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
  %}

  // Same sequence as enc_cmpLTP, except the AND operand comes from memory.
  enc_class enc_cmpLTP_mem(eRegI p, eRegI q, memory mem, eCXRegI tmp) %{ // cadd_cmpLT
    int tmpReg = $tmp$$reg;

    // SUB $p,$q
    emit_opcode(cbuf,0x2B);
    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
    // SBB $tmp,$tmp
    emit_opcode(cbuf,0x1B);
    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
    // AND $tmp,[$mem]
    cbuf.set_insts_mark(); // Mark start of opcode for reloc info in mem operand
    emit_opcode(cbuf,0x23);
    int reg_encoding = tmpReg;
    int base = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    bool disp_is_oop = $mem->disp_is_oop();
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
    // ADD $p,$tmp
    emit_opcode(cbuf,0x03);
    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
  %}

  // Variable left shift of a long pair by the count in ECX. If bit 5 of the
  // count is set, lo is first moved into hi and lo cleared; the short jump
  // skips those two 2-byte instructions (displacement 4) otherwise.
  enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.hi,$dst.lo
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // CLR $dst.lo
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
// small:
    // SHLD $dst.hi,$dst.lo,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xA5);
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    // SHL $dst.lo,$shift
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // Variable logical right shift of a long pair by the count in ECX. If bit 5
  // of the count is set, hi is first moved into lo and hi cleared; the short
  // jump skips those two 2-byte instructions (displacement 4) otherwise.
  enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // CLR $dst.hi
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
// small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SHR $dst.hi,$shift
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
  %}

  // Variable arithmetic right shift of a long pair by the count in ECX. If
  // bit 5 of the count is set, hi is first moved into lo and hi is filled with
  // its sign (SAR 31); the short jump skips those 5 bytes otherwise.
  enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x05);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // SAR $dst.hi,31
    emit_opcode(cbuf, 0xC1);
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
    emit_d8(cbuf, 0x1F );
// small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SAR $dst.hi,$shift
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
  %}


  // ----------------- Encodings for floating point unit -----------------
  // May leave result in FPU-TOS or FPU reg depending on opcodes
  enc_class OpcReg_F (regF src) %{ // FMUL, FDIV
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $src$$reg );
  %}

  // Pop argument in FPR0 with FSTP ST(0)
  enc_class PopFPU() %{
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, 0xD8 );
  %}

  // !!!!!
  // equivalent to Pop_Reg_F
  enc_class Pop_Reg_D( regD dst ) %{
    emit_opcode( cbuf, 0xDD ); // FSTP ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  // Pushes a copy of an FPU register onto the FPU stack.
  enc_class Push_Reg_D( regD dst ) %{
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg ); // FLD ST(i-1)
  %}

  // Loads the 80-bit constant at StubRoutines::addr_fpu_subnormal_bias1() and
  // multiplies it into dst, popping the loaded constant.
  enc_class strictfp_bias1( regD dst ) %{
    emit_opcode( cbuf, 0xDB ); // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
    emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  // Same as strictfp_bias1, using the constant at
  // StubRoutines::addr_fpu_subnormal_bias2().
  enc_class strictfp_bias2( regD dst ) %{
    emit_opcode( cbuf, 0xDB ); // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
    emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  // Special case for moving an integer register to a stack slot.
  enc_class OpcPRegSS( stackSlotI dst, eRegI src ) %{ // RegSS
    store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
  %}

  // Special case for moving a register to a stack slot.
  // Emits only the operand bytes for [ESP + disp32]: r/m byte, SIB byte and a
  // 32-bit displacement.
  enc_class RegSS( stackSlotI dst, eRegI src ) %{ // RegSS
    // Opcode already emitted
    emit_rm( cbuf, 0x02, $src$$reg, ESP_enc ); // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte
    emit_d32(cbuf, $dst$$disp); // Displacement
  %}

  // Push the integer in stackSlot 'src' onto FP-stack
  enc_class Push_Mem_I( memory src ) %{ // FILD [ESP+src]
    store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
  %}

  // Push the float in stackSlot 'src' onto FP-stack
  enc_class Push_Mem_F( memory src ) %{ // FLD_S [ESP+src]
    store_to_stackslot( cbuf, 0xD9, 0x00, $src$$disp );
  %}

  // Push the double in stackSlot 'src' onto FP-stack
  enc_class Push_Mem_D( memory src ) %{ // FLD_D [ESP+src]
    store_to_stackslot( cbuf, 0xDD, 0x00, $src$$disp );
  %}

  // Push FPU's TOS float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_F( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
    store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
  %}

  // Same as Pop_Mem_F except for opcode
  // Push FPU's TOS double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_D( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
    store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
  %}

  // Stores the top of the FPU stack into an FPU register and pops.
  enc_class Pop_Reg_F( regF dst ) %{
    emit_opcode( cbuf, 0xDD ); // FSTP ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  // Pushes a copy of an FPU register onto the FPU stack.
  enc_class Push_Reg_F( regF dst ) %{
    emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1)
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
  %}

  // Push FPU's float to a stack-slot, and pop FPU-stack.
  // When src is FPR1 a plain store (/2) is used; otherwise src is first pushed
  // and then stored with a popping store (/3).
  enc_class Pop_Mem_Reg_F( stackSlotF dst, regF src ) %{
    int pop = 0x02;
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;
    }
    store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S [ESP+dst]
  %}

  // Push FPU's double to a stack-slot, and pop
  // FPU-stack.
  // When src is FPR1 a plain store (/2) is used; otherwise src is first pushed
  // and then stored with a popping store (/3).
  enc_class Pop_Mem_Reg_D( stackSlotD dst, regD src ) %{
    int pop = 0x02;
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;
    }
    store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D [ESP+dst]
  %}

  // Push FPU's double to a FPU-stack-slot, and pop FPU-stack.
  // When src is FPR1 no FLD is emitted and a non-popping FST is used, with the
  // base reduced by one because nothing was pushed.
  enc_class Pop_Reg_Reg_D( regD dst, regF src ) %{
    int pop = 0xD0 - 1; // -1 since we skip FLD
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 ); // FLD ST(src-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0xD8;
    }
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, pop+$dst$$reg ); // FST<P> ST(i)
  %}


  // Fused multiply-add on the FPU stack: load src1, multiply by src2, add src,
  // then store to dst and pop.
  enc_class Mul_Add_F( regF dst, regF src, regF src1, regF src2 ) %{
    MacroAssembler masm(&cbuf);
    masm.fld_s( $src1$$reg-1); // nothing at TOS, load TOS from src1.reg
    masm.fmul( $src2$$reg+0); // value at TOS
    masm.fadd( $src$$reg+0); // value at TOS
    masm.fstp_d( $dst$$reg+0); // value at TOS, popped off after store
  %}


  // Pushes dst onto the FPU stack and, when src is not already FPR1, exchanges
  // src into the FPR1 position. The exchange is bracketed by fincstp/fdecstp so
  // that it operates one slot below the freshly pushed value.
  enc_class Push_Reg_Mod_D( regD dst, regD src) %{
    // load dst in FPR0
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // swap src with FPR1:
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
  %}

  // Pushes two XMM doubles onto the FPU stack (src1 first, then src0) through
  // an 8-byte stack temporary. The temporary is left allocated on exit.
  enc_class Push_ModD_encoding( regXD src0, regXD src1) %{
    // Allocate 8 bytes of stack
    emit_opcode(cbuf,0x83); // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);

    emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src1
    emit_opcode (cbuf, 0x0F );
    emit_opcode (cbuf, 0x11 );
    encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

    emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src0
    emit_opcode (cbuf, 0x0F );
    emit_opcode (cbuf, 0x11 );
    encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

  %}

  // Float counterpart of Push_ModD_encoding, using a 4-byte stack temporary
  // that is likewise left allocated on exit.
  enc_class Push_ModX_encoding( regX src0, regX src1) %{
    // Allocate 4 bytes of stack
    emit_opcode(cbuf,0x83); // SUB ESP,4
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x04);

    emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src1
    emit_opcode (cbuf, 0x0F );
    emit_opcode (cbuf, 0x11 );
    encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xD9 ); // FLD [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

    emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src0
    emit_opcode (cbuf, 0x0F );
    emit_opcode (cbuf, 0x11 );
    encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xD9 ); // FLD [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

  %}

  // Pops the FPU top-of-stack double into an XMM register through [ESP], then
  // releases the 8-byte stack temporary.
  enc_class Push_ResultXD(regXD dst) %{
    store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [ESP]

    // UseXmmLoadAndClearUpper ? movsd dst,[esp] : movlpd dst,[esp]
    emit_opcode (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
    emit_opcode (cbuf, 0x0F );
    emit_opcode (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
    encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0x83); // ADD ESP,8
    emit_opcode(cbuf,0xC4);
    emit_d8(cbuf,0x08);
  %}

  // Pops the FPU top-of-stack float into an XMM register through [ESP], then
  // releases d8 bytes of stack.
  enc_class Push_ResultX(regX dst, immI d8) %{
    store_to_stackslot( cbuf, 0xD9, 0x03, 0 ); //FSTP_S [ESP]

    emit_opcode (cbuf, 0xF3 ); // MOVSS dst(xmm), [ESP]
    emit_opcode (cbuf, 0x0F );
    emit_opcode (cbuf, 0x10 );
    encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0x83); // ADD ESP,d8 (4 or 8)
    emit_opcode(cbuf,0xC4);
    emit_d8(cbuf,$d8$$constant);
  %}

  // Pushes one XMM double onto the FPU stack through an 8-byte stack temporary,
  // which is left allocated on exit.
  enc_class Push_SrcXD(regXD src) %{
    // Allocate 8 bytes of stack
    emit_opcode(cbuf,0x83); // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);

    emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src
    emit_opcode (cbuf, 0x0F );
    emit_opcode (cbuf, 0x11 );
    encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
  %}

  // Reserves an 8-byte temporary on the stack.
  enc_class push_stack_temp_qword() %{
    emit_opcode(cbuf,0x83); // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8 (cbuf,0x08);
  %}

  // Releases the 8-byte temporary reserved by push_stack_temp_qword.
  enc_class pop_stack_temp_qword() %{
    emit_opcode(cbuf,0x83); // ADD ESP,8
    emit_opcode(cbuf,0xC4);
    emit_d8 (cbuf,0x08);
  %}

  // Pushes an XMM double onto the FPU stack through [ESP]. Unlike Push_SrcXD,
  // this does not adjust ESP itself.
  enc_class push_xmm_to_fpr1( regXD xmm_src ) %{
    emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], xmm_src
    emit_opcode (cbuf, 0x0F );
    emit_opcode (cbuf, 0x11 );
    encode_RegMem(cbuf, $xmm_src$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
  %}

  // Compute X^Y using Intel's fast hardware instructions, if possible.
  // Otherwise return a NaN.
  enc_class pow_exp_core_encoding %{
    // FPR1 holds Y*ln2(X).
Compute FPR1 = 2^(Y*ln2(X)) 2822 emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xC0); // fdup = fld st(0) Q Q 2823 emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xFC); // frndint int(Q) Q 2824 emit_opcode(cbuf,0xDC); emit_opcode(cbuf,0xE9); // fsub st(1) -= st(0); int(Q) frac(Q) 2825 emit_opcode(cbuf,0xDB); // FISTP [ESP] frac(Q) 2826 emit_opcode(cbuf,0x1C); 2827 emit_d8(cbuf,0x24); 2828 emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xF0); // f2xm1 2^frac(Q)-1 2829 emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xE8); // fld1 1 2^frac(Q)-1 2830 emit_opcode(cbuf,0xDE); emit_opcode(cbuf,0xC1); // faddp 2^frac(Q) 2831 emit_opcode(cbuf,0x8B); // mov rax,[esp+0]=int(Q) 2832 encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 0, false); 2833 emit_opcode(cbuf,0xC7); // mov rcx,0xFFFFF800 - overflow mask 2834 emit_rm(cbuf, 0x3, 0x0, ECX_enc); 2835 emit_d32(cbuf,0xFFFFF800); 2836 emit_opcode(cbuf,0x81); // add rax,1023 - the double exponent bias 2837 emit_rm(cbuf, 0x3, 0x0, EAX_enc); 2838 emit_d32(cbuf,1023); 2839 emit_opcode(cbuf,0x8B); // mov rbx,eax 2840 emit_rm(cbuf, 0x3, EBX_enc, EAX_enc); 2841 emit_opcode(cbuf,0xC1); // shl rax,20 - Slide to exponent position 2842 emit_rm(cbuf,0x3,0x4,EAX_enc); 2843 emit_d8(cbuf,20); 2844 emit_opcode(cbuf,0x85); // test rbx,ecx - check for overflow 2845 emit_rm(cbuf, 0x3, EBX_enc, ECX_enc); 2846 emit_opcode(cbuf,0x0F); emit_opcode(cbuf,0x45); // CMOVne rax,ecx - overflow; stuff NAN into EAX 2847 emit_rm(cbuf, 0x3, EAX_enc, ECX_enc); 2848 emit_opcode(cbuf,0x89); // mov [esp+4],eax - Store as part of double word 2849 encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 4, false); 2850 emit_opcode(cbuf,0xC7); // mov [esp+0],0 - [ESP] = (double)(1<<int(Q)) = 2^int(Q) 2851 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); 2852 emit_d32(cbuf,0); 2853 emit_opcode(cbuf,0xDC); // fmul dword st(0),[esp+0]; FPR1 = 2^int(Q)*2^frac(Q) = 2^Q 2854 encode_RegMem(cbuf, 0x1, ESP_enc, 0x4, 0, 0, false); 2855 %} 2856 2857// enc_class Pop_Reg_Mod_D( regD dst, regD src) 2858// was 
//  replaced by Push_Result_Mod_D followed by Pop_Reg_X() or Pop_Mem_X()

  // Push_Result_Mod_D: unless `src` is FPR1L_enc, exchange FPR1 with `src`.
  // The FXCH is bracketed by fincstp/fdecstp so it operates one slot below
  // the current top of stack.  Storing the result is left to a following
  // Pop_Reg_* / Pop_Mem_* encoding (see the retired code at the end).
  enc_class Push_Result_Mod_D( regD src) %{
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
    // // following asm replaced with Pop_Reg_F or Pop_Mem_F
    // // FSTP   FPR$dst$$reg
    // emit_opcode( cbuf, 0xDD );
    // emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  // fnstsw_sahf_skip_parity: copy the FPU status word into the integer flags
  // (fnstsw ax; sahf) and emit a short JNP with a hard-coded displacement of 5.
  // The bytes being skipped are emitted by whatever encoding follows this one
  // in the instruct; they are not visible here, so the 5 must match them.
  enc_class fnstsw_sahf_skip_parity() %{
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jnp  ::skip
    emit_opcode( cbuf, 0x7B );
    emit_opcode( cbuf, 0x05 );
  %}

  // emitModD: emit an fprem loop that repeats while the parity flag is set
  // after fnstsw/sahf.
  // The backward branch is a near JP (0F 8A) whose rel32 is hard-coded as
  // F4 FF FF FF, i.e. -12.  Assuming each emit_opcode() emits exactly one
  // byte, the loop is 2+1+2+1+6 = 12 bytes, so the branch lands on the fprem.
  // Adding or removing any byte in this class requires updating that constant.
  enc_class emitModD() %{
    // fprem must be iterative
    // :: loop
    // fprem
    emit_opcode( cbuf, 0xD9 );
    emit_opcode( cbuf, 0xF8 );
    // wait
    emit_opcode( cbuf, 0x9b );
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jp  ::loop
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0x8A );
    emit_opcode( cbuf, 0xF4 );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
  %}

  // fpu_flags: move the result of an FPU compare into the integer flags,
  // mapping the unordered case to "less than".
  // Sequence: fnstsw ax; test ax,0x0400; jz +2; mov ah,1; sahf.
  // The JZ displacement of 2 skips exactly the two-byte "mov ah,1", so SAHF is
  // executed on both paths.
  enc_class fpu_flags() %{
    // fnstsw_ax
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // test ax,0x0400
    emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
    emit_opcode( cbuf, 0xA9 );
    emit_d16   ( cbuf, 0x0400 );
    // // // This sequence works, but stalls for 12-16 cycles on PPro
    // // test rax,0x0400
    // emit_opcode( cbuf, 0xA9 );
    // emit_d32   ( cbuf, 0x00000400 );
    //
    // jz exit (no unordered comparison)
    emit_opcode( cbuf, 0x74 );
    emit_d8    ( cbuf, 0x02 );
    // mov ah,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // sahf
    emit_opcode( cbuf, 0x9E);
  %}

  // cmpF_P6_fixup: post-process integer flags that were already set by the
  // compare, forcing the "less than" outcome when the parity flag reports a NaN.
  // The JNP displacement of 3 skips "mov ah,1" (2 bytes) plus "sahf" (1 byte)
  // and lands on the trailing NOP.
  enc_class cmpF_P6_fixup() %{
    // Fixup the integer flags in case comparison involved a NaN
    //
    // JNP exit (no unordered comparison, P-flag is set by NaN)
    emit_opcode( cbuf, 0x7B );
    emit_d8    ( cbuf, 0x03 );
    // MOV AH,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // SAHF
    emit_opcode( cbuf, 0x9E);
    // NOP     // target for branch to avoid branch to branch
    emit_opcode( cbuf, 0x90);
  %}

  // Pseudo-code for CmpF_Result (below):
//     fnstsw_ax();
//     sahf();
//     movl(dst, nan_result);
//     jcc(Assembler::parity, exit);
//     movl(dst, less_result);
//     jcc(Assembler::below, exit);
//     movl(dst, equal_result);
//     jcc(Assembler::equal, exit);
//     movl(dst, greater_result);

  // NOTE(review): the four values listed next disagree with the code.
  // CmpF_Result emits -1 for the "less" move and +1 for the "greater" move;
  // the list has those two swapped.  Treat the list as stale.
//   less_result     =  1;
//   greater_result  = -1;
//   equal_result    = 0;
//   nan_result      = -1;

  // CmpF_Result: materialize a three-way FPU compare result in `dst`:
  //   unordered (parity) -> -1, below -> -1, equal -> 0, otherwise -> 1.
  // Each "movl dst,imm32" is emitted as B8+reg followed by a 32-bit immediate
  // (5 bytes) and each short jcc is 2 bytes, which is where the hard-coded
  // displacements come from:
  //   0x13 = 5+2+5+2+5, 0x0C = 5+2+5, 0x05 = 5.
  // The immediates do not affect flags, so every jcc tests the flags loaded
  // by the initial SAHF.
  enc_class CmpF_Result(eRegI dst) %{
    // fnstsw_ax();
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // sahf
    emit_opcode( cbuf, 0x9E);
    // movl(dst, nan_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::parity, exit);
    emit_opcode( cbuf, 0x7A );
    emit_d8    ( cbuf, 0x13 );
    // movl(dst, less_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::below, exit);
    emit_opcode( cbuf, 0x72 );
    emit_d8    ( cbuf, 0x0C );
    // movl(dst, equal_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 0 );
    // jcc(Assembler::equal, exit);
    emit_opcode( cbuf, 0x74 );
    emit_d8    ( cbuf, 0x05 );
    // movl(dst, greater_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 1 );
  %}


  // XMM version of CmpF_Result.
// Because the XMM compare
  // instructions set the EFLAGS directly. It becomes simpler than
  // the float version above.
  //
  // Adjusts `dst` in place from flags that are already set on entry:
  //   parity (unordered) or none of the tested conditions -> dst is decremented
  //   equal                                               -> dst is unchanged
  //   above                                               -> dst is incremented
  // NOTE(review): presumably the caller zeroes dst beforehand without
  // disturbing the flags, giving -1 / 0 / +1 -- confirm at the use site.
  enc_class CmpX_Result(eRegI dst) %{
    MacroAssembler _masm(&cbuf);
    Label nan, inc, done;

    __ jccb(Assembler::parity, nan);
    __ jccb(Assembler::equal,  done);
    __ jccb(Assembler::above,  inc);
    __ bind(nan);
    __ decrement(as_Register($dst$$reg)); // NO L qqq
    __ jmpb(done);
    __ bind(inc);
    __ increment(as_Register($dst$$reg)); // NO L qqq
    __ bind(done);
  %}

  // Compare the longs and set flags
  // BROKEN!  Do Not use as-is
  // Emits CMP hi,hi; JNE +2; CMP lo,lo.  The JNE skips the two-byte low-word
  // compare when the high words differ.
  enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
    // CMP    $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
    // JNE,s  done
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 2 );
    // CMP    $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
// done:
  %}

  // convert_int_long: sign-extend the int `src` into the register pair `dst`.
  // Both halves receive a copy of src; the high half is then shifted right
  // arithmetically by 31 so it holds only copies of the sign bit.
  enc_class convert_int_long( regL dst, eRegI src ) %{
    // mov $dst.lo,$src
    int dst_encoding = $dst$$reg;
    int src_encoding = $src$$reg;
    encode_Copy( cbuf, dst_encoding  , src_encoding );
    // mov $dst.hi,$src
    encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
    // sar $dst.hi,31
    emit_opcode( cbuf, 0xC1 );
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
    emit_d8(cbuf, 0x1F );
  %}

  // convert_long_double: push the long `src` (high half first, then low half),
  // load it onto the FPU stack with a 64-bit FILD from [ESP], then release the
  // 8 bytes again.
  enc_class convert_long_double( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg  );
    // fild 64-bits at [SP]
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
    // pop stack
    emit_opcode(cbuf, 0x83); // add  SP, #8
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 0x8);
  %}

  // multiply_con_and_shift_high: one-operand IMUL by `src1`, leaving the
  // 64-bit product in EDX:EAX, then an arithmetic right shift of `dst` by
  // (cnt - 32).  The shift is omitted when cnt == 32.  Only src1, dst and cnt
  // are referenced in the body; src2 and cr appear in the operand list only.
  enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
    // IMUL   EDX:EAX,$src1
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
    // SAR    EDX,$cnt-32
    int shift_count = ((int)$cnt$$constant) - 32;
    if (shift_count > 0) {
      emit_opcode(cbuf, 0xC1);
      emit_rm(cbuf, 0x3, 7, $dst$$reg );
      emit_d8(cbuf, shift_count);
    }
  %}

  // this version doesn't have add sp, 8
  // (same push/push/FILD sequence as convert_long_double; the 8 pushed bytes
  // are left on the stack for the caller to release)
  enc_class convert_long_double2( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg  );
    // fild 64-bits at [SP]
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
  %}

  // long_int_multiply: signed 32x32 -> 64 multiply (F7 /5) by `src`.
  enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic idea: long = (long)int * (long)int
    // IMUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src$$reg);
  %}

  // long_uint_multiply: unsigned 32x32 -> 64 multiply (F7 /4) by `src`.
  enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
    // MUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg);
  %}

  // long_multiply: 64x64 -> 64 multiply of dst (EDX:EAX) by src, using `tmp`
  // to accumulate the two cross products before the final widening MUL.
  enc_class long_multiply( eADXRegL dst, eRegL src, eRegI tmp ) %{
    // Basic idea: lo(result) = lo(x_lo * y_lo)
    //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
    // MOV    $tmp,$src.lo
    encode_Copy( cbuf, $tmp$$reg, $src$$reg );
    // IMUL   $tmp,EDX
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    // MOV    EDX,$src.hi
    encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
    // IMUL   EDX,EAX
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // ADD    $tmp,EDX
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    // MUL   EDX:EAX,$src.lo
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg );
    // ADD    EDX,$tmp   (the operand is $tmp$$reg, whichever register was allocated)
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
  %}

  // long_multiply_con: multiply dst (EDX:EAX) by the small constant `src`.
  // The constant is emitted once as an 8-bit immediate (IMUL) and once as a
  // 32-bit immediate (MOV EDX).
  enc_class long_multiply_con( eADXRegL dst, immL_127 src, eRegI tmp ) %{
    // Basic idea: lo(result) = lo(src * y_lo)
    //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
    // IMUL   $tmp,EDX,$src
    emit_opcode( cbuf, 0x6B );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    emit_d8( cbuf, (int)$src$$constant );
    // MOV    EDX,$src
    emit_opcode(cbuf, 0xB8 + EDX_enc);
    emit_d32( cbuf, (int)$src$$constant );
    // MUL   EDX:EAX,EDX
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, EDX_enc );
    // ADD    EDX,$tmp   (the operand is $tmp$$reg, whichever register was allocated)
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
  %}

  // long_div: push src1 and src2 (high half before low half for each), call
  // SharedRuntime::ldiv through a relocated rel32 CALL, then pop the four
  // pushed words (4*4 bytes).
  // NOTE(review): HIGH_FROM_LOW is applied to the whole opcode byte
  // (0x50+reg) here, whereas convert_long_double applies it to the register
  // encoding only.  The two agree only if HIGH_FROM_LOW is a plain additive
  // offset -- confirm against its definition.
  enc_class long_div( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf,               0x50+$src1$$reg  );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf,               0x50+$src2$$reg  );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    // rel32 is measured from the end of the 4-byte displacement, hence the "- 4"
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Restore stack
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);
  %}

  // long_mod: identical to long_div except that it calls SharedRuntime::lrem.
  // The NOTE(review) on HIGH_FROM_LOW in long_div applies here as well.
  enc_class long_mod( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf,               0x50+$src1$$reg  );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf,               0x50+$src2$$reg  );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    // rel32 is measured from the end of the 4-byte displacement, hence the "- 4"
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Restore stack
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);
  %}

  // long_cmp_flags0: OR the two halves of `src` into `tmp`, so ZF is set
  // exactly when the whole 64-bit value is zero.
  enc_class long_cmp_flags0( eRegL src, eRegI tmp ) %{
    // MOV   $tmp,$src.lo
    emit_opcode(cbuf, 0x8B);
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
    // OR    $tmp,$src.hi
    emit_opcode(cbuf, 0x0B);
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
  %}

  // long_cmp_flags1: compare the low halves, and compare the high halves only
  // if the low halves were equal (the JNE skips the two-byte second CMP).
  enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
    // CMP    $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // JNE,s  skip
    emit_cc(cbuf, 0x70, 0x5);
    emit_d8(cbuf,2);
    // CMP    $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
  %}

  // long_cmp_flags2: full 64-bit compare via CMP on the low halves followed by
  // SBB on the high halves (computed into `tmp`, so src1 is preserved).
  enc_class long_cmp_flags2( eRegL src1, eRegL src2, eRegI tmp ) %{
    // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // MOV    $tmp,$src1.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
    // SBB   $tmp,$src2.hi\t! Compute flags for long compare
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
  %}

  // long_cmp_flags3: compare zero against `src` (tmp is cleared, then
  // CMP tmp,src.lo / SBB tmp,src.hi).
  enc_class long_cmp_flags3( eRegL src, eRegI tmp ) %{
    // XOR    $tmp,$tmp
    emit_opcode(cbuf,0x33);  // XOR
    emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
    // CMP    $tmp,$src.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
    // SBB    $tmp,$src.hi
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
  %}

 // Sniff, sniff... smells like Gnu Superoptimizer
 // neg_long: negate the 64-bit value in `dst` with three instructions:
 // NEG hi; NEG lo; SBB hi,0 (the SBB folds the borrow from NEG lo into hi).
  enc_class neg_long( eRegL dst ) %{
    emit_opcode(cbuf,0xF7);    // NEG hi
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_opcode(cbuf,0xF7);    // NEG lo
    emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
    emit_opcode(cbuf,0x83);    // SBB hi,0
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_d8    (cbuf,0 );
  %}

  // movq_ld: MOVQ from the memory operand into the XMM register `dst`.
  enc_class movq_ld(regXD dst, memory mem) %{
    MacroAssembler _masm(&cbuf);
    __ movq($dst$$XMMRegister, $mem$$Address);
  %}

  // movq_st: MOVQ from the XMM register `src` to the memory operand.
  enc_class movq_st(memory mem, regXD src) %{
    MacroAssembler _masm(&cbuf);
    __ movq($mem$$Address, $src$$XMMRegister);
  %}

  // pshufd_8x8: copy src to dst, then PUNPCKLBW dst,dst followed by
  // PSHUFLW dst,dst,0x00.
  enc_class pshufd_8x8(regX dst, regX src) %{
    MacroAssembler _masm(&cbuf);

    encode_CopyXD(cbuf, $dst$$reg, $src$$reg);
    __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg));
    __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00);
  %}

  // pshufd_4x16: PSHUFLW dst,src,0x00.
  enc_class pshufd_4x16(regX dst, regX src) %{
    MacroAssembler _masm(&cbuf);

    __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00);
  %}

  // pshufd: PSHUFD dst,src with the shuffle selector passed in as `mode`.
  enc_class pshufd(regXD dst, regXD src, int mode) %{
    MacroAssembler _masm(&cbuf);

    __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode);
  %}

  // pxor: PXOR dst,src.
  enc_class pxor(regXD dst, regXD src) %{
    MacroAssembler _masm(&cbuf);

    __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg));
  %}

  // mov_i2x: MOVD from the general register `src` into the XMM register `dst`.
  enc_class mov_i2x(regXD dst, eRegI src) %{
    MacroAssembler _masm(&cbuf);

    __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg));
  %}


  // Because the transitions from emitted code to the runtime
  // monitorenter/exit helper stubs are so slow it's critical that
  // we inline both the stack-locking fast-path and the inflated fast path.
  //
  // See also:  cmpFastLock and cmpFastUnlock.
  //
  // What follows is a specialized inline transliteration of the code
  // in slow_enter() and slow_exit().  If we're concerned about I$ bloat
  // another option would be to emit TrySlowEnter and TrySlowExit methods
  // at startup-time.  These methods would accept arguments as
  // (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
  // indications in the icc.ZFlag.  Fast_Lock and Fast_Unlock would simply
  // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
  // In practice, however, the # of lock sites is bounded and is usually small.
  // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
  // if the processor uses simple bimodal branch predictors keyed by EIP,
  // since the helper routines would be called from multiple synchronization
  // sites.
  //
  // An even better approach would be to write "MonitorEnter()" and "MonitorExit()"
  // in java - using j.u.c and unsafe - and just bind the lock and unlock sites
  // to those specialized methods.  That'd give us a mostly platform-independent
  // implementation that the JITs could optimize and inline at their pleasure.
  // Done correctly, the only time we'd need to cross to native code would be
  // to park() or unpark() threads.
// We'd also need a few more unsafe operators
  // to (a) prevent compiler-JIT reordering of non-volatile accesses, and
  // (b) explicit barriers or fence operations.
  //
  // TODO:
  //
  // *  Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
  //    This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
  //    Given TLAB allocation, Self is usually manifested in a register, so passing it into
  //    the lock operators would typically be faster than reifying Self.
  //
  // *  Ideally I'd define the primitives as:
  //       fast_lock   (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
  //       fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED
  //    Unfortunately ADLC bugs prevent us from expressing the ideal form.
  //    Instead, we're stuck with the rather awkward and brittle register assignments below.
  //    Furthermore the register assignments are overconstrained, possibly resulting in
  //    sub-optimal code near the synchronization site.
  //
  // *  Eliminate the sp-proximity tests and just use "== Self" tests instead.
  //    Alternately, use a better sp-proximity test.
  //
  // *  Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
  //    Either one is sufficient to uniquely identify a thread.
  //    TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
  //
  // *  Intrinsify notify() and notifyAll() for the common cases where the
  //    object is locked by the calling thread but the waitlist is empty.
  //    This avoids the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
  //
  // *  use jccb and jmpb instead of jcc and jmp to improve code density.
  //    But beware of excessive branch density on AMD Opterons.
  //
  // *  Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
  //    or failure of the fast-path.
// If the fast-path fails then we pass
  //    control to the slow-path, typically in C.  In Fast_Lock and
  //    Fast_Unlock we often branch to DONE_LABEL, just to find that C2
  //    will emit a conditional branch immediately after the node.
  //    So we have branches to branches and lots of ICC.ZF games.
  //    Instead, it might be better to have C2 pass a "FailureLabel"
  //    into Fast_Lock and Fast_Unlock.  In the case of success, control
  //    will drop through the node.  ICC.ZF is undefined at exit.
  //    In the case of failure, the node will branch directly to the
  //    FailureLabel


  // obj: object to lock
  // box: on-stack box address (displaced header location) - KILLED
  // rax,: tmp -- KILLED
  // scr: tmp -- KILLED
  //
  // Fast_Lock: inline monitor-enter fast path.  The outcome is reported in
  // the Z flag (see the protocol note at the end of the body): ZF == 1 means
  // the lock was acquired, ZF == 0 sends control through the slow path.
  //
  // The shape of the emitted code is selected at code-emission time by the
  // EmitSync diagnostic bits tested below:
  //   EmitSync & 1     no fast path at all: store unused_mark into the box and
  //                    compare rsp with 0
  //   EmitSync & 2     biased-locking entry (if UseBiasedLocking) followed by
  //                    classic stack-locking only
  //   otherwise        stack-locking plus an inflated-monitor path, which is
  //                    further tuned by bits 8192, 128, 64, 2048 and 32
  enc_class Fast_Lock( eRegP obj, eRegP box, eAXRegI tmp, eRegP scr ) %{

    Register objReg = as_Register($obj$$reg);
    Register boxReg = as_Register($box$$reg);
    Register tmpReg = as_Register($tmp$$reg);
    Register scrReg = as_Register($scr$$reg);

    // Ensure the register assignments are disjoint
    // (tmpReg vs scrReg is not checked here; tmpReg is instead pinned to EAX,
    // the implicit comparand of cmpxchg)
    guarantee (objReg != boxReg, "") ;
    guarantee (objReg != tmpReg, "") ;
    guarantee (objReg != scrReg, "") ;
    guarantee (boxReg != tmpReg, "") ;
    guarantee (boxReg != scrReg, "") ;
    guarantee (tmpReg == as_Register(EAX_enc), "") ;

    MacroAssembler masm(&cbuf);

    if (_counters != NULL) {
      masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
    }
    if (EmitSync & 1) {
        // set box->dhw = unused_mark (3)
        // Force all sync thru slow-path: slow_enter() and slow_exit()
        masm.movptr (Address(boxReg, 0), int32_t(markOopDesc::unused_mark())) ;
        masm.cmpptr (rsp, (int32_t)0) ;
    } else
    if (EmitSync & 2) {
        Label DONE_LABEL ;
        if (UseBiasedLocking) {
           // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
        }

        masm.movptr(tmpReg, Address(objReg, 0)) ;          // fetch markword
        masm.orptr (tmpReg, 0x1);
        masm.movptr(Address(boxReg, 0), tmpReg);           // Anticipate successful CAS
        if (os::is_MP()) { masm.lock();  }
        masm.cmpxchgptr(boxReg, Address(objReg, 0));          // Updates tmpReg
        masm.jcc(Assembler::equal, DONE_LABEL);
        // Recursive locking
        masm.subptr(tmpReg, rsp);
        masm.andptr(tmpReg, (int32_t) 0xFFFFF003 );
        masm.movptr(Address(boxReg, 0), tmpReg);
        masm.bind(DONE_LABEL) ;
    } else {
      // Possible cases that we'll encounter in fast_lock
      // ------------------------------------------------
      // * Inflated
      //    -- unlocked
      //    -- Locked
      //       = by self
      //       = by other
      // * biased
      //    -- by Self
      //    -- by other
      // * neutral
      // * stack-locked
      //    -- by self
      //       = sp-proximity test hits
      //       = sp-proximity test generates false-negative
      //    -- by other
      //

      // NOTE(review): PopDone is declared but never bound or branched to in
      // this enc_class.
      Label IsInflated, DONE_LABEL, PopDone ;

      // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
      // order to reduce the number of conditional branches in the most common cases.
      // Beware -- there's a subtle invariant that fetch of the markword
      // at [FETCH], below, will never observe a biased encoding (*101b).
      // If this invariant is not held we risk exclusion (safety) failure.
      if (UseBiasedLocking && !UseOptoBiasInlining) {
        masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
      }

      masm.movptr(tmpReg, Address(objReg, 0)) ;         // [FETCH]
      masm.testptr(tmpReg, 0x02) ;                      // Inflated v (Stack-locked or neutral)
      masm.jccb  (Assembler::notZero, IsInflated) ;

      // Attempt stack-locking ...
      masm.orptr (tmpReg, 0x1);
      masm.movptr(Address(boxReg, 0), tmpReg);          // Anticipate successful CAS
      if (os::is_MP()) { masm.lock();  }
      masm.cmpxchgptr(boxReg, Address(objReg, 0));           // Updates tmpReg
      if (_counters != NULL) {
        masm.cond_inc32(Assembler::equal,
                        ExternalAddress((address)_counters->fast_path_entry_count_addr()));
      }
      masm.jccb (Assembler::equal, DONE_LABEL);

      // Recursive locking
      masm.subptr(tmpReg, rsp);
      masm.andptr(tmpReg, 0xFFFFF003 );
      masm.movptr(Address(boxReg, 0), tmpReg);
      if (_counters != NULL) {
        masm.cond_inc32(Assembler::equal,
                        ExternalAddress((address)_counters->fast_path_entry_count_addr()));
      }
      masm.jmp  (DONE_LABEL) ;

      masm.bind (IsInflated) ;

      // The object is inflated.
      //
      // TODO-FIXME: eliminate the ugly use of manifest constants:
      //   Use markOopDesc::monitor_value instead of "2".
      //   use markOop::unused_mark() instead of "3".
      // The tmpReg value is an objectMonitor reference ORed with
      // markOopDesc::monitor_value (2).   We can either convert tmpReg to an
      // objectmonitor pointer by masking off the "2" bit or we can just
      // use tmpReg as an objectmonitor pointer but bias the objectmonitor
      // field offsets with "-2" to compensate for and annul the low-order tag bit.
      //
      // I use the latter as it avoids AGI stalls.
      // As such, we write "mov r, [tmpReg+OFFSETOF(Owner)-2]"
      // instead of "mov r, [tmpReg+OFFSETOF(Owner)]".
      //
      // NOTE(review): OFFSET_SKEWED is defined here but the code below spells
      // out ObjectMonitor::owner_offset_in_bytes()-2 directly instead.
      #define OFFSET_SKEWED(f) ((ObjectMonitor::f ## _offset_in_bytes())-2)

      // boxReg refers to the on-stack BasicLock in the current frame.
      // We'd like to write:
      //   set box->_displaced_header = markOop::unused_mark().  Any non-0 value suffices.
      // This is convenient but results a ST-before-CAS penalty.  The following CAS suffers
      // additional latency as we have another ST in the store buffer that must drain.

      if (EmitSync & 8192) {
         masm.movptr(Address(boxReg, 0), 3) ;            // results in ST-before-CAS penalty
         masm.get_thread (scrReg) ;
         masm.movptr(boxReg, tmpReg);                    // consider: LEA box, [tmp-2]
         masm.movptr(tmpReg, NULL_WORD);                 // consider: xor vs mov
         if (os::is_MP()) { masm.lock(); }
         masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
      } else
      if ((EmitSync & 128) == 0) {                      // avoid ST-before-CAS
         // From here on scrReg holds the box address and boxReg holds the
         // tagged monitor reference.
         masm.movptr(scrReg, boxReg) ;
         masm.movptr(boxReg, tmpReg);                   // consider: LEA box, [tmp-2]

         // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
         if ((EmitSync & 2048) && VM_Version::supports_3dnow() && os::is_MP()) {
            // prefetchw [eax + Offset(_owner)-2]
            masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2));
         }

         if ((EmitSync & 64) == 0) {
           // Optimistic form: consider XORL tmpReg,tmpReg
           masm.movptr(tmpReg, NULL_WORD) ;
         } else {
           // Can suffer RTS->RTO upgrades on shared or cold $ lines
           // Test-And-CAS instead of CAS
           masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;   // rax, = m->_owner
           masm.testptr(tmpReg, tmpReg) ;                   // Locked ?
           masm.jccb  (Assembler::notZero, DONE_LABEL) ;
         }

         // Appears unlocked - try to swing _owner from null to non-null.
         // Ideally, I'd manifest "Self" with get_thread and then attempt
         // to CAS the register containing Self into m->Owner.
         // But we don't have enough registers, so instead we can either try to CAS
         // rsp or the address of the box (in scr) into &m->owner.  If the CAS succeeds
         // we later store "Self" into m->Owner.  Transiently storing a stack address
         // (rsp or the address of the box) into m->owner is harmless.
         // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
         if (os::is_MP()) { masm.lock();  }
         masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
         // The store below sits between the CAS and the jccb that consumes its
         // ZF result; it is a plain move and must stay flag-neutral.
         masm.movptr(Address(scrReg, 0), 3) ;          // box->_displaced_header = 3
         masm.jccb  (Assembler::notZero, DONE_LABEL) ;
         masm.get_thread (scrReg) ;                    // beware: clobbers ICCs
         masm.movptr(Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), scrReg) ;
         masm.xorptr(boxReg, boxReg) ;                 // set icc.ZFlag = 1 to indicate success

         // If the CAS fails we can either retry or pass control to the slow-path.
         // We use the latter tactic.
         // Pass the CAS result in the icc.ZFlag into DONE_LABEL
         // If the CAS was successful ...
         //   Self has acquired the lock
         //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
         // Intentional fall-through into DONE_LABEL ...
      } else {
         masm.movptr(Address(boxReg, 0), 3) ;       // results in ST-before-CAS penalty
         masm.movptr(boxReg, tmpReg) ;

         // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
         if ((EmitSync & 2048) && VM_Version::supports_3dnow() && os::is_MP()) {
            // prefetchw [eax + Offset(_owner)-2]
            masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2));
         }

         if ((EmitSync & 64) == 0) {
           // Optimistic form
           masm.xorptr  (tmpReg, tmpReg) ;
         } else {
           // Can suffer RTS->RTO upgrades on shared or cold $ lines
           masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;   // rax, = m->_owner
           masm.testptr(tmpReg, tmpReg) ;                   // Locked ?
           masm.jccb  (Assembler::notZero, DONE_LABEL) ;
         }

         // Appears unlocked - try to swing _owner from null to non-null.
         // Use either "Self" (in scr) or rsp as thread identity in _owner.
         // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
         masm.get_thread (scrReg) ;
         if (os::is_MP()) { masm.lock(); }
         masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;

         // If the CAS fails we can either retry or pass control to the slow-path.
         // We use the latter tactic.
         // Pass the CAS result in the icc.ZFlag into DONE_LABEL
         // If the CAS was successful ...
         //   Self has acquired the lock
         //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
         // Intentional fall-through into DONE_LABEL ...
      }

      // DONE_LABEL is a hot target - we'd really like to place it at the
      // start of cache line by padding with NOPs.
      // See the AMD and Intel software optimization manuals for the
      // most efficient "long" NOP encodings.
      // Unfortunately none of our alignment mechanisms suffice.
      masm.bind(DONE_LABEL);

      // Avoid branch-to-branch on AMD processors
      // This appears to be superstition.
      if (EmitSync & 32) masm.nop() ;


      // At DONE_LABEL the icc ZFlag is set as follows ...
      // Fast_Unlock uses the same protocol.
      // ZFlag == 1 -> Success
      // ZFlag == 0 -> Failure - force control through the slow-path
    }
  %}

  // obj: object to unlock
  // box: box address (displaced header location), killed.  Must be EAX.
  // rbx,: killed tmp; cannot be obj nor box.
  //
  // Some commentary on balanced locking:
  //
  // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
  // Methods that don't have provably balanced locking are forced to run in the
  // interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
  // The interpreter provides two properties:
  // I1:  At return-time the interpreter automatically and quietly unlocks any
  //      objects acquired the current activation (frame).
Recall that the 3594 // interpreter maintains an on-stack list of locks currently held by 3595 // a frame. 3596 // I2: If a method attempts to unlock an object that is not held by the 3597 // the frame the interpreter throws IMSX. 3598 // 3599 // Lets say A(), which has provably balanced locking, acquires O and then calls B(). 3600 // B() doesn't have provably balanced locking so it runs in the interpreter. 3601 // Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O 3602 // is still locked by A(). 3603 // 3604 // The only other source of unbalanced locking would be JNI. The "Java Native Interface: 3605 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter 3606 // should not be unlocked by "normal" java-level locking and vice-versa. The specification 3607 // doesn't specify what will occur if a program engages in such mixed-mode locking, however. 3608 3609 enc_class Fast_Unlock( nabxRegP obj, eAXRegP box, eRegP tmp) %{ 3610 3611 Register objReg = as_Register($obj$$reg); 3612 Register boxReg = as_Register($box$$reg); 3613 Register tmpReg = as_Register($tmp$$reg); 3614 3615 guarantee (objReg != boxReg, "") ; 3616 guarantee (objReg != tmpReg, "") ; 3617 guarantee (boxReg != tmpReg, "") ; 3618 guarantee (boxReg == as_Register(EAX_enc), "") ; 3619 MacroAssembler masm(&cbuf); 3620 3621 if (EmitSync & 4) { 3622 // Disable - inhibit all inlining. Force control through the slow-path 3623 masm.cmpptr (rsp, 0) ; 3624 } else 3625 if (EmitSync & 8) { 3626 Label DONE_LABEL ; 3627 if (UseBiasedLocking) { 3628 masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL); 3629 } 3630 // classic stack-locking code ... 
3631 masm.movptr(tmpReg, Address(boxReg, 0)) ; 3632 masm.testptr(tmpReg, tmpReg) ; 3633 masm.jcc (Assembler::zero, DONE_LABEL) ; 3634 if (os::is_MP()) { masm.lock(); } 3635 masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses EAX which is box 3636 masm.bind(DONE_LABEL); 3637 } else { 3638 Label DONE_LABEL, Stacked, CheckSucc, Inflated ; 3639 3640 // Critically, the biased locking test must have precedence over 3641 // and appear before the (box->dhw == 0) recursive stack-lock test. 3642 if (UseBiasedLocking && !UseOptoBiasInlining) { 3643 masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL); 3644 } 3645 3646 masm.cmpptr(Address(boxReg, 0), 0) ; // Examine the displaced header 3647 masm.movptr(tmpReg, Address(objReg, 0)) ; // Examine the object's markword 3648 masm.jccb (Assembler::zero, DONE_LABEL) ; // 0 indicates recursive stack-lock 3649 3650 masm.testptr(tmpReg, 0x02) ; // Inflated? 3651 masm.jccb (Assembler::zero, Stacked) ; 3652 3653 masm.bind (Inflated) ; 3654 // It's inflated. 3655 // Despite our balanced locking property we still check that m->_owner == Self 3656 // as java routines or native JNI code called by this thread might 3657 // have released the lock. 3658 // Refer to the comments in synchronizer.cpp for how we might encode extra 3659 // state in _succ so we can avoid fetching EntryList|cxq. 3660 // 3661 // I'd like to add more cases in fast_lock() and fast_unlock() -- 3662 // such as recursive enter and exit -- but we have to be wary of 3663 // I$ bloat, T$ effects and BP$ effects. 3664 // 3665 // If there's no contention try a 1-0 exit. That is, exit without 3666 // a costly MEMBAR or CAS. See synchronizer.cpp for details on how 3667 // we detect and recover from the race that the 1-0 exit admits. 3668 // 3669 // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier 3670 // before it STs null into _owner, releasing the lock. 
Updates 3671 // to data protected by the critical section must be visible before 3672 // we drop the lock (and thus before any other thread could acquire 3673 // the lock and observe the fields protected by the lock). 3674 // IA32's memory-model is SPO, so STs are ordered with respect to 3675 // each other and there's no need for an explicit barrier (fence). 3676 // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html. 3677 3678 masm.get_thread (boxReg) ; 3679 if ((EmitSync & 4096) && VM_Version::supports_3dnow() && os::is_MP()) { 3680 // prefetchw [ebx + Offset(_owner)-2] 3681 masm.prefetchw(Address(rbx, ObjectMonitor::owner_offset_in_bytes()-2)); 3682 } 3683 3684 // Note that we could employ various encoding schemes to reduce 3685 // the number of loads below (currently 4) to just 2 or 3. 3686 // Refer to the comments in synchronizer.cpp. 3687 // In practice the chain of fetches doesn't seem to impact performance, however. 3688 if ((EmitSync & 65536) == 0 && (EmitSync & 256)) { 3689 // Attempt to reduce branch density - AMD's branch predictor. 
3690 masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 3691 masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ; 3692 masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ; 3693 masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ; 3694 masm.jccb (Assembler::notZero, DONE_LABEL) ; 3695 masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ; 3696 masm.jmpb (DONE_LABEL) ; 3697 } else { 3698 masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 3699 masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ; 3700 masm.jccb (Assembler::notZero, DONE_LABEL) ; 3701 masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ; 3702 masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ; 3703 masm.jccb (Assembler::notZero, CheckSucc) ; 3704 masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ; 3705 masm.jmpb (DONE_LABEL) ; 3706 } 3707 3708 // The Following code fragment (EmitSync & 65536) improves the performance of 3709 // contended applications and contended synchronization microbenchmarks. 3710 // Unfortunately the emission of the code - even though not executed - causes regressions 3711 // in scimark and jetstream, evidently because of $ effects. Replacing the code 3712 // with an equal number of never-executed NOPs results in the same regression. 3713 // We leave it off by default. 3714 3715 if ((EmitSync & 65536) != 0) { 3716 Label LSuccess, LGoSlowPath ; 3717 3718 masm.bind (CheckSucc) ; 3719 3720 // Optional pre-test ... 
it's safe to elide this 3721 if ((EmitSync & 16) == 0) { 3722 masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ; 3723 masm.jccb (Assembler::zero, LGoSlowPath) ; 3724 } 3725 3726 // We have a classic Dekker-style idiom: 3727 // ST m->_owner = 0 ; MEMBAR; LD m->_succ 3728 // There are a number of ways to implement the barrier: 3729 // (1) lock:andl &m->_owner, 0 3730 // is fast, but mask doesn't currently support the "ANDL M,IMM32" form. 3731 // LOCK: ANDL [ebx+Offset(_Owner)-2], 0 3732 // Encodes as 81 31 OFF32 IMM32 or 83 63 OFF8 IMM8 3733 // (2) If supported, an explicit MFENCE is appealing. 3734 // In older IA32 processors MFENCE is slower than lock:add or xchg 3735 // particularly if the write-buffer is full as might be the case if 3736 // if stores closely precede the fence or fence-equivalent instruction. 3737 // In more modern implementations MFENCE appears faster, however. 3738 // (3) In lieu of an explicit fence, use lock:addl to the top-of-stack 3739 // The $lines underlying the top-of-stack should be in M-state. 3740 // The locked add instruction is serializing, of course. 3741 // (4) Use xchg, which is serializing 3742 // mov boxReg, 0; xchgl boxReg, [tmpReg + Offset(_owner)-2] also works 3743 // (5) ST m->_owner = 0 and then execute lock:orl &m->_succ, 0. 3744 // The integer condition codes will tell us if succ was 0. 3745 // Since _succ and _owner should reside in the same $line and 3746 // we just stored into _owner, it's likely that the $line 3747 // remains in M-state for the lock:orl. 3748 // 3749 // We currently use (3), although it's likely that switching to (2) 3750 // is correct for the future. 
3751 3752 masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ; 3753 if (os::is_MP()) { 3754 if (VM_Version::supports_sse2() && 1 == FenceInstruction) { 3755 masm.mfence(); 3756 } else { 3757 masm.lock () ; masm.addptr(Address(rsp, 0), 0) ; 3758 } 3759 } 3760 // Ratify _succ remains non-null 3761 masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ; 3762 masm.jccb (Assembler::notZero, LSuccess) ; 3763 3764 masm.xorptr(boxReg, boxReg) ; // box is really EAX 3765 if (os::is_MP()) { masm.lock(); } 3766 masm.cmpxchgptr(rsp, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)); 3767 masm.jccb (Assembler::notEqual, LSuccess) ; 3768 // Since we're low on registers we installed rsp as a placeholding in _owner. 3769 // Now install Self over rsp. This is safe as we're transitioning from 3770 // non-null to non=null 3771 masm.get_thread (boxReg) ; 3772 masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), boxReg) ; 3773 // Intentional fall-through into LGoSlowPath ... 3774 3775 masm.bind (LGoSlowPath) ; 3776 masm.orptr(boxReg, 1) ; // set ICC.ZF=0 to indicate failure 3777 masm.jmpb (DONE_LABEL) ; 3778 3779 masm.bind (LSuccess) ; 3780 masm.xorptr(boxReg, boxReg) ; // set ICC.ZF=1 to indicate success 3781 masm.jmpb (DONE_LABEL) ; 3782 } 3783 3784 masm.bind (Stacked) ; 3785 // It's not inflated and it's not recursively stack-locked and it's not biased. 3786 // It must be stack-locked. 3787 // Try to reset the header to displaced header. 3788 // The "box" value on the stack is stable, so we can reload 3789 // and be assured we observe the same value as above. 3790 masm.movptr(tmpReg, Address(boxReg, 0)) ; 3791 if (os::is_MP()) { masm.lock(); } 3792 masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses EAX which is box 3793 // Intention fall-thru into DONE_LABEL 3794 3795 3796 // DONE_LABEL is a hot target - we'd really like to place it at the 3797 // start of cache line by padding with NOPs. 
3798 // See the AMD and Intel software optimization manuals for the 3799 // most efficient "long" NOP encodings. 3800 // Unfortunately none of our alignment mechanisms suffice. 3801 if ((EmitSync & 65536) == 0) { 3802 masm.bind (CheckSucc) ; 3803 } 3804 masm.bind(DONE_LABEL); 3805 3806 // Avoid branch to branch on AMD processors 3807 if (EmitSync & 32768) { masm.nop() ; } 3808 } 3809 %} 3810 3811 3812 enc_class enc_pop_rdx() %{ 3813 emit_opcode(cbuf,0x5A); 3814 %} 3815 3816 enc_class enc_rethrow() %{ 3817 cbuf.set_insts_mark(); 3818 emit_opcode(cbuf, 0xE9); // jmp entry 3819 emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4, 3820 runtime_call_Relocation::spec(), RELOC_IMM32 ); 3821 %} 3822 3823 3824 // Convert a double to an int. Java semantics require we do complex 3825 // manglelations in the corner cases. So we set the rounding mode to 3826 // 'zero', store the darned double down as an int, and reset the 3827 // rounding mode to 'nearest'. The hardware throws an exception which 3828 // patches up the correct value directly to the stack. 3829 enc_class D2I_encoding( regD src ) %{ 3830 // Flip to round-to-zero mode. We attempted to allow invalid-op 3831 // exceptions here, so that a NAN or other corner-case value will 3832 // thrown an exception (but normal values get converted at full speed). 3833 // However, I2C adapters and other float-stack manglers leave pending 3834 // invalid-op exceptions hanging. We would have to clear them before 3835 // enabling them and that is more expensive than just testing for the 3836 // invalid value Intel stores down in the corner cases. 3837 emit_opcode(cbuf,0xD9); // FLDCW trunc 3838 emit_opcode(cbuf,0x2D); 3839 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); 3840 // Allocate a word 3841 emit_opcode(cbuf,0x83); // SUB ESP,4 3842 emit_opcode(cbuf,0xEC); 3843 emit_d8(cbuf,0x04); 3844 // Encoding assumes a double has been pushed into FPR0. 
3845 // Store down the double as an int, popping the FPU stack 3846 emit_opcode(cbuf,0xDB); // FISTP [ESP] 3847 emit_opcode(cbuf,0x1C); 3848 emit_d8(cbuf,0x24); 3849 // Restore the rounding mode; mask the exception 3850 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 3851 emit_opcode(cbuf,0x2D); 3852 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 3853 ? (int)StubRoutines::addr_fpu_cntrl_wrd_24() 3854 : (int)StubRoutines::addr_fpu_cntrl_wrd_std()); 3855 3856 // Load the converted int; adjust CPU stack 3857 emit_opcode(cbuf,0x58); // POP EAX 3858 emit_opcode(cbuf,0x3D); // CMP EAX,imm 3859 emit_d32 (cbuf,0x80000000); // 0x80000000 3860 emit_opcode(cbuf,0x75); // JNE around_slow_call 3861 emit_d8 (cbuf,0x07); // Size of slow_call 3862 // Push src onto stack slow-path 3863 emit_opcode(cbuf,0xD9 ); // FLD ST(i) 3864 emit_d8 (cbuf,0xC0-1+$src$$reg ); 3865 // CALL directly to the runtime 3866 cbuf.set_insts_mark(); 3867 emit_opcode(cbuf,0xE8); // Call into runtime 3868 emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 3869 // Carry on here... 3870 %} 3871 3872 enc_class D2L_encoding( regD src ) %{ 3873 emit_opcode(cbuf,0xD9); // FLDCW trunc 3874 emit_opcode(cbuf,0x2D); 3875 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); 3876 // Allocate a word 3877 emit_opcode(cbuf,0x83); // SUB ESP,8 3878 emit_opcode(cbuf,0xEC); 3879 emit_d8(cbuf,0x08); 3880 // Encoding assumes a double has been pushed into FPR0. 3881 // Store down the double as a long, popping the FPU stack 3882 emit_opcode(cbuf,0xDF); // FISTP [ESP] 3883 emit_opcode(cbuf,0x3C); 3884 emit_d8(cbuf,0x24); 3885 // Restore the rounding mode; mask the exception 3886 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 3887 emit_opcode(cbuf,0x2D); 3888 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 3889 ? 
(int)StubRoutines::addr_fpu_cntrl_wrd_24() 3890 : (int)StubRoutines::addr_fpu_cntrl_wrd_std()); 3891 3892 // Load the converted int; adjust CPU stack 3893 emit_opcode(cbuf,0x58); // POP EAX 3894 emit_opcode(cbuf,0x5A); // POP EDX 3895 emit_opcode(cbuf,0x81); // CMP EDX,imm 3896 emit_d8 (cbuf,0xFA); // rdx 3897 emit_d32 (cbuf,0x80000000); // 0x80000000 3898 emit_opcode(cbuf,0x75); // JNE around_slow_call 3899 emit_d8 (cbuf,0x07+4); // Size of slow_call 3900 emit_opcode(cbuf,0x85); // TEST EAX,EAX 3901 emit_opcode(cbuf,0xC0); // 2/rax,/rax, 3902 emit_opcode(cbuf,0x75); // JNE around_slow_call 3903 emit_d8 (cbuf,0x07); // Size of slow_call 3904 // Push src onto stack slow-path 3905 emit_opcode(cbuf,0xD9 ); // FLD ST(i) 3906 emit_d8 (cbuf,0xC0-1+$src$$reg ); 3907 // CALL directly to the runtime 3908 cbuf.set_insts_mark(); 3909 emit_opcode(cbuf,0xE8); // Call into runtime 3910 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 3911 // Carry on here... 3912 %} 3913 3914 enc_class X2L_encoding( regX src ) %{ 3915 // Allocate a word 3916 emit_opcode(cbuf,0x83); // SUB ESP,8 3917 emit_opcode(cbuf,0xEC); 3918 emit_d8(cbuf,0x08); 3919 3920 emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src 3921 emit_opcode (cbuf, 0x0F ); 3922 emit_opcode (cbuf, 0x11 ); 3923 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false); 3924 3925 emit_opcode(cbuf,0xD9 ); // FLD_S [ESP] 3926 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); 3927 3928 emit_opcode(cbuf,0xD9); // FLDCW trunc 3929 emit_opcode(cbuf,0x2D); 3930 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); 3931 3932 // Encoding assumes a double has been pushed into FPR0. 
3933 // Store down the double as a long, popping the FPU stack 3934 emit_opcode(cbuf,0xDF); // FISTP [ESP] 3935 emit_opcode(cbuf,0x3C); 3936 emit_d8(cbuf,0x24); 3937 3938 // Restore the rounding mode; mask the exception 3939 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 3940 emit_opcode(cbuf,0x2D); 3941 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 3942 ? (int)StubRoutines::addr_fpu_cntrl_wrd_24() 3943 : (int)StubRoutines::addr_fpu_cntrl_wrd_std()); 3944 3945 // Load the converted int; adjust CPU stack 3946 emit_opcode(cbuf,0x58); // POP EAX 3947 3948 emit_opcode(cbuf,0x5A); // POP EDX 3949 3950 emit_opcode(cbuf,0x81); // CMP EDX,imm 3951 emit_d8 (cbuf,0xFA); // rdx 3952 emit_d32 (cbuf,0x80000000);// 0x80000000 3953 3954 emit_opcode(cbuf,0x75); // JNE around_slow_call 3955 emit_d8 (cbuf,0x13+4); // Size of slow_call 3956 3957 emit_opcode(cbuf,0x85); // TEST EAX,EAX 3958 emit_opcode(cbuf,0xC0); // 2/rax,/rax, 3959 3960 emit_opcode(cbuf,0x75); // JNE around_slow_call 3961 emit_d8 (cbuf,0x13); // Size of slow_call 3962 3963 // Allocate a word 3964 emit_opcode(cbuf,0x83); // SUB ESP,4 3965 emit_opcode(cbuf,0xEC); 3966 emit_d8(cbuf,0x04); 3967 3968 emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src 3969 emit_opcode (cbuf, 0x0F ); 3970 emit_opcode (cbuf, 0x11 ); 3971 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false); 3972 3973 emit_opcode(cbuf,0xD9 ); // FLD_S [ESP] 3974 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); 3975 3976 emit_opcode(cbuf,0x83); // ADD ESP,4 3977 emit_opcode(cbuf,0xC4); 3978 emit_d8(cbuf,0x04); 3979 3980 // CALL directly to the runtime 3981 cbuf.set_insts_mark(); 3982 emit_opcode(cbuf,0xE8); // Call into runtime 3983 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 3984 // Carry on here... 
3985 %} 3986 3987 enc_class XD2L_encoding( regXD src ) %{ 3988 // Allocate a word 3989 emit_opcode(cbuf,0x83); // SUB ESP,8 3990 emit_opcode(cbuf,0xEC); 3991 emit_d8(cbuf,0x08); 3992 3993 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src 3994 emit_opcode (cbuf, 0x0F ); 3995 emit_opcode (cbuf, 0x11 ); 3996 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false); 3997 3998 emit_opcode(cbuf,0xDD ); // FLD_D [ESP] 3999 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); 4000 4001 emit_opcode(cbuf,0xD9); // FLDCW trunc 4002 emit_opcode(cbuf,0x2D); 4003 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); 4004 4005 // Encoding assumes a double has been pushed into FPR0. 4006 // Store down the double as a long, popping the FPU stack 4007 emit_opcode(cbuf,0xDF); // FISTP [ESP] 4008 emit_opcode(cbuf,0x3C); 4009 emit_d8(cbuf,0x24); 4010 4011 // Restore the rounding mode; mask the exception 4012 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 4013 emit_opcode(cbuf,0x2D); 4014 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 4015 ? 
(int)StubRoutines::addr_fpu_cntrl_wrd_24() 4016 : (int)StubRoutines::addr_fpu_cntrl_wrd_std()); 4017 4018 // Load the converted int; adjust CPU stack 4019 emit_opcode(cbuf,0x58); // POP EAX 4020 4021 emit_opcode(cbuf,0x5A); // POP EDX 4022 4023 emit_opcode(cbuf,0x81); // CMP EDX,imm 4024 emit_d8 (cbuf,0xFA); // rdx 4025 emit_d32 (cbuf,0x80000000); // 0x80000000 4026 4027 emit_opcode(cbuf,0x75); // JNE around_slow_call 4028 emit_d8 (cbuf,0x13+4); // Size of slow_call 4029 4030 emit_opcode(cbuf,0x85); // TEST EAX,EAX 4031 emit_opcode(cbuf,0xC0); // 2/rax,/rax, 4032 4033 emit_opcode(cbuf,0x75); // JNE around_slow_call 4034 emit_d8 (cbuf,0x13); // Size of slow_call 4035 4036 // Push src onto stack slow-path 4037 // Allocate a word 4038 emit_opcode(cbuf,0x83); // SUB ESP,8 4039 emit_opcode(cbuf,0xEC); 4040 emit_d8(cbuf,0x08); 4041 4042 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src 4043 emit_opcode (cbuf, 0x0F ); 4044 emit_opcode (cbuf, 0x11 ); 4045 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false); 4046 4047 emit_opcode(cbuf,0xDD ); // FLD_D [ESP] 4048 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); 4049 4050 emit_opcode(cbuf,0x83); // ADD ESP,8 4051 emit_opcode(cbuf,0xC4); 4052 emit_d8(cbuf,0x08); 4053 4054 // CALL directly to the runtime 4055 cbuf.set_insts_mark(); 4056 emit_opcode(cbuf,0xE8); // Call into runtime 4057 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 4058 // Carry on here... 
4059 %} 4060 4061 enc_class D2X_encoding( regX dst, regD src ) %{ 4062 // Allocate a word 4063 emit_opcode(cbuf,0x83); // SUB ESP,4 4064 emit_opcode(cbuf,0xEC); 4065 emit_d8(cbuf,0x04); 4066 int pop = 0x02; 4067 if ($src$$reg != FPR1L_enc) { 4068 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 4069 emit_d8( cbuf, 0xC0-1+$src$$reg ); 4070 pop = 0x03; 4071 } 4072 store_to_stackslot( cbuf, 0xD9, pop, 0 ); // FST<P>_S [ESP] 4073 4074 emit_opcode (cbuf, 0xF3 ); // MOVSS dst(xmm), [ESP] 4075 emit_opcode (cbuf, 0x0F ); 4076 emit_opcode (cbuf, 0x10 ); 4077 encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false); 4078 4079 emit_opcode(cbuf,0x83); // ADD ESP,4 4080 emit_opcode(cbuf,0xC4); 4081 emit_d8(cbuf,0x04); 4082 // Carry on here... 4083 %} 4084 4085 enc_class FX2I_encoding( regX src, eRegI dst ) %{ 4086 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 4087 4088 // Compare the result to see if we need to go to the slow path 4089 emit_opcode(cbuf,0x81); // CMP dst,imm 4090 emit_rm (cbuf,0x3,0x7,$dst$$reg); 4091 emit_d32 (cbuf,0x80000000); // 0x80000000 4092 4093 emit_opcode(cbuf,0x75); // JNE around_slow_call 4094 emit_d8 (cbuf,0x13); // Size of slow_call 4095 // Store xmm to a temp memory 4096 // location and push it onto stack. 4097 4098 emit_opcode(cbuf,0x83); // SUB ESP,4 4099 emit_opcode(cbuf,0xEC); 4100 emit_d8(cbuf, $primary ? 0x8 : 0x4); 4101 4102 emit_opcode (cbuf, $primary ? 0xF2 : 0xF3 ); // MOVSS [ESP], xmm 4103 emit_opcode (cbuf, 0x0F ); 4104 emit_opcode (cbuf, 0x11 ); 4105 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false); 4106 4107 emit_opcode(cbuf, $primary ? 0xDD : 0xD9 ); // FLD [ESP] 4108 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); 4109 4110 emit_opcode(cbuf,0x83); // ADD ESP,4 4111 emit_opcode(cbuf,0xC4); 4112 emit_d8(cbuf, $primary ? 
0x8 : 0x4); 4113 4114 // CALL directly to the runtime 4115 cbuf.set_insts_mark(); 4116 emit_opcode(cbuf,0xE8); // Call into runtime 4117 emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 4118 4119 // Carry on here... 4120 %} 4121 4122 enc_class X2D_encoding( regD dst, regX src ) %{ 4123 // Allocate a word 4124 emit_opcode(cbuf,0x83); // SUB ESP,4 4125 emit_opcode(cbuf,0xEC); 4126 emit_d8(cbuf,0x04); 4127 4128 emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], xmm 4129 emit_opcode (cbuf, 0x0F ); 4130 emit_opcode (cbuf, 0x11 ); 4131 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false); 4132 4133 emit_opcode(cbuf,0xD9 ); // FLD_S [ESP] 4134 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); 4135 4136 emit_opcode(cbuf,0x83); // ADD ESP,4 4137 emit_opcode(cbuf,0xC4); 4138 emit_d8(cbuf,0x04); 4139 4140 // Carry on here... 4141 %} 4142 4143 enc_class AbsXF_encoding(regX dst) %{ 4144 address signmask_address=(address)float_signmask_pool; 4145 // andpd:\tANDPS $dst,[signconst] 4146 emit_opcode(cbuf, 0x0F); 4147 emit_opcode(cbuf, 0x54); 4148 emit_rm(cbuf, 0x0, $dst$$reg, 0x5); 4149 emit_d32(cbuf, (int)signmask_address); 4150 %} 4151 4152 enc_class AbsXD_encoding(regXD dst) %{ 4153 address signmask_address=(address)double_signmask_pool; 4154 // andpd:\tANDPD $dst,[signconst] 4155 emit_opcode(cbuf, 0x66); 4156 emit_opcode(cbuf, 0x0F); 4157 emit_opcode(cbuf, 0x54); 4158 emit_rm(cbuf, 0x0, $dst$$reg, 0x5); 4159 emit_d32(cbuf, (int)signmask_address); 4160 %} 4161 4162 enc_class NegXF_encoding(regX dst) %{ 4163 address signmask_address=(address)float_signflip_pool; 4164 // andpd:\tXORPS $dst,[signconst] 4165 emit_opcode(cbuf, 0x0F); 4166 emit_opcode(cbuf, 0x57); 4167 emit_rm(cbuf, 0x0, $dst$$reg, 0x5); 4168 emit_d32(cbuf, (int)signmask_address); 4169 %} 4170 4171 enc_class NegXD_encoding(regXD dst) %{ 4172 address signmask_address=(address)double_signflip_pool; 4173 // andpd:\tXORPD $dst,[signconst] 4174 
emit_opcode(cbuf, 0x66); 4175 emit_opcode(cbuf, 0x0F); 4176 emit_opcode(cbuf, 0x57); 4177 emit_rm(cbuf, 0x0, $dst$$reg, 0x5); 4178 emit_d32(cbuf, (int)signmask_address); 4179 %} 4180 4181 enc_class FMul_ST_reg( eRegF src1 ) %{ 4182 // Operand was loaded from memory into fp ST (stack top) 4183 // FMUL ST,$src /* D8 C8+i */ 4184 emit_opcode(cbuf, 0xD8); 4185 emit_opcode(cbuf, 0xC8 + $src1$$reg); 4186 %} 4187 4188 enc_class FAdd_ST_reg( eRegF src2 ) %{ 4189 // FADDP ST,src2 /* D8 C0+i */ 4190 emit_opcode(cbuf, 0xD8); 4191 emit_opcode(cbuf, 0xC0 + $src2$$reg); 4192 //could use FADDP src2,fpST /* DE C0+i */ 4193 %} 4194 4195 enc_class FAddP_reg_ST( eRegF src2 ) %{ 4196 // FADDP src2,ST /* DE C0+i */ 4197 emit_opcode(cbuf, 0xDE); 4198 emit_opcode(cbuf, 0xC0 + $src2$$reg); 4199 %} 4200 4201 enc_class subF_divF_encode( eRegF src1, eRegF src2) %{ 4202 // Operand has been loaded into fp ST (stack top) 4203 // FSUB ST,$src1 4204 emit_opcode(cbuf, 0xD8); 4205 emit_opcode(cbuf, 0xE0 + $src1$$reg); 4206 4207 // FDIV 4208 emit_opcode(cbuf, 0xD8); 4209 emit_opcode(cbuf, 0xF0 + $src2$$reg); 4210 %} 4211 4212 enc_class MulFAddF (eRegF src1, eRegF src2) %{ 4213 // Operand was loaded from memory into fp ST (stack top) 4214 // FADD ST,$src /* D8 C0+i */ 4215 emit_opcode(cbuf, 0xD8); 4216 emit_opcode(cbuf, 0xC0 + $src1$$reg); 4217 4218 // FMUL ST,src2 /* D8 C*+i */ 4219 emit_opcode(cbuf, 0xD8); 4220 emit_opcode(cbuf, 0xC8 + $src2$$reg); 4221 %} 4222 4223 4224 enc_class MulFAddFreverse (eRegF src1, eRegF src2) %{ 4225 // Operand was loaded from memory into fp ST (stack top) 4226 // FADD ST,$src /* D8 C0+i */ 4227 emit_opcode(cbuf, 0xD8); 4228 emit_opcode(cbuf, 0xC0 + $src1$$reg); 4229 4230 // FMULP src2,ST /* DE C8+i */ 4231 emit_opcode(cbuf, 0xDE); 4232 emit_opcode(cbuf, 0xC8 + $src2$$reg); 4233 %} 4234 4235 // Atomically load the volatile long 4236 enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{ 4237 emit_opcode(cbuf,0xDF); 4238 int rm_byte_opcode = 0x05; 4239 int base = 
$mem$$base; 4240 int index = $mem$$index; 4241 int scale = $mem$$scale; 4242 int displace = $mem$$disp; 4243 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals 4244 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop); 4245 store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp ); 4246 %} 4247 4248 enc_class enc_loadLX_volatile( memory mem, stackSlotL dst, regXD tmp ) %{ 4249 { // Atomic long load 4250 // UseXmmLoadAndClearUpper ? movsd $tmp,$mem : movlpd $tmp,$mem 4251 emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66); 4252 emit_opcode(cbuf,0x0F); 4253 emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12); 4254 int base = $mem$$base; 4255 int index = $mem$$index; 4256 int scale = $mem$$scale; 4257 int displace = $mem$$disp; 4258 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals 4259 encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop); 4260 } 4261 { // MOVSD $dst,$tmp ! atomic long store 4262 emit_opcode(cbuf,0xF2); 4263 emit_opcode(cbuf,0x0F); 4264 emit_opcode(cbuf,0x11); 4265 int base = $dst$$base; 4266 int index = $dst$$index; 4267 int scale = $dst$$scale; 4268 int displace = $dst$$disp; 4269 bool disp_is_oop = $dst->disp_is_oop(); // disp-as-oop when working with static globals 4270 encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop); 4271 } 4272 %} 4273 4274 enc_class enc_loadLX_reg_volatile( memory mem, eRegL dst, regXD tmp ) %{ 4275 { // Atomic long load 4276 // UseXmmLoadAndClearUpper ? movsd $tmp,$mem : movlpd $tmp,$mem 4277 emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66); 4278 emit_opcode(cbuf,0x0F); 4279 emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 
0x10 : 0x12);
      int base = $mem$$base;
      int index = $mem$$index;
      int scale = $mem$$scale;
      int displace = $mem$$disp;
      bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
    }
    { // MOVD $dst.lo,$tmp
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x7E);
      emit_rm(cbuf, 0x3, $tmp$$reg, $dst$$reg);
    }
    { // PSRLQ $tmp,32
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x73);
      emit_rm(cbuf, 0x3, 0x02, $tmp$$reg);
      emit_d8(cbuf, 0x20);
    }
    { // MOVD $dst.hi,$tmp
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x7E);
      emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg));
    }
  %}

  // Volatile Store Long.  Must be atomic, so move it into
  // the FP TOS and then do a 64-bit FIST.  Has to probe the
  // target address before the store (for null-ptr checks)
  // so the memory operand is used twice in the encoding.
  //   mem - destination memory operand
  //   src - stack slot holding the long value to store
  enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
    // Push the 64-bit value from its stack slot (opcode 0xDF, ModRM reg field 0x05)
    store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
    cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
    emit_opcode(cbuf,0xDF);
    int rm_byte_opcode = 0x07;        // ModRM reg field selecting the 64-bit store-and-pop form
    int base = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
  %}

  // Volatile Store Long through an XMM temporary: one 64-bit load from the
  // stack slot into $tmp followed by one 64-bit store from $tmp to $mem.
  //   mem - destination memory operand
  //   src - stack slot holding the long value to store
  //   tmp - XMM scratch register
  enc_class enc_storeLX_volatile( memory mem, stackSlotL src, regXD tmp) %{
    { // Atomic long load
      // UseXmmLoadAndClearUpper ? movsd $tmp,[$src] : movlpd $tmp,[$src]
      emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
      int base = $src$$base;
      int index = $src$$index;
      int scale = $src$$scale;
      int displace = $src$$disp;
      bool disp_is_oop = $src->disp_is_oop(); // disp-as-oop when working with static globals
      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
    }
    cbuf.set_insts_mark();            // Mark start of MOVSD in case $mem has an oop
    { // MOVSD $mem,$tmp ! atomic long store
      emit_opcode(cbuf,0xF2);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x11);
      int base = $mem$$base;
      int index = $mem$$index;
      int scale = $mem$$scale;
      int displace = $mem$$disp;
      bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
    }
  %}

  // Volatile Store Long from an integer register pair: the two 32-bit halves
  // are moved into XMM temporaries, interleaved into one 64-bit value in $tmp,
  // and written to $mem with a single 64-bit store.
  //   mem  - destination memory operand
  //   src  - long register pair (low half in $src, high half in HIGH_FROM_LOW($src))
  //   tmp  - XMM scratch; receives the low half, then the combined value
  //   tmp2 - XMM scratch; receives the high half
  enc_class enc_storeLX_reg_volatile( memory mem, eRegL src, regXD tmp, regXD tmp2) %{
    { // MOVD $tmp,$src.lo
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x6E);
      emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
    }
    { // MOVD $tmp2,$src.hi
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x6E);
      emit_rm(cbuf, 0x3, $tmp2$$reg, HIGH_FROM_LOW($src$$reg));
    }
    { // PUNPCKLDQ $tmp,$tmp2
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x62);
      emit_rm(cbuf, 0x3, $tmp$$reg, $tmp2$$reg);
    }
    cbuf.set_insts_mark();            // Mark start of MOVSD in case $mem has an oop
    { // MOVSD $mem,$tmp ! atomic long store
      emit_opcode(cbuf,0xF2);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x11);
      int base = $mem$$base;
      int index = $mem$$index;
      int scale = $mem$$scale;
      int displace = $mem$$disp;
      bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
    }
  %}

  // Safepoint Poll.  This polls the safepoint page, and causes an
  // exception if it is not readable.  Unfortunately, it kills the condition code
  // in the process.
  // We currently use TESTL [spp],EDI
  // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0

  enc_class Safepoint_Poll() %{
    // Record a poll_type relocation at the current instruction mark
    cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
    emit_opcode(cbuf,0x85);
    emit_rm (cbuf, 0x0, 0x7, 0x5);   // mod=0, reg=7 (EDI), r/m=5: absolute 32-bit address follows
    emit_d32(cbuf, (intptr_t)os::get_polling_page());
  %}
%}


//----------FRAME--------------------------------------------------------------
// Definition of frame structure and management information.
//
//  S T A C K   L A Y O U T    Allocators stack-slot number
//                             |   (to get allocators register number
//  G  Owned by    |        |  v    add OptoReg::stack0())
//  r   CALLER     |        |
//  o     |        +--------+      pad to even-align allocators stack-slot
//  w     V        |  pad0  |        numbers; owned by CALLER
//  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
//  h     ^        |   in   |  5
//        |        |  args  |  4   Holes in incoming args owned by SELF
//  |     |        |        |  3
//  |     |        +--------+
//  V     |        | old out|      Empty on Intel, window on Sparc
//        |    old |preserve|      Must be even aligned.
//        |     SP-+--------+----> Matcher::_old_SP, even aligned
//        |        |   in   |  3   area for Intel ret address
//     Owned by    |preserve|      Empty on Sparc.
//       SELF      +--------+
//        |        |  pad2  |  2   pad to align old SP
//        |        +--------+  1
//        |        | locks  |  0
//        |        +--------+----> OptoReg::stack0(), even aligned
//        |        |  pad1  | 11   pad to align new SP
//        |        +--------+
//        |        |        | 10
//        |        | spills |  9   spills
//        V        |        |  8   (pad0 slot for callee)
//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
//        ^        |  out   |  7
//        |        |  args  |  6   Holes in outgoing args owned by CALLEE
//     Owned by    +--------+
//      CALLEE     | new out|  6   Empty on Intel, window on Sparc
//        |    new |preserve|      Must be even-aligned.
//        |     SP-+--------+----> Matcher::_new_SP, even aligned
//        |        |        |
//
// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
//         known from SELF's arguments and the Java calling convention.
//         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.

frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(EAX);                // Inline Cache Register
  interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots.  Intel needs one slot for
  // return address and one for rbp, (must save rbp)
  in_preserve_stack_slots(2+VerifyStackAtCalls);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 1 +
              round_to(1+VerifyStackAtCalls+
              Compile::current()->fixed_slots(),
              (StackAlignmentInBytes/wordSize)));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  calling_convention %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}


  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  c_calling_convention %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, length);
  %}

  // Location of C & interpreter return values
  c_return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    // Tables indexed by ideal register type: lo = low half, hi = high half (Bad if none)
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };

    // in SSE2+ mode we want to keep the FPU stack clean so pretend
    // that C functions return float and double results in XMM0.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0a_num);
    if( ideal_reg == Op_RegF && UseSSE>=2 )
      return OptoRegPair(OptoReg::Bad,XMM0a_num);

    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

  // Location of return values
  // (unlike c_return_value above, floats use XMM0 as soon as UseSSE>=1)
  return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    // Tables indexed by ideal register type: lo = low half, hi = high half (Bad if none)
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0a_num);
    if( ideal_reg == Op_RegF && UseSSE>=1 )
      return OptoRegPair(OptoReg::Bad,XMM0a_num);
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_pc_relative(0);  // Required PC Relative flag
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
                                // specifies the alignment that some part of the instruction (not
                                // necessarily the start) requires.  If > 1, a compute_padding()
                                // function must be provided for the instruction

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

//----------Simple Operands----------------------------------------------------
// Immediate Operands
// Integer Immediate
operand immI() %{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for test vs zero
operand immI0() %{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immI1() %{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immI_M1() %{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes (0..3)
operand immI2() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer constant in the range -128..127
operand immI8() %{
  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer constant in the range -32768..32767
operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for long shifts
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer constant in the range 1..31
operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer constant in the range 32..63
operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer constant 1
operand immI_1() %{
  predicate( n->get_int() == 1 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer constant 2
operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer constant 3
operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0() %{
  predicate( n->get_long() == 0L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate minus one
operand immL_M1() %{
  predicate( n->get_long() == -1L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
  // Accepts long constants in the range 0..127
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: value is unchanged by a round trip through int,
// i.e. it is representable as a sign-extended 32-bit integer
operand immL32() %{
  predicate(n->get_long() == (int)(n->get_long()));
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}

//Double Immediate zero
operand immD0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate one
operand immD1() %{
  predicate( UseSSE<=1 && n->getd() == 1.0 );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (UseSSE<=1)
operand immD() %{
  predicate(UseSSE<=1);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (UseSSE>=2)
operand immXD() %{
  predicate(UseSSE>=2);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immXD0() %{
  // The constant is compared through jlong_cast() against integer 0 instead
  // of with a floating-point ==.  Presumably jlong_cast yields the raw bit
  // pattern, so NaN and -0.0 are both rejected and the VC++ NaN-compares-equal
  // problem noted for x87 constants cannot arise -- TODO confirm jlong_cast.
  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
  match(ConD);

  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero
operand immF0() %{
  predicate( UseSSE == 0 && n->getf() == 0.0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (UseSSE == 0)
operand immF() %{
  predicate( UseSSE == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (UseSSE >= 1)
operand immXF() %{
  predicate(UseSSE >= 1);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero.  Zero and not -0.0
operand immXF0() %{
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Immediates for special shifts (sign extend)

// Shift count 16
operand immI_16() %{
  predicate( n->get_int() == 16 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Shift count 24
operand immI_24() %{
  predicate( n->get_int() == 24 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Register Operands
// Integer Register
operand eRegI() %{
  constraint(ALLOC_IN_RC(e_reg));
  match(RegI);
  match(xRegI);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eDIRegI);
  match(eSIRegI);

  format %{ %}
  interface(REG_INTER);
%}
// Subset of Integer Register
operand xRegI(eRegI reg) %{
  constraint(ALLOC_IN_RC(x_reg));
  match(reg);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
operand eAXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  match(eRegI);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Special Registers
operand eBXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  match(eRegI);

  format %{ "EBX" %}
  interface(REG_INTER);
%}

operand eCXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  match(eRegI);

  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eDXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  match(eRegI);

  format %{ "EDX" %}
  interface(REG_INTER);
%}

operand eDIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  match(eRegI);

  format %{ "EDI" %}
  interface(REG_INTER);
%}

// Integer register allocated from the nax_reg class
operand naxRegI() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register allocated from the nadx_reg class
operand nadxRegI() %{
  constraint(ALLOC_IN_RC(nadx_reg));
  match(RegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register allocated from the ncx_reg class
operand ncxRegI() %{
  constraint(ALLOC_IN_RC(ncx_reg));
  match(RegI);
  match(eAXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
// //
operand eSIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  match(eRegI);

  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Pointer Register
operand anyRegP() %{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);
  match(eRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand eRegP() %{
  constraint(ALLOC_IN_RC(e_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// On windows95, EBP is not safe to use for implicit null tests.
operand eRegP_no_EBP() %{
  constraint(ALLOC_IN_RC(e_reg_no_rbp));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer register allocated from the nax_reg class
operand naxRegP() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eCXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer register allocated from the nabx_reg class
operand nabxRegP() %{
  constraint(ALLOC_IN_RC(nabx_reg));
  match(RegP);
  match(eCXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer register allocated from the p_reg class
operand pRegP() %{
  constraint(ALLOC_IN_RC(p_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
// Return a pointer value
operand eAXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Used in AtomicAdd
operand eBXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  format %{ "EBX" %}
  interface(REG_INTER);
%}

// Tail-call (interprocedural jump) to interpreter
operand eCXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eSIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Used in rep stosw
operand eDIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  format %{ "EDI" %}
  interface(REG_INTER);
%}

operand eBPRegP() %{
  constraint(ALLOC_IN_RC(ebp_reg));
  match(RegP);
  format %{ "EBP" %}
  interface(REG_INTER);
%}

// Long Register
operand eRegL() %{
  constraint(ALLOC_IN_RC(long_reg));
  match(RegL);
  match(eADXRegL);

  format %{ %}
  interface(REG_INTER);
%}

operand eADXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(reg);

  format %{ "EDX:EAX" %}
  interface(REG_INTER);
%}

operand eBCXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(ebcx_reg));
  match(reg);

  format %{ "EBX:ECX" %}
  interface(REG_INTER);
%}

// Special case for integer high multiply
// (same register class as eADXRegL, but printed as "EAX" only)
operand eADXRegL_low_only() %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(RegL);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Flags register, used as output of compare instructions
operand eFlagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of FLOATING POINT compare instructions
operand eFlagsRegU() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS_U" %}
  interface(REG_INTER);
%}

// Flags operand whose predicate is always false
operand eFlagsRegUCF() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  predicate(false);

  format %{ "EFLAGS_U_CF" %}
  interface(REG_INTER);
%}

// Condition Code Register used by long compare
operand flagsReg_long_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LTGE" %}
  interface(REG_INTER);
%}
operand flagsReg_long_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_EQNE" %}
  interface(REG_INTER);
%}
operand flagsReg_long_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LEGT" %}
  interface(REG_INTER);
%}

// Double register operands (UseSSE < 2)
operand regD() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(dbl_reg));
  match(RegD);
  match(regDPR1);
  match(regDPR2);
  format %{ %}
  interface(REG_INTER);
%}

operand regDPR1(regD reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(dbl_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

operand regDPR2(regD reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(dbl_reg1));
  match(reg);
  format %{ "FPR2" %}
  interface(REG_INTER);
%}

operand regnotDPR1(regD reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(dbl_notreg0));
  match(reg);
  format %{ %}
  interface(REG_INTER);
%}

// XMM Double register operands
operand regXD() %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(xdb_reg));
  match(RegD);
  match(regXD6);
  match(regXD7);
  format %{ %}
  interface(REG_INTER);
%}

// XMM6 double register operands
operand regXD6(regXD reg) %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(xdb_reg6));
  match(reg);
  format %{ "XMM6" %}
  interface(REG_INTER);
%}

// XMM7 double register operands
operand regXD7(regXD reg) %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(xdb_reg7));
  match(reg);
  format %{ "XMM7" %}
  interface(REG_INTER);
%}

// Float register operands
operand regF() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(flt_reg));
  match(RegF);
  match(regFPR1);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand regFPR1(regF reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(flt_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

// XMM register operands
operand regX() %{
  predicate( UseSSE>=1 );
  constraint(ALLOC_IN_RC(xmm_reg));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}


//----------Memory Operands----------------------------------------------------
// Direct Memory Operand
operand direct(immP addr) %{
  match(addr);

  format %{ "[$addr]" %}
  interface(MEMORY_INTER) %{
    base(0xFFFFFFFF);
    index(0x4);
    scale(0x0);
    disp($addr);
  %}
%}

// Indirect Memory Operand
operand indirect(eRegP reg) %{
  constraint(ALLOC_IN_RC(e_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8(eRegP reg, immI8 off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32(eRegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Pointer Constant Plus Integer Register Operand
// (the pointer immediate is the displacement, the integer register the base)
operand indOffset32X(eRegI reg, immP off) %{
  match(AddP off reg);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset(eRegP reg, eRegI ireg, immI off) %{
  match(AddP (AddP reg ireg) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Operand
operand indIndex(eRegP reg, eRegI ireg) %{
  match(AddP reg ireg);

  op_cost(10);
  format %{"[$reg + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" without a base
// // -------------------------------------------------------------------------
// // Scaled Memory Operands
// // Indirect Memory Times Scale Plus Offset Operand
// operand indScaleOffset(immP off, eRegI ireg, immI2 scale) %{
//   match(AddP off (LShiftI ireg scale));
//
//   op_cost(10);
//   format %{"[$off + $ireg << $scale]" %}
//   interface(MEMORY_INTER) %{
//     base(0x4);
//     index($ireg);
//     scale($scale);
//     disp($off);
//   %}
// %}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale(eRegP reg, eRegI ireg, immI2 scale) %{
  match(AddP reg (LShiftI ireg scale));

  op_cost(10);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset(eRegP reg, immI off, eRegI ireg, immI2 scale) %{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
// the first word of the long.  If the load-long destination overlaps with
// registers used in the addressing expression, the 2nd half will be loaded
// from a clobbered address.  Fix this by requiring that load-long use
// address registers that do not overlap with the load-long target.

// load-long support
operand load_long_RegP() %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(RegP);
  match(eSIRegP);
  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Indirect Memory Operand Long
operand load_long_indirect(load_long_RegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand load_long_indOffset32(load_long_RegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Addressing forms accepted by load-long
opclass load_long_memory(load_long_indirect, load_long_indOffset32);


//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
operand stackSlotP(sRegP reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

//----------Memory Operands - Win95 Implicit Null Variants----------------
// These take their base register from eRegP_no_EBP and carry op_cost(100).

// Indirect Memory Operand
operand indirect_win95_safe(eRegP_no_EBP reg)
%{
  constraint(ALLOC_IN_RC(e_reg));
  match(reg);

  op_cost(100);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset_win95_safe(eRegP_no_EBP reg, eRegI ireg, immI off)
%{
  match(AddP (AddP reg ireg) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale_win95_safe(eRegP_no_EBP reg, eRegI ireg, immI2 scale)
%{
  match(AddP reg (LShiftI ireg scale));

  op_cost(100);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, eRegI ireg, immI2 scale)
%{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Comparison Code
// Each entry pairs a condition-code encoding with its mnemonic suffix.
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
  %}
%}

// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
operand cmpOpU() %{
  match(Bool);

  // Unsigned condition mnemonics: b / nb / be / nbe.
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
  %}
%}

// Floating comparisons that don't require any fixup for the unordered case
// (the predicate restricts this operand to lt, ge, le and gt tests).
operand cmpOpUCF() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
            n->as_Bool()->_test._test == BoolTest::ge ||
            n->as_Bool()->_test._test == BoolTest::le ||
            n->as_Bool()->_test._test == BoolTest::gt);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
  %}
%}


// Floating comparisons that can be fixed up with extra conditional jumps
// (the predicate restricts this operand to ne and eq tests).
operand cmpOpUCF2() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
            n->as_Bool()->_test._test == BoolTest::eq);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
  %}
%}

// Comparison Code for FP conditional move
operand cmpOp_fcmov() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0C8);
    not_equal    (0x1C8);
    less         (0x0C0);
    greater_equal(0x1C0);
    less_equal   (0x0D0);
    greater      (0x1D0);
  %}
%}

// Comparison Code used in long compares.  Relative to cmpOp, the encodings
// of the four ordering tests are swapped (less uses "g", greater uses "l",
// and so on); equal and not_equal are unchanged.
operand cmpOp_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xF, "g");
    greater_equal(0xE, "le");
    less_equal(0xD, "ge");
    greater(0xC, "l");
  %}
%}

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.  The classic
// case of this is memory operands.

opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
               indIndex, indIndexScale, indIndexScaleOffset);

// Long memory operations are encoded in 2 instructions and a +4 offset.
// This means some kind of offset is always required and you cannot use
// an oop as the offset (done when working on static globals).
opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
                    indIndex, indIndexScale, indIndexScaleOffset);


//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
pipeline %{

//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Instructions are variable size
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // Instruction unit size is 1 byte
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU op, only ALU0 handles mul/div instructions.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MEM = MS0 | MS1,
           BR, FPU,
           ALU0, ALU1, ALU = ALU0 | ALU1 );

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Naming convention: ialu or fpu
// Then: _reg
// Then: _reg if there is a 2nd register
// Then: _long if it's a pair of instructions implementing a long
// Then: _fat if it requires the big decoder
//   Or: _mem if it requires the big decoder and a memory unit.

// Integer ALU reg operation
pipe_class ialu_reg(eRegI dst) %{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Long ALU reg operation
pipe_class ialu_reg_long(eRegL dst) %{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg operation using big decoder
pipe_class ialu_reg_fat(eRegI dst) %{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}

// Long ALU reg operation using big decoder
pipe_class ialu_reg_long_fat(eRegL dst) %{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S3(2);     // any 2 alus
%}

// Integer ALU reg-reg operation
pipe_class ialu_reg_reg(eRegI dst, eRegI src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation
pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg-reg operation using big decoder
// (note: src is declared with the memory operand class)
pipe_class ialu_reg_reg_fat(eRegI dst, memory src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation using big decoder
pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg-mem operation
pipe_class ialu_reg_mem(eRegI dst, memory mem) %{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;        // any mem
%}

// Long ALU reg-mem operation
pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S4(2);     // any 2 alus
    MEM    : S3(2);     // both mems
%}

// Integer mem operation (prefetch)
pipe_class ialu_mem(memory mem) %{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    MEM    : S3;        // any mem
%}

// Integer Store to Memory
pipe_class ialu_mem_reg(memory mem, eRegI src) %{
    single_instruction;
    mem    : S3(read);
    src    : S5(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Long Store to Memory
pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
    instruction_count(2);
    mem    : S3(read);
    src    : S5(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S4(2);     // any 2 alus
    MEM    : S3(2);     // Both mems
%}

// Integer Store to Memory
pipe_class ialu_mem_imm(memory mem) %{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Integer ALU0 reg-reg operation
pipe_class ialu_reg_reg_alu0(eRegI dst, eRegI src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // Big decoder only
    ALU0   : S3;        // only alu0
%}

// Integer ALU0 reg-mem operation
pipe_class ialu_reg_mem_alu0(eRegI dst, memory mem) %{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU0   : S4;        // ALU0 only
    MEM    : S3;        // any mem
%}

// Integer ALU reg-reg operation that writes the flags register
pipe_class ialu_cr_reg_reg(eFlagsReg cr, eRegI src1, eRegI src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-imm operation that writes the flags register
pipe_class ialu_cr_reg_imm(eFlagsReg cr, eRegI src1) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-mem operation that writes the flags register
pipe_class ialu_cr_reg_mem(eFlagsReg cr, eRegI src1, memory src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Conditional move reg-reg
pipe_class pipe_cmplt( eRegI p, eRegI q, eRegI y ) %{
    instruction_count(4);
    y      : S4(read);
    q      : S3(read);
    p      : S3(read);
    DECODE : S0(4);     // any decoder
%}

// Conditional move reg-reg
pipe_class pipe_cmov_reg( eRegI dst, eRegI src, eFlagsReg cr ) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Conditional move reg-mem
pipe_class pipe_cmov_mem( eFlagsReg cr, eRegI dst, memory src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
    MEM    : S3;
%}

// Conditional move reg-reg long
pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0(2);     // any 2 decoders
%}

// Conditional move double reg-reg
pipe_class pipe_cmovD_reg( eFlagsReg cr, regDPR1 dst, regD src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Float reg operation
pipe_class fpu_reg(regD dst) %{
    instruction_count(2);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg(regD dst, regD src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg-reg operation
pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2) %{
    instruction_count(3);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0(3);     // any 3 decoders
    FPU    : S3(2);
%}

// Float reg-reg-reg-reg operation
pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S0(4);     // any 4 decoders
    FPU    : S3(2);
%}

// Float reg-mem-reg-reg operation
pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S1(3);     // any 3 decoders
    D0     : S0;        // Big decoder only
    FPU    : S3(2);
    MEM    : S3;
%}

// Float reg-mem operation
pipe_class fpu_reg_mem(regD dst, memory mem) %{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float reg-reg-mem operation
pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem) %{
    instruction_count(3);
    dst    : S5(write);
    src1   : S3(read);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float mem-reg operation
pipe_class fpu_mem_reg(memory mem, regD src) %{
    instruction_count(2);
    src    : S5(read);
    mem    : S3(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S1;        // big decoder only
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float mem-reg-reg operation
pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S3(read);
    DECODE : S0(2);     // any decoder for FPU PUSH
    D0     : S1;        // big decoder only
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float mem-reg-mem operation
pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S4(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S0(2);     // big decoder only
    FPU    : S4;
    MEM    : S3(2);     // any mem
%}

// Float mem-mem operation
pipe_class fpu_mem_mem(memory dst, memory src1) %{
    instruction_count(2);
    src1   : S3(read);
    dst    : S4(read);
    D0     : S0(2);     // big decoder only
    MEM    : S3(2);     // any mem
%}

// Float mem-mem-mem operation
pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    dst    : S4(read);
    D0     : S0(3);     // big decoder only
    FPU    : S4;
    MEM    : S3(3);     // any mem
%}

// Float mem-reg operation with a constant
pipe_class fpu_mem_reg_con(memory mem, regD src1) %{
    instruction_count(3);
    src1   : S4(read);
    mem    : S4(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S0(2);     // big decoder only
    FPU    : S4;
    MEM    : S3(2);     // any mem
%}

// Float load constant
pipe_class fpu_reg_con(regD dst) %{
    instruction_count(2);
    dst    : S5(write);
    D0     : S0;        // big decoder only for the load
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float load constant
pipe_class fpu_reg_reg_con(regD dst, regD src) %{
    instruction_count(3);
    dst    : S5(write);
    src    : S3(read);
    D0     : S0;        // big decoder only for the load
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// UnConditional branch
pipe_class pipe_jmp( label labl ) %{
    single_instruction;
    BR     : S3;
%}

// Conditional branch
pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
    single_instruction;
    cr     : S1(read);
    BR     : S3;
%}

// Allocation idiom
pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
    instruction_count(1);
    force_serialization;
    fixed_latency(6);
    heap_ptr : S3(read);
    DECODE   : S0(3);
    D0       : S2;
    MEM      : S3;
    ALU      : S3(2);
    dst      : S5(write);
    BR       : S5;
%}

// Generic big/slow expanded idiom
pipe_class pipe_slow( ) %{
    instruction_count(10);
    multiple_bundles;
    force_serialization;
    fixed_latency(100);
    D0     : S0(2);
    MEM    : S3(2);
%}

// The real do-nothing guy
pipe_class empty( ) %{
    instruction_count(0);
%}

// Define the class for the Nop node
define %{
   MachNop = empty;
%}

%}

//----------INSTRUCTIONS-------------------------------------------------------
//
// match      -- States which machine-independent subtree may be replaced
//               by this instruction.
// ins_cost   -- The estimated cost of this instruction is used by instruction
//               selection to identify a minimum cost tree of machine
//               instructions that matches a tree of machine-independent
//               instructions.
// format     -- A string providing the disassembly for this instruction.
//               The value of an instruction's operand may be inserted
//               by referring to it with a '$' prefix.
// opcode     -- Three instruction opcodes may be provided.  These are referred
//               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
//               indicate the type of machine instruction, while secondary
//               and tertiary are often used for prefix options or addressing
//               modes.
// ins_encode -- A list of encode classes with parameters. The encode class
//               name must have been defined in an 'enc_class' specification
//               in the encode section of the architecture description.
//----------BSWAP-Instruction--------------------------------------------------
// Byte-reverse a 32-bit register in place.
instruct bytes_reverse_int(eRegI dst) %{
  match(Set dst (ReverseBytesI dst));

  format %{ "BSWAP $dst" %}
  opcode(0x0F, 0xC8);
  ins_encode( OpcP, OpcSReg(dst) );
  ins_pipe( ialu_reg );
%}

// Byte-reverse a long held in a register pair; the encoding is delegated to
// the bswap_long_bytes encode class.
instruct bytes_reverse_long(eRegL dst) %{
  match(Set dst (ReverseBytesL dst));

  format %{ "BSWAP $dst.lo\n\t"
            "BSWAP $dst.hi\n\t"
            "XCHG $dst.lo $dst.hi" %}

  ins_cost(125);
  ins_encode( bswap_long_bytes(dst) );
  ins_pipe( ialu_reg_reg);
%}

// Byte-reverse an unsigned 16-bit value: swap all four bytes, then shift the
// result down 16 bits with a logical (zero-filling) shift.
instruct bytes_reverse_unsigned_short(eRegI dst) %{
  match(Set dst (ReverseBytesUS dst));

  format %{ "BSWAP $dst\n\t"
            "SHR $dst,16\n\t" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ bswapl(Rdst);
    __ shrl(Rdst, 16);
  %}
  ins_pipe( ialu_reg );
%}

// Byte-reverse a signed 16-bit value: swap all four bytes, then shift the
// result down 16 bits with an arithmetic (sign-filling) shift.
instruct bytes_reverse_short(eRegI dst) %{
  match(Set dst (ReverseBytesS dst));

  format %{ "BSWAP $dst\n\t"
            "SAR $dst,16\n\t" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ bswapl(Rdst);
    __ sarl(Rdst, 16);
  %}
  ins_pipe( ialu_reg );
%}


//---------- Zeros Count Instructions ------------------------------------------

// Count leading zeros (int), selected when UseCountLeadingZerosInstruction
// is set.
instruct countLeadingZerosI(eRegI dst, eRegI src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ lzcntl(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg);
%}

// Count leading zeros (int) via BSR, selected when
// UseCountLeadingZerosInstruction is not set.
instruct countLeadingZerosI_bsr(eRegI dst, eRegI src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t"
            "JNZ skip\n\t"
            "MOV $dst, -1\n"
            "skip:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 31" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label skip;
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, skip);
    // Branch not taken: force -1 so that NEG/ADD below produces 32.
    __ movl(Rdst, -1);
    __ bind(skip);
    // result = (BitsPerInt - 1) - bit index
    __ negl(Rdst);
    __ addl(Rdst, BitsPerInt - 1);
  %}
  ins_pipe(ialu_reg);
%}

// Count leading zeros (long in a register pair), LZCNT variant.
instruct countLeadingZerosL(eRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JNC done\n\t"
            "LZCNT $dst, $src.lo\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    // High word first; the low word is only examined when carry is set.
    __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::carryClear, done);
    __ lzcntl(Rdst, Rsrc);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Count leading zeros (long in a register pair), BSR variant.
instruct countLeadingZerosL_bsr(eRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JZ msw_is_zero\n\t"
            "ADD $dst, 32\n\t"
            "JMP not_zero\n"
            "msw_is_zero:\n\t"
            "BSR $dst, $src.lo\n\t"
            "JNZ not_zero\n\t"
            "MOV $dst, -1\n"
            "not_zero:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 63\n" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_is_zero;
    Label not_zero;
    __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::zero, msw_is_zero);
    // Bit found in the high word: bias its index by 32.
    __ addl(Rdst, BitsPerInt);
    __ jmpb(not_zero);
    __ bind(msw_is_zero);
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, not_zero);
    // Neither branch taken: force -1 so that NEG/ADD below produces 64.
    __ movl(Rdst, -1);
    __ bind(not_zero);
    // result = (BitsPerLong - 1) - bit index
    __ negl(Rdst);
    __ addl(Rdst, BitsPerLong - 1);
  %}
  ins_pipe(ialu_reg);
%}

// Count trailing zeros (int) via BSF; yields BitsPerInt when the
// branch on not-zero is not taken.
instruct countTrailingZerosI(eRegI dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t"
            "JNZ done\n\t"
            "MOV $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Label done;
    __ bsfl(Rdst, $src$$Register);
    __ jccb(Assembler::notZero, done);
    __ movl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Count trailing zeros (long in a register pair) via BSF on the low word,
// then on the high word.
instruct countTrailingZerosL(eRegI dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t"
            "JNZ done\n\t"
            "BSF $dst, $src.hi\n\t"
            "JNZ msw_not_zero\n\t"
            "MOV $dst, 32\n"
            "msw_not_zero:\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_not_zero;
    Label done;
    __ bsfl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::notZero, msw_not_zero);
    // Neither branch taken: 32 here plus the 32 added below gives 64.
    __ movl(Rdst, BitsPerInt);
    __ bind(msw_not_zero);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}


//---------- Population Count Instructions -------------------------------------

// Population count of an int register; requires UsePopCountInstruction.
instruct popCountI(eRegI dst, eRegI src) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));

  format %{ "POPCNT $dst, $src" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ popcntl(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg);
%}

// Population count of an int loaded directly from memory.
instruct popCountI_mem(eRegI dst, memory mem) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));

  format %{ "POPCNT $dst, $mem" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ popcntl(Rdst, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
// Counts each half of the register pair separately and adds the two counts.
instruct popCountL(eRegI dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $src.lo\n\t"
            "POPCNT $tmp, $src.hi\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Register Rtmp = $tmp$$Register;
    __ popcntl(Rdst, Rsrc);
    __ popcntl(Rtmp, HIGH_FROM_LOW(Rsrc));
    __ addl(Rdst, Rtmp);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
// Memory form: the two 32-bit halves are addressed at disp and disp + 4.
instruct popCountL_mem(eRegI dst, memory mem, eRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $mem\n\t"
            "POPCNT $tmp, $mem+4\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    //__ popcntl($dst$$Register, $mem$$Address$$first);
    //__ popcntl($tmp$$Register, $mem$$Address$$second);
    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, false));
    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, false));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}


//----------Load/Store/Move Instructions---------------------------------------
//----------Load Instructions--------------------------------------------------
// Load Byte (8bit signed)
instruct loadB(xRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "MOVSX8 $dst,$mem\t# byte" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movsbl(Rdst, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Byte (8bit signed) into Long Register
instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadB mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,7" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movsbl(Rdst, $mem$$Address);
    __ movl(HIGH_FROM_LOW(Rdst), Rdst); // This is always a different register.
    __ sarl(HIGH_FROM_LOW(Rdst), 7); // 24+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8bit UNsigned)
instruct loadUB(xRegI dst, memory mem) %{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) into Long Register
instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    // Zero-extended load: clear the high half of the pair.
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
instruct loadUB2L_immI8(eRegL dst, memory mem, immI8 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 8-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}
6633 6634// Load Short (16bit signed) 6635instruct loadS(eRegI dst, memory mem) %{ 6636 match(Set dst (LoadS mem)); 6637 6638 ins_cost(125); 6639 format %{ "MOVSX $dst,$mem\t# short" %} 6640 6641 ins_encode %{ 6642 __ movswl($dst$$Register, $mem$$Address); 6643 %} 6644 6645 ins_pipe(ialu_reg_mem); 6646%} 6647 6648// Load Short (16 bit signed) to Byte (8 bit signed) 6649instruct loadS2B(eRegI dst, memory mem, immI_24 twentyfour) %{ 6650 match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); 6651 6652 ins_cost(125); 6653 format %{ "MOVSX $dst, $mem\t# short -> byte" %} 6654 ins_encode %{ 6655 __ movsbl($dst$$Register, $mem$$Address); 6656 %} 6657 ins_pipe(ialu_reg_mem); 6658%} 6659 6660// Load Short (16bit signed) into Long Register 6661instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{ 6662 match(Set dst (ConvI2L (LoadS mem))); 6663 effect(KILL cr); 6664 6665 ins_cost(375); 6666 format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t" 6667 "MOV $dst.hi,$dst.lo\n\t" 6668 "SAR $dst.hi,15" %} 6669 6670 ins_encode %{ 6671 __ movswl($dst$$Register, $mem$$Address); 6672 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 6673 __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended. 
6674 %} 6675 6676 ins_pipe(ialu_reg_mem); 6677%} 6678 6679// Load Unsigned Short/Char (16bit unsigned) 6680instruct loadUS(eRegI dst, memory mem) %{ 6681 match(Set dst (LoadUS mem)); 6682 6683 ins_cost(125); 6684 format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %} 6685 6686 ins_encode %{ 6687 __ movzwl($dst$$Register, $mem$$Address); 6688 %} 6689 6690 ins_pipe(ialu_reg_mem); 6691%} 6692 6693// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed) 6694instruct loadUS2B(eRegI dst, memory mem, immI_24 twentyfour) %{ 6695 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour)); 6696 6697 ins_cost(125); 6698 format %{ "MOVSX $dst, $mem\t# ushort -> byte" %} 6699 ins_encode %{ 6700 __ movsbl($dst$$Register, $mem$$Address); 6701 %} 6702 ins_pipe(ialu_reg_mem); 6703%} 6704 6705// Load Unsigned Short/Char (16 bit UNsigned) into Long Register 6706instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{ 6707 match(Set dst (ConvI2L (LoadUS mem))); 6708 effect(KILL cr); 6709 6710 ins_cost(250); 6711 format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t" 6712 "XOR $dst.hi,$dst.hi" %} 6713 6714 ins_encode %{ 6715 __ movzwl($dst$$Register, $mem$$Address); 6716 __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); 6717 %} 6718 6719 ins_pipe(ialu_reg_mem); 6720%} 6721 6722// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register 6723instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{ 6724 match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); 6725 effect(KILL cr); 6726 6727 format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t" 6728 "XOR $dst.hi,$dst.hi" %} 6729 ins_encode %{ 6730 Register Rdst = $dst$$Register; 6731 __ movzbl(Rdst, $mem$$Address); 6732 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 6733 %} 6734 ins_pipe(ialu_reg_mem); 6735%} 6736 6737// Load Unsigned Short/Char (16 bit UNsigned) with a 16-bit mask into Long Register 6738instruct loadUS2L_immI16(eRegL dst, 
memory mem, immI16 mask, eFlagsReg cr) %{ 6739 match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); 6740 effect(KILL cr); 6741 6742 format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 16-bit mask -> long\n\t" 6743 "XOR $dst.hi,$dst.hi\n\t" 6744 "AND $dst.lo,$mask" %} 6745 ins_encode %{ 6746 Register Rdst = $dst$$Register; 6747 __ movzwl(Rdst, $mem$$Address); 6748 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 6749 __ andl(Rdst, $mask$$constant); 6750 %} 6751 ins_pipe(ialu_reg_mem); 6752%} 6753 6754// Load Integer 6755instruct loadI(eRegI dst, memory mem) %{ 6756 match(Set dst (LoadI mem)); 6757 6758 ins_cost(125); 6759 format %{ "MOV $dst,$mem\t# int" %} 6760 6761 ins_encode %{ 6762 __ movl($dst$$Register, $mem$$Address); 6763 %} 6764 6765 ins_pipe(ialu_reg_mem); 6766%} 6767 6768// Load Integer (32 bit signed) to Byte (8 bit signed) 6769instruct loadI2B(eRegI dst, memory mem, immI_24 twentyfour) %{ 6770 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); 6771 6772 ins_cost(125); 6773 format %{ "MOVSX $dst, $mem\t# int -> byte" %} 6774 ins_encode %{ 6775 __ movsbl($dst$$Register, $mem$$Address); 6776 %} 6777 ins_pipe(ialu_reg_mem); 6778%} 6779 6780// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) 6781instruct loadI2UB(eRegI dst, memory mem, immI_255 mask) %{ 6782 match(Set dst (AndI (LoadI mem) mask)); 6783 6784 ins_cost(125); 6785 format %{ "MOVZX $dst, $mem\t# int -> ubyte" %} 6786 ins_encode %{ 6787 __ movzbl($dst$$Register, $mem$$Address); 6788 %} 6789 ins_pipe(ialu_reg_mem); 6790%} 6791 6792// Load Integer (32 bit signed) to Short (16 bit signed) 6793instruct loadI2S(eRegI dst, memory mem, immI_16 sixteen) %{ 6794 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); 6795 6796 ins_cost(125); 6797 format %{ "MOVSX $dst, $mem\t# int -> short" %} 6798 ins_encode %{ 6799 __ movswl($dst$$Register, $mem$$Address); 6800 %} 6801 ins_pipe(ialu_reg_mem); 6802%} 6803 6804// Load Integer (32 bit signed) to Unsigned Short/Char (16 
// bit UNsigned)
instruct loadI2US(eRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer into Long Register
instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadI mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOV $dst.lo,$mem\t# int -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,31" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFF into Long Register
instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFFFF into Long Register
instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with 32-bit mask into Long Register.
//
// ConvI2L sign-extends its int input. Zeroing the high word is therefore only
// correct when the masked value cannot be negative, i.e. when bit 31 of the
// mask is clear. For a mask with bit 31 set, the high word is produced by
// replicating the sign bit of the masked low word (same MOV/SAR sequence as
// loadI2L above). All emitted ALU instructions modify EFLAGS, hence KILL cr.
instruct loadI2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOV $dst.lo,$mem\t# int & 32-bit mask -> long\n\t"
            "AND $dst.lo,$mask\n\t"
            "XOR $dst.hi,$dst.hi\t# sign-fill via MOV/SAR 31 instead when mask < 0" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $mem$$Address);
    __ andl(Rdst, $mask$$constant);
    if ($mask$$constant < 0) {
      // Bit 31 of the mask is set, so the masked int may be negative:
      // copy it to the high word and arithmetic-shift to replicate the sign.
      __ movl(HIGH_FROM_LOW(Rdst), Rdst);
      __ sarl(HIGH_FROM_LOW(Rdst), 31);
    } else {
      // Masked value is never negative; zero extension equals sign extension.
      __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    }
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Integer into Long Register
instruct loadUI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (LoadUI2L mem));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Long. Cannot clobber address while loading, so restrict address
// register to ESI
instruct loadL(eRegL dst, load_long_memory mem) %{
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# long\n\t"
            "MOV $dst.hi,$mem+4" %}

  ins_encode %{
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, false);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, false);
    __ movl($dst$$Register, Amemlo);
    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
  %}

  ins_pipe(ialu_reg_long_mem);
%}

// Volatile Load Long. Must be atomic, so do 64-bit FILD
// then store it down to the stack and reload on the int
// side.
6922instruct loadL_volatile(stackSlotL dst, memory mem) %{ 6923 predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access()); 6924 match(Set dst (LoadL mem)); 6925 6926 ins_cost(200); 6927 format %{ "FILD $mem\t# Atomic volatile long load\n\t" 6928 "FISTp $dst" %} 6929 ins_encode(enc_loadL_volatile(mem,dst)); 6930 ins_pipe( fpu_reg_mem ); 6931%} 6932 6933instruct loadLX_volatile(stackSlotL dst, memory mem, regXD tmp) %{ 6934 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); 6935 match(Set dst (LoadL mem)); 6936 effect(TEMP tmp); 6937 ins_cost(180); 6938 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" 6939 "MOVSD $dst,$tmp" %} 6940 ins_encode(enc_loadLX_volatile(mem, dst, tmp)); 6941 ins_pipe( pipe_slow ); 6942%} 6943 6944instruct loadLX_reg_volatile(eRegL dst, memory mem, regXD tmp) %{ 6945 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); 6946 match(Set dst (LoadL mem)); 6947 effect(TEMP tmp); 6948 ins_cost(160); 6949 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" 6950 "MOVD $dst.lo,$tmp\n\t" 6951 "PSRLQ $tmp,32\n\t" 6952 "MOVD $dst.hi,$tmp" %} 6953 ins_encode(enc_loadLX_reg_volatile(mem, dst, tmp)); 6954 ins_pipe( pipe_slow ); 6955%} 6956 6957// Load Range 6958instruct loadRange(eRegI dst, memory mem) %{ 6959 match(Set dst (LoadRange mem)); 6960 6961 ins_cost(125); 6962 format %{ "MOV $dst,$mem" %} 6963 opcode(0x8B); 6964 ins_encode( OpcP, RegMem(dst,mem)); 6965 ins_pipe( ialu_reg_mem ); 6966%} 6967 6968 6969// Load Pointer 6970instruct loadP(eRegP dst, memory mem) %{ 6971 match(Set dst (LoadP mem)); 6972 6973 ins_cost(125); 6974 format %{ "MOV $dst,$mem" %} 6975 opcode(0x8B); 6976 ins_encode( OpcP, RegMem(dst,mem)); 6977 ins_pipe( ialu_reg_mem ); 6978%} 6979 6980// Load Klass Pointer 6981instruct loadKlass(eRegP dst, memory mem) %{ 6982 match(Set dst (LoadKlass mem)); 6983 6984 ins_cost(125); 6985 format %{ "MOV $dst,$mem" %} 6986 opcode(0x8B); 6987 ins_encode( OpcP, RegMem(dst,mem)); 6988 ins_pipe( 
ialu_reg_mem ); 6989%} 6990 6991// Load Double 6992instruct loadD(regD dst, memory mem) %{ 6993 predicate(UseSSE<=1); 6994 match(Set dst (LoadD mem)); 6995 6996 ins_cost(150); 6997 format %{ "FLD_D ST,$mem\n\t" 6998 "FSTP $dst" %} 6999 opcode(0xDD); /* DD /0 */ 7000 ins_encode( OpcP, RMopc_Mem(0x00,mem), 7001 Pop_Reg_D(dst) ); 7002 ins_pipe( fpu_reg_mem ); 7003%} 7004 7005// Load Double to XMM 7006instruct loadXD(regXD dst, memory mem) %{ 7007 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper); 7008 match(Set dst (LoadD mem)); 7009 ins_cost(145); 7010 format %{ "MOVSD $dst,$mem" %} 7011 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem)); 7012 ins_pipe( pipe_slow ); 7013%} 7014 7015instruct loadXD_partial(regXD dst, memory mem) %{ 7016 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); 7017 match(Set dst (LoadD mem)); 7018 ins_cost(145); 7019 format %{ "MOVLPD $dst,$mem" %} 7020 ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x12), RegMem(dst,mem)); 7021 ins_pipe( pipe_slow ); 7022%} 7023 7024// Load to XMM register (single-precision floating point) 7025// MOVSS instruction 7026instruct loadX(regX dst, memory mem) %{ 7027 predicate(UseSSE>=1); 7028 match(Set dst (LoadF mem)); 7029 ins_cost(145); 7030 format %{ "MOVSS $dst,$mem" %} 7031 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem)); 7032 ins_pipe( pipe_slow ); 7033%} 7034 7035// Load Float 7036instruct loadF(regF dst, memory mem) %{ 7037 predicate(UseSSE==0); 7038 match(Set dst (LoadF mem)); 7039 7040 ins_cost(150); 7041 format %{ "FLD_S ST,$mem\n\t" 7042 "FSTP $dst" %} 7043 opcode(0xD9); /* D9 /0 */ 7044 ins_encode( OpcP, RMopc_Mem(0x00,mem), 7045 Pop_Reg_F(dst) ); 7046 ins_pipe( fpu_reg_mem ); 7047%} 7048 7049// Load Aligned Packed Byte to XMM register 7050instruct loadA8B(regXD dst, memory mem) %{ 7051 predicate(UseSSE>=1); 7052 match(Set dst (Load8B mem)); 7053 ins_cost(125); 7054 format %{ "MOVQ $dst,$mem\t! 
packed8B" %} 7055 ins_encode( movq_ld(dst, mem)); 7056 ins_pipe( pipe_slow ); 7057%} 7058 7059// Load Aligned Packed Short to XMM register 7060instruct loadA4S(regXD dst, memory mem) %{ 7061 predicate(UseSSE>=1); 7062 match(Set dst (Load4S mem)); 7063 ins_cost(125); 7064 format %{ "MOVQ $dst,$mem\t! packed4S" %} 7065 ins_encode( movq_ld(dst, mem)); 7066 ins_pipe( pipe_slow ); 7067%} 7068 7069// Load Aligned Packed Char to XMM register 7070instruct loadA4C(regXD dst, memory mem) %{ 7071 predicate(UseSSE>=1); 7072 match(Set dst (Load4C mem)); 7073 ins_cost(125); 7074 format %{ "MOVQ $dst,$mem\t! packed4C" %} 7075 ins_encode( movq_ld(dst, mem)); 7076 ins_pipe( pipe_slow ); 7077%} 7078 7079// Load Aligned Packed Integer to XMM register 7080instruct load2IU(regXD dst, memory mem) %{ 7081 predicate(UseSSE>=1); 7082 match(Set dst (Load2I mem)); 7083 ins_cost(125); 7084 format %{ "MOVQ $dst,$mem\t! packed2I" %} 7085 ins_encode( movq_ld(dst, mem)); 7086 ins_pipe( pipe_slow ); 7087%} 7088 7089// Load Aligned Packed Single to XMM 7090instruct loadA2F(regXD dst, memory mem) %{ 7091 predicate(UseSSE>=1); 7092 match(Set dst (Load2F mem)); 7093 ins_cost(145); 7094 format %{ "MOVQ $dst,$mem\t! 
packed2F" %} 7095 ins_encode( movq_ld(dst, mem)); 7096 ins_pipe( pipe_slow ); 7097%} 7098 7099// Load Effective Address 7100instruct leaP8(eRegP dst, indOffset8 mem) %{ 7101 match(Set dst mem); 7102 7103 ins_cost(110); 7104 format %{ "LEA $dst,$mem" %} 7105 opcode(0x8D); 7106 ins_encode( OpcP, RegMem(dst,mem)); 7107 ins_pipe( ialu_reg_reg_fat ); 7108%} 7109 7110instruct leaP32(eRegP dst, indOffset32 mem) %{ 7111 match(Set dst mem); 7112 7113 ins_cost(110); 7114 format %{ "LEA $dst,$mem" %} 7115 opcode(0x8D); 7116 ins_encode( OpcP, RegMem(dst,mem)); 7117 ins_pipe( ialu_reg_reg_fat ); 7118%} 7119 7120instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{ 7121 match(Set dst mem); 7122 7123 ins_cost(110); 7124 format %{ "LEA $dst,$mem" %} 7125 opcode(0x8D); 7126 ins_encode( OpcP, RegMem(dst,mem)); 7127 ins_pipe( ialu_reg_reg_fat ); 7128%} 7129 7130instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{ 7131 match(Set dst mem); 7132 7133 ins_cost(110); 7134 format %{ "LEA $dst,$mem" %} 7135 opcode(0x8D); 7136 ins_encode( OpcP, RegMem(dst,mem)); 7137 ins_pipe( ialu_reg_reg_fat ); 7138%} 7139 7140instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{ 7141 match(Set dst mem); 7142 7143 ins_cost(110); 7144 format %{ "LEA $dst,$mem" %} 7145 opcode(0x8D); 7146 ins_encode( OpcP, RegMem(dst,mem)); 7147 ins_pipe( ialu_reg_reg_fat ); 7148%} 7149 7150// Load Constant 7151instruct loadConI(eRegI dst, immI src) %{ 7152 match(Set dst src); 7153 7154 format %{ "MOV $dst,$src" %} 7155 ins_encode( LdImmI(dst, src) ); 7156 ins_pipe( ialu_reg_fat ); 7157%} 7158 7159// Load Constant zero 7160instruct loadConI0(eRegI dst, immI0 src, eFlagsReg cr) %{ 7161 match(Set dst src); 7162 effect(KILL cr); 7163 7164 ins_cost(50); 7165 format %{ "XOR $dst,$dst" %} 7166 opcode(0x33); /* + rd */ 7167 ins_encode( OpcP, RegReg( dst, dst ) ); 7168 ins_pipe( ialu_reg ); 7169%} 7170 7171instruct loadConP(eRegP dst, immP src) %{ 7172 match(Set dst src); 7173 7174 format %{ "MOV $dst,$src" %} 7175 
opcode(0xB8); /* + rd */ 7176 ins_encode( LdImmP(dst, src) ); 7177 ins_pipe( ialu_reg_fat ); 7178%} 7179 7180instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{ 7181 match(Set dst src); 7182 effect(KILL cr); 7183 ins_cost(200); 7184 format %{ "MOV $dst.lo,$src.lo\n\t" 7185 "MOV $dst.hi,$src.hi" %} 7186 opcode(0xB8); 7187 ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) ); 7188 ins_pipe( ialu_reg_long_fat ); 7189%} 7190 7191instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{ 7192 match(Set dst src); 7193 effect(KILL cr); 7194 ins_cost(150); 7195 format %{ "XOR $dst.lo,$dst.lo\n\t" 7196 "XOR $dst.hi,$dst.hi" %} 7197 opcode(0x33,0x33); 7198 ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) ); 7199 ins_pipe( ialu_reg_long ); 7200%} 7201 7202// The instruction usage is guarded by predicate in operand immF(). 7203instruct loadConF(regF dst, immF src) %{ 7204 match(Set dst src); 7205 ins_cost(125); 7206 7207 format %{ "FLD_S ST,$src\n\t" 7208 "FSTP $dst" %} 7209 opcode(0xD9, 0x00); /* D9 /0 */ 7210 ins_encode(LdImmF(src), Pop_Reg_F(dst) ); 7211 ins_pipe( fpu_reg_con ); 7212%} 7213 7214// The instruction usage is guarded by predicate in operand immXF(). 7215instruct loadConX(regX dst, immXF con) %{ 7216 match(Set dst con); 7217 ins_cost(125); 7218 format %{ "MOVSS $dst,[$con]" %} 7219 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), LdImmX(dst, con)); 7220 ins_pipe( pipe_slow ); 7221%} 7222 7223// The instruction usage is guarded by predicate in operand immXF0(). 7224instruct loadConX0(regX dst, immXF0 src) %{ 7225 match(Set dst src); 7226 ins_cost(100); 7227 format %{ "XORPS $dst,$dst\t# float 0.0" %} 7228 ins_encode( Opcode(0x0F), Opcode(0x57), RegReg(dst,dst)); 7229 ins_pipe( pipe_slow ); 7230%} 7231 7232// The instruction usage is guarded by predicate in operand immD(). 
instruct loadConD(regD dst, immD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D ST,$src\n\t"
            "FSTP $dst" %}
  ins_encode(LdImmD(src), Pop_Reg_D(dst) );
  ins_pipe( fpu_reg_con );
%}

// The instruction usage is guarded by predicate in operand immXD().
instruct loadConXD(regXD dst, immXD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$con]" %}
  ins_encode(load_conXD(dst, con));
  ins_pipe( pipe_slow );
%}

// The instruction usage is guarded by predicate in operand immXD0().
instruct loadConXD0(regXD dst, immXD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x57), RegReg(dst,dst));
  ins_pipe( pipe_slow );
%}

// Load Stack Slot
instruct loadSSI(eRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Long from stack slot: two 32-bit register loads (opcode 0x8B), the
// second emitted through RegMem_Hi. The listing names the register halves on
// the destination side and the slot offsets on the source side, using the
// same notation as loadL.
// NOTE(review): the pipe class below is the one the long *store* rules use
// (loadL uses ialu_reg_long_mem); left unchanged here -- confirm intent.
instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src+4" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Load Stack Slot
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Stack Slot
instruct loadSSF(regF dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S $src\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_F(dst) );
ins_pipe( fpu_reg_mem ); 7305%} 7306 7307// Load Stack Slot 7308instruct loadSSD(regD dst, stackSlotD src) %{ 7309 match(Set dst src); 7310 ins_cost(125); 7311 7312 format %{ "FLD_D $src\n\t" 7313 "FSTP $dst" %} 7314 opcode(0xDD); /* DD /0, FLD m64real */ 7315 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 7316 Pop_Reg_D(dst) ); 7317 ins_pipe( fpu_reg_mem ); 7318%} 7319 7320// Prefetch instructions. 7321// Must be safe to execute with invalid address (cannot fault). 7322 7323instruct prefetchr0( memory mem ) %{ 7324 predicate(UseSSE==0 && !VM_Version::supports_3dnow()); 7325 match(PrefetchRead mem); 7326 ins_cost(0); 7327 size(0); 7328 format %{ "PREFETCHR (non-SSE is empty encoding)" %} 7329 ins_encode(); 7330 ins_pipe(empty); 7331%} 7332 7333instruct prefetchr( memory mem ) %{ 7334 predicate(UseSSE==0 && VM_Version::supports_3dnow() || ReadPrefetchInstr==3); 7335 match(PrefetchRead mem); 7336 ins_cost(100); 7337 7338 format %{ "PREFETCHR $mem\t! Prefetch into level 1 cache for read" %} 7339 opcode(0x0F, 0x0d); /* Opcode 0F 0d /0 */ 7340 ins_encode(OpcP, OpcS, RMopc_Mem(0x00,mem)); 7341 ins_pipe(ialu_mem); 7342%} 7343 7344instruct prefetchrNTA( memory mem ) %{ 7345 predicate(UseSSE>=1 && ReadPrefetchInstr==0); 7346 match(PrefetchRead mem); 7347 ins_cost(100); 7348 7349 format %{ "PREFETCHNTA $mem\t! Prefetch into non-temporal cache for read" %} 7350 opcode(0x0F, 0x18); /* Opcode 0F 18 /0 */ 7351 ins_encode(OpcP, OpcS, RMopc_Mem(0x00,mem)); 7352 ins_pipe(ialu_mem); 7353%} 7354 7355instruct prefetchrT0( memory mem ) %{ 7356 predicate(UseSSE>=1 && ReadPrefetchInstr==1); 7357 match(PrefetchRead mem); 7358 ins_cost(100); 7359 7360 format %{ "PREFETCHT0 $mem\t! 
Prefetch into L1 and L2 caches for read" %} 7361 opcode(0x0F, 0x18); /* Opcode 0F 18 /1 */ 7362 ins_encode(OpcP, OpcS, RMopc_Mem(0x01,mem)); 7363 ins_pipe(ialu_mem); 7364%} 7365 7366instruct prefetchrT2( memory mem ) %{ 7367 predicate(UseSSE>=1 && ReadPrefetchInstr==2); 7368 match(PrefetchRead mem); 7369 ins_cost(100); 7370 7371 format %{ "PREFETCHT2 $mem\t! Prefetch into L2 cache for read" %} 7372 opcode(0x0F, 0x18); /* Opcode 0F 18 /3 */ 7373 ins_encode(OpcP, OpcS, RMopc_Mem(0x03,mem)); 7374 ins_pipe(ialu_mem); 7375%} 7376 7377instruct prefetchw0( memory mem ) %{ 7378 predicate(UseSSE==0 && !VM_Version::supports_3dnow()); 7379 match(PrefetchWrite mem); 7380 ins_cost(0); 7381 size(0); 7382 format %{ "Prefetch (non-SSE is empty encoding)" %} 7383 ins_encode(); 7384 ins_pipe(empty); 7385%} 7386 7387instruct prefetchw( memory mem ) %{ 7388 predicate(UseSSE==0 && VM_Version::supports_3dnow() || AllocatePrefetchInstr==3); 7389 match( PrefetchWrite mem ); 7390 ins_cost(100); 7391 7392 format %{ "PREFETCHW $mem\t! Prefetch into L1 cache and mark modified" %} 7393 opcode(0x0F, 0x0D); /* Opcode 0F 0D /1 */ 7394 ins_encode(OpcP, OpcS, RMopc_Mem(0x01,mem)); 7395 ins_pipe(ialu_mem); 7396%} 7397 7398instruct prefetchwNTA( memory mem ) %{ 7399 predicate(UseSSE>=1 && AllocatePrefetchInstr==0); 7400 match(PrefetchWrite mem); 7401 ins_cost(100); 7402 7403 format %{ "PREFETCHNTA $mem\t! Prefetch into non-temporal cache for write" %} 7404 opcode(0x0F, 0x18); /* Opcode 0F 18 /0 */ 7405 ins_encode(OpcP, OpcS, RMopc_Mem(0x00,mem)); 7406 ins_pipe(ialu_mem); 7407%} 7408 7409instruct prefetchwT0( memory mem ) %{ 7410 predicate(UseSSE>=1 && AllocatePrefetchInstr==1); 7411 match(PrefetchWrite mem); 7412 ins_cost(100); 7413 7414 format %{ "PREFETCHT0 $mem\t! 
Prefetch into L1 and L2 caches for write" %} 7415 opcode(0x0F, 0x18); /* Opcode 0F 18 /1 */ 7416 ins_encode(OpcP, OpcS, RMopc_Mem(0x01,mem)); 7417 ins_pipe(ialu_mem); 7418%} 7419 7420instruct prefetchwT2( memory mem ) %{ 7421 predicate(UseSSE>=1 && AllocatePrefetchInstr==2); 7422 match(PrefetchWrite mem); 7423 ins_cost(100); 7424 7425 format %{ "PREFETCHT2 $mem\t! Prefetch into L2 cache for write" %} 7426 opcode(0x0F, 0x18); /* Opcode 0F 18 /3 */ 7427 ins_encode(OpcP, OpcS, RMopc_Mem(0x03,mem)); 7428 ins_pipe(ialu_mem); 7429%} 7430 7431//----------Store Instructions------------------------------------------------- 7432 7433// Store Byte 7434instruct storeB(memory mem, xRegI src) %{ 7435 match(Set mem (StoreB mem src)); 7436 7437 ins_cost(125); 7438 format %{ "MOV8 $mem,$src" %} 7439 opcode(0x88); 7440 ins_encode( OpcP, RegMem( src, mem ) ); 7441 ins_pipe( ialu_mem_reg ); 7442%} 7443 7444// Store Char/Short 7445instruct storeC(memory mem, eRegI src) %{ 7446 match(Set mem (StoreC mem src)); 7447 7448 ins_cost(125); 7449 format %{ "MOV16 $mem,$src" %} 7450 opcode(0x89, 0x66); 7451 ins_encode( OpcS, OpcP, RegMem( src, mem ) ); 7452 ins_pipe( ialu_mem_reg ); 7453%} 7454 7455// Store Integer 7456instruct storeI(memory mem, eRegI src) %{ 7457 match(Set mem (StoreI mem src)); 7458 7459 ins_cost(125); 7460 format %{ "MOV $mem,$src" %} 7461 opcode(0x89); 7462 ins_encode( OpcP, RegMem( src, mem ) ); 7463 ins_pipe( ialu_mem_reg ); 7464%} 7465 7466// Store Long 7467instruct storeL(long_memory mem, eRegL src) %{ 7468 predicate(!((StoreLNode*)n)->require_atomic_access()); 7469 match(Set mem (StoreL mem src)); 7470 7471 ins_cost(200); 7472 format %{ "MOV $mem,$src.lo\n\t" 7473 "MOV $mem+4,$src.hi" %} 7474 opcode(0x89, 0x89); 7475 ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) ); 7476 ins_pipe( ialu_mem_long_reg ); 7477%} 7478 7479// Store Long to Integer 7480instruct storeL2I(memory mem, eRegL src) %{ 7481 match(Set mem (StoreI mem (ConvL2I src))); 7482 7483 
format %{ "MOV $mem,$src.lo\t# long -> int" %} 7484 ins_encode %{ 7485 __ movl($mem$$Address, $src$$Register); 7486 %} 7487 ins_pipe(ialu_mem_reg); 7488%} 7489 7490// Volatile Store Long. Must be atomic, so move it into 7491// the FP TOS and then do a 64-bit FIST. Has to probe the 7492// target address before the store (for null-ptr checks) 7493// so the memory operand is used twice in the encoding. 7494instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{ 7495 predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access()); 7496 match(Set mem (StoreL mem src)); 7497 effect( KILL cr ); 7498 ins_cost(400); 7499 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" 7500 "FILD $src\n\t" 7501 "FISTp $mem\t # 64-bit atomic volatile long store" %} 7502 opcode(0x3B); 7503 ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src)); 7504 ins_pipe( fpu_reg_mem ); 7505%} 7506 7507instruct storeLX_volatile(memory mem, stackSlotL src, regXD tmp, eFlagsReg cr) %{ 7508 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access()); 7509 match(Set mem (StoreL mem src)); 7510 effect( TEMP tmp, KILL cr ); 7511 ins_cost(380); 7512 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" 7513 "MOVSD $tmp,$src\n\t" 7514 "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %} 7515 opcode(0x3B); 7516 ins_encode( OpcP, RegMem( EAX, mem ), enc_storeLX_volatile(mem, src, tmp)); 7517 ins_pipe( pipe_slow ); 7518%} 7519 7520instruct storeLX_reg_volatile(memory mem, eRegL src, regXD tmp2, regXD tmp, eFlagsReg cr) %{ 7521 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access()); 7522 match(Set mem (StoreL mem src)); 7523 effect( TEMP tmp2 , TEMP tmp, KILL cr ); 7524 ins_cost(360); 7525 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" 7526 "MOVD $tmp,$src.lo\n\t" 7527 "MOVD $tmp2,$src.hi\n\t" 7528 "PUNPCKLDQ $tmp,$tmp2\n\t" 7529 "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %} 7530 opcode(0x3B); 
7531 ins_encode( OpcP, RegMem( EAX, mem ), enc_storeLX_reg_volatile(mem, src, tmp, tmp2)); 7532 ins_pipe( pipe_slow ); 7533%} 7534 7535// Store Pointer; for storing unknown oops and raw pointers 7536instruct storeP(memory mem, anyRegP src) %{ 7537 match(Set mem (StoreP mem src)); 7538 7539 ins_cost(125); 7540 format %{ "MOV $mem,$src" %} 7541 opcode(0x89); 7542 ins_encode( OpcP, RegMem( src, mem ) ); 7543 ins_pipe( ialu_mem_reg ); 7544%} 7545 7546// Store Integer Immediate 7547instruct storeImmI(memory mem, immI src) %{ 7548 match(Set mem (StoreI mem src)); 7549 7550 ins_cost(150); 7551 format %{ "MOV $mem,$src" %} 7552 opcode(0xC7); /* C7 /0 */ 7553 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src )); 7554 ins_pipe( ialu_mem_imm ); 7555%} 7556 7557// Store Short/Char Immediate 7558instruct storeImmI16(memory mem, immI16 src) %{ 7559 predicate(UseStoreImmI16); 7560 match(Set mem (StoreC mem src)); 7561 7562 ins_cost(150); 7563 format %{ "MOV16 $mem,$src" %} 7564 opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */ 7565 ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src )); 7566 ins_pipe( ialu_mem_imm ); 7567%} 7568 7569// Store Pointer Immediate; null pointers or constant oops that do not 7570// need card-mark barriers. 
7571instruct storeImmP(memory mem, immP src) %{ 7572 match(Set mem (StoreP mem src)); 7573 7574 ins_cost(150); 7575 format %{ "MOV $mem,$src" %} 7576 opcode(0xC7); /* C7 /0 */ 7577 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src )); 7578 ins_pipe( ialu_mem_imm ); 7579%} 7580 7581// Store Byte Immediate 7582instruct storeImmB(memory mem, immI8 src) %{ 7583 match(Set mem (StoreB mem src)); 7584 7585 ins_cost(150); 7586 format %{ "MOV8 $mem,$src" %} 7587 opcode(0xC6); /* C6 /0 */ 7588 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src )); 7589 ins_pipe( ialu_mem_imm ); 7590%} 7591 7592// Store Aligned Packed Byte XMM register to memory 7593instruct storeA8B(memory mem, regXD src) %{ 7594 predicate(UseSSE>=1); 7595 match(Set mem (Store8B mem src)); 7596 ins_cost(145); 7597 format %{ "MOVQ $mem,$src\t! packed8B" %} 7598 ins_encode( movq_st(mem, src)); 7599 ins_pipe( pipe_slow ); 7600%} 7601 7602// Store Aligned Packed Char/Short XMM register to memory 7603instruct storeA4C(memory mem, regXD src) %{ 7604 predicate(UseSSE>=1); 7605 match(Set mem (Store4C mem src)); 7606 ins_cost(145); 7607 format %{ "MOVQ $mem,$src\t! packed4C" %} 7608 ins_encode( movq_st(mem, src)); 7609 ins_pipe( pipe_slow ); 7610%} 7611 7612// Store Aligned Packed Integer XMM register to memory 7613instruct storeA2I(memory mem, regXD src) %{ 7614 predicate(UseSSE>=1); 7615 match(Set mem (Store2I mem src)); 7616 ins_cost(145); 7617 format %{ "MOVQ $mem,$src\t! packed2I" %} 7618 ins_encode( movq_st(mem, src)); 7619 ins_pipe( pipe_slow ); 7620%} 7621 7622// Store CMS card-mark Immediate 7623instruct storeImmCM(memory mem, immI8 src) %{ 7624 match(Set mem (StoreCM mem src)); 7625 7626 ins_cost(150); 7627 format %{ "MOV8 $mem,$src\t! 
CMS card-mark imm0" %} 7628 opcode(0xC6); /* C6 /0 */ 7629 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src )); 7630 ins_pipe( ialu_mem_imm ); 7631%} 7632 7633// Store Double 7634instruct storeD( memory mem, regDPR1 src) %{ 7635 predicate(UseSSE<=1); 7636 match(Set mem (StoreD mem src)); 7637 7638 ins_cost(100); 7639 format %{ "FST_D $mem,$src" %} 7640 opcode(0xDD); /* DD /2 */ 7641 ins_encode( enc_FP_store(mem,src) ); 7642 ins_pipe( fpu_mem_reg ); 7643%} 7644 7645// Store double does rounding on x86 7646instruct storeD_rounded( memory mem, regDPR1 src) %{ 7647 predicate(UseSSE<=1); 7648 match(Set mem (StoreD mem (RoundDouble src))); 7649 7650 ins_cost(100); 7651 format %{ "FST_D $mem,$src\t# round" %} 7652 opcode(0xDD); /* DD /2 */ 7653 ins_encode( enc_FP_store(mem,src) ); 7654 ins_pipe( fpu_mem_reg ); 7655%} 7656 7657// Store XMM register to memory (double-precision floating points) 7658// MOVSD instruction 7659instruct storeXD(memory mem, regXD src) %{ 7660 predicate(UseSSE>=2); 7661 match(Set mem (StoreD mem src)); 7662 ins_cost(95); 7663 format %{ "MOVSD $mem,$src" %} 7664 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src, mem)); 7665 ins_pipe( pipe_slow ); 7666%} 7667 7668// Store XMM register to memory (single-precision floating point) 7669// MOVSS instruction 7670instruct storeX(memory mem, regX src) %{ 7671 predicate(UseSSE>=1); 7672 match(Set mem (StoreF mem src)); 7673 ins_cost(95); 7674 format %{ "MOVSS $mem,$src" %} 7675 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, mem)); 7676 ins_pipe( pipe_slow ); 7677%} 7678 7679// Store Aligned Packed Single Float XMM register to memory 7680instruct storeA2F(memory mem, regXD src) %{ 7681 predicate(UseSSE>=1); 7682 match(Set mem (Store2F mem src)); 7683 ins_cost(145); 7684 format %{ "MOVQ $mem,$src\t! 
packed2F" %} 7685 ins_encode( movq_st(mem, src)); 7686 ins_pipe( pipe_slow ); 7687%} 7688 7689// Store Float 7690instruct storeF( memory mem, regFPR1 src) %{ 7691 predicate(UseSSE==0); 7692 match(Set mem (StoreF mem src)); 7693 7694 ins_cost(100); 7695 format %{ "FST_S $mem,$src" %} 7696 opcode(0xD9); /* D9 /2 */ 7697 ins_encode( enc_FP_store(mem,src) ); 7698 ins_pipe( fpu_mem_reg ); 7699%} 7700 7701// Store Float does rounding on x86 7702instruct storeF_rounded( memory mem, regFPR1 src) %{ 7703 predicate(UseSSE==0); 7704 match(Set mem (StoreF mem (RoundFloat src))); 7705 7706 ins_cost(100); 7707 format %{ "FST_S $mem,$src\t# round" %} 7708 opcode(0xD9); /* D9 /2 */ 7709 ins_encode( enc_FP_store(mem,src) ); 7710 ins_pipe( fpu_mem_reg ); 7711%} 7712 7713// Store Float does rounding on x86 7714instruct storeF_Drounded( memory mem, regDPR1 src) %{ 7715 predicate(UseSSE<=1); 7716 match(Set mem (StoreF mem (ConvD2F src))); 7717 7718 ins_cost(100); 7719 format %{ "FST_S $mem,$src\t# D-round" %} 7720 opcode(0xD9); /* D9 /2 */ 7721 ins_encode( enc_FP_store(mem,src) ); 7722 ins_pipe( fpu_mem_reg ); 7723%} 7724 7725// Store immediate Float value (it is faster than store from FPU register) 7726// The instruction usage is guarded by predicate in operand immF(). 7727instruct storeF_imm( memory mem, immF src) %{ 7728 match(Set mem (StoreF mem src)); 7729 7730 ins_cost(50); 7731 format %{ "MOV $mem,$src\t# store float" %} 7732 opcode(0xC7); /* C7 /0 */ 7733 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src )); 7734 ins_pipe( ialu_mem_imm ); 7735%} 7736 7737// Store immediate Float value (it is faster than store from XMM register) 7738// The instruction usage is guarded by predicate in operand immXF(). 
7739instruct storeX_imm( memory mem, immXF src) %{ 7740 match(Set mem (StoreF mem src)); 7741 7742 ins_cost(50); 7743 format %{ "MOV $mem,$src\t# store float" %} 7744 opcode(0xC7); /* C7 /0 */ 7745 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32XF_as_bits( src )); 7746 ins_pipe( ialu_mem_imm ); 7747%} 7748 7749// Store Integer to stack slot 7750instruct storeSSI(stackSlotI dst, eRegI src) %{ 7751 match(Set dst src); 7752 7753 ins_cost(100); 7754 format %{ "MOV $dst,$src" %} 7755 opcode(0x89); 7756 ins_encode( OpcPRegSS( dst, src ) ); 7757 ins_pipe( ialu_mem_reg ); 7758%} 7759 7760// Store Pointer to stack slot 7761instruct storeSSP(stackSlotP dst, eRegP src) %{ 7762 match(Set dst src); 7763 7764 ins_cost(100); 7765 format %{ "MOV $dst,$src" %} 7766 opcode(0x89); 7767 ins_encode( OpcPRegSS( dst, src ) ); 7768 ins_pipe( ialu_mem_reg ); 7769%} 7770 7771// Store Long to stack slot 7772instruct storeSSL(stackSlotL dst, eRegL src) %{ 7773 match(Set dst src); 7774 7775 ins_cost(200); 7776 format %{ "MOV $dst,$src.lo\n\t" 7777 "MOV $dst+4,$src.hi" %} 7778 opcode(0x89, 0x89); 7779 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) ); 7780 ins_pipe( ialu_mem_long_reg ); 7781%} 7782 7783//----------MemBar Instructions----------------------------------------------- 7784// Memory barrier flavors 7785 7786instruct membar_acquire() %{ 7787 match(MemBarAcquire); 7788 ins_cost(400); 7789 7790 size(0); 7791 format %{ "MEMBAR-acquire ! (empty encoding)" %} 7792 ins_encode(); 7793 ins_pipe(empty); 7794%} 7795 7796instruct membar_acquire_lock() %{ 7797 match(MemBarAcquire); 7798 predicate(Matcher::prior_fast_lock(n)); 7799 ins_cost(0); 7800 7801 size(0); 7802 format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %} 7803 ins_encode( ); 7804 ins_pipe(empty); 7805%} 7806 7807instruct membar_release() %{ 7808 match(MemBarRelease); 7809 ins_cost(400); 7810 7811 size(0); 7812 format %{ "MEMBAR-release ! 
(empty encoding)" %} 7813 ins_encode( ); 7814 ins_pipe(empty); 7815%} 7816 7817instruct membar_release_lock() %{ 7818 match(MemBarRelease); 7819 predicate(Matcher::post_fast_unlock(n)); 7820 ins_cost(0); 7821 7822 size(0); 7823 format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %} 7824 ins_encode( ); 7825 ins_pipe(empty); 7826%} 7827 7828instruct membar_volatile(eFlagsReg cr) %{ 7829 match(MemBarVolatile); 7830 effect(KILL cr); 7831 ins_cost(400); 7832 7833 format %{ 7834 $$template 7835 if (os::is_MP()) { 7836 $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile" 7837 } else { 7838 $$emit$$"MEMBAR-volatile ! (empty encoding)" 7839 } 7840 %} 7841 ins_encode %{ 7842 __ membar(Assembler::StoreLoad); 7843 %} 7844 ins_pipe(pipe_slow); 7845%} 7846 7847instruct unnecessary_membar_volatile() %{ 7848 match(MemBarVolatile); 7849 predicate(Matcher::post_store_load_barrier(n)); 7850 ins_cost(0); 7851 7852 size(0); 7853 format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %} 7854 ins_encode( ); 7855 ins_pipe(empty); 7856%} 7857 7858//----------Move Instructions-------------------------------------------------- 7859instruct castX2P(eAXRegP dst, eAXRegI src) %{ 7860 match(Set dst (CastX2P src)); 7861 format %{ "# X2P $dst, $src" %} 7862 ins_encode( /*empty encoding*/ ); 7863 ins_cost(0); 7864 ins_pipe(empty); 7865%} 7866 7867instruct castP2X(eRegI dst, eRegP src ) %{ 7868 match(Set dst (CastP2X src)); 7869 ins_cost(50); 7870 format %{ "MOV $dst, $src\t# CastP2X" %} 7871 ins_encode( enc_Copy( dst, src) ); 7872 ins_pipe( ialu_reg_reg ); 7873%} 7874 7875//----------Conditional Move--------------------------------------------------- 7876// Conditional move 7877instruct cmovI_reg(eRegI dst, eRegI src, eFlagsReg cr, cmpOp cop ) %{ 7878 predicate(VM_Version::supports_cmov() ); 7879 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 7880 ins_cost(200); 7881 format %{ "CMOV$cop $dst,$src" %} 7882 opcode(0x0F,0x40); 7883 ins_encode( enc_cmov(cop), RegReg( 
dst, src ) ); 7884 ins_pipe( pipe_cmov_reg ); 7885%} 7886 7887instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, eRegI dst, eRegI src ) %{ 7888 predicate(VM_Version::supports_cmov() ); 7889 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 7890 ins_cost(200); 7891 format %{ "CMOV$cop $dst,$src" %} 7892 opcode(0x0F,0x40); 7893 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 7894 ins_pipe( pipe_cmov_reg ); 7895%} 7896 7897instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, eRegI dst, eRegI src ) %{ 7898 predicate(VM_Version::supports_cmov() ); 7899 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 7900 ins_cost(200); 7901 expand %{ 7902 cmovI_regU(cop, cr, dst, src); 7903 %} 7904%} 7905 7906// Conditional move 7907instruct cmovI_mem(cmpOp cop, eFlagsReg cr, eRegI dst, memory src) %{ 7908 predicate(VM_Version::supports_cmov() ); 7909 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); 7910 ins_cost(250); 7911 format %{ "CMOV$cop $dst,$src" %} 7912 opcode(0x0F,0x40); 7913 ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 7914 ins_pipe( pipe_cmov_mem ); 7915%} 7916 7917// Conditional move 7918instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, eRegI dst, memory src) %{ 7919 predicate(VM_Version::supports_cmov() ); 7920 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); 7921 ins_cost(250); 7922 format %{ "CMOV$cop $dst,$src" %} 7923 opcode(0x0F,0x40); 7924 ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 7925 ins_pipe( pipe_cmov_mem ); 7926%} 7927 7928instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegI dst, memory src) %{ 7929 predicate(VM_Version::supports_cmov() ); 7930 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); 7931 ins_cost(250); 7932 expand %{ 7933 cmovI_memU(cop, cr, dst, src); 7934 %} 7935%} 7936 7937// Conditional move 7938instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{ 7939 predicate(VM_Version::supports_cmov() ); 7940 match(Set dst (CMoveP (Binary cop cr) (Binary dst 
src))); 7941 ins_cost(200); 7942 format %{ "CMOV$cop $dst,$src\t# ptr" %} 7943 opcode(0x0F,0x40); 7944 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 7945 ins_pipe( pipe_cmov_reg ); 7946%} 7947 7948// Conditional move (non-P6 version) 7949// Note: a CMoveP is generated for stubs and native wrappers 7950// regardless of whether we are on a P6, so we 7951// emulate a cmov here 7952instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{ 7953 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 7954 ins_cost(300); 7955 format %{ "Jn$cop skip\n\t" 7956 "MOV $dst,$src\t# pointer\n" 7957 "skip:" %} 7958 opcode(0x8b); 7959 ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src)); 7960 ins_pipe( pipe_cmov_reg ); 7961%} 7962 7963// Conditional move 7964instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{ 7965 predicate(VM_Version::supports_cmov() ); 7966 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 7967 ins_cost(200); 7968 format %{ "CMOV$cop $dst,$src\t# ptr" %} 7969 opcode(0x0F,0x40); 7970 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 7971 ins_pipe( pipe_cmov_reg ); 7972%} 7973 7974instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{ 7975 predicate(VM_Version::supports_cmov() ); 7976 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 7977 ins_cost(200); 7978 expand %{ 7979 cmovP_regU(cop, cr, dst, src); 7980 %} 7981%} 7982 7983// DISABLED: Requires the ADLC to emit a bottom_type call that 7984// correctly meets the two pointer arguments; one is an incoming 7985// register but the other is a memory operand. ALSO appears to 7986// be buggy with implicit null checks. 
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}

// Conditional move
// Double FCMOV for UseSSE<=1; takes the cmpOp_fcmov / eFlagsRegU operands.
instruct fcmovD_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regD src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_d(cop,src) );
  ins_pipe( pipe_cmovD_reg );
%}

// Conditional move
// Float FCMOV for UseSSE==0; shares the enc_cmov_d encoding and the
// pipe_cmovD_reg pipeline class with fcmovD_regU.
instruct fcmovF_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regF src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode( enc_cmov_d(cop,src) );
  ins_pipe( pipe_cmovD_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# double\n"
      "skip:" %}
  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_D(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovD_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# float\n"
      "skip:" %}
  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_F(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovD_reg );
%}

// No CMOVE with SSE/SSE2
// Emulated with a short branch around a MOVSS.
instruct fcmovX_regS(cmpOp cop, eFlagsReg cr, regX dst, regX src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
      "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// No CMOVE with SSE/SSE2
// Emulated with a short branch around a MOVSD (double operand).
instruct fcmovXD_regS(cmpOp cop, eFlagsReg cr, regXD dst, regXD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# double\n"
      "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// unsigned version
instruct fcmovX_regU(cmpOpU cop, eFlagsRegU cr, regX dst, regX src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
      "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// cmpOpUCF / eFlagsRegUCF flavor: expands to fcmovX_regU.
instruct fcmovX_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regX dst, regX src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovX_regU(cop, cr, dst, src);
  %}
%}

// unsigned version
instruct fcmovXD_regU(cmpOpU cop, eFlagsRegU cr, regXD dst, regXD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# double\n"
      "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// cmpOpUCF / eFlagsRegUCF flavor: expands to fcmovXD_regU.
instruct fcmovXD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regXD dst, regXD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovXD_regU(cop, cr, dst, src);
  %}
%}

// Long conditional move: one CMOV for the low half and one for the high half.
instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Long conditional move, cmpOpU / eFlagsRegU flavor
instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// cmpOpUCF / eFlagsRegUCF flavor: expands to cmovL_regU.
instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}

//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------
// Integer Addition Instructions
instruct addI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Add immediate to register
instruct addI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Add of the immI1 constant as a one-byte INC; only when UseIncDec is set.
instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "INC $dst" %}
  opcode(0x40); /*  */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Integer add of register and immediate into a separate destination via LEA;
// no flags effect is declared.
instruct leaI_eReg_immI(eRegI dst, eRegI src0, immI src1) %{
  match(Set dst (AddI src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Pointer add of register and immediate into a separate destination via LEA;
// no flags effect is declared.
instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
  match(Set dst (AddP src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Add of the immI_M1 constant as a one-byte DEC; only when UseIncDec is set.
instruct decI_eReg(eRegI dst, immI_M1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "DEC $dst" %}
  opcode(0x48); /*  */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Add integer register to pointer
instruct addP_eReg(eRegP dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Add immediate to pointer
instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81,0x00); /* Opcode 81 /0 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Add memory operand to register
instruct addI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Add register to memory (load/add/store of the same address)
instruct addI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "ADD $dst,$src" %}
  opcode(0x01);  /* Opcode 01 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Add Memory with Immediate
instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src" %}
  opcode(0x81);               /* Opcode 81 /0 id */
  ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// Increment memory (add of the immI1 constant)
instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "INC $dst" %}
  opcode(0xFF);               /* Opcode FF /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,dst));
  ins_pipe( ialu_mem_imm );
%}

// Decrement memory (add of the immI_M1 constant)
instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "DEC $dst" %}
  opcode(0xFF);               /* Opcode FF /1 */
  ins_encode( OpcP, RMopc_Mem(0x01,dst));
  ins_pipe( ialu_mem_imm );
%}


// CheckCastPP: zero size, empty encoding.
instruct checkCastPP( eRegP dst ) %{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "#checkcastPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

// CastPP: empty encoding.
instruct castPP( eRegP dst ) %{
  match(Set dst (CastPP dst));
  format %{ "#castPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

// CastII: empty encoding, zero cost.
instruct castII( eRegI dst ) %{
  match(Set dst (CastII dst));
  format %{ "#castII of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}


// Load-locked - same as a regular pointer load when used with compare-swap
instruct loadPLocked(eRegP dst, memory mem) %{
  match(Set dst (LoadPLocked mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# Load ptr. locked" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// LoadLong-locked - same as a volatile long load when used with compare-swap
instruct loadLLocked(stackSlotL dst, load_long_memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadLLocked mem));

  ins_cost(200);
  format %{ "FILD $mem\t# Atomic volatile long load\n\t"
            "FISTp $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

// LoadLong-locked into a stack slot through an XMM temporary (UseSSE>=2)
instruct loadLX_Locked(stackSlotL dst, load_long_memory mem, regXD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (LoadLLocked mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD $dst,$tmp" %}
  ins_encode(enc_loadLX_volatile(mem, dst, tmp));
  ins_pipe( pipe_slow );
%}

// LoadLong-locked into a register pair through an XMM temporary (UseSSE>=2)
instruct loadLX_reg_Locked(eRegL dst, load_long_memory mem, regXD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (LoadLLocked mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD $dst.lo,$tmp\n\t"
            "PSRLQ $tmp,32\n\t"
            "MOVD $dst.hi,$tmp" %}
  ins_encode(enc_loadLX_reg_volatile(mem, dst, tmp));
  ins_pipe( pipe_slow );
%}

// Conditional-store of the updated heap-top.
// Used during allocation of the shared heap.
// Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
  // EAX is killed if there is contention, but then it's also unused.
  // In the common case of no contention, EAX holds the new oop address.
  format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of an int value.
// ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
instruct storeIConditional( memory mem, eAXRegI oldval, eRegI newval, eFlagsReg cr ) %{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of a long value.
// ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
            "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
            "XCHG EBX,ECX"
  %}
  ins_encode %{
    // Note: we need to swap rbx, and rcx before and after the
    //       cmpxchg8 instruction because the instruction uses
    //       rcx as the high order word of the new value to store but
    //       our register encoding uses rbx.
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
    if( os::is_MP() )
      __ lock();
    __ cmpxchg8($mem$$Address);
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
  %}
  ins_pipe( pipe_cmpxchg );
%}

// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them

// Long compare-and-swap; $res is set to 1 on success and 0 on failure
// (see the format string).
instruct compareAndSwapL( eRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
          "fail:" %}
  ins_encode( enc_cmpxchg8(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Pointer compare-and-swap; $res is set to 1 on success and 0 on failure
// (see the format string).
instruct compareAndSwapP( eRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
          "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Int compare-and-swap; $res is set to 1 on success and 0 on failure
// (see the format string).
instruct compareAndSwapI( eRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
          "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

//----------Subtraction Instructions-------------------------------------------
// Integer Subtraction Instructions
8484instruct subI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{ 8485 match(Set dst (SubI dst src)); 8486 effect(KILL cr); 8487 8488 size(2); 8489 format %{ "SUB $dst,$src" %} 8490 opcode(0x2B); 8491 ins_encode( OpcP, RegReg( dst, src) ); 8492 ins_pipe( ialu_reg_reg ); 8493%} 8494 8495instruct subI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{ 8496 match(Set dst (SubI dst src)); 8497 effect(KILL cr); 8498 8499 format %{ "SUB $dst,$src" %} 8500 opcode(0x81,0x05); /* Opcode 81 /5 */ 8501 // ins_encode( RegImm( dst, src) ); 8502 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8503 ins_pipe( ialu_reg ); 8504%} 8505 8506instruct subI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{ 8507 match(Set dst (SubI dst (LoadI src))); 8508 effect(KILL cr); 8509 8510 ins_cost(125); 8511 format %{ "SUB $dst,$src" %} 8512 opcode(0x2B); 8513 ins_encode( OpcP, RegMem( dst, src) ); 8514 ins_pipe( ialu_reg_mem ); 8515%} 8516 8517instruct subI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{ 8518 match(Set dst (StoreI dst (SubI (LoadI dst) src))); 8519 effect(KILL cr); 8520 8521 ins_cost(150); 8522 format %{ "SUB $dst,$src" %} 8523 opcode(0x29); /* Opcode 29 /r */ 8524 ins_encode( OpcP, RegMem( src, dst ) ); 8525 ins_pipe( ialu_mem_reg ); 8526%} 8527 8528// Subtract from a pointer 8529instruct subP_eReg(eRegP dst, eRegI src, immI0 zero, eFlagsReg cr) %{ 8530 match(Set dst (AddP dst (SubI zero src))); 8531 effect(KILL cr); 8532 8533 size(2); 8534 format %{ "SUB $dst,$src" %} 8535 opcode(0x2B); 8536 ins_encode( OpcP, RegReg( dst, src) ); 8537 ins_pipe( ialu_reg_reg ); 8538%} 8539 8540instruct negI_eReg(eRegI dst, immI0 zero, eFlagsReg cr) %{ 8541 match(Set dst (SubI zero dst)); 8542 effect(KILL cr); 8543 8544 size(2); 8545 format %{ "NEG $dst" %} 8546 opcode(0xF7,0x03); // Opcode F7 /3 8547 ins_encode( OpcP, RegOpc( dst ) ); 8548 ins_pipe( ialu_reg ); 8549%} 8550 8551 8552//----------Multiplication/Division Instructions------------------------------- 8553// Integer Multiplication 
Instructions 8554// Multiply Register 8555instruct mulI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{ 8556 match(Set dst (MulI dst src)); 8557 effect(KILL cr); 8558 8559 size(3); 8560 ins_cost(300); 8561 format %{ "IMUL $dst,$src" %} 8562 opcode(0xAF, 0x0F); 8563 ins_encode( OpcS, OpcP, RegReg( dst, src) ); 8564 ins_pipe( ialu_reg_reg_alu0 ); 8565%} 8566 8567// Multiply 32-bit Immediate 8568instruct mulI_eReg_imm(eRegI dst, eRegI src, immI imm, eFlagsReg cr) %{ 8569 match(Set dst (MulI src imm)); 8570 effect(KILL cr); 8571 8572 ins_cost(300); 8573 format %{ "IMUL $dst,$src,$imm" %} 8574 opcode(0x69); /* 69 /r id */ 8575 ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) ); 8576 ins_pipe( ialu_reg_reg_alu0 ); 8577%} 8578 8579instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{ 8580 match(Set dst src); 8581 effect(KILL cr); 8582 8583 // Note that this is artificially increased to make it more expensive than loadConL 8584 ins_cost(250); 8585 format %{ "MOV EAX,$src\t// low word only" %} 8586 opcode(0xB8); 8587 ins_encode( LdImmL_Lo(dst, src) ); 8588 ins_pipe( ialu_reg_fat ); 8589%} 8590 8591// Multiply by 32-bit Immediate, taking the shifted high order results 8592// (special case for shift by 32) 8593instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{ 8594 match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); 8595 predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && 8596 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && 8597 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); 8598 effect(USE src1, KILL cr); 8599 8600 // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only 8601 ins_cost(0*100 + 1*400 - 150); 8602 format %{ "IMUL EDX:EAX,$src1" %} 8603 ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); 8604 ins_pipe( 
pipe_slow ); 8605%} 8606 8607// Multiply by 32-bit Immediate, taking the shifted high order results 8608instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{ 8609 match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); 8610 predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && 8611 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && 8612 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); 8613 effect(USE src1, KILL cr); 8614 8615 // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only 8616 ins_cost(1*100 + 1*400 - 150); 8617 format %{ "IMUL EDX:EAX,$src1\n\t" 8618 "SAR EDX,$cnt-32" %} 8619 ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); 8620 ins_pipe( pipe_slow ); 8621%} 8622 8623// Multiply Memory 32-bit Immediate 8624instruct mulI_mem_imm(eRegI dst, memory src, immI imm, eFlagsReg cr) %{ 8625 match(Set dst (MulI (LoadI src) imm)); 8626 effect(KILL cr); 8627 8628 ins_cost(300); 8629 format %{ "IMUL $dst,$src,$imm" %} 8630 opcode(0x69); /* 69 /r id */ 8631 ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) ); 8632 ins_pipe( ialu_reg_mem_alu0 ); 8633%} 8634 8635// Multiply Memory 8636instruct mulI(eRegI dst, memory src, eFlagsReg cr) %{ 8637 match(Set dst (MulI dst (LoadI src))); 8638 effect(KILL cr); 8639 8640 ins_cost(350); 8641 format %{ "IMUL $dst,$src" %} 8642 opcode(0xAF, 0x0F); 8643 ins_encode( OpcS, OpcP, RegMem( dst, src) ); 8644 ins_pipe( ialu_reg_mem_alu0 ); 8645%} 8646 8647// Multiply Register Int to Long 8648instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{ 8649 // Basic Idea: long = (long)int * (long)int 8650 match(Set dst (MulL (ConvI2L src) (ConvI2L src1))); 8651 effect(DEF dst, USE src, USE src1, KILL flags); 8652 8653 ins_cost(300); 8654 format %{ "IMUL $dst,$src1" %} 8655 8656 
ins_encode( long_int_multiply( dst, src1 ) ); 8657 ins_pipe( ialu_reg_reg_alu0 ); 8658%} 8659 8660instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{ 8661 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) 8662 match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask))); 8663 effect(KILL flags); 8664 8665 ins_cost(300); 8666 format %{ "MUL $dst,$src1" %} 8667 8668 ins_encode( long_uint_multiply(dst, src1) ); 8669 ins_pipe( ialu_reg_reg_alu0 ); 8670%} 8671 8672// Multiply Register Long 8673instruct mulL_eReg(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{ 8674 match(Set dst (MulL dst src)); 8675 effect(KILL cr, TEMP tmp); 8676 ins_cost(4*100+3*400); 8677// Basic idea: lo(result) = lo(x_lo * y_lo) 8678// hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 8679 format %{ "MOV $tmp,$src.lo\n\t" 8680 "IMUL $tmp,EDX\n\t" 8681 "MOV EDX,$src.hi\n\t" 8682 "IMUL EDX,EAX\n\t" 8683 "ADD $tmp,EDX\n\t" 8684 "MUL EDX:EAX,$src.lo\n\t" 8685 "ADD EDX,$tmp" %} 8686 ins_encode( long_multiply( dst, src, tmp ) ); 8687 ins_pipe( pipe_slow ); 8688%} 8689 8690// Multiply Register Long where the left operand's high 32 bits are zero 8691instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{ 8692 predicate(is_operand_hi32_zero(n->in(1))); 8693 match(Set dst (MulL dst src)); 8694 effect(KILL cr, TEMP tmp); 8695 ins_cost(2*100+2*400); 8696// Basic idea: lo(result) = lo(x_lo * y_lo) 8697// hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0 8698 format %{ "MOV $tmp,$src.hi\n\t" 8699 "IMUL $tmp,EAX\n\t" 8700 "MUL EDX:EAX,$src.lo\n\t" 8701 "ADD EDX,$tmp" %} 8702 ins_encode %{ 8703 __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register)); 8704 __ imull($tmp$$Register, rax); 8705 __ mull($src$$Register); 8706 __ addl(rdx, $tmp$$Register); 8707 %} 8708 ins_pipe( pipe_slow ); 8709%} 8710 8711// Multiply Register Long where the right operand's high 32 
// bits are zero
// Selected only when the predicate holds for the right operand (n->in(2)),
// which lets the x_lo * y_hi partial product be dropped.
instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
// Basic idea: lo(result) = lo(x_lo * y_lo)
//             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ imull($tmp$$Register, rdx);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left and the right operands' high 32 bits are zero
// Both predicates hold, so a single unsigned MUL of the low halves is emitted
// and no temporary register is needed.
instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr);
  ins_cost(1*400);
// Basic idea: lo(result) = lo(x_lo * y_lo)
//             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
  // NOTE(review): the format string ends with a dangling "\n\t" although it
  // is the only line of the listing; left unchanged here.
  format %{ "MUL EDX:EAX,$src.lo\n\t" %}
  ins_encode %{
    __ mull($src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long by small constant
// The constant operand is an immL_127; the instruction declares a fixed
// encoded size of 12 bytes.
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, eRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  size(12);
// Basic idea: lo(result) = lo(src * EAX)
//             hi(result) = hi(src * EAX) + lo(src * EDX)
  format %{ "IMUL $tmp,EDX,$src\n\t"
            "MOV EDX,$src\n\t"
            "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply_con( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Integer DIV with Register
// Quotient is produced in EAX; EDX and the flags are killed.  The printed
// sequence special-cases EAX == 0x80000000 with a divisor of -1 and skips
// the IDIV in that case.
instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
    "normal: CDQ\n\t"
            "IDIV $div\n\t"
    "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Divide Register Long
// Implemented as a runtime call: both operands are pushed and
// SharedRuntime::ldiv is called; ECX, EBX and the flags are killed.
instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (DivL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::ldiv\n\t"
            "ADD ESP,16" %}
  ins_encode( long_div(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Integer DIVMOD with Register, both quotient and mod results
// Same encoding as divI_eReg, but neither EAX nor EDX is killed because
// both results are consumed.
instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(DivModI rax div);
  effect(KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
    "normal: CDQ\n\t"
            "IDIV $div\n\t"
    "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( pipe_slow );
%}

// Integer MOD with Register
// Remainder is produced in EDX; EAX and the flags are killed.
// NOTE(review): size(26) and the cdq_enc-based encoding are identical to
// divI_eReg, yet the format only prints CDQ/IDIV -- the listing presumably
// omits the 0x80000000 / -1 guard; confirm against cdq_enc.
instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  size(26);
  ins_cost(300);
  format %{ "CDQ\n\t"
            "IDIV $div" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Remainder Register Long
// Implemented as a runtime call to SharedRuntime::lrem, mirroring divL_eReg.
instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (ModL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::lrem\n\t"
            "ADD ESP,16" %}
  ins_encode( long_mod(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Integer Shift Instructions
// Shift Left by one
instruct shlI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD1, 0x4); /* D1 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Shift Left by 8-bit immediate
instruct salI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHL $dst,$shift" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}

// Shift Left by variable
// The shift count must live in ECX (eCXRegI operand).
instruct salI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD3, 0x4); /* D3 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Arithmetic shift right by one
instruct sarI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Arithmetic shift right by one
// Memory form: matches a load / shift / store back to the same address.
instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RMopc_Mem(secondary,dst) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
// NOTE(review): this register form uses the ialu_mem_imm pipe class, while
// the sibling register/immediate shifts (salI_eReg_imm, shrI_eReg_imm) use
// ialu_reg -- confirm whether that is intentional.
instruct sarI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( RegOpcImm( dst, shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
// Memory form: matches a load / shift / store back to the same address.
instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by variable
// The shift count must live in ECX (eCXRegI operand).
instruct sarI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD3, 0x7); /* D3 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Logical shift right by one
instruct shrI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD1, 0x5); /* D1 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Logical Shift Right by 8-bit immediate
instruct shrI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHR $dst,$shift" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}


// Shift Left by 24, followed by Arithmetic Shift Right by 24.
// This idiom is used by the compiler for the i2b bytecode.
// The matched tree is (RShiftI (LShiftI src 24) 24): a left shift by 24
// followed by an arithmetic right shift by 24, which is replaced by a single
// sign-extending byte move (MOVSX).
instruct i2b(eRegI dst, xRegI src, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  size(3);
  format %{ "MOVSX $dst,$src :8" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
// Replaced by a single sign-extending 16-bit move (MOVSX).
instruct i2s(eRegI dst, xRegI src, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  size(3);
  format %{ "MOVSX $dst,$src :16" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}


// Logical Shift Right by variable
// The shift count must live in ECX (eCXRegI operand).
instruct shrI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD3, 0x5); /* D3 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}


//----------Logical Instructions-----------------------------------------------
//----------Integer Logical Instructions---------------------------------------
// And Instructions
// And Register with Register
instruct andI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  size(2);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// And Register with Immediate
instruct andI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "AND $dst,$src" %}
  opcode(0x81,0x04); /* Opcode 81 /4 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// And Register with Memory
instruct andI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// And Memory with Register
// Read-modify-write form: matches load / AND / store to the same address.
instruct andI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x21); /* Opcode 21 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// And Memory with Immediate
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x81, 0x4); /* Opcode 81 /4 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// Or Instructions
// Or Register with Register
instruct orI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Or Register with a pointer register reinterpreted as an integer (CastP2X);
// same encoding as orI_eReg.
instruct orI_eReg_castP2X(eRegI dst, eRegP src, eFlagsReg cr) %{
  match(Set dst (OrI dst (CastP2X src)));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}


// Or Register with Immediate
instruct orI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "OR $dst,$src" %}
  opcode(0x81,0x01); /* Opcode 81 /1 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Or Register with Memory
instruct orI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Or Memory with Register
// Read-modify-write form: matches load / OR / store to the same address.
instruct orI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x09); /* Opcode 09 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Or Memory with Immediate
instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x81,0x1); /* Opcode 81 /1 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// ROL/ROR
// ROL expand
// The three rolI_eReg_imm1/imm8/CL instructs have no match rule; they are
// only reached through the expand rules of the matching instructs below.
instruct rolI_eReg_imm1(eRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD1, 0x0); /* Opcode D1 /0 */
  ins_encode( OpcP, RegOpc( dst ));
  ins_pipe( ialu_reg );
%}

// ROL by 8-bit immediate (expand target of rolI_eReg_i8)
instruct rolI_eReg_imm8(eRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xC1, 0x0); /*Opcode /C1 /0  */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe(ialu_reg);
%}

// ROL by ECX (expand target of the rolI_eReg_Var_* rules); dst is restricted
// to a non-ECX register class.
instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD3, 0x0); /* Opcode D3 /0 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROL expand

// ROL 32bit by one once
// Matches (dst << lshift) | (dst >>> rshift) with an immI1 left count and an
// immI_M1 right count.
instruct rolI_eReg_i1(eRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm1(dst, lshift, cr);
  %}
%}

// ROL 32bit var by imm8 once
// The predicate accepts the pattern only when the two shift constants sum to
// a multiple of 32 (sum & 0x1f == 0), i.e. when the OR really is a rotate.
instruct rolI_eReg_i8(eRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm8(dst, lshift, cr);
  %}
%}

// ROL 32bit var by var once
// Right-shift count is written as (0 - shift).
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROL 32bit var by var once
// Right-shift count is written as (32 - shift).
instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR expand
// As with ROL, the rorI_eReg_imm1/imm8/CL instructs have no match rule and
// are only reached through expand.
instruct rorI_eReg_imm1(eRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD1,0x1); /* Opcode D1 /1 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// ROR by 8-bit immediate (expand target of rorI_eReg_i8)
instruct rorI_eReg_imm8(eRegI dst, immI8 shift, eFlagsReg cr) %{
  effect (USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe( ialu_reg );
%}

// ROR by ECX (expand target of the rorI_eReg_Var_* rules)
instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD3, 0x1); /* Opcode D3 /1 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROR expand

// ROR 32bit by one once
instruct rorI_eReg_i1(eRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm1(dst, rshift, cr);
  %}
%}

// ROR 32bit by immI8 once
// Same predicate as rolI_eReg_i8: the two shift constants must sum to a
// multiple of 32.
instruct rorI_eReg_i8(eRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm8(dst, rshift, cr);
  %}
%}

// ROR 32bit var by var once
// Left-shift count is written as (0 - shift).
instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR 32bit var by var once
// Left-shift count is written as (32 - shift).
instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// Xor Instructions
// Xor Register with Register
instruct xorI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  size(2);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Xor Register with Immediate -1
// Emitted as NOT; unlike the other Xor forms this one declares no flags
// operand and no KILL effect.
instruct xorI_eReg_im1(eRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));

  size(2);
  format %{ "NOT $dst" %}
  ins_encode %{
    __ notl($dst$$Register);
  %}
  ins_pipe( ialu_reg );
%}

// Xor Register with Immediate
instruct xorI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x06); /* Opcode 81 /6 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Xor Register with Memory
instruct xorI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegMem(dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Xor Memory with Register
// Read-modify-write form: matches load / XOR / store to the same address.
instruct xorI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR $dst,$src" %}
  opcode(0x31); /* Opcode 31 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Xor Memory with Immediate
instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x6); /* Opcode 81 /6 id */
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

//----------Convert Int to Boolean---------------------------------------------

// Plain register copy with no match rule; first step of the convI2B expand.
instruct movI_nocopy(eRegI dst, eRegI src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Second step of the convI2B expand.  With dst holding a copy of src, the
// NEG / ADC pair computes (-x) + x + carry, leaving the carry that NEG
// produced for a non-zero x, i.e. 0 or 1.
instruct ci2b( eRegI dst, eRegI src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );

  size(4);
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Conv2B of an int: expands into a copy followed by the NEG/ADC sequence.
instruct convI2B( eRegI dst, eRegI src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movI_nocopy(dst,src);
    ci2b(dst,src,cr);
  %}
%}

// Pointer-source counterpart of movI_nocopy; first step of the convP2B expand.
instruct movP_nocopy(eRegI dst, eRegP src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Pointer-source counterpart of ci2b (same NEG/ADC sequence; no size()
// is declared here).
instruct cp2b( eRegI dst, eRegP src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Conv2B of a pointer: expands into a copy followed by the NEG/ADC sequence.
instruct convP2B( eRegI dst, eRegP src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movP_nocopy(dst,src);
    cp2b(dst,src,cr);
  %}
%}

// CmpLTMask p q: per the printed sequence, dst is cleared, set to 1 when
// p < q, then negated, giving an all-ones or all-zeros mask.  dst is pinned
// to ECX and p/q are kept out of ECX.
instruct cmpLTMask( eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr ) %{
  match(Set dst (CmpLTMask p q));
  effect( KILL cr );
  ins_cost(400);

  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
  format %{ "XOR $dst,$dst\n\t"
            "CMP $p,$q\n\t"
            "SETlt $dst\n\t"
            "NEG $dst" %}
  ins_encode( OpcRegReg(0x33,dst,dst),
              OpcRegReg(0x3B,p,q),
              setLT_reg(dst), neg_reg(dst) );
  ins_pipe( pipe_slow );
%}

// CmpLTMask against zero: a single arithmetic shift right by 31 replicates
// the sign bit across the register.
instruct cmpLTMask0( eRegI dst, immI0 zero, eFlagsReg cr ) %{
  match(Set dst (CmpLTMask dst zero));
  effect( DEF dst, KILL cr );
  ins_cost(100);

  format %{ "SAR $dst,31" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( RegOpcImm( dst, 0x1F ) );
  ins_pipe( ialu_reg );
%}


// Fused form of ((p < q) mask & y) + (p - q).  The printed sequence uses
// ECX as scratch (SUB / SBB ECX,ECX / AND / ADD), which is why tmp is an
// ECX operand that gets killed.
instruct cadd_cmpLTMask( ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp, eFlagsReg cr ) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect( KILL tmp, KILL cr );
  ins_cost(400);
  // annoyingly, $tmp has no edges so you can't ask for it in
  // any format or encoding
  format %{ "SUB $p,$q\n\t"
            "SBB ECX,ECX\n\t"
            "AND ECX,$y\n\t"
            "ADD $p,ECX" %}
  ins_encode( enc_cmpLTP(p,q,y,tmp) );
  ins_pipe( pipe_cmplt );
%}

/* If I enable this, I encourage spilling in the inner loop of compress.
instruct cadd_cmpLTMask_mem( ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr ) %{
  match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
  effect( USE_KILL tmp, KILL cr );
  ins_cost(400);

  format %{ "SUB $p,$q\n\t"
            "SBB ECX,ECX\n\t"
            "AND ECX,$y\n\t"
            "ADD $p,ECX" %}
  ins_encode( enc_cmpLTP_mem(p,q,y,tmp) );
%}
*/

//----------Long Instructions------------------------------------------------
// Add Long Register with Register
// 64-bit add as a 32-bit ADD on the low halves and ADC on the high halves.
instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x03, 0x13);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Add Long Register with Immediate
instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Add Long Register with Memory
// The high half is read from $mem+4.
instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AddL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "ADD $dst.lo,$mem\n\t"
            "ADC $dst.hi,$mem+4" %}
  opcode(0x03, 0x13);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Subtract Long
// Register with Register.
// 64-bit subtract as a 32-bit SUB on the low halves and SBB on the high halves.
instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x2B, 0x1B);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Subtract Long Register with Immediate
instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Subtract Long Register with Memory
// The high half is read from $mem+4.
instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (SubL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "SUB $dst.lo,$mem\n\t"
            "SBB $dst.hi,$mem+4" %}
  opcode(0x2B, 0x1B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Negate Long Register: matches (0 - dst) and negates both halves,
// propagating the borrow from the low half with SBB.
instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
  match(Set dst (SubL zero dst));
  effect(KILL cr);
  ins_cost(300);
  format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %}
  ins_encode( neg_long(dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Register
instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x23,0x23);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Immediate
instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// And Long Register with Memory
instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AndL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "AND $dst.lo,$mem\n\t"
            "AND $dst.hi,$mem+4" %}
  opcode(0x23, 0x23);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Or Long Register with Register
instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x0B,0x0B);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Or Long Register with Immediate
instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Or Long Register with Memory
instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (OrL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "OR $dst.lo,$mem\n\t"
            "OR $dst.hi,$mem+4" %}
  opcode(0x0B,0x0B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Xor Long Register with Register
instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Xor Long Register with Immediate -1
// Emitted as NOT on each half; no flags operand or KILL effect is declared.
instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
  match(Set dst (XorL dst imm));
  format %{ "NOT $dst.lo\n\t"
            "NOT $dst.hi" %}
  ins_encode %{
     __ notl($dst$$Register);
     __ notl(HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Immediate
instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Memory
instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (XorL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "XOR $dst.lo,$mem\n\t"
            "XOR $dst.hi,$mem+4" %}
  opcode(0x33,0x33);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Shift Left Long by 1
// Only used when UseNewLongLShift is set.  One ADD/ADC pair adds the value
// to itself, i.e. shifts the 64-bit value left by one.
instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 2
// Two ADD/ADC doubling steps; only used when UseNewLongLShift is set.
instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 3
// Three ADD/ADC doubling steps; only used when UseNewLongLShift is set.
instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 1-31
// SHLD moves the bits that cross from the low half into the high half.
instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t"
            "SHL $dst.lo,$cnt" %}
  opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 32-63
// The low half becomes the high half (shifted by cnt-32) and the low half
// is cleared.
instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.hi,$dst.lo\n"
          "\tSHL $dst.hi,$cnt-32\n"
          "\tXOR $dst.lo,$dst.lo" %}
  opcode(0xC1, 0x4);  /* C1 /4 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by variable
// Count in ECX.  Bit 5 of the count selects the ">= 32" path, which first
// moves the low half up and clears it before the SHLD/SHL pair.
instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);
  ins_cost(500+200);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "XOR $dst.lo,$dst.lo\n"
    "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t"
            "SHL $dst.lo,$shift" %}
  ins_encode( shift_left_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

// Shift Right Long by 1-31
// Logical (unsigned) shift: matches URShiftL and shifts zeros into the high half.
instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SHR $dst.hi,$cnt" %}
  opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by 32-63
// Logical (unsigned) shift: the high half becomes the low half (shifted by
// cnt-32) and the high half is cleared.
instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
          "\tSHR $dst.lo,$cnt-32\n"
          "\tXOR $dst.hi,$dst.hi" %}
  opcode(0xC1, 0x5);  /* C1 /5 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by variable
// Logical (unsigned) shift with the count in ECX; same bit-5 test as
// salL_eReg_CL.
instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "XOR $dst.hi,$dst.hi\n"
    "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SHR $dst.hi,$shift" %}
  ins_encode( shift_right_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

// Shift Right Long
// by 1-31
// Arithmetic (signed) variant: matches RShiftL and uses SAR on the high half.
instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SAR $dst.hi,$cnt" %}
  opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right arithmetic Long by 32-63
// The high half becomes the low half (shifted by cnt-32) and the high half
// is filled with the sign via SAR 31.
instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
          "\tSAR $dst.lo,$cnt-32\n"
          "\tSAR $dst.hi,31" %}
  opcode(0xC1, 0x7);  /* C1 /7 ib */
  ins_encode( move_long_big_shift_sign(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right arithmetic Long by variable
// Count in ECX.  Declared one byte larger (18) than the logical variants.
instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(18);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "SAR $dst.hi,31\n"
    "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SAR $dst.hi,$shift" %}
  ins_encode( shift_right_arith_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}


//----------Double Instructions------------------------------------------------
// Double Math

// Compare & branch

// P6 version of double compare (matches CmpD), sets condition codes in EFLAGS
// Requires CMOV support and UseSSE <= 1.  EAX is killed because the printed
// NaN fix-up path goes through AH / SAHF.
instruct cmpD_cc_P6(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction\n\t"
            "JNP exit\n\t"
            "MOV ah,1 // saw a NaN, set CF\n\t"
            "SAHF\n"
     "exit:\tNOP // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

// Same FUCOMIP compare as cmpD_cc_P6 but producing an eFlagsRegUCF result:
// no NaN fix-up is emitted and EAX is not killed.
instruct cmpD_cc_P6CF(eFlagsRegUCF cr, regD src1, regD src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}

// Compare & branch
// Fallback without the CMOV requirement: the FPU status word is copied to
// EFLAGS via FNSTSW / SAHF, with unordered results treated as "less than".
instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{
  predicate(UseSSE<=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD $src1\n\t"
            "FCOMp $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST AX,0x400\n\t"
            "JZ,s flags\n\t"
            "MOV AH,1\t# unordered treat as LT\n"
    "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}

// Compare vs zero into -1,0,1
// NOTE(review): with opcode(0xE4, 0xD9) and OpcS emitted before OpcP the
// byte order is presumably D9 E4 (FTST) -- confirm against the OpcS/OpcP
// encoding classes.
instruct cmpD_0(eRegI dst, regD src1, immD0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTD $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_D(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1
// Three-way double compare (CmpD3); EAX and the flags are killed.
instruct cmpD_reg(eRegI dst, regD src1, regD src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPD $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM
regs 9918instruct cmpXD_cc(eFlagsRegU cr, regXD dst, regXD src, eAXRegI rax) %{ 9919 predicate(UseSSE>=2); 9920 match(Set cr (CmpD dst src)); 9921 effect(KILL rax); 9922 ins_cost(125); 9923 format %{ "COMISD $dst,$src\n" 9924 "\tJNP exit\n" 9925 "\tMOV ah,1 // saw a NaN, set CF\n" 9926 "\tSAHF\n" 9927 "exit:\tNOP // avoid branch to branch" %} 9928 opcode(0x66, 0x0F, 0x2F); 9929 ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src), cmpF_P6_fixup); 9930 ins_pipe( pipe_slow ); 9931%} 9932 9933instruct cmpXD_ccCF(eFlagsRegUCF cr, regXD dst, regXD src) %{ 9934 predicate(UseSSE>=2); 9935 match(Set cr (CmpD dst src)); 9936 ins_cost(100); 9937 format %{ "COMISD $dst,$src" %} 9938 opcode(0x66, 0x0F, 0x2F); 9939 ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src)); 9940 ins_pipe( pipe_slow ); 9941%} 9942 9943// float compare and set condition codes in EFLAGS by XMM regs 9944instruct cmpXD_ccmem(eFlagsRegU cr, regXD dst, memory src, eAXRegI rax) %{ 9945 predicate(UseSSE>=2); 9946 match(Set cr (CmpD dst (LoadD src))); 9947 effect(KILL rax); 9948 ins_cost(145); 9949 format %{ "COMISD $dst,$src\n" 9950 "\tJNP exit\n" 9951 "\tMOV ah,1 // saw a NaN, set CF\n" 9952 "\tSAHF\n" 9953 "exit:\tNOP // avoid branch to branch" %} 9954 opcode(0x66, 0x0F, 0x2F); 9955 ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src), cmpF_P6_fixup); 9956 ins_pipe( pipe_slow ); 9957%} 9958 9959instruct cmpXD_ccmemCF(eFlagsRegUCF cr, regXD dst, memory src) %{ 9960 predicate(UseSSE>=2); 9961 match(Set cr (CmpD dst (LoadD src))); 9962 ins_cost(100); 9963 format %{ "COMISD $dst,$src" %} 9964 opcode(0x66, 0x0F, 0x2F); 9965 ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src)); 9966 ins_pipe( pipe_slow ); 9967%} 9968 9969// Compare into -1,0,1 in XMM 9970instruct cmpXD_reg(eRegI dst, regXD src1, regXD src2, eFlagsReg cr) %{ 9971 predicate(UseSSE>=2); 9972 match(Set dst (CmpD3 src1 src2)); 9973 effect(KILL cr); 9974 ins_cost(255); 9975 format %{ "XOR $dst,$dst\n" 9976 "\tCOMISD $src1,$src2\n" 
9977 "\tJP,s nan\n" 9978 "\tJEQ,s exit\n" 9979 "\tJA,s inc\n" 9980 "nan:\tDEC $dst\n" 9981 "\tJMP,s exit\n" 9982 "inc:\tINC $dst\n" 9983 "exit:" 9984 %} 9985 opcode(0x66, 0x0F, 0x2F); 9986 ins_encode(Xor_Reg(dst), OpcP, OpcS, Opcode(tertiary), RegReg(src1, src2), 9987 CmpX_Result(dst)); 9988 ins_pipe( pipe_slow ); 9989%} 9990 9991// Compare into -1,0,1 in XMM and memory 9992instruct cmpXD_regmem(eRegI dst, regXD src1, memory mem, eFlagsReg cr) %{ 9993 predicate(UseSSE>=2); 9994 match(Set dst (CmpD3 src1 (LoadD mem))); 9995 effect(KILL cr); 9996 ins_cost(275); 9997 format %{ "COMISD $src1,$mem\n" 9998 "\tMOV $dst,0\t\t# do not blow flags\n" 9999 "\tJP,s nan\n" 10000 "\tJEQ,s exit\n" 10001 "\tJA,s inc\n" 10002 "nan:\tDEC $dst\n" 10003 "\tJMP,s exit\n" 10004 "inc:\tINC $dst\n" 10005 "exit:" 10006 %} 10007 opcode(0x66, 0x0F, 0x2F); 10008 ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(src1, mem), 10009 LdImmI(dst,0x0), CmpX_Result(dst)); 10010 ins_pipe( pipe_slow ); 10011%} 10012 10013 10014instruct subD_reg(regD dst, regD src) %{ 10015 predicate (UseSSE <=1); 10016 match(Set dst (SubD dst src)); 10017 10018 format %{ "FLD $src\n\t" 10019 "DSUBp $dst,ST" %} 10020 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 10021 ins_cost(150); 10022 ins_encode( Push_Reg_D(src), 10023 OpcP, RegOpc(dst) ); 10024 ins_pipe( fpu_reg_reg ); 10025%} 10026 10027instruct subD_reg_round(stackSlotD dst, regD src1, regD src2) %{ 10028 predicate (UseSSE <=1); 10029 match(Set dst (RoundDouble (SubD src1 src2))); 10030 ins_cost(250); 10031 10032 format %{ "FLD $src2\n\t" 10033 "DSUB ST,$src1\n\t" 10034 "FSTP_D $dst\t# D-round" %} 10035 opcode(0xD8, 0x5); 10036 ins_encode( Push_Reg_D(src2), 10037 OpcP, RegOpc(src1), Pop_Mem_D(dst) ); 10038 ins_pipe( fpu_mem_reg_reg ); 10039%} 10040 10041 10042instruct subD_reg_mem(regD dst, memory src) %{ 10043 predicate (UseSSE <=1); 10044 match(Set dst (SubD dst (LoadD src))); 10045 ins_cost(150); 10046 10047 format %{ "FLD $src\n\t" 10048 "DSUBp $dst,ST" %} 10049 
opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 10050 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 10051 OpcP, RegOpc(dst) ); 10052 ins_pipe( fpu_reg_mem ); 10053%} 10054 10055instruct absD_reg(regDPR1 dst, regDPR1 src) %{ 10056 predicate (UseSSE<=1); 10057 match(Set dst (AbsD src)); 10058 ins_cost(100); 10059 format %{ "FABS" %} 10060 opcode(0xE1, 0xD9); 10061 ins_encode( OpcS, OpcP ); 10062 ins_pipe( fpu_reg_reg ); 10063%} 10064 10065instruct absXD_reg( regXD dst ) %{ 10066 predicate(UseSSE>=2); 10067 match(Set dst (AbsD dst)); 10068 format %{ "ANDPD $dst,[0x7FFFFFFFFFFFFFFF]\t# ABS D by sign masking" %} 10069 ins_encode( AbsXD_encoding(dst)); 10070 ins_pipe( pipe_slow ); 10071%} 10072 10073instruct negD_reg(regDPR1 dst, regDPR1 src) %{ 10074 predicate(UseSSE<=1); 10075 match(Set dst (NegD src)); 10076 ins_cost(100); 10077 format %{ "FCHS" %} 10078 opcode(0xE0, 0xD9); 10079 ins_encode( OpcS, OpcP ); 10080 ins_pipe( fpu_reg_reg ); 10081%} 10082 10083instruct negXD_reg( regXD dst ) %{ 10084 predicate(UseSSE>=2); 10085 match(Set dst (NegD dst)); 10086 format %{ "XORPD $dst,[0x8000000000000000]\t# CHS D by sign flipping" %} 10087 ins_encode %{ 10088 __ xorpd($dst$$XMMRegister, 10089 ExternalAddress((address)double_signflip_pool)); 10090 %} 10091 ins_pipe( pipe_slow ); 10092%} 10093 10094instruct addD_reg(regD dst, regD src) %{ 10095 predicate(UseSSE<=1); 10096 match(Set dst (AddD dst src)); 10097 format %{ "FLD $src\n\t" 10098 "DADD $dst,ST" %} 10099 size(4); 10100 ins_cost(150); 10101 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 10102 ins_encode( Push_Reg_D(src), 10103 OpcP, RegOpc(dst) ); 10104 ins_pipe( fpu_reg_reg ); 10105%} 10106 10107 10108instruct addD_reg_round(stackSlotD dst, regD src1, regD src2) %{ 10109 predicate(UseSSE<=1); 10110 match(Set dst (RoundDouble (AddD src1 src2))); 10111 ins_cost(250); 10112 10113 format %{ "FLD $src2\n\t" 10114 "DADD ST,$src1\n\t" 10115 "FSTP_D $dst\t# D-round" %} 10116 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ 
10117 ins_encode( Push_Reg_D(src2), 10118 OpcP, RegOpc(src1), Pop_Mem_D(dst) ); 10119 ins_pipe( fpu_mem_reg_reg ); 10120%} 10121 10122 10123instruct addD_reg_mem(regD dst, memory src) %{ 10124 predicate(UseSSE<=1); 10125 match(Set dst (AddD dst (LoadD src))); 10126 ins_cost(150); 10127 10128 format %{ "FLD $src\n\t" 10129 "DADDp $dst,ST" %} 10130 opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 10131 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 10132 OpcP, RegOpc(dst) ); 10133 ins_pipe( fpu_reg_mem ); 10134%} 10135 10136// add-to-memory 10137instruct addD_mem_reg(memory dst, regD src) %{ 10138 predicate(UseSSE<=1); 10139 match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src)))); 10140 ins_cost(150); 10141 10142 format %{ "FLD_D $dst\n\t" 10143 "DADD ST,$src\n\t" 10144 "FST_D $dst" %} 10145 opcode(0xDD, 0x0); 10146 ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst), 10147 Opcode(0xD8), RegOpc(src), 10148 set_instruction_start, 10149 Opcode(0xDD), RMopc_Mem(0x03,dst) ); 10150 ins_pipe( fpu_reg_mem ); 10151%} 10152 10153instruct addD_reg_imm1(regD dst, immD1 src) %{ 10154 predicate(UseSSE<=1); 10155 match(Set dst (AddD dst src)); 10156 ins_cost(125); 10157 format %{ "FLD1\n\t" 10158 "DADDp $dst,ST" %} 10159 opcode(0xDE, 0x00); 10160 ins_encode( LdImmD(src), 10161 OpcP, RegOpc(dst) ); 10162 ins_pipe( fpu_reg ); 10163%} 10164 10165instruct addD_reg_imm(regD dst, immD src) %{ 10166 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 10167 match(Set dst (AddD dst src)); 10168 ins_cost(200); 10169 format %{ "FLD_D [$src]\n\t" 10170 "DADDp $dst,ST" %} 10171 opcode(0xDE, 0x00); /* DE /0 */ 10172 ins_encode( LdImmD(src), 10173 OpcP, RegOpc(dst)); 10174 ins_pipe( fpu_reg_mem ); 10175%} 10176 10177instruct addD_reg_imm_round(stackSlotD dst, regD src, immD con) %{ 10178 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); 10179 match(Set dst (RoundDouble (AddD src 
con))); 10180 ins_cost(200); 10181 format %{ "FLD_D [$con]\n\t" 10182 "DADD ST,$src\n\t" 10183 "FSTP_D $dst\t# D-round" %} 10184 opcode(0xD8, 0x00); /* D8 /0 */ 10185 ins_encode( LdImmD(con), 10186 OpcP, RegOpc(src), Pop_Mem_D(dst)); 10187 ins_pipe( fpu_mem_reg_con ); 10188%} 10189 10190// Add two double precision floating point values in xmm 10191instruct addXD_reg(regXD dst, regXD src) %{ 10192 predicate(UseSSE>=2); 10193 match(Set dst (AddD dst src)); 10194 format %{ "ADDSD $dst,$src" %} 10195 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), RegReg(dst, src)); 10196 ins_pipe( pipe_slow ); 10197%} 10198 10199instruct addXD_imm(regXD dst, immXD con) %{ 10200 predicate(UseSSE>=2); 10201 match(Set dst (AddD dst con)); 10202 format %{ "ADDSD $dst,[$con]" %} 10203 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), LdImmXD(dst, con) ); 10204 ins_pipe( pipe_slow ); 10205%} 10206 10207instruct addXD_mem(regXD dst, memory mem) %{ 10208 predicate(UseSSE>=2); 10209 match(Set dst (AddD dst (LoadD mem))); 10210 format %{ "ADDSD $dst,$mem" %} 10211 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), RegMem(dst,mem)); 10212 ins_pipe( pipe_slow ); 10213%} 10214 10215// Sub two double precision floating point values in xmm 10216instruct subXD_reg(regXD dst, regXD src) %{ 10217 predicate(UseSSE>=2); 10218 match(Set dst (SubD dst src)); 10219 format %{ "SUBSD $dst,$src" %} 10220 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src)); 10221 ins_pipe( pipe_slow ); 10222%} 10223 10224instruct subXD_imm(regXD dst, immXD con) %{ 10225 predicate(UseSSE>=2); 10226 match(Set dst (SubD dst con)); 10227 format %{ "SUBSD $dst,[$con]" %} 10228 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), LdImmXD(dst, con) ); 10229 ins_pipe( pipe_slow ); 10230%} 10231 10232instruct subXD_mem(regXD dst, memory mem) %{ 10233 predicate(UseSSE>=2); 10234 match(Set dst (SubD dst (LoadD mem))); 10235 format %{ "SUBSD $dst,$mem" %} 10236 ins_encode( Opcode(0xF2), Opcode(0x0F), 
Opcode(0x5C), RegMem(dst,mem)); 10237 ins_pipe( pipe_slow ); 10238%} 10239 10240// Mul two double precision floating point values in xmm 10241instruct mulXD_reg(regXD dst, regXD src) %{ 10242 predicate(UseSSE>=2); 10243 match(Set dst (MulD dst src)); 10244 format %{ "MULSD $dst,$src" %} 10245 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegReg(dst, src)); 10246 ins_pipe( pipe_slow ); 10247%} 10248 10249instruct mulXD_imm(regXD dst, immXD con) %{ 10250 predicate(UseSSE>=2); 10251 match(Set dst (MulD dst con)); 10252 format %{ "MULSD $dst,[$con]" %} 10253 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), LdImmXD(dst, con) ); 10254 ins_pipe( pipe_slow ); 10255%} 10256 10257instruct mulXD_mem(regXD dst, memory mem) %{ 10258 predicate(UseSSE>=2); 10259 match(Set dst (MulD dst (LoadD mem))); 10260 format %{ "MULSD $dst,$mem" %} 10261 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem)); 10262 ins_pipe( pipe_slow ); 10263%} 10264 10265// Div two double precision floating point values in xmm 10266instruct divXD_reg(regXD dst, regXD src) %{ 10267 predicate(UseSSE>=2); 10268 match(Set dst (DivD dst src)); 10269 format %{ "DIVSD $dst,$src" %} 10270 opcode(0xF2, 0x0F, 0x5E); 10271 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src)); 10272 ins_pipe( pipe_slow ); 10273%} 10274 10275instruct divXD_imm(regXD dst, immXD con) %{ 10276 predicate(UseSSE>=2); 10277 match(Set dst (DivD dst con)); 10278 format %{ "DIVSD $dst,[$con]" %} 10279 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), LdImmXD(dst, con)); 10280 ins_pipe( pipe_slow ); 10281%} 10282 10283instruct divXD_mem(regXD dst, memory mem) %{ 10284 predicate(UseSSE>=2); 10285 match(Set dst (DivD dst (LoadD mem))); 10286 format %{ "DIVSD $dst,$mem" %} 10287 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem)); 10288 ins_pipe( pipe_slow ); 10289%} 10290 10291 10292instruct mulD_reg(regD dst, regD src) %{ 10293 predicate(UseSSE<=1); 10294 match(Set dst (MulD 
dst src)); 10295 format %{ "FLD $src\n\t" 10296 "DMULp $dst,ST" %} 10297 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 10298 ins_cost(150); 10299 ins_encode( Push_Reg_D(src), 10300 OpcP, RegOpc(dst) ); 10301 ins_pipe( fpu_reg_reg ); 10302%} 10303 10304// Strict FP instruction biases argument before multiply then 10305// biases result to avoid double rounding of subnormals. 10306// 10307// scale arg1 by multiplying arg1 by 2^(-15360) 10308// load arg2 10309// multiply scaled arg1 by arg2 10310// rescale product by 2^(15360) 10311// 10312instruct strictfp_mulD_reg(regDPR1 dst, regnotDPR1 src) %{ 10313 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 10314 match(Set dst (MulD dst src)); 10315 ins_cost(1); // Select this instruction for all strict FP double multiplies 10316 10317 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" 10318 "DMULp $dst,ST\n\t" 10319 "FLD $src\n\t" 10320 "DMULp $dst,ST\n\t" 10321 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" 10322 "DMULp $dst,ST\n\t" %} 10323 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 10324 ins_encode( strictfp_bias1(dst), 10325 Push_Reg_D(src), 10326 OpcP, RegOpc(dst), 10327 strictfp_bias2(dst) ); 10328 ins_pipe( fpu_reg_reg ); 10329%} 10330 10331instruct mulD_reg_imm(regD dst, immD src) %{ 10332 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 10333 match(Set dst (MulD dst src)); 10334 ins_cost(200); 10335 format %{ "FLD_D [$src]\n\t" 10336 "DMULp $dst,ST" %} 10337 opcode(0xDE, 0x1); /* DE /1 */ 10338 ins_encode( LdImmD(src), 10339 OpcP, RegOpc(dst) ); 10340 ins_pipe( fpu_reg_mem ); 10341%} 10342 10343 10344instruct mulD_reg_mem(regD dst, memory src) %{ 10345 predicate( UseSSE<=1 ); 10346 match(Set dst (MulD dst (LoadD src))); 10347 ins_cost(200); 10348 format %{ "FLD_D $src\n\t" 10349 "DMULp $dst,ST" %} 10350 opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */ 10351 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 
OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

//
// Cisc-alternate to reg-reg multiply
//
// Three-operand x87 form: dst = src * [mem], selected only when UseSSE<=1.
// The memory operand is a double (the match rule consumes a LoadD and the
// format prints FLD_D), so it has to be pushed with the 64-bit load
// FLD m64fp (DD /0).  The tertiary opcode used to be 0xD9, which is
// FLD m32fp and would have read the low half of the double as a float.
// 0xDD is also what the other double-from-memory rules in this file use.
instruct mulD_reg_mem_cisc(regD dst, regD src, memory mem) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD src (LoadD mem)));
  ins_cost(250);
  format %{ "FLD_D $mem\n\t"
            "DMUL ST,$src\n\t"
            "FSTP_D $dst" %}
  opcode(0xD8, 0x1, 0xDD); /* D8 C8+i */ /* LoadD DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),   // push the double at $mem
              OpcReg_F(src),                           // multiply ST by $src
              Pop_Reg_D(dst) );                        // pop the product into $dst
  ins_pipe( fpu_reg_reg_mem );
%}


// MACRO3 -- addD a mulD
// This instruction is a '2-address' instruction in that the result goes
// back to src2. This eliminates a move from the macro; possibly the
// register allocator will have to add it back (and maybe not).
//
// Matches src2 = (src0 * src1) + src2 on the x87 stack (UseSSE<=1).
instruct addD_mulD_reg(regD src2, regD src1, regD src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (AddD (MulD src0 src1) src2));
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DADDp $src2,ST" %}
  ins_cost(250);
  // NOTE(review): no OpcP/OpcS/tertiary appears in ins_encode below, so this
  // opcode declaration looks unused -- confirm against the enc_class bodies.
  opcode(0xDD); /* LoadD DD /0 */
  ins_encode( Push_Reg_F(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}


// MACRO3 -- subD a mulD
//
// Matches src2 = (src0 * src1) - src2 on the x87 stack (UseSSE<=1).
// The product is on top of the stack, so the reversed subtract-and-pop
// (DE E0+i, printed as DSUBRp) leaves product - src2 in src2.
instruct subD_mulD_reg(regD src2, regD src1, regD src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (SubD (MulD src0 src1) src2));
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DSUBRp $src2,ST" %}
  ins_cost(250);
  ins_encode( Push_Reg_F(src0),
              FMul_ST_reg(src1),
              Opcode(0xDE), Opc_plus(0xE0,src2));
  ins_pipe( fpu_reg_reg_reg );
%}


// x87 double divide, two-address form: dst = dst / src (UseSSE<=1).
instruct divD_reg(regD dst, regD src) %{
  predicate( UseSSE<=1 );
  match(Set dst (DivD dst src));

  format %{ "FLD $src\n\t"
            "FDIVp $dst,ST" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_cost(150);
ins_encode( Push_Reg_D(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Strict FP instruction biases argument before division then
// biases result, to avoid double rounding of subnormals.
//
// scale dividend by multiplying dividend by 2^(-15360)
// load divisor
// divide scaled dividend by divisor
// rescale quotient by 2^(15360)
//
// The rule is restricted by ONE predicate: x87 code (UseSSE<=1) being
// compiled for a strict method.  An earlier, weaker "predicate (UseSSE<=1)"
// was declared in addition to it; having two predicates on one instruct is
// ambiguous at best, so only the intended strict-method predicate is kept,
// exactly as strictfp_mulD_reg does.  The cost is written as plain decimal 1
// (it used to be the octal literal 01) so it reads the same as the multiply
// rule's cost.
instruct strictfp_divD_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (DivD dst src));
  ins_cost(1);  // Select this instruction for all strict FP double divides

  format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "FDIVp $dst,ST\n\t"
            "FLD StubRoutines::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( strictfp_bias1(dst),      // scale the dividend down
              Push_Reg_D(src),          // load the divisor
              OpcP, RegOpc(dst),        // divide and pop
              strictfp_bias2(dst) );    // rescale the quotient
  ins_pipe( fpu_reg_reg );
%}

// Non-strict x87 divide whose result is rounded to double by storing it
// to a stack slot: dst = RoundDouble(src1 / src2).  The predicate excludes
// strict methods.
instruct divD_reg_round(stackSlotD dst, regD src1, regD src2) %{
  predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
  match(Set dst (RoundDouble (DivD src1 src2)));

  format %{ "FLD $src1\n\t"
            "FDIV ST,$src2\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2), Pop_Mem_D(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


// x87 double remainder, two-address form: dst = dst % src (UseSSE<=1).
instruct modD_reg(regD dst, regD src, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS

  format %{ "DMOD $dst,$src" %}
  ins_cost(250);
  ins_encode(Push_Reg_Mod_D(dst, src),
             emitModD(),
             Push_Result_Mod_D(src),
             Pop_Reg_D(dst));
10473 ins_pipe( pipe_slow ); 10474%} 10475 10476instruct modXD_reg(regXD dst, regXD src0, regXD src1, eAXRegI rax, eFlagsReg cr) %{ 10477 predicate(UseSSE>=2); 10478 match(Set dst (ModD src0 src1)); 10479 effect(KILL rax, KILL cr); 10480 10481 format %{ "SUB ESP,8\t # DMOD\n" 10482 "\tMOVSD [ESP+0],$src1\n" 10483 "\tFLD_D [ESP+0]\n" 10484 "\tMOVSD [ESP+0],$src0\n" 10485 "\tFLD_D [ESP+0]\n" 10486 "loop:\tFPREM\n" 10487 "\tFWAIT\n" 10488 "\tFNSTSW AX\n" 10489 "\tSAHF\n" 10490 "\tJP loop\n" 10491 "\tFSTP_D [ESP+0]\n" 10492 "\tMOVSD $dst,[ESP+0]\n" 10493 "\tADD ESP,8\n" 10494 "\tFSTP ST0\t # Restore FPU Stack" 10495 %} 10496 ins_cost(250); 10497 ins_encode( Push_ModD_encoding(src0, src1), emitModD(), Push_ResultXD(dst), PopFPU); 10498 ins_pipe( pipe_slow ); 10499%} 10500 10501instruct sinD_reg(regDPR1 dst, regDPR1 src) %{ 10502 predicate (UseSSE<=1); 10503 match(Set dst (SinD src)); 10504 ins_cost(1800); 10505 format %{ "DSIN $dst" %} 10506 opcode(0xD9, 0xFE); 10507 ins_encode( OpcP, OpcS ); 10508 ins_pipe( pipe_slow ); 10509%} 10510 10511instruct sinXD_reg(regXD dst, eFlagsReg cr) %{ 10512 predicate (UseSSE>=2); 10513 match(Set dst (SinD dst)); 10514 effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8" 10515 ins_cost(1800); 10516 format %{ "DSIN $dst" %} 10517 opcode(0xD9, 0xFE); 10518 ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) ); 10519 ins_pipe( pipe_slow ); 10520%} 10521 10522instruct cosD_reg(regDPR1 dst, regDPR1 src) %{ 10523 predicate (UseSSE<=1); 10524 match(Set dst (CosD src)); 10525 ins_cost(1800); 10526 format %{ "DCOS $dst" %} 10527 opcode(0xD9, 0xFF); 10528 ins_encode( OpcP, OpcS ); 10529 ins_pipe( pipe_slow ); 10530%} 10531 10532instruct cosXD_reg(regXD dst, eFlagsReg cr) %{ 10533 predicate (UseSSE>=2); 10534 match(Set dst (CosD dst)); 10535 effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8" 10536 ins_cost(1800); 10537 format %{ "DCOS $dst" %} 10538 opcode(0xD9, 0xFF); 10539 ins_encode( Push_SrcXD(dst), OpcP, 
OpcS, Push_ResultXD(dst) );
  ins_pipe( pipe_slow );
%}

// x87 tangent, operand and result in FPR1 (UseSSE<=1).
// FPTAN (D9 F2) also pushes 1.0 above the result; the FSTP ST that follows
// discards that extra value.
instruct tanD_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst(TanD src));
  format %{ "DTAN $dst" %}
  ins_encode( Opcode(0xD9), Opcode(0xF2),    // fptan
              Opcode(0xDD), Opcode(0xD8));   // fstp st
  ins_pipe( pipe_slow );
%}

// Tangent for a value held in an XMM register (UseSSE>=2): the operand is
// moved onto the x87 stack, FPTAN is run, and the result is moved back.
instruct tanXD_reg(regXD dst, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst(TanD dst));
  effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
  format %{ "DTAN $dst" %}
  ins_encode( Push_SrcXD(dst),
              Opcode(0xD9), Opcode(0xF2),    // fptan
              Opcode(0xDD), Opcode(0xD8),    // fstp st
              Push_ResultXD(dst) );
  ins_pipe( pipe_slow );
%}

// x87 two-operand arctangent (UseSSE<=1), opcode D9 F3 (FPATAN).
// The listing mnemonic is spelled DATAN, in line with DSIN/DCOS/DTAN/DSQRT;
// it used to print as the truncated "DATA".
instruct atanD_reg(regD dst, regD src) %{
  predicate (UseSSE<=1);
  match(Set dst(AtanD dst src));
  format %{ "DATAN $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_Reg_D(src),
              OpcP, OpcS, RegOpc(dst) );
  ins_pipe( pipe_slow );
%}

// Two-operand arctangent for XMM operands (UseSSE>=2), computed on the x87
// unit with D9 F3 (FPATAN).
// NOTE(review): only $src is pushed before FPATAN here while the match rule
// also takes $dst as an input -- confirm the encoding against Push_SrcXD.
instruct atanXD_reg(regXD dst, regXD src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst(AtanD dst src));
  effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
  format %{ "DATAN $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_SrcXD(src),
              OpcP, OpcS, Push_ResultXD(dst) );
  ins_pipe( pipe_slow );
%}

// x87 square root (UseSSE<=1): push $src, emit D9 FA (secondary opcode
// first, then primary), pop the result into $dst.
instruct sqrtD_reg(regD dst, regD src) %{
  predicate (UseSSE<=1);
  match(Set dst (SqrtD src));
  format %{ "DSQRT $dst,$src" %}
  opcode(0xFA, 0xD9);
  ins_encode( Push_Reg_D(src),
              OpcS, OpcP, Pop_Reg_D(dst) );
  ins_pipe( pipe_slow );
%}

// x87 fast-path pow (UseSSE<=1): Y = X ** Y with Y in FPR1.
// EAX, EBX and ECX are used as scratch by the encoding and are killed.
instruct powD_reg(regD X, regDPR1 Y, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
  predicate (UseSSE<=1);
  match(Set Y (PowD X Y));  // Raise X to the Yth power
  effect(KILL rax, KILL rbx, KILL rcx);
  format %{ "SUB ESP,8\t\t# Fast-path POW encoding\n\t"
10600 "FLD_D $X\n\t" 10601 "FYL2X \t\t\t# Q=Y*ln2(X)\n\t" 10602 10603 "FDUP \t\t\t# Q Q\n\t" 10604 "FRNDINT\t\t\t# int(Q) Q\n\t" 10605 "FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t" 10606 "FISTP dword [ESP]\n\t" 10607 "F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t" 10608 "FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t" 10609 "FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead 10610 "MOV EAX,[ESP]\t# Pick up int(Q)\n\t" 10611 "MOV ECX,0xFFFFF800\t# Overflow mask\n\t" 10612 "ADD EAX,1023\t\t# Double exponent bias\n\t" 10613 "MOV EBX,EAX\t\t# Preshifted biased expo\n\t" 10614 "SHL EAX,20\t\t# Shift exponent into place\n\t" 10615 "TEST EBX,ECX\t\t# Check for overflow\n\t" 10616 "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t" 10617 "MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t" 10618 "MOV [ESP+0],0\n\t" 10619 "FMUL ST(0),[ESP+0]\t# Scale\n\t" 10620 10621 "ADD ESP,8" 10622 %} 10623 ins_encode( push_stack_temp_qword, 10624 Push_Reg_D(X), 10625 Opcode(0xD9), Opcode(0xF1), // fyl2x 10626 pow_exp_core_encoding, 10627 pop_stack_temp_qword); 10628 ins_pipe( pipe_slow ); 10629%} 10630 10631instruct powXD_reg(regXD dst, regXD src0, regXD src1, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx ) %{ 10632 predicate (UseSSE>=2); 10633 match(Set dst (PowD src0 src1)); // Raise src0 to the src1'th power 10634 effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx ); 10635 format %{ "SUB ESP,8\t\t# Fast-path POW encoding\n\t" 10636 "MOVSD [ESP],$src1\n\t" 10637 "FLD FPR1,$src1\n\t" 10638 "MOVSD [ESP],$src0\n\t" 10639 "FLD FPR1,$src0\n\t" 10640 "FYL2X \t\t\t# Q=Y*ln2(X)\n\t" 10641 10642 "FDUP \t\t\t# Q Q\n\t" 10643 "FRNDINT\t\t\t# int(Q) Q\n\t" 10644 "FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t" 10645 "FISTP dword [ESP]\n\t" 10646 "F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t" 10647 "FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t" 10648 "FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead 10649 "MOV EAX,[ESP]\t# Pick up int(Q)\n\t" 10650 "MOV ECX,0xFFFFF800\t# Overflow mask\n\t" 
"ADD EAX,1023\t\t# Double exponent bias\n\t"
            "MOV EBX,EAX\t\t# Preshifted biased expo\n\t"
            "SHL EAX,20\t\t# Shift exponent into place\n\t"
            "TEST EBX,ECX\t\t# Check for overflow\n\t"
            "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
            "MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
            "MOV [ESP+0],0\n\t"
            "FMUL ST(0),[ESP+0]\t# Scale\n\t"

            "FST_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8"
             %}
  ins_encode( push_stack_temp_qword,
              push_xmm_to_fpr1(src1),
              push_xmm_to_fpr1(src0),
              Opcode(0xD9), Opcode(0xF1),   // fyl2x
              pow_exp_core_encoding,
              Push_ResultXD(dst) );
  ins_pipe( pipe_slow );
%}


// x87 fast-path exp (UseSSE<=1), computed in place in FPR1.
// The value is multiplied by log2(e) (FLDL2E, FMULP) and the shared
// pow/exp core encoding then raises 2 to that power, using a temporary
// qword on the stack.  EAX, EBX and ECX are scratch for the core and are
// killed.
//
// Every line of the listing ends with "\n\t"; the first line used to lack
// it, which glued "SUB ESP,8 ..." and "FLDL2E" together in the printed
// assembly.
instruct expD_reg(regDPR1 dpr1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
  predicate (UseSSE<=1);
  match(Set dpr1 (ExpD dpr1));
  effect(KILL rax, KILL rbx, KILL rcx);
  format %{ "SUB ESP,8\t\t# Fast-path EXP encoding\n\t"
            "FLDL2E \t\t\t# Ld log2(e) X\n\t"
            "FMULP \t\t\t# Q=X*log2(e)\n\t"

            "FDUP \t\t\t# Q Q\n\t"
            "FRNDINT\t\t\t# int(Q) Q\n\t"
            "FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
            "FISTP dword [ESP]\n\t"
            "F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
            "FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
            "FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
            "MOV EAX,[ESP]\t# Pick up int(Q)\n\t"
            "MOV ECX,0xFFFFF800\t# Overflow mask\n\t"
            "ADD EAX,1023\t\t# Double exponent bias\n\t"
            "MOV EBX,EAX\t\t# Preshifted biased expo\n\t"
            "SHL EAX,20\t\t# Shift exponent into place\n\t"
            "TEST EBX,ECX\t\t# Check for overflow\n\t"
            "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
            "MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
            "MOV [ESP+0],0\n\t"
            "FMUL ST(0),[ESP+0]\t# Scale\n\t"

            "ADD ESP,8"
             %}
  ins_encode( push_stack_temp_qword,
              Opcode(0xD9), Opcode(0xEA),   // fldl2e
              Opcode(0xDE),
Opcode(0xC9), // fmulp 10705 pow_exp_core_encoding, 10706 pop_stack_temp_qword); 10707 ins_pipe( pipe_slow ); 10708%} 10709 10710instruct expXD_reg(regXD dst, regXD src, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{ 10711 predicate (UseSSE>=2); 10712 match(Set dst (ExpD src)); 10713 effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx); 10714 format %{ "SUB ESP,8\t\t# Fast-path EXP encoding\n\t" 10715 "MOVSD [ESP],$src\n\t" 10716 "FLDL2E \t\t\t# Ld log2(e) X\n\t" 10717 "FMULP \t\t\t# Q=X*log2(e) X\n\t" 10718 10719 "FDUP \t\t\t# Q Q\n\t" 10720 "FRNDINT\t\t\t# int(Q) Q\n\t" 10721 "FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t" 10722 "FISTP dword [ESP]\n\t" 10723 "F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t" 10724 "FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t" 10725 "FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead 10726 "MOV EAX,[ESP]\t# Pick up int(Q)\n\t" 10727 "MOV ECX,0xFFFFF800\t# Overflow mask\n\t" 10728 "ADD EAX,1023\t\t# Double exponent bias\n\t" 10729 "MOV EBX,EAX\t\t# Preshifted biased expo\n\t" 10730 "SHL EAX,20\t\t# Shift exponent into place\n\t" 10731 "TEST EBX,ECX\t\t# Check for overflow\n\t" 10732 "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t" 10733 "MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t" 10734 "MOV [ESP+0],0\n\t" 10735 "FMUL ST(0),[ESP+0]\t# Scale\n\t" 10736 10737 "FST_D [ESP]\n\t" 10738 "MOVSD $dst,[ESP]\n\t" 10739 "ADD ESP,8" 10740 %} 10741 ins_encode( Push_SrcXD(src), 10742 Opcode(0xD9), Opcode(0xEA), // fldl2e 10743 Opcode(0xDE), Opcode(0xC9), // fmulp 10744 pow_exp_core_encoding, 10745 Push_ResultXD(dst) ); 10746 ins_pipe( pipe_slow ); 10747%} 10748 10749 10750 10751instruct log10D_reg(regDPR1 dst, regDPR1 src) %{ 10752 predicate (UseSSE<=1); 10753 // The source Double operand on FPU stack 10754 match(Set dst (Log10D src)); 10755 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number 10756 // fxch ; swap ST(0) with ST(1) 10757 // fyl2x ; compute log_10(2) * log_2(x) 10758 format %{ "FLDLG2 \t\t\t#Log10\n\t" 
"FXCH \n\t"
            "FYL2X \t\t\t# Q=Log10*Log_2(x)"
         %}
  ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
              Opcode(0xD9), Opcode(0xC9),   // fxch
              Opcode(0xD9), Opcode(0xF1));  // fyl2x

  ins_pipe( pipe_slow );
%}

// Log10D for UseSSE>=2: operands live in XMM registers, the computation is
// done with FPU-stack opcodes bracketed by Push_SrcXD / Push_ResultXD.
instruct log10XD_reg(regXD dst, regXD src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  effect(KILL cr);
  match(Set dst (Log10D src));
  // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
  // fyl2x  ; compute log_10(2) * log_2(x)
  format %{ "FLDLG2 \t\t\t#Log10\n\t"
            "FYL2X \t\t\t# Q=Log10*Log_2(x)"
         %}
  ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
              Push_SrcXD(src),
              Opcode(0xD9), Opcode(0xF1),   // fyl2x
              Push_ResultXD(dst));

  ins_pipe( pipe_slow );
%}

// LogD for UseSSE<=1.
instruct logD_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  // The source Double operand on FPU stack
  match(Set dst (LogD src));
  // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number
  // fxch   ; swap ST(0) with ST(1)
  // fyl2x  ; compute log_e(2) * log_2(x)
  format %{ "FLDLN2 \t\t\t#Log_e\n\t"
            "FXCH \n\t"
            "FYL2X \t\t\t# Q=Log_e*Log_2(x)"
         %}
  ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
              Opcode(0xD9), Opcode(0xC9),   // fxch
              Opcode(0xD9), Opcode(0xF1));  // fyl2x

  ins_pipe( pipe_slow );
%}

// LogD for UseSSE>=2.
instruct logXD_reg(regXD dst, regXD src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  effect(KILL cr);
  // The source and result Double operands in XMM registers
  match(Set dst (LogD src));
  // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number
  // fyl2x  ; compute log_e(2) * log_2(x)
  format %{ "FLDLN2 \t\t\t#Log_e\n\t"
            "FYL2X \t\t\t# Q=Log_e*Log_2(x)"
         %}
  ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
              Push_SrcXD(src),
              Opcode(0xD9), Opcode(0xF1),   // fyl2x
              Push_ResultXD(dst));

  ins_pipe( pipe_slow );
%}

//-------------Float Instructions-------------------------------
// Float Math

// Code for float compare:
//     fcompp();
//     fwait(); fnstsw_ax();
//     sahf();
//     movl(dst, unordered_result);
//     jcc(Assembler::parity, exit);
//     movl(dst, less_result);
//     jcc(Assembler::below, exit);
//     movl(dst, equal_result);
//     jcc(Assembler::equal, exit);
//     movl(dst, greater_result);
//   exit:

// P6 version of float compare, sets condition codes in EFLAGS
instruct cmpF_cc_P6(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction\n\t"
            "JNP exit\n\t"
            "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t"
            "SAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

// Same compare as cmpF_cc_P6 but producing eFlagsRegUCF; no fixup is
// encoded and EAX is not killed.
instruct cmpF_cc_P6CF(eFlagsRegUCF cr, regF src1, regF src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}


// Compare & branch
instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{
  predicate(UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD $src1\n\t"
            "FCOMp $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST AX,0x400\n\t"
            "JZ,s flags\n\t"
            "MOV AH,1\t# unordered treat as LT\n"
            "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}

// Compare vs zero into -1,0,1
instruct cmpF_0(eRegI dst, regF src1, immF0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTF $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_D(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1
instruct cmpF_reg(eRegI dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPF $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
instruct cmpX_cc(eFlagsRegU cr, regX dst, regX src, eAXRegI rax) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF dst src));
  effect(KILL rax);
  ins_cost(145);
  format %{ "COMISS $dst,$src\n"
            "\tJNP exit\n"
            "\tMOV ah,1 // saw a NaN, set CF\n"
            "\tSAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0x0F, 0x2F);
  ins_encode(OpcP, OpcS, RegReg(dst, src), cmpF_P6_fixup);
  ins_pipe( pipe_slow );
%}

// XMM compare producing eFlagsRegUCF; no fixup is encoded.
instruct cmpX_ccCF(eFlagsRegUCF cr, regX dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF dst src));
  ins_cost(100);
  format %{ "COMISS $dst,$src" %}
  opcode(0x0F, 0x2F);
  ins_encode(OpcP, OpcS, RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in
// EFLAGS by XMM regs
instruct cmpX_ccmem(eFlagsRegU cr, regX dst, memory src, eAXRegI rax) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF dst (LoadF src)));
  effect(KILL rax);
  ins_cost(165);
  format %{ "COMISS $dst,$src\n"
            "\tJNP exit\n"
            "\tMOV ah,1 // saw a NaN, set CF\n"
            "\tSAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0x0F, 0x2F);
  ins_encode(OpcP, OpcS, RegMem(dst, src), cmpF_P6_fixup);
  ins_pipe( pipe_slow );
%}

// Memory-operand XMM compare producing eFlagsRegUCF; no fixup is encoded.
instruct cmpX_ccmemCF(eFlagsRegUCF cr, regX dst, memory src) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF dst (LoadF src)));
  ins_cost(100);
  format %{ "COMISS $dst,$src" %}
  opcode(0x0F, 0x2F);
  ins_encode(OpcP, OpcS, RegMem(dst, src));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM
instruct cmpX_reg(eRegI dst, regX src1, regX src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "XOR $dst,$dst\n"
            "\tCOMISS $src1,$src2\n"
            "\tJP,s nan\n"
            "\tJEQ,s exit\n"
            "\tJA,s inc\n"
            "nan:\tDEC $dst\n"
            "\tJMP,s exit\n"
            "inc:\tINC $dst\n"
            "exit:"
         %}
  opcode(0x0F, 0x2F);
  ins_encode(Xor_Reg(dst), OpcP, OpcS, RegReg(src1, src2), CmpX_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory
instruct cmpX_regmem(eRegI dst, regX src1, memory mem, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 (LoadF mem)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "COMISS $src1,$mem\n"
            "\tMOV $dst,0\t\t# do not blow flags\n"
            "\tJP,s nan\n"
            "\tJEQ,s exit\n"
            "\tJA,s inc\n"
            "nan:\tDEC $dst\n"
            "\tJMP,s exit\n"
            "inc:\tINC $dst\n"
            "exit:"
         %}
  opcode(0x0F, 0x2F);
  ins_encode(OpcP, OpcS, RegMem(src1, mem), LdImmI(dst,0x0), CmpX_Result(dst));
  ins_pipe( pipe_slow );
%}

// Spill to obtain 24-bit precision
instruct subF24_reg(stackSlotF dst, regF src1, regF src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (SubF src1 src2));

  format %{ "FSUB $dst,$src1 - $src2" %}
  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
  ins_encode( Push_Reg_F(src1),
              OpcReg_F(src2),
              Pop_Mem_F(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct subF_reg(regF dst, regF src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (SubF dst src));

  format %{ "FSUB $dst,$src" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_encode( Push_Reg_F(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct addF24_reg(stackSlotF dst, regF src1, regF src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0); /* D8 C0+i */
  ins_encode( Push_Reg_F(src2),
              OpcReg_F(src1),
              Pop_Mem_F(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct addF_reg(regF dst, regF src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst src));

  format %{ "FLD $src\n\t"
            "FADDp $dst,ST" %}
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_F(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Add two single precision floating point values in xmm
instruct addX_reg(regX dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set dst (AddF dst src));
  format %{ "ADDSS $dst,$src" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

instruct addX_imm(regX dst, immXF con) %{
  predicate(UseSSE>=1);
  match(Set dst (AddF dst con));
  format %{ "ADDSS $dst,[$con]" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), LdImmX(dst, con) );
  ins_pipe( pipe_slow );
%}

instruct addX_mem(regX dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (AddF dst (LoadF mem)));
  format %{ "ADDSS $dst,$mem" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegMem(dst, mem));
  ins_pipe( pipe_slow );
%}

// Subtract two single precision floating point values in xmm
instruct subX_reg(regX dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set dst (SubF dst src));
  format %{ "SUBSS $dst,$src" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

instruct subX_imm(regX dst, immXF con) %{
  predicate(UseSSE>=1);
  match(Set dst (SubF dst con));
  format %{ "SUBSS $dst,[$con]" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), LdImmX(dst, con) );
  ins_pipe( pipe_slow );
%}

instruct subX_mem(regX dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (SubF dst (LoadF mem)));
  format %{ "SUBSS $dst,$mem" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}

// Multiply two single precision floating point values in xmm
instruct mulX_reg(regX dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set dst (MulF dst src));
  format %{ "MULSS $dst,$src" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

instruct mulX_imm(regX dst, immXF con) %{
  predicate(UseSSE>=1);
  match(Set dst (MulF dst con));
  format %{ "MULSS $dst,[$con]" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), LdImmX(dst, con) );
  ins_pipe( pipe_slow );
%}

instruct mulX_mem(regX dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (MulF dst (LoadF mem)));
  format %{ "MULSS $dst,$mem" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}

// Divide two single precision floating point values in xmm
instruct divX_reg(regX dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set dst (DivF dst src));
  format %{ "DIVSS $dst,$src" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

instruct divX_imm(regX dst, immXF con) %{
  predicate(UseSSE>=1);
  match(Set dst (DivF dst con));
  format %{ "DIVSS $dst,[$con]" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), LdImmX(dst, con) );
  ins_pipe( pipe_slow );
%}

instruct divX_mem(regX dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (DivF dst (LoadF mem)));
  format %{ "DIVSS $dst,$mem" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}

// Get the square root of a single precision floating point value in xmm.
// Matches the ideal shape ConvD2F(SqrtD(ConvF2D x)).
instruct sqrtX_reg(regX dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
  format %{ "SQRTSS $dst,$src" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x51), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

instruct sqrtX_mem(regX dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF mem)))));
  format %{ "SQRTSS $dst,$mem" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem));
  ins_pipe( pipe_slow );
%}

// Get the square root of a double precision floating point value in xmm
instruct sqrtXD_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD src));
  format %{ "SQRTSD $dst,$src" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

instruct sqrtXD_mem(regXD dst, memory mem) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD (LoadD mem)));
  format %{ "SQRTSD $dst,$mem" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem));
  ins_pipe( pipe_slow );
%}

// AbsF for UseSSE==0; OpcS then OpcP emits the bytes D9 E1.
instruct absF_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (AbsF src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

instruct absX_reg(regX dst ) %{
  predicate(UseSSE>=1);
  match(Set dst (AbsF dst));
  format %{ "ANDPS $dst,[0x7FFFFFFF]\t# ABS F by sign masking" %}
  ins_encode( AbsXF_encoding(dst));
  ins_pipe( pipe_slow );
%}

// NegF for UseSSE==0; OpcS then OpcP emits the bytes D9 E0.
instruct negF_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (NegF src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

instruct negX_reg( regX dst ) %{
  predicate(UseSSE>=1);
  match(Set dst (NegF dst));
  format %{ "XORPS $dst,[0x80000000]\t# CHS F by sign flipping" %}
  ins_encode( NegXF_encoding(dst));
  ins_pipe( pipe_slow );
%}

// Cisc-alternate to addF_reg
// Spill to obtain 24-bit precision
instruct addF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FLD $src2\n\t"
            "FADD ST,$src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_F(src1),
              Pop_Mem_F(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// Cisc-alternate to addF_reg
// This instruction does not round to 24-bits
instruct addF_reg_mem(regF dst, memory src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst (LoadF src)));

  format %{ "FADD $dst,$src" %}
  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// // Following two instructions for _222_mpegaudio
// Spill to obtain 24-bit precision
instruct addF24_mem_reg(stackSlotF dst, regF src2, memory src1 ) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
              OpcReg_F(src2),
              Pop_Mem_F(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}

// Cisc-spill variant
// Spill to obtain 24-bit precision
instruct addF24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FADD $dst,$src1,$src2 cisc" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_F(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct addF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_F(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}


// Spill to obtain 24-bit precision
instruct addF24_reg_imm(stackSlotF dst, regF src1, immF src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));
  format %{ "FLD $src1\n\t"
            "FADD $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x00); /* D8 /0 */
  ins_encode( Push_Reg_F(src1),
              Opc_MemImm_F(src2),
              Pop_Mem_F(dst));
  ins_pipe( fpu_mem_reg_con );
%}
//
// This instruction does not round to 24-bits
instruct addF_reg_imm(regF dst, regF src1, immF src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));
  // The result goes to a register via Pop_Reg_F, so the listing shows FSTP
  // (as mulF_reg_load1 does) rather than FSTP_S, which this file uses for
  // Pop_Mem_F stores.
  format %{ "FLD $src1\n\t"
            "FADD $src2\n\t"
            "FSTP $dst" %}
  opcode(0xD8, 0x00); /* D8 /0 */
  ins_encode( Push_Reg_F(src1),
              Opc_MemImm_F(src2),
              Pop_Reg_F(dst));
  ins_pipe( fpu_reg_reg_con );
%}

// Spill to obtain 24-bit precision
instruct mulF24_reg(stackSlotF dst, regF src1, regF src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
  ins_encode( Push_Reg_F(src1),
              OpcReg_F(src2),
              Pop_Mem_F(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct mulF_reg(regF dst, regF src1, regF src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  // The encoding pushes src2 and multiplies by src1; the listing shows the
  // operands the other way round. The result is popped to a register
  // (Pop_Reg_F), so it is printed as FSTP rather than FSTP_S.
  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i */
  ins_encode( Push_Reg_F(src2),
              OpcReg_F(src1),
              Pop_Reg_F(dst) );
  ins_pipe( fpu_reg_reg_reg );
%}


// Spill to obtain 24-bit precision
// Cisc-alternate to reg-reg multiply
instruct mulF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FLD_S $src2\n\t"
            "FMUL $src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_F(src1),
              Pop_Mem_F(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// This instruction does not round to 24-bits
// Cisc-alternate to reg-reg multiply
instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_F(src1),
              Pop_Reg_F(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}

// Spill to obtain 24-bit precision
instruct mulF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_F(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct mulF24_reg_imm(stackSlotF dst, regF src1, immF src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FMULc $dst,$src1,$src2" %}
  opcode(0xD8, 0x1); /* D8 /1*/
  ins_encode( Push_Reg_F(src1),
              Opc_MemImm_F(src2),
              Pop_Mem_F(dst));
  ins_pipe( fpu_mem_reg_con );
%}
//
// This instruction does not round to 24-bits
instruct mulF_reg_imm(regF dst, regF src1, immF src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  // Operand list punctuated like mulF24_reg_imm (was "$dst. $src1, $src2").
  format %{ "FMULc $dst,$src1,$src2" %}
  opcode(0xD8, 0x1); /* D8 /1*/
  ins_encode( Push_Reg_F(src1),
              Opc_MemImm_F(src2),
              Pop_Reg_F(dst));
  ins_pipe( fpu_reg_reg_con );
%}


//
// MACRO1 -- subsume unshared load into mulF
// This instruction does not round to 24-bits
instruct mulF_reg_load1(regF dst, regF src, memory mem1 ) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF (LoadF mem1) src));

  format %{ "FLD $mem1 ===MACRO1===\n\t"
            "FMUL ST,$src\n\t"
            "FSTP $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
              OpcReg_F(src),
              Pop_Reg_F(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}
//
// MACRO2 -- addF a mulF which subsumed an unshared load
// This instruction does not round to 24-bits
instruct addF_mulF_reg_load1(regF dst, memory mem1, regF src1, regF src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
  ins_cost(95);

  format %{ "FLD $mem1 ===MACRO2===\n\t"
            "FMUL ST,$src1 subsume mulF left load\n\t"
            "FADD ST,$src2\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem1),
              FMul_ST_reg(src1),
              FAdd_ST_reg(src2),
              Pop_Reg_F(dst) );
  ins_pipe( fpu_reg_mem_reg_reg );
%}

// MACRO3 -- addF a mulF
// This instruction does not round to 24-bits. It is a '2-address'
// instruction in that the result goes back to src2. This eliminates
// a move from the macro; possibly the register allocator will have
// to add it back (and maybe not).
instruct addF_mulF_reg(regF src2, regF src1, regF src0) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set src2 (AddF (MulF src0 src1) src2));

  format %{ "FLD $src0 ===MACRO3===\n\t"
            "FMUL ST,$src1\n\t"
            "FADDP $src2,ST" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( Push_Reg_F(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}

// MACRO4 -- divF subF
// This instruction does not round to 24-bits
instruct subF_divF_reg(regF dst, regF src1, regF src2, regF src3) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF (SubF src2 src1) src3));

  format %{ "FLD $src2 ===MACRO4===\n\t"
            "FSUB ST,$src1\n\t"
            "FDIV ST,$src3\n\t"
            "FSTP $dst" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_F(src2),
              subF_divF_encode(src1,src3),
              Pop_Reg_F(dst) );
  ins_pipe( fpu_reg_reg_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct divF24_reg(stackSlotF dst, regF src1, regF src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (DivF src1 src2));

  format %{ "FDIV $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
  ins_encode( Push_Reg_F(src1),
              OpcReg_F(src2),
              Pop_Mem_F(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct divF_reg(regF dst, regF src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF dst src));

  format %{ "FDIV $dst,$src" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_F(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}


// Spill to obtain 24-bit precision
instruct modF24_reg(stackSlotF dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ModF src1 src2));
  effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src1,$src2" %}
  ins_encode( Push_Reg_Mod_D(src1, src2),
              emitModD(),
              Push_Result_Mod_D(src2),
              Pop_Mem_F(dst));
  ins_pipe( pipe_slow );
%}
//
// This instruction does not round to 24-bits
instruct modF_reg(regF dst, regF src, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ModF dst src));
  effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src" %}
  ins_encode(Push_Reg_Mod_D(dst, src),
             emitModD(),
             Push_Result_Mod_D(src),
             Pop_Reg_F(dst));
  ins_pipe( pipe_slow );
%}

// ModF with XMM operands (UseSSE>=1); per the listing below the values are
// moved through [ESP] onto the FPU stack for the FPREM loop.
instruct modX_reg(regX dst, regX src0, regX src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (ModF src0 src1));
  effect(KILL rax, KILL cr);
  format %{ "SUB ESP,4\t # FMOD\n"
            "\tMOVSS [ESP+0],$src1\n"
            "\tFLD_S [ESP+0]\n"
            "\tMOVSS [ESP+0],$src0\n"
            "\tFLD_S [ESP+0]\n"
            "loop:\tFPREM\n"
            "\tFWAIT\n"
            "\tFNSTSW AX\n"
            "\tSAHF\n"
            "\tJP loop\n"
            "\tFSTP_S [ESP+0]\n"
            "\tMOVSS $dst,[ESP+0]\n"
            "\tADD ESP,4\n"
            "\tFSTP ST0\t # Restore FPU Stack"
         %}
  ins_cost(250);
  ins_encode( Push_ModX_encoding(src0, src1), emitModD(), Push_ResultX(dst,0x4), PopFPU);
  ins_pipe( pipe_slow );
%}


//----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all alpha-sorted. Please keep it that way!

instruct roundFloat_mem_reg(stackSlotF dst, regF src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);
  format %{ "FST_S $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_F(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct roundDouble_mem_reg(stackSlotD dst, regD src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);
  format %{ "FST_D $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_D(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Force rounding to single precision (24-bit significand, 8-bit exponent)
instruct convD2F_reg(stackSlotF dst, regD src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}

// Force rounding to single precision (24-bit significand, 8-bit exponent)
instruct convD2X_reg(regX dst, regD src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "FST_S [ESP],$src\t# F-round\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode( D2X_encoding(dst, src) );
  ins_pipe( pipe_slow );
%}

// Force rounding double precision to single precision
instruct convXD2X_reg(regX dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  opcode(0xF2, 0x0F, 0x5A);
  ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

instruct convF2D_reg_reg(regD dst, regF src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_D(dst, src));
  ins_pipe( fpu_reg_reg );
%}

instruct convF2D_reg(stackSlotD dst, regF src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}

instruct convX2D_reg(regD dst, regX src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );
  // Comma added between the MOVSS operands, matching the other listings.
  format %{ "SUB ESP,4\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "FSTP $dst\t# D-round" %}
  ins_encode( X2D_encoding(dst, src), Pop_Reg_D(dst));
  ins_pipe( pipe_slow );
%}

instruct convX2XD_reg(regXD dst, regX src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  opcode(0xF3, 0x0F, 0x5A);
  ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert double to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD_D $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_D(src), D2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
instruct convXD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regXD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 8\n\t"
            "MOVSD [ESP], $src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP, 8\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  opcode(0x1); // double-precision conversion
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst));
  ins_pipe( pipe_slow );
%}

// ConvD2L for UseSSE<=1; the result is produced in the eADXRegL operand.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert double to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_D(src), D2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU
// stack.
// NOTE(review): the slow-path listing below shows "SUB ESP,8" with no
// matching "ADD ESP,8", unlike convXD2I_reg_reg; confirm against
// XD2L_encoding whether the listing or the encoding is incomplete.
instruct convXD2L_reg_reg( eADXRegL dst, regXD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert double to long\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,8\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode( XD2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. Java semantics require we do complex
// manglations in the corner cases. So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'. The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this and
// go the slow path if needed.
instruct convF2I_reg_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert float to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  // D2I_encoding works for F2I
  ins_encode( Push_Reg_F(src), D2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a float in xmm to an int reg.
instruct convX2I_reg(eAXRegI dst, eDXRegI tmp, regX src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 4\n\t"
            "MOVSS [ESP], $src\n\t"
            "FLD [ESP]\n\t"
            "ADD ESP, 4\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  opcode(0x0); // single-precision conversion
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst));
  ins_pipe( pipe_slow );
%}

// ConvF2L for UseSSE==0; shares D2L_encoding with the double variant.
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert float to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  // D2L_encoding works for F2L
  ins_encode( Push_Reg_F(src), D2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convX2L_reg_reg( eADXRegL dst, regX src, eFlagsReg cr ) %{
  // ConvF2L for an XMM float source (UseSSE>=1); the flags are killed.
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,4\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode( X2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert an int in a stack slot to a double on the x87 stack (UseSSE<=1).
instruct convI2D_reg(regD dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_D(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert an int register to a double in xmm with CVTSI2SD; used when
// UseXmmI2D is off.
instruct convI2XD_reg(regXD dst, eRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  opcode(0xF2, 0x0F, 0x2A);
  ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// Same conversion with the int operand loaded directly from memory.
instruct convI2XD_mem(regXD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  opcode(0xF2, 0x0F, 0x2A);
  ins_encode( OpcP, OpcS, Opcode(tertiary), RegMem(dst, mem));
  ins_pipe( pipe_slow );
%}

// Alternative int->double sequence (MOVD + CVTDQ2PD), used when UseXmmI2D
// is on.
instruct convXI2XD_reg(regXD dst, eRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// x87 int->double with the int loaded from memory; not used for methods
// compiled in 24-bit mode.
instruct convI2D_mem(regD dst, memory mem) %{
  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB);      /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_D(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert a byte to a float; no rounding step needed.
// The predicate recognizes the input as (AndI x 255).
instruct conv24I2F_reg(regF dst, stackSlotI src) %{
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}

  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_F(dst));
  ins_pipe( fpu_reg_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2F_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_F(dst));
  ins_pipe( fpu_mem_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2F_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB);  /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_F(dst));
  ins_pipe( fpu_mem_mem );
%}

// This instruction does not round to 24-bits
instruct convI2F_reg(regF dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_F(dst));
  ins_pipe( fpu_reg_mem );
%}

// This instruction does not round to 24-bits
instruct convI2F_mem(regF dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB);      /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_F(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert an int to a float in xmm; no rounding step needed.
instruct convI2X_reg(regX dst, eRegI src) %{
  // Parenthesized for readability; && already binds tighter than ||.
  predicate( UseSSE==1 || (UseSSE>=2 && !UseXmmI2F) );
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}

  opcode(0xF3, 0x0F, 0x2A);  /* F3 0F 2A /r */
  ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// Alternative int->float sequence (MOVD + CVTDQ2PS), used when UseXmmI2F
// is on.
instruct convXI2X_reg(regX dst, eRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Sign-extend an int into a long register pair; the flags are killed.
instruct convI2L_reg( eRegL dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src\n\t"
            "SAR $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend convert int to long
instruct convI2L_reg_zex(eRegL dst, eRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend long
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  // Listing ends without a dangling "\n\t", matching convI2L_reg_zex.
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Convert a long to a double through the x87 stack (UseSSE<=1); the result
// is stored to a double stack slot.
instruct convL2D_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_D(dst));
  ins_pipe( pipe_slow );
%}

// Convert a long to a double in xmm (UseSSE>=2), going through the x87
// stack and memory.
instruct convL2XD_reg( regXD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultXD(dst));
  ins_pipe( pipe_slow );
%}

// Convert a long to a float in xmm (UseSSE>=1), going through the x87
// stack and memory.
instruct convL2X_reg( regX dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultX(dst,0x8));
  ins_pipe( pipe_slow );
%}

// Convert a long to a float stored in a float stack slot.  This rule
// carries no UseSSE predicate.
instruct convL2F_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_F(dst));
  ins_pipe( pipe_slow );
%}

// Truncate a long to an int by copying its low word.
instruct convL2I_reg( eRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}


// Move rules (MoveF2I, MoveI2F, MoveD2L, MoveL2D).  Each ideal node has
// several variants that differ only in where the operands live (stack slot,
// x87 register, XMM register or general register); the UseSSE predicates
// and ins_cost values choose among them.
instruct MoveF2I_stack_reg(eRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_F(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct MoveF2I_reg_stack_sse(stackSlotI dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, dst));
  ins_pipe( pipe_slow );
%}

instruct MoveF2I_reg_reg_sse(eRegI dst, regX src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode( MovX2I_reg(dst, src));
  ins_pipe( pipe_slow );
%}

instruct MoveI2F_reg_stack(stackSlotF dst, eRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}


instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FLD_S $src\n\t"
            "FSTP $dst\t# MoveI2F_stack_reg" %}
  opcode(0xD9);               /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_F(dst) );
  ins_pipe( fpu_reg_mem );
%}

instruct MoveI2F_stack_reg_sse(regX dst, stackSlotI src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,src));
  ins_pipe( pipe_slow );
%}

instruct MoveI2F_reg_reg_sse(regX dst, eRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
  ins_encode( MovI2X_reg(dst, src) );
  ins_pipe( pipe_slow );
%}

instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
  ins_pipe( ialu_mem_long_reg );
%}

instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
  ins_encode( Pop_Mem_Reg_D(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct MoveD2L_reg_stack_sse(stackSlotL dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(95);

  format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src,dst));
  ins_pipe( pipe_slow );
%}

instruct MoveD2L_reg_reg_sse(eRegL dst, regXD src, regXD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst.lo,$src\n\t"
            "PSHUFLW $tmp,$src,0x4E\n\t"
            "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
  ins_encode( MovXD2L_reg(dst, src, tmp) );
  ins_pipe( pipe_slow );
%}

instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}


instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst\t# MoveL2D_stack_reg" %}
  opcode(0xDD);               /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_D(dst) );
  ins_pipe( fpu_reg_mem );
%}


instruct MoveL2D_stack_reg_sse(regXD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,src));
  ins_pipe( pipe_slow );
%}

// MOVLPD form of the load above, chosen when UseXmmLoadAndClearUpper is off.
instruct MoveL2D_stack_reg_sse_partial(regXD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  // Debug label now names this rule rather than its MOVSD sibling.
  format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse_partial" %}
  ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x12), RegMem(dst,src));
  ins_pipe( pipe_slow );
%}

instruct MoveL2D_reg_reg_sse(regXD dst, eRegL src, regXD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveL2D src));
  effect(TEMP dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst,$src.lo\n\t"
            "MOVD $tmp,$src.hi\n\t"
            "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
  ins_encode( MovL2XD_reg(dst, src, tmp) );
  ins_pipe( pipe_slow );
%}

// Replicate scalar to packed byte (1 byte) values in xmm
instruct Repl8B_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate8B src));
  format %{ "MOVDQA $dst,$src\n\t"
            "PUNPCKLBW $dst,$dst\n\t"
            "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
  ins_encode( pshufd_8x8(dst, src));
  ins_pipe( pipe_slow );
%}

// Replicate scalar to packed byte (1 byte) values in xmm
instruct Repl8B_eRegI(regXD dst, eRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate8B src));
  format %{ "MOVD $dst,$src\n\t"
            "PUNPCKLBW $dst,$dst\n\t"
            "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
  ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst));
  ins_pipe( pipe_slow );
%}

// Replicate scalar zero to packed byte (1 byte) values in xmm
instruct Repl8B_immI0(regXD dst, immI0 zero) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate8B zero));
  format %{ "PXOR $dst,$dst\t! replicate8B" %}
  ins_encode( pxor(dst, dst));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar to packed short (2 byte) values in xmm
instruct Repl4S_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate4S src));
  format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
  ins_encode( pshufd_4x16(dst, src));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar to packed short (2 byte) values in xmm
instruct Repl4S_eRegI(regXD dst, eRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate4S src));
  format %{ "MOVD $dst,$src\n\t"
            "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
  ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar zero to packed short (2 byte) values in xmm
instruct Repl4S_immI0(regXD dst, immI0 zero) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate4S zero));
  format %{ "PXOR $dst,$dst\t! replicate4S" %}
  ins_encode( pxor(dst, dst));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar to packed char (2 byte) values in xmm
instruct Repl4C_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate4C src));
  format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
  ins_encode( pshufd_4x16(dst, src));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar to packed char (2 byte) values in xmm
instruct Repl4C_eRegI(regXD dst, eRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate4C src));
  format %{ "MOVD $dst,$src\n\t"
            "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
  ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar zero to packed char (2 byte) values in xmm
instruct Repl4C_immI0(regXD dst, immI0 zero) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate4C zero));
  format %{ "PXOR $dst,$dst\t! replicate4C" %}
  ins_encode( pxor(dst, dst));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar to packed integer (4 byte) values in xmm
instruct Repl2I_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate2I src));
  format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
  ins_encode( pshufd(dst, src, 0x00));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar to packed integer (4 byte) values in xmm
instruct Repl2I_eRegI(regXD dst, eRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate2I src));
  format %{ "MOVD $dst,$src\n\t"
            "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
  ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar zero to packed integer (4 byte) values in xmm
instruct Repl2I_immI0(regXD dst, immI0 zero) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate2I zero));
  format %{ "PXOR $dst,$dst\t! replicate2I" %}
  ins_encode( pxor(dst, dst));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar to packed single precision floating point values in xmm
instruct Repl2F_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate2F src));
  format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
  ins_encode( pshufd(dst, src, 0xe0));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar to packed single precision floating point values in xmm
instruct Repl2F_regX(regXD dst, regX src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate2F src));
  format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
  ins_encode( pshufd(dst, src, 0xe0));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar zero to packed single precision floating point values in xmm
instruct Repl2F_immXF0(regXD dst, immXF0 zero) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate2F zero));
  format %{ "PXOR $dst,$dst\t! replicate2F" %}
  ins_encode( pxor(dst, dst));
  ins_pipe( fpu_reg_reg );
%}

// =======================================================================
// fast clearing of an array
// cnt (ECX) and base (EDI) are consumed and destroyed; EAX and the flags
// are killed.
instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
  format %{ "SHL ECX,1\t# Convert doublewords to words\n\t"
            "XOR EAX,EAX\n\t"
            "REP STOS\t# store EAX into [EDI++] while ECX--" %}
  opcode(0,0x4);
  ins_encode( Opcode(0xD1), RegOpc(ECX),
              OpcRegReg(0x33,EAX,EAX),
              Opcode(0xF3), Opcode(0xAB) );
  ins_pipe( pipe_slow );
%}

// String compare intrinsic; delegates to MacroAssembler::string_compare.
// All four inputs are destroyed, two XMM temps are needed, flags are killed.
instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eBXRegI cnt2,
                        eAXRegI result, regXD tmp1, regXD tmp2, eFlagsReg cr) %{
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast string equals
// Calls char_arrays_equals with its first argument false.
instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                       regXD tmp1, regXD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ char_arrays_equals(false, $str1$$Register, $str2$$Register,
                          $cnt$$Register, $result$$Register, $tmp3$$Register,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// String indexOf intrinsic; only available when UseSSE42Intrinsics is set.
instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                        eBXRegI result, regXD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp2, KILL cr);

  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp2, $tmp1" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, $tmp2$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// fast array equals
// Calls char_arrays_equals with its first argument true; tmp3 is passed in
// the position where string_equals passes its cnt operand.
instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                      regXD tmp1, regXD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
                          $tmp3$$Register, $result$$Register, $tmp4$$Register,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

//----------Control Flow Instructions------------------------------------------
// Signed compare Instructions
instruct compI_eReg(eFlagsReg cr, eRegI op1, eRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1, USE op2 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Signed compare of a register with an immediate.
instruct compI_eReg_imm(eFlagsReg cr, eRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of compI_eReg
instruct compI_eReg_mem(eFlagsReg cr, eRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  format %{ "CMP $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Compare a register against zero with TEST reg,reg.
instruct testI_reg( eFlagsReg cr, eRegI src, immI0 zero ) %{
  match(Set cr (CmpI src zero));
  effect( DEF cr, USE src );

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Compare (src & con) against zero with TEST reg,imm32.
instruct testI_reg_imm( eFlagsReg cr, eRegI src, immI con, immI0 zero ) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "TEST $src,$con" %}
  opcode(0xF7,0x00);
  ins_encode( OpcP, RegOpc(src), Con32(con) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Compare (src & mem) against zero with TEST reg,mem.
instruct testI_reg_mem( eFlagsReg cr, eRegI src, memory mem, immI0 zero ) %{
  match(Set cr (CmpI (AndI src mem) zero));

  format %{ "TEST $src,$mem" %}
  opcode(0x85);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Unsigned compare Instructions; really, same as signed except they
// produce an eFlagsRegU instead of eFlagsReg.
instruct compU_eReg(eFlagsRegU cr, eRegI op1, eRegI op2) %{
  // Unsigned register/register compare; same CMP encoding (3B /r) as the
  // signed compI_eReg, but the result is typed eFlagsRegU.
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Unsigned compare of a register with an immediate.
instruct compU_eReg_imm(eFlagsRegU cr, eRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of compU_eReg
instruct compU_eReg_mem(eFlagsRegU cr, eRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Cisc-spilled version of compU_eReg with the memory operand first
// (disabled).
//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, eRegI op2) %{
//  match(Set cr (CmpU (LoadI op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Unsigned compare of a register against zero with TEST reg,reg.
instruct testU_reg( eFlagsRegU cr, eRegI src, immI0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Unsigned pointer compare Instructions
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Pointer compare of a register with a pointer immediate.
instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of compP_eReg
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Cisc-spilled version of compP_eReg with the memory operand first
// (disabled).
//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
//  match(Set cr (CmpP (LoadP op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  // Same match rule as compP_eReg_mem, but restricted by the predicate to
  // loads whose address is not an oop pointer, and without the ins_cost(500).
  predicate( !n->in(2)->in(2)->bottom_type()->isa_oop_ptr() );
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

//
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
  match(Set cr (CmpP src zero));

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
  // NOTE(review): 'zero' is declared immI0 although it is the second input
  // of a CmpP, whereas testP_reg uses immP0 -- confirm whether this rule
  // can ever match as written before relying on it.
  match(Set cr (CmpP (LoadP op) zero));

  format %{ "TEST $op,0xFFFFFFFF" %}
  ins_cost(500);
  opcode(0xF7);               /* Opcode F7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Yanked all unsigned pointer compare operations.
// Pointer compares are done with CmpP which is already unsigned.

//----------Max and Min--------------------------------------------------------
// Min Instructions
////
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for min
//instruct cmovI_reg_lt( eRegI op2, eRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVlt $op2,$op1\t! min" %}
//  opcode(0x4C,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
//// Min Register with Register (P6 version)
//instruct minI_eReg_p6( eRegI op1, eRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MinI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_lt(op2,op1,cr);
//  %}
//%}

// Min Register with Register (generic version)
// dst is both an input and the result; the flags are killed.
instruct minI_eReg(eRegI dst, eRegI src, eFlagsReg flags) %{
  match(Set dst (MinI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MIN $dst,$src" %}
  opcode(0xCC);
  ins_encode( min_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// Max Register with Register
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for max
//instruct cmovI_reg_gt( eRegI op2, eRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVgt $op2,$op1\t! max" %}
//  opcode(0x4F,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
// // Max Register with Register (P6 version)
//instruct maxI_eReg_p6( eRegI op1, eRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MaxI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_gt(op2,op1,cr);
//  %}
//%}

// Max Register with Register (generic version)
// Two-address form: $dst is both an input and the result of (MaxI dst src).
// The flags register is declared KILLed; the bytes are produced by the
// max_enc encoding class, which is defined outside this section.
instruct maxI_eReg(eRegI dst, eRegI src, eFlagsReg flags) %{
  match(Set dst (MaxI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MAX $dst,$src" %}
  opcode(0xCC);
  ins_encode( max_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions
// Jump Table
// Matches the ideal Jump node.  An address table constant is built from
// _index2label and control transfers indirectly through
// table_base + switch_val (scale factor times_1).
instruct jumpXtnd(eRegI switch_val) %{
  match(Jump switch_val);
  ins_cost(350);

  format %{ "JMP [table_base](,$switch_val,1)\n\t" %}

  ins_encode %{
    address table_base = __ address_table_constant(_index2label);

    // Jump to Address(table_base + switch_reg)
    InternalAddress table(table_base);
    Address index(noreg, $switch_val$$Register, Address::times_1);
    __ jump(ArrayAddress(table, index));
  %}
  ins_pc_relative(1);
  ins_pipe(pipe_jmp);
%}

// Jump Direct - Label defines a relative address from JMP+1
// Long form of the unconditional jump: opcode E9 plus the label encoding,
// declared as 5 bytes.
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP $labl" %}
  size(5);
  opcode(0xE9);
  ins_encode( OpcP, Lbl( labl ) );
  ins_pipe( pipe_jmp );
  ins_pc_relative(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Long form of the conditional jump for an If node: opcode pair 0F 80 handed
// to the Jcc encoding together with the condition, declared as 6 bytes.
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl" %}
  size(6);
  opcode(0x0F, 0x80);
  ins_encode( Jcc( cop, labl) );
  ins_pipe( pipe_jcc );
  ins_pc_relative(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Same encoding as jmpCon, but matches the CountedLoopEnd node.
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl\t# Loop end" %}
  size(6);
  opcode(0x0F, 0x80);
  ins_encode( Jcc( cop, labl) );
  ins_pipe( pipe_jcc );
  ins_pc_relative(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// CountedLoopEnd variant taking the unsigned operand flavors
// (cmpOpU / eFlagsRegU).
instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  opcode(0x0F, 0x80);
  ins_encode( Jcc( cop, labl) );
  ins_pipe( pipe_jcc );
  ins_pc_relative(1);
%}

// CountedLoopEnd variant taking the cmpOpUCF / eFlagsRegUCF operand flavors;
// it carries a lower cost (200) than the other loop-end branches.
instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  opcode(0x0F, 0x80);
  ins_encode( Jcc( cop, labl) );
  ins_pipe( pipe_jcc );
  ins_pc_relative(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl" %}
  size(6);
  opcode(0x0F, 0x80);
  ins_encode(Jcc(cop, labl));
  ins_pipe(pipe_jcc);
  ins_pc_relative(1);
%}

// If-branch variant for the cmpOpUCF / eFlagsRegUCF operand flavors
// (cost 200).
instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl" %}
  size(6);
  opcode(0x0F, 0x80);
  ins_encode(Jcc(cop, labl));
  ins_pipe(pipe_jcc);
  ins_pc_relative(1);
%}

// Two-jump sequence for the cmpOpUCF2 conditions (only equal / notEqual are
// accepted; anything else hits ShouldNotReachHere).  A parity jump is emitted
// first, followed by the jump on the condition itself, 12 bytes in total:
//   notEqual: both jumps go to $labl;
//   equal:    the parity jump skips the second jump ("done" in the listing).
// NOTE(review): presumably the parity flag marks an unordered FP compare
// here -- confirm against the cmpOpUCF2 operand definition.
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u $labl\n\t"
      $$emit$$"J$cop,u $labl"
    } else {
      $$emit$$"JP,u done\n\t"
      $$emit$$"J$cop,u $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(12);
  opcode(0x0F, 0x80);
  ins_encode %{
    Label* l = $labl$$label;

    // First jump: primary opcode byte, then the condition byte for "parity".
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, Assembler::parity);
    int parity_disp = -1;
    if ($cop$$cmpcode == Assembler::notEqual) {
      // Same target as the second jump.  Each displacement is computed
      // relative to the end of its own 32-bit displacement field.
      parity_disp = l ? (l->loc_pos() - (cbuf.insts_size() + 4)) : 0;
    } else if ($cop$$cmpcode == Assembler::equal) {
      // Hop over the second jump emitted below
      // (opcode byte + condition byte + 32-bit displacement).
      parity_disp = 6;
    } else {
      ShouldNotReachHere();
    }
    emit_d32(cbuf, parity_disp);

    // Second jump: the requested condition, targeting the label.
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, $cop$$cmpcode);
    int disp = l ? (l->loc_pos() - (cbuf.insts_size() + 4)) : 0;
    emit_d32(cbuf, disp);
  %}
  ins_pipe(pipe_jcc);
  ins_pc_relative(1);
%}

// ============================================================================
// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
// array for an instance of the superklass. Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()). Return
// NZ for a miss or zero for a hit. The encoding ALSO sets flags.
// Value-producing form of the partial subtype check.  Operands are pinned to
// fixed registers by their operand classes (result: eDIRegP, sub: eSIRegP,
// super: eAXRegP); the ECX operand and the flags are declared KILLed.
// opcode(0x1) asks the shared encoding to also XOR the result register on a
// hit, as the listing below shows.
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect( KILL rcx, KILL cr );

  ins_cost(1100);             // slightly larger than the next version
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+arrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR $result,$result\t\t Hit: EDI zero\n\t"
            "miss:\t" %}

  opcode(0x1);                // Force a XOR of EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// Flags-only form: matches the check when its result is immediately compared
// against zero, so only the flags are produced.  The result register is a
// KILLed scratch here, and opcode(0x0) tells the shared encoding to skip the
// XOR.  It is slightly cheaper (1000) than the value-producing form.
instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect( KILL rcx, KILL result );

  ins_cost(1000);
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+arrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: flags NZ\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
            "miss:\t" %}

  opcode(0x0);                // No need to XOR EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions -- short offset versions
//
// These instructions are used to replace jumps of a long offset (the default
// match)
// with jumps of a shorter offset. These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching. Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant. The compiler will determine if a
// branch can be taken by the is_short_branch_offset() predicate in the machine
// specific code section of the file.

// Jump Direct - Label defines a relative address from JMP+1
// Short unconditional jump: opcode EB plus the short label encoding,
// declared as 2 bytes.
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  size(2);
  opcode(0xEB);
  ins_encode( OpcP, LblShort( labl ) );
  ins_pipe( pipe_jmp );
  ins_pc_relative(1);
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short conditional jump for an If node: base opcode 0x70 handed to the
// JccShort encoding, declared as 2 bytes.
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2);
  opcode(0x70);
  ins_encode( JccShort( cop, labl) );
  ins_pipe( pipe_jcc );
  ins_pc_relative(1);
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short form of the CountedLoopEnd branch.
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl\t# Loop end" %}
  size(2);
  opcode(0x70);
  ins_encode( JccShort( cop, labl) );
  ins_pipe( pipe_jcc );
  ins_pc_relative(1);
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short CountedLoopEnd branch for the unsigned operand flavors.
instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  opcode(0x70);
  ins_encode( JccShort( cop, labl) );
  ins_pipe( pipe_jcc );
  ins_pc_relative(1);
  ins_short_branch(1);
%}

// Short CountedLoopEnd branch for the cmpOpUCF / eFlagsRegUCF operand flavors.
instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  opcode(0x70);
  ins_encode( JccShort( cop, labl) );
  ins_pipe( pipe_jcc );
  ins_pc_relative(1);
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  opcode(0x70);
  ins_encode( JccShort( cop, labl) );
  ins_pipe( pipe_jcc );
  ins_pc_relative(1);
  ins_short_branch(1);
%}

// Short If-branch for the cmpOpUCF / eFlagsRegUCF operand flavors.
instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  opcode(0x70);
  ins_encode( JccShort( cop, labl) );
  ins_pipe( pipe_jcc );
  ins_pc_relative(1);
  ins_short_branch(1);
%}

// Short two-jump sequence for the cmpOpUCF2 conditions (equal / notEqual
// only): a parity jump followed by the jump on the condition, 4 bytes in
// total.  For notEqual both jumps target $labl; for equal the parity jump
// skips the 2-byte jump that follows it.  Both 8-bit displacements are
// range-checked by asserts after they have been emitted.
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s $labl\n\t"
      $$emit$$"J$cop,u,s $labl"
    } else {
      $$emit$$"JP,u,s done\n\t"
      $$emit$$"J$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  opcode(0x70);
  ins_encode %{
    Label* l = $labl$$label;

    // First jump: on parity.
    emit_cc(cbuf, $primary, Assembler::parity);
    int parity_disp = -1;
    if ($cop$$cmpcode == Assembler::notEqual) {
      // Displacement is relative to the end of the 1-byte displacement field.
      parity_disp = l ? (l->loc_pos() - (cbuf.insts_size() + 1)) : 0;
    } else if ($cop$$cmpcode == Assembler::equal) {
      // Hop over the second jump emitted below.
      parity_disp = 2;
    } else {
      ShouldNotReachHere();
    }
    emit_d8(cbuf, parity_disp);

    // Second jump: the requested condition, targeting the label.
    emit_cc(cbuf, $primary, $cop$$cmpcode);
    int disp = l ? (l->loc_pos() - (cbuf.insts_size() + 1)) : 0;
    emit_d8(cbuf, disp);
    assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
    assert(-128 <= parity_disp && parity_disp <= 127, "Displacement too large for short jmp");
  %}
  ins_pipe(pipe_jcc);
  ins_pc_relative(1);
  ins_short_branch(1);
%}

// ============================================================================
// Long Compare
//
// Currently we hold longs in 2 registers. Comparing such values efficiently
// is tricky. The flavor of compare used depends on whether we are testing
// for LT, LE, or EQ. For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test. The LE test can be had by commuting
// the operands (yielding a GE test) and then negating; negate again for the
// GT test. The EQ test is done by ORcc'ing the high and low halves, and the
// NE test is negated from that.

// Due to a shortcoming in the ADLC, it mixes up expressions like:
// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
// difference between 'Y' and '0L'. The tree-matches for the CmpI sections
// are collapsed internally in the ADLC's dfa-gen code. The match for
// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
// foo match ends up with the wrong leaf. One fix is to not match both
// reg-reg and reg-zero forms of long-compare.
// This is unfortunate because
// both forms beat the trinary form of long-compare and both are very useful
// on Intel which has so few registers.

// Manifest a CmpL result in an integer register. Very painful.
// This is the test to avoid.
// Three-way compare of two register-pair longs: $dst is zeroed, then
// decremented or incremented depending on the outcome.  The high words are
// compared with signed conditions (less / greater) and, when they are equal,
// the low words with an unsigned one (below).  Flags are KILLed.
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  match(Set dst (CmpL3 src1 src2));
  effect( KILL flags );
  ins_cost(1000);
  format %{ "XOR $dst,$dst\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "JLT,s m_one\n\t"
            "JGT,s p_one\n\t"
            "CMP $src1.lo,$src2.lo\n\t"
            "JB,s m_one\n\t"
            "JEQ,s done\n"
            "p_one:\tINC $dst\n\t"
            "JMP,s done\n"
            "m_one:\tDEC $dst\n"
            "done:" %}
  ins_encode %{
    Label p_one, m_one, done;

    // Start from 0, the "equal" answer.
    __ xorptr($dst$$Register, $dst$$Register);

    // High halves decide first (signed conditions).
    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
    __ jccb(Assembler::less, m_one);
    __ jccb(Assembler::greater, p_one);

    // High halves equal: low halves decide (unsigned condition).
    __ cmpl($src1$$Register, $src2$$Register);
    __ jccb(Assembler::below, m_one);
    __ jccb(Assembler::equal, done);

    __ bind(p_one);
    __ incrementl($dst$$Register);
    __ jmpb(done);

    __ bind(m_one);
    __ decrementl($dst$$Register);

    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// Compare-against-zero form: a single TEST of the high word with itself
// (opcode 0x85 through the RegReg_Hi2 encoding).
instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
  match( Set flags (CmpL src zero ));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg_Hi2( src, src ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// Register/register form: CMP on the low words, then the high word of src1
// is copied into the TEMP register and SBB'ed with the high word of src2
// (see the listing).  Emission is delegated to long_cmp_flags2.
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, eRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test.
// The predicate restricts this rule to Bool tests lt and ge; the rule then
// expands to the ordinary jmpCon.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl);   // JLT or JGE...
  %}
%}

// Compare 2 longs and CMOVE longs.
// Conditional move of a long (register pair) keyed on LT/GE long-compare
// flags.  Requires cmov support and a Bool test of lt or ge.  Two CMOVs are
// emitted, one for the low halves and one for the high halves.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ),
              enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Same as above with the source long loaded from memory (LoadL src); the two
// CMOVs use the RegMem and RegMem_Hi encodings respectively.
instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src),
              enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
// Conditional move of an int register keyed on LT/GE long-compare flags.
// Requires cmov support and a Bool test of lt or ge.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegI dst, eRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Same as above with the source int loaded from memory (LoadI src).
instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
// Conditional move of a pointer register keyed on LT/GE long-compare flags.
// Requires cmov support and a Bool test of lt or ge.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// The four floating-point rules below select between the regD/regF and
// regXD/regX operand flavors by UseSSE level and expand to the matching
// fcmov*_regS rule.  The lt/ge disjunction is parenthesized on purpose:
// "a && b || c" parses as "(a && b) || c", which would let the ge test
// bypass the UseSSE guard.

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovXDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regXD dst, regXD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovXD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats
instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats
instruct cmovXX_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regX dst, regX src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovX_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
// Compare-against-zero form: the low word is copied into the TEMP register
// and OR'ed with the high word (see the listing); emission is delegated to
// long_cmp_flags0.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, eRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
// Register/register form: compare the low words and, only if they are equal,
// compare the high words as well (see the listing); emission is delegated to
// long_cmp_flags1.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
// The predicate restricts this rule to Bool tests eq and ne; the rule then
// expands to the ordinary jmpCon.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl);   // JEQ or JNE...
  %}
%}

// Compare 2 longs and CMOVE longs.
// Conditional move of a long (register pair) keyed on EQ/NE long-compare
// flags.  Requires cmov support and a Bool test of eq or ne.  Two CMOVs are
// emitted, one for the low halves and one for the high halves.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ),
              enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Same as above with the source long loaded from memory (LoadL src); the two
// CMOVs use the RegMem and RegMem_Hi encodings respectively.
instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src),
              enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
// Conditional move of an int register keyed on EQ/NE long-compare flags.
// Requires cmov support and a Bool test of eq or ne.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegI dst, eRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Same as above with the source int loaded from memory (LoadI src).
instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
// Conditional move of a pointer register keyed on EQ/NE long-compare flags.
// Requires cmov support and a Bool test of eq or ne.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// The four floating-point rules below select between the regD/regF and
// regXD/regX operand flavors by UseSSE level and expand to the matching
// fcmov*_regS rule.  The eq/ne disjunction is parenthesized on purpose:
// "a && b || c" parses as "(a && b) || c", which would let the ne test
// bypass the UseSSE guard.

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovXDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regXD dst, regXD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovXD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats
instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats
instruct cmovXX_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regX dst, regX src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovX_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
// The TEMP register is zeroed, compared against the low word and then SBB'ed
// with the high word (see the listing); emission is delegated to
// long_cmp_flags3.
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, eRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands
// requires a commuted test to get the same result.
// Note that src2 and src1 are passed to long_cmp_flags2 in swapped order.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, eRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg <= zero/reg OR reg > zero/reg.
// Just a wrapper for a normal branch, plus the predicate test
// The predicate restricts this rule to Bool tests gt and le.  The condition
// operand is cmpOp_commute because the LEGT flags are produced with swapped
// operands; the rule expands to the ordinary jmpCon.
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl);   // JGT or JLE...
  %}
%}

// Compare 2 longs and CMOVE longs.
// Conditional move of a long (register pair) keyed on LE/GT long-compare
// flags.  Requires cmov support and a Bool test of le or gt.  Two CMOVs are
// emitted, one for the low halves and one for the high halves.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ),
              enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Same as above with the source long loaded from memory (LoadL src); the two
// CMOVs use the RegMem and RegMem_Hi encodings respectively.
instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src),
              enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
// Conditional move of an int register keyed on LE/GT long-compare flags.
// Requires cmov support and a Bool test of le or gt; the condition operand
// is the commuted flavor (cmpOp_commute).
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegI dst, eRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Same as above with the source int loaded from memory (LoadI src).
instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// NOTE on the four floating-point rules below: the le/gt disjunction must be
// parenthesized.  '&&' binds tighter than '||', so without the parentheses
// the UseSSE guard would apply to the 'le' test only and a 'gt' test would
// satisfy the predicate regardless of the UseSSE setting.

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovXDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regXD dst, regXD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovXD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


// Compare 2 longs and CMOVE floats
instruct cmovXX_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regX dst, regX src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovX_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  // Method-handle invokes are handled by CallStaticJavaHandle below.
  predicate(! ((CallStaticJavaNode*)n)->is_method_handle_invoke());
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_FPU,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_pc_relative(1);
  ins_alignment(4);
%}

// Call Java Static Instruction (method handle version)
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallStaticJavaHandle(method meth, eBPRegP ebp_mh_SP_save) %{
  match(CallStaticJava);
  predicate(((CallStaticJavaNode*)n)->is_method_handle_invoke());
  effect(USE meth);
  // EBP is saved by all callees (for interpreter stack correction).
  // We use it here for a similar purpose, in {preserve,restore}_SP.

  ins_cost(300);
  format %{ "CALL,static/MethodHandle " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_FPU,
              preserve_SP,
              Java_Static_Call( meth ),
              restore_SP,
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_pc_relative(1);
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "MOV EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_FPU,
              Java_Dynamic_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_pc_relative(1);
  ins_alignment(4);
%}

// Call Runtime Instruction
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime );
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Use FFREEs to clear entries in float stack
  ins_encode( pre_call_FPU,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_pc_relative(1);
%}

// Call runtime without safepoint
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_FPU,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              Verify_FPU_For_Leaf, post_call_FPU );
  ins_pipe( pipe_slow );
  ins_pc_relative(1);
%}

// Leaf call variant whose encoding is the bare runtime call: unlike
// CallLeafDirect it emits none of the pre/post-call FPU encodings.
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(Java_To_Runtime(meth));
  ins_pipe( pipe_slow );
  ins_pc_relative(1);
%}


// Return Instruction
// Remove the return address & jump to it.
instruct Ret() %{
  match(Return);
  format %{ "RET" %}
  opcode(0xC3);
  ins_encode(OpcP);
  ins_pipe( pipe_jmp );
%}

// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
  match(TailCall jump_target method_oop );
  ins_cost(300);
  format %{ "JMP $jump_target \t# EBX holds method oop" %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}


// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(300);
  format %{ "POP EDX\t# pop return address into dummy\n\t"
            "JMP $jump_target " %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( enc_pop_rdx,
              OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}

// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler. No code emitted.
instruct CreateException( eAXRegP ex_oop )
%{
  match(Set ex_oop (CreateEx));

  // Zero-sized instruction: the encoding list below is empty.
  size(0);
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe( empty );
%}


// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking


// FastLock: result is delivered in the flags register; tmp (EAX) and scr are
// declared TEMP and are therefore clobbered by the encoding.
instruct cmpFastLock( eFlagsReg cr, eRegP object, eRegP box, eAXRegI tmp, eRegP scr) %{
  match( Set cr (FastLock object box) );
  effect( TEMP tmp, TEMP scr );
  ins_cost(300);
  format %{ "FASTLOCK $object, $box KILLS $tmp,$scr" %}
  ins_encode( Fast_Lock(object,box,tmp,scr) );
  ins_pipe( pipe_slow );
  ins_pc_relative(1);
%}

// FastUnlock: result is delivered in the flags register; box is constrained
// to EAX and tmp is declared TEMP.
instruct cmpFastUnlock( eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  match( Set cr (FastUnlock object box) );
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "FASTUNLOCK $object, $box, $tmp" %}
  ins_encode( Fast_Unlock(object,box,tmp) );
  ins_pipe( pipe_slow );
  ins_pc_relative(1);
%}



// ============================================================================
// Safepoint Instruction
instruct safePoint_poll(eFlagsReg cr) %{
  match(SafePoint);
  effect(KILL cr);

  // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
  // On SPARC that might be acceptable as we can generate the address with
  // just a sethi, saving an or.  By polling at offset 0 we can end up
  // putting additional pressure on the index-0 in the D$.  Because of
  // alignment (just like the situation at hand) the lower indices tend
  // to see more traffic.  It'd be better to change the polling address
  // to offset 0 of the last $line in the polling page.

  format %{ "TSTL #polladdr,EAX\t! Safepoint: poll for GC" %}
  ins_cost(125);
  size(6) ;
  ins_encode( Safepoint_Poll() );
  ins_pipe( ialu_reg_mem );
%}

//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser.  An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(eRegI dst, eRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, eRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(eRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
peephole %{
  // instruction 0 is the root (loadI); instruction 1 is the storeI before it
  peepmatch ( loadI storeI );
  // the load must write the register the store read, from the same memory operand
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  // keep only the store, rebuilt from the store's own operands
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.