assembler_x86.cpp revision 579:0fbdb4381b99
1/* 2 * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 20 * CA 95054 USA or visit www.sun.com if you need additional information or 21 * have any questions. 22 * 23 */ 24 25#include "incls/_precompiled.incl" 26#include "incls/_assembler_x86.cpp.incl" 27 28// Implementation of AddressLiteral 29 30AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) { 31 _is_lval = false; 32 _target = target; 33 switch (rtype) { 34 case relocInfo::oop_type: 35 // Oops are a special case. Normally they would be their own section 36 // but in cases like icBuffer they are literals in the code stream that 37 // we don't have a section for. We use none so that we get a literal address 38 // which is always patchable. 
39 break; 40 case relocInfo::external_word_type: 41 _rspec = external_word_Relocation::spec(target); 42 break; 43 case relocInfo::internal_word_type: 44 _rspec = internal_word_Relocation::spec(target); 45 break; 46 case relocInfo::opt_virtual_call_type: 47 _rspec = opt_virtual_call_Relocation::spec(); 48 break; 49 case relocInfo::static_call_type: 50 _rspec = static_call_Relocation::spec(); 51 break; 52 case relocInfo::runtime_call_type: 53 _rspec = runtime_call_Relocation::spec(); 54 break; 55 case relocInfo::poll_type: 56 case relocInfo::poll_return_type: 57 _rspec = Relocation::spec_simple(rtype); 58 break; 59 case relocInfo::none: 60 break; 61 default: 62 ShouldNotReachHere(); 63 break; 64 } 65} 66 67// Implementation of Address 68 69#ifdef _LP64 70 71Address Address::make_array(ArrayAddress adr) { 72 // Not implementable on 64bit machines 73 // Should have been handled higher up the call chain. 74 ShouldNotReachHere(); 75 return Address(); 76} 77 78// exceedingly dangerous constructor 79Address::Address(int disp, address loc, relocInfo::relocType rtype) { 80 _base = noreg; 81 _index = noreg; 82 _scale = no_scale; 83 _disp = disp; 84 switch (rtype) { 85 case relocInfo::external_word_type: 86 _rspec = external_word_Relocation::spec(loc); 87 break; 88 case relocInfo::internal_word_type: 89 _rspec = internal_word_Relocation::spec(loc); 90 break; 91 case relocInfo::runtime_call_type: 92 // HMM 93 _rspec = runtime_call_Relocation::spec(); 94 break; 95 case relocInfo::poll_type: 96 case relocInfo::poll_return_type: 97 _rspec = Relocation::spec_simple(rtype); 98 break; 99 case relocInfo::none: 100 break; 101 default: 102 ShouldNotReachHere(); 103 } 104} 105#else // LP64 106 107Address Address::make_array(ArrayAddress adr) { 108 AddressLiteral base = adr.base(); 109 Address index = adr.index(); 110 assert(index._disp == 0, "must not have disp"); // maybe it can? 
111 Address array(index._base, index._index, index._scale, (intptr_t) base.target()); 112 array._rspec = base._rspec; 113 return array; 114} 115 116// exceedingly dangerous constructor 117Address::Address(address loc, RelocationHolder spec) { 118 _base = noreg; 119 _index = noreg; 120 _scale = no_scale; 121 _disp = (intptr_t) loc; 122 _rspec = spec; 123} 124 125#endif // _LP64 126 127 128 129// Convert the raw encoding form into the form expected by the constructor for 130// Address. An index of 4 (rsp) corresponds to having no index, so convert 131// that to noreg for the Address constructor. 132Address Address::make_raw(int base, int index, int scale, int disp) { 133 bool valid_index = index != rsp->encoding(); 134 if (valid_index) { 135 Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp)); 136 return madr; 137 } else { 138 Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp)); 139 return madr; 140 } 141} 142 143// Implementation of Assembler 144 145int AbstractAssembler::code_fill_byte() { 146 return (u_char)'\xF4'; // hlt 147} 148 149// make this go away someday 150void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) { 151 if (rtype == relocInfo::none) 152 emit_long(data); 153 else emit_data(data, Relocation::spec_simple(rtype), format); 154} 155 156void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) { 157 assert(imm_operand == 0, "default format must be immediate in this file"); 158 assert(inst_mark() != NULL, "must be inside InstructionMark"); 159 if (rspec.type() != relocInfo::none) { 160 #ifdef ASSERT 161 check_relocation(rspec, format); 162 #endif 163 // Do not use AbstractAssembler::relocate, which is not intended for 164 // embedded words. Instead, relocate to the enclosing instruction. 165 166 // hack. 
call32 is too wide for mask so use disp32 167 if (format == call32_operand) 168 code_section()->relocate(inst_mark(), rspec, disp32_operand); 169 else 170 code_section()->relocate(inst_mark(), rspec, format); 171 } 172 emit_long(data); 173} 174 175static int encode(Register r) { 176 int enc = r->encoding(); 177 if (enc >= 8) { 178 enc -= 8; 179 } 180 return enc; 181} 182 183static int encode(XMMRegister r) { 184 int enc = r->encoding(); 185 if (enc >= 8) { 186 enc -= 8; 187 } 188 return enc; 189} 190 191void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) { 192 assert(dst->has_byte_register(), "must have byte register"); 193 assert(isByte(op1) && isByte(op2), "wrong opcode"); 194 assert(isByte(imm8), "not a byte"); 195 assert((op1 & 0x01) == 0, "should be 8bit operation"); 196 emit_byte(op1); 197 emit_byte(op2 | encode(dst)); 198 emit_byte(imm8); 199} 200 201 202void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) { 203 assert(isByte(op1) && isByte(op2), "wrong opcode"); 204 assert((op1 & 0x01) == 1, "should be 32bit operation"); 205 assert((op1 & 0x02) == 0, "sign-extension bit should not be set"); 206 if (is8bit(imm32)) { 207 emit_byte(op1 | 0x02); // set sign bit 208 emit_byte(op2 | encode(dst)); 209 emit_byte(imm32 & 0xFF); 210 } else { 211 emit_byte(op1); 212 emit_byte(op2 | encode(dst)); 213 emit_long(imm32); 214 } 215} 216 217// immediate-to-memory forms 218void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) { 219 assert((op1 & 0x01) == 1, "should be 32bit operation"); 220 assert((op1 & 0x02) == 0, "sign-extension bit should not be set"); 221 if (is8bit(imm32)) { 222 emit_byte(op1 | 0x02); // set sign bit 223 emit_operand(rm, adr, 1); 224 emit_byte(imm32 & 0xFF); 225 } else { 226 emit_byte(op1); 227 emit_operand(rm, adr, 4); 228 emit_long(imm32); 229 } 230} 231 232void Assembler::emit_arith(int op1, int op2, Register dst, jobject obj) { 233 LP64_ONLY(ShouldNotReachHere()); 234 
assert(isByte(op1) && isByte(op2), "wrong opcode"); 235 assert((op1 & 0x01) == 1, "should be 32bit operation"); 236 assert((op1 & 0x02) == 0, "sign-extension bit should not be set"); 237 InstructionMark im(this); 238 emit_byte(op1); 239 emit_byte(op2 | encode(dst)); 240 emit_data((intptr_t)obj, relocInfo::oop_type, 0); 241} 242 243 244void Assembler::emit_arith(int op1, int op2, Register dst, Register src) { 245 assert(isByte(op1) && isByte(op2), "wrong opcode"); 246 emit_byte(op1); 247 emit_byte(op2 | encode(dst) << 3 | encode(src)); 248} 249 250 251void Assembler::emit_operand(Register reg, Register base, Register index, 252 Address::ScaleFactor scale, int disp, 253 RelocationHolder const& rspec, 254 int rip_relative_correction) { 255 relocInfo::relocType rtype = (relocInfo::relocType) rspec.type(); 256 257 // Encode the registers as needed in the fields they are used in 258 259 int regenc = encode(reg) << 3; 260 int indexenc = index->is_valid() ? encode(index) << 3 : 0; 261 int baseenc = base->is_valid() ? 
encode(base) : 0; 262 263 if (base->is_valid()) { 264 if (index->is_valid()) { 265 assert(scale != Address::no_scale, "inconsistent address"); 266 // [base + index*scale + disp] 267 if (disp == 0 && rtype == relocInfo::none && 268 base != rbp LP64_ONLY(&& base != r13)) { 269 // [base + index*scale] 270 // [00 reg 100][ss index base] 271 assert(index != rsp, "illegal addressing mode"); 272 emit_byte(0x04 | regenc); 273 emit_byte(scale << 6 | indexenc | baseenc); 274 } else if (is8bit(disp) && rtype == relocInfo::none) { 275 // [base + index*scale + imm8] 276 // [01 reg 100][ss index base] imm8 277 assert(index != rsp, "illegal addressing mode"); 278 emit_byte(0x44 | regenc); 279 emit_byte(scale << 6 | indexenc | baseenc); 280 emit_byte(disp & 0xFF); 281 } else { 282 // [base + index*scale + disp32] 283 // [10 reg 100][ss index base] disp32 284 assert(index != rsp, "illegal addressing mode"); 285 emit_byte(0x84 | regenc); 286 emit_byte(scale << 6 | indexenc | baseenc); 287 emit_data(disp, rspec, disp32_operand); 288 } 289 } else if (base == rsp LP64_ONLY(|| base == r12)) { 290 // [rsp + disp] 291 if (disp == 0 && rtype == relocInfo::none) { 292 // [rsp] 293 // [00 reg 100][00 100 100] 294 emit_byte(0x04 | regenc); 295 emit_byte(0x24); 296 } else if (is8bit(disp) && rtype == relocInfo::none) { 297 // [rsp + imm8] 298 // [01 reg 100][00 100 100] disp8 299 emit_byte(0x44 | regenc); 300 emit_byte(0x24); 301 emit_byte(disp & 0xFF); 302 } else { 303 // [rsp + imm32] 304 // [10 reg 100][00 100 100] disp32 305 emit_byte(0x84 | regenc); 306 emit_byte(0x24); 307 emit_data(disp, rspec, disp32_operand); 308 } 309 } else { 310 // [base + disp] 311 assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode"); 312 if (disp == 0 && rtype == relocInfo::none && 313 base != rbp LP64_ONLY(&& base != r13)) { 314 // [base] 315 // [00 reg base] 316 emit_byte(0x00 | regenc | baseenc); 317 } else if (is8bit(disp) && rtype == relocInfo::none) { 318 // [base + disp8] 319 // [01 reg 
base] disp8 320 emit_byte(0x40 | regenc | baseenc); 321 emit_byte(disp & 0xFF); 322 } else { 323 // [base + disp32] 324 // [10 reg base] disp32 325 emit_byte(0x80 | regenc | baseenc); 326 emit_data(disp, rspec, disp32_operand); 327 } 328 } 329 } else { 330 if (index->is_valid()) { 331 assert(scale != Address::no_scale, "inconsistent address"); 332 // [index*scale + disp] 333 // [00 reg 100][ss index 101] disp32 334 assert(index != rsp, "illegal addressing mode"); 335 emit_byte(0x04 | regenc); 336 emit_byte(scale << 6 | indexenc | 0x05); 337 emit_data(disp, rspec, disp32_operand); 338 } else if (rtype != relocInfo::none ) { 339 // [disp] (64bit) RIP-RELATIVE (32bit) abs 340 // [00 000 101] disp32 341 342 emit_byte(0x05 | regenc); 343 // Note that the RIP-rel. correction applies to the generated 344 // disp field, but _not_ to the target address in the rspec. 345 346 // disp was created by converting the target address minus the pc 347 // at the start of the instruction. That needs more correction here. 
348 // intptr_t disp = target - next_ip; 349 assert(inst_mark() != NULL, "must be inside InstructionMark"); 350 address next_ip = pc() + sizeof(int32_t) + rip_relative_correction; 351 int64_t adjusted = disp; 352 // Do rip-rel adjustment for 64bit 353 LP64_ONLY(adjusted -= (next_ip - inst_mark())); 354 assert(is_simm32(adjusted), 355 "must be 32bit offset (RIP relative address)"); 356 emit_data((int32_t) adjusted, rspec, disp32_operand); 357 358 } else { 359 // 32bit never did this, did everything as the rip-rel/disp code above 360 // [disp] ABSOLUTE 361 // [00 reg 100][00 100 101] disp32 362 emit_byte(0x04 | regenc); 363 emit_byte(0x25); 364 emit_data(disp, rspec, disp32_operand); 365 } 366 } 367} 368 369void Assembler::emit_operand(XMMRegister reg, Register base, Register index, 370 Address::ScaleFactor scale, int disp, 371 RelocationHolder const& rspec) { 372 emit_operand((Register)reg, base, index, scale, disp, rspec); 373} 374 375// Secret local extension to Assembler::WhichOperand: 376#define end_pc_operand (_WhichOperand_limit) 377 378address Assembler::locate_operand(address inst, WhichOperand which) { 379 // Decode the given instruction, and return the address of 380 // an embedded 32-bit operand word. 381 382 // If "which" is disp32_operand, selects the displacement portion 383 // of an effective address specifier. 384 // If "which" is imm64_operand, selects the trailing immediate constant. 385 // If "which" is call32_operand, selects the displacement of a call or jump. 386 // Caller is responsible for ensuring that there is such an operand, 387 // and that it is 32/64 bits wide. 388 389 // If "which" is end_pc_operand, find the end of the instruction. 390 391 address ip = inst; 392 bool is_64bit = false; 393 394 debug_only(bool has_disp32 = false); 395 int tail_size = 0; // other random bytes (#32, #16, etc.) 
at end of insn 396 397 again_after_prefix: 398 switch (0xFF & *ip++) { 399 400 // These convenience macros generate groups of "case" labels for the switch. 401#define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3 402#define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \ 403 case (x)+4: case (x)+5: case (x)+6: case (x)+7 404#define REP16(x) REP8((x)+0): \ 405 case REP8((x)+8) 406 407 case CS_segment: 408 case SS_segment: 409 case DS_segment: 410 case ES_segment: 411 case FS_segment: 412 case GS_segment: 413 // Seems dubious 414 LP64_ONLY(assert(false, "shouldn't have that prefix")); 415 assert(ip == inst+1, "only one prefix allowed"); 416 goto again_after_prefix; 417 418 case 0x67: 419 case REX: 420 case REX_B: 421 case REX_X: 422 case REX_XB: 423 case REX_R: 424 case REX_RB: 425 case REX_RX: 426 case REX_RXB: 427 NOT_LP64(assert(false, "64bit prefixes")); 428 goto again_after_prefix; 429 430 case REX_W: 431 case REX_WB: 432 case REX_WX: 433 case REX_WXB: 434 case REX_WR: 435 case REX_WRB: 436 case REX_WRX: 437 case REX_WRXB: 438 NOT_LP64(assert(false, "64bit prefixes")); 439 is_64bit = true; 440 goto again_after_prefix; 441 442 case 0xFF: // pushq a; decl a; incl a; call a; jmp a 443 case 0x88: // movb a, r 444 case 0x89: // movl a, r 445 case 0x8A: // movb r, a 446 case 0x8B: // movl r, a 447 case 0x8F: // popl a 448 debug_only(has_disp32 = true); 449 break; 450 451 case 0x68: // pushq #32 452 if (which == end_pc_operand) { 453 return ip + 4; 454 } 455 assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate"); 456 return ip; // not produced by emit_operand 457 458 case 0x66: // movw ... 
(size prefix) 459 again_after_size_prefix2: 460 switch (0xFF & *ip++) { 461 case REX: 462 case REX_B: 463 case REX_X: 464 case REX_XB: 465 case REX_R: 466 case REX_RB: 467 case REX_RX: 468 case REX_RXB: 469 case REX_W: 470 case REX_WB: 471 case REX_WX: 472 case REX_WXB: 473 case REX_WR: 474 case REX_WRB: 475 case REX_WRX: 476 case REX_WRXB: 477 NOT_LP64(assert(false, "64bit prefix found")); 478 goto again_after_size_prefix2; 479 case 0x8B: // movw r, a 480 case 0x89: // movw a, r 481 debug_only(has_disp32 = true); 482 break; 483 case 0xC7: // movw a, #16 484 debug_only(has_disp32 = true); 485 tail_size = 2; // the imm16 486 break; 487 case 0x0F: // several SSE/SSE2 variants 488 ip--; // reparse the 0x0F 489 goto again_after_prefix; 490 default: 491 ShouldNotReachHere(); 492 } 493 break; 494 495 case REP8(0xB8): // movl/q r, #32/#64(oop?) 496 if (which == end_pc_operand) return ip + (is_64bit ? 8 : 4); 497 // these asserts are somewhat nonsensical 498#ifndef _LP64 499 assert(which == imm_operand || which == disp32_operand, ""); 500#else 501 assert((which == call32_operand || which == imm_operand) && is_64bit || 502 which == narrow_oop_operand && !is_64bit, ""); 503#endif // _LP64 504 return ip; 505 506 case 0x69: // imul r, a, #32 507 case 0xC7: // movl a, #32(oop?) 508 tail_size = 4; 509 debug_only(has_disp32 = true); // has both kinds of operands! 510 break; 511 512 case 0x0F: // movx..., etc. 
513 switch (0xFF & *ip++) { 514 case 0x12: // movlps 515 case 0x28: // movaps 516 case 0x2E: // ucomiss 517 case 0x2F: // comiss 518 case 0x54: // andps 519 case 0x55: // andnps 520 case 0x56: // orps 521 case 0x57: // xorps 522 case 0x6E: // movd 523 case 0x7E: // movd 524 case 0xAE: // ldmxcsr a 525 // 64bit side says it these have both operands but that doesn't 526 // appear to be true 527 debug_only(has_disp32 = true); 528 break; 529 530 case 0xAD: // shrd r, a, %cl 531 case 0xAF: // imul r, a 532 case 0xBE: // movsbl r, a (movsxb) 533 case 0xBF: // movswl r, a (movsxw) 534 case 0xB6: // movzbl r, a (movzxb) 535 case 0xB7: // movzwl r, a (movzxw) 536 case REP16(0x40): // cmovl cc, r, a 537 case 0xB0: // cmpxchgb 538 case 0xB1: // cmpxchg 539 case 0xC1: // xaddl 540 case 0xC7: // cmpxchg8 541 case REP16(0x90): // setcc a 542 debug_only(has_disp32 = true); 543 // fall out of the switch to decode the address 544 break; 545 546 case 0xAC: // shrd r, a, #8 547 debug_only(has_disp32 = true); 548 tail_size = 1; // the imm8 549 break; 550 551 case REP16(0x80): // jcc rdisp32 552 if (which == end_pc_operand) return ip + 4; 553 assert(which == call32_operand, "jcc has no disp32 or imm"); 554 return ip; 555 default: 556 ShouldNotReachHere(); 557 } 558 break; 559 560 case 0x81: // addl a, #32; addl r, #32 561 // also: orl, adcl, sbbl, andl, subl, xorl, cmpl 562 // on 32bit in the case of cmpl, the imm might be an oop 563 tail_size = 4; 564 debug_only(has_disp32 = true); // has both kinds of operands! 565 break; 566 567 case 0x83: // addl a, #8; addl r, #8 568 // also: orl, adcl, sbbl, andl, subl, xorl, cmpl 569 debug_only(has_disp32 = true); // has both kinds of operands! 
570 tail_size = 1; 571 break; 572 573 case 0x9B: 574 switch (0xFF & *ip++) { 575 case 0xD9: // fnstcw a 576 debug_only(has_disp32 = true); 577 break; 578 default: 579 ShouldNotReachHere(); 580 } 581 break; 582 583 case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a 584 case REP4(0x10): // adc... 585 case REP4(0x20): // and... 586 case REP4(0x30): // xor... 587 case REP4(0x08): // or... 588 case REP4(0x18): // sbb... 589 case REP4(0x28): // sub... 590 case 0xF7: // mull a 591 case 0x8D: // lea r, a 592 case 0x87: // xchg r, a 593 case REP4(0x38): // cmp... 594 case 0x85: // test r, a 595 debug_only(has_disp32 = true); // has both kinds of operands! 596 break; 597 598 case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8 599 case 0xC6: // movb a, #8 600 case 0x80: // cmpb a, #8 601 case 0x6B: // imul r, a, #8 602 debug_only(has_disp32 = true); // has both kinds of operands! 603 tail_size = 1; // the imm8 604 break; 605 606 case 0xE8: // call rdisp32 607 case 0xE9: // jmp rdisp32 608 if (which == end_pc_operand) return ip + 4; 609 assert(which == call32_operand, "call has no disp32 or imm"); 610 return ip; 611 612 case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1 613 case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl 614 case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a 615 case 0xDD: // fld_d a; fst_d a; fstp_d a 616 case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a 617 case 0xDF: // fild_d a; fistp_d a 618 case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a 619 case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a 620 case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a 621 debug_only(has_disp32 = true); 622 break; 623 624 case 0xF0: // Lock 625 assert(os::is_MP(), "only on MP"); 626 goto again_after_prefix; 627 628 case 0xF3: // For SSE 629 case 0xF2: // For SSE2 630 switch (0xFF & *ip++) { 631 case REX: 632 case REX_B: 633 case REX_X: 634 case REX_XB: 635 case REX_R: 636 case REX_RB: 637 
case REX_RX: 638 case REX_RXB: 639 case REX_W: 640 case REX_WB: 641 case REX_WX: 642 case REX_WXB: 643 case REX_WR: 644 case REX_WRB: 645 case REX_WRX: 646 case REX_WRXB: 647 NOT_LP64(assert(false, "found 64bit prefix")); 648 ip++; 649 default: 650 ip++; 651 } 652 debug_only(has_disp32 = true); // has both kinds of operands! 653 break; 654 655 default: 656 ShouldNotReachHere(); 657 658#undef REP8 659#undef REP16 660 } 661 662 assert(which != call32_operand, "instruction is not a call, jmp, or jcc"); 663#ifdef _LP64 664 assert(which != imm_operand, "instruction is not a movq reg, imm64"); 665#else 666 // assert(which != imm_operand || has_imm32, "instruction has no imm32 field"); 667 assert(which != imm_operand || has_disp32, "instruction has no imm32 field"); 668#endif // LP64 669 assert(which != disp32_operand || has_disp32, "instruction has no disp32 field"); 670 671 // parse the output of emit_operand 672 int op2 = 0xFF & *ip++; 673 int base = op2 & 0x07; 674 int op3 = -1; 675 const int b100 = 4; 676 const int b101 = 5; 677 if (base == b100 && (op2 >> 6) != 3) { 678 op3 = 0xFF & *ip++; 679 base = op3 & 0x07; // refetch the base 680 } 681 // now ip points at the disp (if any) 682 683 switch (op2 >> 6) { 684 case 0: 685 // [00 reg 100][ss index base] 686 // [00 reg 100][00 100 esp] 687 // [00 reg base] 688 // [00 reg 100][ss index 101][disp32] 689 // [00 reg 101] [disp32] 690 691 if (base == b101) { 692 if (which == disp32_operand) 693 return ip; // caller wants the disp32 694 ip += 4; // skip the disp32 695 } 696 break; 697 698 case 1: 699 // [01 reg 100][ss index base][disp8] 700 // [01 reg 100][00 100 esp][disp8] 701 // [01 reg base] [disp8] 702 ip += 1; // skip the disp8 703 break; 704 705 case 2: 706 // [10 reg 100][ss index base][disp32] 707 // [10 reg 100][00 100 esp][disp32] 708 // [10 reg base] [disp32] 709 if (which == disp32_operand) 710 return ip; // caller wants the disp32 711 ip += 4; // skip the disp32 712 break; 713 714 case 3: 715 // [11 reg base] 
(not a memory addressing mode) 716 break; 717 } 718 719 if (which == end_pc_operand) { 720 return ip + tail_size; 721 } 722 723#ifdef _LP64 724 assert(false, "fix locate_operand"); 725#else 726 assert(which == imm_operand, "instruction has only an imm field"); 727#endif // LP64 728 return ip; 729} 730 731address Assembler::locate_next_instruction(address inst) { 732 // Secretly share code with locate_operand: 733 return locate_operand(inst, end_pc_operand); 734} 735 736 737#ifdef ASSERT 738void Assembler::check_relocation(RelocationHolder const& rspec, int format) { 739 address inst = inst_mark(); 740 assert(inst != NULL && inst < pc(), "must point to beginning of instruction"); 741 address opnd; 742 743 Relocation* r = rspec.reloc(); 744 if (r->type() == relocInfo::none) { 745 return; 746 } else if (r->is_call() || format == call32_operand) { 747 // assert(format == imm32_operand, "cannot specify a nonzero format"); 748 opnd = locate_operand(inst, call32_operand); 749 } else if (r->is_data()) { 750 assert(format == imm_operand || format == disp32_operand 751 LP64_ONLY(|| format == narrow_oop_operand), "format ok"); 752 opnd = locate_operand(inst, (WhichOperand)format); 753 } else { 754 assert(format == imm_operand, "cannot specify a format"); 755 return; 756 } 757 assert(opnd == pc(), "must put operand where relocs can find it"); 758} 759#endif // ASSERT 760 761void Assembler::emit_operand32(Register reg, Address adr) { 762 assert(reg->encoding() < 8, "no extended registers"); 763 assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers"); 764 emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp, 765 adr._rspec); 766} 767 768void Assembler::emit_operand(Register reg, Address adr, 769 int rip_relative_correction) { 770 emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp, 771 adr._rspec, 772 rip_relative_correction); 773} 774 775void Assembler::emit_operand(XMMRegister reg, Address adr) { 776 emit_operand(reg, adr._base, 
adr._index, adr._scale, adr._disp, 777 adr._rspec); 778} 779 780// MMX operations 781void Assembler::emit_operand(MMXRegister reg, Address adr) { 782 assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers"); 783 emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec); 784} 785 786// work around gcc (3.2.1-7a) bug 787void Assembler::emit_operand(Address adr, MMXRegister reg) { 788 assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers"); 789 emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec); 790} 791 792 793void Assembler::emit_farith(int b1, int b2, int i) { 794 assert(isByte(b1) && isByte(b2), "wrong opcode"); 795 assert(0 <= i && i < 8, "illegal stack offset"); 796 emit_byte(b1); 797 emit_byte(b2 + i); 798} 799 800 801// Now the Assembler instruction (identical for 32/64 bits) 802 803void Assembler::adcl(Register dst, int32_t imm32) { 804 prefix(dst); 805 emit_arith(0x81, 0xD0, dst, imm32); 806} 807 808void Assembler::adcl(Register dst, Address src) { 809 InstructionMark im(this); 810 prefix(src, dst); 811 emit_byte(0x13); 812 emit_operand(dst, src); 813} 814 815void Assembler::adcl(Register dst, Register src) { 816 (void) prefix_and_encode(dst->encoding(), src->encoding()); 817 emit_arith(0x13, 0xC0, dst, src); 818} 819 820void Assembler::addl(Address dst, int32_t imm32) { 821 InstructionMark im(this); 822 prefix(dst); 823 emit_arith_operand(0x81, rax, dst, imm32); 824} 825 826void Assembler::addl(Address dst, Register src) { 827 InstructionMark im(this); 828 prefix(dst, src); 829 emit_byte(0x01); 830 emit_operand(src, dst); 831} 832 833void Assembler::addl(Register dst, int32_t imm32) { 834 prefix(dst); 835 emit_arith(0x81, 0xC0, dst, imm32); 836} 837 838void Assembler::addl(Register dst, Address src) { 839 InstructionMark im(this); 840 prefix(src, dst); 841 emit_byte(0x03); 842 emit_operand(dst, src); 843} 844 845void Assembler::addl(Register dst, 
Register src) { 846 (void) prefix_and_encode(dst->encoding(), src->encoding()); 847 emit_arith(0x03, 0xC0, dst, src); 848} 849 850void Assembler::addr_nop_4() { 851 // 4 bytes: NOP DWORD PTR [EAX+0] 852 emit_byte(0x0F); 853 emit_byte(0x1F); 854 emit_byte(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc); 855 emit_byte(0); // 8-bits offset (1 byte) 856} 857 858void Assembler::addr_nop_5() { 859 // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset 860 emit_byte(0x0F); 861 emit_byte(0x1F); 862 emit_byte(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4); 863 emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc); 864 emit_byte(0); // 8-bits offset (1 byte) 865} 866 867void Assembler::addr_nop_7() { 868 // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset 869 emit_byte(0x0F); 870 emit_byte(0x1F); 871 emit_byte(0x80); // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc); 872 emit_long(0); // 32-bits offset (4 bytes) 873} 874 875void Assembler::addr_nop_8() { 876 // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset 877 emit_byte(0x0F); 878 emit_byte(0x1F); 879 emit_byte(0x84); // emit_rm(cbuf, 0x2, EAX_enc, 0x4); 880 emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc); 881 emit_long(0); // 32-bits offset (4 bytes) 882} 883 884void Assembler::addsd(XMMRegister dst, XMMRegister src) { 885 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 886 emit_byte(0xF2); 887 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 888 emit_byte(0x0F); 889 emit_byte(0x58); 890 emit_byte(0xC0 | encode); 891} 892 893void Assembler::addsd(XMMRegister dst, Address src) { 894 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 895 InstructionMark im(this); 896 emit_byte(0xF2); 897 prefix(src, dst); 898 emit_byte(0x0F); 899 emit_byte(0x58); 900 emit_operand(dst, src); 901} 902 903void Assembler::addss(XMMRegister dst, XMMRegister src) { 904 NOT_LP64(assert(VM_Version::supports_sse(), "")); 905 emit_byte(0xF3); 906 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 907 emit_byte(0x0F); 
908 emit_byte(0x58); 909 emit_byte(0xC0 | encode); 910} 911 912void Assembler::addss(XMMRegister dst, Address src) { 913 NOT_LP64(assert(VM_Version::supports_sse(), "")); 914 InstructionMark im(this); 915 emit_byte(0xF3); 916 prefix(src, dst); 917 emit_byte(0x0F); 918 emit_byte(0x58); 919 emit_operand(dst, src); 920} 921 922void Assembler::andl(Register dst, int32_t imm32) { 923 prefix(dst); 924 emit_arith(0x81, 0xE0, dst, imm32); 925} 926 927void Assembler::andl(Register dst, Address src) { 928 InstructionMark im(this); 929 prefix(src, dst); 930 emit_byte(0x23); 931 emit_operand(dst, src); 932} 933 934void Assembler::andl(Register dst, Register src) { 935 (void) prefix_and_encode(dst->encoding(), src->encoding()); 936 emit_arith(0x23, 0xC0, dst, src); 937} 938 939void Assembler::andpd(XMMRegister dst, Address src) { 940 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 941 InstructionMark im(this); 942 emit_byte(0x66); 943 prefix(src, dst); 944 emit_byte(0x0F); 945 emit_byte(0x54); 946 emit_operand(dst, src); 947} 948 949void Assembler::bswapl(Register reg) { // bswap 950 int encode = prefix_and_encode(reg->encoding()); 951 emit_byte(0x0F); 952 emit_byte(0xC8 | encode); 953} 954 955void Assembler::call(Label& L, relocInfo::relocType rtype) { 956 // suspect disp32 is always good 957 int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand); 958 959 if (L.is_bound()) { 960 const int long_size = 5; 961 int offs = (int)( target(L) - pc() ); 962 assert(offs <= 0, "assembler error"); 963 InstructionMark im(this); 964 // 1110 1000 #32-bit disp 965 emit_byte(0xE8); 966 emit_data(offs - long_size, rtype, operand); 967 } else { 968 InstructionMark im(this); 969 // 1110 1000 #32-bit disp 970 L.add_patch_at(code(), locator()); 971 972 emit_byte(0xE8); 973 emit_data(int(0), rtype, operand); 974 } 975} 976 977void Assembler::call(Register dst) { 978 // This was originally using a 32bit register encoding 979 // and surely we want 64bit! 
980 // this is a 32bit encoding but in 64bit mode the default 981 // operand size is 64bit so there is no need for the 982 // wide prefix. So prefix only happens if we use the 983 // new registers. Much like push/pop. 984 int x = offset(); 985 // this may be true but dbx disassembles it as if it 986 // were 32bits... 987 // int encode = prefix_and_encode(dst->encoding()); 988 // if (offset() != x) assert(dst->encoding() >= 8, "what?"); 989 int encode = prefixq_and_encode(dst->encoding()); 990 991 emit_byte(0xFF); 992 emit_byte(0xD0 | encode); 993} 994 995 996void Assembler::call(Address adr) { 997 InstructionMark im(this); 998 prefix(adr); 999 emit_byte(0xFF); 1000 emit_operand(rdx, adr); 1001} 1002 1003void Assembler::call_literal(address entry, RelocationHolder const& rspec) { 1004 assert(entry != NULL, "call most probably wrong"); 1005 InstructionMark im(this); 1006 emit_byte(0xE8); 1007 intptr_t disp = entry - (_code_pos + sizeof(int32_t)); 1008 assert(is_simm32(disp), "must be 32bit offset (call2)"); 1009 // Technically, should use call32_operand, but this format is 1010 // implied by the fact that we're emitting a call instruction. 
  // (tail of a call-emission routine whose start precedes this chunk)
  // Displacement operand format differs by word size: disp32 on 64-bit,
  // call32 on 32-bit.
  int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand);
  emit_data((int) disp, rspec, operand);
}

// cdq: sign-extend eax into edx:eax (opcode 0x99).
void Assembler::cdql() {
  emit_byte(0x99);
}

// cmovcc dst, src -- conditional move, register/register (0F 40+cc /r).
// CMOV must be checked for on 32-bit; all 64-bit chips have it.
void Assembler::cmovl(Condition cc, Register dst, Register src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_byte(0xC0 | encode);
}


// cmovcc dst, [src] -- conditional move from memory.
void Assembler::cmovl(Condition cc, Register dst, Address src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_operand(dst, src);
}

// cmp byte ptr [dst], imm8 (group-1 opcode 0x80; rdi encodes /7 = cmp).
void Assembler::cmpb(Address dst, int imm8) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x80);
  emit_operand(rdi, dst, 1);
  emit_byte(imm8);
}

// cmp dword ptr [dst], imm32 (0x81 /7).
void Assembler::cmpl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x81);
  emit_operand(rdi, dst, 4);
  emit_long(imm32);
}

// cmp dst, imm32 -- emit_arith may shrink to the sign-extended imm8 form.
void Assembler::cmpl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xF8, dst, imm32);
}

// cmp dst, src (register/register, opcode 0x3B /r).
void Assembler::cmpl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x3B, 0xC0, dst, src);
}


// cmp dst, [src].
void Assembler::cmpl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x3B);
  emit_operand(dst, src);
}

// cmp word ptr [dst], imm16 -- 0x66 operand-size prefix; the prefix is
// emitted before any REX could be, hence no extended registers allowed.
void Assembler::cmpw(Address dst, int imm16) {
  InstructionMark im(this);
  assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers");
  emit_byte(0x66);
  emit_byte(0x81);
  emit_operand(rdi, dst, 2);
  emit_word(imm16);
}

// The 32-bit cmpxchg compares the value at adr with the contents of rax,
// and stores reg into adr if so; otherwise, the value at adr is loaded into rax,.
// The ZF is set if the compared values were equal, and cleared otherwise.
void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg
  // Atomics bit 1 is a diagnostic mode that substitutes a non-atomic
  // instruction sequence for the real cmpxchg.
  if (Atomics & 2) {
    // caveat: no instructionmark, so this isn't relocatable.
    // Emit a synthetic, non-atomic, CAS equivalent.
    // Beware. The synthetic form sets all ICCs, not just ZF.
    // cmpxchg r,[m] is equivalent to rax, = CAS (m, rax, r)
    cmpl(rax, adr);
    movl(rax, adr);
    if (reg != rax) {
      Label L ;
      jcc(Assembler::notEqual, L);
      movl(adr, reg);
      bind(L);
    }
  } else {
    InstructionMark im(this);
    prefix(adr, reg);
    emit_byte(0x0F);
    emit_byte(0xB1);   // cmpxchg r/m32, r32
    emit_operand(reg, adr);
  }
}

// comisd dst, [src] -- ordered scalar-double compare (66-prefixed comiss).
void Assembler::comisd(XMMRegister dst, Address src) {
  // NOTE: dbx seems to decode this as comiss even though the
  // 0x66 is there. Strangely ucomisd comes out correct
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  comiss(dst, src);
}

// comiss dst, [src] -- ordered scalar-single compare (0F 2F).
void Assembler::comiss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));

  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x2F);
  emit_operand(dst, src);
}

// cvtdq2pd: packed int32 -> packed double (F3 0F E6).
void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xE6);
  emit_byte(0xC0 | encode);
}

// cvtdq2ps: packed int32 -> packed single (0F 5B).
void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5B);
  emit_byte(0xC0 | encode);
}

// cvtsd2ss: scalar double -> scalar single (F2 0F 5A).
void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5A);
  emit_byte(0xC0 | encode);
}

// cvtsi2sd: int32 -> scalar double (F2 0F 2A).
void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}

// cvtsi2ss: int32 -> scalar single (F3 0F 2A).
void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}

// cvtss2sd: scalar single -> scalar double (F3 0F 5A).
void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5A);
  emit_byte(0xC0 | encode);
}

// cvttsd2si: scalar double -> int32, truncating (F2 0F 2C).
void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}

// cvttss2si: scalar single -> int32, truncating (F3 0F 2C).
void Assembler::cvttss2sil(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}

// dec dword ptr [dst] (FF /1; rcx encodes /1).
void Assembler::decl(Address dst) {
  // Don't use it directly. Use MacroAssembler::decrement() instead.
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xFF);
  emit_operand(rcx, dst);
}

// divsd dst, [src] -- scalar double divide (F2 0F 5E).
void Assembler::divsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x5E);
  emit_operand(dst, src);
}

void Assembler::divsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5E);
  emit_byte(0xC0 | encode);
}

// divss dst, [src] -- scalar single divide (F3 0F 5E).
void Assembler::divss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x5E);
  emit_operand(dst, src);
}

void Assembler::divss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5E);
  emit_byte(0xC0 | encode);
}

// emms: clear MMX state (0F 77).
void Assembler::emms() {
  NOT_LP64(assert(VM_Version::supports_mmx(), ""));
  emit_byte(0x0F);
  emit_byte(0x77);
}

// hlt: halt the processor (0xF4).
void Assembler::hlt() {
  emit_byte(0xF4);
}

// idiv src -- signed divide edx:eax by src (F7 /7).
void Assembler::idivl(Register src) {
  int encode = prefix_and_encode(src->encoding());
  emit_byte(0xF7);
  emit_byte(0xF8 | encode);
}

// imul dst, src (0F AF /r).
void Assembler::imull(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xAF);
  emit_byte(0xC0 | encode);
}


// imul dst, src, value -- three-operand immediate multiply; uses the
// short imm8 form (0x6B) when the value fits in a signed byte.
void Assembler::imull(Register dst, Register src, int value) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  if (is8bit(value)) {
    emit_byte(0x6B);
    emit_byte(0xC0 | encode);
    emit_byte(value);
  } else {
    emit_byte(0x69);
    emit_byte(0xC0 | encode);
    emit_long(value);
  }
}

// inc dword ptr [dst] (FF /0; rax encodes /0).
void Assembler::incl(Address dst) {
  // Don't use it directly. Use MacroAssembler::increment() instead.
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xFF);
  emit_operand(rax, dst);
}

// jcc to a label.  A bound label within rel8 range (and no relocation)
// gets the 2-byte short form; otherwise the 6-byte rel32 form.  An
// unbound label always gets the rel32 form, patched when bound.
void Assembler::jcc(Condition cc, Label& L, relocInfo::relocType rtype) {
  InstructionMark im(this);
  relocate(rtype);
  assert((0 <= cc) && (cc < 16), "illegal cc");
  if (L.is_bound()) {
    address dst = target(L);
    assert(dst != NULL, "jcc most probably wrong");

    const int short_size = 2;
    const int long_size = 6;
    intptr_t offs = (intptr_t)dst - (intptr_t)_code_pos;
    if (rtype == relocInfo::none && is8bit(offs - short_size)) {
      // 0111 tttn #8-bit disp
      emit_byte(0x70 | cc);
      emit_byte((offs - short_size) & 0xFF);
    } else {
      // 0000 1111 1000 tttn #32-bit disp
      assert(is_simm32(offs - long_size),
             "must be 32bit offset (call4)");
      emit_byte(0x0F);
      emit_byte(0x80 | cc);
      emit_long(offs - long_size);
    }
  } else {
    // Note: could eliminate cond. jumps to this jump if condition
    // is the same however, seems to be rather unlikely case.
1312 // Note: use jccb() if label to be bound is very close to get 1313 // an 8-bit displacement 1314 L.add_patch_at(code(), locator()); 1315 emit_byte(0x0F); 1316 emit_byte(0x80 | cc); 1317 emit_long(0); 1318 } 1319} 1320 1321void Assembler::jccb(Condition cc, Label& L) { 1322 if (L.is_bound()) { 1323 const int short_size = 2; 1324 address entry = target(L); 1325 assert(is8bit((intptr_t)entry - ((intptr_t)_code_pos + short_size)), 1326 "Dispacement too large for a short jmp"); 1327 intptr_t offs = (intptr_t)entry - (intptr_t)_code_pos; 1328 // 0111 tttn #8-bit disp 1329 emit_byte(0x70 | cc); 1330 emit_byte((offs - short_size) & 0xFF); 1331 } else { 1332 InstructionMark im(this); 1333 L.add_patch_at(code(), locator()); 1334 emit_byte(0x70 | cc); 1335 emit_byte(0); 1336 } 1337} 1338 1339void Assembler::jmp(Address adr) { 1340 InstructionMark im(this); 1341 prefix(adr); 1342 emit_byte(0xFF); 1343 emit_operand(rsp, adr); 1344} 1345 1346void Assembler::jmp(Label& L, relocInfo::relocType rtype) { 1347 if (L.is_bound()) { 1348 address entry = target(L); 1349 assert(entry != NULL, "jmp most probably wrong"); 1350 InstructionMark im(this); 1351 const int short_size = 2; 1352 const int long_size = 5; 1353 intptr_t offs = entry - _code_pos; 1354 if (rtype == relocInfo::none && is8bit(offs - short_size)) { 1355 emit_byte(0xEB); 1356 emit_byte((offs - short_size) & 0xFF); 1357 } else { 1358 emit_byte(0xE9); 1359 emit_long(offs - long_size); 1360 } 1361 } else { 1362 // By default, forward jumps are always 32-bit displacements, since 1363 // we can't yet know where the label will be bound. If you're sure that 1364 // the forward jump will not run beyond 256 bytes, use jmpb to 1365 // force an 8-bit displacement. 
1366 InstructionMark im(this); 1367 relocate(rtype); 1368 L.add_patch_at(code(), locator()); 1369 emit_byte(0xE9); 1370 emit_long(0); 1371 } 1372} 1373 1374void Assembler::jmp(Register entry) { 1375 int encode = prefix_and_encode(entry->encoding()); 1376 emit_byte(0xFF); 1377 emit_byte(0xE0 | encode); 1378} 1379 1380void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) { 1381 InstructionMark im(this); 1382 emit_byte(0xE9); 1383 assert(dest != NULL, "must have a target"); 1384 intptr_t disp = dest - (_code_pos + sizeof(int32_t)); 1385 assert(is_simm32(disp), "must be 32bit offset (jmp)"); 1386 emit_data(disp, rspec.reloc(), call32_operand); 1387} 1388 1389void Assembler::jmpb(Label& L) { 1390 if (L.is_bound()) { 1391 const int short_size = 2; 1392 address entry = target(L); 1393 assert(is8bit((entry - _code_pos) + short_size), 1394 "Dispacement too large for a short jmp"); 1395 assert(entry != NULL, "jmp most probably wrong"); 1396 intptr_t offs = entry - _code_pos; 1397 emit_byte(0xEB); 1398 emit_byte((offs - short_size) & 0xFF); 1399 } else { 1400 InstructionMark im(this); 1401 L.add_patch_at(code(), locator()); 1402 emit_byte(0xEB); 1403 emit_byte(0); 1404 } 1405} 1406 1407void Assembler::ldmxcsr( Address src) { 1408 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1409 InstructionMark im(this); 1410 prefix(src); 1411 emit_byte(0x0F); 1412 emit_byte(0xAE); 1413 emit_operand(as_Register(2), src); 1414} 1415 1416void Assembler::leal(Register dst, Address src) { 1417 InstructionMark im(this); 1418#ifdef _LP64 1419 emit_byte(0x67); // addr32 1420 prefix(src, dst); 1421#endif // LP64 1422 emit_byte(0x8D); 1423 emit_operand(dst, src); 1424} 1425 1426void Assembler::lock() { 1427 if (Atomics & 1) { 1428 // Emit either nothing, a NOP, or a NOP: prefix 1429 emit_byte(0x90) ; 1430 } else { 1431 emit_byte(0xF0); 1432 } 1433} 1434 1435// Serializes memory. 
1436void Assembler::mfence() { 1437 // Memory barriers are only needed on multiprocessors 1438 if (os::is_MP()) { 1439 if( LP64_ONLY(true ||) VM_Version::supports_sse2() ) { 1440 emit_byte( 0x0F ); // MFENCE; faster blows no regs 1441 emit_byte( 0xAE ); 1442 emit_byte( 0xF0 ); 1443 } else { 1444 // All usable chips support "locked" instructions which suffice 1445 // as barriers, and are much faster than the alternative of 1446 // using cpuid instruction. We use here a locked add [esp],0. 1447 // This is conveniently otherwise a no-op except for blowing 1448 // flags (which we save and restore.) 1449 pushf(); // Save eflags register 1450 lock(); 1451 addl(Address(rsp, 0), 0);// Assert the lock# signal here 1452 popf(); // Restore eflags register 1453 } 1454 } 1455} 1456 1457void Assembler::mov(Register dst, Register src) { 1458 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); 1459} 1460 1461void Assembler::movapd(XMMRegister dst, XMMRegister src) { 1462 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1463 int dstenc = dst->encoding(); 1464 int srcenc = src->encoding(); 1465 emit_byte(0x66); 1466 if (dstenc < 8) { 1467 if (srcenc >= 8) { 1468 prefix(REX_B); 1469 srcenc -= 8; 1470 } 1471 } else { 1472 if (srcenc < 8) { 1473 prefix(REX_R); 1474 } else { 1475 prefix(REX_RB); 1476 srcenc -= 8; 1477 } 1478 dstenc -= 8; 1479 } 1480 emit_byte(0x0F); 1481 emit_byte(0x28); 1482 emit_byte(0xC0 | dstenc << 3 | srcenc); 1483} 1484 1485void Assembler::movaps(XMMRegister dst, XMMRegister src) { 1486 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1487 int dstenc = dst->encoding(); 1488 int srcenc = src->encoding(); 1489 if (dstenc < 8) { 1490 if (srcenc >= 8) { 1491 prefix(REX_B); 1492 srcenc -= 8; 1493 } 1494 } else { 1495 if (srcenc < 8) { 1496 prefix(REX_R); 1497 } else { 1498 prefix(REX_RB); 1499 srcenc -= 8; 1500 } 1501 dstenc -= 8; 1502 } 1503 emit_byte(0x0F); 1504 emit_byte(0x28); 1505 emit_byte(0xC0 | dstenc << 3 | srcenc); 1506} 1507 1508void 
Assembler::movb(Register dst, Address src) { 1509 NOT_LP64(assert(dst->has_byte_register(), "must have byte register")); 1510 InstructionMark im(this); 1511 prefix(src, dst, true); 1512 emit_byte(0x8A); 1513 emit_operand(dst, src); 1514} 1515 1516 1517void Assembler::movb(Address dst, int imm8) { 1518 InstructionMark im(this); 1519 prefix(dst); 1520 emit_byte(0xC6); 1521 emit_operand(rax, dst, 1); 1522 emit_byte(imm8); 1523} 1524 1525 1526void Assembler::movb(Address dst, Register src) { 1527 assert(src->has_byte_register(), "must have byte register"); 1528 InstructionMark im(this); 1529 prefix(dst, src, true); 1530 emit_byte(0x88); 1531 emit_operand(src, dst); 1532} 1533 1534void Assembler::movdl(XMMRegister dst, Register src) { 1535 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1536 emit_byte(0x66); 1537 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1538 emit_byte(0x0F); 1539 emit_byte(0x6E); 1540 emit_byte(0xC0 | encode); 1541} 1542 1543void Assembler::movdl(Register dst, XMMRegister src) { 1544 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1545 emit_byte(0x66); 1546 // swap src/dst to get correct prefix 1547 int encode = prefix_and_encode(src->encoding(), dst->encoding()); 1548 emit_byte(0x0F); 1549 emit_byte(0x7E); 1550 emit_byte(0xC0 | encode); 1551} 1552 1553void Assembler::movdqa(XMMRegister dst, Address src) { 1554 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1555 InstructionMark im(this); 1556 emit_byte(0x66); 1557 prefix(src, dst); 1558 emit_byte(0x0F); 1559 emit_byte(0x6F); 1560 emit_operand(dst, src); 1561} 1562 1563void Assembler::movdqa(XMMRegister dst, XMMRegister src) { 1564 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1565 emit_byte(0x66); 1566 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 1567 emit_byte(0x0F); 1568 emit_byte(0x6F); 1569 emit_byte(0xC0 | encode); 1570} 1571 1572void Assembler::movdqa(Address dst, XMMRegister src) { 1573 NOT_LP64(assert(VM_Version::supports_sse2(), 
"")); 1574 InstructionMark im(this); 1575 emit_byte(0x66); 1576 prefix(dst, src); 1577 emit_byte(0x0F); 1578 emit_byte(0x7F); 1579 emit_operand(src, dst); 1580} 1581 1582void Assembler::movdqu(XMMRegister dst, Address src) { 1583 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1584 InstructionMark im(this); 1585 emit_byte(0xF3); 1586 prefix(src, dst); 1587 emit_byte(0x0F); 1588 emit_byte(0x6F); 1589 emit_operand(dst, src); 1590} 1591 1592void Assembler::movdqu(XMMRegister dst, XMMRegister src) { 1593 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1594 emit_byte(0xF3); 1595 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 1596 emit_byte(0x0F); 1597 emit_byte(0x6F); 1598 emit_byte(0xC0 | encode); 1599} 1600 1601void Assembler::movdqu(Address dst, XMMRegister src) { 1602 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1603 InstructionMark im(this); 1604 emit_byte(0xF3); 1605 prefix(dst, src); 1606 emit_byte(0x0F); 1607 emit_byte(0x7F); 1608 emit_operand(src, dst); 1609} 1610 1611// Uses zero extension on 64bit 1612 1613void Assembler::movl(Register dst, int32_t imm32) { 1614 int encode = prefix_and_encode(dst->encoding()); 1615 emit_byte(0xB8 | encode); 1616 emit_long(imm32); 1617} 1618 1619void Assembler::movl(Register dst, Register src) { 1620 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1621 emit_byte(0x8B); 1622 emit_byte(0xC0 | encode); 1623} 1624 1625void Assembler::movl(Register dst, Address src) { 1626 InstructionMark im(this); 1627 prefix(src, dst); 1628 emit_byte(0x8B); 1629 emit_operand(dst, src); 1630} 1631 1632void Assembler::movl(Address dst, int32_t imm32) { 1633 InstructionMark im(this); 1634 prefix(dst); 1635 emit_byte(0xC7); 1636 emit_operand(rax, dst, 4); 1637 emit_long(imm32); 1638} 1639 1640void Assembler::movl(Address dst, Register src) { 1641 InstructionMark im(this); 1642 prefix(dst, src); 1643 emit_byte(0x89); 1644 emit_operand(src, dst); 1645} 1646 1647// New cpus require to use movsd and movss 
// to avoid partial register stall
// when loading from memory. But for old Opteron use movlpd instead of movsd.
// The selection is done in MacroAssembler::movdbl() and movflt().
void Assembler::movlpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x12);
  emit_operand(dst, src);
}

// movq dst(mmx), [src] -- MMX 64-bit load (0F 6F).
void Assembler::movq( MMXRegister dst, Address src ) {
  assert( VM_Version::supports_mmx(), "" );
  emit_byte(0x0F);
  emit_byte(0x6F);
  emit_operand(dst, src);
}

// movq [dst], src(mmx) -- MMX 64-bit store (0F 7F).
void Assembler::movq( Address dst, MMXRegister src ) {
  assert( VM_Version::supports_mmx(), "" );
  emit_byte(0x0F);
  emit_byte(0x7F);
  // workaround gcc (3.2.1-7a) bug
  // In that version of gcc with only an emit_operand(MMX, Address)
  // gcc will tail jump and try and reverse the parameters completely
  // obliterating dst in the process. By having a version available
  // that doesn't need to swap the args at the tail jump the bug is
  // avoided.
  emit_operand(dst, src);
}

// movq dst(xmm), [src] -- 64-bit load into low half (F3 0F 7E).
void Assembler::movq(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x7E);
  emit_operand(dst, src);
}

// movq [dst], src(xmm) -- 64-bit store of low half (66 0F D6).
void Assembler::movq(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(dst, src);
  emit_byte(0x0F);
  emit_byte(0xD6);
  emit_operand(src, dst);
}

// Sign-extending byte load (0F BE).
void Assembler::movsbl(Register dst, Address src) { // movsxb
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xBE);
  emit_operand(dst, src);
}

void Assembler::movsbl(Register dst, Register src) { // movsxb
  NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
  emit_byte(0x0F);
  emit_byte(0xBE);
  emit_byte(0xC0 | encode);
}

// movsd -- scalar double moves (F2 0F 10 load/reg, F2 0F 11 store).
void Assembler::movsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x10);
  emit_byte(0xC0 | encode);
}

void Assembler::movsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x10);
  emit_operand(dst, src);
}

void Assembler::movsd(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(dst, src);
  emit_byte(0x0F);
  emit_byte(0x11);
  emit_operand(src, dst);
}

// movss -- scalar single moves (F3 0F 10 load/reg, F3 0F 11 store).
void Assembler::movss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x10);
  emit_byte(0xC0 | encode);
}

void Assembler::movss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x10);
  emit_operand(dst, src);
}

void Assembler::movss(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(dst, src);
  emit_byte(0x0F);
  emit_byte(0x11);
  emit_operand(src, dst);
}

// Sign-extending word load (0F BF).
void Assembler::movswl(Register dst, Address src) { // movsxw
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xBF);
  emit_operand(dst, src);
}

void Assembler::movswl(Register dst, Register src) { // movsxw
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBF);
  emit_byte(0xC0 | encode);
}

// mov word ptr [dst], imm16 (66-prefixed C7 /0).
void Assembler::movw(Address dst, int imm16) {
  InstructionMark im(this);

  emit_byte(0x66); // switch to 16-bit mode
  prefix(dst);
  emit_byte(0xC7);
  emit_operand(rax, dst, 2);
  emit_word(imm16);
}

// Word load (66-prefixed 8B /r).
void Assembler::movw(Register dst, Address src) {
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x8B);
  emit_operand(dst, src);
}

// Word store (66-prefixed 89 /r).
void Assembler::movw(Address dst, Register src) {
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(dst, src);
  emit_byte(0x89);
  emit_operand(src, dst);
}

// Zero-extending byte load (0F B6).
void Assembler::movzbl(Register dst, Address src) { // movzxb
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB6);
  emit_operand(dst, src);
}

void Assembler::movzbl(Register dst, Register src) { // movzxb
  NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
  emit_byte(0x0F);
  emit_byte(0xB6);
  emit_byte(0xC0 | encode);
}

// Zero-extending word load (0F B7).
void Assembler::movzwl(Register dst, Address src) { // movzxw
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB7);
  emit_operand(dst, src);
}

void Assembler::movzwl(Register dst, Register src) { // movzxw
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xB7);
  emit_byte(0xC0 | encode);
}

// mul dword ptr [src] -- unsigned multiply into edx:eax (F7 /4).
void Assembler::mull(Address src) {
  InstructionMark im(this);
  prefix(src);
  emit_byte(0xF7);
  emit_operand(rsp, src);
}

// mul src (F7 /4, mod=11).
void Assembler::mull(Register src) {
  int encode = prefix_and_encode(src->encoding());
  emit_byte(0xF7);
  emit_byte(0xE0 | encode);
}

// mulsd -- scalar double multiply (F2 0F 59).
void Assembler::mulsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x59);
  emit_operand(dst, src);
}

void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x59);
  emit_byte(0xC0 | encode);
}

// mulss -- scalar single multiply (F3 0F 59).
void Assembler::mulss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x59);
  emit_operand(dst, src);
}

void Assembler::mulss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x59);
  emit_byte(0xC0 | encode);
}

// neg dst (F7 /3).
void Assembler::negl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xF7);
  emit_byte(0xD8 | encode);
}

// Emit i bytes of padding, using the fastest multi-byte NOP forms the
// current CPU recognizes.
void Assembler::nop(int i) {
#ifdef ASSERT
  assert(i > 0, " ");
  // The fancy nops aren't currently recognized by debuggers making it a
  // pain to disassemble code while debugging. If asserts are on clearly
  // speed is not an issue so simply use the single byte traditional nop
  // to do alignment.

  for (; i > 0 ; i--) emit_byte(0x90);
  return;

#endif // ASSERT

  if (UseAddressNop && VM_Version::is_intel()) {
    //
    // Using multi-bytes nops "0x0F 0x1F [address]" for Intel
    //  1: 0x90
    //  2: 0x66 0x90
    //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
    //  4: 0x0F 0x1F 0x40 0x00
    //  5: 0x0F 0x1F 0x44 0x00 0x00
    //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
    //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00

    // The rest coding is Intel specific - don't use consecutive address nops

    // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90

    while(i >= 15) {
      // For Intel don't generate consecutive address nops (mix with regular nops)
      i -= 15;
      emit_byte(0x66);   // size prefix
      emit_byte(0x66);   // size prefix
      emit_byte(0x66);   // size prefix
      addr_nop_8();
      emit_byte(0x66);   // size prefix
      emit_byte(0x66);   // size prefix
      emit_byte(0x66);   // size prefix
      emit_byte(0x90);   // nop
    }
    // Remaining 0-14 bytes; cases deliberately fall through to build the
    // byte patterns listed in the table above.
    switch (i) {
      case 14:
        emit_byte(0x66); // size prefix
      case 13:
        emit_byte(0x66); // size prefix
      case 12:
        addr_nop_8();
        emit_byte(0x66); // size prefix
        emit_byte(0x66); // size prefix
        emit_byte(0x66); // size prefix
        emit_byte(0x90); // nop
        break;
      case 11:
        emit_byte(0x66); // size prefix
      case 10:
        emit_byte(0x66); // size prefix
      case 9:
        emit_byte(0x66); // size prefix
      case 8:
        addr_nop_8();
        break;
      case 7:
        addr_nop_7();
        break;
      case 6:
        emit_byte(0x66); // size prefix
      case 5:
        addr_nop_5();
        break;
      case 4:
        addr_nop_4();
        break;
      case 3:
        // Don't use "0x0F 0x1F 0x00" - need patching safe padding
        emit_byte(0x66); // size prefix
      case 2:
        emit_byte(0x66); // size prefix
      case 1:
        emit_byte(0x90); // nop
        break;
      default:
        assert(i == 0, " ");
    }
    return;
  }
  if (UseAddressNop && VM_Version::is_amd()) {
    //
    // Using multi-bytes nops "0x0F 0x1F [address]" for AMD.
    //  1: 0x90
    //  2: 0x66 0x90
    //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
    //  4: 0x0F 0x1F 0x40 0x00
    //  5: 0x0F 0x1F 0x44 0x00 0x00
    //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
    //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00

    // The rest coding is AMD specific - use consecutive address nops

    // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
    // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
    // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    //     Size prefixes (0x66) are added for larger sizes

    while(i >= 22) {
      i -= 11;
      emit_byte(0x66); // size prefix
      emit_byte(0x66); // size prefix
      emit_byte(0x66); // size prefix
      addr_nop_8();
    }
    // Generate first nop for size between 21-12
    // (fall-through cases accumulate size prefixes before the address nop)
    switch (i) {
      case 21:
        i -= 1;
        emit_byte(0x66); // size prefix
      case 20:
      case 19:
        i -= 1;
        emit_byte(0x66); // size prefix
      case 18:
      case 17:
        i -= 1;
        emit_byte(0x66); // size prefix
      case 16:
      case 15:
        i -= 8;
        addr_nop_8();
        break;
      case 14:
      case 13:
        i -= 7;
        addr_nop_7();
        break;
      case 12:
        i -= 6;
        emit_byte(0x66); // size prefix
        addr_nop_5();
        break;
      default:
        assert(i < 12, " ");
    }

    // Generate second nop for size between 11-1
    switch (i) {
      case 11:
        emit_byte(0x66); // size prefix
      case 10:
        emit_byte(0x66); // size prefix
      case 9:
        emit_byte(0x66); // size prefix
      case 8:
        addr_nop_8();
        break;
      case 7:
        addr_nop_7();
        break;
      case 6:
        emit_byte(0x66); // size prefix
      case 5:
        addr_nop_5();
        break;
      case 4:
        addr_nop_4();
        break;
      case 3:
        // Don't use "0x0F 0x1F 0x00" - need patching safe padding
        emit_byte(0x66); // size prefix
      case 2:
        emit_byte(0x66); // size prefix
      case 1:
        emit_byte(0x90); // nop
        break;
      default:
        assert(i == 0, " ");
    }
    return;
  }

  // Using nops with size prefixes "0x66 0x90".
  // From AMD Optimization Guide:
  //  1: 0x90
  //  2: 0x66 0x90
  //  3: 0x66 0x66 0x90
  //  4: 0x66 0x66 0x66 0x90
  //  5: 0x66 0x66 0x90 0x66 0x90
  //  6: 0x66 0x66 0x90 0x66 0x66 0x90
  //  7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
  //  8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
  //  9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
  // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
  //
  while(i > 12) {
    i -= 4;
    emit_byte(0x66); // size prefix
    emit_byte(0x66);
    emit_byte(0x66);
    emit_byte(0x90); // nop
  }
  // 1 - 12 nops
  if(i > 8) {
    if(i > 9) {
      i -= 1;
      emit_byte(0x66);
    }
    i -= 3;
    emit_byte(0x66);
    emit_byte(0x66);
    emit_byte(0x90);
  }
  // 1 - 8 nops
  if(i > 4) {
    if(i > 6) {
      i -= 1;
      emit_byte(0x66);
    }
    i -= 3;
    emit_byte(0x66);
    emit_byte(0x66);
    emit_byte(0x90);
  }
  switch (i) {
    case 4:
      emit_byte(0x66);
    case 3:
      emit_byte(0x66);
    case 2:
      emit_byte(0x66);
    case 1:
      emit_byte(0x90);
      break;
    default:
      assert(i == 0, " ");
  }
}

// not dst (F7 /2).
void Assembler::notl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xF7);
  emit_byte(0xD0 | encode );
}

// or dword ptr [dst], imm32 (0x81 /1; rcx encodes /1).
void Assembler::orl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x81);
  emit_operand(rcx, dst, 4);
  emit_long(imm32);
}

// or dst, imm32 -- emit_arith may shrink to the imm8 form.
void Assembler::orl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xC8, dst, imm32);
}


// or dst, [src] (0x0B /r).
void Assembler::orl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0B);
  emit_operand(dst, src);
}


void Assembler::orl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x0B, 0xC0, dst, src);
}

// generic
void Assembler::pop(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0x58 | encode);
}

// popf: pop eflags (0x9D).
void Assembler::popf() {
  emit_byte(0x9D);
}

// pop to memory (8F /0).
void Assembler::popl(Address dst) {
  // NOTE: this will adjust stack by 8byte on 64bits
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x8F);
  emit_operand(rax, dst);
}

// Shared prefix+escape byte for the prefetch family below.
void Assembler::prefetch_prefix(Address src) {
  prefix(src);
  emit_byte(0x0F);
}

// prefetchnta [src] (0F 18 /0).
void Assembler::prefetchnta(Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x18);
  emit_operand(rax, src); // 0, src
}

// prefetch [src] -- 3DNow! prefetch (0F 0D /0).
void Assembler::prefetchr(Address src) {
  NOT_LP64(assert(VM_Version::supports_3dnow(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x0D);
  emit_operand(rax, src); // 0, src
}

// prefetcht0 [src] (0F 18 /1).
void Assembler::prefetcht0(Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x18);
  emit_operand(rcx, src); // 1, src
}

// prefetcht1 [src] (0F 18 /2).
void Assembler::prefetcht1(Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x18);
  emit_operand(rdx, src); // 2, src
}

// prefetcht2 [src] (0F 18 /3).
void Assembler::prefetcht2(Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x18);
  emit_operand(rbx, src); // 3, src
}

// prefetchw [src] -- 3DNow! prefetch for write (0F 0D /1).
void Assembler::prefetchw(Address src) {
  NOT_LP64(assert(VM_Version::supports_3dnow(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x0D);
  emit_operand(rcx, src); // 1, src
}

// Emit a raw prefix byte.
void Assembler::prefix(Prefix p) {
  a_byte(p);
}

// pshufd dst, src, mode -- shuffle packed dwords (66 0F 70 /r ib).
void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
  assert(isByte(mode), "invalid value");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  emit_byte(0x66);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x70);
  emit_byte(0xC0 | encode);
  emit_byte(mode & 0xFF);

}

void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
  assert(isByte(mode), "invalid value");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x70);
  emit_operand(dst, src);
  emit_byte(mode & 0xFF);
}

// pshuflw dst, src, mode -- shuffle packed low words (F2 0F 70 /r ib).
void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
  assert(isByte(mode), "invalid value");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x70);
  emit_byte(0xC0 | encode);
  emit_byte(mode & 0xFF);
}

void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
  assert(isByte(mode), "invalid value");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  InstructionMark
im(this); 2302 emit_byte(0xF2); 2303 prefix(src, dst); // QQ new 2304 emit_byte(0x0F); 2305 emit_byte(0x70); 2306 emit_operand(dst, src); 2307 emit_byte(mode & 0xFF); 2308} 2309 2310void Assembler::psrlq(XMMRegister dst, int shift) { 2311 // HMM Table D-1 says sse2 or mmx 2312 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2313 2314 int encode = prefixq_and_encode(xmm2->encoding(), dst->encoding()); 2315 emit_byte(0x66); 2316 emit_byte(0x0F); 2317 emit_byte(0x73); 2318 emit_byte(0xC0 | encode); 2319 emit_byte(shift); 2320} 2321 2322void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) { 2323 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2324 emit_byte(0x66); 2325 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2326 emit_byte(0x0F); 2327 emit_byte(0x60); 2328 emit_byte(0xC0 | encode); 2329} 2330 2331void Assembler::push(int32_t imm32) { 2332 // in 64bits we push 64bits onto the stack but only 2333 // take a 32bit immediate 2334 emit_byte(0x68); 2335 emit_long(imm32); 2336} 2337 2338void Assembler::push(Register src) { 2339 int encode = prefix_and_encode(src->encoding()); 2340 2341 emit_byte(0x50 | encode); 2342} 2343 2344void Assembler::pushf() { 2345 emit_byte(0x9C); 2346} 2347 2348void Assembler::pushl(Address src) { 2349 // Note this will push 64bit on 64bit 2350 InstructionMark im(this); 2351 prefix(src); 2352 emit_byte(0xFF); 2353 emit_operand(rsi, src); 2354} 2355 2356void Assembler::pxor(XMMRegister dst, Address src) { 2357 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2358 InstructionMark im(this); 2359 emit_byte(0x66); 2360 prefix(src, dst); 2361 emit_byte(0x0F); 2362 emit_byte(0xEF); 2363 emit_operand(dst, src); 2364} 2365 2366void Assembler::pxor(XMMRegister dst, XMMRegister src) { 2367 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2368 InstructionMark im(this); 2369 emit_byte(0x66); 2370 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2371 emit_byte(0x0F); 2372 emit_byte(0xEF); 2373 
emit_byte(0xC0 | encode); 2374} 2375 2376void Assembler::rcll(Register dst, int imm8) { 2377 assert(isShiftCount(imm8), "illegal shift count"); 2378 int encode = prefix_and_encode(dst->encoding()); 2379 if (imm8 == 1) { 2380 emit_byte(0xD1); 2381 emit_byte(0xD0 | encode); 2382 } else { 2383 emit_byte(0xC1); 2384 emit_byte(0xD0 | encode); 2385 emit_byte(imm8); 2386 } 2387} 2388 2389// copies data from [esi] to [edi] using rcx pointer sized words 2390// generic 2391void Assembler::rep_mov() { 2392 emit_byte(0xF3); 2393 // MOVSQ 2394 LP64_ONLY(prefix(REX_W)); 2395 emit_byte(0xA5); 2396} 2397 2398// sets rcx pointer sized words with rax, value at [edi] 2399// generic 2400void Assembler::rep_set() { // rep_set 2401 emit_byte(0xF3); 2402 // STOSQ 2403 LP64_ONLY(prefix(REX_W)); 2404 emit_byte(0xAB); 2405} 2406 2407// scans rcx pointer sized words at [edi] for occurance of rax, 2408// generic 2409void Assembler::repne_scan() { // repne_scan 2410 emit_byte(0xF2); 2411 // SCASQ 2412 LP64_ONLY(prefix(REX_W)); 2413 emit_byte(0xAF); 2414} 2415 2416#ifdef _LP64 2417// scans rcx 4 byte words at [edi] for occurance of rax, 2418// generic 2419void Assembler::repne_scanl() { // repne_scan 2420 emit_byte(0xF2); 2421 // SCASL 2422 emit_byte(0xAF); 2423} 2424#endif 2425 2426void Assembler::ret(int imm16) { 2427 if (imm16 == 0) { 2428 emit_byte(0xC3); 2429 } else { 2430 emit_byte(0xC2); 2431 emit_word(imm16); 2432 } 2433} 2434 2435void Assembler::sahf() { 2436#ifdef _LP64 2437 // Not supported in 64bit mode 2438 ShouldNotReachHere(); 2439#endif 2440 emit_byte(0x9E); 2441} 2442 2443void Assembler::sarl(Register dst, int imm8) { 2444 int encode = prefix_and_encode(dst->encoding()); 2445 assert(isShiftCount(imm8), "illegal shift count"); 2446 if (imm8 == 1) { 2447 emit_byte(0xD1); 2448 emit_byte(0xF8 | encode); 2449 } else { 2450 emit_byte(0xC1); 2451 emit_byte(0xF8 | encode); 2452 emit_byte(imm8); 2453 } 2454} 2455 2456void Assembler::sarl(Register dst) { 2457 int encode = 
prefix_and_encode(dst->encoding()); 2458 emit_byte(0xD3); 2459 emit_byte(0xF8 | encode); 2460} 2461 2462void Assembler::sbbl(Address dst, int32_t imm32) { 2463 InstructionMark im(this); 2464 prefix(dst); 2465 emit_arith_operand(0x81, rbx, dst, imm32); 2466} 2467 2468void Assembler::sbbl(Register dst, int32_t imm32) { 2469 prefix(dst); 2470 emit_arith(0x81, 0xD8, dst, imm32); 2471} 2472 2473 2474void Assembler::sbbl(Register dst, Address src) { 2475 InstructionMark im(this); 2476 prefix(src, dst); 2477 emit_byte(0x1B); 2478 emit_operand(dst, src); 2479} 2480 2481void Assembler::sbbl(Register dst, Register src) { 2482 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2483 emit_arith(0x1B, 0xC0, dst, src); 2484} 2485 2486void Assembler::setb(Condition cc, Register dst) { 2487 assert(0 <= cc && cc < 16, "illegal cc"); 2488 int encode = prefix_and_encode(dst->encoding(), true); 2489 emit_byte(0x0F); 2490 emit_byte(0x90 | cc); 2491 emit_byte(0xC0 | encode); 2492} 2493 2494void Assembler::shll(Register dst, int imm8) { 2495 assert(isShiftCount(imm8), "illegal shift count"); 2496 int encode = prefix_and_encode(dst->encoding()); 2497 if (imm8 == 1 ) { 2498 emit_byte(0xD1); 2499 emit_byte(0xE0 | encode); 2500 } else { 2501 emit_byte(0xC1); 2502 emit_byte(0xE0 | encode); 2503 emit_byte(imm8); 2504 } 2505} 2506 2507void Assembler::shll(Register dst) { 2508 int encode = prefix_and_encode(dst->encoding()); 2509 emit_byte(0xD3); 2510 emit_byte(0xE0 | encode); 2511} 2512 2513void Assembler::shrl(Register dst, int imm8) { 2514 assert(isShiftCount(imm8), "illegal shift count"); 2515 int encode = prefix_and_encode(dst->encoding()); 2516 emit_byte(0xC1); 2517 emit_byte(0xE8 | encode); 2518 emit_byte(imm8); 2519} 2520 2521void Assembler::shrl(Register dst) { 2522 int encode = prefix_and_encode(dst->encoding()); 2523 emit_byte(0xD3); 2524 emit_byte(0xE8 | encode); 2525} 2526 2527// copies a single word from [esi] to [edi] 2528void Assembler::smovl() { 2529 emit_byte(0xA5); 
2530} 2531 2532void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) { 2533 // HMM Table D-1 says sse2 2534 // NOT_LP64(assert(VM_Version::supports_sse(), "")); 2535 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2536 emit_byte(0xF2); 2537 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2538 emit_byte(0x0F); 2539 emit_byte(0x51); 2540 emit_byte(0xC0 | encode); 2541} 2542 2543void Assembler::stmxcsr( Address dst) { 2544 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2545 InstructionMark im(this); 2546 prefix(dst); 2547 emit_byte(0x0F); 2548 emit_byte(0xAE); 2549 emit_operand(as_Register(3), dst); 2550} 2551 2552void Assembler::subl(Address dst, int32_t imm32) { 2553 InstructionMark im(this); 2554 prefix(dst); 2555 if (is8bit(imm32)) { 2556 emit_byte(0x83); 2557 emit_operand(rbp, dst, 1); 2558 emit_byte(imm32 & 0xFF); 2559 } else { 2560 emit_byte(0x81); 2561 emit_operand(rbp, dst, 4); 2562 emit_long(imm32); 2563 } 2564} 2565 2566void Assembler::subl(Register dst, int32_t imm32) { 2567 prefix(dst); 2568 emit_arith(0x81, 0xE8, dst, imm32); 2569} 2570 2571void Assembler::subl(Address dst, Register src) { 2572 InstructionMark im(this); 2573 prefix(dst, src); 2574 emit_byte(0x29); 2575 emit_operand(src, dst); 2576} 2577 2578void Assembler::subl(Register dst, Address src) { 2579 InstructionMark im(this); 2580 prefix(src, dst); 2581 emit_byte(0x2B); 2582 emit_operand(dst, src); 2583} 2584 2585void Assembler::subl(Register dst, Register src) { 2586 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2587 emit_arith(0x2B, 0xC0, dst, src); 2588} 2589 2590void Assembler::subsd(XMMRegister dst, XMMRegister src) { 2591 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2592 emit_byte(0xF2); 2593 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2594 emit_byte(0x0F); 2595 emit_byte(0x5C); 2596 emit_byte(0xC0 | encode); 2597} 2598 2599void Assembler::subsd(XMMRegister dst, Address src) { 2600 
NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2601 InstructionMark im(this); 2602 emit_byte(0xF2); 2603 prefix(src, dst); 2604 emit_byte(0x0F); 2605 emit_byte(0x5C); 2606 emit_operand(dst, src); 2607} 2608 2609void Assembler::subss(XMMRegister dst, XMMRegister src) { 2610 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2611 emit_byte(0xF3); 2612 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2613 emit_byte(0x0F); 2614 emit_byte(0x5C); 2615 emit_byte(0xC0 | encode); 2616} 2617 2618void Assembler::subss(XMMRegister dst, Address src) { 2619 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2620 InstructionMark im(this); 2621 emit_byte(0xF3); 2622 prefix(src, dst); 2623 emit_byte(0x0F); 2624 emit_byte(0x5C); 2625 emit_operand(dst, src); 2626} 2627 2628void Assembler::testb(Register dst, int imm8) { 2629 NOT_LP64(assert(dst->has_byte_register(), "must have byte register")); 2630 (void) prefix_and_encode(dst->encoding(), true); 2631 emit_arith_b(0xF6, 0xC0, dst, imm8); 2632} 2633 2634void Assembler::testl(Register dst, int32_t imm32) { 2635 // not using emit_arith because test 2636 // doesn't support sign-extension of 2637 // 8bit operands 2638 int encode = dst->encoding(); 2639 if (encode == 0) { 2640 emit_byte(0xA9); 2641 } else { 2642 encode = prefix_and_encode(encode); 2643 emit_byte(0xF7); 2644 emit_byte(0xC0 | encode); 2645 } 2646 emit_long(imm32); 2647} 2648 2649void Assembler::testl(Register dst, Register src) { 2650 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2651 emit_arith(0x85, 0xC0, dst, src); 2652} 2653 2654void Assembler::testl(Register dst, Address src) { 2655 InstructionMark im(this); 2656 prefix(src, dst); 2657 emit_byte(0x85); 2658 emit_operand(dst, src); 2659} 2660 2661void Assembler::ucomisd(XMMRegister dst, Address src) { 2662 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2663 emit_byte(0x66); 2664 ucomiss(dst, src); 2665} 2666 2667void Assembler::ucomisd(XMMRegister dst, XMMRegister src) { 2668 
NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2669 emit_byte(0x66); 2670 ucomiss(dst, src); 2671} 2672 2673void Assembler::ucomiss(XMMRegister dst, Address src) { 2674 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2675 2676 InstructionMark im(this); 2677 prefix(src, dst); 2678 emit_byte(0x0F); 2679 emit_byte(0x2E); 2680 emit_operand(dst, src); 2681} 2682 2683void Assembler::ucomiss(XMMRegister dst, XMMRegister src) { 2684 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2685 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2686 emit_byte(0x0F); 2687 emit_byte(0x2E); 2688 emit_byte(0xC0 | encode); 2689} 2690 2691 2692void Assembler::xaddl(Address dst, Register src) { 2693 InstructionMark im(this); 2694 prefix(dst, src); 2695 emit_byte(0x0F); 2696 emit_byte(0xC1); 2697 emit_operand(src, dst); 2698} 2699 2700void Assembler::xchgl(Register dst, Address src) { // xchg 2701 InstructionMark im(this); 2702 prefix(src, dst); 2703 emit_byte(0x87); 2704 emit_operand(dst, src); 2705} 2706 2707void Assembler::xchgl(Register dst, Register src) { 2708 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2709 emit_byte(0x87); 2710 emit_byte(0xc0 | encode); 2711} 2712 2713void Assembler::xorl(Register dst, int32_t imm32) { 2714 prefix(dst); 2715 emit_arith(0x81, 0xF0, dst, imm32); 2716} 2717 2718void Assembler::xorl(Register dst, Address src) { 2719 InstructionMark im(this); 2720 prefix(src, dst); 2721 emit_byte(0x33); 2722 emit_operand(dst, src); 2723} 2724 2725void Assembler::xorl(Register dst, Register src) { 2726 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2727 emit_arith(0x33, 0xC0, dst, src); 2728} 2729 2730void Assembler::xorpd(XMMRegister dst, XMMRegister src) { 2731 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2732 emit_byte(0x66); 2733 xorps(dst, src); 2734} 2735 2736void Assembler::xorpd(XMMRegister dst, Address src) { 2737 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2738 InstructionMark im(this); 2739 
emit_byte(0x66); 2740 prefix(src, dst); 2741 emit_byte(0x0F); 2742 emit_byte(0x57); 2743 emit_operand(dst, src); 2744} 2745 2746 2747void Assembler::xorps(XMMRegister dst, XMMRegister src) { 2748 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2749 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2750 emit_byte(0x0F); 2751 emit_byte(0x57); 2752 emit_byte(0xC0 | encode); 2753} 2754 2755void Assembler::xorps(XMMRegister dst, Address src) { 2756 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2757 InstructionMark im(this); 2758 prefix(src, dst); 2759 emit_byte(0x0F); 2760 emit_byte(0x57); 2761 emit_operand(dst, src); 2762} 2763 2764#ifndef _LP64 2765// 32bit only pieces of the assembler 2766 2767void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) { 2768 // NO PREFIX AS NEVER 64BIT 2769 InstructionMark im(this); 2770 emit_byte(0x81); 2771 emit_byte(0xF8 | src1->encoding()); 2772 emit_data(imm32, rspec, 0); 2773} 2774 2775void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) { 2776 // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs 2777 InstructionMark im(this); 2778 emit_byte(0x81); 2779 emit_operand(rdi, src1); 2780 emit_data(imm32, rspec, 0); 2781} 2782 2783// The 64-bit (32bit platform) cmpxchg compares the value at adr with the contents of rdx:rax, 2784// and stores rcx:rbx into adr if so; otherwise, the value at adr is loaded 2785// into rdx:rax. The ZF is set if the compared values were equal, and cleared otherwise. 2786void Assembler::cmpxchg8(Address adr) { 2787 InstructionMark im(this); 2788 emit_byte(0x0F); 2789 emit_byte(0xc7); 2790 emit_operand(rcx, adr); 2791} 2792 2793void Assembler::decl(Register dst) { 2794 // Don't use it directly. Use MacroAssembler::decrementl() instead. 
2795 emit_byte(0x48 | dst->encoding()); 2796} 2797 2798#endif // _LP64 2799 2800// 64bit typically doesn't use the x87 but needs to for the trig funcs 2801 2802void Assembler::fabs() { 2803 emit_byte(0xD9); 2804 emit_byte(0xE1); 2805} 2806 2807void Assembler::fadd(int i) { 2808 emit_farith(0xD8, 0xC0, i); 2809} 2810 2811void Assembler::fadd_d(Address src) { 2812 InstructionMark im(this); 2813 emit_byte(0xDC); 2814 emit_operand32(rax, src); 2815} 2816 2817void Assembler::fadd_s(Address src) { 2818 InstructionMark im(this); 2819 emit_byte(0xD8); 2820 emit_operand32(rax, src); 2821} 2822 2823void Assembler::fadda(int i) { 2824 emit_farith(0xDC, 0xC0, i); 2825} 2826 2827void Assembler::faddp(int i) { 2828 emit_farith(0xDE, 0xC0, i); 2829} 2830 2831void Assembler::fchs() { 2832 emit_byte(0xD9); 2833 emit_byte(0xE0); 2834} 2835 2836void Assembler::fcom(int i) { 2837 emit_farith(0xD8, 0xD0, i); 2838} 2839 2840void Assembler::fcomp(int i) { 2841 emit_farith(0xD8, 0xD8, i); 2842} 2843 2844void Assembler::fcomp_d(Address src) { 2845 InstructionMark im(this); 2846 emit_byte(0xDC); 2847 emit_operand32(rbx, src); 2848} 2849 2850void Assembler::fcomp_s(Address src) { 2851 InstructionMark im(this); 2852 emit_byte(0xD8); 2853 emit_operand32(rbx, src); 2854} 2855 2856void Assembler::fcompp() { 2857 emit_byte(0xDE); 2858 emit_byte(0xD9); 2859} 2860 2861void Assembler::fcos() { 2862 emit_byte(0xD9); 2863 emit_byte(0xFF); 2864} 2865 2866void Assembler::fdecstp() { 2867 emit_byte(0xD9); 2868 emit_byte(0xF6); 2869} 2870 2871void Assembler::fdiv(int i) { 2872 emit_farith(0xD8, 0xF0, i); 2873} 2874 2875void Assembler::fdiv_d(Address src) { 2876 InstructionMark im(this); 2877 emit_byte(0xDC); 2878 emit_operand32(rsi, src); 2879} 2880 2881void Assembler::fdiv_s(Address src) { 2882 InstructionMark im(this); 2883 emit_byte(0xD8); 2884 emit_operand32(rsi, src); 2885} 2886 2887void Assembler::fdiva(int i) { 2888 emit_farith(0xDC, 0xF8, i); 2889} 2890 2891// Note: The Intel manual (Pentium 
Processor User's Manual, Vol.3, 1994) 2892// is erroneous for some of the floating-point instructions below. 2893 2894void Assembler::fdivp(int i) { 2895 emit_farith(0xDE, 0xF8, i); // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong) 2896} 2897 2898void Assembler::fdivr(int i) { 2899 emit_farith(0xD8, 0xF8, i); 2900} 2901 2902void Assembler::fdivr_d(Address src) { 2903 InstructionMark im(this); 2904 emit_byte(0xDC); 2905 emit_operand32(rdi, src); 2906} 2907 2908void Assembler::fdivr_s(Address src) { 2909 InstructionMark im(this); 2910 emit_byte(0xD8); 2911 emit_operand32(rdi, src); 2912} 2913 2914void Assembler::fdivra(int i) { 2915 emit_farith(0xDC, 0xF0, i); 2916} 2917 2918void Assembler::fdivrp(int i) { 2919 emit_farith(0xDE, 0xF0, i); // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong) 2920} 2921 2922void Assembler::ffree(int i) { 2923 emit_farith(0xDD, 0xC0, i); 2924} 2925 2926void Assembler::fild_d(Address adr) { 2927 InstructionMark im(this); 2928 emit_byte(0xDF); 2929 emit_operand32(rbp, adr); 2930} 2931 2932void Assembler::fild_s(Address adr) { 2933 InstructionMark im(this); 2934 emit_byte(0xDB); 2935 emit_operand32(rax, adr); 2936} 2937 2938void Assembler::fincstp() { 2939 emit_byte(0xD9); 2940 emit_byte(0xF7); 2941} 2942 2943void Assembler::finit() { 2944 emit_byte(0x9B); 2945 emit_byte(0xDB); 2946 emit_byte(0xE3); 2947} 2948 2949void Assembler::fist_s(Address adr) { 2950 InstructionMark im(this); 2951 emit_byte(0xDB); 2952 emit_operand32(rdx, adr); 2953} 2954 2955void Assembler::fistp_d(Address adr) { 2956 InstructionMark im(this); 2957 emit_byte(0xDF); 2958 emit_operand32(rdi, adr); 2959} 2960 2961void Assembler::fistp_s(Address adr) { 2962 InstructionMark im(this); 2963 emit_byte(0xDB); 2964 emit_operand32(rbx, adr); 2965} 2966 2967void Assembler::fld1() { 2968 emit_byte(0xD9); 2969 emit_byte(0xE8); 2970} 2971 2972void Assembler::fld_d(Address adr) { 2973 InstructionMark im(this); 2974 emit_byte(0xDD); 2975 emit_operand32(rax, adr); 2976} 2977 
2978void Assembler::fld_s(Address adr) { 2979 InstructionMark im(this); 2980 emit_byte(0xD9); 2981 emit_operand32(rax, adr); 2982} 2983 2984 2985void Assembler::fld_s(int index) { 2986 emit_farith(0xD9, 0xC0, index); 2987} 2988 2989void Assembler::fld_x(Address adr) { 2990 InstructionMark im(this); 2991 emit_byte(0xDB); 2992 emit_operand32(rbp, adr); 2993} 2994 2995void Assembler::fldcw(Address src) { 2996 InstructionMark im(this); 2997 emit_byte(0xd9); 2998 emit_operand32(rbp, src); 2999} 3000 3001void Assembler::fldenv(Address src) { 3002 InstructionMark im(this); 3003 emit_byte(0xD9); 3004 emit_operand32(rsp, src); 3005} 3006 3007void Assembler::fldlg2() { 3008 emit_byte(0xD9); 3009 emit_byte(0xEC); 3010} 3011 3012void Assembler::fldln2() { 3013 emit_byte(0xD9); 3014 emit_byte(0xED); 3015} 3016 3017void Assembler::fldz() { 3018 emit_byte(0xD9); 3019 emit_byte(0xEE); 3020} 3021 3022void Assembler::flog() { 3023 fldln2(); 3024 fxch(); 3025 fyl2x(); 3026} 3027 3028void Assembler::flog10() { 3029 fldlg2(); 3030 fxch(); 3031 fyl2x(); 3032} 3033 3034void Assembler::fmul(int i) { 3035 emit_farith(0xD8, 0xC8, i); 3036} 3037 3038void Assembler::fmul_d(Address src) { 3039 InstructionMark im(this); 3040 emit_byte(0xDC); 3041 emit_operand32(rcx, src); 3042} 3043 3044void Assembler::fmul_s(Address src) { 3045 InstructionMark im(this); 3046 emit_byte(0xD8); 3047 emit_operand32(rcx, src); 3048} 3049 3050void Assembler::fmula(int i) { 3051 emit_farith(0xDC, 0xC8, i); 3052} 3053 3054void Assembler::fmulp(int i) { 3055 emit_farith(0xDE, 0xC8, i); 3056} 3057 3058void Assembler::fnsave(Address dst) { 3059 InstructionMark im(this); 3060 emit_byte(0xDD); 3061 emit_operand32(rsi, dst); 3062} 3063 3064void Assembler::fnstcw(Address src) { 3065 InstructionMark im(this); 3066 emit_byte(0x9B); 3067 emit_byte(0xD9); 3068 emit_operand32(rdi, src); 3069} 3070 3071void Assembler::fnstsw_ax() { 3072 emit_byte(0xdF); 3073 emit_byte(0xE0); 3074} 3075 3076void Assembler::fprem() { 3077 
emit_byte(0xD9); 3078 emit_byte(0xF8); 3079} 3080 3081void Assembler::fprem1() { 3082 emit_byte(0xD9); 3083 emit_byte(0xF5); 3084} 3085 3086void Assembler::frstor(Address src) { 3087 InstructionMark im(this); 3088 emit_byte(0xDD); 3089 emit_operand32(rsp, src); 3090} 3091 3092void Assembler::fsin() { 3093 emit_byte(0xD9); 3094 emit_byte(0xFE); 3095} 3096 3097void Assembler::fsqrt() { 3098 emit_byte(0xD9); 3099 emit_byte(0xFA); 3100} 3101 3102void Assembler::fst_d(Address adr) { 3103 InstructionMark im(this); 3104 emit_byte(0xDD); 3105 emit_operand32(rdx, adr); 3106} 3107 3108void Assembler::fst_s(Address adr) { 3109 InstructionMark im(this); 3110 emit_byte(0xD9); 3111 emit_operand32(rdx, adr); 3112} 3113 3114void Assembler::fstp_d(Address adr) { 3115 InstructionMark im(this); 3116 emit_byte(0xDD); 3117 emit_operand32(rbx, adr); 3118} 3119 3120void Assembler::fstp_d(int index) { 3121 emit_farith(0xDD, 0xD8, index); 3122} 3123 3124void Assembler::fstp_s(Address adr) { 3125 InstructionMark im(this); 3126 emit_byte(0xD9); 3127 emit_operand32(rbx, adr); 3128} 3129 3130void Assembler::fstp_x(Address adr) { 3131 InstructionMark im(this); 3132 emit_byte(0xDB); 3133 emit_operand32(rdi, adr); 3134} 3135 3136void Assembler::fsub(int i) { 3137 emit_farith(0xD8, 0xE0, i); 3138} 3139 3140void Assembler::fsub_d(Address src) { 3141 InstructionMark im(this); 3142 emit_byte(0xDC); 3143 emit_operand32(rsp, src); 3144} 3145 3146void Assembler::fsub_s(Address src) { 3147 InstructionMark im(this); 3148 emit_byte(0xD8); 3149 emit_operand32(rsp, src); 3150} 3151 3152void Assembler::fsuba(int i) { 3153 emit_farith(0xDC, 0xE8, i); 3154} 3155 3156void Assembler::fsubp(int i) { 3157 emit_farith(0xDE, 0xE8, i); // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong) 3158} 3159 3160void Assembler::fsubr(int i) { 3161 emit_farith(0xD8, 0xE8, i); 3162} 3163 3164void Assembler::fsubr_d(Address src) { 3165 InstructionMark im(this); 3166 emit_byte(0xDC); 3167 emit_operand32(rbp, src); 3168} 3169 
3170void Assembler::fsubr_s(Address src) { 3171 InstructionMark im(this); 3172 emit_byte(0xD8); 3173 emit_operand32(rbp, src); 3174} 3175 3176void Assembler::fsubra(int i) { 3177 emit_farith(0xDC, 0xE0, i); 3178} 3179 3180void Assembler::fsubrp(int i) { 3181 emit_farith(0xDE, 0xE0, i); // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong) 3182} 3183 3184void Assembler::ftan() { 3185 emit_byte(0xD9); 3186 emit_byte(0xF2); 3187 emit_byte(0xDD); 3188 emit_byte(0xD8); 3189} 3190 3191void Assembler::ftst() { 3192 emit_byte(0xD9); 3193 emit_byte(0xE4); 3194} 3195 3196void Assembler::fucomi(int i) { 3197 // make sure the instruction is supported (introduced for P6, together with cmov) 3198 guarantee(VM_Version::supports_cmov(), "illegal instruction"); 3199 emit_farith(0xDB, 0xE8, i); 3200} 3201 3202void Assembler::fucomip(int i) { 3203 // make sure the instruction is supported (introduced for P6, together with cmov) 3204 guarantee(VM_Version::supports_cmov(), "illegal instruction"); 3205 emit_farith(0xDF, 0xE8, i); 3206} 3207 3208void Assembler::fwait() { 3209 emit_byte(0x9B); 3210} 3211 3212void Assembler::fxch(int i) { 3213 emit_farith(0xD9, 0xC8, i); 3214} 3215 3216void Assembler::fyl2x() { 3217 emit_byte(0xD9); 3218 emit_byte(0xF1); 3219} 3220 3221void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec, int format) { 3222 InstructionMark im(this); 3223 int encode = prefix_and_encode(dst->encoding()); 3224 emit_byte(0xB8 | encode); 3225 emit_data((int)imm32, rspec, format); 3226} 3227 3228#ifndef _LP64 3229 3230void Assembler::incl(Register dst) { 3231 // Don't use it directly. Use MacroAssembler::incrementl() instead. 
3232 emit_byte(0x40 | dst->encoding()); 3233} 3234 3235void Assembler::lea(Register dst, Address src) { 3236 leal(dst, src); 3237} 3238 3239void Assembler::mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec) { 3240 InstructionMark im(this); 3241 emit_byte(0xC7); 3242 emit_operand(rax, dst); 3243 emit_data((int)imm32, rspec, 0); 3244} 3245 3246 3247void Assembler::popa() { // 32bit 3248 emit_byte(0x61); 3249} 3250 3251void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) { 3252 InstructionMark im(this); 3253 emit_byte(0x68); 3254 emit_data(imm32, rspec, 0); 3255} 3256 3257void Assembler::pusha() { // 32bit 3258 emit_byte(0x60); 3259} 3260 3261void Assembler::set_byte_if_not_zero(Register dst) { 3262 emit_byte(0x0F); 3263 emit_byte(0x95); 3264 emit_byte(0xE0 | dst->encoding()); 3265} 3266 3267void Assembler::shldl(Register dst, Register src) { 3268 emit_byte(0x0F); 3269 emit_byte(0xA5); 3270 emit_byte(0xC0 | src->encoding() << 3 | dst->encoding()); 3271} 3272 3273void Assembler::shrdl(Register dst, Register src) { 3274 emit_byte(0x0F); 3275 emit_byte(0xAD); 3276 emit_byte(0xC0 | src->encoding() << 3 | dst->encoding()); 3277} 3278 3279#else // LP64 3280 3281// 64bit only pieces of the assembler 3282// This should only be used by 64bit instructions that can use rip-relative 3283// it cannot be used by instructions that want an immediate value. 3284 3285bool Assembler::reachable(AddressLiteral adr) { 3286 int64_t disp; 3287 // None will force a 64bit literal to the code stream. Likely a placeholder 3288 // for something that will be patched later and we need to certain it will 3289 // always be reachable. 3290 if (adr.reloc() == relocInfo::none) { 3291 return false; 3292 } 3293 if (adr.reloc() == relocInfo::internal_word_type) { 3294 // This should be rip relative and easily reachable. 
3295 return true; 3296 } 3297 if (adr.reloc() == relocInfo::virtual_call_type || 3298 adr.reloc() == relocInfo::opt_virtual_call_type || 3299 adr.reloc() == relocInfo::static_call_type || 3300 adr.reloc() == relocInfo::static_stub_type ) { 3301 // This should be rip relative within the code cache and easily 3302 // reachable until we get huge code caches. (At which point 3303 // ic code is going to have issues). 3304 return true; 3305 } 3306 if (adr.reloc() != relocInfo::external_word_type && 3307 adr.reloc() != relocInfo::poll_return_type && // these are really external_word but need special 3308 adr.reloc() != relocInfo::poll_type && // relocs to identify them 3309 adr.reloc() != relocInfo::runtime_call_type ) { 3310 return false; 3311 } 3312 3313 // Stress the correction code 3314 if (ForceUnreachable) { 3315 // Must be runtimecall reloc, see if it is in the codecache 3316 // Flipping stuff in the codecache to be unreachable causes issues 3317 // with things like inline caches where the additional instructions 3318 // are not handled. 3319 if (CodeCache::find_blob(adr._target) == NULL) { 3320 return false; 3321 } 3322 } 3323 // For external_word_type/runtime_call_type if it is reachable from where we 3324 // are now (possibly a temp buffer) and where we might end up 3325 // anywhere in the codeCache then we are always reachable. 3326 // This would have to change if we ever save/restore shared code 3327 // to be more pessimistic. 
3328 3329 disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int)); 3330 if (!is_simm32(disp)) return false; 3331 disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int)); 3332 if (!is_simm32(disp)) return false; 3333 3334 disp = (int64_t)adr._target - ((int64_t)_code_pos + sizeof(int)); 3335 3336 // Because rip relative is a disp + address_of_next_instruction and we 3337 // don't know the value of address_of_next_instruction we apply a fudge factor 3338 // to make sure we will be ok no matter the size of the instruction we get placed into. 3339 // We don't have to fudge the checks above here because they are already worst case. 3340 3341 // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp , 4-byte literal 3342 // + 4 because better safe than sorry. 3343 const int fudge = 12 + 4; 3344 if (disp < 0) { 3345 disp -= fudge; 3346 } else { 3347 disp += fudge; 3348 } 3349 return is_simm32(disp); 3350} 3351 3352void Assembler::emit_data64(jlong data, 3353 relocInfo::relocType rtype, 3354 int format) { 3355 if (rtype == relocInfo::none) { 3356 emit_long64(data); 3357 } else { 3358 emit_data64(data, Relocation::spec_simple(rtype), format); 3359 } 3360} 3361 3362void Assembler::emit_data64(jlong data, 3363 RelocationHolder const& rspec, 3364 int format) { 3365 assert(imm_operand == 0, "default format must be immediate in this file"); 3366 assert(imm_operand == format, "must be immediate"); 3367 assert(inst_mark() != NULL, "must be inside InstructionMark"); 3368 // Do not use AbstractAssembler::relocate, which is not intended for 3369 // embedded words. Instead, relocate to the enclosing instruction. 
3370 code_section()->relocate(inst_mark(), rspec, format); 3371#ifdef ASSERT 3372 check_relocation(rspec, format); 3373#endif 3374 emit_long64(data); 3375} 3376 3377int Assembler::prefix_and_encode(int reg_enc, bool byteinst) { 3378 if (reg_enc >= 8) { 3379 prefix(REX_B); 3380 reg_enc -= 8; 3381 } else if (byteinst && reg_enc >= 4) { 3382 prefix(REX); 3383 } 3384 return reg_enc; 3385} 3386 3387int Assembler::prefixq_and_encode(int reg_enc) { 3388 if (reg_enc < 8) { 3389 prefix(REX_W); 3390 } else { 3391 prefix(REX_WB); 3392 reg_enc -= 8; 3393 } 3394 return reg_enc; 3395} 3396 3397int Assembler::prefix_and_encode(int dst_enc, int src_enc, bool byteinst) { 3398 if (dst_enc < 8) { 3399 if (src_enc >= 8) { 3400 prefix(REX_B); 3401 src_enc -= 8; 3402 } else if (byteinst && src_enc >= 4) { 3403 prefix(REX); 3404 } 3405 } else { 3406 if (src_enc < 8) { 3407 prefix(REX_R); 3408 } else { 3409 prefix(REX_RB); 3410 src_enc -= 8; 3411 } 3412 dst_enc -= 8; 3413 } 3414 return dst_enc << 3 | src_enc; 3415} 3416 3417int Assembler::prefixq_and_encode(int dst_enc, int src_enc) { 3418 if (dst_enc < 8) { 3419 if (src_enc < 8) { 3420 prefix(REX_W); 3421 } else { 3422 prefix(REX_WB); 3423 src_enc -= 8; 3424 } 3425 } else { 3426 if (src_enc < 8) { 3427 prefix(REX_WR); 3428 } else { 3429 prefix(REX_WRB); 3430 src_enc -= 8; 3431 } 3432 dst_enc -= 8; 3433 } 3434 return dst_enc << 3 | src_enc; 3435} 3436 3437void Assembler::prefix(Register reg) { 3438 if (reg->encoding() >= 8) { 3439 prefix(REX_B); 3440 } 3441} 3442 3443void Assembler::prefix(Address adr) { 3444 if (adr.base_needs_rex()) { 3445 if (adr.index_needs_rex()) { 3446 prefix(REX_XB); 3447 } else { 3448 prefix(REX_B); 3449 } 3450 } else { 3451 if (adr.index_needs_rex()) { 3452 prefix(REX_X); 3453 } 3454 } 3455} 3456 3457void Assembler::prefixq(Address adr) { 3458 if (adr.base_needs_rex()) { 3459 if (adr.index_needs_rex()) { 3460 prefix(REX_WXB); 3461 } else { 3462 prefix(REX_WB); 3463 } 3464 } else { 3465 if (adr.index_needs_rex()) 
{ 3466 prefix(REX_WX); 3467 } else { 3468 prefix(REX_W); 3469 } 3470 } 3471} 3472 3473 3474void Assembler::prefix(Address adr, Register reg, bool byteinst) { 3475 if (reg->encoding() < 8) { 3476 if (adr.base_needs_rex()) { 3477 if (adr.index_needs_rex()) { 3478 prefix(REX_XB); 3479 } else { 3480 prefix(REX_B); 3481 } 3482 } else { 3483 if (adr.index_needs_rex()) { 3484 prefix(REX_X); 3485 } else if (reg->encoding() >= 4 ) { 3486 prefix(REX); 3487 } 3488 } 3489 } else { 3490 if (adr.base_needs_rex()) { 3491 if (adr.index_needs_rex()) { 3492 prefix(REX_RXB); 3493 } else { 3494 prefix(REX_RB); 3495 } 3496 } else { 3497 if (adr.index_needs_rex()) { 3498 prefix(REX_RX); 3499 } else { 3500 prefix(REX_R); 3501 } 3502 } 3503 } 3504} 3505 3506void Assembler::prefixq(Address adr, Register src) { 3507 if (src->encoding() < 8) { 3508 if (adr.base_needs_rex()) { 3509 if (adr.index_needs_rex()) { 3510 prefix(REX_WXB); 3511 } else { 3512 prefix(REX_WB); 3513 } 3514 } else { 3515 if (adr.index_needs_rex()) { 3516 prefix(REX_WX); 3517 } else { 3518 prefix(REX_W); 3519 } 3520 } 3521 } else { 3522 if (adr.base_needs_rex()) { 3523 if (adr.index_needs_rex()) { 3524 prefix(REX_WRXB); 3525 } else { 3526 prefix(REX_WRB); 3527 } 3528 } else { 3529 if (adr.index_needs_rex()) { 3530 prefix(REX_WRX); 3531 } else { 3532 prefix(REX_WR); 3533 } 3534 } 3535 } 3536} 3537 3538void Assembler::prefix(Address adr, XMMRegister reg) { 3539 if (reg->encoding() < 8) { 3540 if (adr.base_needs_rex()) { 3541 if (adr.index_needs_rex()) { 3542 prefix(REX_XB); 3543 } else { 3544 prefix(REX_B); 3545 } 3546 } else { 3547 if (adr.index_needs_rex()) { 3548 prefix(REX_X); 3549 } 3550 } 3551 } else { 3552 if (adr.base_needs_rex()) { 3553 if (adr.index_needs_rex()) { 3554 prefix(REX_RXB); 3555 } else { 3556 prefix(REX_RB); 3557 } 3558 } else { 3559 if (adr.index_needs_rex()) { 3560 prefix(REX_RX); 3561 } else { 3562 prefix(REX_R); 3563 } 3564 } 3565 } 3566} 3567 3568void Assembler::adcq(Register dst, int32_t imm32) { 
3569 (void) prefixq_and_encode(dst->encoding()); 3570 emit_arith(0x81, 0xD0, dst, imm32); 3571} 3572 3573void Assembler::adcq(Register dst, Address src) { 3574 InstructionMark im(this); 3575 prefixq(src, dst); 3576 emit_byte(0x13); 3577 emit_operand(dst, src); 3578} 3579 3580void Assembler::adcq(Register dst, Register src) { 3581 (int) prefixq_and_encode(dst->encoding(), src->encoding()); 3582 emit_arith(0x13, 0xC0, dst, src); 3583} 3584 3585void Assembler::addq(Address dst, int32_t imm32) { 3586 InstructionMark im(this); 3587 prefixq(dst); 3588 emit_arith_operand(0x81, rax, dst,imm32); 3589} 3590 3591void Assembler::addq(Address dst, Register src) { 3592 InstructionMark im(this); 3593 prefixq(dst, src); 3594 emit_byte(0x01); 3595 emit_operand(src, dst); 3596} 3597 3598void Assembler::addq(Register dst, int32_t imm32) { 3599 (void) prefixq_and_encode(dst->encoding()); 3600 emit_arith(0x81, 0xC0, dst, imm32); 3601} 3602 3603void Assembler::addq(Register dst, Address src) { 3604 InstructionMark im(this); 3605 prefixq(src, dst); 3606 emit_byte(0x03); 3607 emit_operand(dst, src); 3608} 3609 3610void Assembler::addq(Register dst, Register src) { 3611 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 3612 emit_arith(0x03, 0xC0, dst, src); 3613} 3614 3615void Assembler::andq(Register dst, int32_t imm32) { 3616 (void) prefixq_and_encode(dst->encoding()); 3617 emit_arith(0x81, 0xE0, dst, imm32); 3618} 3619 3620void Assembler::andq(Register dst, Address src) { 3621 InstructionMark im(this); 3622 prefixq(src, dst); 3623 emit_byte(0x23); 3624 emit_operand(dst, src); 3625} 3626 3627void Assembler::andq(Register dst, Register src) { 3628 (int) prefixq_and_encode(dst->encoding(), src->encoding()); 3629 emit_arith(0x23, 0xC0, dst, src); 3630} 3631 3632void Assembler::bswapq(Register reg) { 3633 int encode = prefixq_and_encode(reg->encoding()); 3634 emit_byte(0x0F); 3635 emit_byte(0xC8 | encode); 3636} 3637 3638void Assembler::cdqq() { 3639 prefix(REX_W); 3640 
emit_byte(0x99); 3641} 3642 3643void Assembler::clflush(Address adr) { 3644 prefix(adr); 3645 emit_byte(0x0F); 3646 emit_byte(0xAE); 3647 emit_operand(rdi, adr); 3648} 3649 3650void Assembler::cmovq(Condition cc, Register dst, Register src) { 3651 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 3652 emit_byte(0x0F); 3653 emit_byte(0x40 | cc); 3654 emit_byte(0xC0 | encode); 3655} 3656 3657void Assembler::cmovq(Condition cc, Register dst, Address src) { 3658 InstructionMark im(this); 3659 prefixq(src, dst); 3660 emit_byte(0x0F); 3661 emit_byte(0x40 | cc); 3662 emit_operand(dst, src); 3663} 3664 3665void Assembler::cmpq(Address dst, int32_t imm32) { 3666 InstructionMark im(this); 3667 prefixq(dst); 3668 emit_byte(0x81); 3669 emit_operand(rdi, dst, 4); 3670 emit_long(imm32); 3671} 3672 3673void Assembler::cmpq(Register dst, int32_t imm32) { 3674 (void) prefixq_and_encode(dst->encoding()); 3675 emit_arith(0x81, 0xF8, dst, imm32); 3676} 3677 3678void Assembler::cmpq(Address dst, Register src) { 3679 InstructionMark im(this); 3680 prefixq(dst, src); 3681 emit_byte(0x3B); 3682 emit_operand(src, dst); 3683} 3684 3685void Assembler::cmpq(Register dst, Register src) { 3686 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 3687 emit_arith(0x3B, 0xC0, dst, src); 3688} 3689 3690void Assembler::cmpq(Register dst, Address src) { 3691 InstructionMark im(this); 3692 prefixq(src, dst); 3693 emit_byte(0x3B); 3694 emit_operand(dst, src); 3695} 3696 3697void Assembler::cmpxchgq(Register reg, Address adr) { 3698 InstructionMark im(this); 3699 prefixq(adr, reg); 3700 emit_byte(0x0F); 3701 emit_byte(0xB1); 3702 emit_operand(reg, adr); 3703} 3704 3705void Assembler::cvtsi2sdq(XMMRegister dst, Register src) { 3706 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3707 emit_byte(0xF2); 3708 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 3709 emit_byte(0x0F); 3710 emit_byte(0x2A); 3711 emit_byte(0xC0 | encode); 3712} 3713 3714void 
Assembler::cvtsi2ssq(XMMRegister dst, Register src) { 3715 NOT_LP64(assert(VM_Version::supports_sse(), "")); 3716 emit_byte(0xF3); 3717 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 3718 emit_byte(0x0F); 3719 emit_byte(0x2A); 3720 emit_byte(0xC0 | encode); 3721} 3722 3723void Assembler::cvttsd2siq(Register dst, XMMRegister src) { 3724 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3725 emit_byte(0xF2); 3726 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 3727 emit_byte(0x0F); 3728 emit_byte(0x2C); 3729 emit_byte(0xC0 | encode); 3730} 3731 3732void Assembler::cvttss2siq(Register dst, XMMRegister src) { 3733 NOT_LP64(assert(VM_Version::supports_sse(), "")); 3734 emit_byte(0xF3); 3735 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 3736 emit_byte(0x0F); 3737 emit_byte(0x2C); 3738 emit_byte(0xC0 | encode); 3739} 3740 3741void Assembler::decl(Register dst) { 3742 // Don't use it directly. Use MacroAssembler::decrementl() instead. 3743 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode) 3744 int encode = prefix_and_encode(dst->encoding()); 3745 emit_byte(0xFF); 3746 emit_byte(0xC8 | encode); 3747} 3748 3749void Assembler::decq(Register dst) { 3750 // Don't use it directly. Use MacroAssembler::decrementq() instead. 3751 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode) 3752 int encode = prefixq_and_encode(dst->encoding()); 3753 emit_byte(0xFF); 3754 emit_byte(0xC8 | encode); 3755} 3756 3757void Assembler::decq(Address dst) { 3758 // Don't use it directly. Use MacroAssembler::decrementq() instead. 
3759 InstructionMark im(this); 3760 prefixq(dst); 3761 emit_byte(0xFF); 3762 emit_operand(rcx, dst); 3763} 3764 3765void Assembler::fxrstor(Address src) { 3766 prefixq(src); 3767 emit_byte(0x0F); 3768 emit_byte(0xAE); 3769 emit_operand(as_Register(1), src); 3770} 3771 3772void Assembler::fxsave(Address dst) { 3773 prefixq(dst); 3774 emit_byte(0x0F); 3775 emit_byte(0xAE); 3776 emit_operand(as_Register(0), dst); 3777} 3778 3779void Assembler::idivq(Register src) { 3780 int encode = prefixq_and_encode(src->encoding()); 3781 emit_byte(0xF7); 3782 emit_byte(0xF8 | encode); 3783} 3784 3785void Assembler::imulq(Register dst, Register src) { 3786 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 3787 emit_byte(0x0F); 3788 emit_byte(0xAF); 3789 emit_byte(0xC0 | encode); 3790} 3791 3792void Assembler::imulq(Register dst, Register src, int value) { 3793 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 3794 if (is8bit(value)) { 3795 emit_byte(0x6B); 3796 emit_byte(0xC0 | encode); 3797 emit_byte(value); 3798 } else { 3799 emit_byte(0x69); 3800 emit_byte(0xC0 | encode); 3801 emit_long(value); 3802 } 3803} 3804 3805void Assembler::incl(Register dst) { 3806 // Don't use it directly. Use MacroAssembler::incrementl() instead. 3807 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode) 3808 int encode = prefix_and_encode(dst->encoding()); 3809 emit_byte(0xFF); 3810 emit_byte(0xC0 | encode); 3811} 3812 3813void Assembler::incq(Register dst) { 3814 // Don't use it directly. Use MacroAssembler::incrementq() instead. 3815 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode) 3816 int encode = prefixq_and_encode(dst->encoding()); 3817 emit_byte(0xFF); 3818 emit_byte(0xC0 | encode); 3819} 3820 3821void Assembler::incq(Address dst) { 3822 // Don't use it directly. Use MacroAssembler::incrementq() instead. 
3823 InstructionMark im(this); 3824 prefixq(dst); 3825 emit_byte(0xFF); 3826 emit_operand(rax, dst); 3827} 3828 3829void Assembler::lea(Register dst, Address src) { 3830 leaq(dst, src); 3831} 3832 3833void Assembler::leaq(Register dst, Address src) { 3834 InstructionMark im(this); 3835 prefixq(src, dst); 3836 emit_byte(0x8D); 3837 emit_operand(dst, src); 3838} 3839 3840void Assembler::mov64(Register dst, int64_t imm64) { 3841 InstructionMark im(this); 3842 int encode = prefixq_and_encode(dst->encoding()); 3843 emit_byte(0xB8 | encode); 3844 emit_long64(imm64); 3845} 3846 3847void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) { 3848 InstructionMark im(this); 3849 int encode = prefixq_and_encode(dst->encoding()); 3850 emit_byte(0xB8 | encode); 3851 emit_data64(imm64, rspec); 3852} 3853 3854void Assembler::movdq(XMMRegister dst, Register src) { 3855 // table D-1 says MMX/SSE2 3856 NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), "")); 3857 emit_byte(0x66); 3858 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 3859 emit_byte(0x0F); 3860 emit_byte(0x6E); 3861 emit_byte(0xC0 | encode); 3862} 3863 3864void Assembler::movdq(Register dst, XMMRegister src) { 3865 // table D-1 says MMX/SSE2 3866 NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), "")); 3867 emit_byte(0x66); 3868 // swap src/dst to get correct prefix 3869 int encode = prefixq_and_encode(src->encoding(), dst->encoding()); 3870 emit_byte(0x0F); 3871 emit_byte(0x7E); 3872 emit_byte(0xC0 | encode); 3873} 3874 3875void Assembler::movq(Register dst, Register src) { 3876 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 3877 emit_byte(0x8B); 3878 emit_byte(0xC0 | encode); 3879} 3880 3881void Assembler::movq(Register dst, Address src) { 3882 InstructionMark im(this); 3883 prefixq(src, dst); 3884 emit_byte(0x8B); 3885 emit_operand(dst, src); 3886} 3887 3888void Assembler::movq(Address dst, 
Register src) { 3889 InstructionMark im(this); 3890 prefixq(dst, src); 3891 emit_byte(0x89); 3892 emit_operand(src, dst); 3893} 3894 3895void Assembler::movslq(Register dst, int32_t imm32) { 3896 // dbx shows movslq(rcx, 3) as movq $0x0000000049000000,(%rbx) 3897 // and movslq(r8, 3); as movl $0x0000000048000000,(%rbx) 3898 // as a result we shouldn't use until tested at runtime... 3899 ShouldNotReachHere(); 3900 InstructionMark im(this); 3901 int encode = prefixq_and_encode(dst->encoding()); 3902 emit_byte(0xC7 | encode); 3903 emit_long(imm32); 3904} 3905 3906void Assembler::movslq(Address dst, int32_t imm32) { 3907 assert(is_simm32(imm32), "lost bits"); 3908 InstructionMark im(this); 3909 prefixq(dst); 3910 emit_byte(0xC7); 3911 emit_operand(rax, dst, 4); 3912 emit_long(imm32); 3913} 3914 3915void Assembler::movslq(Register dst, Address src) { 3916 InstructionMark im(this); 3917 prefixq(src, dst); 3918 emit_byte(0x63); 3919 emit_operand(dst, src); 3920} 3921 3922void Assembler::movslq(Register dst, Register src) { 3923 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 3924 emit_byte(0x63); 3925 emit_byte(0xC0 | encode); 3926} 3927 3928void Assembler::negq(Register dst) { 3929 int encode = prefixq_and_encode(dst->encoding()); 3930 emit_byte(0xF7); 3931 emit_byte(0xD8 | encode); 3932} 3933 3934void Assembler::notq(Register dst) { 3935 int encode = prefixq_and_encode(dst->encoding()); 3936 emit_byte(0xF7); 3937 emit_byte(0xD0 | encode); 3938} 3939 3940void Assembler::orq(Address dst, int32_t imm32) { 3941 InstructionMark im(this); 3942 prefixq(dst); 3943 emit_byte(0x81); 3944 emit_operand(rcx, dst, 4); 3945 emit_long(imm32); 3946} 3947 3948void Assembler::orq(Register dst, int32_t imm32) { 3949 (void) prefixq_and_encode(dst->encoding()); 3950 emit_arith(0x81, 0xC8, dst, imm32); 3951} 3952 3953void Assembler::orq(Register dst, Address src) { 3954 InstructionMark im(this); 3955 prefixq(src, dst); 3956 emit_byte(0x0B); 3957 emit_operand(dst, src); 
3958} 3959 3960void Assembler::orq(Register dst, Register src) { 3961 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 3962 emit_arith(0x0B, 0xC0, dst, src); 3963} 3964 3965void Assembler::popa() { // 64bit 3966 movq(r15, Address(rsp, 0)); 3967 movq(r14, Address(rsp, wordSize)); 3968 movq(r13, Address(rsp, 2 * wordSize)); 3969 movq(r12, Address(rsp, 3 * wordSize)); 3970 movq(r11, Address(rsp, 4 * wordSize)); 3971 movq(r10, Address(rsp, 5 * wordSize)); 3972 movq(r9, Address(rsp, 6 * wordSize)); 3973 movq(r8, Address(rsp, 7 * wordSize)); 3974 movq(rdi, Address(rsp, 8 * wordSize)); 3975 movq(rsi, Address(rsp, 9 * wordSize)); 3976 movq(rbp, Address(rsp, 10 * wordSize)); 3977 // skip rsp 3978 movq(rbx, Address(rsp, 12 * wordSize)); 3979 movq(rdx, Address(rsp, 13 * wordSize)); 3980 movq(rcx, Address(rsp, 14 * wordSize)); 3981 movq(rax, Address(rsp, 15 * wordSize)); 3982 3983 addq(rsp, 16 * wordSize); 3984} 3985 3986void Assembler::popq(Address dst) { 3987 InstructionMark im(this); 3988 prefixq(dst); 3989 emit_byte(0x8F); 3990 emit_operand(rax, dst); 3991} 3992 3993void Assembler::pusha() { // 64bit 3994 // we have to store original rsp. ABI says that 128 bytes 3995 // below rsp are local scratch. 
3996 movq(Address(rsp, -5 * wordSize), rsp); 3997 3998 subq(rsp, 16 * wordSize); 3999 4000 movq(Address(rsp, 15 * wordSize), rax); 4001 movq(Address(rsp, 14 * wordSize), rcx); 4002 movq(Address(rsp, 13 * wordSize), rdx); 4003 movq(Address(rsp, 12 * wordSize), rbx); 4004 // skip rsp 4005 movq(Address(rsp, 10 * wordSize), rbp); 4006 movq(Address(rsp, 9 * wordSize), rsi); 4007 movq(Address(rsp, 8 * wordSize), rdi); 4008 movq(Address(rsp, 7 * wordSize), r8); 4009 movq(Address(rsp, 6 * wordSize), r9); 4010 movq(Address(rsp, 5 * wordSize), r10); 4011 movq(Address(rsp, 4 * wordSize), r11); 4012 movq(Address(rsp, 3 * wordSize), r12); 4013 movq(Address(rsp, 2 * wordSize), r13); 4014 movq(Address(rsp, wordSize), r14); 4015 movq(Address(rsp, 0), r15); 4016} 4017 4018void Assembler::pushq(Address src) { 4019 InstructionMark im(this); 4020 prefixq(src); 4021 emit_byte(0xFF); 4022 emit_operand(rsi, src); 4023} 4024 4025void Assembler::rclq(Register dst, int imm8) { 4026 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 4027 int encode = prefixq_and_encode(dst->encoding()); 4028 if (imm8 == 1) { 4029 emit_byte(0xD1); 4030 emit_byte(0xD0 | encode); 4031 } else { 4032 emit_byte(0xC1); 4033 emit_byte(0xD0 | encode); 4034 emit_byte(imm8); 4035 } 4036} 4037void Assembler::sarq(Register dst, int imm8) { 4038 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 4039 int encode = prefixq_and_encode(dst->encoding()); 4040 if (imm8 == 1) { 4041 emit_byte(0xD1); 4042 emit_byte(0xF8 | encode); 4043 } else { 4044 emit_byte(0xC1); 4045 emit_byte(0xF8 | encode); 4046 emit_byte(imm8); 4047 } 4048} 4049 4050void Assembler::sarq(Register dst) { 4051 int encode = prefixq_and_encode(dst->encoding()); 4052 emit_byte(0xD3); 4053 emit_byte(0xF8 | encode); 4054} 4055void Assembler::sbbq(Address dst, int32_t imm32) { 4056 InstructionMark im(this); 4057 prefixq(dst); 4058 emit_arith_operand(0x81, rbx, dst, imm32); 4059} 4060 4061void Assembler::sbbq(Register dst, int32_t imm32) { 4062 (void) 
prefixq_and_encode(dst->encoding()); 4063 emit_arith(0x81, 0xD8, dst, imm32); 4064} 4065 4066void Assembler::sbbq(Register dst, Address src) { 4067 InstructionMark im(this); 4068 prefixq(src, dst); 4069 emit_byte(0x1B); 4070 emit_operand(dst, src); 4071} 4072 4073void Assembler::sbbq(Register dst, Register src) { 4074 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 4075 emit_arith(0x1B, 0xC0, dst, src); 4076} 4077 4078void Assembler::shlq(Register dst, int imm8) { 4079 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 4080 int encode = prefixq_and_encode(dst->encoding()); 4081 if (imm8 == 1) { 4082 emit_byte(0xD1); 4083 emit_byte(0xE0 | encode); 4084 } else { 4085 emit_byte(0xC1); 4086 emit_byte(0xE0 | encode); 4087 emit_byte(imm8); 4088 } 4089} 4090 4091void Assembler::shlq(Register dst) { 4092 int encode = prefixq_and_encode(dst->encoding()); 4093 emit_byte(0xD3); 4094 emit_byte(0xE0 | encode); 4095} 4096 4097void Assembler::shrq(Register dst, int imm8) { 4098 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 4099 int encode = prefixq_and_encode(dst->encoding()); 4100 emit_byte(0xC1); 4101 emit_byte(0xE8 | encode); 4102 emit_byte(imm8); 4103} 4104 4105void Assembler::shrq(Register dst) { 4106 int encode = prefixq_and_encode(dst->encoding()); 4107 emit_byte(0xD3); 4108 emit_byte(0xE8 | encode); 4109} 4110 4111void Assembler::sqrtsd(XMMRegister dst, Address src) { 4112 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4113 InstructionMark im(this); 4114 emit_byte(0xF2); 4115 prefix(src, dst); 4116 emit_byte(0x0F); 4117 emit_byte(0x51); 4118 emit_operand(dst, src); 4119} 4120 4121void Assembler::subq(Address dst, int32_t imm32) { 4122 InstructionMark im(this); 4123 prefixq(dst); 4124 if (is8bit(imm32)) { 4125 emit_byte(0x83); 4126 emit_operand(rbp, dst, 1); 4127 emit_byte(imm32 & 0xFF); 4128 } else { 4129 emit_byte(0x81); 4130 emit_operand(rbp, dst, 4); 4131 emit_long(imm32); 4132 } 4133} 4134 4135void Assembler::subq(Register dst, int32_t 
imm32) { 4136 (void) prefixq_and_encode(dst->encoding()); 4137 emit_arith(0x81, 0xE8, dst, imm32); 4138} 4139 4140void Assembler::subq(Address dst, Register src) { 4141 InstructionMark im(this); 4142 prefixq(dst, src); 4143 emit_byte(0x29); 4144 emit_operand(src, dst); 4145} 4146 4147void Assembler::subq(Register dst, Address src) { 4148 InstructionMark im(this); 4149 prefixq(src, dst); 4150 emit_byte(0x2B); 4151 emit_operand(dst, src); 4152} 4153 4154void Assembler::subq(Register dst, Register src) { 4155 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 4156 emit_arith(0x2B, 0xC0, dst, src); 4157} 4158 4159void Assembler::testq(Register dst, int32_t imm32) { 4160 // not using emit_arith because test 4161 // doesn't support sign-extension of 4162 // 8bit operands 4163 int encode = dst->encoding(); 4164 if (encode == 0) { 4165 prefix(REX_W); 4166 emit_byte(0xA9); 4167 } else { 4168 encode = prefixq_and_encode(encode); 4169 emit_byte(0xF7); 4170 emit_byte(0xC0 | encode); 4171 } 4172 emit_long(imm32); 4173} 4174 4175void Assembler::testq(Register dst, Register src) { 4176 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 4177 emit_arith(0x85, 0xC0, dst, src); 4178} 4179 4180void Assembler::xaddq(Address dst, Register src) { 4181 InstructionMark im(this); 4182 prefixq(dst, src); 4183 emit_byte(0x0F); 4184 emit_byte(0xC1); 4185 emit_operand(src, dst); 4186} 4187 4188void Assembler::xchgq(Register dst, Address src) { 4189 InstructionMark im(this); 4190 prefixq(src, dst); 4191 emit_byte(0x87); 4192 emit_operand(dst, src); 4193} 4194 4195void Assembler::xchgq(Register dst, Register src) { 4196 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4197 emit_byte(0x87); 4198 emit_byte(0xc0 | encode); 4199} 4200 4201void Assembler::xorq(Register dst, Register src) { 4202 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 4203 emit_arith(0x33, 0xC0, dst, src); 4204} 4205 4206void Assembler::xorq(Register dst, Address src) { 4207 
InstructionMark im(this); 4208 prefixq(src, dst); 4209 emit_byte(0x33); 4210 emit_operand(dst, src); 4211} 4212 4213#endif // !LP64 4214 4215static Assembler::Condition reverse[] = { 4216 Assembler::noOverflow /* overflow = 0x0 */ , 4217 Assembler::overflow /* noOverflow = 0x1 */ , 4218 Assembler::aboveEqual /* carrySet = 0x2, below = 0x2 */ , 4219 Assembler::below /* aboveEqual = 0x3, carryClear = 0x3 */ , 4220 Assembler::notZero /* zero = 0x4, equal = 0x4 */ , 4221 Assembler::zero /* notZero = 0x5, notEqual = 0x5 */ , 4222 Assembler::above /* belowEqual = 0x6 */ , 4223 Assembler::belowEqual /* above = 0x7 */ , 4224 Assembler::positive /* negative = 0x8 */ , 4225 Assembler::negative /* positive = 0x9 */ , 4226 Assembler::noParity /* parity = 0xa */ , 4227 Assembler::parity /* noParity = 0xb */ , 4228 Assembler::greaterEqual /* less = 0xc */ , 4229 Assembler::less /* greaterEqual = 0xd */ , 4230 Assembler::greater /* lessEqual = 0xe */ , 4231 Assembler::lessEqual /* greater = 0xf, */ 4232 4233}; 4234 4235 4236// Implementation of MacroAssembler 4237 4238// First all the versions that have distinct versions depending on 32/64 bit 4239// Unless the difference is trivial (1 line or so). 
4240 4241#ifndef _LP64 4242 4243// 32bit versions 4244 4245Address MacroAssembler::as_Address(AddressLiteral adr) { 4246 return Address(adr.target(), adr.rspec()); 4247} 4248 4249Address MacroAssembler::as_Address(ArrayAddress adr) { 4250 return Address::make_array(adr); 4251} 4252 4253int MacroAssembler::biased_locking_enter(Register lock_reg, 4254 Register obj_reg, 4255 Register swap_reg, 4256 Register tmp_reg, 4257 bool swap_reg_contains_mark, 4258 Label& done, 4259 Label* slow_case, 4260 BiasedLockingCounters* counters) { 4261 assert(UseBiasedLocking, "why call this otherwise?"); 4262 assert(swap_reg == rax, "swap_reg must be rax, for cmpxchg"); 4263 assert_different_registers(lock_reg, obj_reg, swap_reg); 4264 4265 if (PrintBiasedLockingStatistics && counters == NULL) 4266 counters = BiasedLocking::counters(); 4267 4268 bool need_tmp_reg = false; 4269 if (tmp_reg == noreg) { 4270 need_tmp_reg = true; 4271 tmp_reg = lock_reg; 4272 } else { 4273 assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg); 4274 } 4275 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); 4276 Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); 4277 Address klass_addr (obj_reg, oopDesc::klass_offset_in_bytes()); 4278 Address saved_mark_addr(lock_reg, 0); 4279 4280 // Biased locking 4281 // See whether the lock is currently biased toward our thread and 4282 // whether the epoch is still valid 4283 // Note that the runtime guarantees sufficient alignment of JavaThread 4284 // pointers to allow age to be placed into low bits 4285 // First check to see whether biasing is even enabled for this object 4286 Label cas_label; 4287 int null_check_offset = -1; 4288 if (!swap_reg_contains_mark) { 4289 null_check_offset = offset(); 4290 movl(swap_reg, mark_addr); 4291 } 4292 if (need_tmp_reg) { 4293 push(tmp_reg); 4294 } 4295 movl(tmp_reg, swap_reg); 4296 andl(tmp_reg, 
markOopDesc::biased_lock_mask_in_place); 4297 cmpl(tmp_reg, markOopDesc::biased_lock_pattern); 4298 if (need_tmp_reg) { 4299 pop(tmp_reg); 4300 } 4301 jcc(Assembler::notEqual, cas_label); 4302 // The bias pattern is present in the object's header. Need to check 4303 // whether the bias owner and the epoch are both still current. 4304 // Note that because there is no current thread register on x86 we 4305 // need to store off the mark word we read out of the object to 4306 // avoid reloading it and needing to recheck invariants below. This 4307 // store is unfortunate but it makes the overall code shorter and 4308 // simpler. 4309 movl(saved_mark_addr, swap_reg); 4310 if (need_tmp_reg) { 4311 push(tmp_reg); 4312 } 4313 get_thread(tmp_reg); 4314 xorl(swap_reg, tmp_reg); 4315 if (swap_reg_contains_mark) { 4316 null_check_offset = offset(); 4317 } 4318 movl(tmp_reg, klass_addr); 4319 xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); 4320 andl(swap_reg, ~((int) markOopDesc::age_mask_in_place)); 4321 if (need_tmp_reg) { 4322 pop(tmp_reg); 4323 } 4324 if (counters != NULL) { 4325 cond_inc32(Assembler::zero, 4326 ExternalAddress((address)counters->biased_lock_entry_count_addr())); 4327 } 4328 jcc(Assembler::equal, done); 4329 4330 Label try_revoke_bias; 4331 Label try_rebias; 4332 4333 // At this point we know that the header has the bias pattern and 4334 // that we are not the bias owner in the current epoch. We need to 4335 // figure out more details about the state of the header in order to 4336 // know what operations can be legally performed on the object's 4337 // header. 4338 4339 // If the low three bits in the xor result aren't clear, that means 4340 // the prototype header is no longer biased and we have to revoke 4341 // the bias on this object. 
4342 testl(swap_reg, markOopDesc::biased_lock_mask_in_place); 4343 jcc(Assembler::notZero, try_revoke_bias); 4344 4345 // Biasing is still enabled for this data type. See whether the 4346 // epoch of the current bias is still valid, meaning that the epoch 4347 // bits of the mark word are equal to the epoch bits of the 4348 // prototype header. (Note that the prototype header's epoch bits 4349 // only change at a safepoint.) If not, attempt to rebias the object 4350 // toward the current thread. Note that we must be absolutely sure 4351 // that the current epoch is invalid in order to do this because 4352 // otherwise the manipulations it performs on the mark word are 4353 // illegal. 4354 testl(swap_reg, markOopDesc::epoch_mask_in_place); 4355 jcc(Assembler::notZero, try_rebias); 4356 4357 // The epoch of the current bias is still valid but we know nothing 4358 // about the owner; it might be set or it might be clear. Try to 4359 // acquire the bias of the object using an atomic operation. If this 4360 // fails we will go in to the runtime to revoke the object's bias. 4361 // Note that we first construct the presumed unbiased header so we 4362 // don't accidentally blow away another thread's valid bias. 4363 movl(swap_reg, saved_mark_addr); 4364 andl(swap_reg, 4365 markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); 4366 if (need_tmp_reg) { 4367 push(tmp_reg); 4368 } 4369 get_thread(tmp_reg); 4370 orl(tmp_reg, swap_reg); 4371 if (os::is_MP()) { 4372 lock(); 4373 } 4374 cmpxchgptr(tmp_reg, Address(obj_reg, 0)); 4375 if (need_tmp_reg) { 4376 pop(tmp_reg); 4377 } 4378 // If the biasing toward our thread failed, this means that 4379 // another thread succeeded in biasing it toward itself and we 4380 // need to revoke that bias. The revocation will occur in the 4381 // interpreter runtime in the slow case. 
4382 if (counters != NULL) { 4383 cond_inc32(Assembler::zero, 4384 ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr())); 4385 } 4386 if (slow_case != NULL) { 4387 jcc(Assembler::notZero, *slow_case); 4388 } 4389 jmp(done); 4390 4391 bind(try_rebias); 4392 // At this point we know the epoch has expired, meaning that the 4393 // current "bias owner", if any, is actually invalid. Under these 4394 // circumstances _only_, we are allowed to use the current header's 4395 // value as the comparison value when doing the cas to acquire the 4396 // bias in the current epoch. In other words, we allow transfer of 4397 // the bias from one thread to another directly in this situation. 4398 // 4399 // FIXME: due to a lack of registers we currently blow away the age 4400 // bits in this situation. Should attempt to preserve them. 4401 if (need_tmp_reg) { 4402 push(tmp_reg); 4403 } 4404 get_thread(tmp_reg); 4405 movl(swap_reg, klass_addr); 4406 orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); 4407 movl(swap_reg, saved_mark_addr); 4408 if (os::is_MP()) { 4409 lock(); 4410 } 4411 cmpxchgptr(tmp_reg, Address(obj_reg, 0)); 4412 if (need_tmp_reg) { 4413 pop(tmp_reg); 4414 } 4415 // If the biasing toward our thread failed, then another thread 4416 // succeeded in biasing it toward itself and we need to revoke that 4417 // bias. The revocation will occur in the runtime in the slow case. 4418 if (counters != NULL) { 4419 cond_inc32(Assembler::zero, 4420 ExternalAddress((address)counters->rebiased_lock_entry_count_addr())); 4421 } 4422 if (slow_case != NULL) { 4423 jcc(Assembler::notZero, *slow_case); 4424 } 4425 jmp(done); 4426 4427 bind(try_revoke_bias); 4428 // The prototype mark in the klass doesn't have the bias bit set any 4429 // more, indicating that objects of this data type are not supposed 4430 // to be biased any more. 
We are going to try to reset the mark of 4431 // this object to the prototype value and fall through to the 4432 // CAS-based locking scheme. Note that if our CAS fails, it means 4433 // that another thread raced us for the privilege of revoking the 4434 // bias of this particular object, so it's okay to continue in the 4435 // normal locking code. 4436 // 4437 // FIXME: due to a lack of registers we currently blow away the age 4438 // bits in this situation. Should attempt to preserve them. 4439 movl(swap_reg, saved_mark_addr); 4440 if (need_tmp_reg) { 4441 push(tmp_reg); 4442 } 4443 movl(tmp_reg, klass_addr); 4444 movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); 4445 if (os::is_MP()) { 4446 lock(); 4447 } 4448 cmpxchgptr(tmp_reg, Address(obj_reg, 0)); 4449 if (need_tmp_reg) { 4450 pop(tmp_reg); 4451 } 4452 // Fall through to the normal CAS-based lock, because no matter what 4453 // the result of the above CAS, some thread must have succeeded in 4454 // removing the bias bit from the object's header. 4455 if (counters != NULL) { 4456 cond_inc32(Assembler::zero, 4457 ExternalAddress((address)counters->revoked_lock_entry_count_addr())); 4458 } 4459 4460 bind(cas_label); 4461 4462 return null_check_offset; 4463} 4464void MacroAssembler::call_VM_leaf_base(address entry_point, 4465 int number_of_arguments) { 4466 call(RuntimeAddress(entry_point)); 4467 increment(rsp, number_of_arguments * wordSize); 4468} 4469 4470void MacroAssembler::cmpoop(Address src1, jobject obj) { 4471 cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate()); 4472} 4473 4474void MacroAssembler::cmpoop(Register src1, jobject obj) { 4475 cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate()); 4476} 4477 4478void MacroAssembler::extend_sign(Register hi, Register lo) { 4479 // According to Intel Doc. AP-526, "Integer Divide", p.18. 
4480 if (VM_Version::is_P6() && hi == rdx && lo == rax) { 4481 cdql(); 4482 } else { 4483 movl(hi, lo); 4484 sarl(hi, 31); 4485 } 4486} 4487 4488void MacroAssembler::fat_nop() { 4489 // A 5 byte nop that is safe for patching (see patch_verified_entry) 4490 emit_byte(0x26); // es: 4491 emit_byte(0x2e); // cs: 4492 emit_byte(0x64); // fs: 4493 emit_byte(0x65); // gs: 4494 emit_byte(0x90); 4495} 4496 4497void MacroAssembler::jC2(Register tmp, Label& L) { 4498 // set parity bit if FPU flag C2 is set (via rax) 4499 save_rax(tmp); 4500 fwait(); fnstsw_ax(); 4501 sahf(); 4502 restore_rax(tmp); 4503 // branch 4504 jcc(Assembler::parity, L); 4505} 4506 4507void MacroAssembler::jnC2(Register tmp, Label& L) { 4508 // set parity bit if FPU flag C2 is set (via rax) 4509 save_rax(tmp); 4510 fwait(); fnstsw_ax(); 4511 sahf(); 4512 restore_rax(tmp); 4513 // branch 4514 jcc(Assembler::noParity, L); 4515} 4516 4517// 32bit can do a case table jump in one instruction but we no longer allow the base 4518// to be installed in the Address class 4519void MacroAssembler::jump(ArrayAddress entry) { 4520 jmp(as_Address(entry)); 4521} 4522 4523// Note: y_lo will be destroyed 4524void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) { 4525 // Long compare for Java (semantics as described in JVM spec.) 
  // Result in x_hi: -1 if x < y, 0 if x == y, +1 if x > y.
  Label high, low, done;

  cmpl(x_hi, y_hi);
  jcc(Assembler::less, low);
  jcc(Assembler::greater, high);
  // x_hi is the return register
  xorl(x_hi, x_hi);
  cmpl(x_lo, y_lo);                 // high halves equal: unsigned compare of low halves
  jcc(Assembler::below, low);
  jcc(Assembler::equal, done);

  bind(high);
  xorl(x_hi, x_hi);
  increment(x_hi);                  // result := +1
  jmp(done);

  bind(low);
  xorl(x_hi, x_hi);
  decrementl(x_hi);                 // result := -1

  bind(done);
}

// Load the literal address itself (an lval) into dst, with relocation.
void MacroAssembler::lea(Register dst, AddressLiteral src) {
  mov_literal32(dst, (int32_t)src.target(), src.rspec());
}

// Store the literal address into a memory operand.
void MacroAssembler::lea(Address dst, AddressLiteral adr) {
  // leal(dst, as_Address(adr));
  // see note in movl as to why we must use a move
  mov_literal32(dst, (int32_t) adr.target(), adr.rspec());
}

void MacroAssembler::leave() {
  mov(rsp, rbp);
  pop(rbp);
}

void MacroAssembler::lmul(int x_rsp_offset, int y_rsp_offset) {
  // Multiplication of two Java long values stored on the stack
  // as illustrated below. Result is in rdx:rax.
  //
  // rsp ---> [  ??  ] \               \
  //            ....    | y_rsp_offset  |
  //          [ y_lo ] /  (in bytes)    | x_rsp_offset
  //          [ y_hi ]                  | (in bytes)
  //            ....                    |
  //          [ x_lo ]                 /
  //          [ x_hi ]
  //            ....
  //
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  Address x_hi(rsp, x_rsp_offset + wordSize); Address x_lo(rsp, x_rsp_offset);
  Address y_hi(rsp, y_rsp_offset + wordSize); Address y_lo(rsp, y_rsp_offset);
  Label quick;
  // load x_hi, y_hi and check if quick
  // multiplication is possible
  movl(rbx, x_hi);
  movl(rcx, y_hi);
  movl(rax, rbx);
  orl(rbx, rcx);                                 // rbx, = 0 <=> x_hi = 0 and y_hi = 0
  jcc(Assembler::zero, quick);                   // if rbx, = 0 do quick multiply
  // do full multiplication
  // 1st step
  mull(y_lo);                                    // x_hi * y_lo
  movl(rbx, rax);                                // save lo(x_hi * y_lo) in rbx,
  // 2nd step
  movl(rax, x_lo);
  mull(rcx);                                     // x_lo * y_hi
  addl(rbx, rax);                                // add lo(x_lo * y_hi) to rbx,
  // 3rd step
  bind(quick);                                   // note: rbx, = 0 if quick multiply!
  movl(rax, x_lo);
  mull(y_lo);                                    // x_lo * y_lo
  addl(rdx, rbx);                                // correct hi(x_lo * y_lo)
}

// Two's-complement negate of a 64-bit value held in a hi:lo register pair.
void MacroAssembler::lneg(Register hi, Register lo) {
  negl(lo);
  adcl(hi, 0);
  negl(hi);
}

void MacroAssembler::lshl(Register hi, Register lo) {
  // Java shift left long support (semantics as described in JVM spec., p.305)
  // (basic idea for shift counts s >= n: x << s == (x << n) << (s - n))
  // shift value is in rcx !
  assert(hi != rcx, "must not use rcx");
  assert(lo != rcx, "must not use rcx");
  const Register s = rcx;                        // shift count
  const int      n = BitsPerWord;
  Label L;
  andl(s, 0x3f);                                 // s := s & 0x3f (s < 0x40)
  cmpl(s, n);                                    // if (s < n)
  jcc(Assembler::less, L);                       // else (s >= n)
  movl(hi, lo);                                  // x := x << n
  xorl(lo, lo);
  // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
  bind(L);                                       // s (mod n) < n
  shldl(hi, lo);                                 // x := x << s
  shll(lo);
}


void MacroAssembler::lshr(Register hi, Register lo, bool sign_extension) {
  // Java shift right long support (semantics as described in JVM spec., p.306 & p.310)
  // (basic idea for shift counts s >= n: x >> s == (x >> n) >> (s - n))
  assert(hi != rcx, "must not use rcx");
  assert(lo != rcx, "must not use rcx");
  const Register s = rcx;                        // shift count
  const int      n = BitsPerWord;
  Label L;
  andl(s, 0x3f);                                 // s := s & 0x3f (s < 0x40)
  cmpl(s, n);                                    // if (s < n)
  jcc(Assembler::less, L);                       // else (s >= n)
  movl(lo, hi);                                  // x := x >> n
  if (sign_extension) sarl(hi, 31);
  else                xorl(hi, hi);
  // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
  bind(L);                                       // s (mod n) < n
  shrdl(lo, hi);                                 // x := x >> s
  if (sign_extension) sarl(hi);
  else                shrl(hi);
}

// Load an oop constant into a register; immediate carries an oop relocation.
void MacroAssembler::movoop(Register dst, jobject obj) {
  mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
}

// Store an oop constant into memory; immediate carries an oop relocation.
void MacroAssembler::movoop(Address dst, jobject obj) {
  mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
}

// Load either the literal's address (lval) or the value at that address.
void MacroAssembler::movptr(Register dst, AddressLiteral src) {
  if (src.is_lval()) {
    mov_literal32(dst, (intptr_t)src.target(), src.rspec());
  } else {
    movl(dst, as_Address(src));
  }
}

void MacroAssembler::movptr(ArrayAddress dst, Register src) {
  movl(as_Address(dst), src);
}

void MacroAssembler::movptr(Register dst, ArrayAddress src) {
  movl(dst, as_Address(src));
}

// src should NEVER be a real pointer. Use AddressLiteral for true pointers
void MacroAssembler::movptr(Address dst, intptr_t src) {
  movl(dst, src);
}


void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) {
  movsd(dst, as_Address(src));
}

// Pop rsi/rdi/rdx/rcx in the reverse of push_callee_saved_registers order.
void MacroAssembler::pop_callee_saved_registers() {
  pop(rcx);
  pop(rdx);
  pop(rdi);
  pop(rsi);
}

// Reload the FPU top-of-stack (a double) from the stack and free the slot.
void MacroAssembler::pop_fTOS() {
  fld_d(Address(rsp, 0));
  addl(rsp, 2 * wordSize);
}

void MacroAssembler::push_callee_saved_registers() {
  push(rsi);
  push(rdi);
  push(rdx);
  push(rcx);
}

// Spill the FPU top-of-stack (a double) to a fresh 2-word stack slot.
void MacroAssembler::push_fTOS() {
  subl(rsp, 2 * wordSize);
  fstp_d(Address(rsp, 0));
}


void MacroAssembler::pushoop(jobject obj) {
  push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate());
}


// Push either the literal's address (lval) or the value stored there.
void MacroAssembler::pushptr(AddressLiteral src) {
  if (src.is_lval()) {
    push_literal32((int32_t)src.target(), src.rspec());
  } else {
    pushl(as_Address(src));
  }
}

// dst := (flags != zero) ? 1 : 0, as a full word.
void MacroAssembler::set_word_if_not_zero(Register dst) {
  xorl(dst, dst);
  set_byte_if_not_zero(dst);
}

// 32-bit calling convention: VM-call arguments are passed on the stack.
static void pass_arg0(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}

static void pass_arg1(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}

static void pass_arg2(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}

static void pass_arg3(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}

#ifndef PRODUCT
extern "C" void findpc(intptr_t x);
#endif

// Runtime backend of MacroAssembler::stop() on 32-bit: prints the saved
// register state and the stop message. Parameter order matches the pusha()
// layout produced by stop().
void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
  // In order to get locks to work, we need to fake a in_VM state
  JavaThread* thread = JavaThread::current();
  JavaThreadState saved_state = thread->thread_state();
thread->set_thread_state(_thread_in_vm); 4754 if (ShowMessageBoxOnError) { 4755 JavaThread* thread = JavaThread::current(); 4756 JavaThreadState saved_state = thread->thread_state(); 4757 thread->set_thread_state(_thread_in_vm); 4758 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { 4759 ttyLocker ttyl; 4760 BytecodeCounter::print(); 4761 } 4762 // To see where a verify_oop failed, get $ebx+40/X for this frame. 4763 // This is the value of eip which points to where verify_oop will return. 4764 if (os::message_box(msg, "Execution stopped, print registers?")) { 4765 ttyLocker ttyl; 4766 tty->print_cr("eip = 0x%08x", eip); 4767#ifndef PRODUCT 4768 tty->cr(); 4769 findpc(eip); 4770 tty->cr(); 4771#endif 4772 tty->print_cr("rax, = 0x%08x", rax); 4773 tty->print_cr("rbx, = 0x%08x", rbx); 4774 tty->print_cr("rcx = 0x%08x", rcx); 4775 tty->print_cr("rdx = 0x%08x", rdx); 4776 tty->print_cr("rdi = 0x%08x", rdi); 4777 tty->print_cr("rsi = 0x%08x", rsi); 4778 tty->print_cr("rbp, = 0x%08x", rbp); 4779 tty->print_cr("rsp = 0x%08x", rsp); 4780 BREAKPOINT; 4781 } 4782 } else { 4783 ttyLocker ttyl; 4784 ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); 4785 assert(false, "DEBUG MESSAGE"); 4786 } 4787 ThreadStateTransition::transition(thread, _thread_in_vm, saved_state); 4788} 4789 4790void MacroAssembler::stop(const char* msg) { 4791 ExternalAddress message((address)msg); 4792 // push address of message 4793 pushptr(message.addr()); 4794 { Label L; call(L, relocInfo::none); bind(L); } // push eip 4795 pusha(); // push registers 4796 call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32))); 4797 hlt(); 4798} 4799 4800void MacroAssembler::warn(const char* msg) { 4801 push_CPU_state(); 4802 4803 ExternalAddress message((address) msg); 4804 // push address of message 4805 pushptr(message.addr()); 4806 4807 call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning))); 4808 addl(rsp, wordSize); // discard argument 4809 pop_CPU_state(); 
}

#else // _LP64

// 64 bit versions

Address MacroAssembler::as_Address(AddressLiteral adr) {
  // amd64 always does this as a pc-rel
  // we can be absolute or disp based on the instruction type
  // jmp/call are displacements others are absolute
  assert(!adr.is_lval(), "must be rval");
  assert(reachable(adr), "must be");
  return Address((int32_t)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc());

}

// Materializes the array base into rscratch1 and returns an Address that
// indexes off it. Note: clobbers rscratch1.
Address MacroAssembler::as_Address(ArrayAddress adr) {
  AddressLiteral base = adr.base();
  lea(rscratch1, base);
  Address index = adr.index();
  assert(index._disp == 0, "must not have disp"); // maybe it can?
  Address array(rscratch1, index._index, index._scale, index._disp);
  return array;
}

// 64-bit biased-locking fast path. On success control falls to `done` with
// the lock acquired via bias; otherwise it reaches `cas_label` (returned to
// the caller implicitly by falling through) for the normal CAS-based lock,
// or jumps to *slow_case. Returns the code offset of the mark load for
// implicit-null-check bookkeeping (-1 if the mark was preloaded).
int MacroAssembler::biased_locking_enter(Register lock_reg,
                                         Register obj_reg,
                                         Register swap_reg,
                                         Register tmp_reg,
                                         bool swap_reg_contains_mark,
                                         Label& done,
                                         Label* slow_case,
                                         BiasedLockingCounters* counters) {
  assert(UseBiasedLocking, "why call this otherwise?");
  assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq");
  assert(tmp_reg != noreg, "tmp_reg must be supplied");
  assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
  Address saved_mark_addr(lock_reg, 0);

  if (PrintBiasedLockingStatistics && counters == NULL)
    counters = BiasedLocking::counters();

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits
  // First check to see whether biasing is even enabled for this object
  Label cas_label;
  int null_check_offset = -1;
  if (!swap_reg_contains_mark) {
    null_check_offset = offset();
    movq(swap_reg, mark_addr);
  }
  movq(tmp_reg, swap_reg);
  andq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
  cmpq(tmp_reg, markOopDesc::biased_lock_pattern);
  jcc(Assembler::notEqual, cas_label);
  // The bias pattern is present in the object's header. Need to check
  // whether the bias owner and the epoch are both still current.
  load_prototype_header(tmp_reg, obj_reg);
  orq(tmp_reg, r15_thread);
  xorq(tmp_reg, swap_reg);
  andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place));
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
  }
  jcc(Assembler::equal, done);

  Label try_revoke_bias;
  Label try_rebias;

  // At this point we know that the header has the bias pattern and
  // that we are not the bias owner in the current epoch. We need to
  // figure out more details about the state of the header in order to
  // know what operations can be legally performed on the object's
  // header.

  // If the low three bits in the xor result aren't clear, that means
  // the prototype header is no longer biased and we have to revoke
  // the bias on this object.
  testq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
  jcc(Assembler::notZero, try_revoke_bias);

  // Biasing is still enabled for this data type. See whether the
  // epoch of the current bias is still valid, meaning that the epoch
  // bits of the mark word are equal to the epoch bits of the
  // prototype header. (Note that the prototype header's epoch bits
  // only change at a safepoint.) If not, attempt to rebias the object
  // toward the current thread. Note that we must be absolutely sure
  // that the current epoch is invalid in order to do this because
  // otherwise the manipulations it performs on the mark word are
  // illegal.
  testq(tmp_reg, markOopDesc::epoch_mask_in_place);
  jcc(Assembler::notZero, try_rebias);

  // The epoch of the current bias is still valid but we know nothing
  // about the owner; it might be set or it might be clear. Try to
  // acquire the bias of the object using an atomic operation. If this
  // fails we will go in to the runtime to revoke the object's bias.
  // Note that we first construct the presumed unbiased header so we
  // don't accidentally blow away another thread's valid bias.
  andq(swap_reg,
       markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
  movq(tmp_reg, swap_reg);
  orq(tmp_reg, r15_thread);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgq(tmp_reg, Address(obj_reg, 0));
  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias. The revocation will occur in the
  // interpreter runtime in the slow case.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
  }
  if (slow_case != NULL) {
    jcc(Assembler::notZero, *slow_case);
  }
  jmp(done);

  bind(try_rebias);
  // At this point we know the epoch has expired, meaning that the
  // current "bias owner", if any, is actually invalid. Under these
  // circumstances _only_, we are allowed to use the current header's
  // value as the comparison value when doing the cas to acquire the
  // bias in the current epoch. In other words, we allow transfer of
  // the bias from one thread to another directly in this situation.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  load_prototype_header(tmp_reg, obj_reg);
  orq(tmp_reg, r15_thread);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgq(tmp_reg, Address(obj_reg, 0));
  // If the biasing toward our thread failed, then another thread
  // succeeded in biasing it toward itself and we need to revoke that
  // bias. The revocation will occur in the runtime in the slow case.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address) counters->rebiased_lock_entry_count_addr()));
  }
  if (slow_case != NULL) {
    jcc(Assembler::notZero, *slow_case);
  }
  jmp(done);

  bind(try_revoke_bias);
  // The prototype mark in the klass doesn't have the bias bit set any
  // more, indicating that objects of this data type are not supposed
  // to be biased any more. We are going to try to reset the mark of
  // this object to the prototype value and fall through to the
  // CAS-based locking scheme. Note that if our CAS fails, it means
  // that another thread raced us for the privilege of revoking the
  // bias of this particular object, so it's okay to continue in the
  // normal locking code.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  load_prototype_header(tmp_reg, obj_reg);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgq(tmp_reg, Address(obj_reg, 0));
  // Fall through to the normal CAS-based lock, because no matter what
  // the result of the above CAS, some thread must have succeeded in
  // removing the bias bit from the object's header.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address) counters->revoked_lock_entry_count_addr()));
  }

  bind(cas_label);

  return null_check_offset;
}

// 64-bit leaf call: args are already in registers per the C ABI; we only
// have to guarantee 16-byte stack alignment at the call site (and on
// Windows, the 32-byte register-argument home area).
void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) {
  Label L, E;

#ifdef _WIN64
  // Windows always allocates space for it's register args
  assert(num_args <= 4, "only register arguments supported");
  subq(rsp, frame::arg_reg_save_area_bytes);
#endif

  // Align stack if necessary
  testl(rsp, 15);
  jcc(Assembler::zero, L);

  subq(rsp, 8);
  {
    call(RuntimeAddress(entry_point));
  }
  addq(rsp, 8);
  jmp(E);

  bind(L);
  {
    call(RuntimeAddress(entry_point));
  }

  bind(E);

#ifdef _WIN64
  // restore stack pointer
  addq(rsp, frame::arg_reg_save_area_bytes);
#endif

}

// 64-bit compare against a literal; goes through rscratch1 when the target
// is out of rip-relative range.
void MacroAssembler::cmp64(Register src1, AddressLiteral src2) {
  assert(!src2.is_lval(), "should use cmpptr");

  if (reachable(src2)) {
    cmpq(src1, as_Address(src2));
  } else {
    lea(rscratch1, src2);
    Assembler::cmpq(src1, Address(rscratch1, 0));
  }
}

int MacroAssembler::corrected_idivq(Register reg) {
  // Full implementation of Java ldiv and lrem; checks for special
  // case as described in JVM spec., p.243 & p.271. The function
  // returns the (pc) offset of the idivl instruction - may be needed
  // for implicit exceptions.
  //
  //         normal case                           special case
  //
  // input : rax: dividend                         min_long
  //         reg: divisor   (may not be eax/edx)   -1
  //
  // output: rax: quotient  (= rax idiv reg)       min_long
  //         rdx: remainder (= rax irem reg)       0
  assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register");
  static const int64_t min_long = 0x8000000000000000;
  Label normal_case, special_case;

  // check for special case
  cmp64(rax, ExternalAddress((address) &min_long));
  jcc(Assembler::notEqual, normal_case);
  xorl(rdx, rdx);              // prepare rdx for possible special case (where
                               // remainder = 0)
  cmpq(reg, -1);
  jcc(Assembler::equal, special_case);

  // handle normal case
  bind(normal_case);
  cdqq();
  int idivq_offset = offset();
  idivq(reg);

  // normal and special case exit
  bind(special_case);

  return idivq_offset;
}

// Subtract an immediate, picking the shortest encoding; min_jint must not
// be negated (would overflow), so it subtracts directly.
void MacroAssembler::decrementq(Register reg, int value) {
  if (value == min_jint) { subq(reg, value); return; }
  if (value <  0) { incrementq(reg, -value); return; }
  if (value ==  0) {                        ; return; }
  if (value ==  1 && UseIncDec) { decq(reg) ; return; }
  /* else */      { subq(reg, value)       ; return; }
}

void MacroAssembler::decrementq(Address dst, int value) {
  if (value == min_jint) { subq(dst, value); return; }
  if (value <  0) { incrementq(dst, -value); return; }
  if (value ==  0) {                        ; return; }
  if (value ==  1 && UseIncDec) { decq(dst) ; return; }
  /* else */      { subq(dst, value)       ; return; }
}

void MacroAssembler::fat_nop() {
  // A 5 byte nop that is safe for patching (see patch_verified_entry)
  // Recommened sequence from 'Software Optimization Guide for the AMD
  // Hammer Processor'
  emit_byte(0x66);
  emit_byte(0x66);
  emit_byte(0x90);
  emit_byte(0x66);
  emit_byte(0x90);
}

// Add an immediate, picking the shortest encoding (inc/dec when enabled).
void MacroAssembler::incrementq(Register reg, int value) {
  if (value == min_jint) { addq(reg, value); return; }
  if (value <  0) { decrementq(reg, -value); return; }
  if (value ==  0) {                        ; return; }
  if (value ==  1 && UseIncDec) { incq(reg) ; return; }
  /* else */      { addq(reg, value)       ; return; }
}

void MacroAssembler::incrementq(Address dst, int value) {
  if (value == min_jint) { addq(dst, value); return; }
  if (value <  0) { decrementq(dst, -value); return; }
  if (value ==  0) {                        ; return; }
  if (value ==  1 && UseIncDec) { incq(dst) ; return; }
  /* else */      { addq(dst, value)       ; return; }
}

// 32bit can do a case table jump in one instruction but we no longer allow the base
// to be installed in the Address class
void MacroAssembler::jump(ArrayAddress entry) {
  lea(rscratch1, entry.base());
  Address dispatch = entry.index();
  assert(dispatch._base == noreg, "must be");
  dispatch._base = rscratch1;
  jmp(dispatch);
}

void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
  ShouldNotReachHere(); // 64bit doesn't use two regs
  cmpq(x_lo, y_lo);
}

// Load the literal's address itself into dst, with relocation.
void MacroAssembler::lea(Register dst, AddressLiteral src) {
  mov_literal64(dst, (intptr_t)src.target(), src.rspec());
}

// Store the literal's address into memory; clobbers rscratch1.
void MacroAssembler::lea(Address dst, AddressLiteral adr) {
  mov_literal64(rscratch1, (intptr_t)adr.target(), adr.rspec());
  movptr(dst, rscratch1);
}

void MacroAssembler::leave() {
  // %%% is this really better? Why not on 32bit too?
  emit_byte(0xC9); // LEAVE
}

void MacroAssembler::lneg(Register hi, Register lo) {
  ShouldNotReachHere(); // 64bit doesn't use two regs
  negq(lo);
}

// Load an oop constant into a register; immediate carries an oop relocation.
void MacroAssembler::movoop(Register dst, jobject obj) {
  mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate());
}

// Store an oop constant into memory via rscratch1 (no 64-bit imm-to-mem mov).
void MacroAssembler::movoop(Address dst, jobject obj) {
  mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate());
  movq(dst, rscratch1);
}

// Load either the literal's address (lval) or the value at that address;
// unreachable targets go through rscratch1.
void MacroAssembler::movptr(Register dst, AddressLiteral src) {
  if (src.is_lval()) {
    mov_literal64(dst, (intptr_t)src.target(), src.rspec());
  } else {
    if (reachable(src)) {
      movq(dst, as_Address(src));
    } else {
      lea(rscratch1, src);
      movq(dst, Address(rscratch1,0));
    }
  }
}

void MacroAssembler::movptr(ArrayAddress dst, Register src) {
  movq(as_Address(dst), src);
}

void MacroAssembler::movptr(Register dst, ArrayAddress src) {
  movq(dst, as_Address(src));
}

// src should NEVER be a real pointer. Use AddressLiteral for true pointers
void MacroAssembler::movptr(Address dst, intptr_t src) {
  mov64(rscratch1, src);
  movq(dst, rscratch1);
}

// These are mostly for initializing NULL
void MacroAssembler::movptr(Address dst, int32_t src) {
  movslq(dst, src);
}

void MacroAssembler::movptr(Register dst, int32_t src) {
  mov64(dst, (intptr_t)src);
}

// Push an oop constant; clobbers rscratch1.
void MacroAssembler::pushoop(jobject obj) {
  movoop(rscratch1, obj);
  push(rscratch1);
}

// Push either the literal's address (lval) or the value stored at it;
// clobbers rscratch1.
void MacroAssembler::pushptr(AddressLiteral src) {
  lea(rscratch1, src);
  if (src.is_lval()) {
    push(rscratch1);
  } else {
    pushq(Address(rscratch1, 0));
  }
}

// Clear the thread's last-Java-frame anchor fields (sp always; fp/pc on request).
void MacroAssembler::reset_last_Java_frame(bool clear_fp,
                                           bool clear_pc) {
  // we must set sp to zero to clear frame
  movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
  // must clear fp, so that compiled frames are not confused; it is
  // possible that we need it only for debugging
  if (clear_fp) {
    movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
  }

  if (clear_pc) {
    movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);
  }
}

// Record the last Java frame in the thread anchor; pc is written before sp
// so the anchor becomes walkable only once sp is set last.
void MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                         Register last_java_fp,
                                         address last_java_pc) {
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }

  // last_java_fp is optional
  if (last_java_fp->is_valid()) {
    movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()),
           last_java_fp);
  }

  // last_java_pc is optional
  if (last_java_pc != NULL) {
    Address java_pc(r15_thread,
                    JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
    lea(rscratch1, InternalAddress(last_java_pc));
    movptr(java_pc, rscratch1);
  }

  movptr(Address(r15_thread,
                 JavaThread::last_Java_sp_offset()), last_java_sp);
}

// 64-bit calling convention: VM-call arguments go in the C argument registers.
static void pass_arg0(MacroAssembler* masm, Register arg) {
  if (c_rarg0 != arg ) {
    masm->mov(c_rarg0, arg);
  }
}

static void pass_arg1(MacroAssembler* masm, Register arg) {
  if (c_rarg1 != arg ) {
    masm->mov(c_rarg1, arg);
  }
}

static void pass_arg2(MacroAssembler* masm, Register arg) {
  if (c_rarg2 != arg ) {
    masm->mov(c_rarg2, arg);
  }
}

static void pass_arg3(MacroAssembler* masm, Register arg) {
  if (c_rarg3 != arg ) {
    masm->mov(c_rarg3, arg);
  }
}

// Emit code that halts with a message: saves all registers via pusha, passes
// msg / pc / register-array pointer to debug64, then hlt's.
void MacroAssembler::stop(const char* msg) {
  address rip = pc();
  pusha(); // get regs on stack
  lea(c_rarg0, ExternalAddress((address) msg));
  lea(c_rarg1, InternalAddress(rip));
  movq(c_rarg2, rsp); // pass pointer to regs array
  andq(rsp, -16); // align stack as required by ABI
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64)));
  hlt();
}

// Emit code that prints a warning and continues; r12 preserves the original
// rsp across the forced 16-byte alignment.
void MacroAssembler::warn(const char* msg) {
  push(r12);
  movq(r12, rsp);
  andq(rsp, -16);     // align stack as required by push_CPU_state and call

  push_CPU_state();   // keeps alignment at 16 bytes
  lea(c_rarg0, ExternalAddress((address) msg));
  call_VM_leaf(CAST_FROM_FN_PTR(address, warning), c_rarg0);
  pop_CPU_state();

  movq(rsp, r12);
  pop(r12);
}

#ifndef PRODUCT
extern "C" void findpc(intptr_t x);
#endif

// Runtime backend of MacroAssembler::stop() on 64-bit: prints the saved
// registers (indices follow the pusha() layout: regs[15] = rax ... regs[0] = r15).
void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) {
  // In order to get locks to work, we need to fake a in_VM state
  if (ShowMessageBoxOnError ) {
    JavaThread* thread = JavaThread::current();
    JavaThreadState saved_state = thread->thread_state();
    thread->set_thread_state(_thread_in_vm);
#ifndef PRODUCT
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      ttyLocker ttyl;
      BytecodeCounter::print();
    }
#endif
    // To see where a verify_oop failed, get $ebx+40/X for this frame.
    // XXX correct this offset for amd64
    // This is the value of eip which points to where verify_oop will return.
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      ttyLocker ttyl;
      tty->print_cr("rip = 0x%016lx", pc);
#ifndef PRODUCT
      tty->cr();
      findpc(pc);
      tty->cr();
#endif
      tty->print_cr("rax = 0x%016lx", regs[15]);
      tty->print_cr("rbx = 0x%016lx", regs[12]);
      tty->print_cr("rcx = 0x%016lx", regs[14]);
      tty->print_cr("rdx = 0x%016lx", regs[13]);
      tty->print_cr("rdi = 0x%016lx", regs[8]);
      tty->print_cr("rsi = 0x%016lx", regs[9]);
      tty->print_cr("rbp = 0x%016lx", regs[10]);
      tty->print_cr("rsp = 0x%016lx", regs[11]);
      tty->print_cr("r8  = 0x%016lx", regs[7]);
      tty->print_cr("r9  = 0x%016lx", regs[6]);
      tty->print_cr("r10 = 0x%016lx", regs[5]);
      tty->print_cr("r11 = 0x%016lx", regs[4]);
      tty->print_cr("r12 = 0x%016lx", regs[3]);
      tty->print_cr("r13 = 0x%016lx", regs[2]);
      tty->print_cr("r14 = 0x%016lx", regs[1]);
      tty->print_cr("r15 = 0x%016lx", regs[0]);
      BREAKPOINT;
    }
    ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
  } else {
    ttyLocker ttyl;
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n",
                    msg);
  }
}

#endif // _LP64

// Now versions that are common to 32/64 bit

void MacroAssembler::addptr(Register dst, int32_t imm32) {
  LP64_ONLY(addq(dst, imm32)) NOT_LP64(addl(dst, imm32));
}

void MacroAssembler::addptr(Register dst, Register src) {
  LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
}

void MacroAssembler::addptr(Address dst, Register src) {
  LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
}

// Pad the code buffer with nops until the offset is a multiple of modulus.
void MacroAssembler::align(int modulus) {
  if (offset() % modulus != 0) {
    nop(modulus - (offset() % modulus));
  }
}

void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) {
  andpd(dst, as_Address(src));
}

void MacroAssembler::andptr(Register dst, int32_t imm32) {
  LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32));
}

// Atomically increment a 32-bit counter in memory; flags are preserved
// by bracketing with pushf/popf.
void MacroAssembler::atomic_incl(AddressLiteral counter_addr) {
  pushf();
  if (os::is_MP())
    lock();
  incrementl(counter_addr);
  popf();
}

// Writes to stack successive pages until offset reached to check for
// stack overflow + shadow pages. This clobbers tmp.
void MacroAssembler::bang_stack_size(Register size, Register tmp) {
  movptr(tmp, rsp);
  // Bang stack for total size given plus shadow page size.
  // Bang one page at a time because large size can bang beyond yellow and
  // red zones.
  Label loop;
  bind(loop);
  movl(Address(tmp, (-os::vm_page_size())), size );
  subptr(tmp, os::vm_page_size());
  subl(size, os::vm_page_size());
  jcc(Assembler::greater, loop);

  // Bang down shadow pages too.
  // The -1 because we already subtracted 1 page.
  for (int i = 0; i< StackShadowPages-1; i++) {
    // this could be any sized move but this is can be a debugging crumb
    // so the bigger the better.
    movptr(Address(tmp, (-i*os::vm_page_size())), size );
  }
}

void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
  assert(UseBiasedLocking, "why call this otherwise?");

  // Check for biased locking unlock case, which is a no-op
  // Note: we do not have to check the thread ID for two reasons.
  // First, the interpreter checks for IllegalMonitorStateException at
  // a higher level. Second, if the bias was revoked while we held the
  // lock, the object could not be rebiased toward another thread, so
  // the bias bit would be clear.
  movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  andptr(temp_reg, markOopDesc::biased_lock_mask_in_place);
  cmpptr(temp_reg, markOopDesc::biased_lock_pattern);
  jcc(Assembler::equal, done);
}

void MacroAssembler::c2bool(Register x) {
  // implements x == 0 ? 0 : 1
  // note: must only look at least-significant byte of x
  //       since C-style booleans are stored in one byte
  //       only! (was bug)
  andl(x, 0xFF);
  setb(Assembler::notZero, x);
}

// Wouldn't need if AddressLiteral version had new name
void MacroAssembler::call(Label& L, relocInfo::relocType rtype) {
  Assembler::call(L, rtype);
}

void MacroAssembler::call(Register entry) {
  Assembler::call(entry);
}

// Call a literal target; unreachable targets go indirect via rscratch1.
void MacroAssembler::call(AddressLiteral entry) {
  if (reachable(entry)) {
    Assembler::call_literal(entry.target(), entry.rspec());
  } else {
    lea(rscratch1, entry);
    Assembler::call(rscratch1);
  }
}

// Implementation of call_VM versions
//
// Each variant emits a local call/jmp trampoline: the `call C` pushes a
// return address for the out-of-line helper sequence at C, which performs
// the real VM call via call_VM_helper and then ret's past it to E.

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);
  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
  ret(0);

  bind(E);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);
  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
  ret(0);

  bind(E);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);

  // Args are passed last-to-first so an earlier arg cannot sit in a C
  // argument register about to be overwritten.
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));

  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
  ret(0);

  bind(E);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);

  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);

  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
  ret(0);

  bind(E);
}

// Variants taking an explicit last_java_sp delegate straight to call_VM_base.

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             int number_of_arguments,
                             bool check_exceptions) {
  Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);
  call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}

// Common VM-call sequence: establishes the thread argument, records the
// last Java frame, performs the leaf call, then (continuation beyond this
// chunk) restores state and checks for pending exceptions.
void MacroAssembler::call_VM_base(Register oop_result,
                                  Register java_thread,
                                  Register last_java_sp,
                                  address entry_point,
                                  int number_of_arguments,
                                  bool check_exceptions) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifdef _LP64
    java_thread = r15_thread;
#else
    java_thread = rdi;
    get_thread(java_thread);
#endif // LP64
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }
  // debugging support
  assert(number_of_arguments >= 0   , "cannot have negative number of arguments");
  LP64_ONLY(assert(java_thread == r15_thread, "unexpected register"));
  assert(java_thread != oop_result  , "cannot use the same register for java_thread & oop_result");
  assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");

  // push java thread (becomes first argument of C function)

  NOT_LP64(push(java_thread); number_of_arguments++);
  LP64_ONLY(mov(c_rarg0, r15_thread));

  // set last Java frame before call
  assert(last_java_sp != rbp, "can't use ebp/rbp");

  // Only interpreter should have to set fp
  set_last_Java_frame(java_thread, last_java_sp, rbp, NULL);

  // do the call, remove parameters
  MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments);

  // restore the thread (cannot use the pushed argument since arguments
  // may be overwritten by C code generated by an optimizing compiler);
  // however can use the register value
directly if it is callee saved. 5625 if (LP64_ONLY(true ||) java_thread == rdi || java_thread == rsi) { 5626 // rdi & rsi (also r15) are callee saved -> nothing to do 5627#ifdef ASSERT 5628 guarantee(java_thread != rax, "change this code"); 5629 push(rax); 5630 { Label L; 5631 get_thread(rax); 5632 cmpptr(java_thread, rax); 5633 jcc(Assembler::equal, L); 5634 stop("MacroAssembler::call_VM_base: rdi not callee saved?"); 5635 bind(L); 5636 } 5637 pop(rax); 5638#endif 5639 } else { 5640 get_thread(java_thread); 5641 } 5642 // reset last Java frame 5643 // Only interpreter should have to clear fp 5644 reset_last_Java_frame(java_thread, true, false); 5645 5646#ifndef CC_INTERP 5647 // C++ interp handles this in the interpreter 5648 check_and_handle_popframe(java_thread); 5649 check_and_handle_earlyret(java_thread); 5650#endif /* CC_INTERP */ 5651 5652 if (check_exceptions) { 5653 // check for pending exceptions (java_thread is set upon return) 5654 cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD); 5655#ifndef _LP64 5656 jump_cc(Assembler::notEqual, 5657 RuntimeAddress(StubRoutines::forward_exception_entry())); 5658#else 5659 // This used to conditionally jump to forward_exception however it is 5660 // possible if we relocate that the branch will not reach. 
So we must jump 5661 // around so we can always reach 5662 5663 Label ok; 5664 jcc(Assembler::equal, ok); 5665 jump(RuntimeAddress(StubRoutines::forward_exception_entry())); 5666 bind(ok); 5667#endif // LP64 5668 } 5669 5670 // get oop result if there is one and reset the value in the thread 5671 if (oop_result->is_valid()) { 5672 movptr(oop_result, Address(java_thread, JavaThread::vm_result_offset())); 5673 movptr(Address(java_thread, JavaThread::vm_result_offset()), NULL_WORD); 5674 verify_oop(oop_result, "broken oop in call_VM_base"); 5675 } 5676} 5677 5678void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { 5679 5680 // Calculate the value for last_Java_sp 5681 // somewhat subtle. call_VM does an intermediate call 5682 // which places a return address on the stack just under the 5683 // stack pointer as the user finsihed with it. This allows 5684 // use to retrieve last_Java_pc from last_Java_sp[-1]. 5685 // On 32bit we then have to push additional args on the stack to accomplish 5686 // the actual requested call. On 64bit call_VM only can use register args 5687 // so the only extra space is the return address that call_VM created. 5688 // This hopefully explains the calculations here. 
5689 5690#ifdef _LP64 5691 // We've pushed one address, correct last_Java_sp 5692 lea(rax, Address(rsp, wordSize)); 5693#else 5694 lea(rax, Address(rsp, (1 + number_of_arguments) * wordSize)); 5695#endif // LP64 5696 5697 call_VM_base(oop_result, noreg, rax, entry_point, number_of_arguments, check_exceptions); 5698 5699} 5700 5701void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { 5702 call_VM_leaf_base(entry_point, number_of_arguments); 5703} 5704 5705void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { 5706 pass_arg0(this, arg_0); 5707 call_VM_leaf(entry_point, 1); 5708} 5709 5710void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { 5711 5712 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); 5713 pass_arg1(this, arg_1); 5714 pass_arg0(this, arg_0); 5715 call_VM_leaf(entry_point, 2); 5716} 5717 5718void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { 5719 LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg")); 5720 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 5721 pass_arg2(this, arg_2); 5722 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); 5723 pass_arg1(this, arg_1); 5724 pass_arg0(this, arg_0); 5725 call_VM_leaf(entry_point, 3); 5726} 5727 5728void MacroAssembler::check_and_handle_earlyret(Register java_thread) { 5729} 5730 5731void MacroAssembler::check_and_handle_popframe(Register java_thread) { 5732} 5733 5734void MacroAssembler::cmp32(AddressLiteral src1, int32_t imm) { 5735 if (reachable(src1)) { 5736 cmpl(as_Address(src1), imm); 5737 } else { 5738 lea(rscratch1, src1); 5739 cmpl(Address(rscratch1, 0), imm); 5740 } 5741} 5742 5743void MacroAssembler::cmp32(Register src1, AddressLiteral src2) { 5744 assert(!src2.is_lval(), "use cmpptr"); 5745 if (reachable(src2)) { 5746 cmpl(src1, as_Address(src2)); 5747 } else { 5748 lea(rscratch1, src2); 5749 cmpl(src1, Address(rscratch1, 0)); 5750 } 5751} 5752 5753void 
MacroAssembler::cmp32(Register src1, int32_t imm) { 5754 Assembler::cmpl(src1, imm); 5755} 5756 5757void MacroAssembler::cmp32(Register src1, Address src2) { 5758 Assembler::cmpl(src1, src2); 5759} 5760 5761void MacroAssembler::cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) { 5762 ucomisd(opr1, opr2); 5763 5764 Label L; 5765 if (unordered_is_less) { 5766 movl(dst, -1); 5767 jcc(Assembler::parity, L); 5768 jcc(Assembler::below , L); 5769 movl(dst, 0); 5770 jcc(Assembler::equal , L); 5771 increment(dst); 5772 } else { // unordered is greater 5773 movl(dst, 1); 5774 jcc(Assembler::parity, L); 5775 jcc(Assembler::above , L); 5776 movl(dst, 0); 5777 jcc(Assembler::equal , L); 5778 decrementl(dst); 5779 } 5780 bind(L); 5781} 5782 5783void MacroAssembler::cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) { 5784 ucomiss(opr1, opr2); 5785 5786 Label L; 5787 if (unordered_is_less) { 5788 movl(dst, -1); 5789 jcc(Assembler::parity, L); 5790 jcc(Assembler::below , L); 5791 movl(dst, 0); 5792 jcc(Assembler::equal , L); 5793 increment(dst); 5794 } else { // unordered is greater 5795 movl(dst, 1); 5796 jcc(Assembler::parity, L); 5797 jcc(Assembler::above , L); 5798 movl(dst, 0); 5799 jcc(Assembler::equal , L); 5800 decrementl(dst); 5801 } 5802 bind(L); 5803} 5804 5805 5806void MacroAssembler::cmp8(AddressLiteral src1, int imm) { 5807 if (reachable(src1)) { 5808 cmpb(as_Address(src1), imm); 5809 } else { 5810 lea(rscratch1, src1); 5811 cmpb(Address(rscratch1, 0), imm); 5812 } 5813} 5814 5815void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) { 5816#ifdef _LP64 5817 if (src2.is_lval()) { 5818 movptr(rscratch1, src2); 5819 Assembler::cmpq(src1, rscratch1); 5820 } else if (reachable(src2)) { 5821 cmpq(src1, as_Address(src2)); 5822 } else { 5823 lea(rscratch1, src2); 5824 Assembler::cmpq(src1, Address(rscratch1, 0)); 5825 } 5826#else 5827 if (src2.is_lval()) { 5828 cmp_literal32(src1, (int32_t) 
src2.target(), src2.rspec()); 5829 } else { 5830 cmpl(src1, as_Address(src2)); 5831 } 5832#endif // _LP64 5833} 5834 5835void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) { 5836 assert(src2.is_lval(), "not a mem-mem compare"); 5837#ifdef _LP64 5838 // moves src2's literal address 5839 movptr(rscratch1, src2); 5840 Assembler::cmpq(src1, rscratch1); 5841#else 5842 cmp_literal32(src1, (int32_t) src2.target(), src2.rspec()); 5843#endif // _LP64 5844} 5845 5846void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) { 5847 if (reachable(adr)) { 5848 if (os::is_MP()) 5849 lock(); 5850 cmpxchgptr(reg, as_Address(adr)); 5851 } else { 5852 lea(rscratch1, adr); 5853 if (os::is_MP()) 5854 lock(); 5855 cmpxchgptr(reg, Address(rscratch1, 0)); 5856 } 5857} 5858 5859void MacroAssembler::cmpxchgptr(Register reg, Address adr) { 5860 LP64_ONLY(cmpxchgq(reg, adr)) NOT_LP64(cmpxchgl(reg, adr)); 5861} 5862 5863void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) { 5864 comisd(dst, as_Address(src)); 5865} 5866 5867void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) { 5868 comiss(dst, as_Address(src)); 5869} 5870 5871 5872void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) { 5873 Condition negated_cond = negate_condition(cond); 5874 Label L; 5875 jcc(negated_cond, L); 5876 atomic_incl(counter_addr); 5877 bind(L); 5878} 5879 5880int MacroAssembler::corrected_idivl(Register reg) { 5881 // Full implementation of Java idiv and irem; checks for 5882 // special case as described in JVM spec., p.243 & p.271. 5883 // The function returns the (pc) offset of the idivl 5884 // instruction - may be needed for implicit exceptions. 
5885 // 5886 // normal case special case 5887 // 5888 // input : rax,: dividend min_int 5889 // reg: divisor (may not be rax,/rdx) -1 5890 // 5891 // output: rax,: quotient (= rax, idiv reg) min_int 5892 // rdx: remainder (= rax, irem reg) 0 5893 assert(reg != rax && reg != rdx, "reg cannot be rax, or rdx register"); 5894 const int min_int = 0x80000000; 5895 Label normal_case, special_case; 5896 5897 // check for special case 5898 cmpl(rax, min_int); 5899 jcc(Assembler::notEqual, normal_case); 5900 xorl(rdx, rdx); // prepare rdx for possible special case (where remainder = 0) 5901 cmpl(reg, -1); 5902 jcc(Assembler::equal, special_case); 5903 5904 // handle normal case 5905 bind(normal_case); 5906 cdql(); 5907 int idivl_offset = offset(); 5908 idivl(reg); 5909 5910 // normal and special case exit 5911 bind(special_case); 5912 5913 return idivl_offset; 5914} 5915 5916 5917 5918void MacroAssembler::decrementl(Register reg, int value) { 5919 if (value == min_jint) {subl(reg, value) ; return; } 5920 if (value < 0) { incrementl(reg, -value); return; } 5921 if (value == 0) { ; return; } 5922 if (value == 1 && UseIncDec) { decl(reg) ; return; } 5923 /* else */ { subl(reg, value) ; return; } 5924} 5925 5926void MacroAssembler::decrementl(Address dst, int value) { 5927 if (value == min_jint) {subl(dst, value) ; return; } 5928 if (value < 0) { incrementl(dst, -value); return; } 5929 if (value == 0) { ; return; } 5930 if (value == 1 && UseIncDec) { decl(dst) ; return; } 5931 /* else */ { subl(dst, value) ; return; } 5932} 5933 5934void MacroAssembler::division_with_shift (Register reg, int shift_value) { 5935 assert (shift_value > 0, "illegal shift value"); 5936 Label _is_positive; 5937 testl (reg, reg); 5938 jcc (Assembler::positive, _is_positive); 5939 int offset = (1 << shift_value) - 1 ; 5940 5941 if (offset == 1) { 5942 incrementl(reg); 5943 } else { 5944 addl(reg, offset); 5945 } 5946 5947 bind (_is_positive); 5948 sarl(reg, shift_value); 5949} 5950 5951// 
!defined(COMPILER2) is because of stupid core builds 5952#if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2) 5953void MacroAssembler::empty_FPU_stack() { 5954 if (VM_Version::supports_mmx()) { 5955 emms(); 5956 } else { 5957 for (int i = 8; i-- > 0; ) ffree(i); 5958 } 5959} 5960#endif // !LP64 || C1 || !C2 5961 5962 5963// Defines obj, preserves var_size_in_bytes 5964void MacroAssembler::eden_allocate(Register obj, 5965 Register var_size_in_bytes, 5966 int con_size_in_bytes, 5967 Register t1, 5968 Label& slow_case) { 5969 assert(obj == rax, "obj must be in rax, for cmpxchg"); 5970 assert_different_registers(obj, var_size_in_bytes, t1); 5971 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { 5972 jmp(slow_case); 5973 } else { 5974 Register end = t1; 5975 Label retry; 5976 bind(retry); 5977 ExternalAddress heap_top((address) Universe::heap()->top_addr()); 5978 movptr(obj, heap_top); 5979 if (var_size_in_bytes == noreg) { 5980 lea(end, Address(obj, con_size_in_bytes)); 5981 } else { 5982 lea(end, Address(obj, var_size_in_bytes, Address::times_1)); 5983 } 5984 // if end < obj then we wrapped around => object too long => slow case 5985 cmpptr(end, obj); 5986 jcc(Assembler::below, slow_case); 5987 cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr())); 5988 jcc(Assembler::above, slow_case); 5989 // Compare obj with the top addr, and if still equal, store the new top addr in 5990 // end at the address of the top addr pointer. Sets ZF if was equal, and clears 5991 // it otherwise. Use lock prefix for atomicity on MPs. 
5992 locked_cmpxchgptr(end, heap_top); 5993 jcc(Assembler::notEqual, retry); 5994 } 5995} 5996 5997void MacroAssembler::enter() { 5998 push(rbp); 5999 mov(rbp, rsp); 6000} 6001 6002void MacroAssembler::fcmp(Register tmp) { 6003 fcmp(tmp, 1, true, true); 6004} 6005 6006void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) { 6007 assert(!pop_right || pop_left, "usage error"); 6008 if (VM_Version::supports_cmov()) { 6009 assert(tmp == noreg, "unneeded temp"); 6010 if (pop_left) { 6011 fucomip(index); 6012 } else { 6013 fucomi(index); 6014 } 6015 if (pop_right) { 6016 fpop(); 6017 } 6018 } else { 6019 assert(tmp != noreg, "need temp"); 6020 if (pop_left) { 6021 if (pop_right) { 6022 fcompp(); 6023 } else { 6024 fcomp(index); 6025 } 6026 } else { 6027 fcom(index); 6028 } 6029 // convert FPU condition into eflags condition via rax, 6030 save_rax(tmp); 6031 fwait(); fnstsw_ax(); 6032 sahf(); 6033 restore_rax(tmp); 6034 } 6035 // condition codes set as follows: 6036 // 6037 // CF (corresponds to C0) if x < y 6038 // PF (corresponds to C2) if unordered 6039 // ZF (corresponds to C3) if x = y 6040} 6041 6042void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less) { 6043 fcmp2int(dst, unordered_is_less, 1, true, true); 6044} 6045 6046void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right) { 6047 fcmp(VM_Version::supports_cmov() ? 
noreg : dst, index, pop_left, pop_right); 6048 Label L; 6049 if (unordered_is_less) { 6050 movl(dst, -1); 6051 jcc(Assembler::parity, L); 6052 jcc(Assembler::below , L); 6053 movl(dst, 0); 6054 jcc(Assembler::equal , L); 6055 increment(dst); 6056 } else { // unordered is greater 6057 movl(dst, 1); 6058 jcc(Assembler::parity, L); 6059 jcc(Assembler::above , L); 6060 movl(dst, 0); 6061 jcc(Assembler::equal , L); 6062 decrementl(dst); 6063 } 6064 bind(L); 6065} 6066 6067void MacroAssembler::fld_d(AddressLiteral src) { 6068 fld_d(as_Address(src)); 6069} 6070 6071void MacroAssembler::fld_s(AddressLiteral src) { 6072 fld_s(as_Address(src)); 6073} 6074 6075void MacroAssembler::fld_x(AddressLiteral src) { 6076 Assembler::fld_x(as_Address(src)); 6077} 6078 6079void MacroAssembler::fldcw(AddressLiteral src) { 6080 Assembler::fldcw(as_Address(src)); 6081} 6082 6083void MacroAssembler::fpop() { 6084 ffree(); 6085 fincstp(); 6086} 6087 6088void MacroAssembler::fremr(Register tmp) { 6089 save_rax(tmp); 6090 { Label L; 6091 bind(L); 6092 fprem(); 6093 fwait(); fnstsw_ax(); 6094#ifdef _LP64 6095 testl(rax, 0x400); 6096 jcc(Assembler::notEqual, L); 6097#else 6098 sahf(); 6099 jcc(Assembler::parity, L); 6100#endif // _LP64 6101 } 6102 restore_rax(tmp); 6103 // Result is in ST0. 
6104 // Note: fxch & fpop to get rid of ST1 6105 // (otherwise FPU stack could overflow eventually) 6106 fxch(1); 6107 fpop(); 6108} 6109 6110 6111void MacroAssembler::incrementl(AddressLiteral dst) { 6112 if (reachable(dst)) { 6113 incrementl(as_Address(dst)); 6114 } else { 6115 lea(rscratch1, dst); 6116 incrementl(Address(rscratch1, 0)); 6117 } 6118} 6119 6120void MacroAssembler::incrementl(ArrayAddress dst) { 6121 incrementl(as_Address(dst)); 6122} 6123 6124void MacroAssembler::incrementl(Register reg, int value) { 6125 if (value == min_jint) {addl(reg, value) ; return; } 6126 if (value < 0) { decrementl(reg, -value); return; } 6127 if (value == 0) { ; return; } 6128 if (value == 1 && UseIncDec) { incl(reg) ; return; } 6129 /* else */ { addl(reg, value) ; return; } 6130} 6131 6132void MacroAssembler::incrementl(Address dst, int value) { 6133 if (value == min_jint) {addl(dst, value) ; return; } 6134 if (value < 0) { decrementl(dst, -value); return; } 6135 if (value == 0) { ; return; } 6136 if (value == 1 && UseIncDec) { incl(dst) ; return; } 6137 /* else */ { addl(dst, value) ; return; } 6138} 6139 6140void MacroAssembler::jump(AddressLiteral dst) { 6141 if (reachable(dst)) { 6142 jmp_literal(dst.target(), dst.rspec()); 6143 } else { 6144 lea(rscratch1, dst); 6145 jmp(rscratch1); 6146 } 6147} 6148 6149void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) { 6150 if (reachable(dst)) { 6151 InstructionMark im(this); 6152 relocate(dst.reloc()); 6153 const int short_size = 2; 6154 const int long_size = 6; 6155 int offs = (intptr_t)dst.target() - ((intptr_t)_code_pos); 6156 if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) { 6157 // 0111 tttn #8-bit disp 6158 emit_byte(0x70 | cc); 6159 emit_byte((offs - short_size) & 0xFF); 6160 } else { 6161 // 0000 1111 1000 tttn #32-bit disp 6162 emit_byte(0x0F); 6163 emit_byte(0x80 | cc); 6164 emit_long(offs - long_size); 6165 } 6166 } else { 6167#ifdef ASSERT 6168 warning("reversing conditional branch"); 
6169#endif /* ASSERT */ 6170 Label skip; 6171 jccb(reverse[cc], skip); 6172 lea(rscratch1, dst); 6173 Assembler::jmp(rscratch1); 6174 bind(skip); 6175 } 6176} 6177 6178void MacroAssembler::ldmxcsr(AddressLiteral src) { 6179 if (reachable(src)) { 6180 Assembler::ldmxcsr(as_Address(src)); 6181 } else { 6182 lea(rscratch1, src); 6183 Assembler::ldmxcsr(Address(rscratch1, 0)); 6184 } 6185} 6186 6187int MacroAssembler::load_signed_byte(Register dst, Address src) { 6188 int off; 6189 if (LP64_ONLY(true ||) VM_Version::is_P6()) { 6190 off = offset(); 6191 movsbl(dst, src); // movsxb 6192 } else { 6193 off = load_unsigned_byte(dst, src); 6194 shll(dst, 24); 6195 sarl(dst, 24); 6196 } 6197 return off; 6198} 6199 6200// word => int32 which seems bad for 64bit 6201int MacroAssembler::load_signed_word(Register dst, Address src) { 6202 int off; 6203 if (LP64_ONLY(true ||) VM_Version::is_P6()) { 6204 // This is dubious to me since it seems safe to do a signed 16 => 64 bit 6205 // version but this is what 64bit has always done. This seems to imply 6206 // that users are only using 32bits worth. 6207 off = offset(); 6208 movswl(dst, src); // movsxw 6209 } else { 6210 off = load_unsigned_word(dst, src); 6211 shll(dst, 16); 6212 sarl(dst, 16); 6213 } 6214 return off; 6215} 6216 6217int MacroAssembler::load_unsigned_byte(Register dst, Address src) { 6218 // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16, 6219 // and "3.9 Partial Register Penalties", p. 22). 6220 int off; 6221 if (LP64_ONLY(true || ) VM_Version::is_P6() || src.uses(dst)) { 6222 off = offset(); 6223 movzbl(dst, src); // movzxb 6224 } else { 6225 xorl(dst, dst); 6226 off = offset(); 6227 movb(dst, src); 6228 } 6229 return off; 6230} 6231 6232int MacroAssembler::load_unsigned_word(Register dst, Address src) { 6233 // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16, 6234 // and "3.9 Partial Register Penalties", p. 22). 
6235 int off; 6236 if (LP64_ONLY(true ||) VM_Version::is_P6() || src.uses(dst)) { 6237 off = offset(); 6238 movzwl(dst, src); // movzxw 6239 } else { 6240 xorl(dst, dst); 6241 off = offset(); 6242 movw(dst, src); 6243 } 6244 return off; 6245} 6246 6247void MacroAssembler::mov32(AddressLiteral dst, Register src) { 6248 if (reachable(dst)) { 6249 movl(as_Address(dst), src); 6250 } else { 6251 lea(rscratch1, dst); 6252 movl(Address(rscratch1, 0), src); 6253 } 6254} 6255 6256void MacroAssembler::mov32(Register dst, AddressLiteral src) { 6257 if (reachable(src)) { 6258 movl(dst, as_Address(src)); 6259 } else { 6260 lea(rscratch1, src); 6261 movl(dst, Address(rscratch1, 0)); 6262 } 6263} 6264 6265// C++ bool manipulation 6266 6267void MacroAssembler::movbool(Register dst, Address src) { 6268 if(sizeof(bool) == 1) 6269 movb(dst, src); 6270 else if(sizeof(bool) == 2) 6271 movw(dst, src); 6272 else if(sizeof(bool) == 4) 6273 movl(dst, src); 6274 else 6275 // unsupported 6276 ShouldNotReachHere(); 6277} 6278 6279void MacroAssembler::movbool(Address dst, bool boolconst) { 6280 if(sizeof(bool) == 1) 6281 movb(dst, (int) boolconst); 6282 else if(sizeof(bool) == 2) 6283 movw(dst, (int) boolconst); 6284 else if(sizeof(bool) == 4) 6285 movl(dst, (int) boolconst); 6286 else 6287 // unsupported 6288 ShouldNotReachHere(); 6289} 6290 6291void MacroAssembler::movbool(Address dst, Register src) { 6292 if(sizeof(bool) == 1) 6293 movb(dst, src); 6294 else if(sizeof(bool) == 2) 6295 movw(dst, src); 6296 else if(sizeof(bool) == 4) 6297 movl(dst, src); 6298 else 6299 // unsupported 6300 ShouldNotReachHere(); 6301} 6302 6303void MacroAssembler::movbyte(ArrayAddress dst, int src) { 6304 movb(as_Address(dst), src); 6305} 6306 6307void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) { 6308 if (reachable(src)) { 6309 if (UseXmmLoadAndClearUpper) { 6310 movsd (dst, as_Address(src)); 6311 } else { 6312 movlpd(dst, as_Address(src)); 6313 } 6314 } else { 6315 lea(rscratch1, src); 6316 if 
(UseXmmLoadAndClearUpper) { 6317 movsd (dst, Address(rscratch1, 0)); 6318 } else { 6319 movlpd(dst, Address(rscratch1, 0)); 6320 } 6321 } 6322} 6323 6324void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) { 6325 if (reachable(src)) { 6326 movss(dst, as_Address(src)); 6327 } else { 6328 lea(rscratch1, src); 6329 movss(dst, Address(rscratch1, 0)); 6330 } 6331} 6332 6333void MacroAssembler::movptr(Register dst, Register src) { 6334 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); 6335} 6336 6337void MacroAssembler::movptr(Register dst, Address src) { 6338 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); 6339} 6340 6341// src should NEVER be a real pointer. Use AddressLiteral for true pointers 6342void MacroAssembler::movptr(Register dst, intptr_t src) { 6343 LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src)); 6344} 6345 6346void MacroAssembler::movptr(Address dst, Register src) { 6347 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); 6348} 6349 6350void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) { 6351 if (reachable(src)) { 6352 movss(dst, as_Address(src)); 6353 } else { 6354 lea(rscratch1, src); 6355 movss(dst, Address(rscratch1, 0)); 6356 } 6357} 6358 6359void MacroAssembler::null_check(Register reg, int offset) { 6360 if (needs_explicit_null_check(offset)) { 6361 // provoke OS NULL exception if reg = NULL by 6362 // accessing M[reg] w/o changing any (non-CC) registers 6363 // NOTE: cmpl is plenty here to provoke a segv 6364 cmpptr(rax, Address(reg, 0)); 6365 // Note: should probably use testl(rax, Address(reg, 0)); 6366 // may be shorter code (however, this version of 6367 // testl needs to be implemented first) 6368 } else { 6369 // nothing to do, (later) access of M[reg + offset] 6370 // will provoke OS NULL exception if reg = NULL 6371 } 6372} 6373 6374void MacroAssembler::os_breakpoint() { 6375 // instead of directly emitting a breakpoint, call os:breakpoint for better debugability 6376 // (e.g., MSVC can't call ps() 
otherwise) 6377 call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint))); 6378} 6379 6380void MacroAssembler::pop_CPU_state() { 6381 pop_FPU_state(); 6382 pop_IU_state(); 6383} 6384 6385void MacroAssembler::pop_FPU_state() { 6386 NOT_LP64(frstor(Address(rsp, 0));) 6387 LP64_ONLY(fxrstor(Address(rsp, 0));) 6388 addptr(rsp, FPUStateSizeInWords * wordSize); 6389} 6390 6391void MacroAssembler::pop_IU_state() { 6392 popa(); 6393 LP64_ONLY(addq(rsp, 8)); 6394 popf(); 6395} 6396 6397// Save Integer and Float state 6398// Warning: Stack must be 16 byte aligned (64bit) 6399void MacroAssembler::push_CPU_state() { 6400 push_IU_state(); 6401 push_FPU_state(); 6402} 6403 6404void MacroAssembler::push_FPU_state() { 6405 subptr(rsp, FPUStateSizeInWords * wordSize); 6406#ifndef _LP64 6407 fnsave(Address(rsp, 0)); 6408 fwait(); 6409#else 6410 fxsave(Address(rsp, 0)); 6411#endif // LP64 6412} 6413 6414void MacroAssembler::push_IU_state() { 6415 // Push flags first because pusha kills them 6416 pushf(); 6417 // Make sure rsp stays 16-byte aligned 6418 LP64_ONLY(subq(rsp, 8)); 6419 pusha(); 6420} 6421 6422void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) { 6423 // determine java_thread register 6424 if (!java_thread->is_valid()) { 6425 java_thread = rdi; 6426 get_thread(java_thread); 6427 } 6428 // we must set sp to zero to clear frame 6429 movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), NULL_WORD); 6430 if (clear_fp) { 6431 movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), NULL_WORD); 6432 } 6433 6434 if (clear_pc) 6435 movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), NULL_WORD); 6436 6437} 6438 6439void MacroAssembler::restore_rax(Register tmp) { 6440 if (tmp == noreg) pop(rax); 6441 else if (tmp != rax) mov(rax, tmp); 6442} 6443 6444void MacroAssembler::round_to(Register reg, int modulus) { 6445 addptr(reg, modulus - 1); 6446 andptr(reg, -modulus); 6447} 6448 6449void 
MacroAssembler::save_rax(Register tmp) { 6450 if (tmp == noreg) push(rax); 6451 else if (tmp != rax) mov(tmp, rax); 6452} 6453 6454// Write serialization page so VM thread can do a pseudo remote membar. 6455// We use the current thread pointer to calculate a thread specific 6456// offset to write to within the page. This minimizes bus traffic 6457// due to cache line collision. 6458void MacroAssembler::serialize_memory(Register thread, Register tmp) { 6459 movl(tmp, thread); 6460 shrl(tmp, os::get_serialize_page_shift_count()); 6461 andl(tmp, (os::vm_page_size() - sizeof(int))); 6462 6463 Address index(noreg, tmp, Address::times_1); 6464 ExternalAddress page(os::get_memory_serialize_page()); 6465 6466 movptr(ArrayAddress(page, index), tmp); 6467} 6468 6469// Calls to C land 6470// 6471// When entering C land, the rbp, & rsp of the last Java frame have to be recorded 6472// in the (thread-local) JavaThread object. When leaving C land, the last Java fp 6473// has to be reset to 0. This is required to allow proper stack traversal. 
// Records the last Java frame (sp always; fp and pc only when supplied) in
// the thread's frame anchor so the VM can walk the stack from C land.
void MacroAssembler::set_last_Java_frame(Register java_thread,
                                         Register last_java_sp,
                                         Register last_java_fp,
                                         address last_java_pc) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
    java_thread = rdi;
    get_thread(java_thread);
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }

  // last_java_fp is optional

  if (last_java_fp->is_valid()) {
    movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), last_java_fp);
  }

  // last_java_pc is optional

  if (last_java_pc != NULL) {
    lea(Address(java_thread,
                JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()),
        InternalAddress(last_java_pc));

  }
  // sp is stored last: it signals that the anchor is valid.
  movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
}

// Pointer-width shifts: 64-bit forms on LP64, 32-bit otherwise.
void MacroAssembler::shlptr(Register dst, int imm8) {
  LP64_ONLY(shlq(dst, imm8)) NOT_LP64(shll(dst, imm8));
}

void MacroAssembler::shrptr(Register dst, int imm8) {
  LP64_ONLY(shrq(dst, imm8)) NOT_LP64(shrl(dst, imm8));
}

// Sign-extends the low byte of reg into the full 32-bit register; the
// shift pair is the fallback when movsx is unavailable/unsuitable.
void MacroAssembler::sign_extend_byte(Register reg) {
  if (LP64_ONLY(true ||) (VM_Version::is_P6() && reg->has_byte_register())) {
    movsbl(reg, reg); // movsxb
  } else {
    shll(reg, 24);
    sarl(reg, 24);
  }
}

void MacroAssembler::sign_extend_short(Register reg) {
  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
    movswl(reg, reg); // movsxw
  } else {
    shll(reg, 16);
    sarl(reg, 16);
  }
}

//////////////////////////////////////////////////////////////////////////////////
#ifndef SERIALGC

// G1 SATB pre-barrier: if concurrent marking is active and the field at
// (obj, 0) holds a non-NULL oop, record the old value in the thread's SATB
// mark queue; calls into the runtime (g1_wb_pre) when the queue is full.
// On LP64 the thread is implicitly r15_thread; tmp/tmp2 are clobbered.
void MacroAssembler::g1_write_barrier_pre(Register obj,
#ifndef _LP64
                                          Register thread,
#endif
                                          Register tmp,
                                          Register tmp2,
                                          bool tosca_live) {
  LP64_ONLY(Register thread = r15_thread;)
  Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_active()));

  Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                 PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                  PtrQueue::byte_offset_of_buf()));


  Label done;
  Label runtime;

  // if (!marking_in_progress) goto done;
  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
    cmpl(in_progress, 0);
  } else {
    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
    cmpb(in_progress, 0);
  }
  jcc(Assembler::equal, done);

  // if (x.f == NULL) goto done;
  cmpptr(Address(obj, 0), NULL_WORD);
  jcc(Assembler::equal, done);

  // Can we store original value in the thread's buffer?

  LP64_ONLY(movslq(tmp, index);)
  movptr(tmp2, Address(obj, 0));
#ifdef _LP64
  cmpq(tmp, 0);
#else
  cmpl(index, 0);
#endif
  jcc(Assembler::equal, runtime);
  // Queue has room: the index counts down from the buffer size, so
  // decrement it and store the old value at buffer + index.
#ifdef _LP64
  subq(tmp, wordSize);
  movl(index, tmp);
  addq(tmp, buffer);
#else
  subl(index, wordSize);
  movl(tmp, buffer);
  addl(tmp, index);
#endif
  movptr(Address(tmp, 0), tmp2);
  jmp(done);
  bind(runtime);
  // save the live input values
  if(tosca_live) push(rax);
  push(obj);
#ifdef _LP64
  movq(c_rarg0, Address(obj, 0));
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), c_rarg0, r15_thread);
#else
  push(thread);
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), tmp2, thread);
  pop(thread);
#endif
  pop(obj);
  if(tosca_live) pop(rax);
  bind(done);

}

// G1 post-barrier: for a region-crossing store of a non-NULL oop whose card
// is not already dirty, dirty the card and enqueue its address in the
// thread's dirty-card queue; calls the runtime (g1_wb_post) when the queue
// is full. tmp/tmp2 are clobbered (and rscratch1 on LP64).
void MacroAssembler::g1_write_barrier_post(Register store_addr,
                                           Register new_val,
#ifndef _LP64
                                           Register thread,
#endif
                                           Register tmp,
                                           Register tmp2) {

  LP64_ONLY(Register thread = r15_thread;)
  Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                       PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                  PtrQueue::byte_offset_of_buf()));
  BarrierSet* bs = Universe::heap()->barrier_set();
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  Label done;
  Label runtime;

  // Does store cross heap regions?

  movptr(tmp, store_addr);
  xorptr(tmp, new_val);
  shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
  jcc(Assembler::equal, done);

  // crosses regions, storing NULL?

  cmpptr(new_val, (int32_t) NULL_WORD);
  jcc(Assembler::equal, done);

  // storing region crossing non-NULL, is card already dirty?

  ExternalAddress cardtable((address) ct->byte_map_base);
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
#ifdef _LP64
  const Register card_addr = tmp;

  movq(card_addr, store_addr);
  shrq(card_addr, CardTableModRefBS::card_shift);

  lea(tmp2, cardtable);

  // get the address of the card
  addq(card_addr, tmp2);
#else
  const Register card_index = tmp;

  movl(card_index, store_addr);
  shrl(card_index, CardTableModRefBS::card_shift);

  Address index(noreg, card_index, Address::times_1);
  const Register card_addr = tmp;
  lea(card_addr, as_Address(ArrayAddress(cardtable, index)));
#endif
  cmpb(Address(card_addr, 0), 0);
  jcc(Assembler::equal, done);

  // storing a region crossing, non-NULL oop, card is clean.
  // dirty card and log.

  movb(Address(card_addr, 0), 0);

  cmpl(queue_index, 0);
  jcc(Assembler::equal, runtime);
  // Queue has room: decrement the index and store the card address
  // (card index on 32-bit) at buffer + index.
  subl(queue_index, wordSize);
  movptr(tmp2, buffer);
#ifdef _LP64
  movslq(rscratch1, queue_index);
  addq(tmp2, rscratch1);
  movq(Address(tmp2, 0), card_addr);
#else
  addl(tmp2, queue_index);
  movl(Address(tmp2, 0), card_index);
#endif
  jmp(done);

  bind(runtime);
  // save the live input values
  push(store_addr);
  push(new_val);
#ifdef _LP64
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread);
#else
  push(thread);
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
  pop(thread);
#endif
  pop(new_val);
  pop(store_addr);

  bind(done);

}

#endif // SERIALGC
//////////////////////////////////////////////////////////////////////////////////


// (definition continues beyond this chunk)
void MacroAssembler::store_check(Register obj) {
  // Does a store check for the oop in register obj. The content of
  // register obj is destroyed afterwards.
  store_check_part_1(obj);
  store_check_part_2(obj);
}

// Card-table store check for a store to dst; dst itself is unused here
// because the card is derived from the oop's address in obj.
void MacroAssembler::store_check(Register obj, Address dst) {
  store_check(obj);
}


// split the store check operation so that other instructions can be scheduled inbetween
// Part 1: turn the oop address in obj into a card index. Destroys obj.
void MacroAssembler::store_check_part_1(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  shrptr(obj, CardTableModRefBS::card_shift);
}

// Part 2: mark the card at index obj (produced by part 1) as dirty (0).
void MacroAssembler::store_check_part_2(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");

  // The calculation for byte_map_base is as follows:
  // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift);
  // So this essentially converts an address to a displacement and
  // it will never need to be relocated. On 64bit however the value may be too
  // large for a 32bit displacement

  intptr_t disp = (intptr_t) ct->byte_map_base;
  if (is_simm32(disp)) {
    // byte_map_base fits in a 32-bit displacement: single movb.
    Address cardtable(noreg, obj, Address::times_1, disp);
    movb(cardtable, 0);
  } else {
    // By doing it as an ExternalAddress disp could be converted to a rip-relative
    // displacement and done in a single instruction given favorable mapping and
    // a smarter version of as_Address. Worst case it is two instructions which
    // is no worse off then loading disp into a register and doing as a simple
    // Address() as above.
    // We can't do as ExternalAddress as the only style since if disp == 0 we'll
    // assert since NULL isn't acceptable in a reloci (see 6644928). In any case
    // in some cases we'll get a single instruction version.

    ExternalAddress cardtable((address)disp);
    Address index(noreg, obj, Address::times_1);
    movb(as_Address(ArrayAddress(cardtable, index)), 0);
  }
}

// Pointer-width subtract of an immediate.
void MacroAssembler::subptr(Register dst, int32_t imm32) {
  LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32));
}

// Pointer-width subtract of a register.
void MacroAssembler::subptr(Register dst, Register src) {
  LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src));
}

// 32-bit test of src1 against the memory operand src2; goes through
// rscratch1 when src2 is not reachable with a 32-bit displacement.
void MacroAssembler::test32(Register src1, AddressLiteral src2) {
  // src2 must be rval

  if (reachable(src2)) {
    testl(src1, as_Address(src2));
  } else {
    lea(rscratch1, src2);
    testl(src1, Address(rscratch1, 0));
  }
}

// C++ bool manipulation
// Sets flags according to the C++ bool value in dst, dispatching on the
// platform's sizeof(bool) at VM-compile time.
void MacroAssembler::testbool(Register dst) {
  if(sizeof(bool) == 1)
    testb(dst, 0xff);
  else if(sizeof(bool) == 2) {
    // testw implementation needed for two byte bools
    ShouldNotReachHere();
  } else if(sizeof(bool) == 4)
    testl(dst, dst);
  else
    // unsupported
    ShouldNotReachHere();
}

// Pointer-width register-register test.
void MacroAssembler::testptr(Register dst, Register src) {
  LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src));
}

// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
// Bump-pointer allocation from the thread-local allocation buffer.
// On success obj holds the new object's address; on overflow jumps to
// slow_case. Size is either the constant con_size_in_bytes or the register
// var_size_in_bytes (when that register is valid).
void MacroAssembler::tlab_allocate(Register obj,
                                   Register var_size_in_bytes,
                                   int con_size_in_bytes,
                                   Register t1,
                                   Register t2,
                                   Label& slow_case) {
  assert_different_registers(obj, t1, t2);
  assert_different_registers(obj, var_size_in_bytes, t1);
  Register end = t2;
  Register thread = NOT_LP64(t1) LP64_ONLY(r15_thread);

  verify_tlab();

  NOT_LP64(get_thread(thread));

  movptr(obj, Address(thread, JavaThread::tlab_top_offset()));
  if (var_size_in_bytes == noreg) {
    lea(end, Address(obj, con_size_in_bytes));
  } else {
    lea(end, Address(obj, var_size_in_bytes, Address::times_1));
  }
  // end > tlab_end means the object does not fit: take the slow path.
  cmpptr(end, Address(thread, JavaThread::tlab_end_offset()));
  jcc(Assembler::above, slow_case);

  // update the tlab top pointer
  movptr(Address(thread, JavaThread::tlab_top_offset()), end);

  // recover var_size_in_bytes if necessary
  if (var_size_in_bytes == end) {
    subptr(var_size_in_bytes, obj);
  }
  verify_tlab();
}

// Preserves rbx, and rdx.
// Refill the TLAB from eden, or decide to allocate directly in the shared
// space (try_eden) when the remaining TLAB free space is above the
// refill-waste limit. The old TLAB remainder is filled with a dummy int
// array so the heap stays parseable.
void MacroAssembler::tlab_refill(Label& retry,
                                 Label& try_eden,
                                 Label& slow_case) {
  Register top = rax;
  Register t1 = rcx;
  Register t2 = rsi;
  Register thread_reg = NOT_LP64(rdi) LP64_ONLY(r15_thread);
  assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx);
  Label do_refill, discard_tlab;

  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
    // No allocation in the shared eden.
    jmp(slow_case);
  }

  NOT_LP64(get_thread(thread_reg));

  movptr(top, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
  movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));

  // calculate amount of free space
  subptr(t1, top);
  shrptr(t1, LogHeapWordSize);   // free space in heap words

  // Retain tlab and allocate object in shared space if
  // the amount free in the tlab is too large to discard.
  cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())));
  jcc(Assembler::lessEqual, discard_tlab);

  // Retain
  // %%% yuck as movptr...
  movptr(t2, (int32_t) ThreadLocalAllocBuffer::refill_waste_limit_increment());
  addptr(Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())), t2);
  if (TLABStats) {
    // increment number of slow_allocations
    addl(Address(thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())), 1);
  }
  jmp(try_eden);

  bind(discard_tlab);
  if (TLABStats) {
    // increment number of refills
    addl(Address(thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())), 1);
    // accumulate wastage -- t1 is amount free in tlab
    addl(Address(thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())), t1);
  }

  // if tlab is currently allocated (top or end != null) then
  // fill [top, end + alignment_reserve) with array object
  testptr (top, top);
  jcc(Assembler::zero, do_refill);

  // set up the mark word
  movptr(Address(top, oopDesc::mark_offset_in_bytes()), (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2));
  // set the length to the remaining space
  subptr(t1, typeArrayOopDesc::header_size(T_INT));
  addptr(t1, (int32_t)ThreadLocalAllocBuffer::alignment_reserve());
  shlptr(t1, log2_intptr(HeapWordSize/sizeof(jint)));   // convert heap words to jint elements
  movptr(Address(top, arrayOopDesc::length_offset_in_bytes()), t1);
  // set klass to intArrayKlass
  // dubious reloc why not an oop reloc?
  movptr(t1, ExternalAddress((address) Universe::intArrayKlassObj_addr()));
  // store klass last. concurrent gcs assumes klass length is valid if
  // klass field is not null.
  store_klass(top, t1);

  // refill the tlab with an eden allocation
  bind(do_refill);
  movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
  shlptr(t1, LogHeapWordSize);
  // add object_size ??
  eden_allocate(top, t1, 0, t2, slow_case);

  // Check that t1 was preserved in eden_allocate.
#ifdef ASSERT
  if (UseTLAB) {
    Label ok;
    Register tsize = rsi;
    assert_different_registers(tsize, thread_reg, t1);
    push(tsize);
    movptr(tsize, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
    shlptr(tsize, LogHeapWordSize);
    cmpptr(t1, tsize);
    jcc(Assembler::equal, ok);
    stop("assert(t1 != tlab size)");
    should_not_reach_here();

    bind(ok);
    pop(tsize);
  }
#endif
  // Publish the new TLAB [start, top, end); end is pulled back by the
  // alignment reserve so the filler array above always fits.
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())), top);
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())), top);
  addptr(top, t1);
  subptr(top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top);
  verify_tlab();
  jmp(retry);
}

// pi/4, the fast-path bound for the x87 trig instructions below.
static const double pi_4 = 0.7853981633974483;

// Emit code computing sin/cos/tan ('s'/'c'/'t') of the double on the x87
// stack top. |x| <= pi/4 uses the hardware instruction directly; otherwise
// falls back to the SharedRuntime dsin/dcos/dtan call, preserving all GP
// registers and (if num_fpu_regs_in_use > 1) the rest of the FPU stack.
void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
  // A hand-coded argument reduction for values in fabs(pi/4, pi/2)
  // was attempted in this code; unfortunately it appears that the
  // switch to 80-bit precision and back causes this to be
  // unprofitable compared with simply performing a runtime call if
  // the argument is out of the (-pi/4, pi/4) range.

  Register tmp = noreg;
  if (!VM_Version::supports_cmov()) {
    // fcmp needs a temporary so preserve rbx,
    tmp = rbx;
    push(tmp);
  }

  Label slow_case, done;

  ExternalAddress pi4_adr = (address)&pi_4;
  if (reachable(pi4_adr)) {
    // x ?<= pi/4
    fld_d(pi4_adr);
    fld_s(1);                // Stack: X PI/4 X
    fabs();                  // Stack: |X| PI/4 X
    fcmp(tmp);
    jcc(Assembler::above, slow_case);

    // fastest case: -pi/4 <= x <= pi/4
    switch(trig) {
      case 's':
        fsin();
        break;
      case 'c':
        fcos();
        break;
      case 't':
        ftan();
        break;
      default:
        assert(false, "bad intrinsic");
        break;
    }
    jmp(done);
  }

  // slow case: runtime call
  bind(slow_case);
  // Preserve registers across runtime call
  pusha();
  int incoming_argument_and_return_value_offset = -1;
  if (num_fpu_regs_in_use > 1) {
    // Must preserve all other FPU regs (could alternatively convert
    // SharedRuntime::dsin and dcos into assembly routines known not to trash
    // FPU state, but can not trust C compiler)
    NEEDS_CLEANUP;
    // NOTE that in this case we also push the incoming argument to
    // the stack and restore it later; we also use this stack slot to
    // hold the return value from dsin or dcos.
    for (int i = 0; i < num_fpu_regs_in_use; i++) {
      subptr(rsp, sizeof(jdouble));
      fstp_d(Address(rsp, 0));
    }
    incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
    fld_d(Address(rsp, incoming_argument_and_return_value_offset));
  }
  subptr(rsp, sizeof(jdouble));
  fstp_d(Address(rsp, 0));
#ifdef _LP64
  // 64-bit ABI passes the double argument in xmm0.
  movdbl(xmm0, Address(rsp, 0));
#endif // _LP64

  // NOTE: we must not use call_VM_leaf here because that requires a
  // complete interpreter frame in debug mode -- same bug as 4387334
  // MacroAssembler::call_VM_leaf_base is perfectly safe and will
  // do proper 64bit abi

  NEEDS_CLEANUP;
  // Need to add stack banging before this runtime call if it needs to
  // be taken; however, there is no generic stack banging routine at
  // the MacroAssembler level
  switch(trig) {
  case 's':
    {
      MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 0);
    }
    break;
  case 'c':
    {
      MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 0);
    }
    break;
  case 't':
    {
      MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 0);
    }
    break;
  default:
    assert(false, "bad intrinsic");
    break;
  }
#ifdef _LP64
  // Move the xmm0 result back onto the x87 stack.
  movsd(Address(rsp, 0), xmm0);
  fld_d(Address(rsp, 0));
#endif // _LP64
  addptr(rsp, sizeof(jdouble));
  if (num_fpu_regs_in_use > 1) {
    // Must save return value to stack and then restore entire FPU stack
    fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
    for (int i = 0; i < num_fpu_regs_in_use; i++) {
      fld_d(Address(rsp, 0));
      addptr(rsp, sizeof(jdouble));
    }
  }
  popa();

  // Come here with result in F-TOS
  bind(done);

  if (tmp != noreg) {
    pop(tmp);
  }
}


void MacroAssembler::ucomisd(XMMRegister
dst, AddressLiteral src) {
  ucomisd(dst, as_Address(src));
}

// Unordered compare of scalar single dst with the literal address src.
void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) {
  ucomiss(dst, as_Address(src));
}

// Packed-double xor with a memory operand; uses rscratch1 when src is not
// reachable with a 32-bit displacement.
void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    xorpd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    xorpd(dst, Address(rscratch1, 0));
  }
}

// Packed-single xor with a memory operand; same reachability handling.
void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    xorps(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    xorps(dst, Address(rscratch1, 0));
  }
}

// Emit a debug-only call to the verify_oop stub for the oop in reg.
// Contract: modifies nothing visible to the caller; the stub pops the
// pushed arguments and restores rax. The message buffer is intentionally
// never freed: its address is embedded in the generated code.
void MacroAssembler::verify_oop(Register reg, const char* s) {
  if (!VerifyOops) return;

  // Pass register number to verify_oop_subroutine
  char* b = new char[strlen(s) + 50];
  sprintf(b, "verify_oop: %s: %s", reg->name(), s);
  push(rax);                          // save rax,
  push(reg);                          // pass register argument
  ExternalAddress buffer((address) b);
  // avoid using pushptr, as it modifies scratch registers
  // and our contract is not to modify anything
  movptr(rax, buffer.addr());
  push(rax);
  // call indirectly to solve generation ordering problem
  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
  call(rax);
}


// As verify_oop, but the oop is in memory at addr rather than a register.
void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
  if (!VerifyOops) return;

  // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord);
  // Pass register number to verify_oop_subroutine
  char* b = new char[strlen(s) + 50];
  sprintf(b, "verify_oop_addr: %s", s);

  push(rax);                          // save rax,
  // addr may contain rsp so we will have to adjust it based on the push
  // we just did
  // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which
  // stores rax into addr which is backwards of what was intended.
  if (addr.uses(rsp)) {
    lea(rax, addr);
    pushptr(Address(rax, BytesPerWord));
  } else {
    pushptr(addr);
  }

  ExternalAddress buffer((address) b);
  // pass msg argument
  // avoid using pushptr, as it modifies scratch registers
  // and our contract is not to modify anything
  movptr(rax, buffer.addr());
  push(rax);

  // call indirectly to solve generation ordering problem
  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
  call(rax);
  // Caller pops the arguments and restores rax, from the stack
}

// Debug-only runtime check that the thread's TLAB satisfies
// start <= top <= end; stops the VM with a message otherwise.
void MacroAssembler::verify_tlab() {
#ifdef ASSERT
  if (UseTLAB && VerifyOops) {
    Label next, ok;
    Register t1 = rsi;
    Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread);

    push(t1);
    NOT_LP64(push(thread_reg));
    NOT_LP64(get_thread(thread_reg));

    movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
    cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
    jcc(Assembler::aboveEqual, next);
    stop("assert(top >= start)");
    should_not_reach_here();

    bind(next);
    movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
    cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
    jcc(Assembler::aboveEqual, ok);
    stop("assert(top <= end)");
    should_not_reach_here();

    bind(ok);
    NOT_LP64(pop(thread_reg));
    pop(t1);
  }
#endif
}

// Debug pretty-printer for the x87 FPU control word bit fields.
class ControlWord {
 public:
  int32_t _value;

  int rounding_control() const { return (_value >> 10) & 3 ; }
  int precision_control() const { return (_value >> 8) & 3 ; }
  bool precision() const { return ((_value >> 5) & 1) != 0; }
  bool underflow() const { return ((_value >> 4) & 1) != 0; }
  bool overflow() const { return ((_value >> 3) & 1) != 0; }
  bool zero_divide() const { return ((_value >> 2) & 1)
!= 0; }
  bool denormalized() const { return ((_value >> 1) & 1) != 0; }
  bool invalid() const { return ((_value >> 0) & 1) != 0; }

  // Print mask flags (uppercase = bit set), rounding and precision control.
  void print() const {
    // rounding control
    const char* rc;
    switch (rounding_control()) {
      case 0: rc = "round near"; break;
      case 1: rc = "round down"; break;
      case 2: rc = "round up "; break;
      case 3: rc = "chop "; break;
    };
    // precision control
    const char* pc;
    switch (precision_control()) {
      case 0: pc = "24 bits "; break;
      case 1: pc = "reserved"; break;
      case 2: pc = "53 bits "; break;
      case 3: pc = "64 bits "; break;
    };
    // flags
    char f[9];
    f[0] = ' ';
    f[1] = ' ';
    f[2] = (precision ()) ? 'P' : 'p';
    f[3] = (underflow ()) ? 'U' : 'u';
    f[4] = (overflow ()) ? 'O' : 'o';
    f[5] = (zero_divide ()) ? 'Z' : 'z';
    f[6] = (denormalized()) ? 'D' : 'd';
    f[7] = (invalid ()) ? 'I' : 'i';
    f[8] = '\x0';
    // output
    printf("%04x masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc);
  }

};

// Debug pretty-printer for the x87 FPU status word bit fields.
class StatusWord {
 public:
  int32_t _value;

  bool busy() const { return ((_value >> 15) & 1) != 0; }
  bool C3() const { return ((_value >> 14) & 1) != 0; }
  bool C2() const { return ((_value >> 10) & 1) != 0; }
  bool C1() const { return ((_value >> 9) & 1) != 0; }
  bool C0() const { return ((_value >> 8) & 1) != 0; }
  int top() const { return (_value >> 11) & 7 ; }   // index of the stack-top register
  bool error_status() const { return ((_value >> 7) & 1) != 0; }
  bool stack_fault() const { return ((_value >> 6) & 1) != 0; }
  bool precision() const { return ((_value >> 5) & 1) != 0; }
  bool underflow() const { return ((_value >> 4) & 1) != 0; }
  bool overflow() const { return ((_value >> 3) & 1) != 0; }
  bool zero_divide() const { return ((_value >> 2) & 1) != 0; }
  bool denormalized() const { return ((_value >> 1) & 1) != 0; }
  bool invalid() const { return ((_value >> 0) & 1) != 0; }

  void print() const {
    // condition codes
    char c[5];
    c[0] = (C3()) ? '3' : '-';
    c[1] = (C2()) ? '2' : '-';
    c[2] = (C1()) ? '1' : '-';
    c[3] = (C0()) ? '0' : '-';
    c[4] = '\x0';
    // flags
    char f[9];
    f[0] = (error_status()) ? 'E' : '-';
    f[1] = (stack_fault ()) ? 'S' : '-';
    f[2] = (precision ()) ? 'P' : '-';
    f[3] = (underflow ()) ? 'U' : '-';
    f[4] = (overflow ()) ? 'O' : '-';
    f[5] = (zero_divide ()) ? 'Z' : '-';
    f[6] = (denormalized()) ? 'D' : '-';
    f[7] = (invalid ()) ? 'I' : '-';
    f[8] = '\x0';
    // output
    printf("%04x flags = %s, cc = %s, top = %d", _value & 0xFFFF, f, c, top());
  }

};

// Debug pretty-printer for the x87 tag word (2-bit tag per register).
class TagWord {
 public:
  int32_t _value;

  int tag_at(int i) const { return (_value >> (i*2)) & 3; }

  void print() const {
    printf("%04x", _value & 0xFFFF);
  }

};

// One 80-bit x87 register image: 64-bit mantissa (_m1:_m0) plus
// 16-bit sign/exponent (_ex).
class FPU_Register {
 public:
  int32_t _m0;
  int32_t _m1;
  int16_t _ex;

  bool is_indefinite() const {
    return _ex == -1 && _m1 == (int32_t)0xC0000000 && _m0 == 0;
  }

  void print() const {
    char sign = (_ex < 0) ? '-' : '+';
    const char* kind = (_ex == 0x7FFF || _ex == (int16_t)-1) ? "NaN" : " ";
    printf("%c%04hx.%08x%08x %s", sign, _ex, _m1, _m0, kind);
  };

};

// In-memory image of the x87 environment plus the eight 10-byte registers,
// as laid out by the save done in push_CPU_state (layout assumed — verify
// against push_CPU_state if this struct is changed).
class FPU_State {
 public:
  enum {
    register_size = 10,
    number_of_registers = 8,
    register_mask = 7
  };

  ControlWord _control_word;
  StatusWord _status_word;
  TagWord _tag_word;
  int32_t _error_offset;
  int32_t _error_selector;
  int32_t _data_offset;
  int32_t _data_selector;
  int8_t _register[register_size * number_of_registers];

  // Tag for stack-relative register ST(i) (tag word is indexed by
  // physical register, so bias by top-of-stack).
  int tag_for_st(int i) const { return _tag_word.tag_at((_status_word.top() + i) & register_mask); }
  FPU_Register* st(int i) const { return (FPU_Register*)&_register[register_size * i]; }

  const char* tag_as_string(int tag) const {
    switch (tag) {
      case 0: return "valid";
      case 1: return "zero";
      case 2: return "special";
      case 3: return "empty";
    }
    // NOTE(review): no ';' after the macro below — relies on
    // ShouldNotReachHere() expanding to a complete statement.
    ShouldNotReachHere()
    return NULL;
  }

  void print() const {
    // print computation registers
    { int t = _status_word.top();
      for (int i = 0; i < number_of_registers; i++) {
        int j = (i - t) & register_mask;
        printf("%c r%d = ST%d = ", (j == 0 ? '*' : ' '), i, j);
        st(j)->print();
        printf(" %s\n", tag_as_string(_tag_word.tag_at(i)));
      }
    }
    printf("\n");
    // print control registers
    printf("ctrl = "); _control_word.print(); printf("\n");
    printf("stat = "); _status_word .print(); printf("\n");
    printf("tags = "); _tag_word .print(); printf("\n");
  }

};

// Debug pretty-printer for the EFLAGS register.
class Flag_Register {
 public:
  int32_t _value;

  bool overflow() const { return ((_value >> 11) & 1) != 0; }
  bool direction() const { return ((_value >> 10) & 1) != 0; }
  bool sign() const { return ((_value >> 7) & 1) != 0; }
  bool zero() const { return ((_value >> 6) & 1) != 0; }
  bool auxiliary_carry() const { return ((_value >> 4) & 1) != 0; }
  bool parity() const { return ((_value >> 2) & 1) != 0; }
  bool carry() const { return ((_value >> 0) & 1) != 0; }

  void print() const {
    // flags
    char f[8];
    f[0] = (overflow ()) ? 'O' : '-';
    f[1] = (direction ()) ? 'D' : '-';
    f[2] = (sign ()) ? 'S' : '-';
    f[3] = (zero ()) ? 'Z' : '-';
    f[4] = (auxiliary_carry()) ? 'A' : '-';
    f[5] = (parity ()) ? 'P' : '-';
    f[6] = (carry ()) ? 'C' : '-';
    f[7] = '\x0';
    // output
    printf("%08x flags = %s", _value, f);
  }

};

// One saved general-purpose register, printed in hex and decimal.
class IU_Register {
 public:
  int32_t _value;

  void print() const {
    printf("%08x %11d", _value, _value);
  }

};

// The integer-unit state: EFLAGS plus the GP registers in the order the
// save code lays them out in memory.
class IU_State {
 public:
  Flag_Register _eflags;
  IU_Register _rdi;
  IU_Register _rsi;
  IU_Register _rbp;
  IU_Register _rsp;
  IU_Register _rbx;
  IU_Register _rdx;
  IU_Register _rcx;
  IU_Register _rax;

  void print() const {
    // computation registers
    printf("rax, = "); _rax.print(); printf("\n");
    printf("rbx, = "); _rbx.print(); printf("\n");
    printf("rcx = "); _rcx.print(); printf("\n");
    printf("rdx = "); _rdx.print(); printf("\n");
    printf("rdi = "); _rdi.print(); printf("\n");
    printf("rsi = "); _rsi.print(); printf("\n");
    printf("rbp, = "); _rbp.print(); printf("\n");
    printf("rsp = "); _rsp.print(); printf("\n");
    printf("\n");
    // control registers
    printf("flgs = "); _eflags.print(); printf("\n");
  }
};


// Combined FPU + integer-unit state as saved by push_CPU_state.
class CPU_State {
 public:
  FPU_State _fpu_state;
  IU_State _iu_state;

  void print() const {
    printf("--------------------------------------------------\n");
    _iu_state .print();
    printf("\n");
    _fpu_state.print();
    printf("--------------------------------------------------\n");
  }

};


// C entry point called from generated code to dump the saved CPU state.
static void _print_CPU_state(CPU_State* state) {
  state->print();
};


// Emit code that saves the full CPU state, prints it via
// _print_CPU_state, and restores the state.
void MacroAssembler::print_CPU_state() {
  push_CPU_state();
  push(rsp);                // pass CPU state
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state)));
  addptr(rsp, wordSize);    // discard argument
  pop_CPU_state();
}


// C entry point: check the saved FPU state against the expected stack
// depth; returns false (and prints/asserts) on any inconsistency.
static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) {
  static int counter = 0;
  FPU_State* fs = &state->_fpu_state;
  counter++;
  // For leaf calls,
  // only verify that the top few elements remain empty.
  // We only need 1 empty at the top for C2 code.
  if( stack_depth < 0 ) {
    if( fs->tag_for_st(7) != 3 ) {
      printf("FPR7 not empty\n");
      state->print();
      assert(false, "error");
      return false;
    }
    return true;                // All other stack states do not matter
  }

  assert((fs->_control_word._value & 0xffff) == StubRoutines::_fpu_cntrl_wrd_std,
         "bad FPU control word");

  // compute stack depth
  int i = 0;
  while (i < FPU_State::number_of_registers && fs->tag_for_st(i) < 3) i++;
  int d = i;
  while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++;
  // verify findings
  if (i != FPU_State::number_of_registers) {
    // stack not contiguous
    printf("%s: stack not contiguous at ST%d\n", s, i);
    state->print();
    assert(false, "error");
    return false;
  }
  // check if computed stack depth corresponds to expected stack depth
  // NOTE(review): this negative branch looks unreachable — all
  // stack_depth < 0 cases already returned above. Confirm before relying on it.
  if (stack_depth < 0) {
    // expected stack depth is -stack_depth or less
    if (d > -stack_depth) {
      // too many elements on the stack
      printf("%s: <= %d stack elements expected but found %d\n", s, -stack_depth, d);
      state->print();
      assert(false, "error");
      return false;
    }
  } else {
    // expected stack depth is stack_depth
    if (d != stack_depth) {
      // wrong stack depth
      printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d);
      state->print();
      assert(false, "error");
      return false;
    }
  }
  // everything is cool
  return true;
}


// Emit code that saves CPU state, calls _verify_FPU with (stack_depth, s,
// state), breaks into the debugger (int3) on failure, then restores state.
void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
  if (!VerifyFPU) return;
  push_CPU_state();
  push(rsp);                // pass CPU state
  ExternalAddress msg((address) s);
  // pass message string s
  pushptr(msg.addr());
  push(stack_depth);        // pass stack depth
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU)));
  addptr(rsp, 3 * wordSize);   // discard arguments
  // check for error
  { Label L;
    testl(rax, rax);
    jcc(Assembler::notZero, L);
    int3();                  // break if error condition
    bind(L);
  }
  pop_CPU_state();
}

// Load the klass pointer of the oop in src into dst, decoding it when
// compressed oops are in use (klass field holds a narrow oop here).
void MacroAssembler::load_klass(Register dst, Register src) {
#ifdef _LP64
  if (UseCompressedOops) {
    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    decode_heap_oop_not_null(dst);
  } else
#endif
  movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
}

// Load the prototype mark word from src's klass into dst.
void MacroAssembler::load_prototype_header(Register dst, Register src) {
#ifdef _LP64
  if (UseCompressedOops) {
    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    // Decode-and-load fused into one address computation off r12_heapbase.
    movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  } else
#endif
  {
    movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    movptr(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  }
}

// Store the klass pointer src into the oop dst, encoding it first when
// compressed oops are in use. Note: clobbers src in the compressed case.
void MacroAssembler::store_klass(Register dst, Register src) {
#ifdef _LP64
  if (UseCompressedOops) {
    encode_heap_oop_not_null(src);
    movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
  } else
#endif
  movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
}

#ifdef _LP64
// Fill the 32-bit gap after the narrow klass field (compressed oops only).
void MacroAssembler::store_klass_gap(Register dst, Register src) {
  if (UseCompressedOops) {
    // Store to klass gap in destination
    movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src);
  }
}

// Load an oop field, decoding a narrow oop when compression is enabled.
void MacroAssembler::load_heap_oop(Register dst, Address src) {
  if (UseCompressedOops) {
    movl(dst, src);
    decode_heap_oop(dst);
  } else {
    movq(dst, src);
  }
}

// Store an oop field, encoding to a narrow oop when compression is
// enabled. Note: clobbers src in the compressed case.
void MacroAssembler::store_heap_oop(Address dst, Register src) {
  if (UseCompressedOops) {
    assert(!dst.uses(src), "not enough registers");
    encode_heap_oop(src);
    movl(dst, src);
  } else {
    movq(dst, src);
  }
}

// Algorithm must match oop.inline.hpp encode_heap_oop.
// Narrow the (possibly NULL) oop in r: NULL maps to 0 via the cmov trick
// (r == 0 -> replace with heap base, so subtraction yields 0).
void MacroAssembler::encode_heap_oop(Register r) {
  assert (UseCompressedOops, "should be compressed");
#ifdef ASSERT
  if (CheckCompressedOops) {
    Label ok;
    push(rscratch1); // cmpptr trashes rscratch1
    cmpptr(r12_heapbase, ExternalAddress((address)Universe::heap_base_addr()));
    jcc(Assembler::equal, ok);
    stop("MacroAssembler::encode_heap_oop: heap base corrupted?");
    bind(ok);
    pop(rscratch1);
  }
#endif
  verify_oop(r, "broken oop in encode_heap_oop");
  testq(r, r);
  cmovq(Assembler::equal, r, r12_heapbase);
  subq(r, r12_heapbase);
  shrq(r, LogMinObjAlignmentInBytes);
}

// Narrow the known-non-NULL oop in r (no cmov needed).
void MacroAssembler::encode_heap_oop_not_null(Register r) {
  assert (UseCompressedOops, "should be compressed");
#ifdef ASSERT
  if (CheckCompressedOops) {
    Label ok;
    testq(r, r);
    jcc(Assembler::notEqual, ok);
    stop("null oop passed to encode_heap_oop_not_null");
    bind(ok);
  }
#endif
  verify_oop(r, "broken oop in encode_heap_oop_not_null");
  subq(r, r12_heapbase);
  shrq(r, LogMinObjAlignmentInBytes);
}

// Two-register variant: narrow the known-non-NULL oop src into dst.
void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
  assert (UseCompressedOops, "should be compressed");
#ifdef ASSERT
  if (CheckCompressedOops) {
    Label ok;
    testq(src, src);
    jcc(Assembler::notEqual, ok);
    stop("null oop passed to encode_heap_oop_not_null2");
    bind(ok);
  }
#endif
  verify_oop(src, "broken oop in encode_heap_oop_not_null2");
  if (dst != src) {
    movq(dst, src);
  }
  subq(dst, r12_heapbase);
  shrq(dst, LogMinObjAlignmentInBytes);
}

// Widen the (possibly 0) narrow oop in r; 0 stays NULL because the shift
// leaves ZF set and the base add is skipped.
void MacroAssembler::decode_heap_oop(Register r) {
  assert (UseCompressedOops, "should be compressed");
#ifdef ASSERT
  if (CheckCompressedOops) {
    Label ok;
    push(rscratch1);
    cmpptr(r12_heapbase,
           ExternalAddress((address)Universe::heap_base_addr()));
    jcc(Assembler::equal, ok);
    stop("MacroAssembler::decode_heap_oop: heap base corrupted?");
    bind(ok);
    pop(rscratch1);
  }
#endif

  Label done;
  shlq(r, LogMinObjAlignmentInBytes);
  jccb(Assembler::equal, done);   // narrow oop was 0: keep NULL
  addq(r, r12_heapbase);
#if 0
  // alternate decoding probably a wash.
  testq(r, r);
  jccb(Assembler::equal, done);
  leaq(r, Address(r12_heapbase, r, Address::times_8, 0));
#endif
  bind(done);
  verify_oop(r, "broken oop in decode_heap_oop");
}

// Widen a known-non-0 narrow oop in place with a single lea.
void MacroAssembler::decode_heap_oop_not_null(Register r) {
  assert (UseCompressedOops, "should only be used for compressed headers");
  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  assert(Address::times_8 == LogMinObjAlignmentInBytes, "decode alg wrong");
  leaq(r, Address(r12_heapbase, r, Address::times_8, 0));
}

// Two-register variant of the above.
void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
  assert (UseCompressedOops, "should only be used for compressed headers");
  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  assert(Address::times_8 == LogMinObjAlignmentInBytes, "decode alg wrong");
  leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
}

// Load the narrow-oop encoding of obj into dst as a patchable 32-bit
// immediate with an oop relocation.
void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int oop_index = oop_recorder()->find_index(obj);
  RelocationHolder rspec = oop_Relocation::spec(oop_index);
  mov_literal32(dst, oop_index, rspec, narrow_oop_operand);
}

// Reload r12_heapbase from Universe (e.g. after a call that may clobber it).
void MacroAssembler::reinit_heapbase() {
  if (UseCompressedOops) {
    movptr(r12_heapbase, ExternalAddress((address)Universe::heap_base_addr()));
  }
}
#endif // _LP64

// Return the condition that is true exactly when cond is false.
Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
  switch (cond) {
    // Note some conditions are synonyms for others
    case Assembler::zero: return Assembler::notZero;
    case Assembler::notZero: return Assembler::zero;
    case Assembler::less: return Assembler::greaterEqual;
    case Assembler::lessEqual: return Assembler::greater;
    case Assembler::greater: return Assembler::lessEqual;
    case Assembler::greaterEqual: return Assembler::less;
    case Assembler::below: return Assembler::aboveEqual;
    case Assembler::belowEqual: return Assembler::above;
    case Assembler::above: return Assembler::belowEqual;
    case Assembler::aboveEqual: return Assembler::below;
    case Assembler::overflow: return Assembler::noOverflow;
    case Assembler::noOverflow: return Assembler::overflow;
    case Assembler::negative: return Assembler::positive;
    case Assembler::positive: return Assembler::negative;
    case Assembler::parity: return Assembler::noParity;
    case Assembler::noParity: return Assembler::parity;
  }
  ShouldNotReachHere(); return Assembler::overflow;
}

// (continues past this chunk)
SkipIfEqual::SkipIfEqual(
    MacroAssembler* masm, const bool* flag_addr, bool value) {
  _masm = masm;
_masm->cmp8(ExternalAddress((address)flag_addr), value); 7711 _masm->jcc(Assembler::equal, _label); 7712} 7713 7714SkipIfEqual::~SkipIfEqual() { 7715 _masm->bind(_label); 7716} 7717