assembler_x86.cpp revision 3602:da91efe96a93
/*
 * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "assembler_x86.inline.hpp"
#include "gc_interface/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/cardTableModRefBS.hpp"
#include "memory/resourceArea.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/interfaceSupport.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#ifndef SERIALGC
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
#include "gc_implementation/g1/heapRegion.hpp"
#endif

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#define STOP(error) stop(error)
#else
#define BLOCK_COMMENT(str) block_comment(str)
#define STOP(error) block_comment(error); stop(error)
#endif

#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

// Implementation of AddressLiteral

AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
  _is_lval = false;
  _target = target;
  switch (rtype) {
  case relocInfo::oop_type:
  case relocInfo::metadata_type:
    // Oops are a special case. Normally they would be their own section
    // but in cases like icBuffer they are literals in the code stream that
    // we don't have a section for. We use none so that we get a literal address
    // which is always patchable.
    break;
  case relocInfo::external_word_type:
    _rspec = external_word_Relocation::spec(target);
    break;
  case relocInfo::internal_word_type:
    _rspec = internal_word_Relocation::spec(target);
    break;
  case relocInfo::opt_virtual_call_type:
    _rspec = opt_virtual_call_Relocation::spec();
    break;
  case relocInfo::static_call_type:
    _rspec = static_call_Relocation::spec();
    break;
  case relocInfo::runtime_call_type:
    _rspec = runtime_call_Relocation::spec();
    break;
  case relocInfo::poll_type:
  case relocInfo::poll_return_type:
    _rspec = Relocation::spec_simple(rtype);
    break;
  case relocInfo::none:
    break;
  default:
    ShouldNotReachHere();
    break;
  }
}

// Implementation of Address

#ifdef _LP64

Address Address::make_array(ArrayAddress adr) {
  // Not implementable on 64bit machines
  // Should have been handled higher up the call chain.
  ShouldNotReachHere();
  return Address();
}

// exceedingly dangerous constructor
Address::Address(int disp, address loc, relocInfo::relocType rtype) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = disp;
  switch (rtype) {
    case relocInfo::external_word_type:
      _rspec = external_word_Relocation::spec(loc);
      break;
    case relocInfo::internal_word_type:
      _rspec = internal_word_Relocation::spec(loc);
      break;
    case relocInfo::runtime_call_type:
      // HMM
      _rspec = runtime_call_Relocation::spec();
      break;
    case relocInfo::poll_type:
    case relocInfo::poll_return_type:
      _rspec = Relocation::spec_simple(rtype);
      break;
    case relocInfo::none:
      break;
    default:
      ShouldNotReachHere();
  }
}
#else // LP64

Address Address::make_array(ArrayAddress adr) {
  AddressLiteral base = adr.base();
  Address index = adr.index();
  assert(index._disp == 0, "must not have disp"); // maybe it can?
  Address array(index._base, index._index, index._scale, (intptr_t) base.target());
  array._rspec = base._rspec;
  return array;
}

// exceedingly dangerous constructor
Address::Address(address loc, RelocationHolder spec) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = (intptr_t) loc;
  _rspec = spec;
}

#endif // _LP64


// Convert the raw encoding form into the form expected by the constructor for
// Address. An index of 4 (rsp) corresponds to having no index, so convert
// that to noreg for the Address constructor.
Address Address::make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc) {
  RelocationHolder rspec;
  if (disp_reloc != relocInfo::none) {
    rspec = Relocation::spec_simple(disp_reloc);
  }
  bool valid_index = index != rsp->encoding();
  if (valid_index) {
    Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
    madr._rspec = rspec;
    return madr;
  } else {
    Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
    madr._rspec = rspec;
    return madr;
  }
}

// Implementation of Assembler

int AbstractAssembler::code_fill_byte() {
  return (u_char)'\xF4'; // hlt
}

// make this go away someday
void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
  if (rtype == relocInfo::none)
    emit_long(data);
  else
    emit_data(data, Relocation::spec_simple(rtype), format);
}

void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {
  assert(imm_operand == 0, "default format must be immediate in this file");
  assert(inst_mark() != NULL, "must be inside InstructionMark");
  if (rspec.type() != relocInfo::none) {
#ifdef ASSERT
    check_relocation(rspec, format);
#endif
    // Do not use AbstractAssembler::relocate, which is not intended for
    // embedded words. Instead, relocate to the enclosing instruction.
    // hack. call32 is too wide for mask so use disp32
    if (format == call32_operand)
      code_section()->relocate(inst_mark(), rspec, disp32_operand);
    else
      code_section()->relocate(inst_mark(), rspec, format);
  }
  emit_long(data);
}

static int encode(Register r) {
  int enc = r->encoding();
  if (enc >= 8) {
    enc -= 8;
  }
  return enc;
}

static int encode(XMMRegister r) {
  int enc = r->encoding();
  if (enc >= 8) {
    enc -= 8;
  }
  return enc;
}

void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
  assert(dst->has_byte_register(), "must have byte register");
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert(isByte(imm8), "not a byte");
  assert((op1 & 0x01) == 0, "should be 8bit operation");
  emit_byte(op1);
  emit_byte(op2 | encode(dst));
  emit_byte(imm8);
}


void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_byte(op1 | 0x02); // set sign bit
    emit_byte(op2 | encode(dst));
    emit_byte(imm32 & 0xFF);
  } else {
    emit_byte(op1);
    emit_byte(op2 | encode(dst));
    emit_long(imm32);
  }
}

// Force generation of a 4 byte immediate value even if it fits into 8bit
void Assembler::emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  emit_byte(op1);
  emit_byte(op2 | encode(dst));
  emit_long(imm32);
}

// immediate-to-memory forms
void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) {
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_byte(op1 | 0x02); // set sign bit
    emit_operand(rm, adr, 1);
    emit_byte(imm32 & 0xFF);
  } else {
    emit_byte(op1);
    emit_operand(rm, adr, 4);
    emit_long(imm32);
  }
}


void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  emit_byte(op1);
  emit_byte(op2 | encode(dst) << 3 | encode(src));
}


void Assembler::emit_operand(Register reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec,
                             int rip_relative_correction) {
  relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();

  // Encode the registers as needed in the fields they are used in

  int regenc = encode(reg) << 3;
  int indexenc = index->is_valid() ? encode(index) << 3 : 0;
  int baseenc = base->is_valid() ? encode(base) : 0;

  if (base->is_valid()) {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [base + index*scale + disp]
      if (disp == 0 && rtype == relocInfo::none &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base + index*scale]
        // [00 reg 100][ss index base]
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x04 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [base + index*scale + imm8]
        // [01 reg 100][ss index base] imm8
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x44 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
        emit_byte(disp & 0xFF);
      } else {
        // [base + index*scale + disp32]
        // [10 reg 100][ss index base] disp32
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x84 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    } else if (base == rsp LP64_ONLY(|| base == r12)) {
      // [rsp + disp]
      if (disp == 0 && rtype == relocInfo::none) {
        // [rsp]
        // [00 reg 100][00 100 100]
        emit_byte(0x04 | regenc);
        emit_byte(0x24);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [rsp + imm8]
        // [01 reg 100][00 100 100] disp8
        emit_byte(0x44 | regenc);
        emit_byte(0x24);
        emit_byte(disp & 0xFF);
      } else {
        // [rsp + imm32]
        // [10 reg 100][00 100 100] disp32
        emit_byte(0x84 | regenc);
        emit_byte(0x24);
        emit_data(disp, rspec, disp32_operand);
      }
    } else {
      // [base + disp]
      assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode");
      if (disp == 0 && rtype == relocInfo::none &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base]
        // [00 reg base]
        emit_byte(0x00 | regenc | baseenc);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [base + disp8]
        // [01 reg base] disp8
        emit_byte(0x40 | regenc | baseenc);
        emit_byte(disp & 0xFF);
      } else {
        // [base + disp32]
        // [10 reg base] disp32
        emit_byte(0x80 | regenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    }
  } else {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [index*scale + disp]
      // [00 reg 100][ss index 101] disp32
      assert(index != rsp, "illegal addressing mode");
      emit_byte(0x04 | regenc);
      emit_byte(scale << 6 | indexenc | 0x05);
      emit_data(disp, rspec, disp32_operand);
    } else if (rtype != relocInfo::none) {
      // [disp] (64bit) RIP-RELATIVE (32bit) abs
      // [00 000 101] disp32

      emit_byte(0x05 | regenc);
      // Note that the RIP-rel. correction applies to the generated
      // disp field, but _not_ to the target address in the rspec.

      // disp was created by converting the target address minus the pc
      // at the start of the instruction. That needs more correction here.
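      // Illustrative note (not in the original source): with
      //   disp    = target - inst_mark()           (computed by the caller)
      //   next_ip = pc() + 4 + rip_relative_correction
      // the value actually encoded below is
      //   adjusted = disp - (next_ip - inst_mark()) = target - next_ip,
      // which is exactly the disp32 the CPU adds to the address of the
      // next instruction when resolving a RIP-relative operand.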
      // intptr_t disp = target - next_ip;
      assert(inst_mark() != NULL, "must be inside InstructionMark");
      address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
      int64_t adjusted = disp;
      // Do rip-rel adjustment for 64bit
      LP64_ONLY(adjusted -= (next_ip - inst_mark()));
      assert(is_simm32(adjusted),
             "must be 32bit offset (RIP relative address)");
      emit_data((int32_t) adjusted, rspec, disp32_operand);

    } else {
      // 32bit never did this, did everything as the rip-rel/disp code above
      // [disp] ABSOLUTE
      // [00 reg 100][00 100 101] disp32
      emit_byte(0x04 | regenc);
      emit_byte(0x25);
      emit_data(disp, rspec, disp32_operand);
    }
  }
}

void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec) {
  emit_operand((Register)reg, base, index, scale, disp, rspec);
}

// Secret local extension to Assembler::WhichOperand:
#define end_pc_operand (_WhichOperand_limit)

address Assembler::locate_operand(address inst, WhichOperand which) {
  // Decode the given instruction, and return the address of
  // an embedded 32-bit operand word.

  // If "which" is disp32_operand, selects the displacement portion
  // of an effective address specifier.
  // If "which" is imm64_operand, selects the trailing immediate constant.
  // If "which" is call32_operand, selects the displacement of a call or jump.
  // Caller is responsible for ensuring that there is such an operand,
  // and that it is 32/64 bits wide.

  // If "which" is end_pc_operand, find the end of the instruction.

  address ip = inst;
  bool is_64bit = false;

  debug_only(bool has_disp32 = false);
  int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn

  again_after_prefix:
  switch (0xFF & *ip++) {

  // These convenience macros generate groups of "case" labels for the switch.
#define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3
#define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \
                case (x)+4: case (x)+5: case (x)+6: case (x)+7
#define REP16(x) REP8((x)+0): \
                 case REP8((x)+8)

  case CS_segment:
  case SS_segment:
  case DS_segment:
  case ES_segment:
  case FS_segment:
  case GS_segment:
    // Seems dubious
    LP64_ONLY(assert(false, "shouldn't have that prefix"));
    assert(ip == inst+1, "only one prefix allowed");
    goto again_after_prefix;

  case 0x67:
  case REX:
  case REX_B:
  case REX_X:
  case REX_XB:
  case REX_R:
  case REX_RB:
  case REX_RX:
  case REX_RXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    goto again_after_prefix;

  case REX_W:
  case REX_WB:
  case REX_WX:
  case REX_WXB:
  case REX_WR:
  case REX_WRB:
  case REX_WRX:
  case REX_WRXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    is_64bit = true;
    goto again_after_prefix;

  case 0xFF: // pushq a; decl a; incl a; call a; jmp a
  case 0x88: // movb a, r
  case 0x89: // movl a, r
  case 0x8A: // movb r, a
  case 0x8B: // movl r, a
  case 0x8F: // popl a
    debug_only(has_disp32 = true);
    break;

  case 0x68: // pushq #32
    if (which == end_pc_operand) {
      return ip + 4;
    }
    assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate");
    return ip; // not produced by emit_operand
  case 0x66: // movw ... (size prefix)
    again_after_size_prefix2:
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "64bit prefix found"));
      goto again_after_size_prefix2;
    case 0x8B: // movw r, a
    case 0x89: // movw a, r
      debug_only(has_disp32 = true);
      break;
    case 0xC7: // movw a, #16
      debug_only(has_disp32 = true);
      tail_size = 2; // the imm16
      break;
    case 0x0F: // several SSE/SSE2 variants
      ip--;    // reparse the 0x0F
      goto again_after_prefix;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP8(0xB8): // movl/q r, #32/#64(oop?)
    if (which == end_pc_operand)  return ip + (is_64bit ? 8 : 4);
    // these asserts are somewhat nonsensical
#ifndef _LP64
    assert(which == imm_operand || which == disp32_operand,
           err_msg("which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, ip));
#else
    assert((which == call32_operand || which == imm_operand) && is_64bit ||
           which == narrow_oop_operand && !is_64bit,
           err_msg("which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, ip));
#endif // _LP64
    return ip;

  case 0x69: // imul r, a, #32
  case 0xC7: // movl a, #32(oop?)
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x0F: // movx..., etc.
    switch (0xFF & *ip++) {
    case 0x3A: // pcmpestri
      tail_size = 1;
    case 0x38: // ptest, pmovzxbw
      ip++; // skip opcode
      debug_only(has_disp32 = true); // has both kinds of operands!
      break;

    case 0x70: // pshufd r, r/a, #8
      debug_only(has_disp32 = true); // has both kinds of operands!
    case 0x73: // psrldq r, #8
      tail_size = 1;
      break;

    case 0x12: // movlps
    case 0x28: // movaps
    case 0x2E: // ucomiss
    case 0x2F: // comiss
    case 0x54: // andps
    case 0x55: // andnps
    case 0x56: // orps
    case 0x57: // xorps
    case 0x6E: // movd
    case 0x7E: // movd
    case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
      debug_only(has_disp32 = true);
      break;

    case 0xAD: // shrd r, a, %cl
    case 0xAF: // imul r, a
    case 0xBE: // movsbl r, a (movsxb)
    case 0xBF: // movswl r, a (movsxw)
    case 0xB6: // movzbl r, a (movzxb)
    case 0xB7: // movzwl r, a (movzxw)
    case REP16(0x40): // cmovl cc, r, a
    case 0xB0: // cmpxchgb
    case 0xB1: // cmpxchg
    case 0xC1: // xaddl
    case 0xC7: // cmpxchg8
    case REP16(0x90): // setcc a
      debug_only(has_disp32 = true);
      // fall out of the switch to decode the address
      break;

    case 0xC4: // pinsrw r, a, #8
      debug_only(has_disp32 = true);
    case 0xC5: // pextrw r, r, #8
      tail_size = 1; // the imm8
      break;

    case 0xAC: // shrd r, a, #8
      debug_only(has_disp32 = true);
      tail_size = 1; // the imm8
      break;

    case REP16(0x80): // jcc rdisp32
      if (which == end_pc_operand)  return ip + 4;
      assert(which == call32_operand, "jcc has no disp32 or imm");
      return ip;
    default:
      ShouldNotReachHere();
    }
    break;

  case 0x81: // addl a, #32; addl r, #32
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    // on 32bit in the case of cmpl, the imm might be an oop
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
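    // Illustrative example (not in the original source): cmpl $imm32,
    // disp8(%ebx) assembles to 81 7B <disp8> <imm32>, so the instruction
    // carries both a displacement and a trailing 4-byte immediate;
    // tail_size accounts for the latter.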
    break;

  case 0x83: // addl a, #8; addl r, #8
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1;
    break;

  case 0x9B:
    switch (0xFF & *ip++) {
    case 0xD9: // fnstcw a
      debug_only(has_disp32 = true);
      break;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a
  case REP4(0x10): // adc...
  case REP4(0x20): // and...
  case REP4(0x30): // xor...
  case REP4(0x08): // or...
  case REP4(0x18): // sbb...
  case REP4(0x28): // sub...
  case 0xF7: // mull a
  case 0x8D: // lea r, a
  case 0x87: // xchg r, a
  case REP4(0x38): // cmp...
  case 0x85: // test r, a
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
  case 0xC6: // movb a, #8
  case 0x80: // cmpb a, #8
  case 0x6B: // imul r, a, #8
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1; // the imm8
    break;

  case 0xC4: // VEX_3bytes
  case 0xC5: // VEX_2bytes
    assert((UseAVX > 0), "shouldn't have VEX prefix");
    assert(ip == inst+1, "no prefixes allowed");
    // C4 and C5 are also used as opcodes for PINSRW and PEXTRW instructions,
    // but those have prefix 0x0F and are handled when 0x0F is processed above.
    //
    // In 32-bit mode the VEX first bytes C4 and C5 alias onto the LDS and LES
    // instructions (these instructions are not supported in 64-bit mode).
    // To distinguish them, bits [7:6] are set in the VEX second byte, since
    // a ModRM byte can not be of the form 11xxxxxx in 32-bit mode. To set
    // those VEX bits, the REX and vvvv bits are inverted.
    //
    // Fortunately C2 doesn't generate these instructions, so we don't need
    // to check for them in the product version.

    // Check second byte
    NOT_LP64(assert((0xC0 & *ip) == 0xC0, "shouldn't have LDS and LES instructions"));

    // First byte
    if ((0xFF & *inst) == VEX_3bytes) {
      ip++; // third byte
      is_64bit = ((VEX_W & *ip) == VEX_W);
    }
    ip++; // opcode
    // To find the end of instruction (which == end_pc_operand).
    switch (0xFF & *ip) {
    case 0x61: // pcmpestri r, r/a, #8
    case 0x70: // pshufd r, r/a, #8
    case 0x73: // psrldq r, #8
      tail_size = 1; // the imm8
      break;
    default:
      break;
    }
    ip++; // skip opcode
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
  case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
  case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
  case 0xDD: // fld_d a; fst_d a; fstp_d a
  case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
  case 0xDF: // fild_d a; fistp_d a
  case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
  case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
  case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
    debug_only(has_disp32 = true);
    break;

  case 0xE8: // call rdisp32
  case 0xE9: // jmp rdisp32
    if (which == end_pc_operand)  return ip + 4;
    assert(which == call32_operand, "call has no disp32 or imm");
    return ip;

  case 0xF0: // Lock
    assert(os::is_MP(), "only on MP");
    goto again_after_prefix;

  case 0xF3: // For SSE
  case 0xF2: // For SSE2
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "found 64bit prefix"));
      ip++;
    default:
      ip++;
    }
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  default:
    ShouldNotReachHere();

#undef REP8
#undef REP16
  }

  assert(which != call32_operand, "instruction is not a call, jmp, or jcc");
#ifdef _LP64
  assert(which != imm_operand, "instruction is not a movq reg, imm64");
#else
  // assert(which != imm_operand || has_imm32, "instruction has no imm32 field");
  assert(which != imm_operand || has_disp32, "instruction has no imm32 field");
#endif // LP64
  assert(which != disp32_operand || has_disp32, "instruction has no disp32 field");

  // parse the output of emit_operand
  int op2 = 0xFF & *ip++;
  int base = op2 & 0x07;
  int op3 = -1;
  const int b100 = 4;
  const int b101 = 5;
  if (base == b100 && (op2 >> 6) != 3) {
    op3 = 0xFF & *ip++;
    base = op3 & 0x07; // refetch the base
  }
  // now ip points at the disp (if any)

  switch (op2 >> 6) {
  case 0:
    // [00 reg  100][ss index base]
    // [00 reg  100][00   100  esp]
    // [00 reg base]
    // [00 reg  100][ss index  101][disp32]
    // [00 reg  101]               [disp32]

    if (base == b101) {
      if (which == disp32_operand)
        return ip; // caller wants the disp32
      ip += 4;     // skip the disp32
    }
    break;

  case 1:
    // [01 reg  100][ss index base][disp8]
    // [01 reg  100][00   100  esp][disp8]
    // [01 reg base]               [disp8]
    ip += 1; // skip the disp8
    break;

  case 2:
    // [10 reg  100][ss index base][disp32]
    // [10 reg  100][00   100  esp][disp32]
    // [10 reg base]               [disp32]
    if (which == disp32_operand)
      return ip; // caller wants the disp32
    ip += 4;     // skip the disp32
    break;

  case 3:
    // [11 reg base] (not a memory addressing mode)
    break;
  }

  if (which == end_pc_operand) {
    return ip + tail_size;
  }

#ifdef _LP64
  assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32");
#else
  assert(which == imm_operand, "instruction has only an imm field");
#endif // LP64
  return ip;
}

address Assembler::locate_next_instruction(address inst) {
  // Secretly share code with locate_operand:
  return locate_operand(inst, end_pc_operand);
}


#ifdef ASSERT
void Assembler::check_relocation(RelocationHolder const& rspec, int format) {
  address inst = inst_mark();
  assert(inst != NULL && inst < pc(), "must point to beginning of instruction");
  address opnd;

  Relocation* r = rspec.reloc();
  if (r->type() == relocInfo::none) {
    return;
  } else if (r->is_call() || format == call32_operand) {
    // assert(format == imm32_operand, "cannot specify a nonzero format");
    opnd = locate_operand(inst, call32_operand);
  } else if (r->is_data()) {
    assert(format == imm_operand || format == disp32_operand
           LP64_ONLY(|| format == narrow_oop_operand), "format ok");
    opnd = locate_operand(inst, (WhichOperand)format);
  } else {
    assert(format == imm_operand, "cannot specify a format");
    return;
  }
  assert(opnd == pc(), "must put operand where relocs can find it");
}
#endif // ASSERT

void Assembler::emit_operand32(Register reg, Address adr) {
  assert(reg->encoding() < 8, "no extended registers");
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}

void Assembler::emit_operand(Register reg, Address adr,
                             int rip_relative_correction) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec,
               rip_relative_correction);
}

void Assembler::emit_operand(XMMRegister reg, Address adr) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}

// MMX operations
void Assembler::emit_operand(MMXRegister reg, Address adr) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}

// work around gcc (3.2.1-7a) bug
void Assembler::emit_operand(Address adr, MMXRegister reg) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}


void Assembler::emit_farith(int b1, int b2, int i) {
  assert(isByte(b1) && isByte(b2), "wrong opcode");
  assert(0 <= i && i < 8, "illegal stack offset");
  emit_byte(b1);
  emit_byte(b2 + i);
}


// Now the Assembler instructions (identical for 32/64 bits)

void Assembler::adcl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rdx, dst, imm32);
}

void Assembler::adcl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x11);
  emit_operand(src, dst);
}

void Assembler::adcl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xD0, dst, imm32);
}

void Assembler::adcl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x13);
  emit_operand(dst, src);
}

void Assembler::adcl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x13, 0xC0, dst, src);
}

void Assembler::addl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rax, dst, imm32);
}

void Assembler::addl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x01);
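  // Note (added for clarity): 0x01 /r is ADD r/m32, r32; emit_operand()
  // below supplies the ModRM/SIB/disp bytes for dst.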
  emit_operand(src, dst);
}

void Assembler::addl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xC0, dst, imm32);
}

void Assembler::addl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x03);
  emit_operand(dst, src);
}

void Assembler::addl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x03, 0xC0, dst, src);
}

void Assembler::addr_nop_4() {
  assert(UseAddressNop, "no CPU support");
  // 4 bytes: NOP DWORD PTR [EAX+0]
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
  emit_byte(0);    // 8-bits offset (1 byte)
}

void Assembler::addr_nop_5() {
  assert(UseAddressNop, "no CPU support");
  // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
  emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_byte(0);    // 8-bits offset (1 byte)
}

void Assembler::addr_nop_7() {
  assert(UseAddressNop, "no CPU support");
  // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x80); // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
  emit_long(0);    // 32-bits offset (4 bytes)
}

void Assembler::addr_nop_8() {
  assert(UseAddressNop, "no CPU support");
  // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x84); // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
  emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_long(0);    // 32-bits offset (4 bytes)
}

void Assembler::addsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
}

void Assembler::addsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
}

void Assembler::addss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
}

void Assembler::addss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
}

void Assembler::andl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x81);
  emit_operand(rsp, dst, 4);
  emit_long(imm32);
}

void Assembler::andl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xE0, dst, imm32);
}

void Assembler::andl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x23);
  emit_operand(dst, src);
}

void Assembler::andl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x23, 0xC0, dst, src);
}

void Assembler::bsfl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBC);
  emit_byte(0xC0 | encode);
}

void Assembler::bsrl(Register dst, Register src) {
  assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
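  // Note (added for clarity): 0F BD /r is BSR r32, r/m32; with an F3 prefix
  // the same bytes decode as LZCNT on newer CPUs, hence the assert above.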
  emit_byte(0xBD);
  emit_byte(0xC0 | encode);
}

void Assembler::bswapl(Register reg) { // bswap
  int encode = prefix_and_encode(reg->encoding());
  emit_byte(0x0F);
  emit_byte(0xC8 | encode);
}

void Assembler::call(Label& L, relocInfo::relocType rtype) {
  // suspect disp32 is always good
  int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);

  if (L.is_bound()) {
    const int long_size = 5;
    int offs = (int)( target(L) - pc() );
    assert(offs <= 0, "assembler error");
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    emit_byte(0xE8);
    emit_data(offs - long_size, rtype, operand);
  } else {
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    L.add_patch_at(code(), locator());

    emit_byte(0xE8);
    emit_data(int(0), rtype, operand);
  }
}

void Assembler::call(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xD0 | encode);
}


void Assembler::call(Address adr) {
  InstructionMark im(this);
  prefix(adr);
  emit_byte(0xFF);
  emit_operand(rdx, adr);
}

void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
  assert(entry != NULL, "call most probably wrong");
  InstructionMark im(this);
  emit_byte(0xE8);
  intptr_t disp = entry - (_code_pos + sizeof(int32_t));
  assert(is_simm32(disp), "must be 32bit offset (call2)");
  // Technically, should use call32_operand, but this format is
  // implied by the fact that we're emitting a call instruction.

  int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand);
  emit_data((int) disp, rspec, operand);
}

void Assembler::cdql() {
  emit_byte(0x99);
}

void Assembler::cmovl(Condition cc, Register dst, Register src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_byte(0xC0 | encode);
}


void Assembler::cmovl(Condition cc, Register dst, Address src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_operand(dst, src);
}

void Assembler::cmpb(Address dst, int imm8) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x80);
  emit_operand(rdi, dst, 1);
  emit_byte(imm8);
}

void Assembler::cmpl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x81);
  emit_operand(rdi, dst, 4);
  emit_long(imm32);
}

void Assembler::cmpl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xF8, dst, imm32);
}

void Assembler::cmpl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x3B, 0xC0, dst, src);
}


void Assembler::cmpl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x3B);
  emit_operand(dst, src);
}

void Assembler::cmpw(Address dst, int imm16) {
  InstructionMark im(this);
  assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers");
  emit_byte(0x66);
  emit_byte(0x81);
  emit_operand(rdi, dst, 2);
  emit_word(imm16);
}
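
// Illustrative example (not in the original source): cmpw $0x1234, (%rax)
// assembles to 66 81 38 34 12 -- the 0x66 operand-size prefix turns the
// 0x81 /7 compare into a 16-bit operation with a 2-byte immediate.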
// The 32-bit cmpxchg compares the value at adr with the contents of rax;
// if they are equal, it stores reg into adr. Otherwise, the value at adr
// is loaded into rax. The ZF is set if the compared values were equal,
// and cleared otherwise.
void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg
  if (Atomics & 2) {
    // caveat: no instructionmark, so this isn't relocatable.
    // Emit a synthetic, non-atomic, CAS equivalent.
    // Beware. The synthetic form sets all ICCs, not just ZF.
    // cmpxchg r,[m] is equivalent to rax, = CAS (m, rax, r)
    cmpl(rax, adr);
    movl(rax, adr);
    if (reg != rax) {
      Label L;
      jcc(Assembler::notEqual, L);
      movl(adr, reg);
      bind(L);
    }
  } else {
    InstructionMark im(this);
    prefix(adr, reg);
    emit_byte(0x0F);
    emit_byte(0xB1);
    emit_operand(reg, adr);
  }
}

void Assembler::comisd(XMMRegister dst, Address src) {
  // NOTE: dbx seems to decode this as comiss even though the
  // 0x66 is there. Strangely, ucomisd comes out correct.
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
}

void Assembler::comisd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
}

void Assembler::comiss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE);
}

void Assembler::comiss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE);
}

void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0xE6, dst, src, VEX_SIMD_F3);
}

void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x5B, dst, src, VEX_SIMD_NONE);
}

void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
}

void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
}

void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}

void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x2A, dst, src, VEX_SIMD_F2);
}

void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}

void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x2A, dst, src, VEX_SIMD_F3);
}

void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
}
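
// Note (added for clarity): in the legacy SSE encoding the VEX_SIMD_F3
// argument above becomes an F3 prefix, so cvtss2sd assembles to
// F3 0F 5A /r; when AVX is in use, the same opcode is carried in a VEX
// prefix instead.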
void Assembler::cvtss2sd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
}


void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}

void Assembler::cvttss2sil(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}

void Assembler::decl(Address dst) {
  // Don't use it directly. Use MacroAssembler::decrement() instead.
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xFF);
  emit_operand(rcx, dst);
}

void Assembler::divsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
}

void Assembler::divsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
}

void Assembler::divss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
}

void Assembler::divss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
}

void Assembler::emms() {
  NOT_LP64(assert(VM_Version::supports_mmx(), ""));
  emit_byte(0x0F);
  emit_byte(0x77);
}

void Assembler::hlt() {
  emit_byte(0xF4);
}

void Assembler::idivl(Register src) {
  int encode = prefix_and_encode(src->encoding());
  emit_byte(0xF7);
  emit_byte(0xF8 | encode);
}

void Assembler::divl(Register src) { // Unsigned
  int encode = prefix_and_encode(src->encoding());
  emit_byte(0xF7);
  emit_byte(0xF0 | encode);
}

void Assembler::imull(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xAF);
  emit_byte(0xC0 | encode);
}


void Assembler::imull(Register dst, Register src, int value) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  if (is8bit(value)) {
    emit_byte(0x6B);
    emit_byte(0xC0 | encode);
    emit_byte(value & 0xFF);
  } else {
    emit_byte(0x69);
    emit_byte(0xC0 | encode);
    emit_long(value);
  }
}

void Assembler::incl(Address dst) {
  // Don't use it directly. Use MacroAssembler::increment() instead.
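  // Note (added for clarity): this emits FF /0 (INC r/m32); the rax passed
  // to emit_operand() below only supplies the /0 reg field, it is not an
  // operand of the instruction.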
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xFF);
  emit_operand(rax, dst);
}

void Assembler::jcc(Condition cc, Label& L, bool maybe_short) {
  InstructionMark im(this);
  assert((0 <= cc) && (cc < 16), "illegal cc");
  if (L.is_bound()) {
    address dst = target(L);
    assert(dst != NULL, "jcc most probably wrong");

    const int short_size = 2;
    const int long_size = 6;
    intptr_t offs = (intptr_t)dst - (intptr_t)_code_pos;
    if (maybe_short && is8bit(offs - short_size)) {
      // 0111 tttn #8-bit disp
      emit_byte(0x70 | cc);
      emit_byte((offs - short_size) & 0xFF);
    } else {
      // 0000 1111 1000 tttn #32-bit disp
      assert(is_simm32(offs - long_size),
             "must be 32bit offset (call4)");
      emit_byte(0x0F);
      emit_byte(0x80 | cc);
      emit_long(offs - long_size);
    }
  } else {
    // Note: could eliminate cond. jumps to this jump if the condition
    //       is the same; however, that seems to be a rather unlikely case.
    // Note: use jccb() if the label to be bound is very close, to get
    //       an 8-bit displacement.
    L.add_patch_at(code(), locator());
    emit_byte(0x0F);
    emit_byte(0x80 | cc);
    emit_long(0);
  }
}

void Assembler::jccb(Condition cc, Label& L) {
  if (L.is_bound()) {
    const int short_size = 2;
    address entry = target(L);
#ifdef ASSERT
    intptr_t dist = (intptr_t)entry - ((intptr_t)_code_pos + short_size);
    intptr_t delta = short_branch_delta();
    if (delta != 0) {
      dist += (dist < 0 ? (-delta) : delta);
    }
    assert(is8bit(dist), "Displacement too large for a short jmp");
#endif
    intptr_t offs = (intptr_t)entry - (intptr_t)_code_pos;
    // 0111 tttn #8-bit disp
    emit_byte(0x70 | cc);
    emit_byte((offs - short_size) & 0xFF);
  } else {
    InstructionMark im(this);
    L.add_patch_at(code(), locator());
    emit_byte(0x70 | cc);
    emit_byte(0);
  }
}

void Assembler::jmp(Address adr) {
  InstructionMark im(this);
  prefix(adr);
  emit_byte(0xFF);
  emit_operand(rsp, adr);
}

void Assembler::jmp(Label& L, bool maybe_short) {
  if (L.is_bound()) {
    address entry = target(L);
    assert(entry != NULL, "jmp most probably wrong");
    InstructionMark im(this);
    const int short_size = 2;
    const int long_size = 5;
    intptr_t offs = entry - _code_pos;
    if (maybe_short && is8bit(offs - short_size)) {
      emit_byte(0xEB);
      emit_byte((offs - short_size) & 0xFF);
    } else {
      emit_byte(0xE9);
      emit_long(offs - long_size);
    }
  } else {
    // By default, forward jumps are always 32-bit displacements, since
    // we can't yet know where the label will be bound. If you're sure that
    // the forward jump will not run beyond 256 bytes, use jmpb to
    // force an 8-bit displacement.
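    // Illustrative usage (not in the original source):
    //   Label L_done;
    //   jmpb(L_done);  // 2-byte EB xx form, patched when L_done is bound
    //   ...
    //   bind(L_done);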
    InstructionMark im(this);
    L.add_patch_at(code(), locator());
    emit_byte(0xE9);
    emit_long(0);
  }
}

void Assembler::jmp(Register entry) {
  int encode = prefix_and_encode(entry->encoding());
  emit_byte(0xFF);
  emit_byte(0xE0 | encode);
}

void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_byte(0xE9);
  assert(dest != NULL, "must have a target");
  intptr_t disp = dest - (_code_pos + sizeof(int32_t));
  assert(is_simm32(disp), "must be 32bit offset (jmp)");
  emit_data(disp, rspec.reloc(), call32_operand);
}

void Assembler::jmpb(Label& L) {
  if (L.is_bound()) {
    const int short_size = 2;
    address entry = target(L);
    assert(entry != NULL, "jmp most probably wrong");
#ifdef ASSERT
    intptr_t dist = (intptr_t)entry - ((intptr_t)_code_pos + short_size);
    intptr_t delta = short_branch_delta();
    if (delta != 0) {
      dist += (dist < 0 ? (-delta) : delta);
    }
    assert(is8bit(dist), "Displacement too large for a short jmp");
#endif
    intptr_t offs = entry - _code_pos;
    emit_byte(0xEB);
    emit_byte((offs - short_size) & 0xFF);
  } else {
    InstructionMark im(this);
    L.add_patch_at(code(), locator());
    emit_byte(0xEB);
    emit_byte(0);
  }
}

void Assembler::ldmxcsr(Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  prefix(src);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(as_Register(2), src);
}

void Assembler::leal(Register dst, Address src) {
  InstructionMark im(this);
#ifdef _LP64
  emit_byte(0x67); // addr32
  prefix(src, dst);
#endif // LP64
  emit_byte(0x8D);
  emit_operand(dst, src);
}

void Assembler::lock() {
  if (Atomics & 1) {
    // Emit either nothing, a NOP, or a NOP: prefix
    emit_byte(0x90);
  } else {
    emit_byte(0xF0);
  }
}

void Assembler::lzcntl(Register dst, Register src) {
  assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBD);
  emit_byte(0xC0 | encode);
}

// Emit mfence instruction
void Assembler::mfence() {
  NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_byte(0xF0);
}

void Assembler::mov(Register dst, Register src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}

void Assembler::movapd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_66);
}

void Assembler::movaps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_NONE);
}

void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE);
  emit_byte(0x16);
  emit_byte(0xC0 | encode);
}

void Assembler::movb(Register dst, Address src) {
  NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
  InstructionMark im(this);
  prefix(src, dst, true);
  emit_byte(0x8A);
  emit_operand(dst, src);
1568} 1569 1570 1571void Assembler::movb(Address dst, int imm8) { 1572 InstructionMark im(this); 1573 prefix(dst); 1574 emit_byte(0xC6); 1575 emit_operand(rax, dst, 1); 1576 emit_byte(imm8); 1577} 1578 1579 1580void Assembler::movb(Address dst, Register src) { 1581 assert(src->has_byte_register(), "must have byte register"); 1582 InstructionMark im(this); 1583 prefix(dst, src, true); 1584 emit_byte(0x88); 1585 emit_operand(src, dst); 1586} 1587 1588void Assembler::movdl(XMMRegister dst, Register src) { 1589 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1590 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); 1591 emit_byte(0x6E); 1592 emit_byte(0xC0 | encode); 1593} 1594 1595void Assembler::movdl(Register dst, XMMRegister src) { 1596 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1597 // swap src/dst to get correct prefix 1598 int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66); 1599 emit_byte(0x7E); 1600 emit_byte(0xC0 | encode); 1601} 1602 1603void Assembler::movdl(XMMRegister dst, Address src) { 1604 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1605 InstructionMark im(this); 1606 simd_prefix(dst, src, VEX_SIMD_66); 1607 emit_byte(0x6E); 1608 emit_operand(dst, src); 1609} 1610 1611void Assembler::movdl(Address dst, XMMRegister src) { 1612 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1613 InstructionMark im(this); 1614 simd_prefix(dst, src, VEX_SIMD_66); 1615 emit_byte(0x7E); 1616 emit_operand(src, dst); 1617} 1618 1619void Assembler::movdqa(XMMRegister dst, XMMRegister src) { 1620 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1621 emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66); 1622} 1623 1624void Assembler::movdqu(XMMRegister dst, Address src) { 1625 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1626 emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3); 1627} 1628 1629void Assembler::movdqu(XMMRegister dst, XMMRegister src) { 1630 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1631 emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3); 1632} 1633 1634void Assembler::movdqu(Address dst, XMMRegister src) { 1635 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1636 InstructionMark im(this); 1637 simd_prefix(dst, src, VEX_SIMD_F3); 1638 emit_byte(0x7F); 1639 emit_operand(src, dst); 1640} 1641 1642// Move Unaligned 256bit Vector 1643void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) { 1644 assert(UseAVX, ""); 1645 bool vector256 = true; 1646 int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, vector256); 1647 emit_byte(0x6F); 1648 emit_byte(0xC0 | encode); 1649} 1650 1651void Assembler::vmovdqu(XMMRegister dst, Address src) { 1652 assert(UseAVX, ""); 1653 InstructionMark im(this); 1654 bool vector256 = true; 1655 vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector256); 1656 emit_byte(0x6F); 1657 emit_operand(dst, src); 1658} 1659 1660void Assembler::vmovdqu(Address dst, XMMRegister src) { 1661 assert(UseAVX, ""); 1662 InstructionMark im(this); 1663 bool vector256 = true; 1664 // swap src<->dst for encoding 1665 assert(src != xnoreg, "sanity"); 1666 vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector256); 1667 emit_byte(0x7F); 1668 emit_operand(src, dst); 1669} 1670 1671// Uses zero extension on 64bit 1672 1673void Assembler::movl(Register dst, int32_t imm32) { 1674 int encode = prefix_and_encode(dst->encoding()); 1675 emit_byte(0xB8 | encode); 1676 emit_long(imm32); 1677} 1678 1679void Assembler::movl(Register dst, Register src) { 1680 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1681 emit_byte(0x8B); 1682 
  emit_byte(0xC0 | encode);
}

void Assembler::movl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x8B);
  emit_operand(dst, src);
}

void Assembler::movl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xC7);
  emit_operand(rax, dst, 4);
  emit_long(imm32);
}

void Assembler::movl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x89);
  emit_operand(src, dst);
}

// Newer CPUs require movsd and movss to avoid a partial register stall
// when loading from memory. But for old Opterons, use movlpd instead of movsd.
// The selection is done in MacroAssembler::movdbl() and movflt().
void Assembler::movlpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x12, dst, src, VEX_SIMD_66);
}

void Assembler::movq(MMXRegister dst, Address src) {
  assert(VM_Version::supports_mmx(), "");
  emit_byte(0x0F);
  emit_byte(0x6F);
  emit_operand(dst, src);
}

void Assembler::movq(Address dst, MMXRegister src) {
  assert(VM_Version::supports_mmx(), "");
  emit_byte(0x0F);
  emit_byte(0x7F);
  // workaround gcc (3.2.1-7a) bug
  // In that version of gcc with only an emit_operand(MMX, Address)
  // gcc will tail jump and try and reverse the parameters completely
  // obliterating dst in the process. By having a version available
  // that doesn't need to swap the args at the tail jump the bug is
  // avoided.
  emit_operand(dst, src);
}

void Assembler::movq(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_F3);
  emit_byte(0x7E);
  emit_operand(dst, src);
}

void Assembler::movq(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_66);
  emit_byte(0xD6);
  emit_operand(src, dst);
}

void Assembler::movsbl(Register dst, Address src) { // movsxb
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xBE);
  emit_operand(dst, src);
}

void Assembler::movsbl(Register dst, Register src) { // movsxb
  NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
  emit_byte(0x0F);
  emit_byte(0xBE);
  emit_byte(0xC0 | encode);
}

void Assembler::movsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x10, dst, src, VEX_SIMD_F2);
}

void Assembler::movsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F2);
}

void Assembler::movsd(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_F2);
  emit_byte(0x11);
  emit_operand(src, dst);
}

void Assembler::movss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x10, dst, src, VEX_SIMD_F3);
}

void Assembler::movss(XMMRegister dst, Address src) {
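  // Note (added for clarity): the load form is F3 0F 10 /r (MOVSS xmm, m32);
  // the store form below uses opcode 0x11 with the operands swapped.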
NOT_LP64(assert(VM_Version::supports_sse(), "")); 1792 emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F3); 1793} 1794 1795void Assembler::movss(Address dst, XMMRegister src) { 1796 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1797 InstructionMark im(this); 1798 simd_prefix(dst, src, VEX_SIMD_F3); 1799 emit_byte(0x11); 1800 emit_operand(src, dst); 1801} 1802 1803void Assembler::movswl(Register dst, Address src) { // movsxw 1804 InstructionMark im(this); 1805 prefix(src, dst); 1806 emit_byte(0x0F); 1807 emit_byte(0xBF); 1808 emit_operand(dst, src); 1809} 1810 1811void Assembler::movswl(Register dst, Register src) { // movsxw 1812 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1813 emit_byte(0x0F); 1814 emit_byte(0xBF); 1815 emit_byte(0xC0 | encode); 1816} 1817 1818void Assembler::movw(Address dst, int imm16) { 1819 InstructionMark im(this); 1820 1821 emit_byte(0x66); // switch to 16-bit mode 1822 prefix(dst); 1823 emit_byte(0xC7); 1824 emit_operand(rax, dst, 2); 1825 emit_word(imm16); 1826} 1827 1828void Assembler::movw(Register dst, Address src) { 1829 InstructionMark im(this); 1830 emit_byte(0x66); 1831 prefix(src, dst); 1832 emit_byte(0x8B); 1833 emit_operand(dst, src); 1834} 1835 1836void Assembler::movw(Address dst, Register src) { 1837 InstructionMark im(this); 1838 emit_byte(0x66); 1839 prefix(dst, src); 1840 emit_byte(0x89); 1841 emit_operand(src, dst); 1842} 1843 1844void Assembler::movzbl(Register dst, Address src) { // movzxb 1845 InstructionMark im(this); 1846 prefix(src, dst); 1847 emit_byte(0x0F); 1848 emit_byte(0xB6); 1849 emit_operand(dst, src); 1850} 1851 1852void Assembler::movzbl(Register dst, Register src) { // movzxb 1853 NOT_LP64(assert(src->has_byte_register(), "must have byte register")); 1854 int encode = prefix_and_encode(dst->encoding(), src->encoding(), true); 1855 emit_byte(0x0F); 1856 emit_byte(0xB6); 1857 emit_byte(0xC0 | encode); 1858} 1859 1860void Assembler::movzwl(Register dst, Address src) { // movzxw 1861 InstructionMark im(this); 1862 prefix(src, dst); 1863 emit_byte(0x0F); 1864 emit_byte(0xB7); 1865 emit_operand(dst, src); 1866} 1867 1868void Assembler::movzwl(Register dst, Register src) { // movzxw 1869 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1870 emit_byte(0x0F); 1871 emit_byte(0xB7); 1872 emit_byte(0xC0 | encode); 1873} 1874 1875void Assembler::mull(Address src) { 1876 InstructionMark im(this); 1877 prefix(src); 1878 emit_byte(0xF7); 1879 emit_operand(rsp, src); 1880} 1881 1882void Assembler::mull(Register src) { 1883 int encode = prefix_and_encode(src->encoding()); 1884 emit_byte(0xF7); 1885 emit_byte(0xE0 | encode); 1886} 1887 1888void Assembler::mulsd(XMMRegister dst, Address src) { 1889 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1890 emit_simd_arith(0x59, dst, src, VEX_SIMD_F2); 1891} 1892 1893void Assembler::mulsd(XMMRegister dst, XMMRegister src) { 1894 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1895 emit_simd_arith(0x59, dst, src, VEX_SIMD_F2); 1896} 1897 1898void Assembler::mulss(XMMRegister dst, Address src) { 1899 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1900 emit_simd_arith(0x59, dst, src, VEX_SIMD_F3); 1901} 1902 1903void Assembler::mulss(XMMRegister dst, XMMRegister src) { 1904 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1905 emit_simd_arith(0x59, dst, src, VEX_SIMD_F3); 1906} 1907 1908void Assembler::negl(Register dst) { 1909 int encode = prefix_and_encode(dst->encoding()); 1910 emit_byte(0xF7); 1911 emit_byte(0xD8 | encode); 1912} 1913 1914void 
Assembler::nop(int i) { 1915#ifdef ASSERT 1916 assert(i > 0, " "); 1917 // The fancy nops aren't currently recognized by debuggers making it a 1918 // pain to disassemble code while debugging. If asserts are on clearly 1919 // speed is not an issue so simply use the single byte traditional nop 1920 // to do alignment. 1921 1922 for (; i > 0 ; i--) emit_byte(0x90); 1923 return; 1924 1925#endif // ASSERT 1926 1927 if (UseAddressNop && VM_Version::is_intel()) { 1928 // 1929 // Using multi-bytes nops "0x0F 0x1F [address]" for Intel 1930 // 1: 0x90 1931 // 2: 0x66 0x90 1932 // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding) 1933 // 4: 0x0F 0x1F 0x40 0x00 1934 // 5: 0x0F 0x1F 0x44 0x00 0x00 1935 // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00 1936 // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 1937 // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 1938 // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 1939 // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 1940 // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 1941 1942 // The rest coding is Intel specific - don't use consecutive address nops 1943 1944 // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 1945 // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 1946 // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 1947 // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 1948 1949 while(i >= 15) { 1950 // For Intel don't generate consecutive addess nops (mix with regular nops) 1951 i -= 15; 1952 emit_byte(0x66); // size prefix 1953 emit_byte(0x66); // size prefix 1954 emit_byte(0x66); // size prefix 1955 addr_nop_8(); 1956 emit_byte(0x66); // size prefix 1957 emit_byte(0x66); // size prefix 1958 emit_byte(0x66); // size prefix 1959 emit_byte(0x90); // nop 1960 } 1961 switch (i) { 1962 case 14: 1963 emit_byte(0x66); // size prefix 1964 case 13: 1965 emit_byte(0x66); // size prefix 1966 case 12: 1967 addr_nop_8(); 1968 emit_byte(0x66); // size prefix 1969 emit_byte(0x66); // size prefix 1970 emit_byte(0x66); // size prefix 1971 emit_byte(0x90); // nop 1972 break; 1973 case 11: 1974 emit_byte(0x66); // size prefix 1975 case 10: 1976 emit_byte(0x66); // size prefix 1977 case 9: 1978 emit_byte(0x66); // size prefix 1979 case 8: 1980 addr_nop_8(); 1981 break; 1982 case 7: 1983 addr_nop_7(); 1984 break; 1985 case 6: 1986 emit_byte(0x66); // size prefix 1987 case 5: 1988 addr_nop_5(); 1989 break; 1990 case 4: 1991 addr_nop_4(); 1992 break; 1993 case 3: 1994 // Don't use "0x0F 0x1F 0x00" - need patching safe padding 1995 emit_byte(0x66); // size prefix 1996 case 2: 1997 emit_byte(0x66); // size prefix 1998 case 1: 1999 emit_byte(0x90); // nop 2000 break; 2001 default: 2002 assert(i == 0, " "); 2003 } 2004 return; 2005 } 2006 if (UseAddressNop && VM_Version::is_amd()) { 2007 // 2008 // Using multi-bytes nops "0x0F 0x1F [address]" for AMD. 
2009 // 1: 0x90 2010 // 2: 0x66 0x90 2011 // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding) 2012 // 4: 0x0F 0x1F 0x40 0x00 2013 // 5: 0x0F 0x1F 0x44 0x00 0x00 2014 // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00 2015 // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2016 // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2017 // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2018 // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2019 // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2020 2021 // The rest coding is AMD specific - use consecutive address nops 2022 2023 // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00 2024 // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00 2025 // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2026 // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2027 // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2028 // Size prefixes (0x66) are added for larger sizes 2029 2030 while(i >= 22) { 2031 i -= 11; 2032 emit_byte(0x66); // size prefix 2033 emit_byte(0x66); // size prefix 2034 emit_byte(0x66); // size prefix 2035 addr_nop_8(); 2036 } 2037 // Generate first nop for size between 21-12 2038 switch (i) { 2039 case 21: 2040 i -= 1; 2041 emit_byte(0x66); // size prefix 2042 case 20: 2043 case 19: 2044 i -= 1; 2045 emit_byte(0x66); // size prefix 2046 case 18: 2047 case 17: 2048 i -= 1; 2049 emit_byte(0x66); // size prefix 2050 case 16: 2051 case 15: 2052 i -= 8; 2053 addr_nop_8(); 2054 break; 2055 case 14: 2056 case 13: 2057 i -= 7; 2058 addr_nop_7(); 2059 break; 2060 case 12: 2061 i -= 6; 2062 emit_byte(0x66); // size prefix 2063 addr_nop_5(); 2064 break; 2065 default: 2066 assert(i < 12, " "); 2067 } 2068 2069 // Generate second nop for size between 11-1 2070 switch (i) { 2071 case 11: 2072 emit_byte(0x66); // size prefix 2073 case 10: 2074 emit_byte(0x66); // size prefix 2075 case 9: 2076 emit_byte(0x66); // size prefix 2077 case 8: 2078 addr_nop_8(); 2079 break; 2080 case 7: 2081 addr_nop_7(); 2082 break; 2083 case 6: 2084 emit_byte(0x66); // size prefix 2085 case 5: 2086 addr_nop_5(); 2087 break; 2088 case 4: 2089 addr_nop_4(); 2090 break; 2091 case 3: 2092 // Don't use "0x0F 0x1F 0x00" - need patching safe padding 2093 emit_byte(0x66); // size prefix 2094 case 2: 2095 emit_byte(0x66); // size prefix 2096 case 1: 2097 emit_byte(0x90); // nop 2098 break; 2099 default: 2100 assert(i == 0, " "); 2101 } 2102 return; 2103 } 2104 2105 // Using nops with size prefixes "0x66 0x90". 
2106 // From AMD Optimization Guide: 2107 // 1: 0x90 2108 // 2: 0x66 0x90 2109 // 3: 0x66 0x66 0x90 2110 // 4: 0x66 0x66 0x66 0x90 2111 // 5: 0x66 0x66 0x90 0x66 0x90 2112 // 6: 0x66 0x66 0x90 0x66 0x66 0x90 2113 // 7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 2114 // 8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90 2115 // 9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90 2116 // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90 2117 // 2118 while(i > 12) { 2119 i -= 4; 2120 emit_byte(0x66); // size prefix 2121 emit_byte(0x66); 2122 emit_byte(0x66); 2123 emit_byte(0x90); // nop 2124 } 2125 // 1 - 12 nops 2126 if(i > 8) { 2127 if(i > 9) { 2128 i -= 1; 2129 emit_byte(0x66); 2130 } 2131 i -= 3; 2132 emit_byte(0x66); 2133 emit_byte(0x66); 2134 emit_byte(0x90); 2135 } 2136 // 1 - 8 nops 2137 if(i > 4) { 2138 if(i > 6) { 2139 i -= 1; 2140 emit_byte(0x66); 2141 } 2142 i -= 3; 2143 emit_byte(0x66); 2144 emit_byte(0x66); 2145 emit_byte(0x90); 2146 } 2147 switch (i) { 2148 case 4: 2149 emit_byte(0x66); 2150 case 3: 2151 emit_byte(0x66); 2152 case 2: 2153 emit_byte(0x66); 2154 case 1: 2155 emit_byte(0x90); 2156 break; 2157 default: 2158 assert(i == 0, " "); 2159 } 2160} 2161 2162void Assembler::notl(Register dst) { 2163 int encode = prefix_and_encode(dst->encoding()); 2164 emit_byte(0xF7); 2165 emit_byte(0xD0 | encode ); 2166} 2167 2168void Assembler::orl(Address dst, int32_t imm32) { 2169 InstructionMark im(this); 2170 prefix(dst); 2171 emit_arith_operand(0x81, rcx, dst, imm32); 2172} 2173 2174void Assembler::orl(Register dst, int32_t imm32) { 2175 prefix(dst); 2176 emit_arith(0x81, 0xC8, dst, imm32); 2177} 2178 2179void Assembler::orl(Register dst, Address src) { 2180 InstructionMark im(this); 2181 prefix(src, dst); 2182 emit_byte(0x0B); 2183 emit_operand(dst, src); 2184} 2185 2186void Assembler::orl(Register dst, Register src) { 2187 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2188 emit_arith(0x0B, 0xC0, dst, src); 2189} 2190 2191void Assembler::packuswb(XMMRegister dst, Address src) { 2192 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2193 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2194 emit_simd_arith(0x67, dst, src, VEX_SIMD_66); 2195} 2196 2197void Assembler::packuswb(XMMRegister dst, XMMRegister src) { 2198 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2199 emit_simd_arith(0x67, dst, src, VEX_SIMD_66); 2200} 2201 2202void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) { 2203 assert(VM_Version::supports_sse4_2(), ""); 2204 InstructionMark im(this); 2205 simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A); 2206 emit_byte(0x61); 2207 emit_operand(dst, src); 2208 emit_byte(imm8); 2209} 2210 2211void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) { 2212 assert(VM_Version::supports_sse4_2(), ""); 2213 int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A); 2214 emit_byte(0x61); 2215 emit_byte(0xC0 | encode); 2216 emit_byte(imm8); 2217} 2218 2219void Assembler::pmovzxbw(XMMRegister dst, Address src) { 2220 assert(VM_Version::supports_sse4_1(), ""); 2221 InstructionMark im(this); 2222 simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); 2223 emit_byte(0x30); 2224 emit_operand(dst, src); 2225} 2226 2227void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) { 2228 assert(VM_Version::supports_sse4_1(), ""); 2229 int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38); 2230 emit_byte(0x30); 2231 emit_byte(0xC0 | encode); 2232} 2233 2234// generic 2235void 
Assembler::pop(Register dst) { 2236 int encode = prefix_and_encode(dst->encoding()); 2237 emit_byte(0x58 | encode); 2238} 2239 2240void Assembler::popcntl(Register dst, Address src) { 2241 assert(VM_Version::supports_popcnt(), "must support"); 2242 InstructionMark im(this); 2243 emit_byte(0xF3); 2244 prefix(src, dst); 2245 emit_byte(0x0F); 2246 emit_byte(0xB8); 2247 emit_operand(dst, src); 2248} 2249 2250void Assembler::popcntl(Register dst, Register src) { 2251 assert(VM_Version::supports_popcnt(), "must support"); 2252 emit_byte(0xF3); 2253 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2254 emit_byte(0x0F); 2255 emit_byte(0xB8); 2256 emit_byte(0xC0 | encode); 2257} 2258 2259void Assembler::popf() { 2260 emit_byte(0x9D); 2261} 2262 2263#ifndef _LP64 // no 32bit push/pop on amd64 2264void Assembler::popl(Address dst) { 2265 // NOTE: this will adjust stack by 8byte on 64bits 2266 InstructionMark im(this); 2267 prefix(dst); 2268 emit_byte(0x8F); 2269 emit_operand(rax, dst); 2270} 2271#endif 2272 2273void Assembler::prefetch_prefix(Address src) { 2274 prefix(src); 2275 emit_byte(0x0F); 2276} 2277 2278void Assembler::prefetchnta(Address src) { 2279 NOT_LP64(assert(VM_Version::supports_sse(), "must support")); 2280 InstructionMark im(this); 2281 prefetch_prefix(src); 2282 emit_byte(0x18); 2283 emit_operand(rax, src); // 0, src 2284} 2285 2286void Assembler::prefetchr(Address src) { 2287 assert(VM_Version::supports_3dnow_prefetch(), "must support"); 2288 InstructionMark im(this); 2289 prefetch_prefix(src); 2290 emit_byte(0x0D); 2291 emit_operand(rax, src); // 0, src 2292} 2293 2294void Assembler::prefetcht0(Address src) { 2295 NOT_LP64(assert(VM_Version::supports_sse(), "must support")); 2296 InstructionMark im(this); 2297 prefetch_prefix(src); 2298 emit_byte(0x18); 2299 emit_operand(rcx, src); // 1, src 2300} 2301 2302void Assembler::prefetcht1(Address src) { 2303 NOT_LP64(assert(VM_Version::supports_sse(), "must support")); 2304 InstructionMark im(this); 2305 prefetch_prefix(src); 2306 emit_byte(0x18); 2307 emit_operand(rdx, src); // 2, src 2308} 2309 2310void Assembler::prefetcht2(Address src) { 2311 NOT_LP64(assert(VM_Version::supports_sse(), "must support")); 2312 InstructionMark im(this); 2313 prefetch_prefix(src); 2314 emit_byte(0x18); 2315 emit_operand(rbx, src); // 3, src 2316} 2317 2318void Assembler::prefetchw(Address src) { 2319 assert(VM_Version::supports_3dnow_prefetch(), "must support"); 2320 InstructionMark im(this); 2321 prefetch_prefix(src); 2322 emit_byte(0x0D); 2323 emit_operand(rcx, src); // 1, src 2324} 2325 2326void Assembler::prefix(Prefix p) { 2327 a_byte(p); 2328} 2329 2330void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) { 2331 assert(isByte(mode), "invalid value"); 2332 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2333 emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_66); 2334 emit_byte(mode & 0xFF); 2335 2336} 2337 2338void Assembler::pshufd(XMMRegister dst, Address src, int mode) { 2339 assert(isByte(mode), "invalid value"); 2340 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2341 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2342 InstructionMark im(this); 2343 simd_prefix(dst, src, VEX_SIMD_66); 2344 emit_byte(0x70); 2345 emit_operand(dst, src); 2346 emit_byte(mode & 0xFF); 2347} 2348 2349void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) { 2350 assert(isByte(mode), "invalid value"); 2351 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2352 emit_simd_arith_nonds(0x70, dst, src, 
VEX_SIMD_F2); 2353 emit_byte(mode & 0xFF); 2354} 2355 2356void Assembler::pshuflw(XMMRegister dst, Address src, int mode) { 2357 assert(isByte(mode), "invalid value"); 2358 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2359 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2360 InstructionMark im(this); 2361 simd_prefix(dst, src, VEX_SIMD_F2); 2362 emit_byte(0x70); 2363 emit_operand(dst, src); 2364 emit_byte(mode & 0xFF); 2365} 2366 2367void Assembler::psrldq(XMMRegister dst, int shift) { 2368 // Shift 128 bit value in xmm register by number of bytes. 2369 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2370 int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66); 2371 emit_byte(0x73); 2372 emit_byte(0xC0 | encode); 2373 emit_byte(shift); 2374} 2375 2376void Assembler::ptest(XMMRegister dst, Address src) { 2377 assert(VM_Version::supports_sse4_1(), ""); 2378 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2379 InstructionMark im(this); 2380 simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); 2381 emit_byte(0x17); 2382 emit_operand(dst, src); 2383} 2384 2385void Assembler::ptest(XMMRegister dst, XMMRegister src) { 2386 assert(VM_Version::supports_sse4_1(), ""); 2387 int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38); 2388 emit_byte(0x17); 2389 emit_byte(0xC0 | encode); 2390} 2391 2392void Assembler::punpcklbw(XMMRegister dst, Address src) { 2393 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2394 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2395 emit_simd_arith(0x60, dst, src, VEX_SIMD_66); 2396} 2397 2398void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) { 2399 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2400 emit_simd_arith(0x60, dst, src, VEX_SIMD_66); 2401} 2402 2403void Assembler::punpckldq(XMMRegister dst, Address src) { 2404 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2405 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2406 emit_simd_arith(0x62, dst, src, VEX_SIMD_66); 2407} 2408 2409void Assembler::punpckldq(XMMRegister dst, XMMRegister src) { 2410 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2411 emit_simd_arith(0x62, dst, src, VEX_SIMD_66); 2412} 2413 2414void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) { 2415 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2416 emit_simd_arith(0x6C, dst, src, VEX_SIMD_66); 2417} 2418 2419void Assembler::push(int32_t imm32) { 2420 // in 64bits we push 64bits onto the stack but only 2421 // take a 32bit immediate 2422 emit_byte(0x68); 2423 emit_long(imm32); 2424} 2425 2426void Assembler::push(Register src) { 2427 int encode = prefix_and_encode(src->encoding()); 2428 2429 emit_byte(0x50 | encode); 2430} 2431 2432void Assembler::pushf() { 2433 emit_byte(0x9C); 2434} 2435 2436#ifndef _LP64 // no 32bit push/pop on amd64 2437void Assembler::pushl(Address src) { 2438 // Note this will push 64bit on 64bit 2439 InstructionMark im(this); 2440 prefix(src); 2441 emit_byte(0xFF); 2442 emit_operand(rsi, src); 2443} 2444#endif 2445 2446void Assembler::rcll(Register dst, int imm8) { 2447 assert(isShiftCount(imm8), "illegal shift count"); 2448 int encode = prefix_and_encode(dst->encoding()); 2449 if (imm8 == 1) { 2450 emit_byte(0xD1); 2451 emit_byte(0xD0 | encode); 2452 } else { 2453 emit_byte(0xC1); 2454 emit_byte(0xD0 | encode); 2455 emit_byte(imm8); 2456 } 2457} 2458 2459// copies data from [esi] to [edi] using rcx pointer sized words 2460// generic 2461void 
Assembler::rep_mov() { 2462 emit_byte(0xF3); 2463 // MOVSQ 2464 LP64_ONLY(prefix(REX_W)); 2465 emit_byte(0xA5); 2466} 2467 2468// sets rcx pointer sized words with rax, value at [edi] 2469// generic 2470void Assembler::rep_set() { // rep_set 2471 emit_byte(0xF3); 2472 // STOSQ 2473 LP64_ONLY(prefix(REX_W)); 2474 emit_byte(0xAB); 2475} 2476 2477// scans rcx pointer sized words at [edi] for occurance of rax, 2478// generic 2479void Assembler::repne_scan() { // repne_scan 2480 emit_byte(0xF2); 2481 // SCASQ 2482 LP64_ONLY(prefix(REX_W)); 2483 emit_byte(0xAF); 2484} 2485 2486#ifdef _LP64 2487// scans rcx 4 byte words at [edi] for occurance of rax, 2488// generic 2489void Assembler::repne_scanl() { // repne_scan 2490 emit_byte(0xF2); 2491 // SCASL 2492 emit_byte(0xAF); 2493} 2494#endif 2495 2496void Assembler::ret(int imm16) { 2497 if (imm16 == 0) { 2498 emit_byte(0xC3); 2499 } else { 2500 emit_byte(0xC2); 2501 emit_word(imm16); 2502 } 2503} 2504 2505void Assembler::sahf() { 2506#ifdef _LP64 2507 // Not supported in 64bit mode 2508 ShouldNotReachHere(); 2509#endif 2510 emit_byte(0x9E); 2511} 2512 2513void Assembler::sarl(Register dst, int imm8) { 2514 int encode = prefix_and_encode(dst->encoding()); 2515 assert(isShiftCount(imm8), "illegal shift count"); 2516 if (imm8 == 1) { 2517 emit_byte(0xD1); 2518 emit_byte(0xF8 | encode); 2519 } else { 2520 emit_byte(0xC1); 2521 emit_byte(0xF8 | encode); 2522 emit_byte(imm8); 2523 } 2524} 2525 2526void Assembler::sarl(Register dst) { 2527 int encode = prefix_and_encode(dst->encoding()); 2528 emit_byte(0xD3); 2529 emit_byte(0xF8 | encode); 2530} 2531 2532void Assembler::sbbl(Address dst, int32_t imm32) { 2533 InstructionMark im(this); 2534 prefix(dst); 2535 emit_arith_operand(0x81, rbx, dst, imm32); 2536} 2537 2538void Assembler::sbbl(Register dst, int32_t imm32) { 2539 prefix(dst); 2540 emit_arith(0x81, 0xD8, dst, imm32); 2541} 2542 2543 2544void Assembler::sbbl(Register dst, Address src) { 2545 InstructionMark im(this); 2546 prefix(src, dst); 2547 emit_byte(0x1B); 2548 emit_operand(dst, src); 2549} 2550 2551void Assembler::sbbl(Register dst, Register src) { 2552 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2553 emit_arith(0x1B, 0xC0, dst, src); 2554} 2555 2556void Assembler::setb(Condition cc, Register dst) { 2557 assert(0 <= cc && cc < 16, "illegal cc"); 2558 int encode = prefix_and_encode(dst->encoding(), true); 2559 emit_byte(0x0F); 2560 emit_byte(0x90 | cc); 2561 emit_byte(0xC0 | encode); 2562} 2563 2564void Assembler::shll(Register dst, int imm8) { 2565 assert(isShiftCount(imm8), "illegal shift count"); 2566 int encode = prefix_and_encode(dst->encoding()); 2567 if (imm8 == 1 ) { 2568 emit_byte(0xD1); 2569 emit_byte(0xE0 | encode); 2570 } else { 2571 emit_byte(0xC1); 2572 emit_byte(0xE0 | encode); 2573 emit_byte(imm8); 2574 } 2575} 2576 2577void Assembler::shll(Register dst) { 2578 int encode = prefix_and_encode(dst->encoding()); 2579 emit_byte(0xD3); 2580 emit_byte(0xE0 | encode); 2581} 2582 2583void Assembler::shrl(Register dst, int imm8) { 2584 assert(isShiftCount(imm8), "illegal shift count"); 2585 int encode = prefix_and_encode(dst->encoding()); 2586 emit_byte(0xC1); 2587 emit_byte(0xE8 | encode); 2588 emit_byte(imm8); 2589} 2590 2591void Assembler::shrl(Register dst) { 2592 int encode = prefix_and_encode(dst->encoding()); 2593 emit_byte(0xD3); 2594 emit_byte(0xE8 | encode); 2595} 2596 2597// copies a single word from [esi] to [edi] 2598void Assembler::smovl() { 2599 emit_byte(0xA5); 2600} 2601 2602void 
Assembler::sqrtsd(XMMRegister dst, XMMRegister src) { 2603 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2604 emit_simd_arith(0x51, dst, src, VEX_SIMD_F2); 2605} 2606 2607void Assembler::sqrtsd(XMMRegister dst, Address src) { 2608 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2609 emit_simd_arith(0x51, dst, src, VEX_SIMD_F2); 2610} 2611 2612void Assembler::sqrtss(XMMRegister dst, XMMRegister src) { 2613 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2614 emit_simd_arith(0x51, dst, src, VEX_SIMD_F3); 2615} 2616 2617void Assembler::sqrtss(XMMRegister dst, Address src) { 2618 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2619 emit_simd_arith(0x51, dst, src, VEX_SIMD_F3); 2620} 2621 2622void Assembler::stmxcsr( Address dst) { 2623 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2624 InstructionMark im(this); 2625 prefix(dst); 2626 emit_byte(0x0F); 2627 emit_byte(0xAE); 2628 emit_operand(as_Register(3), dst); 2629} 2630 2631void Assembler::subl(Address dst, int32_t imm32) { 2632 InstructionMark im(this); 2633 prefix(dst); 2634 emit_arith_operand(0x81, rbp, dst, imm32); 2635} 2636 2637void Assembler::subl(Address dst, Register src) { 2638 InstructionMark im(this); 2639 prefix(dst, src); 2640 emit_byte(0x29); 2641 emit_operand(src, dst); 2642} 2643 2644void Assembler::subl(Register dst, int32_t imm32) { 2645 prefix(dst); 2646 emit_arith(0x81, 0xE8, dst, imm32); 2647} 2648 2649// Force generation of a 4 byte immediate value even if it fits into 8bit 2650void Assembler::subl_imm32(Register dst, int32_t imm32) { 2651 prefix(dst); 2652 emit_arith_imm32(0x81, 0xE8, dst, imm32); 2653} 2654 2655void Assembler::subl(Register dst, Address src) { 2656 InstructionMark im(this); 2657 prefix(src, dst); 2658 emit_byte(0x2B); 2659 emit_operand(dst, src); 2660} 2661 2662void Assembler::subl(Register dst, Register src) { 2663 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2664 emit_arith(0x2B, 0xC0, dst, src); 2665} 2666 2667void Assembler::subsd(XMMRegister dst, XMMRegister src) { 2668 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2669 emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2); 2670} 2671 2672void Assembler::subsd(XMMRegister dst, Address src) { 2673 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2674 emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2); 2675} 2676 2677void Assembler::subss(XMMRegister dst, XMMRegister src) { 2678 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2679 emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3); 2680} 2681 2682void Assembler::subss(XMMRegister dst, Address src) { 2683 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2684 emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3); 2685} 2686 2687void Assembler::testb(Register dst, int imm8) { 2688 NOT_LP64(assert(dst->has_byte_register(), "must have byte register")); 2689 (void) prefix_and_encode(dst->encoding(), true); 2690 emit_arith_b(0xF6, 0xC0, dst, imm8); 2691} 2692 2693void Assembler::testl(Register dst, int32_t imm32) { 2694 // not using emit_arith because test 2695 // doesn't support sign-extension of 2696 // 8bit operands 2697 int encode = dst->encoding(); 2698 if (encode == 0) { 2699 emit_byte(0xA9); 2700 } else { 2701 encode = prefix_and_encode(encode); 2702 emit_byte(0xF7); 2703 emit_byte(0xC0 | encode); 2704 } 2705 emit_long(imm32); 2706} 2707 2708void Assembler::testl(Register dst, Register src) { 2709 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2710 emit_arith(0x85, 0xC0, dst, src); 2711} 2712 2713void Assembler::testl(Register dst, Address src) { 2714 InstructionMark 
im(this); 2715 prefix(src, dst); 2716 emit_byte(0x85); 2717 emit_operand(dst, src); 2718} 2719 2720void Assembler::ucomisd(XMMRegister dst, Address src) { 2721 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2722 emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66); 2723} 2724 2725void Assembler::ucomisd(XMMRegister dst, XMMRegister src) { 2726 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2727 emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66); 2728} 2729 2730void Assembler::ucomiss(XMMRegister dst, Address src) { 2731 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2732 emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE); 2733} 2734 2735void Assembler::ucomiss(XMMRegister dst, XMMRegister src) { 2736 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2737 emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE); 2738} 2739 2740 2741void Assembler::xaddl(Address dst, Register src) { 2742 InstructionMark im(this); 2743 prefix(dst, src); 2744 emit_byte(0x0F); 2745 emit_byte(0xC1); 2746 emit_operand(src, dst); 2747} 2748 2749void Assembler::xchgl(Register dst, Address src) { // xchg 2750 InstructionMark im(this); 2751 prefix(src, dst); 2752 emit_byte(0x87); 2753 emit_operand(dst, src); 2754} 2755 2756void Assembler::xchgl(Register dst, Register src) { 2757 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2758 emit_byte(0x87); 2759 emit_byte(0xc0 | encode); 2760} 2761 2762void Assembler::xorl(Register dst, int32_t imm32) { 2763 prefix(dst); 2764 emit_arith(0x81, 0xF0, dst, imm32); 2765} 2766 2767void Assembler::xorl(Register dst, Address src) { 2768 InstructionMark im(this); 2769 prefix(src, dst); 2770 emit_byte(0x33); 2771 emit_operand(dst, src); 2772} 2773 2774void Assembler::xorl(Register dst, Register src) { 2775 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2776 emit_arith(0x33, 0xC0, dst, src); 2777} 2778 2779 2780// AVX 3-operands scalar float-point arithmetic instructions 2781 2782void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) { 2783 assert(VM_Version::supports_avx(), ""); 2784 emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false); 2785} 2786 2787void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { 2788 assert(VM_Version::supports_avx(), ""); 2789 emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false); 2790} 2791 2792void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) { 2793 assert(VM_Version::supports_avx(), ""); 2794 emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false); 2795} 2796 2797void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) { 2798 assert(VM_Version::supports_avx(), ""); 2799 emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false); 2800} 2801 2802void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) { 2803 assert(VM_Version::supports_avx(), ""); 2804 emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false); 2805} 2806 2807void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { 2808 assert(VM_Version::supports_avx(), ""); 2809 emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false); 2810} 2811 2812void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) { 2813 assert(VM_Version::supports_avx(), ""); 2814 emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false); 2815} 2816 2817void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) { 2818 assert(VM_Version::supports_avx(), 
""); 2819 emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false); 2820} 2821 2822void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) { 2823 assert(VM_Version::supports_avx(), ""); 2824 emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false); 2825} 2826 2827void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { 2828 assert(VM_Version::supports_avx(), ""); 2829 emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false); 2830} 2831 2832void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) { 2833 assert(VM_Version::supports_avx(), ""); 2834 emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false); 2835} 2836 2837void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) { 2838 assert(VM_Version::supports_avx(), ""); 2839 emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false); 2840} 2841 2842void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) { 2843 assert(VM_Version::supports_avx(), ""); 2844 emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false); 2845} 2846 2847void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { 2848 assert(VM_Version::supports_avx(), ""); 2849 emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false); 2850} 2851 2852void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) { 2853 assert(VM_Version::supports_avx(), ""); 2854 emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false); 2855} 2856 2857void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) { 2858 assert(VM_Version::supports_avx(), ""); 2859 emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false); 2860} 2861 2862//====================VECTOR ARITHMETIC===================================== 2863 2864// Float-point vector arithmetic 2865 2866void Assembler::addpd(XMMRegister dst, XMMRegister src) { 2867 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2868 emit_simd_arith(0x58, dst, src, VEX_SIMD_66); 2869} 2870 2871void Assembler::addps(XMMRegister dst, XMMRegister src) { 2872 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2873 emit_simd_arith(0x58, dst, src, VEX_SIMD_NONE); 2874} 2875 2876void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 2877 assert(VM_Version::supports_avx(), ""); 2878 emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector256); 2879} 2880 2881void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 2882 assert(VM_Version::supports_avx(), ""); 2883 emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector256); 2884} 2885 2886void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 2887 assert(VM_Version::supports_avx(), ""); 2888 emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector256); 2889} 2890 2891void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 2892 assert(VM_Version::supports_avx(), ""); 2893 emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector256); 2894} 2895 2896void Assembler::subpd(XMMRegister dst, XMMRegister src) { 2897 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2898 emit_simd_arith(0x5C, dst, src, VEX_SIMD_66); 2899} 2900 2901void Assembler::subps(XMMRegister dst, XMMRegister src) { 2902 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2903 emit_simd_arith(0x5C, dst, src, VEX_SIMD_NONE); 2904} 2905 2906void Assembler::vsubpd(XMMRegister dst, 
XMMRegister nds, XMMRegister src, bool vector256) { 2907 assert(VM_Version::supports_avx(), ""); 2908 emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector256); 2909} 2910 2911void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 2912 assert(VM_Version::supports_avx(), ""); 2913 emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector256); 2914} 2915 2916void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 2917 assert(VM_Version::supports_avx(), ""); 2918 emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector256); 2919} 2920 2921void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 2922 assert(VM_Version::supports_avx(), ""); 2923 emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector256); 2924} 2925 2926void Assembler::mulpd(XMMRegister dst, XMMRegister src) { 2927 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2928 emit_simd_arith(0x59, dst, src, VEX_SIMD_66); 2929} 2930 2931void Assembler::mulps(XMMRegister dst, XMMRegister src) { 2932 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2933 emit_simd_arith(0x59, dst, src, VEX_SIMD_NONE); 2934} 2935 2936void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 2937 assert(VM_Version::supports_avx(), ""); 2938 emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector256); 2939} 2940 2941void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 2942 assert(VM_Version::supports_avx(), ""); 2943 emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector256); 2944} 2945 2946void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 2947 assert(VM_Version::supports_avx(), ""); 2948 emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector256); 2949} 2950 2951void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 2952 assert(VM_Version::supports_avx(), ""); 2953 emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector256); 2954} 2955 2956void Assembler::divpd(XMMRegister dst, XMMRegister src) { 2957 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2958 emit_simd_arith(0x5E, dst, src, VEX_SIMD_66); 2959} 2960 2961void Assembler::divps(XMMRegister dst, XMMRegister src) { 2962 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2963 emit_simd_arith(0x5E, dst, src, VEX_SIMD_NONE); 2964} 2965 2966void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 2967 assert(VM_Version::supports_avx(), ""); 2968 emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector256); 2969} 2970 2971void Assembler::vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 2972 assert(VM_Version::supports_avx(), ""); 2973 emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector256); 2974} 2975 2976void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 2977 assert(VM_Version::supports_avx(), ""); 2978 emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector256); 2979} 2980 2981void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 2982 assert(VM_Version::supports_avx(), ""); 2983 emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector256); 2984} 2985 2986void Assembler::andpd(XMMRegister dst, XMMRegister src) { 2987 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2988 emit_simd_arith(0x54, dst, src, VEX_SIMD_66); 2989} 2990 2991void Assembler::andps(XMMRegister dst, XMMRegister src) { 2992 
NOT_LP64(assert(VM_Version::supports_sse(), "")); 2993 emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE); 2994} 2995 2996void Assembler::andps(XMMRegister dst, Address src) { 2997 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2998 emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE); 2999} 3000 3001void Assembler::andpd(XMMRegister dst, Address src) { 3002 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3003 emit_simd_arith(0x54, dst, src, VEX_SIMD_66); 3004} 3005 3006void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3007 assert(VM_Version::supports_avx(), ""); 3008 emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector256); 3009} 3010 3011void Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3012 assert(VM_Version::supports_avx(), ""); 3013 emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector256); 3014} 3015 3016void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3017 assert(VM_Version::supports_avx(), ""); 3018 emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector256); 3019} 3020 3021void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3022 assert(VM_Version::supports_avx(), ""); 3023 emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector256); 3024} 3025 3026void Assembler::xorpd(XMMRegister dst, XMMRegister src) { 3027 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3028 emit_simd_arith(0x57, dst, src, VEX_SIMD_66); 3029} 3030 3031void Assembler::xorps(XMMRegister dst, XMMRegister src) { 3032 NOT_LP64(assert(VM_Version::supports_sse(), "")); 3033 emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE); 3034} 3035 3036void Assembler::xorpd(XMMRegister dst, Address src) { 3037 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3038 emit_simd_arith(0x57, dst, src, VEX_SIMD_66); 3039} 3040 3041void Assembler::xorps(XMMRegister dst, Address src) { 3042 NOT_LP64(assert(VM_Version::supports_sse(), "")); 3043 emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE); 3044} 3045 3046void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3047 assert(VM_Version::supports_avx(), ""); 3048 emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector256); 3049} 3050 3051void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3052 assert(VM_Version::supports_avx(), ""); 3053 emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector256); 3054} 3055 3056void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3057 assert(VM_Version::supports_avx(), ""); 3058 emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector256); 3059} 3060 3061void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3062 assert(VM_Version::supports_avx(), ""); 3063 emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector256); 3064} 3065 3066 3067// Integer vector arithmetic 3068void Assembler::paddb(XMMRegister dst, XMMRegister src) { 3069 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3070 emit_simd_arith(0xFC, dst, src, VEX_SIMD_66); 3071} 3072 3073void Assembler::paddw(XMMRegister dst, XMMRegister src) { 3074 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3075 emit_simd_arith(0xFD, dst, src, VEX_SIMD_66); 3076} 3077 3078void Assembler::paddd(XMMRegister dst, XMMRegister src) { 3079 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3080 emit_simd_arith(0xFE, dst, src, VEX_SIMD_66); 3081} 3082 3083void Assembler::paddq(XMMRegister dst, XMMRegister src) 
{ 3084 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3085 emit_simd_arith(0xD4, dst, src, VEX_SIMD_66); 3086} 3087 3088void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3089 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3090 emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector256); 3091} 3092 3093void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3094 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3095 emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector256); 3096} 3097 3098void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3099 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3100 emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector256); 3101} 3102 3103void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3104 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3105 emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector256); 3106} 3107 3108void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3109 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3110 emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector256); 3111} 3112 3113void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3114 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3115 emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector256); 3116} 3117 3118void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3119 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3120 emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector256); 3121} 3122 3123void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3124 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3125 emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector256); 3126} 3127 3128void Assembler::psubb(XMMRegister dst, XMMRegister src) { 3129 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3130 emit_simd_arith(0xF8, dst, src, VEX_SIMD_66); 3131} 3132 3133void Assembler::psubw(XMMRegister dst, XMMRegister src) { 3134 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3135 emit_simd_arith(0xF9, dst, src, VEX_SIMD_66); 3136} 3137 3138void Assembler::psubd(XMMRegister dst, XMMRegister src) { 3139 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3140 emit_simd_arith(0xFA, dst, src, VEX_SIMD_66); 3141} 3142 3143void Assembler::psubq(XMMRegister dst, XMMRegister src) { 3144 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3145 emit_simd_arith(0xFB, dst, src, VEX_SIMD_66); 3146} 3147 3148void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3149 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3150 emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector256); 3151} 3152 3153void 
Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3154 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3155 emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector256); 3156} 3157 3158void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3159 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3160 emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector256); 3161} 3162 3163void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3164 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3165 emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector256); 3166} 3167 3168void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3169 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3170 emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector256); 3171} 3172 3173void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3174 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3175 emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector256); 3176} 3177 3178void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3179 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3180 emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector256); 3181} 3182 3183void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3184 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3185 emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector256); 3186} 3187 3188void Assembler::pmullw(XMMRegister dst, XMMRegister src) { 3189 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3190 emit_simd_arith(0xD5, dst, src, VEX_SIMD_66); 3191} 3192 3193void Assembler::pmulld(XMMRegister dst, XMMRegister src) { 3194 assert(VM_Version::supports_sse4_1(), ""); 3195 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); 3196 emit_byte(0x40); 3197 emit_byte(0xC0 | encode); 3198} 3199 3200void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3201 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3202 emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector256); 3203} 3204 3205void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3206 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3207 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38); 3208 emit_byte(0x40); 3209 emit_byte(0xC0 | encode); 3210} 3211 3212void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3213 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3214 emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector256); 3215} 3216 3217void Assembler::vpmulld(XMMRegister dst, 
XMMRegister nds, Address src, bool vector256) { 3218 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3219 InstructionMark im(this); 3220 int dst_enc = dst->encoding(); 3221 int nds_enc = nds->is_valid() ? nds->encoding() : 0; 3222 vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector256); 3223 emit_byte(0x40); 3224 emit_operand(dst, src); 3225} 3226 3227// Shift packed integers left by specified number of bits. 3228void Assembler::psllw(XMMRegister dst, int shift) { 3229 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3230 // XMM6 is for /6 encoding: 66 0F 71 /6 ib 3231 int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66); 3232 emit_byte(0x71); 3233 emit_byte(0xC0 | encode); 3234 emit_byte(shift & 0xFF); 3235} 3236 3237void Assembler::pslld(XMMRegister dst, int shift) { 3238 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3239 // XMM6 is for /6 encoding: 66 0F 72 /6 ib 3240 int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66); 3241 emit_byte(0x72); 3242 emit_byte(0xC0 | encode); 3243 emit_byte(shift & 0xFF); 3244} 3245 3246void Assembler::psllq(XMMRegister dst, int shift) { 3247 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3248 // XMM6 is for /6 encoding: 66 0F 73 /6 ib 3249 int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66); 3250 emit_byte(0x73); 3251 emit_byte(0xC0 | encode); 3252 emit_byte(shift & 0xFF); 3253} 3254 3255void Assembler::psllw(XMMRegister dst, XMMRegister shift) { 3256 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3257 emit_simd_arith(0xF1, dst, shift, VEX_SIMD_66); 3258} 3259 3260void Assembler::pslld(XMMRegister dst, XMMRegister shift) { 3261 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3262 emit_simd_arith(0xF2, dst, shift, VEX_SIMD_66); 3263} 3264 3265void Assembler::psllq(XMMRegister dst, XMMRegister shift) { 3266 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3267 emit_simd_arith(0xF3, dst, shift, VEX_SIMD_66); 3268} 3269 3270void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, bool vector256) { 3271 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3272 // XMM6 is for /6 encoding: 66 0F 71 /6 ib 3273 emit_vex_arith(0x71, xmm6, dst, src, VEX_SIMD_66, vector256); 3274 emit_byte(shift & 0xFF); 3275} 3276 3277void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, bool vector256) { 3278 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3279 // XMM6 is for /6 encoding: 66 0F 72 /6 ib 3280 emit_vex_arith(0x72, xmm6, dst, src, VEX_SIMD_66, vector256); 3281 emit_byte(shift & 0xFF); 3282} 3283 3284void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, bool vector256) { 3285 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3286 // XMM6 is for /6 encoding: 66 0F 73 /6 ib 3287 emit_vex_arith(0x73, xmm6, dst, src, VEX_SIMD_66, vector256); 3288 emit_byte(shift & 0xFF); 3289} 3290 3291void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) { 3292 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3293 emit_vex_arith(0xF1, dst, src, shift, VEX_SIMD_66, vector256); 3294} 3295 3296void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool 
vector256) { 3297 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3298 emit_vex_arith(0xF2, dst, src, shift, VEX_SIMD_66, vector256); 3299} 3300 3301void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) { 3302 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3303 emit_vex_arith(0xF3, dst, src, shift, VEX_SIMD_66, vector256); 3304} 3305 3306// Shift packed integers logically right by specified number of bits. 3307void Assembler::psrlw(XMMRegister dst, int shift) { 3308 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3309 // XMM2 is for /2 encoding: 66 0F 71 /2 ib 3310 int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66); 3311 emit_byte(0x71); 3312 emit_byte(0xC0 | encode); 3313 emit_byte(shift & 0xFF); 3314} 3315 3316void Assembler::psrld(XMMRegister dst, int shift) { 3317 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3318 // XMM2 is for /2 encoding: 66 0F 72 /2 ib 3319 int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66); 3320 emit_byte(0x72); 3321 emit_byte(0xC0 | encode); 3322 emit_byte(shift & 0xFF); 3323} 3324 3325void Assembler::psrlq(XMMRegister dst, int shift) { 3326 // Do not confuse it with psrldq SSE2 instruction which 3327 // shifts 128 bit value in xmm register by number of bytes. 3328 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3329 // XMM2 is for /2 encoding: 66 0F 73 /2 ib 3330 int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66); 3331 emit_byte(0x73); 3332 emit_byte(0xC0 | encode); 3333 emit_byte(shift & 0xFF); 3334} 3335 3336void Assembler::psrlw(XMMRegister dst, XMMRegister shift) { 3337 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3338 emit_simd_arith(0xD1, dst, shift, VEX_SIMD_66); 3339} 3340 3341void Assembler::psrld(XMMRegister dst, XMMRegister shift) { 3342 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3343 emit_simd_arith(0xD2, dst, shift, VEX_SIMD_66); 3344} 3345 3346void Assembler::psrlq(XMMRegister dst, XMMRegister shift) { 3347 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3348 emit_simd_arith(0xD3, dst, shift, VEX_SIMD_66); 3349} 3350 3351void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, bool vector256) { 3352 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3353 // XMM2 is for /2 encoding: 66 0F 73 /2 ib 3354 emit_vex_arith(0x71, xmm2, dst, src, VEX_SIMD_66, vector256); 3355 emit_byte(shift & 0xFF); 3356} 3357 3358void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, bool vector256) { 3359 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3360 // XMM2 is for /2 encoding: 66 0F 73 /2 ib 3361 emit_vex_arith(0x72, xmm2, dst, src, VEX_SIMD_66, vector256); 3362 emit_byte(shift & 0xFF); 3363} 3364 3365void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, bool vector256) { 3366 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3367 // XMM2 is for /2 encoding: 66 0F 73 /2 ib 3368 emit_vex_arith(0x73, xmm2, dst, src, VEX_SIMD_66, vector256); 3369 emit_byte(shift & 0xFF); 3370} 3371 3372void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) { 3373 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), 
"256 bit integer vectors requires AVX2"); 3374 emit_vex_arith(0xD1, dst, src, shift, VEX_SIMD_66, vector256); 3375} 3376 3377void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) { 3378 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3379 emit_vex_arith(0xD2, dst, src, shift, VEX_SIMD_66, vector256); 3380} 3381 3382void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) { 3383 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3384 emit_vex_arith(0xD3, dst, src, shift, VEX_SIMD_66, vector256); 3385} 3386 3387// Shift packed integers arithmetically right by specified number of bits. 3388void Assembler::psraw(XMMRegister dst, int shift) { 3389 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3390 // XMM4 is for /4 encoding: 66 0F 71 /4 ib 3391 int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66); 3392 emit_byte(0x71); 3393 emit_byte(0xC0 | encode); 3394 emit_byte(shift & 0xFF); 3395} 3396 3397void Assembler::psrad(XMMRegister dst, int shift) { 3398 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3399 // XMM4 is for /4 encoding: 66 0F 72 /4 ib 3400 int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66); 3401 emit_byte(0x72); 3402 emit_byte(0xC0 | encode); 3403 emit_byte(shift & 0xFF); 3404} 3405 3406void Assembler::psraw(XMMRegister dst, XMMRegister shift) { 3407 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3408 emit_simd_arith(0xE1, dst, shift, VEX_SIMD_66); 3409} 3410 3411void Assembler::psrad(XMMRegister dst, XMMRegister shift) { 3412 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3413 emit_simd_arith(0xE2, dst, shift, VEX_SIMD_66); 3414} 3415 3416void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, bool vector256) { 3417 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3418 // XMM4 is for /4 encoding: 66 0F 71 /4 ib 3419 emit_vex_arith(0x71, xmm4, dst, src, VEX_SIMD_66, vector256); 3420 emit_byte(shift & 0xFF); 3421} 3422 3423void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, bool vector256) { 3424 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3425 // XMM4 is for /4 encoding: 66 0F 71 /4 ib 3426 emit_vex_arith(0x72, xmm4, dst, src, VEX_SIMD_66, vector256); 3427 emit_byte(shift & 0xFF); 3428} 3429 3430void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) { 3431 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3432 emit_vex_arith(0xE1, dst, src, shift, VEX_SIMD_66, vector256); 3433} 3434 3435void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) { 3436 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3437 emit_vex_arith(0xE2, dst, src, shift, VEX_SIMD_66, vector256); 3438} 3439 3440 3441// AND packed integers 3442void Assembler::pand(XMMRegister dst, XMMRegister src) { 3443 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3444 emit_simd_arith(0xDB, dst, src, VEX_SIMD_66); 3445} 3446 3447void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3448 assert(VM_Version::supports_avx() && !vector256 || 
VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3449 emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector256); 3450} 3451 3452void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3453 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3454 emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector256); 3455} 3456 3457void Assembler::por(XMMRegister dst, XMMRegister src) { 3458 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3459 emit_simd_arith(0xEB, dst, src, VEX_SIMD_66); 3460} 3461 3462void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3463 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3464 emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector256); 3465} 3466 3467void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3468 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3469 emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector256); 3470} 3471 3472void Assembler::pxor(XMMRegister dst, XMMRegister src) { 3473 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3474 emit_simd_arith(0xEF, dst, src, VEX_SIMD_66); 3475} 3476 3477void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3478 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3479 emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector256); 3480} 3481 3482void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3483 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3484 emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector256); 3485} 3486 3487 3488void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) { 3489 assert(VM_Version::supports_avx(), ""); 3490 bool vector256 = true; 3491 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A); 3492 emit_byte(0x18); 3493 emit_byte(0xC0 | encode); 3494 // 0x00 - insert into lower 128 bits 3495 // 0x01 - insert into upper 128 bits 3496 emit_byte(0x01); 3497} 3498 3499void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) { 3500 assert(VM_Version::supports_avx2(), ""); 3501 bool vector256 = true; 3502 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A); 3503 emit_byte(0x38); 3504 emit_byte(0xC0 | encode); 3505 // 0x00 - insert into lower 128 bits 3506 // 0x01 - insert into upper 128 bits 3507 emit_byte(0x01); 3508} 3509 3510void Assembler::vzeroupper() { 3511 assert(VM_Version::supports_avx(), ""); 3512 (void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE); 3513 emit_byte(0x77); 3514} 3515 3516 3517#ifndef _LP64 3518// 32bit only pieces of the assembler 3519 3520void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) { 3521 // NO PREFIX AS NEVER 64BIT 3522 InstructionMark im(this); 3523 emit_byte(0x81); 3524 emit_byte(0xF8 | src1->encoding()); 3525 emit_data(imm32, rspec, 0); 3526} 3527 3528void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) { 3529 // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs 3530 InstructionMark 
im(this); 3531 emit_byte(0x81); 3532 emit_operand(rdi, src1); 3533 emit_data(imm32, rspec, 0); 3534} 3535 3536// The 64-bit (32bit platform) cmpxchg compares the value at adr with the contents of rdx:rax, 3537// and stores rcx:rbx into adr if so; otherwise, the value at adr is loaded 3538// into rdx:rax. The ZF is set if the compared values were equal, and cleared otherwise. 3539void Assembler::cmpxchg8(Address adr) { 3540 InstructionMark im(this); 3541 emit_byte(0x0F); 3542 emit_byte(0xc7); 3543 emit_operand(rcx, adr); 3544} 3545 3546void Assembler::decl(Register dst) { 3547 // Don't use it directly. Use MacroAssembler::decrementl() instead. 3548 emit_byte(0x48 | dst->encoding()); 3549} 3550 3551#endif // _LP64 3552 3553// 64bit typically doesn't use the x87 but needs to for the trig funcs 3554 3555void Assembler::fabs() { 3556 emit_byte(0xD9); 3557 emit_byte(0xE1); 3558} 3559 3560void Assembler::fadd(int i) { 3561 emit_farith(0xD8, 0xC0, i); 3562} 3563 3564void Assembler::fadd_d(Address src) { 3565 InstructionMark im(this); 3566 emit_byte(0xDC); 3567 emit_operand32(rax, src); 3568} 3569 3570void Assembler::fadd_s(Address src) { 3571 InstructionMark im(this); 3572 emit_byte(0xD8); 3573 emit_operand32(rax, src); 3574} 3575 3576void Assembler::fadda(int i) { 3577 emit_farith(0xDC, 0xC0, i); 3578} 3579 3580void Assembler::faddp(int i) { 3581 emit_farith(0xDE, 0xC0, i); 3582} 3583 3584void Assembler::fchs() { 3585 emit_byte(0xD9); 3586 emit_byte(0xE0); 3587} 3588 3589void Assembler::fcom(int i) { 3590 emit_farith(0xD8, 0xD0, i); 3591} 3592 3593void Assembler::fcomp(int i) { 3594 emit_farith(0xD8, 0xD8, i); 3595} 3596 3597void Assembler::fcomp_d(Address src) { 3598 InstructionMark im(this); 3599 emit_byte(0xDC); 3600 emit_operand32(rbx, src); 3601} 3602 3603void Assembler::fcomp_s(Address src) { 3604 InstructionMark im(this); 3605 emit_byte(0xD8); 3606 emit_operand32(rbx, src); 3607} 3608 3609void Assembler::fcompp() { 3610 emit_byte(0xDE); 3611 emit_byte(0xD9); 3612} 3613 3614void Assembler::fcos() { 3615 emit_byte(0xD9); 3616 emit_byte(0xFF); 3617} 3618 3619void Assembler::fdecstp() { 3620 emit_byte(0xD9); 3621 emit_byte(0xF6); 3622} 3623 3624void Assembler::fdiv(int i) { 3625 emit_farith(0xD8, 0xF0, i); 3626} 3627 3628void Assembler::fdiv_d(Address src) { 3629 InstructionMark im(this); 3630 emit_byte(0xDC); 3631 emit_operand32(rsi, src); 3632} 3633 3634void Assembler::fdiv_s(Address src) { 3635 InstructionMark im(this); 3636 emit_byte(0xD8); 3637 emit_operand32(rsi, src); 3638} 3639 3640void Assembler::fdiva(int i) { 3641 emit_farith(0xDC, 0xF8, i); 3642} 3643 3644// Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994) 3645// is erroneous for some of the floating-point instructions below. 
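// (The affected instructions are the pop variants fdivp, fdivrp, fsubp and
// fsubrp below: the 1994 manual documents the operand direction reversed
// from what the hardware actually does, so the per-instruction comments
// record the observed behavior and are flagged "(Intel manual wrong)".)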
3646 3647void Assembler::fdivp(int i) { 3648 emit_farith(0xDE, 0xF8, i); // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong) 3649} 3650 3651void Assembler::fdivr(int i) { 3652 emit_farith(0xD8, 0xF8, i); 3653} 3654 3655void Assembler::fdivr_d(Address src) { 3656 InstructionMark im(this); 3657 emit_byte(0xDC); 3658 emit_operand32(rdi, src); 3659} 3660 3661void Assembler::fdivr_s(Address src) { 3662 InstructionMark im(this); 3663 emit_byte(0xD8); 3664 emit_operand32(rdi, src); 3665} 3666 3667void Assembler::fdivra(int i) { 3668 emit_farith(0xDC, 0xF0, i); 3669} 3670 3671void Assembler::fdivrp(int i) { 3672 emit_farith(0xDE, 0xF0, i); // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong) 3673} 3674 3675void Assembler::ffree(int i) { 3676 emit_farith(0xDD, 0xC0, i); 3677} 3678 3679void Assembler::fild_d(Address adr) { 3680 InstructionMark im(this); 3681 emit_byte(0xDF); 3682 emit_operand32(rbp, adr); 3683} 3684 3685void Assembler::fild_s(Address adr) { 3686 InstructionMark im(this); 3687 emit_byte(0xDB); 3688 emit_operand32(rax, adr); 3689} 3690 3691void Assembler::fincstp() { 3692 emit_byte(0xD9); 3693 emit_byte(0xF7); 3694} 3695 3696void Assembler::finit() { 3697 emit_byte(0x9B); 3698 emit_byte(0xDB); 3699 emit_byte(0xE3); 3700} 3701 3702void Assembler::fist_s(Address adr) { 3703 InstructionMark im(this); 3704 emit_byte(0xDB); 3705 emit_operand32(rdx, adr); 3706} 3707 3708void Assembler::fistp_d(Address adr) { 3709 InstructionMark im(this); 3710 emit_byte(0xDF); 3711 emit_operand32(rdi, adr); 3712} 3713 3714void Assembler::fistp_s(Address adr) { 3715 InstructionMark im(this); 3716 emit_byte(0xDB); 3717 emit_operand32(rbx, adr); 3718} 3719 3720void Assembler::fld1() { 3721 emit_byte(0xD9); 3722 emit_byte(0xE8); 3723} 3724 3725void Assembler::fld_d(Address adr) { 3726 InstructionMark im(this); 3727 emit_byte(0xDD); 3728 emit_operand32(rax, adr); 3729} 3730 3731void Assembler::fld_s(Address adr) { 3732 InstructionMark im(this); 3733 emit_byte(0xD9); 3734 emit_operand32(rax, adr); 3735} 3736 3737 3738void Assembler::fld_s(int index) { 3739 emit_farith(0xD9, 0xC0, index); 3740} 3741 3742void Assembler::fld_x(Address adr) { 3743 InstructionMark im(this); 3744 emit_byte(0xDB); 3745 emit_operand32(rbp, adr); 3746} 3747 3748void Assembler::fldcw(Address src) { 3749 InstructionMark im(this); 3750 emit_byte(0xd9); 3751 emit_operand32(rbp, src); 3752} 3753 3754void Assembler::fldenv(Address src) { 3755 InstructionMark im(this); 3756 emit_byte(0xD9); 3757 emit_operand32(rsp, src); 3758} 3759 3760void Assembler::fldlg2() { 3761 emit_byte(0xD9); 3762 emit_byte(0xEC); 3763} 3764 3765void Assembler::fldln2() { 3766 emit_byte(0xD9); 3767 emit_byte(0xED); 3768} 3769 3770void Assembler::fldz() { 3771 emit_byte(0xD9); 3772 emit_byte(0xEE); 3773} 3774 3775void Assembler::flog() { 3776 fldln2(); 3777 fxch(); 3778 fyl2x(); 3779} 3780 3781void Assembler::flog10() { 3782 fldlg2(); 3783 fxch(); 3784 fyl2x(); 3785} 3786 3787void Assembler::fmul(int i) { 3788 emit_farith(0xD8, 0xC8, i); 3789} 3790 3791void Assembler::fmul_d(Address src) { 3792 InstructionMark im(this); 3793 emit_byte(0xDC); 3794 emit_operand32(rcx, src); 3795} 3796 3797void Assembler::fmul_s(Address src) { 3798 InstructionMark im(this); 3799 emit_byte(0xD8); 3800 emit_operand32(rcx, src); 3801} 3802 3803void Assembler::fmula(int i) { 3804 emit_farith(0xDC, 0xC8, i); 3805} 3806 3807void Assembler::fmulp(int i) { 3808 emit_farith(0xDE, 0xC8, i); 3809} 3810 3811void Assembler::fnsave(Address dst) { 3812 InstructionMark im(this); 3813 emit_byte(0xDD); 
3814 emit_operand32(rsi, dst); 3815} 3816 3817void Assembler::fnstcw(Address src) { 3818 InstructionMark im(this); 3819 emit_byte(0x9B); 3820 emit_byte(0xD9); 3821 emit_operand32(rdi, src); 3822} 3823 3824void Assembler::fnstsw_ax() { 3825 emit_byte(0xdF); 3826 emit_byte(0xE0); 3827} 3828 3829void Assembler::fprem() { 3830 emit_byte(0xD9); 3831 emit_byte(0xF8); 3832} 3833 3834void Assembler::fprem1() { 3835 emit_byte(0xD9); 3836 emit_byte(0xF5); 3837} 3838 3839void Assembler::frstor(Address src) { 3840 InstructionMark im(this); 3841 emit_byte(0xDD); 3842 emit_operand32(rsp, src); 3843} 3844 3845void Assembler::fsin() { 3846 emit_byte(0xD9); 3847 emit_byte(0xFE); 3848} 3849 3850void Assembler::fsqrt() { 3851 emit_byte(0xD9); 3852 emit_byte(0xFA); 3853} 3854 3855void Assembler::fst_d(Address adr) { 3856 InstructionMark im(this); 3857 emit_byte(0xDD); 3858 emit_operand32(rdx, adr); 3859} 3860 3861void Assembler::fst_s(Address adr) { 3862 InstructionMark im(this); 3863 emit_byte(0xD9); 3864 emit_operand32(rdx, adr); 3865} 3866 3867void Assembler::fstp_d(Address adr) { 3868 InstructionMark im(this); 3869 emit_byte(0xDD); 3870 emit_operand32(rbx, adr); 3871} 3872 3873void Assembler::fstp_d(int index) { 3874 emit_farith(0xDD, 0xD8, index); 3875} 3876 3877void Assembler::fstp_s(Address adr) { 3878 InstructionMark im(this); 3879 emit_byte(0xD9); 3880 emit_operand32(rbx, adr); 3881} 3882 3883void Assembler::fstp_x(Address adr) { 3884 InstructionMark im(this); 3885 emit_byte(0xDB); 3886 emit_operand32(rdi, adr); 3887} 3888 3889void Assembler::fsub(int i) { 3890 emit_farith(0xD8, 0xE0, i); 3891} 3892 3893void Assembler::fsub_d(Address src) { 3894 InstructionMark im(this); 3895 emit_byte(0xDC); 3896 emit_operand32(rsp, src); 3897} 3898 3899void Assembler::fsub_s(Address src) { 3900 InstructionMark im(this); 3901 emit_byte(0xD8); 3902 emit_operand32(rsp, src); 3903} 3904 3905void Assembler::fsuba(int i) { 3906 emit_farith(0xDC, 0xE8, i); 3907} 3908 3909void Assembler::fsubp(int i) { 3910 emit_farith(0xDE, 0xE8, i); // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong) 3911} 3912 3913void Assembler::fsubr(int i) { 3914 emit_farith(0xD8, 0xE8, i); 3915} 3916 3917void Assembler::fsubr_d(Address src) { 3918 InstructionMark im(this); 3919 emit_byte(0xDC); 3920 emit_operand32(rbp, src); 3921} 3922 3923void Assembler::fsubr_s(Address src) { 3924 InstructionMark im(this); 3925 emit_byte(0xD8); 3926 emit_operand32(rbp, src); 3927} 3928 3929void Assembler::fsubra(int i) { 3930 emit_farith(0xDC, 0xE0, i); 3931} 3932 3933void Assembler::fsubrp(int i) { 3934 emit_farith(0xDE, 0xE0, i); // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong) 3935} 3936 3937void Assembler::ftan() { 3938 emit_byte(0xD9); 3939 emit_byte(0xF2); 3940 emit_byte(0xDD); 3941 emit_byte(0xD8); 3942} 3943 3944void Assembler::ftst() { 3945 emit_byte(0xD9); 3946 emit_byte(0xE4); 3947} 3948 3949void Assembler::fucomi(int i) { 3950 // make sure the instruction is supported (introduced for P6, together with cmov) 3951 guarantee(VM_Version::supports_cmov(), "illegal instruction"); 3952 emit_farith(0xDB, 0xE8, i); 3953} 3954 3955void Assembler::fucomip(int i) { 3956 // make sure the instruction is supported (introduced for P6, together with cmov) 3957 guarantee(VM_Version::supports_cmov(), "illegal instruction"); 3958 emit_farith(0xDF, 0xE8, i); 3959} 3960 3961void Assembler::fwait() { 3962 emit_byte(0x9B); 3963} 3964 3965void Assembler::fxch(int i) { 3966 emit_farith(0xD9, 0xC8, i); 3967} 3968 3969void Assembler::fyl2x() { 3970 emit_byte(0xD9); 3971 
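  // D9 F1 is FYL2X: ST(1) <- ST(1) * log2(ST(0)), then pop. flog() and
  // flog10() above rely on the identities ln(x) = ln(2) * log2(x) and
  // log10(x) = log10(2) * log2(x), pushing the matching constant first
  // via fldln2() / fldlg2().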
emit_byte(0xF1); 3972} 3973 3974void Assembler::frndint() { 3975 emit_byte(0xD9); 3976 emit_byte(0xFC); 3977} 3978 3979void Assembler::f2xm1() { 3980 emit_byte(0xD9); 3981 emit_byte(0xF0); 3982} 3983 3984void Assembler::fldl2e() { 3985 emit_byte(0xD9); 3986 emit_byte(0xEA); 3987} 3988 3989// SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding. 3990static int simd_pre[4] = { 0, 0x66, 0xF3, 0xF2 }; 3991// SSE opcode second byte values (first is 0x0F) corresponding to VexOpcode encoding. 3992static int simd_opc[4] = { 0, 0, 0x38, 0x3A }; 3993 3994// Generate SSE legacy REX prefix and SIMD opcode based on VEX encoding. 3995void Assembler::rex_prefix(Address adr, XMMRegister xreg, VexSimdPrefix pre, VexOpcode opc, bool rex_w) { 3996 if (pre > 0) { 3997 emit_byte(simd_pre[pre]); 3998 } 3999 if (rex_w) { 4000 prefixq(adr, xreg); 4001 } else { 4002 prefix(adr, xreg); 4003 } 4004 if (opc > 0) { 4005 emit_byte(0x0F); 4006 int opc2 = simd_opc[opc]; 4007 if (opc2 > 0) { 4008 emit_byte(opc2); 4009 } 4010 } 4011} 4012 4013int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) { 4014 if (pre > 0) { 4015 emit_byte(simd_pre[pre]); 4016 } 4017 int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) : 4018 prefix_and_encode(dst_enc, src_enc); 4019 if (opc > 0) { 4020 emit_byte(0x0F); 4021 int opc2 = simd_opc[opc]; 4022 if (opc2 > 0) { 4023 emit_byte(opc2); 4024 } 4025 } 4026 return encode; 4027} 4028 4029 4030void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int nds_enc, VexSimdPrefix pre, VexOpcode opc, bool vector256) { 4031 if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) { 4032 prefix(VEX_3bytes); 4033 4034 int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0); 4035 byte1 = (~byte1) & 0xE0; 4036 byte1 |= opc; 4037 a_byte(byte1); 4038 4039 int byte2 = ((~nds_enc) & 0xf) << 3; 4040 byte2 |= (vex_w ? VEX_W : 0) | (vector256 ? 4 : 0) | pre; 4041 emit_byte(byte2); 4042 } else { 4043 prefix(VEX_2bytes); 4044 4045 int byte1 = vex_r ? VEX_R : 0; 4046 byte1 = (~byte1) & 0x80; 4047 byte1 |= ((~nds_enc) & 0xf) << 3; 4048 byte1 |= (vector256 ? 4 : 0) | pre; 4049 emit_byte(byte1); 4050 } 4051} 4052 4053void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256){ 4054 bool vex_r = (xreg_enc >= 8); 4055 bool vex_b = adr.base_needs_rex(); 4056 bool vex_x = adr.index_needs_rex(); 4057 vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256); 4058} 4059 4060int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256) { 4061 bool vex_r = (dst_enc >= 8); 4062 bool vex_b = (src_enc >= 8); 4063 bool vex_x = false; 4064 vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256); 4065 return (((dst_enc & 7) << 3) | (src_enc & 7)); 4066} 4067 4068 4069void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) { 4070 if (UseAVX > 0) { 4071 int xreg_enc = xreg->encoding(); 4072 int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; 4073 vex_prefix(adr, nds_enc, xreg_enc, pre, opc, rex_w, vector256); 4074 } else { 4075 assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding"); 4076 rex_prefix(adr, xreg, pre, opc, rex_w); 4077 } 4078} 4079 4080int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) { 4081 int dst_enc = dst->encoding(); 4082 int src_enc = src->encoding(); 4083 if (UseAVX > 0) { 4084 int nds_enc = nds->is_valid() ? nds->encoding() : 0; 4085 return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector256); 4086 } else { 4087 assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding"); 4088 return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, rex_w); 4089 } 4090} 4091 4092void Assembler::emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) { 4093 InstructionMark im(this); 4094 simd_prefix(dst, dst, src, pre); 4095 emit_byte(opcode); 4096 emit_operand(dst, src); 4097} 4098 4099void Assembler::emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) { 4100 int encode = simd_prefix_and_encode(dst, dst, src, pre); 4101 emit_byte(opcode); 4102 emit_byte(0xC0 | encode); 4103} 4104 4105// Versions with no second source register (non-destructive source). 4106void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) { 4107 InstructionMark im(this); 4108 simd_prefix(dst, xnoreg, src, pre); 4109 emit_byte(opcode); 4110 emit_operand(dst, src); 4111} 4112 4113void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) { 4114 int encode = simd_prefix_and_encode(dst, xnoreg, src, pre); 4115 emit_byte(opcode); 4116 emit_byte(0xC0 | encode); 4117} 4118 4119// 3-operands AVX instructions 4120void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, 4121 Address src, VexSimdPrefix pre, bool vector256) { 4122 InstructionMark im(this); 4123 vex_prefix(dst, nds, src, pre, vector256); 4124 emit_byte(opcode); 4125 emit_operand(dst, src); 4126} 4127 4128void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, 4129 XMMRegister src, VexSimdPrefix pre, bool vector256) { 4130 int encode = vex_prefix_and_encode(dst, nds, src, pre, vector256); 4131 emit_byte(opcode); 4132 emit_byte(0xC0 | encode); 4133} 4134 4135#ifndef _LP64 4136 4137void Assembler::incl(Register dst) { 4138 // Don't use it directly. Use MacroAssembler::incrementl() instead. 
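  // (0x40 | reg is the one-byte INC r32 encoding; it exists only in 32-bit
  // mode, because in 64-bit mode the bytes 0x40-0x4F are REX prefixes --
  // hence the two-byte FF /0 form used by the LP64 incl() later in this file.)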
  emit_byte(0x40 | dst->encoding());
}

void Assembler::lea(Register dst, Address src) {
  leal(dst, src);
}

void Assembler::mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_byte(0xC7);
  emit_operand(rax, dst);
  emit_data((int)imm32, rspec, 0);
}

void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);
  emit_data((int)imm32, rspec, 0);
}

void Assembler::popa() { // 32bit
  emit_byte(0x61);
}

void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_byte(0x68);
  emit_data(imm32, rspec, 0);
}

void Assembler::pusha() { // 32bit
  emit_byte(0x60);
}

void Assembler::set_byte_if_not_zero(Register dst) {
  emit_byte(0x0F);
  emit_byte(0x95);
  emit_byte(0xE0 | dst->encoding());
}

void Assembler::shldl(Register dst, Register src) {
  emit_byte(0x0F);
  emit_byte(0xA5);
  emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());
}

void Assembler::shrdl(Register dst, Register src) {
  emit_byte(0x0F);
  emit_byte(0xAD);
  emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());
}

#else // LP64

void Assembler::set_byte_if_not_zero(Register dst) {
  int enc = prefix_and_encode(dst->encoding(), true);
  emit_byte(0x0F);
  emit_byte(0x95);
  emit_byte(0xE0 | enc);
}

// 64bit only pieces of the assembler
// This should only be used by 64bit instructions that can use rip-relative
// addressing; it cannot be used by instructions that want an immediate value.

bool Assembler::reachable(AddressLiteral adr) {
  int64_t disp;
  // A reloc type of none will force a 64bit literal to the code stream. It is
  // likely a placeholder for something that will be patched later, and we
  // need to be certain it will always be reachable.
  if (adr.reloc() == relocInfo::none) {
    return false;
  }
  if (adr.reloc() == relocInfo::internal_word_type) {
    // This should be rip relative and easily reachable.
    return true;
  }
  if (adr.reloc() == relocInfo::virtual_call_type ||
      adr.reloc() == relocInfo::opt_virtual_call_type ||
      adr.reloc() == relocInfo::static_call_type ||
      adr.reloc() == relocInfo::static_stub_type ) {
    // This should be rip relative within the code cache and easily
    // reachable until we get huge code caches. (At which point
    // ic code is going to have issues).
    return true;
  }
  if (adr.reloc() != relocInfo::external_word_type &&
      adr.reloc() != relocInfo::poll_return_type &&  // these are really external_word but need special
      adr.reloc() != relocInfo::poll_type &&         // relocs to identify them
      adr.reloc() != relocInfo::runtime_call_type ) {
    return false;
  }

  // Stress the correction code
  if (ForceUnreachable) {
    // Must be a runtime-call-style reloc; see if it is in the code cache.
    // Flipping stuff in the codecache to be unreachable causes issues
    // with things like inline caches where the additional instructions
    // are not handled.
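    // (Reasoning behind the range checks below: if the target is within
    // disp32 range of both CodeCache::low_bound() and high_bound(), it is
    // within range of every possible emission point in between, so code can
    // be assembled in a temp buffer and still relocate safely.)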
4239 if (CodeCache::find_blob(adr._target) == NULL) { 4240 return false; 4241 } 4242 } 4243 // For external_word_type/runtime_call_type if it is reachable from where we 4244 // are now (possibly a temp buffer) and where we might end up 4245 // anywhere in the codeCache then we are always reachable. 4246 // This would have to change if we ever save/restore shared code 4247 // to be more pessimistic. 4248 disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int)); 4249 if (!is_simm32(disp)) return false; 4250 disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int)); 4251 if (!is_simm32(disp)) return false; 4252 4253 disp = (int64_t)adr._target - ((int64_t)_code_pos + sizeof(int)); 4254 4255 // Because rip relative is a disp + address_of_next_instruction and we 4256 // don't know the value of address_of_next_instruction we apply a fudge factor 4257 // to make sure we will be ok no matter the size of the instruction we get placed into. 4258 // We don't have to fudge the checks above here because they are already worst case. 4259 4260 // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp , 4-byte literal 4261 // + 4 because better safe than sorry. 4262 const int fudge = 12 + 4; 4263 if (disp < 0) { 4264 disp -= fudge; 4265 } else { 4266 disp += fudge; 4267 } 4268 return is_simm32(disp); 4269} 4270 4271// Check if the polling page is not reachable from the code cache using rip-relative 4272// addressing. 4273bool Assembler::is_polling_page_far() { 4274 intptr_t addr = (intptr_t)os::get_polling_page(); 4275 return ForceUnreachable || 4276 !is_simm32(addr - (intptr_t)CodeCache::low_bound()) || 4277 !is_simm32(addr - (intptr_t)CodeCache::high_bound()); 4278} 4279 4280void Assembler::emit_data64(jlong data, 4281 relocInfo::relocType rtype, 4282 int format) { 4283 if (rtype == relocInfo::none) { 4284 emit_long64(data); 4285 } else { 4286 emit_data64(data, Relocation::spec_simple(rtype), format); 4287 } 4288} 4289 4290void Assembler::emit_data64(jlong data, 4291 RelocationHolder const& rspec, 4292 int format) { 4293 assert(imm_operand == 0, "default format must be immediate in this file"); 4294 assert(imm_operand == format, "must be immediate"); 4295 assert(inst_mark() != NULL, "must be inside InstructionMark"); 4296 // Do not use AbstractAssembler::relocate, which is not intended for 4297 // embedded words. Instead, relocate to the enclosing instruction. 
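  // (inst_mark() is the address recorded by the enclosing InstructionMark,
  // i.e. the start of the current instruction, so the relocation attaches
  // to the instruction rather than to the embedded word itself.)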
4298 code_section()->relocate(inst_mark(), rspec, format); 4299#ifdef ASSERT 4300 check_relocation(rspec, format); 4301#endif 4302 emit_long64(data); 4303} 4304 4305int Assembler::prefix_and_encode(int reg_enc, bool byteinst) { 4306 if (reg_enc >= 8) { 4307 prefix(REX_B); 4308 reg_enc -= 8; 4309 } else if (byteinst && reg_enc >= 4) { 4310 prefix(REX); 4311 } 4312 return reg_enc; 4313} 4314 4315int Assembler::prefixq_and_encode(int reg_enc) { 4316 if (reg_enc < 8) { 4317 prefix(REX_W); 4318 } else { 4319 prefix(REX_WB); 4320 reg_enc -= 8; 4321 } 4322 return reg_enc; 4323} 4324 4325int Assembler::prefix_and_encode(int dst_enc, int src_enc, bool byteinst) { 4326 if (dst_enc < 8) { 4327 if (src_enc >= 8) { 4328 prefix(REX_B); 4329 src_enc -= 8; 4330 } else if (byteinst && src_enc >= 4) { 4331 prefix(REX); 4332 } 4333 } else { 4334 if (src_enc < 8) { 4335 prefix(REX_R); 4336 } else { 4337 prefix(REX_RB); 4338 src_enc -= 8; 4339 } 4340 dst_enc -= 8; 4341 } 4342 return dst_enc << 3 | src_enc; 4343} 4344 4345int Assembler::prefixq_and_encode(int dst_enc, int src_enc) { 4346 if (dst_enc < 8) { 4347 if (src_enc < 8) { 4348 prefix(REX_W); 4349 } else { 4350 prefix(REX_WB); 4351 src_enc -= 8; 4352 } 4353 } else { 4354 if (src_enc < 8) { 4355 prefix(REX_WR); 4356 } else { 4357 prefix(REX_WRB); 4358 src_enc -= 8; 4359 } 4360 dst_enc -= 8; 4361 } 4362 return dst_enc << 3 | src_enc; 4363} 4364 4365void Assembler::prefix(Register reg) { 4366 if (reg->encoding() >= 8) { 4367 prefix(REX_B); 4368 } 4369} 4370 4371void Assembler::prefix(Address adr) { 4372 if (adr.base_needs_rex()) { 4373 if (adr.index_needs_rex()) { 4374 prefix(REX_XB); 4375 } else { 4376 prefix(REX_B); 4377 } 4378 } else { 4379 if (adr.index_needs_rex()) { 4380 prefix(REX_X); 4381 } 4382 } 4383} 4384 4385void Assembler::prefixq(Address adr) { 4386 if (adr.base_needs_rex()) { 4387 if (adr.index_needs_rex()) { 4388 prefix(REX_WXB); 4389 } else { 4390 prefix(REX_WB); 4391 } 4392 } else { 4393 if (adr.index_needs_rex()) { 4394 prefix(REX_WX); 4395 } else { 4396 prefix(REX_W); 4397 } 4398 } 4399} 4400 4401 4402void Assembler::prefix(Address adr, Register reg, bool byteinst) { 4403 if (reg->encoding() < 8) { 4404 if (adr.base_needs_rex()) { 4405 if (adr.index_needs_rex()) { 4406 prefix(REX_XB); 4407 } else { 4408 prefix(REX_B); 4409 } 4410 } else { 4411 if (adr.index_needs_rex()) { 4412 prefix(REX_X); 4413 } else if (byteinst && reg->encoding() >= 4 ) { 4414 prefix(REX); 4415 } 4416 } 4417 } else { 4418 if (adr.base_needs_rex()) { 4419 if (adr.index_needs_rex()) { 4420 prefix(REX_RXB); 4421 } else { 4422 prefix(REX_RB); 4423 } 4424 } else { 4425 if (adr.index_needs_rex()) { 4426 prefix(REX_RX); 4427 } else { 4428 prefix(REX_R); 4429 } 4430 } 4431 } 4432} 4433 4434void Assembler::prefixq(Address adr, Register src) { 4435 if (src->encoding() < 8) { 4436 if (adr.base_needs_rex()) { 4437 if (adr.index_needs_rex()) { 4438 prefix(REX_WXB); 4439 } else { 4440 prefix(REX_WB); 4441 } 4442 } else { 4443 if (adr.index_needs_rex()) { 4444 prefix(REX_WX); 4445 } else { 4446 prefix(REX_W); 4447 } 4448 } 4449 } else { 4450 if (adr.base_needs_rex()) { 4451 if (adr.index_needs_rex()) { 4452 prefix(REX_WRXB); 4453 } else { 4454 prefix(REX_WRB); 4455 } 4456 } else { 4457 if (adr.index_needs_rex()) { 4458 prefix(REX_WRX); 4459 } else { 4460 prefix(REX_WR); 4461 } 4462 } 4463 } 4464} 4465 4466void Assembler::prefix(Address adr, XMMRegister reg) { 4467 if (reg->encoding() < 8) { 4468 if (adr.base_needs_rex()) { 4469 if (adr.index_needs_rex()) { 4470 prefix(REX_XB); 
4471 } else { 4472 prefix(REX_B); 4473 } 4474 } else { 4475 if (adr.index_needs_rex()) { 4476 prefix(REX_X); 4477 } 4478 } 4479 } else { 4480 if (adr.base_needs_rex()) { 4481 if (adr.index_needs_rex()) { 4482 prefix(REX_RXB); 4483 } else { 4484 prefix(REX_RB); 4485 } 4486 } else { 4487 if (adr.index_needs_rex()) { 4488 prefix(REX_RX); 4489 } else { 4490 prefix(REX_R); 4491 } 4492 } 4493 } 4494} 4495 4496void Assembler::prefixq(Address adr, XMMRegister src) { 4497 if (src->encoding() < 8) { 4498 if (adr.base_needs_rex()) { 4499 if (adr.index_needs_rex()) { 4500 prefix(REX_WXB); 4501 } else { 4502 prefix(REX_WB); 4503 } 4504 } else { 4505 if (adr.index_needs_rex()) { 4506 prefix(REX_WX); 4507 } else { 4508 prefix(REX_W); 4509 } 4510 } 4511 } else { 4512 if (adr.base_needs_rex()) { 4513 if (adr.index_needs_rex()) { 4514 prefix(REX_WRXB); 4515 } else { 4516 prefix(REX_WRB); 4517 } 4518 } else { 4519 if (adr.index_needs_rex()) { 4520 prefix(REX_WRX); 4521 } else { 4522 prefix(REX_WR); 4523 } 4524 } 4525 } 4526} 4527 4528void Assembler::adcq(Register dst, int32_t imm32) { 4529 (void) prefixq_and_encode(dst->encoding()); 4530 emit_arith(0x81, 0xD0, dst, imm32); 4531} 4532 4533void Assembler::adcq(Register dst, Address src) { 4534 InstructionMark im(this); 4535 prefixq(src, dst); 4536 emit_byte(0x13); 4537 emit_operand(dst, src); 4538} 4539 4540void Assembler::adcq(Register dst, Register src) { 4541 (int) prefixq_and_encode(dst->encoding(), src->encoding()); 4542 emit_arith(0x13, 0xC0, dst, src); 4543} 4544 4545void Assembler::addq(Address dst, int32_t imm32) { 4546 InstructionMark im(this); 4547 prefixq(dst); 4548 emit_arith_operand(0x81, rax, dst,imm32); 4549} 4550 4551void Assembler::addq(Address dst, Register src) { 4552 InstructionMark im(this); 4553 prefixq(dst, src); 4554 emit_byte(0x01); 4555 emit_operand(src, dst); 4556} 4557 4558void Assembler::addq(Register dst, int32_t imm32) { 4559 (void) prefixq_and_encode(dst->encoding()); 4560 emit_arith(0x81, 0xC0, dst, imm32); 4561} 4562 4563void Assembler::addq(Register dst, Address src) { 4564 InstructionMark im(this); 4565 prefixq(src, dst); 4566 emit_byte(0x03); 4567 emit_operand(dst, src); 4568} 4569 4570void Assembler::addq(Register dst, Register src) { 4571 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 4572 emit_arith(0x03, 0xC0, dst, src); 4573} 4574 4575void Assembler::andq(Address dst, int32_t imm32) { 4576 InstructionMark im(this); 4577 prefixq(dst); 4578 emit_byte(0x81); 4579 emit_operand(rsp, dst, 4); 4580 emit_long(imm32); 4581} 4582 4583void Assembler::andq(Register dst, int32_t imm32) { 4584 (void) prefixq_and_encode(dst->encoding()); 4585 emit_arith(0x81, 0xE0, dst, imm32); 4586} 4587 4588void Assembler::andq(Register dst, Address src) { 4589 InstructionMark im(this); 4590 prefixq(src, dst); 4591 emit_byte(0x23); 4592 emit_operand(dst, src); 4593} 4594 4595void Assembler::andq(Register dst, Register src) { 4596 (int) prefixq_and_encode(dst->encoding(), src->encoding()); 4597 emit_arith(0x23, 0xC0, dst, src); 4598} 4599 4600void Assembler::bsfq(Register dst, Register src) { 4601 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4602 emit_byte(0x0F); 4603 emit_byte(0xBC); 4604 emit_byte(0xC0 | encode); 4605} 4606 4607void Assembler::bsrq(Register dst, Register src) { 4608 assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT"); 4609 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4610 emit_byte(0x0F); 4611 emit_byte(0xBD); 4612 emit_byte(0xC0 | encode); 4613} 4614 
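// Note on the asserts in bsrq() above and lzcntq() further below: LZCNT is
// encoded as BSR (0F BD) with an F3 prefix, which pre-LZCNT processors
// silently ignore. Illustrative encodings (not emitted here):
//   bsrq(rax, rbx)   -> 48 0F BD C3
//   lzcntq(rax, rbx) -> F3 48 0F BD C3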
4615void Assembler::bswapq(Register reg) { 4616 int encode = prefixq_and_encode(reg->encoding()); 4617 emit_byte(0x0F); 4618 emit_byte(0xC8 | encode); 4619} 4620 4621void Assembler::cdqq() { 4622 prefix(REX_W); 4623 emit_byte(0x99); 4624} 4625 4626void Assembler::clflush(Address adr) { 4627 prefix(adr); 4628 emit_byte(0x0F); 4629 emit_byte(0xAE); 4630 emit_operand(rdi, adr); 4631} 4632 4633void Assembler::cmovq(Condition cc, Register dst, Register src) { 4634 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4635 emit_byte(0x0F); 4636 emit_byte(0x40 | cc); 4637 emit_byte(0xC0 | encode); 4638} 4639 4640void Assembler::cmovq(Condition cc, Register dst, Address src) { 4641 InstructionMark im(this); 4642 prefixq(src, dst); 4643 emit_byte(0x0F); 4644 emit_byte(0x40 | cc); 4645 emit_operand(dst, src); 4646} 4647 4648void Assembler::cmpq(Address dst, int32_t imm32) { 4649 InstructionMark im(this); 4650 prefixq(dst); 4651 emit_byte(0x81); 4652 emit_operand(rdi, dst, 4); 4653 emit_long(imm32); 4654} 4655 4656void Assembler::cmpq(Register dst, int32_t imm32) { 4657 (void) prefixq_and_encode(dst->encoding()); 4658 emit_arith(0x81, 0xF8, dst, imm32); 4659} 4660 4661void Assembler::cmpq(Address dst, Register src) { 4662 InstructionMark im(this); 4663 prefixq(dst, src); 4664 emit_byte(0x3B); 4665 emit_operand(src, dst); 4666} 4667 4668void Assembler::cmpq(Register dst, Register src) { 4669 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 4670 emit_arith(0x3B, 0xC0, dst, src); 4671} 4672 4673void Assembler::cmpq(Register dst, Address src) { 4674 InstructionMark im(this); 4675 prefixq(src, dst); 4676 emit_byte(0x3B); 4677 emit_operand(dst, src); 4678} 4679 4680void Assembler::cmpxchgq(Register reg, Address adr) { 4681 InstructionMark im(this); 4682 prefixq(adr, reg); 4683 emit_byte(0x0F); 4684 emit_byte(0xB1); 4685 emit_operand(reg, adr); 4686} 4687 4688void Assembler::cvtsi2sdq(XMMRegister dst, Register src) { 4689 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4690 int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2); 4691 emit_byte(0x2A); 4692 emit_byte(0xC0 | encode); 4693} 4694 4695void Assembler::cvtsi2sdq(XMMRegister dst, Address src) { 4696 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4697 InstructionMark im(this); 4698 simd_prefix_q(dst, dst, src, VEX_SIMD_F2); 4699 emit_byte(0x2A); 4700 emit_operand(dst, src); 4701} 4702 4703void Assembler::cvtsi2ssq(XMMRegister dst, Register src) { 4704 NOT_LP64(assert(VM_Version::supports_sse(), "")); 4705 int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3); 4706 emit_byte(0x2A); 4707 emit_byte(0xC0 | encode); 4708} 4709 4710void Assembler::cvtsi2ssq(XMMRegister dst, Address src) { 4711 NOT_LP64(assert(VM_Version::supports_sse(), "")); 4712 InstructionMark im(this); 4713 simd_prefix_q(dst, dst, src, VEX_SIMD_F3); 4714 emit_byte(0x2A); 4715 emit_operand(dst, src); 4716} 4717 4718void Assembler::cvttsd2siq(Register dst, XMMRegister src) { 4719 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4720 int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2); 4721 emit_byte(0x2C); 4722 emit_byte(0xC0 | encode); 4723} 4724 4725void Assembler::cvttss2siq(Register dst, XMMRegister src) { 4726 NOT_LP64(assert(VM_Version::supports_sse(), "")); 4727 int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3); 4728 emit_byte(0x2C); 4729 emit_byte(0xC0 | encode); 4730} 4731 4732void Assembler::decl(Register dst) { 4733 // Don't use it directly. Use MacroAssembler::decrementl() instead. 
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xC8 | encode);
}

void Assembler::decq(Register dst) {
  // Don't use it directly. Use MacroAssembler::decrementq() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xC8 | encode);
}

void Assembler::decq(Address dst) {
  // Don't use it directly. Use MacroAssembler::decrementq() instead.
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0xFF);
  emit_operand(rcx, dst);
}

void Assembler::fxrstor(Address src) {
  prefixq(src);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(as_Register(1), src);
}

void Assembler::fxsave(Address dst) {
  prefixq(dst);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(as_Register(0), dst);
}

void Assembler::idivq(Register src) {
  int encode = prefixq_and_encode(src->encoding());
  emit_byte(0xF7);
  emit_byte(0xF8 | encode);
}

void Assembler::imulq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xAF);
  emit_byte(0xC0 | encode);
}

void Assembler::imulq(Register dst, Register src, int value) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  if (is8bit(value)) {
    emit_byte(0x6B);
    emit_byte(0xC0 | encode);
    emit_byte(value & 0xFF);
  } else {
    emit_byte(0x69);
    emit_byte(0xC0 | encode);
    emit_long(value);
  }
}

void Assembler::incl(Register dst) {
  // Don't use it directly. Use MacroAssembler::incrementl() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xC0 | encode);
}

void Assembler::incq(Register dst) {
  // Don't use it directly. Use MacroAssembler::incrementq() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xC0 | encode);
}

void Assembler::incq(Address dst) {
  // Don't use it directly. Use MacroAssembler::incrementq() instead.
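  // (The MacroAssembler increment/decrement wrappers may expand to
  // addq/subq rather than INC/DEC; INC and DEC leave CF untouched, and
  // that partial EFLAGS update can stall some pipelines.)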
4814 InstructionMark im(this); 4815 prefixq(dst); 4816 emit_byte(0xFF); 4817 emit_operand(rax, dst); 4818} 4819 4820void Assembler::lea(Register dst, Address src) { 4821 leaq(dst, src); 4822} 4823 4824void Assembler::leaq(Register dst, Address src) { 4825 InstructionMark im(this); 4826 prefixq(src, dst); 4827 emit_byte(0x8D); 4828 emit_operand(dst, src); 4829} 4830 4831void Assembler::mov64(Register dst, int64_t imm64) { 4832 InstructionMark im(this); 4833 int encode = prefixq_and_encode(dst->encoding()); 4834 emit_byte(0xB8 | encode); 4835 emit_long64(imm64); 4836} 4837 4838void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) { 4839 InstructionMark im(this); 4840 int encode = prefixq_and_encode(dst->encoding()); 4841 emit_byte(0xB8 | encode); 4842 emit_data64(imm64, rspec); 4843} 4844 4845void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) { 4846 InstructionMark im(this); 4847 int encode = prefix_and_encode(dst->encoding()); 4848 emit_byte(0xB8 | encode); 4849 emit_data((int)imm32, rspec, narrow_oop_operand); 4850} 4851 4852void Assembler::mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder const& rspec) { 4853 InstructionMark im(this); 4854 prefix(dst); 4855 emit_byte(0xC7); 4856 emit_operand(rax, dst, 4); 4857 emit_data((int)imm32, rspec, narrow_oop_operand); 4858} 4859 4860void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) { 4861 InstructionMark im(this); 4862 int encode = prefix_and_encode(src1->encoding()); 4863 emit_byte(0x81); 4864 emit_byte(0xF8 | encode); 4865 emit_data((int)imm32, rspec, narrow_oop_operand); 4866} 4867 4868void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) { 4869 InstructionMark im(this); 4870 prefix(src1); 4871 emit_byte(0x81); 4872 emit_operand(rax, src1, 4); 4873 emit_data((int)imm32, rspec, narrow_oop_operand); 4874} 4875 4876void Assembler::lzcntq(Register dst, Register src) { 4877 assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR"); 4878 emit_byte(0xF3); 4879 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4880 emit_byte(0x0F); 4881 emit_byte(0xBD); 4882 emit_byte(0xC0 | encode); 4883} 4884 4885void Assembler::movdq(XMMRegister dst, Register src) { 4886 // table D-1 says MMX/SSE2 4887 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4888 int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66); 4889 emit_byte(0x6E); 4890 emit_byte(0xC0 | encode); 4891} 4892 4893void Assembler::movdq(Register dst, XMMRegister src) { 4894 // table D-1 says MMX/SSE2 4895 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4896 // swap src/dst to get correct prefix 4897 int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66); 4898 emit_byte(0x7E); 4899 emit_byte(0xC0 | encode); 4900} 4901 4902void Assembler::movq(Register dst, Register src) { 4903 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4904 emit_byte(0x8B); 4905 emit_byte(0xC0 | encode); 4906} 4907 4908void Assembler::movq(Register dst, Address src) { 4909 InstructionMark im(this); 4910 prefixq(src, dst); 4911 emit_byte(0x8B); 4912 emit_operand(dst, src); 4913} 4914 4915void Assembler::movq(Address dst, Register src) { 4916 InstructionMark im(this); 4917 prefixq(dst, src); 4918 emit_byte(0x89); 4919 emit_operand(src, dst); 4920} 4921 4922void Assembler::movsbq(Register dst, Address src) { 4923 InstructionMark im(this); 4924 prefixq(src, dst); 4925 emit_byte(0x0F); 4926 emit_byte(0xBE); 4927 
  emit_operand(dst, src);
}

void Assembler::movsbq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBE);
  emit_byte(0xC0 | encode);
}

void Assembler::movslq(Register dst, int32_t imm32) {
  // dbx shows movslq(rcx, 3) as movq $0x0000000049000000,(%rbx)
  // and movslq(r8, 3); as movl $0x0000000048000000,(%rbx)
  // as a result we shouldn't use this form until it has been tested at runtime...
  ShouldNotReachHere();
  InstructionMark im(this);
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xC7 | encode);
  emit_long(imm32);
}

void Assembler::movslq(Address dst, int32_t imm32) {
  assert(is_simm32(imm32), "lost bits");
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0xC7);
  emit_operand(rax, dst, 4);
  emit_long(imm32);
}

void Assembler::movslq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x63);
  emit_operand(dst, src);
}

void Assembler::movslq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x63);
  emit_byte(0xC0 | encode);
}

void Assembler::movswq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0F);
  emit_byte(0xBF);
  emit_operand(dst, src);
}

void Assembler::movswq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBF);
  emit_byte(0xC0 | encode);
}

void Assembler::movzbq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB6);
  emit_operand(dst, src);
}

void Assembler::movzbq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xB6);
  emit_byte(0xC0 | encode);
}

void Assembler::movzwq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB7);
  emit_operand(dst, src);
}

void Assembler::movzwq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xB7);
  emit_byte(0xC0 | encode);
}

void Assembler::negq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xF7);
  emit_byte(0xD8 | encode);
}

void Assembler::notq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xF7);
  emit_byte(0xD0 | encode);
}

void Assembler::orq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0x81);
  emit_operand(rcx, dst, 4);
  emit_long(imm32);
}

void Assembler::orq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xC8, dst, imm32);
}

void Assembler::orq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0B);
  emit_operand(dst, src);
}

void Assembler::orq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x0B, 0xC0, dst, src);
}

void Assembler::popa() { //
64bit 5053 movq(r15, Address(rsp, 0)); 5054 movq(r14, Address(rsp, wordSize)); 5055 movq(r13, Address(rsp, 2 * wordSize)); 5056 movq(r12, Address(rsp, 3 * wordSize)); 5057 movq(r11, Address(rsp, 4 * wordSize)); 5058 movq(r10, Address(rsp, 5 * wordSize)); 5059 movq(r9, Address(rsp, 6 * wordSize)); 5060 movq(r8, Address(rsp, 7 * wordSize)); 5061 movq(rdi, Address(rsp, 8 * wordSize)); 5062 movq(rsi, Address(rsp, 9 * wordSize)); 5063 movq(rbp, Address(rsp, 10 * wordSize)); 5064 // skip rsp 5065 movq(rbx, Address(rsp, 12 * wordSize)); 5066 movq(rdx, Address(rsp, 13 * wordSize)); 5067 movq(rcx, Address(rsp, 14 * wordSize)); 5068 movq(rax, Address(rsp, 15 * wordSize)); 5069 5070 addq(rsp, 16 * wordSize); 5071} 5072 5073void Assembler::popcntq(Register dst, Address src) { 5074 assert(VM_Version::supports_popcnt(), "must support"); 5075 InstructionMark im(this); 5076 emit_byte(0xF3); 5077 prefixq(src, dst); 5078 emit_byte(0x0F); 5079 emit_byte(0xB8); 5080 emit_operand(dst, src); 5081} 5082 5083void Assembler::popcntq(Register dst, Register src) { 5084 assert(VM_Version::supports_popcnt(), "must support"); 5085 emit_byte(0xF3); 5086 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 5087 emit_byte(0x0F); 5088 emit_byte(0xB8); 5089 emit_byte(0xC0 | encode); 5090} 5091 5092void Assembler::popq(Address dst) { 5093 InstructionMark im(this); 5094 prefixq(dst); 5095 emit_byte(0x8F); 5096 emit_operand(rax, dst); 5097} 5098 5099void Assembler::pusha() { // 64bit 5100 // we have to store original rsp. ABI says that 128 bytes 5101 // below rsp are local scratch. 5102 movq(Address(rsp, -5 * wordSize), rsp); 5103 5104 subq(rsp, 16 * wordSize); 5105 5106 movq(Address(rsp, 15 * wordSize), rax); 5107 movq(Address(rsp, 14 * wordSize), rcx); 5108 movq(Address(rsp, 13 * wordSize), rdx); 5109 movq(Address(rsp, 12 * wordSize), rbx); 5110 // skip rsp 5111 movq(Address(rsp, 10 * wordSize), rbp); 5112 movq(Address(rsp, 9 * wordSize), rsi); 5113 movq(Address(rsp, 8 * wordSize), rdi); 5114 movq(Address(rsp, 7 * wordSize), r8); 5115 movq(Address(rsp, 6 * wordSize), r9); 5116 movq(Address(rsp, 5 * wordSize), r10); 5117 movq(Address(rsp, 4 * wordSize), r11); 5118 movq(Address(rsp, 3 * wordSize), r12); 5119 movq(Address(rsp, 2 * wordSize), r13); 5120 movq(Address(rsp, wordSize), r14); 5121 movq(Address(rsp, 0), r15); 5122} 5123 5124void Assembler::pushq(Address src) { 5125 InstructionMark im(this); 5126 prefixq(src); 5127 emit_byte(0xFF); 5128 emit_operand(rsi, src); 5129} 5130 5131void Assembler::rclq(Register dst, int imm8) { 5132 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 5133 int encode = prefixq_and_encode(dst->encoding()); 5134 if (imm8 == 1) { 5135 emit_byte(0xD1); 5136 emit_byte(0xD0 | encode); 5137 } else { 5138 emit_byte(0xC1); 5139 emit_byte(0xD0 | encode); 5140 emit_byte(imm8); 5141 } 5142} 5143void Assembler::sarq(Register dst, int imm8) { 5144 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 5145 int encode = prefixq_and_encode(dst->encoding()); 5146 if (imm8 == 1) { 5147 emit_byte(0xD1); 5148 emit_byte(0xF8 | encode); 5149 } else { 5150 emit_byte(0xC1); 5151 emit_byte(0xF8 | encode); 5152 emit_byte(imm8); 5153 } 5154} 5155 5156void Assembler::sarq(Register dst) { 5157 int encode = prefixq_and_encode(dst->encoding()); 5158 emit_byte(0xD3); 5159 emit_byte(0xF8 | encode); 5160} 5161 5162void Assembler::sbbq(Address dst, int32_t imm32) { 5163 InstructionMark im(this); 5164 prefixq(dst); 5165 emit_arith_operand(0x81, rbx, dst, imm32); 5166} 5167 5168void Assembler::sbbq(Register 
dst, int32_t imm32) { 5169 (void) prefixq_and_encode(dst->encoding()); 5170 emit_arith(0x81, 0xD8, dst, imm32); 5171} 5172 5173void Assembler::sbbq(Register dst, Address src) { 5174 InstructionMark im(this); 5175 prefixq(src, dst); 5176 emit_byte(0x1B); 5177 emit_operand(dst, src); 5178} 5179 5180void Assembler::sbbq(Register dst, Register src) { 5181 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 5182 emit_arith(0x1B, 0xC0, dst, src); 5183} 5184 5185void Assembler::shlq(Register dst, int imm8) { 5186 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 5187 int encode = prefixq_and_encode(dst->encoding()); 5188 if (imm8 == 1) { 5189 emit_byte(0xD1); 5190 emit_byte(0xE0 | encode); 5191 } else { 5192 emit_byte(0xC1); 5193 emit_byte(0xE0 | encode); 5194 emit_byte(imm8); 5195 } 5196} 5197 5198void Assembler::shlq(Register dst) { 5199 int encode = prefixq_and_encode(dst->encoding()); 5200 emit_byte(0xD3); 5201 emit_byte(0xE0 | encode); 5202} 5203 5204void Assembler::shrq(Register dst, int imm8) { 5205 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 5206 int encode = prefixq_and_encode(dst->encoding()); 5207 emit_byte(0xC1); 5208 emit_byte(0xE8 | encode); 5209 emit_byte(imm8); 5210} 5211 5212void Assembler::shrq(Register dst) { 5213 int encode = prefixq_and_encode(dst->encoding()); 5214 emit_byte(0xD3); 5215 emit_byte(0xE8 | encode); 5216} 5217 5218void Assembler::subq(Address dst, int32_t imm32) { 5219 InstructionMark im(this); 5220 prefixq(dst); 5221 emit_arith_operand(0x81, rbp, dst, imm32); 5222} 5223 5224void Assembler::subq(Address dst, Register src) { 5225 InstructionMark im(this); 5226 prefixq(dst, src); 5227 emit_byte(0x29); 5228 emit_operand(src, dst); 5229} 5230 5231void Assembler::subq(Register dst, int32_t imm32) { 5232 (void) prefixq_and_encode(dst->encoding()); 5233 emit_arith(0x81, 0xE8, dst, imm32); 5234} 5235 5236// Force generation of a 4 byte immediate value even if it fits into 8bit 5237void Assembler::subq_imm32(Register dst, int32_t imm32) { 5238 (void) prefixq_and_encode(dst->encoding()); 5239 emit_arith_imm32(0x81, 0xE8, dst, imm32); 5240} 5241 5242void Assembler::subq(Register dst, Address src) { 5243 InstructionMark im(this); 5244 prefixq(src, dst); 5245 emit_byte(0x2B); 5246 emit_operand(dst, src); 5247} 5248 5249void Assembler::subq(Register dst, Register src) { 5250 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 5251 emit_arith(0x2B, 0xC0, dst, src); 5252} 5253 5254void Assembler::testq(Register dst, int32_t imm32) { 5255 // not using emit_arith because test 5256 // doesn't support sign-extension of 5257 // 8bit operands 5258 int encode = dst->encoding(); 5259 if (encode == 0) { 5260 prefix(REX_W); 5261 emit_byte(0xA9); 5262 } else { 5263 encode = prefixq_and_encode(encode); 5264 emit_byte(0xF7); 5265 emit_byte(0xC0 | encode); 5266 } 5267 emit_long(imm32); 5268} 5269 5270void Assembler::testq(Register dst, Register src) { 5271 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 5272 emit_arith(0x85, 0xC0, dst, src); 5273} 5274 5275void Assembler::xaddq(Address dst, Register src) { 5276 InstructionMark im(this); 5277 prefixq(dst, src); 5278 emit_byte(0x0F); 5279 emit_byte(0xC1); 5280 emit_operand(src, dst); 5281} 5282 5283void Assembler::xchgq(Register dst, Address src) { 5284 InstructionMark im(this); 5285 prefixq(src, dst); 5286 emit_byte(0x87); 5287 emit_operand(dst, src); 5288} 5289 5290void Assembler::xchgq(Register dst, Register src) { 5291 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 5292 
emit_byte(0x87); 5293 emit_byte(0xc0 | encode); 5294} 5295 5296void Assembler::xorq(Register dst, Register src) { 5297 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 5298 emit_arith(0x33, 0xC0, dst, src); 5299} 5300 5301void Assembler::xorq(Register dst, Address src) { 5302 InstructionMark im(this); 5303 prefixq(src, dst); 5304 emit_byte(0x33); 5305 emit_operand(dst, src); 5306} 5307 5308#endif // !LP64 5309 5310static Assembler::Condition reverse[] = { 5311 Assembler::noOverflow /* overflow = 0x0 */ , 5312 Assembler::overflow /* noOverflow = 0x1 */ , 5313 Assembler::aboveEqual /* carrySet = 0x2, below = 0x2 */ , 5314 Assembler::below /* aboveEqual = 0x3, carryClear = 0x3 */ , 5315 Assembler::notZero /* zero = 0x4, equal = 0x4 */ , 5316 Assembler::zero /* notZero = 0x5, notEqual = 0x5 */ , 5317 Assembler::above /* belowEqual = 0x6 */ , 5318 Assembler::belowEqual /* above = 0x7 */ , 5319 Assembler::positive /* negative = 0x8 */ , 5320 Assembler::negative /* positive = 0x9 */ , 5321 Assembler::noParity /* parity = 0xa */ , 5322 Assembler::parity /* noParity = 0xb */ , 5323 Assembler::greaterEqual /* less = 0xc */ , 5324 Assembler::less /* greaterEqual = 0xd */ , 5325 Assembler::greater /* lessEqual = 0xe */ , 5326 Assembler::lessEqual /* greater = 0xf, */ 5327 5328}; 5329 5330 5331// Implementation of MacroAssembler 5332 5333// First all the versions that have distinct versions depending on 32/64 bit 5334// Unless the difference is trivial (1 line or so). 5335 5336#ifndef _LP64 5337 5338// 32bit versions 5339 5340Address MacroAssembler::as_Address(AddressLiteral adr) { 5341 return Address(adr.target(), adr.rspec()); 5342} 5343 5344Address MacroAssembler::as_Address(ArrayAddress adr) { 5345 return Address::make_array(adr); 5346} 5347 5348int MacroAssembler::biased_locking_enter(Register lock_reg, 5349 Register obj_reg, 5350 Register swap_reg, 5351 Register tmp_reg, 5352 bool swap_reg_contains_mark, 5353 Label& done, 5354 Label* slow_case, 5355 BiasedLockingCounters* counters) { 5356 assert(UseBiasedLocking, "why call this otherwise?"); 5357 assert(swap_reg == rax, "swap_reg must be rax, for cmpxchg"); 5358 assert_different_registers(lock_reg, obj_reg, swap_reg); 5359 5360 if (PrintBiasedLockingStatistics && counters == NULL) 5361 counters = BiasedLocking::counters(); 5362 5363 bool need_tmp_reg = false; 5364 if (tmp_reg == noreg) { 5365 need_tmp_reg = true; 5366 tmp_reg = lock_reg; 5367 } else { 5368 assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg); 5369 } 5370 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); 5371 Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); 5372 Address klass_addr (obj_reg, oopDesc::klass_offset_in_bytes()); 5373 Address saved_mark_addr(lock_reg, 0); 5374 5375 // Biased locking 5376 // See whether the lock is currently biased toward our thread and 5377 // whether the epoch is still valid 5378 // Note that the runtime guarantees sufficient alignment of JavaThread 5379 // pointers to allow age to be placed into low bits 5380 // First check to see whether biasing is even enabled for this object 5381 Label cas_label; 5382 int null_check_offset = -1; 5383 if (!swap_reg_contains_mark) { 5384 null_check_offset = offset(); 5385 movl(swap_reg, mark_addr); 5386 } 5387 if (need_tmp_reg) { 5388 push(tmp_reg); 5389 } 5390 movl(tmp_reg, swap_reg); 5391 andl(tmp_reg, markOopDesc::biased_lock_mask_in_place); 5392 cmpl(tmp_reg, 
       markOopDesc::biased_lock_pattern);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  jcc(Assembler::notEqual, cas_label);
  // The bias pattern is present in the object's header. Need to check
  // whether the bias owner and the epoch are both still current.
  // Note that because there is no current thread register on x86 we
  // need to store off the mark word we read out of the object to
  // avoid reloading it and needing to recheck invariants below. This
  // store is unfortunate but it makes the overall code shorter and
  // simpler.
  movl(saved_mark_addr, swap_reg);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  xorl(swap_reg, tmp_reg);
  if (swap_reg_contains_mark) {
    null_check_offset = offset();
  }
  movl(tmp_reg, klass_addr);
  xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset()));
  andl(swap_reg, ~((int) markOopDesc::age_mask_in_place));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->biased_lock_entry_count_addr()));
  }
  jcc(Assembler::equal, done);

  Label try_revoke_bias;
  Label try_rebias;

  // At this point we know that the header has the bias pattern and
  // that we are not the bias owner in the current epoch. We need to
  // figure out more details about the state of the header in order to
  // know what operations can be legally performed on the object's
  // header.

  // If the low three bits in the xor result aren't clear, that means
  // the prototype header is no longer biased and we have to revoke
  // the bias on this object.
  testl(swap_reg, markOopDesc::biased_lock_mask_in_place);
  jcc(Assembler::notZero, try_revoke_bias);

  // Biasing is still enabled for this data type. See whether the
  // epoch of the current bias is still valid, meaning that the epoch
  // bits of the mark word are equal to the epoch bits of the
  // prototype header. (Note that the prototype header's epoch bits
  // only change at a safepoint.) If not, attempt to rebias the object
  // toward the current thread. Note that we must be absolutely sure
  // that the current epoch is invalid in order to do this because
  // otherwise the manipulations it performs on the mark word are
  // illegal.
  testl(swap_reg, markOopDesc::epoch_mask_in_place);
  jcc(Assembler::notZero, try_rebias);

  // The epoch of the current bias is still valid but we know nothing
  // about the owner; it might be set or it might be clear. Try to
  // acquire the bias of the object using an atomic operation. If this
  // fails we will go into the runtime to revoke the object's bias.
  // Note that we first construct the presumed unbiased header so we
  // don't accidentally blow away another thread's valid bias.
  movl(swap_reg, saved_mark_addr);
  andl(swap_reg,
       markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  orl(tmp_reg, swap_reg);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias.
The revocation will occur in the 5476 // interpreter runtime in the slow case. 5477 if (counters != NULL) { 5478 cond_inc32(Assembler::zero, 5479 ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr())); 5480 } 5481 if (slow_case != NULL) { 5482 jcc(Assembler::notZero, *slow_case); 5483 } 5484 jmp(done); 5485 5486 bind(try_rebias); 5487 // At this point we know the epoch has expired, meaning that the 5488 // current "bias owner", if any, is actually invalid. Under these 5489 // circumstances _only_, we are allowed to use the current header's 5490 // value as the comparison value when doing the cas to acquire the 5491 // bias in the current epoch. In other words, we allow transfer of 5492 // the bias from one thread to another directly in this situation. 5493 // 5494 // FIXME: due to a lack of registers we currently blow away the age 5495 // bits in this situation. Should attempt to preserve them. 5496 if (need_tmp_reg) { 5497 push(tmp_reg); 5498 } 5499 get_thread(tmp_reg); 5500 movl(swap_reg, klass_addr); 5501 orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset())); 5502 movl(swap_reg, saved_mark_addr); 5503 if (os::is_MP()) { 5504 lock(); 5505 } 5506 cmpxchgptr(tmp_reg, Address(obj_reg, 0)); 5507 if (need_tmp_reg) { 5508 pop(tmp_reg); 5509 } 5510 // If the biasing toward our thread failed, then another thread 5511 // succeeded in biasing it toward itself and we need to revoke that 5512 // bias. The revocation will occur in the runtime in the slow case. 5513 if (counters != NULL) { 5514 cond_inc32(Assembler::zero, 5515 ExternalAddress((address)counters->rebiased_lock_entry_count_addr())); 5516 } 5517 if (slow_case != NULL) { 5518 jcc(Assembler::notZero, *slow_case); 5519 } 5520 jmp(done); 5521 5522 bind(try_revoke_bias); 5523 // The prototype mark in the klass doesn't have the bias bit set any 5524 // more, indicating that objects of this data type are not supposed 5525 // to be biased any more. We are going to try to reset the mark of 5526 // this object to the prototype value and fall through to the 5527 // CAS-based locking scheme. Note that if our CAS fails, it means 5528 // that another thread raced us for the privilege of revoking the 5529 // bias of this particular object, so it's okay to continue in the 5530 // normal locking code. 5531 // 5532 // FIXME: due to a lack of registers we currently blow away the age 5533 // bits in this situation. Should attempt to preserve them. 5534 movl(swap_reg, saved_mark_addr); 5535 if (need_tmp_reg) { 5536 push(tmp_reg); 5537 } 5538 movl(tmp_reg, klass_addr); 5539 movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset())); 5540 if (os::is_MP()) { 5541 lock(); 5542 } 5543 cmpxchgptr(tmp_reg, Address(obj_reg, 0)); 5544 if (need_tmp_reg) { 5545 pop(tmp_reg); 5546 } 5547 // Fall through to the normal CAS-based lock, because no matter what 5548 // the result of the above CAS, some thread must have succeeded in 5549 // removing the bias bit from the object's header. 
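  // (For reference, the mark word layout these tests assume, per the
  // markOopDesc constants of this codebase: the low bits are
  // [ age:4 | biased_lock:1 | lock:2 ], so biased_lock_pattern is 101b and
  // biased_lock_mask_in_place is 111b -- the "low three bits" tested above.)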
5550 if (counters != NULL) { 5551 cond_inc32(Assembler::zero, 5552 ExternalAddress((address)counters->revoked_lock_entry_count_addr())); 5553 } 5554 5555 bind(cas_label); 5556 5557 return null_check_offset; 5558} 5559void MacroAssembler::call_VM_leaf_base(address entry_point, 5560 int number_of_arguments) { 5561 call(RuntimeAddress(entry_point)); 5562 increment(rsp, number_of_arguments * wordSize); 5563} 5564 5565void MacroAssembler::cmpklass(Address src1, Metadata* obj) { 5566 cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate()); 5567} 5568 5569void MacroAssembler::cmpklass(Register src1, Metadata* obj) { 5570 cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate()); 5571} 5572 5573void MacroAssembler::cmpoop(Address src1, jobject obj) { 5574 cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate()); 5575} 5576 5577void MacroAssembler::cmpoop(Register src1, jobject obj) { 5578 cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate()); 5579} 5580 5581void MacroAssembler::extend_sign(Register hi, Register lo) { 5582 // According to Intel Doc. AP-526, "Integer Divide", p.18. 5583 if (VM_Version::is_P6() && hi == rdx && lo == rax) { 5584 cdql(); 5585 } else { 5586 movl(hi, lo); 5587 sarl(hi, 31); 5588 } 5589} 5590 5591void MacroAssembler::jC2(Register tmp, Label& L) { 5592 // set parity bit if FPU flag C2 is set (via rax) 5593 save_rax(tmp); 5594 fwait(); fnstsw_ax(); 5595 sahf(); 5596 restore_rax(tmp); 5597 // branch 5598 jcc(Assembler::parity, L); 5599} 5600 5601void MacroAssembler::jnC2(Register tmp, Label& L) { 5602 // set parity bit if FPU flag C2 is set (via rax) 5603 save_rax(tmp); 5604 fwait(); fnstsw_ax(); 5605 sahf(); 5606 restore_rax(tmp); 5607 // branch 5608 jcc(Assembler::noParity, L); 5609} 5610 5611// 32bit can do a case table jump in one instruction but we no longer allow the base 5612// to be installed in the Address class 5613void MacroAssembler::jump(ArrayAddress entry) { 5614 jmp(as_Address(entry)); 5615} 5616 5617// Note: y_lo will be destroyed 5618void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) { 5619 // Long compare for Java (semantics as described in JVM spec.) 5620 Label high, low, done; 5621 5622 cmpl(x_hi, y_hi); 5623 jcc(Assembler::less, low); 5624 jcc(Assembler::greater, high); 5625 // x_hi is the return register 5626 xorl(x_hi, x_hi); 5627 cmpl(x_lo, y_lo); 5628 jcc(Assembler::below, low); 5629 jcc(Assembler::equal, done); 5630 5631 bind(high); 5632 xorl(x_hi, x_hi); 5633 increment(x_hi); 5634 jmp(done); 5635 5636 bind(low); 5637 xorl(x_hi, x_hi); 5638 decrementl(x_hi); 5639 5640 bind(done); 5641} 5642 5643void MacroAssembler::lea(Register dst, AddressLiteral src) { 5644 mov_literal32(dst, (int32_t)src.target(), src.rspec()); 5645} 5646 5647void MacroAssembler::lea(Address dst, AddressLiteral adr) { 5648 // leal(dst, as_Address(adr)); 5649 // see note in movl as to why we must use a move 5650 mov_literal32(dst, (int32_t) adr.target(), adr.rspec()); 5651} 5652 5653void MacroAssembler::leave() { 5654 mov(rsp, rbp); 5655 pop(rbp); 5656} 5657 5658void MacroAssembler::lmul(int x_rsp_offset, int y_rsp_offset) { 5659 // Multiplication of two Java long values stored on the stack 5660 // as illustrated below. Result is in rdx:rax. 5661 // 5662 // rsp ---> [ ?? ] \ \ 5663 // .... | y_rsp_offset | 5664 // [ y_lo ] / (in bytes) | x_rsp_offset 5665 // [ y_hi ] | (in bytes) 5666 // .... | 5667 // [ x_lo ] / 5668 // [ x_hi ] 5669 // .... 
5670 // 5671 // Basic idea: lo(result) = lo(x_lo * y_lo) 5672 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 5673 Address x_hi(rsp, x_rsp_offset + wordSize); Address x_lo(rsp, x_rsp_offset); 5674 Address y_hi(rsp, y_rsp_offset + wordSize); Address y_lo(rsp, y_rsp_offset); 5675 Label quick; 5676 // load x_hi, y_hi and check if quick 5677 // multiplication is possible 5678 movl(rbx, x_hi); 5679 movl(rcx, y_hi); 5680 movl(rax, rbx); 5681 orl(rbx, rcx); // rbx, = 0 <=> x_hi = 0 and y_hi = 0 5682 jcc(Assembler::zero, quick); // if rbx, = 0 do quick multiply 5683 // do full multiplication 5684 // 1st step 5685 mull(y_lo); // x_hi * y_lo 5686 movl(rbx, rax); // save lo(x_hi * y_lo) in rbx, 5687 // 2nd step 5688 movl(rax, x_lo); 5689 mull(rcx); // x_lo * y_hi 5690 addl(rbx, rax); // add lo(x_lo * y_hi) to rbx, 5691 // 3rd step 5692 bind(quick); // note: rbx, = 0 if quick multiply! 5693 movl(rax, x_lo); 5694 mull(y_lo); // x_lo * y_lo 5695 addl(rdx, rbx); // correct hi(x_lo * y_lo) 5696} 5697 5698void MacroAssembler::lneg(Register hi, Register lo) { 5699 negl(lo); 5700 adcl(hi, 0); 5701 negl(hi); 5702} 5703 5704void MacroAssembler::lshl(Register hi, Register lo) { 5705 // Java shift left long support (semantics as described in JVM spec., p.305) 5706 // (basic idea for shift counts s >= n: x << s == (x << n) << (s - n)) 5707 // shift value is in rcx ! 5708 assert(hi != rcx, "must not use rcx"); 5709 assert(lo != rcx, "must not use rcx"); 5710 const Register s = rcx; // shift count 5711 const int n = BitsPerWord; 5712 Label L; 5713 andl(s, 0x3f); // s := s & 0x3f (s < 0x40) 5714 cmpl(s, n); // if (s < n) 5715 jcc(Assembler::less, L); // else (s >= n) 5716 movl(hi, lo); // x := x << n 5717 xorl(lo, lo); 5718 // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n! 5719 bind(L); // s (mod n) < n 5720 shldl(hi, lo); // x := x << s 5721 shll(lo); 5722} 5723 5724 5725void MacroAssembler::lshr(Register hi, Register lo, bool sign_extension) { 5726 // Java shift right long support (semantics as described in JVM spec., p.306 & p.310) 5727 // (basic idea for shift counts s >= n: x >> s == (x >> n) >> (s - n)) 5728 assert(hi != rcx, "must not use rcx"); 5729 assert(lo != rcx, "must not use rcx"); 5730 const Register s = rcx; // shift count 5731 const int n = BitsPerWord; 5732 Label L; 5733 andl(s, 0x3f); // s := s & 0x3f (s < 0x40) 5734 cmpl(s, n); // if (s < n) 5735 jcc(Assembler::less, L); // else (s >= n) 5736 movl(lo, hi); // x := x >> n 5737 if (sign_extension) sarl(hi, 31); 5738 else xorl(hi, hi); 5739 // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n! 
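// For example, with a shift count s == 40 the code above has already done
// x := x >> 32 (lo := hi); the shrdl/shrl at the label below then shift by
// 40 mod 32 == 8, completing the full 40-bit shift.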
5740 bind(L); // s (mod n) < n 5741 shrdl(lo, hi); // x := x >> s 5742 if (sign_extension) sarl(hi); 5743 else shrl(hi); 5744 } 5745 5746 void MacroAssembler::movoop(Register dst, jobject obj) { 5747 mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate()); 5748 } 5749 5750 void MacroAssembler::movoop(Address dst, jobject obj) { 5751 mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate()); 5752 } 5753 5754 void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { 5755 mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate()); 5756 } 5757 5758 void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { 5759 mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate()); 5760 } 5761 5762 void MacroAssembler::movptr(Register dst, AddressLiteral src) { 5763 if (src.is_lval()) { 5764 mov_literal32(dst, (intptr_t)src.target(), src.rspec()); 5765 } else { 5766 movl(dst, as_Address(src)); 5767 } 5768 } 5769 5770 void MacroAssembler::movptr(ArrayAddress dst, Register src) { 5771 movl(as_Address(dst), src); 5772 } 5773 5774 void MacroAssembler::movptr(Register dst, ArrayAddress src) { 5775 movl(dst, as_Address(src)); 5776 } 5777 5778 // src should NEVER be a real pointer. Use AddressLiteral for true pointers 5779 void MacroAssembler::movptr(Address dst, intptr_t src) { 5780 movl(dst, src); 5781 } 5782 5783 5784 void MacroAssembler::pop_callee_saved_registers() { 5785 pop(rcx); 5786 pop(rdx); 5787 pop(rdi); 5788 pop(rsi); 5789 } 5790 5791 void MacroAssembler::pop_fTOS() { 5792 fld_d(Address(rsp, 0)); 5793 addl(rsp, 2 * wordSize); 5794 } 5795 5796 void MacroAssembler::push_callee_saved_registers() { 5797 push(rsi); 5798 push(rdi); 5799 push(rdx); 5800 push(rcx); 5801 } 5802 5803 void MacroAssembler::push_fTOS() { 5804 subl(rsp, 2 * wordSize); 5805 fstp_d(Address(rsp, 0)); 5806 } 5807 5808 5809 void MacroAssembler::pushoop(jobject obj) { 5810 push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate()); 5811 } 5812 5813 void MacroAssembler::pushklass(Metadata* obj) { 5814 push_literal32((int32_t)obj, metadata_Relocation::spec_for_immediate()); 5815 } 5816 5817 void MacroAssembler::pushptr(AddressLiteral src) { 5818 if (src.is_lval()) { 5819 push_literal32((int32_t)src.target(), src.rspec()); 5820 } else { 5821 pushl(as_Address(src)); 5822 } 5823 } 5824 5825 void MacroAssembler::set_word_if_not_zero(Register dst) { 5826 xorl(dst, dst); 5827 set_byte_if_not_zero(dst); 5828 } 5829 5830 static void pass_arg0(MacroAssembler* masm, Register arg) { 5831 masm->push(arg); 5832 } 5833 5834 static void pass_arg1(MacroAssembler* masm, Register arg) { 5835 masm->push(arg); 5836 } 5837 5838 static void pass_arg2(MacroAssembler* masm, Register arg) { 5839 masm->push(arg); 5840 } 5841 5842 static void pass_arg3(MacroAssembler* masm, Register arg) { 5843 masm->push(arg); 5844 } 5845 5846 #ifndef PRODUCT 5847 extern "C" void findpc(intptr_t x); 5848 #endif 5849 5850 void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) { 5851 // In order to get locks to work, we need to fake an in_VM state 5852 JavaThread* thread = JavaThread::current(); 5853 JavaThreadState saved_state = thread->thread_state(); 5854 thread->set_thread_state(_thread_in_vm); 5855 if (ShowMessageBoxOnError) { 5856 JavaThread* thread = JavaThread::current(); 5857 JavaThreadState saved_state = thread->thread_state(); 5858 thread->set_thread_state(_thread_in_vm); 5859 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { 5860 ttyLocker ttyl; 5861
BytecodeCounter::print(); 5862 } 5863 // To see where a verify_oop failed, get $ebx+40/X for this frame. 5864 // This is the value of eip which points to where verify_oop will return. 5865 if (os::message_box(msg, "Execution stopped, print registers?")) { 5866 print_state32(rdi, rsi, rbp, rsp, rbx, rdx, rcx, rax, eip); 5867 BREAKPOINT; 5868 } 5869 } else { 5870 ttyLocker ttyl; 5871 ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); 5872 } 5873 // Don't assert holding the ttyLock 5874 assert(false, err_msg("DEBUG MESSAGE: %s", msg)); 5875 ThreadStateTransition::transition(thread, _thread_in_vm, saved_state); 5876 } 5877 5878 void MacroAssembler::print_state32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip) { 5879 ttyLocker ttyl; 5880 FlagSetting fs(Debugging, true); 5881 tty->print_cr("eip = 0x%08x", eip); 5882 #ifndef PRODUCT 5883 if ((WizardMode || Verbose) && PrintMiscellaneous) { 5884 tty->cr(); 5885 findpc(eip); 5886 tty->cr(); 5887 } 5888 #endif 5889 #define PRINT_REG(rax) \ 5890 { tty->print("%s = ", #rax); os::print_location(tty, rax); } 5891 PRINT_REG(rax); 5892 PRINT_REG(rbx); 5893 PRINT_REG(rcx); 5894 PRINT_REG(rdx); 5895 PRINT_REG(rdi); 5896 PRINT_REG(rsi); 5897 PRINT_REG(rbp); 5898 PRINT_REG(rsp); 5899 #undef PRINT_REG 5900 // Print some words near top of stack. 5901 int* dump_sp = (int*) rsp; 5902 for (int col1 = 0; col1 < 8; col1++) { 5903 tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp); 5904 os::print_location(tty, *dump_sp++); 5905 } 5906 for (int row = 0; row < 16; row++) { 5907 tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp); 5908 for (int col = 0; col < 8; col++) { 5909 tty->print(" 0x%08x", *dump_sp++); 5910 } 5911 tty->cr(); 5912 } 5913 // Print some instructions around pc: 5914 Disassembler::decode((address)eip-64, (address)eip); 5915 tty->print_cr("--------"); 5916 Disassembler::decode((address)eip, (address)eip+32); 5917 } 5918 5919 void MacroAssembler::stop(const char* msg) { 5920 ExternalAddress message((address)msg); 5921 // push address of message 5922 pushptr(message.addr()); 5923 { Label L; call(L, relocInfo::none); bind(L); } // push eip 5924 pusha(); // push registers 5925 call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32))); 5926 hlt(); 5927 } 5928 5929 void MacroAssembler::warn(const char* msg) { 5930 push_CPU_state(); 5931 5932 ExternalAddress message((address) msg); 5933 // push address of message 5934 pushptr(message.addr()); 5935 5936 call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning))); 5937 addl(rsp, wordSize); // discard argument 5938 pop_CPU_state(); 5939 } 5940 5941 void MacroAssembler::print_state() { 5942 { Label L; call(L, relocInfo::none); bind(L); } // push eip 5943 pusha(); // push registers 5944 5945 push_CPU_state(); 5946 call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::print_state32))); 5947 pop_CPU_state(); 5948 5949 popa(); 5950 addl(rsp, wordSize); 5951 } 5952 5953 #else // _LP64 5954 5955 // 64 bit versions 5956 5957 Address MacroAssembler::as_Address(AddressLiteral adr) { 5958 // amd64 always does this as a pc-rel 5959 // we can be absolute or disp based on the instruction type 5960 // jmp/call are displacements others are absolute 5961 assert(!adr.is_lval(), "must be rval"); 5962 assert(reachable(adr), "must be"); 5963 return Address((int32_t)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc()); 5964 5965 } 5966 5967 Address
MacroAssembler::as_Address(ArrayAddress adr) { 5968 AddressLiteral base = adr.base(); 5969 lea(rscratch1, base); 5970 Address index = adr.index(); 5971 assert(index._disp == 0, "must not have disp"); // maybe it can? 5972 Address array(rscratch1, index._index, index._scale, index._disp); 5973 return array; 5974} 5975 5976int MacroAssembler::biased_locking_enter(Register lock_reg, 5977 Register obj_reg, 5978 Register swap_reg, 5979 Register tmp_reg, 5980 bool swap_reg_contains_mark, 5981 Label& done, 5982 Label* slow_case, 5983 BiasedLockingCounters* counters) { 5984 assert(UseBiasedLocking, "why call this otherwise?"); 5985 assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq"); 5986 assert(tmp_reg != noreg, "tmp_reg must be supplied"); 5987 assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg); 5988 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); 5989 Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); 5990 Address saved_mark_addr(lock_reg, 0); 5991 5992 if (PrintBiasedLockingStatistics && counters == NULL) 5993 counters = BiasedLocking::counters(); 5994 5995 // Biased locking 5996 // See whether the lock is currently biased toward our thread and 5997 // whether the epoch is still valid 5998 // Note that the runtime guarantees sufficient alignment of JavaThread 5999 // pointers to allow age to be placed into low bits 6000 // First check to see whether biasing is even enabled for this object 6001 Label cas_label; 6002 int null_check_offset = -1; 6003 if (!swap_reg_contains_mark) { 6004 null_check_offset = offset(); 6005 movq(swap_reg, mark_addr); 6006 } 6007 movq(tmp_reg, swap_reg); 6008 andq(tmp_reg, markOopDesc::biased_lock_mask_in_place); 6009 cmpq(tmp_reg, markOopDesc::biased_lock_pattern); 6010 jcc(Assembler::notEqual, cas_label); 6011 // The bias pattern is present in the object's header. Need to check 6012 // whether the bias owner and the epoch are both still current. 6013 load_prototype_header(tmp_reg, obj_reg); 6014 orq(tmp_reg, r15_thread); 6015 xorq(tmp_reg, swap_reg); 6016 andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place)); 6017 if (counters != NULL) { 6018 cond_inc32(Assembler::zero, 6019 ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr())); 6020 } 6021 jcc(Assembler::equal, done); 6022 6023 Label try_revoke_bias; 6024 Label try_rebias; 6025 6026 // At this point we know that the header has the bias pattern and 6027 // that we are not the bias owner in the current epoch. We need to 6028 // figure out more details about the state of the header in order to 6029 // know what operations can be legally performed on the object's 6030 // header. 6031 6032 // If the low three bits in the xor result aren't clear, that means 6033 // the prototype header is no longer biased and we have to revoke 6034 // the bias on this object. 6035 testq(tmp_reg, markOopDesc::biased_lock_mask_in_place); 6036 jcc(Assembler::notZero, try_revoke_bias); 6037 6038 // Biasing is still enabled for this data type. See whether the 6039 // epoch of the current bias is still valid, meaning that the epoch 6040 // bits of the mark word are equal to the epoch bits of the 6041 // prototype header. (Note that the prototype header's epoch bits 6042 // only change at a safepoint.) If not, attempt to rebias the object 6043 // toward the current thread. 
Note that we must be absolutely sure 6044 // that the current epoch is invalid in order to do this because 6045 // otherwise the manipulations it performs on the mark word are 6046 // illegal. 6047 testq(tmp_reg, markOopDesc::epoch_mask_in_place); 6048 jcc(Assembler::notZero, try_rebias); 6049 6050 // The epoch of the current bias is still valid but we know nothing 6051 // about the owner; it might be set or it might be clear. Try to 6052 // acquire the bias of the object using an atomic operation. If this 6053 // fails we will go into the runtime to revoke the object's bias. 6054 // Note that we first construct the presumed unbiased header so we 6055 // don't accidentally blow away another thread's valid bias. 6056 andq(swap_reg, 6057 markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); 6058 movq(tmp_reg, swap_reg); 6059 orq(tmp_reg, r15_thread); 6060 if (os::is_MP()) { 6061 lock(); 6062 } 6063 cmpxchgq(tmp_reg, Address(obj_reg, 0)); 6064 // If the biasing toward our thread failed, this means that 6065 // another thread succeeded in biasing it toward itself and we 6066 // need to revoke that bias. The revocation will occur in the 6067 // interpreter runtime in the slow case. 6068 if (counters != NULL) { 6069 cond_inc32(Assembler::zero, 6070 ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr())); 6071 } 6072 if (slow_case != NULL) { 6073 jcc(Assembler::notZero, *slow_case); 6074 } 6075 jmp(done); 6076 6077 bind(try_rebias); 6078 // At this point we know the epoch has expired, meaning that the 6079 // current "bias owner", if any, is actually invalid. Under these 6080 // circumstances _only_, we are allowed to use the current header's 6081 // value as the comparison value when doing the cas to acquire the 6082 // bias in the current epoch. In other words, we allow transfer of 6083 // the bias from one thread to another directly in this situation. 6084 // 6085 // FIXME: due to a lack of registers we currently blow away the age 6086 // bits in this situation. Should attempt to preserve them. 6087 load_prototype_header(tmp_reg, obj_reg); 6088 orq(tmp_reg, r15_thread); 6089 if (os::is_MP()) { 6090 lock(); 6091 } 6092 cmpxchgq(tmp_reg, Address(obj_reg, 0)); 6093 // If the biasing toward our thread failed, then another thread 6094 // succeeded in biasing it toward itself and we need to revoke that 6095 // bias. The revocation will occur in the runtime in the slow case. 6096 if (counters != NULL) { 6097 cond_inc32(Assembler::zero, 6098 ExternalAddress((address) counters->rebiased_lock_entry_count_addr())); 6099 } 6100 if (slow_case != NULL) { 6101 jcc(Assembler::notZero, *slow_case); 6102 } 6103 jmp(done); 6104 6105 bind(try_revoke_bias); 6106 // The prototype mark in the klass doesn't have the bias bit set any 6107 // more, indicating that objects of this data type are not supposed 6108 // to be biased any more. We are going to try to reset the mark of 6109 // this object to the prototype value and fall through to the 6110 // CAS-based locking scheme. Note that if our CAS fails, it means 6111 // that another thread raced us for the privilege of revoking the 6112 // bias of this particular object, so it's okay to continue in the 6113 // normal locking code. 6114 // 6115 // FIXME: due to a lack of registers we currently blow away the age 6116 // bits in this situation. Should attempt to preserve them.
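// As a reminder of the layout being tested by these mask operations (see
// markOop.hpp), a biased mark word on 64-bit decodes as
//   [JavaThread* owner | epoch:2 | age:4 | biased_lock:1 | lock:2]
// with biased_lock_pattern == 0b101 in the low three bits.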
6117 load_prototype_header(tmp_reg, obj_reg); 6118 if (os::is_MP()) { 6119 lock(); 6120 } 6121 cmpxchgq(tmp_reg, Address(obj_reg, 0)); 6122 // Fall through to the normal CAS-based lock, because no matter what 6123 // the result of the above CAS, some thread must have succeeded in 6124 // removing the bias bit from the object's header. 6125 if (counters != NULL) { 6126 cond_inc32(Assembler::zero, 6127 ExternalAddress((address) counters->revoked_lock_entry_count_addr())); 6128 } 6129 6130 bind(cas_label); 6131 6132 return null_check_offset; 6133 } 6134 6135 void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) { 6136 Label L, E; 6137 6138 #ifdef _WIN64 6139 // Windows always allocates space for its register args 6140 assert(num_args <= 4, "only register arguments supported"); 6141 subq(rsp, frame::arg_reg_save_area_bytes); 6142 #endif 6143 6144 // Align stack if necessary 6145 testl(rsp, 15); 6146 jcc(Assembler::zero, L); 6147 6148 subq(rsp, 8); 6149 { 6150 call(RuntimeAddress(entry_point)); 6151 } 6152 addq(rsp, 8); 6153 jmp(E); 6154 6155 bind(L); 6156 { 6157 call(RuntimeAddress(entry_point)); 6158 } 6159 6160 bind(E); 6161 6162 #ifdef _WIN64 6163 // restore stack pointer 6164 addq(rsp, frame::arg_reg_save_area_bytes); 6165 #endif 6166 6167 } 6168 6169 void MacroAssembler::cmp64(Register src1, AddressLiteral src2) { 6170 assert(!src2.is_lval(), "should use cmpptr"); 6171 6172 if (reachable(src2)) { 6173 cmpq(src1, as_Address(src2)); 6174 } else { 6175 lea(rscratch1, src2); 6176 Assembler::cmpq(src1, Address(rscratch1, 0)); 6177 } 6178 } 6179 6180 int MacroAssembler::corrected_idivq(Register reg) { 6181 // Full implementation of Java ldiv and lrem; checks for special 6182 // case as described in JVM spec., p.243 & p.271. The function 6183 // returns the (pc) offset of the idivq instruction - may be needed 6184 // for implicit exceptions.
6185 // 6186 // normal case special case 6187 // 6188 // input : rax: dividend min_long 6189 // reg: divisor (may not be eax/edx) -1 6190 // 6191 // output: rax: quotient (= rax idiv reg) min_long 6192 // rdx: remainder (= rax irem reg) 0 6193 assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register"); 6194 static const int64_t min_long = 0x8000000000000000; 6195 Label normal_case, special_case; 6196 6197 // check for special case 6198 cmp64(rax, ExternalAddress((address) &min_long)); 6199 jcc(Assembler::notEqual, normal_case); 6200 xorl(rdx, rdx); // prepare rdx for possible special case (where 6201 // remainder = 0) 6202 cmpq(reg, -1); 6203 jcc(Assembler::equal, special_case); 6204 6205 // handle normal case 6206 bind(normal_case); 6207 cdqq(); 6208 int idivq_offset = offset(); 6209 idivq(reg); 6210 6211 // normal and special case exit 6212 bind(special_case); 6213 6214 return idivq_offset; 6215} 6216 6217void MacroAssembler::decrementq(Register reg, int value) { 6218 if (value == min_jint) { subq(reg, value); return; } 6219 if (value < 0) { incrementq(reg, -value); return; } 6220 if (value == 0) { ; return; } 6221 if (value == 1 && UseIncDec) { decq(reg) ; return; } 6222 /* else */ { subq(reg, value) ; return; } 6223} 6224 6225void MacroAssembler::decrementq(Address dst, int value) { 6226 if (value == min_jint) { subq(dst, value); return; } 6227 if (value < 0) { incrementq(dst, -value); return; } 6228 if (value == 0) { ; return; } 6229 if (value == 1 && UseIncDec) { decq(dst) ; return; } 6230 /* else */ { subq(dst, value) ; return; } 6231} 6232 6233void MacroAssembler::incrementq(Register reg, int value) { 6234 if (value == min_jint) { addq(reg, value); return; } 6235 if (value < 0) { decrementq(reg, -value); return; } 6236 if (value == 0) { ; return; } 6237 if (value == 1 && UseIncDec) { incq(reg) ; return; } 6238 /* else */ { addq(reg, value) ; return; } 6239} 6240 6241void MacroAssembler::incrementq(Address dst, int value) { 6242 if (value == min_jint) { addq(dst, value); return; } 6243 if (value < 0) { decrementq(dst, -value); return; } 6244 if (value == 0) { ; return; } 6245 if (value == 1 && UseIncDec) { incq(dst) ; return; } 6246 /* else */ { addq(dst, value) ; return; } 6247} 6248 6249// 32bit can do a case table jump in one instruction but we no longer allow the base 6250// to be installed in the Address class 6251void MacroAssembler::jump(ArrayAddress entry) { 6252 lea(rscratch1, entry.base()); 6253 Address dispatch = entry.index(); 6254 assert(dispatch._base == noreg, "must be"); 6255 dispatch._base = rscratch1; 6256 jmp(dispatch); 6257} 6258 6259void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) { 6260 ShouldNotReachHere(); // 64bit doesn't use two regs 6261 cmpq(x_lo, y_lo); 6262} 6263 6264void MacroAssembler::lea(Register dst, AddressLiteral src) { 6265 mov_literal64(dst, (intptr_t)src.target(), src.rspec()); 6266} 6267 6268void MacroAssembler::lea(Address dst, AddressLiteral adr) { 6269 mov_literal64(rscratch1, (intptr_t)adr.target(), adr.rspec()); 6270 movptr(dst, rscratch1); 6271} 6272 6273void MacroAssembler::leave() { 6274 // %%% is this really better? Why not on 32bit too? 
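// 0xC9 is the one-byte LEAVE instruction, equivalent to the
// mov(rsp, rbp); pop(rbp) sequence used by the 32-bit leave() above.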
6275 emit_byte(0xC9); // LEAVE 6276} 6277 6278void MacroAssembler::lneg(Register hi, Register lo) { 6279 ShouldNotReachHere(); // 64bit doesn't use two regs 6280 negq(lo); 6281} 6282 6283void MacroAssembler::movoop(Register dst, jobject obj) { 6284 mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate()); 6285} 6286 6287void MacroAssembler::movoop(Address dst, jobject obj) { 6288 mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate()); 6289 movq(dst, rscratch1); 6290} 6291 6292void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { 6293 mov_literal64(dst, (intptr_t)obj, metadata_Relocation::spec_for_immediate()); 6294} 6295 6296void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { 6297 mov_literal64(rscratch1, (intptr_t)obj, metadata_Relocation::spec_for_immediate()); 6298 movq(dst, rscratch1); 6299} 6300 6301void MacroAssembler::movptr(Register dst, AddressLiteral src) { 6302 if (src.is_lval()) { 6303 mov_literal64(dst, (intptr_t)src.target(), src.rspec()); 6304 } else { 6305 if (reachable(src)) { 6306 movq(dst, as_Address(src)); 6307 } else { 6308 lea(rscratch1, src); 6309 movq(dst, Address(rscratch1,0)); 6310 } 6311 } 6312} 6313 6314void MacroAssembler::movptr(ArrayAddress dst, Register src) { 6315 movq(as_Address(dst), src); 6316} 6317 6318void MacroAssembler::movptr(Register dst, ArrayAddress src) { 6319 movq(dst, as_Address(src)); 6320} 6321 6322// src should NEVER be a real pointer. Use AddressLiteral for true pointers 6323void MacroAssembler::movptr(Address dst, intptr_t src) { 6324 mov64(rscratch1, src); 6325 movq(dst, rscratch1); 6326} 6327 6328// These are mostly for initializing NULL 6329void MacroAssembler::movptr(Address dst, int32_t src) { 6330 movslq(dst, src); 6331} 6332 6333void MacroAssembler::movptr(Register dst, int32_t src) { 6334 mov64(dst, (intptr_t)src); 6335} 6336 6337void MacroAssembler::pushoop(jobject obj) { 6338 movoop(rscratch1, obj); 6339 push(rscratch1); 6340} 6341 6342void MacroAssembler::pushklass(Metadata* obj) { 6343 mov_metadata(rscratch1, obj); 6344 push(rscratch1); 6345} 6346 6347void MacroAssembler::pushptr(AddressLiteral src) { 6348 lea(rscratch1, src); 6349 if (src.is_lval()) { 6350 push(rscratch1); 6351 } else { 6352 pushq(Address(rscratch1, 0)); 6353 } 6354} 6355 6356void MacroAssembler::reset_last_Java_frame(bool clear_fp, 6357 bool clear_pc) { 6358 // we must set sp to zero to clear frame 6359 movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), NULL_WORD); 6360 // must clear fp, so that compiled frames are not confused; it is 6361 // possible that we need it only for debugging 6362 if (clear_fp) { 6363 movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD); 6364 } 6365 6366 if (clear_pc) { 6367 movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), NULL_WORD); 6368 } 6369} 6370 6371void MacroAssembler::set_last_Java_frame(Register last_java_sp, 6372 Register last_java_fp, 6373 address last_java_pc) { 6374 // determine last_java_sp register 6375 if (!last_java_sp->is_valid()) { 6376 last_java_sp = rsp; 6377 } 6378 6379 // last_java_fp is optional 6380 if (last_java_fp->is_valid()) { 6381 movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), 6382 last_java_fp); 6383 } 6384 6385 // last_java_pc is optional 6386 if (last_java_pc != NULL) { 6387 Address java_pc(r15_thread, 6388 JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()); 6389 lea(rscratch1, InternalAddress(last_java_pc)); 6390 movptr(java_pc, rscratch1); 6391 } 
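// Note: last_java_sp is stored last; reset_last_Java_frame clears it first,
// so a non-zero last_Java_sp is what marks this anchor as set.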
6392 6393 movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), last_java_sp); 6394 } 6395 6396 static void pass_arg0(MacroAssembler* masm, Register arg) { 6397 if (c_rarg0 != arg ) { 6398 masm->mov(c_rarg0, arg); 6399 } 6400 } 6401 6402 static void pass_arg1(MacroAssembler* masm, Register arg) { 6403 if (c_rarg1 != arg ) { 6404 masm->mov(c_rarg1, arg); 6405 } 6406 } 6407 6408 static void pass_arg2(MacroAssembler* masm, Register arg) { 6409 if (c_rarg2 != arg ) { 6410 masm->mov(c_rarg2, arg); 6411 } 6412 } 6413 6414 static void pass_arg3(MacroAssembler* masm, Register arg) { 6415 if (c_rarg3 != arg ) { 6416 masm->mov(c_rarg3, arg); 6417 } 6418 } 6419 6420 void MacroAssembler::stop(const char* msg) { 6421 address rip = pc(); 6422 pusha(); // get regs on stack 6423 lea(c_rarg0, ExternalAddress((address) msg)); 6424 lea(c_rarg1, InternalAddress(rip)); 6425 movq(c_rarg2, rsp); // pass pointer to regs array 6426 andq(rsp, -16); // align stack as required by ABI 6427 call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64))); 6428 hlt(); 6429 } 6430 6431 void MacroAssembler::warn(const char* msg) { 6432 push(rbp); 6433 movq(rbp, rsp); 6434 andq(rsp, -16); // align stack as required by push_CPU_state and call 6435 push_CPU_state(); // keeps alignment at 16 bytes 6436 lea(c_rarg0, ExternalAddress((address) msg)); 6437 call_VM_leaf(CAST_FROM_FN_PTR(address, warning), c_rarg0); 6438 pop_CPU_state(); 6439 mov(rsp, rbp); 6440 pop(rbp); 6441 } 6442 6443 void MacroAssembler::print_state() { 6444 address rip = pc(); 6445 pusha(); // get regs on stack 6446 push(rbp); 6447 movq(rbp, rsp); 6448 andq(rsp, -16); // align stack as required by push_CPU_state and call 6449 push_CPU_state(); // keeps alignment at 16 bytes 6450 6451 lea(c_rarg0, InternalAddress(rip)); 6452 lea(c_rarg1, Address(rbp, wordSize)); // pass pointer to regs array 6453 call_VM_leaf(CAST_FROM_FN_PTR(address, MacroAssembler::print_state64), c_rarg0, c_rarg1); 6454 6455 pop_CPU_state(); 6456 mov(rsp, rbp); 6457 pop(rbp); 6458 popa(); 6459 } 6460 6461 #ifndef PRODUCT 6462 extern "C" void findpc(intptr_t x); 6463 #endif 6464 6465 void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) { 6466 // In order to get locks to work, we need to fake an in_VM state 6467 if (ShowMessageBoxOnError) { 6468 JavaThread* thread = JavaThread::current(); 6469 JavaThreadState saved_state = thread->thread_state(); 6470 thread->set_thread_state(_thread_in_vm); 6471 #ifndef PRODUCT 6472 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { 6473 ttyLocker ttyl; 6474 BytecodeCounter::print(); 6475 } 6476 #endif 6477 // To see where a verify_oop failed, get $ebx+40/X for this frame. 6478 // XXX correct this offset for amd64 6479 // This is the value of eip which points to where verify_oop will return.
6480 if (os::message_box(msg, "Execution stopped, print registers?")) { 6481 print_state64(pc, regs); 6482 BREAKPOINT; 6483 assert(false, "start up GDB"); 6484 } 6485 ThreadStateTransition::transition(thread, _thread_in_vm, saved_state); 6486 } else { 6487 ttyLocker ttyl; 6488 ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", 6489 msg); 6490 assert(false, err_msg("DEBUG MESSAGE: %s", msg)); 6491 } 6492 } 6493 6494 void MacroAssembler::print_state64(int64_t pc, int64_t regs[]) { 6495 ttyLocker ttyl; 6496 FlagSetting fs(Debugging, true); 6497 tty->print_cr("rip = 0x%016lx", pc); 6498 #ifndef PRODUCT 6499 tty->cr(); 6500 findpc(pc); 6501 tty->cr(); 6502 #endif 6503 #define PRINT_REG(rax, value) \ 6504 { tty->print("%s = ", #rax); os::print_location(tty, value); } 6505 PRINT_REG(rax, regs[15]); 6506 PRINT_REG(rbx, regs[12]); 6507 PRINT_REG(rcx, regs[14]); 6508 PRINT_REG(rdx, regs[13]); 6509 PRINT_REG(rdi, regs[8]); 6510 PRINT_REG(rsi, regs[9]); 6511 PRINT_REG(rbp, regs[10]); 6512 PRINT_REG(rsp, regs[11]); 6513 PRINT_REG(r8 , regs[7]); 6514 PRINT_REG(r9 , regs[6]); 6515 PRINT_REG(r10, regs[5]); 6516 PRINT_REG(r11, regs[4]); 6517 PRINT_REG(r12, regs[3]); 6518 PRINT_REG(r13, regs[2]); 6519 PRINT_REG(r14, regs[1]); 6520 PRINT_REG(r15, regs[0]); 6521 #undef PRINT_REG 6522 // Print some words near top of stack. 6523 int64_t* rsp = (int64_t*) regs[11]; 6524 int64_t* dump_sp = rsp; 6525 for (int col1 = 0; col1 < 8; col1++) { 6526 tty->print("(rsp+0x%03x) 0x%016lx: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (int64_t)dump_sp); 6527 os::print_location(tty, *dump_sp++); 6528 } 6529 for (int row = 0; row < 25; row++) { 6530 tty->print("(rsp+0x%03x) 0x%016lx: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (int64_t)dump_sp); 6531 for (int col = 0; col < 4; col++) { 6532 tty->print(" 0x%016lx", *dump_sp++); 6533 } 6534 tty->cr(); 6535 } 6536 // Print some instructions around pc: 6537 Disassembler::decode((address)pc-64, (address)pc); 6538 tty->print_cr("--------"); 6539 Disassembler::decode((address)pc, (address)pc+32); 6540 } 6541 6542 #endif // _LP64 6543 6544 // Now versions that are common to 32/64 bit 6545 6546 void MacroAssembler::addptr(Register dst, int32_t imm32) { 6547 LP64_ONLY(addq(dst, imm32)) NOT_LP64(addl(dst, imm32)); 6548 } 6549 6550 void MacroAssembler::addptr(Register dst, Register src) { 6551 LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); 6552 } 6553 6554 void MacroAssembler::addptr(Address dst, Register src) { 6555 LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); 6556 } 6557 6558 void MacroAssembler::addsd(XMMRegister dst, AddressLiteral src) { 6559 if (reachable(src)) { 6560 Assembler::addsd(dst, as_Address(src)); 6561 } else { 6562 lea(rscratch1, src); 6563 Assembler::addsd(dst, Address(rscratch1, 0)); 6564 } 6565 } 6566 6567 void MacroAssembler::addss(XMMRegister dst, AddressLiteral src) { 6568 if (reachable(src)) { 6569 addss(dst, as_Address(src)); 6570 } else { 6571 lea(rscratch1, src); 6572 addss(dst, Address(rscratch1, 0)); 6573 } 6574 } 6575 6576 void MacroAssembler::align(int modulus) { 6577 if (offset() % modulus != 0) { 6578 nop(modulus - (offset() % modulus)); 6579 } 6580 } 6581 6582 void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) { 6583 // Used in sign-masking with aligned address.
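// Typical use (illustrative): computing fabs by and'ing a double with a
// 16-byte-aligned mask of 0x7FFFFFFFFFFFFFFF to clear the sign bit.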
6584 assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); 6585 if (reachable(src)) { 6586 Assembler::andpd(dst, as_Address(src)); 6587 } else { 6588 lea(rscratch1, src); 6589 Assembler::andpd(dst, Address(rscratch1, 0)); 6590 } 6591 } 6592 6593 void MacroAssembler::andps(XMMRegister dst, AddressLiteral src) { 6594 // Used in sign-masking with aligned address. 6595 assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); 6596 if (reachable(src)) { 6597 Assembler::andps(dst, as_Address(src)); 6598 } else { 6599 lea(rscratch1, src); 6600 Assembler::andps(dst, Address(rscratch1, 0)); 6601 } 6602 } 6603 6604 void MacroAssembler::andptr(Register dst, int32_t imm32) { 6605 LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32)); 6606 } 6607 6608 void MacroAssembler::atomic_incl(AddressLiteral counter_addr) { 6609 pushf(); 6610 if (os::is_MP()) 6611 lock(); 6612 incrementl(counter_addr); 6613 popf(); 6614 } 6615 6616 // Writes to stack successive pages until offset reached to check for 6617 // stack overflow + shadow pages. This clobbers tmp. 6618 void MacroAssembler::bang_stack_size(Register size, Register tmp) { 6619 movptr(tmp, rsp); 6620 // Bang stack for total size given plus shadow page size. 6621 // Bang one page at a time because large size can bang beyond yellow and 6622 // red zones. 6623 Label loop; 6624 bind(loop); 6625 movl(Address(tmp, (-os::vm_page_size())), size ); 6626 subptr(tmp, os::vm_page_size()); 6627 subl(size, os::vm_page_size()); 6628 jcc(Assembler::greater, loop); 6629 6630 // Bang down shadow pages too. 6631 // The -1 because we already subtracted 1 page. 6632 for (int i = 0; i< StackShadowPages-1; i++) { 6633 // this could be any sized move but this can be a debugging crumb 6634 // so the bigger the better. 6635 movptr(Address(tmp, (-i*os::vm_page_size())), size ); 6636 } 6637 } 6638 6639 void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { 6640 assert(UseBiasedLocking, "why call this otherwise?"); 6641 6642 // Check for biased locking unlock case, which is a no-op 6643 // Note: we do not have to check the thread ID for two reasons. 6644 // First, the interpreter checks for IllegalMonitorStateException at 6645 // a higher level. Second, if the bias was revoked while we held the 6646 // lock, the object could not be rebiased toward another thread, so 6647 // the bias bit would be clear. 6648 movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); 6649 andptr(temp_reg, markOopDesc::biased_lock_mask_in_place); 6650 cmpptr(temp_reg, markOopDesc::biased_lock_pattern); 6651 jcc(Assembler::equal, done); 6652 } 6653 6654 void MacroAssembler::c2bool(Register x) { 6655 // implements x == 0 ? 0 : 1 6656 // note: must only look at least-significant byte of x 6657 // since C-style booleans are stored in one byte 6658 // only!
(was bug) 6659 andl(x, 0xFF); 6660 setb(Assembler::notZero, x); 6661} 6662 6663// Wouldn't need if AddressLiteral version had new name 6664void MacroAssembler::call(Label& L, relocInfo::relocType rtype) { 6665 Assembler::call(L, rtype); 6666} 6667 6668void MacroAssembler::call(Register entry) { 6669 Assembler::call(entry); 6670} 6671 6672void MacroAssembler::call(AddressLiteral entry) { 6673 if (reachable(entry)) { 6674 Assembler::call_literal(entry.target(), entry.rspec()); 6675 } else { 6676 lea(rscratch1, entry); 6677 Assembler::call(rscratch1); 6678 } 6679} 6680 6681void MacroAssembler::ic_call(address entry) { 6682 RelocationHolder rh = virtual_call_Relocation::spec(pc()); 6683 movptr(rax, (intptr_t)Universe::non_oop_word()); 6684 call(AddressLiteral(entry, rh)); 6685} 6686 6687// Implementation of call_VM versions 6688 6689void MacroAssembler::call_VM(Register oop_result, 6690 address entry_point, 6691 bool check_exceptions) { 6692 Label C, E; 6693 call(C, relocInfo::none); 6694 jmp(E); 6695 6696 bind(C); 6697 call_VM_helper(oop_result, entry_point, 0, check_exceptions); 6698 ret(0); 6699 6700 bind(E); 6701} 6702 6703void MacroAssembler::call_VM(Register oop_result, 6704 address entry_point, 6705 Register arg_1, 6706 bool check_exceptions) { 6707 Label C, E; 6708 call(C, relocInfo::none); 6709 jmp(E); 6710 6711 bind(C); 6712 pass_arg1(this, arg_1); 6713 call_VM_helper(oop_result, entry_point, 1, check_exceptions); 6714 ret(0); 6715 6716 bind(E); 6717} 6718 6719void MacroAssembler::call_VM(Register oop_result, 6720 address entry_point, 6721 Register arg_1, 6722 Register arg_2, 6723 bool check_exceptions) { 6724 Label C, E; 6725 call(C, relocInfo::none); 6726 jmp(E); 6727 6728 bind(C); 6729 6730 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 6731 6732 pass_arg2(this, arg_2); 6733 pass_arg1(this, arg_1); 6734 call_VM_helper(oop_result, entry_point, 2, check_exceptions); 6735 ret(0); 6736 6737 bind(E); 6738} 6739 6740void MacroAssembler::call_VM(Register oop_result, 6741 address entry_point, 6742 Register arg_1, 6743 Register arg_2, 6744 Register arg_3, 6745 bool check_exceptions) { 6746 Label C, E; 6747 call(C, relocInfo::none); 6748 jmp(E); 6749 6750 bind(C); 6751 6752 LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg")); 6753 LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg")); 6754 pass_arg3(this, arg_3); 6755 6756 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 6757 pass_arg2(this, arg_2); 6758 6759 pass_arg1(this, arg_1); 6760 call_VM_helper(oop_result, entry_point, 3, check_exceptions); 6761 ret(0); 6762 6763 bind(E); 6764} 6765 6766void MacroAssembler::call_VM(Register oop_result, 6767 Register last_java_sp, 6768 address entry_point, 6769 int number_of_arguments, 6770 bool check_exceptions) { 6771 Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg); 6772 call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions); 6773} 6774 6775void MacroAssembler::call_VM(Register oop_result, 6776 Register last_java_sp, 6777 address entry_point, 6778 Register arg_1, 6779 bool check_exceptions) { 6780 pass_arg1(this, arg_1); 6781 call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); 6782} 6783 6784void MacroAssembler::call_VM(Register oop_result, 6785 Register last_java_sp, 6786 address entry_point, 6787 Register arg_1, 6788 Register arg_2, 6789 bool check_exceptions) { 6790 6791 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 6792 pass_arg2(this, arg_2); 6793 pass_arg1(this, arg_1); 6794 call_VM(oop_result, last_java_sp, 
entry_point, 2, check_exceptions); 6795} 6796 6797void MacroAssembler::call_VM(Register oop_result, 6798 Register last_java_sp, 6799 address entry_point, 6800 Register arg_1, 6801 Register arg_2, 6802 Register arg_3, 6803 bool check_exceptions) { 6804 LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg")); 6805 LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg")); 6806 pass_arg3(this, arg_3); 6807 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 6808 pass_arg2(this, arg_2); 6809 pass_arg1(this, arg_1); 6810 call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); 6811} 6812 6813void MacroAssembler::super_call_VM(Register oop_result, 6814 Register last_java_sp, 6815 address entry_point, 6816 int number_of_arguments, 6817 bool check_exceptions) { 6818 Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg); 6819 MacroAssembler::call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions); 6820} 6821 6822void MacroAssembler::super_call_VM(Register oop_result, 6823 Register last_java_sp, 6824 address entry_point, 6825 Register arg_1, 6826 bool check_exceptions) { 6827 pass_arg1(this, arg_1); 6828 super_call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); 6829} 6830 6831void MacroAssembler::super_call_VM(Register oop_result, 6832 Register last_java_sp, 6833 address entry_point, 6834 Register arg_1, 6835 Register arg_2, 6836 bool check_exceptions) { 6837 6838 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 6839 pass_arg2(this, arg_2); 6840 pass_arg1(this, arg_1); 6841 super_call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); 6842} 6843 6844void MacroAssembler::super_call_VM(Register oop_result, 6845 Register last_java_sp, 6846 address entry_point, 6847 Register arg_1, 6848 Register arg_2, 6849 Register arg_3, 6850 bool check_exceptions) { 6851 LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg")); 6852 LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg")); 6853 pass_arg3(this, arg_3); 6854 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 6855 pass_arg2(this, arg_2); 6856 pass_arg1(this, arg_1); 6857 super_call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); 6858} 6859 6860void MacroAssembler::call_VM_base(Register oop_result, 6861 Register java_thread, 6862 Register last_java_sp, 6863 address entry_point, 6864 int number_of_arguments, 6865 bool check_exceptions) { 6866 // determine java_thread register 6867 if (!java_thread->is_valid()) { 6868#ifdef _LP64 6869 java_thread = r15_thread; 6870#else 6871 java_thread = rdi; 6872 get_thread(java_thread); 6873#endif // LP64 6874 } 6875 // determine last_java_sp register 6876 if (!last_java_sp->is_valid()) { 6877 last_java_sp = rsp; 6878 } 6879 // debugging support 6880 assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); 6881 LP64_ONLY(assert(java_thread == r15_thread, "unexpected register")); 6882#ifdef ASSERT 6883 // TraceBytecodes does not use r12 but saves it over the call, so don't verify 6884 // r12 is the heapbase. 
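// (With compressed oops, r12 holds the narrow-oop base used to decode heap
// pointers, so a clobbered r12 would corrupt every subsequent oop decode;
// verify_heapbase checks that it is still intact.)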
6885 LP64_ONLY(if (UseCompressedOops && !TraceBytecodes) verify_heapbase("call_VM_base");) 6886 #endif // ASSERT 6887 6888 assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); 6889 assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); 6890 6891 // push java thread (becomes first argument of C function) 6892 6893 NOT_LP64(push(java_thread); number_of_arguments++); 6894 LP64_ONLY(mov(c_rarg0, r15_thread)); 6895 6896 // set last Java frame before call 6897 assert(last_java_sp != rbp, "can't use ebp/rbp"); 6898 6899 // Only interpreter should have to set fp 6900 set_last_Java_frame(java_thread, last_java_sp, rbp, NULL); 6901 6902 // do the call, remove parameters 6903 MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); 6904 6905 // restore the thread (cannot use the pushed argument since arguments 6906 // may be overwritten by C code generated by an optimizing compiler); 6907 // however can use the register value directly if it is callee saved. 6908 if (LP64_ONLY(true ||) java_thread == rdi || java_thread == rsi) { 6909 // rdi & rsi (also r15) are callee saved -> nothing to do 6910 #ifdef ASSERT 6911 guarantee(java_thread != rax, "change this code"); 6912 push(rax); 6913 { Label L; 6914 get_thread(rax); 6915 cmpptr(java_thread, rax); 6916 jcc(Assembler::equal, L); 6917 STOP("MacroAssembler::call_VM_base: rdi not callee saved?"); 6918 bind(L); 6919 } 6920 pop(rax); 6921 #endif 6922 } else { 6923 get_thread(java_thread); 6924 } 6925 // reset last Java frame 6926 // Only interpreter should have to clear fp 6927 reset_last_Java_frame(java_thread, true, false); 6928 6929 #ifndef CC_INTERP 6930 // C++ interp handles this in the interpreter 6931 check_and_handle_popframe(java_thread); 6932 check_and_handle_earlyret(java_thread); 6933 #endif /* CC_INTERP */ 6934 6935 if (check_exceptions) { 6936 // check for pending exceptions (java_thread is set upon return) 6937 cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD); 6938 #ifndef _LP64 6939 jump_cc(Assembler::notEqual, 6940 RuntimeAddress(StubRoutines::forward_exception_entry())); 6941 #else 6942 // This used to conditionally jump to forward_exception however it is 6943 // possible if we relocate that the branch will not reach. So we must jump 6944 // around so we can always reach 6945 6946 Label ok; 6947 jcc(Assembler::equal, ok); 6948 jump(RuntimeAddress(StubRoutines::forward_exception_entry())); 6949 bind(ok); 6950 #endif // LP64 6951 } 6952 6953 // get oop result if there is one and reset the value in the thread 6954 if (oop_result->is_valid()) { 6955 get_vm_result(oop_result, java_thread); 6956 } 6957 } 6958 6959 void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { 6960 6961 // Calculate the value for last_Java_sp 6962 // somewhat subtle. call_VM does an intermediate call 6963 // which places a return address on the stack just under the 6964 // stack pointer as the user finished with it. This allows 6965 // us to retrieve last_Java_pc from last_Java_sp[-1]. 6966 // On 32bit we then have to push additional args on the stack to accomplish 6967 // the actual requested call. On 64bit call_VM only can use register args 6968 // so the only extra space is the return address that call_VM created. 6969 // This hopefully explains the calculations here.
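// Concretely: on 64-bit the only extra word on the stack is the return
// address pushed by the intermediate call, so last_Java_sp == rsp + wordSize;
// on 32-bit the number_of_arguments argument words pushed by pass_arg* sit
// below that return address as well, giving
// last_Java_sp == rsp + (1 + number_of_arguments) * wordSize.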
6970 6971#ifdef _LP64 6972 // We've pushed one address, correct last_Java_sp 6973 lea(rax, Address(rsp, wordSize)); 6974#else 6975 lea(rax, Address(rsp, (1 + number_of_arguments) * wordSize)); 6976#endif // LP64 6977 6978 call_VM_base(oop_result, noreg, rax, entry_point, number_of_arguments, check_exceptions); 6979 6980} 6981 6982void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { 6983 call_VM_leaf_base(entry_point, number_of_arguments); 6984} 6985 6986void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { 6987 pass_arg0(this, arg_0); 6988 call_VM_leaf(entry_point, 1); 6989} 6990 6991void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { 6992 6993 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); 6994 pass_arg1(this, arg_1); 6995 pass_arg0(this, arg_0); 6996 call_VM_leaf(entry_point, 2); 6997} 6998 6999void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { 7000 LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg")); 7001 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 7002 pass_arg2(this, arg_2); 7003 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); 7004 pass_arg1(this, arg_1); 7005 pass_arg0(this, arg_0); 7006 call_VM_leaf(entry_point, 3); 7007} 7008 7009void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) { 7010 pass_arg0(this, arg_0); 7011 MacroAssembler::call_VM_leaf_base(entry_point, 1); 7012} 7013 7014void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { 7015 7016 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); 7017 pass_arg1(this, arg_1); 7018 pass_arg0(this, arg_0); 7019 MacroAssembler::call_VM_leaf_base(entry_point, 2); 7020} 7021 7022void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { 7023 LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg")); 7024 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 7025 pass_arg2(this, arg_2); 7026 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); 7027 pass_arg1(this, arg_1); 7028 pass_arg0(this, arg_0); 7029 MacroAssembler::call_VM_leaf_base(entry_point, 3); 7030} 7031 7032void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) { 7033 LP64_ONLY(assert(arg_0 != c_rarg3, "smashed arg")); 7034 LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg")); 7035 LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg")); 7036 pass_arg3(this, arg_3); 7037 LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg")); 7038 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 7039 pass_arg2(this, arg_2); 7040 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); 7041 pass_arg1(this, arg_1); 7042 pass_arg0(this, arg_0); 7043 MacroAssembler::call_VM_leaf_base(entry_point, 4); 7044} 7045 7046void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { 7047 movptr(oop_result, Address(java_thread, JavaThread::vm_result_offset())); 7048 movptr(Address(java_thread, JavaThread::vm_result_offset()), NULL_WORD); 7049 verify_oop(oop_result, "broken oop in call_VM_base"); 7050} 7051 7052void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { 7053 movptr(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); 7054 movptr(Address(java_thread, JavaThread::vm_result_2_offset()), NULL_WORD); 7055} 7056 7057void MacroAssembler::check_and_handle_earlyret(Register java_thread) { 7058} 7059 7060void 
MacroAssembler::check_and_handle_popframe(Register java_thread) { 7061} 7062 7063void MacroAssembler::cmp32(AddressLiteral src1, int32_t imm) { 7064 if (reachable(src1)) { 7065 cmpl(as_Address(src1), imm); 7066 } else { 7067 lea(rscratch1, src1); 7068 cmpl(Address(rscratch1, 0), imm); 7069 } 7070} 7071 7072void MacroAssembler::cmp32(Register src1, AddressLiteral src2) { 7073 assert(!src2.is_lval(), "use cmpptr"); 7074 if (reachable(src2)) { 7075 cmpl(src1, as_Address(src2)); 7076 } else { 7077 lea(rscratch1, src2); 7078 cmpl(src1, Address(rscratch1, 0)); 7079 } 7080} 7081 7082void MacroAssembler::cmp32(Register src1, int32_t imm) { 7083 Assembler::cmpl(src1, imm); 7084} 7085 7086void MacroAssembler::cmp32(Register src1, Address src2) { 7087 Assembler::cmpl(src1, src2); 7088} 7089 7090void MacroAssembler::cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) { 7091 ucomisd(opr1, opr2); 7092 7093 Label L; 7094 if (unordered_is_less) { 7095 movl(dst, -1); 7096 jcc(Assembler::parity, L); 7097 jcc(Assembler::below , L); 7098 movl(dst, 0); 7099 jcc(Assembler::equal , L); 7100 increment(dst); 7101 } else { // unordered is greater 7102 movl(dst, 1); 7103 jcc(Assembler::parity, L); 7104 jcc(Assembler::above , L); 7105 movl(dst, 0); 7106 jcc(Assembler::equal , L); 7107 decrementl(dst); 7108 } 7109 bind(L); 7110} 7111 7112void MacroAssembler::cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) { 7113 ucomiss(opr1, opr2); 7114 7115 Label L; 7116 if (unordered_is_less) { 7117 movl(dst, -1); 7118 jcc(Assembler::parity, L); 7119 jcc(Assembler::below , L); 7120 movl(dst, 0); 7121 jcc(Assembler::equal , L); 7122 increment(dst); 7123 } else { // unordered is greater 7124 movl(dst, 1); 7125 jcc(Assembler::parity, L); 7126 jcc(Assembler::above , L); 7127 movl(dst, 0); 7128 jcc(Assembler::equal , L); 7129 decrementl(dst); 7130 } 7131 bind(L); 7132} 7133 7134 7135void MacroAssembler::cmp8(AddressLiteral src1, int imm) { 7136 if (reachable(src1)) { 7137 cmpb(as_Address(src1), imm); 7138 } else { 7139 lea(rscratch1, src1); 7140 cmpb(Address(rscratch1, 0), imm); 7141 } 7142} 7143 7144void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) { 7145#ifdef _LP64 7146 if (src2.is_lval()) { 7147 movptr(rscratch1, src2); 7148 Assembler::cmpq(src1, rscratch1); 7149 } else if (reachable(src2)) { 7150 cmpq(src1, as_Address(src2)); 7151 } else { 7152 lea(rscratch1, src2); 7153 Assembler::cmpq(src1, Address(rscratch1, 0)); 7154 } 7155#else 7156 if (src2.is_lval()) { 7157 cmp_literal32(src1, (int32_t) src2.target(), src2.rspec()); 7158 } else { 7159 cmpl(src1, as_Address(src2)); 7160 } 7161#endif // _LP64 7162} 7163 7164void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) { 7165 assert(src2.is_lval(), "not a mem-mem compare"); 7166#ifdef _LP64 7167 // moves src2's literal address 7168 movptr(rscratch1, src2); 7169 Assembler::cmpq(src1, rscratch1); 7170#else 7171 cmp_literal32(src1, (int32_t) src2.target(), src2.rspec()); 7172#endif // _LP64 7173} 7174 7175void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) { 7176 if (reachable(adr)) { 7177 if (os::is_MP()) 7178 lock(); 7179 cmpxchgptr(reg, as_Address(adr)); 7180 } else { 7181 lea(rscratch1, adr); 7182 if (os::is_MP()) 7183 lock(); 7184 cmpxchgptr(reg, Address(rscratch1, 0)); 7185 } 7186} 7187 7188void MacroAssembler::cmpxchgptr(Register reg, Address adr) { 7189 LP64_ONLY(cmpxchgq(reg, adr)) NOT_LP64(cmpxchgl(reg, adr)); 7190} 7191 7192void MacroAssembler::comisd(XMMRegister dst, 
AddressLiteral src) { 7193 if (reachable(src)) { 7194 Assembler::comisd(dst, as_Address(src)); 7195 } else { 7196 lea(rscratch1, src); 7197 Assembler::comisd(dst, Address(rscratch1, 0)); 7198 } 7199} 7200 7201void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) { 7202 if (reachable(src)) { 7203 Assembler::comiss(dst, as_Address(src)); 7204 } else { 7205 lea(rscratch1, src); 7206 Assembler::comiss(dst, Address(rscratch1, 0)); 7207 } 7208} 7209 7210 7211void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) { 7212 Condition negated_cond = negate_condition(cond); 7213 Label L; 7214 jcc(negated_cond, L); 7215 atomic_incl(counter_addr); 7216 bind(L); 7217} 7218 7219int MacroAssembler::corrected_idivl(Register reg) { 7220 // Full implementation of Java idiv and irem; checks for 7221 // special case as described in JVM spec., p.243 & p.271. 7222 // The function returns the (pc) offset of the idivl 7223 // instruction - may be needed for implicit exceptions. 7224 // 7225 // normal case special case 7226 // 7227 // input : rax,: dividend min_int 7228 // reg: divisor (may not be rax,/rdx) -1 7229 // 7230 // output: rax,: quotient (= rax, idiv reg) min_int 7231 // rdx: remainder (= rax, irem reg) 0 7232 assert(reg != rax && reg != rdx, "reg cannot be rax, or rdx register"); 7233 const int min_int = 0x80000000; 7234 Label normal_case, special_case; 7235 7236 // check for special case 7237 cmpl(rax, min_int); 7238 jcc(Assembler::notEqual, normal_case); 7239 xorl(rdx, rdx); // prepare rdx for possible special case (where remainder = 0) 7240 cmpl(reg, -1); 7241 jcc(Assembler::equal, special_case); 7242 7243 // handle normal case 7244 bind(normal_case); 7245 cdql(); 7246 int idivl_offset = offset(); 7247 idivl(reg); 7248 7249 // normal and special case exit 7250 bind(special_case); 7251 7252 return idivl_offset; 7253} 7254 7255 7256 7257void MacroAssembler::decrementl(Register reg, int value) { 7258 if (value == min_jint) {subl(reg, value) ; return; } 7259 if (value < 0) { incrementl(reg, -value); return; } 7260 if (value == 0) { ; return; } 7261 if (value == 1 && UseIncDec) { decl(reg) ; return; } 7262 /* else */ { subl(reg, value) ; return; } 7263} 7264 7265void MacroAssembler::decrementl(Address dst, int value) { 7266 if (value == min_jint) {subl(dst, value) ; return; } 7267 if (value < 0) { incrementl(dst, -value); return; } 7268 if (value == 0) { ; return; } 7269 if (value == 1 && UseIncDec) { decl(dst) ; return; } 7270 /* else */ { subl(dst, value) ; return; } 7271} 7272 7273void MacroAssembler::division_with_shift (Register reg, int shift_value) { 7274 assert (shift_value > 0, "illegal shift value"); 7275 Label _is_positive; 7276 testl (reg, reg); 7277 jcc (Assembler::positive, _is_positive); 7278 int offset = (1 << shift_value) - 1 ; 7279 7280 if (offset == 1) { 7281 incrementl(reg); 7282 } else { 7283 addl(reg, offset); 7284 } 7285 7286 bind (_is_positive); 7287 sarl(reg, shift_value); 7288} 7289 7290void MacroAssembler::divsd(XMMRegister dst, AddressLiteral src) { 7291 if (reachable(src)) { 7292 Assembler::divsd(dst, as_Address(src)); 7293 } else { 7294 lea(rscratch1, src); 7295 Assembler::divsd(dst, Address(rscratch1, 0)); 7296 } 7297} 7298 7299void MacroAssembler::divss(XMMRegister dst, AddressLiteral src) { 7300 if (reachable(src)) { 7301 Assembler::divss(dst, as_Address(src)); 7302 } else { 7303 lea(rscratch1, src); 7304 Assembler::divss(dst, Address(rscratch1, 0)); 7305 } 7306} 7307 7308// !defined(COMPILER2) is because of stupid core builds 7309#if 
!defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2) 7310void MacroAssembler::empty_FPU_stack() { 7311 if (VM_Version::supports_mmx()) { 7312 emms(); 7313 } else { 7314 for (int i = 8; i-- > 0; ) ffree(i); 7315 } 7316} 7317#endif // !LP64 || C1 || !C2 7318 7319 7320// Defines obj, preserves var_size_in_bytes 7321void MacroAssembler::eden_allocate(Register obj, 7322 Register var_size_in_bytes, 7323 int con_size_in_bytes, 7324 Register t1, 7325 Label& slow_case) { 7326 assert(obj == rax, "obj must be in rax, for cmpxchg"); 7327 assert_different_registers(obj, var_size_in_bytes, t1); 7328 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { 7329 jmp(slow_case); 7330 } else { 7331 Register end = t1; 7332 Label retry; 7333 bind(retry); 7334 ExternalAddress heap_top((address) Universe::heap()->top_addr()); 7335 movptr(obj, heap_top); 7336 if (var_size_in_bytes == noreg) { 7337 lea(end, Address(obj, con_size_in_bytes)); 7338 } else { 7339 lea(end, Address(obj, var_size_in_bytes, Address::times_1)); 7340 } 7341 // if end < obj then we wrapped around => object too long => slow case 7342 cmpptr(end, obj); 7343 jcc(Assembler::below, slow_case); 7344 cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr())); 7345 jcc(Assembler::above, slow_case); 7346 // Compare obj with the top addr, and if still equal, store the new top addr in 7347 // end at the address of the top addr pointer. Sets ZF if was equal, and clears 7348 // it otherwise. Use lock prefix for atomicity on MPs. 7349 locked_cmpxchgptr(end, heap_top); 7350 jcc(Assembler::notEqual, retry); 7351 } 7352} 7353 7354void MacroAssembler::enter() { 7355 push(rbp); 7356 mov(rbp, rsp); 7357} 7358 7359// A 5 byte nop that is safe for patching (see patch_verified_entry) 7360void MacroAssembler::fat_nop() { 7361 if (UseAddressNop) { 7362 addr_nop_5(); 7363 } else { 7364 emit_byte(0x26); // es: 7365 emit_byte(0x2e); // cs: 7366 emit_byte(0x64); // fs: 7367 emit_byte(0x65); // gs: 7368 emit_byte(0x90); 7369 } 7370} 7371 7372void MacroAssembler::fcmp(Register tmp) { 7373 fcmp(tmp, 1, true, true); 7374} 7375 7376void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) { 7377 assert(!pop_right || pop_left, "usage error"); 7378 if (VM_Version::supports_cmov()) { 7379 assert(tmp == noreg, "unneeded temp"); 7380 if (pop_left) { 7381 fucomip(index); 7382 } else { 7383 fucomi(index); 7384 } 7385 if (pop_right) { 7386 fpop(); 7387 } 7388 } else { 7389 assert(tmp != noreg, "need temp"); 7390 if (pop_left) { 7391 if (pop_right) { 7392 fcompp(); 7393 } else { 7394 fcomp(index); 7395 } 7396 } else { 7397 fcom(index); 7398 } 7399 // convert FPU condition into eflags condition via rax, 7400 save_rax(tmp); 7401 fwait(); fnstsw_ax(); 7402 sahf(); 7403 restore_rax(tmp); 7404 } 7405 // condition codes set as follows: 7406 // 7407 // CF (corresponds to C0) if x < y 7408 // PF (corresponds to C2) if unordered 7409 // ZF (corresponds to C3) if x = y 7410} 7411 7412void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less) { 7413 fcmp2int(dst, unordered_is_less, 1, true, true); 7414} 7415 7416void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right) { 7417 fcmp(VM_Version::supports_cmov() ? 
noreg : dst, index, pop_left, pop_right);
7418  Label L;
7419  if (unordered_is_less) {
7420    movl(dst, -1);
7421    jcc(Assembler::parity, L);
7422    jcc(Assembler::below , L);
7423    movl(dst, 0);
7424    jcc(Assembler::equal , L);
7425    increment(dst);
7426  } else { // unordered is greater
7427    movl(dst, 1);
7428    jcc(Assembler::parity, L);
7429    jcc(Assembler::above , L);
7430    movl(dst, 0);
7431    jcc(Assembler::equal , L);
7432    decrementl(dst);
7433  }
7434  bind(L);
7435}
7436
7437void MacroAssembler::fld_d(AddressLiteral src) {
7438  fld_d(as_Address(src));
7439}
7440
7441void MacroAssembler::fld_s(AddressLiteral src) {
7442  fld_s(as_Address(src));
7443}
7444
7445void MacroAssembler::fld_x(AddressLiteral src) {
7446  Assembler::fld_x(as_Address(src));
7447}
7448
7449void MacroAssembler::fldcw(AddressLiteral src) {
7450  Assembler::fldcw(as_Address(src));
7451}
7452
7453void MacroAssembler::pow_exp_core_encoding() {
7454  // kills rax, rcx, rdx
7455  subptr(rsp,sizeof(jdouble));
7456  // computes 2^X. Stack: X ...
7457  // f2xm1 computes 2^X-1 but only operates on -1<=X<=1. Get int(X) and
7458  // keep it on the thread's stack to compute 2^int(X) later,
7459  // then compute 2^(X-int(X)) as (2^(X-int(X))-1)+1.
7460  // final result is obtained with: 2^X = 2^int(X) * 2^(X-int(X))
7461  fld_s(0);                 // Stack: X X ...
7462  frndint();                // Stack: int(X) X ...
7463  fsuba(1);                 // Stack: int(X) X-int(X) ...
7464  fistp_s(Address(rsp,0)); // move int(X) as integer to thread's stack. Stack: X-int(X) ...
7465  f2xm1();                  // Stack: 2^(X-int(X))-1 ...
7466  fld1();                   // Stack: 1 2^(X-int(X))-1 ...
7467  faddp(1);                 // Stack: 2^(X-int(X))
7468  // computes 2^(int(X)): add exponent bias (1023) to int(X), then
7469  // shift int(X)+1023 to exponent position.
7470  // Exponent is limited to 11 bits: if int(X)+1023 does not fit in 11
7471  // bits, set result to NaN. 0x000 and 0x7FF are reserved exponent
7472  // values so detect them and set result to NaN.
7473  movl(rax,Address(rsp,0));
7474  movl(rcx, -2048); // 11 bit mask and valid NaN binary encoding
7475  addl(rax, 1023);
7476  movl(rdx,rax);
7477  shll(rax,20);
7478  // Check that 0 < int(X)+1023 < 2047. Otherwise set rax to NaN.
7479  addl(rdx,1);
7480  // Check that 1 < int(X)+1023+1 < 2048
7481  // in 3 steps:
7482  // 1- (int(X)+1023+1)&-2048 == 0 => 0 <= int(X)+1023+1 < 2048
7483  // 2- (int(X)+1023+1)&-2048 != 0
7484  // 3- (int(X)+1023+1)&-2048 != 1
7485  // Do 2- first because addl just updated the flags.
7486  cmov32(Assembler::equal,rax,rcx);
7487  cmpl(rdx,1);
7488  cmov32(Assembler::equal,rax,rcx);
7489  testl(rdx,rcx);
7490  cmov32(Assembler::notEqual,rax,rcx);
7491  movl(Address(rsp,4),rax);
7492  movl(Address(rsp,0),0);
7493  fmul_d(Address(rsp,0)); // Stack: 2^X ...
7494  addptr(rsp,sizeof(jdouble));
7495}
7496
7497void MacroAssembler::increase_precision() {
7498  subptr(rsp, BytesPerWord);
7499  fnstcw(Address(rsp, 0));
7500  movl(rax, Address(rsp, 0));
7501  orl(rax, 0x300);
7502  push(rax);
7503  fldcw(Address(rsp, 0));
7504  pop(rax);
7505}
7506
7507void MacroAssembler::restore_precision() {
7508  fldcw(Address(rsp, 0));
7509  addptr(rsp, BytesPerWord);
7510}
7511
7512void MacroAssembler::fast_pow() {
7513  // computes X^Y = 2^(Y * log2(X))
7514  // if fast computation is not possible, result is NaN. Requires
7515  // fallback from user of this macro.
7516  // increase precision for intermediate steps of the computation
7517  increase_precision();
7518  fyl2x();                 // Stack: (Y*log2(X)) ...
7519  pow_exp_core_encoding(); // Stack: X^Y ...
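  // Worked example for pow_exp_core_encoding (numbers are illustrative):
  // for X = 10.3, frndint leaves int(X) = 10, and f2xm1/fld1/faddp produce
  // 2^0.3 on the FPU stack. The integer part is then built directly as a
  // double bit pattern: (10 + 1023) << 20 = 0x40900000 goes in the high
  // dword and 0 in the low dword, which is exactly 1024.0 = 2^10, so the
  // final fmul_d yields 2^10.3.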
7520  restore_precision();
7521}
7522
7523void MacroAssembler::fast_exp() {
7524  // computes exp(X) = 2^(X * log2(e))
7525  // if fast computation is not possible, result is NaN. Requires
7526  // fallback from user of this macro.
7527  // increase precision for intermediate steps of the computation
7528  increase_precision();
7529  fldl2e();                // Stack: log2(e) X ...
7530  fmulp(1);                // Stack: (X*log2(e)) ...
7531  pow_exp_core_encoding(); // Stack: exp(X) ...
7532  restore_precision();
7533}
7534
7535void MacroAssembler::pow_or_exp(bool is_exp, int num_fpu_regs_in_use) {
7536  // kills rax, rcx, rdx
7537  // pow and exp need 2 extra registers on the fpu stack.
7538  Label slow_case, done;
7539  Register tmp = noreg;
7540  if (!VM_Version::supports_cmov()) {
7541    // fcmp needs a temporary so preserve rdx
7542    tmp = rdx;
7543  }
7544  Register tmp2 = rax;
7545  Register tmp3 = rcx;
7546
7547  if (is_exp) {
7548    // Stack: X
7549    fld_s(0);                   // duplicate argument for runtime call. Stack: X X
7550    fast_exp();                 // Stack: exp(X) X
7551    fcmp(tmp, 0, false, false); // Stack: exp(X) X
7552    // exp(X) not equal to itself: exp(X) is NaN, go to the slow case.
7553    jcc(Assembler::parity, slow_case);
7554    // get rid of duplicate argument. Stack: exp(X)
7555    if (num_fpu_regs_in_use > 0) {
7556      fxch();
7557      fpop();
7558    } else {
7559      ffree(1);
7560    }
7561    jmp(done);
7562  } else {
7563    // Stack: X Y
7564    Label x_negative, y_odd;
7565
7566    fldz();                    // Stack: 0 X Y
7567    fcmp(tmp, 1, true, false); // Stack: X Y
7568    jcc(Assembler::above, x_negative);
7569
7570    // X >= 0
7571
7572    fld_s(1);                   // duplicate arguments for runtime call. Stack: Y X Y
7573    fld_s(1);                   // Stack: X Y X Y
7574    fast_pow();                 // Stack: X^Y X Y
7575    fcmp(tmp, 0, false, false); // Stack: X^Y X Y
7576    // X^Y not equal to itself: X^Y is NaN, go to the slow case.
7577    jcc(Assembler::parity, slow_case);
7578    // get rid of duplicate arguments. Stack: X^Y
7579    if (num_fpu_regs_in_use > 0) {
7580      fxch(); fpop();
7581      fxch(); fpop();
7582    } else {
7583      ffree(2);
7584      ffree(1);
7585    }
7586    jmp(done);
7587
7588    // X <= 0
7589    bind(x_negative);
7590
7591    fld_s(1);                   // Stack: Y X Y
7592    frndint();                  // Stack: int(Y) X Y
7593    fcmp(tmp, 2, false, false); // Stack: int(Y) X Y
7594    jcc(Assembler::notEqual, slow_case);
7595
7596    subptr(rsp, 8);
7597
7598    // For X^Y, when X < 0, Y has to be an integer and the final
7599    // result depends on whether it's odd or even. We just checked
7600    // that int(Y) == Y. We move int(Y) to gp registers as a 64 bit
7601    // integer to test its parity. If int(Y) is huge and doesn't fit
7602    // in the 64 bit integer range, the integer indefinite value will
7603    // end up in the gp registers. Huge numbers are all even, the
7604    // integer indefinite number is even so it's fine.
7605
7606#ifdef ASSERT
7607    // Let's check we don't end up with an integer indefinite number
7608    // when not expected. First test for huge numbers: check whether
7609    // int(Y)+1 == int(Y) which is true for very large numbers and
7610    // those are all even. A 64 bit integer is guaranteed not to
7611    // overflow for numbers where y+1 != y (when precision is set to
7612    // double precision).
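    // Worked parity example (illustrative): Y = 3.0 is odd, so (-2.0)^3 is
    // computed below as -(abs(-2.0)^3) = -8.0. A Y too large for fistp_d
    // (say 1e30) is necessarily even, and the "integer indefinite" pattern
    // 0x8000000000000000 produced on conversion overflow also has a zero
    // low bit, so the testl(tmp2, 1) parity check further down still picks
    // the positive sign.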
7613    Label y_not_huge;
7614
7615    fld1();                    // Stack: 1 int(Y) X Y
7616    fadd(1);                   // Stack: 1+int(Y) int(Y) X Y
7617
7618#ifdef _LP64
7619    // trip to memory to force the precision down from double extended
7620    // precision
7621    fstp_d(Address(rsp, 0));
7622    fld_d(Address(rsp, 0));
7623#endif
7624
7625    fcmp(tmp, 1, true, false); // Stack: int(Y) X Y
7626#endif
7627
7628    // move int(Y) as 64 bit integer to thread's stack
7629    fistp_d(Address(rsp,0));   // Stack: X Y
7630
7631#ifdef ASSERT
7632    jcc(Assembler::notEqual, y_not_huge);
7633
7634    // Y is huge so we know it's even. It may not fit in a 64 bit
7635    // integer and we don't want the debug code below to see the
7636    // integer indefinite value so overwrite int(Y) on the thread's
7637    // stack with 0.
7638    movl(Address(rsp, 0), 0);
7639    movl(Address(rsp, 4), 0);
7640
7641    bind(y_not_huge);
7642#endif
7643
7644    fld_s(1);                   // duplicate arguments for runtime call. Stack: Y X Y
7645    fld_s(1);                   // Stack: X Y X Y
7646    fabs();                     // Stack: abs(X) Y X Y
7647    fast_pow();                 // Stack: abs(X)^Y X Y
7648    fcmp(tmp, 0, false, false); // Stack: abs(X)^Y X Y
7649    // abs(X)^Y not equal to itself: abs(X)^Y is NaN, go to the slow case.
7650
7651    pop(tmp2);
7652    NOT_LP64(pop(tmp3));
7653    jcc(Assembler::parity, slow_case);
7654
7655#ifdef ASSERT
7656    // Check that int(Y) is not the integer indefinite value (int
7657    // overflow). Shouldn't happen because for values that would
7658    // overflow, 1+int(Y)==Y which was tested earlier.
7659#ifndef _LP64
7660    {
7661      Label integer;
7662      testl(tmp2, tmp2);
7663      jcc(Assembler::notZero, integer);
7664      cmpl(tmp3, 0x80000000);
7665      jcc(Assembler::notZero, integer);
7666      STOP("integer indefinite value shouldn't be seen here");
7667      bind(integer);
7668    }
7669#else
7670    {
7671      Label integer;
7672      mov(tmp3, tmp2); // preserve tmp2 for parity check below
7673      shlq(tmp3, 1);
7674      jcc(Assembler::carryClear, integer);
7675      jcc(Assembler::notZero, integer);
7676      STOP("integer indefinite value shouldn't be seen here");
7677      bind(integer);
7678    }
7679#endif
7680#endif
7681
7682    // get rid of duplicate arguments. Stack: X^Y
7683    if (num_fpu_regs_in_use > 0) {
7684      fxch(); fpop();
7685      fxch(); fpop();
7686    } else {
7687      ffree(2);
7688      ffree(1);
7689    }
7690
7691    testl(tmp2, 1);
7692    jcc(Assembler::zero, done); // X <= 0, Y even: X^Y = abs(X)^Y
7693    // X <= 0, Y odd: X^Y = -abs(X)^Y
7694
7695    fchs();                     // Stack: -abs(X)^Y Y
7696    jmp(done);
7697  }
7698
7699  // slow case: runtime call
7700  bind(slow_case);
7701
7702  fpop();                       // pop incorrect result or int(Y)
7703
7704  fp_runtime_fallback(is_exp ? CAST_FROM_FN_PTR(address, SharedRuntime::dexp) : CAST_FROM_FN_PTR(address, SharedRuntime::dpow),
7705                      is_exp ? 1 : 2, num_fpu_regs_in_use);
7706
7707  // Come here with result in F-TOS
7708  bind(done);
7709}
7710
7711void MacroAssembler::fpop() {
7712  ffree();
7713  fincstp();
7714}
7715
7716void MacroAssembler::fremr(Register tmp) {
7717  save_rax(tmp);
7718  { Label L;
7719    bind(L);
7720    fprem();
7721    fwait(); fnstsw_ax();
7722#ifdef _LP64
7723    testl(rax, 0x400);
7724    jcc(Assembler::notEqual, L);
7725#else
7726    sahf();
7727    jcc(Assembler::parity, L);
7728#endif // _LP64
7729  }
7730  restore_rax(tmp);
7731  // Result is in ST0.
7732 // Note: fxch & fpop to get rid of ST1 7733 // (otherwise FPU stack could overflow eventually) 7734 fxch(1); 7735 fpop(); 7736} 7737 7738 7739void MacroAssembler::incrementl(AddressLiteral dst) { 7740 if (reachable(dst)) { 7741 incrementl(as_Address(dst)); 7742 } else { 7743 lea(rscratch1, dst); 7744 incrementl(Address(rscratch1, 0)); 7745 } 7746} 7747 7748void MacroAssembler::incrementl(ArrayAddress dst) { 7749 incrementl(as_Address(dst)); 7750} 7751 7752void MacroAssembler::incrementl(Register reg, int value) { 7753 if (value == min_jint) {addl(reg, value) ; return; } 7754 if (value < 0) { decrementl(reg, -value); return; } 7755 if (value == 0) { ; return; } 7756 if (value == 1 && UseIncDec) { incl(reg) ; return; } 7757 /* else */ { addl(reg, value) ; return; } 7758} 7759 7760void MacroAssembler::incrementl(Address dst, int value) { 7761 if (value == min_jint) {addl(dst, value) ; return; } 7762 if (value < 0) { decrementl(dst, -value); return; } 7763 if (value == 0) { ; return; } 7764 if (value == 1 && UseIncDec) { incl(dst) ; return; } 7765 /* else */ { addl(dst, value) ; return; } 7766} 7767 7768void MacroAssembler::jump(AddressLiteral dst) { 7769 if (reachable(dst)) { 7770 jmp_literal(dst.target(), dst.rspec()); 7771 } else { 7772 lea(rscratch1, dst); 7773 jmp(rscratch1); 7774 } 7775} 7776 7777void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) { 7778 if (reachable(dst)) { 7779 InstructionMark im(this); 7780 relocate(dst.reloc()); 7781 const int short_size = 2; 7782 const int long_size = 6; 7783 int offs = (intptr_t)dst.target() - ((intptr_t)_code_pos); 7784 if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) { 7785 // 0111 tttn #8-bit disp 7786 emit_byte(0x70 | cc); 7787 emit_byte((offs - short_size) & 0xFF); 7788 } else { 7789 // 0000 1111 1000 tttn #32-bit disp 7790 emit_byte(0x0F); 7791 emit_byte(0x80 | cc); 7792 emit_long(offs - long_size); 7793 } 7794 } else { 7795#ifdef ASSERT 7796 warning("reversing conditional branch"); 7797#endif /* ASSERT */ 7798 Label skip; 7799 jccb(reverse[cc], skip); 7800 lea(rscratch1, dst); 7801 Assembler::jmp(rscratch1); 7802 bind(skip); 7803 } 7804} 7805 7806void MacroAssembler::ldmxcsr(AddressLiteral src) { 7807 if (reachable(src)) { 7808 Assembler::ldmxcsr(as_Address(src)); 7809 } else { 7810 lea(rscratch1, src); 7811 Assembler::ldmxcsr(Address(rscratch1, 0)); 7812 } 7813} 7814 7815int MacroAssembler::load_signed_byte(Register dst, Address src) { 7816 int off; 7817 if (LP64_ONLY(true ||) VM_Version::is_P6()) { 7818 off = offset(); 7819 movsbl(dst, src); // movsxb 7820 } else { 7821 off = load_unsigned_byte(dst, src); 7822 shll(dst, 24); 7823 sarl(dst, 24); 7824 } 7825 return off; 7826} 7827 7828// Note: load_signed_short used to be called load_signed_word. 7829// Although the 'w' in x86 opcodes refers to the term "word" in the assembler 7830// manual, which means 16 bits, that usage is found nowhere in HotSpot code. 7831// The term "word" in HotSpot means a 32- or 64-bit machine word. 7832int MacroAssembler::load_signed_short(Register dst, Address src) { 7833 int off; 7834 if (LP64_ONLY(true ||) VM_Version::is_P6()) { 7835 // This is dubious to me since it seems safe to do a signed 16 => 64 bit 7836 // version but this is what 64bit has always done. This seems to imply 7837 // that users are only using 32bits worth. 
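    // Illustration (not part of the original comment): movswl sign-extends,
    // so a 16-bit value 0x8000 loads as 0xFFFF8000 (-32768), whereas
    // load_unsigned_short below would produce 0x00008000 (32768).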
7838 off = offset(); 7839 movswl(dst, src); // movsxw 7840 } else { 7841 off = load_unsigned_short(dst, src); 7842 shll(dst, 16); 7843 sarl(dst, 16); 7844 } 7845 return off; 7846} 7847 7848int MacroAssembler::load_unsigned_byte(Register dst, Address src) { 7849 // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16, 7850 // and "3.9 Partial Register Penalties", p. 22). 7851 int off; 7852 if (LP64_ONLY(true || ) VM_Version::is_P6() || src.uses(dst)) { 7853 off = offset(); 7854 movzbl(dst, src); // movzxb 7855 } else { 7856 xorl(dst, dst); 7857 off = offset(); 7858 movb(dst, src); 7859 } 7860 return off; 7861} 7862 7863// Note: load_unsigned_short used to be called load_unsigned_word. 7864int MacroAssembler::load_unsigned_short(Register dst, Address src) { 7865 // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16, 7866 // and "3.9 Partial Register Penalties", p. 22). 7867 int off; 7868 if (LP64_ONLY(true ||) VM_Version::is_P6() || src.uses(dst)) { 7869 off = offset(); 7870 movzwl(dst, src); // movzxw 7871 } else { 7872 xorl(dst, dst); 7873 off = offset(); 7874 movw(dst, src); 7875 } 7876 return off; 7877} 7878 7879void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { 7880 switch (size_in_bytes) { 7881#ifndef _LP64 7882 case 8: 7883 assert(dst2 != noreg, "second dest register required"); 7884 movl(dst, src); 7885 movl(dst2, src.plus_disp(BytesPerInt)); 7886 break; 7887#else 7888 case 8: movq(dst, src); break; 7889#endif 7890 case 4: movl(dst, src); break; 7891 case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break; 7892 case 1: is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break; 7893 default: ShouldNotReachHere(); 7894 } 7895} 7896 7897void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { 7898 switch (size_in_bytes) { 7899#ifndef _LP64 7900 case 8: 7901 assert(src2 != noreg, "second source register required"); 7902 movl(dst, src); 7903 movl(dst.plus_disp(BytesPerInt), src2); 7904 break; 7905#else 7906 case 8: movq(dst, src); break; 7907#endif 7908 case 4: movl(dst, src); break; 7909 case 2: movw(dst, src); break; 7910 case 1: movb(dst, src); break; 7911 default: ShouldNotReachHere(); 7912 } 7913} 7914 7915void MacroAssembler::mov32(AddressLiteral dst, Register src) { 7916 if (reachable(dst)) { 7917 movl(as_Address(dst), src); 7918 } else { 7919 lea(rscratch1, dst); 7920 movl(Address(rscratch1, 0), src); 7921 } 7922} 7923 7924void MacroAssembler::mov32(Register dst, AddressLiteral src) { 7925 if (reachable(src)) { 7926 movl(dst, as_Address(src)); 7927 } else { 7928 lea(rscratch1, src); 7929 movl(dst, Address(rscratch1, 0)); 7930 } 7931} 7932 7933// C++ bool manipulation 7934 7935void MacroAssembler::movbool(Register dst, Address src) { 7936 if(sizeof(bool) == 1) 7937 movb(dst, src); 7938 else if(sizeof(bool) == 2) 7939 movw(dst, src); 7940 else if(sizeof(bool) == 4) 7941 movl(dst, src); 7942 else 7943 // unsupported 7944 ShouldNotReachHere(); 7945} 7946 7947void MacroAssembler::movbool(Address dst, bool boolconst) { 7948 if(sizeof(bool) == 1) 7949 movb(dst, (int) boolconst); 7950 else if(sizeof(bool) == 2) 7951 movw(dst, (int) boolconst); 7952 else if(sizeof(bool) == 4) 7953 movl(dst, (int) boolconst); 7954 else 7955 // unsupported 7956 ShouldNotReachHere(); 7957} 7958 7959void MacroAssembler::movbool(Address dst, Register src) { 7960 if(sizeof(bool) == 1) 7961 movb(dst, src); 7962 else 
if(sizeof(bool) == 2) 7963 movw(dst, src); 7964 else if(sizeof(bool) == 4) 7965 movl(dst, src); 7966 else 7967 // unsupported 7968 ShouldNotReachHere(); 7969} 7970 7971void MacroAssembler::movbyte(ArrayAddress dst, int src) { 7972 movb(as_Address(dst), src); 7973} 7974 7975void MacroAssembler::movdl(XMMRegister dst, AddressLiteral src) { 7976 if (reachable(src)) { 7977 movdl(dst, as_Address(src)); 7978 } else { 7979 lea(rscratch1, src); 7980 movdl(dst, Address(rscratch1, 0)); 7981 } 7982} 7983 7984void MacroAssembler::movq(XMMRegister dst, AddressLiteral src) { 7985 if (reachable(src)) { 7986 movq(dst, as_Address(src)); 7987 } else { 7988 lea(rscratch1, src); 7989 movq(dst, Address(rscratch1, 0)); 7990 } 7991} 7992 7993void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) { 7994 if (reachable(src)) { 7995 if (UseXmmLoadAndClearUpper) { 7996 movsd (dst, as_Address(src)); 7997 } else { 7998 movlpd(dst, as_Address(src)); 7999 } 8000 } else { 8001 lea(rscratch1, src); 8002 if (UseXmmLoadAndClearUpper) { 8003 movsd (dst, Address(rscratch1, 0)); 8004 } else { 8005 movlpd(dst, Address(rscratch1, 0)); 8006 } 8007 } 8008} 8009 8010void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) { 8011 if (reachable(src)) { 8012 movss(dst, as_Address(src)); 8013 } else { 8014 lea(rscratch1, src); 8015 movss(dst, Address(rscratch1, 0)); 8016 } 8017} 8018 8019void MacroAssembler::movptr(Register dst, Register src) { 8020 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); 8021} 8022 8023void MacroAssembler::movptr(Register dst, Address src) { 8024 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); 8025} 8026 8027// src should NEVER be a real pointer. Use AddressLiteral for true pointers 8028void MacroAssembler::movptr(Register dst, intptr_t src) { 8029 LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src)); 8030} 8031 8032void MacroAssembler::movptr(Address dst, Register src) { 8033 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); 8034} 8035 8036void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) { 8037 if (reachable(src)) { 8038 Assembler::movsd(dst, as_Address(src)); 8039 } else { 8040 lea(rscratch1, src); 8041 Assembler::movsd(dst, Address(rscratch1, 0)); 8042 } 8043} 8044 8045void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) { 8046 if (reachable(src)) { 8047 Assembler::movss(dst, as_Address(src)); 8048 } else { 8049 lea(rscratch1, src); 8050 Assembler::movss(dst, Address(rscratch1, 0)); 8051 } 8052} 8053 8054void MacroAssembler::mulsd(XMMRegister dst, AddressLiteral src) { 8055 if (reachable(src)) { 8056 Assembler::mulsd(dst, as_Address(src)); 8057 } else { 8058 lea(rscratch1, src); 8059 Assembler::mulsd(dst, Address(rscratch1, 0)); 8060 } 8061} 8062 8063void MacroAssembler::mulss(XMMRegister dst, AddressLiteral src) { 8064 if (reachable(src)) { 8065 Assembler::mulss(dst, as_Address(src)); 8066 } else { 8067 lea(rscratch1, src); 8068 Assembler::mulss(dst, Address(rscratch1, 0)); 8069 } 8070} 8071 8072void MacroAssembler::null_check(Register reg, int offset) { 8073 if (needs_explicit_null_check(offset)) { 8074 // provoke OS NULL exception if reg = NULL by 8075 // accessing M[reg] w/o changing any (non-CC) registers 8076 // NOTE: cmpl is plenty here to provoke a segv 8077 cmpptr(rax, Address(reg, 0)); 8078 // Note: should probably use testl(rax, Address(reg, 0)); 8079 // may be shorter code (however, this version of 8080 // testl needs to be implemented first) 8081 } else { 8082 // nothing to do, (later) access of M[reg + offset] 8083 // will provoke OS NULL 
exception if reg = NULL
8084  }
8085}
8086
8087void MacroAssembler::os_breakpoint() {
8088  // instead of directly emitting a breakpoint, call os::breakpoint for better debuggability
8089  // (e.g., MSVC can't call ps() otherwise)
8090  call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
8091}
8092
8093void MacroAssembler::pop_CPU_state() {
8094  pop_FPU_state();
8095  pop_IU_state();
8096}
8097
8098void MacroAssembler::pop_FPU_state() {
8099  NOT_LP64(frstor(Address(rsp, 0));)
8100  LP64_ONLY(fxrstor(Address(rsp, 0));)
8101  addptr(rsp, FPUStateSizeInWords * wordSize);
8102}
8103
8104void MacroAssembler::pop_IU_state() {
8105  popa();
8106  LP64_ONLY(addq(rsp, 8));
8107  popf();
8108}
8109
8110// Save Integer and Float state
8111// Warning: Stack must be 16 byte aligned (64bit)
8112void MacroAssembler::push_CPU_state() {
8113  push_IU_state();
8114  push_FPU_state();
8115}
8116
8117void MacroAssembler::push_FPU_state() {
8118  subptr(rsp, FPUStateSizeInWords * wordSize);
8119#ifndef _LP64
8120  fnsave(Address(rsp, 0));
8121  fwait();
8122#else
8123  fxsave(Address(rsp, 0));
8124#endif // LP64
8125}
8126
8127void MacroAssembler::push_IU_state() {
8128  // Push flags first because pusha kills them
8129  pushf();
8130  // Make sure rsp stays 16-byte aligned
8131  LP64_ONLY(subq(rsp, 8));
8132  pusha();
8133}
8134
8135void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) {
8136  // determine java_thread register
8137  if (!java_thread->is_valid()) {
8138    java_thread = rdi;
8139    get_thread(java_thread);
8140  }
8141  // we must set sp to zero to clear frame
8142  movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
8143  if (clear_fp) {
8144    movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
8145  }
8146
8147  if (clear_pc)
8148    movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);
8149
8150}
8151
8152void MacroAssembler::restore_rax(Register tmp) {
8153  if (tmp == noreg) pop(rax);
8154  else if (tmp != rax) mov(rax, tmp);
8155}
8156
8157void MacroAssembler::round_to(Register reg, int modulus) {
8158  addptr(reg, modulus - 1);
8159  andptr(reg, -modulus);
8160}
8161
8162void MacroAssembler::save_rax(Register tmp) {
8163  if (tmp == noreg) push(rax);
8164  else if (tmp != rax) mov(tmp, rax);
8165}
8166
8167// Write the serialization page so the VM thread can do a pseudo remote membar.
8168// We use the current thread pointer to calculate a thread specific
8169// offset to write to within the page. This minimizes bus traffic
8170// due to cache line collision.
8171void MacroAssembler::serialize_memory(Register thread, Register tmp) {
8172  movl(tmp, thread);
8173  shrl(tmp, os::get_serialize_page_shift_count());
8174  andl(tmp, (os::vm_page_size() - sizeof(int)));
8175
8176  Address index(noreg, tmp, Address::times_1);
8177  ExternalAddress page(os::get_memory_serialize_page());
8178
8179  // Size of store must match masking code above
8180  movl(as_Address(ArrayAddress(page, index)), tmp);
8181}
8182
8183// Calls to C land
8184//
8185// When entering C land, the rbp and rsp of the last Java frame have to be recorded
8186// in the (thread-local) JavaThread object. When leaving C land, the last Java fp
8187// has to be reset to 0. This is required to allow proper stack traversal.
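// An illustrative call-site shape (register choices and the 'the_pc' label
// are hypothetical, not taken from a real stub):
//   set_last_Java_frame(thread, noreg, rbp, the_pc); // record the anchor
//   ... call into VM/C code ...
//   reset_last_Java_frame(thread, true, true);       // clear sp, fp and pc
// Real call sites vary in which registers and flags they pass.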
8188void MacroAssembler::set_last_Java_frame(Register java_thread, 8189 Register last_java_sp, 8190 Register last_java_fp, 8191 address last_java_pc) { 8192 // determine java_thread register 8193 if (!java_thread->is_valid()) { 8194 java_thread = rdi; 8195 get_thread(java_thread); 8196 } 8197 // determine last_java_sp register 8198 if (!last_java_sp->is_valid()) { 8199 last_java_sp = rsp; 8200 } 8201 8202 // last_java_fp is optional 8203 8204 if (last_java_fp->is_valid()) { 8205 movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), last_java_fp); 8206 } 8207 8208 // last_java_pc is optional 8209 8210 if (last_java_pc != NULL) { 8211 lea(Address(java_thread, 8212 JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()), 8213 InternalAddress(last_java_pc)); 8214 8215 } 8216 movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), last_java_sp); 8217} 8218 8219void MacroAssembler::shlptr(Register dst, int imm8) { 8220 LP64_ONLY(shlq(dst, imm8)) NOT_LP64(shll(dst, imm8)); 8221} 8222 8223void MacroAssembler::shrptr(Register dst, int imm8) { 8224 LP64_ONLY(shrq(dst, imm8)) NOT_LP64(shrl(dst, imm8)); 8225} 8226 8227void MacroAssembler::sign_extend_byte(Register reg) { 8228 if (LP64_ONLY(true ||) (VM_Version::is_P6() && reg->has_byte_register())) { 8229 movsbl(reg, reg); // movsxb 8230 } else { 8231 shll(reg, 24); 8232 sarl(reg, 24); 8233 } 8234} 8235 8236void MacroAssembler::sign_extend_short(Register reg) { 8237 if (LP64_ONLY(true ||) VM_Version::is_P6()) { 8238 movswl(reg, reg); // movsxw 8239 } else { 8240 shll(reg, 16); 8241 sarl(reg, 16); 8242 } 8243} 8244 8245void MacroAssembler::testl(Register dst, AddressLiteral src) { 8246 assert(reachable(src), "Address should be reachable"); 8247 testl(dst, as_Address(src)); 8248} 8249 8250void MacroAssembler::sqrtsd(XMMRegister dst, AddressLiteral src) { 8251 if (reachable(src)) { 8252 Assembler::sqrtsd(dst, as_Address(src)); 8253 } else { 8254 lea(rscratch1, src); 8255 Assembler::sqrtsd(dst, Address(rscratch1, 0)); 8256 } 8257} 8258 8259void MacroAssembler::sqrtss(XMMRegister dst, AddressLiteral src) { 8260 if (reachable(src)) { 8261 Assembler::sqrtss(dst, as_Address(src)); 8262 } else { 8263 lea(rscratch1, src); 8264 Assembler::sqrtss(dst, Address(rscratch1, 0)); 8265 } 8266} 8267 8268void MacroAssembler::subsd(XMMRegister dst, AddressLiteral src) { 8269 if (reachable(src)) { 8270 Assembler::subsd(dst, as_Address(src)); 8271 } else { 8272 lea(rscratch1, src); 8273 Assembler::subsd(dst, Address(rscratch1, 0)); 8274 } 8275} 8276 8277void MacroAssembler::subss(XMMRegister dst, AddressLiteral src) { 8278 if (reachable(src)) { 8279 Assembler::subss(dst, as_Address(src)); 8280 } else { 8281 lea(rscratch1, src); 8282 Assembler::subss(dst, Address(rscratch1, 0)); 8283 } 8284} 8285 8286void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) { 8287 if (reachable(src)) { 8288 Assembler::ucomisd(dst, as_Address(src)); 8289 } else { 8290 lea(rscratch1, src); 8291 Assembler::ucomisd(dst, Address(rscratch1, 0)); 8292 } 8293} 8294 8295void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) { 8296 if (reachable(src)) { 8297 Assembler::ucomiss(dst, as_Address(src)); 8298 } else { 8299 lea(rscratch1, src); 8300 Assembler::ucomiss(dst, Address(rscratch1, 0)); 8301 } 8302} 8303 8304void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) { 8305 // Used in sign-bit flipping with aligned address. 
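  // (Typical use, for illustration: xorpd against a 16-byte-aligned
  // constant with 0x8000000000000000 in each 64-bit lane negates a double;
  // the assert below enforces that alignment only on the non-AVX path.)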
8306 assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); 8307 if (reachable(src)) { 8308 Assembler::xorpd(dst, as_Address(src)); 8309 } else { 8310 lea(rscratch1, src); 8311 Assembler::xorpd(dst, Address(rscratch1, 0)); 8312 } 8313} 8314 8315void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) { 8316 // Used in sign-bit flipping with aligned address. 8317 assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); 8318 if (reachable(src)) { 8319 Assembler::xorps(dst, as_Address(src)); 8320 } else { 8321 lea(rscratch1, src); 8322 Assembler::xorps(dst, Address(rscratch1, 0)); 8323 } 8324} 8325 8326// AVX 3-operands instructions 8327 8328void MacroAssembler::vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 8329 if (reachable(src)) { 8330 vaddsd(dst, nds, as_Address(src)); 8331 } else { 8332 lea(rscratch1, src); 8333 vaddsd(dst, nds, Address(rscratch1, 0)); 8334 } 8335} 8336 8337void MacroAssembler::vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 8338 if (reachable(src)) { 8339 vaddss(dst, nds, as_Address(src)); 8340 } else { 8341 lea(rscratch1, src); 8342 vaddss(dst, nds, Address(rscratch1, 0)); 8343 } 8344} 8345 8346void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) { 8347 if (reachable(src)) { 8348 vandpd(dst, nds, as_Address(src), vector256); 8349 } else { 8350 lea(rscratch1, src); 8351 vandpd(dst, nds, Address(rscratch1, 0), vector256); 8352 } 8353} 8354 8355void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) { 8356 if (reachable(src)) { 8357 vandps(dst, nds, as_Address(src), vector256); 8358 } else { 8359 lea(rscratch1, src); 8360 vandps(dst, nds, Address(rscratch1, 0), vector256); 8361 } 8362} 8363 8364void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 8365 if (reachable(src)) { 8366 vdivsd(dst, nds, as_Address(src)); 8367 } else { 8368 lea(rscratch1, src); 8369 vdivsd(dst, nds, Address(rscratch1, 0)); 8370 } 8371} 8372 8373void MacroAssembler::vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 8374 if (reachable(src)) { 8375 vdivss(dst, nds, as_Address(src)); 8376 } else { 8377 lea(rscratch1, src); 8378 vdivss(dst, nds, Address(rscratch1, 0)); 8379 } 8380} 8381 8382void MacroAssembler::vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 8383 if (reachable(src)) { 8384 vmulsd(dst, nds, as_Address(src)); 8385 } else { 8386 lea(rscratch1, src); 8387 vmulsd(dst, nds, Address(rscratch1, 0)); 8388 } 8389} 8390 8391void MacroAssembler::vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 8392 if (reachable(src)) { 8393 vmulss(dst, nds, as_Address(src)); 8394 } else { 8395 lea(rscratch1, src); 8396 vmulss(dst, nds, Address(rscratch1, 0)); 8397 } 8398} 8399 8400void MacroAssembler::vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 8401 if (reachable(src)) { 8402 vsubsd(dst, nds, as_Address(src)); 8403 } else { 8404 lea(rscratch1, src); 8405 vsubsd(dst, nds, Address(rscratch1, 0)); 8406 } 8407} 8408 8409void MacroAssembler::vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 8410 if (reachable(src)) { 8411 vsubss(dst, nds, as_Address(src)); 8412 } else { 8413 lea(rscratch1, src); 8414 vsubss(dst, nds, Address(rscratch1, 0)); 8415 } 8416} 8417 8418void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) { 8419 if (reachable(src)) { 
8420    vxorpd(dst, nds, as_Address(src), vector256);
8421  } else {
8422    lea(rscratch1, src);
8423    vxorpd(dst, nds, Address(rscratch1, 0), vector256);
8424  }
8425}
8426
8427void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
8428  if (reachable(src)) {
8429    vxorps(dst, nds, as_Address(src), vector256);
8430  } else {
8431    lea(rscratch1, src);
8432    vxorps(dst, nds, Address(rscratch1, 0), vector256);
8433  }
8434}
8435
8436
8437//////////////////////////////////////////////////////////////////////////////////
8438#ifndef SERIALGC
8439
8440void MacroAssembler::g1_write_barrier_pre(Register obj,
8441                                          Register pre_val,
8442                                          Register thread,
8443                                          Register tmp,
8444                                          bool tosca_live,
8445                                          bool expand_call) {
8446
8447  // If expand_call is true then we expand the call_VM_leaf macro
8448  // directly to skip generating the check by
8449  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.
8450
8451#ifdef _LP64
8452  assert(thread == r15_thread, "must be");
8453#endif // _LP64
8454
8455  Label done;
8456  Label runtime;
8457
8458  assert(pre_val != noreg, "check this code");
8459
8460  if (obj != noreg) {
8461    assert_different_registers(obj, pre_val, tmp);
8462    assert(pre_val != rax, "check this code");
8463  }
8464
8465  Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
8466                                       PtrQueue::byte_offset_of_active()));
8467  Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
8468                                 PtrQueue::byte_offset_of_index()));
8469  Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
8470                                  PtrQueue::byte_offset_of_buf()));
8471
8472
8473  // Is marking active?
8474  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
8475    cmpl(in_progress, 0);
8476  } else {
8477    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
8478    cmpb(in_progress, 0);
8479  }
8480  jcc(Assembler::equal, done);
8481
8482  // Do we need to load the previous value?
8483  if (obj != noreg) {
8484    load_heap_oop(pre_val, Address(obj, 0));
8485  }
8486
8487  // Is the previous value null?
8488  cmpptr(pre_val, (int32_t) NULL_WORD);
8489  jcc(Assembler::equal, done);
8490
8491  // Can we store the original value in the thread's buffer?
8492  // Is index == 0?
8493  // (The index field is typed as size_t.)
8494
8495  movptr(tmp, index);              // tmp := *index_adr
8496  cmpptr(tmp, 0);                  // tmp == 0?
8497  jcc(Assembler::equal, runtime);  // If yes, goto runtime
8498
8499  subptr(tmp, wordSize);           // tmp := tmp - wordSize
8500  movptr(index, tmp);              // *index_adr := tmp
8501  addptr(tmp, buffer);             // tmp := tmp + *buffer_adr
8502
8503  // Record the previous value
8504  movptr(Address(tmp, 0), pre_val);
8505  jmp(done);
8506
8507  bind(runtime);
8508  // save the live input values
8509  if(tosca_live) push(rax);
8510
8511  if (obj != noreg && obj != rax)
8512    push(obj);
8513
8514  if (pre_val != rax)
8515    push(pre_val);
8516
8517  // Calling the runtime using the regular call_VM_leaf mechanism generates
8518  // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
8519  // that checks that *(ebp+frame::interpreter_frame_last_sp) == NULL.
8520  //
8521  // If we are generating the pre-barrier without a frame (e.g. in the
8522  // intrinsified Reference.get() routine) then ebp might be pointing to
8523  // the caller frame and so this check will most likely fail at runtime.
8524  //
8525  // Expanding the call directly bypasses the generation of the check.
8526  // So when we do not have a full interpreter frame on the stack,
8527  // expand_call should be passed true.
8528
8529  NOT_LP64( push(thread); )
8530
8531  if (expand_call) {
8532    LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); )
8533    pass_arg1(this, thread);
8534    pass_arg0(this, pre_val);
8535    MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);
8536  } else {
8537    call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
8538  }
8539
8540  NOT_LP64( pop(thread); )
8541
8542  // restore the live input values
8543  if (pre_val != rax)
8544    pop(pre_val);
8545
8546  if (obj != noreg && obj != rax)
8547    pop(obj);
8548
8549  if(tosca_live) pop(rax);
8550
8551  bind(done);
8552}
8553
8554void MacroAssembler::g1_write_barrier_post(Register store_addr,
8555                                           Register new_val,
8556                                           Register thread,
8557                                           Register tmp,
8558                                           Register tmp2) {
8559#ifdef _LP64
8560  assert(thread == r15_thread, "must be");
8561#endif // _LP64
8562
8563  Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
8564                                       PtrQueue::byte_offset_of_index()));
8565  Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
8566                                  PtrQueue::byte_offset_of_buf()));
8567
8568  BarrierSet* bs = Universe::heap()->barrier_set();
8569  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
8570  Label done;
8571  Label runtime;
8572
8573  // Does the store cross heap regions?
8574
8575  movptr(tmp, store_addr);
8576  xorptr(tmp, new_val);
8577  shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
8578  jcc(Assembler::equal, done);
8579
8580  // crosses regions, storing NULL?
8581
8582  cmpptr(new_val, (int32_t) NULL_WORD);
8583  jcc(Assembler::equal, done);
8584
8585  // storing region crossing non-NULL, is card already dirty?
8586
8587  ExternalAddress cardtable((address) ct->byte_map_base);
8588  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
8589#ifdef _LP64
8590  const Register card_addr = tmp;
8591
8592  movq(card_addr, store_addr);
8593  shrq(card_addr, CardTableModRefBS::card_shift);
8594
8595  lea(tmp2, cardtable);
8596
8597  // get the address of the card
8598  addq(card_addr, tmp2);
8599#else
8600  const Register card_index = tmp;
8601
8602  movl(card_index, store_addr);
8603  shrl(card_index, CardTableModRefBS::card_shift);
8604
8605  Address index(noreg, card_index, Address::times_1);
8606  const Register card_addr = tmp;
8607  lea(card_addr, as_Address(ArrayAddress(cardtable, index)));
8608#endif
8609  cmpb(Address(card_addr, 0), 0);
8610  jcc(Assembler::equal, done);
8611
8612  // storing a region crossing, non-NULL oop, card is clean.
8613  // dirty card and log.
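  // (Illustrative arithmetic, assuming the usual 512-byte cards so that
  // card_shift == 9: a store to address 0x12345 maps to the card byte at
  // byte_map_base + (0x12345 >> 9); the movb of 0 below marks that byte
  // dirty, 0 being the dirty-card value used here.)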
8614
8615  movb(Address(card_addr, 0), 0);
8616
8617  cmpl(queue_index, 0);
8618  jcc(Assembler::equal, runtime);
8619  subl(queue_index, wordSize);
8620  movptr(tmp2, buffer);
8621#ifdef _LP64
8622  movslq(rscratch1, queue_index);
8623  addq(tmp2, rscratch1);
8624  movq(Address(tmp2, 0), card_addr);
8625#else
8626  addl(tmp2, queue_index);
8627  movl(Address(tmp2, 0), card_index);
8628#endif
8629  jmp(done);
8630
8631  bind(runtime);
8632  // save the live input values
8633  push(store_addr);
8634  push(new_val);
8635#ifdef _LP64
8636  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread);
8637#else
8638  push(thread);
8639  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
8640  pop(thread);
8641#endif
8642  pop(new_val);
8643  pop(store_addr);
8644
8645  bind(done);
8646}
8647
8648#endif // SERIALGC
8649//////////////////////////////////////////////////////////////////////////////////
8650
8651
8652void MacroAssembler::store_check(Register obj) {
8653  // Does a store check for the oop in register obj. The content of
8654  // register obj is destroyed afterwards.
8655  store_check_part_1(obj);
8656  store_check_part_2(obj);
8657}
8658
8659void MacroAssembler::store_check(Register obj, Address dst) {
8660  store_check(obj);
8661}
8662
8663
8664// split the store check operation so that other instructions can be scheduled in between
8665void MacroAssembler::store_check_part_1(Register obj) {
8666  BarrierSet* bs = Universe::heap()->barrier_set();
8667  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
8668  shrptr(obj, CardTableModRefBS::card_shift);
8669}
8670
8671void MacroAssembler::store_check_part_2(Register obj) {
8672  BarrierSet* bs = Universe::heap()->barrier_set();
8673  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
8674  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
8675  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
8676
8677  // The calculation for byte_map_base is as follows:
8678  // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift);
8679  // So this essentially converts an address to a displacement and
8680  // it will never need to be relocated. On 64bit, however, the value may be too
8681  // large for a 32bit displacement.
8682
8683  intptr_t disp = (intptr_t) ct->byte_map_base;
8684  if (is_simm32(disp)) {
8685    Address cardtable(noreg, obj, Address::times_1, disp);
8686    movb(cardtable, 0);
8687  } else {
8688    // By doing it as an ExternalAddress, disp could be converted to a rip-relative
8689    // displacement and done in a single instruction given favorable mapping and
8690    // a smarter version of as_Address. Worst case it is two instructions which
8691    // is no worse off than loading disp into a register and doing it as a simple
8692    // Address() as above.
8693    // We can't do it as ExternalAddress as the only style since if disp == 0 we'll
8694    // assert since NULL isn't acceptable in a relocInfo (see 6644928). In any case
8695    // in some cases we'll get a single instruction version.
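    // (Illustration with made-up addresses: a byte_map_base of 0x24000000
    // is a valid simm32 and takes the single movb with an absolute
    // displacement above, while a typical 64-bit base such as
    // 0x7f0012340000 is not and falls through to the
    // ExternalAddress/ArrayAddress form below.)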
8696 8697 ExternalAddress cardtable((address)disp); 8698 Address index(noreg, obj, Address::times_1); 8699 movb(as_Address(ArrayAddress(cardtable, index)), 0); 8700 } 8701} 8702 8703void MacroAssembler::subptr(Register dst, int32_t imm32) { 8704 LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32)); 8705} 8706 8707// Force generation of a 4 byte immediate value even if it fits into 8bit 8708void MacroAssembler::subptr_imm32(Register dst, int32_t imm32) { 8709 LP64_ONLY(subq_imm32(dst, imm32)) NOT_LP64(subl_imm32(dst, imm32)); 8710} 8711 8712void MacroAssembler::subptr(Register dst, Register src) { 8713 LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); 8714} 8715 8716// C++ bool manipulation 8717void MacroAssembler::testbool(Register dst) { 8718 if(sizeof(bool) == 1) 8719 testb(dst, 0xff); 8720 else if(sizeof(bool) == 2) { 8721 // testw implementation needed for two byte bools 8722 ShouldNotReachHere(); 8723 } else if(sizeof(bool) == 4) 8724 testl(dst, dst); 8725 else 8726 // unsupported 8727 ShouldNotReachHere(); 8728} 8729 8730void MacroAssembler::testptr(Register dst, Register src) { 8731 LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src)); 8732} 8733 8734// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. 8735void MacroAssembler::tlab_allocate(Register obj, 8736 Register var_size_in_bytes, 8737 int con_size_in_bytes, 8738 Register t1, 8739 Register t2, 8740 Label& slow_case) { 8741 assert_different_registers(obj, t1, t2); 8742 assert_different_registers(obj, var_size_in_bytes, t1); 8743 Register end = t2; 8744 Register thread = NOT_LP64(t1) LP64_ONLY(r15_thread); 8745 8746 verify_tlab(); 8747 8748 NOT_LP64(get_thread(thread)); 8749 8750 movptr(obj, Address(thread, JavaThread::tlab_top_offset())); 8751 if (var_size_in_bytes == noreg) { 8752 lea(end, Address(obj, con_size_in_bytes)); 8753 } else { 8754 lea(end, Address(obj, var_size_in_bytes, Address::times_1)); 8755 } 8756 cmpptr(end, Address(thread, JavaThread::tlab_end_offset())); 8757 jcc(Assembler::above, slow_case); 8758 8759 // update the tlab top pointer 8760 movptr(Address(thread, JavaThread::tlab_top_offset()), end); 8761 8762 // recover var_size_in_bytes if necessary 8763 if (var_size_in_bytes == end) { 8764 subptr(var_size_in_bytes, obj); 8765 } 8766 verify_tlab(); 8767} 8768 8769// Preserves rbx, and rdx. 8770Register MacroAssembler::tlab_refill(Label& retry, 8771 Label& try_eden, 8772 Label& slow_case) { 8773 Register top = rax; 8774 Register t1 = rcx; 8775 Register t2 = rsi; 8776 Register thread_reg = NOT_LP64(rdi) LP64_ONLY(r15_thread); 8777 assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx); 8778 Label do_refill, discard_tlab; 8779 8780 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { 8781 // No allocation in the shared eden. 8782 jmp(slow_case); 8783 } 8784 8785 NOT_LP64(get_thread(thread_reg)); 8786 8787 movptr(top, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); 8788 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset()))); 8789 8790 // calculate amount of free space 8791 subptr(t1, top); 8792 shrptr(t1, LogHeapWordSize); 8793 8794 // Retain tlab and allocate object in shared space if 8795 // the amount free in the tlab is too large to discard. 8796 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()))); 8797 jcc(Assembler::lessEqual, discard_tlab); 8798 8799 // Retain 8800 // %%% yuck as movptr... 
8801 movptr(t2, (int32_t) ThreadLocalAllocBuffer::refill_waste_limit_increment()); 8802 addptr(Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())), t2); 8803 if (TLABStats) { 8804 // increment number of slow_allocations 8805 addl(Address(thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())), 1); 8806 } 8807 jmp(try_eden); 8808 8809 bind(discard_tlab); 8810 if (TLABStats) { 8811 // increment number of refills 8812 addl(Address(thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())), 1); 8813 // accumulate wastage -- t1 is amount free in tlab 8814 addl(Address(thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())), t1); 8815 } 8816 8817 // if tlab is currently allocated (top or end != null) then 8818 // fill [top, end + alignment_reserve) with array object 8819 testptr(top, top); 8820 jcc(Assembler::zero, do_refill); 8821 8822 // set up the mark word 8823 movptr(Address(top, oopDesc::mark_offset_in_bytes()), (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2)); 8824 // set the length to the remaining space 8825 subptr(t1, typeArrayOopDesc::header_size(T_INT)); 8826 addptr(t1, (int32_t)ThreadLocalAllocBuffer::alignment_reserve()); 8827 shlptr(t1, log2_intptr(HeapWordSize/sizeof(jint))); 8828 movl(Address(top, arrayOopDesc::length_offset_in_bytes()), t1); 8829 // set klass to intArrayKlass 8830 // dubious reloc why not an oop reloc? 8831 movptr(t1, ExternalAddress((address)Universe::intArrayKlassObj_addr())); 8832 // store klass last. concurrent gcs assumes klass length is valid if 8833 // klass field is not null. 8834 store_klass(top, t1); 8835 8836 movptr(t1, top); 8837 subptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset()))); 8838 incr_allocated_bytes(thread_reg, t1, 0); 8839 8840 // refill the tlab with an eden allocation 8841 bind(do_refill); 8842 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset()))); 8843 shlptr(t1, LogHeapWordSize); 8844 // allocate new tlab, address returned in top 8845 eden_allocate(top, t1, 0, t2, slow_case); 8846 8847 // Check that t1 was preserved in eden_allocate. 
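  // (Context: t1 was loaded from tlab_size_offset and shifted to a byte
  // count just above, so it holds the requested tlab size in bytes; the
  // debug-only block below recomputes that size into tsize and compares,
  // since the bookkeeping that follows relies on t1 being intact.)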
8848#ifdef ASSERT 8849 if (UseTLAB) { 8850 Label ok; 8851 Register tsize = rsi; 8852 assert_different_registers(tsize, thread_reg, t1); 8853 push(tsize); 8854 movptr(tsize, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset()))); 8855 shlptr(tsize, LogHeapWordSize); 8856 cmpptr(t1, tsize); 8857 jcc(Assembler::equal, ok); 8858 STOP("assert(t1 != tlab size)"); 8859 should_not_reach_here(); 8860 8861 bind(ok); 8862 pop(tsize); 8863 } 8864#endif 8865 movptr(Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())), top); 8866 movptr(Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())), top); 8867 addptr(top, t1); 8868 subptr(top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes()); 8869 movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top); 8870 verify_tlab(); 8871 jmp(retry); 8872 8873 return thread_reg; // for use by caller 8874} 8875 8876void MacroAssembler::incr_allocated_bytes(Register thread, 8877 Register var_size_in_bytes, 8878 int con_size_in_bytes, 8879 Register t1) { 8880 if (!thread->is_valid()) { 8881#ifdef _LP64 8882 thread = r15_thread; 8883#else 8884 assert(t1->is_valid(), "need temp reg"); 8885 thread = t1; 8886 get_thread(thread); 8887#endif 8888 } 8889 8890#ifdef _LP64 8891 if (var_size_in_bytes->is_valid()) { 8892 addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes); 8893 } else { 8894 addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes); 8895 } 8896#else 8897 if (var_size_in_bytes->is_valid()) { 8898 addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes); 8899 } else { 8900 addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes); 8901 } 8902 adcl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())+4), 0); 8903#endif 8904} 8905 8906void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use) { 8907 pusha(); 8908 8909 // if we are coming from c1, xmm registers may be live 8910 if (UseSSE >= 1) { 8911 subptr(rsp, sizeof(jdouble)* LP64_ONLY(16) NOT_LP64(8)); 8912 } 8913 int off = 0; 8914 if (UseSSE == 1) { 8915 movflt(Address(rsp,off++*sizeof(jdouble)),xmm0); 8916 movflt(Address(rsp,off++*sizeof(jdouble)),xmm1); 8917 movflt(Address(rsp,off++*sizeof(jdouble)),xmm2); 8918 movflt(Address(rsp,off++*sizeof(jdouble)),xmm3); 8919 movflt(Address(rsp,off++*sizeof(jdouble)),xmm4); 8920 movflt(Address(rsp,off++*sizeof(jdouble)),xmm5); 8921 movflt(Address(rsp,off++*sizeof(jdouble)),xmm6); 8922 movflt(Address(rsp,off++*sizeof(jdouble)),xmm7); 8923 } else if (UseSSE >= 2) { 8924 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm0); 8925 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm1); 8926 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm2); 8927 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm3); 8928 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm4); 8929 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm5); 8930 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm6); 8931 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm7); 8932#ifdef _LP64 8933 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm8); 8934 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm9); 8935 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm10); 8936 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm11); 8937 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm12); 8938 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm13); 8939 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm14); 8940 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm15); 
8941#endif 8942 } 8943 8944 // Preserve registers across runtime call 8945 int incoming_argument_and_return_value_offset = -1; 8946 if (num_fpu_regs_in_use > 1) { 8947 // Must preserve all other FPU regs (could alternatively convert 8948 // SharedRuntime::dsin, dcos etc. into assembly routines known not to trash 8949 // FPU state, but can not trust C compiler) 8950 NEEDS_CLEANUP; 8951 // NOTE that in this case we also push the incoming argument(s) to 8952 // the stack and restore it later; we also use this stack slot to 8953 // hold the return value from dsin, dcos etc. 8954 for (int i = 0; i < num_fpu_regs_in_use; i++) { 8955 subptr(rsp, sizeof(jdouble)); 8956 fstp_d(Address(rsp, 0)); 8957 } 8958 incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1); 8959 for (int i = nb_args-1; i >= 0; i--) { 8960 fld_d(Address(rsp, incoming_argument_and_return_value_offset-i*sizeof(jdouble))); 8961 } 8962 } 8963 8964 subptr(rsp, nb_args*sizeof(jdouble)); 8965 for (int i = 0; i < nb_args; i++) { 8966 fstp_d(Address(rsp, i*sizeof(jdouble))); 8967 } 8968 8969#ifdef _LP64 8970 if (nb_args > 0) { 8971 movdbl(xmm0, Address(rsp, 0)); 8972 } 8973 if (nb_args > 1) { 8974 movdbl(xmm1, Address(rsp, sizeof(jdouble))); 8975 } 8976 assert(nb_args <= 2, "unsupported number of args"); 8977#endif // _LP64 8978 8979 // NOTE: we must not use call_VM_leaf here because that requires a 8980 // complete interpreter frame in debug mode -- same bug as 4387334 8981 // MacroAssembler::call_VM_leaf_base is perfectly safe and will 8982 // do proper 64bit abi 8983 8984 NEEDS_CLEANUP; 8985 // Need to add stack banging before this runtime call if it needs to 8986 // be taken; however, there is no generic stack banging routine at 8987 // the MacroAssembler level 8988 8989 MacroAssembler::call_VM_leaf_base(runtime_entry, 0); 8990 8991#ifdef _LP64 8992 movsd(Address(rsp, 0), xmm0); 8993 fld_d(Address(rsp, 0)); 8994#endif // _LP64 8995 addptr(rsp, sizeof(jdouble) * nb_args); 8996 if (num_fpu_regs_in_use > 1) { 8997 // Must save return value to stack and then restore entire FPU 8998 // stack except incoming arguments 8999 fstp_d(Address(rsp, incoming_argument_and_return_value_offset)); 9000 for (int i = 0; i < num_fpu_regs_in_use - nb_args; i++) { 9001 fld_d(Address(rsp, 0)); 9002 addptr(rsp, sizeof(jdouble)); 9003 } 9004 fld_d(Address(rsp, (nb_args-1)*sizeof(jdouble))); 9005 addptr(rsp, sizeof(jdouble) * nb_args); 9006 } 9007 9008 off = 0; 9009 if (UseSSE == 1) { 9010 movflt(xmm0, Address(rsp,off++*sizeof(jdouble))); 9011 movflt(xmm1, Address(rsp,off++*sizeof(jdouble))); 9012 movflt(xmm2, Address(rsp,off++*sizeof(jdouble))); 9013 movflt(xmm3, Address(rsp,off++*sizeof(jdouble))); 9014 movflt(xmm4, Address(rsp,off++*sizeof(jdouble))); 9015 movflt(xmm5, Address(rsp,off++*sizeof(jdouble))); 9016 movflt(xmm6, Address(rsp,off++*sizeof(jdouble))); 9017 movflt(xmm7, Address(rsp,off++*sizeof(jdouble))); 9018 } else if (UseSSE >= 2) { 9019 movdbl(xmm0, Address(rsp,off++*sizeof(jdouble))); 9020 movdbl(xmm1, Address(rsp,off++*sizeof(jdouble))); 9021 movdbl(xmm2, Address(rsp,off++*sizeof(jdouble))); 9022 movdbl(xmm3, Address(rsp,off++*sizeof(jdouble))); 9023 movdbl(xmm4, Address(rsp,off++*sizeof(jdouble))); 9024 movdbl(xmm5, Address(rsp,off++*sizeof(jdouble))); 9025 movdbl(xmm6, Address(rsp,off++*sizeof(jdouble))); 9026 movdbl(xmm7, Address(rsp,off++*sizeof(jdouble))); 9027#ifdef _LP64 9028 movdbl(xmm8, Address(rsp,off++*sizeof(jdouble))); 9029 movdbl(xmm9, Address(rsp,off++*sizeof(jdouble))); 9030 movdbl(xmm10, 
Address(rsp,off++*sizeof(jdouble))); 9031 movdbl(xmm11, Address(rsp,off++*sizeof(jdouble))); 9032 movdbl(xmm12, Address(rsp,off++*sizeof(jdouble))); 9033 movdbl(xmm13, Address(rsp,off++*sizeof(jdouble))); 9034 movdbl(xmm14, Address(rsp,off++*sizeof(jdouble))); 9035 movdbl(xmm15, Address(rsp,off++*sizeof(jdouble))); 9036#endif 9037 } 9038 if (UseSSE >= 1) { 9039 addptr(rsp, sizeof(jdouble)* LP64_ONLY(16) NOT_LP64(8)); 9040 } 9041 popa(); 9042} 9043 9044static const double pi_4 = 0.7853981633974483; 9045 9046void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) { 9047 // A hand-coded argument reduction for values in fabs(pi/4, pi/2) 9048 // was attempted in this code; unfortunately it appears that the 9049 // switch to 80-bit precision and back causes this to be 9050 // unprofitable compared with simply performing a runtime call if 9051 // the argument is out of the (-pi/4, pi/4) range. 9052 9053 Register tmp = noreg; 9054 if (!VM_Version::supports_cmov()) { 9055 // fcmp needs a temporary so preserve rbx, 9056 tmp = rbx; 9057 push(tmp); 9058 } 9059 9060 Label slow_case, done; 9061 9062 ExternalAddress pi4_adr = (address)&pi_4; 9063 if (reachable(pi4_adr)) { 9064 // x ?<= pi/4 9065 fld_d(pi4_adr); 9066 fld_s(1); // Stack: X PI/4 X 9067 fabs(); // Stack: |X| PI/4 X 9068 fcmp(tmp); 9069 jcc(Assembler::above, slow_case); 9070 9071 // fastest case: -pi/4 <= x <= pi/4 9072 switch(trig) { 9073 case 's': 9074 fsin(); 9075 break; 9076 case 'c': 9077 fcos(); 9078 break; 9079 case 't': 9080 ftan(); 9081 break; 9082 default: 9083 assert(false, "bad intrinsic"); 9084 break; 9085 } 9086 jmp(done); 9087 } 9088 9089 // slow case: runtime call 9090 bind(slow_case); 9091 9092 switch(trig) { 9093 case 's': 9094 { 9095 fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 1, num_fpu_regs_in_use); 9096 } 9097 break; 9098 case 'c': 9099 { 9100 fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 1, num_fpu_regs_in_use); 9101 } 9102 break; 9103 case 't': 9104 { 9105 fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 1, num_fpu_regs_in_use); 9106 } 9107 break; 9108 default: 9109 assert(false, "bad intrinsic"); 9110 break; 9111 } 9112 9113 // Come here with result in F-TOS 9114 bind(done); 9115 9116 if (tmp != noreg) { 9117 pop(tmp); 9118 } 9119} 9120 9121 9122// Look up the method for a megamorphic invokeinterface call. 9123// The target method is determined by <intf_klass, itable_index>. 9124// The receiver klass is in recv_klass. 9125// On success, the result will be in method_result, and execution falls through. 9126// On failure, execution transfers to the given label. 
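// Layout summary (inferred from the scan below, not a normative spec):
// the itable begins with itableOffsetEntry pairs of (interface klass,
// offset), terminated by a null interface entry, and each offset points at
// a per-interface block of itableMethodEntry slots from which the method
// is finally loaded.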
9127void MacroAssembler::lookup_interface_method(Register recv_klass, 9128 Register intf_klass, 9129 RegisterOrConstant itable_index, 9130 Register method_result, 9131 Register scan_temp, 9132 Label& L_no_such_interface) { 9133 assert_different_registers(recv_klass, intf_klass, method_result, scan_temp); 9134 assert(itable_index.is_constant() || itable_index.as_register() == method_result, 9135 "caller must use same register for non-constant itable index as for method"); 9136 9137 // Compute start of first itableOffsetEntry (which is at the end of the vtable) 9138 int vtable_base = InstanceKlass::vtable_start_offset() * wordSize; 9139 int itentry_off = itableMethodEntry::method_offset_in_bytes(); 9140 int scan_step = itableOffsetEntry::size() * wordSize; 9141 int vte_size = vtableEntry::size() * wordSize; 9142 Address::ScaleFactor times_vte_scale = Address::times_ptr; 9143 assert(vte_size == wordSize, "else adjust times_vte_scale"); 9144 9145 movl(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize)); 9146 9147 // %%% Could store the aligned, prescaled offset in the klassoop. 9148 lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base)); 9149 if (HeapWordsPerLong > 1) { 9150 // Round up to align_object_offset boundary 9151 // see code for InstanceKlass::start_of_itable! 9152 round_to(scan_temp, BytesPerLong); 9153 } 9154 9155 // Adjust recv_klass by scaled itable_index, so we can free itable_index. 9156 assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); 9157 lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off)); 9158 9159 // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) { 9160 // if (scan->interface() == intf) { 9161 // result = (klass + scan->offset() + itable_index); 9162 // } 9163 // } 9164 Label search, found_method; 9165 9166 for (int peel = 1; peel >= 0; peel--) { 9167 movptr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); 9168 cmpptr(intf_klass, method_result); 9169 9170 if (peel) { 9171 jccb(Assembler::equal, found_method); 9172 } else { 9173 jccb(Assembler::notEqual, search); 9174 // (invert the test to fall through to found_method...) 9175 } 9176 9177 if (!peel) break; 9178 9179 bind(search); 9180 9181 // Check that the previous entry is non-null. A null entry means that 9182 // the receiver class doesn't implement the interface, and wasn't the 9183 // same as when the caller was compiled. 9184 testptr(method_result, method_result); 9185 jcc(Assembler::zero, L_no_such_interface); 9186 addptr(scan_temp, scan_step); 9187 } 9188 9189 bind(found_method); 9190 9191 // Got a hit. 
9192 movl(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes())); 9193 movptr(method_result, Address(recv_klass, scan_temp, Address::times_1)); 9194} 9195 9196 9197// virtual method calling 9198void MacroAssembler::lookup_virtual_method(Register recv_klass, 9199 RegisterOrConstant vtable_index, 9200 Register method_result) { 9201 const int base = InstanceKlass::vtable_start_offset() * wordSize; 9202 assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below"); 9203 Address vtable_entry_addr(recv_klass, 9204 vtable_index, Address::times_ptr, 9205 base + vtableEntry::method_offset_in_bytes()); 9206 movptr(method_result, vtable_entry_addr); 9207} 9208 9209 9210void MacroAssembler::check_klass_subtype(Register sub_klass, 9211 Register super_klass, 9212 Register temp_reg, 9213 Label& L_success) { 9214 Label L_failure; 9215 check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL); 9216 check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL); 9217 bind(L_failure); 9218} 9219 9220 9221void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, 9222 Register super_klass, 9223 Register temp_reg, 9224 Label* L_success, 9225 Label* L_failure, 9226 Label* L_slow_path, 9227 RegisterOrConstant super_check_offset) { 9228 assert_different_registers(sub_klass, super_klass, temp_reg); 9229 bool must_load_sco = (super_check_offset.constant_or_zero() == -1); 9230 if (super_check_offset.is_register()) { 9231 assert_different_registers(sub_klass, super_klass, 9232 super_check_offset.as_register()); 9233 } else if (must_load_sco) { 9234 assert(temp_reg != noreg, "supply either a temp or a register offset"); 9235 } 9236 9237 Label L_fallthrough; 9238 int label_nulls = 0; 9239 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } 9240 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } 9241 if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } 9242 assert(label_nulls <= 1, "at most one NULL in the batch"); 9243 9244 int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 9245 int sco_offset = in_bytes(Klass::super_check_offset_offset()); 9246 Address super_check_offset_addr(super_klass, sco_offset); 9247 9248 // Hacked jcc, which "knows" that L_fallthrough, at least, is in 9249 // range of a jccb. If this routine grows larger, reconsider at 9250 // least some of these. 9251#define local_jcc(assembler_cond, label) \ 9252 if (&(label) == &L_fallthrough) jccb(assembler_cond, label); \ 9253 else jcc( assembler_cond, label) /*omit semi*/ 9254 9255 // Hacked jmp, which may only be used just before L_fallthrough. 9256#define final_jmp(label) \ 9257 if (&(label) == &L_fallthrough) { /*do nothing*/ } \ 9258 else jmp(label) /*omit semi*/ 9259 9260 // If the pointers are equal, we are done (e.g., String[] elements). 9261 // This self-check enables sharing of secondary supertype arrays among 9262 // non-primary types such as array-of-interface. Otherwise, each such 9263 // type would need its own customized SSA. 9264 // We move this check to the front of the fast path because many 9265 // type checks are in fact trivially successful in this manner, 9266 // so we get a nicely predicted branch right at the start of the check. 9267 cmpptr(sub_klass, super_klass); 9268 local_jcc(Assembler::equal, *L_success); 9269 9270 // Check the supertype display: 9271 if (must_load_sco) { 9272 // Positive movl does right thing on LP64. 
9273 movl(temp_reg, super_check_offset_addr); 9274 super_check_offset = RegisterOrConstant(temp_reg); 9275 } 9276 Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0); 9277 cmpptr(super_klass, super_check_addr); // load displayed supertype 9278 9279 // This check has worked decisively for primary supers. 9280 // Secondary supers are sought in the super_cache ('super_cache_addr'). 9281 // (Secondary supers are interfaces and very deeply nested subtypes.) 9282 // This works in the same check above because of a tricky aliasing 9283 // between the super_cache and the primary super display elements. 9284 // (The 'super_check_addr' can address either, as the case requires.) 9285 // Note that the cache is updated below if it does not help us find 9286 // what we need immediately. 9287 // So if it was a primary super, we can just fail immediately. 9288 // Otherwise, it's the slow path for us (no success at this point). 9289 9290 if (super_check_offset.is_register()) { 9291 local_jcc(Assembler::equal, *L_success); 9292 cmpl(super_check_offset.as_register(), sc_offset); 9293 if (L_failure == &L_fallthrough) { 9294 local_jcc(Assembler::equal, *L_slow_path); 9295 } else { 9296 local_jcc(Assembler::notEqual, *L_failure); 9297 final_jmp(*L_slow_path); 9298 } 9299 } else if (super_check_offset.as_constant() == sc_offset) { 9300 // Need a slow path; fast failure is impossible. 9301 if (L_slow_path == &L_fallthrough) { 9302 local_jcc(Assembler::equal, *L_success); 9303 } else { 9304 local_jcc(Assembler::notEqual, *L_slow_path); 9305 final_jmp(*L_success); 9306 } 9307 } else { 9308 // No slow path; it's a fast decision. 9309 if (L_failure == &L_fallthrough) { 9310 local_jcc(Assembler::equal, *L_success); 9311 } else { 9312 local_jcc(Assembler::notEqual, *L_failure); 9313 final_jmp(*L_success); 9314 } 9315 } 9316 9317 bind(L_fallthrough); 9318 9319#undef local_jcc 9320#undef final_jmp 9321} 9322 9323 9324void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, 9325 Register super_klass, 9326 Register temp_reg, 9327 Register temp2_reg, 9328 Label* L_success, 9329 Label* L_failure, 9330 bool set_cond_codes) { 9331 assert_different_registers(sub_klass, super_klass, temp_reg); 9332 if (temp2_reg != noreg) 9333 assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg); 9334#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg) 9335 9336 Label L_fallthrough; 9337 int label_nulls = 0; 9338 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } 9339 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } 9340 assert(label_nulls <= 1, "at most one NULL in the batch"); 9341 9342 // a couple of useful fields in sub_klass: 9343 int ss_offset = in_bytes(Klass::secondary_supers_offset()); 9344 int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 9345 Address secondary_supers_addr(sub_klass, ss_offset); 9346 Address super_cache_addr( sub_klass, sc_offset); 9347 9348 // Do a linear scan of the secondary super-klass chain. 9349 // This code is rarely used, so simplicity is a virtue here. 9350 // The repne_scan instruction uses fixed registers, which we must spill. 9351 // Don't worry too much about pre-existing connections with the input regs. 9352 9353 assert(sub_klass != rax, "killed reg"); // killed by mov(rax, super) 9354 assert(sub_klass != rcx, "killed reg"); // killed by lea(rcx, &pst_counter) 9355 9356 // Get super_klass value into rax (even if it was in rdi or rcx). 
9357 bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false; 9358 if (super_klass != rax || UseCompressedOops) { 9359 if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; } 9360 mov(rax, super_klass); 9361 } 9362 if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; } 9363 if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; } 9364 9365#ifndef PRODUCT 9366 int* pst_counter = &SharedRuntime::_partial_subtype_ctr; 9367 ExternalAddress pst_counter_addr((address) pst_counter); 9368 NOT_LP64( incrementl(pst_counter_addr) ); 9369 LP64_ONLY( lea(rcx, pst_counter_addr) ); 9370 LP64_ONLY( incrementl(Address(rcx, 0)) ); 9371#endif //PRODUCT 9372 9373 // We will consult the secondary-super array. 9374 movptr(rdi, secondary_supers_addr); 9375 // Load the array length. (Positive movl does right thing on LP64.) 9376 movl(rcx, Address(rdi, Array<Klass*>::length_offset_in_bytes())); 9377 // Skip to start of data. 9378 addptr(rdi, Array<Klass*>::base_offset_in_bytes()); 9379 9380 // Scan RCX words at [RDI] for an occurrence of RAX. 9381 // Set NZ/Z based on last compare. 9382 // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does 9383 // not change flags (only scas instruction which is repeated sets flags). 9384 // Set Z = 0 (not equal) before 'repne' to indicate that class was not found. 9385 9386 testptr(rax,rax); // Set Z = 0 9387 repne_scan(); 9388 9389 // Unspill the temp. registers: 9390 if (pushed_rdi) pop(rdi); 9391 if (pushed_rcx) pop(rcx); 9392 if (pushed_rax) pop(rax); 9393 9394 if (set_cond_codes) { 9395 // Special hack for the AD files: rdi is guaranteed non-zero. 9396 assert(!pushed_rdi, "rdi must be left non-NULL"); 9397 // Also, the condition codes are properly set Z/NZ on succeed/failure. 9398 } 9399 9400 if (L_failure == &L_fallthrough) 9401 jccb(Assembler::notEqual, *L_failure); 9402 else jcc(Assembler::notEqual, *L_failure); 9403 9404 // Success. Cache the super we found and proceed in triumph. 
  movptr(super_cache_addr, super_klass);
9406
9407  if (L_success != &L_fallthrough) {
9408    jmp(*L_success);
9409  }
9410
9411#undef IS_A_TEMP
9412
9413  bind(L_fallthrough);
9414}
9415
9416
9417void MacroAssembler::cmov32(Condition cc, Register dst, Address src) {
9418  if (VM_Version::supports_cmov()) {
9419    cmovl(cc, dst, src);
9420  } else {
9421    Label L;
9422    jccb(negate_condition(cc), L);
9423    movl(dst, src);
9424    bind(L);
9425  }
9426}
9427
9428void MacroAssembler::cmov32(Condition cc, Register dst, Register src) {
9429  if (VM_Version::supports_cmov()) {
9430    cmovl(cc, dst, src);
9431  } else {
9432    Label L;
9433    jccb(negate_condition(cc), L);
9434    movl(dst, src);
9435    bind(L);
9436  }
9437}
9438
9439void MacroAssembler::verify_oop(Register reg, const char* s) {
9440  if (!VerifyOops) return;
9441
9442  // Pass register number to verify_oop_subroutine
9443  char* b = new char[strlen(s) + 50];
9444  sprintf(b, "verify_oop: %s: %s", reg->name(), s);
9445  BLOCK_COMMENT("verify_oop {");
9446#ifdef _LP64
9447  push(rscratch1);  // save r10, trashed by movptr()
9448#endif
9449  push(rax);        // save rax
9450  push(reg);        // pass register argument
9451  ExternalAddress buffer((address) b);
9452  // avoid using pushptr, as it modifies scratch registers
9453  // and our contract is not to modify anything
9454  movptr(rax, buffer.addr());
9455  push(rax);
9456  // call indirectly to solve generation ordering problem
9457  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
9458  call(rax);
9459  // Caller pops the arguments (oop, message) and restores rax, r10
9460  BLOCK_COMMENT("} verify_oop");
9461}
9462
9463
9464RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
9465                                                      Register tmp,
9466                                                      int offset) {
9467  intptr_t value = *delayed_value_addr;
9468  if (value != 0)
9469    return RegisterOrConstant(value + offset);
9470
9471  // load indirectly to solve generation ordering problem
9472  movptr(tmp, ExternalAddress((address) delayed_value_addr));
9473
9474#ifdef ASSERT
9475  { Label L;
9476    testptr(tmp, tmp);
9477    if (WizardMode) {
9478      jcc(Assembler::notZero, L);
9479      char* buf = new char[40];
9480      sprintf(buf, "DelayedValue="INTPTR_FORMAT, delayed_value_addr[1]);
9481      STOP(buf);
9482    } else {
9483      jccb(Assembler::notZero, L);
9484      hlt();
9485    }
9486    bind(L);
9487  }
9488#endif
9489
9490  if (offset != 0)
9491    addptr(tmp, offset);
9492
9493  return RegisterOrConstant(tmp);
9494}
9495
9496
9497Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
9498                                         int extra_slot_offset) {
9499  // cf. TemplateTable::prepare_invoke(), if (load_receiver).
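// A sketch of the arithmetic below, under the assumption that
// Interpreter::expr_offset_in_bytes(i) is simply i * stackElementSize
// (the ASSERT below only checks that the stride matches):
//
//   disp = (arg_slot + extra_slot_offset) * stackElementSize // slots -> bytes
//          + wordSize;                                       // skip return PC
//   addr = rsp + disp
//
// For a register-valued arg_slot the same value is produced with a
// scaled-index address instead: rsp + arg_slot * stackElementSize + rest.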
9500  int stackElementSize = Interpreter::stackElementSize;
9501  int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
9502#ifdef ASSERT
9503  int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
9504  assert(offset1 - offset == stackElementSize, "correct arithmetic");
9505#endif
9506  Register             scale_reg    = noreg;
9507  Address::ScaleFactor scale_factor = Address::no_scale;
9508  if (arg_slot.is_constant()) {
9509    offset += arg_slot.as_constant() * stackElementSize;
9510  } else {
9511    scale_reg    = arg_slot.as_register();
9512    scale_factor = Address::times(stackElementSize);
9513  }
9514  offset += wordSize;  // return PC is on stack
9515  return Address(rsp, scale_reg, scale_factor, offset);
9516}
9517
9518
9519void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
9520  if (!VerifyOops) return;
9521
9522  // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord);
9523  // Pass register number to verify_oop_subroutine
9524  char* b = new char[strlen(s) + 50];
9525  sprintf(b, "verify_oop_addr: %s", s);
9526
9527#ifdef _LP64
9528  push(rscratch1);  // save r10, trashed by movptr()
9529#endif
9530  push(rax);        // save rax
9531  // addr may contain rsp so we will have to adjust it based on the push
9532  // we just did (and on 64 bit we do two pushes)
9533  // NOTE: the 64-bit code seems to have had a bug in that it did
9534  // movq(addr, rax), which stores rax into addr, the reverse of what
9535  // was intended.
9536  if (addr.uses(rsp)) {
9537    lea(rax, addr);
9538    pushptr(Address(rax, LP64_ONLY(2 *) BytesPerWord));
9539  } else {
9540    pushptr(addr);
9541  }
9542
9543  ExternalAddress buffer((address) b);
9544  // pass msg argument
9545  // avoid using pushptr, as it modifies scratch registers
9546  // and our contract is not to modify anything
9547  movptr(rax, buffer.addr());
9548  push(rax);
9549
9550  // call indirectly to solve generation ordering problem
9551  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
9552  call(rax);
9553  // Caller pops the arguments (addr, message) and restores rax, r10.
9553} 9554 9555void MacroAssembler::verify_tlab() { 9556#ifdef ASSERT 9557 if (UseTLAB && VerifyOops) { 9558 Label next, ok; 9559 Register t1 = rsi; 9560 Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread); 9561 9562 push(t1); 9563 NOT_LP64(push(thread_reg)); 9564 NOT_LP64(get_thread(thread_reg)); 9565 9566 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); 9567 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset()))); 9568 jcc(Assembler::aboveEqual, next); 9569 STOP("assert(top >= start)"); 9570 should_not_reach_here(); 9571 9572 bind(next); 9573 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset()))); 9574 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); 9575 jcc(Assembler::aboveEqual, ok); 9576 STOP("assert(top <= end)"); 9577 should_not_reach_here(); 9578 9579 bind(ok); 9580 NOT_LP64(pop(thread_reg)); 9581 pop(t1); 9582 } 9583#endif 9584} 9585 9586class ControlWord { 9587 public: 9588 int32_t _value; 9589 9590 int rounding_control() const { return (_value >> 10) & 3 ; } 9591 int precision_control() const { return (_value >> 8) & 3 ; } 9592 bool precision() const { return ((_value >> 5) & 1) != 0; } 9593 bool underflow() const { return ((_value >> 4) & 1) != 0; } 9594 bool overflow() const { return ((_value >> 3) & 1) != 0; } 9595 bool zero_divide() const { return ((_value >> 2) & 1) != 0; } 9596 bool denormalized() const { return ((_value >> 1) & 1) != 0; } 9597 bool invalid() const { return ((_value >> 0) & 1) != 0; } 9598 9599 void print() const { 9600 // rounding control 9601 const char* rc; 9602 switch (rounding_control()) { 9603 case 0: rc = "round near"; break; 9604 case 1: rc = "round down"; break; 9605 case 2: rc = "round up "; break; 9606 case 3: rc = "chop "; break; 9607 }; 9608 // precision control 9609 const char* pc; 9610 switch (precision_control()) { 9611 case 0: pc = "24 bits "; break; 9612 case 1: pc = "reserved"; break; 9613 case 2: pc = "53 bits "; break; 9614 case 3: pc = "64 bits "; break; 9615 }; 9616 // flags 9617 char f[9]; 9618 f[0] = ' '; 9619 f[1] = ' '; 9620 f[2] = (precision ()) ? 'P' : 'p'; 9621 f[3] = (underflow ()) ? 'U' : 'u'; 9622 f[4] = (overflow ()) ? 'O' : 'o'; 9623 f[5] = (zero_divide ()) ? 'Z' : 'z'; 9624 f[6] = (denormalized()) ? 'D' : 'd'; 9625 f[7] = (invalid ()) ? 'I' : 'i'; 9626 f[8] = '\x0'; 9627 // output 9628 printf("%04x masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc); 9629 } 9630 9631}; 9632 9633class StatusWord { 9634 public: 9635 int32_t _value; 9636 9637 bool busy() const { return ((_value >> 15) & 1) != 0; } 9638 bool C3() const { return ((_value >> 14) & 1) != 0; } 9639 bool C2() const { return ((_value >> 10) & 1) != 0; } 9640 bool C1() const { return ((_value >> 9) & 1) != 0; } 9641 bool C0() const { return ((_value >> 8) & 1) != 0; } 9642 int top() const { return (_value >> 11) & 7 ; } 9643 bool error_status() const { return ((_value >> 7) & 1) != 0; } 9644 bool stack_fault() const { return ((_value >> 6) & 1) != 0; } 9645 bool precision() const { return ((_value >> 5) & 1) != 0; } 9646 bool underflow() const { return ((_value >> 4) & 1) != 0; } 9647 bool overflow() const { return ((_value >> 3) & 1) != 0; } 9648 bool zero_divide() const { return ((_value >> 2) & 1) != 0; } 9649 bool denormalized() const { return ((_value >> 1) & 1) != 0; } 9650 bool invalid() const { return ((_value >> 0) & 1) != 0; } 9651 9652 void print() const { 9653 // condition codes 9654 char c[5]; 9655 c[0] = (C3()) ? '3' : '-'; 9656 c[1] = (C2()) ? 
'2' : '-'; 9657 c[2] = (C1()) ? '1' : '-'; 9658 c[3] = (C0()) ? '0' : '-'; 9659 c[4] = '\x0'; 9660 // flags 9661 char f[9]; 9662 f[0] = (error_status()) ? 'E' : '-'; 9663 f[1] = (stack_fault ()) ? 'S' : '-'; 9664 f[2] = (precision ()) ? 'P' : '-'; 9665 f[3] = (underflow ()) ? 'U' : '-'; 9666 f[4] = (overflow ()) ? 'O' : '-'; 9667 f[5] = (zero_divide ()) ? 'Z' : '-'; 9668 f[6] = (denormalized()) ? 'D' : '-'; 9669 f[7] = (invalid ()) ? 'I' : '-'; 9670 f[8] = '\x0'; 9671 // output 9672 printf("%04x flags = %s, cc = %s, top = %d", _value & 0xFFFF, f, c, top()); 9673 } 9674 9675}; 9676 9677class TagWord { 9678 public: 9679 int32_t _value; 9680 9681 int tag_at(int i) const { return (_value >> (i*2)) & 3; } 9682 9683 void print() const { 9684 printf("%04x", _value & 0xFFFF); 9685 } 9686 9687}; 9688 9689class FPU_Register { 9690 public: 9691 int32_t _m0; 9692 int32_t _m1; 9693 int16_t _ex; 9694 9695 bool is_indefinite() const { 9696 return _ex == -1 && _m1 == (int32_t)0xC0000000 && _m0 == 0; 9697 } 9698 9699 void print() const { 9700 char sign = (_ex < 0) ? '-' : '+'; 9701 const char* kind = (_ex == 0x7FFF || _ex == (int16_t)-1) ? "NaN" : " "; 9702 printf("%c%04hx.%08x%08x %s", sign, _ex, _m1, _m0, kind); 9703 }; 9704 9705}; 9706 9707class FPU_State { 9708 public: 9709 enum { 9710 register_size = 10, 9711 number_of_registers = 8, 9712 register_mask = 7 9713 }; 9714 9715 ControlWord _control_word; 9716 StatusWord _status_word; 9717 TagWord _tag_word; 9718 int32_t _error_offset; 9719 int32_t _error_selector; 9720 int32_t _data_offset; 9721 int32_t _data_selector; 9722 int8_t _register[register_size * number_of_registers]; 9723 9724 int tag_for_st(int i) const { return _tag_word.tag_at((_status_word.top() + i) & register_mask); } 9725 FPU_Register* st(int i) const { return (FPU_Register*)&_register[register_size * i]; } 9726 9727 const char* tag_as_string(int tag) const { 9728 switch (tag) { 9729 case 0: return "valid"; 9730 case 1: return "zero"; 9731 case 2: return "special"; 9732 case 3: return "empty"; 9733 } 9734 ShouldNotReachHere(); 9735 return NULL; 9736 } 9737 9738 void print() const { 9739 // print computation registers 9740 { int t = _status_word.top(); 9741 for (int i = 0; i < number_of_registers; i++) { 9742 int j = (i - t) & register_mask; 9743 printf("%c r%d = ST%d = ", (j == 0 ? '*' : ' '), i, j); 9744 st(j)->print(); 9745 printf(" %s\n", tag_as_string(_tag_word.tag_at(i))); 9746 } 9747 } 9748 printf("\n"); 9749 // print control registers 9750 printf("ctrl = "); _control_word.print(); printf("\n"); 9751 printf("stat = "); _status_word .print(); printf("\n"); 9752 printf("tags = "); _tag_word .print(); printf("\n"); 9753 } 9754 9755}; 9756 9757class Flag_Register { 9758 public: 9759 int32_t _value; 9760 9761 bool overflow() const { return ((_value >> 11) & 1) != 0; } 9762 bool direction() const { return ((_value >> 10) & 1) != 0; } 9763 bool sign() const { return ((_value >> 7) & 1) != 0; } 9764 bool zero() const { return ((_value >> 6) & 1) != 0; } 9765 bool auxiliary_carry() const { return ((_value >> 4) & 1) != 0; } 9766 bool parity() const { return ((_value >> 2) & 1) != 0; } 9767 bool carry() const { return ((_value >> 0) & 1) != 0; } 9768 9769 void print() const { 9770 // flags 9771 char f[8]; 9772 f[0] = (overflow ()) ? 'O' : '-'; 9773 f[1] = (direction ()) ? 'D' : '-'; 9774 f[2] = (sign ()) ? 'S' : '-'; 9775 f[3] = (zero ()) ? 'Z' : '-'; 9776 f[4] = (auxiliary_carry()) ? 'A' : '-'; 9777 f[5] = (parity ()) ? 'P' : '-'; 9778 f[6] = (carry ()) ? 
'C' : '-';
9779    f[7] = '\x0';
9780    // output
9781    printf("%08x flags = %s", _value, f);
9782  }
9783
9784};
9785
9786class IU_Register {
9787 public:
9788  int32_t _value;
9789
9790  void print() const {
9791    printf("%08x %11d", _value, _value);
9792  }
9793
9794};
9795
9796class IU_State {
9797 public:
9798  Flag_Register _eflags;
9799  IU_Register   _rdi;
9800  IU_Register   _rsi;
9801  IU_Register   _rbp;
9802  IU_Register   _rsp;
9803  IU_Register   _rbx;
9804  IU_Register   _rdx;
9805  IU_Register   _rcx;
9806  IU_Register   _rax;
9807
9808  void print() const {
9809    // computation registers
9810    printf("rax = "); _rax.print(); printf("\n");
9811    printf("rbx = "); _rbx.print(); printf("\n");
9812    printf("rcx = "); _rcx.print(); printf("\n");
9813    printf("rdx = "); _rdx.print(); printf("\n");
9814    printf("rdi = "); _rdi.print(); printf("\n");
9815    printf("rsi = "); _rsi.print(); printf("\n");
9816    printf("rbp = "); _rbp.print(); printf("\n");
9817    printf("rsp = "); _rsp.print(); printf("\n");
9818    printf("\n");
9819    // control registers
9820    printf("flgs = "); _eflags.print(); printf("\n");
9821  }
9822};
9823
9824
9825class CPU_State {
9826 public:
9827  FPU_State _fpu_state;
9828  IU_State  _iu_state;
9829
9830  void print() const {
9831    printf("--------------------------------------------------\n");
9832    _iu_state .print();
9833    printf("\n");
9834    _fpu_state.print();
9835    printf("--------------------------------------------------\n");
9836  }
9837
9838};
9839
9840
9841static void _print_CPU_state(CPU_State* state) {
9842  state->print();
9843};
9844
9845
9846void MacroAssembler::print_CPU_state() {
9847  push_CPU_state();
9848  push(rsp);              // pass CPU state
9849  call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state)));
9850  addptr(rsp, wordSize);  // discard argument
9851  pop_CPU_state();
9852}
9853
9854
9855static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) {
9856  static int counter = 0;
9857  FPU_State* fs = &state->_fpu_state;
9858  counter++;
9859  // For leaf calls, only verify that the top few elements remain empty.
9860  // We only need 1 empty at the top for C2 code.
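// Background for the checks below: the x87 tag word holds two bits per
// physical register -- 0 = valid, 1 = zero, 2 = special (NaN, infinity,
// denormal), 3 = empty -- and tag_for_st(i) rotates by the TOP field so
// that index i always refers to ST(i). A rough sketch of the depth scan
// done further down:
//
//   int i = 0;
//   while (i < 8 && fs->tag_for_st(i) != 3) i++;  // run of occupied slots
//   int depth = i;                                // computed stack depth
//   while (i < 8 && fs->tag_for_st(i) == 3) i++;  // the rest must be empty
//   bool contiguous = (i == 8);                   // else the stack is torn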
9861 if( stack_depth < 0 ) { 9862 if( fs->tag_for_st(7) != 3 ) { 9863 printf("FPR7 not empty\n"); 9864 state->print(); 9865 assert(false, "error"); 9866 return false; 9867 } 9868 return true; // All other stack states do not matter 9869 } 9870 9871 assert((fs->_control_word._value & 0xffff) == StubRoutines::_fpu_cntrl_wrd_std, 9872 "bad FPU control word"); 9873 9874 // compute stack depth 9875 int i = 0; 9876 while (i < FPU_State::number_of_registers && fs->tag_for_st(i) < 3) i++; 9877 int d = i; 9878 while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++; 9879 // verify findings 9880 if (i != FPU_State::number_of_registers) { 9881 // stack not contiguous 9882 printf("%s: stack not contiguous at ST%d\n", s, i); 9883 state->print(); 9884 assert(false, "error"); 9885 return false; 9886 } 9887 // check if computed stack depth corresponds to expected stack depth 9888 if (stack_depth < 0) { 9889 // expected stack depth is -stack_depth or less 9890 if (d > -stack_depth) { 9891 // too many elements on the stack 9892 printf("%s: <= %d stack elements expected but found %d\n", s, -stack_depth, d); 9893 state->print(); 9894 assert(false, "error"); 9895 return false; 9896 } 9897 } else { 9898 // expected stack depth is stack_depth 9899 if (d != stack_depth) { 9900 // wrong stack depth 9901 printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d); 9902 state->print(); 9903 assert(false, "error"); 9904 return false; 9905 } 9906 } 9907 // everything is cool 9908 return true; 9909} 9910 9911 9912void MacroAssembler::verify_FPU(int stack_depth, const char* s) { 9913 if (!VerifyFPU) return; 9914 push_CPU_state(); 9915 push(rsp); // pass CPU state 9916 ExternalAddress msg((address) s); 9917 // pass message string s 9918 pushptr(msg.addr()); 9919 push(stack_depth); // pass stack depth 9920 call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU))); 9921 addptr(rsp, 3 * wordSize); // discard arguments 9922 // check for error 9923 { Label L; 9924 testl(rax, rax); 9925 jcc(Assembler::notZero, L); 9926 int3(); // break if error condition 9927 bind(L); 9928 } 9929 pop_CPU_state(); 9930} 9931 9932void MacroAssembler::load_klass(Register dst, Register src) { 9933#ifdef _LP64 9934 if (UseCompressedKlassPointers) { 9935 movl(dst, Address(src, oopDesc::klass_offset_in_bytes())); 9936 decode_heap_oop_not_null(dst); 9937 } else 9938#endif 9939 movptr(dst, Address(src, oopDesc::klass_offset_in_bytes())); 9940} 9941 9942void MacroAssembler::load_prototype_header(Register dst, Register src) { 9943#ifdef _LP64 9944 if (UseCompressedKlassPointers) { 9945 assert (Universe::heap() != NULL, "java heap should be initialized"); 9946 movl(dst, Address(src, oopDesc::klass_offset_in_bytes())); 9947 if (Universe::narrow_oop_shift() != 0) { 9948 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 9949 if (LogMinObjAlignmentInBytes == Address::times_8) { 9950 movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset())); 9951 } else { 9952 // OK to use shift since we don't need to preserve flags. 
9953        shlq(dst, LogMinObjAlignmentInBytes);
9954        movq(dst, Address(r12_heapbase, dst, Address::times_1, Klass::prototype_header_offset()));
9955      }
9956    } else {
9957      movq(dst, Address(dst, Klass::prototype_header_offset()));
9958    }
9959  } else
9960#endif
9961  {
9962    movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
9963    movptr(dst, Address(dst, Klass::prototype_header_offset()));
9964  }
9965}
9966
9967void MacroAssembler::store_klass(Register dst, Register src) {
9968#ifdef _LP64
9969  if (UseCompressedKlassPointers) {
9970    encode_heap_oop_not_null(src);
9971    movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
9972  } else
9973#endif
9974    movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
9975}
9976
9977void MacroAssembler::load_heap_oop(Register dst, Address src) {
9978#ifdef _LP64
9979  // FIXME: Must change all places where we try to load the klass.
9980  if (UseCompressedOops) {
9981    movl(dst, src);
9982    decode_heap_oop(dst);
9983  } else
9984#endif
9985    movptr(dst, src);
9986}
9987
9988// Does no verification; generates fixed-size code
9989void MacroAssembler::load_heap_oop_not_null(Register dst, Address src) {
9990#ifdef _LP64
9991  if (UseCompressedOops) {
9992    movl(dst, src);
9993    decode_heap_oop_not_null(dst);
9994  } else
9995#endif
9996    movptr(dst, src);
9997}
9998
9999void MacroAssembler::store_heap_oop(Address dst, Register src) {
10000#ifdef _LP64
10001  if (UseCompressedOops) {
10002    assert(!dst.uses(src), "not enough registers");
10003    encode_heap_oop(src);
10004    movl(dst, src);
10005  } else
10006#endif
10007    movptr(dst, src);
10008}
10009
10010void MacroAssembler::cmp_heap_oop(Register src1, Address src2, Register tmp) {
10011  assert_different_registers(src1, tmp);
10012#ifdef _LP64
10013  if (UseCompressedOops) {
10014    bool did_push = false;
10015    if (tmp == noreg) {
10016      tmp = rax;
10017      push(tmp);
10018      did_push = true;
10019      assert(!src2.uses(rsp), "can't push");
10020    }
10021    load_heap_oop(tmp, src2);
10022    cmpptr(src1, tmp);
10023    if (did_push) pop(tmp);
10024  } else
10025#endif
10026    cmpptr(src1, src2);
10027}
10028
10029// Used for storing NULLs.
10030void MacroAssembler::store_heap_oop_null(Address dst) {
10031#ifdef _LP64
10032  if (UseCompressedOops) {
10033    movl(dst, (int32_t)NULL_WORD);
10034  } else {
10035    movslq(dst, (int32_t)NULL_WORD);
10036  }
10037#else
10038  movl(dst, (int32_t)NULL_WORD);
10039#endif
10040}
10041
10042#ifdef _LP64
10043void MacroAssembler::store_klass_gap(Register dst, Register src) {
10044  if (UseCompressedKlassPointers) {
10045    // Store to klass gap in destination
10046    movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src);
10047  }
10048}
10049
10050#ifdef ASSERT
10051void MacroAssembler::verify_heapbase(const char* msg) {
10052  assert (UseCompressedOops, "should be compressed");
10053  assert (Universe::heap() != NULL, "java heap should be initialized");
10054  if (CheckCompressedOops) {
10055    Label ok;
10056    push(rscratch1); // cmpptr trashes rscratch1
10057    cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr()));
10058    jcc(Assembler::equal, ok);
10059    STOP(msg);
10060    bind(ok);
10061    pop(rscratch1);
10062  }
10063}
10064#endif
10065
10066// Algorithm must match oop.inline.hpp encode_heap_oop.
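// For reference, the encoding in rough C, where 'base' and 'shift' stand
// for Universe::narrow_oop_base() and Universe::narrow_oop_shift(); this
// is a sketch of the shared algorithm, not a copy of oop.inline.hpp:
//
//   narrowOop encode(oop o) {
//     if (o == NULL) return 0;
//     return (narrowOop)(((uintptr_t)o - (uintptr_t)base) >> shift);
//   }
//
// When a non-NULL heap base is in use, the testq/cmovq pair in
// encode_heap_oop below handles the NULL case branchlessly: a NULL oop is
// first replaced by the heap base itself, so the subtraction yields zero.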
10067void MacroAssembler::encode_heap_oop(Register r) { 10068#ifdef ASSERT 10069 verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?"); 10070#endif 10071 verify_oop(r, "broken oop in encode_heap_oop"); 10072 if (Universe::narrow_oop_base() == NULL) { 10073 if (Universe::narrow_oop_shift() != 0) { 10074 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 10075 shrq(r, LogMinObjAlignmentInBytes); 10076 } 10077 return; 10078 } 10079 testq(r, r); 10080 cmovq(Assembler::equal, r, r12_heapbase); 10081 subq(r, r12_heapbase); 10082 shrq(r, LogMinObjAlignmentInBytes); 10083} 10084 10085void MacroAssembler::encode_heap_oop_not_null(Register r) { 10086#ifdef ASSERT 10087 verify_heapbase("MacroAssembler::encode_heap_oop_not_null: heap base corrupted?"); 10088 if (CheckCompressedOops) { 10089 Label ok; 10090 testq(r, r); 10091 jcc(Assembler::notEqual, ok); 10092 STOP("null oop passed to encode_heap_oop_not_null"); 10093 bind(ok); 10094 } 10095#endif 10096 verify_oop(r, "broken oop in encode_heap_oop_not_null"); 10097 if (Universe::narrow_oop_base() != NULL) { 10098 subq(r, r12_heapbase); 10099 } 10100 if (Universe::narrow_oop_shift() != 0) { 10101 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 10102 shrq(r, LogMinObjAlignmentInBytes); 10103 } 10104} 10105 10106void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { 10107#ifdef ASSERT 10108 verify_heapbase("MacroAssembler::encode_heap_oop_not_null2: heap base corrupted?"); 10109 if (CheckCompressedOops) { 10110 Label ok; 10111 testq(src, src); 10112 jcc(Assembler::notEqual, ok); 10113 STOP("null oop passed to encode_heap_oop_not_null2"); 10114 bind(ok); 10115 } 10116#endif 10117 verify_oop(src, "broken oop in encode_heap_oop_not_null2"); 10118 if (dst != src) { 10119 movq(dst, src); 10120 } 10121 if (Universe::narrow_oop_base() != NULL) { 10122 subq(dst, r12_heapbase); 10123 } 10124 if (Universe::narrow_oop_shift() != 0) { 10125 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 10126 shrq(dst, LogMinObjAlignmentInBytes); 10127 } 10128} 10129 10130void MacroAssembler::decode_heap_oop(Register r) { 10131#ifdef ASSERT 10132 verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?"); 10133#endif 10134 if (Universe::narrow_oop_base() == NULL) { 10135 if (Universe::narrow_oop_shift() != 0) { 10136 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 10137 shlq(r, LogMinObjAlignmentInBytes); 10138 } 10139 } else { 10140 Label done; 10141 shlq(r, LogMinObjAlignmentInBytes); 10142 jccb(Assembler::equal, done); 10143 addq(r, r12_heapbase); 10144 bind(done); 10145 } 10146 verify_oop(r, "broken oop in decode_heap_oop"); 10147} 10148 10149void MacroAssembler::decode_heap_oop_not_null(Register r) { 10150 // Note: it will change flags 10151 assert (UseCompressedOops, "should only be used for compressed headers"); 10152 assert (Universe::heap() != NULL, "java heap should be initialized"); 10153 // Cannot assert, unverified entry point counts instructions (see .ad file) 10154 // vtableStubs also counts instructions in pd_code_size_limit. 10155 // Also do not verify_oop as this is called by verify_oop. 
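// The decode below is the inverse of the encoding sketched above; in
// rough C (again with 'base' and 'shift' as stand-ins):
//
//   oop decode_not_null(narrowOop n) {
//     return (oop)((uintptr_t)base + ((uintptr_t)n << shift));
//   }
//
// Because the value is known to be non-NULL, there is no need to test for
// the reserved zero encoding, which is what allows the fixed-size
// shl/add sequence generated here.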
10156 if (Universe::narrow_oop_shift() != 0) { 10157 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 10158 shlq(r, LogMinObjAlignmentInBytes); 10159 if (Universe::narrow_oop_base() != NULL) { 10160 addq(r, r12_heapbase); 10161 } 10162 } else { 10163 assert (Universe::narrow_oop_base() == NULL, "sanity"); 10164 } 10165} 10166 10167void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { 10168 // Note: it will change flags 10169 assert (UseCompressedOops, "should only be used for compressed headers"); 10170 assert (Universe::heap() != NULL, "java heap should be initialized"); 10171 // Cannot assert, unverified entry point counts instructions (see .ad file) 10172 // vtableStubs also counts instructions in pd_code_size_limit. 10173 // Also do not verify_oop as this is called by verify_oop. 10174 if (Universe::narrow_oop_shift() != 0) { 10175 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 10176 if (LogMinObjAlignmentInBytes == Address::times_8) { 10177 leaq(dst, Address(r12_heapbase, src, Address::times_8, 0)); 10178 } else { 10179 if (dst != src) { 10180 movq(dst, src); 10181 } 10182 shlq(dst, LogMinObjAlignmentInBytes); 10183 if (Universe::narrow_oop_base() != NULL) { 10184 addq(dst, r12_heapbase); 10185 } 10186 } 10187 } else { 10188 assert (Universe::narrow_oop_base() == NULL, "sanity"); 10189 if (dst != src) { 10190 movq(dst, src); 10191 } 10192 } 10193} 10194 10195void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { 10196 assert (UseCompressedOops, "should only be used for compressed headers"); 10197 assert (Universe::heap() != NULL, "java heap should be initialized"); 10198 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 10199 int oop_index = oop_recorder()->find_index(obj); 10200 RelocationHolder rspec = oop_Relocation::spec(oop_index); 10201 mov_narrow_oop(dst, oop_index, rspec); 10202} 10203 10204void MacroAssembler::set_narrow_oop(Address dst, jobject obj) { 10205 assert (UseCompressedOops, "should only be used for compressed headers"); 10206 assert (Universe::heap() != NULL, "java heap should be initialized"); 10207 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 10208 int oop_index = oop_recorder()->find_index(obj); 10209 RelocationHolder rspec = oop_Relocation::spec(oop_index); 10210 mov_narrow_oop(dst, oop_index, rspec); 10211} 10212 10213void MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) { 10214 assert (UseCompressedOops, "should only be used for compressed headers"); 10215 assert (Universe::heap() != NULL, "java heap should be initialized"); 10216 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 10217 int oop_index = oop_recorder()->find_index(obj); 10218 RelocationHolder rspec = oop_Relocation::spec(oop_index); 10219 Assembler::cmp_narrow_oop(dst, oop_index, rspec); 10220} 10221 10222void MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) { 10223 assert (UseCompressedOops, "should only be used for compressed headers"); 10224 assert (Universe::heap() != NULL, "java heap should be initialized"); 10225 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 10226 int oop_index = oop_recorder()->find_index(obj); 10227 RelocationHolder rspec = oop_Relocation::spec(oop_index); 10228 Assembler::cmp_narrow_oop(dst, oop_index, rspec); 10229} 10230 10231void MacroAssembler::reinit_heapbase() { 10232 if (UseCompressedOops) { 10233 movptr(r12_heapbase, 
ExternalAddress((address)Universe::narrow_oop_base_addr()));
10234  }
10235}
10236#endif // _LP64
10237
10238
10239// C2 compiled method's prolog code.
10240void MacroAssembler::verified_entry(int framesize, bool stack_bang, bool fp_mode_24b) {
10241
10242  // WARNING: Initial instruction MUST be 5 bytes or longer so that
10243  // NativeJump::patch_verified_entry will be able to patch out the entry
10244  // code safely. The push to verify stack depth is ok at 5 bytes,
10245  // the frame allocation can be either 3 or 6 bytes. So if we don't do
10246  // stack bang then we must use the 6 byte frame allocation even if
10247  // we have no frame. :-(
10248
10249  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
10250  // Remove word for return addr
10251  framesize -= wordSize;
10252
10253  // Calls to C2R adapters often do not accept exceptional returns.
10254  // We require that their callers must bang for them. But be careful, because
10255  // some VM calls (such as call site linkage) can use several kilobytes of
10256  // stack. But the stack safety zone should account for that.
10257  // See bugs 4446381, 4468289, 4497237.
10258  if (stack_bang) {
10259    generate_stack_overflow_check(framesize);

10260
10261    // We always push rbp so that on return to the interpreter rbp will be
10262    // restored correctly and we can correct the stack.
10263    push(rbp);
10264    // Remove word for rbp
10265    framesize -= wordSize;
10266
10267    // Create frame
10268    if (framesize) {
10269      subptr(rsp, framesize);
10270    }
10271  } else {
10272    // Create frame (force generation of a 4 byte immediate value)
10273    subptr_imm32(rsp, framesize);
10274
10275    // Save RBP register now.
10276    framesize -= wordSize;
10277    movptr(Address(rsp, framesize), rbp);
10278  }
10279
10280  if (VerifyStackAtCalls) { // Majik cookie to verify stack depth
10281    framesize -= wordSize;
10282    movptr(Address(rsp, framesize), (int32_t)0xbadb100d);
10283  }
10284
10285#ifndef _LP64
10286  // If method sets FPU control word do it now
10287  if (fp_mode_24b) {
10288    fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10289  }
10290  if (UseSSE >= 2 && VerifyFPU) {
10291    verify_FPU(0, "FPU stack must be clean on entry");
10292  }
10293#endif
10294
10295#ifdef ASSERT
10296  if (VerifyStackAtCalls) {
10297    Label L;
10298    push(rax);
10299    mov(rax, rsp);
10300    andptr(rax, StackAlignmentInBytes-1);
10301    cmpptr(rax, StackAlignmentInBytes-wordSize);
10302    pop(rax);
10303    jcc(Assembler::equal, L);
10304    STOP("Stack is not properly aligned!");
10305    bind(L);
10306  }
10307#endif
10308
10309}
10310
10311
10312// IndexOf for constant substrings with size >= 8 chars
10313// which don't need to be loaded through stack.
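// As a reading aid for the pcmpestri uses below, the immediate 0x0d
// decodes per the SSE4.2 definition as:
//
//   bits 1:0 = 01 -> source elements are unsigned words (16-bit chars)
//   bits 3:2 = 11 -> "equal ordered" aggregation, i.e. substring search
//   bits 5:4 = 00 -> positive polarity (result not negated)
//   bit  6   = 0  -> rcx receives the least significant matching index
//
// In this mode the instruction sets CF when some candidate position
// matches (with its index in rcx) and OF when the match starts at element
// 0, which is exactly how the branches below are predicated.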
10314void MacroAssembler::string_indexofC8(Register str1, Register str2,
10315                                      Register cnt1, Register cnt2,
10316                                      int int_cnt2,  Register result,
10317                                      XMMRegister vec, Register tmp) {
10318  ShortBranchVerifier sbv(this);
10319  assert(UseSSE42Intrinsics, "SSE4.2 is required");
10320
10321  // This method uses the pcmpestri instruction with bound registers
10322  //   inputs:
10323  //     xmm - substring
10324  //     rax - substring length (elements count)
10325  //     mem - scanned string
10326  //     rdx - string length (elements count)
10327  //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
10328  //   outputs:
10329  //     rcx - matched index in string
10330  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
10331
10332  Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR,
10333        RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR,
10334        MATCH_SUBSTR_HEAD, RELOAD_STR, FOUND_CANDIDATE;
10335
10336  // Note, inline_string_indexOf() generates checks:
10337  // if (substr.count > string.count) return -1;
10338  // if (substr.count == 0) return 0;
10339  assert(int_cnt2 >= 8, "this code is used only for cnt2 >= 8 chars");
10340
10341  // Load substring.
10342  movdqu(vec, Address(str2, 0));
10343  movl(cnt2, int_cnt2);
10344  movptr(result, str1); // string addr
10345
10346  if (int_cnt2 > 8) {
10347    jmpb(SCAN_TO_SUBSTR);
10348
10349    // Reload substr for rescan, this code
10350    // is executed only for large substrings (> 8 chars)
10351    bind(RELOAD_SUBSTR);
10352    movdqu(vec, Address(str2, 0));
10353    negptr(cnt2); // Jumped here with negative cnt2, convert to positive
10354
10355    bind(RELOAD_STR);
10356    // We came here after the beginning of the substring was
10357    // matched but the rest of it was not so we need to search
10358    // again. Start from the next element after the previous match.
10359
10360    // cnt2 is the number of remaining substring elements and
10361    // cnt1 is the number of remaining string elements when cmp failed.
10362    // Restored cnt1 = cnt1 - cnt2 + int_cnt2
10363    subl(cnt1, cnt2);
10364    addl(cnt1, int_cnt2);
10365    movl(cnt2, int_cnt2); // Now restore cnt2
10366
10367    decrementl(cnt1);     // Shift to next element
10368    cmpl(cnt1, cnt2);
10369    jccb(Assembler::negative, RET_NOT_FOUND);  // Fewer chars left than substring
10370
10371    addptr(result, 2);
10372
10373  } // (int_cnt2 > 8)
10374
10375  // Scan string for start of substr in 16-byte vectors
10376  bind(SCAN_TO_SUBSTR);
10377  pcmpestri(vec, Address(result, 0), 0x0d);
10378  jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1
10379  subl(cnt1, 8);
10380  jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
10381  cmpl(cnt1, cnt2);
10382  jccb(Assembler::negative, RET_NOT_FOUND);  // Fewer chars left than substring
10383  addptr(result, 16);
10384  jmpb(SCAN_TO_SUBSTR);
10385
10386  // Found a potential substr
10387  bind(FOUND_CANDIDATE);
10388  // Matched whole vector if first element matched (tmp(rcx) == 0).
10389  if (int_cnt2 == 8) {
10390    jccb(Assembler::overflow, RET_FOUND);    // OF == 1
10391  } else { // int_cnt2 > 8
10392    jccb(Assembler::overflow, FOUND_SUBSTR);
10393  }
10394  // After pcmpestri tmp(rcx) contains matched element index
10395  // Compute start addr of substr
10396  lea(result, Address(result, tmp, Address::times_2));
10397
10398  // Make sure string is still long enough
10399  subl(cnt1, tmp);
10400  cmpl(cnt1, cnt2);
10401  if (int_cnt2 == 8) {
10402    jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
10403  } else { // int_cnt2 > 8
10404    jccb(Assembler::greaterEqual, MATCH_SUBSTR_HEAD);
10405  }
10406  // Fewer chars left than substring.
10407
10408  bind(RET_NOT_FOUND);
10409  movl(result, -1);
10410  jmpb(EXIT);
10411
10412  if (int_cnt2 > 8) {
10413    // This code is optimized for the case when the whole substring
10414    // is matched if its head is matched.
10415    bind(MATCH_SUBSTR_HEAD);
10416    pcmpestri(vec, Address(result, 0), 0x0d);
10417    // Reload only the string if it does not match
10418    jccb(Assembler::noOverflow, RELOAD_STR); // OF == 0
10419
10420    Label CONT_SCAN_SUBSTR;
10421    // Compare the rest of substring (> 8 chars).
10422    bind(FOUND_SUBSTR);
10423    // First 8 chars are already matched.
10424    negptr(cnt2);
10425    addptr(cnt2, 8);
10426
10427    bind(SCAN_SUBSTR);
10428    subl(cnt1, 8);
10429    cmpl(cnt2, -8); // Do not read beyond substring
10430    jccb(Assembler::lessEqual, CONT_SCAN_SUBSTR);
10431    // Back-up strings to avoid reading beyond substring:
10432    // cnt1 = cnt1 - cnt2 + 8
10433    addl(cnt1, cnt2); // cnt2 is negative
10434    addl(cnt1, 8);
10435    movl(cnt2, 8);  negptr(cnt2);
10436    bind(CONT_SCAN_SUBSTR);
10437    if (int_cnt2 < (int)G) {
10438      movdqu(vec, Address(str2, cnt2, Address::times_2, int_cnt2*2));
10439      pcmpestri(vec, Address(result, cnt2, Address::times_2, int_cnt2*2), 0x0d);
10440    } else {
10441      // calculate index in register to avoid integer overflow (int_cnt2*2)
10442      movl(tmp, int_cnt2);
10443      addptr(tmp, cnt2);
10444      movdqu(vec, Address(str2, tmp, Address::times_2, 0));
10445      pcmpestri(vec, Address(result, tmp, Address::times_2, 0), 0x0d);
10446    }
10447    // Need to reload string pointers if not matched whole vector
10448    jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
10449    addptr(cnt2, 8);
10450    jcc(Assembler::negative, SCAN_SUBSTR);
10451    // Fall through if found full substring
10452
10453  } // (int_cnt2 > 8)
10454
10455  bind(RET_FOUND);
10456  // Found result if we matched full small substring.
10457  // Compute substr offset
10458  subptr(result, str1);
10459  shrl(result, 1); // index
10460  bind(EXIT);
10461
10462} // string_indexofC8
10463
10464// Small strings are loaded through stack if they cross page boundary.
10465void MacroAssembler::string_indexof(Register str1, Register str2,
10466                                    Register cnt1, Register cnt2,
10467                                    int int_cnt2,  Register result,
10468                                    XMMRegister vec, Register tmp) {
10469  ShortBranchVerifier sbv(this);
10470  assert(UseSSE42Intrinsics, "SSE4.2 is required");
10471  //
10472  // int_cnt2 is the length of a small (< 8 chars) constant substring
10473  // or (-1) for a non-constant substring, in which case its length
10474  // is in the cnt2 register.
10475  //
10476  // Note, inline_string_indexOf() generates checks:
10477  // if (substr.count > string.count) return -1;
10478  // if (substr.count == 0) return 0;
10479  //
10480  assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < 8), "should be != 0");
10481
10482  // This method uses the pcmpestri instruction with bound registers
10483  //   inputs:
10484  //     xmm - substring
10485  //     rax - substring length (elements count)
10486  //     mem - scanned string
10487  //     rdx - string length (elements count)
10488  //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
10489  //   outputs:
10490  //     rcx - matched index in string
10491  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
10492
10493  Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, ADJUST_STR,
10494        RET_FOUND, RET_NOT_FOUND, CLEANUP, FOUND_SUBSTR,
10495        FOUND_CANDIDATE;
10496
10497  { //========================================================
10498    // We don't know where these strings are located
10499    // and we can't read beyond them. Load them through stack.
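    // The page-boundary trick used below: heap memory is mapped in whole
    // pages, so a 16-byte movdqu from address p cannot fault as long as it
    // does not extend into the next page, i.e. while
    // (p & (page_size-1)) <= page_size - 16. Worked example with a 4K
    // page: an address whose low 12 bits are 0xff0 may be read (the 16
    // bytes end exactly at the page boundary), while one ending in 0xff1
    // may not, so such a short string is first copied to the stack and
    // read from there.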
10500    Label BIG_STRINGS, CHECK_STR, COPY_SUBSTR, COPY_STR;
10501
10502    movptr(tmp, rsp); // save old SP
10503
10504    if (int_cnt2 > 0) {     // small (< 8 chars) constant substring
10505      if (int_cnt2 == 1) {  // One char
10506        load_unsigned_short(result, Address(str2, 0));
10507        movdl(vec, result); // move 32 bits
10508      } else if (int_cnt2 == 2) { // Two chars
10509        movdl(vec, Address(str2, 0)); // move 32 bits
10510      } else if (int_cnt2 == 4) { // Four chars
10511        movq(vec, Address(str2, 0));  // move 64 bits
10512      } else { // cnt2 = { 3, 5, 6, 7 }
10513        // Array header size is 12 bytes in 32-bit VM
10514        // + 6 bytes for 3 chars == 18 bytes,
10515        // enough space to load vec and shift.
10516        assert(HeapWordSize*typeArrayKlass::header_size() >= 12,"sanity");
10517        movdqu(vec, Address(str2, (int_cnt2*2)-16));
10518        psrldq(vec, 16-(int_cnt2*2));
10519      }
10520    } else { // not constant substring
10521      cmpl(cnt2, 8);
10522      jccb(Assembler::aboveEqual, BIG_STRINGS); // Both strings are big enough
10523
10524      // We can read beyond string if str+16 does not cross page boundary
10525      // since heaps are aligned and mapped by pages.
10526      assert(os::vm_page_size() < (int)G, "default page should be small");
10527      movl(result, str2); // We need only low 32 bits
10528      andl(result, (os::vm_page_size()-1));
10529      cmpl(result, (os::vm_page_size()-16));
10530      jccb(Assembler::belowEqual, CHECK_STR);
10531
10532      // Move small strings to stack to allow loading 16 bytes into vec.
10533      subptr(rsp, 16);
10534      int stk_offset = wordSize-2;
10535      push(cnt2);
10536
10537      bind(COPY_SUBSTR);
10538      load_unsigned_short(result, Address(str2, cnt2, Address::times_2, -2));
10539      movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
10540      decrement(cnt2);
10541      jccb(Assembler::notZero, COPY_SUBSTR);
10542
10543      pop(cnt2);
10544      movptr(str2, rsp);  // New substring address
10545    } // non constant
10546
10547    bind(CHECK_STR);
10548    cmpl(cnt1, 8);
10549    jccb(Assembler::aboveEqual, BIG_STRINGS);
10550
10551    // Check cross page boundary.
10552    movl(result, str1); // We need only low 32 bits
10553    andl(result, (os::vm_page_size()-1));
10554    cmpl(result, (os::vm_page_size()-16));
10555    jccb(Assembler::belowEqual, BIG_STRINGS);
10556
10557    subptr(rsp, 16);
10558    int stk_offset = -2;
10559    if (int_cnt2 < 0) { // not constant
10560      push(cnt2);
10561      stk_offset += wordSize;
10562    }
10563    movl(cnt2, cnt1);
10564
10565    bind(COPY_STR);
10566    load_unsigned_short(result, Address(str1, cnt2, Address::times_2, -2));
10567    movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
10568    decrement(cnt2);
10569    jccb(Assembler::notZero, COPY_STR);
10570
10571    if (int_cnt2 < 0) { // not constant
10572      pop(cnt2);
10573    }
10574    movptr(str1, rsp);  // New string address
10575
10576    bind(BIG_STRINGS);
10577    // Load substring.
10578    if (int_cnt2 < 0) { // -1
10579      movdqu(vec, Address(str2, 0));
10580      push(cnt2);       // substr count
10581      push(str2);       // substr addr
10582      push(str1);       // string addr
10583    } else {
10584      // Small (< 8 chars) constant substrings are loaded already.
10585      movl(cnt2, int_cnt2);
10586    }
10587    push(tmp);  // original SP
10588
10589  } // Finished loading
10590
10591  //========================================================
10592  // Start search
10593  //
10594
10595  movptr(result, str1); // string addr
10596
10597  if (int_cnt2 < 0) { // Only for non constant substring
10598    jmpb(SCAN_TO_SUBSTR);
10599
10600    // SP saved at sp+0
10601    // String saved at sp+1*wordSize
10602    // Substr saved at sp+2*wordSize
10603    // Substr count saved at sp+3*wordSize
10604
10605    // Reload substr for rescan, this code
10606    // is executed only for large substrings (> 8 chars)
10607    bind(RELOAD_SUBSTR);
10608    movptr(str2, Address(rsp, 2*wordSize));
10609    movl(cnt2, Address(rsp, 3*wordSize));
10610    movdqu(vec, Address(str2, 0));
10611    // We came here after the beginning of the substring was
10612    // matched but the rest of it was not so we need to search
10613    // again. Start from the next element after the previous match.
10614    subptr(str1, result); // Restore counter
10615    shrl(str1, 1);
10616    addl(cnt1, str1);
10617    decrementl(cnt1);   // Shift to next element
10618    cmpl(cnt1, cnt2);
10619    jccb(Assembler::negative, RET_NOT_FOUND);  // Fewer chars left than substring
10620
10621    addptr(result, 2);
10622  } // non constant
10623
10624  // Scan string for start of substr in 16-byte vectors
10625  bind(SCAN_TO_SUBSTR);
10626  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
10627  pcmpestri(vec, Address(result, 0), 0x0d);
10628  jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1
10629  subl(cnt1, 8);
10630  jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
10631  cmpl(cnt1, cnt2);
10632  jccb(Assembler::negative, RET_NOT_FOUND);  // Fewer chars left than substring
10633  addptr(result, 16);
10634
10635  bind(ADJUST_STR);
10636  cmpl(cnt1, 8); // Do not read beyond string
10637  jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
10638  // Back-up string to avoid reading beyond string.
10639  lea(result, Address(result, cnt1, Address::times_2, -16));
10640  movl(cnt1, 8);
10641  jmpb(SCAN_TO_SUBSTR);
10642
10643  // Found a potential substr
10644  bind(FOUND_CANDIDATE);
10645  // After pcmpestri tmp(rcx) contains matched element index
10646
10647  // Make sure string is still long enough
10648  subl(cnt1, tmp);
10649  cmpl(cnt1, cnt2);
10650  jccb(Assembler::greaterEqual, FOUND_SUBSTR);
10651  // Fewer chars left than substring.
10652
10653  bind(RET_NOT_FOUND);
10654  movl(result, -1);
10655  jmpb(CLEANUP);
10656
10657  bind(FOUND_SUBSTR);
10658  // Compute start addr of substr
10659  lea(result, Address(result, tmp, Address::times_2));
10660
10661  if (int_cnt2 > 0) { // Constant substring
10662    // Repeat search for small substring (< 8 chars)
10663    // from new point without reloading substring.
10664    // Have to check that we don't read beyond string.
10665    cmpl(tmp, 8-int_cnt2);
10666    jccb(Assembler::greater, ADJUST_STR);
10667    // Fall through if matched whole substring.
10668  } else { // non constant
10669    assert(int_cnt2 == -1, "should be != 0");
10670
10671    addl(tmp, cnt2);
10672    // Found result if we matched whole substring.
10673    cmpl(tmp, 8);
10674    jccb(Assembler::lessEqual, RET_FOUND);
10675
10676    // Repeat search for small substring (<= 8 chars)
10677    // from new point 'str1' without reloading substring.
10678    cmpl(cnt2, 8);
10679    // Have to check that we don't read beyond string.
10680    jccb(Assembler::lessEqual, ADJUST_STR);
10681
10682    Label CHECK_NEXT, CONT_SCAN_SUBSTR, RET_FOUND_LONG;
10683    // Compare the rest of substring (> 8 chars).
10684 movptr(str1, result); 10685 10686 cmpl(tmp, cnt2); 10687 // First 8 chars are already matched. 10688 jccb(Assembler::equal, CHECK_NEXT); 10689 10690 bind(SCAN_SUBSTR); 10691 pcmpestri(vec, Address(str1, 0), 0x0d); 10692 // Need to reload strings pointers if not matched whole vector 10693 jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0 10694 10695 bind(CHECK_NEXT); 10696 subl(cnt2, 8); 10697 jccb(Assembler::lessEqual, RET_FOUND_LONG); // Found full substring 10698 addptr(str1, 16); 10699 addptr(str2, 16); 10700 subl(cnt1, 8); 10701 cmpl(cnt2, 8); // Do not read beyond substring 10702 jccb(Assembler::greaterEqual, CONT_SCAN_SUBSTR); 10703 // Back-up strings to avoid reading beyond substring. 10704 lea(str2, Address(str2, cnt2, Address::times_2, -16)); 10705 lea(str1, Address(str1, cnt2, Address::times_2, -16)); 10706 subl(cnt1, cnt2); 10707 movl(cnt2, 8); 10708 addl(cnt1, 8); 10709 bind(CONT_SCAN_SUBSTR); 10710 movdqu(vec, Address(str2, 0)); 10711 jmpb(SCAN_SUBSTR); 10712 10713 bind(RET_FOUND_LONG); 10714 movptr(str1, Address(rsp, wordSize)); 10715 } // non constant 10716 10717 bind(RET_FOUND); 10718 // Compute substr offset 10719 subptr(result, str1); 10720 shrl(result, 1); // index 10721 10722 bind(CLEANUP); 10723 pop(rsp); // restore SP 10724 10725} // string_indexof 10726 10727// Compare strings. 10728void MacroAssembler::string_compare(Register str1, Register str2, 10729 Register cnt1, Register cnt2, Register result, 10730 XMMRegister vec1) { 10731 ShortBranchVerifier sbv(this); 10732 Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL; 10733 10734 // Compute the minimum of the string lengths and the 10735 // difference of the string lengths (stack). 10736 // Do the conditional move stuff 10737 movl(result, cnt1); 10738 subl(cnt1, cnt2); 10739 push(cnt1); 10740 cmov32(Assembler::lessEqual, cnt2, result); 10741 10742 // Is the minimum length zero? 
  // Is the minimum length zero?
  testl(cnt2, cnt2);
  jcc(Assembler::zero, LENGTH_DIFF_LABEL);

  // Load first characters
  load_unsigned_short(result, Address(str1, 0));
  load_unsigned_short(cnt1, Address(str2, 0));

  // Compare first characters
  subl(result, cnt1);
  jcc(Assembler::notZero, POP_LABEL);
  decrementl(cnt2);
  jcc(Assembler::zero, LENGTH_DIFF_LABEL);

  {
    // Check after comparing first character to see if strings are equivalent
    Label LSkip2;
    // Check if the strings start at same location
    cmpptr(str1, str2);
    jccb(Assembler::notEqual, LSkip2);

    // Check if the length difference is zero (from stack)
    cmpl(Address(rsp, 0), 0x0);
    jcc(Assembler::equal, LENGTH_DIFF_LABEL);

    // Strings might not be equivalent
    bind(LSkip2);
  }

  Address::ScaleFactor scale = Address::times_2;
  int stride = 8;

  // Advance to next element
  addptr(str1, 16/stride);
  addptr(str2, 16/stride);

  if (UseSSE42Intrinsics) {
    Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
    int pcmpmask = 0x19;
    // Setup to compare 16-byte vectors
    movl(result, cnt2);
    andl(cnt2, ~(stride - 1)); // cnt2 holds the vector count
    jccb(Assembler::zero, COMPARE_TAIL);

    lea(str1, Address(str1, result, scale));
    lea(str2, Address(str2, result, scale));
    negptr(result);

    // pcmpestri
    //   inputs:
    //     vec1 - substring
    //     rax  - negative string length (elements count)
    //     mem  - scanned string
    //     rdx  - string length (elements count)
    //     pcmpmask - cmp mode: 11000 (string compare with negated result)
    //                + 00 (unsigned bytes) or + 01 (unsigned shorts)
    //   outputs:
    //     rcx - first mismatched element index
    assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri");

    bind(COMPARE_WIDE_VECTORS);
    movdqu(vec1, Address(str1, result, scale));
    pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
    // After pcmpestri cnt1(rcx) contains mismatched element index

    jccb(Assembler::below, VECTOR_NOT_EQUAL);  // CF==1
    addptr(result, stride);
    subptr(cnt2, stride);
    jccb(Assembler::notZero, COMPARE_WIDE_VECTORS);

    // compare wide vectors tail
    testl(result, result);
    jccb(Assembler::zero, LENGTH_DIFF_LABEL);

    movl(cnt2, stride);
    movl(result, stride);
    negptr(result);
    movdqu(vec1, Address(str1, result, scale));
    pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
    jccb(Assembler::aboveEqual, LENGTH_DIFF_LABEL);

    // Mismatched characters in the vectors
    bind(VECTOR_NOT_EQUAL);
    addptr(result, cnt1);
    movptr(cnt2, result);
    load_unsigned_short(result, Address(str1, cnt2, scale));
    load_unsigned_short(cnt1, Address(str2, cnt2, scale));
    subl(result, cnt1);
    jmpb(POP_LABEL);

    bind(COMPARE_TAIL); // limit is zero
    movl(cnt2, result);
    // Fallthru to tail compare
  }

  // Shift str2 and str1 to the end of the arrays, negate min
  lea(str1, Address(str1, cnt2, scale, 0));
  lea(str2, Address(str2, cnt2, scale, 0));
  negptr(cnt2);
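  // Negative-index loop idiom: both pointers now address the end of the
  // region to compare and cnt2 holds the negated element count, so each
  // load below addresses (base + cnt2 * scale), and incrementing cnt2
  // toward zero both advances the cursor and serves as the loop test.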
  // Compare the rest of the elements
  bind(WHILE_HEAD_LABEL);
  load_unsigned_short(result, Address(str1, cnt2, scale, 0));
  load_unsigned_short(cnt1, Address(str2, cnt2, scale, 0));
  subl(result, cnt1);
  jccb(Assembler::notZero, POP_LABEL);
  increment(cnt2);
  jccb(Assembler::notZero, WHILE_HEAD_LABEL);

  // Strings are equal up to min length. Return the length difference.
  bind(LENGTH_DIFF_LABEL);
  pop(result);
  jmpb(DONE_LABEL);

  // Discard the stored length difference
  bind(POP_LABEL);
  pop(cnt1);

  // That's it
  bind(DONE_LABEL);
}

// Compare char[] arrays aligned to 4 bytes or substrings.
void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,
                                        Register limit, Register result, Register chr,
                                        XMMRegister vec1, XMMRegister vec2) {
  ShortBranchVerifier sbv(this);
  Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;

  int length_offset = arrayOopDesc::length_offset_in_bytes();
  int base_offset   = arrayOopDesc::base_offset_in_bytes(T_CHAR);

  // Check the input args
  cmpptr(ary1, ary2);
  jcc(Assembler::equal, TRUE_LABEL);

  if (is_array_equ) {
    // Need additional checks for arrays_equals.
    testptr(ary1, ary1);
    jcc(Assembler::zero, FALSE_LABEL);
    testptr(ary2, ary2);
    jcc(Assembler::zero, FALSE_LABEL);

    // Check the lengths
    movl(limit, Address(ary1, length_offset));
    cmpl(limit, Address(ary2, length_offset));
    jcc(Assembler::notEqual, FALSE_LABEL);
  }

  // count == 0
  testl(limit, limit);
  jcc(Assembler::zero, TRUE_LABEL);

  if (is_array_equ) {
    // Load array address
    lea(ary1, Address(ary1, base_offset));
    lea(ary2, Address(ary2, base_offset));
  }

  shll(limit, 1);      // byte count != 0
  movl(result, limit); // copy
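  // The SSE4.2 path below relies on a standard equality idiom: pxor leaves
  // the byte-wise difference of the two 16-byte chunks in vec1, and
  // ptest(vec1, vec1) sets ZF only if that difference is all zero, i.e.
  // the chunks are identical.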
  if (UseSSE42Intrinsics) {
    // With SSE4.2, use double quad vector compare
    Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;

    // Compare 16-byte vectors
    andl(result, 0x0000000e); //   tail count (in bytes)
    andl(limit, 0xfffffff0);  // vector count (in bytes)
    jccb(Assembler::zero, COMPARE_TAIL);

    lea(ary1, Address(ary1, limit, Address::times_1));
    lea(ary2, Address(ary2, limit, Address::times_1));
    negptr(limit);

    bind(COMPARE_WIDE_VECTORS);
    movdqu(vec1, Address(ary1, limit, Address::times_1));
    movdqu(vec2, Address(ary2, limit, Address::times_1));
    pxor(vec1, vec2);

    ptest(vec1, vec1);
    jccb(Assembler::notZero, FALSE_LABEL);
    addptr(limit, 16);
    jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);

    testl(result, result);
    jccb(Assembler::zero, TRUE_LABEL);

    movdqu(vec1, Address(ary1, result, Address::times_1, -16));
    movdqu(vec2, Address(ary2, result, Address::times_1, -16));
    pxor(vec1, vec2);

    ptest(vec1, vec1);
    jccb(Assembler::notZero, FALSE_LABEL);
    jmpb(TRUE_LABEL);

    bind(COMPARE_TAIL); // limit is zero
    movl(limit, result);
    // Fallthru to tail compare
  }

  // Compare 4-byte vectors
  andl(limit, 0xfffffffc); // vector count (in bytes)
  jccb(Assembler::zero, COMPARE_CHAR);

  lea(ary1, Address(ary1, limit, Address::times_1));
  lea(ary2, Address(ary2, limit, Address::times_1));
  negptr(limit);

  bind(COMPARE_VECTORS);
  movl(chr, Address(ary1, limit, Address::times_1));
  cmpl(chr, Address(ary2, limit, Address::times_1));
  jccb(Assembler::notEqual, FALSE_LABEL);
  addptr(limit, 4);
  jcc(Assembler::notZero, COMPARE_VECTORS);

  // Compare trailing char (final 2 bytes), if any
  bind(COMPARE_CHAR);
  testl(result, 0x2); // tail char
  jccb(Assembler::zero, TRUE_LABEL);
  load_unsigned_short(chr, Address(ary1, 0));
  load_unsigned_short(limit, Address(ary2, 0));
  cmpl(chr, limit);
  jccb(Assembler::notEqual, FALSE_LABEL);

  bind(TRUE_LABEL);
  movl(result, 1);   // return true
  jmpb(DONE);

  bind(FALSE_LABEL);
  xorl(result, result); // return false

  // That's it
  bind(DONE);
}

void MacroAssembler::generate_fill(BasicType t, bool aligned,
                                   Register to, Register value, Register count,
                                   Register rtmp, XMMRegister xtmp) {
  ShortBranchVerifier sbv(this);
  assert_different_registers(to, value, count, rtmp);
  Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
  Label L_fill_2_bytes, L_fill_4_bytes;

  int shift = -1;
  switch (t) {
    case T_BYTE:
      shift = 2;
      break;
    case T_SHORT:
      shift = 1;
      break;
    case T_INT:
      shift = 0;
      break;
    default: ShouldNotReachHere();
  }

  if (t == T_BYTE) {
    andl(value, 0xff);
    movl(rtmp, value);
    shll(rtmp, 8);
    orl(value, rtmp);
  }
  if (t == T_SHORT) {
    andl(value, 0xffff);
  }
  if (t == T_BYTE || t == T_SHORT) {
    movl(rtmp, value);
    shll(rtmp, 16);
    orl(value, rtmp);
  }
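  // For byte and short fills, the shifts/ors above splat the value across
  // all 32 bits: e.g. a byte value 0xab becomes 0xabab after the 8-bit
  // shift/or and 0xabababab after the 16-bit shift/or, so every wider
  // store below writes repeated copies of the element.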
  cmpl(count, 2<<shift); // Short arrays (< 8 bytes) fill by element
  jcc(Assembler::below, L_fill_4_bytes); // use unsigned cmp
  if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) {
    // align the destination address at a 4-byte boundary
    if (t == T_BYTE) {
      // One-byte misalignment happens only for byte arrays
      testptr(to, 1);
      jccb(Assembler::zero, L_skip_align1);
      movb(Address(to, 0), value);
      increment(to);
      decrement(count);
      BIND(L_skip_align1);
    }
    // Two-byte misalignment happens only for byte and short (char) arrays
    testptr(to, 2);
    jccb(Assembler::zero, L_skip_align2);
    movw(Address(to, 0), value);
    addptr(to, 2);
    subl(count, 1<<(shift-1));
    BIND(L_skip_align2);
  }
  if (UseSSE < 2) {
    Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
    // Fill 32-byte chunks
    subl(count, 8 << shift);
    jcc(Assembler::less, L_check_fill_8_bytes);
    align(16);

    BIND(L_fill_32_bytes_loop);

    for (int i = 0; i < 32; i += 4) {
      movl(Address(to, i), value);
    }

    addptr(to, 32);
    subl(count, 8 << shift);
    jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
    BIND(L_check_fill_8_bytes);
    addl(count, 8 << shift);
    jccb(Assembler::zero, L_exit);
    jmpb(L_fill_8_bytes);

    //
    // length is too short, just fill qwords
    //
    BIND(L_fill_8_bytes_loop);
    movl(Address(to, 0), value);
    movl(Address(to, 4), value);
    addptr(to, 8);
    BIND(L_fill_8_bytes);
    subl(count, 1 << (shift + 1));
    jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
    // fall through to fill 4 bytes
  } else {
    Label L_fill_32_bytes;
    if (!UseUnalignedLoadStores) {
      // align to 8 bytes, we know we are 4 byte aligned to start
      testptr(to, 4);
      jccb(Assembler::zero, L_fill_32_bytes);
      movl(Address(to, 0), value);
      addptr(to, 4);
      subl(count, 1<<shift);
    }
    BIND(L_fill_32_bytes);
    {
      assert(UseSSE >= 2, "supported cpu only");
      Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
      // Fill 32-byte chunks
      movdl(xtmp, value);
      pshufd(xtmp, xtmp, 0);

      subl(count, 8 << shift);
      jcc(Assembler::less, L_check_fill_8_bytes);
      align(16);

      BIND(L_fill_32_bytes_loop);

      if (UseUnalignedLoadStores) {
        movdqu(Address(to, 0), xtmp);
        movdqu(Address(to, 16), xtmp);
      } else {
        movq(Address(to, 0), xtmp);
        movq(Address(to, 8), xtmp);
        movq(Address(to, 16), xtmp);
        movq(Address(to, 24), xtmp);
      }

      addptr(to, 32);
      subl(count, 8 << shift);
      jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
      BIND(L_check_fill_8_bytes);
      addl(count, 8 << shift);
      jccb(Assembler::zero, L_exit);
      jmpb(L_fill_8_bytes);

      //
      // length is too short, just fill qwords
      //
      BIND(L_fill_8_bytes_loop);
      movq(Address(to, 0), xtmp);
      addptr(to, 8);
      BIND(L_fill_8_bytes);
      subl(count, 1 << (shift + 1));
      jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
    }
  }
  // fill trailing 4 bytes
  BIND(L_fill_4_bytes);
  testl(count, 1<<shift);
  jccb(Assembler::zero, L_fill_2_bytes);
  movl(Address(to, 0), value);
  if (t == T_BYTE || t == T_SHORT) {
    addptr(to, 4);
    BIND(L_fill_2_bytes);
    // fill trailing 2 bytes
    testl(count, 1<<(shift-1));
    jccb(Assembler::zero, L_fill_byte);
    movw(Address(to, 0), value);
    if (t == T_BYTE) {
      addptr(to, 2);
      BIND(L_fill_byte);
      // fill trailing byte
      testl(count, 1);
      jccb(Assembler::zero, L_exit);
      movb(Address(to, 0), value);
    } else {
      BIND(L_fill_byte);
    }
  } else {
    BIND(L_fill_2_bytes);
  }
  BIND(L_exit);
}
#undef BIND
#undef BLOCK_COMMENT


Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
  switch (cond) {
    // Note some conditions are synonyms for others
    case Assembler::zero:         return Assembler::notZero;
    case Assembler::notZero:      return Assembler::zero;
    case Assembler::less:         return Assembler::greaterEqual;
    case Assembler::lessEqual:    return Assembler::greater;
    case Assembler::greater:      return Assembler::lessEqual;
    case Assembler::greaterEqual: return Assembler::less;
    case Assembler::below:        return Assembler::aboveEqual;
    case Assembler::belowEqual:   return Assembler::above;
    case Assembler::above:        return Assembler::belowEqual;
    case Assembler::aboveEqual:   return Assembler::below;
    case Assembler::overflow:     return Assembler::noOverflow;
    case Assembler::noOverflow:   return Assembler::overflow;
    case Assembler::negative:     return Assembler::positive;
    case Assembler::positive:     return Assembler::negative;
    case Assembler::parity:       return Assembler::noParity;
    case Assembler::noParity:     return Assembler::parity;
  }
  ShouldNotReachHere(); return Assembler::overflow;
}
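// SkipIfEqual is a scoped guard: its constructor emits a compare of the
// byte flag at flag_addr against 'value' and a conditional jump past
// whatever the caller emits next; its destructor binds the jump target.
// A typical use (illustrative sketch only; the flag is an example):
//
//   {
//     SkipIfEqual skip(masm, &DTraceAllocProbes, false);
//     ... emit probe code here; at runtime it is skipped
//         whenever the flag equals the given value ...
//   } // destructor binds the label, ending the skipped region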
SkipIfEqual::SkipIfEqual(
    MacroAssembler* masm, const bool* flag_addr, bool value) {
  _masm = masm;
  _masm->cmp8(ExternalAddress((address)flag_addr), value);
  _masm->jcc(Assembler::equal, _label);
}

SkipIfEqual::~SkipIfEqual() {
  _masm->bind(_label);
}