assembler_x86.cpp revision 3724:8e47bac5643a
/*
 * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "assembler_x86.inline.hpp"
#include "gc_interface/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/cardTableModRefBS.hpp"
#include "memory/resourceArea.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/interfaceSupport.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#ifndef SERIALGC
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
#include "gc_implementation/g1/heapRegion.hpp"
#endif

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#define STOP(error) stop(error)
#else
#define BLOCK_COMMENT(str) block_comment(str)
#define STOP(error) block_comment(error); stop(error)
#endif

#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

// Implementation of AddressLiteral

AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
  _is_lval = false;
  _target = target;
  switch (rtype) {
  case relocInfo::oop_type:
  case relocInfo::metadata_type:
    // Oops are a special case. Normally they would be their own section
    // but in cases like icBuffer they are literals in the code stream that
    // we don't have a section for. We use none so that we get a literal address
    // which is always patchable.
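    // (Leaving _rspec at its default-constructed value here gives relocInfo::none,
    // i.e. a plain embedded word with no relocation record.)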
    break;
  case relocInfo::external_word_type:
    _rspec = external_word_Relocation::spec(target);
    break;
  case relocInfo::internal_word_type:
    _rspec = internal_word_Relocation::spec(target);
    break;
  case relocInfo::opt_virtual_call_type:
    _rspec = opt_virtual_call_Relocation::spec();
    break;
  case relocInfo::static_call_type:
    _rspec = static_call_Relocation::spec();
    break;
  case relocInfo::runtime_call_type:
    _rspec = runtime_call_Relocation::spec();
    break;
  case relocInfo::poll_type:
  case relocInfo::poll_return_type:
    _rspec = Relocation::spec_simple(rtype);
    break;
  case relocInfo::none:
    break;
  default:
    ShouldNotReachHere();
    break;
  }
}

// Implementation of Address

#ifdef _LP64

Address Address::make_array(ArrayAddress adr) {
  // Not implementable on 64bit machines
  // Should have been handled higher up the call chain.
  ShouldNotReachHere();
  return Address();
}

// exceedingly dangerous constructor
Address::Address(int disp, address loc, relocInfo::relocType rtype) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = disp;
  switch (rtype) {
    case relocInfo::external_word_type:
      _rspec = external_word_Relocation::spec(loc);
      break;
    case relocInfo::internal_word_type:
      _rspec = internal_word_Relocation::spec(loc);
      break;
    case relocInfo::runtime_call_type:
      // HMM
      _rspec = runtime_call_Relocation::spec();
      break;
    case relocInfo::poll_type:
    case relocInfo::poll_return_type:
      _rspec = Relocation::spec_simple(rtype);
      break;
    case relocInfo::none:
      break;
    default:
      ShouldNotReachHere();
  }
}
#else // LP64

Address Address::make_array(ArrayAddress adr) {
  AddressLiteral base = adr.base();
  Address index = adr.index();
  assert(index._disp == 0, "must not have disp"); // maybe it can?
  Address array(index._base, index._index, index._scale, (intptr_t) base.target());
  array._rspec = base._rspec;
  return array;
}

// exceedingly dangerous constructor
Address::Address(address loc, RelocationHolder spec) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = (intptr_t) loc;
  _rspec = spec;
}

#endif // _LP64


// Convert the raw encoding form into the form expected by the constructor for
// Address.  An index of 4 (rsp) corresponds to having no index, so convert
// that to noreg for the Address constructor.
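// For example, a raw encoding with index == 4 and disp == 8 simply denotes
// [base + 8] with no scaled index register.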
Address Address::make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc) {
  RelocationHolder rspec;
  if (disp_reloc != relocInfo::none) {
    rspec = Relocation::spec_simple(disp_reloc);
  }
  bool valid_index = index != rsp->encoding();
  if (valid_index) {
    Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
    madr._rspec = rspec;
    return madr;
  } else {
    Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
    madr._rspec = rspec;
    return madr;
  }
}

// Implementation of Assembler

int AbstractAssembler::code_fill_byte() {
  return (u_char)'\xF4'; // hlt
}

// make this go away someday
void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
  if (rtype == relocInfo::none)
    emit_long(data);
  else
    emit_data(data, Relocation::spec_simple(rtype), format);
}

void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {
  assert(imm_operand == 0, "default format must be immediate in this file");
  assert(inst_mark() != NULL, "must be inside InstructionMark");
  if (rspec.type() != relocInfo::none) {
#ifdef ASSERT
    check_relocation(rspec, format);
#endif
    // Do not use AbstractAssembler::relocate, which is not intended for
    // embedded words.  Instead, relocate to the enclosing instruction.

    // hack. call32 is too wide for mask so use disp32
    if (format == call32_operand)
      code_section()->relocate(inst_mark(), rspec, disp32_operand);
    else
      code_section()->relocate(inst_mark(), rspec, format);
  }
  emit_long(data);
}

static int encode(Register r) {
  int enc = r->encoding();
  if (enc >= 8) {
    enc -= 8;
  }
  return enc;
}

static int encode(XMMRegister r) {
  int enc = r->encoding();
  if (enc >= 8) {
    enc -= 8;
  }
  return enc;
}

void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
  assert(dst->has_byte_register(), "must have byte register");
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert(isByte(imm8), "not a byte");
  assert((op1 & 0x01) == 0, "should be 8bit operation");
  emit_byte(op1);
  emit_byte(op2 | encode(dst));
  emit_byte(imm8);
}


void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_byte(op1 | 0x02); // set sign bit
    emit_byte(op2 | encode(dst));
    emit_byte(imm32 & 0xFF);
  } else {
    emit_byte(op1);
    emit_byte(op2 | encode(dst));
    emit_long(imm32);
  }
}

// Force generation of a 4 byte immediate value even if it fits into 8bit
void Assembler::emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  emit_byte(op1);
  emit_byte(op2 | encode(dst));
  emit_long(imm32);
}

// immediate-to-memory forms
void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) {
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
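  // Group-1 opcode 0x81 takes a full imm32; OR-ing in 0x02 selects the 0x83
  // form, whose imm8 is sign-extended by the CPU.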
  if (is8bit(imm32)) {
    emit_byte(op1 | 0x02); // set sign bit
    emit_operand(rm, adr, 1);
    emit_byte(imm32 & 0xFF);
  } else {
    emit_byte(op1);
    emit_operand(rm, adr, 4);
    emit_long(imm32);
  }
}


void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  emit_byte(op1);
  emit_byte(op2 | encode(dst) << 3 | encode(src));
}


void Assembler::emit_operand(Register reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec,
                             int rip_relative_correction) {
  relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();

  // Encode the registers as needed in the fields they are used in

  int regenc = encode(reg) << 3;
  int indexenc = index->is_valid() ? encode(index) << 3 : 0;
  int baseenc = base->is_valid() ? encode(base) : 0;

  if (base->is_valid()) {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [base + index*scale + disp]
      if (disp == 0 && rtype == relocInfo::none &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base + index*scale]
        // [00 reg 100][ss index base]
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x04 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [base + index*scale + imm8]
        // [01 reg 100][ss index base] imm8
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x44 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
        emit_byte(disp & 0xFF);
      } else {
        // [base + index*scale + disp32]
        // [10 reg 100][ss index base] disp32
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x84 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    } else if (base == rsp LP64_ONLY(|| base == r12)) {
      // [rsp + disp]
      if (disp == 0 && rtype == relocInfo::none) {
        // [rsp]
        // [00 reg 100][00 100 100]
        emit_byte(0x04 | regenc);
        emit_byte(0x24);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [rsp + imm8]
        // [01 reg 100][00 100 100] disp8
        emit_byte(0x44 | regenc);
        emit_byte(0x24);
        emit_byte(disp & 0xFF);
      } else {
        // [rsp + imm32]
        // [10 reg 100][00 100 100] disp32
        emit_byte(0x84 | regenc);
        emit_byte(0x24);
        emit_data(disp, rspec, disp32_operand);
      }
    } else {
      // [base + disp]
      assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode");
      if (disp == 0 && rtype == relocInfo::none &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base]
        // [00 reg base]
        emit_byte(0x00 | regenc | baseenc);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [base + disp8]
        // [01 reg base] disp8
        emit_byte(0x40 | regenc | baseenc);
        emit_byte(disp & 0xFF);
      } else {
        // [base + disp32]
        // [10 reg base] disp32
        emit_byte(0x80 | regenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    }
  } else {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [index*scale + disp]
      // [00 reg 100][ss index 101] disp32
      assert(index != rsp, "illegal addressing mode");
      emit_byte(0x04 | regenc);
      emit_byte(scale << 6 | indexenc | 0x05);
      emit_data(disp, rspec, disp32_operand);
    } else if (rtype != relocInfo::none) {
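      // With mod == 00 and r/m == 101 the encoding below is RIP-relative on
      // 64-bit, but a plain absolute disp32 on 32-bit.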
      // [disp] (64bit) RIP-RELATIVE (32bit) abs
      // [00 000 101] disp32

      emit_byte(0x05 | regenc);
      // Note that the RIP-rel. correction applies to the generated
      // disp field, but _not_ to the target address in the rspec.

      // disp was created by converting the target address minus the pc
      // at the start of the instruction. That needs more correction here.
      // intptr_t disp = target - next_ip;
      assert(inst_mark() != NULL, "must be inside InstructionMark");
      address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
      int64_t adjusted = disp;
      // Do rip-rel adjustment for 64bit
      LP64_ONLY(adjusted -= (next_ip - inst_mark()));
      assert(is_simm32(adjusted),
             "must be 32bit offset (RIP relative address)");
      emit_data((int32_t) adjusted, rspec, disp32_operand);

    } else {
      // 32bit never did this, did everything as the rip-rel/disp code above
      // [disp] ABSOLUTE
      // [00 reg 100][00 100 101] disp32
      emit_byte(0x04 | regenc);
      emit_byte(0x25);
      emit_data(disp, rspec, disp32_operand);
    }
  }
}

void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec) {
  emit_operand((Register)reg, base, index, scale, disp, rspec);
}

// Secret local extension to Assembler::WhichOperand:
#define end_pc_operand (_WhichOperand_limit)

address Assembler::locate_operand(address inst, WhichOperand which) {
  // Decode the given instruction, and return the address of
  // an embedded 32-bit operand word.

  // If "which" is disp32_operand, selects the displacement portion
  // of an effective address specifier.
  // If "which" is imm64_operand, selects the trailing immediate constant.
  // If "which" is call32_operand, selects the displacement of a call or jump.
  // Caller is responsible for ensuring that there is such an operand,
  // and that it is 32/64 bits wide.

  // If "which" is end_pc_operand, find the end of the instruction.

  address ip = inst;
  bool is_64bit = false;

  debug_only(bool has_disp32 = false);
  int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn

  again_after_prefix:
  switch (0xFF & *ip++) {

  // These convenience macros generate groups of "case" labels for the switch.
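  // For example, "case REP4(0x00):" expands to
  // "case 0x00: case 0x01: case 0x02: case 0x03:".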
#define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3
#define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \
                case (x)+4: case (x)+5: case (x)+6: case (x)+7
#define REP16(x) REP8((x)+0): \
                 case REP8((x)+8)

  case CS_segment:
  case SS_segment:
  case DS_segment:
  case ES_segment:
  case FS_segment:
  case GS_segment:
    // Seems dubious
    LP64_ONLY(assert(false, "shouldn't have that prefix"));
    assert(ip == inst+1, "only one prefix allowed");
    goto again_after_prefix;

  case 0x67:
  case REX:
  case REX_B:
  case REX_X:
  case REX_XB:
  case REX_R:
  case REX_RB:
  case REX_RX:
  case REX_RXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    goto again_after_prefix;

  case REX_W:
  case REX_WB:
  case REX_WX:
  case REX_WXB:
  case REX_WR:
  case REX_WRB:
  case REX_WRX:
  case REX_WRXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    is_64bit = true;
    goto again_after_prefix;

  case 0xFF: // pushq a; decl a; incl a; call a; jmp a
  case 0x88: // movb a, r
  case 0x89: // movl a, r
  case 0x8A: // movb r, a
  case 0x8B: // movl r, a
  case 0x8F: // popl a
    debug_only(has_disp32 = true);
    break;

  case 0x68: // pushq #32
    if (which == end_pc_operand) {
      return ip + 4;
    }
    assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate");
    return ip; // not produced by emit_operand

  case 0x66: // movw ... (size prefix)
    again_after_size_prefix2:
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "64bit prefix found"));
      goto again_after_size_prefix2;
    case 0x8B: // movw r, a
    case 0x89: // movw a, r
      debug_only(has_disp32 = true);
      break;
    case 0xC7: // movw a, #16
      debug_only(has_disp32 = true);
      tail_size = 2; // the imm16
      break;
    case 0x0F: // several SSE/SSE2 variants
      ip--; // reparse the 0x0F
      goto again_after_prefix;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP8(0xB8): // movl/q r, #32/#64(oop?)
    if (which == end_pc_operand)  return ip + (is_64bit ? 8 : 4);
    // these asserts are somewhat nonsensical
#ifndef _LP64
    assert(which == imm_operand || which == disp32_operand,
           err_msg("which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, ip));
#else
    assert((which == call32_operand || which == imm_operand) && is_64bit ||
           which == narrow_oop_operand && !is_64bit,
           err_msg("which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, ip));
#endif // _LP64
    return ip;

  case 0x69: // imul r, a, #32
  case 0xC7: // movl a, #32(oop?)
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x0F: // movx..., etc.
    switch (0xFF & *ip++) {
    case 0x3A: // pcmpestri
      tail_size = 1;
    case 0x38: // ptest, pmovzxbw
      ip++; // skip opcode
      debug_only(has_disp32 = true); // has both kinds of operands!
      break;

    case 0x70: // pshufd r, r/a, #8
      debug_only(has_disp32 = true); // has both kinds of operands!
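      // fall through: pshufd also carries a trailing imm8, so it shares the
      // tail_size = 1 of the next case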
    case 0x73: // psrldq r, #8
      tail_size = 1;
      break;

    case 0x12: // movlps
    case 0x28: // movaps
    case 0x2E: // ucomiss
    case 0x2F: // comiss
    case 0x54: // andps
    case 0x55: // andnps
    case 0x56: // orps
    case 0x57: // xorps
    case 0x6E: // movd
    case 0x7E: // movd
    case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
      debug_only(has_disp32 = true);
      break;

    case 0xAD: // shrd r, a, %cl
    case 0xAF: // imul r, a
    case 0xBE: // movsbl r, a (movsxb)
    case 0xBF: // movswl r, a (movsxw)
    case 0xB6: // movzbl r, a (movzxb)
    case 0xB7: // movzwl r, a (movzxw)
    case REP16(0x40): // cmovl cc, r, a
    case 0xB0: // cmpxchgb
    case 0xB1: // cmpxchg
    case 0xC1: // xaddl
    case 0xC7: // cmpxchg8
    case REP16(0x90): // setcc a
      debug_only(has_disp32 = true);
      // fall out of the switch to decode the address
      break;

    case 0xC4: // pinsrw r, a, #8
      debug_only(has_disp32 = true);
    case 0xC5: // pextrw r, r, #8
      tail_size = 1; // the imm8
      break;

    case 0xAC: // shrd r, a, #8
      debug_only(has_disp32 = true);
      tail_size = 1; // the imm8
      break;

    case REP16(0x80): // jcc rdisp32
      if (which == end_pc_operand)  return ip + 4;
      assert(which == call32_operand, "jcc has no disp32 or imm");
      return ip;
    default:
      ShouldNotReachHere();
    }
    break;

  case 0x81: // addl a, #32; addl r, #32
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    // on 32bit in the case of cmpl, the imm might be an oop
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x83: // addl a, #8; addl r, #8
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1;
    break;

  case 0x9B:
    switch (0xFF & *ip++) {
    case 0xD9: // fnstcw a
      debug_only(has_disp32 = true);
      break;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a
  case REP4(0x10): // adc...
  case REP4(0x20): // and...
  case REP4(0x30): // xor...
  case REP4(0x08): // or...
  case REP4(0x18): // sbb...
  case REP4(0x28): // sub...
  case 0xF7: // mull a
  case 0x8D: // lea r, a
  case 0x87: // xchg r, a
  case REP4(0x38): // cmp...
  case 0x85: // test r, a
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
  case 0xC6: // movb a, #8
  case 0x80: // cmpb a, #8
  case 0x6B: // imul r, a, #8
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1; // the imm8
    break;

  case 0xC4: // VEX_3bytes
  case 0xC5: // VEX_2bytes
    assert((UseAVX > 0), "shouldn't have VEX prefix");
    assert(ip == inst+1, "no prefixes allowed");
    // C4 and C5 are also used as opcodes for PINSRW and PEXTRW instructions
    // but they have prefix 0x0F and processed when 0x0F processed above.
    //
    // In 32-bit mode the VEX first byte C4 and C5 alias onto LDS and LES
    // instructions (these instructions are not supported in 64-bit mode).
    // To distinguish them bits [7:6] are set in the VEX second byte since
    // ModRM byte can not be of the form 11xxxxxx in 32-bit mode. To set
    // those VEX bits REX and vvvv bits are inverted.
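    // (Put differently: the second VEX byte always reads as a register-form
    // ModRM of the shape 11xxxxxx, which LDS/LES can never have since they
    // take only memory operands.)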
668 // 669 // Fortunately C2 doesn't generate these instructions so we don't need 670 // to check for them in product version. 671 672 // Check second byte 673 NOT_LP64(assert((0xC0 & *ip) == 0xC0, "shouldn't have LDS and LES instructions")); 674 675 // First byte 676 if ((0xFF & *inst) == VEX_3bytes) { 677 ip++; // third byte 678 is_64bit = ((VEX_W & *ip) == VEX_W); 679 } 680 ip++; // opcode 681 // To find the end of instruction (which == end_pc_operand). 682 switch (0xFF & *ip) { 683 case 0x61: // pcmpestri r, r/a, #8 684 case 0x70: // pshufd r, r/a, #8 685 case 0x73: // psrldq r, #8 686 tail_size = 1; // the imm8 687 break; 688 default: 689 break; 690 } 691 ip++; // skip opcode 692 debug_only(has_disp32 = true); // has both kinds of operands! 693 break; 694 695 case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1 696 case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl 697 case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a 698 case 0xDD: // fld_d a; fst_d a; fstp_d a 699 case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a 700 case 0xDF: // fild_d a; fistp_d a 701 case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a 702 case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a 703 case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a 704 debug_only(has_disp32 = true); 705 break; 706 707 case 0xE8: // call rdisp32 708 case 0xE9: // jmp rdisp32 709 if (which == end_pc_operand) return ip + 4; 710 assert(which == call32_operand, "call has no disp32 or imm"); 711 return ip; 712 713 case 0xF0: // Lock 714 assert(os::is_MP(), "only on MP"); 715 goto again_after_prefix; 716 717 case 0xF3: // For SSE 718 case 0xF2: // For SSE2 719 switch (0xFF & *ip++) { 720 case REX: 721 case REX_B: 722 case REX_X: 723 case REX_XB: 724 case REX_R: 725 case REX_RB: 726 case REX_RX: 727 case REX_RXB: 728 case REX_W: 729 case REX_WB: 730 case REX_WX: 731 case REX_WXB: 732 case REX_WR: 733 case REX_WRB: 734 case REX_WRX: 735 case REX_WRXB: 736 NOT_LP64(assert(false, "found 64bit prefix")); 737 ip++; 738 default: 739 ip++; 740 } 741 debug_only(has_disp32 = true); // has both kinds of operands! 
    break;

  default:
    ShouldNotReachHere();

#undef REP8
#undef REP16
  }

  assert(which != call32_operand, "instruction is not a call, jmp, or jcc");
#ifdef _LP64
  assert(which != imm_operand, "instruction is not a movq reg, imm64");
#else
  // assert(which != imm_operand || has_imm32, "instruction has no imm32 field");
  assert(which != imm_operand || has_disp32, "instruction has no imm32 field");
#endif // LP64
  assert(which != disp32_operand || has_disp32, "instruction has no disp32 field");

  // parse the output of emit_operand
  int op2 = 0xFF & *ip++;
  int base = op2 & 0x07;
  int op3 = -1;
  const int b100 = 4;
  const int b101 = 5;
  if (base == b100 && (op2 >> 6) != 3) {
    op3 = 0xFF & *ip++;
    base = op3 & 0x07; // refetch the base
  }
  // now ip points at the disp (if any)

  switch (op2 >> 6) {
  case 0:
    // [00 reg  100][ss index base]
    // [00 reg  100][00   100  esp]
    // [00 reg base]
    // [00 reg  100][ss index  101][disp32]
    // [00 reg  101]               [disp32]

    if (base == b101) {
      if (which == disp32_operand)
        return ip;      // caller wants the disp32
      ip += 4;          // skip the disp32
    }
    break;

  case 1:
    // [01 reg  100][ss index base][disp8]
    // [01 reg  100][00   100  esp][disp8]
    // [01 reg base]               [disp8]
    ip += 1;            // skip the disp8
    break;

  case 2:
    // [10 reg  100][ss index base][disp32]
    // [10 reg  100][00   100  esp][disp32]
    // [10 reg base]               [disp32]
    if (which == disp32_operand)
      return ip;        // caller wants the disp32
    ip += 4;            // skip the disp32
    break;

  case 3:
    // [11 reg base]  (not a memory addressing mode)
    break;
  }

  if (which == end_pc_operand) {
    return ip + tail_size;
  }

#ifdef _LP64
  assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32");
#else
  assert(which == imm_operand, "instruction has only an imm field");
#endif // LP64
  return ip;
}

address Assembler::locate_next_instruction(address inst) {
  // Secretly share code with locate_operand:
  return locate_operand(inst, end_pc_operand);
}


#ifdef ASSERT
void Assembler::check_relocation(RelocationHolder const& rspec, int format) {
  address inst = inst_mark();
  assert(inst != NULL && inst < pc(), "must point to beginning of instruction");
  address opnd;

  Relocation* r = rspec.reloc();
  if (r->type() == relocInfo::none) {
    return;
  } else if (r->is_call() || format == call32_operand) {
    // assert(format == imm32_operand, "cannot specify a nonzero format");
    opnd = locate_operand(inst, call32_operand);
  } else if (r->is_data()) {
    assert(format == imm_operand || format == disp32_operand
           LP64_ONLY(|| format == narrow_oop_operand), "format ok");
    opnd = locate_operand(inst, (WhichOperand)format);
  } else {
    assert(format == imm_operand, "cannot specify a format");
    return;
  }
  assert(opnd == pc(), "must put operand where relocs can find it");
}
#endif // ASSERT

void Assembler::emit_operand32(Register reg, Address adr) {
  assert(reg->encoding() < 8, "no extended registers");
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}

void Assembler::emit_operand(Register reg, Address adr,
                             int rip_relative_correction) {
  emit_operand(reg, adr._base, adr._index,
               adr._scale, adr._disp,
               adr._rspec,
               rip_relative_correction);
}

void Assembler::emit_operand(XMMRegister reg, Address adr) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}

// MMX operations
void Assembler::emit_operand(MMXRegister reg, Address adr) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}

// work around gcc (3.2.1-7a) bug
void Assembler::emit_operand(Address adr, MMXRegister reg) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}


void Assembler::emit_farith(int b1, int b2, int i) {
  assert(isByte(b1) && isByte(b2), "wrong opcode");
  assert(0 <= i && i < 8, "illegal stack offset");
  emit_byte(b1);
  emit_byte(b2 + i);
}


// Now the Assembler instructions (identical for 32/64 bits)

void Assembler::adcl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rdx, dst, imm32);
}

void Assembler::adcl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x11);
  emit_operand(src, dst);
}

void Assembler::adcl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xD0, dst, imm32);
}

void Assembler::adcl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x13);
  emit_operand(dst, src);
}

void Assembler::adcl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x13, 0xC0, dst, src);
}

void Assembler::addl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rax, dst, imm32);
}

void Assembler::addl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x01);
  emit_operand(src, dst);
}

void Assembler::addl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xC0, dst, imm32);
}

void Assembler::addl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x03);
  emit_operand(dst, src);
}

void Assembler::addl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x03, 0xC0, dst, src);
}

void Assembler::addr_nop_4() {
  assert(UseAddressNop, "no CPU support");
  // 4 bytes: NOP DWORD PTR [EAX+0]
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
  emit_byte(0);    // 8-bits offset (1 byte)
}

void Assembler::addr_nop_5() {
  assert(UseAddressNop, "no CPU support");
  // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
  emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_byte(0);    // 8-bits offset (1 byte)
}

void Assembler::addr_nop_7() {
  assert(UseAddressNop, "no CPU support");
  // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x80); // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
  emit_long(0);    // 32-bits offset (4 bytes)
}

void Assembler::addr_nop_8() {
  assert(UseAddressNop, "no CPU support");
  // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x84); // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
  emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_long(0);    // 32-bits offset (4 bytes)
}

void Assembler::addsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
}

void Assembler::addsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
}

void Assembler::addss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
}

void Assembler::addss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
}

void Assembler::andl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x81);
  emit_operand(rsp, dst, 4);
  emit_long(imm32);
}

void Assembler::andl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xE0, dst, imm32);
}

void Assembler::andl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x23);
  emit_operand(dst, src);
}

void Assembler::andl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x23, 0xC0, dst, src);
}

void Assembler::bsfl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBC);
  emit_byte(0xC0 | encode);
}

void Assembler::bsrl(Register dst, Register src) {
  assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBD);
  emit_byte(0xC0 | encode);
}

void Assembler::bswapl(Register reg) { // bswap
  int encode = prefix_and_encode(reg->encoding());
  emit_byte(0x0F);
  emit_byte(0xC8 | encode);
}

void Assembler::call(Label& L, relocInfo::relocType rtype) {
  // suspect disp32 is always good
  int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);

  if (L.is_bound()) {
    const int long_size = 5;
    int offs = (int)( target(L) - pc() );
    assert(offs <= 0, "assembler error");
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    emit_byte(0xE8);
    emit_data(offs - long_size, rtype, operand);
  } else {
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    L.add_patch_at(code(), locator());

    emit_byte(0xE8);
    emit_data(int(0), rtype, operand);
  }
}

void Assembler::call(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xD0 | encode);
}


void Assembler::call(Address adr) {
  InstructionMark im(this);
  prefix(adr);
  emit_byte(0xFF);
  emit_operand(rdx, adr);
}

void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
  assert(entry != NULL, "call most probably wrong");
  InstructionMark im(this);
  emit_byte(0xE8);
  intptr_t disp = entry - (_code_pos + sizeof(int32_t));
  assert(is_simm32(disp), "must be 32bit offset (call2)");
  // Technically, should use call32_operand, but this format is
  // implied by the fact that we're emitting a call instruction.

  int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand);
  emit_data((int) disp, rspec, operand);
}

void Assembler::cdql() {
  emit_byte(0x99);
}

void Assembler::cmovl(Condition cc, Register dst, Register src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_byte(0xC0 | encode);
}


void Assembler::cmovl(Condition cc, Register dst, Address src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_operand(dst, src);
}

void Assembler::cmpb(Address dst, int imm8) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x80);
  emit_operand(rdi, dst, 1);
  emit_byte(imm8);
}

void Assembler::cmpl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x81);
  emit_operand(rdi, dst, 4);
  emit_long(imm32);
}

void Assembler::cmpl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xF8, dst, imm32);
}

void Assembler::cmpl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x3B, 0xC0, dst, src);
}


void Assembler::cmpl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x3B);
  emit_operand(dst, src);
}

void Assembler::cmpw(Address dst, int imm16) {
  InstructionMark im(this);
  assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers");
  emit_byte(0x66);
  emit_byte(0x81);
  emit_operand(rdi, dst, 2);
  emit_word(imm16);
}

// The 32-bit cmpxchg compares the value at adr with the contents of rax,
// and stores reg into adr if so; otherwise, the value at adr is loaded into rax,.
// The ZF is set if the compared values were equal, and cleared otherwise.
void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg
  if (Atomics & 2) {
    // caveat: no instructionmark, so this isn't relocatable.
    // Emit a synthetic, non-atomic, CAS equivalent.
    // Beware.  The synthetic form sets all ICCs, not just ZF.
    // cmpxchg r,[m] is equivalent to rax, = CAS (m, rax, r)
    cmpl(rax, adr);
    movl(rax, adr);
    if (reg != rax) {
      Label L;
      jcc(Assembler::notEqual, L);
      movl(adr, reg);
      bind(L);
    }
  } else {
    InstructionMark im(this);
    prefix(adr, reg);
    emit_byte(0x0F);
    emit_byte(0xB1);
    emit_operand(reg, adr);
  }
}

void Assembler::comisd(XMMRegister dst, Address src) {
  // NOTE: dbx seems to decode this as comiss even though the
  // 0x66 is there.  Strangely, ucomisd comes out correct.
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
}

void Assembler::comisd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
}

void Assembler::comiss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE);
}

void Assembler::comiss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE);
}

void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0xE6, dst, src, VEX_SIMD_F3);
}

void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x5B, dst, src, VEX_SIMD_NONE);
}

void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
}

void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
}

void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}

void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x2A, dst, src, VEX_SIMD_F2);
}

void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}

void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x2A, dst, src, VEX_SIMD_F3);
}

void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
}

void Assembler::cvtss2sd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
}


void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}

void Assembler::cvttss2sil(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}

void Assembler::decl(Address dst) {
  // Don't use it directly. Use MacroAssembler::decrement() instead.
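  // (The MacroAssembler form can, for example, honor UseIncDec and emit a
  // subl instead of decl.)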
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xFF);
  emit_operand(rcx, dst);
}

void Assembler::divsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
}

void Assembler::divsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
}

void Assembler::divss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
}

void Assembler::divss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
}

void Assembler::emms() {
  NOT_LP64(assert(VM_Version::supports_mmx(), ""));
  emit_byte(0x0F);
  emit_byte(0x77);
}

void Assembler::hlt() {
  emit_byte(0xF4);
}

void Assembler::idivl(Register src) {
  int encode = prefix_and_encode(src->encoding());
  emit_byte(0xF7);
  emit_byte(0xF8 | encode);
}

void Assembler::divl(Register src) { // Unsigned
  int encode = prefix_and_encode(src->encoding());
  emit_byte(0xF7);
  emit_byte(0xF0 | encode);
}

void Assembler::imull(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xAF);
  emit_byte(0xC0 | encode);
}


void Assembler::imull(Register dst, Register src, int value) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  if (is8bit(value)) {
    emit_byte(0x6B);
    emit_byte(0xC0 | encode);
    emit_byte(value & 0xFF);
  } else {
    emit_byte(0x69);
    emit_byte(0xC0 | encode);
    emit_long(value);
  }
}

void Assembler::incl(Address dst) {
  // Don't use it directly. Use MacroAssembler::increment() instead.
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xFF);
  emit_operand(rax, dst);
}

void Assembler::jcc(Condition cc, Label& L, bool maybe_short) {
  InstructionMark im(this);
  assert((0 <= cc) && (cc < 16), "illegal cc");
  if (L.is_bound()) {
    address dst = target(L);
    assert(dst != NULL, "jcc most probably wrong");

    const int short_size = 2;
    const int long_size = 6;
    intptr_t offs = (intptr_t)dst - (intptr_t)_code_pos;
    if (maybe_short && is8bit(offs - short_size)) {
      // 0111 tttn #8-bit disp
      emit_byte(0x70 | cc);
      emit_byte((offs - short_size) & 0xFF);
    } else {
      // 0000 1111 1000 tttn #32-bit disp
      assert(is_simm32(offs - long_size),
             "must be 32bit offset (call4)");
      emit_byte(0x0F);
      emit_byte(0x80 | cc);
      emit_long(offs - long_size);
    }
  } else {
    // Note: could eliminate cond. jumps to this jump if condition
    // is the same however, seems to be rather unlikely case.
    // Note: use jccb() if label to be bound is very close to get
    // an 8-bit displacement
    L.add_patch_at(code(), locator());
    emit_byte(0x0F);
    emit_byte(0x80 | cc);
    emit_long(0);
  }
}

void Assembler::jccb(Condition cc, Label& L) {
  if (L.is_bound()) {
    const int short_size = 2;
    address entry = target(L);
#ifdef ASSERT
    intptr_t dist = (intptr_t)entry - ((intptr_t)_code_pos + short_size);
    intptr_t delta = short_branch_delta();
    if (delta != 0) {
      dist += (dist < 0 ? (-delta) : delta);
    }
    assert(is8bit(dist), "Displacement too large for a short jmp");
#endif
    intptr_t offs = (intptr_t)entry - (intptr_t)_code_pos;
    // 0111 tttn #8-bit disp
    emit_byte(0x70 | cc);
    emit_byte((offs - short_size) & 0xFF);
  } else {
    InstructionMark im(this);
    L.add_patch_at(code(), locator());
    emit_byte(0x70 | cc);
    emit_byte(0);
  }
}

void Assembler::jmp(Address adr) {
  InstructionMark im(this);
  prefix(adr);
  emit_byte(0xFF);
  emit_operand(rsp, adr);
}

void Assembler::jmp(Label& L, bool maybe_short) {
  if (L.is_bound()) {
    address entry = target(L);
    assert(entry != NULL, "jmp most probably wrong");
    InstructionMark im(this);
    const int short_size = 2;
    const int long_size = 5;
    intptr_t offs = entry - _code_pos;
    if (maybe_short && is8bit(offs - short_size)) {
      emit_byte(0xEB);
      emit_byte((offs - short_size) & 0xFF);
    } else {
      emit_byte(0xE9);
      emit_long(offs - long_size);
    }
  } else {
    // By default, forward jumps are always 32-bit displacements, since
    // we can't yet know where the label will be bound.  If you're sure that
    // the forward jump will not run beyond 256 bytes, use jmpb to
    // force an 8-bit displacement.
    InstructionMark im(this);
    L.add_patch_at(code(), locator());
    emit_byte(0xE9);
    emit_long(0);
  }
}

void Assembler::jmp(Register entry) {
  int encode = prefix_and_encode(entry->encoding());
  emit_byte(0xFF);
  emit_byte(0xE0 | encode);
}

void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_byte(0xE9);
  assert(dest != NULL, "must have a target");
  intptr_t disp = dest - (_code_pos + sizeof(int32_t));
  assert(is_simm32(disp), "must be 32bit offset (jmp)");
  emit_data(disp, rspec.reloc(), call32_operand);
}

void Assembler::jmpb(Label& L) {
  if (L.is_bound()) {
    const int short_size = 2;
    address entry = target(L);
    assert(entry != NULL, "jmp most probably wrong");
#ifdef ASSERT
    intptr_t dist = (intptr_t)entry - ((intptr_t)_code_pos + short_size);
    intptr_t delta = short_branch_delta();
    if (delta != 0) {
      dist += (dist < 0 ? (-delta) : delta);
    }
    assert(is8bit(dist), "Displacement too large for a short jmp");
#endif
    intptr_t offs = entry - _code_pos;
    emit_byte(0xEB);
    emit_byte((offs - short_size) & 0xFF);
  } else {
    InstructionMark im(this);
    L.add_patch_at(code(), locator());
    emit_byte(0xEB);
    emit_byte(0);
  }
}

void Assembler::ldmxcsr(Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  prefix(src);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(as_Register(2), src);
}

void Assembler::leal(Register dst, Address src) {
  InstructionMark im(this);
#ifdef _LP64
  emit_byte(0x67); // addr32
  prefix(src, dst);
#endif // LP64
  emit_byte(0x8D);
  emit_operand(dst, src);
}

void Assembler::lock() {
  if (Atomics & 1) {
    // Emit either nothing, a NOP, or a NOP: prefix
    emit_byte(0x90);
  } else {
    emit_byte(0xF0);
  }
}

void Assembler::lzcntl(Register dst, Register src) {
  assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBD);
  emit_byte(0xC0 | encode);
}

// Emit mfence instruction
void Assembler::mfence() {
  NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_byte(0xF0);
}

void Assembler::mov(Register dst, Register src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}

void Assembler::movapd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_66);
}

void Assembler::movaps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_NONE);
}

void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE);
  emit_byte(0x16);
  emit_byte(0xC0 | encode);
}

void Assembler::movb(Register dst, Address src) {
  NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
  InstructionMark im(this);
  prefix(src, dst, true);
  emit_byte(0x8A);
  emit_operand(dst, src);
}


void Assembler::movb(Address dst, int imm8) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xC6);
  emit_operand(rax, dst, 1);
  emit_byte(imm8);
}


void Assembler::movb(Address dst, Register src) {
  assert(src->has_byte_register(), "must have byte register");
  InstructionMark im(this);
  prefix(dst, src, true);
  emit_byte(0x88);
  emit_operand(src, dst);
}

void Assembler::movdl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
  emit_byte(0x6E);
  emit_byte(0xC0 | encode);
}

void Assembler::movdl(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // swap src/dst to get correct prefix
  int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66);
  emit_byte(0x7E);
  emit_byte(0xC0 | encode);
}

void Assembler::movdl(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_66);
  emit_byte(0x6E);
  emit_operand(dst, src);
}

void Assembler::movdl(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_66);
  emit_byte(0x7E);
  emit_operand(src, dst);
}

void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66);
}

void Assembler::movdqu(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
}

void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
}

void Assembler::movdqu(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_F3);
  emit_byte(0x7F);
  emit_operand(src, dst);
}

// Move Unaligned 256bit Vector
void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
  assert(UseAVX, "");
  bool vector256 = true;
  int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, vector256);
  emit_byte(0x6F);
  emit_byte(0xC0 | encode);
}

void Assembler::vmovdqu(XMMRegister dst, Address src) {
  assert(UseAVX, "");
  InstructionMark im(this);
  bool vector256 = true;
  vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector256);
  emit_byte(0x6F);
  emit_operand(dst, src);
}

void Assembler::vmovdqu(Address dst, XMMRegister src) {
  assert(UseAVX, "");
  InstructionMark im(this);
  bool vector256 = true;
  // swap src<->dst for encoding
  assert(src != xnoreg, "sanity");
  vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector256);
  emit_byte(0x7F);
  emit_operand(src, dst);
}

// Uses zero extension on 64bit

void Assembler::movl(Register dst, int32_t imm32) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);
  emit_long(imm32);
}

void Assembler::movl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x8B);
  emit_byte(0xC0 | encode);
}

void Assembler::movl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x8B);
  emit_operand(dst, src);
}

void Assembler::movl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xC7);
  emit_operand(rax, dst, 4);
  emit_long(imm32);
}

void Assembler::movl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x89);
  emit_operand(src, dst);
}

// New CPUs require movsd and movss to be used to avoid a partial register
// stall when loading from memory. But for the old Opteron, use movlpd instead
// of movsd. The selection is done in MacroAssembler::movdbl() and movflt().
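// (movlpd only writes the low 64 bits of the destination and leaves the upper
// half unchanged, whereas the memory form of movsd clears the upper half.)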
void Assembler::movlpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x12, dst, src, VEX_SIMD_66);
}

void Assembler::movq(MMXRegister dst, Address src) {
  assert(VM_Version::supports_mmx(), "");
  emit_byte(0x0F);
  emit_byte(0x6F);
  emit_operand(dst, src);
}

void Assembler::movq(Address dst, MMXRegister src) {
  assert(VM_Version::supports_mmx(), "");
  emit_byte(0x0F);
  emit_byte(0x7F);
  // workaround gcc (3.2.1-7a) bug
  // In that version of gcc with only an emit_operand(MMX, Address)
  // gcc will tail jump and try and reverse the parameters completely
  // obliterating dst in the process. By having a version available
  // that doesn't need to swap the args at the tail jump the bug is
  // avoided.
  emit_operand(dst, src);
}

void Assembler::movq(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_F3);
  emit_byte(0x7E);
  emit_operand(dst, src);
}

void Assembler::movq(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_66);
  emit_byte(0xD6);
  emit_operand(src, dst);
}

void Assembler::movsbl(Register dst, Address src) { // movsxb
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xBE);
  emit_operand(dst, src);
}

void Assembler::movsbl(Register dst, Register src) { // movsxb
  NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
  emit_byte(0x0F);
  emit_byte(0xBE);
  emit_byte(0xC0 | encode);
}

void Assembler::movsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x10, dst, src, VEX_SIMD_F2);
}

void Assembler::movsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F2);
}

void Assembler::movsd(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_F2);
  emit_byte(0x11);
  emit_operand(src, dst);
}

void Assembler::movss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x10, dst, src, VEX_SIMD_F3);
}

void Assembler::movss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F3);
}

void Assembler::movss(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_F3);
  emit_byte(0x11);
  emit_operand(src, dst);
}

void Assembler::movswl(Register dst, Address src) { // movsxw
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xBF);
  emit_operand(dst, src);
}

void Assembler::movswl(Register dst, Register src) { // movsxw
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBF);
  emit_byte(0xC0 | encode);
}

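// The 16-bit moves below emit the 0x66 operand-size prefix, which selects
// 16-bit operands for the instruction that follows it.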
1818void Assembler::movw(Address dst, int imm16) { 1819 InstructionMark im(this); 1820 1821 emit_byte(0x66); // switch to 16-bit mode 1822 prefix(dst); 1823 emit_byte(0xC7); 1824 emit_operand(rax, dst, 2); 1825 emit_word(imm16); 1826} 1827 1828void Assembler::movw(Register dst, Address src) { 1829 InstructionMark im(this); 1830 emit_byte(0x66); 1831 prefix(src, dst); 1832 emit_byte(0x8B); 1833 emit_operand(dst, src); 1834} 1835 1836void Assembler::movw(Address dst, Register src) { 1837 InstructionMark im(this); 1838 emit_byte(0x66); 1839 prefix(dst, src); 1840 emit_byte(0x89); 1841 emit_operand(src, dst); 1842} 1843 1844void Assembler::movzbl(Register dst, Address src) { // movzxb 1845 InstructionMark im(this); 1846 prefix(src, dst); 1847 emit_byte(0x0F); 1848 emit_byte(0xB6); 1849 emit_operand(dst, src); 1850} 1851 1852void Assembler::movzbl(Register dst, Register src) { // movzxb 1853 NOT_LP64(assert(src->has_byte_register(), "must have byte register")); 1854 int encode = prefix_and_encode(dst->encoding(), src->encoding(), true); 1855 emit_byte(0x0F); 1856 emit_byte(0xB6); 1857 emit_byte(0xC0 | encode); 1858} 1859 1860void Assembler::movzwl(Register dst, Address src) { // movzxw 1861 InstructionMark im(this); 1862 prefix(src, dst); 1863 emit_byte(0x0F); 1864 emit_byte(0xB7); 1865 emit_operand(dst, src); 1866} 1867 1868void Assembler::movzwl(Register dst, Register src) { // movzxw 1869 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1870 emit_byte(0x0F); 1871 emit_byte(0xB7); 1872 emit_byte(0xC0 | encode); 1873} 1874 1875void Assembler::mull(Address src) { 1876 InstructionMark im(this); 1877 prefix(src); 1878 emit_byte(0xF7); 1879 emit_operand(rsp, src); 1880} 1881 1882void Assembler::mull(Register src) { 1883 int encode = prefix_and_encode(src->encoding()); 1884 emit_byte(0xF7); 1885 emit_byte(0xE0 | encode); 1886} 1887 1888void Assembler::mulsd(XMMRegister dst, Address src) { 1889 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1890 emit_simd_arith(0x59, dst, src, VEX_SIMD_F2); 1891} 1892 1893void Assembler::mulsd(XMMRegister dst, XMMRegister src) { 1894 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1895 emit_simd_arith(0x59, dst, src, VEX_SIMD_F2); 1896} 1897 1898void Assembler::mulss(XMMRegister dst, Address src) { 1899 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1900 emit_simd_arith(0x59, dst, src, VEX_SIMD_F3); 1901} 1902 1903void Assembler::mulss(XMMRegister dst, XMMRegister src) { 1904 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1905 emit_simd_arith(0x59, dst, src, VEX_SIMD_F3); 1906} 1907 1908void Assembler::negl(Register dst) { 1909 int encode = prefix_and_encode(dst->encoding()); 1910 emit_byte(0xF7); 1911 emit_byte(0xD8 | encode); 1912} 1913 1914void Assembler::nop(int i) { 1915#ifdef ASSERT 1916 assert(i > 0, " "); 1917 // The fancy nops aren't currently recognized by debuggers making it a 1918 // pain to disassemble code while debugging. If asserts are on clearly 1919 // speed is not an issue so simply use the single byte traditional nop 1920 // to do alignment. 
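  // (For example, nop(3) on this path emits 0x90 0x90 0x90 rather than one of
  // the multi-byte forms generated below.)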
1921 1922 for (; i > 0 ; i--) emit_byte(0x90); 1923 return; 1924 1925#endif // ASSERT 1926 1927 if (UseAddressNop && VM_Version::is_intel()) { 1928 // 1929 // Using multi-bytes nops "0x0F 0x1F [address]" for Intel 1930 // 1: 0x90 1931 // 2: 0x66 0x90 1932 // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding) 1933 // 4: 0x0F 0x1F 0x40 0x00 1934 // 5: 0x0F 0x1F 0x44 0x00 0x00 1935 // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00 1936 // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 1937 // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 1938 // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 1939 // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 1940 // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 1941 1942 // The rest coding is Intel specific - don't use consecutive address nops 1943 1944 // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 1945 // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 1946 // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 1947 // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 1948 1949 while(i >= 15) { 1950 // For Intel don't generate consecutive addess nops (mix with regular nops) 1951 i -= 15; 1952 emit_byte(0x66); // size prefix 1953 emit_byte(0x66); // size prefix 1954 emit_byte(0x66); // size prefix 1955 addr_nop_8(); 1956 emit_byte(0x66); // size prefix 1957 emit_byte(0x66); // size prefix 1958 emit_byte(0x66); // size prefix 1959 emit_byte(0x90); // nop 1960 } 1961 switch (i) { 1962 case 14: 1963 emit_byte(0x66); // size prefix 1964 case 13: 1965 emit_byte(0x66); // size prefix 1966 case 12: 1967 addr_nop_8(); 1968 emit_byte(0x66); // size prefix 1969 emit_byte(0x66); // size prefix 1970 emit_byte(0x66); // size prefix 1971 emit_byte(0x90); // nop 1972 break; 1973 case 11: 1974 emit_byte(0x66); // size prefix 1975 case 10: 1976 emit_byte(0x66); // size prefix 1977 case 9: 1978 emit_byte(0x66); // size prefix 1979 case 8: 1980 addr_nop_8(); 1981 break; 1982 case 7: 1983 addr_nop_7(); 1984 break; 1985 case 6: 1986 emit_byte(0x66); // size prefix 1987 case 5: 1988 addr_nop_5(); 1989 break; 1990 case 4: 1991 addr_nop_4(); 1992 break; 1993 case 3: 1994 // Don't use "0x0F 0x1F 0x00" - need patching safe padding 1995 emit_byte(0x66); // size prefix 1996 case 2: 1997 emit_byte(0x66); // size prefix 1998 case 1: 1999 emit_byte(0x90); // nop 2000 break; 2001 default: 2002 assert(i == 0, " "); 2003 } 2004 return; 2005 } 2006 if (UseAddressNop && VM_Version::is_amd()) { 2007 // 2008 // Using multi-bytes nops "0x0F 0x1F [address]" for AMD. 
2009 // 1: 0x90 2010 // 2: 0x66 0x90 2011 // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding) 2012 // 4: 0x0F 0x1F 0x40 0x00 2013 // 5: 0x0F 0x1F 0x44 0x00 0x00 2014 // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00 2015 // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2016 // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2017 // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2018 // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2019 // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2020 2021 // The rest coding is AMD specific - use consecutive address nops 2022 2023 // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00 2024 // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00 2025 // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2026 // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2027 // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2028 // Size prefixes (0x66) are added for larger sizes 2029 2030 while(i >= 22) { 2031 i -= 11; 2032 emit_byte(0x66); // size prefix 2033 emit_byte(0x66); // size prefix 2034 emit_byte(0x66); // size prefix 2035 addr_nop_8(); 2036 } 2037 // Generate first nop for size between 21-12 2038 switch (i) { 2039 case 21: 2040 i -= 1; 2041 emit_byte(0x66); // size prefix 2042 case 20: 2043 case 19: 2044 i -= 1; 2045 emit_byte(0x66); // size prefix 2046 case 18: 2047 case 17: 2048 i -= 1; 2049 emit_byte(0x66); // size prefix 2050 case 16: 2051 case 15: 2052 i -= 8; 2053 addr_nop_8(); 2054 break; 2055 case 14: 2056 case 13: 2057 i -= 7; 2058 addr_nop_7(); 2059 break; 2060 case 12: 2061 i -= 6; 2062 emit_byte(0x66); // size prefix 2063 addr_nop_5(); 2064 break; 2065 default: 2066 assert(i < 12, " "); 2067 } 2068 2069 // Generate second nop for size between 11-1 2070 switch (i) { 2071 case 11: 2072 emit_byte(0x66); // size prefix 2073 case 10: 2074 emit_byte(0x66); // size prefix 2075 case 9: 2076 emit_byte(0x66); // size prefix 2077 case 8: 2078 addr_nop_8(); 2079 break; 2080 case 7: 2081 addr_nop_7(); 2082 break; 2083 case 6: 2084 emit_byte(0x66); // size prefix 2085 case 5: 2086 addr_nop_5(); 2087 break; 2088 case 4: 2089 addr_nop_4(); 2090 break; 2091 case 3: 2092 // Don't use "0x0F 0x1F 0x00" - need patching safe padding 2093 emit_byte(0x66); // size prefix 2094 case 2: 2095 emit_byte(0x66); // size prefix 2096 case 1: 2097 emit_byte(0x90); // nop 2098 break; 2099 default: 2100 assert(i == 0, " "); 2101 } 2102 return; 2103 } 2104 2105 // Using nops with size prefixes "0x66 0x90". 
2106 // From AMD Optimization Guide: 2107 // 1: 0x90 2108 // 2: 0x66 0x90 2109 // 3: 0x66 0x66 0x90 2110 // 4: 0x66 0x66 0x66 0x90 2111 // 5: 0x66 0x66 0x90 0x66 0x90 2112 // 6: 0x66 0x66 0x90 0x66 0x66 0x90 2113 // 7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 2114 // 8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90 2115 // 9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90 2116 // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90 2117 // 2118 while(i > 12) { 2119 i -= 4; 2120 emit_byte(0x66); // size prefix 2121 emit_byte(0x66); 2122 emit_byte(0x66); 2123 emit_byte(0x90); // nop 2124 } 2125 // 1 - 12 nops 2126 if(i > 8) { 2127 if(i > 9) { 2128 i -= 1; 2129 emit_byte(0x66); 2130 } 2131 i -= 3; 2132 emit_byte(0x66); 2133 emit_byte(0x66); 2134 emit_byte(0x90); 2135 } 2136 // 1 - 8 nops 2137 if(i > 4) { 2138 if(i > 6) { 2139 i -= 1; 2140 emit_byte(0x66); 2141 } 2142 i -= 3; 2143 emit_byte(0x66); 2144 emit_byte(0x66); 2145 emit_byte(0x90); 2146 } 2147 switch (i) { 2148 case 4: 2149 emit_byte(0x66); 2150 case 3: 2151 emit_byte(0x66); 2152 case 2: 2153 emit_byte(0x66); 2154 case 1: 2155 emit_byte(0x90); 2156 break; 2157 default: 2158 assert(i == 0, " "); 2159 } 2160} 2161 2162void Assembler::notl(Register dst) { 2163 int encode = prefix_and_encode(dst->encoding()); 2164 emit_byte(0xF7); 2165 emit_byte(0xD0 | encode ); 2166} 2167 2168void Assembler::orl(Address dst, int32_t imm32) { 2169 InstructionMark im(this); 2170 prefix(dst); 2171 emit_arith_operand(0x81, rcx, dst, imm32); 2172} 2173 2174void Assembler::orl(Register dst, int32_t imm32) { 2175 prefix(dst); 2176 emit_arith(0x81, 0xC8, dst, imm32); 2177} 2178 2179void Assembler::orl(Register dst, Address src) { 2180 InstructionMark im(this); 2181 prefix(src, dst); 2182 emit_byte(0x0B); 2183 emit_operand(dst, src); 2184} 2185 2186void Assembler::orl(Register dst, Register src) { 2187 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2188 emit_arith(0x0B, 0xC0, dst, src); 2189} 2190 2191void Assembler::packuswb(XMMRegister dst, Address src) { 2192 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2193 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2194 emit_simd_arith(0x67, dst, src, VEX_SIMD_66); 2195} 2196 2197void Assembler::packuswb(XMMRegister dst, XMMRegister src) { 2198 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2199 emit_simd_arith(0x67, dst, src, VEX_SIMD_66); 2200} 2201 2202void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) { 2203 assert(VM_Version::supports_sse4_2(), ""); 2204 InstructionMark im(this); 2205 simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A); 2206 emit_byte(0x61); 2207 emit_operand(dst, src); 2208 emit_byte(imm8); 2209} 2210 2211void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) { 2212 assert(VM_Version::supports_sse4_2(), ""); 2213 int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A); 2214 emit_byte(0x61); 2215 emit_byte(0xC0 | encode); 2216 emit_byte(imm8); 2217} 2218 2219void Assembler::pmovzxbw(XMMRegister dst, Address src) { 2220 assert(VM_Version::supports_sse4_1(), ""); 2221 InstructionMark im(this); 2222 simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); 2223 emit_byte(0x30); 2224 emit_operand(dst, src); 2225} 2226 2227void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) { 2228 assert(VM_Version::supports_sse4_1(), ""); 2229 int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38); 2230 emit_byte(0x30); 2231 emit_byte(0xC0 | encode); 2232} 2233 2234// generic 2235void 
Assembler::pop(Register dst) { 2236 int encode = prefix_and_encode(dst->encoding()); 2237 emit_byte(0x58 | encode); 2238} 2239 2240void Assembler::popcntl(Register dst, Address src) { 2241 assert(VM_Version::supports_popcnt(), "must support"); 2242 InstructionMark im(this); 2243 emit_byte(0xF3); 2244 prefix(src, dst); 2245 emit_byte(0x0F); 2246 emit_byte(0xB8); 2247 emit_operand(dst, src); 2248} 2249 2250void Assembler::popcntl(Register dst, Register src) { 2251 assert(VM_Version::supports_popcnt(), "must support"); 2252 emit_byte(0xF3); 2253 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2254 emit_byte(0x0F); 2255 emit_byte(0xB8); 2256 emit_byte(0xC0 | encode); 2257} 2258 2259void Assembler::popf() { 2260 emit_byte(0x9D); 2261} 2262 2263#ifndef _LP64 // no 32bit push/pop on amd64 2264void Assembler::popl(Address dst) { 2265 // NOTE: this will adjust stack by 8byte on 64bits 2266 InstructionMark im(this); 2267 prefix(dst); 2268 emit_byte(0x8F); 2269 emit_operand(rax, dst); 2270} 2271#endif 2272 2273void Assembler::prefetch_prefix(Address src) { 2274 prefix(src); 2275 emit_byte(0x0F); 2276} 2277 2278void Assembler::prefetchnta(Address src) { 2279 NOT_LP64(assert(VM_Version::supports_sse(), "must support")); 2280 InstructionMark im(this); 2281 prefetch_prefix(src); 2282 emit_byte(0x18); 2283 emit_operand(rax, src); // 0, src 2284} 2285 2286void Assembler::prefetchr(Address src) { 2287 assert(VM_Version::supports_3dnow_prefetch(), "must support"); 2288 InstructionMark im(this); 2289 prefetch_prefix(src); 2290 emit_byte(0x0D); 2291 emit_operand(rax, src); // 0, src 2292} 2293 2294void Assembler::prefetcht0(Address src) { 2295 NOT_LP64(assert(VM_Version::supports_sse(), "must support")); 2296 InstructionMark im(this); 2297 prefetch_prefix(src); 2298 emit_byte(0x18); 2299 emit_operand(rcx, src); // 1, src 2300} 2301 2302void Assembler::prefetcht1(Address src) { 2303 NOT_LP64(assert(VM_Version::supports_sse(), "must support")); 2304 InstructionMark im(this); 2305 prefetch_prefix(src); 2306 emit_byte(0x18); 2307 emit_operand(rdx, src); // 2, src 2308} 2309 2310void Assembler::prefetcht2(Address src) { 2311 NOT_LP64(assert(VM_Version::supports_sse(), "must support")); 2312 InstructionMark im(this); 2313 prefetch_prefix(src); 2314 emit_byte(0x18); 2315 emit_operand(rbx, src); // 3, src 2316} 2317 2318void Assembler::prefetchw(Address src) { 2319 assert(VM_Version::supports_3dnow_prefetch(), "must support"); 2320 InstructionMark im(this); 2321 prefetch_prefix(src); 2322 emit_byte(0x0D); 2323 emit_operand(rcx, src); // 1, src 2324} 2325 2326void Assembler::prefix(Prefix p) { 2327 a_byte(p); 2328} 2329 2330void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) { 2331 assert(isByte(mode), "invalid value"); 2332 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2333 emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_66); 2334 emit_byte(mode & 0xFF); 2335 2336} 2337 2338void Assembler::pshufd(XMMRegister dst, Address src, int mode) { 2339 assert(isByte(mode), "invalid value"); 2340 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2341 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2342 InstructionMark im(this); 2343 simd_prefix(dst, src, VEX_SIMD_66); 2344 emit_byte(0x70); 2345 emit_operand(dst, src); 2346 emit_byte(mode & 0xFF); 2347} 2348 2349void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) { 2350 assert(isByte(mode), "invalid value"); 2351 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2352 emit_simd_arith_nonds(0x70, dst, src, 
VEX_SIMD_F2); 2353 emit_byte(mode & 0xFF); 2354} 2355 2356void Assembler::pshuflw(XMMRegister dst, Address src, int mode) { 2357 assert(isByte(mode), "invalid value"); 2358 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2359 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2360 InstructionMark im(this); 2361 simd_prefix(dst, src, VEX_SIMD_F2); 2362 emit_byte(0x70); 2363 emit_operand(dst, src); 2364 emit_byte(mode & 0xFF); 2365} 2366 2367void Assembler::psrldq(XMMRegister dst, int shift) { 2368 // Shift 128 bit value in xmm register by number of bytes. 2369 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2370 int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66); 2371 emit_byte(0x73); 2372 emit_byte(0xC0 | encode); 2373 emit_byte(shift); 2374} 2375 2376void Assembler::ptest(XMMRegister dst, Address src) { 2377 assert(VM_Version::supports_sse4_1(), ""); 2378 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2379 InstructionMark im(this); 2380 simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); 2381 emit_byte(0x17); 2382 emit_operand(dst, src); 2383} 2384 2385void Assembler::ptest(XMMRegister dst, XMMRegister src) { 2386 assert(VM_Version::supports_sse4_1(), ""); 2387 int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38); 2388 emit_byte(0x17); 2389 emit_byte(0xC0 | encode); 2390} 2391 2392void Assembler::punpcklbw(XMMRegister dst, Address src) { 2393 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2394 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2395 emit_simd_arith(0x60, dst, src, VEX_SIMD_66); 2396} 2397 2398void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) { 2399 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2400 emit_simd_arith(0x60, dst, src, VEX_SIMD_66); 2401} 2402 2403void Assembler::punpckldq(XMMRegister dst, Address src) { 2404 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2405 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2406 emit_simd_arith(0x62, dst, src, VEX_SIMD_66); 2407} 2408 2409void Assembler::punpckldq(XMMRegister dst, XMMRegister src) { 2410 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2411 emit_simd_arith(0x62, dst, src, VEX_SIMD_66); 2412} 2413 2414void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) { 2415 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2416 emit_simd_arith(0x6C, dst, src, VEX_SIMD_66); 2417} 2418 2419void Assembler::push(int32_t imm32) { 2420 // in 64bits we push 64bits onto the stack but only 2421 // take a 32bit immediate 2422 emit_byte(0x68); 2423 emit_long(imm32); 2424} 2425 2426void Assembler::push(Register src) { 2427 int encode = prefix_and_encode(src->encoding()); 2428 2429 emit_byte(0x50 | encode); 2430} 2431 2432void Assembler::pushf() { 2433 emit_byte(0x9C); 2434} 2435 2436#ifndef _LP64 // no 32bit push/pop on amd64 2437void Assembler::pushl(Address src) { 2438 // Note this will push 64bit on 64bit 2439 InstructionMark im(this); 2440 prefix(src); 2441 emit_byte(0xFF); 2442 emit_operand(rsi, src); 2443} 2444#endif 2445 2446void Assembler::rcll(Register dst, int imm8) { 2447 assert(isShiftCount(imm8), "illegal shift count"); 2448 int encode = prefix_and_encode(dst->encoding()); 2449 if (imm8 == 1) { 2450 emit_byte(0xD1); 2451 emit_byte(0xD0 | encode); 2452 } else { 2453 emit_byte(0xC1); 2454 emit_byte(0xD0 | encode); 2455 emit_byte(imm8); 2456 } 2457} 2458 2459// copies data from [esi] to [edi] using rcx pointer sized words 2460// generic 2461void 
Assembler::rep_mov() {
  emit_byte(0xF3);
  // MOVSQ
  LP64_ONLY(prefix(REX_W));
  emit_byte(0xA5);
}

// sets rcx pointer sized words at [edi] to the value in rax
// generic
void Assembler::rep_set() { // rep_set
  emit_byte(0xF3);
  // STOSQ
  LP64_ONLY(prefix(REX_W));
  emit_byte(0xAB);
}

// scans rcx pointer sized words at [edi] for an occurrence of rax
// generic
void Assembler::repne_scan() { // repne_scan
  emit_byte(0xF2);
  // SCASQ
  LP64_ONLY(prefix(REX_W));
  emit_byte(0xAF);
}

#ifdef _LP64
// scans rcx 4 byte words at [edi] for an occurrence of rax
// generic
void Assembler::repne_scanl() { // repne_scan
  emit_byte(0xF2);
  // SCASL
  emit_byte(0xAF);
}
#endif

void Assembler::ret(int imm16) {
  if (imm16 == 0) {
    emit_byte(0xC3);
  } else {
    emit_byte(0xC2);
    emit_word(imm16);
  }
}

void Assembler::sahf() {
#ifdef _LP64
  // Not supported in 64bit mode
  ShouldNotReachHere();
#endif
  emit_byte(0x9E);
}

void Assembler::sarl(Register dst, int imm8) {
  int encode = prefix_and_encode(dst->encoding());
  assert(isShiftCount(imm8), "illegal shift count");
  if (imm8 == 1) {
    emit_byte(0xD1);
    emit_byte(0xF8 | encode);
  } else {
    emit_byte(0xC1);
    emit_byte(0xF8 | encode);
    emit_byte(imm8);
  }
}

void Assembler::sarl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xF8 | encode);
}

void Assembler::sbbl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rbx, dst, imm32);
}

void Assembler::sbbl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xD8, dst, imm32);
}


void Assembler::sbbl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x1B);
  emit_operand(dst, src);
}

void Assembler::sbbl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x1B, 0xC0, dst, src);
}

void Assembler::setb(Condition cc, Register dst) {
  assert(0 <= cc && cc < 16, "illegal cc");
  int encode = prefix_and_encode(dst->encoding(), true);
  emit_byte(0x0F);
  emit_byte(0x90 | cc);
  emit_byte(0xC0 | encode);
}

void Assembler::shll(Register dst, int imm8) {
  assert(isShiftCount(imm8), "illegal shift count");
  int encode = prefix_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_byte(0xD1);
    emit_byte(0xE0 | encode);
  } else {
    emit_byte(0xC1);
    emit_byte(0xE0 | encode);
    emit_byte(imm8);
  }
}

void Assembler::shll(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xE0 | encode);
}

void Assembler::shrl(Register dst, int imm8) {
  assert(isShiftCount(imm8), "illegal shift count");
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xC1);
  emit_byte(0xE8 | encode);
  emit_byte(imm8);
}

void Assembler::shrl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xE8 | encode);
}

// copies a single word from [esi] to [edi]
void Assembler::smovl() {
  emit_byte(0xA5);
}

void
Assembler::sqrtsd(XMMRegister dst, XMMRegister src) { 2603 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2604 emit_simd_arith(0x51, dst, src, VEX_SIMD_F2); 2605} 2606 2607void Assembler::sqrtsd(XMMRegister dst, Address src) { 2608 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2609 emit_simd_arith(0x51, dst, src, VEX_SIMD_F2); 2610} 2611 2612void Assembler::sqrtss(XMMRegister dst, XMMRegister src) { 2613 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2614 emit_simd_arith(0x51, dst, src, VEX_SIMD_F3); 2615} 2616 2617void Assembler::sqrtss(XMMRegister dst, Address src) { 2618 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2619 emit_simd_arith(0x51, dst, src, VEX_SIMD_F3); 2620} 2621 2622void Assembler::stmxcsr( Address dst) { 2623 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2624 InstructionMark im(this); 2625 prefix(dst); 2626 emit_byte(0x0F); 2627 emit_byte(0xAE); 2628 emit_operand(as_Register(3), dst); 2629} 2630 2631void Assembler::subl(Address dst, int32_t imm32) { 2632 InstructionMark im(this); 2633 prefix(dst); 2634 emit_arith_operand(0x81, rbp, dst, imm32); 2635} 2636 2637void Assembler::subl(Address dst, Register src) { 2638 InstructionMark im(this); 2639 prefix(dst, src); 2640 emit_byte(0x29); 2641 emit_operand(src, dst); 2642} 2643 2644void Assembler::subl(Register dst, int32_t imm32) { 2645 prefix(dst); 2646 emit_arith(0x81, 0xE8, dst, imm32); 2647} 2648 2649// Force generation of a 4 byte immediate value even if it fits into 8bit 2650void Assembler::subl_imm32(Register dst, int32_t imm32) { 2651 prefix(dst); 2652 emit_arith_imm32(0x81, 0xE8, dst, imm32); 2653} 2654 2655void Assembler::subl(Register dst, Address src) { 2656 InstructionMark im(this); 2657 prefix(src, dst); 2658 emit_byte(0x2B); 2659 emit_operand(dst, src); 2660} 2661 2662void Assembler::subl(Register dst, Register src) { 2663 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2664 emit_arith(0x2B, 0xC0, dst, src); 2665} 2666 2667void Assembler::subsd(XMMRegister dst, XMMRegister src) { 2668 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2669 emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2); 2670} 2671 2672void Assembler::subsd(XMMRegister dst, Address src) { 2673 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2674 emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2); 2675} 2676 2677void Assembler::subss(XMMRegister dst, XMMRegister src) { 2678 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2679 emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3); 2680} 2681 2682void Assembler::subss(XMMRegister dst, Address src) { 2683 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2684 emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3); 2685} 2686 2687void Assembler::testb(Register dst, int imm8) { 2688 NOT_LP64(assert(dst->has_byte_register(), "must have byte register")); 2689 (void) prefix_and_encode(dst->encoding(), true); 2690 emit_arith_b(0xF6, 0xC0, dst, imm8); 2691} 2692 2693void Assembler::testl(Register dst, int32_t imm32) { 2694 // not using emit_arith because test 2695 // doesn't support sign-extension of 2696 // 8bit operands 2697 int encode = dst->encoding(); 2698 if (encode == 0) { 2699 emit_byte(0xA9); 2700 } else { 2701 encode = prefix_and_encode(encode); 2702 emit_byte(0xF7); 2703 emit_byte(0xC0 | encode); 2704 } 2705 emit_long(imm32); 2706} 2707 2708void Assembler::testl(Register dst, Register src) { 2709 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2710 emit_arith(0x85, 0xC0, dst, src); 2711} 2712 2713void Assembler::testl(Register dst, Address src) { 2714 InstructionMark 
im(this); 2715 prefix(src, dst); 2716 emit_byte(0x85); 2717 emit_operand(dst, src); 2718} 2719 2720void Assembler::ucomisd(XMMRegister dst, Address src) { 2721 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2722 emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66); 2723} 2724 2725void Assembler::ucomisd(XMMRegister dst, XMMRegister src) { 2726 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2727 emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66); 2728} 2729 2730void Assembler::ucomiss(XMMRegister dst, Address src) { 2731 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2732 emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE); 2733} 2734 2735void Assembler::ucomiss(XMMRegister dst, XMMRegister src) { 2736 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2737 emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE); 2738} 2739 2740 2741void Assembler::xaddl(Address dst, Register src) { 2742 InstructionMark im(this); 2743 prefix(dst, src); 2744 emit_byte(0x0F); 2745 emit_byte(0xC1); 2746 emit_operand(src, dst); 2747} 2748 2749void Assembler::xchgl(Register dst, Address src) { // xchg 2750 InstructionMark im(this); 2751 prefix(src, dst); 2752 emit_byte(0x87); 2753 emit_operand(dst, src); 2754} 2755 2756void Assembler::xchgl(Register dst, Register src) { 2757 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2758 emit_byte(0x87); 2759 emit_byte(0xc0 | encode); 2760} 2761 2762void Assembler::xorl(Register dst, int32_t imm32) { 2763 prefix(dst); 2764 emit_arith(0x81, 0xF0, dst, imm32); 2765} 2766 2767void Assembler::xorl(Register dst, Address src) { 2768 InstructionMark im(this); 2769 prefix(src, dst); 2770 emit_byte(0x33); 2771 emit_operand(dst, src); 2772} 2773 2774void Assembler::xorl(Register dst, Register src) { 2775 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2776 emit_arith(0x33, 0xC0, dst, src); 2777} 2778 2779 2780// AVX 3-operands scalar float-point arithmetic instructions 2781 2782void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) { 2783 assert(VM_Version::supports_avx(), ""); 2784 emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false); 2785} 2786 2787void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { 2788 assert(VM_Version::supports_avx(), ""); 2789 emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false); 2790} 2791 2792void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) { 2793 assert(VM_Version::supports_avx(), ""); 2794 emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false); 2795} 2796 2797void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) { 2798 assert(VM_Version::supports_avx(), ""); 2799 emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false); 2800} 2801 2802void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) { 2803 assert(VM_Version::supports_avx(), ""); 2804 emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false); 2805} 2806 2807void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { 2808 assert(VM_Version::supports_avx(), ""); 2809 emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false); 2810} 2811 2812void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) { 2813 assert(VM_Version::supports_avx(), ""); 2814 emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false); 2815} 2816 2817void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) { 2818 assert(VM_Version::supports_avx(), 
""); 2819 emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false); 2820} 2821 2822void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) { 2823 assert(VM_Version::supports_avx(), ""); 2824 emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false); 2825} 2826 2827void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { 2828 assert(VM_Version::supports_avx(), ""); 2829 emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false); 2830} 2831 2832void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) { 2833 assert(VM_Version::supports_avx(), ""); 2834 emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false); 2835} 2836 2837void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) { 2838 assert(VM_Version::supports_avx(), ""); 2839 emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false); 2840} 2841 2842void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) { 2843 assert(VM_Version::supports_avx(), ""); 2844 emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false); 2845} 2846 2847void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { 2848 assert(VM_Version::supports_avx(), ""); 2849 emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false); 2850} 2851 2852void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) { 2853 assert(VM_Version::supports_avx(), ""); 2854 emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false); 2855} 2856 2857void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) { 2858 assert(VM_Version::supports_avx(), ""); 2859 emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false); 2860} 2861 2862//====================VECTOR ARITHMETIC===================================== 2863 2864// Float-point vector arithmetic 2865 2866void Assembler::addpd(XMMRegister dst, XMMRegister src) { 2867 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2868 emit_simd_arith(0x58, dst, src, VEX_SIMD_66); 2869} 2870 2871void Assembler::addps(XMMRegister dst, XMMRegister src) { 2872 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2873 emit_simd_arith(0x58, dst, src, VEX_SIMD_NONE); 2874} 2875 2876void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 2877 assert(VM_Version::supports_avx(), ""); 2878 emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector256); 2879} 2880 2881void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 2882 assert(VM_Version::supports_avx(), ""); 2883 emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector256); 2884} 2885 2886void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 2887 assert(VM_Version::supports_avx(), ""); 2888 emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector256); 2889} 2890 2891void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 2892 assert(VM_Version::supports_avx(), ""); 2893 emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector256); 2894} 2895 2896void Assembler::subpd(XMMRegister dst, XMMRegister src) { 2897 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2898 emit_simd_arith(0x5C, dst, src, VEX_SIMD_66); 2899} 2900 2901void Assembler::subps(XMMRegister dst, XMMRegister src) { 2902 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2903 emit_simd_arith(0x5C, dst, src, VEX_SIMD_NONE); 2904} 2905 2906void Assembler::vsubpd(XMMRegister dst, 
XMMRegister nds, XMMRegister src, bool vector256) { 2907 assert(VM_Version::supports_avx(), ""); 2908 emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector256); 2909} 2910 2911void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 2912 assert(VM_Version::supports_avx(), ""); 2913 emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector256); 2914} 2915 2916void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 2917 assert(VM_Version::supports_avx(), ""); 2918 emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector256); 2919} 2920 2921void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 2922 assert(VM_Version::supports_avx(), ""); 2923 emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector256); 2924} 2925 2926void Assembler::mulpd(XMMRegister dst, XMMRegister src) { 2927 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2928 emit_simd_arith(0x59, dst, src, VEX_SIMD_66); 2929} 2930 2931void Assembler::mulps(XMMRegister dst, XMMRegister src) { 2932 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2933 emit_simd_arith(0x59, dst, src, VEX_SIMD_NONE); 2934} 2935 2936void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 2937 assert(VM_Version::supports_avx(), ""); 2938 emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector256); 2939} 2940 2941void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 2942 assert(VM_Version::supports_avx(), ""); 2943 emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector256); 2944} 2945 2946void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 2947 assert(VM_Version::supports_avx(), ""); 2948 emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector256); 2949} 2950 2951void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 2952 assert(VM_Version::supports_avx(), ""); 2953 emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector256); 2954} 2955 2956void Assembler::divpd(XMMRegister dst, XMMRegister src) { 2957 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2958 emit_simd_arith(0x5E, dst, src, VEX_SIMD_66); 2959} 2960 2961void Assembler::divps(XMMRegister dst, XMMRegister src) { 2962 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2963 emit_simd_arith(0x5E, dst, src, VEX_SIMD_NONE); 2964} 2965 2966void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 2967 assert(VM_Version::supports_avx(), ""); 2968 emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector256); 2969} 2970 2971void Assembler::vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 2972 assert(VM_Version::supports_avx(), ""); 2973 emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector256); 2974} 2975 2976void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 2977 assert(VM_Version::supports_avx(), ""); 2978 emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector256); 2979} 2980 2981void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 2982 assert(VM_Version::supports_avx(), ""); 2983 emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector256); 2984} 2985 2986void Assembler::andpd(XMMRegister dst, XMMRegister src) { 2987 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2988 emit_simd_arith(0x54, dst, src, VEX_SIMD_66); 2989} 2990 2991void Assembler::andps(XMMRegister dst, XMMRegister src) { 2992 
NOT_LP64(assert(VM_Version::supports_sse(), "")); 2993 emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE); 2994} 2995 2996void Assembler::andps(XMMRegister dst, Address src) { 2997 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2998 emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE); 2999} 3000 3001void Assembler::andpd(XMMRegister dst, Address src) { 3002 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3003 emit_simd_arith(0x54, dst, src, VEX_SIMD_66); 3004} 3005 3006void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3007 assert(VM_Version::supports_avx(), ""); 3008 emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector256); 3009} 3010 3011void Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3012 assert(VM_Version::supports_avx(), ""); 3013 emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector256); 3014} 3015 3016void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3017 assert(VM_Version::supports_avx(), ""); 3018 emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector256); 3019} 3020 3021void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3022 assert(VM_Version::supports_avx(), ""); 3023 emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector256); 3024} 3025 3026void Assembler::xorpd(XMMRegister dst, XMMRegister src) { 3027 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3028 emit_simd_arith(0x57, dst, src, VEX_SIMD_66); 3029} 3030 3031void Assembler::xorps(XMMRegister dst, XMMRegister src) { 3032 NOT_LP64(assert(VM_Version::supports_sse(), "")); 3033 emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE); 3034} 3035 3036void Assembler::xorpd(XMMRegister dst, Address src) { 3037 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3038 emit_simd_arith(0x57, dst, src, VEX_SIMD_66); 3039} 3040 3041void Assembler::xorps(XMMRegister dst, Address src) { 3042 NOT_LP64(assert(VM_Version::supports_sse(), "")); 3043 emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE); 3044} 3045 3046void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3047 assert(VM_Version::supports_avx(), ""); 3048 emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector256); 3049} 3050 3051void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3052 assert(VM_Version::supports_avx(), ""); 3053 emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector256); 3054} 3055 3056void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3057 assert(VM_Version::supports_avx(), ""); 3058 emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector256); 3059} 3060 3061void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3062 assert(VM_Version::supports_avx(), ""); 3063 emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector256); 3064} 3065 3066 3067// Integer vector arithmetic 3068void Assembler::paddb(XMMRegister dst, XMMRegister src) { 3069 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3070 emit_simd_arith(0xFC, dst, src, VEX_SIMD_66); 3071} 3072 3073void Assembler::paddw(XMMRegister dst, XMMRegister src) { 3074 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3075 emit_simd_arith(0xFD, dst, src, VEX_SIMD_66); 3076} 3077 3078void Assembler::paddd(XMMRegister dst, XMMRegister src) { 3079 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3080 emit_simd_arith(0xFE, dst, src, VEX_SIMD_66); 3081} 3082 3083void Assembler::paddq(XMMRegister dst, XMMRegister src) 
{ 3084 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3085 emit_simd_arith(0xD4, dst, src, VEX_SIMD_66); 3086} 3087 3088void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3089 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3090 emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector256); 3091} 3092 3093void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3094 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3095 emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector256); 3096} 3097 3098void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3099 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3100 emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector256); 3101} 3102 3103void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3104 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3105 emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector256); 3106} 3107 3108void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3109 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3110 emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector256); 3111} 3112 3113void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3114 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3115 emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector256); 3116} 3117 3118void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3119 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3120 emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector256); 3121} 3122 3123void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3124 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3125 emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector256); 3126} 3127 3128void Assembler::psubb(XMMRegister dst, XMMRegister src) { 3129 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3130 emit_simd_arith(0xF8, dst, src, VEX_SIMD_66); 3131} 3132 3133void Assembler::psubw(XMMRegister dst, XMMRegister src) { 3134 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3135 emit_simd_arith(0xF9, dst, src, VEX_SIMD_66); 3136} 3137 3138void Assembler::psubd(XMMRegister dst, XMMRegister src) { 3139 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3140 emit_simd_arith(0xFA, dst, src, VEX_SIMD_66); 3141} 3142 3143void Assembler::psubq(XMMRegister dst, XMMRegister src) { 3144 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3145 emit_simd_arith(0xFB, dst, src, VEX_SIMD_66); 3146} 3147 3148void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3149 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3150 emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector256); 3151} 3152 3153void 
Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3154 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3155 emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector256); 3156} 3157 3158void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3159 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3160 emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector256); 3161} 3162 3163void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3164 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3165 emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector256); 3166} 3167 3168void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3169 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3170 emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector256); 3171} 3172 3173void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3174 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3175 emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector256); 3176} 3177 3178void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3179 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3180 emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector256); 3181} 3182 3183void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3184 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3185 emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector256); 3186} 3187 3188void Assembler::pmullw(XMMRegister dst, XMMRegister src) { 3189 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3190 emit_simd_arith(0xD5, dst, src, VEX_SIMD_66); 3191} 3192 3193void Assembler::pmulld(XMMRegister dst, XMMRegister src) { 3194 assert(VM_Version::supports_sse4_1(), ""); 3195 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); 3196 emit_byte(0x40); 3197 emit_byte(0xC0 | encode); 3198} 3199 3200void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3201 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3202 emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector256); 3203} 3204 3205void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3206 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3207 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38); 3208 emit_byte(0x40); 3209 emit_byte(0xC0 | encode); 3210} 3211 3212void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3213 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3214 emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector256); 3215} 3216 3217void Assembler::vpmulld(XMMRegister dst, 
XMMRegister nds, Address src, bool vector256) { 3218 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3219 InstructionMark im(this); 3220 int dst_enc = dst->encoding(); 3221 int nds_enc = nds->is_valid() ? nds->encoding() : 0; 3222 vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector256); 3223 emit_byte(0x40); 3224 emit_operand(dst, src); 3225} 3226 3227// Shift packed integers left by specified number of bits. 3228void Assembler::psllw(XMMRegister dst, int shift) { 3229 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3230 // XMM6 is for /6 encoding: 66 0F 71 /6 ib 3231 int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66); 3232 emit_byte(0x71); 3233 emit_byte(0xC0 | encode); 3234 emit_byte(shift & 0xFF); 3235} 3236 3237void Assembler::pslld(XMMRegister dst, int shift) { 3238 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3239 // XMM6 is for /6 encoding: 66 0F 72 /6 ib 3240 int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66); 3241 emit_byte(0x72); 3242 emit_byte(0xC0 | encode); 3243 emit_byte(shift & 0xFF); 3244} 3245 3246void Assembler::psllq(XMMRegister dst, int shift) { 3247 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3248 // XMM6 is for /6 encoding: 66 0F 73 /6 ib 3249 int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66); 3250 emit_byte(0x73); 3251 emit_byte(0xC0 | encode); 3252 emit_byte(shift & 0xFF); 3253} 3254 3255void Assembler::psllw(XMMRegister dst, XMMRegister shift) { 3256 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3257 emit_simd_arith(0xF1, dst, shift, VEX_SIMD_66); 3258} 3259 3260void Assembler::pslld(XMMRegister dst, XMMRegister shift) { 3261 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3262 emit_simd_arith(0xF2, dst, shift, VEX_SIMD_66); 3263} 3264 3265void Assembler::psllq(XMMRegister dst, XMMRegister shift) { 3266 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3267 emit_simd_arith(0xF3, dst, shift, VEX_SIMD_66); 3268} 3269 3270void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, bool vector256) { 3271 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3272 // XMM6 is for /6 encoding: 66 0F 71 /6 ib 3273 emit_vex_arith(0x71, xmm6, dst, src, VEX_SIMD_66, vector256); 3274 emit_byte(shift & 0xFF); 3275} 3276 3277void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, bool vector256) { 3278 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3279 // XMM6 is for /6 encoding: 66 0F 72 /6 ib 3280 emit_vex_arith(0x72, xmm6, dst, src, VEX_SIMD_66, vector256); 3281 emit_byte(shift & 0xFF); 3282} 3283 3284void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, bool vector256) { 3285 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3286 // XMM6 is for /6 encoding: 66 0F 73 /6 ib 3287 emit_vex_arith(0x73, xmm6, dst, src, VEX_SIMD_66, vector256); 3288 emit_byte(shift & 0xFF); 3289} 3290 3291void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) { 3292 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3293 emit_vex_arith(0xF1, dst, src, shift, VEX_SIMD_66, vector256); 3294} 3295 3296void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool 
vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xF2, dst, src, shift, VEX_SIMD_66, vector256);
}

void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xF3, dst, src, shift, VEX_SIMD_66, vector256);
}

// Shift packed integers logically right by specified number of bits.
void Assembler::psrlw(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // XMM2 is for /2 encoding: 66 0F 71 /2 ib
  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
  emit_byte(0x71);
  emit_byte(0xC0 | encode);
  emit_byte(shift & 0xFF);
}

void Assembler::psrld(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // XMM2 is for /2 encoding: 66 0F 72 /2 ib
  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
  emit_byte(0x72);
  emit_byte(0xC0 | encode);
  emit_byte(shift & 0xFF);
}

void Assembler::psrlq(XMMRegister dst, int shift) {
  // Do not confuse it with psrldq SSE2 instruction which
  // shifts 128 bit value in xmm register by number of bytes.
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // XMM2 is for /2 encoding: 66 0F 73 /2 ib
  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
  emit_byte(0x73);
  emit_byte(0xC0 | encode);
  emit_byte(shift & 0xFF);
}

void Assembler::psrlw(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xD1, dst, shift, VEX_SIMD_66);
}

void Assembler::psrld(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xD2, dst, shift, VEX_SIMD_66);
}

void Assembler::psrlq(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xD3, dst, shift, VEX_SIMD_66);
}

void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  // XMM2 is for /2 encoding: 66 0F 71 /2 ib
  emit_vex_arith(0x71, xmm2, dst, src, VEX_SIMD_66, vector256);
  emit_byte(shift & 0xFF);
}

void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  // XMM2 is for /2 encoding: 66 0F 72 /2 ib
  emit_vex_arith(0x72, xmm2, dst, src, VEX_SIMD_66, vector256);
  emit_byte(shift & 0xFF);
}

void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  // XMM2 is for /2 encoding: 66 0F 73 /2 ib
  emit_vex_arith(0x73, xmm2, dst, src, VEX_SIMD_66, vector256);
  emit_byte(shift & 0xFF);
}

void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xD1, dst, src, shift, VEX_SIMD_66, vector256);
}

void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xD2, dst, src, shift, VEX_SIMD_66, vector256);
}

void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xD3, dst, src, shift, VEX_SIMD_66, vector256);
}

// Shift packed integers arithmetically right by specified number of bits.
void Assembler::psraw(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // XMM4 is for /4 encoding: 66 0F 71 /4 ib
  int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66);
  emit_byte(0x71);
  emit_byte(0xC0 | encode);
  emit_byte(shift & 0xFF);
}

void Assembler::psrad(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // XMM4 is for /4 encoding: 66 0F 72 /4 ib
  int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66);
  emit_byte(0x72);
  emit_byte(0xC0 | encode);
  emit_byte(shift & 0xFF);
}

void Assembler::psraw(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xE1, dst, shift, VEX_SIMD_66);
}

void Assembler::psrad(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xE2, dst, shift, VEX_SIMD_66);
}

void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  // XMM4 is for /4 encoding: 66 0F 71 /4 ib
  emit_vex_arith(0x71, xmm4, dst, src, VEX_SIMD_66, vector256);
  emit_byte(shift & 0xFF);
}

void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  // XMM4 is for /4 encoding: 66 0F 72 /4 ib
  emit_vex_arith(0x72, xmm4, dst, src, VEX_SIMD_66, vector256);
  emit_byte(shift & 0xFF);
}

void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xE1, dst, src, shift, VEX_SIMD_66, vector256);
}

void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xE2, dst, src, shift, VEX_SIMD_66, vector256);
}


// AND packed integers
void Assembler::pand(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xDB, dst, src, VEX_SIMD_66);
}

void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 ||
VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3449 emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector256); 3450} 3451 3452void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3453 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3454 emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector256); 3455} 3456 3457void Assembler::por(XMMRegister dst, XMMRegister src) { 3458 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3459 emit_simd_arith(0xEB, dst, src, VEX_SIMD_66); 3460} 3461 3462void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3463 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3464 emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector256); 3465} 3466 3467void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3468 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3469 emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector256); 3470} 3471 3472void Assembler::pxor(XMMRegister dst, XMMRegister src) { 3473 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3474 emit_simd_arith(0xEF, dst, src, VEX_SIMD_66); 3475} 3476 3477void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3478 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3479 emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector256); 3480} 3481 3482void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3483 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3484 emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector256); 3485} 3486 3487 3488void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) { 3489 assert(VM_Version::supports_avx(), ""); 3490 bool vector256 = true; 3491 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A); 3492 emit_byte(0x18); 3493 emit_byte(0xC0 | encode); 3494 // 0x00 - insert into lower 128 bits 3495 // 0x01 - insert into upper 128 bits 3496 emit_byte(0x01); 3497} 3498 3499void Assembler::vinsertf128h(XMMRegister dst, Address src) { 3500 assert(VM_Version::supports_avx(), ""); 3501 InstructionMark im(this); 3502 bool vector256 = true; 3503 assert(dst != xnoreg, "sanity"); 3504 int dst_enc = dst->encoding(); 3505 // swap src<->dst for encoding 3506 vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256); 3507 emit_byte(0x18); 3508 emit_operand(dst, src); 3509 // 0x01 - insert into upper 128 bits 3510 emit_byte(0x01); 3511} 3512 3513void Assembler::vextractf128h(Address dst, XMMRegister src) { 3514 assert(VM_Version::supports_avx(), ""); 3515 InstructionMark im(this); 3516 bool vector256 = true; 3517 assert(src != xnoreg, "sanity"); 3518 int src_enc = src->encoding(); 3519 vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256); 3520 emit_byte(0x19); 3521 emit_operand(src, dst); 3522 // 0x01 - extract from upper 128 bits 3523 emit_byte(0x01); 3524} 3525 3526void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) { 3527 assert(VM_Version::supports_avx2(), ""); 3528 bool vector256 = true; 3529 int encode = 
vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A); 3530 emit_byte(0x38); 3531 emit_byte(0xC0 | encode); 3532 // 0x00 - insert into lower 128 bits 3533 // 0x01 - insert into upper 128 bits 3534 emit_byte(0x01); 3535} 3536 3537void Assembler::vinserti128h(XMMRegister dst, Address src) { 3538 assert(VM_Version::supports_avx2(), ""); 3539 InstructionMark im(this); 3540 bool vector256 = true; 3541 assert(dst != xnoreg, "sanity"); 3542 int dst_enc = dst->encoding(); 3543 // swap src<->dst for encoding 3544 vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256); 3545 emit_byte(0x38); 3546 emit_operand(dst, src); 3547 // 0x01 - insert into upper 128 bits 3548 emit_byte(0x01); 3549} 3550 3551void Assembler::vextracti128h(Address dst, XMMRegister src) { 3552 assert(VM_Version::supports_avx2(), ""); 3553 InstructionMark im(this); 3554 bool vector256 = true; 3555 assert(src != xnoreg, "sanity"); 3556 int src_enc = src->encoding(); 3557 vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256); 3558 emit_byte(0x39); 3559 emit_operand(src, dst); 3560 // 0x01 - extract from upper 128 bits 3561 emit_byte(0x01); 3562} 3563 3564void Assembler::vzeroupper() { 3565 assert(VM_Version::supports_avx(), ""); 3566 (void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE); 3567 emit_byte(0x77); 3568} 3569 3570 3571#ifndef _LP64 3572// 32bit only pieces of the assembler 3573 3574void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) { 3575 // NO PREFIX AS NEVER 64BIT 3576 InstructionMark im(this); 3577 emit_byte(0x81); 3578 emit_byte(0xF8 | src1->encoding()); 3579 emit_data(imm32, rspec, 0); 3580} 3581 3582void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) { 3583 // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs 3584 InstructionMark im(this); 3585 emit_byte(0x81); 3586 emit_operand(rdi, src1); 3587 emit_data(imm32, rspec, 0); 3588} 3589 3590// The 64-bit (32bit platform) cmpxchg compares the value at adr with the contents of rdx:rax, 3591// and stores rcx:rbx into adr if so; otherwise, the value at adr is loaded 3592// into rdx:rax. The ZF is set if the compared values were equal, and cleared otherwise. 3593void Assembler::cmpxchg8(Address adr) { 3594 InstructionMark im(this); 3595 emit_byte(0x0F); 3596 emit_byte(0xc7); 3597 emit_operand(rcx, adr); 3598} 3599 3600void Assembler::decl(Register dst) { 3601 // Don't use it directly. Use MacroAssembler::decrementl() instead. 
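  // 0x48 | reg is the legacy one-byte DEC r32 form (opcodes 48+rd); for
  // illustration, decl(rbx) assembles to the single byte 0x4B.  On x86-64
  // these opcode bytes are claimed by REX prefixes, which is why the LP64
  // version of decl() further down uses the two-byte FF /1 form instead.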
3602 emit_byte(0x48 | dst->encoding()); 3603} 3604 3605#endif // _LP64 3606 3607// 64bit typically doesn't use the x87 but needs to for the trig funcs 3608 3609void Assembler::fabs() { 3610 emit_byte(0xD9); 3611 emit_byte(0xE1); 3612} 3613 3614void Assembler::fadd(int i) { 3615 emit_farith(0xD8, 0xC0, i); 3616} 3617 3618void Assembler::fadd_d(Address src) { 3619 InstructionMark im(this); 3620 emit_byte(0xDC); 3621 emit_operand32(rax, src); 3622} 3623 3624void Assembler::fadd_s(Address src) { 3625 InstructionMark im(this); 3626 emit_byte(0xD8); 3627 emit_operand32(rax, src); 3628} 3629 3630void Assembler::fadda(int i) { 3631 emit_farith(0xDC, 0xC0, i); 3632} 3633 3634void Assembler::faddp(int i) { 3635 emit_farith(0xDE, 0xC0, i); 3636} 3637 3638void Assembler::fchs() { 3639 emit_byte(0xD9); 3640 emit_byte(0xE0); 3641} 3642 3643void Assembler::fcom(int i) { 3644 emit_farith(0xD8, 0xD0, i); 3645} 3646 3647void Assembler::fcomp(int i) { 3648 emit_farith(0xD8, 0xD8, i); 3649} 3650 3651void Assembler::fcomp_d(Address src) { 3652 InstructionMark im(this); 3653 emit_byte(0xDC); 3654 emit_operand32(rbx, src); 3655} 3656 3657void Assembler::fcomp_s(Address src) { 3658 InstructionMark im(this); 3659 emit_byte(0xD8); 3660 emit_operand32(rbx, src); 3661} 3662 3663void Assembler::fcompp() { 3664 emit_byte(0xDE); 3665 emit_byte(0xD9); 3666} 3667 3668void Assembler::fcos() { 3669 emit_byte(0xD9); 3670 emit_byte(0xFF); 3671} 3672 3673void Assembler::fdecstp() { 3674 emit_byte(0xD9); 3675 emit_byte(0xF6); 3676} 3677 3678void Assembler::fdiv(int i) { 3679 emit_farith(0xD8, 0xF0, i); 3680} 3681 3682void Assembler::fdiv_d(Address src) { 3683 InstructionMark im(this); 3684 emit_byte(0xDC); 3685 emit_operand32(rsi, src); 3686} 3687 3688void Assembler::fdiv_s(Address src) { 3689 InstructionMark im(this); 3690 emit_byte(0xD8); 3691 emit_operand32(rsi, src); 3692} 3693 3694void Assembler::fdiva(int i) { 3695 emit_farith(0xDC, 0xF8, i); 3696} 3697 3698// Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994) 3699// is erroneous for some of the floating-point instructions below. 
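// The register-register forms in this x87 block all go through
// emit_farith(b1, b2, i), which simply emits the two bytes b1 and (b2 + i),
// with i selecting the ST(i) stack slot; fadd(2) above, for example,
// assembles to D8 C2, i.e. FADD ST(0), ST(2).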
3700 3701void Assembler::fdivp(int i) { 3702 emit_farith(0xDE, 0xF8, i); // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong) 3703} 3704 3705void Assembler::fdivr(int i) { 3706 emit_farith(0xD8, 0xF8, i); 3707} 3708 3709void Assembler::fdivr_d(Address src) { 3710 InstructionMark im(this); 3711 emit_byte(0xDC); 3712 emit_operand32(rdi, src); 3713} 3714 3715void Assembler::fdivr_s(Address src) { 3716 InstructionMark im(this); 3717 emit_byte(0xD8); 3718 emit_operand32(rdi, src); 3719} 3720 3721void Assembler::fdivra(int i) { 3722 emit_farith(0xDC, 0xF0, i); 3723} 3724 3725void Assembler::fdivrp(int i) { 3726 emit_farith(0xDE, 0xF0, i); // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong) 3727} 3728 3729void Assembler::ffree(int i) { 3730 emit_farith(0xDD, 0xC0, i); 3731} 3732 3733void Assembler::fild_d(Address adr) { 3734 InstructionMark im(this); 3735 emit_byte(0xDF); 3736 emit_operand32(rbp, adr); 3737} 3738 3739void Assembler::fild_s(Address adr) { 3740 InstructionMark im(this); 3741 emit_byte(0xDB); 3742 emit_operand32(rax, adr); 3743} 3744 3745void Assembler::fincstp() { 3746 emit_byte(0xD9); 3747 emit_byte(0xF7); 3748} 3749 3750void Assembler::finit() { 3751 emit_byte(0x9B); 3752 emit_byte(0xDB); 3753 emit_byte(0xE3); 3754} 3755 3756void Assembler::fist_s(Address adr) { 3757 InstructionMark im(this); 3758 emit_byte(0xDB); 3759 emit_operand32(rdx, adr); 3760} 3761 3762void Assembler::fistp_d(Address adr) { 3763 InstructionMark im(this); 3764 emit_byte(0xDF); 3765 emit_operand32(rdi, adr); 3766} 3767 3768void Assembler::fistp_s(Address adr) { 3769 InstructionMark im(this); 3770 emit_byte(0xDB); 3771 emit_operand32(rbx, adr); 3772} 3773 3774void Assembler::fld1() { 3775 emit_byte(0xD9); 3776 emit_byte(0xE8); 3777} 3778 3779void Assembler::fld_d(Address adr) { 3780 InstructionMark im(this); 3781 emit_byte(0xDD); 3782 emit_operand32(rax, adr); 3783} 3784 3785void Assembler::fld_s(Address adr) { 3786 InstructionMark im(this); 3787 emit_byte(0xD9); 3788 emit_operand32(rax, adr); 3789} 3790 3791 3792void Assembler::fld_s(int index) { 3793 emit_farith(0xD9, 0xC0, index); 3794} 3795 3796void Assembler::fld_x(Address adr) { 3797 InstructionMark im(this); 3798 emit_byte(0xDB); 3799 emit_operand32(rbp, adr); 3800} 3801 3802void Assembler::fldcw(Address src) { 3803 InstructionMark im(this); 3804 emit_byte(0xd9); 3805 emit_operand32(rbp, src); 3806} 3807 3808void Assembler::fldenv(Address src) { 3809 InstructionMark im(this); 3810 emit_byte(0xD9); 3811 emit_operand32(rsp, src); 3812} 3813 3814void Assembler::fldlg2() { 3815 emit_byte(0xD9); 3816 emit_byte(0xEC); 3817} 3818 3819void Assembler::fldln2() { 3820 emit_byte(0xD9); 3821 emit_byte(0xED); 3822} 3823 3824void Assembler::fldz() { 3825 emit_byte(0xD9); 3826 emit_byte(0xEE); 3827} 3828 3829void Assembler::flog() { 3830 fldln2(); 3831 fxch(); 3832 fyl2x(); 3833} 3834 3835void Assembler::flog10() { 3836 fldlg2(); 3837 fxch(); 3838 fyl2x(); 3839} 3840 3841void Assembler::fmul(int i) { 3842 emit_farith(0xD8, 0xC8, i); 3843} 3844 3845void Assembler::fmul_d(Address src) { 3846 InstructionMark im(this); 3847 emit_byte(0xDC); 3848 emit_operand32(rcx, src); 3849} 3850 3851void Assembler::fmul_s(Address src) { 3852 InstructionMark im(this); 3853 emit_byte(0xD8); 3854 emit_operand32(rcx, src); 3855} 3856 3857void Assembler::fmula(int i) { 3858 emit_farith(0xDC, 0xC8, i); 3859} 3860 3861void Assembler::fmulp(int i) { 3862 emit_farith(0xDE, 0xC8, i); 3863} 3864 3865void Assembler::fnsave(Address dst) { 3866 InstructionMark im(this); 3867 emit_byte(0xDD); 
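  // DD /6 is FNSAVE m94/108byte; rsi (encoding 6) below merely supplies the
  // /6 opcode-extension digit in the ModRM reg field, it is not an operand.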
3868 emit_operand32(rsi, dst); 3869} 3870 3871void Assembler::fnstcw(Address src) { 3872 InstructionMark im(this); 3873 emit_byte(0x9B); 3874 emit_byte(0xD9); 3875 emit_operand32(rdi, src); 3876} 3877 3878void Assembler::fnstsw_ax() { 3879 emit_byte(0xdF); 3880 emit_byte(0xE0); 3881} 3882 3883void Assembler::fprem() { 3884 emit_byte(0xD9); 3885 emit_byte(0xF8); 3886} 3887 3888void Assembler::fprem1() { 3889 emit_byte(0xD9); 3890 emit_byte(0xF5); 3891} 3892 3893void Assembler::frstor(Address src) { 3894 InstructionMark im(this); 3895 emit_byte(0xDD); 3896 emit_operand32(rsp, src); 3897} 3898 3899void Assembler::fsin() { 3900 emit_byte(0xD9); 3901 emit_byte(0xFE); 3902} 3903 3904void Assembler::fsqrt() { 3905 emit_byte(0xD9); 3906 emit_byte(0xFA); 3907} 3908 3909void Assembler::fst_d(Address adr) { 3910 InstructionMark im(this); 3911 emit_byte(0xDD); 3912 emit_operand32(rdx, adr); 3913} 3914 3915void Assembler::fst_s(Address adr) { 3916 InstructionMark im(this); 3917 emit_byte(0xD9); 3918 emit_operand32(rdx, adr); 3919} 3920 3921void Assembler::fstp_d(Address adr) { 3922 InstructionMark im(this); 3923 emit_byte(0xDD); 3924 emit_operand32(rbx, adr); 3925} 3926 3927void Assembler::fstp_d(int index) { 3928 emit_farith(0xDD, 0xD8, index); 3929} 3930 3931void Assembler::fstp_s(Address adr) { 3932 InstructionMark im(this); 3933 emit_byte(0xD9); 3934 emit_operand32(rbx, adr); 3935} 3936 3937void Assembler::fstp_x(Address adr) { 3938 InstructionMark im(this); 3939 emit_byte(0xDB); 3940 emit_operand32(rdi, adr); 3941} 3942 3943void Assembler::fsub(int i) { 3944 emit_farith(0xD8, 0xE0, i); 3945} 3946 3947void Assembler::fsub_d(Address src) { 3948 InstructionMark im(this); 3949 emit_byte(0xDC); 3950 emit_operand32(rsp, src); 3951} 3952 3953void Assembler::fsub_s(Address src) { 3954 InstructionMark im(this); 3955 emit_byte(0xD8); 3956 emit_operand32(rsp, src); 3957} 3958 3959void Assembler::fsuba(int i) { 3960 emit_farith(0xDC, 0xE8, i); 3961} 3962 3963void Assembler::fsubp(int i) { 3964 emit_farith(0xDE, 0xE8, i); // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong) 3965} 3966 3967void Assembler::fsubr(int i) { 3968 emit_farith(0xD8, 0xE8, i); 3969} 3970 3971void Assembler::fsubr_d(Address src) { 3972 InstructionMark im(this); 3973 emit_byte(0xDC); 3974 emit_operand32(rbp, src); 3975} 3976 3977void Assembler::fsubr_s(Address src) { 3978 InstructionMark im(this); 3979 emit_byte(0xD8); 3980 emit_operand32(rbp, src); 3981} 3982 3983void Assembler::fsubra(int i) { 3984 emit_farith(0xDC, 0xE0, i); 3985} 3986 3987void Assembler::fsubrp(int i) { 3988 emit_farith(0xDE, 0xE0, i); // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong) 3989} 3990 3991void Assembler::ftan() { 3992 emit_byte(0xD9); 3993 emit_byte(0xF2); 3994 emit_byte(0xDD); 3995 emit_byte(0xD8); 3996} 3997 3998void Assembler::ftst() { 3999 emit_byte(0xD9); 4000 emit_byte(0xE4); 4001} 4002 4003void Assembler::fucomi(int i) { 4004 // make sure the instruction is supported (introduced for P6, together with cmov) 4005 guarantee(VM_Version::supports_cmov(), "illegal instruction"); 4006 emit_farith(0xDB, 0xE8, i); 4007} 4008 4009void Assembler::fucomip(int i) { 4010 // make sure the instruction is supported (introduced for P6, together with cmov) 4011 guarantee(VM_Version::supports_cmov(), "illegal instruction"); 4012 emit_farith(0xDF, 0xE8, i); 4013} 4014 4015void Assembler::fwait() { 4016 emit_byte(0x9B); 4017} 4018 4019void Assembler::fxch(int i) { 4020 emit_farith(0xD9, 0xC8, i); 4021} 4022 4023void Assembler::fyl2x() { 4024 emit_byte(0xD9); 4025 
emit_byte(0xF1); 4026} 4027 4028void Assembler::frndint() { 4029 emit_byte(0xD9); 4030 emit_byte(0xFC); 4031} 4032 4033void Assembler::f2xm1() { 4034 emit_byte(0xD9); 4035 emit_byte(0xF0); 4036} 4037 4038void Assembler::fldl2e() { 4039 emit_byte(0xD9); 4040 emit_byte(0xEA); 4041} 4042 4043// SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding. 4044static int simd_pre[4] = { 0, 0x66, 0xF3, 0xF2 }; 4045// SSE opcode second byte values (first is 0x0F) corresponding to VexOpcode encoding. 4046static int simd_opc[4] = { 0, 0, 0x38, 0x3A }; 4047 4048// Generate SSE legacy REX prefix and SIMD opcode based on VEX encoding. 4049void Assembler::rex_prefix(Address adr, XMMRegister xreg, VexSimdPrefix pre, VexOpcode opc, bool rex_w) { 4050 if (pre > 0) { 4051 emit_byte(simd_pre[pre]); 4052 } 4053 if (rex_w) { 4054 prefixq(adr, xreg); 4055 } else { 4056 prefix(adr, xreg); 4057 } 4058 if (opc > 0) { 4059 emit_byte(0x0F); 4060 int opc2 = simd_opc[opc]; 4061 if (opc2 > 0) { 4062 emit_byte(opc2); 4063 } 4064 } 4065} 4066 4067int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) { 4068 if (pre > 0) { 4069 emit_byte(simd_pre[pre]); 4070 } 4071 int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) : 4072 prefix_and_encode(dst_enc, src_enc); 4073 if (opc > 0) { 4074 emit_byte(0x0F); 4075 int opc2 = simd_opc[opc]; 4076 if (opc2 > 0) { 4077 emit_byte(opc2); 4078 } 4079 } 4080 return encode; 4081} 4082 4083 4084void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int nds_enc, VexSimdPrefix pre, VexOpcode opc, bool vector256) { 4085 if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) { 4086 prefix(VEX_3bytes); 4087 4088 int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0); 4089 byte1 = (~byte1) & 0xE0; 4090 byte1 |= opc; 4091 a_byte(byte1); 4092 4093 int byte2 = ((~nds_enc) & 0xf) << 3; 4094 byte2 |= (vex_w ? VEX_W : 0) | (vector256 ? 4 : 0) | pre; 4095 emit_byte(byte2); 4096 } else { 4097 prefix(VEX_2bytes); 4098 4099 int byte1 = vex_r ? VEX_R : 0; 4100 byte1 = (~byte1) & 0x80; 4101 byte1 |= ((~nds_enc) & 0xf) << 3; 4102 byte1 |= (vector256 ? 4 : 0) | pre; 4103 emit_byte(byte1); 4104 } 4105} 4106 4107void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256){ 4108 bool vex_r = (xreg_enc >= 8); 4109 bool vex_b = adr.base_needs_rex(); 4110 bool vex_x = adr.index_needs_rex(); 4111 vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256); 4112} 4113 4114int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256) { 4115 bool vex_r = (dst_enc >= 8); 4116 bool vex_b = (src_enc >= 8); 4117 bool vex_x = false; 4118 vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256); 4119 return (((dst_enc & 7) << 3) | (src_enc & 7)); 4120} 4121 4122 4123void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) { 4124 if (UseAVX > 0) { 4125 int xreg_enc = xreg->encoding(); 4126 int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; 4127 vex_prefix(adr, nds_enc, xreg_enc, pre, opc, rex_w, vector256); 4128 } else { 4129 assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding"); 4130 rex_prefix(adr, xreg, pre, opc, rex_w); 4131 } 4132} 4133 4134int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) { 4135 int dst_enc = dst->encoding(); 4136 int src_enc = src->encoding(); 4137 if (UseAVX > 0) { 4138 int nds_enc = nds->is_valid() ? nds->encoding() : 0; 4139 return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector256); 4140 } else { 4141 assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding"); 4142 return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, rex_w); 4143 } 4144} 4145 4146void Assembler::emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) { 4147 InstructionMark im(this); 4148 simd_prefix(dst, dst, src, pre); 4149 emit_byte(opcode); 4150 emit_operand(dst, src); 4151} 4152 4153void Assembler::emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) { 4154 int encode = simd_prefix_and_encode(dst, dst, src, pre); 4155 emit_byte(opcode); 4156 emit_byte(0xC0 | encode); 4157} 4158 4159// Versions with no second source register (non-destructive source). 4160void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) { 4161 InstructionMark im(this); 4162 simd_prefix(dst, xnoreg, src, pre); 4163 emit_byte(opcode); 4164 emit_operand(dst, src); 4165} 4166 4167void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) { 4168 int encode = simd_prefix_and_encode(dst, xnoreg, src, pre); 4169 emit_byte(opcode); 4170 emit_byte(0xC0 | encode); 4171} 4172 4173// 3-operands AVX instructions 4174void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, 4175 Address src, VexSimdPrefix pre, bool vector256) { 4176 InstructionMark im(this); 4177 vex_prefix(dst, nds, src, pre, vector256); 4178 emit_byte(opcode); 4179 emit_operand(dst, src); 4180} 4181 4182void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, 4183 XMMRegister src, VexSimdPrefix pre, bool vector256) { 4184 int encode = vex_prefix_and_encode(dst, nds, src, pre, vector256); 4185 emit_byte(opcode); 4186 emit_byte(0xC0 | encode); 4187} 4188 4189#ifndef _LP64 4190 4191void Assembler::incl(Register dst) { 4192 // Don't use it directly. Use MacroAssembler::incrementl() instead. 
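  // 0x40 | reg is the legacy one-byte INC r32 form (opcodes 40+rd); for
  // illustration, incl(rsi) assembles to the single byte 0x46.
  // MacroAssembler::incrementl() is preferred because it can substitute
  // addl(reg, 1) when UseIncDec is off: INC/DEC leave CF untouched and can
  // stall on partial flag updates on some cores.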
4193 emit_byte(0x40 | dst->encoding()); 4194} 4195 4196void Assembler::lea(Register dst, Address src) { 4197 leal(dst, src); 4198} 4199 4200void Assembler::mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec) { 4201 InstructionMark im(this); 4202 emit_byte(0xC7); 4203 emit_operand(rax, dst); 4204 emit_data((int)imm32, rspec, 0); 4205} 4206 4207void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) { 4208 InstructionMark im(this); 4209 int encode = prefix_and_encode(dst->encoding()); 4210 emit_byte(0xB8 | encode); 4211 emit_data((int)imm32, rspec, 0); 4212} 4213 4214void Assembler::popa() { // 32bit 4215 emit_byte(0x61); 4216} 4217 4218void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) { 4219 InstructionMark im(this); 4220 emit_byte(0x68); 4221 emit_data(imm32, rspec, 0); 4222} 4223 4224void Assembler::pusha() { // 32bit 4225 emit_byte(0x60); 4226} 4227 4228void Assembler::set_byte_if_not_zero(Register dst) { 4229 emit_byte(0x0F); 4230 emit_byte(0x95); 4231 emit_byte(0xE0 | dst->encoding()); 4232} 4233 4234void Assembler::shldl(Register dst, Register src) { 4235 emit_byte(0x0F); 4236 emit_byte(0xA5); 4237 emit_byte(0xC0 | src->encoding() << 3 | dst->encoding()); 4238} 4239 4240void Assembler::shrdl(Register dst, Register src) { 4241 emit_byte(0x0F); 4242 emit_byte(0xAD); 4243 emit_byte(0xC0 | src->encoding() << 3 | dst->encoding()); 4244} 4245 4246#else // LP64 4247 4248void Assembler::set_byte_if_not_zero(Register dst) { 4249 int enc = prefix_and_encode(dst->encoding(), true); 4250 emit_byte(0x0F); 4251 emit_byte(0x95); 4252 emit_byte(0xE0 | enc); 4253} 4254 4255// 64bit only pieces of the assembler 4256// This should only be used by 64bit instructions that can use rip-relative 4257// it cannot be used by instructions that want an immediate value. 4258 4259bool Assembler::reachable(AddressLiteral adr) { 4260 int64_t disp; 4261 // None will force a 64bit literal to the code stream. Likely a placeholder 4262 // for something that will be patched later and we need to certain it will 4263 // always be reachable. 4264 if (adr.reloc() == relocInfo::none) { 4265 return false; 4266 } 4267 if (adr.reloc() == relocInfo::internal_word_type) { 4268 // This should be rip relative and easily reachable. 4269 return true; 4270 } 4271 if (adr.reloc() == relocInfo::virtual_call_type || 4272 adr.reloc() == relocInfo::opt_virtual_call_type || 4273 adr.reloc() == relocInfo::static_call_type || 4274 adr.reloc() == relocInfo::static_stub_type ) { 4275 // This should be rip relative within the code cache and easily 4276 // reachable until we get huge code caches. (At which point 4277 // ic code is going to have issues). 4278 return true; 4279 } 4280 if (adr.reloc() != relocInfo::external_word_type && 4281 adr.reloc() != relocInfo::poll_return_type && // these are really external_word but need special 4282 adr.reloc() != relocInfo::poll_type && // relocs to identify them 4283 adr.reloc() != relocInfo::runtime_call_type ) { 4284 return false; 4285 } 4286 4287 // Stress the correction code 4288 if (ForceUnreachable) { 4289 // Must be runtimecall reloc, see if it is in the codecache 4290 // Flipping stuff in the codecache to be unreachable causes issues 4291 // with things like inline caches where the additional instructions 4292 // are not handled. 
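  // That is, under ForceUnreachable only targets outside the code cache are
  // pushed onto the far (64-bit literal) path; targets inside the cache, such
  // as inline cache stubs, keep their normal rip-relative handling.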
4293 if (CodeCache::find_blob(adr._target) == NULL) { 4294 return false; 4295 } 4296 } 4297 // For external_word_type/runtime_call_type if it is reachable from where we 4298 // are now (possibly a temp buffer) and where we might end up 4299 // anywhere in the codeCache then we are always reachable. 4300 // This would have to change if we ever save/restore shared code 4301 // to be more pessimistic. 4302 disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int)); 4303 if (!is_simm32(disp)) return false; 4304 disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int)); 4305 if (!is_simm32(disp)) return false; 4306 4307 disp = (int64_t)adr._target - ((int64_t)_code_pos + sizeof(int)); 4308 4309 // Because rip relative is a disp + address_of_next_instruction and we 4310 // don't know the value of address_of_next_instruction we apply a fudge factor 4311 // to make sure we will be ok no matter the size of the instruction we get placed into. 4312 // We don't have to fudge the checks above here because they are already worst case. 4313 4314 // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp , 4-byte literal 4315 // + 4 because better safe than sorry. 4316 const int fudge = 12 + 4; 4317 if (disp < 0) { 4318 disp -= fudge; 4319 } else { 4320 disp += fudge; 4321 } 4322 return is_simm32(disp); 4323} 4324 4325// Check if the polling page is not reachable from the code cache using rip-relative 4326// addressing. 4327bool Assembler::is_polling_page_far() { 4328 intptr_t addr = (intptr_t)os::get_polling_page(); 4329 return ForceUnreachable || 4330 !is_simm32(addr - (intptr_t)CodeCache::low_bound()) || 4331 !is_simm32(addr - (intptr_t)CodeCache::high_bound()); 4332} 4333 4334void Assembler::emit_data64(jlong data, 4335 relocInfo::relocType rtype, 4336 int format) { 4337 if (rtype == relocInfo::none) { 4338 emit_long64(data); 4339 } else { 4340 emit_data64(data, Relocation::spec_simple(rtype), format); 4341 } 4342} 4343 4344void Assembler::emit_data64(jlong data, 4345 RelocationHolder const& rspec, 4346 int format) { 4347 assert(imm_operand == 0, "default format must be immediate in this file"); 4348 assert(imm_operand == format, "must be immediate"); 4349 assert(inst_mark() != NULL, "must be inside InstructionMark"); 4350 // Do not use AbstractAssembler::relocate, which is not intended for 4351 // embedded words. Instead, relocate to the enclosing instruction. 
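  // inst_mark() is the pc captured by the enclosing InstructionMark, i.e. the
  // first byte of the instruction embedding this word, so the relocation is
  // bound to an instruction boundary rather than to the raw data word.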
4352 code_section()->relocate(inst_mark(), rspec, format); 4353#ifdef ASSERT 4354 check_relocation(rspec, format); 4355#endif 4356 emit_long64(data); 4357} 4358 4359int Assembler::prefix_and_encode(int reg_enc, bool byteinst) { 4360 if (reg_enc >= 8) { 4361 prefix(REX_B); 4362 reg_enc -= 8; 4363 } else if (byteinst && reg_enc >= 4) { 4364 prefix(REX); 4365 } 4366 return reg_enc; 4367} 4368 4369int Assembler::prefixq_and_encode(int reg_enc) { 4370 if (reg_enc < 8) { 4371 prefix(REX_W); 4372 } else { 4373 prefix(REX_WB); 4374 reg_enc -= 8; 4375 } 4376 return reg_enc; 4377} 4378 4379int Assembler::prefix_and_encode(int dst_enc, int src_enc, bool byteinst) { 4380 if (dst_enc < 8) { 4381 if (src_enc >= 8) { 4382 prefix(REX_B); 4383 src_enc -= 8; 4384 } else if (byteinst && src_enc >= 4) { 4385 prefix(REX); 4386 } 4387 } else { 4388 if (src_enc < 8) { 4389 prefix(REX_R); 4390 } else { 4391 prefix(REX_RB); 4392 src_enc -= 8; 4393 } 4394 dst_enc -= 8; 4395 } 4396 return dst_enc << 3 | src_enc; 4397} 4398 4399int Assembler::prefixq_and_encode(int dst_enc, int src_enc) { 4400 if (dst_enc < 8) { 4401 if (src_enc < 8) { 4402 prefix(REX_W); 4403 } else { 4404 prefix(REX_WB); 4405 src_enc -= 8; 4406 } 4407 } else { 4408 if (src_enc < 8) { 4409 prefix(REX_WR); 4410 } else { 4411 prefix(REX_WRB); 4412 src_enc -= 8; 4413 } 4414 dst_enc -= 8; 4415 } 4416 return dst_enc << 3 | src_enc; 4417} 4418 4419void Assembler::prefix(Register reg) { 4420 if (reg->encoding() >= 8) { 4421 prefix(REX_B); 4422 } 4423} 4424 4425void Assembler::prefix(Address adr) { 4426 if (adr.base_needs_rex()) { 4427 if (adr.index_needs_rex()) { 4428 prefix(REX_XB); 4429 } else { 4430 prefix(REX_B); 4431 } 4432 } else { 4433 if (adr.index_needs_rex()) { 4434 prefix(REX_X); 4435 } 4436 } 4437} 4438 4439void Assembler::prefixq(Address adr) { 4440 if (adr.base_needs_rex()) { 4441 if (adr.index_needs_rex()) { 4442 prefix(REX_WXB); 4443 } else { 4444 prefix(REX_WB); 4445 } 4446 } else { 4447 if (adr.index_needs_rex()) { 4448 prefix(REX_WX); 4449 } else { 4450 prefix(REX_W); 4451 } 4452 } 4453} 4454 4455 4456void Assembler::prefix(Address adr, Register reg, bool byteinst) { 4457 if (reg->encoding() < 8) { 4458 if (adr.base_needs_rex()) { 4459 if (adr.index_needs_rex()) { 4460 prefix(REX_XB); 4461 } else { 4462 prefix(REX_B); 4463 } 4464 } else { 4465 if (adr.index_needs_rex()) { 4466 prefix(REX_X); 4467 } else if (byteinst && reg->encoding() >= 4 ) { 4468 prefix(REX); 4469 } 4470 } 4471 } else { 4472 if (adr.base_needs_rex()) { 4473 if (adr.index_needs_rex()) { 4474 prefix(REX_RXB); 4475 } else { 4476 prefix(REX_RB); 4477 } 4478 } else { 4479 if (adr.index_needs_rex()) { 4480 prefix(REX_RX); 4481 } else { 4482 prefix(REX_R); 4483 } 4484 } 4485 } 4486} 4487 4488void Assembler::prefixq(Address adr, Register src) { 4489 if (src->encoding() < 8) { 4490 if (adr.base_needs_rex()) { 4491 if (adr.index_needs_rex()) { 4492 prefix(REX_WXB); 4493 } else { 4494 prefix(REX_WB); 4495 } 4496 } else { 4497 if (adr.index_needs_rex()) { 4498 prefix(REX_WX); 4499 } else { 4500 prefix(REX_W); 4501 } 4502 } 4503 } else { 4504 if (adr.base_needs_rex()) { 4505 if (adr.index_needs_rex()) { 4506 prefix(REX_WRXB); 4507 } else { 4508 prefix(REX_WRB); 4509 } 4510 } else { 4511 if (adr.index_needs_rex()) { 4512 prefix(REX_WRX); 4513 } else { 4514 prefix(REX_WR); 4515 } 4516 } 4517 } 4518} 4519 4520void Assembler::prefix(Address adr, XMMRegister reg) { 4521 if (reg->encoding() < 8) { 4522 if (adr.base_needs_rex()) { 4523 if (adr.index_needs_rex()) { 4524 prefix(REX_XB); 
4525 } else { 4526 prefix(REX_B); 4527 } 4528 } else { 4529 if (adr.index_needs_rex()) { 4530 prefix(REX_X); 4531 } 4532 } 4533 } else { 4534 if (adr.base_needs_rex()) { 4535 if (adr.index_needs_rex()) { 4536 prefix(REX_RXB); 4537 } else { 4538 prefix(REX_RB); 4539 } 4540 } else { 4541 if (adr.index_needs_rex()) { 4542 prefix(REX_RX); 4543 } else { 4544 prefix(REX_R); 4545 } 4546 } 4547 } 4548} 4549 4550void Assembler::prefixq(Address adr, XMMRegister src) { 4551 if (src->encoding() < 8) { 4552 if (adr.base_needs_rex()) { 4553 if (adr.index_needs_rex()) { 4554 prefix(REX_WXB); 4555 } else { 4556 prefix(REX_WB); 4557 } 4558 } else { 4559 if (adr.index_needs_rex()) { 4560 prefix(REX_WX); 4561 } else { 4562 prefix(REX_W); 4563 } 4564 } 4565 } else { 4566 if (adr.base_needs_rex()) { 4567 if (adr.index_needs_rex()) { 4568 prefix(REX_WRXB); 4569 } else { 4570 prefix(REX_WRB); 4571 } 4572 } else { 4573 if (adr.index_needs_rex()) { 4574 prefix(REX_WRX); 4575 } else { 4576 prefix(REX_WR); 4577 } 4578 } 4579 } 4580} 4581 4582void Assembler::adcq(Register dst, int32_t imm32) { 4583 (void) prefixq_and_encode(dst->encoding()); 4584 emit_arith(0x81, 0xD0, dst, imm32); 4585} 4586 4587void Assembler::adcq(Register dst, Address src) { 4588 InstructionMark im(this); 4589 prefixq(src, dst); 4590 emit_byte(0x13); 4591 emit_operand(dst, src); 4592} 4593 4594void Assembler::adcq(Register dst, Register src) { 4595 (int) prefixq_and_encode(dst->encoding(), src->encoding()); 4596 emit_arith(0x13, 0xC0, dst, src); 4597} 4598 4599void Assembler::addq(Address dst, int32_t imm32) { 4600 InstructionMark im(this); 4601 prefixq(dst); 4602 emit_arith_operand(0x81, rax, dst,imm32); 4603} 4604 4605void Assembler::addq(Address dst, Register src) { 4606 InstructionMark im(this); 4607 prefixq(dst, src); 4608 emit_byte(0x01); 4609 emit_operand(src, dst); 4610} 4611 4612void Assembler::addq(Register dst, int32_t imm32) { 4613 (void) prefixq_and_encode(dst->encoding()); 4614 emit_arith(0x81, 0xC0, dst, imm32); 4615} 4616 4617void Assembler::addq(Register dst, Address src) { 4618 InstructionMark im(this); 4619 prefixq(src, dst); 4620 emit_byte(0x03); 4621 emit_operand(dst, src); 4622} 4623 4624void Assembler::addq(Register dst, Register src) { 4625 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 4626 emit_arith(0x03, 0xC0, dst, src); 4627} 4628 4629void Assembler::andq(Address dst, int32_t imm32) { 4630 InstructionMark im(this); 4631 prefixq(dst); 4632 emit_byte(0x81); 4633 emit_operand(rsp, dst, 4); 4634 emit_long(imm32); 4635} 4636 4637void Assembler::andq(Register dst, int32_t imm32) { 4638 (void) prefixq_and_encode(dst->encoding()); 4639 emit_arith(0x81, 0xE0, dst, imm32); 4640} 4641 4642void Assembler::andq(Register dst, Address src) { 4643 InstructionMark im(this); 4644 prefixq(src, dst); 4645 emit_byte(0x23); 4646 emit_operand(dst, src); 4647} 4648 4649void Assembler::andq(Register dst, Register src) { 4650 (int) prefixq_and_encode(dst->encoding(), src->encoding()); 4651 emit_arith(0x23, 0xC0, dst, src); 4652} 4653 4654void Assembler::bsfq(Register dst, Register src) { 4655 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4656 emit_byte(0x0F); 4657 emit_byte(0xBC); 4658 emit_byte(0xC0 | encode); 4659} 4660 4661void Assembler::bsrq(Register dst, Register src) { 4662 assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT"); 4663 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4664 emit_byte(0x0F); 4665 emit_byte(0xBD); 4666 emit_byte(0xC0 | encode); 4667} 4668 
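// For illustration, the REX byte assembled by the prefixq_* helpers above has
// the layout 0100WRXB: W selects 64-bit operand size, while R, X and B extend
// the ModRM reg field, the SIB index and the ModRM r/m (or SIB base) fields.
// bsrq(r10, r8), for example, goes through prefixq_and_encode(10, 8), which
// emits REX_WRB (0x4D) and returns ModRM bits 0x10, giving the full encoding
// 4D 0F BD D0.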
4669void Assembler::bswapq(Register reg) { 4670 int encode = prefixq_and_encode(reg->encoding()); 4671 emit_byte(0x0F); 4672 emit_byte(0xC8 | encode); 4673} 4674 4675void Assembler::cdqq() { 4676 prefix(REX_W); 4677 emit_byte(0x99); 4678} 4679 4680void Assembler::clflush(Address adr) { 4681 prefix(adr); 4682 emit_byte(0x0F); 4683 emit_byte(0xAE); 4684 emit_operand(rdi, adr); 4685} 4686 4687void Assembler::cmovq(Condition cc, Register dst, Register src) { 4688 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4689 emit_byte(0x0F); 4690 emit_byte(0x40 | cc); 4691 emit_byte(0xC0 | encode); 4692} 4693 4694void Assembler::cmovq(Condition cc, Register dst, Address src) { 4695 InstructionMark im(this); 4696 prefixq(src, dst); 4697 emit_byte(0x0F); 4698 emit_byte(0x40 | cc); 4699 emit_operand(dst, src); 4700} 4701 4702void Assembler::cmpq(Address dst, int32_t imm32) { 4703 InstructionMark im(this); 4704 prefixq(dst); 4705 emit_byte(0x81); 4706 emit_operand(rdi, dst, 4); 4707 emit_long(imm32); 4708} 4709 4710void Assembler::cmpq(Register dst, int32_t imm32) { 4711 (void) prefixq_and_encode(dst->encoding()); 4712 emit_arith(0x81, 0xF8, dst, imm32); 4713} 4714 4715void Assembler::cmpq(Address dst, Register src) { 4716 InstructionMark im(this); 4717 prefixq(dst, src); 4718 emit_byte(0x3B); 4719 emit_operand(src, dst); 4720} 4721 4722void Assembler::cmpq(Register dst, Register src) { 4723 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 4724 emit_arith(0x3B, 0xC0, dst, src); 4725} 4726 4727void Assembler::cmpq(Register dst, Address src) { 4728 InstructionMark im(this); 4729 prefixq(src, dst); 4730 emit_byte(0x3B); 4731 emit_operand(dst, src); 4732} 4733 4734void Assembler::cmpxchgq(Register reg, Address adr) { 4735 InstructionMark im(this); 4736 prefixq(adr, reg); 4737 emit_byte(0x0F); 4738 emit_byte(0xB1); 4739 emit_operand(reg, adr); 4740} 4741 4742void Assembler::cvtsi2sdq(XMMRegister dst, Register src) { 4743 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4744 int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2); 4745 emit_byte(0x2A); 4746 emit_byte(0xC0 | encode); 4747} 4748 4749void Assembler::cvtsi2sdq(XMMRegister dst, Address src) { 4750 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4751 InstructionMark im(this); 4752 simd_prefix_q(dst, dst, src, VEX_SIMD_F2); 4753 emit_byte(0x2A); 4754 emit_operand(dst, src); 4755} 4756 4757void Assembler::cvtsi2ssq(XMMRegister dst, Register src) { 4758 NOT_LP64(assert(VM_Version::supports_sse(), "")); 4759 int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3); 4760 emit_byte(0x2A); 4761 emit_byte(0xC0 | encode); 4762} 4763 4764void Assembler::cvtsi2ssq(XMMRegister dst, Address src) { 4765 NOT_LP64(assert(VM_Version::supports_sse(), "")); 4766 InstructionMark im(this); 4767 simd_prefix_q(dst, dst, src, VEX_SIMD_F3); 4768 emit_byte(0x2A); 4769 emit_operand(dst, src); 4770} 4771 4772void Assembler::cvttsd2siq(Register dst, XMMRegister src) { 4773 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4774 int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2); 4775 emit_byte(0x2C); 4776 emit_byte(0xC0 | encode); 4777} 4778 4779void Assembler::cvttss2siq(Register dst, XMMRegister src) { 4780 NOT_LP64(assert(VM_Version::supports_sse(), "")); 4781 int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3); 4782 emit_byte(0x2C); 4783 emit_byte(0xC0 | encode); 4784} 4785 4786void Assembler::decl(Register dst) { 4787 // Don't use it directly. Use MacroAssembler::decrementl() instead. 
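  // For illustration, decl(r9) comes out as 41 FF C9 (REX.B, FF /1, ModRM C9).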
4788 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode) 4789 int encode = prefix_and_encode(dst->encoding()); 4790 emit_byte(0xFF); 4791 emit_byte(0xC8 | encode); 4792} 4793 4794void Assembler::decq(Register dst) { 4795 // Don't use it directly. Use MacroAssembler::decrementq() instead. 4796 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode) 4797 int encode = prefixq_and_encode(dst->encoding()); 4798 emit_byte(0xFF); 4799 emit_byte(0xC8 | encode); 4800} 4801 4802void Assembler::decq(Address dst) { 4803 // Don't use it directly. Use MacroAssembler::decrementq() instead. 4804 InstructionMark im(this); 4805 prefixq(dst); 4806 emit_byte(0xFF); 4807 emit_operand(rcx, dst); 4808} 4809 4810void Assembler::fxrstor(Address src) { 4811 prefixq(src); 4812 emit_byte(0x0F); 4813 emit_byte(0xAE); 4814 emit_operand(as_Register(1), src); 4815} 4816 4817void Assembler::fxsave(Address dst) { 4818 prefixq(dst); 4819 emit_byte(0x0F); 4820 emit_byte(0xAE); 4821 emit_operand(as_Register(0), dst); 4822} 4823 4824void Assembler::idivq(Register src) { 4825 int encode = prefixq_and_encode(src->encoding()); 4826 emit_byte(0xF7); 4827 emit_byte(0xF8 | encode); 4828} 4829 4830void Assembler::imulq(Register dst, Register src) { 4831 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4832 emit_byte(0x0F); 4833 emit_byte(0xAF); 4834 emit_byte(0xC0 | encode); 4835} 4836 4837void Assembler::imulq(Register dst, Register src, int value) { 4838 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4839 if (is8bit(value)) { 4840 emit_byte(0x6B); 4841 emit_byte(0xC0 | encode); 4842 emit_byte(value & 0xFF); 4843 } else { 4844 emit_byte(0x69); 4845 emit_byte(0xC0 | encode); 4846 emit_long(value); 4847 } 4848} 4849 4850void Assembler::incl(Register dst) { 4851 // Don't use it directly. Use MacroAssembler::incrementl() instead. 4852 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode) 4853 int encode = prefix_and_encode(dst->encoding()); 4854 emit_byte(0xFF); 4855 emit_byte(0xC0 | encode); 4856} 4857 4858void Assembler::incq(Register dst) { 4859 // Don't use it directly. Use MacroAssembler::incrementq() instead. 4860 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode) 4861 int encode = prefixq_and_encode(dst->encoding()); 4862 emit_byte(0xFF); 4863 emit_byte(0xC0 | encode); 4864} 4865 4866void Assembler::incq(Address dst) { 4867 // Don't use it directly. Use MacroAssembler::incrementq() instead. 
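  // FF /0 is INC r/m64; rax below only supplies the /0 opcode-extension digit
  // in the ModRM reg field (the same idiom as the xmm2-for-/2 shift encodings
  // earlier in this file), it is not a real operand.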
4868 InstructionMark im(this); 4869 prefixq(dst); 4870 emit_byte(0xFF); 4871 emit_operand(rax, dst); 4872} 4873 4874void Assembler::lea(Register dst, Address src) { 4875 leaq(dst, src); 4876} 4877 4878void Assembler::leaq(Register dst, Address src) { 4879 InstructionMark im(this); 4880 prefixq(src, dst); 4881 emit_byte(0x8D); 4882 emit_operand(dst, src); 4883} 4884 4885void Assembler::mov64(Register dst, int64_t imm64) { 4886 InstructionMark im(this); 4887 int encode = prefixq_and_encode(dst->encoding()); 4888 emit_byte(0xB8 | encode); 4889 emit_long64(imm64); 4890} 4891 4892void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) { 4893 InstructionMark im(this); 4894 int encode = prefixq_and_encode(dst->encoding()); 4895 emit_byte(0xB8 | encode); 4896 emit_data64(imm64, rspec); 4897} 4898 4899void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) { 4900 InstructionMark im(this); 4901 int encode = prefix_and_encode(dst->encoding()); 4902 emit_byte(0xB8 | encode); 4903 emit_data((int)imm32, rspec, narrow_oop_operand); 4904} 4905 4906void Assembler::mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder const& rspec) { 4907 InstructionMark im(this); 4908 prefix(dst); 4909 emit_byte(0xC7); 4910 emit_operand(rax, dst, 4); 4911 emit_data((int)imm32, rspec, narrow_oop_operand); 4912} 4913 4914void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) { 4915 InstructionMark im(this); 4916 int encode = prefix_and_encode(src1->encoding()); 4917 emit_byte(0x81); 4918 emit_byte(0xF8 | encode); 4919 emit_data((int)imm32, rspec, narrow_oop_operand); 4920} 4921 4922void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) { 4923 InstructionMark im(this); 4924 prefix(src1); 4925 emit_byte(0x81); 4926 emit_operand(rax, src1, 4); 4927 emit_data((int)imm32, rspec, narrow_oop_operand); 4928} 4929 4930void Assembler::lzcntq(Register dst, Register src) { 4931 assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR"); 4932 emit_byte(0xF3); 4933 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4934 emit_byte(0x0F); 4935 emit_byte(0xBD); 4936 emit_byte(0xC0 | encode); 4937} 4938 4939void Assembler::movdq(XMMRegister dst, Register src) { 4940 // table D-1 says MMX/SSE2 4941 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4942 int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66); 4943 emit_byte(0x6E); 4944 emit_byte(0xC0 | encode); 4945} 4946 4947void Assembler::movdq(Register dst, XMMRegister src) { 4948 // table D-1 says MMX/SSE2 4949 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4950 // swap src/dst to get correct prefix 4951 int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66); 4952 emit_byte(0x7E); 4953 emit_byte(0xC0 | encode); 4954} 4955 4956void Assembler::movq(Register dst, Register src) { 4957 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4958 emit_byte(0x8B); 4959 emit_byte(0xC0 | encode); 4960} 4961 4962void Assembler::movq(Register dst, Address src) { 4963 InstructionMark im(this); 4964 prefixq(src, dst); 4965 emit_byte(0x8B); 4966 emit_operand(dst, src); 4967} 4968 4969void Assembler::movq(Address dst, Register src) { 4970 InstructionMark im(this); 4971 prefixq(dst, src); 4972 emit_byte(0x89); 4973 emit_operand(src, dst); 4974} 4975 4976void Assembler::movsbq(Register dst, Address src) { 4977 InstructionMark im(this); 4978 prefixq(src, dst); 4979 emit_byte(0x0F); 4980 emit_byte(0xBE); 4981 
emit_operand(dst, src); 4982} 4983 4984void Assembler::movsbq(Register dst, Register src) { 4985 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4986 emit_byte(0x0F); 4987 emit_byte(0xBE); 4988 emit_byte(0xC0 | encode); 4989} 4990 4991void Assembler::movslq(Register dst, int32_t imm32) { 4992 // dbx shows movslq(rcx, 3) as movq $0x0000000049000000,(%rbx) 4993 // and movslq(r8, 3); as movl $0x0000000048000000,(%rbx) 4994 // as a result we shouldn't use until tested at runtime... 4995 ShouldNotReachHere(); 4996 InstructionMark im(this); 4997 int encode = prefixq_and_encode(dst->encoding()); 4998 emit_byte(0xC7 | encode); 4999 emit_long(imm32); 5000} 5001 5002void Assembler::movslq(Address dst, int32_t imm32) { 5003 assert(is_simm32(imm32), "lost bits"); 5004 InstructionMark im(this); 5005 prefixq(dst); 5006 emit_byte(0xC7); 5007 emit_operand(rax, dst, 4); 5008 emit_long(imm32); 5009} 5010 5011void Assembler::movslq(Register dst, Address src) { 5012 InstructionMark im(this); 5013 prefixq(src, dst); 5014 emit_byte(0x63); 5015 emit_operand(dst, src); 5016} 5017 5018void Assembler::movslq(Register dst, Register src) { 5019 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 5020 emit_byte(0x63); 5021 emit_byte(0xC0 | encode); 5022} 5023 5024void Assembler::movswq(Register dst, Address src) { 5025 InstructionMark im(this); 5026 prefixq(src, dst); 5027 emit_byte(0x0F); 5028 emit_byte(0xBF); 5029 emit_operand(dst, src); 5030} 5031 5032void Assembler::movswq(Register dst, Register src) { 5033 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 5034 emit_byte(0x0F); 5035 emit_byte(0xBF); 5036 emit_byte(0xC0 | encode); 5037} 5038 5039void Assembler::movzbq(Register dst, Address src) { 5040 InstructionMark im(this); 5041 prefixq(src, dst); 5042 emit_byte(0x0F); 5043 emit_byte(0xB6); 5044 emit_operand(dst, src); 5045} 5046 5047void Assembler::movzbq(Register dst, Register src) { 5048 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 5049 emit_byte(0x0F); 5050 emit_byte(0xB6); 5051 emit_byte(0xC0 | encode); 5052} 5053 5054void Assembler::movzwq(Register dst, Address src) { 5055 InstructionMark im(this); 5056 prefixq(src, dst); 5057 emit_byte(0x0F); 5058 emit_byte(0xB7); 5059 emit_operand(dst, src); 5060} 5061 5062void Assembler::movzwq(Register dst, Register src) { 5063 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 5064 emit_byte(0x0F); 5065 emit_byte(0xB7); 5066 emit_byte(0xC0 | encode); 5067} 5068 5069void Assembler::negq(Register dst) { 5070 int encode = prefixq_and_encode(dst->encoding()); 5071 emit_byte(0xF7); 5072 emit_byte(0xD8 | encode); 5073} 5074 5075void Assembler::notq(Register dst) { 5076 int encode = prefixq_and_encode(dst->encoding()); 5077 emit_byte(0xF7); 5078 emit_byte(0xD0 | encode); 5079} 5080 5081void Assembler::orq(Address dst, int32_t imm32) { 5082 InstructionMark im(this); 5083 prefixq(dst); 5084 emit_byte(0x81); 5085 emit_operand(rcx, dst, 4); 5086 emit_long(imm32); 5087} 5088 5089void Assembler::orq(Register dst, int32_t imm32) { 5090 (void) prefixq_and_encode(dst->encoding()); 5091 emit_arith(0x81, 0xC8, dst, imm32); 5092} 5093 5094void Assembler::orq(Register dst, Address src) { 5095 InstructionMark im(this); 5096 prefixq(src, dst); 5097 emit_byte(0x0B); 5098 emit_operand(dst, src); 5099} 5100 5101void Assembler::orq(Register dst, Register src) { 5102 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 5103 emit_arith(0x0B, 0xC0, dst, src); 5104} 5105 5106void Assembler::popa() { // 
64bit 5107 movq(r15, Address(rsp, 0)); 5108 movq(r14, Address(rsp, wordSize)); 5109 movq(r13, Address(rsp, 2 * wordSize)); 5110 movq(r12, Address(rsp, 3 * wordSize)); 5111 movq(r11, Address(rsp, 4 * wordSize)); 5112 movq(r10, Address(rsp, 5 * wordSize)); 5113 movq(r9, Address(rsp, 6 * wordSize)); 5114 movq(r8, Address(rsp, 7 * wordSize)); 5115 movq(rdi, Address(rsp, 8 * wordSize)); 5116 movq(rsi, Address(rsp, 9 * wordSize)); 5117 movq(rbp, Address(rsp, 10 * wordSize)); 5118 // skip rsp 5119 movq(rbx, Address(rsp, 12 * wordSize)); 5120 movq(rdx, Address(rsp, 13 * wordSize)); 5121 movq(rcx, Address(rsp, 14 * wordSize)); 5122 movq(rax, Address(rsp, 15 * wordSize)); 5123 5124 addq(rsp, 16 * wordSize); 5125} 5126 5127void Assembler::popcntq(Register dst, Address src) { 5128 assert(VM_Version::supports_popcnt(), "must support"); 5129 InstructionMark im(this); 5130 emit_byte(0xF3); 5131 prefixq(src, dst); 5132 emit_byte(0x0F); 5133 emit_byte(0xB8); 5134 emit_operand(dst, src); 5135} 5136 5137void Assembler::popcntq(Register dst, Register src) { 5138 assert(VM_Version::supports_popcnt(), "must support"); 5139 emit_byte(0xF3); 5140 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 5141 emit_byte(0x0F); 5142 emit_byte(0xB8); 5143 emit_byte(0xC0 | encode); 5144} 5145 5146void Assembler::popq(Address dst) { 5147 InstructionMark im(this); 5148 prefixq(dst); 5149 emit_byte(0x8F); 5150 emit_operand(rax, dst); 5151} 5152 5153void Assembler::pusha() { // 64bit 5154 // we have to store original rsp. ABI says that 128 bytes 5155 // below rsp are local scratch. 5156 movq(Address(rsp, -5 * wordSize), rsp); 5157 5158 subq(rsp, 16 * wordSize); 5159 5160 movq(Address(rsp, 15 * wordSize), rax); 5161 movq(Address(rsp, 14 * wordSize), rcx); 5162 movq(Address(rsp, 13 * wordSize), rdx); 5163 movq(Address(rsp, 12 * wordSize), rbx); 5164 // skip rsp 5165 movq(Address(rsp, 10 * wordSize), rbp); 5166 movq(Address(rsp, 9 * wordSize), rsi); 5167 movq(Address(rsp, 8 * wordSize), rdi); 5168 movq(Address(rsp, 7 * wordSize), r8); 5169 movq(Address(rsp, 6 * wordSize), r9); 5170 movq(Address(rsp, 5 * wordSize), r10); 5171 movq(Address(rsp, 4 * wordSize), r11); 5172 movq(Address(rsp, 3 * wordSize), r12); 5173 movq(Address(rsp, 2 * wordSize), r13); 5174 movq(Address(rsp, wordSize), r14); 5175 movq(Address(rsp, 0), r15); 5176} 5177 5178void Assembler::pushq(Address src) { 5179 InstructionMark im(this); 5180 prefixq(src); 5181 emit_byte(0xFF); 5182 emit_operand(rsi, src); 5183} 5184 5185void Assembler::rclq(Register dst, int imm8) { 5186 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 5187 int encode = prefixq_and_encode(dst->encoding()); 5188 if (imm8 == 1) { 5189 emit_byte(0xD1); 5190 emit_byte(0xD0 | encode); 5191 } else { 5192 emit_byte(0xC1); 5193 emit_byte(0xD0 | encode); 5194 emit_byte(imm8); 5195 } 5196} 5197void Assembler::sarq(Register dst, int imm8) { 5198 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 5199 int encode = prefixq_and_encode(dst->encoding()); 5200 if (imm8 == 1) { 5201 emit_byte(0xD1); 5202 emit_byte(0xF8 | encode); 5203 } else { 5204 emit_byte(0xC1); 5205 emit_byte(0xF8 | encode); 5206 emit_byte(imm8); 5207 } 5208} 5209 5210void Assembler::sarq(Register dst) { 5211 int encode = prefixq_and_encode(dst->encoding()); 5212 emit_byte(0xD3); 5213 emit_byte(0xF8 | encode); 5214} 5215 5216void Assembler::sbbq(Address dst, int32_t imm32) { 5217 InstructionMark im(this); 5218 prefixq(dst); 5219 emit_arith_operand(0x81, rbx, dst, imm32); 5220} 5221 5222void Assembler::sbbq(Register 
dst, int32_t imm32) { 5223 (void) prefixq_and_encode(dst->encoding()); 5224 emit_arith(0x81, 0xD8, dst, imm32); 5225} 5226 5227void Assembler::sbbq(Register dst, Address src) { 5228 InstructionMark im(this); 5229 prefixq(src, dst); 5230 emit_byte(0x1B); 5231 emit_operand(dst, src); 5232} 5233 5234void Assembler::sbbq(Register dst, Register src) { 5235 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 5236 emit_arith(0x1B, 0xC0, dst, src); 5237} 5238 5239void Assembler::shlq(Register dst, int imm8) { 5240 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 5241 int encode = prefixq_and_encode(dst->encoding()); 5242 if (imm8 == 1) { 5243 emit_byte(0xD1); 5244 emit_byte(0xE0 | encode); 5245 } else { 5246 emit_byte(0xC1); 5247 emit_byte(0xE0 | encode); 5248 emit_byte(imm8); 5249 } 5250} 5251 5252void Assembler::shlq(Register dst) { 5253 int encode = prefixq_and_encode(dst->encoding()); 5254 emit_byte(0xD3); 5255 emit_byte(0xE0 | encode); 5256} 5257 5258void Assembler::shrq(Register dst, int imm8) { 5259 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 5260 int encode = prefixq_and_encode(dst->encoding()); 5261 emit_byte(0xC1); 5262 emit_byte(0xE8 | encode); 5263 emit_byte(imm8); 5264} 5265 5266void Assembler::shrq(Register dst) { 5267 int encode = prefixq_and_encode(dst->encoding()); 5268 emit_byte(0xD3); 5269 emit_byte(0xE8 | encode); 5270} 5271 5272void Assembler::subq(Address dst, int32_t imm32) { 5273 InstructionMark im(this); 5274 prefixq(dst); 5275 emit_arith_operand(0x81, rbp, dst, imm32); 5276} 5277 5278void Assembler::subq(Address dst, Register src) { 5279 InstructionMark im(this); 5280 prefixq(dst, src); 5281 emit_byte(0x29); 5282 emit_operand(src, dst); 5283} 5284 5285void Assembler::subq(Register dst, int32_t imm32) { 5286 (void) prefixq_and_encode(dst->encoding()); 5287 emit_arith(0x81, 0xE8, dst, imm32); 5288} 5289 5290// Force generation of a 4 byte immediate value even if it fits into 8bit 5291void Assembler::subq_imm32(Register dst, int32_t imm32) { 5292 (void) prefixq_and_encode(dst->encoding()); 5293 emit_arith_imm32(0x81, 0xE8, dst, imm32); 5294} 5295 5296void Assembler::subq(Register dst, Address src) { 5297 InstructionMark im(this); 5298 prefixq(src, dst); 5299 emit_byte(0x2B); 5300 emit_operand(dst, src); 5301} 5302 5303void Assembler::subq(Register dst, Register src) { 5304 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 5305 emit_arith(0x2B, 0xC0, dst, src); 5306} 5307 5308void Assembler::testq(Register dst, int32_t imm32) { 5309 // not using emit_arith because test 5310 // doesn't support sign-extension of 5311 // 8bit operands 5312 int encode = dst->encoding(); 5313 if (encode == 0) { 5314 prefix(REX_W); 5315 emit_byte(0xA9); 5316 } else { 5317 encode = prefixq_and_encode(encode); 5318 emit_byte(0xF7); 5319 emit_byte(0xC0 | encode); 5320 } 5321 emit_long(imm32); 5322} 5323 5324void Assembler::testq(Register dst, Register src) { 5325 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 5326 emit_arith(0x85, 0xC0, dst, src); 5327} 5328 5329void Assembler::xaddq(Address dst, Register src) { 5330 InstructionMark im(this); 5331 prefixq(dst, src); 5332 emit_byte(0x0F); 5333 emit_byte(0xC1); 5334 emit_operand(src, dst); 5335} 5336 5337void Assembler::xchgq(Register dst, Address src) { 5338 InstructionMark im(this); 5339 prefixq(src, dst); 5340 emit_byte(0x87); 5341 emit_operand(dst, src); 5342} 5343 5344void Assembler::xchgq(Register dst, Register src) { 5345 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 5346 
emit_byte(0x87); 5347 emit_byte(0xc0 | encode); 5348} 5349 5350void Assembler::xorq(Register dst, Register src) { 5351 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 5352 emit_arith(0x33, 0xC0, dst, src); 5353} 5354 5355void Assembler::xorq(Register dst, Address src) { 5356 InstructionMark im(this); 5357 prefixq(src, dst); 5358 emit_byte(0x33); 5359 emit_operand(dst, src); 5360} 5361 5362#endif // !LP64 5363 5364static Assembler::Condition reverse[] = { 5365 Assembler::noOverflow /* overflow = 0x0 */ , 5366 Assembler::overflow /* noOverflow = 0x1 */ , 5367 Assembler::aboveEqual /* carrySet = 0x2, below = 0x2 */ , 5368 Assembler::below /* aboveEqual = 0x3, carryClear = 0x3 */ , 5369 Assembler::notZero /* zero = 0x4, equal = 0x4 */ , 5370 Assembler::zero /* notZero = 0x5, notEqual = 0x5 */ , 5371 Assembler::above /* belowEqual = 0x6 */ , 5372 Assembler::belowEqual /* above = 0x7 */ , 5373 Assembler::positive /* negative = 0x8 */ , 5374 Assembler::negative /* positive = 0x9 */ , 5375 Assembler::noParity /* parity = 0xa */ , 5376 Assembler::parity /* noParity = 0xb */ , 5377 Assembler::greaterEqual /* less = 0xc */ , 5378 Assembler::less /* greaterEqual = 0xd */ , 5379 Assembler::greater /* lessEqual = 0xe */ , 5380 Assembler::lessEqual /* greater = 0xf, */ 5381 5382}; 5383 5384 5385// Implementation of MacroAssembler 5386 5387// First all the versions that have distinct versions depending on 32/64 bit 5388// Unless the difference is trivial (1 line or so). 5389 5390#ifndef _LP64 5391 5392// 32bit versions 5393 5394Address MacroAssembler::as_Address(AddressLiteral adr) { 5395 return Address(adr.target(), adr.rspec()); 5396} 5397 5398Address MacroAssembler::as_Address(ArrayAddress adr) { 5399 return Address::make_array(adr); 5400} 5401 5402int MacroAssembler::biased_locking_enter(Register lock_reg, 5403 Register obj_reg, 5404 Register swap_reg, 5405 Register tmp_reg, 5406 bool swap_reg_contains_mark, 5407 Label& done, 5408 Label* slow_case, 5409 BiasedLockingCounters* counters) { 5410 assert(UseBiasedLocking, "why call this otherwise?"); 5411 assert(swap_reg == rax, "swap_reg must be rax, for cmpxchg"); 5412 assert_different_registers(lock_reg, obj_reg, swap_reg); 5413 5414 if (PrintBiasedLockingStatistics && counters == NULL) 5415 counters = BiasedLocking::counters(); 5416 5417 bool need_tmp_reg = false; 5418 if (tmp_reg == noreg) { 5419 need_tmp_reg = true; 5420 tmp_reg = lock_reg; 5421 } else { 5422 assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg); 5423 } 5424 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); 5425 Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); 5426 Address klass_addr (obj_reg, oopDesc::klass_offset_in_bytes()); 5427 Address saved_mark_addr(lock_reg, 0); 5428 5429 // Biased locking 5430 // See whether the lock is currently biased toward our thread and 5431 // whether the epoch is still valid 5432 // Note that the runtime guarantees sufficient alignment of JavaThread 5433 // pointers to allow age to be placed into low bits 5434 // First check to see whether biasing is even enabled for this object 5435 Label cas_label; 5436 int null_check_offset = -1; 5437 if (!swap_reg_contains_mark) { 5438 null_check_offset = offset(); 5439 movl(swap_reg, mark_addr); 5440 } 5441 if (need_tmp_reg) { 5442 push(tmp_reg); 5443 } 5444 movl(tmp_reg, swap_reg); 5445 andl(tmp_reg, markOopDesc::biased_lock_mask_in_place); 5446 cmpl(tmp_reg, 
markOopDesc::biased_lock_pattern); 5447 if (need_tmp_reg) { 5448 pop(tmp_reg); 5449 } 5450 jcc(Assembler::notEqual, cas_label); 5451 // The bias pattern is present in the object's header. Need to check 5452 // whether the bias owner and the epoch are both still current. 5453 // Note that because there is no current thread register on x86 we 5454 // need to store off the mark word we read out of the object to 5455 // avoid reloading it and needing to recheck invariants below. This 5456 // store is unfortunate but it makes the overall code shorter and 5457 // simpler. 5458 movl(saved_mark_addr, swap_reg); 5459 if (need_tmp_reg) { 5460 push(tmp_reg); 5461 } 5462 get_thread(tmp_reg); 5463 xorl(swap_reg, tmp_reg); 5464 if (swap_reg_contains_mark) { 5465 null_check_offset = offset(); 5466 } 5467 movl(tmp_reg, klass_addr); 5468 xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset())); 5469 andl(swap_reg, ~((int) markOopDesc::age_mask_in_place)); 5470 if (need_tmp_reg) { 5471 pop(tmp_reg); 5472 } 5473 if (counters != NULL) { 5474 cond_inc32(Assembler::zero, 5475 ExternalAddress((address)counters->biased_lock_entry_count_addr())); 5476 } 5477 jcc(Assembler::equal, done); 5478 5479 Label try_revoke_bias; 5480 Label try_rebias; 5481 5482 // At this point we know that the header has the bias pattern and 5483 // that we are not the bias owner in the current epoch. We need to 5484 // figure out more details about the state of the header in order to 5485 // know what operations can be legally performed on the object's 5486 // header. 5487 5488 // If the low three bits in the xor result aren't clear, that means 5489 // the prototype header is no longer biased and we have to revoke 5490 // the bias on this object. 5491 testl(swap_reg, markOopDesc::biased_lock_mask_in_place); 5492 jcc(Assembler::notZero, try_revoke_bias); 5493 5494 // Biasing is still enabled for this data type. See whether the 5495 // epoch of the current bias is still valid, meaning that the epoch 5496 // bits of the mark word are equal to the epoch bits of the 5497 // prototype header. (Note that the prototype header's epoch bits 5498 // only change at a safepoint.) If not, attempt to rebias the object 5499 // toward the current thread. Note that we must be absolutely sure 5500 // that the current epoch is invalid in order to do this because 5501 // otherwise the manipulations it performs on the mark word are 5502 // illegal. 5503 testl(swap_reg, markOopDesc::epoch_mask_in_place); 5504 jcc(Assembler::notZero, try_rebias); 5505 5506 // The epoch of the current bias is still valid but we know nothing 5507 // about the owner; it might be set or it might be clear. Try to 5508 // acquire the bias of the object using an atomic operation. If this 5509 // fails we will go in to the runtime to revoke the object's bias. 5510 // Note that we first construct the presumed unbiased header so we 5511 // don't accidentally blow away another thread's valid bias. 5512 movl(swap_reg, saved_mark_addr); 5513 andl(swap_reg, 5514 markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); 5515 if (need_tmp_reg) { 5516 push(tmp_reg); 5517 } 5518 get_thread(tmp_reg); 5519 orl(tmp_reg, swap_reg); 5520 if (os::is_MP()) { 5521 lock(); 5522 } 5523 cmpxchgptr(tmp_reg, Address(obj_reg, 0)); 5524 if (need_tmp_reg) { 5525 pop(tmp_reg); 5526 } 5527 // If the biasing toward our thread failed, this means that 5528 // another thread succeeded in biasing it toward itself and we 5529 // need to revoke that bias. 
The revocation will occur in the 5530 // interpreter runtime in the slow case. 5531 if (counters != NULL) { 5532 cond_inc32(Assembler::zero, 5533 ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr())); 5534 } 5535 if (slow_case != NULL) { 5536 jcc(Assembler::notZero, *slow_case); 5537 } 5538 jmp(done); 5539 5540 bind(try_rebias); 5541 // At this point we know the epoch has expired, meaning that the 5542 // current "bias owner", if any, is actually invalid. Under these 5543 // circumstances _only_, we are allowed to use the current header's 5544 // value as the comparison value when doing the cas to acquire the 5545 // bias in the current epoch. In other words, we allow transfer of 5546 // the bias from one thread to another directly in this situation. 5547 // 5548 // FIXME: due to a lack of registers we currently blow away the age 5549 // bits in this situation. Should attempt to preserve them. 5550 if (need_tmp_reg) { 5551 push(tmp_reg); 5552 } 5553 get_thread(tmp_reg); 5554 movl(swap_reg, klass_addr); 5555 orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset())); 5556 movl(swap_reg, saved_mark_addr); 5557 if (os::is_MP()) { 5558 lock(); 5559 } 5560 cmpxchgptr(tmp_reg, Address(obj_reg, 0)); 5561 if (need_tmp_reg) { 5562 pop(tmp_reg); 5563 } 5564 // If the biasing toward our thread failed, then another thread 5565 // succeeded in biasing it toward itself and we need to revoke that 5566 // bias. The revocation will occur in the runtime in the slow case. 5567 if (counters != NULL) { 5568 cond_inc32(Assembler::zero, 5569 ExternalAddress((address)counters->rebiased_lock_entry_count_addr())); 5570 } 5571 if (slow_case != NULL) { 5572 jcc(Assembler::notZero, *slow_case); 5573 } 5574 jmp(done); 5575 5576 bind(try_revoke_bias); 5577 // The prototype mark in the klass doesn't have the bias bit set any 5578 // more, indicating that objects of this data type are not supposed 5579 // to be biased any more. We are going to try to reset the mark of 5580 // this object to the prototype value and fall through to the 5581 // CAS-based locking scheme. Note that if our CAS fails, it means 5582 // that another thread raced us for the privilege of revoking the 5583 // bias of this particular object, so it's okay to continue in the 5584 // normal locking code. 5585 // 5586 // FIXME: due to a lack of registers we currently blow away the age 5587 // bits in this situation. Should attempt to preserve them. 5588 movl(swap_reg, saved_mark_addr); 5589 if (need_tmp_reg) { 5590 push(tmp_reg); 5591 } 5592 movl(tmp_reg, klass_addr); 5593 movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset())); 5594 if (os::is_MP()) { 5595 lock(); 5596 } 5597 cmpxchgptr(tmp_reg, Address(obj_reg, 0)); 5598 if (need_tmp_reg) { 5599 pop(tmp_reg); 5600 } 5601 // Fall through to the normal CAS-based lock, because no matter what 5602 // the result of the above CAS, some thread must have succeeded in 5603 // removing the bias bit from the object's header. 
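  // Note (descriptive comment): the flags still reflect the cmpxchg above -- ZF set means
  // our CAS is the one that installed the new header -- and that is the condition
  // cond_inc32(Assembler::zero, ...) counts below; the intervening pop does not touch flags.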
5604 if (counters != NULL) { 5605 cond_inc32(Assembler::zero, 5606 ExternalAddress((address)counters->revoked_lock_entry_count_addr())); 5607 } 5608 5609 bind(cas_label); 5610 5611 return null_check_offset; 5612} 5613void MacroAssembler::call_VM_leaf_base(address entry_point, 5614 int number_of_arguments) { 5615 call(RuntimeAddress(entry_point)); 5616 increment(rsp, number_of_arguments * wordSize); 5617} 5618 5619void MacroAssembler::cmpklass(Address src1, Metadata* obj) { 5620 cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate()); 5621} 5622 5623void MacroAssembler::cmpklass(Register src1, Metadata* obj) { 5624 cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate()); 5625} 5626 5627void MacroAssembler::cmpoop(Address src1, jobject obj) { 5628 cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate()); 5629} 5630 5631void MacroAssembler::cmpoop(Register src1, jobject obj) { 5632 cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate()); 5633} 5634 5635void MacroAssembler::extend_sign(Register hi, Register lo) { 5636 // According to Intel Doc. AP-526, "Integer Divide", p.18. 5637 if (VM_Version::is_P6() && hi == rdx && lo == rax) { 5638 cdql(); 5639 } else { 5640 movl(hi, lo); 5641 sarl(hi, 31); 5642 } 5643} 5644 5645void MacroAssembler::jC2(Register tmp, Label& L) { 5646 // set parity bit if FPU flag C2 is set (via rax) 5647 save_rax(tmp); 5648 fwait(); fnstsw_ax(); 5649 sahf(); 5650 restore_rax(tmp); 5651 // branch 5652 jcc(Assembler::parity, L); 5653} 5654 5655void MacroAssembler::jnC2(Register tmp, Label& L) { 5656 // set parity bit if FPU flag C2 is set (via rax) 5657 save_rax(tmp); 5658 fwait(); fnstsw_ax(); 5659 sahf(); 5660 restore_rax(tmp); 5661 // branch 5662 jcc(Assembler::noParity, L); 5663} 5664 5665// 32bit can do a case table jump in one instruction but we no longer allow the base 5666// to be installed in the Address class 5667void MacroAssembler::jump(ArrayAddress entry) { 5668 jmp(as_Address(entry)); 5669} 5670 5671// Note: y_lo will be destroyed 5672void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) { 5673 // Long compare for Java (semantics as described in JVM spec.) 5674 Label high, low, done; 5675 5676 cmpl(x_hi, y_hi); 5677 jcc(Assembler::less, low); 5678 jcc(Assembler::greater, high); 5679 // x_hi is the return register 5680 xorl(x_hi, x_hi); 5681 cmpl(x_lo, y_lo); 5682 jcc(Assembler::below, low); 5683 jcc(Assembler::equal, done); 5684 5685 bind(high); 5686 xorl(x_hi, x_hi); 5687 increment(x_hi); 5688 jmp(done); 5689 5690 bind(low); 5691 xorl(x_hi, x_hi); 5692 decrementl(x_hi); 5693 5694 bind(done); 5695} 5696 5697void MacroAssembler::lea(Register dst, AddressLiteral src) { 5698 mov_literal32(dst, (int32_t)src.target(), src.rspec()); 5699} 5700 5701void MacroAssembler::lea(Address dst, AddressLiteral adr) { 5702 // leal(dst, as_Address(adr)); 5703 // see note in movl as to why we must use a move 5704 mov_literal32(dst, (int32_t) adr.target(), adr.rspec()); 5705} 5706 5707void MacroAssembler::leave() { 5708 mov(rsp, rbp); 5709 pop(rbp); 5710} 5711 5712void MacroAssembler::lmul(int x_rsp_offset, int y_rsp_offset) { 5713 // Multiplication of two Java long values stored on the stack 5714 // as illustrated below. Result is in rdx:rax. 5715 // 5716 // rsp ---> [ ?? ] \ \ 5717 // .... | y_rsp_offset | 5718 // [ y_lo ] / (in bytes) | x_rsp_offset 5719 // [ y_hi ] | (in bytes) 5720 // .... | 5721 // [ x_lo ] / 5722 // [ x_hi ] 5723 // .... 
5724 // 5725 // Basic idea: lo(result) = lo(x_lo * y_lo) 5726 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 5727 Address x_hi(rsp, x_rsp_offset + wordSize); Address x_lo(rsp, x_rsp_offset); 5728 Address y_hi(rsp, y_rsp_offset + wordSize); Address y_lo(rsp, y_rsp_offset); 5729 Label quick; 5730 // load x_hi, y_hi and check if quick 5731 // multiplication is possible 5732 movl(rbx, x_hi); 5733 movl(rcx, y_hi); 5734 movl(rax, rbx); 5735 orl(rbx, rcx); // rbx, = 0 <=> x_hi = 0 and y_hi = 0 5736 jcc(Assembler::zero, quick); // if rbx, = 0 do quick multiply 5737 // do full multiplication 5738 // 1st step 5739 mull(y_lo); // x_hi * y_lo 5740 movl(rbx, rax); // save lo(x_hi * y_lo) in rbx, 5741 // 2nd step 5742 movl(rax, x_lo); 5743 mull(rcx); // x_lo * y_hi 5744 addl(rbx, rax); // add lo(x_lo * y_hi) to rbx, 5745 // 3rd step 5746 bind(quick); // note: rbx, = 0 if quick multiply! 5747 movl(rax, x_lo); 5748 mull(y_lo); // x_lo * y_lo 5749 addl(rdx, rbx); // correct hi(x_lo * y_lo) 5750} 5751 5752void MacroAssembler::lneg(Register hi, Register lo) { 5753 negl(lo); 5754 adcl(hi, 0); 5755 negl(hi); 5756} 5757 5758void MacroAssembler::lshl(Register hi, Register lo) { 5759 // Java shift left long support (semantics as described in JVM spec., p.305) 5760 // (basic idea for shift counts s >= n: x << s == (x << n) << (s - n)) 5761 // shift value is in rcx ! 5762 assert(hi != rcx, "must not use rcx"); 5763 assert(lo != rcx, "must not use rcx"); 5764 const Register s = rcx; // shift count 5765 const int n = BitsPerWord; 5766 Label L; 5767 andl(s, 0x3f); // s := s & 0x3f (s < 0x40) 5768 cmpl(s, n); // if (s < n) 5769 jcc(Assembler::less, L); // else (s >= n) 5770 movl(hi, lo); // x := x << n 5771 xorl(lo, lo); 5772 // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n! 5773 bind(L); // s (mod n) < n 5774 shldl(hi, lo); // x := x << s 5775 shll(lo); 5776} 5777 5778 5779void MacroAssembler::lshr(Register hi, Register lo, bool sign_extension) { 5780 // Java shift right long support (semantics as described in JVM spec., p.306 & p.310) 5781 // (basic idea for shift counts s >= n: x >> s == (x >> n) >> (s - n)) 5782 assert(hi != rcx, "must not use rcx"); 5783 assert(lo != rcx, "must not use rcx"); 5784 const Register s = rcx; // shift count 5785 const int n = BitsPerWord; 5786 Label L; 5787 andl(s, 0x3f); // s := s & 0x3f (s < 0x40) 5788 cmpl(s, n); // if (s < n) 5789 jcc(Assembler::less, L); // else (s >= n) 5790 movl(lo, hi); // x := x >> n 5791 if (sign_extension) sarl(hi, 31); 5792 else xorl(hi, hi); 5793 // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n! 
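  // Illustrative example (comment only): for a logical shift with s = 40, the fall-through
  // above has already done lo := hi, hi := 0; the shrd/shr at L then shift by s mod 32 = 8,
  // so hi:lo ends up 0 : (original hi >> 8), which is exactly x >>> 40.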
5794 bind(L); // s (mod n) < n 5795 shrdl(lo, hi); // x := x >> s 5796 if (sign_extension) sarl(hi); 5797 else shrl(hi); 5798} 5799 5800void MacroAssembler::movoop(Register dst, jobject obj) { 5801 mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate()); 5802} 5803 5804void MacroAssembler::movoop(Address dst, jobject obj) { 5805 mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate()); 5806} 5807 5808void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { 5809 mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate()); 5810} 5811 5812void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { 5813 mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate()); 5814} 5815 5816void MacroAssembler::movptr(Register dst, AddressLiteral src) { 5817 if (src.is_lval()) { 5818 mov_literal32(dst, (intptr_t)src.target(), src.rspec()); 5819 } else { 5820 movl(dst, as_Address(src)); 5821 } 5822} 5823 5824void MacroAssembler::movptr(ArrayAddress dst, Register src) { 5825 movl(as_Address(dst), src); 5826} 5827 5828void MacroAssembler::movptr(Register dst, ArrayAddress src) { 5829 movl(dst, as_Address(src)); 5830} 5831 5832// src should NEVER be a real pointer. Use AddressLiteral for true pointers 5833void MacroAssembler::movptr(Address dst, intptr_t src) { 5834 movl(dst, src); 5835} 5836 5837 5838void MacroAssembler::pop_callee_saved_registers() { 5839 pop(rcx); 5840 pop(rdx); 5841 pop(rdi); 5842 pop(rsi); 5843} 5844 5845void MacroAssembler::pop_fTOS() { 5846 fld_d(Address(rsp, 0)); 5847 addl(rsp, 2 * wordSize); 5848} 5849 5850void MacroAssembler::push_callee_saved_registers() { 5851 push(rsi); 5852 push(rdi); 5853 push(rdx); 5854 push(rcx); 5855} 5856 5857void MacroAssembler::push_fTOS() { 5858 subl(rsp, 2 * wordSize); 5859 fstp_d(Address(rsp, 0)); 5860} 5861 5862 5863void MacroAssembler::pushoop(jobject obj) { 5864 push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate()); 5865} 5866 5867void MacroAssembler::pushklass(Metadata* obj) { 5868 push_literal32((int32_t)obj, metadata_Relocation::spec_for_immediate()); 5869} 5870 5871void MacroAssembler::pushptr(AddressLiteral src) { 5872 if (src.is_lval()) { 5873 push_literal32((int32_t)src.target(), src.rspec()); 5874 } else { 5875 pushl(as_Address(src)); 5876 } 5877} 5878 5879void MacroAssembler::set_word_if_not_zero(Register dst) { 5880 xorl(dst, dst); 5881 set_byte_if_not_zero(dst); 5882} 5883 5884static void pass_arg0(MacroAssembler* masm, Register arg) { 5885 masm->push(arg); 5886} 5887 5888static void pass_arg1(MacroAssembler* masm, Register arg) { 5889 masm->push(arg); 5890} 5891 5892static void pass_arg2(MacroAssembler* masm, Register arg) { 5893 masm->push(arg); 5894} 5895 5896static void pass_arg3(MacroAssembler* masm, Register arg) { 5897 masm->push(arg); 5898} 5899 5900#ifndef PRODUCT 5901extern "C" void findpc(intptr_t x); 5902#endif 5903 5904void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) { 5905 // In order to get locks to work, we need to fake a in_VM state 5906 JavaThread* thread = JavaThread::current(); 5907 JavaThreadState saved_state = thread->thread_state(); 5908 thread->set_thread_state(_thread_in_vm); 5909 if (ShowMessageBoxOnError) { 5910 JavaThread* thread = JavaThread::current(); 5911 JavaThreadState saved_state = thread->thread_state(); 5912 thread->set_thread_state(_thread_in_vm); 5913 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { 5914 ttyLocker ttyl; 5915 
BytecodeCounter::print(); 5916 } 5917 // To see where a verify_oop failed, get $ebx+40/X for this frame. 5918 // This is the value of eip which points to where verify_oop will return. 5919 if (os::message_box(msg, "Execution stopped, print registers?")) { 5920 print_state32(rdi, rsi, rbp, rsp, rbx, rdx, rcx, rax, eip); 5921 BREAKPOINT; 5922 } 5923 } else { 5924 ttyLocker ttyl; 5925 ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); 5926 } 5927 // Don't assert holding the ttyLock 5928 assert(false, err_msg("DEBUG MESSAGE: %s", msg)); 5929 ThreadStateTransition::transition(thread, _thread_in_vm, saved_state); 5930} 5931 5932void MacroAssembler::print_state32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip) { 5933 ttyLocker ttyl; 5934 FlagSetting fs(Debugging, true); 5935 tty->print_cr("eip = 0x%08x", eip); 5936#ifndef PRODUCT 5937 if ((WizardMode || Verbose) && PrintMiscellaneous) { 5938 tty->cr(); 5939 findpc(eip); 5940 tty->cr(); 5941 } 5942#endif 5943#define PRINT_REG(rax) \ 5944 { tty->print("%s = ", #rax); os::print_location(tty, rax); } 5945 PRINT_REG(rax); 5946 PRINT_REG(rbx); 5947 PRINT_REG(rcx); 5948 PRINT_REG(rdx); 5949 PRINT_REG(rdi); 5950 PRINT_REG(rsi); 5951 PRINT_REG(rbp); 5952 PRINT_REG(rsp); 5953#undef PRINT_REG 5954 // Print some words near top of staack. 5955 int* dump_sp = (int*) rsp; 5956 for (int col1 = 0; col1 < 8; col1++) { 5957 tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp); 5958 os::print_location(tty, *dump_sp++); 5959 } 5960 for (int row = 0; row < 16; row++) { 5961 tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp); 5962 for (int col = 0; col < 8; col++) { 5963 tty->print(" 0x%08x", *dump_sp++); 5964 } 5965 tty->cr(); 5966 } 5967 // Print some instructions around pc: 5968 Disassembler::decode((address)eip-64, (address)eip); 5969 tty->print_cr("--------"); 5970 Disassembler::decode((address)eip, (address)eip+32); 5971} 5972 5973void MacroAssembler::stop(const char* msg) { 5974 ExternalAddress message((address)msg); 5975 // push address of message 5976 pushptr(message.addr()); 5977 { Label L; call(L, relocInfo::none); bind(L); } // push eip 5978 pusha(); // push registers 5979 call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32))); 5980 hlt(); 5981} 5982 5983void MacroAssembler::warn(const char* msg) { 5984 push_CPU_state(); 5985 5986 ExternalAddress message((address) msg); 5987 // push address of message 5988 pushptr(message.addr()); 5989 5990 call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning))); 5991 addl(rsp, wordSize); // discard argument 5992 pop_CPU_state(); 5993} 5994 5995void MacroAssembler::print_state() { 5996 { Label L; call(L, relocInfo::none); bind(L); } // push eip 5997 pusha(); // push registers 5998 5999 push_CPU_state(); 6000 call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::print_state32))); 6001 pop_CPU_state(); 6002 6003 popa(); 6004 addl(rsp, wordSize); 6005} 6006 6007#else // _LP64 6008 6009// 64 bit versions 6010 6011Address MacroAssembler::as_Address(AddressLiteral adr) { 6012 // amd64 always does this as a pc-rel 6013 // we can be absolute or disp based on the instruction type 6014 // jmp/call are displacements others are absolute 6015 assert(!adr.is_lval(), "must be rval"); 6016 assert(reachable(adr), "must be"); 6017 return Address((int32_t)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc()); 6018 6019} 6020 6021Address 
MacroAssembler::as_Address(ArrayAddress adr) { 6022 AddressLiteral base = adr.base(); 6023 lea(rscratch1, base); 6024 Address index = adr.index(); 6025 assert(index._disp == 0, "must not have disp"); // maybe it can? 6026 Address array(rscratch1, index._index, index._scale, index._disp); 6027 return array; 6028} 6029 6030int MacroAssembler::biased_locking_enter(Register lock_reg, 6031 Register obj_reg, 6032 Register swap_reg, 6033 Register tmp_reg, 6034 bool swap_reg_contains_mark, 6035 Label& done, 6036 Label* slow_case, 6037 BiasedLockingCounters* counters) { 6038 assert(UseBiasedLocking, "why call this otherwise?"); 6039 assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq"); 6040 assert(tmp_reg != noreg, "tmp_reg must be supplied"); 6041 assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg); 6042 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); 6043 Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); 6044 Address saved_mark_addr(lock_reg, 0); 6045 6046 if (PrintBiasedLockingStatistics && counters == NULL) 6047 counters = BiasedLocking::counters(); 6048 6049 // Biased locking 6050 // See whether the lock is currently biased toward our thread and 6051 // whether the epoch is still valid 6052 // Note that the runtime guarantees sufficient alignment of JavaThread 6053 // pointers to allow age to be placed into low bits 6054 // First check to see whether biasing is even enabled for this object 6055 Label cas_label; 6056 int null_check_offset = -1; 6057 if (!swap_reg_contains_mark) { 6058 null_check_offset = offset(); 6059 movq(swap_reg, mark_addr); 6060 } 6061 movq(tmp_reg, swap_reg); 6062 andq(tmp_reg, markOopDesc::biased_lock_mask_in_place); 6063 cmpq(tmp_reg, markOopDesc::biased_lock_pattern); 6064 jcc(Assembler::notEqual, cas_label); 6065 // The bias pattern is present in the object's header. Need to check 6066 // whether the bias owner and the epoch are both still current. 6067 load_prototype_header(tmp_reg, obj_reg); 6068 orq(tmp_reg, r15_thread); 6069 xorq(tmp_reg, swap_reg); 6070 andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place)); 6071 if (counters != NULL) { 6072 cond_inc32(Assembler::zero, 6073 ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr())); 6074 } 6075 jcc(Assembler::equal, done); 6076 6077 Label try_revoke_bias; 6078 Label try_rebias; 6079 6080 // At this point we know that the header has the bias pattern and 6081 // that we are not the bias owner in the current epoch. We need to 6082 // figure out more details about the state of the header in order to 6083 // know what operations can be legally performed on the object's 6084 // header. 6085 6086 // If the low three bits in the xor result aren't clear, that means 6087 // the prototype header is no longer biased and we have to revoke 6088 // the bias on this object. 6089 testq(tmp_reg, markOopDesc::biased_lock_mask_in_place); 6090 jcc(Assembler::notZero, try_revoke_bias); 6091 6092 // Biasing is still enabled for this data type. See whether the 6093 // epoch of the current bias is still valid, meaning that the epoch 6094 // bits of the mark word are equal to the epoch bits of the 6095 // prototype header. (Note that the prototype header's epoch bits 6096 // only change at a safepoint.) If not, attempt to rebias the object 6097 // toward the current thread. 
Note that we must be absolutely sure 6098 // that the current epoch is invalid in order to do this because 6099 // otherwise the manipulations it performs on the mark word are 6100 // illegal. 6101 testq(tmp_reg, markOopDesc::epoch_mask_in_place); 6102 jcc(Assembler::notZero, try_rebias); 6103 6104 // The epoch of the current bias is still valid but we know nothing 6105 // about the owner; it might be set or it might be clear. Try to 6106 // acquire the bias of the object using an atomic operation. If this 6107 // fails we will go in to the runtime to revoke the object's bias. 6108 // Note that we first construct the presumed unbiased header so we 6109 // don't accidentally blow away another thread's valid bias. 6110 andq(swap_reg, 6111 markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); 6112 movq(tmp_reg, swap_reg); 6113 orq(tmp_reg, r15_thread); 6114 if (os::is_MP()) { 6115 lock(); 6116 } 6117 cmpxchgq(tmp_reg, Address(obj_reg, 0)); 6118 // If the biasing toward our thread failed, this means that 6119 // another thread succeeded in biasing it toward itself and we 6120 // need to revoke that bias. The revocation will occur in the 6121 // interpreter runtime in the slow case. 6122 if (counters != NULL) { 6123 cond_inc32(Assembler::zero, 6124 ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr())); 6125 } 6126 if (slow_case != NULL) { 6127 jcc(Assembler::notZero, *slow_case); 6128 } 6129 jmp(done); 6130 6131 bind(try_rebias); 6132 // At this point we know the epoch has expired, meaning that the 6133 // current "bias owner", if any, is actually invalid. Under these 6134 // circumstances _only_, we are allowed to use the current header's 6135 // value as the comparison value when doing the cas to acquire the 6136 // bias in the current epoch. In other words, we allow transfer of 6137 // the bias from one thread to another directly in this situation. 6138 // 6139 // FIXME: due to a lack of registers we currently blow away the age 6140 // bits in this situation. Should attempt to preserve them. 6141 load_prototype_header(tmp_reg, obj_reg); 6142 orq(tmp_reg, r15_thread); 6143 if (os::is_MP()) { 6144 lock(); 6145 } 6146 cmpxchgq(tmp_reg, Address(obj_reg, 0)); 6147 // If the biasing toward our thread failed, then another thread 6148 // succeeded in biasing it toward itself and we need to revoke that 6149 // bias. The revocation will occur in the runtime in the slow case. 6150 if (counters != NULL) { 6151 cond_inc32(Assembler::zero, 6152 ExternalAddress((address) counters->rebiased_lock_entry_count_addr())); 6153 } 6154 if (slow_case != NULL) { 6155 jcc(Assembler::notZero, *slow_case); 6156 } 6157 jmp(done); 6158 6159 bind(try_revoke_bias); 6160 // The prototype mark in the klass doesn't have the bias bit set any 6161 // more, indicating that objects of this data type are not supposed 6162 // to be biased any more. We are going to try to reset the mark of 6163 // this object to the prototype value and fall through to the 6164 // CAS-based locking scheme. Note that if our CAS fails, it means 6165 // that another thread raced us for the privilege of revoking the 6166 // bias of this particular object, so it's okay to continue in the 6167 // normal locking code. 6168 // 6169 // FIXME: due to a lack of registers we currently blow away the age 6170 // bits in this situation. Should attempt to preserve them. 
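  // load_prototype_header fetches the klass' unbiased prototype mark word into tmp_reg;
  // the cmpxchgq below tries to install it, using the mark word still held in
  // swap_reg (rax) as the compare value.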
6171 load_prototype_header(tmp_reg, obj_reg); 6172 if (os::is_MP()) { 6173 lock(); 6174 } 6175 cmpxchgq(tmp_reg, Address(obj_reg, 0)); 6176 // Fall through to the normal CAS-based lock, because no matter what 6177 // the result of the above CAS, some thread must have succeeded in 6178 // removing the bias bit from the object's header. 6179 if (counters != NULL) { 6180 cond_inc32(Assembler::zero, 6181 ExternalAddress((address) counters->revoked_lock_entry_count_addr())); 6182 } 6183 6184 bind(cas_label); 6185 6186 return null_check_offset; 6187} 6188 6189void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) { 6190 Label L, E; 6191 6192#ifdef _WIN64 6193 // Windows always allocates space for it's register args 6194 assert(num_args <= 4, "only register arguments supported"); 6195 subq(rsp, frame::arg_reg_save_area_bytes); 6196#endif 6197 6198 // Align stack if necessary 6199 testl(rsp, 15); 6200 jcc(Assembler::zero, L); 6201 6202 subq(rsp, 8); 6203 { 6204 call(RuntimeAddress(entry_point)); 6205 } 6206 addq(rsp, 8); 6207 jmp(E); 6208 6209 bind(L); 6210 { 6211 call(RuntimeAddress(entry_point)); 6212 } 6213 6214 bind(E); 6215 6216#ifdef _WIN64 6217 // restore stack pointer 6218 addq(rsp, frame::arg_reg_save_area_bytes); 6219#endif 6220 6221} 6222 6223void MacroAssembler::cmp64(Register src1, AddressLiteral src2) { 6224 assert(!src2.is_lval(), "should use cmpptr"); 6225 6226 if (reachable(src2)) { 6227 cmpq(src1, as_Address(src2)); 6228 } else { 6229 lea(rscratch1, src2); 6230 Assembler::cmpq(src1, Address(rscratch1, 0)); 6231 } 6232} 6233 6234int MacroAssembler::corrected_idivq(Register reg) { 6235 // Full implementation of Java ldiv and lrem; checks for special 6236 // case as described in JVM spec., p.243 & p.271. The function 6237 // returns the (pc) offset of the idivl instruction - may be needed 6238 // for implicit exceptions. 
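  // (Rationale: idivq raises a divide error on quotient overflow, and min_long / -1
  // overflows; the JVM spec instead requires quotient min_long and remainder 0,
  // which the special case below produces.)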
6239 // 6240 // normal case special case 6241 // 6242 // input : rax: dividend min_long 6243 // reg: divisor (may not be eax/edx) -1 6244 // 6245 // output: rax: quotient (= rax idiv reg) min_long 6246 // rdx: remainder (= rax irem reg) 0 6247 assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register"); 6248 static const int64_t min_long = 0x8000000000000000; 6249 Label normal_case, special_case; 6250 6251 // check for special case 6252 cmp64(rax, ExternalAddress((address) &min_long)); 6253 jcc(Assembler::notEqual, normal_case); 6254 xorl(rdx, rdx); // prepare rdx for possible special case (where 6255 // remainder = 0) 6256 cmpq(reg, -1); 6257 jcc(Assembler::equal, special_case); 6258 6259 // handle normal case 6260 bind(normal_case); 6261 cdqq(); 6262 int idivq_offset = offset(); 6263 idivq(reg); 6264 6265 // normal and special case exit 6266 bind(special_case); 6267 6268 return idivq_offset; 6269} 6270 6271void MacroAssembler::decrementq(Register reg, int value) { 6272 if (value == min_jint) { subq(reg, value); return; } 6273 if (value < 0) { incrementq(reg, -value); return; } 6274 if (value == 0) { ; return; } 6275 if (value == 1 && UseIncDec) { decq(reg) ; return; } 6276 /* else */ { subq(reg, value) ; return; } 6277} 6278 6279void MacroAssembler::decrementq(Address dst, int value) { 6280 if (value == min_jint) { subq(dst, value); return; } 6281 if (value < 0) { incrementq(dst, -value); return; } 6282 if (value == 0) { ; return; } 6283 if (value == 1 && UseIncDec) { decq(dst) ; return; } 6284 /* else */ { subq(dst, value) ; return; } 6285} 6286 6287void MacroAssembler::incrementq(Register reg, int value) { 6288 if (value == min_jint) { addq(reg, value); return; } 6289 if (value < 0) { decrementq(reg, -value); return; } 6290 if (value == 0) { ; return; } 6291 if (value == 1 && UseIncDec) { incq(reg) ; return; } 6292 /* else */ { addq(reg, value) ; return; } 6293} 6294 6295void MacroAssembler::incrementq(Address dst, int value) { 6296 if (value == min_jint) { addq(dst, value); return; } 6297 if (value < 0) { decrementq(dst, -value); return; } 6298 if (value == 0) { ; return; } 6299 if (value == 1 && UseIncDec) { incq(dst) ; return; } 6300 /* else */ { addq(dst, value) ; return; } 6301} 6302 6303// 32bit can do a case table jump in one instruction but we no longer allow the base 6304// to be installed in the Address class 6305void MacroAssembler::jump(ArrayAddress entry) { 6306 lea(rscratch1, entry.base()); 6307 Address dispatch = entry.index(); 6308 assert(dispatch._base == noreg, "must be"); 6309 dispatch._base = rscratch1; 6310 jmp(dispatch); 6311} 6312 6313void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) { 6314 ShouldNotReachHere(); // 64bit doesn't use two regs 6315 cmpq(x_lo, y_lo); 6316} 6317 6318void MacroAssembler::lea(Register dst, AddressLiteral src) { 6319 mov_literal64(dst, (intptr_t)src.target(), src.rspec()); 6320} 6321 6322void MacroAssembler::lea(Address dst, AddressLiteral adr) { 6323 mov_literal64(rscratch1, (intptr_t)adr.target(), adr.rspec()); 6324 movptr(dst, rscratch1); 6325} 6326 6327void MacroAssembler::leave() { 6328 // %%% is this really better? Why not on 32bit too? 
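  // LEAVE (0xC9) is architecturally equivalent to "mov rsp, rbp; pop rbp", i.e. the same
  // sequence the 32-bit MacroAssembler::leave() above emits explicitly.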
6329 emit_byte(0xC9); // LEAVE 6330} 6331 6332void MacroAssembler::lneg(Register hi, Register lo) { 6333 ShouldNotReachHere(); // 64bit doesn't use two regs 6334 negq(lo); 6335} 6336 6337void MacroAssembler::movoop(Register dst, jobject obj) { 6338 mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate()); 6339} 6340 6341void MacroAssembler::movoop(Address dst, jobject obj) { 6342 mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate()); 6343 movq(dst, rscratch1); 6344} 6345 6346void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { 6347 mov_literal64(dst, (intptr_t)obj, metadata_Relocation::spec_for_immediate()); 6348} 6349 6350void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { 6351 mov_literal64(rscratch1, (intptr_t)obj, metadata_Relocation::spec_for_immediate()); 6352 movq(dst, rscratch1); 6353} 6354 6355void MacroAssembler::movptr(Register dst, AddressLiteral src) { 6356 if (src.is_lval()) { 6357 mov_literal64(dst, (intptr_t)src.target(), src.rspec()); 6358 } else { 6359 if (reachable(src)) { 6360 movq(dst, as_Address(src)); 6361 } else { 6362 lea(rscratch1, src); 6363 movq(dst, Address(rscratch1,0)); 6364 } 6365 } 6366} 6367 6368void MacroAssembler::movptr(ArrayAddress dst, Register src) { 6369 movq(as_Address(dst), src); 6370} 6371 6372void MacroAssembler::movptr(Register dst, ArrayAddress src) { 6373 movq(dst, as_Address(src)); 6374} 6375 6376// src should NEVER be a real pointer. Use AddressLiteral for true pointers 6377void MacroAssembler::movptr(Address dst, intptr_t src) { 6378 mov64(rscratch1, src); 6379 movq(dst, rscratch1); 6380} 6381 6382// These are mostly for initializing NULL 6383void MacroAssembler::movptr(Address dst, int32_t src) { 6384 movslq(dst, src); 6385} 6386 6387void MacroAssembler::movptr(Register dst, int32_t src) { 6388 mov64(dst, (intptr_t)src); 6389} 6390 6391void MacroAssembler::pushoop(jobject obj) { 6392 movoop(rscratch1, obj); 6393 push(rscratch1); 6394} 6395 6396void MacroAssembler::pushklass(Metadata* obj) { 6397 mov_metadata(rscratch1, obj); 6398 push(rscratch1); 6399} 6400 6401void MacroAssembler::pushptr(AddressLiteral src) { 6402 lea(rscratch1, src); 6403 if (src.is_lval()) { 6404 push(rscratch1); 6405 } else { 6406 pushq(Address(rscratch1, 0)); 6407 } 6408} 6409 6410void MacroAssembler::reset_last_Java_frame(bool clear_fp, 6411 bool clear_pc) { 6412 // we must set sp to zero to clear frame 6413 movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), NULL_WORD); 6414 // must clear fp, so that compiled frames are not confused; it is 6415 // possible that we need it only for debugging 6416 if (clear_fp) { 6417 movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD); 6418 } 6419 6420 if (clear_pc) { 6421 movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), NULL_WORD); 6422 } 6423} 6424 6425void MacroAssembler::set_last_Java_frame(Register last_java_sp, 6426 Register last_java_fp, 6427 address last_java_pc) { 6428 // determine last_java_sp register 6429 if (!last_java_sp->is_valid()) { 6430 last_java_sp = rsp; 6431 } 6432 6433 // last_java_fp is optional 6434 if (last_java_fp->is_valid()) { 6435 movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), 6436 last_java_fp); 6437 } 6438 6439 // last_java_pc is optional 6440 if (last_java_pc != NULL) { 6441 Address java_pc(r15_thread, 6442 JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()); 6443 lea(rscratch1, InternalAddress(last_java_pc)); 6444 movptr(java_pc, rscratch1); 6445 } 
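  // Write last_Java_sp last: a non-NULL sp is what marks the frame anchor as set,
  // so fp/pc above must already be in place before it is published.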
6446 6447 movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), last_java_sp); 6448} 6449 6450static void pass_arg0(MacroAssembler* masm, Register arg) { 6451 if (c_rarg0 != arg ) { 6452 masm->mov(c_rarg0, arg); 6453 } 6454} 6455 6456static void pass_arg1(MacroAssembler* masm, Register arg) { 6457 if (c_rarg1 != arg ) { 6458 masm->mov(c_rarg1, arg); 6459 } 6460} 6461 6462static void pass_arg2(MacroAssembler* masm, Register arg) { 6463 if (c_rarg2 != arg ) { 6464 masm->mov(c_rarg2, arg); 6465 } 6466} 6467 6468static void pass_arg3(MacroAssembler* masm, Register arg) { 6469 if (c_rarg3 != arg ) { 6470 masm->mov(c_rarg3, arg); 6471 } 6472} 6473 6474void MacroAssembler::stop(const char* msg) { 6475 address rip = pc(); 6476 pusha(); // get regs on stack 6477 lea(c_rarg0, ExternalAddress((address) msg)); 6478 lea(c_rarg1, InternalAddress(rip)); 6479 movq(c_rarg2, rsp); // pass pointer to regs array 6480 andq(rsp, -16); // align stack as required by ABI 6481 call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64))); 6482 hlt(); 6483} 6484 6485void MacroAssembler::warn(const char* msg) { 6486 push(rbp); 6487 movq(rbp, rsp); 6488 andq(rsp, -16); // align stack as required by push_CPU_state and call 6489 push_CPU_state(); // keeps alignment at 16 bytes 6490 lea(c_rarg0, ExternalAddress((address) msg)); 6491 call_VM_leaf(CAST_FROM_FN_PTR(address, warning), c_rarg0); 6492 pop_CPU_state(); 6493 mov(rsp, rbp); 6494 pop(rbp); 6495} 6496 6497void MacroAssembler::print_state() { 6498 address rip = pc(); 6499 pusha(); // get regs on stack 6500 push(rbp); 6501 movq(rbp, rsp); 6502 andq(rsp, -16); // align stack as required by push_CPU_state and call 6503 push_CPU_state(); // keeps alignment at 16 bytes 6504 6505 lea(c_rarg0, InternalAddress(rip)); 6506 lea(c_rarg1, Address(rbp, wordSize)); // pass pointer to regs array 6507 call_VM_leaf(CAST_FROM_FN_PTR(address, MacroAssembler::print_state64), c_rarg0, c_rarg1); 6508 6509 pop_CPU_state(); 6510 mov(rsp, rbp); 6511 pop(rbp); 6512 popa(); 6513} 6514 6515#ifndef PRODUCT 6516extern "C" void findpc(intptr_t x); 6517#endif 6518 6519void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) { 6520 // In order to get locks to work, we need to fake a in_VM state 6521 if (ShowMessageBoxOnError) { 6522 JavaThread* thread = JavaThread::current(); 6523 JavaThreadState saved_state = thread->thread_state(); 6524 thread->set_thread_state(_thread_in_vm); 6525#ifndef PRODUCT 6526 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { 6527 ttyLocker ttyl; 6528 BytecodeCounter::print(); 6529 } 6530#endif 6531 // To see where a verify_oop failed, get $ebx+40/X for this frame. 6532 // XXX correct this offset for amd64 6533 // This is the value of eip which points to where verify_oop will return. 
6534 if (os::message_box(msg, "Execution stopped, print registers?")) { 6535 print_state64(pc, regs); 6536 BREAKPOINT; 6537 assert(false, "start up GDB"); 6538 } 6539 ThreadStateTransition::transition(thread, _thread_in_vm, saved_state); 6540 } else { 6541 ttyLocker ttyl; 6542 ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", 6543 msg); 6544 assert(false, err_msg("DEBUG MESSAGE: %s", msg)); 6545 } 6546} 6547 6548void MacroAssembler::print_state64(int64_t pc, int64_t regs[]) { 6549 ttyLocker ttyl; 6550 FlagSetting fs(Debugging, true); 6551 tty->print_cr("rip = 0x%016lx", pc); 6552#ifndef PRODUCT 6553 tty->cr(); 6554 findpc(pc); 6555 tty->cr(); 6556#endif 6557#define PRINT_REG(rax, value) \ 6558 { tty->print("%s = ", #rax); os::print_location(tty, value); } 6559 PRINT_REG(rax, regs[15]); 6560 PRINT_REG(rbx, regs[12]); 6561 PRINT_REG(rcx, regs[14]); 6562 PRINT_REG(rdx, regs[13]); 6563 PRINT_REG(rdi, regs[8]); 6564 PRINT_REG(rsi, regs[9]); 6565 PRINT_REG(rbp, regs[10]); 6566 PRINT_REG(rsp, regs[11]); 6567 PRINT_REG(r8 , regs[7]); 6568 PRINT_REG(r9 , regs[6]); 6569 PRINT_REG(r10, regs[5]); 6570 PRINT_REG(r11, regs[4]); 6571 PRINT_REG(r12, regs[3]); 6572 PRINT_REG(r13, regs[2]); 6573 PRINT_REG(r14, regs[1]); 6574 PRINT_REG(r15, regs[0]); 6575#undef PRINT_REG 6576 // Print some words near top of staack. 6577 int64_t* rsp = (int64_t*) regs[11]; 6578 int64_t* dump_sp = rsp; 6579 for (int col1 = 0; col1 < 8; col1++) { 6580 tty->print("(rsp+0x%03x) 0x%016lx: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (int64_t)dump_sp); 6581 os::print_location(tty, *dump_sp++); 6582 } 6583 for (int row = 0; row < 25; row++) { 6584 tty->print("(rsp+0x%03x) 0x%016lx: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (int64_t)dump_sp); 6585 for (int col = 0; col < 4; col++) { 6586 tty->print(" 0x%016lx", *dump_sp++); 6587 } 6588 tty->cr(); 6589 } 6590 // Print some instructions around pc: 6591 Disassembler::decode((address)pc-64, (address)pc); 6592 tty->print_cr("--------"); 6593 Disassembler::decode((address)pc, (address)pc+32); 6594} 6595 6596#endif // _LP64 6597 6598// Now versions that are common to 32/64 bit 6599 6600void MacroAssembler::addptr(Register dst, int32_t imm32) { 6601 LP64_ONLY(addq(dst, imm32)) NOT_LP64(addl(dst, imm32)); 6602} 6603 6604void MacroAssembler::addptr(Register dst, Register src) { 6605 LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); 6606} 6607 6608void MacroAssembler::addptr(Address dst, Register src) { 6609 LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); 6610} 6611 6612void MacroAssembler::addsd(XMMRegister dst, AddressLiteral src) { 6613 if (reachable(src)) { 6614 Assembler::addsd(dst, as_Address(src)); 6615 } else { 6616 lea(rscratch1, src); 6617 Assembler::addsd(dst, Address(rscratch1, 0)); 6618 } 6619} 6620 6621void MacroAssembler::addss(XMMRegister dst, AddressLiteral src) { 6622 if (reachable(src)) { 6623 addss(dst, as_Address(src)); 6624 } else { 6625 lea(rscratch1, src); 6626 addss(dst, Address(rscratch1, 0)); 6627 } 6628} 6629 6630void MacroAssembler::align(int modulus) { 6631 if (offset() % modulus != 0) { 6632 nop(modulus - (offset() % modulus)); 6633 } 6634} 6635 6636void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) { 6637 // Used in sign-masking with aligned address. 
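  // Typical use is masking the sign bit of a double with a 16-byte-aligned constant
  // (e.g. for abs/negate). Legacy SSE ANDPD faults on an unaligned 128-bit memory
  // operand, hence the alignment assert below; VEX-encoded forms (UseAVX > 0) do not
  // require alignment.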
6638 assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); 6639 if (reachable(src)) { 6640 Assembler::andpd(dst, as_Address(src)); 6641 } else { 6642 lea(rscratch1, src); 6643 Assembler::andpd(dst, Address(rscratch1, 0)); 6644 } 6645} 6646 6647void MacroAssembler::andps(XMMRegister dst, AddressLiteral src) { 6648 // Used in sign-masking with aligned address. 6649 assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); 6650 if (reachable(src)) { 6651 Assembler::andps(dst, as_Address(src)); 6652 } else { 6653 lea(rscratch1, src); 6654 Assembler::andps(dst, Address(rscratch1, 0)); 6655 } 6656} 6657 6658void MacroAssembler::andptr(Register dst, int32_t imm32) { 6659 LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32)); 6660} 6661 6662void MacroAssembler::atomic_incl(AddressLiteral counter_addr) { 6663 pushf(); 6664 if (os::is_MP()) 6665 lock(); 6666 incrementl(counter_addr); 6667 popf(); 6668} 6669 6670// Writes to stack successive pages until offset reached to check for 6671// stack overflow + shadow pages. This clobbers tmp. 6672void MacroAssembler::bang_stack_size(Register size, Register tmp) { 6673 movptr(tmp, rsp); 6674 // Bang stack for total size given plus shadow page size. 6675 // Bang one page at a time because large size can bang beyond yellow and 6676 // red zones. 6677 Label loop; 6678 bind(loop); 6679 movl(Address(tmp, (-os::vm_page_size())), size ); 6680 subptr(tmp, os::vm_page_size()); 6681 subl(size, os::vm_page_size()); 6682 jcc(Assembler::greater, loop); 6683 6684 // Bang down shadow pages too. 6685 // The -1 because we already subtracted 1 page. 6686 for (int i = 0; i< StackShadowPages-1; i++) { 6687 // this could be any sized move but this is can be a debugging crumb 6688 // so the bigger the better. 6689 movptr(Address(tmp, (-i*os::vm_page_size())), size ); 6690 } 6691} 6692 6693void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { 6694 assert(UseBiasedLocking, "why call this otherwise?"); 6695 6696 // Check for biased locking unlock case, which is a no-op 6697 // Note: we do not have to check the thread ID for two reasons. 6698 // First, the interpreter checks for IllegalMonitorStateException at 6699 // a higher level. Second, if the bias was revoked while we held the 6700 // lock, the object could not be rebiased toward another thread, so 6701 // the bias bit would be clear. 6702 movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); 6703 andptr(temp_reg, markOopDesc::biased_lock_mask_in_place); 6704 cmpptr(temp_reg, markOopDesc::biased_lock_pattern); 6705 jcc(Assembler::equal, done); 6706} 6707 6708void MacroAssembler::c2bool(Register x) { 6709 // implements x == 0 ? 0 : 1 6710 // note: must only look at least-significant byte of x 6711 // since C-style booleans are stored in one byte 6712 // only! 
(was bug) 6713 andl(x, 0xFF); 6714 setb(Assembler::notZero, x); 6715} 6716 6717// Wouldn't need if AddressLiteral version had new name 6718void MacroAssembler::call(Label& L, relocInfo::relocType rtype) { 6719 Assembler::call(L, rtype); 6720} 6721 6722void MacroAssembler::call(Register entry) { 6723 Assembler::call(entry); 6724} 6725 6726void MacroAssembler::call(AddressLiteral entry) { 6727 if (reachable(entry)) { 6728 Assembler::call_literal(entry.target(), entry.rspec()); 6729 } else { 6730 lea(rscratch1, entry); 6731 Assembler::call(rscratch1); 6732 } 6733} 6734 6735void MacroAssembler::ic_call(address entry) { 6736 RelocationHolder rh = virtual_call_Relocation::spec(pc()); 6737 movptr(rax, (intptr_t)Universe::non_oop_word()); 6738 call(AddressLiteral(entry, rh)); 6739} 6740 6741// Implementation of call_VM versions 6742 6743void MacroAssembler::call_VM(Register oop_result, 6744 address entry_point, 6745 bool check_exceptions) { 6746 Label C, E; 6747 call(C, relocInfo::none); 6748 jmp(E); 6749 6750 bind(C); 6751 call_VM_helper(oop_result, entry_point, 0, check_exceptions); 6752 ret(0); 6753 6754 bind(E); 6755} 6756 6757void MacroAssembler::call_VM(Register oop_result, 6758 address entry_point, 6759 Register arg_1, 6760 bool check_exceptions) { 6761 Label C, E; 6762 call(C, relocInfo::none); 6763 jmp(E); 6764 6765 bind(C); 6766 pass_arg1(this, arg_1); 6767 call_VM_helper(oop_result, entry_point, 1, check_exceptions); 6768 ret(0); 6769 6770 bind(E); 6771} 6772 6773void MacroAssembler::call_VM(Register oop_result, 6774 address entry_point, 6775 Register arg_1, 6776 Register arg_2, 6777 bool check_exceptions) { 6778 Label C, E; 6779 call(C, relocInfo::none); 6780 jmp(E); 6781 6782 bind(C); 6783 6784 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 6785 6786 pass_arg2(this, arg_2); 6787 pass_arg1(this, arg_1); 6788 call_VM_helper(oop_result, entry_point, 2, check_exceptions); 6789 ret(0); 6790 6791 bind(E); 6792} 6793 6794void MacroAssembler::call_VM(Register oop_result, 6795 address entry_point, 6796 Register arg_1, 6797 Register arg_2, 6798 Register arg_3, 6799 bool check_exceptions) { 6800 Label C, E; 6801 call(C, relocInfo::none); 6802 jmp(E); 6803 6804 bind(C); 6805 6806 LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg")); 6807 LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg")); 6808 pass_arg3(this, arg_3); 6809 6810 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 6811 pass_arg2(this, arg_2); 6812 6813 pass_arg1(this, arg_1); 6814 call_VM_helper(oop_result, entry_point, 3, check_exceptions); 6815 ret(0); 6816 6817 bind(E); 6818} 6819 6820void MacroAssembler::call_VM(Register oop_result, 6821 Register last_java_sp, 6822 address entry_point, 6823 int number_of_arguments, 6824 bool check_exceptions) { 6825 Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg); 6826 call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions); 6827} 6828 6829void MacroAssembler::call_VM(Register oop_result, 6830 Register last_java_sp, 6831 address entry_point, 6832 Register arg_1, 6833 bool check_exceptions) { 6834 pass_arg1(this, arg_1); 6835 call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); 6836} 6837 6838void MacroAssembler::call_VM(Register oop_result, 6839 Register last_java_sp, 6840 address entry_point, 6841 Register arg_1, 6842 Register arg_2, 6843 bool check_exceptions) { 6844 6845 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 6846 pass_arg2(this, arg_2); 6847 pass_arg1(this, arg_1); 6848 call_VM(oop_result, last_java_sp, 
entry_point, 2, check_exceptions); 6849} 6850 6851void MacroAssembler::call_VM(Register oop_result, 6852 Register last_java_sp, 6853 address entry_point, 6854 Register arg_1, 6855 Register arg_2, 6856 Register arg_3, 6857 bool check_exceptions) { 6858 LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg")); 6859 LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg")); 6860 pass_arg3(this, arg_3); 6861 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 6862 pass_arg2(this, arg_2); 6863 pass_arg1(this, arg_1); 6864 call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); 6865} 6866 6867void MacroAssembler::super_call_VM(Register oop_result, 6868 Register last_java_sp, 6869 address entry_point, 6870 int number_of_arguments, 6871 bool check_exceptions) { 6872 Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg); 6873 MacroAssembler::call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions); 6874} 6875 6876void MacroAssembler::super_call_VM(Register oop_result, 6877 Register last_java_sp, 6878 address entry_point, 6879 Register arg_1, 6880 bool check_exceptions) { 6881 pass_arg1(this, arg_1); 6882 super_call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); 6883} 6884 6885void MacroAssembler::super_call_VM(Register oop_result, 6886 Register last_java_sp, 6887 address entry_point, 6888 Register arg_1, 6889 Register arg_2, 6890 bool check_exceptions) { 6891 6892 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 6893 pass_arg2(this, arg_2); 6894 pass_arg1(this, arg_1); 6895 super_call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); 6896} 6897 6898void MacroAssembler::super_call_VM(Register oop_result, 6899 Register last_java_sp, 6900 address entry_point, 6901 Register arg_1, 6902 Register arg_2, 6903 Register arg_3, 6904 bool check_exceptions) { 6905 LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg")); 6906 LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg")); 6907 pass_arg3(this, arg_3); 6908 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 6909 pass_arg2(this, arg_2); 6910 pass_arg1(this, arg_1); 6911 super_call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); 6912} 6913 6914void MacroAssembler::call_VM_base(Register oop_result, 6915 Register java_thread, 6916 Register last_java_sp, 6917 address entry_point, 6918 int number_of_arguments, 6919 bool check_exceptions) { 6920 // determine java_thread register 6921 if (!java_thread->is_valid()) { 6922#ifdef _LP64 6923 java_thread = r15_thread; 6924#else 6925 java_thread = rdi; 6926 get_thread(java_thread); 6927#endif // LP64 6928 } 6929 // determine last_java_sp register 6930 if (!last_java_sp->is_valid()) { 6931 last_java_sp = rsp; 6932 } 6933 // debugging support 6934 assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); 6935 LP64_ONLY(assert(java_thread == r15_thread, "unexpected register")); 6936#ifdef ASSERT 6937 // TraceBytecodes does not use r12 but saves it over the call, so don't verify 6938 // r12 is the heapbase. 
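  // On 64-bit, r12 is reserved as the compressed oop/klass heap base; verify_heapbase
  // below asserts it has not been clobbered before we make the runtime call.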
6939 LP64_ONLY(if ((UseCompressedOops || UseCompressedKlassPointers) && !TraceBytecodes) verify_heapbase("call_VM_base: heap base corrupted?");) 6940#endif // ASSERT 6941 6942 assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); 6943 assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); 6944 6945 // push java thread (becomes first argument of C function) 6946 6947 NOT_LP64(push(java_thread); number_of_arguments++); 6948 LP64_ONLY(mov(c_rarg0, r15_thread)); 6949 6950 // set last Java frame before call 6951 assert(last_java_sp != rbp, "can't use ebp/rbp"); 6952 6953 // Only interpreter should have to set fp 6954 set_last_Java_frame(java_thread, last_java_sp, rbp, NULL); 6955 6956 // do the call, remove parameters 6957 MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); 6958 6959 // restore the thread (cannot use the pushed argument since arguments 6960 // may be overwritten by C code generated by an optimizing compiler); 6961 // however can use the register value directly if it is callee saved. 6962 if (LP64_ONLY(true ||) java_thread == rdi || java_thread == rsi) { 6963 // rdi & rsi (also r15) are callee saved -> nothing to do 6964#ifdef ASSERT 6965 guarantee(java_thread != rax, "change this code"); 6966 push(rax); 6967 { Label L; 6968 get_thread(rax); 6969 cmpptr(java_thread, rax); 6970 jcc(Assembler::equal, L); 6971 STOP("MacroAssembler::call_VM_base: rdi not callee saved?"); 6972 bind(L); 6973 } 6974 pop(rax); 6975#endif 6976 } else { 6977 get_thread(java_thread); 6978 } 6979 // reset last Java frame 6980 // Only interpreter should have to clear fp 6981 reset_last_Java_frame(java_thread, true, false); 6982 6983#ifndef CC_INTERP 6984 // C++ interp handles this in the interpreter 6985 check_and_handle_popframe(java_thread); 6986 check_and_handle_earlyret(java_thread); 6987#endif /* CC_INTERP */ 6988 6989 if (check_exceptions) { 6990 // check for pending exceptions (java_thread is set upon return) 6991 cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD); 6992#ifndef _LP64 6993 jump_cc(Assembler::notEqual, 6994 RuntimeAddress(StubRoutines::forward_exception_entry())); 6995#else 6996 // This used to conditionally jump to forward_exception however it is 6997 // possible if we relocate that the branch will not reach. So we must jump 6998 // around so we can always reach 6999 7000 Label ok; 7001 jcc(Assembler::equal, ok); 7002 jump(RuntimeAddress(StubRoutines::forward_exception_entry())); 7003 bind(ok); 7004#endif // LP64 7005 } 7006 7007 // get oop result if there is one and reset the value in the thread 7008 if (oop_result->is_valid()) { 7009 get_vm_result(oop_result, java_thread); 7010 } 7011} 7012 7013void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { 7014 7015 // Calculate the value for last_Java_sp 7016 // somewhat subtle. call_VM does an intermediate call 7017 // which places a return address on the stack just under the 7018 // stack pointer as the user finsihed with it. This allows 7019 // use to retrieve last_Java_pc from last_Java_sp[-1]. 7020 // On 32bit we then have to push additional args on the stack to accomplish 7021 // the actual requested call. On 64bit call_VM only can use register args 7022 // so the only extra space is the return address that call_VM created. 7023 // This hopefully explains the calculations here. 
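  // Illustrative stack picture at this point (comment only, not generated code):
  //   rsp                    -> arg_1 .. arg_n      (32-bit only, pushed by pass_argN)
  //   rsp + n*wordSize       -> return address pushed by call_VM's intermediate call
  //   rsp + (1+n)*wordSize   -> caller's stack, the value we want for last_Java_sp
  // On 64-bit no arguments are pushed, so last_Java_sp is simply rsp + wordSize, and
  // last_Java_pc is recoverable as last_Java_sp[-1].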
7024 7025#ifdef _LP64 7026 // We've pushed one address, correct last_Java_sp 7027 lea(rax, Address(rsp, wordSize)); 7028#else 7029 lea(rax, Address(rsp, (1 + number_of_arguments) * wordSize)); 7030#endif // LP64 7031 7032 call_VM_base(oop_result, noreg, rax, entry_point, number_of_arguments, check_exceptions); 7033 7034} 7035 7036void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { 7037 call_VM_leaf_base(entry_point, number_of_arguments); 7038} 7039 7040void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { 7041 pass_arg0(this, arg_0); 7042 call_VM_leaf(entry_point, 1); 7043} 7044 7045void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { 7046 7047 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); 7048 pass_arg1(this, arg_1); 7049 pass_arg0(this, arg_0); 7050 call_VM_leaf(entry_point, 2); 7051} 7052 7053void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { 7054 LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg")); 7055 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 7056 pass_arg2(this, arg_2); 7057 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); 7058 pass_arg1(this, arg_1); 7059 pass_arg0(this, arg_0); 7060 call_VM_leaf(entry_point, 3); 7061} 7062 7063void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) { 7064 pass_arg0(this, arg_0); 7065 MacroAssembler::call_VM_leaf_base(entry_point, 1); 7066} 7067 7068void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { 7069 7070 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); 7071 pass_arg1(this, arg_1); 7072 pass_arg0(this, arg_0); 7073 MacroAssembler::call_VM_leaf_base(entry_point, 2); 7074} 7075 7076void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { 7077 LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg")); 7078 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 7079 pass_arg2(this, arg_2); 7080 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); 7081 pass_arg1(this, arg_1); 7082 pass_arg0(this, arg_0); 7083 MacroAssembler::call_VM_leaf_base(entry_point, 3); 7084} 7085 7086void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) { 7087 LP64_ONLY(assert(arg_0 != c_rarg3, "smashed arg")); 7088 LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg")); 7089 LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg")); 7090 pass_arg3(this, arg_3); 7091 LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg")); 7092 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 7093 pass_arg2(this, arg_2); 7094 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); 7095 pass_arg1(this, arg_1); 7096 pass_arg0(this, arg_0); 7097 MacroAssembler::call_VM_leaf_base(entry_point, 4); 7098} 7099 7100void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { 7101 movptr(oop_result, Address(java_thread, JavaThread::vm_result_offset())); 7102 movptr(Address(java_thread, JavaThread::vm_result_offset()), NULL_WORD); 7103 verify_oop(oop_result, "broken oop in call_VM_base"); 7104} 7105 7106void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { 7107 movptr(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); 7108 movptr(Address(java_thread, JavaThread::vm_result_2_offset()), NULL_WORD); 7109} 7110 7111void MacroAssembler::check_and_handle_earlyret(Register java_thread) { 7112} 7113 7114void 
MacroAssembler::check_and_handle_popframe(Register java_thread) { 7115} 7116 7117void MacroAssembler::cmp32(AddressLiteral src1, int32_t imm) { 7118 if (reachable(src1)) { 7119 cmpl(as_Address(src1), imm); 7120 } else { 7121 lea(rscratch1, src1); 7122 cmpl(Address(rscratch1, 0), imm); 7123 } 7124} 7125 7126void MacroAssembler::cmp32(Register src1, AddressLiteral src2) { 7127 assert(!src2.is_lval(), "use cmpptr"); 7128 if (reachable(src2)) { 7129 cmpl(src1, as_Address(src2)); 7130 } else { 7131 lea(rscratch1, src2); 7132 cmpl(src1, Address(rscratch1, 0)); 7133 } 7134} 7135 7136void MacroAssembler::cmp32(Register src1, int32_t imm) { 7137 Assembler::cmpl(src1, imm); 7138} 7139 7140void MacroAssembler::cmp32(Register src1, Address src2) { 7141 Assembler::cmpl(src1, src2); 7142} 7143 7144void MacroAssembler::cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) { 7145 ucomisd(opr1, opr2); 7146 7147 Label L; 7148 if (unordered_is_less) { 7149 movl(dst, -1); 7150 jcc(Assembler::parity, L); 7151 jcc(Assembler::below , L); 7152 movl(dst, 0); 7153 jcc(Assembler::equal , L); 7154 increment(dst); 7155 } else { // unordered is greater 7156 movl(dst, 1); 7157 jcc(Assembler::parity, L); 7158 jcc(Assembler::above , L); 7159 movl(dst, 0); 7160 jcc(Assembler::equal , L); 7161 decrementl(dst); 7162 } 7163 bind(L); 7164} 7165 7166void MacroAssembler::cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) { 7167 ucomiss(opr1, opr2); 7168 7169 Label L; 7170 if (unordered_is_less) { 7171 movl(dst, -1); 7172 jcc(Assembler::parity, L); 7173 jcc(Assembler::below , L); 7174 movl(dst, 0); 7175 jcc(Assembler::equal , L); 7176 increment(dst); 7177 } else { // unordered is greater 7178 movl(dst, 1); 7179 jcc(Assembler::parity, L); 7180 jcc(Assembler::above , L); 7181 movl(dst, 0); 7182 jcc(Assembler::equal , L); 7183 decrementl(dst); 7184 } 7185 bind(L); 7186} 7187 7188 7189void MacroAssembler::cmp8(AddressLiteral src1, int imm) { 7190 if (reachable(src1)) { 7191 cmpb(as_Address(src1), imm); 7192 } else { 7193 lea(rscratch1, src1); 7194 cmpb(Address(rscratch1, 0), imm); 7195 } 7196} 7197 7198void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) { 7199#ifdef _LP64 7200 if (src2.is_lval()) { 7201 movptr(rscratch1, src2); 7202 Assembler::cmpq(src1, rscratch1); 7203 } else if (reachable(src2)) { 7204 cmpq(src1, as_Address(src2)); 7205 } else { 7206 lea(rscratch1, src2); 7207 Assembler::cmpq(src1, Address(rscratch1, 0)); 7208 } 7209#else 7210 if (src2.is_lval()) { 7211 cmp_literal32(src1, (int32_t) src2.target(), src2.rspec()); 7212 } else { 7213 cmpl(src1, as_Address(src2)); 7214 } 7215#endif // _LP64 7216} 7217 7218void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) { 7219 assert(src2.is_lval(), "not a mem-mem compare"); 7220#ifdef _LP64 7221 // moves src2's literal address 7222 movptr(rscratch1, src2); 7223 Assembler::cmpq(src1, rscratch1); 7224#else 7225 cmp_literal32(src1, (int32_t) src2.target(), src2.rspec()); 7226#endif // _LP64 7227} 7228 7229void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) { 7230 if (reachable(adr)) { 7231 if (os::is_MP()) 7232 lock(); 7233 cmpxchgptr(reg, as_Address(adr)); 7234 } else { 7235 lea(rscratch1, adr); 7236 if (os::is_MP()) 7237 lock(); 7238 cmpxchgptr(reg, Address(rscratch1, 0)); 7239 } 7240} 7241 7242void MacroAssembler::cmpxchgptr(Register reg, Address adr) { 7243 LP64_ONLY(cmpxchgq(reg, adr)) NOT_LP64(cmpxchgl(reg, adr)); 7244} 7245 7246void MacroAssembler::comisd(XMMRegister dst, 
AddressLiteral src) { 7247 if (reachable(src)) { 7248 Assembler::comisd(dst, as_Address(src)); 7249 } else { 7250 lea(rscratch1, src); 7251 Assembler::comisd(dst, Address(rscratch1, 0)); 7252 } 7253} 7254 7255void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) { 7256 if (reachable(src)) { 7257 Assembler::comiss(dst, as_Address(src)); 7258 } else { 7259 lea(rscratch1, src); 7260 Assembler::comiss(dst, Address(rscratch1, 0)); 7261 } 7262} 7263 7264 7265void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) { 7266 Condition negated_cond = negate_condition(cond); 7267 Label L; 7268 jcc(negated_cond, L); 7269 atomic_incl(counter_addr); 7270 bind(L); 7271} 7272 7273int MacroAssembler::corrected_idivl(Register reg) { 7274 // Full implementation of Java idiv and irem; checks for 7275 // special case as described in JVM spec., p.243 & p.271. 7276 // The function returns the (pc) offset of the idivl 7277 // instruction - may be needed for implicit exceptions. 7278 // 7279 // normal case special case 7280 // 7281 // input : rax,: dividend min_int 7282 // reg: divisor (may not be rax,/rdx) -1 7283 // 7284 // output: rax,: quotient (= rax, idiv reg) min_int 7285 // rdx: remainder (= rax, irem reg) 0 7286 assert(reg != rax && reg != rdx, "reg cannot be rax, or rdx register"); 7287 const int min_int = 0x80000000; 7288 Label normal_case, special_case; 7289 7290 // check for special case 7291 cmpl(rax, min_int); 7292 jcc(Assembler::notEqual, normal_case); 7293 xorl(rdx, rdx); // prepare rdx for possible special case (where remainder = 0) 7294 cmpl(reg, -1); 7295 jcc(Assembler::equal, special_case); 7296 7297 // handle normal case 7298 bind(normal_case); 7299 cdql(); 7300 int idivl_offset = offset(); 7301 idivl(reg); 7302 7303 // normal and special case exit 7304 bind(special_case); 7305 7306 return idivl_offset; 7307} 7308 7309 7310 7311void MacroAssembler::decrementl(Register reg, int value) { 7312 if (value == min_jint) {subl(reg, value) ; return; } 7313 if (value < 0) { incrementl(reg, -value); return; } 7314 if (value == 0) { ; return; } 7315 if (value == 1 && UseIncDec) { decl(reg) ; return; } 7316 /* else */ { subl(reg, value) ; return; } 7317} 7318 7319void MacroAssembler::decrementl(Address dst, int value) { 7320 if (value == min_jint) {subl(dst, value) ; return; } 7321 if (value < 0) { incrementl(dst, -value); return; } 7322 if (value == 0) { ; return; } 7323 if (value == 1 && UseIncDec) { decl(dst) ; return; } 7324 /* else */ { subl(dst, value) ; return; } 7325} 7326 7327void MacroAssembler::division_with_shift (Register reg, int shift_value) { 7328 assert (shift_value > 0, "illegal shift value"); 7329 Label _is_positive; 7330 testl (reg, reg); 7331 jcc (Assembler::positive, _is_positive); 7332 int offset = (1 << shift_value) - 1 ; 7333 7334 if (offset == 1) { 7335 incrementl(reg); 7336 } else { 7337 addl(reg, offset); 7338 } 7339 7340 bind (_is_positive); 7341 sarl(reg, shift_value); 7342} 7343 7344void MacroAssembler::divsd(XMMRegister dst, AddressLiteral src) { 7345 if (reachable(src)) { 7346 Assembler::divsd(dst, as_Address(src)); 7347 } else { 7348 lea(rscratch1, src); 7349 Assembler::divsd(dst, Address(rscratch1, 0)); 7350 } 7351} 7352 7353void MacroAssembler::divss(XMMRegister dst, AddressLiteral src) { 7354 if (reachable(src)) { 7355 Assembler::divss(dst, as_Address(src)); 7356 } else { 7357 lea(rscratch1, src); 7358 Assembler::divss(dst, Address(rscratch1, 0)); 7359 } 7360} 7361 7362// !defined(COMPILER2) is because of stupid core builds 7363#if 
!defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2) 7364void MacroAssembler::empty_FPU_stack() { 7365 if (VM_Version::supports_mmx()) { 7366 emms(); 7367 } else { 7368 for (int i = 8; i-- > 0; ) ffree(i); 7369 } 7370} 7371#endif // !LP64 || C1 || !C2 7372 7373 7374// Defines obj, preserves var_size_in_bytes 7375void MacroAssembler::eden_allocate(Register obj, 7376 Register var_size_in_bytes, 7377 int con_size_in_bytes, 7378 Register t1, 7379 Label& slow_case) { 7380 assert(obj == rax, "obj must be in rax, for cmpxchg"); 7381 assert_different_registers(obj, var_size_in_bytes, t1); 7382 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { 7383 jmp(slow_case); 7384 } else { 7385 Register end = t1; 7386 Label retry; 7387 bind(retry); 7388 ExternalAddress heap_top((address) Universe::heap()->top_addr()); 7389 movptr(obj, heap_top); 7390 if (var_size_in_bytes == noreg) { 7391 lea(end, Address(obj, con_size_in_bytes)); 7392 } else { 7393 lea(end, Address(obj, var_size_in_bytes, Address::times_1)); 7394 } 7395 // if end < obj then we wrapped around => object too long => slow case 7396 cmpptr(end, obj); 7397 jcc(Assembler::below, slow_case); 7398 cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr())); 7399 jcc(Assembler::above, slow_case); 7400 // Compare obj with the top addr, and if still equal, store the new top addr in 7401 // end at the address of the top addr pointer. Sets ZF if was equal, and clears 7402 // it otherwise. Use lock prefix for atomicity on MPs. 7403 locked_cmpxchgptr(end, heap_top); 7404 jcc(Assembler::notEqual, retry); 7405 } 7406} 7407 7408void MacroAssembler::enter() { 7409 push(rbp); 7410 mov(rbp, rsp); 7411} 7412 7413// A 5 byte nop that is safe for patching (see patch_verified_entry) 7414void MacroAssembler::fat_nop() { 7415 if (UseAddressNop) { 7416 addr_nop_5(); 7417 } else { 7418 emit_byte(0x26); // es: 7419 emit_byte(0x2e); // cs: 7420 emit_byte(0x64); // fs: 7421 emit_byte(0x65); // gs: 7422 emit_byte(0x90); 7423 } 7424} 7425 7426void MacroAssembler::fcmp(Register tmp) { 7427 fcmp(tmp, 1, true, true); 7428} 7429 7430void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) { 7431 assert(!pop_right || pop_left, "usage error"); 7432 if (VM_Version::supports_cmov()) { 7433 assert(tmp == noreg, "unneeded temp"); 7434 if (pop_left) { 7435 fucomip(index); 7436 } else { 7437 fucomi(index); 7438 } 7439 if (pop_right) { 7440 fpop(); 7441 } 7442 } else { 7443 assert(tmp != noreg, "need temp"); 7444 if (pop_left) { 7445 if (pop_right) { 7446 fcompp(); 7447 } else { 7448 fcomp(index); 7449 } 7450 } else { 7451 fcom(index); 7452 } 7453 // convert FPU condition into eflags condition via rax, 7454 save_rax(tmp); 7455 fwait(); fnstsw_ax(); 7456 sahf(); 7457 restore_rax(tmp); 7458 } 7459 // condition codes set as follows: 7460 // 7461 // CF (corresponds to C0) if x < y 7462 // PF (corresponds to C2) if unordered 7463 // ZF (corresponds to C3) if x = y 7464} 7465 7466void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less) { 7467 fcmp2int(dst, unordered_is_less, 1, true, true); 7468} 7469 7470void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right) { 7471 fcmp(VM_Version::supports_cmov() ? 
noreg : dst, index, pop_left, pop_right); 7472 Label L; 7473 if (unordered_is_less) { 7474 movl(dst, -1); 7475 jcc(Assembler::parity, L); 7476 jcc(Assembler::below , L); 7477 movl(dst, 0); 7478 jcc(Assembler::equal , L); 7479 increment(dst); 7480 } else { // unordered is greater 7481 movl(dst, 1); 7482 jcc(Assembler::parity, L); 7483 jcc(Assembler::above , L); 7484 movl(dst, 0); 7485 jcc(Assembler::equal , L); 7486 decrementl(dst); 7487 } 7488 bind(L); 7489} 7490 7491void MacroAssembler::fld_d(AddressLiteral src) { 7492 fld_d(as_Address(src)); 7493} 7494 7495void MacroAssembler::fld_s(AddressLiteral src) { 7496 fld_s(as_Address(src)); 7497} 7498 7499void MacroAssembler::fld_x(AddressLiteral src) { 7500 Assembler::fld_x(as_Address(src)); 7501} 7502 7503void MacroAssembler::fldcw(AddressLiteral src) { 7504 Assembler::fldcw(as_Address(src)); 7505} 7506 7507void MacroAssembler::pow_exp_core_encoding() { 7508 // kills rax, rcx, rdx 7509 subptr(rsp,sizeof(jdouble)); 7510 // computes 2^X. Stack: X ... 7511 // f2xm1 computes 2^X-1 but only operates on -1<=X<=1. Get int(X) and 7512 // keep it on the thread's stack to compute 2^int(X) later 7513 // then compute 2^(X-int(X)) as (2^(X-int(X)-1+1) 7514 // final result is obtained with: 2^X = 2^int(X) * 2^(X-int(X)) 7515 fld_s(0); // Stack: X X ... 7516 frndint(); // Stack: int(X) X ... 7517 fsuba(1); // Stack: int(X) X-int(X) ... 7518 fistp_s(Address(rsp,0)); // move int(X) as integer to thread's stack. Stack: X-int(X) ... 7519 f2xm1(); // Stack: 2^(X-int(X))-1 ... 7520 fld1(); // Stack: 1 2^(X-int(X))-1 ... 7521 faddp(1); // Stack: 2^(X-int(X)) 7522 // computes 2^(int(X)): add exponent bias (1023) to int(X), then 7523 // shift int(X)+1023 to exponent position. 7524 // Exponent is limited to 11 bits if int(X)+1023 does not fit in 11 7525 // bits, set result to NaN. 0x000 and 0x7FF are reserved exponent 7526 // values so detect them and set result to NaN. 7527 movl(rax,Address(rsp,0)); 7528 movl(rcx, -2048); // 11 bit mask and valid NaN binary encoding 7529 addl(rax, 1023); 7530 movl(rdx,rax); 7531 shll(rax,20); 7532 // Check that 0 < int(X)+1023 < 2047. Otherwise set rax to NaN. 7533 addl(rdx,1); 7534 // Check that 1 < int(X)+1023+1 < 2048 7535 // in 3 steps: 7536 // 1- (int(X)+1023+1)&-2048 == 0 => 0 <= int(X)+1023+1 < 2048 7537 // 2- (int(X)+1023+1)&-2048 != 0 7538 // 3- (int(X)+1023+1)&-2048 != 1 7539 // Do 2- first because addl just updated the flags. 7540 cmov32(Assembler::equal,rax,rcx); 7541 cmpl(rdx,1); 7542 cmov32(Assembler::equal,rax,rcx); 7543 testl(rdx,rcx); 7544 cmov32(Assembler::notEqual,rax,rcx); 7545 movl(Address(rsp,4),rax); 7546 movl(Address(rsp,0),0); 7547 fmul_d(Address(rsp,0)); // Stack: 2^X ... 7548 addptr(rsp,sizeof(jdouble)); 7549} 7550 7551void MacroAssembler::increase_precision() { 7552 subptr(rsp, BytesPerWord); 7553 fnstcw(Address(rsp, 0)); 7554 movl(rax, Address(rsp, 0)); 7555 orl(rax, 0x300); 7556 push(rax); 7557 fldcw(Address(rsp, 0)); 7558 pop(rax); 7559} 7560 7561void MacroAssembler::restore_precision() { 7562 fldcw(Address(rsp, 0)); 7563 addptr(rsp, BytesPerWord); 7564} 7565 7566void MacroAssembler::fast_pow() { 7567 // computes X^Y = 2^(Y * log2(X)) 7568 // if fast computation is not possible, result is NaN. Requires 7569 // fallback from user of this macro. 7570 // increase precision for intermediate steps of the computation 7571 increase_precision(); 7572 fyl2x(); // Stack: (Y*log2(X)) ... 7573 pow_exp_core_encoding(); // Stack: exp(X) ... 
7574 restore_precision(); 7575} 7576 7577void MacroAssembler::fast_exp() { 7578 // computes exp(X) = 2^(X * log2(e)) 7579 // if fast computation is not possible, result is NaN. Requires 7580 // fallback from user of this macro. 7581 // increase precision for intermediate steps of the computation 7582 increase_precision(); 7583 fldl2e(); // Stack: log2(e) X ... 7584 fmulp(1); // Stack: (X*log2(e)) ... 7585 pow_exp_core_encoding(); // Stack: exp(X) ... 7586 restore_precision(); 7587} 7588 7589void MacroAssembler::pow_or_exp(bool is_exp, int num_fpu_regs_in_use) { 7590 // kills rax, rcx, rdx 7591 // pow and exp needs 2 extra registers on the fpu stack. 7592 Label slow_case, done; 7593 Register tmp = noreg; 7594 if (!VM_Version::supports_cmov()) { 7595 // fcmp needs a temporary so preserve rdx, 7596 tmp = rdx; 7597 } 7598 Register tmp2 = rax; 7599 Register tmp3 = rcx; 7600 7601 if (is_exp) { 7602 // Stack: X 7603 fld_s(0); // duplicate argument for runtime call. Stack: X X 7604 fast_exp(); // Stack: exp(X) X 7605 fcmp(tmp, 0, false, false); // Stack: exp(X) X 7606 // exp(X) not equal to itself: exp(X) is NaN go to slow case. 7607 jcc(Assembler::parity, slow_case); 7608 // get rid of duplicate argument. Stack: exp(X) 7609 if (num_fpu_regs_in_use > 0) { 7610 fxch(); 7611 fpop(); 7612 } else { 7613 ffree(1); 7614 } 7615 jmp(done); 7616 } else { 7617 // Stack: X Y 7618 Label x_negative, y_odd; 7619 7620 fldz(); // Stack: 0 X Y 7621 fcmp(tmp, 1, true, false); // Stack: X Y 7622 jcc(Assembler::above, x_negative); 7623 7624 // X >= 0 7625 7626 fld_s(1); // duplicate arguments for runtime call. Stack: Y X Y 7627 fld_s(1); // Stack: X Y X Y 7628 fast_pow(); // Stack: X^Y X Y 7629 fcmp(tmp, 0, false, false); // Stack: X^Y X Y 7630 // X^Y not equal to itself: X^Y is NaN go to slow case. 7631 jcc(Assembler::parity, slow_case); 7632 // get rid of duplicate arguments. Stack: X^Y 7633 if (num_fpu_regs_in_use > 0) { 7634 fxch(); fpop(); 7635 fxch(); fpop(); 7636 } else { 7637 ffree(2); 7638 ffree(1); 7639 } 7640 jmp(done); 7641 7642 // X <= 0 7643 bind(x_negative); 7644 7645 fld_s(1); // Stack: Y X Y 7646 frndint(); // Stack: int(Y) X Y 7647 fcmp(tmp, 2, false, false); // Stack: int(Y) X Y 7648 jcc(Assembler::notEqual, slow_case); 7649 7650 subptr(rsp, 8); 7651 7652 // For X^Y, when X < 0, Y has to be an integer and the final 7653 // result depends on whether it's odd or even. We just checked 7654 // that int(Y) == Y. We move int(Y) to gp registers as a 64 bit 7655 // integer to test its parity. If int(Y) is huge and doesn't fit 7656 // in the 64 bit integer range, the integer indefinite value will 7657 // end up in the gp registers. Huge numbers are all even, the 7658 // integer indefinite number is even so it's fine. 7659 7660#ifdef ASSERT 7661 // Let's check we don't end up with an integer indefinite number 7662 // when not expected. First test for huge numbers: check whether 7663 // int(Y)+1 == int(Y) which is true for very large numbers and 7664 // those are all even. A 64 bit integer is guaranteed to not 7665 // overflow for numbers where y+1 != y (when precision is set to 7666 // double precision). 
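    // (Supporting fact for the "huge means even" claim above: any IEEE double
    // with magnitude >= 2^53 is an exact integer and the spacing between
    // neighboring representable values there is at least 2, so such values
    // are all even.)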
    Label y_not_huge;

    fld1();                    // Stack: 1 int(Y) X Y
    fadd(1);                   // Stack: 1+int(Y) int(Y) X Y

#ifdef _LP64
    // trip to memory to force the precision down from double extended
    // precision
    fstp_d(Address(rsp, 0));
    fld_d(Address(rsp, 0));
#endif

    fcmp(tmp, 1, true, false); // Stack: int(Y) X Y
#endif

    // move int(Y) as 64 bit integer to thread's stack
    fistp_d(Address(rsp,0));   // Stack: X Y

#ifdef ASSERT
    jcc(Assembler::notEqual, y_not_huge);

    // Y is huge so we know it's even. It may not fit in a 64 bit
    // integer and we don't want the debug code below to see the
    // integer indefinite value so overwrite int(Y) on the thread's
    // stack with 0.
    movl(Address(rsp, 0), 0);
    movl(Address(rsp, 4), 0);

    bind(y_not_huge);
#endif

    fld_s(1);                  // duplicate arguments for runtime call. Stack: Y X Y
    fld_s(1);                  // Stack: X Y X Y
    fabs();                    // Stack: abs(X) Y X Y
    fast_pow();                // Stack: abs(X)^Y X Y
    fcmp(tmp, 0, false, false); // Stack: abs(X)^Y X Y
    // abs(X)^Y not equal to itself: abs(X)^Y is NaN, go to slow case.

    pop(tmp2);
    NOT_LP64(pop(tmp3));
    jcc(Assembler::parity, slow_case);

#ifdef ASSERT
    // Check that int(Y) is not integer indefinite value (int
    // overflow). Shouldn't happen because for values that would
    // overflow, 1+int(Y)==Y which was tested earlier.
#ifndef _LP64
    {
      Label integer;
      testl(tmp2, tmp2);
      jcc(Assembler::notZero, integer);
      cmpl(tmp3, 0x80000000);
      jcc(Assembler::notZero, integer);
      STOP("integer indefinite value shouldn't be seen here");
      bind(integer);
    }
#else
    {
      Label integer;
      mov(tmp3, tmp2); // preserve tmp2 for parity check below
      shlq(tmp3, 1);
      jcc(Assembler::carryClear, integer);
      jcc(Assembler::notZero, integer);
      STOP("integer indefinite value shouldn't be seen here");
      bind(integer);
    }
#endif
#endif

    // get rid of duplicate arguments. Stack: X^Y
    if (num_fpu_regs_in_use > 0) {
      fxch(); fpop();
      fxch(); fpop();
    } else {
      ffree(2);
      ffree(1);
    }

    testl(tmp2, 1);
    jcc(Assembler::zero, done); // X <= 0, Y even: X^Y = abs(X)^Y
    // X <= 0, Y odd: X^Y = -abs(X)^Y

    fchs();                     // Stack: -abs(X)^Y Y
    jmp(done);
  }

  // slow case: runtime call
  bind(slow_case);

  fpop();                       // pop incorrect result or int(Y)

  fp_runtime_fallback(is_exp ? CAST_FROM_FN_PTR(address, SharedRuntime::dexp) : CAST_FROM_FN_PTR(address, SharedRuntime::dpow),
                      is_exp ? 1 : 2, num_fpu_regs_in_use);

  // Come here with result in F-TOS
  bind(done);
}

void MacroAssembler::fpop() {
  ffree();
  fincstp();
}

void MacroAssembler::fremr(Register tmp) {
  save_rax(tmp);
  { Label L;
    bind(L);
    fprem();
    fwait(); fnstsw_ax();
#ifdef _LP64
    testl(rax, 0x400);
    jcc(Assembler::notEqual, L);
#else
    sahf();
    jcc(Assembler::parity, L);
#endif // _LP64
  }
  restore_rax(tmp);
  // Result is in ST0.
7786 // Note: fxch & fpop to get rid of ST1 7787 // (otherwise FPU stack could overflow eventually) 7788 fxch(1); 7789 fpop(); 7790} 7791 7792 7793void MacroAssembler::incrementl(AddressLiteral dst) { 7794 if (reachable(dst)) { 7795 incrementl(as_Address(dst)); 7796 } else { 7797 lea(rscratch1, dst); 7798 incrementl(Address(rscratch1, 0)); 7799 } 7800} 7801 7802void MacroAssembler::incrementl(ArrayAddress dst) { 7803 incrementl(as_Address(dst)); 7804} 7805 7806void MacroAssembler::incrementl(Register reg, int value) { 7807 if (value == min_jint) {addl(reg, value) ; return; } 7808 if (value < 0) { decrementl(reg, -value); return; } 7809 if (value == 0) { ; return; } 7810 if (value == 1 && UseIncDec) { incl(reg) ; return; } 7811 /* else */ { addl(reg, value) ; return; } 7812} 7813 7814void MacroAssembler::incrementl(Address dst, int value) { 7815 if (value == min_jint) {addl(dst, value) ; return; } 7816 if (value < 0) { decrementl(dst, -value); return; } 7817 if (value == 0) { ; return; } 7818 if (value == 1 && UseIncDec) { incl(dst) ; return; } 7819 /* else */ { addl(dst, value) ; return; } 7820} 7821 7822void MacroAssembler::jump(AddressLiteral dst) { 7823 if (reachable(dst)) { 7824 jmp_literal(dst.target(), dst.rspec()); 7825 } else { 7826 lea(rscratch1, dst); 7827 jmp(rscratch1); 7828 } 7829} 7830 7831void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) { 7832 if (reachable(dst)) { 7833 InstructionMark im(this); 7834 relocate(dst.reloc()); 7835 const int short_size = 2; 7836 const int long_size = 6; 7837 int offs = (intptr_t)dst.target() - ((intptr_t)_code_pos); 7838 if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) { 7839 // 0111 tttn #8-bit disp 7840 emit_byte(0x70 | cc); 7841 emit_byte((offs - short_size) & 0xFF); 7842 } else { 7843 // 0000 1111 1000 tttn #32-bit disp 7844 emit_byte(0x0F); 7845 emit_byte(0x80 | cc); 7846 emit_long(offs - long_size); 7847 } 7848 } else { 7849#ifdef ASSERT 7850 warning("reversing conditional branch"); 7851#endif /* ASSERT */ 7852 Label skip; 7853 jccb(reverse[cc], skip); 7854 lea(rscratch1, dst); 7855 Assembler::jmp(rscratch1); 7856 bind(skip); 7857 } 7858} 7859 7860void MacroAssembler::ldmxcsr(AddressLiteral src) { 7861 if (reachable(src)) { 7862 Assembler::ldmxcsr(as_Address(src)); 7863 } else { 7864 lea(rscratch1, src); 7865 Assembler::ldmxcsr(Address(rscratch1, 0)); 7866 } 7867} 7868 7869int MacroAssembler::load_signed_byte(Register dst, Address src) { 7870 int off; 7871 if (LP64_ONLY(true ||) VM_Version::is_P6()) { 7872 off = offset(); 7873 movsbl(dst, src); // movsxb 7874 } else { 7875 off = load_unsigned_byte(dst, src); 7876 shll(dst, 24); 7877 sarl(dst, 24); 7878 } 7879 return off; 7880} 7881 7882// Note: load_signed_short used to be called load_signed_word. 7883// Although the 'w' in x86 opcodes refers to the term "word" in the assembler 7884// manual, which means 16 bits, that usage is found nowhere in HotSpot code. 7885// The term "word" in HotSpot means a 32- or 64-bit machine word. 7886int MacroAssembler::load_signed_short(Register dst, Address src) { 7887 int off; 7888 if (LP64_ONLY(true ||) VM_Version::is_P6()) { 7889 // This is dubious to me since it seems safe to do a signed 16 => 64 bit 7890 // version but this is what 64bit has always done. This seems to imply 7891 // that users are only using 32bits worth. 
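    // Example: a stored 16-bit value of 0xFFFF comes back as -1 in dst here
    // (movswl sign-extends 16 -> 32 bits); the non-P6 path below achieves the
    // same effect with a shift-left/arithmetic-shift-right pair.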
7892 off = offset(); 7893 movswl(dst, src); // movsxw 7894 } else { 7895 off = load_unsigned_short(dst, src); 7896 shll(dst, 16); 7897 sarl(dst, 16); 7898 } 7899 return off; 7900} 7901 7902int MacroAssembler::load_unsigned_byte(Register dst, Address src) { 7903 // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16, 7904 // and "3.9 Partial Register Penalties", p. 22). 7905 int off; 7906 if (LP64_ONLY(true || ) VM_Version::is_P6() || src.uses(dst)) { 7907 off = offset(); 7908 movzbl(dst, src); // movzxb 7909 } else { 7910 xorl(dst, dst); 7911 off = offset(); 7912 movb(dst, src); 7913 } 7914 return off; 7915} 7916 7917// Note: load_unsigned_short used to be called load_unsigned_word. 7918int MacroAssembler::load_unsigned_short(Register dst, Address src) { 7919 // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16, 7920 // and "3.9 Partial Register Penalties", p. 22). 7921 int off; 7922 if (LP64_ONLY(true ||) VM_Version::is_P6() || src.uses(dst)) { 7923 off = offset(); 7924 movzwl(dst, src); // movzxw 7925 } else { 7926 xorl(dst, dst); 7927 off = offset(); 7928 movw(dst, src); 7929 } 7930 return off; 7931} 7932 7933void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { 7934 switch (size_in_bytes) { 7935#ifndef _LP64 7936 case 8: 7937 assert(dst2 != noreg, "second dest register required"); 7938 movl(dst, src); 7939 movl(dst2, src.plus_disp(BytesPerInt)); 7940 break; 7941#else 7942 case 8: movq(dst, src); break; 7943#endif 7944 case 4: movl(dst, src); break; 7945 case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break; 7946 case 1: is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break; 7947 default: ShouldNotReachHere(); 7948 } 7949} 7950 7951void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { 7952 switch (size_in_bytes) { 7953#ifndef _LP64 7954 case 8: 7955 assert(src2 != noreg, "second source register required"); 7956 movl(dst, src); 7957 movl(dst.plus_disp(BytesPerInt), src2); 7958 break; 7959#else 7960 case 8: movq(dst, src); break; 7961#endif 7962 case 4: movl(dst, src); break; 7963 case 2: movw(dst, src); break; 7964 case 1: movb(dst, src); break; 7965 default: ShouldNotReachHere(); 7966 } 7967} 7968 7969void MacroAssembler::mov32(AddressLiteral dst, Register src) { 7970 if (reachable(dst)) { 7971 movl(as_Address(dst), src); 7972 } else { 7973 lea(rscratch1, dst); 7974 movl(Address(rscratch1, 0), src); 7975 } 7976} 7977 7978void MacroAssembler::mov32(Register dst, AddressLiteral src) { 7979 if (reachable(src)) { 7980 movl(dst, as_Address(src)); 7981 } else { 7982 lea(rscratch1, src); 7983 movl(dst, Address(rscratch1, 0)); 7984 } 7985} 7986 7987// C++ bool manipulation 7988 7989void MacroAssembler::movbool(Register dst, Address src) { 7990 if(sizeof(bool) == 1) 7991 movb(dst, src); 7992 else if(sizeof(bool) == 2) 7993 movw(dst, src); 7994 else if(sizeof(bool) == 4) 7995 movl(dst, src); 7996 else 7997 // unsupported 7998 ShouldNotReachHere(); 7999} 8000 8001void MacroAssembler::movbool(Address dst, bool boolconst) { 8002 if(sizeof(bool) == 1) 8003 movb(dst, (int) boolconst); 8004 else if(sizeof(bool) == 2) 8005 movw(dst, (int) boolconst); 8006 else if(sizeof(bool) == 4) 8007 movl(dst, (int) boolconst); 8008 else 8009 // unsupported 8010 ShouldNotReachHere(); 8011} 8012 8013void MacroAssembler::movbool(Address dst, Register src) { 8014 if(sizeof(bool) == 1) 8015 movb(dst, src); 8016 else 
if(sizeof(bool) == 2) 8017 movw(dst, src); 8018 else if(sizeof(bool) == 4) 8019 movl(dst, src); 8020 else 8021 // unsupported 8022 ShouldNotReachHere(); 8023} 8024 8025void MacroAssembler::movbyte(ArrayAddress dst, int src) { 8026 movb(as_Address(dst), src); 8027} 8028 8029void MacroAssembler::movdl(XMMRegister dst, AddressLiteral src) { 8030 if (reachable(src)) { 8031 movdl(dst, as_Address(src)); 8032 } else { 8033 lea(rscratch1, src); 8034 movdl(dst, Address(rscratch1, 0)); 8035 } 8036} 8037 8038void MacroAssembler::movq(XMMRegister dst, AddressLiteral src) { 8039 if (reachable(src)) { 8040 movq(dst, as_Address(src)); 8041 } else { 8042 lea(rscratch1, src); 8043 movq(dst, Address(rscratch1, 0)); 8044 } 8045} 8046 8047void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) { 8048 if (reachable(src)) { 8049 if (UseXmmLoadAndClearUpper) { 8050 movsd (dst, as_Address(src)); 8051 } else { 8052 movlpd(dst, as_Address(src)); 8053 } 8054 } else { 8055 lea(rscratch1, src); 8056 if (UseXmmLoadAndClearUpper) { 8057 movsd (dst, Address(rscratch1, 0)); 8058 } else { 8059 movlpd(dst, Address(rscratch1, 0)); 8060 } 8061 } 8062} 8063 8064void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) { 8065 if (reachable(src)) { 8066 movss(dst, as_Address(src)); 8067 } else { 8068 lea(rscratch1, src); 8069 movss(dst, Address(rscratch1, 0)); 8070 } 8071} 8072 8073void MacroAssembler::movptr(Register dst, Register src) { 8074 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); 8075} 8076 8077void MacroAssembler::movptr(Register dst, Address src) { 8078 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); 8079} 8080 8081// src should NEVER be a real pointer. Use AddressLiteral for true pointers 8082void MacroAssembler::movptr(Register dst, intptr_t src) { 8083 LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src)); 8084} 8085 8086void MacroAssembler::movptr(Address dst, Register src) { 8087 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); 8088} 8089 8090void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) { 8091 if (reachable(src)) { 8092 Assembler::movsd(dst, as_Address(src)); 8093 } else { 8094 lea(rscratch1, src); 8095 Assembler::movsd(dst, Address(rscratch1, 0)); 8096 } 8097} 8098 8099void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) { 8100 if (reachable(src)) { 8101 Assembler::movss(dst, as_Address(src)); 8102 } else { 8103 lea(rscratch1, src); 8104 Assembler::movss(dst, Address(rscratch1, 0)); 8105 } 8106} 8107 8108void MacroAssembler::mulsd(XMMRegister dst, AddressLiteral src) { 8109 if (reachable(src)) { 8110 Assembler::mulsd(dst, as_Address(src)); 8111 } else { 8112 lea(rscratch1, src); 8113 Assembler::mulsd(dst, Address(rscratch1, 0)); 8114 } 8115} 8116 8117void MacroAssembler::mulss(XMMRegister dst, AddressLiteral src) { 8118 if (reachable(src)) { 8119 Assembler::mulss(dst, as_Address(src)); 8120 } else { 8121 lea(rscratch1, src); 8122 Assembler::mulss(dst, Address(rscratch1, 0)); 8123 } 8124} 8125 8126void MacroAssembler::null_check(Register reg, int offset) { 8127 if (needs_explicit_null_check(offset)) { 8128 // provoke OS NULL exception if reg = NULL by 8129 // accessing M[reg] w/o changing any (non-CC) registers 8130 // NOTE: cmpl is plenty here to provoke a segv 8131 cmpptr(rax, Address(reg, 0)); 8132 // Note: should probably use testl(rax, Address(reg, 0)); 8133 // may be shorter code (however, this version of 8134 // testl needs to be implemented first) 8135 } else { 8136 // nothing to do, (later) access of M[reg + offset] 8137 // will provoke OS NULL 
exception if reg = NULL 8138 } 8139} 8140 8141void MacroAssembler::os_breakpoint() { 8142 // instead of directly emitting a breakpoint, call os:breakpoint for better debugability 8143 // (e.g., MSVC can't call ps() otherwise) 8144 call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint))); 8145} 8146 8147void MacroAssembler::pop_CPU_state() { 8148 pop_FPU_state(); 8149 pop_IU_state(); 8150} 8151 8152void MacroAssembler::pop_FPU_state() { 8153 NOT_LP64(frstor(Address(rsp, 0));) 8154 LP64_ONLY(fxrstor(Address(rsp, 0));) 8155 addptr(rsp, FPUStateSizeInWords * wordSize); 8156} 8157 8158void MacroAssembler::pop_IU_state() { 8159 popa(); 8160 LP64_ONLY(addq(rsp, 8)); 8161 popf(); 8162} 8163 8164// Save Integer and Float state 8165// Warning: Stack must be 16 byte aligned (64bit) 8166void MacroAssembler::push_CPU_state() { 8167 push_IU_state(); 8168 push_FPU_state(); 8169} 8170 8171void MacroAssembler::push_FPU_state() { 8172 subptr(rsp, FPUStateSizeInWords * wordSize); 8173#ifndef _LP64 8174 fnsave(Address(rsp, 0)); 8175 fwait(); 8176#else 8177 fxsave(Address(rsp, 0)); 8178#endif // LP64 8179} 8180 8181void MacroAssembler::push_IU_state() { 8182 // Push flags first because pusha kills them 8183 pushf(); 8184 // Make sure rsp stays 16-byte aligned 8185 LP64_ONLY(subq(rsp, 8)); 8186 pusha(); 8187} 8188 8189void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) { 8190 // determine java_thread register 8191 if (!java_thread->is_valid()) { 8192 java_thread = rdi; 8193 get_thread(java_thread); 8194 } 8195 // we must set sp to zero to clear frame 8196 movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), NULL_WORD); 8197 if (clear_fp) { 8198 movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), NULL_WORD); 8199 } 8200 8201 if (clear_pc) 8202 movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), NULL_WORD); 8203 8204} 8205 8206void MacroAssembler::restore_rax(Register tmp) { 8207 if (tmp == noreg) pop(rax); 8208 else if (tmp != rax) mov(rax, tmp); 8209} 8210 8211void MacroAssembler::round_to(Register reg, int modulus) { 8212 addptr(reg, modulus - 1); 8213 andptr(reg, -modulus); 8214} 8215 8216void MacroAssembler::save_rax(Register tmp) { 8217 if (tmp == noreg) push(rax); 8218 else if (tmp != rax) mov(tmp, rax); 8219} 8220 8221// Write serialization page so VM thread can do a pseudo remote membar. 8222// We use the current thread pointer to calculate a thread specific 8223// offset to write to within the page. This minimizes bus traffic 8224// due to cache line collision. 8225void MacroAssembler::serialize_memory(Register thread, Register tmp) { 8226 movl(tmp, thread); 8227 shrl(tmp, os::get_serialize_page_shift_count()); 8228 andl(tmp, (os::vm_page_size() - sizeof(int))); 8229 8230 Address index(noreg, tmp, Address::times_1); 8231 ExternalAddress page(os::get_memory_serialize_page()); 8232 8233 // Size of store must match masking code above 8234 movl(as_Address(ArrayAddress(page, index)), tmp); 8235} 8236 8237// Calls to C land 8238// 8239// When entering C land, the rbp, & rsp of the last Java frame have to be recorded 8240// in the (thread-local) JavaThread object. When leaving C land, the last Java fp 8241// has to be reset to 0. This is required to allow proper stack traversal. 
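// Note on ordering (see also reset_last_Java_frame above): last_Java_fp and
// last_Java_pc are written before last_Java_sp below, and the reset clears
// last_Java_sp first, presumably so that a stack walker observing a non-zero
// last_Java_sp always sees a consistent fp/pc pair.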
8242void MacroAssembler::set_last_Java_frame(Register java_thread, 8243 Register last_java_sp, 8244 Register last_java_fp, 8245 address last_java_pc) { 8246 // determine java_thread register 8247 if (!java_thread->is_valid()) { 8248 java_thread = rdi; 8249 get_thread(java_thread); 8250 } 8251 // determine last_java_sp register 8252 if (!last_java_sp->is_valid()) { 8253 last_java_sp = rsp; 8254 } 8255 8256 // last_java_fp is optional 8257 8258 if (last_java_fp->is_valid()) { 8259 movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), last_java_fp); 8260 } 8261 8262 // last_java_pc is optional 8263 8264 if (last_java_pc != NULL) { 8265 lea(Address(java_thread, 8266 JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()), 8267 InternalAddress(last_java_pc)); 8268 8269 } 8270 movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), last_java_sp); 8271} 8272 8273void MacroAssembler::shlptr(Register dst, int imm8) { 8274 LP64_ONLY(shlq(dst, imm8)) NOT_LP64(shll(dst, imm8)); 8275} 8276 8277void MacroAssembler::shrptr(Register dst, int imm8) { 8278 LP64_ONLY(shrq(dst, imm8)) NOT_LP64(shrl(dst, imm8)); 8279} 8280 8281void MacroAssembler::sign_extend_byte(Register reg) { 8282 if (LP64_ONLY(true ||) (VM_Version::is_P6() && reg->has_byte_register())) { 8283 movsbl(reg, reg); // movsxb 8284 } else { 8285 shll(reg, 24); 8286 sarl(reg, 24); 8287 } 8288} 8289 8290void MacroAssembler::sign_extend_short(Register reg) { 8291 if (LP64_ONLY(true ||) VM_Version::is_P6()) { 8292 movswl(reg, reg); // movsxw 8293 } else { 8294 shll(reg, 16); 8295 sarl(reg, 16); 8296 } 8297} 8298 8299void MacroAssembler::testl(Register dst, AddressLiteral src) { 8300 assert(reachable(src), "Address should be reachable"); 8301 testl(dst, as_Address(src)); 8302} 8303 8304void MacroAssembler::sqrtsd(XMMRegister dst, AddressLiteral src) { 8305 if (reachable(src)) { 8306 Assembler::sqrtsd(dst, as_Address(src)); 8307 } else { 8308 lea(rscratch1, src); 8309 Assembler::sqrtsd(dst, Address(rscratch1, 0)); 8310 } 8311} 8312 8313void MacroAssembler::sqrtss(XMMRegister dst, AddressLiteral src) { 8314 if (reachable(src)) { 8315 Assembler::sqrtss(dst, as_Address(src)); 8316 } else { 8317 lea(rscratch1, src); 8318 Assembler::sqrtss(dst, Address(rscratch1, 0)); 8319 } 8320} 8321 8322void MacroAssembler::subsd(XMMRegister dst, AddressLiteral src) { 8323 if (reachable(src)) { 8324 Assembler::subsd(dst, as_Address(src)); 8325 } else { 8326 lea(rscratch1, src); 8327 Assembler::subsd(dst, Address(rscratch1, 0)); 8328 } 8329} 8330 8331void MacroAssembler::subss(XMMRegister dst, AddressLiteral src) { 8332 if (reachable(src)) { 8333 Assembler::subss(dst, as_Address(src)); 8334 } else { 8335 lea(rscratch1, src); 8336 Assembler::subss(dst, Address(rscratch1, 0)); 8337 } 8338} 8339 8340void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) { 8341 if (reachable(src)) { 8342 Assembler::ucomisd(dst, as_Address(src)); 8343 } else { 8344 lea(rscratch1, src); 8345 Assembler::ucomisd(dst, Address(rscratch1, 0)); 8346 } 8347} 8348 8349void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) { 8350 if (reachable(src)) { 8351 Assembler::ucomiss(dst, as_Address(src)); 8352 } else { 8353 lea(rscratch1, src); 8354 Assembler::ucomiss(dst, Address(rscratch1, 0)); 8355 } 8356} 8357 8358void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) { 8359 // Used in sign-bit flipping with aligned address. 
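  // (Typically src names a 16-byte aligned constant with the sign bit set in
  // each 64-bit lane, e.g. 0x8000000000000000. The legacy SSE encoding of
  // xorpd faults on an unaligned memory operand, hence the assert below; the
  // AVX encoding has no such alignment requirement.)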
8360 assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); 8361 if (reachable(src)) { 8362 Assembler::xorpd(dst, as_Address(src)); 8363 } else { 8364 lea(rscratch1, src); 8365 Assembler::xorpd(dst, Address(rscratch1, 0)); 8366 } 8367} 8368 8369void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) { 8370 // Used in sign-bit flipping with aligned address. 8371 assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); 8372 if (reachable(src)) { 8373 Assembler::xorps(dst, as_Address(src)); 8374 } else { 8375 lea(rscratch1, src); 8376 Assembler::xorps(dst, Address(rscratch1, 0)); 8377 } 8378} 8379 8380// AVX 3-operands instructions 8381 8382void MacroAssembler::vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 8383 if (reachable(src)) { 8384 vaddsd(dst, nds, as_Address(src)); 8385 } else { 8386 lea(rscratch1, src); 8387 vaddsd(dst, nds, Address(rscratch1, 0)); 8388 } 8389} 8390 8391void MacroAssembler::vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 8392 if (reachable(src)) { 8393 vaddss(dst, nds, as_Address(src)); 8394 } else { 8395 lea(rscratch1, src); 8396 vaddss(dst, nds, Address(rscratch1, 0)); 8397 } 8398} 8399 8400void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) { 8401 if (reachable(src)) { 8402 vandpd(dst, nds, as_Address(src), vector256); 8403 } else { 8404 lea(rscratch1, src); 8405 vandpd(dst, nds, Address(rscratch1, 0), vector256); 8406 } 8407} 8408 8409void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) { 8410 if (reachable(src)) { 8411 vandps(dst, nds, as_Address(src), vector256); 8412 } else { 8413 lea(rscratch1, src); 8414 vandps(dst, nds, Address(rscratch1, 0), vector256); 8415 } 8416} 8417 8418void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 8419 if (reachable(src)) { 8420 vdivsd(dst, nds, as_Address(src)); 8421 } else { 8422 lea(rscratch1, src); 8423 vdivsd(dst, nds, Address(rscratch1, 0)); 8424 } 8425} 8426 8427void MacroAssembler::vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 8428 if (reachable(src)) { 8429 vdivss(dst, nds, as_Address(src)); 8430 } else { 8431 lea(rscratch1, src); 8432 vdivss(dst, nds, Address(rscratch1, 0)); 8433 } 8434} 8435 8436void MacroAssembler::vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 8437 if (reachable(src)) { 8438 vmulsd(dst, nds, as_Address(src)); 8439 } else { 8440 lea(rscratch1, src); 8441 vmulsd(dst, nds, Address(rscratch1, 0)); 8442 } 8443} 8444 8445void MacroAssembler::vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 8446 if (reachable(src)) { 8447 vmulss(dst, nds, as_Address(src)); 8448 } else { 8449 lea(rscratch1, src); 8450 vmulss(dst, nds, Address(rscratch1, 0)); 8451 } 8452} 8453 8454void MacroAssembler::vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 8455 if (reachable(src)) { 8456 vsubsd(dst, nds, as_Address(src)); 8457 } else { 8458 lea(rscratch1, src); 8459 vsubsd(dst, nds, Address(rscratch1, 0)); 8460 } 8461} 8462 8463void MacroAssembler::vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 8464 if (reachable(src)) { 8465 vsubss(dst, nds, as_Address(src)); 8466 } else { 8467 lea(rscratch1, src); 8468 vsubss(dst, nds, Address(rscratch1, 0)); 8469 } 8470} 8471 8472void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) { 8473 if (reachable(src)) { 
8474 vxorpd(dst, nds, as_Address(src), vector256); 8475 } else { 8476 lea(rscratch1, src); 8477 vxorpd(dst, nds, Address(rscratch1, 0), vector256); 8478 } 8479} 8480 8481void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) { 8482 if (reachable(src)) { 8483 vxorps(dst, nds, as_Address(src), vector256); 8484 } else { 8485 lea(rscratch1, src); 8486 vxorps(dst, nds, Address(rscratch1, 0), vector256); 8487 } 8488} 8489 8490 8491////////////////////////////////////////////////////////////////////////////////// 8492#ifndef SERIALGC 8493 8494void MacroAssembler::g1_write_barrier_pre(Register obj, 8495 Register pre_val, 8496 Register thread, 8497 Register tmp, 8498 bool tosca_live, 8499 bool expand_call) { 8500 8501 // If expand_call is true then we expand the call_VM_leaf macro 8502 // directly to skip generating the check by 8503 // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. 8504 8505#ifdef _LP64 8506 assert(thread == r15_thread, "must be"); 8507#endif // _LP64 8508 8509 Label done; 8510 Label runtime; 8511 8512 assert(pre_val != noreg, "check this code"); 8513 8514 if (obj != noreg) { 8515 assert_different_registers(obj, pre_val, tmp); 8516 assert(pre_val != rax, "check this code"); 8517 } 8518 8519 Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() + 8520 PtrQueue::byte_offset_of_active())); 8521 Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() + 8522 PtrQueue::byte_offset_of_index())); 8523 Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() + 8524 PtrQueue::byte_offset_of_buf())); 8525 8526 8527 // Is marking active? 8528 if (in_bytes(PtrQueue::byte_width_of_active()) == 4) { 8529 cmpl(in_progress, 0); 8530 } else { 8531 assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption"); 8532 cmpb(in_progress, 0); 8533 } 8534 jcc(Assembler::equal, done); 8535 8536 // Do we need to load the previous value? 8537 if (obj != noreg) { 8538 load_heap_oop(pre_val, Address(obj, 0)); 8539 } 8540 8541 // Is the previous value null? 8542 cmpptr(pre_val, (int32_t) NULL_WORD); 8543 jcc(Assembler::equal, done); 8544 8545 // Can we store original value in the thread's buffer? 8546 // Is index == 0? 8547 // (The index field is typed as size_t.) 8548 8549 movptr(tmp, index); // tmp := *index_adr 8550 cmpptr(tmp, 0); // tmp == 0? 8551 jcc(Assembler::equal, runtime); // If yes, goto runtime 8552 8553 subptr(tmp, wordSize); // tmp := tmp - wordSize 8554 movptr(index, tmp); // *index_adr := tmp 8555 addptr(tmp, buffer); // tmp := tmp + *buffer_adr 8556 8557 // Record the previous value 8558 movptr(Address(tmp, 0), pre_val); 8559 jmp(done); 8560 8561 bind(runtime); 8562 // save the live input values 8563 if(tosca_live) push(rax); 8564 8565 if (obj != noreg && obj != rax) 8566 push(obj); 8567 8568 if (pre_val != rax) 8569 push(pre_val); 8570 8571 // Calling the runtime using the regular call_VM_leaf mechanism generates 8572 // code (generated by InterpreterMacroAssember::call_VM_leaf_base) 8573 // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL. 8574 // 8575 // If we care generating the pre-barrier without a frame (e.g. in the 8576 // intrinsified Reference.get() routine) then ebp might be pointing to 8577 // the caller frame and so this check will most likely fail at runtime. 8578 // 8579 // Expanding the call directly bypasses the generation of the check. 
  // So when we do not have a full interpreter frame on the stack
  // expand_call should be passed true.

  NOT_LP64( push(thread); )

  if (expand_call) {
    LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); )
    pass_arg1(this, thread);
    pass_arg0(this, pre_val);
    MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);
  } else {
    call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
  }

  NOT_LP64( pop(thread); )

  // restore the live input values
  if (pre_val != rax)
    pop(pre_val);

  if (obj != noreg && obj != rax)
    pop(obj);

  if(tosca_live) pop(rax);

  bind(done);
}

void MacroAssembler::g1_write_barrier_post(Register store_addr,
                                           Register new_val,
                                           Register thread,
                                           Register tmp,
                                           Register tmp2) {
#ifdef _LP64
  assert(thread == r15_thread, "must be");
#endif // _LP64

  Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                       PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                  PtrQueue::byte_offset_of_buf()));

  BarrierSet* bs = Universe::heap()->barrier_set();
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  Label done;
  Label runtime;

  // Does store cross heap regions?

  movptr(tmp, store_addr);
  xorptr(tmp, new_val);
  shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
  jcc(Assembler::equal, done);

  // crosses regions, storing NULL?

  cmpptr(new_val, (int32_t) NULL_WORD);
  jcc(Assembler::equal, done);

  // storing region crossing non-NULL, is card already dirty?

  ExternalAddress cardtable((address) ct->byte_map_base);
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
#ifdef _LP64
  const Register card_addr = tmp;

  movq(card_addr, store_addr);
  shrq(card_addr, CardTableModRefBS::card_shift);

  lea(tmp2, cardtable);

  // get the address of the card
  addq(card_addr, tmp2);
#else
  const Register card_index = tmp;

  movl(card_index, store_addr);
  shrl(card_index, CardTableModRefBS::card_shift);

  Address index(noreg, card_index, Address::times_1);
  const Register card_addr = tmp;
  lea(card_addr, as_Address(ArrayAddress(cardtable, index)));
#endif
  cmpb(Address(card_addr, 0), 0);
  jcc(Assembler::equal, done);

  // storing a region crossing, non-NULL oop, card is clean.
  // dirty card and log.
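  // (What follows, in outline: store the dirty value -- 0 -- into the card,
  // then try to log the card address in the thread-local dirty card queue.
  // The queue index counts down in word-sized steps, so an index of 0 means
  // the buffer is full and we take the runtime path instead.)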
8668 8669 movb(Address(card_addr, 0), 0); 8670 8671 cmpl(queue_index, 0); 8672 jcc(Assembler::equal, runtime); 8673 subl(queue_index, wordSize); 8674 movptr(tmp2, buffer); 8675#ifdef _LP64 8676 movslq(rscratch1, queue_index); 8677 addq(tmp2, rscratch1); 8678 movq(Address(tmp2, 0), card_addr); 8679#else 8680 addl(tmp2, queue_index); 8681 movl(Address(tmp2, 0), card_index); 8682#endif 8683 jmp(done); 8684 8685 bind(runtime); 8686 // save the live input values 8687 push(store_addr); 8688 push(new_val); 8689#ifdef _LP64 8690 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread); 8691#else 8692 push(thread); 8693 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread); 8694 pop(thread); 8695#endif 8696 pop(new_val); 8697 pop(store_addr); 8698 8699 bind(done); 8700} 8701 8702#endif // SERIALGC 8703////////////////////////////////////////////////////////////////////////////////// 8704 8705 8706void MacroAssembler::store_check(Register obj) { 8707 // Does a store check for the oop in register obj. The content of 8708 // register obj is destroyed afterwards. 8709 store_check_part_1(obj); 8710 store_check_part_2(obj); 8711} 8712 8713void MacroAssembler::store_check(Register obj, Address dst) { 8714 store_check(obj); 8715} 8716 8717 8718// split the store check operation so that other instructions can be scheduled inbetween 8719void MacroAssembler::store_check_part_1(Register obj) { 8720 BarrierSet* bs = Universe::heap()->barrier_set(); 8721 assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); 8722 shrptr(obj, CardTableModRefBS::card_shift); 8723} 8724 8725void MacroAssembler::store_check_part_2(Register obj) { 8726 BarrierSet* bs = Universe::heap()->barrier_set(); 8727 assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); 8728 CardTableModRefBS* ct = (CardTableModRefBS*)bs; 8729 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); 8730 8731 // The calculation for byte_map_base is as follows: 8732 // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift); 8733 // So this essentially converts an address to a displacement and 8734 // it will never need to be relocated. On 64bit however the value may be too 8735 // large for a 32bit displacement 8736 8737 intptr_t disp = (intptr_t) ct->byte_map_base; 8738 if (is_simm32(disp)) { 8739 Address cardtable(noreg, obj, Address::times_1, disp); 8740 movb(cardtable, 0); 8741 } else { 8742 // By doing it as an ExternalAddress disp could be converted to a rip-relative 8743 // displacement and done in a single instruction given favorable mapping and 8744 // a smarter version of as_Address. Worst case it is two instructions which 8745 // is no worse off then loading disp into a register and doing as a simple 8746 // Address() as above. 8747 // We can't do as ExternalAddress as the only style since if disp == 0 we'll 8748 // assert since NULL isn't acceptable in a reloci (see 6644928). In any case 8749 // in some cases we'll get a single instruction version. 
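    // As in the simm32 case above, the byte written is
    //   byte_map_base + (addr >> card_shift)
    // (obj was already shifted in store_check_part_1); 0 marks the card dirty.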
8750 8751 ExternalAddress cardtable((address)disp); 8752 Address index(noreg, obj, Address::times_1); 8753 movb(as_Address(ArrayAddress(cardtable, index)), 0); 8754 } 8755} 8756 8757void MacroAssembler::subptr(Register dst, int32_t imm32) { 8758 LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32)); 8759} 8760 8761// Force generation of a 4 byte immediate value even if it fits into 8bit 8762void MacroAssembler::subptr_imm32(Register dst, int32_t imm32) { 8763 LP64_ONLY(subq_imm32(dst, imm32)) NOT_LP64(subl_imm32(dst, imm32)); 8764} 8765 8766void MacroAssembler::subptr(Register dst, Register src) { 8767 LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); 8768} 8769 8770// C++ bool manipulation 8771void MacroAssembler::testbool(Register dst) { 8772 if(sizeof(bool) == 1) 8773 testb(dst, 0xff); 8774 else if(sizeof(bool) == 2) { 8775 // testw implementation needed for two byte bools 8776 ShouldNotReachHere(); 8777 } else if(sizeof(bool) == 4) 8778 testl(dst, dst); 8779 else 8780 // unsupported 8781 ShouldNotReachHere(); 8782} 8783 8784void MacroAssembler::testptr(Register dst, Register src) { 8785 LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src)); 8786} 8787 8788// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. 8789void MacroAssembler::tlab_allocate(Register obj, 8790 Register var_size_in_bytes, 8791 int con_size_in_bytes, 8792 Register t1, 8793 Register t2, 8794 Label& slow_case) { 8795 assert_different_registers(obj, t1, t2); 8796 assert_different_registers(obj, var_size_in_bytes, t1); 8797 Register end = t2; 8798 Register thread = NOT_LP64(t1) LP64_ONLY(r15_thread); 8799 8800 verify_tlab(); 8801 8802 NOT_LP64(get_thread(thread)); 8803 8804 movptr(obj, Address(thread, JavaThread::tlab_top_offset())); 8805 if (var_size_in_bytes == noreg) { 8806 lea(end, Address(obj, con_size_in_bytes)); 8807 } else { 8808 lea(end, Address(obj, var_size_in_bytes, Address::times_1)); 8809 } 8810 cmpptr(end, Address(thread, JavaThread::tlab_end_offset())); 8811 jcc(Assembler::above, slow_case); 8812 8813 // update the tlab top pointer 8814 movptr(Address(thread, JavaThread::tlab_top_offset()), end); 8815 8816 // recover var_size_in_bytes if necessary 8817 if (var_size_in_bytes == end) { 8818 subptr(var_size_in_bytes, obj); 8819 } 8820 verify_tlab(); 8821} 8822 8823// Preserves rbx, and rdx. 8824Register MacroAssembler::tlab_refill(Label& retry, 8825 Label& try_eden, 8826 Label& slow_case) { 8827 Register top = rax; 8828 Register t1 = rcx; 8829 Register t2 = rsi; 8830 Register thread_reg = NOT_LP64(rdi) LP64_ONLY(r15_thread); 8831 assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx); 8832 Label do_refill, discard_tlab; 8833 8834 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { 8835 // No allocation in the shared eden. 8836 jmp(slow_case); 8837 } 8838 8839 NOT_LP64(get_thread(thread_reg)); 8840 8841 movptr(top, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); 8842 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset()))); 8843 8844 // calculate amount of free space 8845 subptr(t1, top); 8846 shrptr(t1, LogHeapWordSize); 8847 8848 // Retain tlab and allocate object in shared space if 8849 // the amount free in the tlab is too large to discard. 8850 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()))); 8851 jcc(Assembler::lessEqual, discard_tlab); 8852 8853 // Retain 8854 // %%% yuck as movptr... 
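  // Keeping the TLAB: bump its refill waste limit (the limit, like t1 above,
  // is measured in heap words), presumably so that a TLAB which keeps sending
  // us down this slow path eventually becomes eligible for discarding.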
8855 movptr(t2, (int32_t) ThreadLocalAllocBuffer::refill_waste_limit_increment()); 8856 addptr(Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())), t2); 8857 if (TLABStats) { 8858 // increment number of slow_allocations 8859 addl(Address(thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())), 1); 8860 } 8861 jmp(try_eden); 8862 8863 bind(discard_tlab); 8864 if (TLABStats) { 8865 // increment number of refills 8866 addl(Address(thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())), 1); 8867 // accumulate wastage -- t1 is amount free in tlab 8868 addl(Address(thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())), t1); 8869 } 8870 8871 // if tlab is currently allocated (top or end != null) then 8872 // fill [top, end + alignment_reserve) with array object 8873 testptr(top, top); 8874 jcc(Assembler::zero, do_refill); 8875 8876 // set up the mark word 8877 movptr(Address(top, oopDesc::mark_offset_in_bytes()), (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2)); 8878 // set the length to the remaining space 8879 subptr(t1, typeArrayOopDesc::header_size(T_INT)); 8880 addptr(t1, (int32_t)ThreadLocalAllocBuffer::alignment_reserve()); 8881 shlptr(t1, log2_intptr(HeapWordSize/sizeof(jint))); 8882 movl(Address(top, arrayOopDesc::length_offset_in_bytes()), t1); 8883 // set klass to intArrayKlass 8884 // dubious reloc why not an oop reloc? 8885 movptr(t1, ExternalAddress((address)Universe::intArrayKlassObj_addr())); 8886 // store klass last. concurrent gcs assumes klass length is valid if 8887 // klass field is not null. 8888 store_klass(top, t1); 8889 8890 movptr(t1, top); 8891 subptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset()))); 8892 incr_allocated_bytes(thread_reg, t1, 0); 8893 8894 // refill the tlab with an eden allocation 8895 bind(do_refill); 8896 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset()))); 8897 shlptr(t1, LogHeapWordSize); 8898 // allocate new tlab, address returned in top 8899 eden_allocate(top, t1, 0, t2, slow_case); 8900 8901 // Check that t1 was preserved in eden_allocate. 
8902#ifdef ASSERT 8903 if (UseTLAB) { 8904 Label ok; 8905 Register tsize = rsi; 8906 assert_different_registers(tsize, thread_reg, t1); 8907 push(tsize); 8908 movptr(tsize, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset()))); 8909 shlptr(tsize, LogHeapWordSize); 8910 cmpptr(t1, tsize); 8911 jcc(Assembler::equal, ok); 8912 STOP("assert(t1 != tlab size)"); 8913 should_not_reach_here(); 8914 8915 bind(ok); 8916 pop(tsize); 8917 } 8918#endif 8919 movptr(Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())), top); 8920 movptr(Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())), top); 8921 addptr(top, t1); 8922 subptr(top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes()); 8923 movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top); 8924 verify_tlab(); 8925 jmp(retry); 8926 8927 return thread_reg; // for use by caller 8928} 8929 8930void MacroAssembler::incr_allocated_bytes(Register thread, 8931 Register var_size_in_bytes, 8932 int con_size_in_bytes, 8933 Register t1) { 8934 if (!thread->is_valid()) { 8935#ifdef _LP64 8936 thread = r15_thread; 8937#else 8938 assert(t1->is_valid(), "need temp reg"); 8939 thread = t1; 8940 get_thread(thread); 8941#endif 8942 } 8943 8944#ifdef _LP64 8945 if (var_size_in_bytes->is_valid()) { 8946 addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes); 8947 } else { 8948 addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes); 8949 } 8950#else 8951 if (var_size_in_bytes->is_valid()) { 8952 addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes); 8953 } else { 8954 addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes); 8955 } 8956 adcl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())+4), 0); 8957#endif 8958} 8959 8960void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use) { 8961 pusha(); 8962 8963 // if we are coming from c1, xmm registers may be live 8964 int off = 0; 8965 if (UseSSE == 1) { 8966 subptr(rsp, sizeof(jdouble)*8); 8967 movflt(Address(rsp,off++*sizeof(jdouble)),xmm0); 8968 movflt(Address(rsp,off++*sizeof(jdouble)),xmm1); 8969 movflt(Address(rsp,off++*sizeof(jdouble)),xmm2); 8970 movflt(Address(rsp,off++*sizeof(jdouble)),xmm3); 8971 movflt(Address(rsp,off++*sizeof(jdouble)),xmm4); 8972 movflt(Address(rsp,off++*sizeof(jdouble)),xmm5); 8973 movflt(Address(rsp,off++*sizeof(jdouble)),xmm6); 8974 movflt(Address(rsp,off++*sizeof(jdouble)),xmm7); 8975 } else if (UseSSE >= 2) { 8976#ifdef COMPILER2 8977 if (MaxVectorSize > 16) { 8978 assert(UseAVX > 0, "256bit vectors are supported only with AVX"); 8979 // Save upper half of YMM registes 8980 subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8)); 8981 vextractf128h(Address(rsp, 0),xmm0); 8982 vextractf128h(Address(rsp, 16),xmm1); 8983 vextractf128h(Address(rsp, 32),xmm2); 8984 vextractf128h(Address(rsp, 48),xmm3); 8985 vextractf128h(Address(rsp, 64),xmm4); 8986 vextractf128h(Address(rsp, 80),xmm5); 8987 vextractf128h(Address(rsp, 96),xmm6); 8988 vextractf128h(Address(rsp,112),xmm7); 8989#ifdef _LP64 8990 vextractf128h(Address(rsp,128),xmm8); 8991 vextractf128h(Address(rsp,144),xmm9); 8992 vextractf128h(Address(rsp,160),xmm10); 8993 vextractf128h(Address(rsp,176),xmm11); 8994 vextractf128h(Address(rsp,192),xmm12); 8995 vextractf128h(Address(rsp,208),xmm13); 8996 vextractf128h(Address(rsp,224),xmm14); 8997 vextractf128h(Address(rsp,240),xmm15); 8998#endif 8999 } 9000#endif 
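    // Rough save layout: the upper YMM halves (when saved above) occupy the
    // block starting at rsp; the next subptr/movdqu sequence stores the full
    // 128-bit XMM registers below them, and both areas are released in
    // reverse order once the runtime call returns.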
9001 // Save whole 128bit (16 bytes) XMM regiters 9002 subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8)); 9003 movdqu(Address(rsp,off++*16),xmm0); 9004 movdqu(Address(rsp,off++*16),xmm1); 9005 movdqu(Address(rsp,off++*16),xmm2); 9006 movdqu(Address(rsp,off++*16),xmm3); 9007 movdqu(Address(rsp,off++*16),xmm4); 9008 movdqu(Address(rsp,off++*16),xmm5); 9009 movdqu(Address(rsp,off++*16),xmm6); 9010 movdqu(Address(rsp,off++*16),xmm7); 9011#ifdef _LP64 9012 movdqu(Address(rsp,off++*16),xmm8); 9013 movdqu(Address(rsp,off++*16),xmm9); 9014 movdqu(Address(rsp,off++*16),xmm10); 9015 movdqu(Address(rsp,off++*16),xmm11); 9016 movdqu(Address(rsp,off++*16),xmm12); 9017 movdqu(Address(rsp,off++*16),xmm13); 9018 movdqu(Address(rsp,off++*16),xmm14); 9019 movdqu(Address(rsp,off++*16),xmm15); 9020#endif 9021 } 9022 9023 // Preserve registers across runtime call 9024 int incoming_argument_and_return_value_offset = -1; 9025 if (num_fpu_regs_in_use > 1) { 9026 // Must preserve all other FPU regs (could alternatively convert 9027 // SharedRuntime::dsin, dcos etc. into assembly routines known not to trash 9028 // FPU state, but can not trust C compiler) 9029 NEEDS_CLEANUP; 9030 // NOTE that in this case we also push the incoming argument(s) to 9031 // the stack and restore it later; we also use this stack slot to 9032 // hold the return value from dsin, dcos etc. 9033 for (int i = 0; i < num_fpu_regs_in_use; i++) { 9034 subptr(rsp, sizeof(jdouble)); 9035 fstp_d(Address(rsp, 0)); 9036 } 9037 incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1); 9038 for (int i = nb_args-1; i >= 0; i--) { 9039 fld_d(Address(rsp, incoming_argument_and_return_value_offset-i*sizeof(jdouble))); 9040 } 9041 } 9042 9043 subptr(rsp, nb_args*sizeof(jdouble)); 9044 for (int i = 0; i < nb_args; i++) { 9045 fstp_d(Address(rsp, i*sizeof(jdouble))); 9046 } 9047 9048#ifdef _LP64 9049 if (nb_args > 0) { 9050 movdbl(xmm0, Address(rsp, 0)); 9051 } 9052 if (nb_args > 1) { 9053 movdbl(xmm1, Address(rsp, sizeof(jdouble))); 9054 } 9055 assert(nb_args <= 2, "unsupported number of args"); 9056#endif // _LP64 9057 9058 // NOTE: we must not use call_VM_leaf here because that requires a 9059 // complete interpreter frame in debug mode -- same bug as 4387334 9060 // MacroAssembler::call_VM_leaf_base is perfectly safe and will 9061 // do proper 64bit abi 9062 9063 NEEDS_CLEANUP; 9064 // Need to add stack banging before this runtime call if it needs to 9065 // be taken; however, there is no generic stack banging routine at 9066 // the MacroAssembler level 9067 9068 MacroAssembler::call_VM_leaf_base(runtime_entry, 0); 9069 9070#ifdef _LP64 9071 movsd(Address(rsp, 0), xmm0); 9072 fld_d(Address(rsp, 0)); 9073#endif // _LP64 9074 addptr(rsp, sizeof(jdouble) * nb_args); 9075 if (num_fpu_regs_in_use > 1) { 9076 // Must save return value to stack and then restore entire FPU 9077 // stack except incoming arguments 9078 fstp_d(Address(rsp, incoming_argument_and_return_value_offset)); 9079 for (int i = 0; i < num_fpu_regs_in_use - nb_args; i++) { 9080 fld_d(Address(rsp, 0)); 9081 addptr(rsp, sizeof(jdouble)); 9082 } 9083 fld_d(Address(rsp, (nb_args-1)*sizeof(jdouble))); 9084 addptr(rsp, sizeof(jdouble) * nb_args); 9085 } 9086 9087 off = 0; 9088 if (UseSSE == 1) { 9089 movflt(xmm0, Address(rsp,off++*sizeof(jdouble))); 9090 movflt(xmm1, Address(rsp,off++*sizeof(jdouble))); 9091 movflt(xmm2, Address(rsp,off++*sizeof(jdouble))); 9092 movflt(xmm3, Address(rsp,off++*sizeof(jdouble))); 9093 movflt(xmm4, Address(rsp,off++*sizeof(jdouble))); 9094 
movflt(xmm5, Address(rsp,off++*sizeof(jdouble))); 9095 movflt(xmm6, Address(rsp,off++*sizeof(jdouble))); 9096 movflt(xmm7, Address(rsp,off++*sizeof(jdouble))); 9097 addptr(rsp, sizeof(jdouble)*8); 9098 } else if (UseSSE >= 2) { 9099 // Restore whole 128bit (16 bytes) XMM registers 9100 movdqu(xmm0, Address(rsp,off++*16)); 9101 movdqu(xmm1, Address(rsp,off++*16)); 9102 movdqu(xmm2, Address(rsp,off++*16)); 9103 movdqu(xmm3, Address(rsp,off++*16)); 9104 movdqu(xmm4, Address(rsp,off++*16)); 9105 movdqu(xmm5, Address(rsp,off++*16)); 9106 movdqu(xmm6, Address(rsp,off++*16)); 9107 movdqu(xmm7, Address(rsp,off++*16)); 9108#ifdef _LP64 9109 movdqu(xmm8, Address(rsp,off++*16)); 9110 movdqu(xmm9, Address(rsp,off++*16)); 9111 movdqu(xmm10, Address(rsp,off++*16)); 9112 movdqu(xmm11, Address(rsp,off++*16)); 9113 movdqu(xmm12, Address(rsp,off++*16)); 9114 movdqu(xmm13, Address(rsp,off++*16)); 9115 movdqu(xmm14, Address(rsp,off++*16)); 9116 movdqu(xmm15, Address(rsp,off++*16)); 9117#endif 9118 addptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8)); 9119#ifdef COMPILER2 9120 if (MaxVectorSize > 16) { 9121 // Restore upper half of YMM registers. 9122 vinsertf128h(xmm0, Address(rsp, 0)); 9123 vinsertf128h(xmm1, Address(rsp, 16)); 9124 vinsertf128h(xmm2, Address(rsp, 32)); 9125 vinsertf128h(xmm3, Address(rsp, 48)); 9126 vinsertf128h(xmm4, Address(rsp, 64)); 9127 vinsertf128h(xmm5, Address(rsp, 80)); 9128 vinsertf128h(xmm6, Address(rsp, 96)); 9129 vinsertf128h(xmm7, Address(rsp,112)); 9130#ifdef _LP64 9131 vinsertf128h(xmm8, Address(rsp,128)); 9132 vinsertf128h(xmm9, Address(rsp,144)); 9133 vinsertf128h(xmm10, Address(rsp,160)); 9134 vinsertf128h(xmm11, Address(rsp,176)); 9135 vinsertf128h(xmm12, Address(rsp,192)); 9136 vinsertf128h(xmm13, Address(rsp,208)); 9137 vinsertf128h(xmm14, Address(rsp,224)); 9138 vinsertf128h(xmm15, Address(rsp,240)); 9139#endif 9140 addptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8)); 9141 } 9142#endif 9143 } 9144 popa(); 9145} 9146 9147static const double pi_4 = 0.7853981633974483; 9148 9149void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) { 9150 // A hand-coded argument reduction for values in fabs(pi/4, pi/2) 9151 // was attempted in this code; unfortunately it appears that the 9152 // switch to 80-bit precision and back causes this to be 9153 // unprofitable compared with simply performing a runtime call if 9154 // the argument is out of the (-pi/4, pi/4) range.
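  // In C-like terms the strategy below is roughly (a sketch; 's', 'c' and 't'
  // select sin, cos and tan):
  //   if (fabs(x) <= pi/4)  use the x87 fsin/fcos/ftan instruction directly;
  //   else                  call SharedRuntime::dsin/dcos/dtan through
  //                         fp_runtime_fallback(), preserving live registers.
  // When cmov is unavailable, rbx is borrowed as a temporary for fcmp and
  // restored before returning.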
9155 9156 Register tmp = noreg; 9157 if (!VM_Version::supports_cmov()) { 9158 // fcmp needs a temporary so preserve rbx, 9159 tmp = rbx; 9160 push(tmp); 9161 } 9162 9163 Label slow_case, done; 9164 9165 ExternalAddress pi4_adr = (address)&pi_4; 9166 if (reachable(pi4_adr)) { 9167 // x ?<= pi/4 9168 fld_d(pi4_adr); 9169 fld_s(1); // Stack: X PI/4 X 9170 fabs(); // Stack: |X| PI/4 X 9171 fcmp(tmp); 9172 jcc(Assembler::above, slow_case); 9173 9174 // fastest case: -pi/4 <= x <= pi/4 9175 switch(trig) { 9176 case 's': 9177 fsin(); 9178 break; 9179 case 'c': 9180 fcos(); 9181 break; 9182 case 't': 9183 ftan(); 9184 break; 9185 default: 9186 assert(false, "bad intrinsic"); 9187 break; 9188 } 9189 jmp(done); 9190 } 9191 9192 // slow case: runtime call 9193 bind(slow_case); 9194 9195 switch(trig) { 9196 case 's': 9197 { 9198 fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 1, num_fpu_regs_in_use); 9199 } 9200 break; 9201 case 'c': 9202 { 9203 fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 1, num_fpu_regs_in_use); 9204 } 9205 break; 9206 case 't': 9207 { 9208 fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 1, num_fpu_regs_in_use); 9209 } 9210 break; 9211 default: 9212 assert(false, "bad intrinsic"); 9213 break; 9214 } 9215 9216 // Come here with result in F-TOS 9217 bind(done); 9218 9219 if (tmp != noreg) { 9220 pop(tmp); 9221 } 9222} 9223 9224 9225// Look up the method for a megamorphic invokeinterface call. 9226// The target method is determined by <intf_klass, itable_index>. 9227// The receiver klass is in recv_klass. 9228// On success, the result will be in method_result, and execution falls through. 9229// On failure, execution transfers to the given label. 9230void MacroAssembler::lookup_interface_method(Register recv_klass, 9231 Register intf_klass, 9232 RegisterOrConstant itable_index, 9233 Register method_result, 9234 Register scan_temp, 9235 Label& L_no_such_interface) { 9236 assert_different_registers(recv_klass, intf_klass, method_result, scan_temp); 9237 assert(itable_index.is_constant() || itable_index.as_register() == method_result, 9238 "caller must use same register for non-constant itable index as for method"); 9239 9240 // Compute start of first itableOffsetEntry (which is at the end of the vtable) 9241 int vtable_base = InstanceKlass::vtable_start_offset() * wordSize; 9242 int itentry_off = itableMethodEntry::method_offset_in_bytes(); 9243 int scan_step = itableOffsetEntry::size() * wordSize; 9244 int vte_size = vtableEntry::size() * wordSize; 9245 Address::ScaleFactor times_vte_scale = Address::times_ptr; 9246 assert(vte_size == wordSize, "else adjust times_vte_scale"); 9247 9248 movl(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize)); 9249 9250 // %%% Could store the aligned, prescaled offset in the klassoop. 9251 lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base)); 9252 if (HeapWordsPerLong > 1) { 9253 // Round up to align_object_offset boundary 9254 // see code for InstanceKlass::start_of_itable! 9255 round_to(scan_temp, BytesPerLong); 9256 } 9257 9258 // Adjust recv_klass by scaled itable_index, so we can free itable_index. 
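  // (In C-like terms, the address arithmetic so far is roughly:
  //    scan_temp  = recv_klass + vtable_start_offset*wordSize
  //                            + vtable_length*wordSize, rounded up to BytesPerLong
  //                                                      // first itableOffsetEntry
  //    recv_klass = recv_klass + itable_index*wordSize + itentry_off
  //                                                      // see the lea below
  //  a sketch only; InstanceKlass::start_of_itable() is the authoritative layout.)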
9259 assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); 9260 lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off)); 9261 9262 // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) { 9263 // if (scan->interface() == intf) { 9264 // result = (klass + scan->offset() + itable_index); 9265 // } 9266 // } 9267 Label search, found_method; 9268 9269 for (int peel = 1; peel >= 0; peel--) { 9270 movptr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); 9271 cmpptr(intf_klass, method_result); 9272 9273 if (peel) { 9274 jccb(Assembler::equal, found_method); 9275 } else { 9276 jccb(Assembler::notEqual, search); 9277 // (invert the test to fall through to found_method...) 9278 } 9279 9280 if (!peel) break; 9281 9282 bind(search); 9283 9284 // Check that the previous entry is non-null. A null entry means that 9285 // the receiver class doesn't implement the interface, and wasn't the 9286 // same as when the caller was compiled. 9287 testptr(method_result, method_result); 9288 jcc(Assembler::zero, L_no_such_interface); 9289 addptr(scan_temp, scan_step); 9290 } 9291 9292 bind(found_method); 9293 9294 // Got a hit. 9295 movl(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes())); 9296 movptr(method_result, Address(recv_klass, scan_temp, Address::times_1)); 9297} 9298 9299 9300// virtual method calling 9301void MacroAssembler::lookup_virtual_method(Register recv_klass, 9302 RegisterOrConstant vtable_index, 9303 Register method_result) { 9304 const int base = InstanceKlass::vtable_start_offset() * wordSize; 9305 assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below"); 9306 Address vtable_entry_addr(recv_klass, 9307 vtable_index, Address::times_ptr, 9308 base + vtableEntry::method_offset_in_bytes()); 9309 movptr(method_result, vtable_entry_addr); 9310} 9311 9312 9313void MacroAssembler::check_klass_subtype(Register sub_klass, 9314 Register super_klass, 9315 Register temp_reg, 9316 Label& L_success) { 9317 Label L_failure; 9318 check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL); 9319 check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL); 9320 bind(L_failure); 9321} 9322 9323 9324void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, 9325 Register super_klass, 9326 Register temp_reg, 9327 Label* L_success, 9328 Label* L_failure, 9329 Label* L_slow_path, 9330 RegisterOrConstant super_check_offset) { 9331 assert_different_registers(sub_klass, super_klass, temp_reg); 9332 bool must_load_sco = (super_check_offset.constant_or_zero() == -1); 9333 if (super_check_offset.is_register()) { 9334 assert_different_registers(sub_klass, super_klass, 9335 super_check_offset.as_register()); 9336 } else if (must_load_sco) { 9337 assert(temp_reg != noreg, "supply either a temp or a register offset"); 9338 } 9339 9340 Label L_fallthrough; 9341 int label_nulls = 0; 9342 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } 9343 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } 9344 if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } 9345 assert(label_nulls <= 1, "at most one NULL in the batch"); 9346 9347 int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 9348 int sco_offset = in_bytes(Klass::super_check_offset_offset()); 9349 Address super_check_offset_addr(super_klass, 
sco_offset); 9350 9351 // Hacked jcc, which "knows" that L_fallthrough, at least, is in 9352 // range of a jccb. If this routine grows larger, reconsider at 9353 // least some of these. 9354#define local_jcc(assembler_cond, label) \ 9355 if (&(label) == &L_fallthrough) jccb(assembler_cond, label); \ 9356 else jcc( assembler_cond, label) /*omit semi*/ 9357 9358 // Hacked jmp, which may only be used just before L_fallthrough. 9359#define final_jmp(label) \ 9360 if (&(label) == &L_fallthrough) { /*do nothing*/ } \ 9361 else jmp(label) /*omit semi*/ 9362 9363 // If the pointers are equal, we are done (e.g., String[] elements). 9364 // This self-check enables sharing of secondary supertype arrays among 9365 // non-primary types such as array-of-interface. Otherwise, each such 9366 // type would need its own customized SSA. 9367 // We move this check to the front of the fast path because many 9368 // type checks are in fact trivially successful in this manner, 9369 // so we get a nicely predicted branch right at the start of the check. 9370 cmpptr(sub_klass, super_klass); 9371 local_jcc(Assembler::equal, *L_success); 9372 9373 // Check the supertype display: 9374 if (must_load_sco) { 9375 // Positive movl does right thing on LP64. 9376 movl(temp_reg, super_check_offset_addr); 9377 super_check_offset = RegisterOrConstant(temp_reg); 9378 } 9379 Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0); 9380 cmpptr(super_klass, super_check_addr); // load displayed supertype 9381 9382 // This check has worked decisively for primary supers. 9383 // Secondary supers are sought in the super_cache ('super_cache_addr'). 9384 // (Secondary supers are interfaces and very deeply nested subtypes.) 9385 // This works in the same check above because of a tricky aliasing 9386 // between the super_cache and the primary super display elements. 9387 // (The 'super_check_addr' can address either, as the case requires.) 9388 // Note that the cache is updated below if it does not help us find 9389 // what we need immediately. 9390 // So if it was a primary super, we can just fail immediately. 9391 // Otherwise, it's the slow path for us (no success at this point). 9392 9393 if (super_check_offset.is_register()) { 9394 local_jcc(Assembler::equal, *L_success); 9395 cmpl(super_check_offset.as_register(), sc_offset); 9396 if (L_failure == &L_fallthrough) { 9397 local_jcc(Assembler::equal, *L_slow_path); 9398 } else { 9399 local_jcc(Assembler::notEqual, *L_failure); 9400 final_jmp(*L_slow_path); 9401 } 9402 } else if (super_check_offset.as_constant() == sc_offset) { 9403 // Need a slow path; fast failure is impossible. 9404 if (L_slow_path == &L_fallthrough) { 9405 local_jcc(Assembler::equal, *L_success); 9406 } else { 9407 local_jcc(Assembler::notEqual, *L_slow_path); 9408 final_jmp(*L_success); 9409 } 9410 } else { 9411 // No slow path; it's a fast decision. 
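  // (Here the constant super_check_offset points into the primary supers display
  //  rather than at the secondary_super_cache, so the cmpptr above decides the
  //  outcome outright: equal means L_success, not-equal means L_failure.)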
9412 if (L_failure == &L_fallthrough) { 9413 local_jcc(Assembler::equal, *L_success); 9414 } else { 9415 local_jcc(Assembler::notEqual, *L_failure); 9416 final_jmp(*L_success); 9417 } 9418 } 9419 9420 bind(L_fallthrough); 9421 9422#undef local_jcc 9423#undef final_jmp 9424} 9425 9426 9427void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, 9428 Register super_klass, 9429 Register temp_reg, 9430 Register temp2_reg, 9431 Label* L_success, 9432 Label* L_failure, 9433 bool set_cond_codes) { 9434 assert_different_registers(sub_klass, super_klass, temp_reg); 9435 if (temp2_reg != noreg) 9436 assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg); 9437#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg) 9438 9439 Label L_fallthrough; 9440 int label_nulls = 0; 9441 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } 9442 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } 9443 assert(label_nulls <= 1, "at most one NULL in the batch"); 9444 9445 // a couple of useful fields in sub_klass: 9446 int ss_offset = in_bytes(Klass::secondary_supers_offset()); 9447 int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 9448 Address secondary_supers_addr(sub_klass, ss_offset); 9449 Address super_cache_addr( sub_klass, sc_offset); 9450 9451 // Do a linear scan of the secondary super-klass chain. 9452 // This code is rarely used, so simplicity is a virtue here. 9453 // The repne_scan instruction uses fixed registers, which we must spill. 9454 // Don't worry too much about pre-existing connections with the input regs. 9455 9456 assert(sub_klass != rax, "killed reg"); // killed by mov(rax, super) 9457 assert(sub_klass != rcx, "killed reg"); // killed by lea(rcx, &pst_counter) 9458 9459 // Get super_klass value into rax (even if it was in rdi or rcx). 9460 bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false; 9461 if (super_klass != rax || UseCompressedOops) { 9462 if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; } 9463 mov(rax, super_klass); 9464 } 9465 if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; } 9466 if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; } 9467 9468#ifndef PRODUCT 9469 int* pst_counter = &SharedRuntime::_partial_subtype_ctr; 9470 ExternalAddress pst_counter_addr((address) pst_counter); 9471 NOT_LP64( incrementl(pst_counter_addr) ); 9472 LP64_ONLY( lea(rcx, pst_counter_addr) ); 9473 LP64_ONLY( incrementl(Address(rcx, 0)) ); 9474#endif //PRODUCT 9475 9476 // We will consult the secondary-super array. 9477 movptr(rdi, secondary_supers_addr); 9478 // Load the array length. (Positive movl does right thing on LP64.) 9479 movl(rcx, Address(rdi, Array<Klass*>::length_offset_in_bytes())); 9480 // Skip to start of data. 9481 addptr(rdi, Array<Klass*>::base_offset_in_bytes()); 9482 9483 // Scan RCX words at [RDI] for an occurrence of RAX. 9484 // Set NZ/Z based on last compare. 9485 // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does 9486 // not change flags (only scas instruction which is repeated sets flags). 9487 // Set Z = 0 (not equal) before 'repne' to indicate that class was not found. 9488 9489 testptr(rax,rax); // Set Z = 0 9490 repne_scan(); 9491 9492 // Unspill the temp. registers: 9493 if (pushed_rdi) pop(rdi); 9494 if (pushed_rcx) pop(rcx); 9495 if (pushed_rax) pop(rax); 9496 9497 if (set_cond_codes) { 9498 // Special hack for the AD files: rdi is guaranteed non-zero. 
9499 assert(!pushed_rdi, "rdi must be left non-NULL"); 9500 // Also, the condition codes are properly set Z/NZ on succeed/failure. 9501 } 9502 9503 if (L_failure == &L_fallthrough) 9504 jccb(Assembler::notEqual, *L_failure); 9505 else jcc(Assembler::notEqual, *L_failure); 9506 9507 // Success. Cache the super we found and proceed in triumph. 9508 movptr(super_cache_addr, super_klass); 9509 9510 if (L_success != &L_fallthrough) { 9511 jmp(*L_success); 9512 } 9513 9514#undef IS_A_TEMP 9515 9516 bind(L_fallthrough); 9517} 9518 9519 9520void MacroAssembler::cmov32(Condition cc, Register dst, Address src) { 9521 if (VM_Version::supports_cmov()) { 9522 cmovl(cc, dst, src); 9523 } else { 9524 Label L; 9525 jccb(negate_condition(cc), L); 9526 movl(dst, src); 9527 bind(L); 9528 } 9529} 9530 9531void MacroAssembler::cmov32(Condition cc, Register dst, Register src) { 9532 if (VM_Version::supports_cmov()) { 9533 cmovl(cc, dst, src); 9534 } else { 9535 Label L; 9536 jccb(negate_condition(cc), L); 9537 movl(dst, src); 9538 bind(L); 9539 } 9540} 9541 9542void MacroAssembler::verify_oop(Register reg, const char* s) { 9543 if (!VerifyOops) return; 9544 9545 // Pass register number to verify_oop_subroutine 9546 char* b = new char[strlen(s) + 50]; 9547 sprintf(b, "verify_oop: %s: %s", reg->name(), s); 9548 BLOCK_COMMENT("verify_oop {"); 9549#ifdef _LP64 9550 push(rscratch1); // save r10, trashed by movptr() 9551#endif 9552 push(rax); // save rax, 9553 push(reg); // pass register argument 9554 ExternalAddress buffer((address) b); 9555 // avoid using pushptr, as it modifies scratch registers 9556 // and our contract is not to modify anything 9557 movptr(rax, buffer.addr()); 9558 push(rax); 9559 // call indirectly to solve generation ordering problem 9560 movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address())); 9561 call(rax); 9562 // Caller pops the arguments (oop, message) and restores rax, r10 9563 BLOCK_COMMENT("} verify_oop"); 9564} 9565 9566 9567RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, 9568 Register tmp, 9569 int offset) { 9570 intptr_t value = *delayed_value_addr; 9571 if (value != 0) 9572 return RegisterOrConstant(value + offset); 9573 9574 // load indirectly to solve generation ordering problem 9575 movptr(tmp, ExternalAddress((address) delayed_value_addr)); 9576 9577#ifdef ASSERT 9578 { Label L; 9579 testptr(tmp, tmp); 9580 if (WizardMode) { 9581 jcc(Assembler::notZero, L); 9582 char* buf = new char[40]; 9583 sprintf(buf, "DelayedValue="INTPTR_FORMAT, delayed_value_addr[1]); 9584 STOP(buf); 9585 } else { 9586 jccb(Assembler::notZero, L); 9587 hlt(); 9588 } 9589 bind(L); 9590 } 9591#endif 9592 9593 if (offset != 0) 9594 addptr(tmp, offset); 9595 9596 return RegisterOrConstant(tmp); 9597} 9598 9599 9600Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, 9601 int extra_slot_offset) { 9602 // cf. TemplateTable::prepare_invoke(), if (load_receiver). 
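  // The returned Address is, in effect (a sketch):
  //   rsp + arg_slot*stackElementSize + expr_offset_in_bytes(extra_slot_offset) + wordSize
  // where the trailing wordSize skips the return PC sitting on top of the
  // outgoing arguments; a register-valued arg_slot becomes the scaled index.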
9603 int stackElementSize = Interpreter::stackElementSize; 9604 int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); 9605#ifdef ASSERT 9606 int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); 9607 assert(offset1 - offset == stackElementSize, "correct arithmetic"); 9608#endif 9609 Register scale_reg = noreg; 9610 Address::ScaleFactor scale_factor = Address::no_scale; 9611 if (arg_slot.is_constant()) { 9612 offset += arg_slot.as_constant() * stackElementSize; 9613 } else { 9614 scale_reg = arg_slot.as_register(); 9615 scale_factor = Address::times(stackElementSize); 9616 } 9617 offset += wordSize; // return PC is on stack 9618 return Address(rsp, scale_reg, scale_factor, offset); 9619} 9620 9621 9622void MacroAssembler::verify_oop_addr(Address addr, const char* s) { 9623 if (!VerifyOops) return; 9624 9625 // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord); 9626 // Pass register number to verify_oop_subroutine 9627 char* b = new char[strlen(s) + 50]; 9628 sprintf(b, "verify_oop_addr: %s", s); 9629 9630#ifdef _LP64 9631 push(rscratch1); // save r10, trashed by movptr() 9632#endif 9633 push(rax); // save rax, 9634 // addr may contain rsp so we will have to adjust it based on the push 9635 // we just did (and on 64 bit we do two pushes) 9636 // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which 9637 // stores rax into addr which is backwards of what was intended. 9638 if (addr.uses(rsp)) { 9639 lea(rax, addr); 9640 pushptr(Address(rax, LP64_ONLY(2 *) BytesPerWord)); 9641 } else { 9642 pushptr(addr); 9643 } 9644 9645 ExternalAddress buffer((address) b); 9646 // pass msg argument 9647 // avoid using pushptr, as it modifies scratch registers 9648 // and our contract is not to modify anything 9649 movptr(rax, buffer.addr()); 9650 push(rax); 9651 9652 // call indirectly to solve generation ordering problem 9653 movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address())); 9654 call(rax); 9655 // Caller pops the arguments (addr, message) and restores rax, r10. 
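  // (Note on the rsp adjustment above: on LP64 two words, rscratch1 and rax, have
  //  been pushed since the caller formed 'addr', so the address computed by lea is
  //  2*BytesPerWord below the slot the caller meant -- which is why pushptr reads
  //  at Address(rax, 2*BytesPerWord); on 32-bit only rax was pushed, hence one word.)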
9656} 9657 9658void MacroAssembler::verify_tlab() { 9659#ifdef ASSERT 9660 if (UseTLAB && VerifyOops) { 9661 Label next, ok; 9662 Register t1 = rsi; 9663 Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread); 9664 9665 push(t1); 9666 NOT_LP64(push(thread_reg)); 9667 NOT_LP64(get_thread(thread_reg)); 9668 9669 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); 9670 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset()))); 9671 jcc(Assembler::aboveEqual, next); 9672 STOP("assert(top >= start)"); 9673 should_not_reach_here(); 9674 9675 bind(next); 9676 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset()))); 9677 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); 9678 jcc(Assembler::aboveEqual, ok); 9679 STOP("assert(top <= end)"); 9680 should_not_reach_here(); 9681 9682 bind(ok); 9683 NOT_LP64(pop(thread_reg)); 9684 pop(t1); 9685 } 9686#endif 9687} 9688 9689class ControlWord { 9690 public: 9691 int32_t _value; 9692 9693 int rounding_control() const { return (_value >> 10) & 3 ; } 9694 int precision_control() const { return (_value >> 8) & 3 ; } 9695 bool precision() const { return ((_value >> 5) & 1) != 0; } 9696 bool underflow() const { return ((_value >> 4) & 1) != 0; } 9697 bool overflow() const { return ((_value >> 3) & 1) != 0; } 9698 bool zero_divide() const { return ((_value >> 2) & 1) != 0; } 9699 bool denormalized() const { return ((_value >> 1) & 1) != 0; } 9700 bool invalid() const { return ((_value >> 0) & 1) != 0; } 9701 9702 void print() const { 9703 // rounding control 9704 const char* rc; 9705 switch (rounding_control()) { 9706 case 0: rc = "round near"; break; 9707 case 1: rc = "round down"; break; 9708 case 2: rc = "round up "; break; 9709 case 3: rc = "chop "; break; 9710 }; 9711 // precision control 9712 const char* pc; 9713 switch (precision_control()) { 9714 case 0: pc = "24 bits "; break; 9715 case 1: pc = "reserved"; break; 9716 case 2: pc = "53 bits "; break; 9717 case 3: pc = "64 bits "; break; 9718 }; 9719 // flags 9720 char f[9]; 9721 f[0] = ' '; 9722 f[1] = ' '; 9723 f[2] = (precision ()) ? 'P' : 'p'; 9724 f[3] = (underflow ()) ? 'U' : 'u'; 9725 f[4] = (overflow ()) ? 'O' : 'o'; 9726 f[5] = (zero_divide ()) ? 'Z' : 'z'; 9727 f[6] = (denormalized()) ? 'D' : 'd'; 9728 f[7] = (invalid ()) ? 'I' : 'i'; 9729 f[8] = '\x0'; 9730 // output 9731 printf("%04x masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc); 9732 } 9733 9734}; 9735 9736class StatusWord { 9737 public: 9738 int32_t _value; 9739 9740 bool busy() const { return ((_value >> 15) & 1) != 0; } 9741 bool C3() const { return ((_value >> 14) & 1) != 0; } 9742 bool C2() const { return ((_value >> 10) & 1) != 0; } 9743 bool C1() const { return ((_value >> 9) & 1) != 0; } 9744 bool C0() const { return ((_value >> 8) & 1) != 0; } 9745 int top() const { return (_value >> 11) & 7 ; } 9746 bool error_status() const { return ((_value >> 7) & 1) != 0; } 9747 bool stack_fault() const { return ((_value >> 6) & 1) != 0; } 9748 bool precision() const { return ((_value >> 5) & 1) != 0; } 9749 bool underflow() const { return ((_value >> 4) & 1) != 0; } 9750 bool overflow() const { return ((_value >> 3) & 1) != 0; } 9751 bool zero_divide() const { return ((_value >> 2) & 1) != 0; } 9752 bool denormalized() const { return ((_value >> 1) & 1) != 0; } 9753 bool invalid() const { return ((_value >> 0) & 1) != 0; } 9754 9755 void print() const { 9756 // condition codes 9757 char c[5]; 9758 c[0] = (C3()) ? '3' : '-'; 9759 c[1] = (C2()) ? 
'2' : '-'; 9760 c[2] = (C1()) ? '1' : '-'; 9761 c[3] = (C0()) ? '0' : '-'; 9762 c[4] = '\x0'; 9763 // flags 9764 char f[9]; 9765 f[0] = (error_status()) ? 'E' : '-'; 9766 f[1] = (stack_fault ()) ? 'S' : '-'; 9767 f[2] = (precision ()) ? 'P' : '-'; 9768 f[3] = (underflow ()) ? 'U' : '-'; 9769 f[4] = (overflow ()) ? 'O' : '-'; 9770 f[5] = (zero_divide ()) ? 'Z' : '-'; 9771 f[6] = (denormalized()) ? 'D' : '-'; 9772 f[7] = (invalid ()) ? 'I' : '-'; 9773 f[8] = '\x0'; 9774 // output 9775 printf("%04x flags = %s, cc = %s, top = %d", _value & 0xFFFF, f, c, top()); 9776 } 9777 9778}; 9779 9780class TagWord { 9781 public: 9782 int32_t _value; 9783 9784 int tag_at(int i) const { return (_value >> (i*2)) & 3; } 9785 9786 void print() const { 9787 printf("%04x", _value & 0xFFFF); 9788 } 9789 9790}; 9791 9792class FPU_Register { 9793 public: 9794 int32_t _m0; 9795 int32_t _m1; 9796 int16_t _ex; 9797 9798 bool is_indefinite() const { 9799 return _ex == -1 && _m1 == (int32_t)0xC0000000 && _m0 == 0; 9800 } 9801 9802 void print() const { 9803 char sign = (_ex < 0) ? '-' : '+'; 9804 const char* kind = (_ex == 0x7FFF || _ex == (int16_t)-1) ? "NaN" : " "; 9805 printf("%c%04hx.%08x%08x %s", sign, _ex, _m1, _m0, kind); 9806 }; 9807 9808}; 9809 9810class FPU_State { 9811 public: 9812 enum { 9813 register_size = 10, 9814 number_of_registers = 8, 9815 register_mask = 7 9816 }; 9817 9818 ControlWord _control_word; 9819 StatusWord _status_word; 9820 TagWord _tag_word; 9821 int32_t _error_offset; 9822 int32_t _error_selector; 9823 int32_t _data_offset; 9824 int32_t _data_selector; 9825 int8_t _register[register_size * number_of_registers]; 9826 9827 int tag_for_st(int i) const { return _tag_word.tag_at((_status_word.top() + i) & register_mask); } 9828 FPU_Register* st(int i) const { return (FPU_Register*)&_register[register_size * i]; } 9829 9830 const char* tag_as_string(int tag) const { 9831 switch (tag) { 9832 case 0: return "valid"; 9833 case 1: return "zero"; 9834 case 2: return "special"; 9835 case 3: return "empty"; 9836 } 9837 ShouldNotReachHere(); 9838 return NULL; 9839 } 9840 9841 void print() const { 9842 // print computation registers 9843 { int t = _status_word.top(); 9844 for (int i = 0; i < number_of_registers; i++) { 9845 int j = (i - t) & register_mask; 9846 printf("%c r%d = ST%d = ", (j == 0 ? '*' : ' '), i, j); 9847 st(j)->print(); 9848 printf(" %s\n", tag_as_string(_tag_word.tag_at(i))); 9849 } 9850 } 9851 printf("\n"); 9852 // print control registers 9853 printf("ctrl = "); _control_word.print(); printf("\n"); 9854 printf("stat = "); _status_word .print(); printf("\n"); 9855 printf("tags = "); _tag_word .print(); printf("\n"); 9856 } 9857 9858}; 9859 9860class Flag_Register { 9861 public: 9862 int32_t _value; 9863 9864 bool overflow() const { return ((_value >> 11) & 1) != 0; } 9865 bool direction() const { return ((_value >> 10) & 1) != 0; } 9866 bool sign() const { return ((_value >> 7) & 1) != 0; } 9867 bool zero() const { return ((_value >> 6) & 1) != 0; } 9868 bool auxiliary_carry() const { return ((_value >> 4) & 1) != 0; } 9869 bool parity() const { return ((_value >> 2) & 1) != 0; } 9870 bool carry() const { return ((_value >> 0) & 1) != 0; } 9871 9872 void print() const { 9873 // flags 9874 char f[8]; 9875 f[0] = (overflow ()) ? 'O' : '-'; 9876 f[1] = (direction ()) ? 'D' : '-'; 9877 f[2] = (sign ()) ? 'S' : '-'; 9878 f[3] = (zero ()) ? 'Z' : '-'; 9879 f[4] = (auxiliary_carry()) ? 'A' : '-'; 9880 f[5] = (parity ()) ? 'P' : '-'; 9881 f[6] = (carry ()) ? 
'C' : '-'; 9882 f[7] = '\x0'; 9883 // output 9884 printf("%08x flags = %s", _value, f); 9885 } 9886 9887}; 9888 9889class IU_Register { 9890 public: 9891 int32_t _value; 9892 9893 void print() const { 9894 printf("%08x %11d", _value, _value); 9895 } 9896 9897}; 9898 9899class IU_State { 9900 public: 9901 Flag_Register _eflags; 9902 IU_Register _rdi; 9903 IU_Register _rsi; 9904 IU_Register _rbp; 9905 IU_Register _rsp; 9906 IU_Register _rbx; 9907 IU_Register _rdx; 9908 IU_Register _rcx; 9909 IU_Register _rax; 9910 9911 void print() const { 9912 // computation registers 9913 printf("rax, = "); _rax.print(); printf("\n"); 9914 printf("rbx, = "); _rbx.print(); printf("\n"); 9915 printf("rcx = "); _rcx.print(); printf("\n"); 9916 printf("rdx = "); _rdx.print(); printf("\n"); 9917 printf("rdi = "); _rdi.print(); printf("\n"); 9918 printf("rsi = "); _rsi.print(); printf("\n"); 9919 printf("rbp, = "); _rbp.print(); printf("\n"); 9920 printf("rsp = "); _rsp.print(); printf("\n"); 9921 printf("\n"); 9922 // control registers 9923 printf("flgs = "); _eflags.print(); printf("\n"); 9924 } 9925}; 9926 9927 9928class CPU_State { 9929 public: 9930 FPU_State _fpu_state; 9931 IU_State _iu_state; 9932 9933 void print() const { 9934 printf("--------------------------------------------------\n"); 9935 _iu_state .print(); 9936 printf("\n"); 9937 _fpu_state.print(); 9938 printf("--------------------------------------------------\n"); 9939 } 9940 9941}; 9942 9943 9944static void _print_CPU_state(CPU_State* state) { 9945 state->print(); 9946}; 9947 9948 9949void MacroAssembler::print_CPU_state() { 9950 push_CPU_state(); 9951 push(rsp); // pass CPU state 9952 call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state))); 9953 addptr(rsp, wordSize); // discard argument 9954 pop_CPU_state(); 9955} 9956 9957 9958static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) { 9959 static int counter = 0; 9960 FPU_State* fs = &state->_fpu_state; 9961 counter++; 9962 // For leaf calls, only verify that the top few elements remain empty. 9963 // We only need 1 empty at the top for C2 code. 
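  // x87 tag values used below: 0 = valid, 1 = zero, 2 = special, 3 = empty
  // (cf. FPU_State::tag_as_string).  For a non-negative stack_depth the check is,
  // in outline: count the non-empty registers starting at ST(0), require every
  // remaining register to be empty (the stack is contiguous), and require the
  // count to equal stack_depth.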
9964 if( stack_depth < 0 ) { 9965 if( fs->tag_for_st(7) != 3 ) { 9966 printf("FPR7 not empty\n"); 9967 state->print(); 9968 assert(false, "error"); 9969 return false; 9970 } 9971 return true; // All other stack states do not matter 9972 } 9973 9974 assert((fs->_control_word._value & 0xffff) == StubRoutines::_fpu_cntrl_wrd_std, 9975 "bad FPU control word"); 9976 9977 // compute stack depth 9978 int i = 0; 9979 while (i < FPU_State::number_of_registers && fs->tag_for_st(i) < 3) i++; 9980 int d = i; 9981 while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++; 9982 // verify findings 9983 if (i != FPU_State::number_of_registers) { 9984 // stack not contiguous 9985 printf("%s: stack not contiguous at ST%d\n", s, i); 9986 state->print(); 9987 assert(false, "error"); 9988 return false; 9989 } 9990 // check if computed stack depth corresponds to expected stack depth 9991 if (stack_depth < 0) { 9992 // expected stack depth is -stack_depth or less 9993 if (d > -stack_depth) { 9994 // too many elements on the stack 9995 printf("%s: <= %d stack elements expected but found %d\n", s, -stack_depth, d); 9996 state->print(); 9997 assert(false, "error"); 9998 return false; 9999 } 10000 } else { 10001 // expected stack depth is stack_depth 10002 if (d != stack_depth) { 10003 // wrong stack depth 10004 printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d); 10005 state->print(); 10006 assert(false, "error"); 10007 return false; 10008 } 10009 } 10010 // everything is cool 10011 return true; 10012} 10013 10014 10015void MacroAssembler::verify_FPU(int stack_depth, const char* s) { 10016 if (!VerifyFPU) return; 10017 push_CPU_state(); 10018 push(rsp); // pass CPU state 10019 ExternalAddress msg((address) s); 10020 // pass message string s 10021 pushptr(msg.addr()); 10022 push(stack_depth); // pass stack depth 10023 call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU))); 10024 addptr(rsp, 3 * wordSize); // discard arguments 10025 // check for error 10026 { Label L; 10027 testl(rax, rax); 10028 jcc(Assembler::notZero, L); 10029 int3(); // break if error condition 10030 bind(L); 10031 } 10032 pop_CPU_state(); 10033} 10034 10035void MacroAssembler::load_klass(Register dst, Register src) { 10036#ifdef _LP64 10037 if (UseCompressedKlassPointers) { 10038 movl(dst, Address(src, oopDesc::klass_offset_in_bytes())); 10039 decode_klass_not_null(dst); 10040 } else 10041#endif 10042 movptr(dst, Address(src, oopDesc::klass_offset_in_bytes())); 10043} 10044 10045void MacroAssembler::load_prototype_header(Register dst, Register src) { 10046#ifdef _LP64 10047 if (UseCompressedKlassPointers) { 10048 assert (Universe::heap() != NULL, "java heap should be initialized"); 10049 movl(dst, Address(src, oopDesc::klass_offset_in_bytes())); 10050 if (Universe::narrow_klass_shift() != 0) { 10051 assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); 10052 assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); 10053 movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset())); 10054 } else { 10055 movq(dst, Address(dst, Klass::prototype_header_offset())); 10056 } 10057 } else 10058#endif 10059 { 10060 movptr(dst, Address(src, oopDesc::klass_offset_in_bytes())); 10061 movptr(dst, Address(dst, Klass::prototype_header_offset())); 10062 } 10063} 10064 10065void MacroAssembler::store_klass(Register dst, Register src) { 10066#ifdef _LP64 10067 if (UseCompressedKlassPointers) { 10068 encode_klass_not_null(src); 
10069 movl(Address(dst, oopDesc::klass_offset_in_bytes()), src); 10070 } else 10071#endif 10072 movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src); 10073} 10074 10075void MacroAssembler::load_heap_oop(Register dst, Address src) { 10076#ifdef _LP64 10077 // FIXME: Must change all places where we try to load the klass. 10078 if (UseCompressedOops) { 10079 movl(dst, src); 10080 decode_heap_oop(dst); 10081 } else 10082#endif 10083 movptr(dst, src); 10084} 10085 10086// Doesn't do verfication, generates fixed size code 10087void MacroAssembler::load_heap_oop_not_null(Register dst, Address src) { 10088#ifdef _LP64 10089 if (UseCompressedOops) { 10090 movl(dst, src); 10091 decode_heap_oop_not_null(dst); 10092 } else 10093#endif 10094 movptr(dst, src); 10095} 10096 10097void MacroAssembler::store_heap_oop(Address dst, Register src) { 10098#ifdef _LP64 10099 if (UseCompressedOops) { 10100 assert(!dst.uses(src), "not enough registers"); 10101 encode_heap_oop(src); 10102 movl(dst, src); 10103 } else 10104#endif 10105 movptr(dst, src); 10106} 10107 10108void MacroAssembler::cmp_heap_oop(Register src1, Address src2, Register tmp) { 10109 assert_different_registers(src1, tmp); 10110#ifdef _LP64 10111 if (UseCompressedOops) { 10112 bool did_push = false; 10113 if (tmp == noreg) { 10114 tmp = rax; 10115 push(tmp); 10116 did_push = true; 10117 assert(!src2.uses(rsp), "can't push"); 10118 } 10119 load_heap_oop(tmp, src2); 10120 cmpptr(src1, tmp); 10121 if (did_push) pop(tmp); 10122 } else 10123#endif 10124 cmpptr(src1, src2); 10125} 10126 10127// Used for storing NULLs. 10128void MacroAssembler::store_heap_oop_null(Address dst) { 10129#ifdef _LP64 10130 if (UseCompressedOops) { 10131 movl(dst, (int32_t)NULL_WORD); 10132 } else { 10133 movslq(dst, (int32_t)NULL_WORD); 10134 } 10135#else 10136 movl(dst, (int32_t)NULL_WORD); 10137#endif 10138} 10139 10140#ifdef _LP64 10141void MacroAssembler::store_klass_gap(Register dst, Register src) { 10142 if (UseCompressedKlassPointers) { 10143 // Store to klass gap in destination 10144 movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src); 10145 } 10146} 10147 10148#ifdef ASSERT 10149void MacroAssembler::verify_heapbase(const char* msg) { 10150 assert (UseCompressedOops || UseCompressedKlassPointers, "should be compressed"); 10151 assert (Universe::heap() != NULL, "java heap should be initialized"); 10152 if (CheckCompressedOops) { 10153 Label ok; 10154 push(rscratch1); // cmpptr trashes rscratch1 10155 cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr())); 10156 jcc(Assembler::equal, ok); 10157 STOP(msg); 10158 bind(ok); 10159 pop(rscratch1); 10160 } 10161} 10162#endif 10163 10164// Algorithm must match oop.inline.hpp encode_heap_oop. 
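// For reference, the encoding implemented below is roughly (a sketch):
//   narrow = (oop == NULL) ? 0 : (narrowOop)((oop - narrow_oop_base) >> narrow_oop_shift);
// With a NULL base (zero-based or unscaled mode) the subtraction and the null
// check drop out; with a zero shift the shift drops out as well.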
10165void MacroAssembler::encode_heap_oop(Register r) { 10166#ifdef ASSERT 10167 verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?"); 10168#endif 10169 verify_oop(r, "broken oop in encode_heap_oop"); 10170 if (Universe::narrow_oop_base() == NULL) { 10171 if (Universe::narrow_oop_shift() != 0) { 10172 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 10173 shrq(r, LogMinObjAlignmentInBytes); 10174 } 10175 return; 10176 } 10177 testq(r, r); 10178 cmovq(Assembler::equal, r, r12_heapbase); 10179 subq(r, r12_heapbase); 10180 shrq(r, LogMinObjAlignmentInBytes); 10181} 10182 10183void MacroAssembler::encode_heap_oop_not_null(Register r) { 10184#ifdef ASSERT 10185 verify_heapbase("MacroAssembler::encode_heap_oop_not_null: heap base corrupted?"); 10186 if (CheckCompressedOops) { 10187 Label ok; 10188 testq(r, r); 10189 jcc(Assembler::notEqual, ok); 10190 STOP("null oop passed to encode_heap_oop_not_null"); 10191 bind(ok); 10192 } 10193#endif 10194 verify_oop(r, "broken oop in encode_heap_oop_not_null"); 10195 if (Universe::narrow_oop_base() != NULL) { 10196 subq(r, r12_heapbase); 10197 } 10198 if (Universe::narrow_oop_shift() != 0) { 10199 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 10200 shrq(r, LogMinObjAlignmentInBytes); 10201 } 10202} 10203 10204void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { 10205#ifdef ASSERT 10206 verify_heapbase("MacroAssembler::encode_heap_oop_not_null2: heap base corrupted?"); 10207 if (CheckCompressedOops) { 10208 Label ok; 10209 testq(src, src); 10210 jcc(Assembler::notEqual, ok); 10211 STOP("null oop passed to encode_heap_oop_not_null2"); 10212 bind(ok); 10213 } 10214#endif 10215 verify_oop(src, "broken oop in encode_heap_oop_not_null2"); 10216 if (dst != src) { 10217 movq(dst, src); 10218 } 10219 if (Universe::narrow_oop_base() != NULL) { 10220 subq(dst, r12_heapbase); 10221 } 10222 if (Universe::narrow_oop_shift() != 0) { 10223 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 10224 shrq(dst, LogMinObjAlignmentInBytes); 10225 } 10226} 10227 10228void MacroAssembler::decode_heap_oop(Register r) { 10229#ifdef ASSERT 10230 verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?"); 10231#endif 10232 if (Universe::narrow_oop_base() == NULL) { 10233 if (Universe::narrow_oop_shift() != 0) { 10234 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 10235 shlq(r, LogMinObjAlignmentInBytes); 10236 } 10237 } else { 10238 Label done; 10239 shlq(r, LogMinObjAlignmentInBytes); 10240 jccb(Assembler::equal, done); 10241 addq(r, r12_heapbase); 10242 bind(done); 10243 } 10244 verify_oop(r, "broken oop in decode_heap_oop"); 10245} 10246 10247void MacroAssembler::decode_heap_oop_not_null(Register r) { 10248 // Note: it will change flags 10249 assert (UseCompressedOops, "should only be used for compressed headers"); 10250 assert (Universe::heap() != NULL, "java heap should be initialized"); 10251 // Cannot assert, unverified entry point counts instructions (see .ad file) 10252 // vtableStubs also counts instructions in pd_code_size_limit. 10253 // Also do not verify_oop as this is called by verify_oop. 
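  // In effect (a sketch):
  //   oop = (address)((uintptr_t)narrow << narrow_oop_shift) + narrow_oop_base;
  // No null check is needed here, so the sequence stays branch-free and its
  // instruction count stays predictable for the callers mentioned above.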
10254 if (Universe::narrow_oop_shift() != 0) { 10255 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 10256 shlq(r, LogMinObjAlignmentInBytes); 10257 if (Universe::narrow_oop_base() != NULL) { 10258 addq(r, r12_heapbase); 10259 } 10260 } else { 10261 assert (Universe::narrow_oop_base() == NULL, "sanity"); 10262 } 10263} 10264 10265void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { 10266 // Note: it will change flags 10267 assert (UseCompressedOops, "should only be used for compressed headers"); 10268 assert (Universe::heap() != NULL, "java heap should be initialized"); 10269 // Cannot assert, unverified entry point counts instructions (see .ad file) 10270 // vtableStubs also counts instructions in pd_code_size_limit. 10271 // Also do not verify_oop as this is called by verify_oop. 10272 if (Universe::narrow_oop_shift() != 0) { 10273 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 10274 if (LogMinObjAlignmentInBytes == Address::times_8) { 10275 leaq(dst, Address(r12_heapbase, src, Address::times_8, 0)); 10276 } else { 10277 if (dst != src) { 10278 movq(dst, src); 10279 } 10280 shlq(dst, LogMinObjAlignmentInBytes); 10281 if (Universe::narrow_oop_base() != NULL) { 10282 addq(dst, r12_heapbase); 10283 } 10284 } 10285 } else { 10286 assert (Universe::narrow_oop_base() == NULL, "sanity"); 10287 if (dst != src) { 10288 movq(dst, src); 10289 } 10290 } 10291} 10292 10293void MacroAssembler::encode_klass_not_null(Register r) { 10294 assert(Metaspace::is_initialized(), "metaspace should be initialized"); 10295#ifdef ASSERT 10296 verify_heapbase("MacroAssembler::encode_klass_not_null: heap base corrupted?"); 10297#endif 10298 if (Universe::narrow_klass_base() != NULL) { 10299 subq(r, r12_heapbase); 10300 } 10301 if (Universe::narrow_klass_shift() != 0) { 10302 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); 10303 shrq(r, LogKlassAlignmentInBytes); 10304 } 10305} 10306 10307void MacroAssembler::encode_klass_not_null(Register dst, Register src) { 10308 assert(Metaspace::is_initialized(), "metaspace should be initialized"); 10309#ifdef ASSERT 10310 verify_heapbase("MacroAssembler::encode_klass_not_null2: heap base corrupted?"); 10311#endif 10312 if (dst != src) { 10313 movq(dst, src); 10314 } 10315 if (Universe::narrow_klass_base() != NULL) { 10316 subq(dst, r12_heapbase); 10317 } 10318 if (Universe::narrow_klass_shift() != 0) { 10319 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); 10320 shrq(dst, LogKlassAlignmentInBytes); 10321 } 10322} 10323 10324void MacroAssembler::decode_klass_not_null(Register r) { 10325 assert(Metaspace::is_initialized(), "metaspace should be initialized"); 10326 // Note: it will change flags 10327 assert (UseCompressedKlassPointers, "should only be used for compressed headers"); 10328 // Cannot assert, unverified entry point counts instructions (see .ad file) 10329 // vtableStubs also counts instructions in pd_code_size_limit. 10330 // Also do not verify_oop as this is called by verify_oop. 
10331 if (Universe::narrow_klass_shift() != 0) { 10332 assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); 10333 shlq(r, LogKlassAlignmentInBytes); 10334 if (Universe::narrow_klass_base() != NULL) { 10335 addq(r, r12_heapbase); 10336 } 10337 } else { 10338 assert (Universe::narrow_klass_base() == NULL, "sanity"); 10339 } 10340} 10341 10342void MacroAssembler::decode_klass_not_null(Register dst, Register src) { 10343 assert(Metaspace::is_initialized(), "metaspace should be initialized"); 10344 // Note: it will change flags 10345 assert (UseCompressedKlassPointers, "should only be used for compressed headers"); 10346 // Cannot assert, unverified entry point counts instructions (see .ad file) 10347 // vtableStubs also counts instructions in pd_code_size_limit. 10348 // Also do not verify_oop as this is called by verify_oop. 10349 if (Universe::narrow_klass_shift() != 0) { 10350 assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); 10351 assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); 10352 leaq(dst, Address(r12_heapbase, src, Address::times_8, 0)); 10353 } else { 10354 assert (Universe::narrow_klass_base() == NULL, "sanity"); 10355 if (dst != src) { 10356 movq(dst, src); 10357 } 10358 } 10359} 10360 10361void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { 10362 assert (UseCompressedOops, "should only be used for compressed headers"); 10363 assert (Universe::heap() != NULL, "java heap should be initialized"); 10364 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 10365 int oop_index = oop_recorder()->find_index(obj); 10366 RelocationHolder rspec = oop_Relocation::spec(oop_index); 10367 mov_narrow_oop(dst, oop_index, rspec); 10368} 10369 10370void MacroAssembler::set_narrow_oop(Address dst, jobject obj) { 10371 assert (UseCompressedOops, "should only be used for compressed headers"); 10372 assert (Universe::heap() != NULL, "java heap should be initialized"); 10373 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 10374 int oop_index = oop_recorder()->find_index(obj); 10375 RelocationHolder rspec = oop_Relocation::spec(oop_index); 10376 mov_narrow_oop(dst, oop_index, rspec); 10377} 10378 10379void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { 10380 assert (UseCompressedKlassPointers, "should only be used for compressed headers"); 10381 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 10382 int klass_index = oop_recorder()->find_index(k); 10383 RelocationHolder rspec = metadata_Relocation::spec(klass_index); 10384 mov_narrow_oop(dst, oopDesc::encode_klass(k), rspec); 10385} 10386 10387void MacroAssembler::set_narrow_klass(Address dst, Klass* k) { 10388 assert (UseCompressedKlassPointers, "should only be used for compressed headers"); 10389 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 10390 int klass_index = oop_recorder()->find_index(k); 10391 RelocationHolder rspec = metadata_Relocation::spec(klass_index); 10392 mov_narrow_oop(dst, oopDesc::encode_klass(k), rspec); 10393} 10394 10395void MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) { 10396 assert (UseCompressedOops, "should only be used for compressed headers"); 10397 assert (Universe::heap() != NULL, "java heap should be initialized"); 10398 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 10399 int oop_index = oop_recorder()->find_index(obj); 10400 RelocationHolder rspec = 
oop_Relocation::spec(oop_index); 10401 Assembler::cmp_narrow_oop(dst, oop_index, rspec); 10402} 10403 10404void MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) { 10405 assert (UseCompressedOops, "should only be used for compressed headers"); 10406 assert (Universe::heap() != NULL, "java heap should be initialized"); 10407 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 10408 int oop_index = oop_recorder()->find_index(obj); 10409 RelocationHolder rspec = oop_Relocation::spec(oop_index); 10410 Assembler::cmp_narrow_oop(dst, oop_index, rspec); 10411} 10412 10413void MacroAssembler::cmp_narrow_klass(Register dst, Klass* k) { 10414 assert (UseCompressedKlassPointers, "should only be used for compressed headers"); 10415 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 10416 int klass_index = oop_recorder()->find_index(k); 10417 RelocationHolder rspec = metadata_Relocation::spec(klass_index); 10418 Assembler::cmp_narrow_oop(dst, oopDesc::encode_klass(k), rspec); 10419} 10420 10421void MacroAssembler::cmp_narrow_klass(Address dst, Klass* k) { 10422 assert (UseCompressedKlassPointers, "should only be used for compressed headers"); 10423 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 10424 int klass_index = oop_recorder()->find_index(k); 10425 RelocationHolder rspec = metadata_Relocation::spec(klass_index); 10426 Assembler::cmp_narrow_oop(dst, oopDesc::encode_klass(k), rspec); 10427} 10428 10429void MacroAssembler::reinit_heapbase() { 10430 if (UseCompressedOops || UseCompressedKlassPointers) { 10431 movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr())); 10432 } 10433} 10434#endif // _LP64 10435 10436 10437// C2 compiled method's prolog code. 10438void MacroAssembler::verified_entry(int framesize, bool stack_bang, bool fp_mode_24b) { 10439 10440 // WARNING: Initial instruction MUST be 5 bytes or longer so that 10441 // NativeJump::patch_verified_entry will be able to patch out the entry 10442 // code safely. The push to verify stack depth is ok at 5 bytes, 10443 // the frame allocation can be either 3 or 6 bytes. So if we don't do 10444 // stack bang then we must use the 6 byte frame allocation even if 10445 // we have no frame. :-( 10446 10447 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); 10448 // Remove word for return addr 10449 framesize -= wordSize; 10450 10451 // Calls to C2R adapters often do not accept exceptional returns. 10452 // We require that their callers must bang for them. But be careful, because 10453 // some VM calls (such as call site linkage) can use several kilobytes of 10454 // stack. But the stack safety zone should account for that. 10455 // See bugs 4446381, 4468289, 4497237. 10456 if (stack_bang) { 10457 generate_stack_overflow_check(framesize); 10458 10459 // We always push rbp, so that on return to interpreter rbp, will be 10460 // restored correctly and we can correct the stack. 10461 push(rbp); 10462 // Remove word for ebp 10463 framesize -= wordSize; 10464 10465 // Create frame 10466 if (framesize) { 10467 subptr(rsp, framesize); 10468 } 10469 } else { 10470 // Create frame (force generation of a 4 byte immediate value) 10471 subptr_imm32(rsp, framesize); 10472 10473 // Save RBP register now. 
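  // (Equivalent frame layout to the push(rbp) path above -- a sketch:
  //    [ return address ][ saved rbp ][ framesize - 2*wordSize bytes of frame ] <- rsp
  //  here rbp is stored into its slot rather than pushed, keeping the patchable
  //  6-byte subptr_imm32 as the first instruction; see the entry-patching note above.)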
10474 framesize -= wordSize; 10475 movptr(Address(rsp, framesize), rbp); 10476 } 10477 10478 if (VerifyStackAtCalls) { // Majik cookie to verify stack depth 10479 framesize -= wordSize; 10480 movptr(Address(rsp, framesize), (int32_t)0xbadb100d); 10481 } 10482 10483#ifndef _LP64 10484 // If method sets FPU control word do it now 10485 if (fp_mode_24b) { 10486 fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); 10487 } 10488 if (UseSSE >= 2 && VerifyFPU) { 10489 verify_FPU(0, "FPU stack must be clean on entry"); 10490 } 10491#endif 10492 10493#ifdef ASSERT 10494 if (VerifyStackAtCalls) { 10495 Label L; 10496 push(rax); 10497 mov(rax, rsp); 10498 andptr(rax, StackAlignmentInBytes-1); 10499 cmpptr(rax, StackAlignmentInBytes-wordSize); 10500 pop(rax); 10501 jcc(Assembler::equal, L); 10502 STOP("Stack is not properly aligned!"); 10503 bind(L); 10504 } 10505#endif 10506 10507} 10508 10509 10510// IndexOf for constant substrings with size >= 8 chars 10511// which don't need to be loaded through stack. 10512void MacroAssembler::string_indexofC8(Register str1, Register str2, 10513 Register cnt1, Register cnt2, 10514 int int_cnt2, Register result, 10515 XMMRegister vec, Register tmp) { 10516 ShortBranchVerifier sbv(this); 10517 assert(UseSSE42Intrinsics, "SSE4.2 is required"); 10518 10519 // This method uses pcmpestri inxtruction with bound registers 10520 // inputs: 10521 // xmm - substring 10522 // rax - substring length (elements count) 10523 // mem - scanned string 10524 // rdx - string length (elements count) 10525 // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts) 10526 // outputs: 10527 // rcx - matched index in string 10528 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri"); 10529 10530 Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, 10531 RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR, 10532 MATCH_SUBSTR_HEAD, RELOAD_STR, FOUND_CANDIDATE; 10533 10534 // Note, inline_string_indexOf() generates checks: 10535 // if (substr.count > string.count) return -1; 10536 // if (substr.count == 0) return 0; 10537 assert(int_cnt2 >= 8, "this code isused only for cnt2 >= 8 chars"); 10538 10539 // Load substring. 10540 movdqu(vec, Address(str2, 0)); 10541 movl(cnt2, int_cnt2); 10542 movptr(result, str1); // string addr 10543 10544 if (int_cnt2 > 8) { 10545 jmpb(SCAN_TO_SUBSTR); 10546 10547 // Reload substr for rescan, this code 10548 // is executed only for large substrings (> 8 chars) 10549 bind(RELOAD_SUBSTR); 10550 movdqu(vec, Address(str2, 0)); 10551 negptr(cnt2); // Jumped here with negative cnt2, convert to positive 10552 10553 bind(RELOAD_STR); 10554 // We came here after the beginning of the substring was 10555 // matched but the rest of it was not so we need to search 10556 // again. Start from the next element after the previous match. 10557 10558 // cnt2 is number of substring reminding elements and 10559 // cnt1 is number of string reminding elements when cmp failed. 
10560 // Restored cnt1 = cnt1 - cnt2 + int_cnt2 10561 subl(cnt1, cnt2); 10562 addl(cnt1, int_cnt2); 10563 movl(cnt2, int_cnt2); // Now restore cnt2 10564 10565 decrementl(cnt1); // Shift to next element 10566 cmpl(cnt1, cnt2); 10567 jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring 10568 10569 addptr(result, 2); 10570 10571 } // (int_cnt2 > 8) 10572 10573 // Scan string for start of substr in 16-byte vectors 10574 bind(SCAN_TO_SUBSTR); 10575 pcmpestri(vec, Address(result, 0), 0x0d); 10576 jccb(Assembler::below, FOUND_CANDIDATE); // CF == 1 10577 subl(cnt1, 8); 10578 jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string 10579 cmpl(cnt1, cnt2); 10580 jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring 10581 addptr(result, 16); 10582 jmpb(SCAN_TO_SUBSTR); 10583 10584 // Found a potential substr 10585 bind(FOUND_CANDIDATE); 10586 // Matched whole vector if first element matched (tmp(rcx) == 0). 10587 if (int_cnt2 == 8) { 10588 jccb(Assembler::overflow, RET_FOUND); // OF == 1 10589 } else { // int_cnt2 > 8 10590 jccb(Assembler::overflow, FOUND_SUBSTR); 10591 } 10592 // After pcmpestri tmp(rcx) contains matched element index 10593 // Compute start addr of substr 10594 lea(result, Address(result, tmp, Address::times_2)); 10595 10596 // Make sure string is still long enough 10597 subl(cnt1, tmp); 10598 cmpl(cnt1, cnt2); 10599 if (int_cnt2 == 8) { 10600 jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR); 10601 } else { // int_cnt2 > 8 10602 jccb(Assembler::greaterEqual, MATCH_SUBSTR_HEAD); 10603 } 10604 // Left less then substring. 10605 10606 bind(RET_NOT_FOUND); 10607 movl(result, -1); 10608 jmpb(EXIT); 10609 10610 if (int_cnt2 > 8) { 10611 // This code is optimized for the case when whole substring 10612 // is matched if its head is matched. 10613 bind(MATCH_SUBSTR_HEAD); 10614 pcmpestri(vec, Address(result, 0), 0x0d); 10615 // Reload only string if does not match 10616 jccb(Assembler::noOverflow, RELOAD_STR); // OF == 0 10617 10618 Label CONT_SCAN_SUBSTR; 10619 // Compare the rest of substring (> 8 chars). 10620 bind(FOUND_SUBSTR); 10621 // First 8 chars are already matched. 10622 negptr(cnt2); 10623 addptr(cnt2, 8); 10624 10625 bind(SCAN_SUBSTR); 10626 subl(cnt1, 8); 10627 cmpl(cnt2, -8); // Do not read beyond substring 10628 jccb(Assembler::lessEqual, CONT_SCAN_SUBSTR); 10629 // Back-up strings to avoid reading beyond substring: 10630 // cnt1 = cnt1 - cnt2 + 8 10631 addl(cnt1, cnt2); // cnt2 is negative 10632 addl(cnt1, 8); 10633 movl(cnt2, 8); negptr(cnt2); 10634 bind(CONT_SCAN_SUBSTR); 10635 if (int_cnt2 < (int)G) { 10636 movdqu(vec, Address(str2, cnt2, Address::times_2, int_cnt2*2)); 10637 pcmpestri(vec, Address(result, cnt2, Address::times_2, int_cnt2*2), 0x0d); 10638 } else { 10639 // calculate index in register to avoid integer overflow (int_cnt2*2) 10640 movl(tmp, int_cnt2); 10641 addptr(tmp, cnt2); 10642 movdqu(vec, Address(str2, tmp, Address::times_2, 0)); 10643 pcmpestri(vec, Address(result, tmp, Address::times_2, 0), 0x0d); 10644 } 10645 // Need to reload strings pointers if not matched whole vector 10646 jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0 10647 addptr(cnt2, 8); 10648 jcc(Assembler::negative, SCAN_SUBSTR); 10649 // Fall through if found full substring 10650 10651 } // (int_cnt2 > 8) 10652 10653 bind(RET_FOUND); 10654 // Found result if we matched full small substring. 
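  // At this point 'result' holds the address of the first matching character in
  // the string; the value handed back to the caller is a character index,
  // computed below as (result - str1) / 2 since jchars are two bytes wide.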
10655 // Compute substr offset 10656 subptr(result, str1); 10657 shrl(result, 1); // index 10658 bind(EXIT); 10659 10660} // string_indexofC8 10661 10662// Small strings are loaded through stack if they cross page boundary. 10663void MacroAssembler::string_indexof(Register str1, Register str2, 10664 Register cnt1, Register cnt2, 10665 int int_cnt2, Register result, 10666 XMMRegister vec, Register tmp) { 10667 ShortBranchVerifier sbv(this); 10668 assert(UseSSE42Intrinsics, "SSE4.2 is required"); 10669 // 10670 // int_cnt2 is length of small (< 8 chars) constant substring 10671 // or (-1) for non constant substring in which case its length 10672 // is in cnt2 register. 10673 // 10674 // Note, inline_string_indexOf() generates checks: 10675 // if (substr.count > string.count) return -1; 10676 // if (substr.count == 0) return 0; 10677 // 10678 assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < 8), "should be != 0"); 10679 10680 // This method uses pcmpestri instruction with bound registers 10681 // inputs: 10682 // xmm - substring 10683 // rax - substring length (elements count) 10684 // mem - scanned string 10685 // rdx - string length (elements count) 10686 // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts) 10687 // outputs: 10688 // rcx - matched index in string 10689 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri"); 10690 10691 Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, ADJUST_STR, 10692 RET_FOUND, RET_NOT_FOUND, CLEANUP, FOUND_SUBSTR, 10693 FOUND_CANDIDATE; 10694 10695 { //======================================================== 10696 // We don't know where these strings are located 10697 // and we can't read beyond them. Load them through stack. 10698 Label BIG_STRINGS, CHECK_STR, COPY_SUBSTR, COPY_STR; 10699 10700 movptr(tmp, rsp); // save old SP 10701 10702 if (int_cnt2 > 0) { // small (< 8 chars) constant substring 10703 if (int_cnt2 == 1) { // One char 10704 load_unsigned_short(result, Address(str2, 0)); 10705 movdl(vec, result); // move 32 bits 10706 } else if (int_cnt2 == 2) { // Two chars 10707 movdl(vec, Address(str2, 0)); // move 32 bits 10708 } else if (int_cnt2 == 4) { // Four chars 10709 movq(vec, Address(str2, 0)); // move 64 bits 10710 } else { // cnt2 = { 3, 5, 6, 7 } 10711 // Array header size is 12 bytes in 32-bit VM 10712 // + 6 bytes for 3 chars == 18 bytes, 10713 // enough space to load vec and shift. 10714 assert(HeapWordSize*typeArrayKlass::header_size() >= 12,"sanity"); 10715 movdqu(vec, Address(str2, (int_cnt2*2)-16)); 10716 psrldq(vec, 16-(int_cnt2*2)); 10717 } 10718 } else { // not constant substring 10719 cmpl(cnt2, 8); 10720 jccb(Assembler::aboveEqual, BIG_STRINGS); // Both strings are big enough 10721 10722 // We can read beyond string if str+16 does not cross page boundary 10723 // since heaps are aligned and mapped by pages. 10724 assert(os::vm_page_size() < (int)G, "default page should be small"); 10725 movl(result, str2); // We need only low 32 bits 10726 andl(result, (os::vm_page_size()-1)); 10727 cmpl(result, (os::vm_page_size()-16)); 10728 jccb(Assembler::belowEqual, CHECK_STR); 10729 10730 // Move small strings to stack to allow load 16 bytes into vec. 
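  // The page check above relies on 16-byte loads only faulting when they touch
  // an unmapped page. Roughly, as an illustration only ('page' standing for
  // os::vm_page_size() and 'addr' for the first char of the string):
  //
  //   if ((addr & (page - 1)) <= page - 16) {
  //     // a movdqu from 'addr' stays inside one page, so reading a few bytes
  //     // past a short string is harmless
  //   } else {
  //     // the load could touch the next page, so copy the short string into
  //     // the 16 bytes reserved on the stack below and scan that copy instead
  //   }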
10731 subptr(rsp, 16); 10732 int stk_offset = wordSize-2; 10733 push(cnt2); 10734 10735 bind(COPY_SUBSTR); 10736 load_unsigned_short(result, Address(str2, cnt2, Address::times_2, -2)); 10737 movw(Address(rsp, cnt2, Address::times_2, stk_offset), result); 10738 decrement(cnt2); 10739 jccb(Assembler::notZero, COPY_SUBSTR); 10740 10741 pop(cnt2); 10742 movptr(str2, rsp); // New substring address 10743 } // non constant 10744 10745 bind(CHECK_STR); 10746 cmpl(cnt1, 8); 10747 jccb(Assembler::aboveEqual, BIG_STRINGS); 10748 10749 // Check cross page boundary. 10750 movl(result, str1); // We need only low 32 bits 10751 andl(result, (os::vm_page_size()-1)); 10752 cmpl(result, (os::vm_page_size()-16)); 10753 jccb(Assembler::belowEqual, BIG_STRINGS); 10754 10755 subptr(rsp, 16); 10756 int stk_offset = -2; 10757 if (int_cnt2 < 0) { // not constant 10758 push(cnt2); 10759 stk_offset += wordSize; 10760 } 10761 movl(cnt2, cnt1); 10762 10763 bind(COPY_STR); 10764 load_unsigned_short(result, Address(str1, cnt2, Address::times_2, -2)); 10765 movw(Address(rsp, cnt2, Address::times_2, stk_offset), result); 10766 decrement(cnt2); 10767 jccb(Assembler::notZero, COPY_STR); 10768 10769 if (int_cnt2 < 0) { // not constant 10770 pop(cnt2); 10771 } 10772 movptr(str1, rsp); // New string address 10773 10774 bind(BIG_STRINGS); 10775 // Load substring. 10776 if (int_cnt2 < 0) { // -1 10777 movdqu(vec, Address(str2, 0)); 10778 push(cnt2); // substr count 10779 push(str2); // substr addr 10780 push(str1); // string addr 10781 } else { 10782 // Small (< 8 chars) constant substrings are loaded already. 10783 movl(cnt2, int_cnt2); 10784 } 10785 push(tmp); // original SP 10786 10787 } // Finished loading 10788 10789 //======================================================== 10790 // Start search 10791 // 10792 10793 movptr(result, str1); // string addr 10794 10795 if (int_cnt2 < 0) { // Only for non constant substring 10796 jmpb(SCAN_TO_SUBSTR); 10797 10798 // SP saved at sp+0 10799 // String saved at sp+1*wordSize 10800 // Substr saved at sp+2*wordSize 10801 // Substr count saved at sp+3*wordSize 10802 10803 // Reload substr for rescan, this code 10804 // is executed only for large substrings (> 8 chars) 10805 bind(RELOAD_SUBSTR); 10806 movptr(str2, Address(rsp, 2*wordSize)); 10807 movl(cnt2, Address(rsp, 3*wordSize)); 10808 movdqu(vec, Address(str2, 0)); 10809 // We came here after the beginning of the substring was 10810 // matched but the rest of it was not so we need to search 10811 // again. Start from the next element after the previous match. 10812 subptr(str1, result); // Restore counter 10813 shrl(str1, 1); 10814 addl(cnt1, str1); 10815 decrementl(cnt1); // Shift to next element 10816 cmpl(cnt1, cnt2); 10817 jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring 10818 10819 addptr(result, 2); 10820 } // non constant 10821 10822 // Scan string for start of substr in 16-byte vectors 10823 bind(SCAN_TO_SUBSTR); 10824 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri"); 10825 pcmpestri(vec, Address(result, 0), 0x0d); 10826 jccb(Assembler::below, FOUND_CANDIDATE); // CF == 1 10827 subl(cnt1, 8); 10828 jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string 10829 cmpl(cnt1, cnt2); 10830 jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring 10831 addptr(result, 16); 10832 10833 bind(ADJUST_STR); 10834 cmpl(cnt1, 8); // Do not read beyond string 10835 jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR); 10836 // Back-up string to avoid reading beyond string. 
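  // In effect the two instructions below do result += cnt1*2 - 16 and cnt1 = 8,
  // i.e. they reposition the scan so that the final 16-byte (8-char) vector
  // ends exactly at the last char of the string, and that tail is rescanned
  // instead of reading past the end of the array.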
10837 lea(result, Address(result, cnt1, Address::times_2, -16)); 10838 movl(cnt1, 8); 10839 jmpb(SCAN_TO_SUBSTR); 10840 10841 // Found a potential substr 10842 bind(FOUND_CANDIDATE); 10843 // After pcmpestri tmp(rcx) contains matched element index 10844 10845 // Make sure string is still long enough 10846 subl(cnt1, tmp); 10847 cmpl(cnt1, cnt2); 10848 jccb(Assembler::greaterEqual, FOUND_SUBSTR); 10849 // Left less then substring. 10850 10851 bind(RET_NOT_FOUND); 10852 movl(result, -1); 10853 jmpb(CLEANUP); 10854 10855 bind(FOUND_SUBSTR); 10856 // Compute start addr of substr 10857 lea(result, Address(result, tmp, Address::times_2)); 10858 10859 if (int_cnt2 > 0) { // Constant substring 10860 // Repeat search for small substring (< 8 chars) 10861 // from new point without reloading substring. 10862 // Have to check that we don't read beyond string. 10863 cmpl(tmp, 8-int_cnt2); 10864 jccb(Assembler::greater, ADJUST_STR); 10865 // Fall through if matched whole substring. 10866 } else { // non constant 10867 assert(int_cnt2 == -1, "should be != 0"); 10868 10869 addl(tmp, cnt2); 10870 // Found result if we matched whole substring. 10871 cmpl(tmp, 8); 10872 jccb(Assembler::lessEqual, RET_FOUND); 10873 10874 // Repeat search for small substring (<= 8 chars) 10875 // from new point 'str1' without reloading substring. 10876 cmpl(cnt2, 8); 10877 // Have to check that we don't read beyond string. 10878 jccb(Assembler::lessEqual, ADJUST_STR); 10879 10880 Label CHECK_NEXT, CONT_SCAN_SUBSTR, RET_FOUND_LONG; 10881 // Compare the rest of substring (> 8 chars). 10882 movptr(str1, result); 10883 10884 cmpl(tmp, cnt2); 10885 // First 8 chars are already matched. 10886 jccb(Assembler::equal, CHECK_NEXT); 10887 10888 bind(SCAN_SUBSTR); 10889 pcmpestri(vec, Address(str1, 0), 0x0d); 10890 // Need to reload strings pointers if not matched whole vector 10891 jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0 10892 10893 bind(CHECK_NEXT); 10894 subl(cnt2, 8); 10895 jccb(Assembler::lessEqual, RET_FOUND_LONG); // Found full substring 10896 addptr(str1, 16); 10897 addptr(str2, 16); 10898 subl(cnt1, 8); 10899 cmpl(cnt2, 8); // Do not read beyond substring 10900 jccb(Assembler::greaterEqual, CONT_SCAN_SUBSTR); 10901 // Back-up strings to avoid reading beyond substring. 10902 lea(str2, Address(str2, cnt2, Address::times_2, -16)); 10903 lea(str1, Address(str1, cnt2, Address::times_2, -16)); 10904 subl(cnt1, cnt2); 10905 movl(cnt2, 8); 10906 addl(cnt1, 8); 10907 bind(CONT_SCAN_SUBSTR); 10908 movdqu(vec, Address(str2, 0)); 10909 jmpb(SCAN_SUBSTR); 10910 10911 bind(RET_FOUND_LONG); 10912 movptr(str1, Address(rsp, wordSize)); 10913 } // non constant 10914 10915 bind(RET_FOUND); 10916 // Compute substr offset 10917 subptr(result, str1); 10918 shrl(result, 1); // index 10919 10920 bind(CLEANUP); 10921 pop(rsp); // restore SP 10922 10923} // string_indexof 10924 10925// Compare strings. 10926void MacroAssembler::string_compare(Register str1, Register str2, 10927 Register cnt1, Register cnt2, Register result, 10928 XMMRegister vec1) { 10929 ShortBranchVerifier sbv(this); 10930 Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL; 10931 10932 // Compute the minimum of the string lengths and the 10933 // difference of the string lengths (stack). 10934 // Do the conditional move stuff 10935 movl(result, cnt1); 10936 subl(cnt1, cnt2); 10937 push(cnt1); 10938 cmov32(Assembler::lessEqual, cnt2, result); 10939 10940 // Is the minimum length zero? 
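  // After the cmov above, cnt2 holds min(length1, length2) and the pushed word
  // holds length1 - length2; that difference is what gets popped into 'result'
  // at LENGTH_DIFF_LABEL when the strings are equal up to the minimum length.
  // The test below handles the case where that minimum is zero.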
10941 testl(cnt2, cnt2); 10942 jcc(Assembler::zero, LENGTH_DIFF_LABEL); 10943 10944 // Load first characters 10945 load_unsigned_short(result, Address(str1, 0)); 10946 load_unsigned_short(cnt1, Address(str2, 0)); 10947 10948 // Compare first characters 10949 subl(result, cnt1); 10950 jcc(Assembler::notZero, POP_LABEL); 10951 decrementl(cnt2); 10952 jcc(Assembler::zero, LENGTH_DIFF_LABEL); 10953 10954 { 10955 // Check after comparing first character to see if strings are equivalent 10956 Label LSkip2; 10957 // Check if the strings start at same location 10958 cmpptr(str1, str2); 10959 jccb(Assembler::notEqual, LSkip2); 10960 10961 // Check if the length difference is zero (from stack) 10962 cmpl(Address(rsp, 0), 0x0); 10963 jcc(Assembler::equal, LENGTH_DIFF_LABEL); 10964 10965 // Strings might not be equivalent 10966 bind(LSkip2); 10967 } 10968 10969 Address::ScaleFactor scale = Address::times_2; 10970 int stride = 8; 10971 10972 // Advance to next element 10973 addptr(str1, 16/stride); 10974 addptr(str2, 16/stride); 10975 10976 if (UseSSE42Intrinsics) { 10977 Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL; 10978 int pcmpmask = 0x19; 10979 // Setup to compare 16-byte vectors 10980 movl(result, cnt2); 10981 andl(cnt2, ~(stride - 1)); // cnt2 holds the vector count 10982 jccb(Assembler::zero, COMPARE_TAIL); 10983 10984 lea(str1, Address(str1, result, scale)); 10985 lea(str2, Address(str2, result, scale)); 10986 negptr(result); 10987 10988 // pcmpestri 10989 // inputs: 10990 // vec1- substring 10991 // rax - negative string length (elements count) 10992 // mem - scaned string 10993 // rdx - string length (elements count) 10994 // pcmpmask - cmp mode: 11000 (string compare with negated result) 10995 // + 00 (unsigned bytes) or + 01 (unsigned shorts) 10996 // outputs: 10997 // rcx - first mismatched element index 10998 assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri"); 10999 11000 bind(COMPARE_WIDE_VECTORS); 11001 movdqu(vec1, Address(str1, result, scale)); 11002 pcmpestri(vec1, Address(str2, result, scale), pcmpmask); 11003 // After pcmpestri cnt1(rcx) contains mismatched element index 11004 11005 jccb(Assembler::below, VECTOR_NOT_EQUAL); // CF==1 11006 addptr(result, stride); 11007 subptr(cnt2, stride); 11008 jccb(Assembler::notZero, COMPARE_WIDE_VECTORS); 11009 11010 // compare wide vectors tail 11011 testl(result, result); 11012 jccb(Assembler::zero, LENGTH_DIFF_LABEL); 11013 11014 movl(cnt2, stride); 11015 movl(result, stride); 11016 negptr(result); 11017 movdqu(vec1, Address(str1, result, scale)); 11018 pcmpestri(vec1, Address(str2, result, scale), pcmpmask); 11019 jccb(Assembler::aboveEqual, LENGTH_DIFF_LABEL); 11020 11021 // Mismatched characters in the vectors 11022 bind(VECTOR_NOT_EQUAL); 11023 addptr(result, cnt1); 11024 movptr(cnt2, result); 11025 load_unsigned_short(result, Address(str1, cnt2, scale)); 11026 load_unsigned_short(cnt1, Address(str2, cnt2, scale)); 11027 subl(result, cnt1); 11028 jmpb(POP_LABEL); 11029 11030 bind(COMPARE_TAIL); // limit is zero 11031 movl(cnt2, result); 11032 // Fallthru to tail compare 11033 } 11034 11035 // Shift str2 and str1 to the end of the arrays, negate min 11036 lea(str1, Address(str1, cnt2, scale, 0)); 11037 lea(str2, Address(str2, cnt2, scale, 0)); 11038 negptr(cnt2); 11039 11040 // Compare the rest of the elements 11041 bind(WHILE_HEAD_LABEL); 11042 load_unsigned_short(result, Address(str1, cnt2, scale, 0)); 11043 load_unsigned_short(cnt1, Address(str2, cnt2, scale, 0)); 11044 subl(result, cnt1); 11045 
jccb(Assembler::notZero, POP_LABEL); 11046 increment(cnt2); 11047 jccb(Assembler::notZero, WHILE_HEAD_LABEL); 11048 11049 // Strings are equal up to min length. Return the length difference. 11050 bind(LENGTH_DIFF_LABEL); 11051 pop(result); 11052 jmpb(DONE_LABEL); 11053 11054 // Discard the stored length difference 11055 bind(POP_LABEL); 11056 pop(cnt1); 11057 11058 // That's it 11059 bind(DONE_LABEL); 11060} 11061 11062// Compare char[] arrays aligned to 4 bytes or substrings. 11063void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2, 11064 Register limit, Register result, Register chr, 11065 XMMRegister vec1, XMMRegister vec2) { 11066 ShortBranchVerifier sbv(this); 11067 Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR; 11068 11069 int length_offset = arrayOopDesc::length_offset_in_bytes(); 11070 int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR); 11071 11072 // Check the input args 11073 cmpptr(ary1, ary2); 11074 jcc(Assembler::equal, TRUE_LABEL); 11075 11076 if (is_array_equ) { 11077 // Need additional checks for arrays_equals. 11078 testptr(ary1, ary1); 11079 jcc(Assembler::zero, FALSE_LABEL); 11080 testptr(ary2, ary2); 11081 jcc(Assembler::zero, FALSE_LABEL); 11082 11083 // Check the lengths 11084 movl(limit, Address(ary1, length_offset)); 11085 cmpl(limit, Address(ary2, length_offset)); 11086 jcc(Assembler::notEqual, FALSE_LABEL); 11087 } 11088 11089 // count == 0 11090 testl(limit, limit); 11091 jcc(Assembler::zero, TRUE_LABEL); 11092 11093 if (is_array_equ) { 11094 // Load array address 11095 lea(ary1, Address(ary1, base_offset)); 11096 lea(ary2, Address(ary2, base_offset)); 11097 } 11098 11099 shll(limit, 1); // byte count != 0 11100 movl(result, limit); // copy 11101 11102 if (UseSSE42Intrinsics) { 11103 // With SSE4.2, use double quad vector compare 11104 Label COMPARE_WIDE_VECTORS, COMPARE_TAIL; 11105 11106 // Compare 16-byte vectors 11107 andl(result, 0x0000000e); // tail count (in bytes) 11108 andl(limit, 0xfffffff0); // vector count (in bytes) 11109 jccb(Assembler::zero, COMPARE_TAIL); 11110 11111 lea(ary1, Address(ary1, limit, Address::times_1)); 11112 lea(ary2, Address(ary2, limit, Address::times_1)); 11113 negptr(limit); 11114 11115 bind(COMPARE_WIDE_VECTORS); 11116 movdqu(vec1, Address(ary1, limit, Address::times_1)); 11117 movdqu(vec2, Address(ary2, limit, Address::times_1)); 11118 pxor(vec1, vec2); 11119 11120 ptest(vec1, vec1); 11121 jccb(Assembler::notZero, FALSE_LABEL); 11122 addptr(limit, 16); 11123 jcc(Assembler::notZero, COMPARE_WIDE_VECTORS); 11124 11125 testl(result, result); 11126 jccb(Assembler::zero, TRUE_LABEL); 11127 11128 movdqu(vec1, Address(ary1, result, Address::times_1, -16)); 11129 movdqu(vec2, Address(ary2, result, Address::times_1, -16)); 11130 pxor(vec1, vec2); 11131 11132 ptest(vec1, vec1); 11133 jccb(Assembler::notZero, FALSE_LABEL); 11134 jmpb(TRUE_LABEL); 11135 11136 bind(COMPARE_TAIL); // limit is zero 11137 movl(limit, result); 11138 // Fallthru to tail compare 11139 } 11140 11141 // Compare 4-byte vectors 11142 andl(limit, 0xfffffffc); // vector count (in bytes) 11143 jccb(Assembler::zero, COMPARE_CHAR); 11144 11145 lea(ary1, Address(ary1, limit, Address::times_1)); 11146 lea(ary2, Address(ary2, limit, Address::times_1)); 11147 negptr(limit); 11148 11149 bind(COMPARE_VECTORS); 11150 movl(chr, Address(ary1, limit, Address::times_1)); 11151 cmpl(chr, Address(ary2, limit, Address::times_1)); 11152 jccb(Assembler::notEqual, FALSE_LABEL); 11153 addptr(limit, 4); 11154 
jcc(Assembler::notZero, COMPARE_VECTORS); 11155 11156 // Compare trailing char (final 2 bytes), if any 11157 bind(COMPARE_CHAR); 11158 testl(result, 0x2); // tail char 11159 jccb(Assembler::zero, TRUE_LABEL); 11160 load_unsigned_short(chr, Address(ary1, 0)); 11161 load_unsigned_short(limit, Address(ary2, 0)); 11162 cmpl(chr, limit); 11163 jccb(Assembler::notEqual, FALSE_LABEL); 11164 11165 bind(TRUE_LABEL); 11166 movl(result, 1); // return true 11167 jmpb(DONE); 11168 11169 bind(FALSE_LABEL); 11170 xorl(result, result); // return false 11171 11172 // That's it 11173 bind(DONE); 11174} 11175 11176void MacroAssembler::generate_fill(BasicType t, bool aligned, 11177 Register to, Register value, Register count, 11178 Register rtmp, XMMRegister xtmp) { 11179 ShortBranchVerifier sbv(this); 11180 assert_different_registers(to, value, count, rtmp); 11181 Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte; 11182 Label L_fill_2_bytes, L_fill_4_bytes; 11183 11184 int shift = -1; 11185 switch (t) { 11186 case T_BYTE: 11187 shift = 2; 11188 break; 11189 case T_SHORT: 11190 shift = 1; 11191 break; 11192 case T_INT: 11193 shift = 0; 11194 break; 11195 default: ShouldNotReachHere(); 11196 } 11197 11198 if (t == T_BYTE) { 11199 andl(value, 0xff); 11200 movl(rtmp, value); 11201 shll(rtmp, 8); 11202 orl(value, rtmp); 11203 } 11204 if (t == T_SHORT) { 11205 andl(value, 0xffff); 11206 } 11207 if (t == T_BYTE || t == T_SHORT) { 11208 movl(rtmp, value); 11209 shll(rtmp, 16); 11210 orl(value, rtmp); 11211 } 11212 11213 cmpl(count, 2<<shift); // Short arrays (< 8 bytes) fill by element 11214 jcc(Assembler::below, L_fill_4_bytes); // use unsigned cmp 11215 if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) { 11216 // align source address at 4 bytes address boundary 11217 if (t == T_BYTE) { 11218 // One byte misalignment happens only for byte arrays 11219 testptr(to, 1); 11220 jccb(Assembler::zero, L_skip_align1); 11221 movb(Address(to, 0), value); 11222 increment(to); 11223 decrement(count); 11224 BIND(L_skip_align1); 11225 } 11226 // Two bytes misalignment happens only for byte and short (char) arrays 11227 testptr(to, 2); 11228 jccb(Assembler::zero, L_skip_align2); 11229 movw(Address(to, 0), value); 11230 addptr(to, 2); 11231 subl(count, 1<<(shift-1)); 11232 BIND(L_skip_align2); 11233 } 11234 if (UseSSE < 2) { 11235 Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes; 11236 // Fill 32-byte chunks 11237 subl(count, 8 << shift); 11238 jcc(Assembler::less, L_check_fill_8_bytes); 11239 align(16); 11240 11241 BIND(L_fill_32_bytes_loop); 11242 11243 for (int i = 0; i < 32; i += 4) { 11244 movl(Address(to, i), value); 11245 } 11246 11247 addptr(to, 32); 11248 subl(count, 8 << shift); 11249 jcc(Assembler::greaterEqual, L_fill_32_bytes_loop); 11250 BIND(L_check_fill_8_bytes); 11251 addl(count, 8 << shift); 11252 jccb(Assembler::zero, L_exit); 11253 jmpb(L_fill_8_bytes); 11254 11255 // 11256 // length is too short, just fill qwords 11257 // 11258 BIND(L_fill_8_bytes_loop); 11259 movl(Address(to, 0), value); 11260 movl(Address(to, 4), value); 11261 addptr(to, 8); 11262 BIND(L_fill_8_bytes); 11263 subl(count, 1 << (shift + 1)); 11264 jcc(Assembler::greaterEqual, L_fill_8_bytes_loop); 11265 // fall through to fill 4 bytes 11266 } else { 11267 Label L_fill_32_bytes; 11268 if (!UseUnalignedLoadStores) { 11269 // align to 8 bytes, we know we are 4 byte aligned to start 11270 testptr(to, 4); 11271 jccb(Assembler::zero, L_fill_32_bytes); 11272 movl(Address(to, 0), 
value); 11273 addptr(to, 4); 11274 subl(count, 1<<shift); 11275 } 11276 BIND(L_fill_32_bytes); 11277 { 11278 assert( UseSSE >= 2, "supported cpu only" ); 11279 Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes; 11280 // Fill 32-byte chunks 11281 movdl(xtmp, value); 11282 pshufd(xtmp, xtmp, 0); 11283 11284 subl(count, 8 << shift); 11285 jcc(Assembler::less, L_check_fill_8_bytes); 11286 align(16); 11287 11288 BIND(L_fill_32_bytes_loop); 11289 11290 if (UseUnalignedLoadStores) { 11291 movdqu(Address(to, 0), xtmp); 11292 movdqu(Address(to, 16), xtmp); 11293 } else { 11294 movq(Address(to, 0), xtmp); 11295 movq(Address(to, 8), xtmp); 11296 movq(Address(to, 16), xtmp); 11297 movq(Address(to, 24), xtmp); 11298 } 11299 11300 addptr(to, 32); 11301 subl(count, 8 << shift); 11302 jcc(Assembler::greaterEqual, L_fill_32_bytes_loop); 11303 BIND(L_check_fill_8_bytes); 11304 addl(count, 8 << shift); 11305 jccb(Assembler::zero, L_exit); 11306 jmpb(L_fill_8_bytes); 11307 11308 // 11309 // length is too short, just fill qwords 11310 // 11311 BIND(L_fill_8_bytes_loop); 11312 movq(Address(to, 0), xtmp); 11313 addptr(to, 8); 11314 BIND(L_fill_8_bytes); 11315 subl(count, 1 << (shift + 1)); 11316 jcc(Assembler::greaterEqual, L_fill_8_bytes_loop); 11317 } 11318 } 11319 // fill trailing 4 bytes 11320 BIND(L_fill_4_bytes); 11321 testl(count, 1<<shift); 11322 jccb(Assembler::zero, L_fill_2_bytes); 11323 movl(Address(to, 0), value); 11324 if (t == T_BYTE || t == T_SHORT) { 11325 addptr(to, 4); 11326 BIND(L_fill_2_bytes); 11327 // fill trailing 2 bytes 11328 testl(count, 1<<(shift-1)); 11329 jccb(Assembler::zero, L_fill_byte); 11330 movw(Address(to, 0), value); 11331 if (t == T_BYTE) { 11332 addptr(to, 2); 11333 BIND(L_fill_byte); 11334 // fill trailing byte 11335 testl(count, 1); 11336 jccb(Assembler::zero, L_exit); 11337 movb(Address(to, 0), value); 11338 } else { 11339 BIND(L_fill_byte); 11340 } 11341 } else { 11342 BIND(L_fill_2_bytes); 11343 } 11344 BIND(L_exit); 11345} 11346#undef BIND 11347#undef BLOCK_COMMENT 11348 11349 11350Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) { 11351 switch (cond) { 11352 // Note some conditions are synonyms for others 11353 case Assembler::zero: return Assembler::notZero; 11354 case Assembler::notZero: return Assembler::zero; 11355 case Assembler::less: return Assembler::greaterEqual; 11356 case Assembler::lessEqual: return Assembler::greater; 11357 case Assembler::greater: return Assembler::lessEqual; 11358 case Assembler::greaterEqual: return Assembler::less; 11359 case Assembler::below: return Assembler::aboveEqual; 11360 case Assembler::belowEqual: return Assembler::above; 11361 case Assembler::above: return Assembler::belowEqual; 11362 case Assembler::aboveEqual: return Assembler::below; 11363 case Assembler::overflow: return Assembler::noOverflow; 11364 case Assembler::noOverflow: return Assembler::overflow; 11365 case Assembler::negative: return Assembler::positive; 11366 case Assembler::positive: return Assembler::negative; 11367 case Assembler::parity: return Assembler::noParity; 11368 case Assembler::noParity: return Assembler::parity; 11369 } 11370 ShouldNotReachHere(); return Assembler::overflow; 11371} 11372 11373SkipIfEqual::SkipIfEqual( 11374 MacroAssembler* masm, const bool* flag_addr, bool value) { 11375 _masm = masm; 11376 _masm->cmp8(ExternalAddress((address)flag_addr), value); 11377 _masm->jcc(Assembler::equal, _label); 11378} 11379 11380SkipIfEqual::~SkipIfEqual() { 11381 
_masm->bind(_label); 11382} 11383
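// SkipIfEqual is an RAII guard: the constructor emits a cmp8/jcc against a bool
// flag in memory, and the destructor binds the skip label, so everything
// assembled between the two is executed only when the flag differs from the
// given value. A hypothetical usage sketch (the flag name is made up purely
// for illustration):
//
//   {
//     SkipIfEqual skip(masm, &SomeDiagnosticFlag, false);
//     // instructions emitted here run only when SomeDiagnosticFlag != false
//   } // ~SkipIfEqual binds the label; skipped execution resumes here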