vm_version_x86.cpp revision 9814:22fd02fad88b
/*
 * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "memory/resourceArea.hpp"
#include "runtime/java.hpp"
#include "runtime/os.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "vm_version_x86.hpp"


int VM_Version::_cpu;
int VM_Version::_model;
int VM_Version::_stepping;
VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };

// Address of instruction which causes SEGV
address VM_Version::_cpuinfo_segv_addr = 0;
// Address of instruction after the one which causes SEGV
address VM_Version::_cpuinfo_cont_addr = 0;

static BufferBlob* stub_blob;
static const int stub_size = 1000;

extern "C" {
  typedef void (*get_cpu_info_stub_t)(void*);
}
static get_cpu_info_stub_t get_cpu_info_stub = NULL;


class VM_Version_StubGenerator: public StubCodeGenerator {
 public:

  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}

  address generate_get_cpu_info() {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
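    // (AC is EFLAGS bit 18; ID is EFLAGS bit 21.)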
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);

    Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, done, wrapup;
    Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;

    StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
    //
    // LP64: rcx and rdx are first and second argument registers on windows

    __ push(rbp);
#ifdef _LP64
    __ mov(rbp, c_rarg0); // cpuid_info address
#else
    __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
#endif
    __ push(rbx);
    __ push(rsi);
    __ pushf();          // preserve flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
    __ jccb(Assembler::belowEqual, std_cpuid4);

    //
    // cpuid(0xB) Processor Topology
    //
    __ movl(rax, 0xb);
    __ xorl(rcx, rcx);   // Threads level
    __ cpuid();

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 1);     // Cores level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[15:0] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 2);     // Packages level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[15:0] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // cpuid(0x4) Deterministic cache params
    //
    __ bind(std_cpuid4);
    __ movl(rax, 4);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
    __ jccb(Assembler::greater, std_cpuid1);

    __ xorl(rcx, rcx);   // L1 cache
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid cache parameters used
    __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid1);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Standard cpuid(0x1)
    //
    __ bind(std_cpuid1);
    __ movl(rax, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported

    //
    // XCR0, XFEATURE_ENABLED_MASK register
    //
    __ xorl(rcx, rcx);   // zero for XCR0 register
    __ xgetbv();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    //
    // cpuid(0x7) Structured Extended Features
    //
    __ bind(sef_cpuid);
    __ movl(rax, 7);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
    __ jccb(Assembler::greater, ext_cpuid);

    __ xorl(rcx, rcx);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);

    //
    // Extended cpuid(0x80000000)
    //
    __ bind(ext_cpuid);
    __ movl(rax, 0x80000000);
    __ cpuid();
    __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
    __ jcc(Assembler::belowEqual, done);
    __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid1);
    __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid5);
    __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid7);
    //
    // Extended cpuid(0x80000008)
    //
    __ movl(rax, 0x80000008);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000007)
    //
    __ bind(ext_cpuid7);
    __ movl(rax, 0x80000007);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000005)
    //
    __ bind(ext_cpuid5);
    __ movl(rax, 0x80000005);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000001)
    //
    __ bind(ext_cpuid1);
    __ movl(rax, 0x80000001);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, done); // jump if AVX is not supported

    __ movl(rax, 0x6);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
    __ cmpl(rax, 0x6);
    __ jccb(Assembler::equal, start_simd_check); // jump if OS has enabled SSE and YMM state in XCR0

    // we need to bridge farther than imm8, so we use this island as a thunk
    __ bind(done);
    __ jmp(wrapup);

    __ bind(start_simd_check);
    //
    // Some OSes have a bug where the upper 128/256 bits of the YMM/ZMM
    // registers are not restored after signal processing.
    // Generate a SEGV here (by reading through NULL)
    // and check the upper YMM/ZMM bits after it.
    //
    intx saved_useavx = UseAVX;
    intx saved_usesse = UseSSE;
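    // Note: UseAVX/UseSSE and the CPU feature bits are temporarily raised below so that
    // the assembler's asserts accept AVX/EVEX instructions while probing; they are restored afterwards.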
    // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
    __ movl(rax, 0x10000);
    __ andl(rax, Address(rsi, 4)); // cpuid7 ebx: avx512f bit
    __ cmpl(rax, 0x10000);
    __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
    // check _cpuid_info.xem_xcr0_eax.bits.opmask
    // check _cpuid_info.xem_xcr0_eax.bits.zmm512
    // check _cpuid_info.xem_xcr0_eax.bits.zmm32
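    // (0xE0 selects XCR0 bits 5..7: opmask registers, upper halves of ZMM0-15, and ZMM16-31.)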
    __ movl(rax, 0xE0);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
    __ cmpl(rax, 0xE0);
    __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported

    // EVEX setup: run in lowest evex mode
    VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 3;
    UseSSE = 2;
    // load value into all 64 bytes of zmm7 register
    __ movl(rcx, VM_Version::ymm_test_value());
    __ movdl(xmm0, rcx);
    __ movl(rcx, 0xffff);
    __ kmovwl(k1, rcx);
    __ evpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
    __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
#ifdef _LP64
    __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
    __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
#endif
    VM_Version::clean_cpuFeatures();
    __ jmp(save_restore_except);

    __ bind(legacy_setup);
    // AVX setup
    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 1;
    UseSSE = 2;
    // load value into all 32 bytes of ymm7 register
    __ movl(rcx, VM_Version::ymm_test_value());

    __ movdl(xmm0, rcx);
    __ pshufd(xmm0, xmm0, 0x00);
    __ vinsertf128h(xmm0, xmm0, xmm0);
    __ vmovdqu(xmm7, xmm0);
#ifdef _LP64
    __ vmovdqu(xmm8, xmm0);
    __ vmovdqu(xmm15, xmm0);
#endif
    VM_Version::clean_cpuFeatures();

    __ bind(save_restore_except);
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr(__ pc());
    // Returns here after signal. Save xmm0 to check it later.

    // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
    __ movl(rax, 0x10000);
    __ andl(rax, Address(rsi, 4));
    __ cmpl(rax, 0x10000);
    __ jccb(Assembler::notEqual, legacy_save_restore);
    // check _cpuid_info.xem_xcr0_eax.bits.opmask
    // check _cpuid_info.xem_xcr0_eax.bits.zmm512
    // check _cpuid_info.xem_xcr0_eax.bits.zmm32
    __ movl(rax, 0xE0);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
    __ cmpl(rax, 0xE0);
    __ jccb(Assembler::notEqual, legacy_save_restore);

    // EVEX check: run in lowest evex mode
    VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 3;
    UseSSE = 2;
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
    __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
    __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
#ifdef _LP64
    __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
    __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);
#endif
    VM_Version::clean_cpuFeatures();
    UseAVX = saved_useavx;
    UseSSE = saved_usesse;
    __ jmp(wrapup);

    __ bind(legacy_save_restore);
    // AVX check
    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 1;
    UseSSE = 2;
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
    __ vmovdqu(Address(rsi, 0), xmm0);
    __ vmovdqu(Address(rsi, 32), xmm7);
#ifdef _LP64
    __ vmovdqu(Address(rsi, 64), xmm8);
    __ vmovdqu(Address(rsi, 96), xmm15);
#endif
    VM_Version::clean_cpuFeatures();
    UseAVX = saved_useavx;
    UseSSE = saved_usesse;

    __ bind(wrapup);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
};

void VM_Version::get_processor_features() {

  _cpu = 4; // 486 by default
  _model = 0;
  _stepping = 0;
  _features = 0;
  _logical_processors_per_package = 1;
  // i486 internal cache is both I&D and has a 16-byte line size
  _L1_data_cache_line_size = 16;

  // Get raw processor info

  get_cpu_info_stub(&_cpuid_info);

  assert_is_initialized();
  _cpu = extended_cpu_family();
  _model = extended_cpu_model();
  _stepping = cpu_stepping();

  if (cpu_family() > 4) { // it supports CPUID
    _features = feature_flags();
    // Logical processors are only available on P4s and above,
    // and only if hyperthreading is available.
    _logical_processors_per_package = logical_processor_count();
    _L1_data_cache_line_size = L1_line_size();
  }

  _supports_cx8 = supports_cmpxchg8();
  // xchg and xadd instructions
  _supports_atomic_getset4 = true;
  _supports_atomic_getadd4 = true;
  LP64_ONLY(_supports_atomic_getset8 = true);
  LP64_ONLY(_supports_atomic_getadd8 = true);

#ifdef _LP64
  // OS should support SSE for x64 and hardware should support at least SSE2.
  if (!VM_Version::supports_sse2()) {
    vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
  }
  // in 64 bit the use of SSE2 is the minimum
  if (UseSSE < 2) UseSSE = 2;
#endif

#ifdef AMD64
  // flush_icache_stub has to be generated first.
  // That is why the ICache line size is hard-coded in the ICache class,
  // see icache_x86.hpp. It is also the reason why we can't use the
  // clflush instruction in the 32-bit VM, since it could be running
  // on a CPU which does not support it.
  //
  // The only thing we can do is verify that the hard-coded
  // ICache::line_size has the correct value.
  guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
  // clflush_size is size in quadwords (8 bytes).
  guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");
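  // (CPUID leaf 1 EBX[15:8] reports the CLFLUSH line size in 8-byte units, so 8 means a 64-byte line.)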
#endif

  // If the OS doesn't support SSE, we can't use this feature even if the HW does
  if (!os::supports_sse())
    _features &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4A|CPU_SSE4_1|CPU_SSE4_2);

  if (UseSSE < 4) {
    _features &= ~CPU_SSE4_1;
    _features &= ~CPU_SSE4_2;
  }

  if (UseSSE < 3) {
    _features &= ~CPU_SSE3;
    _features &= ~CPU_SSSE3;
    _features &= ~CPU_SSE4A;
  }

  if (UseSSE < 2)
    _features &= ~CPU_SSE2;

  if (UseSSE < 1)
    _features &= ~CPU_SSE;

  // first try initial setting and detect what we can support
  if (UseAVX > 0) {
    if (UseAVX > 2 && supports_evex()) {
      UseAVX = 3;
    } else if (UseAVX > 1 && supports_avx2()) {
      UseAVX = 2;
    } else if (UseAVX > 0 && supports_avx()) {
      UseAVX = 1;
    } else {
      UseAVX = 0;
    }
  } else if (UseAVX < 0) {
    UseAVX = 0;
  }

  if (UseAVX < 3) {
    _features &= ~CPU_AVX512F;
    _features &= ~CPU_AVX512DQ;
    _features &= ~CPU_AVX512CD;
    _features &= ~CPU_AVX512BW;
    _features &= ~CPU_AVX512VL;
  }

  if (UseAVX < 2)
    _features &= ~CPU_AVX2;

  if (UseAVX < 1)
    _features &= ~CPU_AVX;

  if (!UseAES && !FLAG_IS_DEFAULT(UseAES))
    _features &= ~CPU_AES;

  if (logical_processors_per_package() == 1) {
    // An HT-capable processor could be installed on a system which doesn't support HT.
    _features &= ~CPU_HT;
  }

  char buf[256];
  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
               cores_per_cpu(), threads_per_core(),
               cpu_family(), _model, _stepping,
               (supports_cmov() ? ", cmov" : ""),
               (supports_cmpxchg8() ? ", cx8" : ""),
               (supports_fxsr() ? ", fxsr" : ""),
               (supports_mmx()  ? ", mmx"  : ""),
               (supports_sse()  ? ", sse"  : ""),
               (supports_sse2() ? ", sse2" : ""),
               (supports_sse3() ? ", sse3" : ""),
               (supports_ssse3()? ", ssse3": ""),
               (supports_sse4_1() ? ", sse4.1" : ""),
               (supports_sse4_2() ? ", sse4.2" : ""),
               (supports_popcnt() ? ", popcnt" : ""),
               (supports_avx()    ? ", avx" : ""),
               (supports_avx2()   ? ", avx2" : ""),
               (supports_aes()    ? ", aes" : ""),
               (supports_clmul()  ? ", clmul" : ""),
               (supports_erms()   ? ", erms" : ""),
               (supports_rtm()    ? ", rtm" : ""),
               (supports_mmx_ext() ? ", mmxext" : ""),
               (supports_3dnow_prefetch() ? ", 3dnowpref" : ""),
               (supports_lzcnt()   ? ", lzcnt": ""),
               (supports_sse4a()   ? ", sse4a": ""),
               (supports_ht() ? ", ht": ""),
               (supports_tsc() ? ", tsc": ""),
               (supports_tscinv_bit() ? ", tscinvbit": ""),
               (supports_tscinv() ? ", tscinv": ""),
               (supports_bmi1() ? ", bmi1" : ""),
               (supports_bmi2() ? ", bmi2" : ""),
               (supports_adx() ? ", adx" : ""),
               (supports_evex() ? ", evex" : ""));
  _features_string = os::strdup(buf);

  // UseSSE is set to the smaller of what hardware supports and what
  // the command line requires.  I.e., you cannot set UseSSE to 2 on
  // older Pentiums which do not support it.
  if (UseSSE > 4) UseSSE=4;
  if (UseSSE < 0) UseSSE=0;
  if (!supports_sse4_1()) // Drop to 3 if no SSE4 support
    UseSSE = MIN2((intx)3,UseSSE);
  if (!supports_sse3()) // Drop to 2 if no SSE3 support
    UseSSE = MIN2((intx)2,UseSSE);
  if (!supports_sse2()) // Drop to 1 if no SSE2 support
    UseSSE = MIN2((intx)1,UseSSE);
  if (!supports_sse ()) // Drop to 0 if no SSE  support
    UseSSE = 0;

  // Use AES instructions if available.
  if (supports_aes()) {
    if (FLAG_IS_DEFAULT(UseAES)) {
      FLAG_SET_DEFAULT(UseAES, true);
    }
    if (!UseAES) {
      if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
        warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
      }
      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
    } else {
      if (UseSSE > 2) {
        if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
          FLAG_SET_DEFAULT(UseAESIntrinsics, true);
        }
      } else {
        // The AES intrinsic stubs require AES instruction support (of course)
        // but also require SSE3 mode or higher for the instructions they use.
        if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
          warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
        }
        FLAG_SET_DEFAULT(UseAESIntrinsics, false);
      }
    }
  } else if (UseAES || UseAESIntrinsics) {
    if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
      warning("AES instructions are not available on this CPU");
      FLAG_SET_DEFAULT(UseAES, false);
    }
    if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
      warning("AES intrinsics are not available on this CPU");
      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
    }
  }

  // Use CLMUL instructions if available.
  if (supports_clmul()) {
    if (FLAG_IS_DEFAULT(UseCLMUL)) {
      UseCLMUL = true;
    }
  } else if (UseCLMUL) {
    if (!FLAG_IS_DEFAULT(UseCLMUL))
      warning("CLMUL instructions not available on this CPU (AVX may also be required)");
    FLAG_SET_DEFAULT(UseCLMUL, false);
  }

  if (UseCLMUL && (UseSSE > 2)) {
    if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
      UseCRC32Intrinsics = true;
    }
  } else if (UseCRC32Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
      warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
    FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
  }

  if (supports_sse4_2()) {
    if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
      UseCRC32CIntrinsics = true;
    }
  }
  else if (UseCRC32CIntrinsics) {
    if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
      warning("CRC32C intrinsics are not available on this CPU");
    }
    FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
  }

  // GHASH/GCM intrinsics
  if (UseCLMUL && (UseSSE > 2)) {
    if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
      UseGHASHIntrinsics = true;
    }
  } else if (UseGHASHIntrinsics) {
    if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
      warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
    FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
  }

  if (UseSHA) {
    warning("SHA instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseSHA, false);
  }

  if (UseSHA1Intrinsics) {
    warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
  }

  if (UseSHA256Intrinsics) {
    warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
  }

  if (UseSHA512Intrinsics) {
    warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
  }

  if (UseAdler32Intrinsics) {
    warning("Adler32Intrinsics not available on this CPU.");
    FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
  }

  // Adjust RTM (Restricted Transactional Memory) flags
  if (!supports_rtm() && UseRTMLocking) {
    // Can't continue because UseRTMLocking affects UseBiasedLocking flag
    // setting during arguments processing. See use_biased_locking().
    // VM_Version_init() is executed after UseBiasedLocking is used
    // in Thread::allocate().
    vm_exit_during_initialization("RTM instructions are not available on this CPU");
  }

#if INCLUDE_RTM_OPT
  if (UseRTMLocking) {
    if (is_intel_family_core()) {
      if ((_model == CPU_MODEL_HASWELL_E3) ||
          (_model == CPU_MODEL_HASWELL_E7 && _stepping < 3) ||
          (_model == CPU_MODEL_BROADWELL  && _stepping < 4)) {
        // currently a collision between SKL and HSW_E3
        if (!UnlockExperimentalVMOptions && UseAVX < 3) {
          vm_exit_during_initialization("UseRTMLocking is only available as experimental option on this platform. It must be enabled via -XX:+UnlockExperimentalVMOptions flag.");
        } else {
          warning("UseRTMLocking is only available as experimental option on this platform.");
        }
      }
    }
    if (!FLAG_IS_CMDLINE(UseRTMLocking)) {
      // RTM locking should be used only for applications with
      // high lock contention. For now we do not use it by default.
      vm_exit_during_initialization("UseRTMLocking flag should be only set on command line");
    }
    if (!is_power_of_2(RTMTotalCountIncrRate)) {
      warning("RTMTotalCountIncrRate must be a power of 2, resetting it to 64");
      FLAG_SET_DEFAULT(RTMTotalCountIncrRate, 64);
    }
    if (RTMAbortRatio < 0 || RTMAbortRatio > 100) {
      warning("RTMAbortRatio must be in the range 0 to 100, resetting it to 50");
      FLAG_SET_DEFAULT(RTMAbortRatio, 50);
    }
  } else { // !UseRTMLocking
    if (UseRTMForStackLocks) {
      if (!FLAG_IS_DEFAULT(UseRTMForStackLocks)) {
        warning("UseRTMForStackLocks flag should be off when UseRTMLocking flag is off");
      }
      FLAG_SET_DEFAULT(UseRTMForStackLocks, false);
    }
    if (UseRTMDeopt) {
      FLAG_SET_DEFAULT(UseRTMDeopt, false);
    }
    if (PrintPreciseRTMLockingStatistics) {
      FLAG_SET_DEFAULT(PrintPreciseRTMLockingStatistics, false);
    }
  }
#else
  if (UseRTMLocking) {
    // Only C2 does RTM locking optimization.
    // Can't continue because UseRTMLocking affects UseBiasedLocking flag
    // setting during arguments processing. See use_biased_locking().
    vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
  }
#endif

#ifdef COMPILER2
  if (UseFPUForSpilling) {
    if (UseSSE < 2) {
      // Only supported with SSE2+
      FLAG_SET_DEFAULT(UseFPUForSpilling, false);
    }
  }
#endif
#if defined(COMPILER2) || INCLUDE_JVMCI
  if (MaxVectorSize > 0) {
    if (!is_power_of_2(MaxVectorSize)) {
      warning("MaxVectorSize must be a power of 2");
      FLAG_SET_DEFAULT(MaxVectorSize, 64);
    }
    if (MaxVectorSize > 64) {
      FLAG_SET_DEFAULT(MaxVectorSize, 64);
    }
    if (MaxVectorSize > 16 && (UseAVX == 0 || !os_supports_avx_vectors())) {
      // 32 bytes vectors (in YMM) are only supported with AVX+
      FLAG_SET_DEFAULT(MaxVectorSize, 16);
    }
    if (UseSSE < 2) {
      // Vectors (in XMM) are only supported with SSE2+
      FLAG_SET_DEFAULT(MaxVectorSize, 0);
    }
#if defined(COMPILER2) && defined(ASSERT)
    if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
      tty->print_cr("State of YMM registers after signal handle:");
      int nreg = 2 LP64_ONLY(+2);
      const char* ymm_name[4] = {"0", "7", "8", "15"};
      for (int i = 0; i < nreg; i++) {
        tty->print("YMM%s:", ymm_name[i]);
        for (int j = 7; j >=0; j--) {
          tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
        }
        tty->cr();
      }
    }
#endif // COMPILER2 && ASSERT
  }
#endif // COMPILER2 || INCLUDE_JVMCI

#ifdef COMPILER2
#ifdef _LP64
  if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
    UseMultiplyToLenIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
    UseSquareToLenIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
    UseMulAddIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
    UseMontgomeryMultiplyIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
    UseMontgomerySquareIntrinsic = true;
  }
#else
  if (UseMultiplyToLenIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
      warning("multiplyToLen intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false);
  }
  if (UseMontgomeryMultiplyIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
      warning("montgomeryMultiply intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, false);
  }
  if (UseMontgomerySquareIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
      warning("montgomerySquare intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, false);
  }
  if (UseSquareToLenIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
      warning("squareToLen intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, false);
  }
  if (UseMulAddIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
      warning("mulAdd intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseMulAddIntrinsic, false);
  }
#endif
#endif // COMPILER2

  // On newer CPUs, instructions which update the whole XMM register should be used
  // to prevent a partial register stall due to dependencies on the high half.
  //
  // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
  // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
  // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
  // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).

  if( is_amd() ) { // AMD cpus specific settings
    if( supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop) ) {
      // Use it on new AMD cpus starting from Opteron.
      UseAddressNop = true;
    }
    if( supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift) ) {
      // Use it on new AMD cpus starting from Opteron.
      UseNewLongLShift = true;
    }
    if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) {
      if (supports_sse4a()) {
        UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
      } else {
        UseXmmLoadAndClearUpper = false;
      }
    }
    if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) {
      if( supports_sse4a() ) {
        UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
      } else {
        UseXmmRegToRegMoveAll = false;
      }
    }
    if( FLAG_IS_DEFAULT(UseXmmI2F) ) {
      if( supports_sse4a() ) {
        UseXmmI2F = true;
      } else {
        UseXmmI2F = false;
      }
    }
    if( FLAG_IS_DEFAULT(UseXmmI2D) ) {
      if( supports_sse4a() ) {
        UseXmmI2D = true;
      } else {
        UseXmmI2D = false;
      }
    }
    if (supports_sse4_2() && UseSSE >= 4) {
      if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
        FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
      }
    } else {
      if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
        warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
      }
      FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
    }

    // some defaults for AMD family 15h
    if ( cpu_family() == 0x15 ) {
      // On family 15h processors default is no sw prefetch
      if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
        AllocatePrefetchStyle = 0;
      }
      // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
      if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
        AllocatePrefetchInstr = 3;
      }
      // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        UseXMMForArrayCopy = true;
      }
      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
        UseUnalignedLoadStores = true;
      }
    }

#ifdef COMPILER2
    if (MaxVectorSize > 16) {
      // Limit vectors size to 16 bytes on current AMD cpus.
      FLAG_SET_DEFAULT(MaxVectorSize, 16);
    }
#endif // COMPILER2
  }

  if( is_intel() ) { // Intel cpus specific settings
    if( FLAG_IS_DEFAULT(UseStoreImmI16) ) {
      UseStoreImmI16 = false; // don't use it on Intel cpus
    }
    if( cpu_family() == 6 || cpu_family() == 15 ) {
      if( FLAG_IS_DEFAULT(UseAddressNop) ) {
        // Use it on all Intel cpus starting from PentiumPro
        UseAddressNop = true;
      }
    }
    if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) {
      UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
    }
    if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) {
      if( supports_sse3() ) {
        UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
      } else {
        UseXmmRegToRegMoveAll = false;
      }
    }
    if( cpu_family() == 6 && supports_sse3() ) { // New Intel cpus
#ifdef COMPILER2
      if( FLAG_IS_DEFAULT(MaxLoopPad) ) {
        // For new Intel cpus do the following optimization:
        // don't align the beginning of a loop if there are enough instructions
        // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
        // in current fetch line (OptoLoopAlignment) or the padding
        // is big (> MaxLoopPad).
        // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
        // generated NOP instructions. 11 is the largest size of one
        // address NOP instruction '0F 1F' (see Assembler::nop(i)).
        MaxLoopPad = 11;
      }
#endif // COMPILER2
      if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
      }
      if (supports_sse4_2() && supports_ht()) { // Newest Intel cpus
        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
        }
      }
      if (supports_sse4_2() && UseSSE >= 4) {
        if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
        }
      } else {
        if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
        }
        FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
      }
    }
    if ((cpu_family() == 0x06) &&
        ((extended_cpu_model() == 0x36) || // Centerton
         (extended_cpu_model() == 0x37) || // Silvermont
         (extended_cpu_model() == 0x4D))) {
#ifdef COMPILER2
      if (FLAG_IS_DEFAULT(OptoScheduling)) {
        OptoScheduling = true;
      }
#endif
      if (supports_sse4_2()) { // Silvermont
        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
        }
      }
    }
    if(FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
      AllocatePrefetchInstr = 3;
    }
  }

#ifdef _LP64
  if (UseSSE42Intrinsics) {
    if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
      UseVectorizedMismatchIntrinsic = true;
    }
  } else if (UseVectorizedMismatchIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
      warning("vectorizedMismatch intrinsics are not available on this CPU");
    FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
  }
#else
  if (UseVectorizedMismatchIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
      warning("vectorizedMismatch intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
  }
#endif // _LP64

  // Use count leading zeros count instruction if available.
  if (supports_lzcnt()) {
    if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
      UseCountLeadingZerosInstruction = true;
    }
  } else if (UseCountLeadingZerosInstruction) {
    warning("lzcnt instruction is not available on this CPU");
    FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
  }

  // Use count trailing zeros instruction if available
  if (supports_bmi1()) {
    // tzcnt does not require VEX prefix
    if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
      if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) {
        // Don't use tzcnt if BMI1 is switched off on command line.
        UseCountTrailingZerosInstruction = false;
      } else {
        UseCountTrailingZerosInstruction = true;
      }
    }
  } else if (UseCountTrailingZerosInstruction) {
    warning("tzcnt instruction is not available on this CPU");
    FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
  }

  // BMI instructions (except tzcnt) use an encoding with VEX prefix.
  // VEX prefix is generated only when AVX > 0.
  if (supports_bmi1() && supports_avx()) {
    if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
      UseBMI1Instructions = true;
    }
  } else if (UseBMI1Instructions) {
    warning("BMI1 instructions are not available on this CPU (AVX is also required)");
    FLAG_SET_DEFAULT(UseBMI1Instructions, false);
  }

  if (supports_bmi2() && supports_avx()) {
    if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
      UseBMI2Instructions = true;
    }
  } else if (UseBMI2Instructions) {
    warning("BMI2 instructions are not available on this CPU (AVX is also required)");
    FLAG_SET_DEFAULT(UseBMI2Instructions, false);
  }

  // Use population count instruction if available.
  if (supports_popcnt()) {
    if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
      UsePopCountInstruction = true;
    }
  } else if (UsePopCountInstruction) {
    warning("POPCNT instruction is not available on this CPU");
    FLAG_SET_DEFAULT(UsePopCountInstruction, false);
  }

  // Use fast-string operations if available.
  if (supports_erms()) {
    if (FLAG_IS_DEFAULT(UseFastStosb)) {
      UseFastStosb = true;
    }
  } else if (UseFastStosb) {
    warning("fast-string operations are not available on this CPU");
    FLAG_SET_DEFAULT(UseFastStosb, false);
  }

#ifdef COMPILER2
  if (FLAG_IS_DEFAULT(AlignVector)) {
    // Modern processors allow misaligned memory operations for vectors.
    AlignVector = !UseUnalignedLoadStores;
  }
#endif // COMPILER2

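  // (AllocatePrefetchInstr value 3 selects the PREFETCHW instruction; see the printout below.)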
  if( AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch() ) AllocatePrefetchInstr=0;
  if( !supports_sse() && supports_3dnow_prefetch() ) AllocatePrefetchInstr = 3;

  // Allocation prefetch settings
  intx cache_line_size = prefetch_data_size();
  if( cache_line_size > AllocatePrefetchStepSize )
    AllocatePrefetchStepSize = cache_line_size;

  assert(AllocatePrefetchLines > 0, "invalid value");
  if( AllocatePrefetchLines < 1 )     // set valid value in product VM
    AllocatePrefetchLines = 3;
  assert(AllocateInstancePrefetchLines > 0, "invalid value");
  if( AllocateInstancePrefetchLines < 1 ) // set valid value in product VM
    AllocateInstancePrefetchLines = 1;

  AllocatePrefetchDistance = allocate_prefetch_distance();
  AllocatePrefetchStyle    = allocate_prefetch_style();

  if (is_intel() && cpu_family() == 6 && supports_sse3()) {
    if (AllocatePrefetchStyle == 2) { // watermark prefetching on Core
#ifdef _LP64
      AllocatePrefetchDistance = 384;
#else
      AllocatePrefetchDistance = 320;
#endif
    }
    if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
      AllocatePrefetchDistance = 192;
      AllocatePrefetchLines = 4;
    }
#ifdef COMPILER2
    if (supports_sse4_2()) {
      if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
      }
    }
#endif
  }

#ifdef _LP64
  // Prefetch settings
  PrefetchCopyIntervalInBytes = prefetch_copy_interval_in_bytes();
  PrefetchScanIntervalInBytes = prefetch_scan_interval_in_bytes();
  PrefetchFieldsAhead         = prefetch_fields_ahead();
#endif

  if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
     (cache_line_size > ContendedPaddingWidth))
     ContendedPaddingWidth = cache_line_size;

  // This machine allows unaligned memory accesses
  if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
    FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
  }

#ifndef PRODUCT
  if (PrintMiscellaneous && Verbose) {
    tty->print_cr("Logical CPUs per core: %u",
                  logical_processors_per_package());
    tty->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
    tty->print("UseSSE=%d", (int) UseSSE);
    if (UseAVX > 0) {
      tty->print("  UseAVX=%d", (int) UseAVX);
    }
    if (UseAES) {
      tty->print("  UseAES=1");
    }
#ifdef COMPILER2
    if (MaxVectorSize > 0) {
      tty->print("  MaxVectorSize=%d", (int) MaxVectorSize);
    }
#endif
    tty->cr();
    tty->print("Allocation");
    if (AllocatePrefetchStyle <= 0 || (UseSSE == 0 && !supports_3dnow_prefetch())) {
      tty->print_cr(": no prefetching");
    } else {
      tty->print(" prefetching: ");
      if (UseSSE == 0 && supports_3dnow_prefetch()) {
        tty->print("PREFETCHW");
      } else if (UseSSE >= 1) {
        if (AllocatePrefetchInstr == 0) {
          tty->print("PREFETCHNTA");
        } else if (AllocatePrefetchInstr == 1) {
          tty->print("PREFETCHT0");
        } else if (AllocatePrefetchInstr == 2) {
          tty->print("PREFETCHT2");
        } else if (AllocatePrefetchInstr == 3) {
          tty->print("PREFETCHW");
        }
      }
      if (AllocatePrefetchLines > 1) {
        tty->print_cr(" at distance %d, %d lines of %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchLines, (int) AllocatePrefetchStepSize);
      } else {
        tty->print_cr(" at distance %d, one line of %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchStepSize);
      }
    }

    if (PrefetchCopyIntervalInBytes > 0) {
      tty->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
    }
    if (PrefetchScanIntervalInBytes > 0) {
      tty->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
    }
    if (PrefetchFieldsAhead > 0) {
      tty->print_cr("PrefetchFieldsAhead %d", (int) PrefetchFieldsAhead);
    }
    if (ContendedPaddingWidth > 0) {
      tty->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
    }
  }
#endif // !PRODUCT
}

bool VM_Version::use_biased_locking() {
#if INCLUDE_RTM_OPT
  // RTM locking is most useful when there is high lock contention and
  // low data contention.  With high lock contention the lock is usually
  // inflated and biased locking is not suitable for that case.
  // RTM locking code requires that biased locking is off.
  // Note: we can't switch off UseBiasedLocking in get_processor_features()
  // because it is used by Thread::allocate() which is called before
  // VM_Version::initialize().
  if (UseRTMLocking && UseBiasedLocking) {
    if (FLAG_IS_DEFAULT(UseBiasedLocking)) {
      FLAG_SET_DEFAULT(UseBiasedLocking, false);
    } else {
      warning("Biased locking is not supported with RTM locking; ignoring UseBiasedLocking flag." );
      UseBiasedLocking = false;
    }
  }
#endif
  return UseBiasedLocking;
}

void VM_Version::initialize() {
  ResourceMark rm;
  // Making this stub must be FIRST use of assembler

  stub_blob = BufferBlob::create("get_cpu_info_stub", stub_size);
  if (stub_blob == NULL) {
    vm_exit_during_initialization("Unable to allocate get_cpu_info_stub");
  }
  CodeBuffer c(stub_blob);
  VM_Version_StubGenerator g(&c);
  get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
                                     g.generate_get_cpu_info());

  get_processor_features();
}