vm_version_x86.cpp revision 13254:c044f8d03932
1/*
2 * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25#include "precompiled.hpp"
26#include "asm/macroAssembler.hpp"
27#include "asm/macroAssembler.inline.hpp"
28#include "logging/log.hpp"
29#include "memory/resourceArea.hpp"
30#include "prims/jvm.h"
31#include "runtime/java.hpp"
32#include "runtime/os.hpp"
33#include "runtime/stubCodeGenerator.hpp"
34#include "vm_version_x86.hpp"
35
36
37int VM_Version::_cpu;
38int VM_Version::_model;
39int VM_Version::_stepping;
40VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };
41
42// Address of instruction which causes SEGV
43address VM_Version::_cpuinfo_segv_addr = 0;
44// Address of instruction after the one which causes SEGV
45address VM_Version::_cpuinfo_cont_addr = 0;
46
47static BufferBlob* stub_blob;
48static const int stub_size = 1000;
49
50extern "C" {
51  typedef void (*get_cpu_info_stub_t)(void*);
52}
53static get_cpu_info_stub_t get_cpu_info_stub = NULL;
54
55
56class VM_Version_StubGenerator: public StubCodeGenerator {
57 public:
58
59  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
60
61  address generate_get_cpu_info() {
62    // Flags to test CPU type.
63    const uint32_t HS_EFL_AC = 0x40000;
64    const uint32_t HS_EFL_ID = 0x200000;
65    // Values for when we don't have a CPUID instruction.
66    const int      CPU_FAMILY_SHIFT = 8;
67    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
68    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
69    bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
70
71    Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
72    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, done, wrapup;
73    Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;
74
75    StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
76#   define __ _masm->
77
78    address start = __ pc();
79
80    //
81    // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
82    //
83    // LP64: rcx and rdx are first and second argument registers on windows
84
85    __ push(rbp);
86#ifdef _LP64
87    __ mov(rbp, c_rarg0); // cpuid_info address
88#else
89    __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
90#endif
91    __ push(rbx);
92    __ push(rsi);
93    __ pushf();          // preserve flags (rbx and rsi were saved above)
94    __ pop(rax);
95    __ push(rax);
96    __ mov(rcx, rax);
97    //
98    // if we are unable to change the AC flag, we have a 386
99    //
100    __ xorl(rax, HS_EFL_AC);
101    __ push(rax);
102    __ popf();
103    __ pushf();
104    __ pop(rax);
105    __ cmpptr(rax, rcx);
106    __ jccb(Assembler::notEqual, detect_486);
107
108    __ movl(rax, CPU_FAMILY_386);
109    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
110    __ jmp(done);
111
112    //
113    // If we are unable to change the ID flag, we have a 486 which does
114    // not support the "cpuid" instruction.
115    //
116    __ bind(detect_486);
117    __ mov(rax, rcx);
118    __ xorl(rax, HS_EFL_ID);
119    __ push(rax);
120    __ popf();
121    __ pushf();
122    __ pop(rax);
123    __ cmpptr(rcx, rax);
124    __ jccb(Assembler::notEqual, detect_586);
125
126    __ bind(cpu486);
127    __ movl(rax, CPU_FAMILY_486);
128    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
129    __ jmp(done);
130
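    // For reference, a minimal C++ sketch of the same EFLAGS-based probe, kept here
    // only as an illustration (it is not compiled and is not part of the generated
    // stub).  It assumes a toolchain that provides the __readeflags()/__writeeflags()
    // intrinsics; the stub above additionally toggles EFLAGS.AC (HS_EFL_AC) first to
    // tell a 386 from a 486.
#if 0
    static bool cpu_supports_cpuid() {
      unsigned long long flags = __readeflags();
      __writeeflags(flags ^ 0x200000);                       // try to flip EFLAGS.ID (HS_EFL_ID)
      bool toggled = ((__readeflags() ^ flags) & 0x200000) != 0;
      __writeeflags(flags);                                  // restore the original flags
      return toggled;                                        // ID bit stuck => pre-CPUID CPU
    }
#endif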
131    //
132    // At this point, we have a chip which supports the "cpuid" instruction
133    //
134    __ bind(detect_586);
135    __ xorl(rax, rax);
136    __ cpuid();
137    __ orl(rax, rax);
138    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
139                                        // value of at least 1, we give up and
140                                        // assume a 486
141    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
142    __ movl(Address(rsi, 0), rax);
143    __ movl(Address(rsi, 4), rbx);
144    __ movl(Address(rsi, 8), rcx);
145    __ movl(Address(rsi,12), rdx);
146
147    __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
148    __ jccb(Assembler::belowEqual, std_cpuid4);
149
150    //
151    // cpuid(0xB) Processor Topology
152    //
153    __ movl(rax, 0xb);
154    __ xorl(rcx, rcx);   // Threads level
155    __ cpuid();
156
157    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
158    __ movl(Address(rsi, 0), rax);
159    __ movl(Address(rsi, 4), rbx);
160    __ movl(Address(rsi, 8), rcx);
161    __ movl(Address(rsi,12), rdx);
162
163    __ movl(rax, 0xb);
164    __ movl(rcx, 1);     // Cores level
165    __ cpuid();
166    __ push(rax);
167    __ andl(rax, 0x1f);  // Determine if valid topology level
168    __ orl(rax, rbx);    // eax[4:0] | ebx[15:0] == 0 indicates invalid level
169    __ andl(rax, 0xffff);
170    __ pop(rax);
171    __ jccb(Assembler::equal, std_cpuid4);
172
173    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
174    __ movl(Address(rsi, 0), rax);
175    __ movl(Address(rsi, 4), rbx);
176    __ movl(Address(rsi, 8), rcx);
177    __ movl(Address(rsi,12), rdx);
178
179    __ movl(rax, 0xb);
180    __ movl(rcx, 2);     // Packages level
181    __ cpuid();
182    __ push(rax);
183    __ andl(rax, 0x1f);  // Determine if valid topology level
184    __ orl(rax, rbx);    // eax[4:0] | ebx[15:0] == 0 indicates invalid level
185    __ andl(rax, 0xffff);
186    __ pop(rax);
187    __ jccb(Assembler::equal, std_cpuid4);
188
189    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
190    __ movl(Address(rsi, 0), rax);
191    __ movl(Address(rsi, 4), rbx);
192    __ movl(Address(rsi, 8), rcx);
193    __ movl(Address(rsi,12), rdx);
194
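    // Illustrative sketch (not compiled): how one of the leaf-0xB sub-leaves captured
    // above is interpreted in plain C++, per the Intel SDM description of leaf 0xB.
    // ECX[15:8] is the level type (1 = SMT, 2 = core), EBX[15:0] is the number of
    // logical processors at that level, and a sub-leaf is invalid when both EAX[4:0]
    // and EBX[15:0] are zero -- exactly the condition the and/or sequences above test
    // before each store.
#if 0
    struct TopologyLevel {
      unsigned level_type;       // 1 = SMT (threads), 2 = core
      unsigned logical_count;    // logical processors at this level
      bool     valid;
    };
    static TopologyLevel decode_topology_subleaf(uint32_t eax, uint32_t ebx, uint32_t ecx) {
      TopologyLevel t;
      t.level_type    = (ecx >> 8) & 0xff;
      t.logical_count = ebx & 0xffff;
      t.valid         = ((eax & 0x1f) | (ebx & 0xffff)) != 0;
      return t;
    }
#endif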
195    //
196    // cpuid(0x4) Deterministic cache params
197    //
198    __ bind(std_cpuid4);
199    __ movl(rax, 4);
200    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
201    __ jccb(Assembler::greater, std_cpuid1);
202
203    __ xorl(rcx, rcx);   // L1 cache
204    __ cpuid();
205    __ push(rax);
206    __ andl(rax, 0x1f);  // Determine if valid cache parameters used
207    __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
208    __ pop(rax);
209    __ jccb(Assembler::equal, std_cpuid1);
210
211    __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
212    __ movl(Address(rsi, 0), rax);
213    __ movl(Address(rsi, 4), rbx);
214    __ movl(Address(rsi, 8), rcx);
215    __ movl(Address(rsi,12), rdx);
216
217    //
218    // Standard cpuid(0x1)
219    //
220    __ bind(std_cpuid1);
221    __ movl(rax, 1);
222    __ cpuid();
223    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
224    __ movl(Address(rsi, 0), rax);
225    __ movl(Address(rsi, 4), rbx);
226    __ movl(Address(rsi, 8), rcx);
227    __ movl(Address(rsi,12), rdx);
228
229    //
230    // Check if OS has enabled XGETBV instruction to access XCR0
231    // (OSXSAVE feature flag) and CPU supports AVX
232    //
233    __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
234    __ cmpl(rcx, 0x18000000);
235    __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported
236
237    //
238    // XCR0, XFEATURE_ENABLED_MASK register
239    //
240    __ xorl(rcx, rcx);   // zero for XCR0 register
241    __ xgetbv();
242    __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
243    __ movl(Address(rsi, 0), rax);
244    __ movl(Address(rsi, 4), rdx);
245
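    // Illustrative sketch (not compiled): the equivalent OSXSAVE/AVX/XCR0 check
    // written as standalone C++, assuming a GCC/Clang-style toolchain that provides
    // __get_cpuid (<cpuid.h>) and _xgetbv (-mxsave).  The generated stub performs the
    // same test with the 0x18000000 and 0x6 masks used below.
#if 0
    static bool os_has_enabled_avx_state() {
      unsigned int eax, ebx, ecx, edx;
      if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx)) return false;
      const unsigned int osxsave_avx = (1u << 27) | (1u << 28);   // CPUID.1:ECX osxsave | avx
      if ((ecx & osxsave_avx) != osxsave_avx) return false;
      unsigned long long xcr0 = _xgetbv(0);                       // XCR0 = XFEATURE_ENABLED_MASK
      return (xcr0 & 0x6) == 0x6;                                 // OS saves SSE and YMM state
    }
#endif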
246    //
247    // cpuid(0x7) Structured Extended Features
248    //
249    __ bind(sef_cpuid);
250    __ movl(rax, 7);
251    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
252    __ jccb(Assembler::greater, ext_cpuid);
253
254    __ xorl(rcx, rcx);
255    __ cpuid();
256    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
257    __ movl(Address(rsi, 0), rax);
258    __ movl(Address(rsi, 4), rbx);
259
260    //
261    // Extended cpuid(0x80000000)
262    //
263    __ bind(ext_cpuid);
264    __ movl(rax, 0x80000000);
265    __ cpuid();
266    __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
267    __ jcc(Assembler::belowEqual, done);
268    __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
269    __ jccb(Assembler::belowEqual, ext_cpuid1);
270    __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
271    __ jccb(Assembler::belowEqual, ext_cpuid5);
272    __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
273    __ jccb(Assembler::belowEqual, ext_cpuid7);
274    //
275    // Extended cpuid(0x80000008)
276    //
277    __ movl(rax, 0x80000008);
278    __ cpuid();
279    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
280    __ movl(Address(rsi, 0), rax);
281    __ movl(Address(rsi, 4), rbx);
282    __ movl(Address(rsi, 8), rcx);
283    __ movl(Address(rsi,12), rdx);
284
285    //
286    // Extended cpuid(0x80000007)
287    //
288    __ bind(ext_cpuid7);
289    __ movl(rax, 0x80000007);
290    __ cpuid();
291    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
292    __ movl(Address(rsi, 0), rax);
293    __ movl(Address(rsi, 4), rbx);
294    __ movl(Address(rsi, 8), rcx);
295    __ movl(Address(rsi,12), rdx);
296
297    //
298    // Extended cpuid(0x80000005)
299    //
300    __ bind(ext_cpuid5);
301    __ movl(rax, 0x80000005);
302    __ cpuid();
303    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
304    __ movl(Address(rsi, 0), rax);
305    __ movl(Address(rsi, 4), rbx);
306    __ movl(Address(rsi, 8), rcx);
307    __ movl(Address(rsi,12), rdx);
308
309    //
310    // Extended cpuid(0x80000001)
311    //
312    __ bind(ext_cpuid1);
313    __ movl(rax, 0x80000001);
314    __ cpuid();
315    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
316    __ movl(Address(rsi, 0), rax);
317    __ movl(Address(rsi, 4), rbx);
318    __ movl(Address(rsi, 8), rcx);
319    __ movl(Address(rsi,12), rdx);
320
321    //
322    // Check if OS has enabled XGETBV instruction to access XCR0
323    // (OSXSAVE feature flag) and CPU supports AVX
324    //
325    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
326    __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
327    __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
328    __ cmpl(rcx, 0x18000000);
329    __ jccb(Assembler::notEqual, done); // jump if AVX is not supported
330
331    __ movl(rax, 0x6);
332    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
333    __ cmpl(rax, 0x6);
334    __ jccb(Assembler::equal, start_simd_check); // jump if OS has enabled SSE and YMM state (AVX is usable)
335
336    // we need to bridge farther than imm8, so we use this island as a thunk
337    __ bind(done);
338    __ jmp(wrapup);
339
340    __ bind(start_simd_check);
341    //
342    // Some OSs have a bug when upper 128/256bits of YMM/ZMM
343    // registers are not restored after a signal processing.
344    // Generate SEGV here (reference through NULL)
345    // and check upper YMM/ZMM bits after it.
346    //
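    // Roughly, the probe works like this: the YMM/ZMM registers saved below are filled
    // with ymm_test_value(), a deliberate NULL load raises SIGSEGV, the platform signal
    // handler recognizes _cpuinfo_segv_addr and resumes execution at _cpuinfo_cont_addr,
    // and the register contents are then stored so they can be compared afterwards.
    // A hypothetical helper sketching that final comparison (approximately what
    // VM_Version::os_supports_avx_vectors() checks; not compiled here):
#if 0
    static bool upper_ymm_state_survived_signal(const VM_Version::CpuidInfo& info) {
      int nreg = 2 LP64_ONLY(+2);                    // ymm0, ymm7 (+ ymm8, ymm15 on LP64)
      for (int i = 0; i < nreg * 8; i++) {           // 8 dwords per saved YMM register
        if (info.ymm_save[i] != (uint32_t)VM_Version::ymm_test_value()) {
          return false;                              // OS lost the upper register state
        }
      }
      return true;
    }
#endif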
347    intx saved_useavx = UseAVX;
348    intx saved_usesse = UseSSE;
349    // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
350    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
351    __ movl(rax, 0x10000);
352    __ andl(rax, Address(rsi, 4)); // sef_cpuid7 ebx bit 16: avx512f
353    __ cmpl(rax, 0x10000);
354    __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
355    // check _cpuid_info.xem_xcr0_eax.bits.opmask
356    // check _cpuid_info.xem_xcr0_eax.bits.zmm512
357    // check _cpuid_info.xem_xcr0_eax.bits.zmm32
358    __ movl(rax, 0xE0);
359    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
360    __ cmpl(rax, 0xE0);
361    __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
362
363    // If UseAVX is uninitialized or is set by the user to include EVEX
364    if (use_evex) {
365      // EVEX setup: run in lowest evex mode
366      VM_Version::set_evex_cpuFeatures(); // Enable temporarily to pass asserts
367      UseAVX = 3;
368      UseSSE = 2;
369#ifdef _WINDOWS
370      // xmm6-xmm15 are callee-saved on Windows, so save the ones we clobber below
371      // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
372      __ subptr(rsp, 64);
373      __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
374#ifdef _LP64
375      __ subptr(rsp, 64);
376      __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
377      __ subptr(rsp, 64);
378      __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
379#endif // _LP64
380#endif // _WINDOWS
381
382      // load value into all 64 bytes of zmm7 register
383      __ movl(rcx, VM_Version::ymm_test_value());
384      __ movdl(xmm0, rcx);
385      __ movl(rcx, 0xffff);
386      __ kmovwl(k1, rcx);
387      __ evpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
388      __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
389#ifdef _LP64
390      __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
391      __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
392#endif
393      VM_Version::clean_cpuFeatures();
394      __ jmp(save_restore_except);
395    }
396
397    __ bind(legacy_setup);
398    // AVX setup
399    VM_Version::set_avx_cpuFeatures(); // Enable temporarily to pass asserts
400    UseAVX = 1;
401    UseSSE = 2;
402#ifdef _WINDOWS
403    __ subptr(rsp, 32);
404    __ vmovdqu(Address(rsp, 0), xmm7);
405#ifdef _LP64
406    __ subptr(rsp, 32);
407    __ vmovdqu(Address(rsp, 0), xmm8);
408    __ subptr(rsp, 32);
409    __ vmovdqu(Address(rsp, 0), xmm15);
410#endif // _LP64
411#endif // _WINDOWS
412
413    // load value into all 32 bytes of ymm7 register
414    __ movl(rcx, VM_Version::ymm_test_value());
415
416    __ movdl(xmm0, rcx);
417    __ pshufd(xmm0, xmm0, 0x00);
418    __ vinsertf128_high(xmm0, xmm0);
419    __ vmovdqu(xmm7, xmm0);
420#ifdef _LP64
421    __ vmovdqu(xmm8, xmm0);
422    __ vmovdqu(xmm15, xmm0);
423#endif
424    VM_Version::clean_cpuFeatures();
425
426    __ bind(save_restore_except);
427    __ xorl(rsi, rsi);
428    VM_Version::set_cpuinfo_segv_addr(__ pc());
429    // Generate SEGV
430    __ movl(rax, Address(rsi, 0));
431
432    VM_Version::set_cpuinfo_cont_addr(__ pc());
433    // Returns here after signal. Save xmm0 to check it later.
434
435    // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
436    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
437    __ movl(rax, 0x10000);
438    __ andl(rax, Address(rsi, 4));
439    __ cmpl(rax, 0x10000);
440    __ jcc(Assembler::notEqual, legacy_save_restore);
441    // check _cpuid_info.xem_xcr0_eax.bits.opmask
442    // check _cpuid_info.xem_xcr0_eax.bits.zmm512
443    // check _cpuid_info.xem_xcr0_eax.bits.zmm32
444    __ movl(rax, 0xE0);
445      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
446    __ cmpl(rax, 0xE0);
447    __ jcc(Assembler::notEqual, legacy_save_restore);
448
449    // If UseAVX is uninitialized or is set by the user to include EVEX
450    if (use_evex) {
451      // EVEX check: run in lowest evex mode
452      VM_Version::set_evex_cpuFeatures(); // Enable temporarily to pass asserts
453      UseAVX = 3;
454      UseSSE = 2;
455      __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
456      __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
457      __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
458#ifdef _LP64
459      __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
460      __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);
461#endif
462
463#ifdef _WINDOWS
464#ifdef _LP64
465      __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
466      __ addptr(rsp, 64);
467      __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
468      __ addptr(rsp, 64);
469#endif // _LP64
470      __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
471      __ addptr(rsp, 64);
472#endif // _WINDOWS
473      generate_vzeroupper(wrapup);
474      VM_Version::clean_cpuFeatures();
475      UseAVX = saved_useavx;
476      UseSSE = saved_usesse;
477      __ jmp(wrapup);
478    }
479
480    __ bind(legacy_save_restore);
481    // AVX check
482    VM_Version::set_avx_cpuFeatures(); // Enable temporarily to pass asserts
483    UseAVX = 1;
484    UseSSE = 2;
485    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
486    __ vmovdqu(Address(rsi, 0), xmm0);
487    __ vmovdqu(Address(rsi, 32), xmm7);
488#ifdef _LP64
489    __ vmovdqu(Address(rsi, 64), xmm8);
490    __ vmovdqu(Address(rsi, 96), xmm15);
491#endif
492
493#ifdef _WINDOWS
494#ifdef _LP64
495    __ vmovdqu(xmm15, Address(rsp, 0));
496    __ addptr(rsp, 32);
497    __ vmovdqu(xmm8, Address(rsp, 0));
498    __ addptr(rsp, 32);
499#endif // _LP64
500    __ vmovdqu(xmm7, Address(rsp, 0));
501    __ addptr(rsp, 32);
502#endif // _WINDOWS
503    generate_vzeroupper(wrapup);
504    VM_Version::clean_cpuFeatures();
505    UseAVX = saved_useavx;
506    UseSSE = saved_usesse;
507
508    __ bind(wrapup);
509    __ popf();
510    __ pop(rsi);
511    __ pop(rbx);
512    __ pop(rbp);
513    __ ret(0);
514
515#   undef __
516
517    return start;
518  }
519  void generate_vzeroupper(Label& L_wrapup) {
520#   define __ _masm->
521    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
522    __ cmpl(Address(rsi, 4), 0x756e6547);  // "Genu" (GenuineIntel, stored little-endian)
523    __ jcc(Assembler::notEqual, L_wrapup);
524    __ movl(rcx, 0x0FFF0FF0);
525    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
526    __ andl(rcx, Address(rsi, 0));
527    __ cmpl(rcx, 0x00050670);              // If it is Xeon Phi 3200/5200/7200
528    __ jcc(Assembler::equal, L_wrapup);
529    __ cmpl(rcx, 0x00080650);              // If it is Future Xeon Phi
530    __ jcc(Assembler::equal, L_wrapup);
531    __ vzeroupper();
532#   undef __
533  }
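  // Illustrative note on the constants used above (sketch, not compiled): the mask
  // 0x0FFF0FF0 keeps the extended family [27:20], extended model [19:16], family
  // [11:8] and model [7:4] fields of CPUID.1:EAX and drops the stepping, so
  // 0x00050670 is family 6 / model 0x57 (Knights Landing) and 0x00080650 is family
  // 6 / model 0x85 (presumably Knights Mill, the "Future Xeon Phi" above) -- the two
  // parts on which vzeroupper is deliberately skipped.
#if 0
  static uint32_t family_model_signature(uint32_t cpuid1_eax) {
    return cpuid1_eax & 0x0FFF0FF0;   // stepping and reserved bits removed
  }
#endif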
534};
535
536void VM_Version::get_processor_features() {
537
538  _cpu = 4; // 486 by default
539  _model = 0;
540  _stepping = 0;
541  _features = 0;
542  _logical_processors_per_package = 1;
543  // i486 internal cache is both I&D and has a 16-byte line size
544  _L1_data_cache_line_size = 16;
545
546  // Get raw processor info
547
548  get_cpu_info_stub(&_cpuid_info);
549
550  assert_is_initialized();
551  _cpu = extended_cpu_family();
552  _model = extended_cpu_model();
553  _stepping = cpu_stepping();
554
555  if (cpu_family() > 4) { // it supports CPUID
556    _features = feature_flags();
557    // Logical processors are only available on P4s and above,
558    // and only if hyperthreading is available.
559    _logical_processors_per_package = logical_processor_count();
560    _L1_data_cache_line_size = L1_line_size();
561  }
562
563  _supports_cx8 = supports_cmpxchg8();
564  // xchg and xadd instructions
565  _supports_atomic_getset4 = true;
566  _supports_atomic_getadd4 = true;
567  LP64_ONLY(_supports_atomic_getset8 = true);
568  LP64_ONLY(_supports_atomic_getadd8 = true);
569
570#ifdef _LP64
571  // OS should support SSE for x64 and hardware should support at least SSE2.
572  if (!VM_Version::supports_sse2()) {
573    vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
574  }
575  // in 64 bit the use of SSE2 is the minimum
576  if (UseSSE < 2) UseSSE = 2;
577#endif
578
579#ifdef AMD64
580  // flush_icache_stub has to be generated first.
581  // That is why the ICache line size is hard coded in the ICache class,
582  // see icache_x86.hpp. It is also the reason why we can't use the
583  // clflush instruction in the 32-bit VM, since it could be running
584  // on a CPU which does not support it.
585  //
586  // The only thing we can do is verify that the hard coded
587  // ICache::line_size has the correct value.
588  guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
589  // clflush_size is the cache line size in quadwords (8 bytes each).
590  guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");
591#endif
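  // Illustrative sketch (not compiled): CPUID.1:EBX[15:8] reports the CLFLUSH line
  // size in 8-byte chunks, so the guarantee above (clflush_size == 8) pins the data
  // cache line size to 8 * 8 = 64 bytes, which is what the hard coded
  // ICache::line_size on x86_64 relies on.
#if 0
  static int clflush_line_size_in_bytes(uint32_t cpuid1_ebx) {
    return ((cpuid1_ebx >> 8) & 0xff) * 8;   // e.g. 8 quadwords -> 64 bytes
  }
#endif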
592
593  // If the OS doesn't support SSE, we can't use this feature even if the HW does
594  if (!os::supports_sse())
595    _features &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4A|CPU_SSE4_1|CPU_SSE4_2);
596
597  if (UseSSE < 4) {
598    _features &= ~CPU_SSE4_1;
599    _features &= ~CPU_SSE4_2;
600  }
601
602  if (UseSSE < 3) {
603    _features &= ~CPU_SSE3;
604    _features &= ~CPU_SSSE3;
605    _features &= ~CPU_SSE4A;
606  }
607
608  if (UseSSE < 2)
609    _features &= ~CPU_SSE2;
610
611  if (UseSSE < 1)
612    _features &= ~CPU_SSE;
613
614  // first try initial setting and detect what we can support
615  if (UseAVX > 0) {
616    if (UseAVX > 2 && supports_evex()) {
617      UseAVX = 3;
618    } else if (UseAVX > 1 && supports_avx2()) {
619      UseAVX = 2;
620    } else if (UseAVX > 0 && supports_avx()) {
621      UseAVX = 1;
622    } else {
623      UseAVX = 0;
624    }
625  } else if (UseAVX < 0) {
626    UseAVX = 0;
627  }
628
629  if (UseAVX < 3) {
630    _features &= ~CPU_AVX512F;
631    _features &= ~CPU_AVX512DQ;
632    _features &= ~CPU_AVX512CD;
633    _features &= ~CPU_AVX512BW;
634    _features &= ~CPU_AVX512VL;
635  }
636
637  if (UseAVX < 2)
638    _features &= ~CPU_AVX2;
639
640  if (UseAVX < 1) {
641    _features &= ~CPU_AVX;
642    _features &= ~CPU_VZEROUPPER;
643  }
644
645  if (logical_processors_per_package() == 1) {
646    // An HT-capable processor could be installed on a system which doesn't support or enable HT.
647    _features &= ~CPU_HT;
648  }
649
650  if( is_intel() ) { // Intel cpus specific settings
651    if (is_knights_family()) {
652      _features &= ~CPU_VZEROUPPER;
653    }
654  }
655
656  char buf[256];
657  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
658               cores_per_cpu(), threads_per_core(),
659               cpu_family(), _model, _stepping,
660               (supports_cmov() ? ", cmov" : ""),
661               (supports_cmpxchg8() ? ", cx8" : ""),
662               (supports_fxsr() ? ", fxsr" : ""),
663               (supports_mmx()  ? ", mmx"  : ""),
664               (supports_sse()  ? ", sse"  : ""),
665               (supports_sse2() ? ", sse2" : ""),
666               (supports_sse3() ? ", sse3" : ""),
667               (supports_ssse3()? ", ssse3": ""),
668               (supports_sse4_1() ? ", sse4.1" : ""),
669               (supports_sse4_2() ? ", sse4.2" : ""),
670               (supports_popcnt() ? ", popcnt" : ""),
671               (supports_avx()    ? ", avx" : ""),
672               (supports_avx2()   ? ", avx2" : ""),
673               (supports_aes()    ? ", aes" : ""),
674               (supports_clmul()  ? ", clmul" : ""),
675               (supports_erms()   ? ", erms" : ""),
676               (supports_rtm()    ? ", rtm" : ""),
677               (supports_mmx_ext() ? ", mmxext" : ""),
678               (supports_3dnow_prefetch() ? ", 3dnowpref" : ""),
679               (supports_lzcnt()   ? ", lzcnt": ""),
680               (supports_sse4a()   ? ", sse4a": ""),
681               (supports_ht() ? ", ht": ""),
682               (supports_tsc() ? ", tsc": ""),
683               (supports_tscinv_bit() ? ", tscinvbit": ""),
684               (supports_tscinv() ? ", tscinv": ""),
685               (supports_bmi1() ? ", bmi1" : ""),
686               (supports_bmi2() ? ", bmi2" : ""),
687               (supports_adx() ? ", adx" : ""),
688               (supports_evex() ? ", evex" : ""),
689               (supports_sha() ? ", sha" : ""),
690               (supports_fma() ? ", fma" : ""));
691  _features_string = os::strdup(buf);
692
693  // UseSSE is set to the smaller of what hardware supports and what
694  // the command line requires.  I.e., you cannot set UseSSE to 2 on
695  // older Pentiums which do not support it.
696  if (UseSSE > 4) UseSSE=4;
697  if (UseSSE < 0) UseSSE=0;
698  if (!supports_sse4_1()) // Drop to 3 if no SSE4 support
699    UseSSE = MIN2((intx)3,UseSSE);
700  if (!supports_sse3()) // Drop to 2 if no SSE3 support
701    UseSSE = MIN2((intx)2,UseSSE);
702  if (!supports_sse2()) // Drop to 1 if no SSE2 support
703    UseSSE = MIN2((intx)1,UseSSE);
704  if (!supports_sse ()) // Drop to 0 if no SSE  support
705    UseSSE = 0;
706
707  // Use AES instructions if available.
708  if (supports_aes()) {
709    if (FLAG_IS_DEFAULT(UseAES)) {
710      FLAG_SET_DEFAULT(UseAES, true);
711    }
712    if (!UseAES) {
713      if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
714        warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
715      }
716      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
717    } else {
718      if (UseSSE > 2) {
719        if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
720          FLAG_SET_DEFAULT(UseAESIntrinsics, true);
721        }
722      } else {
723        // The AES intrinsic stubs require AES instruction support (of course)
724        // but also require SSE3 mode or higher for the instructions they use.
725        if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
726          warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
727        }
728        FLAG_SET_DEFAULT(UseAESIntrinsics, false);
729      }
730
731      // --AES-CTR begins--
732      if (!UseAESIntrinsics) {
733        if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
734          warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
735          FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
736        }
737      } else {
738        if(supports_sse4_1()) {
739          if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
740            FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
741          }
742        } else {
743          // The AES-CTR intrinsic stubs require AES instruction support (of course)
744          // but also require SSE4.1 mode or higher for the instructions they use.
745          if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
746            warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
747          }
748          FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
749        }
750      }
751      // --AES-CTR ends--
752    }
753  } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
754    if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
755      warning("AES instructions are not available on this CPU");
756      FLAG_SET_DEFAULT(UseAES, false);
757    }
758    if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
759      warning("AES intrinsics are not available on this CPU");
760      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
761    }
762    if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
763      warning("AES-CTR intrinsics are not available on this CPU");
764      FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
765    }
766  }
767
768  // Use CLMUL instructions if available.
769  if (supports_clmul()) {
770    if (FLAG_IS_DEFAULT(UseCLMUL)) {
771      UseCLMUL = true;
772    }
773  } else if (UseCLMUL) {
774    if (!FLAG_IS_DEFAULT(UseCLMUL))
775      warning("CLMUL instructions not available on this CPU (AVX may also be required)");
776    FLAG_SET_DEFAULT(UseCLMUL, false);
777  }
778
779  if (UseCLMUL && (UseSSE > 2)) {
780    if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
781      UseCRC32Intrinsics = true;
782    }
783  } else if (UseCRC32Intrinsics) {
784    if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
785      warning("CRC32 intrinsics require CLMUL instructions (not available on this CPU)");
786    FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
787  }
788
789  if (supports_sse4_2() && supports_clmul()) {
790    if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
791      UseCRC32CIntrinsics = true;
792    }
793  } else if (UseCRC32CIntrinsics) {
794    if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
795      warning("CRC32C intrinsics are not available on this CPU");
796    }
797    FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
798  }
799
800  // GHASH/GCM intrinsics
801  if (UseCLMUL && (UseSSE > 2)) {
802    if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
803      UseGHASHIntrinsics = true;
804    }
805  } else if (UseGHASHIntrinsics) {
806    if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
807      warning("GHASH intrinsics require CLMUL and SSE2 instructions which are not available on this CPU");
808    FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
809  }
810
811  if (supports_fma() && UseSSE >= 2) { // Check UseSSE since FMA code uses SSE instructions
812    if (FLAG_IS_DEFAULT(UseFMA)) {
813      UseFMA = true;
814    }
815  } else if (UseFMA) {
816    warning("FMA instructions are not available on this CPU");
817    FLAG_SET_DEFAULT(UseFMA, false);
818  }
819
820  if (supports_sha() LP64_ONLY(|| supports_avx2() && supports_bmi2())) {
821    if (FLAG_IS_DEFAULT(UseSHA)) {
822      UseSHA = true;
823    }
824  } else if (UseSHA) {
825    warning("SHA instructions are not available on this CPU");
826    FLAG_SET_DEFAULT(UseSHA, false);
827  }
828
829  if (supports_sha() && UseSHA) {
830    if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
831      FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
832    }
833  } else if (UseSHA1Intrinsics) {
834    warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
835    FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
836  }
837
838  if (UseSHA) {
839    if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
840      FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
841    }
842  } else if (UseSHA256Intrinsics) {
843    warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
844    FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
845  }
846
847  if (UseSHA) {
848    if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
849      FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
850    }
851  } else if (UseSHA512Intrinsics) {
852    warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
853    FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
854  }
855
856  if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
857    FLAG_SET_DEFAULT(UseSHA, false);
858  }
859
860  if (UseAdler32Intrinsics) {
861    warning("Adler32Intrinsics not available on this CPU.");
862    FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
863  }
864
865  if (!supports_rtm() && UseRTMLocking) {
866    // Can't continue because UseRTMLocking affects UseBiasedLocking flag
867    // setting during arguments processing. See use_biased_locking().
868    // VM_Version_init() is executed after UseBiasedLocking is used
869    // in Thread::allocate().
870    vm_exit_during_initialization("RTM instructions are not available on this CPU");
871  }
872
873#if INCLUDE_RTM_OPT
874  if (UseRTMLocking) {
875    if (is_client_compilation_mode_vm()) {
876      // Only C2 does RTM locking optimization.
877      // Can't continue because UseRTMLocking affects UseBiasedLocking flag
878      // setting during arguments processing. See use_biased_locking().
879      vm_exit_during_initialization("RTM locking optimization is not supported in emulated client VM");
880    }
881    if (is_intel_family_core()) {
882      if ((_model == CPU_MODEL_HASWELL_E3) ||
883          (_model == CPU_MODEL_HASWELL_E7 && _stepping < 3) ||
884          (_model == CPU_MODEL_BROADWELL  && _stepping < 4)) {
885        // currently a collision between SKL and HSW_E3
886        if (!UnlockExperimentalVMOptions && UseAVX < 3) {
887          vm_exit_during_initialization("UseRTMLocking is only available as experimental option on this platform. It must be enabled via -XX:+UnlockExperimentalVMOptions flag.");
888        } else {
889          warning("UseRTMLocking is only available as experimental option on this platform.");
890        }
891      }
892    }
893    if (!FLAG_IS_CMDLINE(UseRTMLocking)) {
894      // RTM locking should be used only for applications with
895      // high lock contention. For now we do not use it by default.
896      vm_exit_during_initialization("UseRTMLocking flag should be only set on command line");
897    }
898    if (!is_power_of_2(RTMTotalCountIncrRate)) {
899      warning("RTMTotalCountIncrRate must be a power of 2, resetting it to 64");
900      FLAG_SET_DEFAULT(RTMTotalCountIncrRate, 64);
901    }
902    if (RTMAbortRatio < 0 || RTMAbortRatio > 100) {
903      warning("RTMAbortRatio must be in the range 0 to 100, resetting it to 50");
904      FLAG_SET_DEFAULT(RTMAbortRatio, 50);
905    }
906  } else { // !UseRTMLocking
907    if (UseRTMForStackLocks) {
908      if (!FLAG_IS_DEFAULT(UseRTMForStackLocks)) {
909        warning("UseRTMForStackLocks flag should be off when UseRTMLocking flag is off");
910      }
911      FLAG_SET_DEFAULT(UseRTMForStackLocks, false);
912    }
913    if (UseRTMDeopt) {
914      FLAG_SET_DEFAULT(UseRTMDeopt, false);
915    }
916    if (PrintPreciseRTMLockingStatistics) {
917      FLAG_SET_DEFAULT(PrintPreciseRTMLockingStatistics, false);
918    }
919  }
920#else
921  if (UseRTMLocking) {
922    // Only C2 does RTM locking optimization.
923    // Can't continue because UseRTMLocking affects UseBiasedLocking flag
924    // setting during arguments processing. See use_biased_locking().
925    vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
926  }
927#endif
928
929#ifdef COMPILER2
930  if (UseFPUForSpilling) {
931    if (UseSSE < 2) {
932      // Only supported with SSE2+
933      FLAG_SET_DEFAULT(UseFPUForSpilling, false);
934    }
935  }
936#endif
937#if defined(COMPILER2) || INCLUDE_JVMCI
938  if (MaxVectorSize > 0) {
939    if (!is_power_of_2(MaxVectorSize)) {
940      warning("MaxVectorSize must be a power of 2");
941      FLAG_SET_DEFAULT(MaxVectorSize, 64);
942    }
943    if (UseSSE < 2) {
944      // Vectors (in XMM) are only supported with SSE2+
945      if (MaxVectorSize > 0) {
946        if (!FLAG_IS_DEFAULT(MaxVectorSize))
947          warning("MaxVectorSize must be 0");
948        FLAG_SET_DEFAULT(MaxVectorSize, 0);
949      }
950    }
951    else if (UseAVX == 0 || !os_supports_avx_vectors()) {
952      // 32-byte vectors (in YMM) are only supported with AVX+
953      if (MaxVectorSize > 16) {
954        if (!FLAG_IS_DEFAULT(MaxVectorSize))
955          warning("MaxVectorSize must be <= 16");
956        FLAG_SET_DEFAULT(MaxVectorSize, 16);
957      }
958    }
959    else if (UseAVX == 1 || UseAVX == 2) {
960      // 64-byte vectors (in ZMM) are only supported with AVX 3
961      if (MaxVectorSize > 32) {
962        if (!FLAG_IS_DEFAULT(MaxVectorSize))
963          warning("MaxVectorSize must be <= 32");
964        FLAG_SET_DEFAULT(MaxVectorSize, 32);
965      }
966    }
967    else if (UseAVX > 2 ) {
968      if (MaxVectorSize > 64) {
969        if (!FLAG_IS_DEFAULT(MaxVectorSize))
970          warning("MaxVectorSize must be <= 64");
971        FLAG_SET_DEFAULT(MaxVectorSize, 64);
972      }
973    }
974#if defined(COMPILER2) && defined(ASSERT)
975    if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
976      tty->print_cr("State of YMM registers after signal handling:");
977      int nreg = 2 LP64_ONLY(+2);
978      const char* ymm_name[4] = {"0", "7", "8", "15"};
979      for (int i = 0; i < nreg; i++) {
980        tty->print("YMM%s:", ymm_name[i]);
981        for (int j = 7; j >=0; j--) {
982          tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
983        }
984        tty->cr();
985      }
986    }
987#endif // COMPILER2 && ASSERT
988  }
989#endif // COMPILER2 || INCLUDE_JVMCI
990
991#ifdef COMPILER2
992#ifdef _LP64
993  if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
994    UseMultiplyToLenIntrinsic = true;
995  }
996  if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
997    UseSquareToLenIntrinsic = true;
998  }
999  if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1000    UseMulAddIntrinsic = true;
1001  }
1002  if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1003    UseMontgomeryMultiplyIntrinsic = true;
1004  }
1005  if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1006    UseMontgomerySquareIntrinsic = true;
1007  }
1008#else
1009  if (UseMultiplyToLenIntrinsic) {
1010    if (!FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1011      warning("multiplyToLen intrinsic is not available in 32-bit VM");
1012    }
1013    FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false);
1014  }
1015  if (UseMontgomeryMultiplyIntrinsic) {
1016    if (!FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1017      warning("montgomeryMultiply intrinsic is not available in 32-bit VM");
1018    }
1019    FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, false);
1020  }
1021  if (UseMontgomerySquareIntrinsic) {
1022    if (!FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1023      warning("montgomerySquare intrinsic is not available in 32-bit VM");
1024    }
1025    FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, false);
1026  }
1027  if (UseSquareToLenIntrinsic) {
1028    if (!FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1029      warning("squareToLen intrinsic is not available in 32-bit VM");
1030    }
1031    FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, false);
1032  }
1033  if (UseMulAddIntrinsic) {
1034    if (!FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1035      warning("mulAdd intrinsic is not available in 32-bit VM");
1036    }
1037    FLAG_SET_DEFAULT(UseMulAddIntrinsic, false);
1038  }
1039#endif
1040#endif // COMPILER2
1041
1042  // On new cpus, instructions which update the whole XMM register should be used
1043  // to prevent a partial register stall due to dependencies on its high half.
1044  //
1045  // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
1046  // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
1047  // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
1048  // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).
1049
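  // For reference, roughly how MacroAssembler's movdbl()/movflt() helpers consume
  // these two flags (sketch only -- see macroAssembler_x86.hpp for the authoritative
  // definitions; not compiled here):
#if 0
  void movdbl(XMMRegister dst, Address src) {
    if (UseXmmLoadAndClearUpper) { movsd(dst, src); }   // rewrites the whole register
    else                         { movlpd(dst, src); }  // merges into the live upper half
  }
  void movflt(XMMRegister dst, XMMRegister src) {
    if (UseXmmRegToRegMoveAll) { movaps(dst, src); }    // full-register copy
    else                       { movss(dst, src); }     // partial write
  }
#endif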
1050  if( is_amd() ) { // AMD cpus specific settings
1051    if( supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop) ) {
1052      // Use it on new AMD cpus starting from Opteron.
1053      UseAddressNop = true;
1054    }
1055    if( supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift) ) {
1056      // Use it on new AMD cpus starting from Opteron.
1057      UseNewLongLShift = true;
1058    }
1059    if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) {
1060      if (supports_sse4a()) {
1061        UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
1062      } else {
1063        UseXmmLoadAndClearUpper = false;
1064      }
1065    }
1066    if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) {
1067      if( supports_sse4a() ) {
1068        UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
1069      } else {
1070        UseXmmRegToRegMoveAll = false;
1071      }
1072    }
1073    if( FLAG_IS_DEFAULT(UseXmmI2F) ) {
1074      if( supports_sse4a() ) {
1075        UseXmmI2F = true;
1076      } else {
1077        UseXmmI2F = false;
1078      }
1079    }
1080    if( FLAG_IS_DEFAULT(UseXmmI2D) ) {
1081      if( supports_sse4a() ) {
1082        UseXmmI2D = true;
1083      } else {
1084        UseXmmI2D = false;
1085      }
1086    }
1087    if (supports_sse4_2()) {
1088      if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1089        FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1090      }
1091    } else {
1092      if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1093        warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1094      }
1095      FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1096    }
1097
1098    // some defaults for AMD family 15h
1099    if ( cpu_family() == 0x15 ) {
1100      // On family 15h processors default is no sw prefetch
1101      if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1102        FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1103      }
1104      // Also, if some other prefetch style is specified, the default instruction type is PREFETCHW
1105      if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1106        FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1107      }
1108      // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
1109      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1110        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1111      }
1112      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1113        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1114      }
1115    }
1116
1117#ifdef COMPILER2
1118    if (MaxVectorSize > 16) {
1119      // Limit vectors size to 16 bytes on current AMD cpus.
1120      FLAG_SET_DEFAULT(MaxVectorSize, 16);
1121    }
1122#endif // COMPILER2
1123  }
1124
1125  if( is_intel() ) { // Intel cpus specific settings
1126    if( FLAG_IS_DEFAULT(UseStoreImmI16) ) {
1127      UseStoreImmI16 = false; // don't use it on Intel cpus
1128    }
1129    if( cpu_family() == 6 || cpu_family() == 15 ) {
1130      if( FLAG_IS_DEFAULT(UseAddressNop) ) {
1131        // Use it on all Intel cpus starting from PentiumPro
1132        UseAddressNop = true;
1133      }
1134    }
1135    if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) {
1136      UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
1137    }
1138    if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) {
1139      if( supports_sse3() ) {
1140        UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
1141      } else {
1142        UseXmmRegToRegMoveAll = false;
1143      }
1144    }
1145    if( cpu_family() == 6 && supports_sse3() ) { // New Intel cpus
1146#ifdef COMPILER2
1147      if( FLAG_IS_DEFAULT(MaxLoopPad) ) {
1148        // For new Intel cpus, do the following optimization:
1149        // don't align the beginning of a loop if there are enough instructions
1150        // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1151        // in current fetch line (OptoLoopAlignment) or the padding
1152        // is big (> MaxLoopPad).
1153        // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
1154        // generated NOP instructions. 11 is the largest size of one
1155        // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1156        MaxLoopPad = 11;
1157      }
1158#endif // COMPILER2
1159      if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1160        UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
1161      }
1162      if (supports_sse4_2() && supports_ht()) { // Newest Intel cpus
1163        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1164          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1165        }
1166      }
1167      if (supports_sse4_2()) {
1168        if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1169          FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1170        }
1171      } else {
1172        if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1173          warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1174        }
1175        FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1176      }
1177    }
1178    if (is_atom_family() || is_knights_family()) {
1179#ifdef COMPILER2
1180      if (FLAG_IS_DEFAULT(OptoScheduling)) {
1181        OptoScheduling = true;
1182      }
1183#endif
1184      if (supports_sse4_2()) { // Silvermont
1185        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1186          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1187        }
1188      }
1189      if (FLAG_IS_DEFAULT(UseIncDec)) {
1190        FLAG_SET_DEFAULT(UseIncDec, false);
1191      }
1192    }
1193    if(FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1194      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1195    }
1196  }
1197
1198#ifdef _LP64
1199  if (UseSSE42Intrinsics) {
1200    if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1201      UseVectorizedMismatchIntrinsic = true;
1202    }
1203  } else if (UseVectorizedMismatchIntrinsic) {
1204    if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
1205      warning("vectorizedMismatch intrinsics are not available on this CPU");
1206    FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1207  }
1208#else
1209  if (UseVectorizedMismatchIntrinsic) {
1210    if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1211      warning("vectorizedMismatch intrinsic is not available in 32-bit VM");
1212    }
1213    FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1214  }
1215#endif // _LP64
1216
1217  // Use count leading zeros instruction if available.
1218  if (supports_lzcnt()) {
1219    if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
1220      UseCountLeadingZerosInstruction = true;
1221    }
1222  } else if (UseCountLeadingZerosInstruction) {
1223    warning("lzcnt instruction is not available on this CPU");
1224    FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
1225  }
1226
1227  // Use count trailing zeros instruction if available
1228  if (supports_bmi1()) {
1229    // tzcnt does not require VEX prefix
1230    if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
1231      if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1232        // Don't use tzcnt if BMI1 is switched off on command line.
1233        UseCountTrailingZerosInstruction = false;
1234      } else {
1235        UseCountTrailingZerosInstruction = true;
1236      }
1237    }
1238  } else if (UseCountTrailingZerosInstruction) {
1239    warning("tzcnt instruction is not available on this CPU");
1240    FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
1241  }
1242
1243  // BMI instructions (except tzcnt) use an encoding with VEX prefix.
1244  // VEX prefix is generated only when AVX > 0.
1245  if (supports_bmi1() && supports_avx()) {
1246    if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1247      UseBMI1Instructions = true;
1248    }
1249  } else if (UseBMI1Instructions) {
1250    warning("BMI1 instructions are not available on this CPU (AVX is also required)");
1251    FLAG_SET_DEFAULT(UseBMI1Instructions, false);
1252  }
1253
1254  if (supports_bmi2() && supports_avx()) {
1255    if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1256      UseBMI2Instructions = true;
1257    }
1258  } else if (UseBMI2Instructions) {
1259    warning("BMI2 instructions are not available on this CPU (AVX is also required)");
1260    FLAG_SET_DEFAULT(UseBMI2Instructions, false);
1261  }
1262
1263  // Use population count instruction if available.
1264  if (supports_popcnt()) {
1265    if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
1266      UsePopCountInstruction = true;
1267    }
1268  } else if (UsePopCountInstruction) {
1269    warning("POPCNT instruction is not available on this CPU");
1270    FLAG_SET_DEFAULT(UsePopCountInstruction, false);
1271  }
1272
1273  // Use fast-string operations if available.
1274  if (supports_erms()) {
1275    if (FLAG_IS_DEFAULT(UseFastStosb)) {
1276      UseFastStosb = true;
1277    }
1278  } else if (UseFastStosb) {
1279    warning("fast-string operations are not available on this CPU");
1280    FLAG_SET_DEFAULT(UseFastStosb, false);
1281  }
1282
1283#ifdef COMPILER2
1284  if (FLAG_IS_DEFAULT(AlignVector)) {
1285    // Modern processors allow misaligned memory operations for vectors.
1286    AlignVector = !UseUnalignedLoadStores;
1287  }
1288#endif // COMPILER2
1289
1290  if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1291    if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
1292      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
1293    } else if (!supports_sse() && supports_3dnow_prefetch()) {
1294      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1295    }
1296  }
1297
1298  // Allocation prefetch settings
1299  intx cache_line_size = prefetch_data_size();
1300  if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
1301      (cache_line_size > AllocatePrefetchStepSize)) {
1302    FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
1303  }
1304
1305  if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
1306    assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
1307    if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1308      warning("AllocatePrefetchDistance is set to 0 which disables prefetching. Ignoring AllocatePrefetchStyle flag.");
1309    }
1310    FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1311  }
1312
1313  if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
1314    bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
1315    FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
1316  }
1317
1318  if (is_intel() && cpu_family() == 6 && supports_sse3()) {
1319    if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
1320        supports_sse4_2() && supports_ht()) { // Nehalem based cpus
1321      FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
1322    }
1323#ifdef COMPILER2
1324    if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
1325      FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1326    }
1327#endif
1328  }
1329
1330#ifdef _LP64
1331  // Prefetch settings
1332
1333  // Prefetch interval for gc copy/scan == 9 dcache lines.  Derived from
1334  // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
1335  // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
1336  // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
1337
1338  // gc copy/scan is disabled if prefetchw isn't supported, because
1339  // Prefetch::write emits an inlined prefetchw on Linux.
1340  // Do not use the 3dnow prefetchw instruction.  It isn't supported on em64t.
1341  // The used prefetcht0 instruction works for both amd64 and em64t.
1342
1343  if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
1344    FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
1345  }
1346  if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
1347    FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
1348  }
1349  if (FLAG_IS_DEFAULT(PrefetchFieldsAhead)) {
1350    FLAG_SET_DEFAULT(PrefetchFieldsAhead, 1);
1351  }
1352#endif
1353
1354  if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
1355     (cache_line_size > ContendedPaddingWidth))
1356     ContendedPaddingWidth = cache_line_size;
1357
1358  // This machine allows unaligned memory accesses
1359  if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
1360    FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
1361  }
1362
1363#ifndef PRODUCT
1364  if (log_is_enabled(Info, os, cpu)) {
1365    outputStream* log = Log(os, cpu)::info_stream();
1366    log->print_cr("Logical CPUs per core: %u",
1367                  logical_processors_per_package());
1368    log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
1369    log->print("UseSSE=%d", (int) UseSSE);
1370    if (UseAVX > 0) {
1371      log->print("  UseAVX=%d", (int) UseAVX);
1372    }
1373    if (UseAES) {
1374      log->print("  UseAES=1");
1375    }
1376#ifdef COMPILER2
1377    if (MaxVectorSize > 0) {
1378      log->print("  MaxVectorSize=%d", (int) MaxVectorSize);
1379    }
1380#endif
1381    log->cr();
1382    log->print("Allocation");
1383    if (AllocatePrefetchStyle <= 0 || (UseSSE == 0 && !supports_3dnow_prefetch())) {
1384      log->print_cr(": no prefetching");
1385    } else {
1386      log->print(" prefetching: ");
1387      if (UseSSE == 0 && supports_3dnow_prefetch()) {
1388        log->print("PREFETCHW");
1389      } else if (UseSSE >= 1) {
1390        if (AllocatePrefetchInstr == 0) {
1391          log->print("PREFETCHNTA");
1392        } else if (AllocatePrefetchInstr == 1) {
1393          log->print("PREFETCHT0");
1394        } else if (AllocatePrefetchInstr == 2) {
1395          log->print("PREFETCHT2");
1396        } else if (AllocatePrefetchInstr == 3) {
1397          log->print("PREFETCHW");
1398        }
1399      }
1400      if (AllocatePrefetchLines > 1) {
1401        log->print_cr(" at distance %d, %d lines of %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchLines, (int) AllocatePrefetchStepSize);
1402      } else {
1403        log->print_cr(" at distance %d, one line of %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchStepSize);
1404      }
1405    }
1406
1407    if (PrefetchCopyIntervalInBytes > 0) {
1408      log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
1409    }
1410    if (PrefetchScanIntervalInBytes > 0) {
1411      log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
1412    }
1413    if (PrefetchFieldsAhead > 0) {
1414      log->print_cr("PrefetchFieldsAhead %d", (int) PrefetchFieldsAhead);
1415    }
1416    if (ContendedPaddingWidth > 0) {
1417      log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
1418    }
1419  }
1420#endif // !PRODUCT
1421}
1422
1423bool VM_Version::use_biased_locking() {
1424#if INCLUDE_RTM_OPT
1425  // RTM locking is most useful when there is high lock contention and
1426  // low data contention.  With high lock contention the lock is usually
1427  // inflated and biased locking is not suitable for that case.
1428  // RTM locking code requires that biased locking is off.
1429  // Note: we can't switch off UseBiasedLocking in get_processor_features()
1430  // because it is used by Thread::allocate() which is called before
1431  // VM_Version::initialize().
1432  if (UseRTMLocking && UseBiasedLocking) {
1433    if (FLAG_IS_DEFAULT(UseBiasedLocking)) {
1434      FLAG_SET_DEFAULT(UseBiasedLocking, false);
1435    } else {
1436      warning("Biased locking is not supported with RTM locking; ignoring UseBiasedLocking flag.");
1437      UseBiasedLocking = false;
1438    }
1439  }
1440#endif
1441  return UseBiasedLocking;
1442}
1443
1444void VM_Version::initialize() {
1445  ResourceMark rm;
1446  // Making this stub must be the FIRST use of the assembler.
1447
1448  stub_blob = BufferBlob::create("get_cpu_info_stub", stub_size);
1449  if (stub_blob == NULL) {
1450    vm_exit_during_initialization("Unable to allocate get_cpu_info_stub");
1451  }
1452  CodeBuffer c(stub_blob);
1453  VM_Version_StubGenerator g(&c);
1454  get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
1455                                     g.generate_get_cpu_info());
1456
1457  get_processor_features();
1458}
1459