/*
 * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */
24
25#include "precompiled.hpp"
26#include "asm/macroAssembler.hpp"
27#include "asm/macroAssembler.inline.hpp"
28#include "logging/log.hpp"
29#include "logging/logStream.hpp"
30#include "memory/resourceArea.hpp"
31#include "prims/jvm.h"
32#include "runtime/java.hpp"
33#include "runtime/os.hpp"
34#include "runtime/stubCodeGenerator.hpp"
35#include "vm_version_x86.hpp"
36
37
38int VM_Version::_cpu;
39int VM_Version::_model;
40int VM_Version::_stepping;
41VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };
42
43// Address of instruction which causes SEGV
44address VM_Version::_cpuinfo_segv_addr = 0;
45// Address of instruction after the one which causes SEGV
46address VM_Version::_cpuinfo_cont_addr = 0;
47
48static BufferBlob* stub_blob;
49static const int stub_size = 1000;
50
51extern "C" {
52  typedef void (*get_cpu_info_stub_t)(void*);
53}
54static get_cpu_info_stub_t get_cpu_info_stub = NULL;
55
56
57class VM_Version_StubGenerator: public StubCodeGenerator {
58 public:
59
60  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
61
62  address generate_get_cpu_info() {
63    // Flags to test CPU type.
64    const uint32_t HS_EFL_AC = 0x40000;
65    const uint32_t HS_EFL_ID = 0x200000;
66    // Values for when we don't have a CPUID instruction.
67    const int      CPU_FAMILY_SHIFT = 8;
68    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
69    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
70    bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
71
72    Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
73    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, done, wrapup;
74    Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;
75
76    StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
77#   define __ _masm->
78
79    address start = __ pc();
80
81    //
82    // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
83    //
84    // LP64: rcx and rdx are first and second argument registers on windows
85
86    __ push(rbp);
87#ifdef _LP64
88    __ mov(rbp, c_rarg0); // cpuid_info address
89#else
90    __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
91#endif
92    __ push(rbx);
93    __ push(rsi);
94    __ pushf();          // preserve rbx, and flags
95    __ pop(rax);
96    __ push(rax);
97    __ mov(rcx, rax);
98    //
99    // if we are unable to change the AC flag, we have a 386
100    //
101    __ xorl(rax, HS_EFL_AC);
102    __ push(rax);
103    __ popf();
104    __ pushf();
105    __ pop(rax);
106    __ cmpptr(rax, rcx);
107    __ jccb(Assembler::notEqual, detect_486);
108
109    __ movl(rax, CPU_FAMILY_386);
110    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
111    __ jmp(done);
112
113    //
114    // If we are unable to change the ID flag, we have a 486 which does
115    // not support the "cpuid" instruction.
116    //
117    __ bind(detect_486);
118    __ mov(rax, rcx);
119    __ xorl(rax, HS_EFL_ID);
120    __ push(rax);
121    __ popf();
122    __ pushf();
123    __ pop(rax);
124    __ cmpptr(rcx, rax);
125    __ jccb(Assembler::notEqual, detect_586);
126
127    __ bind(cpu486);
128    __ movl(rax, CPU_FAMILY_486);
129    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
130    __ jmp(done);
131
132    //
133    // At this point, we have a chip which supports the "cpuid" instruction
134    //
135    __ bind(detect_586);
136    __ xorl(rax, rax);
137    __ cpuid();
138    __ orl(rax, rax);
139    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
140                                        // value of at least 1, we give up and
141                                        // assume a 486
142    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
143    __ movl(Address(rsi, 0), rax);
144    __ movl(Address(rsi, 4), rbx);
145    __ movl(Address(rsi, 8), rcx);
146    __ movl(Address(rsi,12), rdx);
147
148    __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
149    __ jccb(Assembler::belowEqual, std_cpuid4);
150
151    //
152    // cpuid(0xB) Processor Topology
153    //
154    __ movl(rax, 0xb);
155    __ xorl(rcx, rcx);   // Threads level
156    __ cpuid();
157
158    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
159    __ movl(Address(rsi, 0), rax);
160    __ movl(Address(rsi, 4), rbx);
161    __ movl(Address(rsi, 8), rcx);
162    __ movl(Address(rsi,12), rdx);
163
164    __ movl(rax, 0xb);
165    __ movl(rcx, 1);     // Cores level
166    __ cpuid();
167    __ push(rax);
168    __ andl(rax, 0x1f);  // Determine if valid topology level
169    __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
170    __ andl(rax, 0xffff);
171    __ pop(rax);
172    __ jccb(Assembler::equal, std_cpuid4);
173
174    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
175    __ movl(Address(rsi, 0), rax);
176    __ movl(Address(rsi, 4), rbx);
177    __ movl(Address(rsi, 8), rcx);
178    __ movl(Address(rsi,12), rdx);
179
180    __ movl(rax, 0xb);
181    __ movl(rcx, 2);     // Packages level
182    __ cpuid();
183    __ push(rax);
184    __ andl(rax, 0x1f);  // Determine if valid topology level
185    __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
186    __ andl(rax, 0xffff);
187    __ pop(rax);
188    __ jccb(Assembler::equal, std_cpuid4);
189
190    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
191    __ movl(Address(rsi, 0), rax);
192    __ movl(Address(rsi, 4), rbx);
193    __ movl(Address(rsi, 8), rcx);
194    __ movl(Address(rsi,12), rdx);
195
196    //
197    // cpuid(0x4) Deterministic cache params
198    //
199    __ bind(std_cpuid4);
200    __ movl(rax, 4);
201    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
202    __ jccb(Assembler::greater, std_cpuid1);
203
204    __ xorl(rcx, rcx);   // L1 cache
205    __ cpuid();
206    __ push(rax);
207    __ andl(rax, 0x1f);  // Determine if valid cache parameters used
208    __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
209    __ pop(rax);
210    __ jccb(Assembler::equal, std_cpuid1);
211
212    __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
213    __ movl(Address(rsi, 0), rax);
214    __ movl(Address(rsi, 4), rbx);
215    __ movl(Address(rsi, 8), rcx);
216    __ movl(Address(rsi,12), rdx);
217
218    //
219    // Standard cpuid(0x1)
220    //
221    __ bind(std_cpuid1);
222    __ movl(rax, 1);
223    __ cpuid();
224    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
225    __ movl(Address(rsi, 0), rax);
226    __ movl(Address(rsi, 4), rbx);
227    __ movl(Address(rsi, 8), rcx);
228    __ movl(Address(rsi,12), rdx);
229
230    //
231    // Check if OS has enabled XGETBV instruction to access XCR0
232    // (OSXSAVE feature flag) and CPU supports AVX
233    //
234    __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
235    __ cmpl(rcx, 0x18000000);
236    __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported
237
238    //
239    // XCR0, XFEATURE_ENABLED_MASK register
240    //
241    __ xorl(rcx, rcx);   // zero for XCR0 register
242    __ xgetbv();
243    __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
244    __ movl(Address(rsi, 0), rax);
245    __ movl(Address(rsi, 4), rdx);
246
247    //
248    // cpuid(0x7) Structured Extended Features
249    //
250    __ bind(sef_cpuid);
251    __ movl(rax, 7);
252    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
253    __ jccb(Assembler::greater, ext_cpuid);
254
255    __ xorl(rcx, rcx);
256    __ cpuid();
257    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
258    __ movl(Address(rsi, 0), rax);
259    __ movl(Address(rsi, 4), rbx);
260
261    //
262    // Extended cpuid(0x80000000)
263    //
264    __ bind(ext_cpuid);
265    __ movl(rax, 0x80000000);
266    __ cpuid();
267    __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
268    __ jcc(Assembler::belowEqual, done);
269    __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
270    __ jccb(Assembler::belowEqual, ext_cpuid1);
271    __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
272    __ jccb(Assembler::belowEqual, ext_cpuid5);
273    __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
274    __ jccb(Assembler::belowEqual, ext_cpuid7);
275    //
276    // Extended cpuid(0x80000008)
277    //
278    __ movl(rax, 0x80000008);
279    __ cpuid();
280    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
281    __ movl(Address(rsi, 0), rax);
282    __ movl(Address(rsi, 4), rbx);
283    __ movl(Address(rsi, 8), rcx);
284    __ movl(Address(rsi,12), rdx);
285
286    //
287    // Extended cpuid(0x80000007)
288    //
289    __ bind(ext_cpuid7);
290    __ movl(rax, 0x80000007);
291    __ cpuid();
292    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
293    __ movl(Address(rsi, 0), rax);
294    __ movl(Address(rsi, 4), rbx);
295    __ movl(Address(rsi, 8), rcx);
296    __ movl(Address(rsi,12), rdx);
297
298    //
299    // Extended cpuid(0x80000005)
300    //
301    __ bind(ext_cpuid5);
302    __ movl(rax, 0x80000005);
303    __ cpuid();
304    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
305    __ movl(Address(rsi, 0), rax);
306    __ movl(Address(rsi, 4), rbx);
307    __ movl(Address(rsi, 8), rcx);
308    __ movl(Address(rsi,12), rdx);
309
310    //
311    // Extended cpuid(0x80000001)
312    //
313    __ bind(ext_cpuid1);
314    __ movl(rax, 0x80000001);
315    __ cpuid();
316    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
317    __ movl(Address(rsi, 0), rax);
318    __ movl(Address(rsi, 4), rbx);
319    __ movl(Address(rsi, 8), rcx);
320    __ movl(Address(rsi,12), rdx);
321
322    //
323    // Check if OS has enabled XGETBV instruction to access XCR0
324    // (OSXSAVE feature flag) and CPU supports AVX
325    //
326    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
327    __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
328    __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
329    __ cmpl(rcx, 0x18000000);
330    __ jccb(Assembler::notEqual, done); // jump if AVX is not supported
331
332    __ movl(rax, 0x6);
333    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
334    __ cmpl(rax, 0x6);
335    __ jccb(Assembler::equal, start_simd_check); // return if AVX is not supported
336
337    // we need to bridge farther than imm8, so we use this island as a thunk
338    __ bind(done);
339    __ jmp(wrapup);
340
341    __ bind(start_simd_check);
342    //
343    // Some OSs have a bug when upper 128/256bits of YMM/ZMM
344    // registers are not restored after a signal processing.
345    // Generate SEGV here (reference through NULL)
346    // and check upper YMM/ZMM bits after it.
347    //
348    intx saved_useavx = UseAVX;
349    intx saved_usesse = UseSSE;
350    // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
351    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
352    __ movl(rax, 0x10000);
353    __ andl(rax, Address(rsi, 4)); // xcr0 bits sse | ymm
354    __ cmpl(rax, 0x10000);
355    __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
356    // check _cpuid_info.xem_xcr0_eax.bits.opmask
357    // check _cpuid_info.xem_xcr0_eax.bits.zmm512
358    // check _cpuid_info.xem_xcr0_eax.bits.zmm32
359    __ movl(rax, 0xE0);
360    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
361    __ cmpl(rax, 0xE0);
362    __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
363
364    // If UseAVX is unitialized or is set by the user to include EVEX
365    if (use_evex) {
366      // EVEX setup: run in lowest evex mode
367      VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
368      UseAVX = 3;
369      UseSSE = 2;
370#ifdef _WINDOWS
371      // xmm5-xmm15 are not preserved by caller on windows
372      // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
373      __ subptr(rsp, 64);
374      __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
375#ifdef _LP64
376      __ subptr(rsp, 64);
377      __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
378      __ subptr(rsp, 64);
379      __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
380#endif // _LP64
381#endif // _WINDOWS
382
383      // load value into all 64 bytes of zmm7 register
384      __ movl(rcx, VM_Version::ymm_test_value());
385      __ movdl(xmm0, rcx);
386      __ movl(rcx, 0xffff);
387      __ kmovwl(k1, rcx);
388      __ evpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
389      __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
390#ifdef _LP64
391      __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
392      __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
393#endif
394      VM_Version::clean_cpuFeatures();
395      __ jmp(save_restore_except);
396    }
397
398    __ bind(legacy_setup);
399    // AVX setup
400    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
401    UseAVX = 1;
402    UseSSE = 2;
403#ifdef _WINDOWS
404    __ subptr(rsp, 32);
405    __ vmovdqu(Address(rsp, 0), xmm7);
406#ifdef _LP64
407    __ subptr(rsp, 32);
408    __ vmovdqu(Address(rsp, 0), xmm8);
409    __ subptr(rsp, 32);
410    __ vmovdqu(Address(rsp, 0), xmm15);
411#endif // _LP64
412#endif // _WINDOWS
413
414    // load value into all 32 bytes of ymm7 register
415    __ movl(rcx, VM_Version::ymm_test_value());
416
417    __ movdl(xmm0, rcx);
418    __ pshufd(xmm0, xmm0, 0x00);
419    __ vinsertf128_high(xmm0, xmm0);
420    __ vmovdqu(xmm7, xmm0);
421#ifdef _LP64
422    __ vmovdqu(xmm8, xmm0);
423    __ vmovdqu(xmm15, xmm0);
424#endif
425    VM_Version::clean_cpuFeatures();
426
427    __ bind(save_restore_except);
428    __ xorl(rsi, rsi);
429    VM_Version::set_cpuinfo_segv_addr(__ pc());
430    // Generate SEGV
431    __ movl(rax, Address(rsi, 0));
432
433    VM_Version::set_cpuinfo_cont_addr(__ pc());
434    // Returns here after signal. Save xmm0 to check it later.
435
436    // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
437    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
438    __ movl(rax, 0x10000);
439    __ andl(rax, Address(rsi, 4));
440    __ cmpl(rax, 0x10000);
441    __ jcc(Assembler::notEqual, legacy_save_restore);
442    // check _cpuid_info.xem_xcr0_eax.bits.opmask
443    // check _cpuid_info.xem_xcr0_eax.bits.zmm512
444    // check _cpuid_info.xem_xcr0_eax.bits.zmm32
445    __ movl(rax, 0xE0);
446    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
447    __ cmpl(rax, 0xE0);
448    __ jcc(Assembler::notEqual, legacy_save_restore);
449
450    // If UseAVX is unitialized or is set by the user to include EVEX
451    if (use_evex) {
452      // EVEX check: run in lowest evex mode
453      VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
454      UseAVX = 3;
455      UseSSE = 2;
456      __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
457      __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
458      __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
459#ifdef _LP64
460      __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
461      __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);
462#endif
463
464#ifdef _WINDOWS
465#ifdef _LP64
466      __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
467      __ addptr(rsp, 64);
468      __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
469      __ addptr(rsp, 64);
470#endif // _LP64
471      __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
472      __ addptr(rsp, 64);
473#endif // _WINDOWS
474      generate_vzeroupper(wrapup);
475      VM_Version::clean_cpuFeatures();
476      UseAVX = saved_useavx;
477      UseSSE = saved_usesse;
478      __ jmp(wrapup);
479   }
480
481    __ bind(legacy_save_restore);
482    // AVX check
483    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
484    UseAVX = 1;
485    UseSSE = 2;
486    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
487    __ vmovdqu(Address(rsi, 0), xmm0);
488    __ vmovdqu(Address(rsi, 32), xmm7);
489#ifdef _LP64
490    __ vmovdqu(Address(rsi, 64), xmm8);
491    __ vmovdqu(Address(rsi, 96), xmm15);
492#endif
493
494#ifdef _WINDOWS
495#ifdef _LP64
496    __ vmovdqu(xmm15, Address(rsp, 0));
497    __ addptr(rsp, 32);
498    __ vmovdqu(xmm8, Address(rsp, 0));
499    __ addptr(rsp, 32);
500#endif // _LP64
501    __ vmovdqu(xmm7, Address(rsp, 0));
502    __ addptr(rsp, 32);
503#endif // _WINDOWS
504    generate_vzeroupper(wrapup);
505    VM_Version::clean_cpuFeatures();
506    UseAVX = saved_useavx;
507    UseSSE = saved_usesse;
508
509    __ bind(wrapup);
510    __ popf();
511    __ pop(rsi);
512    __ pop(rbx);
513    __ pop(rbp);
514    __ ret(0);
515
516#   undef __
517
518    return start;
519  };
520  void generate_vzeroupper(Label& L_wrapup) {
521#   define __ _masm->
522    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
523    __ cmpl(Address(rsi, 4), 0x756e6547);  // 'uneG'
524    __ jcc(Assembler::notEqual, L_wrapup);
525    __ movl(rcx, 0x0FFF0FF0);
526    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
527    __ andl(rcx, Address(rsi, 0));
528    __ cmpl(rcx, 0x00050670);              // If it is Xeon Phi 3200/5200/7200
529    __ jcc(Assembler::equal, L_wrapup);
530    __ cmpl(rcx, 0x00080650);              // If it is Future Xeon Phi
531    __ jcc(Assembler::equal, L_wrapup);
532    __ vzeroupper();
533#   undef __
534  }
535};
536
537void VM_Version::get_processor_features() {
538
539  _cpu = 4; // 486 by default
540  _model = 0;
541  _stepping = 0;
542  _features = 0;
543  _logical_processors_per_package = 1;
544  // i486 internal cache is both I&D and has a 16-byte line size
545  _L1_data_cache_line_size = 16;
546
547  // Get raw processor info
548
549  get_cpu_info_stub(&_cpuid_info);
550
551  assert_is_initialized();
552  _cpu = extended_cpu_family();
553  _model = extended_cpu_model();
554  _stepping = cpu_stepping();
555
556  if (cpu_family() > 4) { // it supports CPUID
557    _features = feature_flags();
558    // Logical processors are only available on P4s and above,
559    // and only if hyperthreading is available.
560    _logical_processors_per_package = logical_processor_count();
561    _L1_data_cache_line_size = L1_line_size();
562  }
563
564  _supports_cx8 = supports_cmpxchg8();
565  // xchg and xadd instructions
566  _supports_atomic_getset4 = true;
567  _supports_atomic_getadd4 = true;
568  LP64_ONLY(_supports_atomic_getset8 = true);
569  LP64_ONLY(_supports_atomic_getadd8 = true);
570
571#ifdef _LP64
572  // OS should support SSE for x64 and hardware should support at least SSE2.
573  if (!VM_Version::supports_sse2()) {
574    vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
575  }
576  // in 64 bit the use of SSE2 is the minimum
577  if (UseSSE < 2) UseSSE = 2;
578#endif
579
580#ifdef AMD64
581  // flush_icache_stub have to be generated first.
582  // That is why Icache line size is hard coded in ICache class,
583  // see icache_x86.hpp. It is also the reason why we can't use
584  // clflush instruction in 32-bit VM since it could be running
585  // on CPU which does not support it.
586  //
587  // The only thing we can do is to verify that flushed
588  // ICache::line_size has correct value.
589  guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
590  // clflush_size is size in quadwords (8 bytes).
591  guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");
592#endif
593
594  // If the OS doesn't support SSE, we can't use this feature even if the HW does
595  if (!os::supports_sse())
596    _features &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4A|CPU_SSE4_1|CPU_SSE4_2);
597
598  if (UseSSE < 4) {
599    _features &= ~CPU_SSE4_1;
600    _features &= ~CPU_SSE4_2;
601  }
602
603  if (UseSSE < 3) {
604    _features &= ~CPU_SSE3;
605    _features &= ~CPU_SSSE3;
606    _features &= ~CPU_SSE4A;
607  }
608
609  if (UseSSE < 2)
610    _features &= ~CPU_SSE2;
611
612  if (UseSSE < 1)
613    _features &= ~CPU_SSE;
614
615  // first try initial setting and detect what we can support
616  if (UseAVX > 0) {
617    if (UseAVX > 2 && supports_evex()) {
618      UseAVX = 3;
619    } else if (UseAVX > 1 && supports_avx2()) {
620      UseAVX = 2;
621    } else if (UseAVX > 0 && supports_avx()) {
622      UseAVX = 1;
623    } else {
624      UseAVX = 0;
625    }
626  } else if (UseAVX < 0) {
627    UseAVX = 0;
628  }
629
630  if (UseAVX < 3) {
631    _features &= ~CPU_AVX512F;
632    _features &= ~CPU_AVX512DQ;
633    _features &= ~CPU_AVX512CD;
634    _features &= ~CPU_AVX512BW;
635    _features &= ~CPU_AVX512VL;
636  }
637
638  if (UseAVX < 2)
639    _features &= ~CPU_AVX2;
640
641  if (UseAVX < 1) {
642    _features &= ~CPU_AVX;
643    _features &= ~CPU_VZEROUPPER;
644  }
645
646  if (logical_processors_per_package() == 1) {
647    // HT processor could be installed on a system which doesn't support HT.
648    _features &= ~CPU_HT;
649  }
650
651  if( is_intel() ) { // Intel cpus specific settings
652    if (is_knights_family()) {
653      _features &= ~CPU_VZEROUPPER;
654    }
655  }
656
657  char buf[256];
658  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
659               cores_per_cpu(), threads_per_core(),
660               cpu_family(), _model, _stepping,
661               (supports_cmov() ? ", cmov" : ""),
662               (supports_cmpxchg8() ? ", cx8" : ""),
663               (supports_fxsr() ? ", fxsr" : ""),
664               (supports_mmx()  ? ", mmx"  : ""),
665               (supports_sse()  ? ", sse"  : ""),
666               (supports_sse2() ? ", sse2" : ""),
667               (supports_sse3() ? ", sse3" : ""),
668               (supports_ssse3()? ", ssse3": ""),
669               (supports_sse4_1() ? ", sse4.1" : ""),
670               (supports_sse4_2() ? ", sse4.2" : ""),
671               (supports_popcnt() ? ", popcnt" : ""),
672               (supports_avx()    ? ", avx" : ""),
673               (supports_avx2()   ? ", avx2" : ""),
674               (supports_aes()    ? ", aes" : ""),
675               (supports_clmul()  ? ", clmul" : ""),
676               (supports_erms()   ? ", erms" : ""),
677               (supports_rtm()    ? ", rtm" : ""),
678               (supports_mmx_ext() ? ", mmxext" : ""),
679               (supports_3dnow_prefetch() ? ", 3dnowpref" : ""),
680               (supports_lzcnt()   ? ", lzcnt": ""),
681               (supports_sse4a()   ? ", sse4a": ""),
682               (supports_ht() ? ", ht": ""),
683               (supports_tsc() ? ", tsc": ""),
684               (supports_tscinv_bit() ? ", tscinvbit": ""),
685               (supports_tscinv() ? ", tscinv": ""),
686               (supports_bmi1() ? ", bmi1" : ""),
687               (supports_bmi2() ? ", bmi2" : ""),
688               (supports_adx() ? ", adx" : ""),
689               (supports_evex() ? ", evex" : ""),
690               (supports_sha() ? ", sha" : ""),
691               (supports_fma() ? ", fma" : ""));
692  _features_string = os::strdup(buf);
693
694  // UseSSE is set to the smaller of what hardware supports and what
695  // the command line requires.  I.e., you cannot set UseSSE to 2 on
696  // older Pentiums which do not support it.
697  if (UseSSE > 4) UseSSE=4;
698  if (UseSSE < 0) UseSSE=0;
699  if (!supports_sse4_1()) // Drop to 3 if no SSE4 support
700    UseSSE = MIN2((intx)3,UseSSE);
701  if (!supports_sse3()) // Drop to 2 if no SSE3 support
702    UseSSE = MIN2((intx)2,UseSSE);
703  if (!supports_sse2()) // Drop to 1 if no SSE2 support
704    UseSSE = MIN2((intx)1,UseSSE);
705  if (!supports_sse ()) // Drop to 0 if no SSE  support
706    UseSSE = 0;
707
708  // Use AES instructions if available.
709  if (supports_aes()) {
710    if (FLAG_IS_DEFAULT(UseAES)) {
711      FLAG_SET_DEFAULT(UseAES, true);
712    }
713    if (!UseAES) {
714      if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
715        warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
716      }
717      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
718    } else {
719      if (UseSSE > 2) {
720        if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
721          FLAG_SET_DEFAULT(UseAESIntrinsics, true);
722        }
723      } else {
724        // The AES intrinsic stubs require AES instruction support (of course)
725        // but also require sse3 mode or higher for instructions it use.
726        if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
727          warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
728        }
729        FLAG_SET_DEFAULT(UseAESIntrinsics, false);
730      }
731
732      // --AES-CTR begins--
733      if (!UseAESIntrinsics) {
734        if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
735          warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
736          FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
737        }
738      } else {
739        if(supports_sse4_1()) {
740          if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
741            FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
742          }
743        } else {
744           // The AES-CTR intrinsic stubs require AES instruction support (of course)
745           // but also require sse4.1 mode or higher for instructions it use.
746          if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
747             warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
748           }
749           FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
750        }
751      }
752      // --AES-CTR ends--
753    }
754  } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
755    if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
756      warning("AES instructions are not available on this CPU");
757      FLAG_SET_DEFAULT(UseAES, false);
758    }
759    if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
760      warning("AES intrinsics are not available on this CPU");
761      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
762    }
763    if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
764      warning("AES-CTR intrinsics are not available on this CPU");
765      FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
766    }
767  }
768
769  // Use CLMUL instructions if available.
770  if (supports_clmul()) {
771    if (FLAG_IS_DEFAULT(UseCLMUL)) {
772      UseCLMUL = true;
773    }
774  } else if (UseCLMUL) {
775    if (!FLAG_IS_DEFAULT(UseCLMUL))
776      warning("CLMUL instructions not available on this CPU (AVX may also be required)");
777    FLAG_SET_DEFAULT(UseCLMUL, false);
778  }
779
780  if (UseCLMUL && (UseSSE > 2)) {
781    if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
782      UseCRC32Intrinsics = true;
783    }
784  } else if (UseCRC32Intrinsics) {
785    if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
786      warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
787    FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
788  }
789
790  if (supports_sse4_2() && supports_clmul()) {
791    if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
792      UseCRC32CIntrinsics = true;
793    }
794  } else if (UseCRC32CIntrinsics) {
795    if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
796      warning("CRC32C intrinsics are not available on this CPU");
797    }
798    FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
799  }
800
801  // GHASH/GCM intrinsics
802  if (UseCLMUL && (UseSSE > 2)) {
803    if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
804      UseGHASHIntrinsics = true;
805    }
806  } else if (UseGHASHIntrinsics) {
807    if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
808      warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
809    FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
810  }
811
812  if (supports_fma() && UseSSE >= 2) { // Check UseSSE since FMA code uses SSE instructions
813    if (FLAG_IS_DEFAULT(UseFMA)) {
814      UseFMA = true;
815    }
816  } else if (UseFMA) {
817    warning("FMA instructions are not available on this CPU");
818    FLAG_SET_DEFAULT(UseFMA, false);
819  }
820
821  if (supports_sha() LP64_ONLY(|| supports_avx2() && supports_bmi2())) {
822    if (FLAG_IS_DEFAULT(UseSHA)) {
823      UseSHA = true;
824    }
825  } else if (UseSHA) {
826    warning("SHA instructions are not available on this CPU");
827    FLAG_SET_DEFAULT(UseSHA, false);
828  }
829
830  if (supports_sha() && UseSHA) {
831    if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
832      FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
833    }
834  } else if (UseSHA1Intrinsics) {
835    warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
836    FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
837  }
838
839  if (UseSHA) {
840    if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
841      FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
842    }
843  } else if (UseSHA256Intrinsics) {
844    warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
845    FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
846  }
847
848  if (UseSHA) {
849    if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
850      FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
851    }
852  } else if (UseSHA512Intrinsics) {
853    warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
854    FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
855  }
856
857  if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
858    FLAG_SET_DEFAULT(UseSHA, false);
859  }
860
861  if (UseAdler32Intrinsics) {
862    warning("Adler32Intrinsics not available on this CPU.");
863    FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
864  }
865
866  if (!supports_rtm() && UseRTMLocking) {
867    // Can't continue because UseRTMLocking affects UseBiasedLocking flag
868    // setting during arguments processing. See use_biased_locking().
869    // VM_Version_init() is executed after UseBiasedLocking is used
870    // in Thread::allocate().
871    vm_exit_during_initialization("RTM instructions are not available on this CPU");
872  }
873
874#if INCLUDE_RTM_OPT
875  if (UseRTMLocking) {
876    if (is_client_compilation_mode_vm()) {
877      // Only C2 does RTM locking optimization.
878      // Can't continue because UseRTMLocking affects UseBiasedLocking flag
879      // setting during arguments processing. See use_biased_locking().
880      vm_exit_during_initialization("RTM locking optimization is not supported in emulated client VM");
881    }
882    if (is_intel_family_core()) {
883      if ((_model == CPU_MODEL_HASWELL_E3) ||
884          (_model == CPU_MODEL_HASWELL_E7 && _stepping < 3) ||
885          (_model == CPU_MODEL_BROADWELL  && _stepping < 4)) {
886        // currently a collision between SKL and HSW_E3
887        if (!UnlockExperimentalVMOptions && UseAVX < 3) {
888          vm_exit_during_initialization("UseRTMLocking is only available as experimental option on this "
889                                        "platform. It must be enabled via -XX:+UnlockExperimentalVMOptions flag.");
890        } else {
891          warning("UseRTMLocking is only available as experimental option on this platform.");
892        }
893      }
894    }
895    if (!FLAG_IS_CMDLINE(UseRTMLocking)) {
896      // RTM locking should be used only for applications with
897      // high lock contention. For now we do not use it by default.
898      vm_exit_during_initialization("UseRTMLocking flag should be only set on command line");
899    }
900  } else { // !UseRTMLocking
901    if (UseRTMForStackLocks) {
902      if (!FLAG_IS_DEFAULT(UseRTMForStackLocks)) {
903        warning("UseRTMForStackLocks flag should be off when UseRTMLocking flag is off");
904      }
905      FLAG_SET_DEFAULT(UseRTMForStackLocks, false);
906    }
907    if (UseRTMDeopt) {
908      FLAG_SET_DEFAULT(UseRTMDeopt, false);
909    }
910    if (PrintPreciseRTMLockingStatistics) {
911      FLAG_SET_DEFAULT(PrintPreciseRTMLockingStatistics, false);
912    }
913  }
914#else
915  if (UseRTMLocking) {
916    // Only C2 does RTM locking optimization.
917    // Can't continue because UseRTMLocking affects UseBiasedLocking flag
918    // setting during arguments processing. See use_biased_locking().
919    vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
920  }
921#endif
922
923#ifdef COMPILER2
924  if (UseFPUForSpilling) {
925    if (UseSSE < 2) {
926      // Only supported with SSE2+
927      FLAG_SET_DEFAULT(UseFPUForSpilling, false);
928    }
929  }
930#endif
931#if defined(COMPILER2) || INCLUDE_JVMCI
932  if (MaxVectorSize > 0) {
933    if (!is_power_of_2(MaxVectorSize)) {
934      warning("MaxVectorSize must be a power of 2");
935      FLAG_SET_DEFAULT(MaxVectorSize, 64);
936    }
937    if (UseSSE < 2) {
938      // Vectors (in XMM) are only supported with SSE2+
939      if (MaxVectorSize > 0) {
940        if (!FLAG_IS_DEFAULT(MaxVectorSize))
941          warning("MaxVectorSize must be 0");
942        FLAG_SET_DEFAULT(MaxVectorSize, 0);
943      }
944    }
945    else if (UseAVX == 0 || !os_supports_avx_vectors()) {
946      // 32 bytes vectors (in YMM) are only supported with AVX+
947      if (MaxVectorSize > 16) {
948        if (!FLAG_IS_DEFAULT(MaxVectorSize))
949          warning("MaxVectorSize must be <= 16");
950        FLAG_SET_DEFAULT(MaxVectorSize, 16);
951      }
952    }
953    else if (UseAVX == 1 || UseAVX == 2) {
954      // 64 bytes vectors (in ZMM) are only supported with AVX 3
955      if (MaxVectorSize > 32) {
956        if (!FLAG_IS_DEFAULT(MaxVectorSize))
957          warning("MaxVectorSize must be <= 32");
958        FLAG_SET_DEFAULT(MaxVectorSize, 32);
959      }
960    }
961    else if (UseAVX > 2 ) {
962      if (MaxVectorSize > 64) {
963        if (!FLAG_IS_DEFAULT(MaxVectorSize))
964          warning("MaxVectorSize must be <= 64");
965        FLAG_SET_DEFAULT(MaxVectorSize, 64);
966      }
967    }
968#if defined(COMPILER2) && defined(ASSERT)
969    if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
970      tty->print_cr("State of YMM registers after signal handle:");
971      int nreg = 2 LP64_ONLY(+2);
972      const char* ymm_name[4] = {"0", "7", "8", "15"};
973      for (int i = 0; i < nreg; i++) {
974        tty->print("YMM%s:", ymm_name[i]);
975        for (int j = 7; j >=0; j--) {
976          tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
977        }
978        tty->cr();
979      }
980    }
981#endif // COMPILER2 && ASSERT
982  }
983#endif // COMPILER2 || INCLUDE_JVMCI
984
985#ifdef COMPILER2
986#ifdef _LP64
987  if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
988    UseMultiplyToLenIntrinsic = true;
989  }
990  if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
991    UseSquareToLenIntrinsic = true;
992  }
993  if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
994    UseMulAddIntrinsic = true;
995  }
996  if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
997    UseMontgomeryMultiplyIntrinsic = true;
998  }
999  if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1000    UseMontgomerySquareIntrinsic = true;
1001  }
1002#else
1003  if (UseMultiplyToLenIntrinsic) {
1004    if (!FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1005      warning("multiplyToLen intrinsic is not available in 32-bit VM");
1006    }
1007    FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false);
1008  }
1009  if (UseMontgomeryMultiplyIntrinsic) {
1010    if (!FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1011      warning("montgomeryMultiply intrinsic is not available in 32-bit VM");
1012    }
1013    FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, false);
1014  }
1015  if (UseMontgomerySquareIntrinsic) {
1016    if (!FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1017      warning("montgomerySquare intrinsic is not available in 32-bit VM");
1018    }
1019    FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, false);
1020  }
1021  if (UseSquareToLenIntrinsic) {
1022    if (!FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1023      warning("squareToLen intrinsic is not available in 32-bit VM");
1024    }
1025    FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, false);
1026  }
1027  if (UseMulAddIntrinsic) {
1028    if (!FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1029      warning("mulAdd intrinsic is not available in 32-bit VM");
1030    }
1031    FLAG_SET_DEFAULT(UseMulAddIntrinsic, false);
1032  }
1033#endif
1034#endif // COMPILER2
1035
1036  // On new cpus instructions which update whole XMM register should be used
1037  // to prevent partial register stall due to dependencies on high half.
1038  //
1039  // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
1040  // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
1041  // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
1042  // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).
1043
1044  if( is_amd() ) { // AMD cpus specific settings
1045    if( supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop) ) {
1046      // Use it on new AMD cpus starting from Opteron.
1047      UseAddressNop = true;
1048    }
1049    if( supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift) ) {
1050      // Use it on new AMD cpus starting from Opteron.
1051      UseNewLongLShift = true;
1052    }
1053    if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) {
1054      if (supports_sse4a()) {
1055        UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
1056      } else {
1057        UseXmmLoadAndClearUpper = false;
1058      }
1059    }
1060    if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) {
1061      if( supports_sse4a() ) {
1062        UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
1063      } else {
1064        UseXmmRegToRegMoveAll = false;
1065      }
1066    }
1067    if( FLAG_IS_DEFAULT(UseXmmI2F) ) {
1068      if( supports_sse4a() ) {
1069        UseXmmI2F = true;
1070      } else {
1071        UseXmmI2F = false;
1072      }
1073    }
1074    if( FLAG_IS_DEFAULT(UseXmmI2D) ) {
1075      if( supports_sse4a() ) {
1076        UseXmmI2D = true;
1077      } else {
1078        UseXmmI2D = false;
1079      }
1080    }
1081    if (supports_sse4_2()) {
1082      if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1083        FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1084      }
1085    } else {
1086      if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1087        warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1088      }
1089      FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1090    }
1091
1092    // some defaults for AMD family 15h
1093    if ( cpu_family() == 0x15 ) {
1094      // On family 15h processors default is no sw prefetch
1095      if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1096        FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1097      }
1098      // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
1099      if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1100        FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1101      }
1102      // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
1103      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1104        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1105      }
1106      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1107        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1108      }
1109    }
1110
1111#ifdef COMPILER2
1112    if (MaxVectorSize > 16) {
1113      // Limit vectors size to 16 bytes on current AMD cpus.
1114      FLAG_SET_DEFAULT(MaxVectorSize, 16);
1115    }
1116#endif // COMPILER2
1117  }
1118
1119  if( is_intel() ) { // Intel cpus specific settings
1120    if( FLAG_IS_DEFAULT(UseStoreImmI16) ) {
1121      UseStoreImmI16 = false; // don't use it on Intel cpus
1122    }
1123    if( cpu_family() == 6 || cpu_family() == 15 ) {
1124      if( FLAG_IS_DEFAULT(UseAddressNop) ) {
1125        // Use it on all Intel cpus starting from PentiumPro
1126        UseAddressNop = true;
1127      }
1128    }
1129    if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) {
1130      UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
1131    }
1132    if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) {
1133      if( supports_sse3() ) {
1134        UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
1135      } else {
1136        UseXmmRegToRegMoveAll = false;
1137      }
1138    }
1139    if( cpu_family() == 6 && supports_sse3() ) { // New Intel cpus
1140#ifdef COMPILER2
1141      if( FLAG_IS_DEFAULT(MaxLoopPad) ) {
1142        // For new Intel cpus do the next optimization:
1143        // don't align the beginning of a loop if there are enough instructions
1144        // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1145        // in current fetch line (OptoLoopAlignment) or the padding
1146        // is big (> MaxLoopPad).
1147        // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
1148        // generated NOP instructions. 11 is the largest size of one
1149        // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1150        MaxLoopPad = 11;
1151      }
1152#endif // COMPILER2
1153      if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1154        UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
1155      }
1156      if (supports_sse4_2() && supports_ht()) { // Newest Intel cpus
1157        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1158          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1159        }
1160      }
1161      if (supports_sse4_2()) {
1162        if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1163          FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1164        }
1165      } else {
1166        if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1167          warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1168        }
1169        FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1170      }
1171    }
1172    if (is_atom_family() || is_knights_family()) {
1173#ifdef COMPILER2
1174      if (FLAG_IS_DEFAULT(OptoScheduling)) {
1175        OptoScheduling = true;
1176      }
1177#endif
1178      if (supports_sse4_2()) { // Silvermont
1179        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1180          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1181        }
1182      }
1183      if (FLAG_IS_DEFAULT(UseIncDec)) {
1184        FLAG_SET_DEFAULT(UseIncDec, false);
1185      }
1186    }
1187    if(FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1188      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1189    }
1190  }
1191
1192#ifdef _LP64
1193  if (UseSSE42Intrinsics) {
1194    if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1195      UseVectorizedMismatchIntrinsic = true;
1196    }
1197  } else if (UseVectorizedMismatchIntrinsic) {
1198    if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
1199      warning("vectorizedMismatch intrinsics are not available on this CPU");
1200    FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1201  }
1202#else
1203  if (UseVectorizedMismatchIntrinsic) {
1204    if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1205      warning("vectorizedMismatch intrinsic is not available in 32-bit VM");
1206    }
1207    FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1208  }
1209#endif // _LP64
1210
  // Use the count-leading-zeros (lzcnt) instruction if available.
1212  if (supports_lzcnt()) {
1213    if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
1214      UseCountLeadingZerosInstruction = true;
1215    }
1216   } else if (UseCountLeadingZerosInstruction) {
1217    warning("lzcnt instruction is not available on this CPU");
1218    FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
1219  }
1220
1221  // Use count trailing zeros instruction if available
1222  if (supports_bmi1()) {
1223    // tzcnt does not require VEX prefix
1224    if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
1225      if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1226        // Don't use tzcnt if BMI1 is switched off on command line.
1227        UseCountTrailingZerosInstruction = false;
1228      } else {
1229        UseCountTrailingZerosInstruction = true;
1230      }
1231    }
1232  } else if (UseCountTrailingZerosInstruction) {
1233    warning("tzcnt instruction is not available on this CPU");
1234    FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
1235  }
1236
1237  // BMI instructions (except tzcnt) use an encoding with VEX prefix.
1238  // VEX prefix is generated only when AVX > 0.
1239  if (supports_bmi1() && supports_avx()) {
1240    if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1241      UseBMI1Instructions = true;
1242    }
1243  } else if (UseBMI1Instructions) {
1244    warning("BMI1 instructions are not available on this CPU (AVX is also required)");
1245    FLAG_SET_DEFAULT(UseBMI1Instructions, false);
1246  }
1247
1248  if (supports_bmi2() && supports_avx()) {
1249    if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1250      UseBMI2Instructions = true;
1251    }
1252  } else if (UseBMI2Instructions) {
1253    warning("BMI2 instructions are not available on this CPU (AVX is also required)");
1254    FLAG_SET_DEFAULT(UseBMI2Instructions, false);
1255  }
1256
1257  // Use population count instruction if available.
1258  if (supports_popcnt()) {
1259    if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
1260      UsePopCountInstruction = true;
1261    }
1262  } else if (UsePopCountInstruction) {
1263    warning("POPCNT instruction is not available on this CPU");
1264    FLAG_SET_DEFAULT(UsePopCountInstruction, false);
1265  }
1266
1267  // Use fast-string operations if available.
1268  if (supports_erms()) {
1269    if (FLAG_IS_DEFAULT(UseFastStosb)) {
1270      UseFastStosb = true;
1271    }
1272  } else if (UseFastStosb) {
1273    warning("fast-string operations are not available on this CPU");
1274    FLAG_SET_DEFAULT(UseFastStosb, false);
1275  }
1276
1277#ifdef COMPILER2
1278  if (FLAG_IS_DEFAULT(AlignVector)) {
1279    // Modern processors allow misaligned memory operations for vectors.
1280    AlignVector = !UseUnalignedLoadStores;
1281  }
1282#endif // COMPILER2
1283
1284  if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1285    if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
1286      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
1287    } else if (!supports_sse() && supports_3dnow_prefetch()) {
1288      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1289    }
1290  }
1291
1292  // Allocation prefetch settings
1293  intx cache_line_size = prefetch_data_size();
1294  if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
1295      (cache_line_size > AllocatePrefetchStepSize)) {
1296    FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
1297  }
1298
1299  if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
1300    assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
1301    if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1302      warning("AllocatePrefetchDistance is set to 0 which disable prefetching. Ignoring AllocatePrefetchStyle flag.");
1303    }
1304    FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1305  }
1306
1307  if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
1308    bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
1309    FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
1310  }
1311
1312  if (is_intel() && cpu_family() == 6 && supports_sse3()) {
1313    if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
1314        supports_sse4_2() && supports_ht()) { // Nehalem based cpus
1315      FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
1316    }
1317#ifdef COMPILER2
1318    if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
1319      FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1320    }
1321#endif
1322  }
1323
1324#ifdef _LP64
1325  // Prefetch settings
1326
1327  // Prefetch interval for gc copy/scan == 9 dcache lines.  Derived from
1328  // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
1329  // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
1330  // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
1331
1332  // gc copy/scan is disabled if prefetchw isn't supported, because
1333  // Prefetch::write emits an inlined prefetchw on Linux.
1334  // Do not use the 3dnow prefetchw instruction.  It isn't supported on em64t.
1335  // The used prefetcht0 instruction works for both amd64 and em64t.
1336
1337  if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
1338    FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
1339  }
1340  if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
1341    FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
1342  }
1343  if (FLAG_IS_DEFAULT(PrefetchFieldsAhead)) {
1344    FLAG_SET_DEFAULT(PrefetchFieldsAhead, 1);
1345  }
1346#endif
1347
1348  if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
1349     (cache_line_size > ContendedPaddingWidth))
1350     ContendedPaddingWidth = cache_line_size;
1351
1352  // This machine allows unaligned memory accesses
1353  if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
1354    FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
1355  }
1356
1357#ifndef PRODUCT
1358  if (log_is_enabled(Info, os, cpu)) {
1359    LogStream ls(Log(os, cpu)::info());
1360    outputStream* log = &ls;
1361    log->print_cr("Logical CPUs per core: %u",
1362                  logical_processors_per_package());
1363    log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
1364    log->print("UseSSE=%d", (int) UseSSE);
1365    if (UseAVX > 0) {
1366      log->print("  UseAVX=%d", (int) UseAVX);
1367    }
1368    if (UseAES) {
1369      log->print("  UseAES=1");
1370    }
1371#ifdef COMPILER2
1372    if (MaxVectorSize > 0) {
1373      log->print("  MaxVectorSize=%d", (int) MaxVectorSize);
1374    }
1375#endif
1376    log->cr();
1377    log->print("Allocation");
1378    if (AllocatePrefetchStyle <= 0 || (UseSSE == 0 && !supports_3dnow_prefetch())) {
1379      log->print_cr(": no prefetching");
1380    } else {
1381      log->print(" prefetching: ");
1382      if (UseSSE == 0 && supports_3dnow_prefetch()) {
1383        log->print("PREFETCHW");
1384      } else if (UseSSE >= 1) {
1385        if (AllocatePrefetchInstr == 0) {
1386          log->print("PREFETCHNTA");
1387        } else if (AllocatePrefetchInstr == 1) {
1388          log->print("PREFETCHT0");
1389        } else if (AllocatePrefetchInstr == 2) {
1390          log->print("PREFETCHT2");
1391        } else if (AllocatePrefetchInstr == 3) {
1392          log->print("PREFETCHW");
1393        }
1394      }
1395      if (AllocatePrefetchLines > 1) {
1396        log->print_cr(" at distance %d, %d lines of %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchLines, (int) AllocatePrefetchStepSize);
1397      } else {
1398        log->print_cr(" at distance %d, one line of %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchStepSize);
1399      }
1400    }
1401
1402    if (PrefetchCopyIntervalInBytes > 0) {
1403      log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
1404    }
1405    if (PrefetchScanIntervalInBytes > 0) {
1406      log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
1407    }
1408    if (PrefetchFieldsAhead > 0) {
1409      log->print_cr("PrefetchFieldsAhead %d", (int) PrefetchFieldsAhead);
1410    }
1411    if (ContendedPaddingWidth > 0) {
1412      log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
1413    }
1414  }
1415#endif // !PRODUCT
1416}
1417
1418bool VM_Version::use_biased_locking() {
1419#if INCLUDE_RTM_OPT
1420  // RTM locking is most useful when there is high lock contention and
1421  // low data contention.  With high lock contention the lock is usually
1422  // inflated and biased locking is not suitable for that case.
1423  // RTM locking code requires that biased locking is off.
1424  // Note: we can't switch off UseBiasedLocking in get_processor_features()
1425  // because it is used by Thread::allocate() which is called before
1426  // VM_Version::initialize().
1427  if (UseRTMLocking && UseBiasedLocking) {
1428    if (FLAG_IS_DEFAULT(UseBiasedLocking)) {
1429      FLAG_SET_DEFAULT(UseBiasedLocking, false);
1430    } else {
1431      warning("Biased locking is not supported with RTM locking; ignoring UseBiasedLocking flag." );
1432      UseBiasedLocking = false;
1433    }
1434  }
1435#endif
1436  return UseBiasedLocking;
1437}
1438
1439void VM_Version::initialize() {
1440  ResourceMark rm;
1441  // Making this stub must be FIRST use of assembler
1442
1443  stub_blob = BufferBlob::create("get_cpu_info_stub", stub_size);
1444  if (stub_blob == NULL) {
1445    vm_exit_during_initialization("Unable to allocate get_cpu_info_stub");
1446  }
1447  CodeBuffer c(stub_blob);
1448  VM_Version_StubGenerator g(&c);
1449  get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
1450                                     g.generate_get_cpu_info());
1451
1452  get_processor_features();
1453}
1454