// vm_version_x86.hpp revision 1472:c18cbe5936b8
/*
 * Copyright (c) 1997, 2009, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

25class VM_Version : public Abstract_VM_Version {
26public:
27  // cpuid result register layouts.  These are all unions of a uint32_t
28  // (in case anyone wants access to the register as a whole) and a bitfield.
29
30  union StdCpuid1Eax {
31    uint32_t value;
32    struct {
33      uint32_t stepping   : 4,
34               model      : 4,
35               family     : 4,
36               proc_type  : 2,
37                          : 2,
38               ext_model  : 4,
39               ext_family : 8,
40                          : 4;
41    } bits;
42  };
43
44  union StdCpuid1Ebx { // example, unused
45    uint32_t value;
46    struct {
47      uint32_t brand_id         : 8,
48               clflush_size     : 8,
49               threads_per_cpu  : 8,
50               apic_id          : 8;
51    } bits;
52  };
53
54  union StdCpuid1Ecx {
55    uint32_t value;
56    struct {
57      uint32_t sse3     : 1,
58                        : 2,
59               monitor  : 1,
60                        : 1,
61               vmx      : 1,
62                        : 1,
63               est      : 1,
64                        : 1,
65               ssse3    : 1,
66               cid      : 1,
67                        : 2,
68               cmpxchg16: 1,
69                        : 4,
70               dca      : 1,
71               sse4_1   : 1,
72               sse4_2   : 1,
73                        : 2,
74               popcnt   : 1,
75                        : 8;
76    } bits;
77  };
78
79  union StdCpuid1Edx {
80    uint32_t value;
81    struct {
82      uint32_t          : 4,
83               tsc      : 1,
84                        : 3,
85               cmpxchg8 : 1,
86                        : 6,
87               cmov     : 1,
88                        : 7,
89               mmx      : 1,
90               fxsr     : 1,
91               sse      : 1,
92               sse2     : 1,
93                        : 1,
94               ht       : 1,
95                        : 3;
96    } bits;
97  };
98
99  union DcpCpuid4Eax {
100    uint32_t value;
101    struct {
102      uint32_t cache_type    : 5,
103                             : 21,
104               cores_per_cpu : 6;
105    } bits;
106  };
107
108  union DcpCpuid4Ebx {
109    uint32_t value;
110    struct {
111      uint32_t L1_line_size  : 12,
112               partitions    : 10,
113               associativity : 10;
114    } bits;
115  };
116
117  union ExtCpuid1Ecx {
118    uint32_t value;
119    struct {
120      uint32_t LahfSahf     : 1,
121               CmpLegacy    : 1,
122                            : 4,
123               lzcnt        : 1,
124               sse4a        : 1,
125               misalignsse  : 1,
126               prefetchw    : 1,
127                            : 22;
128    } bits;
129  };
130
131  union ExtCpuid1Edx {
132    uint32_t value;
133    struct {
134      uint32_t           : 22,
135               mmx_amd   : 1,
136               mmx       : 1,
137               fxsr      : 1,
138                         : 4,
139               long_mode : 1,
140               tdnow2    : 1,
141               tdnow     : 1;
142    } bits;
143  };
144
145  union ExtCpuid5Ex {
146    uint32_t value;
147    struct {
148      uint32_t L1_line_size : 8,
149               L1_tag_lines : 8,
150               L1_assoc     : 8,
151               L1_size      : 8;
152    } bits;
153  };
154
155  union ExtCpuid8Ecx {
156    uint32_t value;
157    struct {
158      uint32_t cores_per_cpu : 8,
159                             : 24;
160    } bits;
161  };
162
163protected:
164   static int _cpu;
165   static int _model;
166   static int _stepping;
167   static int _cpuFeatures;     // features returned by the "cpuid" instruction
168                                // 0 if this instruction is not available
169   static const char* _features_str;
170
171   enum {
172     CPU_CX8    = (1 << 0), // next bits are from cpuid 1 (EDX)
173     CPU_CMOV   = (1 << 1),
174     CPU_FXSR   = (1 << 2),
175     CPU_HT     = (1 << 3),
176     CPU_MMX    = (1 << 4),
177     CPU_3DNOW  = (1 << 5), // 3DNow comes from cpuid 0x80000001 (EDX)
178     CPU_SSE    = (1 << 6),
179     CPU_SSE2   = (1 << 7),
180     CPU_SSE3   = (1 << 8), // SSE3 comes from cpuid 1 (ECX)
181     CPU_SSSE3  = (1 << 9),
182     CPU_SSE4A  = (1 << 10),
183     CPU_SSE4_1 = (1 << 11),
184     CPU_SSE4_2 = (1 << 12),
185     CPU_POPCNT = (1 << 13),
186     CPU_LZCNT  = (1 << 14)
187   } cpuFeatureFlags;
188
189  // cpuid information block.  All info derived from executing cpuid with
190  // various function numbers is stored here.  Intel and AMD info is
191  // merged in this block: accessor methods disentangle it.
192  //
193  // The info block is laid out in subblocks of 4 dwords corresponding to
194  // eax, ebx, ecx and edx, whether or not they contain anything useful.
195  struct CpuidInfo {
196    // cpuid function 0
197    uint32_t std_max_function;
198    uint32_t std_vendor_name_0;
199    uint32_t std_vendor_name_1;
200    uint32_t std_vendor_name_2;
201
202    // cpuid function 1
203    StdCpuid1Eax std_cpuid1_eax;
204    StdCpuid1Ebx std_cpuid1_ebx;
205    StdCpuid1Ecx std_cpuid1_ecx;
206    StdCpuid1Edx std_cpuid1_edx;
207
208    // cpuid function 4 (deterministic cache parameters)
209    DcpCpuid4Eax dcp_cpuid4_eax;
210    DcpCpuid4Ebx dcp_cpuid4_ebx;
211    uint32_t     dcp_cpuid4_ecx; // unused currently
212    uint32_t     dcp_cpuid4_edx; // unused currently
213
214    // cpuid function 0x80000000 // example, unused
215    uint32_t ext_max_function;
216    uint32_t ext_vendor_name_0;
217    uint32_t ext_vendor_name_1;
218    uint32_t ext_vendor_name_2;
219
220    // cpuid function 0x80000001
221    uint32_t     ext_cpuid1_eax; // reserved
222    uint32_t     ext_cpuid1_ebx; // reserved
223    ExtCpuid1Ecx ext_cpuid1_ecx;
224    ExtCpuid1Edx ext_cpuid1_edx;
225
226    // cpuid functions 0x80000002 thru 0x80000004: example, unused
227    uint32_t proc_name_0, proc_name_1, proc_name_2, proc_name_3;
228    uint32_t proc_name_4, proc_name_5, proc_name_6, proc_name_7;
229    uint32_t proc_name_8, proc_name_9, proc_name_10,proc_name_11;
230
231    // cpuid function 0x80000005 //AMD L1, Intel reserved
232    uint32_t     ext_cpuid5_eax; // unused currently
233    uint32_t     ext_cpuid5_ebx; // reserved
234    ExtCpuid5Ex  ext_cpuid5_ecx; // L1 data cache info (AMD)
235    ExtCpuid5Ex  ext_cpuid5_edx; // L1 instruction cache info (AMD)
236
237    // cpuid function 0x80000008
238    uint32_t     ext_cpuid8_eax; // unused currently
239    uint32_t     ext_cpuid8_ebx; // reserved
240    ExtCpuid8Ecx ext_cpuid8_ecx;
241    uint32_t     ext_cpuid8_edx; // reserved
242  };
243
244  // The actual cpuid info block
245  static CpuidInfo _cpuid_info;
246
247  // Extractors and predicates
248  static uint32_t extended_cpu_family() {
249    uint32_t result = _cpuid_info.std_cpuid1_eax.bits.family;
250    result += _cpuid_info.std_cpuid1_eax.bits.ext_family;
251    return result;
252  }
253  static uint32_t extended_cpu_model() {
254    uint32_t result = _cpuid_info.std_cpuid1_eax.bits.model;
255    result |= _cpuid_info.std_cpuid1_eax.bits.ext_model << 4;
256    return result;
257  }
258  static uint32_t cpu_stepping() {
259    uint32_t result = _cpuid_info.std_cpuid1_eax.bits.stepping;
260    return result;
261  }
262  static uint logical_processor_count() {
263    uint result = threads_per_core();
264    return result;
265  }
266  static uint32_t feature_flags() {
267    uint32_t result = 0;
268    if (_cpuid_info.std_cpuid1_edx.bits.cmpxchg8 != 0)
269      result |= CPU_CX8;
270    if (_cpuid_info.std_cpuid1_edx.bits.cmov != 0)
271      result |= CPU_CMOV;
272    if (_cpuid_info.std_cpuid1_edx.bits.fxsr != 0 || is_amd() &&
273        _cpuid_info.ext_cpuid1_edx.bits.fxsr != 0)
274      result |= CPU_FXSR;
275    // HT flag is set for multi-core processors also.
276    if (threads_per_core() > 1)
277      result |= CPU_HT;
278    if (_cpuid_info.std_cpuid1_edx.bits.mmx != 0 || is_amd() &&
279        _cpuid_info.ext_cpuid1_edx.bits.mmx != 0)
280      result |= CPU_MMX;
281    if (_cpuid_info.std_cpuid1_edx.bits.sse != 0)
282      result |= CPU_SSE;
283    if (_cpuid_info.std_cpuid1_edx.bits.sse2 != 0)
284      result |= CPU_SSE2;
285    if (_cpuid_info.std_cpuid1_ecx.bits.sse3 != 0)
286      result |= CPU_SSE3;
287    if (_cpuid_info.std_cpuid1_ecx.bits.ssse3 != 0)
288      result |= CPU_SSSE3;
289    if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0)
290      result |= CPU_SSE4_1;
291    if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0)
292      result |= CPU_SSE4_2;
293    if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0)
294      result |= CPU_POPCNT;
295
296    // AMD features.
297    if (is_amd()) {
298      if (_cpuid_info.ext_cpuid1_edx.bits.tdnow != 0)
299        result |= CPU_3DNOW;
300      if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0)
301        result |= CPU_LZCNT;
302      if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
303        result |= CPU_SSE4A;
304    }
305
306    return result;
307  }
308
309  static void get_processor_features();
310
311public:
312  // Offsets for cpuid asm stub
313  static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); }
314  static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_eax); }
315  static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_eax); }
316  static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); }
317  static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
318  static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
319
320  // Initialization
321  static void initialize();
322
323  // Asserts
324  static void assert_is_initialized() {
325    assert(_cpuid_info.std_cpuid1_eax.bits.family != 0, "VM_Version not initialized");
326  }
327
328  //
329  // Processor family:
330  //       3   -  386
331  //       4   -  486
332  //       5   -  Pentium
333  //       6   -  PentiumPro, Pentium II, Celeron, Xeon, Pentium III, Athlon,
334  //              Pentium M, Core Solo, Core Duo, Core2 Duo
335  //    family 6 model:   9,        13,       14,        15
336  //    0x0f   -  Pentium 4, Opteron
337  //
338  // Note: The cpu family should be used to select between
339  //       instruction sequences which are valid on all Intel
340  //       processors.  Use the feature test functions below to
341  //       determine whether a particular instruction is supported.
342  //
343  static int  cpu_family()        { return _cpu;}
344  static bool is_P6()             { return cpu_family() >= 6; }
345
346  static bool is_amd()            { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA'
347  static bool is_intel()          { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG'
348
349  static uint cores_per_cpu()  {
350    uint result = 1;
351    if (is_intel()) {
352      result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
353    } else if (is_amd()) {
354      result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1);
355    }
356    return result;
357  }
358
359  static uint threads_per_core()  {
360    uint result = 1;
361    if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
362      result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
363               cores_per_cpu();
364    }
365    return result;
366  }
367
368  static intx L1_data_cache_line_size()  {
369    intx result = 0;
370    if (is_intel()) {
371      result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
372    } else if (is_amd()) {
373      result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
374    }
375    if (result < 32) // not defined ?
376      result = 32;   // 32 bytes by default on x86 and other x64
377    return result;
378  }
379
380  //
381  // Feature identification
382  //
383  static bool supports_cpuid()    { return _cpuFeatures  != 0; }
384  static bool supports_cmpxchg8() { return (_cpuFeatures & CPU_CX8) != 0; }
385  static bool supports_cmov()     { return (_cpuFeatures & CPU_CMOV) != 0; }
386  static bool supports_fxsr()     { return (_cpuFeatures & CPU_FXSR) != 0; }
387  static bool supports_ht()       { return (_cpuFeatures & CPU_HT) != 0; }
388  static bool supports_mmx()      { return (_cpuFeatures & CPU_MMX) != 0; }
389  static bool supports_sse()      { return (_cpuFeatures & CPU_SSE) != 0; }
390  static bool supports_sse2()     { return (_cpuFeatures & CPU_SSE2) != 0; }
391  static bool supports_sse3()     { return (_cpuFeatures & CPU_SSE3) != 0; }
392  static bool supports_ssse3()    { return (_cpuFeatures & CPU_SSSE3)!= 0; }
393  static bool supports_sse4_1()   { return (_cpuFeatures & CPU_SSE4_1) != 0; }
394  static bool supports_sse4_2()   { return (_cpuFeatures & CPU_SSE4_2) != 0; }
395  static bool supports_popcnt()   { return (_cpuFeatures & CPU_POPCNT) != 0; }
396  //
397  // AMD features
398  //
399  static bool supports_3dnow()    { return (_cpuFeatures & CPU_3DNOW) != 0; }
400  static bool supports_mmx_ext()  { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.mmx_amd != 0; }
401  static bool supports_3dnow2()   { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.tdnow2 != 0; }
402  static bool supports_lzcnt()    { return (_cpuFeatures & CPU_LZCNT) != 0; }
403  static bool supports_sse4a()    { return (_cpuFeatures & CPU_SSE4A) != 0; }
404
405  static bool supports_compare_and_exchange() { return true; }
406
407  static const char* cpu_features()           { return _features_str; }
408
409  static intx allocate_prefetch_distance() {
410    // This method should be called before allocate_prefetch_style().
411    //
412    // Hardware prefetching (distance/size in bytes):
413    // Pentium 3 -  64 /  32
414    // Pentium 4 - 256 / 128
415    // Athlon    -  64 /  32 ????
416    // Opteron   - 128 /  64 only when 2 sequential cache lines accessed
417    // Core      - 128 /  64
418    //
419    // Software prefetching (distance in bytes / instruction with best score):
420    // Pentium 3 - 128 / prefetchnta
421    // Pentium 4 - 512 / prefetchnta
422    // Athlon    - 128 / prefetchnta
423    // Opteron   - 256 / prefetchnta
424    // Core      - 256 / prefetchnta
425    // It will be used only when AllocatePrefetchStyle > 0
426
427    intx count = AllocatePrefetchDistance;
428    if (count < 0) {   // default ?
429      if (is_amd()) {  // AMD
430        if (supports_sse2())
431          count = 256; // Opteron
432        else
433          count = 128; // Athlon
434      } else {         // Intel
435        if (supports_sse2())
436          if (cpu_family() == 6) {
437            count = 256; // Pentium M, Core, Core2
438          } else {
439            count = 512; // Pentium 4
440          }
441        else
442          count = 128; // Pentium 3 (and all other old CPUs)
443      }
444    }
445    return count;
446  }
447  static intx allocate_prefetch_style() {
448    assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive");
449    // Return 0 if AllocatePrefetchDistance was not defined.
450    return AllocatePrefetchDistance > 0 ? AllocatePrefetchStyle : 0;
451  }
452
453  // Prefetch interval for gc copy/scan == 9 dcache lines.  Derived from
454  // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
455  // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
456  // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
457
458  // gc copy/scan is disabled if prefetchw isn't supported, because
459  // Prefetch::write emits an inlined prefetchw on Linux.
460  // Do not use the 3dnow prefetchw instruction.  It isn't supported on em64t.
461  // The used prefetcht0 instruction works for both amd64 and em64t.
462  static intx prefetch_copy_interval_in_bytes() {
463    intx interval = PrefetchCopyIntervalInBytes;
464    return interval >= 0 ? interval : 576;
465  }
466  static intx prefetch_scan_interval_in_bytes() {
467    intx interval = PrefetchScanIntervalInBytes;
468    return interval >= 0 ? interval : 576;
469  }
470  static intx prefetch_fields_ahead() {
471    intx count = PrefetchFieldsAhead;
472    return count >= 0 ? count : 1;
473  }
474};
475