1/**
2 * Identify the characteristics of the host CPU, providing information
3 * about cache sizes and assembly optimisation hints. This module is
4 * provided primarily for assembly language programmers.
5 *
6 * References:
7 * Some of this information was extremely difficult to track down. Some of the
8 * documents below were found only in cached versions stored by search engines!
9 * This code relies on information found in:
10 *
11 * $(UL
12 * $(LI "Intel(R) 64 and IA-32 Architectures Software Developers Manual,
13 *    Volume 2A: Instruction Set Reference, A-M" (2007).
14 * )
15 * $(LI "AMD CPUID Specification", Advanced Micro Devices, Rev 2.28 (2008).
16 * )
17 * $(LI "AMD Processor Recognition Application Note For Processors Prior to AMD
18 *    Family 0Fh Processors", Advanced Micro Devices, Rev 3.13 (2005).
19 * )
20 * $(LI "AMD Geode(TM) GX Processors Data Book",
21 *    Advanced Micro Devices, Publication ID 31505E, (2005).
22 * )
23 * $(LI "AMD K6 Processor Code Optimisation", Advanced Micro Devices, Rev D (2000).
24 * )
25 * $(LI "Application note 106: Software Customization for the 6x86 Family",
26 *    Cyrix Corporation, Rev 1.5 (1998)
27 * )
28 * $(LI $(LINK http://www.datasheetcatalog.org/datasheet/nationalsemiconductor/GX1.pdf))
29 * $(LI "Geode(TM) GX1 Processor Series Low Power Integrated X86 Solution",
30 *   National Semiconductor, (2002)
31 * )
32 * $(LI "The VIA Isaiah Architecture", G. Glenn Henry, Centaur Technology, Inc (2008).
33 * )
34 * $(LI $(LINK http://www.sandpile.org/ia32/cpuid.htm))
35 * $(LI $(LINK http://www.akkadia.org/drepper/cpumemory.pdf))
36 * $(LI "What every programmer should know about memory",
 *    Ulrich Drepper, Red Hat, Inc., (2007).
38 * )
39 * $(LI "CPU Identification by the Windows Kernel", G. Chappell (2009).
40 *   $(LINK http://www.geoffchappell.com/viewer.htm?doc=studies/windows/km/cpu/cx8.htm)
41 * )
42 * $(LI "Intel(R) Processor Identification and the CPUID Instruction, Application
43 *    Note 485" (2009).
44 * )
45 * )
46 *
47 * Bugs: Currently only works on x86 and Itanium CPUs.
48 *      Many processors have bugs in their microcode for the CPUID instruction,
49 *      so sometimes the cache information may be incorrect.
50 *
51 * Copyright: Copyright Don Clugston 2007 - 2009.
52 * License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
53 * Authors:   Don Clugston, Tomas Lindquist Olsen <tomas@famolsen.dk>
54 * Source:    $(DRUNTIMESRC core/_cpuid.d)
55 */
56
57module core.cpuid;
58
59version (GNU) version = GNU_OR_LDC;
60version (LDC) version = GNU_OR_LDC;
61
62@trusted:
63nothrow:
64@nogc:
65
66// If optimizing for a particular processor, it is generally better
67// to identify based on features rather than model. NOTE: Normally
68// it's only worthwhile to optimise for the latest Intel and AMD CPU,
69// with a backup for other CPUs.
70// Pentium    -- preferPentium1()
71// PMMX       --   + mmx()
72// PPro       -- default
73// PII        --   + mmx()
74// PIII       --   + mmx() + sse()
75// PentiumM   --   + mmx() + sse() + sse2()
76// Pentium4   -- preferPentium4()
77// PentiumD   --   + isX86_64()
78// Core2      -- default + isX86_64()
79// AMD K5     -- preferPentium1()
80// AMD K6     --   + mmx()
81// AMD K6-II  --   + mmx() + 3dnow()
82// AMD K7     -- preferAthlon()
83// AMD K8     --   + sse2()
84// AMD K10    --   + isX86_64()
85// Cyrix 6x86 -- preferPentium1()
86//    6x86MX  --   + mmx()
87
88// GDC support uses extended inline assembly:
89//   https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html        (general information and hints)
90//   https://gcc.gnu.org/onlinedocs/gcc/Simple-Constraints.html  (binding variables to registers)
91//   https://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html (x86 specific register short names)
92
93public:
94
95/// Cache size and behaviour
struct CacheInfo
{
    /**
     * Cache size in kilobytes, per CPU.
     *
     * A unified (data + code) L1 cache is reported at half its physical
     * size. Larger unified caches are not halved, since data normally
     * far outweighs code in critical loops.
     */
    size_t size;

    /**
     * Associativity, expressed as a number of ways, eg:
     * $(UL
     * $(LI 1 = direct mapped)
     * $(LI 2 = 2-way set associative)
     * $(LI 3 = 3-way set associative)
     * $(LI ubyte.max = fully associative)
     * )
     */
    ubyte associativity;

    /// Number of bytes brought into the cache when a cache miss occurs.
    uint lineSize;
}
114
115public:
    /// $(RED Scheduled for deprecation. Please use $(D dataCaches) instead.)
    // Note: When we deprecate it, we simply make it private.
    // Mutable, process-wide table with one entry per cache level
    // (index 0 = L1). The cache probing routines in this module
    // (getcacheinfoCPUID2/CPUID4, getAMDcacheinfo) write their results here.
    __gshared CacheInfo[5] datacache;
119
@property pure
{
    /// The data caches. If there are fewer than 5 physical cache levels,
    /// the remaining levels are set to size_t.max (== entire memory space)
    const(CacheInfo)[5] dataCaches() { return _dataCaches; }

    /// Returns the vendor string. It is meant for display purposes only:
    /// do NOT use it to determine features, because some CPUs have
    /// programmable vendorIDs.
    string vendor() { return _vendor; }

    /// Returns the processor string, for display purposes only.
    string processor() { return _processor; }

    /// Is there an x87 FPU on-chip?
    bool x87onChip() { return _x87onChip; }

    /// Is MMX supported?
    bool mmx() { return _mmx; }

    /// Is SSE supported?
    bool sse() { return _sse; }

    /// Is SSE2 supported?
    bool sse2() { return _sse2; }

    /// Is SSE3 supported?
    bool sse3() { return _sse3; }

    /// Is SSSE3 supported?
    bool ssse3() { return _ssse3; }

    /// Is SSE4.1 supported?
    bool sse41() { return _sse41; }

    /// Is SSE4.2 supported?
    bool sse42() { return _sse42; }

    /// Is SSE4a supported?
    bool sse4a() { return _sse4a; }

    /// Is AES supported?
    bool aes() { return _aes; }

    /// Is pclmulqdq supported?
    bool hasPclmulqdq() { return _hasPclmulqdq; }

    /// Is rdrand supported?
    bool hasRdrand() { return _hasRdrand; }

    /// Is AVX supported?
    bool avx() { return _avx; }

    /// Is VEX-encoded AES supported?
    bool vaes() { return _vaes; }

    /// Is vpclmulqdq supported?
    bool hasVpclmulqdq() { return _hasVpclmulqdq; }

    /// Is FMA supported?
    bool fma() { return _fma; }

    /// Is FP16C supported?
    bool fp16c() { return _fp16c; }

    /// Is AVX2 supported?
    bool avx2() { return _avx2; }

    /// Is HLE (hardware lock elision) supported?
    bool hle() { return _hle; }

    /// Is RTM (restricted transactional memory) supported?
    bool rtm() { return _rtm; }

    /// Is rdseed supported?
    bool hasRdseed() { return _hasRdseed; }

    /// Is SHA supported?
    bool hasSha() { return _hasSha; }

    /// Is AMD 3DNOW supported?
    bool amd3dnow() { return _amd3dnow; }

    /// Is AMD 3DNOW Ext supported?
    bool amd3dnowExt() { return _amd3dnowExt; }

    /// Are AMD extensions to MMX supported?
    bool amdMmx() { return _amdMmx; }

    /// Is fxsave/fxrstor supported?
    bool hasFxsr() { return _hasFxsr; }

    /// Is cmov supported?
    bool hasCmov() { return _hasCmov; }

    /// Is rdtsc supported?
    bool hasRdtsc() { return _hasRdtsc; }

    /// Is cmpxchg8b supported?
    bool hasCmpxchg8b() { return _hasCmpxchg8b; }

    /// Is cmpxchg16b supported?
    bool hasCmpxchg16b() { return _hasCmpxchg16b; }

    /// Is SYSENTER/SYSEXIT supported?
    bool hasSysEnterSysExit() { return _hasSysEnterSysExit; }

    /// Is 3DNow prefetch supported?
    bool has3dnowPrefetch() { return _has3dnowPrefetch; }

    /// Are LAHF and SAHF supported in 64-bit mode?
    bool hasLahfSahf() { return _hasLahfSahf; }

    /// Is POPCNT supported?
    bool hasPopcnt() { return _hasPopcnt; }

    /// Is LZCNT supported?
    bool hasLzcnt() { return _hasLzcnt; }

    /// Is this an Intel64 or AMD 64?
    bool isX86_64() { return _isX86_64; }

    /// Is this an IA64 (Itanium) processor?
    bool isItanium() { return _isItanium; }

    /// Is hyperthreading supported?
    bool hyperThreading() { return _hyperThreading; }

    /// Returns the number of threads per CPU.
    uint threadsPerCPU() { return _threadsPerCPU; }

    /// Returns the number of cores in the CPU.
    uint coresPerCPU() { return _coresPerCPU; }

    /// Optimisation hints for assembly code.
    ///
    /// For forward compatibility, the CPU is compared against different
    /// microarchitectures. For 32-bit x86, comparisons are made against
    /// the Intel PPro/PII/PIII/PM family.
    ///
    /// The major 32-bit x86 microarchitecture 'dynasties' have been:
    ///
    /// $(UL
    /// $(LI Intel P6 (PentiumPro, PII, PIII, PM, Core, Core2). )
    /// $(LI AMD Athlon (K7, K8, K10). )
    /// $(LI Intel NetBurst (Pentium 4, Pentium D). )
    /// $(LI In-order Pentium (Pentium1, PMMX, Atom) )
    /// )
    ///
    /// Other early CPUs (Nx586, AMD K5, K6, Centaur C3, Transmeta,
    /// Cyrix, Rise) were mostly in-order.
    ///
    /// Some new processors do not fit into the existing categories:
    ///
    /// $(UL
    /// $(LI Intel Atom 230/330 (family 6, model 0x1C) is an in-order core. )
    /// $(LI Centaur Isaiah = VIA Nano (family 6, model F) is an out-of-order core. )
    /// )
    ///
    /// Within each dynasty, the optimisation techniques are largely
    /// identical (eg, use instruction pairing for group 4). Major
    /// instruction set improvements occur within each dynasty.

    /// Does this CPU perform better on AMD K7 code than PentiumPro..Core2 code?
    bool preferAthlon() { return _preferAthlon; }

    /// Does this CPU perform better on Pentium4 code than PentiumPro..Core2 code?
    bool preferPentium4() { return _preferPentium4; }

    /// Does this CPU perform better on Pentium I code than Pentium Pro code?
    bool preferPentium1() { return _preferPentium1; }
}
252
private immutable
{
    /* Backing storage for the public query properties.
     *
     * The values are kept as immutables so that the property functions
     * stay backwards compatible with code that called them with ().
     * Being immutable, they can only be assigned by the static this().
     */

    // Cache description and display strings.
    const(CacheInfo)[5] _dataCaches;
    string _vendor, _processor;

    // x87, MMX and the SSE family.
    bool _x87onChip, _mmx;
    bool _sse, _sse2, _sse3, _ssse3, _sse41, _sse42, _sse4a;

    // AES, pclmulqdq, rdrand, and the AVX-era extensions.
    bool _aes, _hasPclmulqdq, _hasRdrand;
    bool _avx, _vaes, _hasVpclmulqdq, _fma, _fp16c, _avx2;

    // HLE/RTM, rdseed and SHA.
    bool _hle, _rtm, _hasRdseed, _hasSha;

    // AMD-specific extensions.
    bool _amd3dnow, _amd3dnowExt, _amdMmx;

    // Individual instruction support.
    bool _hasFxsr, _hasCmov, _hasRdtsc;
    bool _hasCmpxchg8b, _hasCmpxchg16b;
    bool _hasSysEnterSysExit, _has3dnowPrefetch, _hasLahfSahf;
    bool _hasPopcnt, _hasLzcnt;

    // Architecture and topology.
    bool _isX86_64, _isItanium, _hyperThreading;
    uint _threadsPerCPU, _coresPerCPU;

    // Assembly optimisation hints.
    bool _preferAthlon, _preferPentium4, _preferPentium1;
}
306
307__gshared:
308    // All these values are set only once, and never subsequently modified.
309public:
310    /// $(RED Warning: This field will be turned into a property in a future release.)
311    ///
312    /// Processor type (vendor-dependent).
313    /// This should be visible ONLY for display purposes.
314    uint stepping, model, family;
315    /// $(RED This field has been deprecated. Please use $(D cacheLevels) instead.)
316    uint numCacheLevels = 1;
317    /// The number of cache levels in the CPU.
318    @property uint cacheLevels() { return numCacheLevels; }
319private:
320
// Raw identification data gathered from CPUID, before it is turned into
// the public query properties.
struct CpuFeatures
{
    // true = _probably_ an Intel processor, might be faking
    bool probablyIntel;
    // true = _probably_ an AMD or Hygon processor
    bool probablyAMD;

    string processorName;
    // Explicitly zero-filled: char would otherwise default to 0xFF.
    char[12] vendorID = 0;
    char[48] processorNameBuffer = 0;

    uint features;        // mmx, sse, sse2, hyperthreading, etc
    uint miscfeatures;    // sse3, etc.
    uint extfeatures;     // HLE, AVX2, RTM, etc.
    uint amdfeatures;     // 3DNow!, mmxext, etc
    uint amdmiscfeatures; // sse4a, sse5, svm, etc
    ulong xfeatures;      // XFEATURES_ENABLED_MASK

    uint maxCores = 1;
    uint maxThreads = 1;
}
337
// Module-wide feature record, written by the CPUID probing routines.
CpuFeatures cpuFeatures;

/* Returns the address of the global cpuFeatures record.
 *
 * The function is deliberately kept out of line so that the optimizer
 * cannot see where the returned pointer (held in a register by the
 * caller) comes from, and therefore cannot "optimize it away".
 * Accessing the global data through that pointer means fewer fixups
 * are inserted into the executable image.
 */
CpuFeatures* getCpuFeatures() @nogc nothrow
{
    pragma(inline, false);
    CpuFeatures* record = &cpuFeatures;
    return record;
}
350
351    // Note that this may indicate multi-core rather than hyperthreading.
352    @property bool hyperThreadingBit()    { return (cpuFeatures.features&HTT_BIT)!=0;}
353
354    // feature flags CPUID1_EDX
355    enum : uint
356    {
357        FPU_BIT = 1,
358        TIMESTAMP_BIT = 1<<4, // rdtsc
359        MDSR_BIT = 1<<5,      // RDMSR/WRMSR
360        CMPXCHG8B_BIT = 1<<8,
361        SYSENTERSYSEXIT_BIT = 1<<11,
362        CMOV_BIT = 1<<15,
363        MMX_BIT = 1<<23,
364        FXSR_BIT = 1<<24,
365        SSE_BIT = 1<<25,
366        SSE2_BIT = 1<<26,
367        HTT_BIT = 1<<28,
368        IA64_BIT = 1<<30
369    }
370    // feature flags misc CPUID1_ECX
371    enum : uint
372    {
373        SSE3_BIT = 1,
374        PCLMULQDQ_BIT = 1<<1, // from AVX
375        MWAIT_BIT = 1<<3,
376        SSSE3_BIT = 1<<9,
377        FMA_BIT = 1<<12,     // from AVX
378        CMPXCHG16B_BIT = 1<<13,
379        SSE41_BIT = 1<<19,
380        SSE42_BIT = 1<<20,
381        POPCNT_BIT = 1<<23,
382        AES_BIT = 1<<25, // AES instructions from AVX
383        OSXSAVE_BIT = 1<<27, // Used for AVX
384        AVX_BIT = 1<<28,
385        FP16C_BIT = 1<<29,
386        RDRAND_BIT = 1<<30,
387    }
388    // Feature flags for cpuid.{EAX = 7, ECX = 0}.EBX.
389    enum : uint
390    {
391        FSGSBASE_BIT = 1 << 0,
392        BMI1_BIT = 1 << 3,
393        HLE_BIT = 1 << 4,
394        AVX2_BIT = 1 << 5,
395        SMEP_BIT = 1 << 7,
396        BMI2_BIT = 1 << 8,
397        ERMS_BIT = 1 << 9,
398        INVPCID_BIT = 1 << 10,
399        RTM_BIT = 1 << 11,
400        RDSEED_BIT = 1 << 18,
401        SHA_BIT = 1 << 29,
402    }
403    // feature flags XFEATURES_ENABLED_MASK
404    enum : ulong
405    {
406        XF_FP_BIT  = 0x1,
407        XF_SSE_BIT = 0x2,
408        XF_YMM_BIT = 0x4,
409    }
410    // AMD feature flags CPUID80000001_EDX
411    enum : uint
412    {
413        AMD_MMX_BIT = 1<<22,
414//      FXR_OR_CYRIXMMX_BIT = 1<<24, // Cyrix/NS: 6x86MMX instructions.
415        FFXSR_BIT = 1<<25,
416        PAGE1GB_BIT = 1<<26, // support for 1GB pages
417        RDTSCP_BIT = 1<<27,
418        AMD64_BIT = 1<<29,
419        AMD_3DNOW_EXT_BIT = 1<<30,
420        AMD_3DNOW_BIT = 1<<31
421    }
422    // AMD misc feature flags CPUID80000001_ECX
423    enum : uint
424    {
425        LAHFSAHF_BIT = 1,
426        LZCNT_BIT = 1<<5,
427        SSE4A_BIT = 1<<6,
428        AMD_3DNOW_PREFETCH_BIT = 1<<8,
429    }
430
431
// supportedX86 is true when the x86 CPUID probing code can be compiled.
version (GNU_OR_LDC)
{
    // GDC and LDC: decided by the target architecture.
    version (X86)
        enum supportedX86 = true;
    else version (X86_64)
        enum supportedX86 = true;
    else
        enum supportedX86 = false;
}
else version (D_InlineAsm_X86)
{
    // Other compilers: decided by inline assembler availability.
    enum supportedX86 = true;
}
else version (D_InlineAsm_X86_64)
{
    enum supportedX86 = true;
}
else
{
    enum supportedX86 = false;
}
446
447static if (supportedX86) {
448// Note that this code will also work for Itanium in x86 mode.
449
// Highest supported standard and extended CPUID function numbers, as
// returned in EAX by CPUID(0) and CPUID(0x8000_0000) respectively.
// Both are assigned in cpuidX86().
__gshared uint max_cpuid, max_extended_cpuid;
451
// CPUID2: "cache and tlb information"
//
// Executes CPUID function 2 and decodes each descriptor byte it returns.
// Every descriptor that is recognised as a data cache is recorded in
// datacache[0..3] (index 0 = L1, 1 = L2, 2 = L3).
void getcacheinfoCPUID2()
{
    // We are only interested in the data caches
    void decipherCpuid2(ubyte x) @nogc nothrow {
        if (x==0) return;
        // Values from http://www.sandpile.org/ia32/cpuid.htm.
        // Includes Itanium and non-Intel CPUs.
        //
        // The three tables below are parallel: ids[i] is described by
        // sizes[i] (in KB) and ways[i].
        static immutable ubyte [63] ids = [
            // level 1 data cache (9 entries)
            0x0A, 0x0C, 0x0D, 0x2C, 0x60, 0x0E, 0x66, 0x67, 0x68,
            // level 2 cache (29 entries)
            0x41, 0x42, 0x43, 0x44, 0x45, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7F,
            0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x49, 0x4E,
            0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x48, 0x80, 0x81,
            // level 3 cache
            0x22, 0x23, 0x25, 0x29, 0x46, 0x47, 0x4A, 0x4B, 0x4C, 0x4D,

            0xD0, 0xD1, 0xD2, 0xD6, 0xD7, 0xD8, 0xDC, 0xDD, 0xDE,
            0xE2, 0xE3, 0xE4, 0xEA, 0xEB, 0xEC
        ];
        // NOTE(review): the size listed for 0x2C (64) differs from the 32 KB
        // that Intel documents for this descriptor -- confirm before changing.
        static immutable uint [63] sizes = [
            8, 16, 16, 64, 16, 24, 8, 16, 32,
            128, 256, 512, 1024, 2048, 1024, 128, 256, 512, 1024, 2048, 512,
            256, 512, 1024, 2048, 512, 1024, 4096, 6*1024,
            128, 192, 128, 256, 384, 512, 3072, 512, 128,
            512, 1024, 2048, 4096, 4096, 8192, 6*1024, 8192, 12*1024, 16*1024,

            512, 1024, 2048, 1024, 2048, 4096, 1024+512, 3*1024, 6*1024,
            2*1024, 4*1024, 8*1024, 12*1024, 28*1024, 24*1024
        ];
    // CPUBUG: Pentium M reports 0x2C but tests show it is only 4-way associative
        static immutable ubyte [63] ways = [
            2, 4, 4, 8, 8, 6, 4, 4, 4,
            4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 2,
            8, 8, 8, 8, 4, 8, 16, 24,
            4, 6, 2, 4, 6, 4, 12, 8, 8,
            4, 8, 8, 8, 4, 8, 12, 16, 12, 16,
            4, 4, 4, 8, 8, 8, 12, 12, 12,
            16, 16, 16, 24, 24, 24
        ];
        // Index of the first L2 and the first L3 entry in the tables above.
        // These must equal the number of entries that precede each section:
        // 9 L1 descriptors, followed by 29 L2 descriptors.
        enum { FIRSTDATA2 = 9, FIRSTDATA3 = 9 + 29 }
        for (size_t i=0; i< ids.length; ++i) {
            if (x==ids[i]) {
                int level = i< FIRSTDATA2 ? 0: i<FIRSTDATA3 ? 1 : 2;
                // Special case: on family 0xF model 6, descriptor 0x49 is
                // recorded as an L3 cache instead of L2.
                if (x==0x49 && family==0xF && model==0x6) level=2;
                datacache[level].size=sizes[i];
                datacache[level].associativity=ways[i];
                // level is zero-based, so the L3 cache is level 2.
                if (level == 2 || x==0x2C || x==0x0D || (x>=0x48 && x<=0x80)
                                   || x==0x86 || x==0x87
                                   || (x>=0x66 && x<=0x68) || (x>=0x39 && x<=0x3E)){
                    datacache[level].lineSize = 64;
                } else datacache[level].lineSize = 32;
                break; // descriptor ids are unique, nothing more to find
            }
        }
    }

    uint[4] a;
    bool firstTime = true;
    // On a multi-core system, this could theoretically fail, but it's only used
    // for old single-core CPUs.
    uint numinfos = 1;
    // Counts completed CPUID(2) calls. Counting upwards (instead of
    // decrementing numinfos) keeps the loop finite even if the CPU reports
    // a repeat count of zero.
    uint pass = 0;
    do {
        version (GNU_OR_LDC) asm pure nothrow @nogc {
            "cpuid" : "=a" (a[0]), "=b" (a[1]), "=c" (a[2]), "=d" (a[3]) : "a" (2);
        } else asm pure nothrow @nogc {
            mov EAX, 2;
            cpuid;
            mov a+0, EAX;
            mov a+4, EBX;
            mov a+8, ECX;
            mov a+12, EDX;
        }
        if (firstTime) {
            if (a[0]==0x0000_7001 && a[3]==0x80 && a[1]==0 && a[2]==0) {
        // Cyrix MediaGX MMXEnhanced returns: EAX= 00007001, EDX=00000080.
        // These are NOT standard Intel values
        // (TLB = 32 entry, 4 way associative, 4K pages)
        // (L1 cache = 16K, 4way, linesize16)
                datacache[0].size=8;
                datacache[0].associativity=4;
                datacache[0].lineSize=16;
                return;
            }
            // lsb of a is how many times to loop.
            numinfos = a[0] & 0xFF;
            // and otherwise it should be ignored
            a[0] &= 0xFFFF_FF00;
            firstTime = false;
        }
        for (int c=0; c<4;++c) {
            // high bit set == no info.
            if (a[c] & 0x8000_0000) continue;
            decipherCpuid2(cast(ubyte)(a[c] & 0xFF));
            decipherCpuid2(cast(ubyte)((a[c]>>8) & 0xFF));
            decipherCpuid2(cast(ubyte)((a[c]>>16) & 0xFF));
            decipherCpuid2(cast(ubyte)((a[c]>>24) & 0xFF));
        }
    } while (++pass < numinfos);
}
552
// CPUID4: "Deterministic cache parameters" leaf
//
// Queries successive sub-leaves of CPUID function 4 until one reports
// cache type 0. Every data or unified cache found is recorded in
// datacache[], and cpuFeatures.maxCores is raised to the largest core
// count reported by any sub-leaf.
void getcacheinfoCPUID4()
{
    int cachenum = 0;
    for (;;) {
        uint a, b, number_of_sets;
        version (GNU_OR_LDC) asm pure nothrow @nogc {
            "cpuid" : "=a" (a), "=b" (b), "=c" (number_of_sets) : "a" (4), "c" (cachenum) : "edx";
        } else asm pure nothrow @nogc {
            mov EAX, 4;
            mov ECX, cachenum;
            cpuid;
            mov a, EAX;
            mov b, EBX;
            mov number_of_sets, ECX;
        }
        ++cachenum;
        immutable uint cachetype = a & 0x1F;
        if (cachetype == 0) break; // no more caches
        immutable uint numthreads = ((a>>14) & 0xFFF)  + 1;
        immutable uint numcores = ((a>>26) & 0x3F) + 1;
        if (numcores > cpuFeatures.maxCores) cpuFeatures.maxCores = numcores;
        if (cachetype != 1 && cachetype != 3) continue; // we only want data & unified caches

        ++number_of_sets;
        // Zero-based cache level. A level field of 0 wraps to 255 and is
        // rejected by the bounds check below.
        immutable ubyte level = cast(ubyte)(((a>>5)&7)-1);
        // Ignore deep caches. The comparison must be >=, since
        // level == datacache.length would already index past the array.
        if (level >= datacache.length) continue;
        datacache[level].associativity = a & 0x200 ? ubyte.max :cast(ubyte)((b>>22)+1);
        datacache[level].lineSize = (b & 0xFFF)+ 1; // system coherency line size
        immutable uint line_partitions = ((b >> 12)& 0x3FF) + 1;
        // Size = number of sets * associativity * cachelinesize * linepartitions
        // and must convert to Kb, also dividing by the number of hyperthreads using this cache.
        immutable ulong sz = (datacache[level].associativity< ubyte.max)? number_of_sets *
            datacache[level].associativity : number_of_sets;
        datacache[level].size = cast(size_t)(
                (sz * datacache[level].lineSize * line_partitions ) / (numthreads *1024));
        if (level == 0 && cachetype == 3) {
            // Halve the size for unified L1 caches
            datacache[level].size/=2;
        }
    }
}
594
// CPUID8000_0005 & 6
//
// Reads the L1 data cache description from extended function 8000_0005h
// into datacache[0]. When extended function 8000_0006h is available, the
// L2 and L3 descriptions are stored in datacache[1] and datacache[2].
// cpuFeatures.maxCores is raised if function 8000_0008h reports more cores.
void getAMDcacheinfo()
{
    uint dummy, c5, c6, d6;
    version (GNU_OR_LDC) asm pure nothrow @nogc {
        "cpuid" : "=a" (dummy), "=c" (c5) : "a" (0x8000_0005) : "ebx", "edx";
    } else asm pure nothrow @nogc {
        mov EAX, 0x8000_0005; // L1 cache
        cpuid;
        // EAX has L1_TLB_4M.
        // EBX has L1_TLB_4K
        // EDX has L1 instruction cache
        mov c5, ECX;
    }

    datacache[0].size = ( (c5>>24) & 0xFF);
    datacache[0].associativity = cast(ubyte)( (c5 >> 16) & 0xFF);
    datacache[0].lineSize = c5 & 0xFF;

    if (max_extended_cpuid >= 0x8000_0006) {
        // AMD K6-III or K6-2+ or later.
        // Kept as a uint: the hardware field is 8 bits wide and holds the
        // count minus one, so a field value of 255 means 256. Incrementing
        // a ubyte would wrap that to 0 and make the L3 division below
        // divide by zero.
        uint numcores = 1;
        if (max_extended_cpuid >= 0x8000_0008) {
            ubyte corefield; // low byte of ECX: core count minus one
            version (GNU_OR_LDC) asm pure nothrow @nogc {
                "cpuid" : "=a" (dummy), "=c" (corefield) : "a" (0x8000_0008) : "ebx", "edx";
            } else asm pure nothrow @nogc {
                mov EAX, 0x8000_0008;
                cpuid;
                mov corefield, CL;
            }
            numcores = corefield + 1u;
            if (numcores>cpuFeatures.maxCores) cpuFeatures.maxCores = numcores;
        }

        version (GNU_OR_LDC) asm pure nothrow @nogc {
            "cpuid" : "=a" (dummy), "=c" (c6), "=d" (d6) : "a" (0x8000_0006) : "ebx";
        } else asm pure nothrow @nogc {
            mov EAX, 0x8000_0006; // L2/L3 cache
            cpuid;
            mov c6, ECX; // L2 cache info
            mov d6, EDX; // L3 cache info
        }

        // Translates the 4-bit associativity code into a number of ways.
        static immutable ubyte [] assocmap = [ 0, 1, 2, 0, 4, 0, 8, 0, 16, 0, 32, 48, 64, 96, 128, 0xFF ];
        datacache[1].size = (c6>>16) & 0xFFFF;
        datacache[1].associativity = assocmap[(c6>>12)&0xF];
        datacache[1].lineSize = c6 & 0xFF;

        // The L3 cache value is TOTAL, not per core.
        datacache[2].size = ((d6>>18)*512)/numcores; // could be up to 2 * this, -1.
        datacache[2].associativity = assocmap[(d6>>12)&0xF];
        datacache[2].lineSize = d6 & 0xFF;
    }
}
649
// For Intel CoreI7 and later, use function 0x0B
// to determine number of processors.
//
// Queries sub-leaves 0 and 1 of CPUID function 0x0B. Sub-leaf 0 supplies
// the number of threads per core; sub-leaf 1 supplies the value stored in
// cpuFeatures.maxThreads, from which cpuFeatures.maxCores is derived.
void getCpuInfo0B()
{
    // Stays 0 until sub-leaf 0 reports a non-zero thread count.
    uint threadsPerCore = 0;
    uint a, b, c, d;
    // I'm not sure about this. The docs state that there
    // are 2 hyperthreads per core if HT is factory enabled.
    for (int level = 0; level < 2; level++)
    {
        version (GNU_OR_LDC) asm pure nothrow @nogc {
            "cpuid" : "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (0x0B), "c" (level);
        } else asm pure nothrow @nogc {
            mov EAX, 0x0B;
            mov ECX, level;
            cpuid;
            mov a, EAX;
            mov b, EBX;
            mov c, ECX;
            mov d, EDX;
        }
        if (b != 0)
        {
            immutable uint logical = b & 0xFFFF;
            if (level == 0)
                threadsPerCore = logical;
            else if (level == 1)
            {
                cpuFeatures.maxThreads = logical;
                // Without a usable threads-per-core value the core count
                // cannot be derived; leave maxCores untouched rather than
                // dividing by zero.
                if (threadsPerCore != 0)
                    cpuFeatures.maxCores = logical / threadsPerCore;
            }
        }
        // Got "invalid domain" returned from cpuid
        if (a == 0 && b == 0)
            break;
    }
}
686
687void cpuidX86()
688{
689    auto cf = getCpuFeatures();
690
691    uint a, b, c, d;
692    uint* venptr = cast(uint*)cf.vendorID.ptr;
693    version (GNU_OR_LDC)
694    {
695        asm pure nothrow @nogc {
696            "cpuid" : "=a" (max_cpuid), "=b" (venptr[0]), "=d" (venptr[1]), "=c" (venptr[2]) : "a" (0);
697            "cpuid" : "=a" (max_extended_cpuid) : "a" (0x8000_0000) : "ebx", "ecx", "edx";
698        }
699    }
700    else
701    {
702        uint a2;
703        version (D_InlineAsm_X86)
704        {
705            asm pure nothrow @nogc {
706                mov EAX, 0;
707                cpuid;
708                mov a, EAX;
709                mov EAX, venptr;
710                mov [EAX], EBX;
711                mov [EAX + 4], EDX;
712                mov [EAX + 8], ECX;
713            }
714        }
715        else version (D_InlineAsm_X86_64)
716        {
717            asm pure nothrow @nogc {
718                mov EAX, 0;
719                cpuid;
720                mov a, EAX;
721                mov RAX, venptr;
722                mov [RAX], EBX;
723                mov [RAX + 4], EDX;
724                mov [RAX + 8], ECX;
725            }
726        }
727        asm pure nothrow @nogc {
728            mov EAX, 0x8000_0000;
729            cpuid;
730            mov a2, EAX;
731        }
732        max_cpuid = a;
733        max_extended_cpuid = a2;
734    }
735
736
737    cf.probablyIntel = cf.vendorID == "GenuineIntel";
738    cf.probablyAMD = (cf.vendorID == "AuthenticAMD" || cf.vendorID == "HygonGenuine");
739    uint apic = 0; // brand index, apic id
740    version (GNU_OR_LDC) asm pure nothrow @nogc {
741        "cpuid" : "=a" (a), "=b" (apic), "=c" (cf.miscfeatures), "=d" (cf.features) : "a" (1);
742    } else {
743        asm pure nothrow @nogc {
744            mov EAX, 1; // model, stepping
745            cpuid;
746            mov a, EAX;
747            mov apic, EBX;
748            mov c, ECX;
749            mov d, EDX;
750        }
751        cf.features = d;
752        cf.miscfeatures = c;
753    }
754    stepping = a & 0xF;
755    immutable uint fbase = (a >> 8) & 0xF;
756    immutable uint mbase = (a >> 4) & 0xF;
757    family = ((fbase == 0xF) || (fbase == 0)) ? fbase + (a >> 20) & 0xFF : fbase;
758    model = ((fbase == 0xF) || (fbase == 6 && cf.probablyIntel) ) ?
759         mbase + ((a >> 12) & 0xF0) : mbase;
760
761    if (max_cpuid >= 7)
762    {
763        version (GNU_OR_LDC) asm pure nothrow @nogc {
764            "cpuid" : "=a" (a), "=b" (cf.extfeatures), "=c" (c) : "a" (7), "c" (0) : "edx";
765        } else {
766            uint ext;
767            asm pure nothrow @nogc {
768                mov EAX, 7; // Structured extended feature leaf.
769                mov ECX, 0; // Main leaf.
770                cpuid;
771                mov ext, EBX; // HLE, AVX2, RTM, etc.
772            }
773            cf.extfeatures = ext;
774        }
775    }
776
777    if (cf.miscfeatures & OSXSAVE_BIT)
778    {
779        version (GNU_OR_LDC) asm pure nothrow @nogc {
780            /* Old assemblers do not recognize xgetbv, and there is no easy way
781             * to conditionally compile based on the assembler used, so use the
782             * raw .byte sequence instead.  */
783            ".byte 0x0f, 0x01, 0xd0" : "=a" (a), "=d" (d) : "c" (0);
784        } else asm pure nothrow @nogc {
785            mov ECX, 0;
786            xgetbv;
787            mov d, EDX;
788            mov a, EAX;
789        }
790        cf.xfeatures = cast(ulong)d << 32 | a;
791    }
792
793    cf.amdfeatures = 0;
794    cf.amdmiscfeatures = 0;
795    if (max_extended_cpuid >= 0x8000_0001) {
796        version (GNU_OR_LDC) asm pure nothrow @nogc {
797            "cpuid" : "=a" (a), "=c" (cf.amdmiscfeatures), "=d" (cf.amdfeatures) : "a" (0x8000_0001) : "ebx";
798        } else {
799            asm pure nothrow @nogc {
800                mov EAX, 0x8000_0001;
801                cpuid;
802                mov c, ECX;
803                mov d, EDX;
804            }
805            cf.amdmiscfeatures = c;
806            cf.amdfeatures = d;
807        }
808    }
809    // Try to detect fraudulent vendorIDs
810    if (amd3dnow) cf.probablyIntel = false;
811
812    if (!cf.probablyIntel && max_extended_cpuid >= 0x8000_0008) {
813        //http://support.amd.com/TechDocs/25481.pdf pg.36
814        cf.maxCores = 1;
815        if (hyperThreadingBit) {
816            // determine max number of cores for AMD
817            version (GNU_OR_LDC) asm pure nothrow @nogc {
818                "cpuid" : "=a" (a), "=c" (c) : "a" (0x8000_0008) : "ebx", "edx";
819            } else asm pure nothrow @nogc {
820                mov EAX, 0x8000_0008;
821                cpuid;
822                mov c, ECX;
823            }
824            cf.maxCores += c & 0xFF;
825        }
826    }
827
828    if (max_extended_cpuid >= 0x8000_0004) {
829        uint* pnb = cast(uint*)cf.processorNameBuffer.ptr;
830        version (GNU_OR_LDC)
831        {
832            asm pure nothrow @nogc {
833                "cpuid" : "=a" (pnb[0]), "=b" (pnb[1]), "=c" (pnb[ 2]), "=d" (pnb[ 3]) : "a" (0x8000_0002);
834                "cpuid" : "=a" (pnb[4]), "=b" (pnb[5]), "=c" (pnb[ 6]), "=d" (pnb[ 7]) : "a" (0x8000_0003);
835                "cpuid" : "=a" (pnb[8]), "=b" (pnb[9]), "=c" (pnb[10]), "=d" (pnb[11]) : "a" (0x8000_0004);
836            }
837        }
838        else version (D_InlineAsm_X86)
839        {
840            asm pure nothrow @nogc {
841                push ESI;
842                mov ESI, pnb;
843                mov EAX, 0x8000_0002;
844                cpuid;
845                mov [ESI], EAX;
846                mov [ESI+4], EBX;
847                mov [ESI+8], ECX;
848                mov [ESI+12], EDX;
849                mov EAX, 0x8000_0003;
850                cpuid;
851                mov [ESI+16], EAX;
852                mov [ESI+20], EBX;
853                mov [ESI+24], ECX;
854                mov [ESI+28], EDX;
855                mov EAX, 0x8000_0004;
856                cpuid;
857                mov [ESI+32], EAX;
858                mov [ESI+36], EBX;
859                mov [ESI+40], ECX;
860                mov [ESI+44], EDX;
861                pop ESI;
862            }
863        }
864        else version (D_InlineAsm_X86_64)
865        {
866            asm pure nothrow @nogc {
867                push RSI;
868                mov RSI, pnb;
869                mov EAX, 0x8000_0002;
870                cpuid;
871                mov [RSI], EAX;
872                mov [RSI+4], EBX;
873                mov [RSI+8], ECX;
874                mov [RSI+12], EDX;
875                mov EAX, 0x8000_0003;
876                cpuid;
877                mov [RSI+16], EAX;
878                mov [RSI+20], EBX;
879                mov [RSI+24], ECX;
880                mov [RSI+28], EDX;
881                mov EAX, 0x8000_0004;
882                cpuid;
883                mov [RSI+32], EAX;
884                mov [RSI+36], EBX;
885                mov [RSI+40], ECX;
886                mov [RSI+44], EDX;
887                pop RSI;
888            }
889        }
890        // Intel P4 and PM pad at front with spaces.
891        // Other CPUs pad at end with nulls.
892        int start = 0, end = 0;
893        while (cf.processorNameBuffer[start] == ' ') { ++start; }
894        while (cf.processorNameBuffer[cf.processorNameBuffer.length-end-1] == 0) { ++end; }
895        cf.processorName = cast(string)(cf.processorNameBuffer[start..$-end]);
896    } else {
897        cf.processorName = "Unknown CPU";
898    }
899    // Determine cache sizes
900
901    // Intel docs specify that they return 0 for 0x8000_0005.
902    // AMD docs do not specify the behaviour for 0004 and 0002.
903    // Centaur/VIA and most other manufacturers use the AMD method,
904    // except Cyrix MediaGX MMX Enhanced uses their OWN form of CPUID2!
905    // NS Geode GX1 provides CyrixCPUID2 _and_ does the same wrong behaviour
906    // for CPUID80000005. But Geode GX uses the AMD method
907
908    // Deal with Geode GX1 - make it same as MediaGX MMX.
909    if (max_extended_cpuid==0x8000_0005 && max_cpuid==2) {
910        max_extended_cpuid = 0x8000_0004;
911    }
912    // Therefore, we try the AMD method unless it's an Intel chip.
913    // If we still have no info, try the Intel methods.
914    datacache[0].size = 0;
915    if (max_cpuid<2 || !cf.probablyIntel) {
916        if (max_extended_cpuid >= 0x8000_0005) {
917            getAMDcacheinfo();
918        } else if (cf.probablyAMD) {
919            // According to AMDProcRecognitionAppNote, this means CPU
920            // K5 model 0, or Am5x86 (model 4), or Am4x86DX4 (model 4)
921            // Am5x86 has 16Kb 4-way unified data & code cache.
922            datacache[0].size = 8;
923            datacache[0].associativity = 4;
924            datacache[0].lineSize = 32;
925        } else {
926            // Some obscure CPU.
927            // Values for Cyrix 6x86MX (family 6, model 0)
928            datacache[0].size = 64;
929            datacache[0].associativity = 4;
930            datacache[0].lineSize = 32;
931        }
932    }
933    if ((datacache[0].size == 0) && max_cpuid>=4) {
934        getcacheinfoCPUID4();
935    }
936    if ((datacache[0].size == 0) && max_cpuid>=2) {
937        getcacheinfoCPUID2();
938    }
939    if (datacache[0].size == 0) {
940        // Pentium, PMMX, late model 486, or an obscure CPU
941        if (mmx) { // Pentium MMX. Also has 8kB code cache.
942            datacache[0].size = 16;
943            datacache[0].associativity = 4;
944            datacache[0].lineSize = 32;
945        } else { // Pentium 1 (which also has 8kB code cache)
946                 // or 486.
947            // Cyrix 6x86: 16, 4way, 32 linesize
948            datacache[0].size = 8;
949            datacache[0].associativity = 2;
950            datacache[0].lineSize = 32;
951        }
952    }
953    if (cf.probablyIntel && max_cpuid >= 0x0B) {
954        // For Intel i7 and later, use function 0x0B to determine
955        // cores and hyperthreads.
956        getCpuInfo0B();
957    } else {
958        if (hyperThreadingBit) cf.maxThreads = (apic>>>16) & 0xFF;
959        else cf.maxThreads = cf.maxCores;
960
961        if (cf.probablyAMD && max_extended_cpuid >= 0x8000_001E) {
962            version (GNU_OR_LDC) asm pure nothrow @nogc {
963                "cpuid" : "=a" (a), "=b" (b) : "a" (0x8000_001E) : "ecx", "edx";
964            } else {
965                asm pure nothrow @nogc {
966                    mov EAX, 0x8000_001e;
967                    cpuid;
968                    mov b, EBX;
969                }
970            }
971            ubyte coresPerComputeUnit = ((b >> 8) & 3) + 1;
972            cf.maxCores = cf.maxThreads / coresPerComputeUnit;
973        }
974    }
975}
976
// Return true if the cpuid instruction is supported.
// BUG(WONTFIX): Returns false for Cyrix 6x86 and 6x86L. They will be treated as 486 machines.
//
// On X86_64 builds the answer is unconditionally true. Otherwise the function
// tries to toggle bit 21 (mask 0x0020_0000, the ID bit) of EFLAGS and reports
// whether the toggle was retained. If neither inline-assembly variant below
// is compiled in, `flags` keeps its default value of 0 and the result is false.
bool hasCPUID()
{
    version (X86_64)
        return true;
    else
    {
        // After the asm block: the set of EFLAGS bits that changed.
        uint flags;
        version (GNU_OR_LDC)
        {
            // AT&T-syntax sequence; the result is returned in EAX ("=a").
            // The original EFLAGS value is restored by the final popfl.
            // http://wiki.osdev.org/CPUID#Checking_CPUID_availability
            asm nothrow @nogc { "
                pushfl                    # Save EFLAGS
                pushfl                    # Store EFLAGS
                xorl $0x00200000, (%%esp) # Invert the ID bit in stored EFLAGS
                popfl                     # Load stored EFLAGS (with ID bit inverted)
                pushfl                    # Store EFLAGS again (ID bit may or may not be inverted)
                popl %%eax                # eax = modified EFLAGS (ID bit may or may not be inverted)
                xorl (%%esp), %%eax       # eax = whichever bits were changed
                popfl                     # Restore original EFLAGS
                " : "=a" (flags);
            }
        }
        else version (D_InlineAsm_X86)
        {
            // Same test in DMD inline-asm syntax. Note that, unlike the
            // GNU/LDC sequence above, this one does not reload the original
            // EFLAGS value afterwards.
            asm nothrow @nogc {
                pushfd;                 // push EFLAGS ...
                pop EAX;                // ... EAX = original EFLAGS
                mov flags, EAX;         // remember the original value
                xor EAX, 0x0020_0000;   // invert the ID bit
                push EAX;
                popfd;                  // try to load the modified value into EFLAGS
                pushfd;
                pop EAX;                // EAX = EFLAGS as actually stored by the CPU
                xor flags, EAX;         // flags = bits that differ from the original
            }
        }
        // CPUID is available only if the ID bit could actually be changed.
        return (flags & 0x0020_0000) != 0;
    }
}
1018
1019} else { // supported X86
1020
1021    bool hasCPUID() { return false; }
1022
1023    void cpuidX86()
1024    {
1025            datacache[0].size = 8;
1026            datacache[0].associativity = 2;
1027            datacache[0].lineSize = 32;
1028    }
1029}
1030
1031/*
1032// TODO: Implement this function with OS support
1033void cpuidPPC()
1034{
1035    enum :int  { PPC601, PPC603, PPC603E, PPC604,
1036                 PPC604E, PPC620, PPCG3, PPCG4, PPCG5 }
1037
1038    // TODO:
1039    // asm { mfpvr; } returns the CPU version but unfortunately it can
1040    // only be used in kernel mode. So OS support is required.
1041    int cputype = PPC603;
1042
1043    // 601 has a 8KB combined data & code L1 cache.
1044    uint sizes[] = [4, 8, 16, 16, 32, 32, 32, 32, 64];
1045    ubyte ways[] = [8, 2,  4,  4,  4,  8,  8,  8,  8];
1046    uint L2size[]= [0, 0,  0,  0,  0,  0,  0,  256,  512];
1047    uint L3size[]= [0, 0,  0,  0,  0,  0,  0,  2048,  0];
1048
1049    datacache[0].size = sizes[cputype];
1050    datacache[0].associativity = ways[cputype];
1051    datacache[0].lineSize = (cputype==PPCG5)? 128 :
1052        (cputype == PPC620 || cputype == PPCG3)? 64 : 32;
1053    datacache[1].size = L2size[cputype];
1054    datacache[2].size = L3size[cputype];
1055    datacache[1].lineSize = datacache[0].lineSize;
1056    datacache[2].lineSize = datacache[0].lineSize;
1057}
1058
1059// TODO: Implement this function with OS support
1060void cpuidSparc()
1061{
    // UltraSparcIIi : L1 = 16,  2way. L2 = 512, 4 way.
1063    // UltraSparcIII : L1 = 64,  4way. L2= 4096 or 8192.
1064    // UltraSparcIIIi: L1 = 64,  4way. L2= 1024, 4 way
1065    // UltraSparcIV  : L1 = 64,  4way. L2 = 16*1024.
1066    // UltraSparcIV+ : L1 = 64,  4way. L2 = 2048, L3=32*1024.
1067    // Sparc64V      : L1 = 128, 2way. L2 = 4096 4way.
1068}
1069*/
1070
// Module constructor: gathers the CPU description once, normalises the
// data-cache table, and then publishes every derived flag into the
// module's underscore-prefixed result variables.
shared static this()
{
    // True when at least one bit of `mask` is set in `word`.
    static bool isSet(ulong word, ulong mask)
    {
        return (word & mask) != 0;
    }

    auto cf = getCpuFeatures();

    // When CPUID is missing it's a 386 or 486, or a Cyrix 6x86 (which
    // probably still has an external cache); nothing is queried then.
    if (hasCPUID())
        cpuidX86();

    // No first-level size was determined: guess same as Pentium 1.
    if (datacache[0].size == 0)
    {
        datacache[0].size = 8;
        datacache[0].associativity = 2;
        datacache[0].lineSize = 32;
    }

    // Count the populated levels. Every unused level is set equal to the
    // full address space, inheriting the line size of the level before it.
    numCacheLevels = 1;
    foreach (level; 1 .. datacache.length)
    {
        if (datacache[level].size != 0)
        {
            ++numCacheLevels;
            continue;
        }
        datacache[level].size = size_t.max/1024;
        datacache[level].associativity = 1;
        datacache[level].lineSize = datacache[level-1].lineSize;
    }

    // Set the immortals

    _dataCaches = datacache;
    _vendor = cast(string)cf.vendorID;
    _processor = cf.processorName;

    _x87onChip = isSet(cf.features, FPU_BIT);
    _mmx = isSet(cf.features, MMX_BIT);
    _sse = isSet(cf.features, SSE_BIT);
    _sse2 = isSet(cf.features, SSE2_BIT);
    _sse3 = isSet(cf.miscfeatures, SSE3_BIT);
    _ssse3 = isSet(cf.miscfeatures, SSSE3_BIT);
    _sse41 = isSet(cf.miscfeatures, SSE41_BIT);
    _sse42 = isSet(cf.miscfeatures, SSE42_BIT);
    _sse4a = isSet(cf.amdmiscfeatures, SSE4A_BIT);
    _aes = isSet(cf.miscfeatures, AES_BIT);
    _hasPclmulqdq = isSet(cf.miscfeatures, PCLMULQDQ_BIT);
    _hasRdrand = isSet(cf.miscfeatures, RDRAND_BIT);

    // AVX requires both state bits in xfeatures as well as the AVX feature bit.
    enum avxStateMask = XF_SSE_BIT|XF_YMM_BIT;
    _avx = (cf.xfeatures & avxStateMask) == avxStateMask
        && isSet(cf.miscfeatures, AVX_BIT);

    // The following are only reported when AVX itself is usable.
    _vaes = avx && aes;
    _hasVpclmulqdq = avx && hasPclmulqdq;
    _fma = avx && isSet(cf.miscfeatures, FMA_BIT);
    _fp16c = avx && isSet(cf.miscfeatures, FP16C_BIT);
    _avx2 = avx && isSet(cf.extfeatures, AVX2_BIT);

    _hle = isSet(cf.extfeatures, HLE_BIT);
    _rtm = isSet(cf.extfeatures, RTM_BIT);
    _hasRdseed = isSet(cf.extfeatures, RDSEED_BIT);
    _hasSha = isSet(cf.extfeatures, SHA_BIT);
    _amd3dnow = isSet(cf.amdfeatures, AMD_3DNOW_BIT);
    _amd3dnowExt = isSet(cf.amdfeatures, AMD_3DNOW_EXT_BIT);
    _amdMmx = isSet(cf.amdfeatures, AMD_MMX_BIT);
    _hasFxsr = isSet(cf.features, FXSR_BIT);
    _hasCmov = isSet(cf.features, CMOV_BIT);
    _hasRdtsc = isSet(cf.features, TIMESTAMP_BIT);
    _hasCmpxchg8b = isSet(cf.features, CMPXCHG8B_BIT);
    _hasCmpxchg16b = isSet(cf.miscfeatures, CMPXCHG16B_BIT);

    // The SYSENTER/SYSEXIT features were buggy on Pentium Pro and early PentiumII.
    // (REF: www.geoffchappell.com).
    const bool sysEnterUnreliable = cf.probablyIntel
        && (family < 6
            || (family == 6 && (model < 3 || (model == 3 && stepping < 3))));
    _hasSysEnterSysExit = !sysEnterUnreliable
        && isSet(cf.features, SYSENTERSYSEXIT_BIT);

    _has3dnowPrefetch = isSet(cf.amdmiscfeatures, AMD_3DNOW_PREFETCH_BIT);
    _hasLahfSahf = isSet(cf.amdmiscfeatures, LAHFSAHF_BIT);
    _hasPopcnt = isSet(cf.miscfeatures, POPCNT_BIT);
    _hasLzcnt = isSet(cf.amdmiscfeatures, LZCNT_BIT);
    _isX86_64 = isSet(cf.amdfeatures, AMD64_BIT);
    _isItanium = isSet(cf.features, IA64_BIT);

    // Topology.
    _hyperThreading = cf.maxThreads > cf.maxCores;
    _threadsPerCPU = cf.maxThreads;
    _coresPerCPU = cf.maxCores;

    // Optimisation hints derived from vendor, family and model.
    _preferAthlon = cf.probablyAMD && family >= 6;
    _preferPentium4 = cf.probablyIntel && family == 0xF;
    _preferPentium1 = family < 6
        || (family == 6 && model < 0xF && !cf.probablyIntel);
}
1157