kfr

Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)
Log | Files | Refs | README

commit 2cbc26b44bf11e8416a40ca39366d42ef53ae1f3
parent dd654512eaaf6d7aedfd2f6b6e147b047d594ea2
Author: d.levin256@gmail.com <d.levin256@gmail.com>
Date:   Wed,  9 Nov 2016 10:09:14 +0300

Move cpuid.hpp and cpuid_auto.hpp to cpuid folder

Diffstat:
Minclude/kfr/all.hpp | 1+
Minclude/kfr/base.hpp | 2--
Dinclude/kfr/base/cpuid.hpp | 291------------------------------------------------------------------------------
Ainclude/kfr/cpuid.hpp | 26++++++++++++++++++++++++++
Ainclude/kfr/cpuid/cpuid.hpp | 291++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Rinclude/kfr/base/cpuid_auto.hpp -> include/kfr/cpuid/cpuid_auto.hpp | 0
Msources.cmake | 5+++--
Mtests/multiarch.cpp | 1+
8 files changed, 322 insertions(+), 295 deletions(-)

diff --git a/include/kfr/all.hpp b/include/kfr/all.hpp @@ -22,6 +22,7 @@ */ #include "base.hpp" +#include "cpuid.hpp" #include "dft.hpp" #include "dsp.hpp" #include "io.hpp" diff --git a/include/kfr/base.hpp b/include/kfr/base.hpp @@ -30,8 +30,6 @@ #include "base/compiletime.hpp" #include "base/complex.hpp" #include "base/constants.hpp" -#include "base/cpuid.hpp" -#include "base/cpuid_auto.hpp" #include "base/digitreverse.hpp" #include "base/expression.hpp" #include "base/function.hpp" diff --git a/include/kfr/base/cpuid.hpp b/include/kfr/base/cpuid.hpp @@ -1,291 +0,0 @@ -/** @addtogroup cpuid - * @{ - */ -/* - Copyright (C) 2016 D Levin (https://www.kfrlib.com) - This file is part of KFR - - KFR is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - KFR is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with KFR. - - If GPL is not suitable for your project, you must purchase a commercial license to use KFR. - Buying a commercial license is mandatory as soon as you develop commercial activities without - disclosing the source code of your own applications. - See https://www.kfrlib.com for details. - */ -#pragma once - -#ifdef _MSC_VER -#include <intrin.h> -#endif - -#include "types.hpp" -#include <cstring> - -namespace kfr -{ -#ifdef CMT_ARCH_X86 - -struct cpu_features -{ - u32 max; - u32 exmax; - u32 isIntel : 1; - u32 isAMD : 1; - u32 has3DNOW : 1; - u32 has3DNOWEXT : 1; - u32 hasABM : 1; - u32 hasADX : 1; - u32 hasAES : 1; - u32 hasAVX : 1; - u32 hasAVX2 : 1; - u32 hasAVXOSSUPPORT : 1; - u32 hasAVX512OSSUPPORT : 1; - u32 hasAVX512CD : 1; - u32 hasAVX512ER : 1; - u32 hasAVX512F : 1; - u32 hasAVX512DQ : 1; - u32 hasAVX512PF : 1; - u32 hasAVX512BW : 1; - u32 hasBMI1 : 1; - u32 hasBMI2 : 1; - u32 hasCLFSH : 1; - u32 hasCMOV : 1; - u32 hasCMPXCHG16B : 1; - u32 hasCX8 : 1; - u32 hasERMS : 1; - u32 hasF16C : 1; - u32 hasFMA : 1; - u32 hasFSGSBASE : 1; - u32 hasFXSR : 1; - u32 hasHLE : 1; - u32 hasINVPCID : 1; - u32 hasLAHF : 1; - u32 hasLZCNT : 1; - u32 hasMMX : 1; - u32 hasMMXEXT : 1; - u32 hasMONITOR : 1; - u32 hasMOVBE : 1; - u32 hasMSR : 1; - u32 hasOSXSAVE : 1; - u32 hasPCLMULQDQ : 1; - u32 hasPOPCNT : 1; - u32 hasPREFETCHWT1 : 1; - u32 hasRDRAND : 1; - u32 hasRDSEED : 1; - u32 hasRDTSCP : 1; - u32 hasRTM : 1; - u32 hasSEP : 1; - u32 hasSHA : 1; - u32 hasSSE : 1; - u32 hasSSE2 : 1; - u32 hasSSE3 : 1; - u32 hasSSE41 : 1; - u32 hasSSE42 : 1; - u32 hasSSE4a : 1; - u32 hasSSSE3 : 1; - u32 hasSYSCALL : 1; - u32 hasTBM : 1; - u32 hasXOP : 1; - u32 hasXSAVE : 1; - u32 padding1 : 6; - char vendor[17]; - char model[49]; - char padding2[2]; -}; - -namespace internal -{ - -struct cpu_data -{ - u32 data[4]; -}; - -#if defined CMT_COMPILER_GNU || defined CMT_COMPILER_CLANG -CMT_INLINE u32 get_cpuid(u32 func, u32 subfunc, u32* eax, u32* ebx, u32* ecx, u32* edx) -{ - __asm__("cpuid" : "=a"(*eax), "=b"(*ebx), "=c"(*ecx), "=d"(*edx) : "0"(func), "2"(subfunc)); - return 1; -} -CMT_INLINE void cpuid(u32* ptr, u32 func, u32 subfunc = 0) -{ - get_cpuid(func, subfunc, &ptr[0], &ptr[1], &ptr[2], &ptr[3]); -} -CMT_INLINE u32 get_xcr0() -{ - u32 xcr0; - __asm__ __volatile__("xgetbv" : "=a"(xcr0) : "c"(0) : "%edx"); - return xcr0; -} -#elif defined CMT_COMPILER_MSVC - -CMT_INLINE void cpuid(u32* ptr, u32 func, u32 subfunc = 0) { __cpuidex((int*)ptr, (int)func, (int)subfunc); } -CMT_INLINE u32 get_xcr0() -{ -#ifdef _XCR_XFEATURE_ENABLED_MASK - unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK); - return (u32)Result; -#else - return 0; -#endif -} -#endif - -template <size_t = 0> -cpu_t detect_cpu() -{ - cpu_features c; - memset(&c, 0, sizeof(c)); - cpu_data data0; - cpu_data exdata0; - - u32 f_1_ECX(0); - u32 f_1_EDX(0); - u32 f_7_EBX(0); - u32 f_7_ECX(0); - u32 f_81_ECX(0); - u32 f_81_EDX(0); - - cpuid(data0.data, 0); - c.max = static_cast<u32>(data0.data[0]); - cpuid(exdata0.data, 0x80000000); - c.exmax = static_cast<u32>(exdata0.data[0]); - - *ptr_cast<u32>(c.vendor) = static_cast<u32>(data0.data[1]); - *ptr_cast<u32>(c.vendor + 4) = static_cast<u32>(data0.data[3]); - *ptr_cast<u32>(c.vendor + 8) = static_cast<u32>(data0.data[2]); - - c.isIntel = strncmp(c.vendor, "GenuineIntel", sizeof(c.vendor)) == 0 ? 1 : 0; - c.isAMD = strncmp(c.vendor, "AuthenticAMD", sizeof(c.vendor)) == 0 ? 1 : 0; - - if (c.max >= 1) - { - cpu_data data1; - cpuid(data1.data, 1); - f_1_ECX = static_cast<u32>(data1.data[2]); - f_1_EDX = static_cast<u32>(data1.data[3]); - } - - if (c.max >= 7) - { - cpu_data data7; - cpuid(data7.data, 7); - f_7_EBX = static_cast<u32>(data7.data[1]); - f_7_ECX = static_cast<u32>(data7.data[2]); - } - - if (c.exmax >= 0x80000001) - { - cpu_data data81; - cpuid(data81.data, 0x80000001); - f_81_ECX = static_cast<u32>(data81.data[2]); - f_81_EDX = static_cast<u32>(data81.data[3]); - } - - if (c.exmax >= 0x80000004) - { - cpu_data data82; - cpu_data data83; - cpu_data data84; - cpuid(data82.data, 0x80000002); - cpuid(data83.data, 0x80000003); - cpuid(data84.data, 0x80000004); - memcpy(c.model, data82.data, sizeof(cpu_data)); - memcpy(c.model + 16, data83.data, sizeof(cpu_data)); - memcpy(c.model + 32, data84.data, sizeof(cpu_data)); - } - - c.hasSSE3 = f_1_ECX >> 0 & 1; - c.hasPCLMULQDQ = f_1_ECX >> 1 & 1; - c.hasMONITOR = f_1_ECX >> 3 & 1; - c.hasSSSE3 = f_1_ECX >> 9 & 1; - c.hasFMA = f_1_ECX >> 12 & 1; - c.hasCMPXCHG16B = f_1_ECX >> 13 & 1; - c.hasSSE41 = f_1_ECX >> 19 & 1; - c.hasSSE42 = f_1_ECX >> 20 & 1; - c.hasMOVBE = f_1_ECX >> 22 & 1; - c.hasPOPCNT = f_1_ECX >> 23 & 1; - c.hasAES = f_1_ECX >> 25 & 1; - c.hasXSAVE = f_1_ECX >> 26 & 1; - c.hasOSXSAVE = f_1_ECX >> 27 & 1; - c.hasAVX = f_1_ECX >> 28 & 1; - c.hasF16C = f_1_ECX >> 29 & 1; - c.hasRDRAND = f_1_ECX >> 30 & 1; - c.hasMSR = f_1_EDX >> 5 & 1; - c.hasCX8 = f_1_EDX >> 8 & 1; - c.hasSEP = f_1_EDX >> 11 & 1; - c.hasCMOV = f_1_EDX >> 15 & 1; - c.hasCLFSH = f_1_EDX >> 19 & 1; - c.hasMMX = f_1_EDX >> 23 & 1; - c.hasFXSR = f_1_EDX >> 24 & 1; - c.hasSSE = f_1_EDX >> 25 & 1; - c.hasSSE2 = f_1_EDX >> 26 & 1; - c.hasFSGSBASE = f_7_EBX >> 0 & 1; - c.hasBMI1 = f_7_EBX >> 3 & 1; - c.hasHLE = c.isIntel && f_7_EBX >> 4 & 1; - c.hasAVX2 = f_7_EBX >> 5 & 1; - c.hasBMI2 = f_7_EBX >> 8 & 1; - c.hasERMS = f_7_EBX >> 9 & 1; - c.hasINVPCID = f_7_EBX >> 10 & 1; - c.hasRTM = c.isIntel && f_7_EBX >> 11 & 1; - c.hasAVX512F = f_7_EBX >> 16 & 1; - c.hasAVX512DQ = f_7_EBX >> 17 & 1; - c.hasRDSEED = f_7_EBX >> 18 & 1; - c.hasADX = f_7_EBX >> 19 & 1; - c.hasAVX512PF = f_7_EBX >> 26 & 1; - c.hasAVX512ER = f_7_EBX >> 27 & 1; - c.hasAVX512CD = f_7_EBX >> 28 & 1; - c.hasSHA = f_7_EBX >> 29 & 1; - c.hasAVX512BW = f_7_EBX >> 30 & 1; - c.hasPREFETCHWT1 = f_7_ECX >> 0 & 1; - c.hasLAHF = f_81_ECX >> 0 & 1; - c.hasLZCNT = c.isIntel && f_81_ECX >> 5 & 1; - c.hasABM = c.isAMD && f_81_ECX >> 5 & 1; - c.hasSSE4a = c.isAMD && f_81_ECX >> 6 & 1; - c.hasXOP = c.isAMD && f_81_ECX >> 11 & 1; - c.hasTBM = c.isAMD && f_81_ECX >> 21 & 1; - c.hasSYSCALL = c.isIntel && f_81_EDX >> 11 & 1; - c.hasMMXEXT = c.isAMD && f_81_EDX >> 22 & 1; - c.hasRDTSCP = c.isIntel && f_81_EDX >> 27 & 1; - c.has3DNOWEXT = c.isAMD && f_81_EDX >> 30 & 1; - c.has3DNOW = c.isAMD && f_81_EDX >> 31 & 1; - - c.hasAVXOSSUPPORT = c.hasAVX && c.hasOSXSAVE && (get_xcr0() & 0x06) == 0x06; - c.hasAVX512OSSUPPORT = c.hasAVXOSSUPPORT && c.hasAVX512F && c.hasOSXSAVE && (get_xcr0() & 0xE0) == 0xE0; - - if (c.hasAVX2 && c.hasAVXOSSUPPORT) - return cpu_t::avx2; - if (c.hasAVX && c.hasAVXOSSUPPORT) - return cpu_t::avx1; - if (c.hasSSE41) - return cpu_t::sse41; - if (c.hasSSSE3) - return cpu_t::ssse3; - if (c.hasSSE3) - return cpu_t::sse3; - if (c.hasSSE2) - return cpu_t::sse2; - return cpu_t::lowest; -} -} -#else - -template <size_t = 0> -cpu_t detect_cpu() -{ - return cpu_t::native; -} - -#endif -} diff --git a/include/kfr/cpuid.hpp b/include/kfr/cpuid.hpp @@ -0,0 +1,26 @@ +/* + Copyright (C) 2016 D Levin (https://www.kfrlib.com) + This file is part of KFR + + KFR is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + KFR is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with KFR. + + If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + Buying a commercial license is mandatory as soon as you develop commercial activities without + disclosing the source code of your own applications. + See https://www.kfrlib.com for details. + */ +#pragma once + +#include "cpuid/cpuid.hpp" +#include "cpuid/cpuid_auto.hpp" diff --git a/include/kfr/cpuid/cpuid.hpp b/include/kfr/cpuid/cpuid.hpp @@ -0,0 +1,291 @@ +/** @addtogroup cpuid + * @{ + */ +/* + Copyright (C) 2016 D Levin (https://www.kfrlib.com) + This file is part of KFR + + KFR is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + KFR is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with KFR. + + If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + Buying a commercial license is mandatory as soon as you develop commercial activities without + disclosing the source code of your own applications. + See https://www.kfrlib.com for details. + */ +#pragma once + +#ifdef _MSC_VER +#include <intrin.h> +#endif + +#include "../base/types.hpp" +#include <cstring> + +namespace kfr +{ +#ifdef CMT_ARCH_X86 + +struct cpu_features +{ + u32 max; + u32 exmax; + u32 isIntel : 1; + u32 isAMD : 1; + u32 has3DNOW : 1; + u32 has3DNOWEXT : 1; + u32 hasABM : 1; + u32 hasADX : 1; + u32 hasAES : 1; + u32 hasAVX : 1; + u32 hasAVX2 : 1; + u32 hasAVXOSSUPPORT : 1; + u32 hasAVX512OSSUPPORT : 1; + u32 hasAVX512CD : 1; + u32 hasAVX512ER : 1; + u32 hasAVX512F : 1; + u32 hasAVX512DQ : 1; + u32 hasAVX512PF : 1; + u32 hasAVX512BW : 1; + u32 hasBMI1 : 1; + u32 hasBMI2 : 1; + u32 hasCLFSH : 1; + u32 hasCMOV : 1; + u32 hasCMPXCHG16B : 1; + u32 hasCX8 : 1; + u32 hasERMS : 1; + u32 hasF16C : 1; + u32 hasFMA : 1; + u32 hasFSGSBASE : 1; + u32 hasFXSR : 1; + u32 hasHLE : 1; + u32 hasINVPCID : 1; + u32 hasLAHF : 1; + u32 hasLZCNT : 1; + u32 hasMMX : 1; + u32 hasMMXEXT : 1; + u32 hasMONITOR : 1; + u32 hasMOVBE : 1; + u32 hasMSR : 1; + u32 hasOSXSAVE : 1; + u32 hasPCLMULQDQ : 1; + u32 hasPOPCNT : 1; + u32 hasPREFETCHWT1 : 1; + u32 hasRDRAND : 1; + u32 hasRDSEED : 1; + u32 hasRDTSCP : 1; + u32 hasRTM : 1; + u32 hasSEP : 1; + u32 hasSHA : 1; + u32 hasSSE : 1; + u32 hasSSE2 : 1; + u32 hasSSE3 : 1; + u32 hasSSE41 : 1; + u32 hasSSE42 : 1; + u32 hasSSE4a : 1; + u32 hasSSSE3 : 1; + u32 hasSYSCALL : 1; + u32 hasTBM : 1; + u32 hasXOP : 1; + u32 hasXSAVE : 1; + u32 padding1 : 6; + char vendor[17]; + char model[49]; + char padding2[2]; +}; + +namespace internal +{ + +struct cpu_data +{ + u32 data[4]; +}; + +#if defined CMT_COMPILER_GNU || defined CMT_COMPILER_CLANG +CMT_INLINE u32 get_cpuid(u32 func, u32 subfunc, u32* eax, u32* ebx, u32* ecx, u32* edx) +{ + __asm__("cpuid" : "=a"(*eax), "=b"(*ebx), "=c"(*ecx), "=d"(*edx) : "0"(func), "2"(subfunc)); + return 1; +} +CMT_INLINE void cpuid(u32* ptr, u32 func, u32 subfunc = 0) +{ + get_cpuid(func, subfunc, &ptr[0], &ptr[1], &ptr[2], &ptr[3]); +} +CMT_INLINE u32 get_xcr0() +{ + u32 xcr0; + __asm__ __volatile__("xgetbv" : "=a"(xcr0) : "c"(0) : "%edx"); + return xcr0; +} +#elif defined CMT_COMPILER_MSVC + +CMT_INLINE void cpuid(u32* ptr, u32 func, u32 subfunc = 0) { __cpuidex((int*)ptr, (int)func, (int)subfunc); } +CMT_INLINE u32 get_xcr0() +{ +#ifdef _XCR_XFEATURE_ENABLED_MASK + unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK); + return (u32)Result; +#else + return 0; +#endif +} +#endif + +template <size_t = 0> +cpu_t detect_cpu() +{ + cpu_features c; + memset(&c, 0, sizeof(c)); + cpu_data data0; + cpu_data exdata0; + + u32 f_1_ECX(0); + u32 f_1_EDX(0); + u32 f_7_EBX(0); + u32 f_7_ECX(0); + u32 f_81_ECX(0); + u32 f_81_EDX(0); + + cpuid(data0.data, 0); + c.max = static_cast<u32>(data0.data[0]); + cpuid(exdata0.data, 0x80000000); + c.exmax = static_cast<u32>(exdata0.data[0]); + + *ptr_cast<u32>(c.vendor) = static_cast<u32>(data0.data[1]); + *ptr_cast<u32>(c.vendor + 4) = static_cast<u32>(data0.data[3]); + *ptr_cast<u32>(c.vendor + 8) = static_cast<u32>(data0.data[2]); + + c.isIntel = strncmp(c.vendor, "GenuineIntel", sizeof(c.vendor)) == 0 ? 1 : 0; + c.isAMD = strncmp(c.vendor, "AuthenticAMD", sizeof(c.vendor)) == 0 ? 1 : 0; + + if (c.max >= 1) + { + cpu_data data1; + cpuid(data1.data, 1); + f_1_ECX = static_cast<u32>(data1.data[2]); + f_1_EDX = static_cast<u32>(data1.data[3]); + } + + if (c.max >= 7) + { + cpu_data data7; + cpuid(data7.data, 7); + f_7_EBX = static_cast<u32>(data7.data[1]); + f_7_ECX = static_cast<u32>(data7.data[2]); + } + + if (c.exmax >= 0x80000001) + { + cpu_data data81; + cpuid(data81.data, 0x80000001); + f_81_ECX = static_cast<u32>(data81.data[2]); + f_81_EDX = static_cast<u32>(data81.data[3]); + } + + if (c.exmax >= 0x80000004) + { + cpu_data data82; + cpu_data data83; + cpu_data data84; + cpuid(data82.data, 0x80000002); + cpuid(data83.data, 0x80000003); + cpuid(data84.data, 0x80000004); + memcpy(c.model, data82.data, sizeof(cpu_data)); + memcpy(c.model + 16, data83.data, sizeof(cpu_data)); + memcpy(c.model + 32, data84.data, sizeof(cpu_data)); + } + + c.hasSSE3 = f_1_ECX >> 0 & 1; + c.hasPCLMULQDQ = f_1_ECX >> 1 & 1; + c.hasMONITOR = f_1_ECX >> 3 & 1; + c.hasSSSE3 = f_1_ECX >> 9 & 1; + c.hasFMA = f_1_ECX >> 12 & 1; + c.hasCMPXCHG16B = f_1_ECX >> 13 & 1; + c.hasSSE41 = f_1_ECX >> 19 & 1; + c.hasSSE42 = f_1_ECX >> 20 & 1; + c.hasMOVBE = f_1_ECX >> 22 & 1; + c.hasPOPCNT = f_1_ECX >> 23 & 1; + c.hasAES = f_1_ECX >> 25 & 1; + c.hasXSAVE = f_1_ECX >> 26 & 1; + c.hasOSXSAVE = f_1_ECX >> 27 & 1; + c.hasAVX = f_1_ECX >> 28 & 1; + c.hasF16C = f_1_ECX >> 29 & 1; + c.hasRDRAND = f_1_ECX >> 30 & 1; + c.hasMSR = f_1_EDX >> 5 & 1; + c.hasCX8 = f_1_EDX >> 8 & 1; + c.hasSEP = f_1_EDX >> 11 & 1; + c.hasCMOV = f_1_EDX >> 15 & 1; + c.hasCLFSH = f_1_EDX >> 19 & 1; + c.hasMMX = f_1_EDX >> 23 & 1; + c.hasFXSR = f_1_EDX >> 24 & 1; + c.hasSSE = f_1_EDX >> 25 & 1; + c.hasSSE2 = f_1_EDX >> 26 & 1; + c.hasFSGSBASE = f_7_EBX >> 0 & 1; + c.hasBMI1 = f_7_EBX >> 3 & 1; + c.hasHLE = c.isIntel && f_7_EBX >> 4 & 1; + c.hasAVX2 = f_7_EBX >> 5 & 1; + c.hasBMI2 = f_7_EBX >> 8 & 1; + c.hasERMS = f_7_EBX >> 9 & 1; + c.hasINVPCID = f_7_EBX >> 10 & 1; + c.hasRTM = c.isIntel && f_7_EBX >> 11 & 1; + c.hasAVX512F = f_7_EBX >> 16 & 1; + c.hasAVX512DQ = f_7_EBX >> 17 & 1; + c.hasRDSEED = f_7_EBX >> 18 & 1; + c.hasADX = f_7_EBX >> 19 & 1; + c.hasAVX512PF = f_7_EBX >> 26 & 1; + c.hasAVX512ER = f_7_EBX >> 27 & 1; + c.hasAVX512CD = f_7_EBX >> 28 & 1; + c.hasSHA = f_7_EBX >> 29 & 1; + c.hasAVX512BW = f_7_EBX >> 30 & 1; + c.hasPREFETCHWT1 = f_7_ECX >> 0 & 1; + c.hasLAHF = f_81_ECX >> 0 & 1; + c.hasLZCNT = c.isIntel && f_81_ECX >> 5 & 1; + c.hasABM = c.isAMD && f_81_ECX >> 5 & 1; + c.hasSSE4a = c.isAMD && f_81_ECX >> 6 & 1; + c.hasXOP = c.isAMD && f_81_ECX >> 11 & 1; + c.hasTBM = c.isAMD && f_81_ECX >> 21 & 1; + c.hasSYSCALL = c.isIntel && f_81_EDX >> 11 & 1; + c.hasMMXEXT = c.isAMD && f_81_EDX >> 22 & 1; + c.hasRDTSCP = c.isIntel && f_81_EDX >> 27 & 1; + c.has3DNOWEXT = c.isAMD && f_81_EDX >> 30 & 1; + c.has3DNOW = c.isAMD && f_81_EDX >> 31 & 1; + + c.hasAVXOSSUPPORT = c.hasAVX && c.hasOSXSAVE && (get_xcr0() & 0x06) == 0x06; + c.hasAVX512OSSUPPORT = c.hasAVXOSSUPPORT && c.hasAVX512F && c.hasOSXSAVE && (get_xcr0() & 0xE0) == 0xE0; + + if (c.hasAVX2 && c.hasAVXOSSUPPORT) + return cpu_t::avx2; + if (c.hasAVX && c.hasAVXOSSUPPORT) + return cpu_t::avx1; + if (c.hasSSE41) + return cpu_t::sse41; + if (c.hasSSSE3) + return cpu_t::ssse3; + if (c.hasSSE3) + return cpu_t::sse3; + if (c.hasSSE2) + return cpu_t::sse2; + return cpu_t::lowest; +} +} +#else + +template <size_t = 0> +cpu_t detect_cpu() +{ + return cpu_t::native; +} + +#endif +} diff --git a/include/kfr/base/cpuid_auto.hpp b/include/kfr/cpuid/cpuid_auto.hpp diff --git a/sources.cmake b/sources.cmake @@ -7,6 +7,7 @@ set( ${PROJECT_SOURCE_DIR}/include/kfr/all.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base.hpp ${PROJECT_SOURCE_DIR}/include/kfr/cometa.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/cpuid.hpp ${PROJECT_SOURCE_DIR}/include/kfr/dft.hpp ${PROJECT_SOURCE_DIR}/include/kfr/dsp.hpp ${PROJECT_SOURCE_DIR}/include/kfr/io.hpp @@ -21,8 +22,6 @@ set( ${PROJECT_SOURCE_DIR}/include/kfr/base/compiletime.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/complex.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/constants.hpp - ${PROJECT_SOURCE_DIR}/include/kfr/base/cpuid.hpp - ${PROJECT_SOURCE_DIR}/include/kfr/base/cpuid_auto.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/digitreverse.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/expression.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/function.hpp @@ -67,6 +66,8 @@ set( ${PROJECT_SOURCE_DIR}/include/kfr/cometa/result.hpp ${PROJECT_SOURCE_DIR}/include/kfr/cometa/string.hpp ${PROJECT_SOURCE_DIR}/include/kfr/cometa/tuple.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/cpuid/cpuid.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/cpuid/cpuid_auto.hpp ${PROJECT_SOURCE_DIR}/include/kfr/data/bitrev.hpp ${PROJECT_SOURCE_DIR}/include/kfr/data/sincos.hpp ${PROJECT_SOURCE_DIR}/include/kfr/dft/bitrev.hpp diff --git a/tests/multiarch.cpp b/tests/multiarch.cpp @@ -7,6 +7,7 @@ #include <kfr/testo/testo.hpp> #include <kfr/base.hpp> +#include <kfr/cpuid.hpp> #include <kfr/dsp.hpp> #include <kfr/io.hpp>