Extract platform specific constants and functions into platform.hpp - kfr - Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)

commit 70db31777ca0115a5969c96c1fb1f8f39cbd934a
parent 70da7e0ab909a61e5332caab21ed9f60b55f0f92
Author: d.levin256@gmail.com <d.levin256@gmail.com>
Date:   Thu,  8 Sep 2016 16:58:15 +0300

Extract platform specific constants and functions into platform.hpp

Diffstat:
M include/kfr/base/expression.hpp  | 1 +
A include/kfr/base/platform.hpp  | 191 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M include/kfr/base/types.hpp  | 156 -------------------------------------------------------------------------------
M sources.cmake  | 1 +

4 files changed, 193 insertions(+), 156 deletions(-)
diff --git a/include/kfr/base/expression.hpp b/include/kfr/base/expression.hpp
@@ -27,6 +27,7 @@
 
 #include "types.hpp"
 #include "vec.hpp"
+#include "platform.hpp"
 
 #include <tuple>
 
diff --git a/include/kfr/base/platform.hpp b/include/kfr/base/platform.hpp
@@ -0,0 +1,191 @@
+/** @addtogroup types
+ *  @{
+ */
+/*
+  Copyright (C) 2016 D Levin (https://www.kfrlib.com)
+  This file is part of KFR
+
+  KFR is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  KFR is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with KFR.
+
+  If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+  Buying a commercial license is mandatory as soon as you develop commercial activities without
+  disclosing the source code of your own applications.
+  See https://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "types.hpp"
+
+namespace kfr
+{
+
+/// @brief An enumeration representing cpu instruction set
+enum class cpu_t : int
+{
+    common = 0, ///< defined regardless of CMT_ARCH_X86 / CMT_ARCH_ARM
+#ifdef CMT_ARCH_X86
+    sse2    = 1,
+    sse3    = 2,
+    ssse3   = 3,
+    sse41   = 4,
+    sse42   = 5,
+    avx1    = 6,
+    avx2    = 7,
+    avx     = static_cast<int>(avx1), ///< alias: same value as avx1
+    lowest  = static_cast<int>(sse2), ///< first entry of the x86 range (== sse2)
+    highest = static_cast<int>(avx2), ///< last entry of the x86 range (== avx2)
+#endif
+#ifdef CMT_ARCH_ARM
+    neon    = 1, ///< ARM values reuse 1 and 2, so they overlap the x86 numbering
+    neon64  = 2,
+    lowest  = static_cast<int>(neon), ///< redefinition if CMT_ARCH_X86 is also set: enable only one
+    highest = static_cast<int>(neon64),
+#endif
+    native  = static_cast<int>(CMT_ARCH_NAME), ///< CMT_ARCH_NAME is defined outside this file
+    runtime = -1, ///< NOTE(review): presumably "select at run time" - confirm against callers
+};
+
+#define KFR_ARCH_DEP cpu_t cpu = cpu_t::native
+
+template <cpu_t cpu>
+using ccpu_t = cval_t<cpu_t, cpu>;
+
+template <cpu_t cpu>
+constexpr ccpu_t<cpu> ccpu{};
+
+namespace internal
+{
+constexpr cpu_t older(cpu_t x) { return static_cast<cpu_t>(static_cast<int>(x) - 1); }
+constexpr cpu_t newer(cpu_t x) { return static_cast<cpu_t>(static_cast<int>(x) + 1); }
+
+#ifdef CMT_ARCH_X86
+constexpr auto cpu_list =
+    cvals<cpu_t, cpu_t::avx2, cpu_t::avx1, cpu_t::sse41, cpu_t::ssse3, cpu_t::sse3, cpu_t::sse2>;
+#else
+constexpr auto cpu_list = cvals<cpu_t, cpu_t::neon>;
+#endif
+}
+
+template <cpu_t cpu>
+using cpuval_t = cval_t<cpu_t, cpu>;
+template <cpu_t cpu>
+constexpr auto cpuval = cpuval_t<cpu>{};
+
+constexpr auto cpu_all = cfilter(internal::cpu_list, internal::cpu_list >= cpuval<cpu_t::native>);
+
+/// @brief Returns name of the cpu instruction set, or "-" if set is outside [cpu_t::lowest, cpu_t::highest]
+__attribute__((unused)) static const char* cpu_name(cpu_t set)
+{
+    static const char* names[] = { "sse2", "sse3", "ssse3", "sse41", "sse42", "avx1", "avx2" };
+    if (set >= cpu_t::lowest && set <= cpu_t::highest) // names[0] belongs to cpu_t::lowest, not to value 0
+        return names[static_cast<size_t>(set) - static_cast<size_t>(cpu_t::lowest)];
+    return "-"; // NOTE(review): names[] holds x86 names only; CMT_ARCH_ARM builds need a neon table
+}
+
+template <typename T>
+constexpr inline const T& bitness_const(const T& x32, const T& x64)
+{
+#ifdef CMT_ARCH_X64
+    (void)x32;
+    return x64;
+#else
+    (void)x64;
+    return x32;
+#endif
+}
+
+constexpr inline const char* bitness_const(const char* x32, const char* x64)
+{
+#ifdef CMT_ARCH_X64
+    (void)x32;
+    return x64;
+#else
+    (void)x64;
+    return x32;
+#endif
+}
+
+constexpr size_t native_cache_alignment        = 64; // presumably bytes - TODO confirm
+constexpr size_t native_cache_alignment_mask   = native_cache_alignment - 1; // 63
+constexpr size_t maximum_vector_alignment      = 32; // presumably bytes - TODO confirm
+constexpr size_t maximum_vector_alignment_mask = maximum_vector_alignment - 1; // 31
+constexpr size_t native_register_count         = bitness_const(8, 16); // 16 if CMT_ARCH_X64, else 8
+
+constexpr size_t common_float_vector_size = 16; // fallback for native_float_vector_size
+constexpr size_t common_int_vector_size   = 16; // fallback for native_int_vector_size
+
+template <cpu_t c>
+constexpr size_t native_float_vector_size =
+#ifdef CMT_ARCH_X86
+    c >= cpu_t::avx1 ? 32 : c >= cpu_t::sse2 ? 16 : common_float_vector_size;
+#endif
+#ifdef CMT_ARCH_ARM
+c == cpu_t::neon ? 16 : common_float_vector_size;
+#endif
+template <cpu_t c>
+constexpr size_t native_int_vector_size =
+#ifdef CMT_ARCH_X86
+    c >= cpu_t::avx2 ? 32 : c >= cpu_t::sse2 ? 16 : common_int_vector_size;
+#endif
+#ifdef CMT_ARCH_ARM
+c == cpu_t::neon ? 16 : common_int_vector_size;
+#endif
+
+/// @brief SIMD vector width, in elements of T (never less than 1), for the given cpu instruction set
+template <typename T, cpu_t c = cpu_t::native>
+constexpr size_t vector_width = const_max(size_t(1), typeclass<T> == datatype::f
+                                                         ? native_float_vector_size<c> / sizeof(T)
+                                                         : native_int_vector_size<c> / sizeof(T));
+
+template <cpu_t c>
+constexpr size_t vector_width<void, c> = 0; // specialization: void yields a width of 0
+
+namespace internal
+{
+
+template <cpu_t c>
+constexpr size_t native_vector_alignment = const_max(native_float_vector_size<c>, native_int_vector_size<c>);
+
+template <cpu_t c>
+constexpr bool fast_unaligned =
+#ifdef CMT_ARCH_X86
+    c >= cpu_t::avx1;
+#else
+    false;
+#endif
+
+template <cpu_t c>
+constexpr size_t native_vector_alignment_mask = native_vector_alignment<c> - 1;
+
+template <typename T, cpu_t c>
+constexpr inline size_t get_vector_width(size_t scale = 1)
+{
+    return scale * vector_width<T, c>;
+}
+template <typename T, cpu_t c>
+constexpr inline size_t get_vector_width(size_t x32scale, size_t x64scale)
+{
+    return bitness_const(x32scale, x64scale) * vector_width<T, c>;
+}
+
+template <typename T, cpu_t c>
+constexpr auto vector_width_range = csize<1> << csizeseq<ilog2(vector_width<T, c>) + 1>;
+
+template <typename T, cpu_t c>
+constexpr size_t vector_capacity = native_register_count* vector_width<T, c>;
+
+template <typename T, cpu_t c>
+constexpr size_t maximum_vector_size = const_min(static_cast<size_t>(32), vector_capacity<T, c> / 4);
+}
+}
diff --git a/include/kfr/base/types.hpp b/include/kfr/base/types.hpp
@@ -280,60 +280,6 @@ constexpr inline ptrdiff_t distance(const void* x, const void* y)
     return static_cast<const unsigned char*>(x) - static_cast<const unsigned char*>(y);
 }
 
-/// @brief An enumeration representing cpu instruction set
-enum class cpu_t : int
-{
-    common = 0,
-#ifdef CMT_ARCH_X86
-    sse2    = 1,
-    sse3    = 2,
-    ssse3   = 3,
-    sse41   = 4,
-    sse42   = 5,
-    avx1    = 6,
-    avx2    = 7,
-    avx     = static_cast<int>(avx1),
-    lowest  = static_cast<int>(sse2),
-    highest = static_cast<int>(avx2),
-#endif
-#ifdef CMT_ARCH_ARM
-    neon    = 1,
-    neon64  = 2,
-    lowest  = static_cast<int>(neon),
-    highest = static_cast<int>(neon64),
-#endif
-    native  = static_cast<int>(CMT_ARCH_NAME),
-    runtime = -1,
-};
-
-#define KFR_ARCH_DEP cpu_t cpu = cpu_t::native
-
-template <cpu_t cpu>
-using ccpu_t = cval_t<cpu_t, cpu>;
-
-template <cpu_t cpu>
-constexpr ccpu_t<cpu> ccpu{};
-
-namespace internal
-{
-constexpr cpu_t older(cpu_t x) { return static_cast<cpu_t>(static_cast<int>(x) - 1); }
-constexpr cpu_t newer(cpu_t x) { return static_cast<cpu_t>(static_cast<int>(x) + 1); }
-
-#ifdef CMT_ARCH_X86
-constexpr auto cpu_list =
-    cvals<cpu_t, cpu_t::avx2, cpu_t::avx1, cpu_t::sse41, cpu_t::ssse3, cpu_t::sse3, cpu_t::sse2>;
-#else
-constexpr auto cpu_list = cvals<cpu_t, cpu_t::neon>;
-#endif
-}
-
-template <cpu_t cpu>
-using cpuval_t = cval_t<cpu_t, cpu>;
-template <cpu_t cpu>
-constexpr auto cpuval = cpuval_t<cpu>{};
-
-constexpr auto cpu_all = cfilter(internal::cpu_list, internal::cpu_list >= cpuval<cpu_t::native>);
-
 template <typename T>
 constexpr datatype typeclass = std::is_floating_point<typename compound_type_traits<T>::subtype>::value
                                    ? datatype::f
@@ -556,15 +502,6 @@ CMT_INLINE constexpr static T implicit_cast(U&& value)
 
 #pragma clang diagnostic pop
 
-/// @brief Returns name of the cpu instruction set
-__attribute__((unused)) static const char* cpu_name(cpu_t set)
-{
-    static const char* names[] = { "sse2", "sse3", "ssse3", "sse41", "sse42", "avx1", "avx2" };
-    if (set >= cpu_t::lowest && set <= cpu_t::highest)
-        return names[static_cast<size_t>(set)];
-    return "-";
-}
-
 #define KFR_FN_S(fn)                                                                                         \
     template <typename Arg, typename... Args>                                                                \
     CMT_INLINE enable_if_not_vec<Arg> fn(Arg arg, Args... args)                                              \
@@ -623,55 +560,6 @@ constexpr size_t widthof()
     return compound_type_traits<T>::width;
 }
 
-template <typename T>
-constexpr inline const T& bitness_const(const T& x32, const T& x64)
-{
-#ifdef CMT_ARCH_X64
-    (void)x32;
-    return x64;
-#else
-    (void)x64;
-    return x32;
-#endif
-}
-
-constexpr inline const char* bitness_const(const char* x32, const char* x64)
-{
-#ifdef CMT_ARCH_X64
-    (void)x32;
-    return x64;
-#else
-    (void)x64;
-    return x32;
-#endif
-}
-
-constexpr size_t native_cache_alignment        = 64;
-constexpr size_t native_cache_alignment_mask   = native_cache_alignment - 1;
-constexpr size_t maximum_vector_alignment      = 32;
-constexpr size_t maximum_vector_alignment_mask = maximum_vector_alignment - 1;
-constexpr size_t native_register_count         = bitness_const(8, 16);
-
-constexpr size_t common_float_vector_size = 16;
-constexpr size_t common_int_vector_size   = 16;
-
-template <cpu_t c>
-constexpr size_t native_float_vector_size =
-#ifdef CMT_ARCH_X86
-    c >= cpu_t::avx1 ? 32 : c >= cpu_t::sse2 ? 16 : common_float_vector_size;
-#endif
-#ifdef CMT_ARCH_ARM
-c == cpu_t::neon ? 16 : common_float_vector_size;
-#endif
-template <cpu_t c>
-constexpr size_t native_int_vector_size =
-#ifdef CMT_ARCH_X86
-    c >= cpu_t::avx2 ? 32 : c >= cpu_t::sse2 ? 16 : common_int_vector_size;
-#endif
-#ifdef CMT_ARCH_ARM
-c == cpu_t::neon ? 16 : common_int_vector_size;
-#endif
-
 constexpr size_t infinite_size = static_cast<size_t>(-1);
 
 constexpr inline size_t size_add(size_t x, size_t y)
@@ -738,52 +626,8 @@ using is_numeric = is_number<deep_subtype<T>>;
 template <typename... Ts>
 using is_numeric_args = and_t<is_numeric<Ts>...>;
 
-/// @brief SIMD vector width for the given cpu instruction set
-template <typename T, cpu_t c = cpu_t::native>
-constexpr size_t vector_width = const_max(size_t(1), typeclass<T> == datatype::f
-                                                         ? native_float_vector_size<c> / sizeof(T)
-                                                         : native_int_vector_size<c> / sizeof(T));
-
-template <cpu_t c>
-constexpr size_t vector_width<void, c> = 0;
-
 namespace internal
 {
-
-template <cpu_t c>
-constexpr size_t native_vector_alignment = const_max(native_float_vector_size<c>, native_int_vector_size<c>);
-
-template <cpu_t c>
-constexpr bool fast_unaligned =
-#ifdef CMT_ARCH_X86
-    c >= cpu_t::avx1;
-#else
-    false;
-#endif
-
-template <cpu_t c>
-constexpr size_t native_vector_alignment_mask = native_vector_alignment<c> - 1;
-
-template <typename T, cpu_t c>
-constexpr inline size_t get_vector_width(size_t scale = 1)
-{
-    return scale * vector_width<T, c>;
-}
-template <typename T, cpu_t c>
-constexpr inline size_t get_vector_width(size_t x32scale, size_t x64scale)
-{
-    return bitness_const(x32scale, x64scale) * vector_width<T, c>;
-}
-
-template <typename T, cpu_t c>
-constexpr auto vector_width_range = csize<1> << csizeseq<ilog2(vector_width<T, c>) + 1>;
-
-template <typename T, cpu_t c>
-constexpr size_t vector_capacity = native_register_count* vector_width<T, c>;
-
-template <typename T, cpu_t c>
-constexpr size_t maximum_vector_size = const_min(static_cast<size_t>(32), vector_capacity<T, c> / 4);
-
 template <size_t width, typename Fn>
 CMT_INLINE void block_process_impl(size_t& i, size_t size, Fn&& fn)
 {
diff --git a/sources.cmake b/sources.cmake
@@ -36,6 +36,7 @@ set(
     ${PROJECT_SOURCE_DIR}/include/kfr/base/min_max.hpp
     ${PROJECT_SOURCE_DIR}/include/kfr/base/modzerobessel.hpp
     ${PROJECT_SOURCE_DIR}/include/kfr/base/operators.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/base/platform.hpp
     ${PROJECT_SOURCE_DIR}/include/kfr/base/pointer.hpp
     ${PROJECT_SOURCE_DIR}/include/kfr/base/random.hpp
     ${PROJECT_SOURCE_DIR}/include/kfr/base/read_write.hpp

	kfr Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)
	Log \| Files \| Refs \| README

M	include/kfr/base/expression.hpp	\|	1	+
A	include/kfr/base/platform.hpp	\|	191	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	include/kfr/base/types.hpp	\|	156	-------------------------------------------------------------------------------
M	sources.cmake	\|	1	+