kfr

Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)
Log | Files | Refs | README

commit 70db31777ca0115a5969c96c1fb1f8f39cbd934a
parent 70da7e0ab909a61e5332caab21ed9f60b55f0f92
Author: d.levin256@gmail.com <d.levin256@gmail.com>
Date:   Thu,  8 Sep 2016 16:58:15 +0300

Extract platform-specific constants and functions into platform.hpp

Diffstat:
Minclude/kfr/base/expression.hpp | 1+
Ainclude/kfr/base/platform.hpp | 191+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Minclude/kfr/base/types.hpp | 156-------------------------------------------------------------------------------
Msources.cmake | 1+
4 files changed, 193 insertions(+), 156 deletions(-)

diff --git a/include/kfr/base/expression.hpp b/include/kfr/base/expression.hpp @@ -27,6 +27,7 @@ #include "types.hpp" #include "vec.hpp" +#include "platform.hpp" #include <tuple> diff --git a/include/kfr/base/platform.hpp b/include/kfr/base/platform.hpp @@ -0,0 +1,191 @@ +/** @addtogroup types + * @{ + */ +/* + Copyright (C) 2016 D Levin (https://www.kfrlib.com) + This file is part of KFR + + KFR is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + KFR is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with KFR. + + If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + Buying a commercial license is mandatory as soon as you develop commercial activities without + disclosing the source code of your own applications. + See https://www.kfrlib.com for details. 
+ */ +#pragma once + +#include "types.hpp" + +namespace kfr +{ + +/// @brief An enumeration representing cpu instruction set +enum class cpu_t : int +{ + common = 0, +#ifdef CMT_ARCH_X86 + sse2 = 1, + sse3 = 2, + ssse3 = 3, + sse41 = 4, + sse42 = 5, + avx1 = 6, + avx2 = 7, + avx = static_cast<int>(avx1), + lowest = static_cast<int>(sse2), + highest = static_cast<int>(avx2), +#endif +#ifdef CMT_ARCH_ARM + neon = 1, + neon64 = 2, + lowest = static_cast<int>(neon), + highest = static_cast<int>(neon64), +#endif + native = static_cast<int>(CMT_ARCH_NAME), + runtime = -1, +}; + +#define KFR_ARCH_DEP cpu_t cpu = cpu_t::native + +template <cpu_t cpu> +using ccpu_t = cval_t<cpu_t, cpu>; + +template <cpu_t cpu> +constexpr ccpu_t<cpu> ccpu{}; + +namespace internal +{ +constexpr cpu_t older(cpu_t x) { return static_cast<cpu_t>(static_cast<int>(x) - 1); } +constexpr cpu_t newer(cpu_t x) { return static_cast<cpu_t>(static_cast<int>(x) + 1); } + +#ifdef CMT_ARCH_X86 +constexpr auto cpu_list = + cvals<cpu_t, cpu_t::avx2, cpu_t::avx1, cpu_t::sse41, cpu_t::ssse3, cpu_t::sse3, cpu_t::sse2>; +#else +constexpr auto cpu_list = cvals<cpu_t, cpu_t::neon>; +#endif +} + +template <cpu_t cpu> +using cpuval_t = cval_t<cpu_t, cpu>; +template <cpu_t cpu> +constexpr auto cpuval = cpuval_t<cpu>{}; + +constexpr auto cpu_all = cfilter(internal::cpu_list, internal::cpu_list >= cpuval<cpu_t::native>); + +/// @brief Returns name of the cpu instruction set +__attribute__((unused)) static const char* cpu_name(cpu_t set) +{ + static const char* names[] = { "sse2", "sse3", "ssse3", "sse41", "sse42", "avx1", "avx2" }; + if (set >= cpu_t::lowest && set <= cpu_t::highest) + return names[static_cast<size_t>(set)]; + return "-"; +} + +template <typename T> +constexpr inline const T& bitness_const(const T& x32, const T& x64) +{ +#ifdef CMT_ARCH_X64 + (void)x32; + return x64; +#else + (void)x64; + return x32; +#endif +} + +constexpr inline const char* bitness_const(const char* x32, const char* x64) +{ +#ifdef 
CMT_ARCH_X64 + (void)x32; + return x64; +#else + (void)x64; + return x32; +#endif +} + +constexpr size_t native_cache_alignment = 64; +constexpr size_t native_cache_alignment_mask = native_cache_alignment - 1; +constexpr size_t maximum_vector_alignment = 32; +constexpr size_t maximum_vector_alignment_mask = maximum_vector_alignment - 1; +constexpr size_t native_register_count = bitness_const(8, 16); + +constexpr size_t common_float_vector_size = 16; +constexpr size_t common_int_vector_size = 16; + +template <cpu_t c> +constexpr size_t native_float_vector_size = +#ifdef CMT_ARCH_X86 + c >= cpu_t::avx1 ? 32 : c >= cpu_t::sse2 ? 16 : common_float_vector_size; +#endif +#ifdef CMT_ARCH_ARM +c == cpu_t::neon ? 16 : common_float_vector_size; +#endif +template <cpu_t c> +constexpr size_t native_int_vector_size = +#ifdef CMT_ARCH_X86 + c >= cpu_t::avx2 ? 32 : c >= cpu_t::sse2 ? 16 : common_int_vector_size; +#endif +#ifdef CMT_ARCH_ARM +c == cpu_t::neon ? 16 : common_int_vector_size; +#endif + +/// @brief SIMD vector width for the given cpu instruction set +template <typename T, cpu_t c = cpu_t::native> +constexpr size_t vector_width = const_max(size_t(1), typeclass<T> == datatype::f + ? 
native_float_vector_size<c> / sizeof(T) + : native_int_vector_size<c> / sizeof(T)); + +template <cpu_t c> +constexpr size_t vector_width<void, c> = 0; + +namespace internal +{ + +template <cpu_t c> +constexpr size_t native_vector_alignment = const_max(native_float_vector_size<c>, native_int_vector_size<c>); + +template <cpu_t c> +constexpr bool fast_unaligned = +#ifdef CMT_ARCH_X86 + c >= cpu_t::avx1; +#else + false; +#endif + +template <cpu_t c> +constexpr size_t native_vector_alignment_mask = native_vector_alignment<c> - 1; + +template <typename T, cpu_t c> +constexpr inline size_t get_vector_width(size_t scale = 1) +{ + return scale * vector_width<T, c>; +} +template <typename T, cpu_t c> +constexpr inline size_t get_vector_width(size_t x32scale, size_t x64scale) +{ + return bitness_const(x32scale, x64scale) * vector_width<T, c>; +} + +template <typename T, cpu_t c> +constexpr auto vector_width_range = csize<1> << csizeseq<ilog2(vector_width<T, c>) + 1>; + +template <typename T, cpu_t c> +constexpr size_t vector_capacity = native_register_count* vector_width<T, c>; + +template <typename T, cpu_t c> +constexpr size_t maximum_vector_size = const_min(static_cast<size_t>(32), vector_capacity<T, c> / 4); +} +} diff --git a/include/kfr/base/types.hpp b/include/kfr/base/types.hpp @@ -280,60 +280,6 @@ constexpr inline ptrdiff_t distance(const void* x, const void* y) return static_cast<const unsigned char*>(x) - static_cast<const unsigned char*>(y); } -/// @brief An enumeration representing cpu instruction set -enum class cpu_t : int -{ - common = 0, -#ifdef CMT_ARCH_X86 - sse2 = 1, - sse3 = 2, - ssse3 = 3, - sse41 = 4, - sse42 = 5, - avx1 = 6, - avx2 = 7, - avx = static_cast<int>(avx1), - lowest = static_cast<int>(sse2), - highest = static_cast<int>(avx2), -#endif -#ifdef CMT_ARCH_ARM - neon = 1, - neon64 = 2, - lowest = static_cast<int>(neon), - highest = static_cast<int>(neon64), -#endif - native = static_cast<int>(CMT_ARCH_NAME), - runtime = -1, -}; - -#define 
KFR_ARCH_DEP cpu_t cpu = cpu_t::native - -template <cpu_t cpu> -using ccpu_t = cval_t<cpu_t, cpu>; - -template <cpu_t cpu> -constexpr ccpu_t<cpu> ccpu{}; - -namespace internal -{ -constexpr cpu_t older(cpu_t x) { return static_cast<cpu_t>(static_cast<int>(x) - 1); } -constexpr cpu_t newer(cpu_t x) { return static_cast<cpu_t>(static_cast<int>(x) + 1); } - -#ifdef CMT_ARCH_X86 -constexpr auto cpu_list = - cvals<cpu_t, cpu_t::avx2, cpu_t::avx1, cpu_t::sse41, cpu_t::ssse3, cpu_t::sse3, cpu_t::sse2>; -#else -constexpr auto cpu_list = cvals<cpu_t, cpu_t::neon>; -#endif -} - -template <cpu_t cpu> -using cpuval_t = cval_t<cpu_t, cpu>; -template <cpu_t cpu> -constexpr auto cpuval = cpuval_t<cpu>{}; - -constexpr auto cpu_all = cfilter(internal::cpu_list, internal::cpu_list >= cpuval<cpu_t::native>); - template <typename T> constexpr datatype typeclass = std::is_floating_point<typename compound_type_traits<T>::subtype>::value ? datatype::f @@ -556,15 +502,6 @@ CMT_INLINE constexpr static T implicit_cast(U&& value) #pragma clang diagnostic pop -/// @brief Returns name of the cpu instruction set -__attribute__((unused)) static const char* cpu_name(cpu_t set) -{ - static const char* names[] = { "sse2", "sse3", "ssse3", "sse41", "sse42", "avx1", "avx2" }; - if (set >= cpu_t::lowest && set <= cpu_t::highest) - return names[static_cast<size_t>(set)]; - return "-"; -} - #define KFR_FN_S(fn) \ template <typename Arg, typename... Args> \ CMT_INLINE enable_if_not_vec<Arg> fn(Arg arg, Args... 
args) \ @@ -623,55 +560,6 @@ constexpr size_t widthof() return compound_type_traits<T>::width; } -template <typename T> -constexpr inline const T& bitness_const(const T& x32, const T& x64) -{ -#ifdef CMT_ARCH_X64 - (void)x32; - return x64; -#else - (void)x64; - return x32; -#endif -} - -constexpr inline const char* bitness_const(const char* x32, const char* x64) -{ -#ifdef CMT_ARCH_X64 - (void)x32; - return x64; -#else - (void)x64; - return x32; -#endif -} - -constexpr size_t native_cache_alignment = 64; -constexpr size_t native_cache_alignment_mask = native_cache_alignment - 1; -constexpr size_t maximum_vector_alignment = 32; -constexpr size_t maximum_vector_alignment_mask = maximum_vector_alignment - 1; -constexpr size_t native_register_count = bitness_const(8, 16); - -constexpr size_t common_float_vector_size = 16; -constexpr size_t common_int_vector_size = 16; - -template <cpu_t c> -constexpr size_t native_float_vector_size = -#ifdef CMT_ARCH_X86 - c >= cpu_t::avx1 ? 32 : c >= cpu_t::sse2 ? 16 : common_float_vector_size; -#endif -#ifdef CMT_ARCH_ARM -c == cpu_t::neon ? 16 : common_float_vector_size; -#endif -template <cpu_t c> -constexpr size_t native_int_vector_size = -#ifdef CMT_ARCH_X86 - c >= cpu_t::avx2 ? 32 : c >= cpu_t::sse2 ? 16 : common_int_vector_size; -#endif -#ifdef CMT_ARCH_ARM -c == cpu_t::neon ? 16 : common_int_vector_size; -#endif - constexpr size_t infinite_size = static_cast<size_t>(-1); constexpr inline size_t size_add(size_t x, size_t y) @@ -738,52 +626,8 @@ using is_numeric = is_number<deep_subtype<T>>; template <typename... Ts> using is_numeric_args = and_t<is_numeric<Ts>...>; -/// @brief SIMD vector width for the given cpu instruction set -template <typename T, cpu_t c = cpu_t::native> -constexpr size_t vector_width = const_max(size_t(1), typeclass<T> == datatype::f - ? 
native_float_vector_size<c> / sizeof(T) - : native_int_vector_size<c> / sizeof(T)); - -template <cpu_t c> -constexpr size_t vector_width<void, c> = 0; - namespace internal { - -template <cpu_t c> -constexpr size_t native_vector_alignment = const_max(native_float_vector_size<c>, native_int_vector_size<c>); - -template <cpu_t c> -constexpr bool fast_unaligned = -#ifdef CMT_ARCH_X86 - c >= cpu_t::avx1; -#else - false; -#endif - -template <cpu_t c> -constexpr size_t native_vector_alignment_mask = native_vector_alignment<c> - 1; - -template <typename T, cpu_t c> -constexpr inline size_t get_vector_width(size_t scale = 1) -{ - return scale * vector_width<T, c>; -} -template <typename T, cpu_t c> -constexpr inline size_t get_vector_width(size_t x32scale, size_t x64scale) -{ - return bitness_const(x32scale, x64scale) * vector_width<T, c>; -} - -template <typename T, cpu_t c> -constexpr auto vector_width_range = csize<1> << csizeseq<ilog2(vector_width<T, c>) + 1>; - -template <typename T, cpu_t c> -constexpr size_t vector_capacity = native_register_count* vector_width<T, c>; - -template <typename T, cpu_t c> -constexpr size_t maximum_vector_size = const_min(static_cast<size_t>(32), vector_capacity<T, c> / 4); - template <size_t width, typename Fn> CMT_INLINE void block_process_impl(size_t& i, size_t size, Fn&& fn) { diff --git a/sources.cmake b/sources.cmake @@ -36,6 +36,7 @@ set( ${PROJECT_SOURCE_DIR}/include/kfr/base/min_max.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/modzerobessel.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/operators.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/platform.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/pointer.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/random.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/read_write.hpp