kfr

Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)
Log | Files | Refs | README

commit 3e6daf2fc5363a8c84bffb91a7934cf859b6b055
parent efff98d4c12e0dac317b5cfd960a524e91c8c4a7
Author: d.levin256@gmail.com <d.levin256@gmail.com>
Date:   Wed, 11 Oct 2023 09:26:32 +0100

Fix vec<bit<>> in MSVC and GCC

Diffstat:
M include/kfr/simd/impl/backend_clang.hpp | 16 ++++++++--------
M include/kfr/simd/impl/backend_generic.hpp | 40 ++++++++++++++++++++++------------------
M include/kfr/simd/impl/simd.hpp | 6 ------
M include/kfr/simd/operators.hpp | 4 ++--
M include/kfr/simd/shuffle.hpp | 4 ++--
M include/kfr/simd/types.hpp | 67 +++++++++++++++++++++++++++++++++++++++++--------------------------
M include/kfr/simd/vec.hpp | 17 +++++++++++++++--
M sources.cmake | 1 +
A tests/unit/simd/logical.cpp | 29 +++++++++++++++++++++++++++++
M tests/unit/simd/vec.cpp | 13 +++++++++++++
10 files changed, 133 insertions(+), 64 deletions(-)

diff --git a/include/kfr/simd/impl/backend_clang.hpp b/include/kfr/simd/impl/backend_clang.hpp @@ -51,13 +51,13 @@ KFR_INTRINSIC void simd_make(ctype_t<Tout>) = delete; template <typename Tout, typename Arg> KFR_INTRINSIC simd<Tout, 1> simd_make(ctype_t<Tout>, const Arg& arg) { - return (simd<Tout, 1>){ static_cast<unwrap_bit<Tout>>(arg) }; + return (simd<Tout, 1>){ unwrap_bit_value(arg) }; } template <typename Tout, typename... Args, size_t N = sizeof...(Args), KFR_ENABLE_IF(N > 1)> KFR_INTRINSIC simd<Tout, N> simd_make(ctype_t<Tout>, const Args&... args) { - return (simd<Tout, N>){ static_cast<unwrap_bit<Tout>>(args)... }; + return (simd<Tout, N>){ unwrap_bit_value(args)... }; } // @brief Returns vector with undefined value @@ -79,7 +79,7 @@ KFR_INTRINSIC simd<Tout, N> simd_zeros() template <typename Tout, size_t N> KFR_INTRINSIC simd<Tout, N> simd_allones() { - return special_constants<Tout>::allones(); + return unwrap_bit_value(special_constants<Tout>::allones()); } // @brief Converts input vector to vector with subtype Tout @@ -98,20 +98,20 @@ KFR_INTRINSIC simd<T, N> simd_bitcast(simd_cvt_t<T, T, N>, const simd<T, N>& x) template <typename T, size_t N, size_t index> KFR_INTRINSIC T simd_get_element(const simd<T, N>& value, csize_t<index>) { - return value[index]; + return wrap_bit_value<T>(value[index]); } template <typename T, size_t N, size_t index> KFR_INTRINSIC simd<T, N> simd_set_element(simd<T, N> value, csize_t<index>, T x) { - value[index] = x; + value[index] = unwrap_bit_value(x); return value; } template <typename T, size_t N> KFR_INTRINSIC simd<T, N> simd_broadcast(simd_t<T, N>, identity<T> value) { - return static_cast<unwrap_bit<T>>(value); + return unwrap_bit_value(value); } template <typename T, size_t N, size_t... 
indices, size_t Nout = sizeof...(indices)> @@ -178,13 +178,13 @@ using simd_storage = struct_with_alignment<simd<T, N>, A>; template <typename T, size_t N> KFR_INTRINSIC T simd_get_element(const simd<T, N>& value, size_t index) { - return value[index]; + return wrap_bit_value<T>(value[index]); } template <typename T, size_t N> KFR_INTRINSIC simd<T, N> simd_set_element(simd<T, N> value, size_t index, T x) { - value[index] = x; + value[index] = unwrap_bit_value(x); return value; } } // namespace intrinsics diff --git a/include/kfr/simd/impl/backend_generic.hpp b/include/kfr/simd/impl/backend_generic.hpp @@ -458,7 +458,7 @@ KFR_INTRIN_SHUFFLE_SWAP(f32, 2, _mm_cvtsd_f64(_mm_castps_pd(KFR_swap_ps(_mm_castpd_ps(_mm_set1_pd(x.whole)))))) #else KFR_INTRIN_SHUFFLE_CONCAT(f32, 2, _mm_setr_ps(x.low, x.high, y.low, y.high)) -KFR_INTRIN_SHUFFLE_SWAP(f32, 2, simd<f32, 2>(x.high, x.low)) +KFR_INTRIN_SHUFFLE_SWAP(f32, 2, simd<f32, 2>{ x.high, x.low }) #endif #if defined CMT_COMPILER_IS_MSVC && defined CMT_ARCH_X32 @@ -1077,7 +1077,7 @@ KFR_INTRINSIC simd<T, N> from_simd_array(const simd_array<T, N>& x) CMT_NOEXCEPT template <typename T, size_t N, size_t... indices> KFR_INTRINSIC simd<T, N> from_simd_array_impl(const simd_array<T, N>& x, csizes_t<indices...>) CMT_NOEXCEPT { - return { static_cast<unwrap_bit<T>>(x.val[indices])... }; + return { unwrap_bit_value(x.val[indices])... }; } template <typename T, size_t N, KFR_ENABLE_IF(is_simd_small_array<simd<T, N>>)> @@ -1100,7 +1100,7 @@ KFR_INTRINSIC void simd_make(ctype_t<Tout>) CMT_NOEXCEPT = delete; template <typename Tout, typename Arg> KFR_INTRINSIC simd<Tout, 1> simd_make(ctype_t<Tout>, const Arg& arg) CMT_NOEXCEPT { - return simd<Tout, 1>{ static_cast<unwrap_bit<Tout>>(static_cast<Tout>(arg)) }; + return simd<Tout, 1>{ unwrap_bit_value(static_cast<Tout>(arg)) }; } template <typename T, size_t... indices, typename... 
Args, size_t N = sizeof...(indices)> @@ -1179,9 +1179,9 @@ KFR_INTRINSIC simd<Tout, Nout> simd_bitcast(simd_cvt_t<Tout, Tin, N>, const simd constexpr size_t Nlow = prev_poweroftwo(N - 1); return simd_concat<Tout, Nlow * Nout / N, (N - Nlow) * Nout / N>( simd_bitcast(simd_cvt_t<Tout, Tin, Nlow>{}, - simd_shuffle(simd_t<Tin, N>{}, x, csizeseq<Nlow>, overload_auto)), + unwrap_bit_value(simd_shuffle(simd_t<Tin, N>{}, x, csizeseq<Nlow>, overload_auto))), simd_bitcast(simd_cvt_t<Tout, Tin, N - Nlow>{}, - simd_shuffle(simd_t<Tin, N>{}, x, csizeseq<N - Nlow, Nlow>, overload_auto))); + unwrap_bit_value(simd_shuffle(simd_t<Tin, N>{}, x, csizeseq<N - Nlow, Nlow>, overload_auto)))); } template <typename T, size_t N> @@ -1193,11 +1193,11 @@ KFR_INTRINSIC const simd<T, N>& simd_bitcast(simd_cvt_t<T, T, N>, const simd<T, template <typename T, size_t N, size_t index> KFR_INTRINSIC T simd_get_element(const simd<T, N>& value, csize_t<index>) CMT_NOEXCEPT { - return simd_shuffle(simd_t<T, N>{}, value, csizes<index>, overload_auto); + return wrap_bit_value<T>(simd_shuffle(simd_t<T, N>{}, value, csizes<index>, overload_auto)); } template <typename T, size_t N, size_t index> -KFR_INTRINSIC simd<T, N> simd_set_element(simd<T, N> value, csize_t<index>, T x) CMT_NOEXCEPT +KFR_INTRINSIC simd<T, N> simd_set_element(simd<T, N> value, csize_t<index>, unwrap_bit<T> x) CMT_NOEXCEPT { not_optimized(CMT_FUNC_SIGNATURE); simd_array<T, N> arr = to_simd_array<T, N>(value); @@ -1239,7 +1239,7 @@ KFR_INTRINSIC simd<T, N + N> simd_shuffle(simd2_t<T, N, N>, const simd<T, N>& x, template <typename T> KFR_INTRINSIC simd<T, 1> simd_broadcast(simd_t<T, 1>, identity<T> value) CMT_NOEXCEPT { - return { static_cast<unwrap_bit<T>>(value) }; + return { unwrap_bit_value(value) }; } template <typename T, size_t N, KFR_ENABLE_IF(N >= 2), size_t Nlow = prev_poweroftwo(N - 1)> @@ -1267,7 +1267,7 @@ KFR_INTRINSIC simd<T, N / 2> simd_shuffle(simd_t<T, N>, const simd<T, N>& x, csi template <typename T, size_t N, size_t 
index> KFR_INTRINSIC T simd_shuffle(simd_t<T, N>, const simd<T, N>& x, csizes_t<index>, - overload_priority<6>) CMT_NOEXCEPT + overload_priority<6>) CMT_NOEXCEPT { return to_simd_array<T, N>(x).val[index]; } @@ -1310,7 +1310,7 @@ KFR_INTRINSIC simd<T, Nout> simd_shuffle(simd_t<T, N>, const simd<T, N>& x, csiz return from_simd_array<T, Nout>(simd_shuffle_generic<T, Nout, N>(xx, indices_array)); #else return from_simd_array<T, Nout>( - { (indices >= N ? T() : static_cast<T>(to_simd_array<T, N>(x).val[indices]))... }); + { (indices >= N ? T() : unwrap_bit_value(to_simd_array<T, N>(x).val[indices]))... }); #endif } @@ -1328,8 +1328,8 @@ KFR_INTRINSIC simd<T, Nout> simd_shuffle(simd2_t<T, N, N>, const simd<T, N>& x, #else return from_simd_array<T, Nout>( { (indices >= N * 2 ? T() - : indices >= N ? static_cast<T>(to_simd_array<T, N>(y).val[indices - N]) - : static_cast<T>(to_simd_array<T, N>(x).val[indices]))... }); + : indices >= N ? unwrap_bit_value(to_simd_array<T, N>(y).val[indices - N]) + : unwrap_bit_value(to_simd_array<T, N>(x).val[indices]))... }); #endif } @@ -1349,8 +1349,8 @@ KFR_INTRINSIC simd<T, Nout> simd_shuffle(simd2_t<T, N1, N2>, const simd<T, N1>& return from_simd_array<T, Nout>( { (indices > N1 + N2 ? T() - : indices >= N1 ? static_cast<T>(to_simd_array<T, N2>(y).val[indices - N1]) - : static_cast<T>(to_simd_array<T, N1>(x).val[indices]))... }); + : indices >= N1 ? unwrap_bit_value(to_simd_array<T, N2>(y).val[indices - N1]) + : unwrap_bit_value(to_simd_array<T, N1>(x).val[indices]))... 
}); #endif } @@ -1433,7 +1433,7 @@ KFR_INTRINSIC T simd_get_element(const simd<T, N>& value, size_t index) CMT_NOEX } template <typename T, size_t N> -KFR_INTRINSIC simd<T, N> simd_set_element(const simd<T, N>& value, size_t index, T x) CMT_NOEXCEPT +KFR_INTRINSIC simd<T, N> simd_set_element(const simd<T, N>& value, size_t index, unwrap_bit<T> x) CMT_NOEXCEPT { simd_array<T, N> arr = to_simd_array<T, N>(value); arr.val[index] = x; @@ -1441,9 +1441,9 @@ KFR_INTRINSIC simd<T, N> simd_set_element(const simd<T, N>& value, size_t index, } #define SIMD_TYPE_INTRIN(T, N, TO_SCALAR, FROM_SCALAR, FROM_BROADCAST, FROM_ZERO) \ - KFR_INTRINSIC T simd_to_scalar(simd_t<T, N>, const simd<T, N>& x) { return TO_SCALAR; } \ - KFR_INTRINSIC simd<T, N> simd_from_scalar(simd_t<T, N>, T x) { return FROM_SCALAR; } \ - KFR_INTRINSIC simd<T, N> simd_from_broadcast(simd_t<T, N>, T x) { return FROM_BROADCAST; } \ + KFR_INTRINSIC T simd_to_scalar(simd_t<T, N>, const simd<T, N>& x) { return TO_SCALAR; } \ + KFR_INTRINSIC simd<T, N> simd_from_scalar(simd_t<T, N>, unwrap_bit<T> x) { return FROM_SCALAR; } \ + KFR_INTRINSIC simd<T, N> simd_from_broadcast(simd_t<T, N>, unwrap_bit<T> x) { return FROM_BROADCAST; } \ KFR_INTRINSIC simd<T, N> simd_from_zero(simd_t<T, N>) { return FROM_ZERO; } #define SIMD_TYPE_INTRIN_EX(T, N, TO_SCALAR, FROM_SCALAR, FROM_BROADCAST, FROM_ZERO, GET_LOW, GET_HIGH, \ @@ -1496,7 +1496,11 @@ KFR_INTRINSIC simd<T, N> simd_from_halves(simd_t<T, N>, const simd<T, N / 2>& x, KFR_INTRINSIC simd<float, 4> simd_from_halves(simd_t<float, 4>, const simd<float, 2>& x, const simd<float, 2>& y) { +#ifndef KFR_f32x2_array return _mm_castpd_ps(_mm_setr_pd(x.whole, y.whole)); +#else + return _mm_setr_ps(x.low, x.high, y.low, y.high); +#endif } KFR_INTRINSIC simd<double, 2> simd_from_halves(simd_t<double, 2>, const simd<double, 1>& x, diff --git a/include/kfr/simd/impl/simd.hpp b/include/kfr/simd/impl/simd.hpp @@ -74,12 +74,6 @@ struct alignas(force_compiletime_size_t<alignment<T, N>()>) 
simd_array }; template <typename T, size_t N> -struct alignas(force_compiletime_size_t<alignment<T, N>()>) simd_array<bit<T>, N> -{ - bit_value<T> val[next_poweroftwo(N)]; -}; - -template <typename T, size_t N> struct simd_type; template <typename T> diff --git a/include/kfr/simd/operators.hpp b/include/kfr/simd/operators.hpp @@ -179,12 +179,12 @@ KFR_INTRINSIC mask<C, N> operator^(const mask<T1, N>& x, const mask<T2, N>& y) C template <typename T, size_t N> KFR_INTRINSIC mask<T, N> operator~(const mask<T, N>& x) CMT_NOEXCEPT { - return ~x.asvec(); + return mask<T, N>(~x.asvec()); } template <typename T, size_t N> KFR_INTRINSIC mask<T, N> operator!(const mask<T, N>& x) CMT_NOEXCEPT { - return ~x.asvec(); + return mask<T, N>(~x.asvec()); } KFR_INTRINSIC float bitwisenot(float x) { return fbitcast(~ubitcast(x)); } diff --git a/include/kfr/simd/shuffle.hpp b/include/kfr/simd/shuffle.hpp @@ -365,12 +365,12 @@ namespace internal template <typename T, size_t N> KFR_INTRINSIC mask<T, N> evenmask() { - return broadcast<N>(maskbits<T>(true), maskbits<T>(false)); + return mask<T, N>(broadcast<N>(maskbits<T>(true), maskbits<T>(false))); } template <typename T, size_t N> KFR_INTRINSIC mask<T, N> oddmask() { - return broadcast<N>(maskbits<T>(false), maskbits<T>(true)); + return mask<T, N>(broadcast<N>(maskbits<T>(false), maskbits<T>(true))); } } // namespace internal diff --git a/include/kfr/simd/types.hpp b/include/kfr/simd/types.hpp @@ -201,48 +201,37 @@ constexpr inline T maskbits(bool value) { return value ? 
special_constants<T>::allones() : special_constants<T>::allzeros(); } - template <typename T> -struct bit_value; +constexpr inline bool from_maskbits(T value) +{ + return bitcast_anything<itype<T>>(value) < 0; +} template <typename T> struct bit { - alignas(T) bool value; + T value; bit() CMT_NOEXCEPT = default; - constexpr bit(const bit_value<T>& value) CMT_NOEXCEPT : value(static_cast<bool>(value)) {} - - constexpr explicit bit(T value) CMT_NOEXCEPT : value(bitcast_anything<itype<T>>(value) < 0) {} - constexpr bit(bool value) CMT_NOEXCEPT : value(value) {} + constexpr bit(bool value) CMT_NOEXCEPT : value(maskbits<T>(value)) {} template <typename U> - constexpr bit(const bit<U>& value) CMT_NOEXCEPT : value(value.value) + constexpr bit(const bit<U>& value) CMT_NOEXCEPT : value(value.operator bool()) { } - constexpr operator bool() const CMT_NOEXCEPT { return value; } - constexpr explicit operator T() const CMT_NOEXCEPT { return maskbits<T>(value); } -}; - -template <typename T> -struct bit_value -{ - T value; - bit_value() CMT_NOEXCEPT = default; - - constexpr bit_value(const bit<T>& value) CMT_NOEXCEPT : bit_value(value.value) {} + constexpr operator bool() const CMT_NOEXCEPT { return bitcast_anything<itype<T>>(value) < 0; } - constexpr bit_value(T value) CMT_NOEXCEPT : value(value) {} - constexpr bit_value(bool value) CMT_NOEXCEPT : value(maskbits<T>(value)) {} + constexpr bit(T value) CMT_NOEXCEPT = delete; + constexpr operator T() const CMT_NOEXCEPT = delete; - template <typename U> - constexpr bit_value(const bit_value<U>& value) CMT_NOEXCEPT : bit_value(value.operator bool()) + constexpr bool operator==(const bit& other) const CMT_NOEXCEPT { + return operator bool() == other.operator bool(); } - - constexpr operator bool() const CMT_NOEXCEPT { return bitcast_anything<itype<T>>(value) < 0; } - constexpr explicit operator T() const CMT_NOEXCEPT { return value; } + constexpr bool operator!=(const bit& other) const CMT_NOEXCEPT { return !operator==(other); } + 
constexpr bool operator==(bool other) const CMT_NOEXCEPT { return operator bool() == other; } + constexpr bool operator!=(bool other) const CMT_NOEXCEPT { return !operator==(other); } }; template <typename T> @@ -272,11 +261,37 @@ struct unwrap_bit<bit<T>> template <typename T> using unwrap_bit = typename internal_generic::unwrap_bit<T>::type; + template <typename T> constexpr inline bool is_bit = false; template <typename T> constexpr inline bool is_bit<bit<T>> = true; +template <typename T> +CMT_INTRINSIC T unwrap_bit_value(const T& value) +{ + return value; +} +template <typename T> +CMT_INTRINSIC T unwrap_bit_value(const bit<T>& value) +{ + return value.value; +} + +template <typename T, KFR_ENABLE_IF(is_bit<T>)> +CMT_INTRINSIC T wrap_bit_value(const unwrap_bit<T>& value) +{ + T result; + result.value = value; + return result; +} + +template <typename T, KFR_ENABLE_IF(!is_bit<T>)> +CMT_INTRINSIC T wrap_bit_value(const T& value) +{ + return value; +} + namespace fn_generic { ///@copybrief cometa::pass_through diff --git a/include/kfr/simd/vec.hpp b/include/kfr/simd/vec.hpp @@ -261,7 +261,7 @@ struct alignas(internal::vec_alignment<T, N_>) vec // default KFR_MEM_INTRINSIC constexpr vec() CMT_NOEXCEPT {} -#if defined(_MSC_VER) && !defined(__clang__) +#ifdef CMT_COMPILER_IS_MSVC // MSVC Internal Compiler Error workaround // copy KFR_MEM_INTRINSIC constexpr vec(const vec& value) CMT_NOEXCEPT : v(value.v) {} @@ -295,7 +295,7 @@ struct alignas(internal::vec_alignment<T, N_>) vec KFR_ENABLE_IF(std::is_convertible_v<U, value_type>&& compound_type_traits<T>::is_scalar)> KFR_MEM_INTRINSIC vec(const U& s) CMT_NOEXCEPT : v(intrinsics::simd_broadcast(intrinsics::simd_t<unwrap_bit<ST>, SN>{}, - static_cast<unwrap_bit<ST>>(static_cast<ST>(s)))) + unwrap_bit_value(static_cast<ST>(s)))) { } @@ -332,6 +332,19 @@ struct alignas(internal::vec_alignment<T, N_>) vec { } + // from mask of the same type + template <typename U = T, KFR_ENABLE_IF(!is_bit<U> && 
compound_type_traits<T>::is_scalar)> + KFR_MEM_INTRINSIC explicit vec(const vec<bit<T>, N>& x) CMT_NOEXCEPT + : v(x.v) + { + } + // from vec to mask of the same type + template <typename U = T, KFR_ENABLE_IF(is_bit<U> && compound_type_traits<T>::is_scalar)> + KFR_MEM_INTRINSIC explicit vec(const vec<unwrap_bit<T>, N>& x) CMT_NOEXCEPT + : v(x.v) + { + } + // from lambda template <typename Fn, KFR_ENABLE_IF(std::is_invocable_r_v<T, Fn, size_t>)> KFR_MEM_INTRINSIC vec(Fn&& fn) CMT_NOEXCEPT diff --git a/sources.cmake b/sources.cmake @@ -522,6 +522,7 @@ set( ${PROJECT_SOURCE_DIR}/tests/unit/math/tan.cpp ${PROJECT_SOURCE_DIR}/tests/unit/simd/abs.cpp ${PROJECT_SOURCE_DIR}/tests/unit/simd/complex.cpp + ${PROJECT_SOURCE_DIR}/tests/unit/simd/logical.cpp ${PROJECT_SOURCE_DIR}/tests/unit/simd/min_max.cpp ${PROJECT_SOURCE_DIR}/tests/unit/simd/operators.cpp ${PROJECT_SOURCE_DIR}/tests/unit/simd/round.cpp diff --git a/tests/unit/simd/logical.cpp b/tests/unit/simd/logical.cpp @@ -0,0 +1,29 @@ +/** + * KFR (https://www.kfrlib.com) + * Copyright (C) 2016-2023 Dan Cazarin + * See LICENSE.txt for details + */ + +#include <kfr/simd/logical.hpp> + +namespace kfr +{ +inline namespace CMT_ARCH_NAME +{ + +TEST(logical_all) +{ + CHECK(all(mask<f32, 4>{ true, true, true, true }) == true); + CHECK(all(mask<f32, 4>{ true, false, true, false }) == false); + CHECK(all(mask<f32, 4>{ false, true, false, true }) == false); + CHECK(all(mask<f32, 4>{ false, false, false, false }) == false); +} +TEST(logical_any) +{ + CHECK(any(mask<f32, 4>{ true, true, true, true }) == true); + CHECK(any(mask<f32, 4>{ true, false, true, false }) == true); + CHECK(any(mask<f32, 4>{ false, true, false, true }) == true); + CHECK(any(mask<f32, 4>{ false, false, false, false }) == false); +} +} // namespace CMT_ARCH_NAME +} // namespace kfr diff --git a/tests/unit/simd/vec.cpp b/tests/unit/simd/vec.cpp @@ -208,6 +208,19 @@ TEST(masks) CHECK(float(v[1]) == maskbits<float>(true)); CHECK(float(v[2]) == 
maskbits<float>(false)); CHECK(float(v[3]) == maskbits<float>(true)); + + CHECK(bitcast_anything<std::array<int32_t, 1>>(mask<i32, 1>{ true }) == std::array<int32_t, 1>{ -1 }); + CHECK(bitcast_anything<std::array<int32_t, 2>>(mask<i32, 2>{ true, true }) == + std::array<int32_t, 2>{ -1, -1 }); + CHECK(bitcast_anything<std::array<int32_t, 4>>(mask<i32, 4>{ true, true, true, true }) == + std::array<int32_t, 4>{ -1, -1, -1, -1 }); + + CHECK(bitcast_anything<u8x16>(mask<f32, 4>{ true, true, true, true }) == + u8x16{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }); + CHECK(bitcast_anything<u8x16>(bitcast<bit<u8>>(mask<i32, 4>{ true, true, true, true })) == + u8x16{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }); + CHECK(bitcast_anything<u8x16>(bitcast<bit<u8>>(mask<f32, 4>{ true, true, true, true })) == + u8x16{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }); } TEST(vec_deduction)