kfr

Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)
Log | Files | Refs | README

commit 73c5446bd8040f030b87da4392e41643e54b6eeb
parent 8de4b0fe7e8c96548de8dd62c698399fdd58f8e6
Author: samuriddle@gmail.com <samuriddle@gmail.com>
Date:   Sun,  7 Aug 2016 07:09:20 +0300

New conversion functions and refactoring

Diffstat:
M include/kfr/base/asin_acos.hpp | 4 ++--
M include/kfr/base/complex.hpp | 66 +++++++++++++++++++++++++++++++++++-------------------------------
M include/kfr/base/expression.hpp | 15 +++++++--------
M include/kfr/base/generators.hpp | 10 +++++-----
M include/kfr/base/log_exp.hpp | 2 +-
M include/kfr/base/operators.hpp | 115 +++++++++++++++++++++++++++++++++++++++++++------------------------------------
M include/kfr/base/pointer.hpp | 2 +-
M include/kfr/base/random.hpp | 8 ++++----
M include/kfr/base/round.hpp | 20 ++++++++++++++++++++
M include/kfr/base/select.hpp | 7 +++----
M include/kfr/base/types.hpp | 18 ++++++++++++++----
M include/kfr/base/univector.hpp | 6 +++---
M include/kfr/base/vec.hpp | 256 +++++++++++++++++++++++++++++++++++++++++++++++--------------------------------
M include/kfr/cometa.hpp | 20 ++++++++++++--------
M include/kfr/dft/ft.hpp | 3 +++
M include/kfr/dsp/biquad.hpp | 135 +++++++++++++++++++++++++++++++++++++------------------------------------------
M include/kfr/dsp/fir.hpp | 8 ++++++--
M include/kfr/dsp/goertzel.hpp | 6 +++---
M include/kfr/dsp/units.hpp | 2 +-
M include/kfr/dsp/window.hpp | 26 +++++++++++++-------------
M include/kfr/io/file.hpp | 4 ++--
M tests/complex_test.cpp | 6 +++---
22 files changed, 417 insertions(+), 322 deletions(-)

diff --git a/include/kfr/base/asin_acos.hpp b/include/kfr/base/asin_acos.hpp @@ -36,14 +36,14 @@ namespace intrinsics template <typename T, size_t N, typename Tout = flt_type<T>> KFR_SINTRIN vec<Tout, N> asin(const vec<T, N>& x) { - const vec<Tout, N> xx = cast<Tout>(x); + const vec<Tout, N> xx = x; return atan2(xx, sqrt(Tout(1) - xx * xx)); } template <typename T, size_t N, typename Tout = flt_type<T>> KFR_SINTRIN vec<Tout, N> acos(const vec<T, N>& x) { - const vec<Tout, N> xx = cast<Tout>(x); + const vec<Tout, N> xx = x; return atan2(sqrt(Tout(1) - xx * xx), xx); } KFR_I_CONVERTER(asin) diff --git a/include/kfr/base/complex.hpp b/include/kfr/base/complex.hpp @@ -100,10 +100,12 @@ namespace cometa template <typename T> struct compound_type_traits<kfr::complex<T>> { - constexpr static size_t width = 2; - using subtype = T; - using deep_subtype = cometa::deep_subtype<T>; - constexpr static bool is_scalar = false; + constexpr static size_t width = 2; + constexpr static size_t deep_width = width * compound_type_traits<T>::width; + using subtype = T; + using deep_subtype = cometa::deep_subtype<T>; + constexpr static bool is_scalar = false; + constexpr static size_t depth = cometa::compound_type_traits<T>::depth + 1; template <typename U> using rebind = kfr::complex<U>; template <typename U> @@ -157,21 +159,21 @@ struct vec_op<complex<T>> : private vec_op<T> template <typename T, size_t N> KFR_INLINE vec<complex<T>, N> cdupreal(const vec<complex<T>, N>& x) { - return subcast<complex<T>>(dupeven(subcast<T>(x))); + return compcast<complex<T>>(dupeven(compcast<T>(x))); } KFR_FN(cdupreal) template <typename T, size_t N> KFR_INLINE vec<complex<T>, N> cdupimag(const vec<complex<T>, N>& x) { - return subcast<complex<T>>(dupodd(subcast<T>(x))); + return compcast<complex<T>>(dupodd(compcast<T>(x))); } KFR_FN(cdupimag) template <typename T, size_t N> KFR_INLINE vec<complex<T>, N> cswapreim(const vec<complex<T>, N>& x) { - return subcast<complex<T>>(swap<2>(subcast<T>(x))); + 
return compcast<complex<T>>(swap<2>(compcast<T>(x))); } KFR_FN(cswapreim) @@ -205,41 +207,43 @@ template <typename T> struct is_complex_impl<complex<T>> : std::true_type { }; -} - -// real to complex -template <typename To, typename From, size_t N, KFR_ENABLE_IF(internal::is_complex_impl<To>::value)> -constexpr KFR_INLINE vec<To, N> cast(const vec<From, N>& value) noexcept -{ - const vec<subtype<To>, N> casted = cast<subtype<To>>(value); - return subcast<To>(interleave(casted, zerovector(casted))); -} -// complex to complex -template <typename To, typename From, size_t N, KFR_ENABLE_IF(internal::is_complex_impl<To>::value)> -constexpr KFR_INLINE vec<To, N> cast(const vec<complex<From>, N>& value) noexcept +// vector<complex> to vector<complex> +template <typename To, typename From, size_t N> +struct conversion<vec<complex<To>, N>, vec<complex<From>, N>> { - return subcast<To>(cast<subtype<To>>(subcast<From>(value))); -} + static_assert(!is_compound<To>::value, ""); + static_assert(!is_compound<From>::value, ""); + static vec<complex<To>, N> cast(const vec<complex<From>, N>& value) + { + return builtin_convertvector<complex<To>>(value); + } +}; -// complex to real -template <typename To, typename From, size_t N, KFR_ENABLE_IF(!internal::is_complex_impl<To>::value)> -constexpr KFR_INLINE vec<To, N> cast(const vec<complex<From>, N>& value) noexcept +// vector to vector<complex> +template <typename To, typename From, size_t N> +struct conversion<vec<complex<To>, N>, vec<From, N>> { - static_assert(sizeof(To) == 0, "Can't cast complex to real"); - return {}; + static_assert(!is_compound<To>::value, ""); + static_assert(!is_compound<From>::value, ""); + static vec<complex<To>, N> cast(const vec<From, N>& value) + { + const vec<To, N> casted = static_cast<vec<To, N>>(value); + return *interleave(casted, zerovector(casted)); + } +}; } template <typename T, size_t N> constexpr KFR_INLINE vec<complex<T>, N / 2> ccomp(const vec<T, N>& x) { - return subcast<complex<T>>(x); + 
return compcast<complex<T>>(x); } template <typename T, size_t N> constexpr KFR_INLINE vec<T, N * 2> cdecom(const vec<complex<T>, N>& x) { - return subcast<T>(x); + return compcast<T>(x); } template <typename T> @@ -250,7 +254,7 @@ constexpr KFR_INLINE T real(const complex<T>& value) template <typename T, size_t N> constexpr KFR_INLINE vec<T, N> real(const vec<complex<T>, N>& value) { - return even(subcast<T>(value)); + return even(compcast<T>(value)); } template <typename T> @@ -273,7 +277,7 @@ constexpr KFR_INLINE T imag(const complex<T>& value) template <typename T, size_t N> constexpr KFR_INLINE vec<T, N> imag(const vec<complex<T>, N>& value) { - return odd(subcast<T>(value)); + return odd(compcast<T>(value)); } KFR_FN(imag) template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> @@ -285,7 +289,7 @@ KFR_INLINE internal::expression_function<fn_imag, E1> imag(E1&& x) template <typename T1, typename T2 = T1, size_t N, typename T = common_type<T1, T2>> constexpr KFR_INLINE vec<complex<T>, N> make_complex(const vec<T1, N>& real, const vec<T2, N>& imag = T2(0)) { - return subcast<complex<T>>(interleave(cast<T>(real), cast<T>(imag))); + return compcast<complex<T>>(interleave(cast<T>(real), cast<T>(imag))); } template <typename T1, typename T2 = T1, typename T = common_type<T1, T2>> diff --git a/include/kfr/base/expression.hpp b/include/kfr/base/expression.hpp @@ -99,8 +99,7 @@ protected: private: template <typename Arg, size_t N, typename Tin, - typename Tout1 = conditional<is_generic<Arg>::value, Tin, typename decay<Arg>::value_type>, - typename Tout = Tout1> + typename Tout = conditional<is_generic<Arg>::value, Tin, value_type_of<Arg>>> KFR_INLINE vec_t<Tout, N> vec_t_for() const { return {}; @@ -112,8 +111,8 @@ private: constexpr size_t Nin = N * ratio::input / ratio::output; using Tout = conditional<is_same<generic, value_type>::value, T, common_type<T, value_type>>; - return cast<T>(fn(cast<Tout>(std::get<indices>(this->args)( - cinput, index * 
ratio::input / ratio::output, vec_t_for<Args, Nin, Tout>()))...)); + return fn(std::get<indices>(this->args)(cinput, index * ratio::input / ratio::output, + vec_t_for<Args, Nin, Tout>())...); } template <size_t... indices> KFR_INLINE void begin_block_impl(size_t size, csizes_t<indices...>) @@ -149,7 +148,7 @@ struct expression_scalar : input_expression template <typename U, size_t N> KFR_INLINE vec<U, N> operator()(cinput_t, size_t, vec_t<U, N>) const { - return resize<N>(cast<U>(val)); + return resize<N>(static_cast<vec<U, width>>(val)); } }; @@ -204,7 +203,7 @@ KFR_INLINE void process_cycle(OutFn&& outfn, const Fn& fn, size_t& i, size_t siz KFR_LOOP_NOUNROLL for (; i < count; i += width) { - outfn(coutput, i, cast<Tout>(fn(cinput, i, vec_t<Tin, width>()))); + outfn(coutput, i, fn(cinput, i, vec_t<Tin, width>())); } } } @@ -269,7 +268,7 @@ struct expressoin_typed : input_expression template <typename U, size_t N> KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const { - return cast<U>(e1(cinput, index, vec_t<T, N>())); + return e1(cinput, index, vec_t<T, N>()); } E1 e1; }; @@ -286,7 +285,7 @@ struct expressoin_sized : input_expression KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const { auto val = e1(cinput, index, vec_t<T, N>()); - return cast<U>(val); + return val; } constexpr size_t size() const noexcept { return m_size; } diff --git a/include/kfr/base/generators.hpp b/include/kfr/base/generators.hpp @@ -43,7 +43,7 @@ struct generator : input_expression template <typename U, size_t N> KFR_INLINE vec<U, N> operator()(cinput_t, size_t, vec_t<U, N> t) const { - return cast<U>(generate(t)); + return generate(t); } void resync(T start) const { ptr_cast<Class>(this)->sync(start); } @@ -108,7 +108,7 @@ protected: T vstep; }; -template <typename T, size_t width = get_vector_width<T, cpu_t::native>(1, 2)> +template <typename T, size_t width = get_vector_width<T, cpu_t::native>(1, 2), KFR_ARCH_DEP> struct generator_exp : 
generator<T, width, generator_exp<T, width>> { generator_exp(T start, T step) noexcept : step(step), vstep(exp(make_vector(step* width))[0] - 1) @@ -125,7 +125,7 @@ protected: T vstep; }; -template <typename T, size_t width = get_vector_width<T, cpu_t::native>(1, 2)> +template <typename T, size_t width = get_vector_width<T, cpu_t::native>(1, 2), KFR_ARCH_DEP> struct generator_exp2 : generator<T, width, generator_exp2<T, width>> { generator_exp2(T start, T step) noexcept : step(step), vstep(exp2(make_vector(step* width))[0] - 1) @@ -142,7 +142,7 @@ protected: T vstep; }; -template <typename T, size_t width = get_vector_width<T, cpu_t::native>(1, 2)> +template <typename T, size_t width = get_vector_width<T, cpu_t::native>(1, 2), KFR_ARCH_DEP> struct generator_cossin : generator<T, width, generator_cossin<T, width>> { generator_cossin(T start, T step) @@ -167,7 +167,7 @@ protected: } }; -template <typename T, size_t width = get_vector_width<T, cpu_t::native>(2, 4)> +template <typename T, size_t width = get_vector_width<T, cpu_t::native>(2, 4), KFR_ARCH_DEP> struct generator_sin : generator<T, width, generator_sin<T, width>> { generator_sin(T start, T step) diff --git a/include/kfr/base/log_exp.hpp b/include/kfr/base/log_exp.hpp @@ -81,7 +81,7 @@ KFR_SINTRIN vec<f64, N> vldexpk(const vec<f64, N>& x, const vec<i64, N>& q) template <typename T, size_t N> KFR_SINTRIN vec<T, N> logb(const vec<T, N>& x) { - return select(x == T(), -c_infinity<T>, cast<T>(vilogbp1(x) - 1)); + return select(x == T(), -c_infinity<T>, static_cast<vec<T, N>>(vilogbp1(x) - 1)); } template <size_t N> diff --git a/include/kfr/base/operators.hpp b/include/kfr/base/operators.hpp @@ -81,9 +81,7 @@ KFR_INLINE internal::expression_function<fn_add, E1, E2> add(E1&& x, E2&& y) template <typename E1, typename E2, typename E3, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> KFR_INLINE internal::expression_function<fn_add, E1> add(E1&& x, E2&& y, E3&& z) { - return { fn_add(), std::forward<E1>(x), 
std::forward<E2>(y), std::forward<E3>(z) - - }; + return { fn_add(), std::forward<E1>(x), std::forward<E2>(y), std::forward<E3>(z) }; } template <typename T1, typename T2> @@ -101,9 +99,7 @@ KFR_FN(sub) template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> KFR_INLINE internal::expression_function<fn_sub, E1, E2> sub(E1&& x, E2&& y) { - return { fn_sub(), std::forward<E1>(x), std::forward<E2>(y) - - }; + return { fn_sub(), std::forward<E1>(x), std::forward<E2>(y) }; } template <typename T1> @@ -111,10 +107,10 @@ constexpr inline T1 mul(T1 x) { return x; } -template <typename T1, typename T2, typename... Ts> -constexpr inline common_type<T1, T2, Ts...> mul(T1 x, T2 y, Ts... rest) +template <typename T1, typename T2, typename... Ts, typename Tout = common_type<T1, T2, Ts...>> +constexpr inline Tout mul(T1 x, T2 y, Ts... rest) { - return x * mul(std::forward<T2>(y), std::forward<Ts>(rest)...); + return static_cast<Tout>(x) * static_cast<Tout>(mul(std::forward<T2>(y), std::forward<Ts>(rest)...)); } template <typename T> @@ -156,9 +152,7 @@ KFR_FN(cub) template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> KFR_INLINE internal::expression_function<fn_cub, E1> cub(E1&& x) { - return { fn_cub(), std::forward<E1>(x) - - }; + return { fn_cub(), std::forward<E1>(x) }; } template <typename T> @@ -192,30 +186,22 @@ KFR_FN(pow5) template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> KFR_INLINE internal::expression_function<fn_pow2, E1> pow2(E1&& x) { - return { fn_pow2(), std::forward<E1>(x) - - }; + return { fn_pow2(), std::forward<E1>(x) }; } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> KFR_INLINE internal::expression_function<fn_pow3, E1> pow3(E1&& x) { - return { fn_pow3(), std::forward<E1>(x) - - }; + return { fn_pow3(), std::forward<E1>(x) }; } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> KFR_INLINE internal::expression_function<fn_pow4, E1> pow4(E1&& x) { - 
return { fn_pow4(), std::forward<E1>(x) - - }; + return { fn_pow4(), std::forward<E1>(x) }; } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> KFR_INLINE internal::expression_function<fn_pow5, E1> pow5(E1&& x) { - return { fn_pow5(), std::forward<E1>(x) - - }; + return { fn_pow5(), std::forward<E1>(x) }; } /// Raise x to the power base $x^{base}$ @@ -265,24 +251,24 @@ KFR_FN(sqrsum) KFR_FN(sqrdiff) /// Division -template <typename T1, typename T2> -inline common_type<T1, T2> div(T1 x, T2 y) +template <typename T1, typename T2, typename Tout = common_type<T1, T2>> +inline Tout div(const T1& x, const T2& y) { - return x / y; + return static_cast<Tout>(x) / static_cast<Tout>(y); } KFR_FN(div) /// Remainder -template <typename T1, typename T2> -inline common_type<T1, T2> rem(T1 x, T2 y) +template <typename T1, typename T2, typename Tout = common_type<T1, T2>> +inline Tout rem(const T1& x, const T2& y) { - return x % y; + return static_cast<Tout>(x) % static_cast<Tout>(y); } KFR_FN(rem) /// Negation template <typename T1> -inline T1 neg(T1 x) +inline T1 neg(const T1& x) { return -x; } @@ -290,7 +276,7 @@ KFR_FN(neg) /// Bitwise Not template <typename T1> -inline T1 bitwisenot(T1 x) +inline T1 bitwisenot(const T1& x) { return ~x; } @@ -499,26 +485,6 @@ constexpr KFR_INLINE vec<T, N> copysign(const vec<T, N>& x, const vec<T, N>& y) return (x & internal::highbitmask<T>) | (y & internal::highbitmask<T>); } -template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)> -KFR_INLINE vec<T, N> fmod(const vec<T, N>& x, const vec<T, N>& y) -{ - return x - cast<itype<T>>(x / y) * y; -} - -KFR_FN_S(fmod) -KFR_FN(fmod) - -template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)> -constexpr KFR_INLINE vec<T, N> rem(const vec<T, N>& x, const vec<T, N>& y) -{ - return x % y; -} -template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)> -KFR_INLINE vec<T, N> rem(const vec<T, N>& x, const vec<T, N>& y) -{ - return fmod(x, y); -} - 
template <typename T, size_t N> KFR_INLINE mask<T, N> isnan(const vec<T, N>& x) { @@ -695,4 +661,49 @@ KFR_EXPR_BINARY(fn_less, <) KFR_EXPR_BINARY(fn_greater, >) KFR_EXPR_BINARY(fn_lessorequal, <=) KFR_EXPR_BINARY(fn_greaterorequal, >=) + +template <typename T, size_t N1, size_t... Ns> +vec<vec<T, sizeof...(Ns) + 1>, N1> packtranspose(const vec<T, N1>& x, const vec<T, Ns>&... rest) +{ + const vec<T, N1*(sizeof...(Ns) + 1)> t = transpose<N1>(concat(x, rest...)); + return compcast<vec<T, sizeof...(Ns) + 1>>(t); +} + +KFR_FN(packtranspose) + +namespace internal +{ +template <typename... E> +struct expression_pack : expression<E...>, output_expression +{ + expression_pack(E&&... e) : expression<E...>(std::forward<E>(e)...) {} + using value_type = vec<common_type<value_type_of<E>...>, sizeof...(E)>; + using size_type = typename expression<E...>::size_type; + constexpr size_type size() const noexcept { return expression<E...>::size(); } + + template <typename U, size_t N> + KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N> x) const + { + return this->call(fn_packtranspose(), index, x); + } + template <typename U, size_t N> + KFR_INLINE void operator()(coutput_t, size_t index, const vec<U, N>& x) + { + output(index, x, csizeseq<sizeof...(E)>); + } + +private: + template <typename U, size_t N, size_t... indices> + void output(size_t index, const vec<U, N>& x, csizes_t<indices...>) + { + swallow{ (std::get<indices>(this->args)(coutput, index, x[indices]), void(), 0)... }; + } +}; +} + +template <typename... E, KFR_ENABLE_IF(is_input_expressions<E...>::value)> +internal::expression_pack<internal::arg<E>...> pack(E&&... 
e) +{ + return internal::expression_pack<internal::arg<E>...>(std::forward<E>(e)...); +} } diff --git a/include/kfr/base/pointer.hpp b/include/kfr/base/pointer.hpp @@ -82,7 +82,7 @@ struct expression_pointer : input_expression constexpr size_t findex = ilog2(N); static_assert(N <= maxwidth, "N is greater than maxwidth"); func_t func = reinterpret_cast<func_t>(vtable->get(csize<2 + findex>)); - vec<U, N> result = cast<U>(func(instance, index)); + vec<U, N> result = func(instance, index); return result; } KFR_INLINE void begin_block(size_t size) const diff --git a/include/kfr/base/random.hpp b/include/kfr/base/random.hpp @@ -114,8 +114,8 @@ inline enable_if_not_f<vec<T, N>> random_range(random_bit_generator& gen, T min, using big_type = findinttype<sqr(std::numeric_limits<T>::min()), sqr(std::numeric_limits<T>::max())>; vec<T, N> u = random_uniform<T, N>(gen); - const vec<big_type, N> tmp = cast<big_type>(u); - return cast<T>((tmp * (max - min) + min) >> typebits<T>::bits); + const vec<big_type, N> tmp = u; + return (tmp * (max - min) + min) >> typebits<T>::bits; } namespace internal @@ -128,7 +128,7 @@ struct expression_random_uniform : input_expression template <typename U, size_t N> vec<U, N> operator()(cinput_t, size_t, vec_t<U, N>) const { - return cast<U>(random_uniform<T, N>(gen)); + return random_uniform<T, N>(gen); } mutable random_bit_generator gen; }; @@ -146,7 +146,7 @@ struct expression_random_range : input_expression template <typename U, size_t N> vec<U, N> operator()(cinput_t, size_t, vec_t<U, N>) const { - return cast<U>(random_range<N, T>(gen, min, max)); + return random_range<N, T>(gen, min, max); } mutable random_bit_generator gen; const T min; diff --git a/include/kfr/base/round.hpp b/include/kfr/base/round.hpp @@ -318,6 +318,26 @@ KFR_INTRIN internal::expression_function<fn::itrunc, E1> itrunc(E1&& x) { return { fn::itrunc(), std::forward<E1>(x) }; } + +template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)> +KFR_INLINE vec<T, N> 
fmod(const vec<T, N>& x, const vec<T, N>& y) +{ + return x - trunc(x / y) * y; +} + +KFR_FN_S(fmod) +KFR_FN(fmod) + +template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)> +constexpr KFR_INLINE vec<T, N> rem(const vec<T, N>& x, const vec<T, N>& y) +{ + return x % y; +} +template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)> +KFR_INLINE vec<T, N> rem(const vec<T, N>& x, const vec<T, N>& y) +{ + return fmod(x, y); +} } #undef KFR_mm_trunc_ps diff --git a/include/kfr/base/select.hpp b/include/kfr/base/select.hpp @@ -180,9 +180,9 @@ KFR_SINTRIN vec<T, N> select(const mask<T, N>& m, const vec<T, N>& x, const vec< // fallback template <typename T, size_t N> -KFR_SINTRIN vec<T, N> select(mask<T, N> m, const vec<T, N>& x, const vec<T, N>& y) +KFR_SINTRIN vec<T, N> select(const mask<T, N>& m, const vec<T, N>& x, const vec<T, N>& y) { - return y ^ ((x ^ y) & m); + return y ^ ((x ^ y) & m.asvec()); } #endif } @@ -193,8 +193,7 @@ template <typename T1, size_t N, typename T2, typename T3, KFR_ENABLE_IF(is_nume KFR_INTRIN vec<Tout, N> select(const mask<T1, N>& m, const T2& x, const T3& y) { static_assert(sizeof(T1) == sizeof(Tout), "select: incompatible types"); - return intrinsics::select(bitcast<Tout>(m).asmask(), static_cast<vec<Tout, N>>(x), - static_cast<vec<Tout, N>>(y)); + return intrinsics::select(bitcast<Tout>(m), static_cast<vec<Tout, N>>(x), static_cast<vec<Tout, N>>(y)); } template <typename E1, typename E2, typename E3, KFR_ENABLE_IF(is_input_expressions<E1, E2, E3>::value)> diff --git a/include/kfr/base/types.hpp b/include/kfr/base/types.hpp @@ -349,6 +349,8 @@ enum class cpu_t : int runtime = -1, }; +#define KFR_ARCH_DEP cpu_t cpu = cpu_t::native + template <cpu_t cpu> using ccpu_t = cval_t<cpu_t, cpu>; @@ -578,6 +580,12 @@ constexpr inline static const T* derived_cast(const U* ptr) return static_cast<const T*>(ptr); } +template <typename T, typename U> +constexpr inline static T implicit_cast(U&& value) +{ + return 
std::forward<T>(value); +} + #pragma clang diagnostic pop __attribute__((unused)) static const char* cpu_name(cpu_t set) @@ -781,10 +789,12 @@ namespace cometa template <typename T, size_t N> struct compound_type_traits<kfr::vec_t<T, N>> { - constexpr static size_t width = N; - using subtype = T; - using deep_subtype = cometa::deep_subtype<T>; - constexpr static bool is_scalar = false; + constexpr static size_t width = N; + constexpr static size_t deep_width = width * compound_type_traits<T>::width; + using subtype = T; + using deep_subtype = cometa::deep_subtype<T>; + constexpr static bool is_scalar = false; + constexpr static size_t depth = cometa::compound_type_traits<T>::depth + 1; template <typename U> using rebind = kfr::vec_t<U, N>; diff --git a/include/kfr/base/univector.hpp b/include/kfr/base/univector.hpp @@ -43,13 +43,13 @@ struct univector_base : input_expression, output_expression KFR_INLINE void operator()(coutput_t, size_t index, const vec<U, N>& value) { T* data = derived_cast<Class>(this)->data(); - write(ptr_cast<T>(data) + index, cast<T>(value)); + write(ptr_cast<T>(data) + index, vec<T, N>(value)); } template <typename U, size_t N> KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const { const T* data = derived_cast<Class>(this)->data(); - return cast<U>(read<N>(ptr_cast<T>(data) + index)); + return static_cast<vec<U, N>>(read<N>(ptr_cast<T>(data) + index)); } template <typename Input, KFR_ENABLE_IF(is_input_expression<Input>::value)> @@ -197,7 +197,7 @@ struct univector<T, tag_array_ref> : array_ref<T>, univector_base<T, univector<T constexpr static bool is_array_ref = true; constexpr static bool is_vector = false; constexpr static bool is_aligned = false; - using value_type = T; + using value_type = remove_const<T>; using univector_base<T, univector>::operator=; }; diff --git a/include/kfr/base/vec.hpp b/include/kfr/base/vec.hpp @@ -99,7 +99,7 @@ struct vec_ptr template <typename To, typename From, size_t N, 
KFR_ENABLE_IF(std::is_same<subtype<From>, subtype<To>>::value), size_t Nout = N* compound_type_traits<From>::width / compound_type_traits<To>::width> -constexpr KFR_INLINE vec<To, Nout> subcast(const vec<From, N>& value) noexcept +constexpr KFR_INLINE vec<To, Nout> compcast(const vec<From, N>& value) noexcept { return *value; } @@ -154,8 +154,8 @@ template <typename T, size_t N, size_t... Indices, KFR_ENABLE_IF(is_compound<T>: KFR_INLINE vec<T, sizeof...(Indices)> shufflevector(csizes_t<Indices...> indices, const vec<T, N>& x, const vec<T, N>& y) { - return subcast<T>( - shufflevector(inflate(csize<widthof<T>()>, indices), subcast<subtype<T>>(x), subcast<subtype<T>>(y))); + return compcast<T>(shufflevector(inflate(csize<widthof<T>()>, indices), compcast<subtype<T>>(x), + compcast<subtype<T>>(y))); } template <size_t... Indices, size_t Nout = sizeof...(Indices), typename T, size_t N> @@ -225,14 +225,90 @@ constexpr swiz<14> s14{}; constexpr swiz<15> s15{}; } -template <typename To, typename From, KFR_ENABLE_IF(!is_compound<From>::value)> -constexpr KFR_INLINE To cast(From value) noexcept +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wold-style-cast" + +template <size_t N, typename T> +constexpr KFR_INLINE vec<T, N> broadcast(T x) +{ + return (simd<T, N>)(x); +} + +#pragma clang diagnostic pop + +namespace internal +{ + +template <typename To, typename From, size_t N, typename Tsub = deep_subtype<To>, + size_t Nout = N* compound_type_traits<To>::deep_width> +constexpr KFR_INLINE vec<To, N> builtin_convertvector(const vec<From, N>& value) noexcept { - return static_cast<To>(value); + return __builtin_convertvector(*value, simd<Tsub, Nout>); } -template <typename To, typename From, KFR_ENABLE_IF(!is_compound<From>::value)> -constexpr KFR_INLINE To bitcast(From value) noexcept + +// scalar to scalar +template <typename To, typename From> +struct conversion { + static_assert(std::is_convertible<From, To>::value, ""); + static To cast(const From& 
value) { return value; } +}; + +// vector to vector +template <typename To, typename From, size_t N> +struct conversion<vec<To, N>, vec<From, N>> +{ + static_assert(!is_compound<To>::value, ""); + static_assert(!is_compound<From>::value, ""); + static vec<To, N> cast(const vec<From, N>& value) { return builtin_convertvector<To>(value); } +}; + +// vector<vector> to vector<vector> +template <typename To, typename From, size_t N1, size_t N2> +struct conversion<vec<vec<To, N1>, N2>, vec<vec<From, N1>, N2>> +{ + static_assert(!is_compound<To>::value, ""); + static_assert(!is_compound<From>::value, ""); + static vec<vec<To, N1>, N2> cast(const vec<vec<From, N1>, N2>& value) + { + return builtin_convertvector<vec<To, N1>>(value); + } +}; + +// scalar to vector +template <typename To, typename From, size_t N> +struct conversion<vec<To, N>, From> +{ + static_assert(std::is_convertible<From, To>::value, ""); + static vec<To, N> cast(const From& value) { return broadcast<N>(static_cast<To>(value)); } +}; + +// mask to mask +template <typename To, typename From, size_t N> +struct conversion<mask<To, N>, mask<From, N>> +{ + static_assert(sizeof(To) == sizeof(From), ""); + static mask<To, N> cast(const mask<From, N>& value) { return reinterpret_cast<simd<To, N>>(*value); } +}; +} + +template <typename From, size_t N, typename Tsub = deep_subtype<From>, + size_t Nout = N * sizeof(From) / sizeof(Tsub)> +constexpr KFR_INLINE vec<Tsub, Nout> flatten(const vec<From, N>& value) noexcept +{ + return *value; +} + +template <typename To, typename From, typename Tout = deep_rebind<From, To>> +constexpr KFR_INLINE Tout cast(const From& value) noexcept +{ + return static_cast<Tout>(value); +} + +template <typename To, typename From> +constexpr KFR_INLINE To bitcast(const From& value) noexcept +{ + static_assert(sizeof(From) == sizeof(To), "bitcast: Incompatible types"); union { From from; To to; @@ -240,43 +316,34 @@ constexpr KFR_INLINE To bitcast(From value) noexcept return u.to; } 
-template <typename From, typename To = utype<From>, KFR_ENABLE_IF(!is_compound<From>::value)> -constexpr KFR_INLINE To ubitcast(From value) noexcept +template <typename To, typename From, size_t N, size_t Nout = N * sizeof(From) / sizeof(To)> +constexpr KFR_INLINE vec<To, Nout> bitcast(const vec<From, N>& value) noexcept { - return bitcast<To>(value); + return reinterpret_cast<typename vec<To, Nout>::simd_t>(*value); } -template <typename From, typename To = itype<From>, KFR_ENABLE_IF(!is_compound<From>::value)> -constexpr KFR_INLINE To ibitcast(From value) noexcept +template <typename To, typename From, size_t N, size_t Nout = N * sizeof(From) / sizeof(To)> +constexpr KFR_INLINE mask<To, Nout> bitcast(const mask<From, N>& value) noexcept { - return bitcast<To>(value); + return reinterpret_cast<typename mask<To, Nout>::simd_t>(*value); } -template <typename From, typename To = ftype<From>, KFR_ENABLE_IF(!is_compound<From>::value)> -constexpr KFR_INLINE To fbitcast(From value) noexcept +template <typename From, typename To = utype<From>, KFR_ENABLE_IF(!is_compound<From>::value)> +constexpr KFR_INLINE To ubitcast(const From& value) noexcept { return bitcast<To>(value); } -template <typename To, typename From, size_t N, KFR_ENABLE_IF(!is_compound<To>::value)> -constexpr KFR_INLINE vec<To, N> cast(const vec<From, N>& value) noexcept -{ - return __builtin_convertvector(*value, simd<To, N>); -} -template <typename To, typename From, simdindex N> -constexpr KFR_INLINE simd<To, N> cast(const simd<From, N>& value) noexcept -{ - return __builtin_convertvector(value, simd<To, N>); -} -template <typename To, typename From, size_t N, size_t Nout = sizeof(From) * N / sizeof(To)> -constexpr KFR_INLINE vec<To, Nout> bitcast(const vec<From, N>& value) noexcept +template <typename From, typename To = itype<From>, KFR_ENABLE_IF(!is_compound<From>::value)> +constexpr KFR_INLINE To ibitcast(const From& value) noexcept { - return reinterpret_cast<simd<To, Nout>>(*value); + return 
bitcast<To>(value); } -template <typename To, typename From, simdindex N, simdindex Nout = sizeof(From) * N / sizeof(To)> -constexpr KFR_INLINE simd<To, Nout> bitcast(const simd<From, N>& value) noexcept + +template <typename From, typename To = ftype<From>, KFR_ENABLE_IF(!is_compound<From>::value)> +constexpr KFR_INLINE To fbitcast(const From& value) noexcept { - return reinterpret_cast<simd<To, Nout>>(value); + return bitcast<To>(value); } template <typename From, size_t N, typename To = utype<From>, size_t Nout = sizeof(From) * N / sizeof(To)> @@ -297,27 +364,6 @@ constexpr KFR_INLINE vec<To, Nout> fbitcast(const vec<From, N>& value) noexcept return reinterpret_cast<simd<To, Nout>>(*value); } -template <typename From, simdindex N, typename To = utype<From>, - simdindex Nout = sizeof(From) * N / sizeof(To)> -constexpr KFR_INLINE simd<To, Nout> ubitcast(const simd<From, N>& value) noexcept -{ - return reinterpret_cast<simd<To, Nout>>(value); -} - -template <typename From, simdindex N, typename To = itype<From>, - simdindex Nout = sizeof(From) * N / sizeof(To)> -constexpr KFR_INLINE simd<To, Nout> ibitcast(const simd<From, N>& value) noexcept -{ - return reinterpret_cast<simd<To, Nout>>(value); -} - -template <typename From, simdindex N, typename To = ftype<From>, - simdindex Nout = sizeof(From) * N / sizeof(To)> -constexpr KFR_INLINE simd<To, Nout> fbitcast(const simd<From, N>& value) noexcept -{ - return reinterpret_cast<simd<To, Nout>>(value); -} - constexpr KFR_INLINE size_t vector_alignment(size_t size) { return next_poweroftwo(size); } template <typename T, size_t N, size_t... 
Sizes, size_t Nout = N + csum(csizes<Sizes...>)> @@ -345,17 +391,6 @@ KFR_INLINE vec<T, Nout> repeat(const vec<T, N>& x) } KFR_FN(repeat) -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wold-style-cast" - -template <size_t N, typename T> -constexpr KFR_INLINE vec<T, N> broadcast(T x) -{ - return (simd<T, N>)(x); -} - -#pragma clang diagnostic pop - template <size_t Nout, typename T, size_t N, KFR_ENABLE_IF(Nout != N)> KFR_INLINE vec<T, Nout> resize(const vec<T, N>& x) { @@ -422,7 +457,8 @@ private: template <typename T> struct vec_op { - using scalar_type = subtype<T>; + using scalar_type = subtype<T>; + using uscalar_type = utype<scalar_type>; template <simdindex N> constexpr static simd<scalar_type, N> add(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept @@ -467,53 +503,56 @@ struct vec_op template <simdindex N> constexpr static simd<scalar_type, N> band(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept { - return bitcast<scalar_type>(ubitcast(x) & ubitcast(y)); + return reinterpret_cast<simd<scalar_type, N>>(reinterpret_cast<simd<uscalar_type, N>>(x) & + reinterpret_cast<simd<uscalar_type, N>>(y)); } template <simdindex N> constexpr static simd<scalar_type, N> bor(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept { - return bitcast<scalar_type>(ubitcast(x) | ubitcast(y)); + return reinterpret_cast<simd<scalar_type, N>>(reinterpret_cast<simd<uscalar_type, N>>(x) | + reinterpret_cast<simd<uscalar_type, N>>(y)); } template <simdindex N> constexpr static simd<scalar_type, N> bxor(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept { - return bitcast<scalar_type>(ubitcast(x) ^ ubitcast(y)); + return reinterpret_cast<simd<scalar_type, N>>(reinterpret_cast<simd<uscalar_type, N>>(x) ^ + reinterpret_cast<simd<uscalar_type, N>>(y)); } template <simdindex N> constexpr static simd<scalar_type, N> bnot(simd<scalar_type, N> x) noexcept { - return bitcast<scalar_type>(~ubitcast(x)); + return reinterpret_cast<simd<scalar_type, 
N>>(~reinterpret_cast<simd<uscalar_type, N>>(x)); } template <simdindex N> constexpr static simd<scalar_type, N> eq(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept { - return bitcast<scalar_type>(x == y); + return reinterpret_cast<simd<scalar_type, N>>(x == y); } template <simdindex N> constexpr static simd<scalar_type, N> ne(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept { - return bitcast<scalar_type>(x != y); + return reinterpret_cast<simd<scalar_type, N>>(x != y); } template <simdindex N> constexpr static simd<scalar_type, N> lt(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept { - return bitcast<scalar_type>(x < y); + return reinterpret_cast<simd<scalar_type, N>>(x < y); } template <simdindex N> constexpr static simd<scalar_type, N> gt(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept { - return bitcast<scalar_type>(x > y); + return reinterpret_cast<simd<scalar_type, N>>(x > y); } template <simdindex N> constexpr static simd<scalar_type, N> le(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept { - return bitcast<scalar_type>(x <= y); + return reinterpret_cast<simd<scalar_type, N>>(x <= y); } template <simdindex N> constexpr static simd<scalar_type, N> ge(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept { - return bitcast<scalar_type>(x >= y); + return reinterpret_cast<simd<scalar_type, N>>(x >= y); } }; @@ -554,7 +593,8 @@ constexpr KFR_INLINE vec<T, N> make_vector(cvals_t<T, Values...>) KFR_FN(make_vector) template <typename Type = void, typename Arg, typename... Args, size_t N = (sizeof...(Args) + 1), - typename SubType = conditional<is_void<Type>::value, common_type<Arg, Args...>, Type>> + typename SubType = conditional<is_void<Type>::value, common_type<Arg, Args...>, Type>, + KFR_ENABLE_IF(is_numeric<SubType>::value)> constexpr KFR_INLINE vec<SubType, N> pack(const Arg& x, const Args&... 
rest) { return internal::make_vector_impl<SubType>(csizeseq<N * widthof<SubType>()>, static_cast<SubType>(x), @@ -567,6 +607,7 @@ struct vec : vec_t<T, N> { static_assert(N > 0 && N <= 256, "Invalid vector size"); + using UT = utype<T>; using value_type = T; using scalar_type = subtype<T>; constexpr static size_t scalar_size() noexcept { return N * compound_type_traits<T>::width; } @@ -582,6 +623,10 @@ struct vec : vec_t<T, N> : v(*internal_read_write::read<N, false>(value.data())) { } + constexpr KFR_INLINE vec(const array_ref<const T>& value) noexcept + : v(*internal_read_write::read<N, false>(value.data())) + { + } template <typename U, KFR_ENABLE_IF(std::is_convertible<U, T>::value&& compound_type_traits<T>::width > 1)> constexpr KFR_INLINE vec(const U& value) noexcept @@ -690,10 +735,10 @@ struct vec : vec_t<T, N> using array_t = T (&)[N]; KFR_INLINE array_t arr() { return ref_cast<array_t>(v); } - template <typename U, KFR_ENABLE_IF(std::is_convertible<T, U>::value)> + template <typename U, KFR_ENABLE_IF(std::is_convertible<T, U>::value && !std::is_same<U, vec>::value)> constexpr operator vec<U, N>() const noexcept { - return cast<U>(*this); + return internal::conversion<vec<U, N>, vec<T, N>>::cast(*this); } private: @@ -730,6 +775,7 @@ private: template <typename T, size_t N> struct mask : public vec<T, N> { + using UT = utype<T>; using type = T; constexpr static size_t width = N; @@ -758,23 +804,19 @@ struct mask : public vec<T, N> { } - // template <typename M, typename = u8[sizeof(T) == sizeof(M)]> - // constexpr KFR_INLINE mask(mask<M, N> value) : base(reinterpret_cast<const vec<T, N>&>(value)) - // { - // } - constexpr KFR_INLINE mask operator~() const { return bitcast<T>(~ubitcast(this->v)); } - constexpr KFR_INLINE mask operator&(const vec<T, N>& x) const + friend constexpr KFR_INLINE mask operator&(const mask& x, const mask& y) { - return bitcast<T>(ubitcast(this->v) & ubitcast(x.v)); + return vec_op<T>::band(x.v, y.v); } - constexpr KFR_INLINE mask 
operator|(const vec<T, N>& x) const + friend constexpr KFR_INLINE mask operator|(const mask& x, const mask& y) { - return bitcast<T>(ubitcast(this->v) | ubitcast(x.v)); + return vec_op<T>::bor(x.v, y.v); } - constexpr KFR_INLINE mask operator^(const vec<T, N>& x) const + friend constexpr KFR_INLINE mask operator^(const mask& x, const mask& y) { - return bitcast<T>(ubitcast(this->v) ^ ubitcast(x.v)); + return vec_op<T>::bxor(x.v, y.v); } + friend constexpr KFR_INLINE mask operator~(const mask& x) { return vec_op<T>::bnot(x.v); } constexpr KFR_INLINE mask operator&&(const mask& x) const { return *this & x; } constexpr KFR_INLINE mask operator||(const mask& x) const { return *this | x; } @@ -794,8 +836,8 @@ struct mask : public vec<T, N> KFR_INLINE bool operator[](size_t index) const { return ibitcast(this->v[index]) < 0; } }; -template <typename T, size_t N> -using cvec = vec<T, N * 2>; +template <typename T, size_t N1, size_t N2 = N1> +using mat = vec<vec<T, N1>, N2>; namespace internal { @@ -1171,7 +1213,7 @@ template <typename T, size_t N> constexpr KFR_INLINE vec<T, N> zerovector() { constexpr size_t width = N * compound_type_traits<T>::width; - return subcast<T>(vec<subtype<T>, width>(simd<subtype<T>, width>())); + return compcast<T>(vec<subtype<T>, width>(simd<subtype<T>, width>())); } template <typename T, size_t N> @@ -1285,10 +1327,12 @@ namespace cometa template <typename T, size_t N> struct compound_type_traits<kfr::simd<T, N>> { - using subtype = T; - using deep_subtype = cometa::deep_subtype<T>; - constexpr static size_t width = N; - constexpr static bool is_scalar = false; + using subtype = T; + using deep_subtype = cometa::deep_subtype<T>; + constexpr static size_t width = N; + constexpr static size_t deep_width = width * compound_type_traits<T>::width; + constexpr static bool is_scalar = false; + constexpr static size_t depth = cometa::compound_type_traits<T>::depth + 1; template <typename U> using rebind = kfr::simd<U, N>; template <typename U> @@ 
-1300,10 +1344,12 @@ struct compound_type_traits<kfr::simd<T, N>> template <typename T, size_t N> struct compound_type_traits<kfr::vec<T, N>> { - using subtype = T; - using deep_subtype = cometa::deep_subtype<T>; - constexpr static size_t width = N; - constexpr static bool is_scalar = false; + using subtype = T; + using deep_subtype = cometa::deep_subtype<T>; + constexpr static size_t width = N; + constexpr static size_t deep_width = width * compound_type_traits<T>::width; + constexpr static bool is_scalar = false; + constexpr static size_t depth = cometa::compound_type_traits<T>::depth + 1; template <typename U> using rebind = kfr::vec<U, N>; template <typename U> @@ -1315,10 +1361,12 @@ struct compound_type_traits<kfr::vec<T, N>> template <typename T, size_t N> struct compound_type_traits<kfr::mask<T, N>> { - using subtype = T; - using deep_subtype = cometa::deep_subtype<T>; - constexpr static size_t width = N; - constexpr static bool is_scalar = false; + using subtype = T; + using deep_subtype = cometa::deep_subtype<T>; + constexpr static size_t width = N; + constexpr static size_t deep_width = width * compound_type_traits<T>::width; + constexpr static bool is_scalar = false; + constexpr static size_t depth = cometa::compound_type_traits<T>::depth + 1; template <typename U> using rebind = kfr::mask<U, N>; template <typename U> diff --git a/include/kfr/cometa.hpp b/include/kfr/cometa.hpp @@ -135,10 +135,12 @@ constexpr size_t typeindex() template <typename T> struct compound_type_traits { - constexpr static size_t width = 1; - using subtype = T; - using deep_subtype = T; - constexpr static bool is_scalar = true; + constexpr static size_t width = 1; + constexpr static size_t deep_width = width; + using subtype = T; + using deep_subtype = T; + constexpr static size_t depth = 0; + constexpr static bool is_scalar = true; template <typename U> using rebind = U; @@ -166,10 +168,12 @@ using deep_rebind = typename compound_type_traits<T>::template deep_rebind<SubTy 
template <typename T> struct compound_type_traits<std::pair<T, T>> { - constexpr static size_t width = 2; - using subtype = T; - using deep_subtype = cometa::deep_subtype<T>; - constexpr static bool is_scalar = false; + constexpr static size_t width = 2; + constexpr static size_t deep_width = width * compound_type_traits<T>::width; + using subtype = T; + using deep_subtype = cometa::deep_subtype<T>; + constexpr static bool is_scalar = false; + constexpr static size_t depth = cometa::compound_type_traits<T>::depth + 1; template <typename U> using rebind = std::pair<U, U>; diff --git a/include/kfr/dft/ft.hpp b/include/kfr/dft/ft.hpp @@ -103,6 +103,9 @@ KFR_INLINE vec<T, N> cmul_conj(vec<T, 2> x, vec<T, N> y) KFR_FN(cmul_conj) KFR_FN(cmul_2conj) +template <typename T, size_t N> +using cvec = vec<T, N * 2>; + template <size_t N, bool A = false, typename T> KFR_INLINE cvec<T, N> cread(const complex<T>* src) { diff --git a/include/kfr/dsp/biquad.hpp b/include/kfr/dsp/biquad.hpp @@ -74,99 +74,92 @@ struct biquad_params namespace internal { -template <cpu_t cpu = cpu_t::native> -struct in_biquad +template <typename T, size_t filters, KFR_ARCH_DEP> +struct biquad_block { -private: -public: - template <typename T, size_t filters> - struct biquad_block - { - vec<T, filters> s1; - vec<T, filters> s2; - vec<T, filters> a1; - vec<T, filters> a2; - vec<T, filters> b0; - vec<T, filters> b1; - vec<T, filters> b2; + vec<T, filters> s1; + vec<T, filters> s2; + vec<T, filters> a1; + vec<T, filters> a2; + vec<T, filters> b0; + vec<T, filters> b1; + vec<T, filters> b2; - vec<T, filters> out; - biquad_block() : s1(0), s2(0), a1(0), a2(0), b0(1), b1(0), b2(0), out(0) {} - biquad_block(const biquad_params<T>* bq, size_t count) : s1(0), s2(0), out(0) + vec<T, filters> out; + biquad_block() : s1(0), s2(0), a1(0), a2(0), b0(1), b1(0), b2(0), out(0) {} + biquad_block(const biquad_params<T>* bq, size_t count) : s1(0), s2(0), out(0) + { + count = count > filters ? 
filters : count; + for (size_t i = 0; i < count; i++) { - count = count > filters ? filters : count; - for (size_t i = 0; i < count; i++) - { - a1(i) = bq[i].a1; - a2(i) = bq[i].a2; - b0(i) = bq[i].b0; - b1(i) = bq[i].b1; - b2(i) = bq[i].b2; - } - for (size_t i = count; i < filters; i++) - { - a1(i) = T(0); - a2(i) = T(0); - b0(i) = T(1); - b1(i) = T(0); - b2(i) = T(0); - } + a1(i) = bq[i].a1; + a2(i) = bq[i].a2; + b0(i) = bq[i].b0; + b1(i) = bq[i].b1; + b2(i) = bq[i].b2; } - - template <size_t count> - biquad_block(const biquad_params<T> (&bq)[count]) : biquad_block(bq, count) + for (size_t i = count; i < filters; i++) { - static_assert(count <= filters, "count > filters"); + a1(i) = T(0); + a2(i) = T(0); + b0(i) = T(1); + b1(i) = T(0); + b2(i) = T(0); } - }; + } - template <size_t filters, typename T, typename E1> - struct expression_biquads : public expression<E1> + template <size_t count> + biquad_block(const biquad_params<T> (&bq)[count]) : biquad_block(bq, count) { - using value_type = T; + static_assert(count <= filters, "count > filters"); + } +}; - expression_biquads(const biquad_block<T, filters>& bq, E1&& e1) - : expression<E1>(std::forward<E1>(e1)), bq(bq) - { - } - template <size_t width> - KFR_INTRIN vec<T, width> operator()(cinput_t, size_t index, vec_t<T, width> t) const - { - const vec<T, width> in = this->argument_first(index, t); - vec<T, width> out; +template <size_t filters, typename T, typename E1, KFR_ARCH_DEP> +struct expression_biquads : public expression<E1> +{ + using value_type = T; - KFR_LOOP_UNROLL - for (size_t i = 0; i < width; i++) - { - bq.out = process(bq, insertleft(in[i], bq.out)); - out(i) = bq.out[filters - 1]; - } + expression_biquads(const biquad_block<T, filters>& bq, E1&& e1) + : expression<E1>(std::forward<E1>(e1)), bq(bq) + { + } + template <size_t width> + KFR_INTRIN vec<T, width> operator()(cinput_t, size_t index, vec_t<T, width> t) const + { + const vec<T, width> in = this->argument_first(index, t); + vec<T, width> 
out; - return out; - } - KFR_SINTRIN vec<T, filters> process(biquad_block<T, filters>& bq, vec<T, filters> in) + KFR_LOOP_UNROLL + for (size_t i = 0; i < width; i++) { - const vec<T, filters> out = bq.b0 * in + bq.s1; - bq.s1 = bq.s2 + bq.b1 * in - bq.a1 * out; - bq.s2 = bq.b2 * in - bq.a2 * out; - return out; + bq.out = process(bq, insertleft(in[i], bq.out)); + out(i) = bq.out[filters - 1]; } - mutable biquad_block<T, filters> bq; - }; + + return out; + } + KFR_SINTRIN vec<T, filters> process(biquad_block<T, filters>& bq, vec<T, filters> in) + { + const vec<T, filters> out = bq.b0 * in + bq.s1; + bq.s1 = bq.s2 + bq.b1 * in - bq.a1 * out; + bq.s2 = bq.b2 * in - bq.a2 * out; + return out; + } + mutable biquad_block<T, filters> bq; }; } template <typename T, typename E1> -KFR_INLINE internal::in_biquad<>::expression_biquads<1, T, internal::arg<E1>> biquad( - const biquad_params<T>& bq, E1&& e1) +KFR_INLINE internal::expression_biquads<1, T, internal::arg<E1>> biquad(const biquad_params<T>& bq, E1&& e1) { const biquad_params<T> bqs[1] = { bq }; - return internal::in_biquad<>::expression_biquads<1, T, internal::arg<E1>>(bqs, std::forward<E1>(e1)); + return internal::expression_biquads<1, T, internal::arg<E1>>(bqs, std::forward<E1>(e1)); } template <size_t filters, typename T, typename E1> -KFR_INLINE internal::in_biquad<>::expression_biquads<filters, T, internal::arg<E1>> biquad( +KFR_INLINE internal::expression_biquads<filters, T, internal::arg<E1>> biquad( const biquad_params<T> (&bq)[filters], E1&& e1) { - return internal::in_biquad<>::expression_biquads<filters, T, internal::arg<E1>>(bq, std::forward<E1>(e1)); + return internal::expression_biquads<filters, T, internal::arg<E1>>(bq, std::forward<E1>(e1)); } } diff --git a/include/kfr/dsp/fir.hpp b/include/kfr/dsp/fir.hpp @@ -38,7 +38,7 @@ using fir_taps = univector<T, Size>; namespace internal { -template <size_t tapcount, typename T, typename E1> +template <size_t tapcount, typename T, typename E1, KFR_ARCH_DEP> 
struct expression_short_fir : expression<E1> { static_assert(is_poweroftwo(tapcount), "tapcount must be a power of two"); @@ -47,6 +47,10 @@ struct expression_short_fir : expression<E1> : expression<E1>(std::forward<E1>(e1)), taps(taps), delayline(0) { } + expression_short_fir(E1&& e1, const array_ref<const T>& taps) + : expression<E1>(std::forward<E1>(e1)), taps(taps), delayline(0) + { + } template <typename U, size_t N> KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N> x) const { @@ -63,7 +67,7 @@ struct expression_short_fir : expression<E1> mutable vec<T, tapcount - 1> delayline; }; -template <typename T, typename E1> +template <typename T, typename E1, KFR_ARCH_DEP> struct expression_fir : expression<E1> { expression_fir(E1&& e1, const array_ref<const T>& taps) diff --git a/include/kfr/dsp/goertzel.hpp b/include/kfr/dsp/goertzel.hpp @@ -32,7 +32,7 @@ namespace kfr namespace internal { -template <typename T> +template <typename T, KFR_ARCH_DEP> struct expression_goertzel : output_expression { expression_goertzel(complex<T>& result, T omega) @@ -47,7 +47,7 @@ struct expression_goertzel : output_expression template <typename U, size_t N> KFR_INLINE void operator()(coutput_t, size_t index, const vec<U, N>& x) { - vec<T, N> in = cast<T>(x); + vec<T, N> in = x; KFR_LOOP_UNROLL for (size_t i = 0; i < N; i++) { @@ -84,7 +84,7 @@ struct expression_parallel_goertzel : output_expression template <typename U, size_t N> KFR_INLINE void operator()(coutput_t, size_t index, const vec<U, N>& x) { - const vec<T, N> in = cast<T>(x); + const vec<T, N> in = x; KFR_LOOP_UNROLL for (size_t i = 0; i < N; i++) { diff --git a/include/kfr/dsp/units.hpp b/include/kfr/dsp/units.hpp @@ -41,7 +41,7 @@ namespace intrinsics template <typename T, typename TF = ftype<T>> KFR_SINTRIN TF amp_to_dB(T amp) { - return log(cast<subtype<TF>>(amp)) * subtype<TF>(8.6858896380650365530225783783322); + return log(static_cast<TF>(amp)) * subtype<TF>(8.6858896380650365530225783783322); // 
return T( 20.0 ) * log10( level ); } diff --git a/include/kfr/dsp/window.hpp b/include/kfr/dsp/window.hpp @@ -147,7 +147,7 @@ struct expression_triangular : input_expression KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const { constexpr vec_t<T, N> y{}; - return cast<U>(1 - abs(linspace(cinput, index, y))); + return 1 - abs(linspace(cinput, index, y)); } size_t size() const { return m_size; } @@ -169,7 +169,7 @@ struct expression_bartlett : input_expression KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const { constexpr vec_t<T, N> y{}; - return cast<U>(1 - abs(linspace(cinput, index, y))); + return 1 - abs(linspace(cinput, index, y)); } size_t size() const { return m_size; } @@ -191,7 +191,7 @@ struct expression_cosine : input_expression KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const { constexpr vec_t<T, N> y{}; - return cast<U>(sin(c_pi<T> * linspace(cinput, index, y))); + return sin(c_pi<T> * linspace(cinput, index, y)); } size_t size() const { return m_size; } @@ -213,7 +213,7 @@ struct expression_hann : input_expression KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const { constexpr vec_t<T, N> y{}; - return cast<U>(T(0.5) * (T(1) - cos(c_pi<T, 2> * linspace(cinput, index, y)))); + return T(0.5) * (T(1) - cos(c_pi<T, 2> * linspace(cinput, index, y))); } size_t size() const { return m_size; } @@ -236,7 +236,7 @@ struct expression_bartlett_hann : input_expression { constexpr vec_t<T, N> y{}; const vec<T, N> xx = linspace(cinput, index, y); - return cast<U>(T(0.62) - T(0.48) * abs(xx - T(0.5)) + T(0.38) * cos(c_pi<T, 2> * (xx - T(0.5)))); + return T(0.62) - T(0.48) * abs(xx - T(0.5)) + T(0.38) * cos(c_pi<T, 2> * (xx - T(0.5))); } size_t size() const { return m_size; } @@ -258,7 +258,7 @@ struct expression_hamming : input_expression KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const { constexpr vec_t<T, N> y{}; - return cast<U>(alpha - (1.0 - 
alpha) * (cos(c_pi<T, 2> * linspace(cinput, index, y)))); + return alpha - (1.0 - alpha) * (cos(c_pi<T, 2> * linspace(cinput, index, y))); } size_t size() const { return m_size; } @@ -282,7 +282,7 @@ struct expression_bohman : input_expression { constexpr vec_t<T, N> y{}; const vec<U, N> n = abs(linspace(cinput, index, y)); - return cast<U>((T(1) - n) * cos(c_pi<T> * n) + (T(1) / c_pi<T>)*sin(c_pi<T> * n)); + return (T(1) - n) * cos(c_pi<T> * n) + (T(1) / c_pi<T>)*sin(c_pi<T> * n); } size_t size() const { return m_size; } @@ -305,7 +305,7 @@ struct expression_blackman : input_expression { constexpr vec_t<T, N> y{}; const vec<T, N> n = linspace(cinput, index, y); - return cast<U>(a0 - a1 * cos(c_pi<T, 2> * n) + a2 * cos(c_pi<T, 4> * n)); + return a0 - a1 * cos(c_pi<T, 2> * n) + a2 * cos(c_pi<T, 4> * n); } size_t size() const { return m_size; } @@ -330,7 +330,7 @@ struct expression_blackman_harris : input_expression constexpr vec_t<T, N> y{}; const vec<T, N> n = linspace(cinput, index, y) * c_pi<T, 2>; - return cast<U>(T(0.35875) - T(0.48829) * cos(n) + T(0.14128) * cos(2 * n) - T(0.01168) * cos(3 * n)); + return T(0.35875) - T(0.48829) * cos(n) + T(0.14128) * cos(2 * n) - T(0.01168) * cos(3 * n); } size_t size() const { return m_size; } @@ -353,7 +353,7 @@ struct expression_kaiser : input_expression KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const { constexpr vec_t<T, N> y{}; - return cast<U>(modzerobessel(beta * sqrt(1 - sqr(linspace(cinput, index, y)))) * m); + return modzerobessel(beta * sqrt(1 - sqr(linspace(cinput, index, y)))) * m; } size_t size() const { return m_size; } @@ -383,7 +383,7 @@ struct expression_flattop : input_expression constexpr T a2 = 1.29; constexpr T a3 = 0.388; constexpr T a4 = 0.028; - return cast<U>(a0 - a1 * cos(n) + a2 * cos(2 * n) - a3 * cos(3 * n) + a4 * cos(4 * n)); + return a0 - a1 * cos(n) + a2 * cos(2 * n) - a3 * cos(3 * n) + a4 * cos(4 * n); } size_t size() const { return m_size; } @@ -405,7 +405,7 @@ 
struct expression_gaussian : input_expression KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const { constexpr vec_t<T, N> y{}; - return cast<U>(exp(-0.5 * sqr(alpha * linspace(cinput, index, y)))); + return exp(-0.5 * sqr(alpha * linspace(cinput, index, y))); } size_t size() const { return m_size; } @@ -428,7 +428,7 @@ struct expression_lanczos : input_expression KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const { constexpr vec_t<T, N> y{}; - return cast<U>(sinc(linspace(cinput, index, y))); + return sinc(linspace(cinput, index, y)); } size_t size() const { return m_size; } diff --git a/include/kfr/io/file.hpp b/include/kfr/io/file.hpp @@ -85,7 +85,7 @@ struct expression_file_writer : expression_file_base, output_expression { if (position != index) fseeko(file, static_cast<off_t>(index * sizeof(T)), SEEK_SET); - const vec<T, N> output = cast<T>(value); + const vec<T, N> output = value; fwrite(output.data(), sizeof(T), output.size(), file); position = index + N; } @@ -104,7 +104,7 @@ struct expression_file_reader : expression_file_base, input_expression vec<T, N> input = qnan; fread(input.data(), sizeof(T), input.size(), file); position = index + N; - return cast<U>(input); + return input; } mutable size_t position = 0; }; diff --git a/tests/complex_test.cpp b/tests/complex_test.cpp @@ -44,17 +44,17 @@ TEST(complex_vector) TEST(complex_cast) { - const vec<f32, 4> v1 = subcast<f32>(make_vector(c32{ 0, 1 }, c32{ 2, 3 })); + const vec<f32, 4> v1 = bitcast<f32>(make_vector(c32{ 0, 1 }, c32{ 2, 3 })); CHECK(v1(0) == 0.f); CHECK(v1(1) == 1.f); CHECK(v1(2) == 2.f); CHECK(v1(3) == 3.f); - const vec<c32, 1> v2 = subcast<c32>(make_vector(1.f, 2.f)); + const vec<c32, 1> v2 = bitcast<c32>(make_vector(1.f, 2.f)); CHECK(v2(0) == 1.f); CHECK(v2(1) == 2.f); - const vec<c32, 2> v3 = cast<c32>(make_vector(1.f, 2.f)); + const vec<c32, 2> v3 = make_vector(1.f, 2.f); CHECK(v3(0) == 1.f); CHECK(v3(1) == 0.f); CHECK(v3(2) == 2.f);