kfr

Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)
Log | Files | Refs | README

commit 1556f65ddc63e3da13bfc340a6bf3dbbb3512a4f
parent 336ba664c337c202ee5749091645f7aca106a861
Author: d.levin256@gmail.com <d.levin256@gmail.com>
Date:   Fri, 28 Oct 2022 10:26:34 +0100

Expressions refactoring

Diffstat:
Mexamples/biquads.cpp | 4+++-
Mexamples/ccv.cpp | 41+++++++++++++++++++++--------------------
Mexamples/fir.cpp | 2+-
Minclude/kfr/base.hpp | 4++++
Minclude/kfr/base/basic_expressions.hpp | 460+++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------
Minclude/kfr/base/conversion.hpp | 22+++++++++++++---------
Minclude/kfr/base/expression.hpp | 686++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------------
Minclude/kfr/base/filter.hpp | 22+++++++++++-----------
Minclude/kfr/base/generators.hpp | 108++++++++++++++++++++++++++++++++++++++-----------------------------------------
Minclude/kfr/base/impl/static_array.hpp | 2+-
Minclude/kfr/base/math_expressions.hpp | 264+++++++++++++++++++------------------------------------------------------------
Dinclude/kfr/base/old_basic_expressions.hpp | 708-------------------------------------------------------------------------------
Minclude/kfr/base/pointer.hpp | 195+++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------
Minclude/kfr/base/random.hpp | 134++++++++++++++++++++++++++++++++++++++++++++-----------------------------------
Minclude/kfr/base/random_bits.hpp | 90++++++++++++++++++++++++++++++++++++++++++++-----------------------------------
Minclude/kfr/base/reduce.hpp | 69++++++++++++++++++++++++++++++++++-----------------------------------
Minclude/kfr/base/shape.hpp | 102++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------
Minclude/kfr/base/simd_expressions.hpp | 308+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------
Ainclude/kfr/base/state_holder.hpp | 49+++++++++++++++++++++++++++++++++++++++++++++++++
Minclude/kfr/base/tensor.hpp | 34++++++----------------------------
Minclude/kfr/base/univector.hpp | 102+++++++++++++++++++++++++++++++++++++++++++++++++------------------------------
Minclude/kfr/capi.h | 2+-
Minclude/kfr/cometa.hpp | 19+++++++++++++------
Minclude/kfr/cometa/cstring.hpp | 6+++---
Minclude/kfr/cometa/function.hpp | 4++--
Minclude/kfr/cometa/numeric.hpp | 12+++++-------
Minclude/kfr/dft/convolution.hpp | 10+++++-----
Minclude/kfr/dft/fft.hpp | 7+++----
Minclude/kfr/dft/impl/convolution-impl.cpp | 4++--
Minclude/kfr/dft/impl/dft-impl.hpp | 46+++++++++++++++++++++++++---------------------
Minclude/kfr/dft/impl/fft-impl.hpp | 108+++++++++++++++++++++++++++++++++++++++++++++----------------------------------
Minclude/kfr/dft/impl/ft.hpp | 30++++++++++++++++--------------
Minclude/kfr/dsp/biquad.hpp | 212++++++++++++++++++++++++++++++++++++++-----------------------------------------
Minclude/kfr/dsp/dcremove.hpp | 10++++------
Minclude/kfr/dsp/delay.hpp | 98++++++++++++++++++++++++++++++++++++++++++++-----------------------------------
Minclude/kfr/dsp/ebu.hpp | 9+++++----
Minclude/kfr/dsp/fir.hpp | 144+++++++++++++++++++++++++++++++++++++++++--------------------------------------
Minclude/kfr/dsp/fir_design.hpp | 13++++++-------
Minclude/kfr/dsp/fracdelay.hpp | 4++--
Minclude/kfr/dsp/goertzel.hpp | 27+++++++++++++--------------
Minclude/kfr/dsp/iir_design.hpp | 16++++++++--------
Minclude/kfr/dsp/mixdown.hpp | 13++++---------
Minclude/kfr/dsp/oscillators.hpp | 34++++++++++++++++++----------------
Minclude/kfr/dsp/sample_rate_conversion.hpp | 69++++++++++++++++++++++++++++++++++++++-------------------------------
Minclude/kfr/dsp/special.hpp | 15+++++++++------
Dinclude/kfr/dsp/state_holder.hpp | 41-----------------------------------------
Minclude/kfr/dsp/units.hpp | 26+++++++++++++-------------
Minclude/kfr/dsp/waveshaper.hpp | 6+++---
Minclude/kfr/dsp/weighting.hpp | 6+++---
Minclude/kfr/dsp/window.hpp | 457++++++++++++++++++++++++++++++++++---------------------------------------------
Minclude/kfr/graphics.hpp | 3+--
Minclude/kfr/graphics/color.hpp | 2+-
Minclude/kfr/graphics/geometry.hpp | 20+++++++++++++-------
Ainclude/kfr/graphics/impl/scaled.hpp | 58++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Dinclude/kfr/graphics/scaled.hpp | 59-----------------------------------------------------------
Minclude/kfr/kfr.h | 5+++--
Minclude/kfr/math/complex_math.hpp | 6+++---
Minclude/kfr/math/impl/atan.hpp | 8++++----
Minclude/kfr/math/impl/gamma.hpp | 2+-
Minclude/kfr/math/impl/hyperbolic.hpp | 6+++---
Minclude/kfr/math/impl/log_exp.hpp | 62+++++++++++++++++++++++++++++++-------------------------------
Minclude/kfr/math/impl/sin_cos.hpp | 8++++----
Minclude/kfr/math/impl/tan.hpp | 5++++-
Minclude/kfr/simd.hpp | 1+
Minclude/kfr/simd/complex.hpp | 60++++++++++++++++++++++++++++++++++++++++++++++++++++++------
Minclude/kfr/simd/complex_type.hpp | 2+-
Minclude/kfr/simd/impl/abs.hpp | 2+-
Minclude/kfr/simd/impl/backend_clang.hpp | 12+++++++-----
Minclude/kfr/simd/impl/backend_generic.hpp | 3++-
Minclude/kfr/simd/impl/basicoperators_clang.hpp | 6++++++
Minclude/kfr/simd/impl/basicoperators_complex.hpp | 2+-
Minclude/kfr/simd/impl/basicoperators_generic.hpp | 137++++++++++++++++++++++++-------------------------------------------------------
Minclude/kfr/simd/impl/function.hpp | 2+-
Minclude/kfr/simd/impl/logical.hpp | 2+-
Minclude/kfr/simd/impl/min_max.hpp | 2+-
Minclude/kfr/simd/impl/operators.hpp | 77++++++-----------------------------------------------------------------------
Minclude/kfr/simd/impl/round.hpp | 26+++++++++++++-------------
Minclude/kfr/simd/impl/saturation.hpp | 2+-
Minclude/kfr/simd/impl/select.hpp | 2+-
Rinclude/kfr/simd/impl/specializations.i -> include/kfr/simd/impl/specializations.hpp | 0
Minclude/kfr/simd/operators.hpp | 68+++++++++++++++++++++++++++++++++++++++++---------------------------
Minclude/kfr/simd/read_write.hpp | 15+++++++++++----
Minclude/kfr/simd/select.hpp | 2+-
Minclude/kfr/simd/shuffle.hpp | 5++---
Minclude/kfr/simd/vec.hpp | 251+++++++++++++++++++++++++++++++++++++++++--------------------------------------
Msources.cmake | 331++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
Mtests/CMakeLists.txt | 3++-
Mtests/asm_test.cpp | 4++--
Mtests/base_test.cpp | 22+++-------------------
Mtests/complex_test.cpp | 10+++-------
Mtests/dsp_test.cpp | 565-------------------------------------------------------------------------------
Mtests/expression_test.cpp | 149+++----------------------------------------------------------------------------
Mtests/intrinsic_test.cpp | 42+++++++++++++++++++++++-------------------
Mtests/numeric_tests.hpp | 96++++++++++++++++++++++++++++++++++++++++++++-----------------------------------
Mtests/tensor_test.cpp | 2+-
Atests/unit/base/base.cpp | 7+++++++
Atests/unit/base/basic_expressions.cpp | 131+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atests/unit/base/generators.cpp | 33+++++++++++++++++++++++++++++++++
Atests/unit/base/pointer.cpp | 57+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mtests/unit/base/random.cpp | 16++++++++--------
Mtests/unit/base/reduce.cpp | 9+++++++++
Atests/unit/base/shape.cpp | 72++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mtests/unit/base/tensor.cpp | 165++++++++++++++++++++++++++-----------------------------------------------------
Atests/unit/dsp/biquad.cpp | 113+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atests/unit/dsp/biquad_design.cpp | 7+++++++
Atests/unit/dsp/dsp.cpp | 7+++++++
Atests/unit/dsp/ebu.cpp | 270+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atests/unit/dsp/fir.cpp | 234+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atests/unit/dsp/sample_rate_conversion.cpp | 51+++++++++++++++++++++++++++++++++++++++++++++++++++
Atests/unit/dsp/units.cpp | 58++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atests/unit/dsp/window.cpp | 189+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atests/unit/graphics/graphics.cpp | 7+++++++
Atests/unit/math/math.cpp | 7+++++++
Mtests/unit/simd/abs.cpp | 2+-
Mtests/unit/simd/min_max.cpp | 8+++++---
Mtests/unit/simd/operators.cpp | 47++++++++++++++++++++++++++++++++++++++---------
Mtests/unit/simd/round.cpp | 27+++++++++++----------------
Mtests/unit/simd/select.cpp | 8+++++---
Mtests/unit/simd/shuffle.cpp | 6++++++
Atests/unit/simd/simd.cpp | 7+++++++
Mtests/unit/simd/vec.cpp | 54++++++++++++++++++++++++++++++++++--------------------
Mupdate-sources.py | 10+++++++++-
122 files changed, 4690 insertions(+), 3913 deletions(-)

diff --git a/examples/biquads.cpp b/examples/biquads.cpp @@ -5,7 +5,9 @@ */ #include <kfr/base.hpp> -#include <kfr/dsp.hpp> +#include <kfr/dsp/biquad.hpp> +#include <kfr/dsp/biquad_design.hpp> +#include <kfr/dsp/special.hpp> #include <kfr/io.hpp> using namespace kfr; diff --git a/examples/ccv.cpp b/examples/ccv.cpp @@ -26,8 +26,8 @@ int main() // Create filters. size_t const block_size = 256; - convolve_filter<complex<fbase>> conv_filter_complex(univector<complex<fbase>>(make_complex(taps127, zeros())), - block_size); + convolve_filter<complex<fbase>> conv_filter_complex( + univector<complex<fbase>>(make_complex(taps127, zeros())), block_size); convolve_filter<fbase> conv_filter_real(taps127, block_size); // Create noise to filter. @@ -35,8 +35,7 @@ int main() univector<complex<fbase>> cnoise = make_complex(truncate(gen_random_range(random_bit_generator{ 1, 2, 3, 4 }, -1.f, +1.f), size), truncate(gen_random_range(random_bit_generator{ 3, 4, 9, 8 }, -1.f, +1.f), size)); - univector<fbase> noise = - truncate(gen_random_range(random_bit_generator{ 3, 4, 9, 8 }, -1.f, +1.f), size); + univector<fbase> noise = truncate(gen_random_range(random_bit_generator{ 3, 4, 9, 8 }, -1.f, +1.f), size); // Filter results. univector<complex<fbase>> filtered_cnoise_ccv(size), filtered_cnoise_fir(size); @@ -45,27 +44,29 @@ int main() // Complex filtering (time and compare). auto tic = std::chrono::high_resolution_clock::now(); conv_filter_complex.apply(filtered_cnoise_ccv, cnoise); - auto toc = std::chrono::high_resolution_clock::now(); - auto const ccv_time_complex = std::chrono::duration_cast<std::chrono::duration<float>>(toc - tic); - tic = toc; - filtered_cnoise_fir = kfr::fir(cnoise, taps127); - toc = std::chrono::high_resolution_clock::now(); - auto const fir_time_complex = std::chrono::duration_cast<std::chrono::duration<float>>(toc - tic); - auto const cdiff = rms(cabs(filtered_cnoise_fir - filtered_cnoise_ccv)); + auto toc = std::chrono::high_resolution_clock::now(); + auto const ccv_time_complex = std::chrono::duration_cast<std::chrono::duration<float>>(toc - tic); + tic = toc; + filtered_cnoise_fir = kfr::fir(cnoise, taps127); + toc = std::chrono::high_resolution_clock::now(); + auto const fir_time_complex = std::chrono::duration_cast<std::chrono::duration<float>>(toc - tic); + auto const cdiff = rms(cabs(filtered_cnoise_fir - filtered_cnoise_ccv)); // Real filtering (time and compare). tic = std::chrono::high_resolution_clock::now(); conv_filter_real.apply(filtered_noise_ccv, noise); - toc = std::chrono::high_resolution_clock::now(); - auto const ccv_time_real = std::chrono::duration_cast<std::chrono::duration<float>>(toc - tic); - tic = toc; - filtered_noise_fir = kfr::fir(noise, taps127); - toc = std::chrono::high_resolution_clock::now(); - auto const fir_time_real = std::chrono::duration_cast<std::chrono::duration<float>>(toc - tic); - auto const diff = rms(filtered_noise_fir - filtered_noise_ccv); + toc = std::chrono::high_resolution_clock::now(); + auto const ccv_time_real = std::chrono::duration_cast<std::chrono::duration<float>>(toc - tic); + tic = toc; + filtered_noise_fir = kfr::fir(noise, taps127); + toc = std::chrono::high_resolution_clock::now(); + auto const fir_time_real = std::chrono::duration_cast<std::chrono::duration<float>>(toc - tic); + auto const diff = rms(filtered_noise_fir - filtered_noise_ccv); - println("complex: convolution_filter ", ccv_time_complex.count(), " fir ", fir_time_complex.count(), " diff=", cdiff); - println("real: convolution_filter ", ccv_time_real.count(), " fir ", fir_time_real.count(), " diff=", diff); + println("complex: convolution_filter ", ccv_time_complex.count(), " fir ", fir_time_complex.count(), + " diff=", cdiff); + println("real: convolution_filter ", ccv_time_real.count(), " fir ", fir_time_real.count(), + " diff=", diff); return 0; } diff --git a/examples/fir.cpp b/examples/fir.cpp @@ -103,7 +103,7 @@ int main() // Prepare 10000 samples of white noise univector<float> noise = - truncate(gen_random_range(random_bit_generator{ 1, 2, 3, 4 }, -1.f, +1.f), 10000); + truncate(gen_random_range(random_init(1, 2, 3, 4), -1.f, +1.f), 10000); // Apply band stop filter univector<float> filtered_noise = fir(noise, taps127); diff --git a/include/kfr/base.hpp b/include/kfr/base.hpp @@ -34,7 +34,11 @@ #include "base/memory.hpp" #include "base/pointer.hpp" #include "base/random.hpp" +#include "base/random_bits.hpp" #include "base/reduce.hpp" +#include "base/shape.hpp" #include "base/simd_expressions.hpp" #include "base/small_buffer.hpp" +#include "base/state_holder.hpp" +#include "base/tensor.hpp" #include "base/univector.hpp" diff --git a/include/kfr/base/basic_expressions.hpp b/include/kfr/base/basic_expressions.hpp @@ -32,35 +32,35 @@ namespace kfr // ---------------------------------------------------------------------------- template <typename T> -struct xscalar +struct expression_scalar { T value; }; template <typename T> -struct expression_traits<xscalar<T>> : expression_traits_defaults +struct expression_traits<expression_scalar<T>> : expression_traits_defaults { using value_type = T; constexpr static size_t dims = 0; - constexpr static shape<0> shapeof(const xscalar<T>& self) { return {}; } + constexpr static shape<0> shapeof(const expression_scalar<T>& self) { return {}; } constexpr static shape<0> shapeof() { return {}; } }; template <typename T> -KFR_INTRINSIC xscalar<T> scalar(T value) +KFR_INTRINSIC expression_scalar<T> scalar(T value) { return { std::move(value) }; } template <typename T = fbase> -KFR_INTRINSIC xscalar<T> zeros() +KFR_INTRINSIC expression_scalar<T> zeros() { return { static_cast<T>(0) }; } template <typename T = fbase> -KFR_INTRINSIC xscalar<T> ones() +KFR_INTRINSIC expression_scalar<T> ones() { return { static_cast<T>(1) }; } @@ -68,7 +68,7 @@ KFR_INTRINSIC xscalar<T> ones() inline namespace CMT_ARCH_NAME { template <typename T, index_t Axis, size_t N> -KFR_INTRINSIC vec<T, N> get_elements(const xscalar<T>& self, const shape<0>& index, +KFR_INTRINSIC vec<T, N> get_elements(const expression_scalar<T>& self, const shape<0>& index, const axis_params<Axis, N>&) { return self.value; @@ -78,7 +78,7 @@ KFR_INTRINSIC vec<T, N> get_elements(const xscalar<T>& self, const shape<0>& ind // ---------------------------------------------------------------------------- template <typename T, index_t Dims = 1> -struct xcounter +struct expression_counter { T start; T steps[Dims]; @@ -88,26 +88,32 @@ struct xcounter }; template <typename T, index_t Dims> -struct expression_traits<xcounter<T, Dims>> : expression_traits_defaults +struct expression_traits<expression_counter<T, Dims>> : expression_traits_defaults { using value_type = T; constexpr static size_t dims = Dims; - constexpr static shape<dims> shapeof(const xcounter<T, Dims>& self) { return shape<dims>(infinite_size); } + constexpr static shape<dims> shapeof(const expression_counter<T, Dims>& self) + { + return shape<dims>(infinite_size); + } constexpr static shape<dims> shapeof() { return shape<dims>(infinite_size); } }; -template <typename T, typename... Args, typename Tout = std::common_type_t<T, Args...>> -KFR_INTRINSIC xcounter<Tout, sizeof...(Args)> counter(T start, Args... steps) +template <typename T = int, typename Arg = T, typename... Args, + typename Tout = std::common_type_t<T, Arg, Args...>> +KFR_INTRINSIC expression_counter<Tout, 1 + sizeof...(Args)> counter(T start = T(0), Arg step = 1, + Args... steps) { - return { static_cast<Tout>(std::move(start)), { static_cast<Tout>(std::move(steps))... } }; + return { static_cast<Tout>(std::move(start)), + { static_cast<Tout>(std::move(step)), static_cast<Tout>(std::move(steps))... } }; } inline namespace CMT_ARCH_NAME { template <typename T, index_t Axis, size_t N> -KFR_INTRINSIC vec<T, N> get_elements(const xcounter<T, 1>& self, const shape<1>& index, +KFR_INTRINSIC vec<T, N> get_elements(const expression_counter<T, 1>& self, const shape<1>& index, const axis_params<Axis, N>&) { T acc = self.start; @@ -115,7 +121,7 @@ KFR_INTRINSIC vec<T, N> get_elements(const xcounter<T, 1>& self, const shape<1>& return acc + enumerate(vec_shape<T, N>(), self.back()); } template <typename T, index_t dims, index_t Axis, size_t N> -KFR_INTRINSIC vec<T, N> get_elements(const xcounter<T, dims>& self, const shape<dims>& index, +KFR_INTRINSIC vec<T, N> get_elements(const expression_counter<T, dims>& self, const shape<dims>& index, const axis_params<Axis, N>&) { T acc = self.start; @@ -128,20 +134,20 @@ KFR_INTRINSIC vec<T, N> get_elements(const xcounter<T, dims>& self, const shape< // ---------------------------------------------------------------------------- template <typename Arg> -struct xslice : public xwitharguments<Arg> +struct expression_slice : public expression_with_arguments<Arg> { constexpr static index_t dims = expression_dims<Arg>; shape<dims> start; shape<dims> size; - KFR_MEM_INTRINSIC xslice(Arg&& arg, shape<dims> start, shape<dims> size) - : xwitharguments<Arg>{ std::forward<Arg>(arg) }, start(start), size(size) + KFR_MEM_INTRINSIC expression_slice(Arg&& arg, shape<dims> start, shape<dims> size) + : expression_with_arguments<Arg>{ std::forward<Arg>(arg) }, start(start), size(size) { } }; template <typename Arg> -struct expression_traits<xslice<Arg>> : expression_traits_defaults +struct expression_traits<expression_slice<Arg>> : expression_traits_defaults { using ArgTraits = expression_traits<Arg>; @@ -149,7 +155,7 @@ struct expression_traits<xslice<Arg>> : expression_traits_defaults constexpr static size_t dims = ArgTraits::dims; constexpr static bool random_access = ArgTraits::random_access; - KFR_MEM_INTRINSIC constexpr static shape<dims> shapeof(const xslice<Arg>& self) + KFR_MEM_INTRINSIC constexpr static shape<dims> shapeof(const expression_slice<Arg>& self) { return min(sub_shape(ArgTraits::shapeof(self.first()), self.start), self.size); } @@ -157,13 +163,14 @@ struct expression_traits<xslice<Arg>> : expression_traits_defaults }; template <typename Arg, KFR_ACCEPT_EXPRESSIONS(Arg), index_t Dims = expression_dims<Arg>> -KFR_INTRINSIC xslice<Arg> slice(Arg&& arg, shape<Dims> start, shape<Dims> size) +KFR_INTRINSIC expression_slice<Arg> slice(Arg&& arg, identity<shape<Dims>> start, + identity<shape<Dims>> size = shape<Dims>(infinite_size)) { return { std::forward<Arg>(arg), start, size }; } template <typename Arg, KFR_ACCEPT_EXPRESSIONS(Arg), index_t Dims = expression_dims<Arg>> -KFR_INTRINSIC xslice<Arg> truncate(Arg&& arg, shape<Dims> size) +KFR_INTRINSIC expression_slice<Arg> truncate(Arg&& arg, identity<shape<Dims>> size) { return { std::forward<Arg>(arg), shape<Dims>{ 0 }, size }; } @@ -172,16 +179,16 @@ inline namespace CMT_ARCH_NAME { template <typename Arg, index_t NDims, index_t Axis, size_t N, - typename T = typename expression_traits<xslice<Arg>>::value_type> -KFR_INTRINSIC vec<T, N> get_elements(const xslice<Arg>& self, const shape<NDims>& index, + typename T = typename expression_traits<expression_slice<Arg>>::value_type> +KFR_INTRINSIC vec<T, N> get_elements(const expression_slice<Arg>& self, const shape<NDims>& index, const axis_params<Axis, N>& sh) { return static_cast<vec<T, N>>(get_elements(self.first(), index.add(self.start), sh)); } template <typename Arg, index_t NDims, index_t Axis, size_t N, - typename T = typename expression_traits<xslice<Arg>>::value_type> -KFR_INTRINSIC void set_elements(const xslice<Arg>& self, const shape<NDims>& index, + typename T = typename expression_traits<expression_slice<Arg>>::value_type> +KFR_INTRINSIC void set_elements(const expression_slice<Arg>& self, const shape<NDims>& index, const axis_params<Axis, N>& sh, const identity<vec<T, N>>& value) { set_elements(self.first(), index.add(self.start), sh, value); @@ -191,13 +198,13 @@ KFR_INTRINSIC void set_elements(const xslice<Arg>& self, const shape<NDims>& ind // ---------------------------------------------------------------------------- template <typename T, typename Arg> -struct xcast : public xwitharguments<Arg> +struct expression_cast : public expression_with_arguments<Arg> { - using xwitharguments<Arg>::xwitharguments; + using expression_with_arguments<Arg>::expression_with_arguments; }; template <typename T, typename Arg> -struct expression_traits<xcast<T, Arg>> : expression_traits_defaults +struct expression_traits<expression_cast<T, Arg>> : expression_traits_defaults { using ArgTraits = expression_traits<Arg>; @@ -205,7 +212,7 @@ struct expression_traits<xcast<T, Arg>> : expression_traits_defaults constexpr static size_t dims = ArgTraits::dims; constexpr static bool random_access = ArgTraits::random_access; - KFR_MEM_INTRINSIC constexpr static shape<dims> shapeof(const xcast<T, Arg>& self) + KFR_MEM_INTRINSIC constexpr static shape<dims> shapeof(const expression_cast<T, Arg>& self) { return ArgTraits::shapeof(self.first()); } @@ -213,13 +220,13 @@ struct expression_traits<xcast<T, Arg>> : expression_traits_defaults }; template <typename T, typename Arg, KFR_ACCEPT_EXPRESSIONS(Arg)> -KFR_INTRINSIC xcast<T, Arg> cast(Arg&& arg) +KFR_INTRINSIC expression_cast<T, Arg> cast(Arg&& arg) { return { std::forward<Arg>(arg) }; } template <typename T, typename Arg, KFR_ACCEPT_EXPRESSIONS(Arg)> -KFR_INTRINSIC xcast<T, Arg> cast(Arg&& arg, ctype_t<T>) +KFR_INTRINSIC expression_cast<T, Arg> cast(Arg&& arg, ctype_t<T>) { return { std::forward<Arg>(arg) }; } @@ -228,14 +235,14 @@ inline namespace CMT_ARCH_NAME { template <typename T, typename Arg, index_t NDims, index_t Axis, size_t N> -KFR_INTRINSIC vec<T, N> get_elements(const xcast<T, Arg>& self, const shape<NDims>& index, +KFR_INTRINSIC vec<T, N> get_elements(const expression_cast<T, Arg>& self, const shape<NDims>& index, const axis_params<Axis, N>& sh) { return static_cast<vec<T, N>>(get_elements(self.first(), index, sh)); } template <typename T, typename Arg, index_t NDims, index_t Axis, size_t N> -KFR_INTRINSIC void set_elements(const xcast<T, Arg>& self, const shape<NDims>& index, +KFR_INTRINSIC void set_elements(const expression_cast<T, Arg>& self, const shape<NDims>& index, const axis_params<Axis, N>& sh, const identity<vec<T, N>>& value) { set_elements(self.first(), index, sh, value); @@ -245,19 +252,19 @@ KFR_INTRINSIC void set_elements(const xcast<T, Arg>& self, const shape<NDims>& i // ---------------------------------------------------------------------------- template <typename T, index_t Dims, typename Fn, bool Rnd> -struct xlambda +struct expression_lambda { - Fn&& fn; + Fn fn; }; template <typename T, index_t Dims, typename Fn, bool Rnd> -struct expression_traits<xlambda<T, Dims, Fn, Rnd>> : expression_traits_defaults +struct expression_traits<expression_lambda<T, Dims, Fn, Rnd>> : expression_traits_defaults { using value_type = T; constexpr static size_t dims = Dims; constexpr static inline bool random_access = Rnd; - KFR_MEM_INTRINSIC constexpr static shape<Dims> shapeof(const xlambda<T, Dims, Fn, Rnd>& self) + KFR_MEM_INTRINSIC constexpr static shape<Dims> shapeof(const expression_lambda<T, Dims, Fn, Rnd>& self) { return shape<Dims>(infinite_size); } @@ -265,12 +272,12 @@ struct expression_traits<xlambda<T, Dims, Fn, Rnd>> : expression_traits_defaults }; template <typename T, index_t Dims = 1, typename Fn, bool RandomAccess = true> -KFR_INTRINSIC xlambda<T, Dims, Fn, RandomAccess> lambda(Fn&& fn, cbool_t<RandomAccess> = {}) +KFR_INTRINSIC expression_lambda<T, Dims, Fn, RandomAccess> lambda(Fn&& fn, cbool_t<RandomAccess> = {}) { return { std::forward<Fn>(fn) }; } template <typename T, index_t Dims = 1, typename Fn> -KFR_INTRINSIC xlambda<T, Dims, Fn, false> lambda_generator(Fn&& fn) +KFR_INTRINSIC expression_lambda<T, Dims, Fn, false> lambda_generator(Fn&& fn) { return { std::forward<Fn>(fn) }; } @@ -278,25 +285,46 @@ KFR_INTRINSIC xlambda<T, Dims, Fn, false> lambda_generator(Fn&& fn) template <typename... Ts, typename T = std::common_type_t<Ts...>> KFR_INTRINSIC auto sequence(const Ts&... list) { - return lambda<T>([seq = std::array<T, sizeof...(Ts)>{ { static_cast<T>(list)... } }](size_t index) - { return seq[index % seq.size()]; }); + return lambda<T>([seq = std::array<T, sizeof...(Ts)>{ { static_cast<T>(list)... } }](size_t index) { // + return seq[index % seq.size()]; + }); } inline namespace CMT_ARCH_NAME { template <typename T, index_t Dims, typename Fn, bool Rnd, index_t Axis, size_t N> -KFR_INTRINSIC vec<T, N> get_elements(const xlambda<T, Dims, Fn, Rnd>& self, const shape<Dims>& index, - const axis_params<Axis, N>& sh) +KFR_INTRINSIC vec<T, N> get_elements(const expression_lambda<T, Dims, Fn, Rnd>& self, + const shape<Dims>& index, const axis_params<Axis, N>& sh) { - if constexpr (std::is_invocable_v<Fn, shape<Dims>, csize_t<N>>) + if constexpr (std::is_invocable_v<Fn, shape<Dims>, axis_params<Axis, N>>) return self.fn(index, sh); + else if constexpr (std::is_invocable_v<Fn, shape<Dims>, csize_t<N>>) + return self.fn(index, csize<N>); else if constexpr (std::is_invocable_v<Fn, shape<Dims>>) - return vec<T, N>{ [&](size_t idx) { return self.fn(index.add(idx)); } }; + { + portable_vec<T, N> result; + shape<Dims> cur_index = index; + for (index_t i = 0; i < N; ++i) + { + result[i] = self.fn(cur_index); + ++cur_index.back(); + } + return result; + // return vec<T, N>{ [&](size_t idx) { // + // return self.fn(index.add(idx)); + // } }; + } else if constexpr (std::is_invocable_v<Fn>) return apply<N>(self.fn); else + { + static_assert(std::is_invocable_v<Fn, shape<Dims>, axis_params<Axis, N>> || + std::is_invocable_v<Fn, shape<Dims>, csize_t<N>> || + std::is_invocable_v<Fn, shape<Dims>> || std::is_invocable_v<Fn>, + "Lambda must be callable"); return czeros; + } } } // namespace CMT_ARCH_NAME @@ -304,28 +332,28 @@ KFR_INTRINSIC vec<T, N> get_elements(const xlambda<T, Dims, Fn, Rnd>& self, cons // ---------------------------------------------------------------------------- template <typename Arg> -struct xpadded : public xwitharguments<Arg> +struct expression_padded : public expression_with_arguments<Arg> { - using ArgTraits = typename xwitharguments<Arg>::first_arg_trait; + using ArgTraits = typename expression_with_arguments<Arg>::first_arg_traits; typename ArgTraits::value_type fill_value; shape<ArgTraits::dims> input_shape; - KFR_MEM_INTRINSIC xpadded(Arg&& arg, typename ArgTraits::value_type fill_value) - : xwitharguments<Arg>{ std::forward<Arg>(arg) }, fill_value(std::move(fill_value)), + KFR_MEM_INTRINSIC expression_padded(Arg&& arg, typename ArgTraits::value_type fill_value) + : expression_with_arguments<Arg>{ std::forward<Arg>(arg) }, fill_value(std::move(fill_value)), input_shape(ArgTraits::shapeof(this->first())) { } }; -template <typename Arg, typename T = expression_value_type<Arg>> -KFR_INTRINSIC xpadded<Arg> padded(Arg&& arg, T fill_value = T{}) +template <typename Arg, KFR_ACCEPT_EXPRESSIONS(Arg), typename T = expression_value_type<Arg>> +KFR_INTRINSIC expression_padded<Arg> padded(Arg&& arg, T fill_value = T{}) { static_assert(expression_dims<Arg> >= 1); return { std::forward<Arg>(arg), std::move(fill_value) }; } template <typename Arg> -struct expression_traits<xpadded<Arg>> : expression_traits_defaults +struct expression_traits<expression_padded<Arg>> : expression_traits_defaults { using ArgTraits = expression_traits<Arg>; @@ -333,7 +361,7 @@ struct expression_traits<xpadded<Arg>> : expression_traits_defaults constexpr static size_t dims = ArgTraits::dims; constexpr static bool random_access = ArgTraits::random_access; - KFR_MEM_INTRINSIC constexpr static shape<dims> shapeof(const xpadded<Arg>& self) + KFR_MEM_INTRINSIC constexpr static shape<dims> shapeof(const expression_padded<Arg>& self) { return shape<dims>(infinite_size); } @@ -343,16 +371,16 @@ struct expression_traits<xpadded<Arg>> : expression_traits_defaults inline namespace CMT_ARCH_NAME { -template <typename Arg, index_t Axis, size_t N, typename Traits = expression_traits<xpadded<Arg>>, +template <typename Arg, index_t Axis, size_t N, typename Traits = expression_traits<expression_padded<Arg>>, typename T = typename Traits::value_type> -KFR_INTRINSIC vec<T, N> get_elements(const xpadded<Arg>& self, const shape<Traits::dims>& index, +KFR_INTRINSIC vec<T, N> get_elements(const expression_padded<Arg>& self, const shape<Traits::dims>& index, const axis_params<Axis, N>& sh) { - if (index.ge(self.input_size)) + if (index.ge(self.input_shape)) { return self.fill_value; } - else if (CMT_LIKELY(index.add(N).le(self.input_size))) + else if (CMT_LIKELY(index.add(N).le(self.input_shape))) { return get_elements(self.first(), index, sh); } @@ -362,8 +390,8 @@ KFR_INTRINSIC vec<T, N> get_elements(const xpadded<Arg>& self, const shape<Trait for (size_t i = 0; i < N; i++) { shape ish = index.add(i); - if (ish.back() < self.input_size.back()) - x[i] = get_elements(self.first(), ish, csize_t<1>()).front(); + if (ish.back() < self.input_shape.back()) + x[i] = get_elements(self.first(), ish, axis_params_v<Axis, 1>).front(); } return x; } @@ -374,34 +402,35 @@ KFR_INTRINSIC vec<T, N> get_elements(const xpadded<Arg>& self, const shape<Trait // ---------------------------------------------------------------------------- template <typename Arg> -struct xreverse : public xwitharguments<Arg> +struct expression_reverse : public expression_with_arguments<Arg> { - using ArgTraits = typename xwitharguments<Arg>::first_arg_trait; + using ArgTraits = typename expression_with_arguments<Arg>::first_arg_traits; shape<ArgTraits::dims> input_shape; - KFR_MEM_INTRINSIC xreverse(Arg&& arg) - : xwitharguments<Arg>{ std::forward<Arg>(arg) }, input_shape(ArgTraits::shapeof(this->first())) + KFR_MEM_INTRINSIC expression_reverse(Arg&& arg) + : expression_with_arguments<Arg>{ std::forward<Arg>(arg) }, + input_shape(ArgTraits::shapeof(this->first())) { } }; -template <typename Arg> -KFR_INTRINSIC xreverse<Arg> x_reverse(Arg&& arg) +template <typename Arg, KFR_ACCEPT_EXPRESSIONS(Arg)> +KFR_INTRINSIC expression_reverse<Arg> reverse(Arg&& arg) { static_assert(expression_dims<Arg> >= 1); return { std::forward<Arg>(arg) }; } template <typename Arg> -struct expression_traits<xreverse<Arg>> : expression_traits_defaults +struct expression_traits<expression_reverse<Arg>> : expression_traits_defaults { using ArgTraits = expression_traits<Arg>; using value_type = typename ArgTraits::value_type; constexpr static size_t dims = ArgTraits::dims; - static_assert(ArgTraits::random_access, "xreverse requires an expression with random access"); + static_assert(ArgTraits::random_access, "expression_reverse requires an expression with random access"); - KFR_MEM_INTRINSIC constexpr static shape<dims> shapeof(const xreverse<Arg>& self) + KFR_MEM_INTRINSIC constexpr static shape<dims> shapeof(const expression_reverse<Arg>& self) { return ArgTraits::shapeof(self.first()); } @@ -411,9 +440,9 @@ struct expression_traits<xreverse<Arg>> : expression_traits_defaults inline namespace CMT_ARCH_NAME { -template <typename Arg, index_t Axis, size_t N, typename Traits = expression_traits<xreverse<Arg>>, +template <typename Arg, index_t Axis, size_t N, typename Traits = expression_traits<expression_reverse<Arg>>, typename T = typename Traits::value_type> -KFR_INTRINSIC vec<T, N> get_elements(const xreverse<Arg>& self, const shape<Traits::dims>& index, +KFR_INTRINSIC vec<T, N> get_elements(const expression_reverse<Arg>& self, const shape<Traits::dims>& index, const axis_params<Axis, N>& sh) { return reverse(get_elements(self.first(), self.input_shape.sub(index).sub(shape<Traits::dims>(N)), sh)); @@ -424,62 +453,77 @@ KFR_INTRINSIC vec<T, N> get_elements(const xreverse<Arg>& self, const shape<Trai // ---------------------------------------------------------------------------- template <index_t... Values> -struct fixed_shape +struct fixed_shape_t { + constexpr fixed_shape_t() = default; constexpr static shape<sizeof...(Values)> get() { return { Values... }; } }; +template <index_t... Values> +constexpr inline fixed_shape_t<Values...> fixed_shape{}; + template <typename Arg, typename Shape> -struct xfixshape : public xwitharguments<Arg> +struct expression_fixshape : public expression_with_arguments<Arg> { - using ArgTraits = typename xwitharguments<Arg>::first_arg_trait; + using ArgTraits = typename expression_with_arguments<Arg>::first_arg_traits; - KFR_MEM_INTRINSIC xfixshape(Arg&& arg) : xwitharguments<Arg>{ std::forward<Arg>(arg) } {} + KFR_MEM_INTRINSIC expression_fixshape(Arg&& arg) + : expression_with_arguments<Arg>{ std::forward<Arg>(arg) } + { + } }; -template <typename Arg, index_t... ShapeValues> -KFR_INTRINSIC xfixshape<Arg, fixed_shape<ShapeValues...>> fixshape(Arg&& arg, - const fixed_shape<ShapeValues...>&) +template <typename Arg, index_t... ShapeValues, KFR_ACCEPT_EXPRESSIONS(Arg)> +KFR_INTRINSIC expression_fixshape<Arg, fixed_shape_t<ShapeValues...>> fixshape( + Arg&& arg, const fixed_shape_t<ShapeValues...>&) { return { std::forward<Arg>(arg) }; } template <typename Arg, index_t... ShapeValues> -struct expression_traits<xfixshape<Arg, fixed_shape<ShapeValues...>>> : expression_traits_defaults +struct expression_traits<expression_fixshape<Arg, fixed_shape_t<ShapeValues...>>> : expression_traits_defaults { using ArgTraits = expression_traits<Arg>; using value_type = typename ArgTraits::value_type; - constexpr static size_t dims = ArgTraits::dims; + constexpr static size_t dims = sizeof...(ShapeValues); // ArgTraits::dims; constexpr static bool random_access = ArgTraits::random_access; KFR_MEM_INTRINSIC constexpr static shape<dims> shapeof( - const xfixshape<Arg, fixed_shape<ShapeValues...>>& self) + const expression_fixshape<Arg, fixed_shape_t<ShapeValues...>>& self) { - return fixed_shape<ShapeValues...>::get(); + return fixed_shape_t<ShapeValues...>::get(); } - KFR_MEM_INTRINSIC constexpr static shape<dims> shapeof() { return fixed_shape<ShapeValues...>::get(); } + KFR_MEM_INTRINSIC constexpr static shape<dims> shapeof() { return fixed_shape_t<ShapeValues...>::get(); } }; inline namespace CMT_ARCH_NAME { template <typename Arg, typename Shape, index_t Axis, size_t N, - typename Traits = expression_traits<xfixshape<Arg, Shape>>, + typename Traits = expression_traits<expression_fixshape<Arg, Shape>>, typename T = typename Traits::value_type> -KFR_INTRINSIC vec<T, N> get_elements(const xfixshape<Arg, Shape>& self, const shape<Traits::dims>& index, - const axis_params<Axis, N>& sh) +KFR_INTRINSIC vec<T, N> get_elements(const expression_fixshape<Arg, Shape>& self, + const shape<Traits::dims>& index, const axis_params<Axis, N>& sh) { - return get_elements(self.first(), index, sh); + using ArgTraits = expression_traits<Arg>; + return get_elements(self.first(), index.template trim<ArgTraits::dims>(), sh); } template <typename Arg, typename Shape, index_t Axis, size_t N, - typename Traits = expression_traits<xfixshape<Arg, Shape>>, + typename Traits = expression_traits<expression_fixshape<Arg, Shape>>, typename T = typename Traits::value_type> -KFR_INTRINSIC void set_elements(xfixshape<Arg, Shape>& self, const shape<Traits::dims>& index, +KFR_INTRINSIC void set_elements(expression_fixshape<Arg, Shape>& self, const shape<Traits::dims>& index, const axis_params<Axis, N>& sh, const identity<vec<T, N>>& value) { - set_elements(self.first(), index, sh, value); + using ArgTraits = expression_traits<Arg>; + if constexpr (is_output_expression<Arg>) + { + set_elements(self.first(), index.template trim<ArgTraits::dims>(), sh, value); + } + else + { + } } } // namespace CMT_ARCH_NAME @@ -487,27 +531,27 @@ KFR_INTRINSIC void set_elements(xfixshape<Arg, Shape>& self, const shape<Traits: // ---------------------------------------------------------------------------- template <typename Arg, index_t OutDims> -struct xreshape : public xwitharguments<Arg> +struct expression_reshape : public expression_with_arguments<Arg> { - using ArgTraits = typename xwitharguments<Arg>::first_arg_trait; + using ArgTraits = typename expression_with_arguments<Arg>::first_arg_traits; shape<ArgTraits::dims> in_shape; shape<OutDims> out_shape; - KFR_MEM_INTRINSIC xreshape(Arg&& arg, const shape<OutDims>& out_shape) - : xwitharguments<Arg>{ std::forward<Arg>(arg) }, in_shape(ArgTraits::shapeof(arg)), + KFR_MEM_INTRINSIC expression_reshape(Arg&& arg, const shape<OutDims>& out_shape) + : expression_with_arguments<Arg>{ std::forward<Arg>(arg) }, in_shape(ArgTraits::shapeof(arg)), out_shape(out_shape) { } }; -template <typename Arg, index_t OutDims> -KFR_INTRINSIC xreshape<Arg, OutDims> reshape(Arg&& arg, const shape<OutDims>& out_shape) +template <typename Arg, index_t OutDims, KFR_ACCEPT_EXPRESSIONS(Arg)> +KFR_INTRINSIC expression_reshape<Arg, OutDims> reshape(Arg&& arg, const shape<OutDims>& out_shape) { return { std::forward<Arg>(arg), out_shape }; } template <typename Arg, index_t OutDims> -struct expression_traits<xreshape<Arg, OutDims>> : expression_traits_defaults +struct expression_traits<expression_reshape<Arg, OutDims>> : expression_traits_defaults { using ArgTraits = expression_traits<Arg>; @@ -515,7 +559,7 @@ struct expression_traits<xreshape<Arg, OutDims>> : expression_traits_defaults constexpr static size_t dims = OutDims; constexpr static bool random_access = ArgTraits::random_access; - KFR_MEM_INTRINSIC constexpr static shape<dims> shapeof(const xreshape<Arg, OutDims>& self) + KFR_MEM_INTRINSIC constexpr static shape<dims> shapeof(const expression_reshape<Arg, OutDims>& self) { return self.out_shape; } @@ -526,10 +570,10 @@ inline namespace CMT_ARCH_NAME { template <typename Arg, index_t outdims, index_t Axis, size_t N, - typename Traits = expression_traits<xreshape<Arg, outdims>>, + typename Traits = expression_traits<expression_reshape<Arg, outdims>>, typename T = typename Traits::value_type> -KFR_INTRINSIC vec<T, N> get_elements(const xreshape<Arg, outdims>& self, const shape<Traits::dims>& index, - const axis_params<Axis, N>& sh) +KFR_INTRINSIC vec<T, N> get_elements(const expression_reshape<Arg, outdims>& self, + const shape<Traits::dims>& index, const axis_params<Axis, N>& sh) { using ArgTraits = typename Traits::ArgTraits; constexpr index_t indims = ArgTraits::dims; @@ -581,9 +625,9 @@ KFR_INTRINSIC vec<T, N> get_elements(const xreshape<Arg, outdims>& self, const s } template <typename Arg, index_t outdims, index_t Axis, size_t N, - typename Traits = expression_traits<xreshape<Arg, outdims>>, + typename Traits = expression_traits<expression_reshape<Arg, outdims>>, typename T = typename Traits::value_type> -KFR_INTRINSIC void set_elements(xreshape<Arg, outdims>& self, const shape<Traits::dims>& index, +KFR_INTRINSIC void set_elements(expression_reshape<Arg, outdims>& self, const shape<Traits::dims>& index, const axis_params<Axis, N>& sh, const identity<vec<T, N>>& value) { using ArgTraits = typename Traits::ArgTraits; @@ -631,44 +675,84 @@ KFR_INTRINSIC void set_elements(xreshape<Arg, outdims>& self, const shape<Traits // ---------------------------------------------------------------------------- +struct symmetric_linspace_t +{ +}; +constexpr inline const symmetric_linspace_t symmetric_linspace{}; + template <typename T, bool truncated = true> -struct xlinspace +struct expression_linspace { T start; T stop; index_t size; bool endpoint; + T invsize; + + expression_linspace(T start, T stop, size_t size, bool endpoint = false) + : start(start), stop(stop), size(size), invsize(1.0 / T(endpoint ? size - 1 : size)) + { + } + + expression_linspace(symmetric_linspace_t, T symsize, size_t size, bool endpoint = false) + : expression_linspace(-symsize, +symsize, size, endpoint) + { + } }; template <typename T, bool truncated> -struct expression_traits<xlinspace<T, truncated>> : expression_traits_defaults +struct expression_traits<expression_linspace<T, truncated>> : expression_traits_defaults { using value_type = T; constexpr static size_t dims = 1; - constexpr static shape<dims> shapeof(const xlinspace<T, truncated>& self) + constexpr static shape<dims> shapeof(const expression_linspace<T, truncated>& self) { return shape<dims>(truncated ? self.size : infinite_size); } constexpr static shape<dims> shapeof() { return shape<dims>(truncated ? undefined_size : infinite_size); } }; -template <bool truncated = false, typename T1, typename T2, typename Tout = std::common_type_t<T1, T2>> -KFR_INTRINSIC xlinspace<Tout, truncated> linspace(T1 start, T2 stop, size_t size, bool endpoint = false) +/** @brief Returns evenly spaced numbers over a specified interval. + * + * @param start The starting value of the sequence + * @param stop The end value of the sequence. if ``endpoint`` is ``false``, the last value is excluded + * @param size Number of samples to generate + * @param endpoint If ``true``, ``stop`` is the last sample. Otherwise, it is not included + * @tparam truncated If ``true``, linspace returns exactly size elements, otherwise, returns infinite sequence + * @tparam precise No longer used since KFR5, calculations are always precise + */ +template <typename T = void, bool precise = false, bool truncated = false, typename T1, typename T2, + typename Tout = or_type<T, ftype<std::common_type_t<T1, T2>>>> +KFR_INTRINSIC expression_linspace<Tout, truncated> linspace(T1 start, T2 stop, size_t size, + bool endpoint = false, cbool_t<truncated> = {}) { return { static_cast<Tout>(start), static_cast<Tout>(stop), size, endpoint }; } +/** @brief Returns evenly spaced numbers over a specified interval. + * + * @param symsize The sequence will have interval [-symsize..symsize] + * @param size Number of samples to generate + * @tparam truncated If ``true``, linspace returns exactly size elements, otherwise, returns infinite sequence + * @tparam precise No longer used since KFR5, calculations are always precise + */ +template <typename T, bool precise = false, bool truncated = false, typename Tout = ftype<T>> +KFR_INTRINSIC expression_linspace<Tout, truncated> symmlinspace(T symsize, size_t size, + cbool_t<truncated> = {}) +{ + return { symmetric_linspace, static_cast<Tout>(symsize), size, true }; +} + inline namespace CMT_ARCH_NAME { template <typename T, bool truncated, size_t N> -KFR_INTRINSIC vec<T, N> get_elements(const xlinspace<T, truncated>& self, const shape<1>& index, +KFR_INTRINSIC vec<T, N> get_elements(const expression_linspace<T, truncated>& self, const shape<1>& index, const axis_params<0, N>&) { - T acc = self.start; using TI = itype<T>; - return mix(enumerate(vec_shape<T, N>(), static_cast<T>(static_cast<TI>(index))) * self.invsize, + return mix((enumerate(vec_shape<T, N>()) + static_cast<T>(static_cast<TI>(index.front()))) * self.invsize, self.start, self.stop); } } // namespace CMT_ARCH_NAME @@ -676,22 +760,22 @@ KFR_INTRINSIC vec<T, N> get_elements(const xlinspace<T, truncated>& self, const // ---------------------------------------------------------------------------- template <typename Arg1, typename Arg2, index_t ConcatAxis> -struct xconcatenate : public xwitharguments<Arg1, Arg2> +struct expression_concatenate : public expression_with_arguments<Arg1, Arg2> { static_assert(expression_dims<Arg1> == expression_dims<Arg2>); static_assert(std::is_same_v<expression_value_type<Arg1>, expression_value_type<Arg2>>); constexpr static index_t dims = expression_dims<Arg1>; shape<dims> size1; - KFR_MEM_INTRINSIC xconcatenate(Arg1&& arg1, Arg2&& arg2) - : xwitharguments<Arg1, Arg2>{ std::forward<Arg1>(arg1), std::forward<Arg2>(arg2) }, + KFR_MEM_INTRINSIC expression_concatenate(Arg1&& arg1, Arg2&& arg2) + : expression_with_arguments<Arg1, Arg2>{ std::forward<Arg1>(arg1), std::forward<Arg2>(arg2) }, size1(expression_traits<Arg1>::shapeof(arg1)) { } }; template <typename Arg1, typename Arg2, index_t ConcatAxis> -struct expression_traits<xconcatenate<Arg1, Arg2, ConcatAxis>> : expression_traits_defaults +struct expression_traits<expression_concatenate<Arg1, Arg2, ConcatAxis>> : expression_traits_defaults { using ArgTraits1 = expression_traits<Arg1>; using ArgTraits2 = expression_traits<Arg2>; @@ -708,9 +792,11 @@ struct expression_traits<xconcatenate<Arg1, Arg2, ConcatAxis>> : expression_trai return result; } - KFR_MEM_INTRINSIC constexpr static shape<dims> shapeof(const xconcatenate<Arg1, Arg2, ConcatAxis>& self) + KFR_MEM_INTRINSIC constexpr static shape<dims> shapeof( + const expression_concatenate<Arg1, Arg2, ConcatAxis>& self) { - return concat_shape(ArgTraits1::shapeof(std::get<0>(self)), ArgTraits2::shapeof(std::get<1>(self))); + return concat_shape(ArgTraits1::shapeof(std::get<0>(self.args)), + ArgTraits2::shapeof(std::get<1>(self.args))); } KFR_MEM_INTRINSIC constexpr static shape<dims> shapeof() { @@ -719,16 +805,15 @@ struct expression_traits<xconcatenate<Arg1, Arg2, ConcatAxis>> : expression_trai }; template <index_t ConcatAxis = 0, typename Arg1, typename Arg2, KFR_ACCEPT_EXPRESSIONS(Arg1, Arg2)> -KFR_INTRINSIC xconcatenate<Arg1, Arg2, ConcatAxis> concatenate(Arg1&& arg1, Arg2&& arg2) +KFR_INTRINSIC expression_concatenate<Arg1, Arg2, ConcatAxis> concatenate(Arg1&& arg1, Arg2&& arg2) { return { std::forward<Arg1>(arg1), std::forward<Arg2>(arg2) }; } template <index_t ConcatAxis = 0, typename Arg1, typename Arg2, typename Arg3, KFR_ACCEPT_EXPRESSIONS(Arg1, Arg2, Arg3)> -KFR_INTRINSIC xconcatenate<Arg1, xconcatenate<Arg2, Arg3, ConcatAxis>, ConcatAxis> concatenate(Arg1&& arg1, - Arg2&& arg2, - Arg3&& arg3) +KFR_INTRINSIC expression_concatenate<Arg1, expression_concatenate<Arg2, Arg3, ConcatAxis>, ConcatAxis> +concatenate(Arg1&& arg1, Arg2&& arg2, Arg3&& arg3) { return { std::forward<Arg1>(arg1), { std::forward<Arg2>(arg2), std::forward<Arg3>(arg3) } }; } @@ -737,11 +822,11 @@ inline namespace CMT_ARCH_NAME { template <typename Arg1, typename Arg2, index_t ConcatAxis, index_t NDims, index_t Axis, size_t N, - typename T = typename expression_traits<xconcatenate<Arg1, Arg2, ConcatAxis>>::value_type> -KFR_INTRINSIC vec<T, N> get_elements(const xconcatenate<Arg1, Arg2, ConcatAxis>& self, + typename T = typename expression_traits<expression_concatenate<Arg1, Arg2, ConcatAxis>>::value_type> +KFR_INTRINSIC vec<T, N> get_elements(const expression_concatenate<Arg1, Arg2, ConcatAxis>& self, const shape<NDims>& index, const axis_params<Axis, N>& sh) { - const index_t size1 = self.size1; + const shape<NDims> size1 = self.size1; constexpr index_t Naxis = ConcatAxis == Axis ? N : 1; if (index[ConcatAxis] >= size1[ConcatAxis]) { @@ -773,8 +858,133 @@ KFR_INTRINSIC vec<T, N> get_elements(const xconcatenate<Arg1, Arg2, ConcatAxis>& } } -} // namespace CMT_ARCH_NAME +// ---------------------------------------------------------------------------- + +template <typename... Args> +using expression_pack = expression_function<fn::packtranspose, Args...>; + +template <typename... Args, KFR_ACCEPT_EXPRESSIONS(Args...)> +KFR_INTRINSIC expression_pack<Args...> pack(Args&&... args) +{ + return { std::forward<Args>(args)... }; +} + +namespace internal +{ +template <typename... Args, index_t Axis, size_t N, + typename Tr = expression_traits<expression_function<fn::packtranspose, Args...>>, size_t... Indices> +KFR_INTRINSIC void set_elements_packed(expression_function<fn::packtranspose, Args...>& self, + shape<Tr::dims> index, axis_params<Axis, N> sh, + const vec<typename Tr::value_type, N>& x, csizes_t<Indices...>) +{ + constexpr size_t count = sizeof...(Args); + using ST = subtype<typename Tr::value_type>; + const vec<vec<ST, N>, count> xx = vec<vec<ST, N>, count>::from_flatten(transpose<count>(flatten(x))); + (set_elements(std::get<Indices>(self.args), index, sh, xx[Indices]), ...); +} +} // namespace internal + +template <typename... Args, index_t Axis, size_t N, + typename Tr = expression_traits<expression_function<fn::packtranspose, Args...>>> +KFR_INTRINSIC void set_elements(expression_function<fn::packtranspose, Args...>& self, shape<Tr::dims> index, + axis_params<Axis, N> sh, const identity<vec<typename Tr::value_type, N>>& x) +{ + internal::set_elements_packed(self, index, sh, x, csizeseq<sizeof...(Args)>); +} // ---------------------------------------------------------------------------- +template <typename... E> +struct expression_unpack : expression_with_arguments<E...>, expression_traits_defaults +{ + constexpr static size_t count = sizeof...(E); + + using first_arg_traits = typename expression_with_arguments<E...>::first_arg_traits; + + constexpr static index_t dims = first_arg_traits::dims; + using first_value_type = typename first_arg_traits::value_type; + + using value_type = vec<first_value_type, count>; + + static_assert(((expression_dims<E> == dims) && ...)); + static_assert(((std::is_same_v<expression_value_type<E>, first_value_type>)&&...)); + + constexpr static shape<dims> shapeof(const expression_unpack& self) + { + return first_arg_traits::shapeof(self.first()); + } + constexpr static shape<dims> shapeof() { return first_arg_traits::shapeof(); } + + expression_unpack(E&&... e) : expression_with_arguments<E...>(std::forward<E>(e)...) {} + + template <index_t Axis, size_t N> + KFR_INTRINSIC friend void set_elements(expression_unpack& self, shape<dims> index, + axis_params<Axis, N> sh, const identity<vec<value_type, N>>& x) + { + self.output(index, sh, x, csizeseq<count>); + } + + template <typename Input, KFR_ACCEPT_EXPRESSIONS(Input)> + KFR_MEM_INTRINSIC expression_unpack& operator=(Input&& input) + { + process(*this, std::forward<Input>(input)); + return *this; + } + +private: + template <index_t Axis, size_t N, size_t... indices> + KFR_MEM_INTRINSIC void output(shape<dims> index, axis_params<Axis, N> sh, const vec<value_type, N>& x, + csizes_t<indices...>) + { + const vec<vec<first_value_type, N>, count> xx = + vec<vec<first_value_type, N>, count>::from_flatten(transpose<count>(flatten(x))); + (set_elements(std::get<indices>(this->args), index, sh, xx[indices]), ...); + } +}; + +// ---------------------------------------------------------------------------- + +template <typename... E, enable_if_output_expressions<E...>* = nullptr> +KFR_FUNCTION expression_unpack<E...> unpack(E&&... e) +{ + return expression_unpack<E...>(std::forward<E>(e)...); +} + +// ---------------------------------------------------------------------------- + +template <typename Fn, typename E> +struct expression_adjacent : expression_with_traits<E> +{ + using value_type = typename expression_with_traits<E>::value_type; + constexpr static inline index_t dims = expression_with_traits<E>::dims; + constexpr static inline bool random_access = false; + + expression_adjacent(Fn&& fn, E&& e) + : expression_with_traits<E>(std::forward<E>(e)), fn(std::forward<Fn>(fn)) + { + } + + template <size_t N, index_t VecAxis> + KFR_INTRINSIC friend vec<value_type, N> get_elements(const expression_adjacent& self, shape<dims> index, + axis_params<VecAxis, N> sh) + { + const vec<value_type, N> in = get_elements(self.first(), index, sh); + const vec<value_type, N> delayed = insertleft(self.data, in); + self.data = in.back(); + return self.fn(in, delayed); + } + Fn fn; + mutable value_type data = value_type(0); +}; + +/** + * @brief Returns template expression that returns the result of calling \f$ fn(x_i, x_{i-1}) \f$ + */ +template <typename Fn, typename E1> +KFR_INTRINSIC expression_adjacent<Fn, E1> adjacent(Fn&& fn, E1&& e1) +{ + return expression_adjacent<Fn, E1>(std::forward<Fn>(fn), std::forward<E1>(e1)); +} + +} // namespace CMT_ARCH_NAME } // namespace kfr diff --git a/include/kfr/base/conversion.hpp b/include/kfr/base/conversion.hpp @@ -183,7 +183,7 @@ template <typename Tout, typename Tin, typename Tout_traits = audio_sample_trait inline Tout convert_sample(const Tin& in) { constexpr auto scale = Tout_traits::scale / Tin_traits::scale; - return innercast<Tout>(clamp(in * scale, -Tout_traits::scale, +Tout_traits::scale)); + return broadcastto<Tout>(clamp(in * scale, -Tout_traits::scale, +Tout_traits::scale)); } /// @brief Deinterleaves and converts audio samples @@ -264,20 +264,24 @@ void convert(Tout* out, const Tin* in, size_t size) template <typename Tout, typename Tout_traits = audio_sample_traits<Tout>> void convert(Tout* out, const void* in, audio_sample_type in_type, size_t size) { - cswitch(audio_sample_type_clist{}, in_type, [&](auto t) { - using type = typename audio_sample_get_type<val_of(decltype(t)())>::type; - convert(out, reinterpret_cast<const type*>(in), size); - }); + cswitch(audio_sample_type_clist{}, in_type, + [&](auto t) + { + using type = typename audio_sample_get_type<val_of(decltype(t)())>::type; + convert(out, reinterpret_cast<const type*>(in), size); + }); } /// @brief Converts audio samples (output format is known at runtime) template <typename Tin, typename Tin_traits = audio_sample_traits<Tin>> void convert(void* out, audio_sample_type out_type, const Tin* in, size_t size) { - cswitch(audio_sample_type_clist{}, out_type, [&](auto t) { - using type = typename audio_sample_get_type<val_of(decltype(t)())>::type; - convert(reinterpret_cast<type*>(out), in, size); - }); + cswitch(audio_sample_type_clist{}, out_type, + [&](auto t) + { + using type = typename audio_sample_get_type<val_of(decltype(t)())>::type; + convert(reinterpret_cast<type*>(out), in, size); + }); } } // namespace CMT_ARCH_NAME } // namespace kfr diff --git a/include/kfr/base/expression.hpp b/include/kfr/base/expression.hpp @@ -81,6 +81,10 @@ constexpr inline shape<expression_dims<T>> shapeof() } template <typename T> +struct expression_traits<const T, std::void_t<expression_value_type<T>>> : expression_traits<T> +{ +}; +template <typename T> struct expression_traits<T&, std::void_t<expression_value_type<T>>> : expression_traits<T> { }; @@ -98,6 +102,19 @@ struct expression_traits<const T&&, std::void_t<typename expression_traits<T>::v { }; +// This allows old style expressions+traits +template <typename T> +struct expression_traits<T, std::void_t<decltype(T::random_access), decltype(T::shapeof())>> +{ + using value_type = typename T::value_type; + constexpr static size_t dims = T::dims; + constexpr static shape<dims> shapeof(const T& self) { return T::shapeof(self); } + constexpr static shape<dims> shapeof() { return T::shapeof(); } + + constexpr static inline bool explicit_operand = T::explicit_operand; + constexpr static inline bool random_access = T::random_access; +}; + struct expression_traits_defaults { // using value_type = accepts_any; @@ -166,6 +183,9 @@ constexpr inline bool is_input_output_expression<E, enable_if_input_output_expre template <typename T> constexpr inline bool is_expr_element = std::is_same_v<std::remove_cv_t<T>, T>&& is_vec_element<T>; +template <typename E> +constexpr inline bool is_infinite = expression_traits<E>::shapeof().has_infinity(); + template <typename T> struct expression_traits<T, std::enable_if_t<is_expr_element<T>>> : expression_traits_defaults { @@ -177,8 +197,29 @@ struct expression_traits<T, std::enable_if_t<is_expr_element<T>>> : expression_t KFR_MEM_INTRINSIC constexpr static shape<0> shapeof() { return {}; } }; +namespace internal_generic +{ +struct anything +{ + template <typename Expr> + constexpr anything(Expr&&) + { + } +}; +} // namespace internal_generic + inline namespace CMT_ARCH_NAME { + +template <index_t Dims> +KFR_INTRINSIC void begin_pass(const internal_generic::anything&, shape<Dims> start, shape<Dims> stop) +{ +} +template <index_t Dims> +KFR_INTRINSIC void end_pass(const internal_generic::anything&, shape<Dims> start, shape<Dims> stop) +{ +} + template <typename T, index_t Axis, size_t N, KFR_ENABLE_IF(is_expr_element<std::decay_t<T>>)> KFR_INTRINSIC vec<std::decay_t<T>, N> get_elements(T&& self, const shape<0>& index, const axis_params<Axis, N>&) @@ -193,10 +234,313 @@ KFR_INTRINSIC void set_elements(T& self, const shape<0>& index, const axis_param static_assert(!std::is_const_v<T>); self = val.front(); } -} // namespace CMT_ARCH_NAME -inline namespace CMT_ARCH_NAME +template <typename... Args> +struct expression_with_arguments +{ + constexpr static size_t count = sizeof...(Args); + + using type_list = ctypes_t<Args...>; + + template <size_t idx> + using nth = typename type_list::template nth<idx>; + + using first_arg = typename type_list::template nth<0>; + + template <size_t idx> + using nth_trait = expression_traits<typename type_list::template nth<idx>>; + + using first_arg_traits = expression_traits<first_arg>; + + std::tuple<Args...> args; + std::array<dimset, count> masks; + + KFR_INTRINSIC auto& first() { return std::get<0>(args); } + KFR_INTRINSIC const auto& first() const { return std::get<0>(args); } + + template <size_t idx> + KFR_INTRINSIC dimset getmask(csize_t<idx> = {}) const + { + static_assert(idx < count); + using Traits = expression_traits<nth<idx>>; + if constexpr (sizeof...(Args) <= 1 || Traits::dims == 0) + { + return -1; + } + else + { + constexpr shape<Traits::dims> sh = Traits::shapeof(); + if constexpr (sh.cproduct() > 0) + { + return sh.tomask(); + } + else + { + return std::get<idx>(masks); + } + } + } + + template <typename Fn> + KFR_INTRINSIC constexpr auto fold(Fn&& fn) const + { + return fold_impl(std::forward<Fn>(fn), csizeseq<count>); + } + template <typename Fn> + KFR_INTRINSIC constexpr static auto fold_idx(Fn&& fn) + { + return fold_idx_impl(std::forward<Fn>(fn), csizeseq<count>); + } + + KFR_INTRINSIC expression_with_arguments(Args&&... args) : args{ std::forward<Args>(args)... } + { + cforeach(csizeseq<count>, + [&](auto idx_) CMT_INLINE_LAMBDA + { + constexpr size_t idx = val_of(decltype(idx_)()); + shape sh = expression_traits<nth<idx>>::shapeof(std::get<idx>(this->args)); + masks[idx] = sh.tomask(); + }); + } + +private: + template <typename Fn, size_t... indices> + KFR_INTRINSIC constexpr auto fold_impl(Fn&& fn, csizes_t<indices...>) const + { + return fn(std::get<indices>(args)...); + } + template <typename Fn, size_t... indices> + KFR_INTRINSIC constexpr static auto fold_idx_impl(Fn&& fn, csizes_t<indices...>) + { + return fn(csize<indices>...); + } +}; + +template <typename Arg> +struct expression_with_arguments<Arg> +{ + constexpr static size_t count = 1; + + using type_list = ctypes_t<Arg>; + + template <size_t idx> + using nth = Arg; + + using first_arg = Arg; + + template <size_t idx> + using nth_trait = expression_traits<Arg>; + + using first_arg_traits = expression_traits<first_arg>; + + std::tuple<Arg> args; + + KFR_MEM_INTRINSIC auto& first() { return std::get<0>(args); } + KFR_MEM_INTRINSIC const auto& first() const { return std::get<0>(args); } + + template <size_t idx> + KFR_MEM_INTRINSIC dimset getmask(csize_t<idx> = {}) const + { + return -1; + } + + template <typename Fn> + KFR_MEM_INTRINSIC constexpr auto fold(Fn&& fn) const + { + return fold_impl(std::forward<Fn>(fn), csizeseq<count>); + } + template <typename Fn> + KFR_INTRINSIC constexpr static auto fold_idx(Fn&& fn) + { + return fold_idx_impl(std::forward<Fn>(fn), csizeseq<count>); + } + + KFR_MEM_INTRINSIC expression_with_arguments(Arg&& arg) : args{ std::forward<Arg>(arg) } {} + +private: + template <typename Fn, size_t... indices> + KFR_MEM_INTRINSIC constexpr auto fold_impl(Fn&& fn, csizes_t<indices...>) const + { + return fn(std::get<indices>(args)...); + } + template <typename Fn, size_t... indices> + KFR_INTRINSIC constexpr static auto fold_idx_impl(Fn&& fn, csizes_t<indices...>) + { + return fn(csize<indices>...); + } +}; + +template <typename... Args> +expression_with_arguments(Args&&... args) -> expression_with_arguments<Args...>; + +template <typename Arg> +struct expression_with_traits : expression_with_arguments<Arg>, expression_traits_defaults +{ + using first_arg_traits = expression_traits<Arg>; + using value_type = typename first_arg_traits::value_type; + constexpr static size_t dims = first_arg_traits::dims; + constexpr static shape<dims> shapeof(const expression_with_traits& self) + { + return first_arg_traits::shapeof(self.first()); + } + constexpr static shape<dims> shapeof() { return first_arg_traits::shapeof(); } + + using expression_with_arguments<Arg>::expression_with_arguments; +}; + +template <typename Fn, typename... Args> +struct expression_function : expression_with_arguments<Args...>, expression_traits_defaults +{ + using value_type = + typename std::invoke_result_t<Fn, + vec<typename expression_traits<Args>::value_type, 1>...>::value_type; + constexpr static size_t dims = const_max(expression_traits<Args>::dims...); + + constexpr static shape<dims> shapeof(const expression_function& self) + { + return self.fold([&](auto&&... args) CMT_INLINE_LAMBDA constexpr->auto { + return internal_generic::common_shape(expression_traits<decltype(args)>::shapeof(args)...); + }); + } + constexpr static shape<dims> shapeof() + { + return expression_function<Fn, + Args...>::fold_idx([&](auto... args) CMT_INLINE_LAMBDA constexpr->auto { + return internal_generic::common_shape( + expression_traits< + typename expression_function::template nth<val_of(decltype(args)())>>::shapeof()...); + }); + } + + constexpr static inline bool random_access = (expression_traits<Args>::random_access && ...); + + Fn fn; + + KFR_MEM_INTRINSIC expression_function(expression_with_arguments<Args...> args, Fn&& fn) + : expression_with_arguments<Args...>{ std::move(args) }, fn(std::forward<Fn>(fn)) + { + check_shapes(); + } + KFR_MEM_INTRINSIC expression_function(Fn&& fn, Args&&... args) + : expression_with_arguments<Args...>{ std::forward<Args>(args)... }, fn(std::forward<Fn>(fn)) + { + check_shapes(); + } + KFR_MEM_INTRINSIC expression_function(Args&&... args) + : expression_with_arguments<Args...>{ std::forward<Args>(args)... }, fn{} + { + check_shapes(); + } + KFR_MEM_INTRINSIC void check_shapes() + { + if constexpr (dims > 0) + { + shape<dims> sh = shapeof(*this); + if (sh == shape<dims>(0)) + { + // throw std::runtime_error("KFR: Invalid shapes in expression_function"); + } + } + } + + template <typename In, enable_if_input_expression<In>* = nullptr> + expression_function& operator=(In&& in) + { + static_assert(is_output_expression<expression_function>); + process(*this, std::forward<In>(in)); + return *this; + } +}; + +template <typename... Args, typename Fn> +expression_function(const expression_with_arguments<Args...>& args, Fn&& fn) + -> expression_function<Fn, Args...>; +template <typename... Args, typename Fn> +expression_function(expression_with_arguments<Args...>&& args, Fn&& fn) -> expression_function<Fn, Args...>; +template <typename... Args, typename Fn> +expression_function(expression_with_arguments<Args...>& args, Fn&& fn) -> expression_function<Fn, Args...>; + +namespace internal +{ + +template <typename... Args, index_t Dims, size_t... idx> +KFR_INTRINSIC void begin_pass_args(const expression_with_arguments<Args...>& self, shape<Dims> start, + shape<Dims> stop, csizes_t<idx...>) +{ + (begin_pass(std::get<idx>(self.args), start, stop), ...); +} + +template <typename... Args, index_t Dims, size_t... idx> +KFR_INTRINSIC void end_pass_args(const expression_with_arguments<Args...>& self, shape<Dims> start, + shape<Dims> stop, csizes_t<idx...>) +{ + (end_pass(std::get<idx>(self.args), start, stop), ...); +} + +template <index_t outdims, typename Fn, typename... Args, index_t Axis, size_t N, index_t Dims, size_t idx, + typename Traits = expression_traits<typename expression_function<Fn, Args...>::template nth<idx>>> +KFR_MEM_INTRINSIC vec<typename Traits::value_type, N> get_arg(const expression_function<Fn, Args...>& self, + const shape<Dims>& index, + const axis_params<Axis, N>& sh, csize_t<idx>) +{ + if constexpr (Traits::dims == 0) + { + return repeat<N>(get_elements(std::get<idx>(self.args), {}, axis_params<Axis, 1>{})); + } + else + { + auto indices = internal_generic::adapt<Traits::dims>(index, self.getmask(csize<idx>)); + constexpr index_t last_dim = Traits::shapeof().back(); + if constexpr (last_dim != undefined_size) + { + constexpr index_t last_dim_pot = prev_poweroftwo(last_dim); + return repeat<N / std::min(last_dim_pot, N)>(get_elements( + std::get<idx>(self.args), indices, axis_params<Axis, std::min(last_dim_pot, N)>{})); + } + else + { + if constexpr (sizeof...(Args) > 1 && N > 1) + { + if (CMT_UNLIKELY(self.masks[idx].back() == 0)) + return get_elements(std::get<idx>(self.args), indices, axis_params<Axis, 1>{}).front(); + else + return get_elements(std::get<idx>(self.args), indices, sh); + } + else + { + return get_elements(std::get<idx>(self.args), indices, sh); + } + } + } +} +} // namespace internal + +template <typename... Args, index_t Dims> +KFR_INTRINSIC void begin_pass(const expression_with_arguments<Args...>& self, shape<Dims> start, + shape<Dims> stop) { + internal::begin_pass_args(self, start, stop, indicesfor<Args...>); +} + +template <typename... Args, index_t Dims> +KFR_INTRINSIC void end_pass(const expression_with_arguments<Args...>& self, shape<Dims> start, + shape<Dims> stop) +{ + internal::end_pass_args(self, start, stop, indicesfor<Args...>); +} + +template <typename Fn, typename... Args, index_t Axis, size_t N, index_t Dims, + typename Tr = expression_traits<expression_function<Fn, Args...>>, + typename T = typename Tr::value_type> +KFR_INTRINSIC vec<T, N> get_elements(const expression_function<Fn, Args...>& self, const shape<Dims>& index, + const axis_params<Axis, N>& sh) +{ + constexpr index_t outdims = Tr::dims; + return self.fold_idx( + [&](auto... idx) CMT_INLINE_LAMBDA -> vec<T, N> { + return self.fn(internal::get_arg<outdims>(self, index, sh, idx)...); + }); +} template <typename Out, typename In, index_t OutAxis, size_t w, size_t gw, typename Tin, index_t outdims, index_t indims> @@ -234,8 +578,10 @@ KFR_INTRINSIC static void tprocess_body(Out&& out, In&& in, size_t start, size_t { outidx[OutAxis] = x; inidx[InAxis] = std::min(x, insize - 1); + auto v = get_elements(in, inidx, axis_params_v<InAxis, w>); + // println("## i=", x, "\n", v); set_elements(out, outidx, axis_params_v<OutAxis, w>, - get_elements(in, inidx, axis_params_v<InAxis, w>)); + v ); } } CMT_LOOP_NOUNROLL @@ -253,7 +599,14 @@ template <size_t width = 0, index_t Axis = 0, typename Out, typename In, size_t CMT_ENABLE_IF(expression_traits<Out>::dims == 0)> static auto process(Out&& out, In&& in, shape<0> = {}, shape<0> = {}, csize_t<gw> = {}) -> shape<0> { + static_assert(is_input_expression<In>, "In must be an input expression"); + static_assert(is_output_expression<Out>, "Out must be an output expression"); + static_assert(expression_traits<In>::dims == 0); + begin_pass(out, shape{}, shape{}); + begin_pass(in, shape{}, shape{}); set_elements(out, shape<0>{}, axis_params_v<0, 1>, get_elements(in, shape<0>{}, axis_params_v<0, 1>)); + end_pass(in, shape{}, shape{}); + end_pass(out, shape{}, shape{}); return {}; } @@ -305,6 +658,9 @@ template <size_t width = 0, index_t Axis = infinite_size, typename Out, typename static auto process(Out&& out, In&& in, shape<outdims> start = shape<outdims>(0), shape<outdims> size = shape<outdims>(infinite_size), csize_t<gw> = {}) -> shape<outdims> { + static_assert(is_input_expression<In>, "In must be an input expression"); + static_assert(is_output_expression<Out>, "Out must be an output expression"); + using Trin = expression_traits<In>; using Trout = expression_traits<Out>; using Tin = typename Trin::value_type; @@ -332,12 +688,15 @@ static auto process(Out&& out, In&& in, shape<outdims> start = shape<outdims>(0) const shape<indims> inshape = Trin::shapeof(in); if (CMT_UNLIKELY(!internal_generic::can_assign_from(outshape, inshape))) return shape<outdims>{ 0 }; - shape<outdims> stop = min(add_shape(start, size), outshape); + shape<outdims> stop = min(min(add_shape(start, size), outshape), inshape.template extend<outdims>()); index_t in_size = 0; if constexpr (indims > 0) in_size = inshape[in_axis]; + begin_pass(out, start, stop); + begin_pass(in, inshape.adapt(start), inshape.adapt(stop)); + shape<outdims> outidx; if constexpr (outdims == 1) { @@ -407,255 +766,15 @@ static auto process(Out&& out, In&& in, shape<outdims> start = shape<outdims>(0) outidx[out_axis] = stop[out_axis] - 1; } while (internal_generic::increment_indices(outidx, start, stop)); } + end_pass(in, inshape.adapt(start), inshape.adapt(stop)); + end_pass(out, start, stop); return stop; } -} // namespace CMT_ARCH_NAME - -template <typename... Args> -struct xwitharguments -{ - constexpr static size_t count = sizeof...(Args); - - using type_list = ctypes_t<Args...>; - - template <size_t idx> - using nth = typename type_list::template nth<idx>; - - using first_arg = typename type_list::template nth<0>; - - template <size_t idx> - using nth_trait = expression_traits<typename type_list::template nth<idx>>; - - using first_arg_trait = expression_traits<first_arg>; - - std::tuple<Args...> args; - std::array<dimset, count> masks; - - KFR_INTRINSIC auto& first() { return std::get<0>(args); } - KFR_INTRINSIC const auto& first() const { return std::get<0>(args); } - - template <size_t idx> - KFR_INTRINSIC dimset getmask(csize_t<idx> = {}) const - { - static_assert(idx < count); - using Traits = expression_traits<nth<idx>>; - if constexpr (sizeof...(Args) <= 1 || Traits::dims == 0) - { - return -1; - } - else - { - constexpr shape<Traits::dims> sh = Traits::shapeof(); - if constexpr (sh.cproduct() > 0) - { - return sh.tomask(); - } - else - { - return std::get<idx>(masks); - } - } - } - - template <typename Fn> - KFR_INTRINSIC constexpr auto fold(Fn&& fn) const - { - return fold_impl(std::forward<Fn>(fn), csizeseq<count>); - } - template <typename Fn> - KFR_INTRINSIC constexpr static auto fold_idx(Fn&& fn) - { - return fold_idx_impl(std::forward<Fn>(fn), csizeseq<count>); - } - - KFR_INTRINSIC xwitharguments(Args&&... args) : args{ std::forward<Args>(args)... } - { - cforeach(csizeseq<count>, - [&](auto idx_) CMT_INLINE_LAMBDA - { - constexpr size_t idx = val_of(decltype(idx_)()); - shape sh = expression_traits<nth<idx>>::shapeof(std::get<idx>(this->args)); - masks[idx] = sh.tomask(); - }); - } - -private: - template <typename Fn, size_t... indices> - KFR_INTRINSIC constexpr auto fold_impl(Fn&& fn, csizes_t<indices...>) const - { - return fn(std::get<indices>(args)...); - } - template <typename Fn, size_t... indices> - KFR_INTRINSIC constexpr static auto fold_idx_impl(Fn&& fn, csizes_t<indices...>) - { - return fn(csize<indices>...); - } -}; - -template <typename Arg> -struct xwitharguments<Arg> -{ - constexpr static size_t count = 1; - - using type_list = ctypes_t<Arg>; - - template <size_t idx> - using nth = Arg; - - using first_arg = Arg; - - template <size_t idx> - using nth_trait = expression_traits<Arg>; - - using first_arg_trait = expression_traits<first_arg>; - - std::tuple<Arg> args; - - KFR_MEM_INTRINSIC auto& first() { return std::get<0>(args); } - KFR_MEM_INTRINSIC const auto& first() const { return std::get<0>(args); } - - template <size_t idx> - KFR_MEM_INTRINSIC dimset getmask(csize_t<idx> = {}) const - { - return -1; - } - - template <typename Fn> - KFR_MEM_INTRINSIC constexpr auto fold(Fn&& fn) const - { - return fold_impl(std::forward<Fn>(fn), csizeseq<count>); - } - template <typename Fn> - KFR_INTRINSIC constexpr static auto fold_idx(Fn&& fn) - { - return fold_idx_impl(std::forward<Fn>(fn), csizeseq<count>); - } - - KFR_MEM_INTRINSIC xwitharguments(Arg&& arg) : args{ std::forward<Arg>(arg) } {} - -private: - template <typename Fn, size_t... indices> - KFR_MEM_INTRINSIC constexpr auto fold_impl(Fn&& fn, csizes_t<indices...>) const - { - return fn(std::get<indices>(args)...); - } - template <typename Fn, size_t... indices> - KFR_INTRINSIC constexpr static auto fold_idx_impl(Fn&& fn, csizes_t<indices...>) - { - return fn(csize<indices>...); - } -}; - -template <typename... Args> -xwitharguments(Args&&... args) -> xwitharguments<Args...>; - -template <typename Fn, typename... Args> -struct xfunction : public xwitharguments<Args...> -{ - Fn fn; - - KFR_MEM_INTRINSIC xfunction(xwitharguments<Args...> args, Fn&& fn) - : xwitharguments<Args...>{ std::move(args) }, fn(std::move(fn)) - { - } - KFR_MEM_INTRINSIC xfunction(Fn&& fn, Args&&... args) - : xwitharguments<Args...>{ std::forward<Args>(args)... }, fn(std::move(fn)) - { - } -}; - -template <typename... Args, typename Fn> -xfunction(const xwitharguments<Args...>& args, Fn&& fn) -> xfunction<Fn, Args...>; -template <typename... Args, typename Fn> -xfunction(xwitharguments<Args...>&& args, Fn&& fn) -> xfunction<Fn, Args...>; -template <typename... Args, typename Fn> -xfunction(xwitharguments<Args...>& args, Fn&& fn) -> xfunction<Fn, Args...>; template <typename Fn, typename... Args> -struct expression_traits<xfunction<Fn, Args...>> : expression_traits_defaults -{ - using E = xfunction<Fn, Args...>; - - using value_type = - typename std::invoke_result_t<Fn, - vec<typename expression_traits<Args>::value_type, 1>...>::value_type; - constexpr static size_t dims = const_max(expression_traits<Args>::dims...); - - constexpr static shape<dims> shapeof(const E& self) - { - return self.fold([&](auto&&... args) CMT_INLINE_LAMBDA -> auto { - return internal_generic::common_shape(expression_traits<decltype(args)>::shapeof(args)...); - }); - } - constexpr static shape<dims> shapeof() - { - return xfunction<Fn, Args...>::fold_idx([&](auto... args) CMT_INLINE_LAMBDA -> auto { - return internal_generic::common_shape( - expression_traits<typename E::template nth<val_of(decltype(args)())>>::shapeof()...); - }); - } - - constexpr static inline bool random_access = (expression_traits<Args>::random_access && ...); -}; - -inline namespace CMT_ARCH_NAME -{ - -namespace internal +KFR_FUNCTION expression_function<decay<Fn>, Args...> bind_expression(Fn&& fn, Args&&... args) { -template <index_t outdims, typename Fn, typename... Args, index_t Axis, size_t N, index_t Dims, size_t idx, - typename Traits = expression_traits<typename xfunction<Fn, Args...>::template nth<idx>>> -KFR_MEM_INTRINSIC vec<typename Traits::value_type, N> get_arg(const xfunction<Fn, Args...>& self, - const shape<Dims>& index, - const axis_params<Axis, N>& sh, csize_t<idx>) -{ - if constexpr (Traits::dims == 0) - { - return repeat<N>(get_elements(std::get<idx>(self.args), {}, axis_params<Axis, 1>{})); - } - else - { - auto indices = internal_generic::adapt<Traits::dims>(index, self.getmask(csize<idx>)); - constexpr index_t last_dim = Traits::shapeof().back(); - if constexpr (last_dim > 0) - { - return repeat<N / std::min(last_dim, N)>( - get_elements(std::get<idx>(self.args), indices, axis_params<Axis, std::min(last_dim, N)>{})); - } - else - { - if constexpr (sizeof...(Args) > 1 && N > 1) - { - if (CMT_UNLIKELY(self.masks[idx].back() == 0)) - return get_elements(std::get<idx>(self.args), indices, axis_params<Axis, 1>{}).front(); - else - return get_elements(std::get<idx>(self.args), indices, sh); - } - else - { - return get_elements(std::get<idx>(self.args), indices, sh); - } - } - } -} -} // namespace internal - -template <typename Fn, typename... Args, index_t Axis, size_t N, index_t Dims, - typename Tr = expression_traits<xfunction<Fn, Args...>>, typename T = typename Tr::value_type> -KFR_INTRINSIC vec<T, N> get_elements(const xfunction<Fn, Args...>& self, const shape<Dims>& index, - const axis_params<Axis, N>& sh) -{ - constexpr index_t outdims = Tr::dims; - return self.fold_idx( - [&](auto... idx) CMT_INLINE_LAMBDA -> vec<T, N> { - return self.fn(internal::get_arg<outdims>(self, index, sh, idx)...); - }); -} - -template <typename Fn, typename... Args> -KFR_FUNCTION xfunction<decay<Fn>, Args...> bind_expression(Fn&& fn, Args&&... args) -{ - return xfunction<decay<Fn>, Args...>(std::forward<Fn>(fn), std::forward<Args>(args)...); + return expression_function<decay<Fn>, Args...>(std::forward<Fn>(fn), std::forward<Args>(args)...); } /** * @brief Construct a new expression using the same function as in @c e and new arguments @@ -663,22 +782,79 @@ KFR_FUNCTION xfunction<decay<Fn>, Args...> bind_expression(Fn&& fn, Args&&... ar * @param args new arguments for the function */ template <typename Fn, typename... OldArgs, typename... NewArgs> -KFR_FUNCTION xfunction<Fn, NewArgs...> rebind(const xfunction<Fn, OldArgs...>& e, NewArgs&&... args) +KFR_FUNCTION expression_function<Fn, NewArgs...> rebind(const expression_function<Fn, OldArgs...>& e, + NewArgs&&... args) { - return xfunction<Fn, NewArgs...>(e.fn, std::forward<NewArgs>(args)...); + return expression_function<Fn, NewArgs...>(Fn{ e.fn }, std::forward<NewArgs>(args)...); } template <typename Fn, typename... OldArgs, typename... NewArgs> -KFR_FUNCTION xfunction<Fn, NewArgs...> rebind(xfunction<Fn, OldArgs...>&& e, NewArgs&&... args) +KFR_FUNCTION expression_function<Fn, NewArgs...> rebind(expression_function<Fn, OldArgs...>&& e, + NewArgs&&... args) +{ + return expression_function<Fn, NewArgs...>(std::move(e.fn), std::forward<NewArgs>(args)...); +} + +#ifdef KFR_TESTING +namespace internal { - return xfunction<Fn, NewArgs...>(std::move(e.fn), std::forward<NewArgs>(args)...); +template <typename T, size_t N, typename Fn> +inline vec<T, N> get_fn_value(size_t index, Fn&& fn) +{ + return apply(fn, enumerate<size_t, N>() + index); } +} // namespace internal -template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)> -KFR_FUNCTION xfunction<fn::interleave, E1, E2> interleave(E1&& x, E2&& y) +template <typename E, typename Fn, KFR_ENABLE_IF(std::is_invocable_v<Fn, size_t>)> +void test_expression(const E& expr, size_t size, Fn&& fn, const char* expression = nullptr, + const char* file = nullptr, int line = 0) +{ + static_assert(expression_dims<E> == 1, "CHECK_EXPRESSION supports only 1-dim expressions"); + using T = expression_value_type<E>; + size_t expr_size = shapeof(expr).front(); + ::testo::test_case* test = ::testo::active_test(); + auto&& c = ::testo::make_comparison(); + test->check(c <= expr_size == size, expression, file, line); + if (expr_size != size) + return; + size = min(shape{ size }, shape{ 200 }).front(); + constexpr size_t maxsize = 2 + ilog2(vector_width<T> * 2); + size_t g = 1; + for (size_t i = 0; i < size;) + { + const size_t next_size = std::min(prev_poweroftwo(size - i), g); + g *= 2; + if (g > (1 << (maxsize - 1))) + g = 1; + + cswitch(csize<1> << csizeseq<maxsize>, next_size, + [&](auto x) + { + constexpr size_t nsize = val_of(decltype(x)()); + ::testo::scope s(as_string("i = ", i, " width = ", nsize)); + test->check(c <= get_elements(expr, shape<1>(i), axis_params_v<0, nsize>) == + internal::get_fn_value<T, nsize>(i, fn), + expression, file, line); + }); + i += next_size; + } +} + +template <typename E, typename T = expression_value_type<E>> +void test_expression(const E& expr, std::initializer_list<cometa::identity<T>> list, + const char* expression = nullptr, const char* file = nullptr, int line = 0) { - return xfunction<Fn, E1, E2>{ fn::interleave(), std::forward<E1>(x), std::forward<E2>(y) }; + test_expression( + expr, list.size(), [&](size_t i) { return list.begin()[i]; }, expression, file, line); } +#define TESTO_CHECK_EXPRESSION(expr, ...) ::kfr::test_expression(expr, __VA_ARGS__, #expr, __FILE__, __LINE__) + +#ifndef TESTO_NO_SHORT_MACROS +#define CHECK_EXPRESSION TESTO_CHECK_EXPRESSION +#endif +#endif + } // namespace CMT_ARCH_NAME + } // namespace kfr CMT_PRAGMA_GNU(GCC diagnostic pop) diff --git a/include/kfr/base/filter.hpp b/include/kfr/base/filter.hpp @@ -78,12 +78,12 @@ public: void apply(T* dest, const T* src, size_t size) { process_buffer(dest, src, size); } template <univector_tag Tag> - void apply(univector<T, Tag>& dest, const expression_pointer<T>& src) + void apply(univector<T, Tag>& dest, const expression_pointer<T, 1>& src) { process_expression(dest.data(), src, size_min(dest.size(), src.size())); } - void apply(T* dest, const expression_pointer<T>& src, size_t size) + void apply(T* dest, const expression_pointer<T, 1>& src, size_t size) { process_expression(dest, src, size_min(size, src.size())); } @@ -101,36 +101,36 @@ public: } protected: - virtual void process_buffer(T* dest, const T* src, size_t size) = 0; - virtual void process_expression(T* dest, const expression_pointer<T>& src, size_t size) = 0; + virtual void process_buffer(T* dest, const T* src, size_t size) = 0; + virtual void process_expression(T* dest, const expression_pointer<T, 1>& src, size_t size) = 0; }; template <typename T> class expression_filter : public filter<T> { public: - explicit expression_filter(expression_pointer<T>&& filter_expr) : filter_expr(std::move(filter_expr)) {} + explicit expression_filter(expression_pointer<T, 1> filter_expr) : filter_expr(std::move(filter_expr)) {} protected: void process_buffer(T* dest, const T* src, size_t size) override { substitute(filter_expr, to_pointer(make_univector(src, size))); - process(make_univector(dest, size), filter_expr, 0, size); + process(make_univector(dest, size), filter_expr, shape<1>(0), shape<1>(size)); } - void process_expression(T* dest, const expression_pointer<T>& src, size_t size) override + void process_expression(T* dest, const expression_pointer<T, 1>& src, size_t size) override { substitute(filter_expr, src); - process(make_univector(dest, size), filter_expr, 0, size); + process(make_univector(dest, size), filter_expr, shape<1>(0), shape<1>(size)); } - expression_pointer<T> filter_expr; + expression_pointer<T, 1> filter_expr; }; inline namespace CMT_ARCH_NAME { /// @brief Converts expression with placeholder to filter. Placeholder and filter must have the same type -template <typename E, typename T = value_type_of<E>> +template <typename E, typename T = expression_value_type<E>> KFR_INTRINSIC expression_filter<T> to_filter(E&& e) { return expression_filter<T>(to_pointer(std::move(e))); @@ -139,7 +139,7 @@ KFR_INTRINSIC expression_filter<T> to_filter(E&& e) /// @brief Converts expression with placeholder to filter. Placeholder and filter must have the same type template <typename T, typename E> -KFR_INTRINSIC expression_filter<T> to_filter(expression_pointer<T>&& e) +KFR_INTRINSIC expression_filter<T> to_filter(expression_pointer<T, 1>&& e) { return expression_filter<T>(std::move(e)); } diff --git a/include/kfr/base/generators.hpp b/include/kfr/base/generators.hpp @@ -27,17 +27,29 @@ #include "../math/log_exp.hpp" #include "../math/sin_cos.hpp" +#include "../simd/complex.hpp" #include "../simd/impl/function.hpp" #include "../simd/select.hpp" #include "../simd/vec.hpp" +#include "expression.hpp" #include "shape.hpp" namespace kfr { +inline namespace CMT_ARCH_NAME +{ + template <typename T, size_t VecWidth, typename Class, typename Twork = T> -struct xgenerator +struct generator : public expression_traits_defaults { + using value_type = T; + constexpr static size_t dims = 1; + constexpr static shape<1> shapeof(const Class&) { return infinite_size; } + constexpr static shape<1> shapeof() { return infinite_size; } + + constexpr static inline bool random_access = false; + constexpr static size_t width = VecWidth; void resync(T start) const { ptr_cast<Class>(this)->sync(start); } @@ -53,10 +65,11 @@ struct xgenerator value = slice<N, width>(oldvalue, value); return result; } - else if (N > width) + else if constexpr (N > width) { - const vec lo = generate(low(x)); - const vec hi = generate(high(x)); + constexpr size_t Nlow = prev_poweroftwo(N - 1); + const vec lo = generate<Nlow>(); + const vec hi = generate<N - Nlow>(); return concat(lo, hi); } else // N == width @@ -68,6 +81,13 @@ struct xgenerator } mutable vec<Twork, width> value; + template <size_t N> + friend KFR_INTRINSIC vec<T, N> get_elements(const generator& self, const shape<1>& index, + const axis_params<0, N>&) + { + return self.template generate<N>(); + } + private: KFR_MEM_INTRINSIC void call_next() const { ptr_cast<Class>(this)->next(); } @@ -79,32 +99,10 @@ private: } }; -template <typename T, size_t VecWidth, typename Class> -struct expression_traits<xgenerator<T, VecWidth, Class>> : public expression_traits_defaults -{ - using value_type = T; - constexpr static size_t dims = 1; - constexpr static shape<1> shapeof(const T&) { return shape<1>(infinite_size); } - constexpr static shape<1> shapeof() { return shape<1>(infinite_size); } - - constexpr static inline bool explicit_operand = true; - constexpr static inline bool random_access = false; -}; - -inline namespace CMT_ARCH_NAME -{ -template <typename T, size_t VecWidth, typename Class, size_t N> -KFR_INTRINSIC vec<T, N> get_elements(const xgenerator<T, VecWidth, Class>& self, const shape<1>& index, - const axis_params<0, N>&) -{ - return self.template generate<N>(); -} -} // namespace CMT_ARCH_NAME - template <typename T, size_t VecWidth = vector_capacity<T> / 8> -struct xgenlinear : public xgenerator<T, VecWidth, xgenlinear<T, VecWidth>> +struct generator_linear : public generator<T, VecWidth, generator_linear<T, VecWidth>> { - xgenlinear(T start, T step) CMT_NOEXCEPT : step{ step }, vstep{ step * VecWidth } { sync(start); } + generator_linear(T start, T step) CMT_NOEXCEPT : vstep{ step * VecWidth } { sync(start); } KFR_MEM_INTRINSIC void sync(T start) const CMT_NOEXCEPT { @@ -117,10 +115,10 @@ struct xgenlinear : public xgenerator<T, VecWidth, xgenlinear<T, VecWidth>> }; template <typename T, size_t VecWidth = vector_capacity<T> / 8> -struct xgenexp : public xgenerator<T, VecWidth, xgenexp<T, VecWidth>> +struct generator_exp : public generator<T, VecWidth, generator_exp<T, VecWidth>> { - xgenexp(T start, T step) CMT_NOEXCEPT : step{ step }, - vstep{ exp(make_vector(step * VecWidth)).front() - 1 } + generator_exp(T start, T step) CMT_NOEXCEPT : step{ step }, + vstep{ exp(make_vector(step * VecWidth)).front() - 1 } { this->resync(start); } @@ -138,12 +136,12 @@ protected: }; template <typename T, size_t VecWidth = vector_capacity<deep_subtype<T>> / 8 / 2> -struct xgenexpj : public xgenerator<T, VecWidth, xgenexpj<T, VecWidth>> +struct generator_expj : public generator<T, VecWidth, generator_expj<T, VecWidth>> { using ST = deep_subtype<T>; - static_assert(std::is_same_v<complex<deep_subtype<T>>, T>, "xgenexpj requires complex type"); + static_assert(std::is_same_v<complex<deep_subtype<T>>, T>, "generator_expj requires complex type"); - xgenexpj(ST start_, ST step_) + generator_expj(ST start_, ST step_) : step(step_), alpha(2 * sqr(sin(VecWidth * step / 2))), beta(-sin(VecWidth * step)) { this->resync(T(start_)); @@ -162,14 +160,15 @@ protected: ST beta; CMT_NOINLINE static vec<T, VecWidth> init_cossin(ST w, ST phase) { - return ccomp(cossin(dup(phase + enumerate<ST, width>() * w))); + return ccomp(cossin(dup(phase + enumerate<ST, VecWidth>() * w))); } }; template <typename T, size_t VecWidth = vector_capacity<T> / 8> -struct xgenexp2 : public xgenerator<T, VecWidth, xgenexp2<T, VecWidth>> +struct generator_exp2 : public generator<T, VecWidth, generator_exp2<T, VecWidth>> { - xgenexp2(T start, T step) CMT_NOEXCEPT : step{ step }, vstep{ exp2(make_vector(step * VecWidth))[0] - 1 } + generator_exp2(T start, T step) CMT_NOEXCEPT : step{ step }, + vstep{ exp2(make_vector(step * VecWidth))[0] - 1 } { this->resync(start); } @@ -187,10 +186,10 @@ protected: }; template <typename T, size_t VecWidth = vector_capacity<T> / 8> -struct xgencossin : public xgenerator<T, VecWidth, xgencossin<T, VecWidth>> +struct generator_cossin : public generator<T, VecWidth, generator_cossin<T, VecWidth>> { static_assert(VecWidth % 2 == 0); - xgencossin(T start, T step) + generator_cossin(T start, T step) : step(step), alpha(2 * sqr(sin(VecWidth / 2 * step / 2))), beta(-sin(VecWidth / 2 * step)) { this->resync(start); @@ -213,9 +212,9 @@ protected: }; template <typename T, size_t VecWidth = vector_capacity<T> / 8 / 2> -struct xgensin : public xgenerator<T, VecWidth, xgensin<T, VecWidth>, vec<T, 2>> +struct generator_sin : public generator<T, VecWidth, generator_sin<T, VecWidth>, vec<T, 2>> { - xgensin(T start, T step) + generator_sin(T start, T step) : step(step), alpha(2 * sqr(sin(VecWidth * step / 2))), beta(sin(VecWidth * step)) { this->resync(start); @@ -248,9 +247,6 @@ protected: T beta; }; -inline namespace CMT_ARCH_NAME -{ - /** * @brief Returns template expression that generates values starting from the start and using the step as the * increment between numbers. @@ -260,9 +256,9 @@ inline namespace CMT_ARCH_NAME \f] */ template <typename T1, typename T2, typename TF = ftype<common_type<T1, T2>>> -KFR_FUNCTION xgenlinear<TF> gen_linear(T1 start, T2 step) +KFR_FUNCTION generator_linear<TF> gen_linear(T1 start, T2 step) { - return xgenlinear<TF>(start, step); + return generator_linear<TF>(start, step); } /** @@ -272,9 +268,9 @@ KFR_FUNCTION xgenlinear<TF> gen_linear(T1 start, T2 step) \f] */ template <typename T1, typename T2, typename TF = ftype<common_type<T1, T2>>> -KFR_FUNCTION xgenexp<TF> gen_exp(T1 start, T2 step) +KFR_FUNCTION generator_exp<TF> gen_exp(T1 start, T2 step) { - return xgenexp<TF>(start, step); + return generator_exp<TF>(start, step); } /** @@ -284,9 +280,9 @@ KFR_FUNCTION xgenexp<TF> gen_exp(T1 start, T2 step) \f] */ template <typename T1, typename T2, typename TF = complex<ftype<common_type<T1, T2>>>> -KFR_FUNCTION xgenexpj<TF> gen_expj(T1 start, T2 step) +KFR_FUNCTION generator_expj<TF> gen_expj(T1 start, T2 step) { - return xgenexpj<TF>(start, step); + return generator_expj<TF>(start, step); } /** @@ -296,9 +292,9 @@ KFR_FUNCTION xgenexpj<TF> gen_expj(T1 start, T2 step) \f] */ template <typename T1, typename T2, typename TF = ftype<common_type<T1, T2>>> -KFR_FUNCTION xgenexp2<TF> gen_exp2(T1 start, T2 step) +KFR_FUNCTION generator_exp2<TF> gen_exp2(T1 start, T2 step) { - return xgenexp2<TF>(start, step); + return generator_exp2<TF>(start, step); } /** @@ -312,9 +308,9 @@ KFR_FUNCTION xgenexp2<TF> gen_exp2(T1 start, T2 step) \f] */ template <typename T1, typename T2, typename TF = ftype<common_type<T1, T2>>> -KFR_FUNCTION xgencossin<TF> gen_cossin(T1 start, T2 step) +KFR_FUNCTION generator_cossin<TF> gen_cossin(T1 start, T2 step) { - return xgencossin<TF>(start, step); + return generator_cossin<TF>(start, step); } /** @@ -324,9 +320,9 @@ KFR_FUNCTION xgencossin<TF> gen_cossin(T1 start, T2 step) \f] */ template <typename T1, typename T2, typename TF = ftype<common_type<T1, T2>>> -KFR_FUNCTION xgensin<TF> gen_sin(T1 start, T2 step) +KFR_FUNCTION generator_sin<TF> gen_sin(T1 start, T2 step) { - return xgensin<TF>(start, step); + return generator_sin<TF>(start, step); } } // namespace CMT_ARCH_NAME } // namespace kfr diff --git a/include/kfr/base/impl/static_array.hpp b/include/kfr/base/impl/static_array.hpp @@ -2,7 +2,7 @@ * @{ */ /* - Copyright (C) 2016 D Levin (https://www.kfrlib.com) + Copyright (C) 2016-2022 Fractalium Ltd (https://www.kfrlib.com) This file is part of KFR KFR is free software: you can redistribute it and/or modify diff --git a/include/kfr/base/math_expressions.hpp b/include/kfr/base/math_expressions.hpp @@ -2,7 +2,7 @@ * @{ */ /* - Copyright (C) 2016 D Levin (https://www.kfrlib.com) + Copyright (C) 2016-2022 Fractalium Ltd (https://www.kfrlib.com) This file is part of KFR KFR is free software: you can redistribute it and/or modify @@ -33,120 +33,10 @@ namespace kfr { /** - * @brief Returns template expression that returns x if m is true, otherwise return y. Order of the arguments - * is same as in ternary operator. - */ -template <typename E1, typename E2, typename E3, KFR_ACCEPT_EXPRESSIONS(E1, E2, E3)> -KFR_FUNCTION xfunction<fn::select, E1, E2, E3> select(E1&& m, E2&& x, E3&& y) -{ - return { fn::select(), std::forward<E1>(m), std::forward<E2>(x), std::forward<E3>(y) }; -} - -/** - * @brief Returns template expression that returns the absolute value of x. - */ -template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::abs, E1> abs(E1&& x) -{ - return { fn::abs(), std::forward<E1>(x) }; -} - -/** - * @brief Returns the smaller of two values. Accepts and returns expressions. - */ -template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)> -KFR_FUNCTION xfunction<fn::min, E1, E2> min(E1&& x, E2&& y) -{ - return { fn::min(), std::forward<E1>(x), std::forward<E2>(y) }; -} - -/** - * @brief Returns the greater of two values. Accepts and returns expressions. - */ -template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)> -KFR_FUNCTION xfunction<fn::max, E1, E2> max(E1&& x, E2&& y) -{ - return { fn::max(), std::forward<E1>(x), std::forward<E2>(y) }; -} - -/** - * @brief Returns the smaller in magnitude of two values. Accepts and returns expressions. - */ -template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)> -KFR_FUNCTION xfunction<fn::absmin, E1, E2> absmin(E1&& x, E2&& y) -{ - return { fn::absmin(), std::forward<E1>(x), std::forward<E2>(y) }; -} - -/** - * @brief Returns the greater in magnitude of two values. Accepts and returns expressions. - */ -template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)> -KFR_FUNCTION xfunction<fn::absmax, E1, E2> absmax(E1&& x, E2&& y) -{ - return { fn::absmax(), std::forward<E1>(x), std::forward<E2>(y) }; -} - -/// @brief Returns the largest integer value not greater than x. Accepts and returns expressions. -template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::floor, E1> floor(E1&& x) -{ - return { fn::floor(), std::forward<E1>(x) }; -} - -template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::ceil, E1> ceil(E1&& x) -{ - return { fn::ceil(), std::forward<E1>(x) }; -} - -template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::round, E1> round(E1&& x) -{ - return { fn::round(), std::forward<E1>(x) }; -} - -template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::trunc, E1> trunc(E1&& x) -{ - return { fn::trunc(), std::forward<E1>(x) }; -} - -template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::fract, E1> fract(E1&& x) -{ - return { fn::fract(), std::forward<E1>(x) }; -} - -template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::ifloor, E1> ifloor(E1&& x) -{ - return { fn::ifloor(), std::forward<E1>(x) }; -} - -template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::iceil, E1> iceil(E1&& x) -{ - return { fn::iceil(), std::forward<E1>(x) }; -} - -template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::iround, E1> iround(E1&& x) -{ - return { fn::iround(), std::forward<E1>(x) }; -} - -template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::itrunc, E1> itrunc(E1&& x) -{ - return { fn::itrunc(), std::forward<E1>(x) }; -} - -/** * @brief Returns the trigonometric sine of x. Accepts and returns expressions. */ template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::sin, E1> sin(E1&& x) +KFR_FUNCTION expression_function<fn::sin, E1> sin(E1&& x) { return { fn::sin(), std::forward<E1>(x) }; } @@ -155,7 +45,7 @@ KFR_FUNCTION xfunction<fn::sin, E1> sin(E1&& x) * @brief Returns the trigonometric cosine of x. Accepts and returns expressions. */ template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::cos, E1> cos(E1&& x) +KFR_FUNCTION expression_function<fn::cos, E1> cos(E1&& x) { return { fn::cos(), std::forward<E1>(x) }; } @@ -164,7 +54,7 @@ KFR_FUNCTION xfunction<fn::cos, E1> cos(E1&& x) * @brief Returns an approximation of the trigonometric sine of x. Accepts and returns expressions. */ template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::fastsin, E1> fastsin(E1&& x) +KFR_FUNCTION expression_function<fn::fastsin, E1> fastsin(E1&& x) { return { fn::fastsin(), std::forward<E1>(x) }; } @@ -173,7 +63,7 @@ KFR_FUNCTION xfunction<fn::fastsin, E1> fastsin(E1&& x) * @brief Returns an approximation of the trigonometric cosine of x. Accepts and returns expressions. */ template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::fastcos, E1> fastcos(E1&& x) +KFR_FUNCTION expression_function<fn::fastcos, E1> fastcos(E1&& x) { return { fn::fastcos(), std::forward<E1>(x) }; } @@ -183,7 +73,7 @@ KFR_FUNCTION xfunction<fn::fastcos, E1> fastcos(E1&& x) * cosine of the odd elements. x must be a vector. Accepts and returns expressions. */ template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::sincos, E1> sincos(E1&& x) +KFR_FUNCTION expression_function<fn::sincos, E1> sincos(E1&& x) { return { fn::sincos(), std::forward<E1>(x) }; } @@ -193,7 +83,7 @@ KFR_FUNCTION xfunction<fn::sincos, E1> sincos(E1&& x) * sine of the odd elements. x must be a vector. Accepts and returns expressions. */ template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::cossin, E1> cossin(E1&& x) +KFR_FUNCTION expression_function<fn::cossin, E1> cossin(E1&& x) { return { fn::cossin(), std::forward<E1>(x) }; } @@ -202,7 +92,7 @@ KFR_FUNCTION xfunction<fn::cossin, E1> cossin(E1&& x) * @brief Returns the trigonometric sine of the x (expressed in degrees). Accepts and returns expressions. */ template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::sindeg, E1> sindeg(E1&& x) +KFR_FUNCTION expression_function<fn::sindeg, E1> sindeg(E1&& x) { return { fn::sindeg(), std::forward<E1>(x) }; } @@ -211,7 +101,7 @@ KFR_FUNCTION xfunction<fn::sindeg, E1> sindeg(E1&& x) * @brief Returns the trigonometric cosine of the x (expressed in degrees). Accepts and returns expressions. */ template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::cosdeg, E1> cosdeg(E1&& x) +KFR_FUNCTION expression_function<fn::cosdeg, E1> cosdeg(E1&& x) { return { fn::cosdeg(), std::forward<E1>(x) }; } @@ -221,7 +111,7 @@ KFR_FUNCTION xfunction<fn::cosdeg, E1> cosdeg(E1&& x) * (expressed in degrees). Accepts and returns expressions. */ template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::fastsindeg, E1> fastsindeg(E1&& x) +KFR_FUNCTION expression_function<fn::fastsindeg, E1> fastsindeg(E1&& x) { return { fn::fastsindeg(), std::forward<E1>(x) }; } @@ -231,7 +121,7 @@ KFR_FUNCTION xfunction<fn::fastsindeg, E1> fastsindeg(E1&& x) * (expressed in degrees). Accepts and returns expressions. */ template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::fastcosdeg, E1> fastcosdeg(E1&& x) +KFR_FUNCTION expression_function<fn::fastcosdeg, E1> fastcosdeg(E1&& x) { return { fn::fastcosdeg(), std::forward<E1>(x) }; } @@ -241,7 +131,7 @@ KFR_FUNCTION xfunction<fn::fastcosdeg, E1> fastcosdeg(E1&& x) * cosine of the odd elements. x must be expressed in degrees. Accepts and returns expressions. */ template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::sincosdeg, E1> sincosdeg(E1&& x) +KFR_FUNCTION expression_function<fn::sincosdeg, E1> sincosdeg(E1&& x) { return { fn::sincosdeg(), std::forward<E1>(x) }; } @@ -251,7 +141,7 @@ KFR_FUNCTION xfunction<fn::sincosdeg, E1> sincosdeg(E1&& x) * sine of the odd elements. x must be expressed in degrees. Accepts and returns expressions. */ template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::cossindeg, E1> cossindeg(E1&& x) +KFR_FUNCTION expression_function<fn::cossindeg, E1> cossindeg(E1&& x) { return { fn::cossindeg(), std::forward<E1>(x) }; } @@ -260,35 +150,21 @@ KFR_FUNCTION xfunction<fn::cossindeg, E1> cossindeg(E1&& x) * @brief Returns the sinc function of x. Accepts and returns expressions. */ template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::sinc, E1> sinc(E1&& x) +KFR_FUNCTION expression_function<fn::sinc, E1> sinc(E1&& x) { return { fn::sinc(), std::forward<E1>(x) }; } -/// @brief Creates an expression that returns the first argument clamped to a range [lo, hi] -template <typename E1, typename E2, typename E3, KFR_ACCEPT_EXPRESSIONS(E1, E2, E3)> -KFR_FUNCTION xfunction<fn::clamp, E1, E2, E3> clamp(E1&& x, E2&& lo, E3&& hi) -{ - return { fn::clamp(), std::forward<E1>(x), std::forward<E2>(lo), std::forward<E3>(hi) }; -} - -/// @brief Creates an expression that returns the first argument clamped to a range [0, hi] -template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)> -KFR_FUNCTION xfunction<fn::clamp, E1, E2> clamp(E1&& x, E2&& hi) -{ - return { fn::clamp(), std::forward<E1>(x), std::forward<E2>(hi) }; -} - /// @brief Creates expression that returns the approximate gamma function of an argument template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::gamma, E1> gamma(E1&& x) +KFR_FUNCTION expression_function<fn::gamma, E1> gamma(E1&& x) { return { fn::gamma(), std::forward<E1>(x) }; } /// @brief Creates expression that returns the approximate factorial of an argument template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::factorial_approx, E1> factorial_approx(E1&& x) +KFR_FUNCTION expression_function<fn::factorial_approx, E1> factorial_approx(E1&& x) { return { fn::factorial_approx(), std::forward<E1>(x) }; } @@ -297,19 +173,19 @@ KFR_FUNCTION xfunction<fn::factorial_approx, E1> factorial_approx(E1&& x) * @brief Returns template expression that returns the positive square root of the x. \f$\sqrt{x}\f$ */ template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::sqrt, E1> sqrt(E1&& x) +KFR_FUNCTION expression_function<fn::sqrt, E1> sqrt(E1&& x) { return { fn::sqrt(), std::forward<E1>(x) }; } template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::tan, E1> tan(E1&& x) +KFR_FUNCTION expression_function<fn::tan, E1> tan(E1&& x) { return { fn::tan(), std::forward<E1>(x) }; } template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::tandeg, E1> tandeg(E1&& x) +KFR_FUNCTION expression_function<fn::tandeg, E1> tandeg(E1&& x) { return { fn::tandeg(), std::forward<E1>(x) }; } @@ -318,7 +194,7 @@ KFR_FUNCTION xfunction<fn::tandeg, E1> tandeg(E1&& x) * @brief Returns template expression that returns the arc sine of x. */ template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_INTRINSIC xfunction<fn::asin, E1> asin(E1&& x) +KFR_INTRINSIC expression_function<fn::asin, E1> asin(E1&& x) { return { fn::asin(), std::forward<E1>(x) }; } @@ -327,35 +203,35 @@ KFR_INTRINSIC xfunction<fn::asin, E1> asin(E1&& x) * @brief Returns template expression that returns the arc cosine of x. */ template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_INTRINSIC xfunction<fn::acos, E1> acos(E1&& x) +KFR_INTRINSIC expression_function<fn::acos, E1> acos(E1&& x) { return { fn::acos(), std::forward<E1>(x) }; } /// @brief Returns template expression that returns the sine of the the complex value x template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::csin, E1> csin(E1&& x) +KFR_FUNCTION expression_function<fn::csin, E1> csin(E1&& x) { return { fn::csin(), std::forward<E1>(x) }; } /// @brief Returns template expression that returns the hyperbolic sine of the complex number x template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::csinh, E1> csinh(E1&& x) +KFR_FUNCTION expression_function<fn::csinh, E1> csinh(E1&& x) { return { fn::csinh(), std::forward<E1>(x) }; } /// @brief Returns template expression that returns the cosine of the the complex value x template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::ccos, E1> ccos(E1&& x) +KFR_FUNCTION expression_function<fn::ccos, E1> ccos(E1&& x) { return { fn::ccos(), std::forward<E1>(x) }; } /// @brief Returns template expression that returns the hyperbolic cosine of the the complex value x template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::ccosh, E1> ccosh(E1&& x) +KFR_FUNCTION expression_function<fn::ccosh, E1> ccosh(E1&& x) { return { fn::ccosh(), std::forward<E1>(x) }; } @@ -363,91 +239,91 @@ KFR_FUNCTION xfunction<fn::ccosh, E1> ccosh(E1&& x) /// @brief Returns template expression that returns the squared absolute value (magnitude squared) of the /// complex number x template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::cabssqr, E1> cabssqr(E1&& x) +KFR_FUNCTION expression_function<fn::cabssqr, E1> cabssqr(E1&& x) { return { fn::cabssqr(), std::forward<E1>(x) }; } /// @brief Returns template expression that returns the absolute value (magnitude) of the complex number x template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::cabs, E1> cabs(E1&& x) +KFR_FUNCTION expression_function<fn::cabs, E1> cabs(E1&& x) { return { fn::cabs(), std::forward<E1>(x) }; } /// @brief Returns template expression that returns the phase angle (argument) of the complex number x template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::carg, E1> carg(E1&& x) +KFR_FUNCTION expression_function<fn::carg, E1> carg(E1&& x) { return { fn::carg(), std::forward<E1>(x) }; } /// @brief Returns template expression that returns the natural logarithm of the complex number x template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::clog, E1> clog(E1&& x) +KFR_FUNCTION expression_function<fn::clog, E1> clog(E1&& x) { return { fn::clog(), std::forward<E1>(x) }; } /// @brief Returns template expression that returns the binary (base-2) logarithm of the complex number x template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::clog2, E1> clog2(E1&& x) +KFR_FUNCTION expression_function<fn::clog2, E1> clog2(E1&& x) { return { fn::clog2(), std::forward<E1>(x) }; } /// @brief Returns template expression that returns the common (base-10) logarithm of the complex number x template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::clog10, E1> clog10(E1&& x) +KFR_FUNCTION expression_function<fn::clog10, E1> clog10(E1&& x) { return { fn::clog10(), std::forward<E1>(x) }; } /// @brief Returns template expression that returns \f$e\f$ raised to the complex number x template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::cexp, E1> cexp(E1&& x) +KFR_FUNCTION expression_function<fn::cexp, E1> cexp(E1&& x) { return { fn::cexp(), std::forward<E1>(x) }; } /// @brief Returns template expression that returns 2 raised to the complex number x template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::cexp2, E1> cexp2(E1&& x) +KFR_FUNCTION expression_function<fn::cexp2, E1> cexp2(E1&& x) { return { fn::cexp2(), std::forward<E1>(x) }; } /// @brief Returns template expression that returns 10 raised to the complex number x template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::cexp10, E1> cexp10(E1&& x) +KFR_FUNCTION expression_function<fn::cexp10, E1> cexp10(E1&& x) { return { fn::cexp10(), std::forward<E1>(x) }; } /// @brief Returns template expression that converts complex number to polar template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::polar, E1> polar(E1&& x) +KFR_FUNCTION expression_function<fn::polar, E1> polar(E1&& x) { return { fn::polar(), std::forward<E1>(x) }; } /// @brief Returns template expression that converts complex number to cartesian template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::cartesian, E1> cartesian(E1&& x) +KFR_FUNCTION expression_function<fn::cartesian, E1> cartesian(E1&& x) { return { fn::cartesian(), std::forward<E1>(x) }; } /// @brief Returns template expression that returns square root of the complex number x template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::csqrt, E1> csqrt(E1&& x) +KFR_FUNCTION expression_function<fn::csqrt, E1> csqrt(E1&& x) { return { fn::csqrt(), std::forward<E1>(x) }; } /// @brief Returns template expression that returns square of the complex number x template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::csqr, E1> csqr(E1&& x) +KFR_FUNCTION expression_function<fn::csqr, E1> csqr(E1&& x) { return { fn::csqr(), std::forward<E1>(x) }; } @@ -456,7 +332,7 @@ KFR_FUNCTION xfunction<fn::csqr, E1> csqr(E1&& x) * @brief Returns template expression that returns the arc tangent of x. */ template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::atan, E1> atan(E1&& x) +KFR_FUNCTION expression_function<fn::atan, E1> atan(E1&& x) { return { fn::atan(), std::forward<E1>(x) }; } @@ -465,7 +341,7 @@ KFR_FUNCTION xfunction<fn::atan, E1> atan(E1&& x) * @brief Returns template expression that returns the arc tangent of the x, expressed in degrees. */ template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::atandeg, E1> atandeg(E1&& x) +KFR_FUNCTION expression_function<fn::atandeg, E1> atandeg(E1&& x) { return { fn::atandeg(), std::forward<E1>(x) }; } @@ -474,7 +350,7 @@ KFR_FUNCTION xfunction<fn::atandeg, E1> atandeg(E1&& x) * @brief Returns template expression that returns the arc tangent of y/x. */ template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)> -KFR_FUNCTION xfunction<fn::atan2, E1, E2> atan2(E1&& x, E2&& y) +KFR_FUNCTION expression_function<fn::atan2, E1, E2> atan2(E1&& x, E2&& y) { return { fn::atan2(), std::forward<E1>(x), std::forward<E2>(y) }; } @@ -483,55 +359,41 @@ KFR_FUNCTION xfunction<fn::atan2, E1, E2> atan2(E1&& x, E2&& y) * @brief Returns template expression that returns the arc tangent of y/x (expressed in degrees). */ template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)> -KFR_FUNCTION xfunction<fn::atan2deg, E1, E2> atan2deg(E1&& x, E2&& y) +KFR_FUNCTION expression_function<fn::atan2deg, E1, E2> atan2deg(E1&& x, E2&& y) { return { fn::atan2deg(), std::forward<E1>(x), std::forward<E2>(y) }; } template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::modzerobessel, E1> modzerobessel(E1&& x) +KFR_FUNCTION expression_function<fn::modzerobessel, E1> modzerobessel(E1&& x) { return { fn::modzerobessel(), std::forward<E1>(x) }; } -/// @brief Creates an expression that adds two arguments using saturation -template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)> -KFR_INTRINSIC xfunction<fn::satadd, E1, E2> satadd(E1&& x, E2&& y) -{ - return { fn::satadd(), std::forward<E1>(x), std::forward<E2>(y) }; -} - -/// @brief Creates an expression that subtracts two arguments using saturation -template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)> -KFR_INTRINSIC xfunction<fn::satsub, E1, E2> satsub(E1&& x, E2&& y) -{ - return { fn::satsub(), std::forward<E1>(x), std::forward<E2>(y) }; -} - /// @brief Returns template expression that returns the hyperbolic sine of the x template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::sinh, E1> sinh(E1&& x) +KFR_FUNCTION expression_function<fn::sinh, E1> sinh(E1&& x) { return { fn::sinh(), std::forward<E1>(x) }; } /// @brief Returns template expression that returns the hyperbolic cosine of the x template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::cosh, E1> cosh(E1&& x) +KFR_FUNCTION expression_function<fn::cosh, E1> cosh(E1&& x) { return { fn::cosh(), std::forward<E1>(x) }; } /// @brief Returns template expression that returns the hyperbolic tangent of the x template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::tanh, E1> tanh(E1&& x) +KFR_FUNCTION expression_function<fn::tanh, E1> tanh(E1&& x) { return { fn::tanh(), std::forward<E1>(x) }; } /// @brief Returns template expression that returns the hyperbolic cotangent of the x template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::coth, E1> coth(E1&& x) +KFR_FUNCTION expression_function<fn::coth, E1> coth(E1&& x) { return { fn::coth(), std::forward<E1>(x) }; } @@ -539,7 +401,7 @@ KFR_FUNCTION xfunction<fn::coth, E1> coth(E1&& x) /// @brief Returns template expression that returns the hyperbolic sine of the even elements of the x and the /// hyperbolic cosine of the odd elements of the x template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::sinhcosh, E1> sinhcosh(E1&& x) +KFR_FUNCTION expression_function<fn::sinhcosh, E1> sinhcosh(E1&& x) { return { fn::sinhcosh(), std::forward<E1>(x) }; } @@ -547,49 +409,49 @@ KFR_FUNCTION xfunction<fn::sinhcosh, E1> sinhcosh(E1&& x) /// @brief Returns template expression that returns the hyperbolic cosine of the even elements of the x and /// the hyperbolic sine of the odd elements of the x template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::coshsinh, E1> coshsinh(E1&& x) +KFR_FUNCTION expression_function<fn::coshsinh, E1> coshsinh(E1&& x) { return { fn::coshsinh(), std::forward<E1>(x) }; } /// @brief Returns e raised to the given power x. Accepts and returns expressions. template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::exp, E1> exp(E1&& x) +KFR_FUNCTION expression_function<fn::exp, E1> exp(E1&& x) { return { fn::exp(), std::forward<E1>(x) }; } /// @brief Returns 2 raised to the given power x. Accepts and returns expressions. template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::exp2, E1> exp2(E1&& x) +KFR_FUNCTION expression_function<fn::exp2, E1> exp2(E1&& x) { return { fn::exp2(), std::forward<E1>(x) }; } /// @brief Returns 10 raised to the given power x. Accepts and returns expressions. template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::exp10, E1> exp10(E1&& x) +KFR_FUNCTION expression_function<fn::exp10, E1> exp10(E1&& x) { return { fn::exp10(), std::forward<E1>(x) }; } /// @brief Returns the natural logarithm of the x. Accepts and returns expressions. template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::log, E1> log(E1&& x) +KFR_FUNCTION expression_function<fn::log, E1> log(E1&& x) { return { fn::log(), std::forward<E1>(x) }; } /// @brief Returns the binary (base-2) logarithm of the x. Accepts and returns expressions. template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::log2, E1> log2(E1&& x) +KFR_FUNCTION expression_function<fn::log2, E1> log2(E1&& x) { return { fn::log2(), std::forward<E1>(x) }; } /// @brief Returns the common (base-10) logarithm of the x. Accepts and returns expressions. template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::log10, E1> log10(E1&& x) +KFR_FUNCTION expression_function<fn::log10, E1> log10(E1&& x) { return { fn::log10(), std::forward<E1>(x) }; } @@ -597,56 +459,56 @@ KFR_FUNCTION xfunction<fn::log10, E1> log10(E1&& x) /// @brief Returns the rounded binary (base-2) logarithm of the x. Version that accepts and returns /// expressions. template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::logb, E1> logb(E1&& x) +KFR_FUNCTION expression_function<fn::logb, E1> logb(E1&& x) { return { fn::logb(), std::forward<E1>(x) }; } /// @brief Returns the logarithm of the x with base y. Accepts and returns expressions. template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)> -KFR_FUNCTION xfunction<fn::logn, E1, E2> logn(E1&& x, E2&& y) +KFR_FUNCTION expression_function<fn::logn, E1, E2> logn(E1&& x, E2&& y) { return { fn::logn(), std::forward<E1>(x), std::forward<E2>(y) }; } /// @brief Returns log(x) * y. Accepts and returns expressions. template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)> -KFR_FUNCTION xfunction<fn::logm, E1, E2> logm(E1&& x, E2&& y) +KFR_FUNCTION expression_function<fn::logm, E1, E2> logm(E1&& x, E2&& y) { return { fn::logm(), std::forward<E1>(x), std::forward<E2>(y) }; } /// @brief Returns exp(x * m + a). Accepts and returns expressions. template <typename E1, typename E2, typename E3, KFR_ACCEPT_EXPRESSIONS(E1, E2, E3)> -KFR_FUNCTION xfunction<fn::exp_fmadd, E1, E2, E3> exp_fmadd(E1&& x, E2&& y, E3&& z) +KFR_FUNCTION expression_function<fn::exp_fmadd, E1, E2, E3> exp_fmadd(E1&& x, E2&& y, E3&& z) { return { fn::exp_fmadd(), std::forward<E1>(x), std::forward<E2>(y), std::forward<E3>(z) }; } /// @brief Returns log(x) * m + a. Accepts and returns expressions. template <typename E1, typename E2, typename E3, KFR_ACCEPT_EXPRESSIONS(E1, E2, E3)> -KFR_FUNCTION xfunction<fn::log_fmadd, E1, E2, E3> log_fmadd(E1&& x, E2&& y, E3&& z) +KFR_FUNCTION expression_function<fn::log_fmadd, E1, E2, E3> log_fmadd(E1&& x, E2&& y, E3&& z) { return { fn::log_fmadd(), std::forward<E1>(x), std::forward<E2>(y), std::forward<E3>(z) }; } /// @brief Returns the x raised to the given power y. Accepts and returns expressions. template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)> -KFR_FUNCTION xfunction<fn::pow, E1, E2> pow(E1&& x, E2&& y) +KFR_FUNCTION expression_function<fn::pow, E1, E2> pow(E1&& x, E2&& y) { return { fn::pow(), std::forward<E1>(x), std::forward<E2>(y) }; } /// @brief Returns the real nth root of the x. Accepts and returns expressions. template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)> -KFR_FUNCTION xfunction<fn::root, E1, E2> root(E1&& x, E2&& y) +KFR_FUNCTION expression_function<fn::root, E1, E2> root(E1&& x, E2&& y) { return { fn::root(), std::forward<E1>(x), std::forward<E2>(y) }; } /// @brief Returns the cube root of the x. Accepts and returns expressions. template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::cbrt, E1> cbrt(E1&& x) +KFR_FUNCTION expression_function<fn::cbrt, E1> cbrt(E1&& x) { return { fn::cbrt(), std::forward<E1>(x) }; } diff --git a/include/kfr/base/old_basic_expressions.hpp b/include/kfr/base/old_basic_expressions.hpp @@ -1,708 +0,0 @@ -/** @addtogroup expressions - * @{ - */ -/* - Copyright (C) 2016 D Levin (https://www.kfrlib.com) - This file is part of KFR - - KFR is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version. - - KFR is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with KFR. - - If GPL is not suitable for your project, you must purchase a commercial license to use KFR. - Buying a commercial license is mandatory as soon as you develop commercial activities without - disclosing the source code of your own applications. - See https://www.kfrlib.com for details. - */ -#pragma once - -#include "../simd/operators.hpp" -#include "../simd/vec.hpp" -#include "univector.hpp" -#include <algorithm> - -namespace kfr -{ -inline namespace CMT_ARCH_NAME -{ - -namespace internal -{ -template <size_t width, typename Fn> -KFR_INTRINSIC void block_process_impl(size_t& i, size_t size, Fn&& fn) -{ - CMT_LOOP_NOUNROLL - for (; i < size / width * width; i += width) - fn(i, csize_t<width>()); -} -} // namespace internal - -template <size_t... widths, typename Fn> -KFR_INTRINSIC void block_process(size_t size, csizes_t<widths...>, Fn&& fn) -{ - size_t i = 0; - swallow{ (internal::block_process_impl<widths>(i, size, std::forward<Fn>(fn)), 0)... }; -} - -namespace internal -{ - -template <typename To, typename E> -struct expression_cast : expression_with_arguments<E> -{ - using value_type = To; - KFR_MEM_INTRINSIC expression_cast(E&& expr) CMT_NOEXCEPT - : expression_with_arguments<E>(std::forward<E>(expr)) - { - } - - template <size_t N> - friend KFR_INTRINSIC vec<To, N> get_elements(const expression_cast& self, cinput_t input, size_t index, - vec_shape<To, N>) - { - return self.argument_first(input, index, vec_shape<To, N>()); - } -}; - -template <typename T, typename E1> -struct expression_iterator -{ - constexpr expression_iterator(E1&& e1) : e1(std::forward<E1>(e1)) {} - struct iterator - { - T operator*() const { return get(); } - T get() const { return get_elements(expr.e1, cinput, position, vec_shape<T, 1>()).front(); } - iterator& operator++() - { - ++position; - return *this; - } - iterator operator++(int) - { - iterator copy = *this; - ++(*this); - return copy; - } - bool operator!=(const iterator& other) const { return position != other.position; } - const expression_iterator& expr; - size_t position; - }; - iterator begin() const { return { *this, 0 }; } - iterator end() const { return { *this, e1.size() }; } - E1 e1; -}; -} // namespace internal - -template <typename To, typename E, KFR_ENABLE_IF(is_input_expression<E>)> -KFR_INTRINSIC internal::expression_cast<To, E> cast(E&& expr) -{ - return internal::expression_cast<To, E>(std::forward<E>(expr)); -} - -template <typename E1, typename T = value_type_of<E1>> -KFR_INTRINSIC internal::expression_iterator<T, E1> to_iterator(E1&& e1) -{ - return internal::expression_iterator<T, E1>(std::forward<E1>(e1)); -} - -template <typename... Ts, typename T = common_type<Ts...>> -inline auto sequence(const Ts&... list) -{ - return lambda<T>([seq = std::array<T, sizeof...(Ts)>{ { static_cast<T>(list)... } }](size_t index) - { return seq[index % seq.size()]; }); -} - -template <typename T = int> -KFR_INTRINSIC auto zeros() -{ - return lambda<T>([](cinput_t, size_t, auto x) { return zerovector(x); }); -} - -template <typename T = int> -KFR_INTRINSIC auto ones() -{ - return lambda<T>([](cinput_t, size_t, auto) { return 1; }); -} - -template <typename T = int> -KFR_INTRINSIC auto counter() -{ - return lambda<T>([](cinput_t, size_t index, auto x) { return enumerate(x) + index; }); -} - -template <typename T1> -KFR_INTRINSIC auto counter(T1 start) -{ - return lambda<T1>([start](cinput_t, size_t index, auto x) { return enumerate(x) + index + start; }); -} -template <typename T1, typename T2> -KFR_INTRINSIC auto counter(T1 start, T2 step) -{ - return lambda<common_type<T1, T2>>([start, step](cinput_t, size_t index, auto x) - { return (enumerate(x) + index) * step + start; }); -} - -template <typename Gen> -struct segment -{ - template <typename Gen_> - constexpr segment(size_t start, Gen_&& gen) : start(start), gen(std::forward<Gen_>(gen)) - { - } - size_t start; - Gen gen; -}; - -enum symmetric_linspace_t -{ - symmetric_linspace -}; - -namespace internal -{ -template <typename T, typename E1> -struct expression_reader -{ - constexpr expression_reader(E1&& e1) CMT_NOEXCEPT : e1(std::forward<E1>(e1)) {} - T read() const - { - const T result = get_elements(e1, cinput, m_position, vec_shape<T, 1>()); - m_position++; - return result; - } - mutable size_t m_position = 0; - E1 e1; -}; -template <typename T, typename E1> -struct expression_writer -{ - constexpr expression_writer(E1&& e1) CMT_NOEXCEPT : e1(std::forward<E1>(e1)) {} - template <typename U> - void write(U value) - { - e1(coutput, m_position, vec<U, 1>(value)); - m_position++; - } - size_t m_position = 0; - E1 e1; -}; -} // namespace internal - -template <typename T, typename E1> -internal::expression_reader<T, E1> reader(E1&& e1) -{ - static_assert(is_input_expression<E1>, "E1 must be an expression"); - return internal::expression_reader<T, E1>(std::forward<E1>(e1)); -} - -template <typename T, typename E1> -internal::expression_writer<T, E1> writer(E1&& e1) -{ - static_assert(is_output_expression<E1>, "E1 must be an output expression"); - return internal::expression_writer<T, E1>(std::forward<E1>(e1)); -} - -namespace internal -{ - -template <typename E1> -struct expression_slice : expression_with_arguments<E1> -{ - using value_type = value_type_of<E1>; - using T = value_type; - expression_slice(E1&& e1, size_t start, size_t size) - : expression_with_arguments<E1>(std::forward<E1>(e1)), start(start), - new_size(size_min(size, size_sub(std::get<0>(this->args).size(), start))) - { - } - template <size_t N> - friend KFR_INTRINSIC vec<T, N> get_elements(const expression_slice& self, cinput_t cinput, size_t index, - vec_shape<T, N> y) - { - return self.argument_first(cinput, index + self.start, y); - } - size_t size() const { return new_size; } - size_t start; - size_t new_size; -}; - -template <typename E1> -struct expression_reverse : expression_with_arguments<E1> -{ - using value_type = value_type_of<E1>; - using T = value_type; - expression_reverse(E1&& e1) : expression_with_arguments<E1>(std::forward<E1>(e1)), expr_size(e1.size()) {} - template <size_t N> - friend KFR_INTRINSIC vec<T, N> get_elements(const expression_reverse& self, cinput_t cinput, size_t index, - vec_shape<T, N> y) - { - return reverse(self.argument_first(cinput, self.expr_size - index - N, y)); - } - size_t size() const { return expr_size; } - size_t expr_size; -}; - -template <typename T, bool precise = false> -struct expression_linspace; - -template <typename T> -struct expression_linspace<T, false> : input_expression -{ - using value_type = T; - - KFR_MEM_INTRINSIC constexpr size_t size() const CMT_NOEXCEPT { return truncate_size; } - - expression_linspace(T start, T stop, size_t size, bool endpoint = false, bool truncate = false) - : start(start), offset((stop - start) / T(endpoint ? size - 1 : size)), - truncate_size(truncate ? size : infinite_size) - { - } - - expression_linspace(symmetric_linspace_t, T symsize, size_t size, bool endpoint = false) - : expression_linspace(-symsize, +symsize, size, endpoint) - { - } - - template <size_t N> - friend KFR_INTRINSIC vec<T, N> get_elements(const expression_linspace& self, cinput_t, size_t index, - vec_shape<T, N> x) - { - using TI = itype<T>; - return T(self.start) + (enumerate(x) + static_cast<T>(static_cast<TI>(index))) * T(self.offset); - } - - T start; - T offset; - size_t truncate_size; -}; - -template <typename T> -struct expression_linspace<T, true> : input_expression -{ - using value_type = T; - - KFR_MEM_INTRINSIC constexpr size_t size() const CMT_NOEXCEPT { return truncate_size; } - - expression_linspace(T start, T stop, size_t size, bool endpoint = false, bool truncate = false) - : start(start), stop(stop), invsize(1.0 / T(endpoint ? size - 1 : size)), - truncate_size(truncate ? size : infinite_size) - { - } - - expression_linspace(symmetric_linspace_t, T symsize, size_t size, bool endpoint = false) - : expression_linspace(-symsize, +symsize, size, endpoint) - { - } - - template <size_t N> - friend KFR_INTRINSIC vec<T, N> get_elements(const expression_linspace& self, cinput_t, size_t index, - vec_shape<T, N> x) - { - using TI = itype<T>; - return mix((enumerate(x) + static_cast<T>(static_cast<TI>(index))) * self.invsize, self.start, - self.stop); - } - template <typename U, size_t N> - KFR_MEM_INTRINSIC static vec<U, N> mix(const vec<U, N>& t, U x, U y) - { - return (U(1.0) - t) * x + t * y; - } - - T start; - T stop; - T invsize; - size_t truncate_size; -}; - -template <typename... E> -struct expression_sequence : expression_with_arguments<E...> -{ -public: - using base = expression_with_arguments<E...>; - - using value_type = common_type<value_type_of<E>...>; - using T = value_type; - - template <typename... Expr_> - KFR_MEM_INTRINSIC expression_sequence(const size_t (&segments)[base::count], Expr_&&... expr) CMT_NOEXCEPT - : base(std::forward<Expr_>(expr)...) - { - std::copy(std::begin(segments), std::end(segments), this->segments.begin() + 1); - this->segments[0] = 0; - this->segments[base::count + 1] = size_t(-1); - } - - template <size_t N> - KFR_INTRINSIC friend vec<T, N> get_elements(const expression_sequence& self, cinput_t cinput, - size_t index, vec_shape<T, N> y) - { - std::size_t sindex = - size_t(std::upper_bound(std::begin(self.segments), std::end(self.segments), index) - 1 - - std::begin(self.segments)); - if (CMT_LIKELY(self.segments[sindex + 1] - index >= N)) - return get_elements(self, cinput, index, sindex - 1, y); - else - { - vec<T, N> result; - CMT_PRAGMA_CLANG(clang loop unroll_count(4)) - for (size_t i = 0; i < N; i++) - { - sindex = self.segments[sindex + 1] == index ? sindex + 1 : sindex; - result.data()[i] = get_elements(self, cinput, index, sindex - 1, vec_shape<T, 1>()).front(); - index++; - } - return result; - } - } - -protected: - template <size_t N> - KFR_INTRINSIC friend vec<T, N> get_elements(const expression_sequence& self, cinput_t cinput, - size_t index, size_t expr_index, vec_shape<T, N> y) - { - return cswitch( - indicesfor_t<E...>(), expr_index, [&](auto val) { return self.argument(cinput, val, index, y); }, - [&]() { return zerovector(y); }); - } - - std::array<size_t, base::count + 2> segments; -}; - -template <typename Fn, typename E> -struct expression_adjacent : expression_with_arguments<E> -{ - using value_type = value_type_of<E>; - using T = value_type; - - expression_adjacent(Fn&& fn, E&& e) - : expression_with_arguments<E>(std::forward<E>(e)), fn(std::forward<Fn>(fn)) - { - } - - template <size_t N> - KFR_INTRINSIC friend vec<T, N> get_elements(const expression_adjacent& self, cinput_t cinput, - size_t index, vec_shape<T, N>) - { - const vec<T, N> in = self.argument_first(cinput, index, vec_shape<T, N>()); - const vec<T, N> delayed = insertleft(self.data, in); - self.data = in[N - 1]; - return self.fn(in, delayed); - } - Fn fn; - mutable value_type data = value_type(0); -}; -} // namespace internal - -/** @brief Returns the subrange of the given expression - */ -template <typename E1> -KFR_INTRINSIC internal::expression_slice<E1> slice(E1&& e1, size_t start, size_t size = infinite_size) -{ - return internal::expression_slice<E1>(std::forward<E1>(e1), start, size); -} - -/** @brief Returns the expression truncated to the given size - */ -template <typename E1> -KFR_INTRINSIC internal::expression_slice<E1> truncate(E1&& e1, size_t size) -{ - return internal::expression_slice<E1>(std::forward<E1>(e1), 0, size); -} - -/** @brief Returns the reversed expression - */ -template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)> -KFR_INTRINSIC internal::expression_reverse<E1> reverse(E1&& e1) -{ - static_assert(!is_infinite<E1>, "e1 must be a sized expression (use slice())"); - return internal::expression_reverse<E1>(std::forward<E1>(e1)); -} - -/** @brief Returns evenly spaced numbers over a specified interval. - * - * @param start The starting value of the sequence - * @param stop The end value of the sequence. if ``endpoint`` is ``false``, the last value is excluded - * @param size Number of samples to generate - * @param endpoint If ``true``, ``stop`` is the last sample. Otherwise, it is not included - * @param truncate If ``true``, linspace returns exactly size elements, otherwise, returns infinite sequence - */ -template <typename T1, typename T2, bool precise = false, typename TF = ftype<common_type<T1, T2>>> -KFR_INTRINSIC internal::expression_linspace<TF, precise> linspace(T1 start, T2 stop, size_t size, - bool endpoint = false, - bool truncate = false) -{ - return internal::expression_linspace<TF, precise>(start, stop, size, endpoint, truncate); -} -KFR_FN(linspace) - -template <typename T, bool precise = false, typename TF = ftype<T>> -KFR_INTRINSIC internal::expression_linspace<TF, precise> symmlinspace(T symsize, size_t size, - bool endpoint = false) -{ - return internal::expression_linspace<TF, precise>(symmetric_linspace, symsize, size, endpoint); -} -KFR_FN(symmlinspace) - -template <size_t size, typename... E> -KFR_INTRINSIC internal::expression_sequence<decay<E>...> gen_sequence(const size_t (&list)[size], E&&... gens) -{ - static_assert(size == sizeof...(E), "Lists must be of equal length"); - return internal::expression_sequence<decay<E>...>(list, std::forward<E>(gens)...); -} -KFR_FN(gen_sequence) - -/** - * @brief Returns template expression that returns the result of calling \f$ fn(x_i, x_{i-1}) \f$ - */ -template <typename Fn, typename E1> -KFR_INTRINSIC internal::expression_adjacent<Fn, E1> adjacent(Fn&& fn, E1&& e1) -{ - return internal::expression_adjacent<Fn, E1>(std::forward<Fn>(fn), std::forward<E1>(e1)); -} - -namespace internal -{ -template <typename E> -struct expression_padded : expression_with_arguments<E> -{ - using value_type = value_type_of<E>; - - KFR_MEM_INTRINSIC constexpr static size_t size() CMT_NOEXCEPT { return infinite_size; } - - expression_padded(value_type fill_value, E&& e) - : expression_with_arguments<E>(std::forward<E>(e)), fill_value(fill_value), input_size(e.size()) - { - } - - template <size_t N> - KFR_INTRINSIC friend vec<value_type, N> get_elements(const expression_padded& self, cinput_t cinput, - size_t index, vec_shape<value_type, N> y) - { - if (CMT_UNLIKELY(index >= self.input_size)) - { - return self.fill_value; - } - else if (CMT_LIKELY(index + N <= self.input_size)) - { - return self.argument_first(cinput, index, y); - } - else - { - vec<value_type, N> x{}; - for (size_t i = 0; i < N; i++) - { - if (CMT_LIKELY(index + i < self.input_size)) - x[i] = self.argument_first(cinput, index + i, vec_shape<value_type, 1>()).front(); - else - x[i] = self.fill_value; - } - return x; - } - } - value_type fill_value; - const size_t input_size; -}; -} // namespace internal - -/** - * @brief Returns infinite template expression that pads e with fill_value (default value = 0) - */ -template <typename E, typename T = value_type_of<E>> -internal::expression_padded<E> padded(E&& e, const T& fill_value = T(0)) -{ - static_assert(is_input_expression<E>, "E must be an input expression"); - return internal::expression_padded<E>(fill_value, std::forward<E>(e)); -} - -namespace internal -{ -template <typename... E> -struct multioutput : output_expression -{ - template <typename... E_> - multioutput(E_&&... e) : outputs(std::forward<E_>(e)...) - { - } - template <typename T, size_t N> - KFR_INTRINSIC friend void set_elements(multioutput& self, coutput_t coutput, size_t index, - const vec<T, N>& x) - { - cfor(csize_t<0>(), csize_t<sizeof...(E)>(), - [&](auto n) { set_elements(std::get<val_of(decltype(n)())>(self.outputs), coutput, index, x); }); - } - std::tuple<E...> outputs; - -private: -}; - -template <typename... E> -struct expression_pack : expression_with_arguments<E...> -{ - constexpr static size_t count = sizeof...(E); - - expression_pack(E&&... e) : expression_with_arguments<E...>(std::forward<E>(e)...) {} - using value_type = vec<common_type<value_type_of<E>...>, count>; - using T = value_type; - - using expression_with_arguments<E...>::size; - - template <size_t N> - friend KFR_INTRINSIC vec<T, N> get_elements(const expression_pack& self, cinput_t cinput, size_t index, - vec_shape<T, N> y) - { - return self.call(cinput, fn::packtranspose(), index, y); - } -}; - -template <typename... E> -struct expression_unpack : private expression_with_arguments<E...>, output_expression -{ - using expression_with_arguments<E...>::begin_block; - using expression_with_arguments<E...>::end_block; - using output_expression::begin_block; - using output_expression::end_block; - constexpr static size_t count = sizeof...(E); - - expression_unpack(E&&... e) : expression_with_arguments<E...>(std::forward<E>(e)...) {} - - using expression_with_arguments<E...>::size; - - template <typename U, size_t N> - KFR_INTRINSIC friend void set_elements(expression_unpack& self, coutput_t coutput, size_t index, - const vec<vec<U, count>, N>& x) - { - self.output(coutput, index, x, csizeseq<count>); - } - - template <typename Input, KFR_ENABLE_IF(is_input_expression<Input>)> - KFR_MEM_INTRINSIC expression_unpack& operator=(Input&& input) - { - process(*this, std::forward<Input>(input)); - return *this; - } - -private: - template <typename U, size_t N, size_t... indices> - void output(coutput_t coutput, size_t index, const vec<vec<U, count>, N>& x, csizes_t<indices...>) - { - const vec<vec<U, N>, count> xx = vec<vec<U, N>, count>::from_flatten(transpose<count>(flatten(x))); - swallow{ (set_elements(std::get<indices>(this->args), coutput, index, xx[indices]), void(), 0)... }; - } -}; -} // namespace internal - -template <typename... E, KFR_ENABLE_IF(is_output_expressions<E...>)> -internal::expression_unpack<E...> unpack(E&&... e) -{ - return internal::expression_unpack<E...>(std::forward<E>(e)...); -} - -template <typename... E, KFR_ENABLE_IF(is_input_expressions<E...>)> -internal::expression_pack<internal::arg<E>...> pack(E&&... e) -{ - return internal::expression_pack<internal::arg<E>...>(std::forward<E>(e)...); -} - -template <typename OutExpr, typename InExpr> -struct task_partition -{ - task_partition(OutExpr&& output, InExpr&& input, size_t size, size_t chunk_size, size_t count) - : output(std::forward<OutExpr>(output)), input(std::forward<InExpr>(input)), size(size), - chunk_size(chunk_size), count(count) - { - } - OutExpr output; - InExpr input; - size_t size; - size_t chunk_size; - size_t count; - size_t operator()(size_t index) - { - if (CMT_UNLIKELY(index >= count)) - return 0; - return process(output, input, index * chunk_size, - index == count - 1 ? size - (count - 1) * chunk_size : chunk_size); - } -}; - -template <typename OutExpr, typename InExpr, typename T = value_type_of<InExpr>> -task_partition<OutExpr, InExpr> partition(OutExpr&& output, InExpr&& input, size_t count, - size_t minimum_size = 0) -{ - static_assert(!is_infinite<OutExpr> || !is_infinite<InExpr>, ""); - - minimum_size = minimum_size == 0 ? vector_width<T> * 8 : minimum_size; - const size_t size = size_min(output.size(), input.size()); - const size_t chunk_size = align_up(std::max(size / count, minimum_size), vector_width<T>); - - task_partition<OutExpr, InExpr> result(std::forward<OutExpr>(output), std::forward<InExpr>(input), size, - chunk_size, (size + chunk_size - 1) / chunk_size); - return result; -} - -namespace internal -{ - -template <typename E1, typename E2> -struct concatenate_expression : expression_with_arguments<E1, E2> -{ - using value_type = common_type<value_type_of<E1>, value_type_of<E2>>; - using T = value_type; - - KFR_MEM_INTRINSIC constexpr size_t size() const CMT_NOEXCEPT - { - return size_add(std::get<0>(this->args).size(), std::get<1>(this->args).size()); - } - template <typename E1_, typename E2_> - concatenate_expression(E1_&& e1, E2_&& e2) - : expression_with_arguments<E1, E2>(std::forward<E1_>(e1), std::forward<E2_>(e2)) - { - } - - template <size_t N> - KFR_INTRINSIC friend vec<T, N> get_elements(const concatenate_expression& self, cinput_t cinput, - size_t index, vec_shape<T, N> y) - { - const size_t size0 = std::get<0>(self.args).size(); - if (index >= size0) - { - return self.argument(cinput, csize<1>, index - size0, y); - } - else if (CMT_LIKELY(index + N <= size0)) - { - return self.argument(cinput, csize<0>, index, y); - } - else // (index < size0) && (index + N > size0) - { - vec<T, N> result; - for (size_t i = 0; i < size0 - index; ++i) - { - result[i] = self.argument(cinput, csize<0>, index + i, vec_shape<T, 1>{})[0]; - } - for (size_t i = size0 - index; i < N; ++i) - { - result[i] = self.argument(cinput, csize<1>, index + i - size0, vec_shape<T, 1>{})[0]; - } - return result; - } - } -}; -} // namespace internal - -template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expression<E1>&& is_input_expression<E2>)> -internal::concatenate_expression<E1, E2> concatenate(E1&& e1, E2&& e2) -{ - return { std::forward<E1>(e1), std::forward<E2>(e2) }; -} - -} // namespace CMT_ARCH_NAME -} // namespace kfr diff --git a/include/kfr/base/pointer.hpp b/include/kfr/base/pointer.hpp @@ -32,7 +32,7 @@ namespace kfr { -template <typename T, bool enable_resource = true> +template <typename T, index_t Dims> struct expression_pointer; template <typename T> @@ -53,8 +53,8 @@ inline namespace CMT_ARCH_NAME namespace internal { -template <typename Expression, typename T, size_t key = 0> -KFR_INTRINSIC bool invoke_substitute(Expression& expr, expression_pointer<T>&& new_pointer, +template <typename Expression, typename T, index_t Dims, size_t key = 0> +KFR_INTRINSIC bool invoke_substitute(Expression& expr, expression_pointer<T, Dims> new_pointer, csize_t<key> = {}); } } // namespace CMT_ARCH_NAME @@ -63,32 +63,40 @@ template <typename T, index_t Dims> struct expression_vtable { constexpr static const size_t Nsizes = 1 + ilog2(maximum_expression_width<T>); + constexpr static const size_t Nmax = 1 << Nsizes; using func_get = void (*)(void*, shape<Dims>, T*); using func_set = void (*)(void*, shape<Dims>, const T*); using func_shapeof = void (*)(void*, shape<Dims>&); - using func_substitute = bool (*)(void*, expression_pointer<T>&&); + using func_substitute = bool (*)(void*, expression_pointer<T, Dims>); + using func_pass = void (*)(void*, shape<Dims>, shape<Dims>); func_shapeof fn_shapeof; func_substitute fn_substitute; + func_pass fn_begin_pass; + func_pass fn_end_pass; std::array<std::array<func_get, Nsizes>, Dims> fn_get_elements; std::array<std::array<func_set, Nsizes>, Dims> fn_set_elements; template <typename Expression> - expression_vtable(ctype_t<Expression> t) + KFR_MEM_INTRINSIC expression_vtable(ctype_t<Expression> t) { fn_shapeof = &static_shapeof<Expression>; fn_substitute = &static_substitute<Expression>; + fn_begin_pass = &static_begin_pass<Expression>; + fn_end_pass = &static_end_pass<Expression>; cforeach(csizeseq<Nsizes>, - [&](size_t size) CMT_INLINE_LAMBDA + [&](auto size_) CMT_INLINE_LAMBDA { cforeach(csizeseq<Dims>, - [&](size_t axis) CMT_INLINE_LAMBDA + [&](auto axis_) CMT_INLINE_LAMBDA { + constexpr size_t size = decltype(size_)::value; + constexpr size_t axis = decltype(axis_)::value; fn_get_elements[axis][size] = - &expression_vtable::static_get_elements<Expression, 1 << size, axis>; + &static_get_elements<Expression, 1 << size, axis>; fn_set_elements[axis][size] = - &expression_vtable::static_set_elements<Expression, 1 << size, axis>; + &static_set_elements<Expression, 1 << size, axis>; }); }); } @@ -96,12 +104,24 @@ struct expression_vtable template <typename Expression, size_t N, index_t VecAxis> static void static_get_elements(void* instance, shape<Dims> index, T* dest) { - write(dest, get_elements(*static_cast<Expression*>(instance), index, axis_params_v<VecAxis, N>)); + if constexpr (is_input_expression<Expression>) + { + write(dest, get_elements(*static_cast<Expression*>(instance), index, axis_params_v<VecAxis, N>)); + } + else + { + } } template <typename Expression, size_t N, index_t VecAxis> static void static_set_elements(void* instance, shape<Dims> index, const T* src) { - set_elements(*static_cast<Expression*>(instance), index, axis_params_v<VecAxis, N>, read<N>(src)); + if constexpr (is_output_expression<Expression>) + { + set_elements(*static_cast<Expression*>(instance), index, axis_params_v<VecAxis, N>, read<N>(src)); + } + else + { + } } template <typename Expression> static void static_shapeof(void* instance, shape<Dims>& result) @@ -109,10 +129,20 @@ struct expression_vtable result = expression_traits<Expression>::shapeof(*static_cast<Expression*>(instance)); } template <typename Expression> - static bool static_substitute(void* instance, expression_pointer<T> ptr) + static bool static_substitute(void* instance, expression_pointer<T, Dims> ptr) { return internal::invoke_substitute(*static_cast<Expression*>(instance), std::move(ptr)); } + template <typename Expression> + static void static_begin_pass(void* instance, shape<Dims> start, shape<Dims> stop) + { + begin_pass(*static_cast<Expression*>(instance), start, stop); + } + template <typename Expression> + static void static_end_pass(void* instance, shape<Dims> start, shape<Dims> stop) + { + end_pass(*static_cast<Expression*>(instance), start, stop); + } }; struct expression_resource @@ -126,7 +156,7 @@ struct expression_resource_impl : expression_resource { expression_resource_impl(E&& e) CMT_NOEXCEPT : e(std::move(e)) {} virtual ~expression_resource_impl() {} - virtual void* instance() override final { return &e; } + KFR_INTRINSIC virtual void* instance() override final { return &e; } public: #ifdef __cpp_aligned_new @@ -145,32 +175,38 @@ KFR_INTRINSIC std::shared_ptr<expression_resource> make_resource(E&& e) new (aligned_allocate<T>()) T(std::move(e)), [](T* pi) { aligned_deallocate<T>(pi); })); } -template <typename T, index_t Dims> -struct xpointer +template <typename T, index_t Dims = 1> +struct expression_pointer { void* instance; const expression_vtable<T, Dims>* vtable; std::shared_ptr<expression_resource> resource; - xpointer() CMT_NOEXCEPT : instance(nullptr), vtable(nullptr) {} - xpointer(const void* instance, const expression_vtable<T, Dims>* vtable, - std::shared_ptr<expression_resource> resource = nullptr) + expression_pointer() CMT_NOEXCEPT : instance(nullptr), vtable(nullptr) {} + expression_pointer(const void* instance, const expression_vtable<T, Dims>* vtable, + std::shared_ptr<expression_resource> resource = nullptr) : instance(const_cast<void*>(instance)), vtable(vtable), resource(std::move(resource)) { } explicit operator bool() const { return instance != nullptr; } + + bool substitute(expression_pointer<T, Dims> new_pointer) + { + return vtable->fn_substitute(instance, std::move(new_pointer)); + } }; template <typename T, index_t Dims> -struct expression_traits<xpointer<T, Dims>> : expression_traits_defaults +struct expression_traits<expression_pointer<T, Dims>> : expression_traits_defaults { using value_type = T; constexpr static size_t dims = Dims; - constexpr static shape<dims> shapeof(const xpointer<T, Dims>& self) + constexpr static shape<dims> shapeof(const expression_pointer<T, Dims>& self) { shape<dims> result; self.vtable->fn_shapeof(self.instance, result); + return result; } constexpr static shape<dims> shapeof() { return shape<dims>(undefined_size); } @@ -180,8 +216,19 @@ struct expression_traits<xpointer<T, Dims>> : expression_traits_defaults inline namespace CMT_ARCH_NAME { +template <typename T, index_t NDims> +KFR_INTRINSIC void begin_pass(const expression_pointer<T, NDims>& self, shape<NDims> start, shape<NDims> stop) +{ + self.vtable->fn_begin_pass(self.instance, start, stop); +} +template <typename T, index_t NDims> +KFR_INTRINSIC void end_pass(const expression_pointer<T, NDims>& self, shape<NDims> start, shape<NDims> stop) +{ + self.vtable->fn_end_pass(self.instance, start, stop); +} + template <typename T, index_t NDims, index_t Axis, size_t N> -KFR_INTRINSIC vec<T, N> get_elements(const xpointer<T, NDims>& self, const shape<NDims>& index, +KFR_INTRINSIC vec<T, N> get_elements(const expression_pointer<T, NDims>& self, const shape<NDims>& index, const axis_params<Axis, N>& sh) { static_assert(is_poweroftwo(N) && N >= 1); @@ -200,7 +247,7 @@ KFR_INTRINSIC vec<T, N> get_elements(const xpointer<T, NDims>& self, const shape } template <typename T, index_t NDims, index_t Axis, size_t N> -KFR_INTRINSIC void set_elements(const xpointer<T, NDims>& self, const shape<NDims>& index, +KFR_INTRINSIC void set_elements(const expression_pointer<T, NDims>& self, const shape<NDims>& index, const axis_params<Axis, N>& sh, const identity<vec<T, N>>& value) { static_assert(is_poweroftwo(N) && N >= 1); @@ -231,6 +278,8 @@ KFR_INTRINSIC expression_vtable<T, Dims>* make_expression_vtable() } } // namespace internal +} // namespace CMT_ARCH_NAME + /** @brief Converts the given expression into an opaque object. * This overload takes reference to the expression. * @warning Use with caution with local variables. @@ -250,89 +299,109 @@ KFR_INTRINSIC expression_pointer<T, Dims> to_pointer(E&& expr) { std::shared_ptr<expression_resource> ptr = make_resource(std::move(expr)); void* instance = ptr->instance(); - return expression_pointer<T, Dims>(instance, internal::make_expression_vtable<T, E>(), std::move(ptr)); + return expression_pointer<T, Dims>(instance, internal::make_expression_vtable<T, Dims, E>(), + std::move(ptr)); } -#if 0 -template <typename T, size_t key> -class expression_placeholder : public input_expression +template <typename T, index_t Dims = 1, size_t Key = 0> +struct expression_placeholder { public: using value_type = T; expression_placeholder() CMT_NOEXCEPT = default; - template <typename U, size_t N> - friend KFR_INTRINSIC vec<U, N> get_elements(const expression_placeholder& self, cinput_t, size_t index, - vec_shape<U, N>) + expression_pointer<T, Dims> pointer; +}; + +template <typename T, index_t Dims, size_t Key> +struct expression_traits<expression_placeholder<T, Dims, Key>> : public expression_traits_defaults +{ + using value_type = T; + constexpr static size_t dims = Dims; + constexpr static shape<dims> shapeof(const expression_placeholder<T, Dims, Key>& self) { - return self.pointer ? elemcast<U>(get_elements(self.pointer, cinput, index, vec_shape<T, N>())) : 0; + return self.pointer ? ::kfr::shapeof(self.pointer) : shape<dims>(infinite_size); } - expression_pointer<T> pointer; + constexpr static shape<dims> shapeof() { return shape<dims>(undefined_size); } }; -template <typename T, size_t key = 0> -KFR_INTRINSIC expression_placeholder<T, key> placeholder(csize_t<key> = csize_t<key>{}) +inline namespace CMT_ARCH_NAME +{ + +template <typename T, index_t Dims, size_t Key, index_t VecAxis, size_t N> +KFR_INTRINSIC vec<T, N> get_elements(const expression_placeholder<T, Dims, Key>& self, shape<Dims> index, + axis_params<VecAxis, N> sh) { - return expression_placeholder<T, key>(); + return self.pointer ? get_elements(self.pointer, index, sh) : 0; +} +} // namespace CMT_ARCH_NAME + +template <typename T, index_t Dims = 1, size_t Key = 0> +KFR_INTRINSIC expression_placeholder<T, Dims, Key> placeholder(csize_t<Key> = csize_t<Key>{}) +{ + return expression_placeholder<T, Dims, Key>(); } template <typename... Args> -KFR_INTRINSIC bool substitute(input_expression&, Args&&...) +KFR_INTRINSIC bool substitute(const internal_generic::anything&, Args&&...) { return false; } +inline namespace CMT_ARCH_NAME +{ namespace internal { -template <typename... Args, typename T, size_t key, size_t... indices> -KFR_INTRINSIC bool substitute(internal::expression_with_arguments<Args...>& expr, - expression_pointer<T>&& new_pointer, csize_t<key>, csizes_t<indices...>); +template <typename... Args, typename T, index_t Dims, size_t Key, size_t... indices> +KFR_INTRINSIC bool substitute_helper(expression_with_arguments<Args...>& expr, + expression_pointer<T, Dims> new_pointer, csize_t<Key>, + csizes_t<indices...>); } +} // namespace CMT_ARCH_NAME -template <typename T, size_t key = 0> -KFR_INTRINSIC bool substitute(expression_placeholder<T, key>& expr, expression_pointer<T>&& new_pointer, - csize_t<key> = csize_t<key>{}) +template <typename T, index_t Dims, size_t Key = 0> +KFR_INTRINSIC bool substitute(expression_placeholder<T, Dims, Key>& expr, + expression_pointer<T, Dims> new_pointer, csize_t<Key> = csize_t<Key>{}) { expr.pointer = std::move(new_pointer); return true; } -template <typename... Args, typename T, size_t key = 0> -KFR_INTRINSIC bool substitute(internal::expression_with_arguments<Args...>& expr, - expression_pointer<T>&& new_pointer, csize_t<key> = csize_t<key>{}) +template <typename... Args, typename T, index_t Dims, size_t Key = 0> +KFR_INTRINSIC bool substitute(expression_with_arguments<Args...>& expr, + expression_pointer<T, Dims> new_pointer, csize_t<Key> = csize_t<Key>{}) { - return internal::substitute(expr, std::move(new_pointer), csize_t<key>{}, indicesfor_t<Args...>{}); + return internal::substitute_helper(expr, std::move(new_pointer), csize_t<Key>{}, indicesfor_t<Args...>{}); } -template <typename T, size_t key = 0> -KFR_INTRINSIC bool substitute(expression_pointer<T>& expr, expression_pointer<T>&& new_pointer, - csize_t<key> = csize_t<key>{}) +template <typename T, index_t Dims, size_t Key = 0> +KFR_INTRINSIC bool substitute(expression_pointer<T, Dims>& expr, expression_pointer<T, Dims> new_pointer, + csize_t<Key> = csize_t<Key>{}) { - return expr.substitute(std::move(new_pointer), csize_t<key>{}); + static_assert(Key == 0, "expression_pointer supports only Key = 0"); + return expr.substitute(std::move(new_pointer)); } +inline namespace CMT_ARCH_NAME +{ namespace internal { -template <typename... Args, typename T, size_t key, size_t... indices> -KFR_INTRINSIC bool substitute(internal::expression_with_arguments<Args...>& expr, - expression_pointer<T>&& new_pointer, csize_t<key>, csizes_t<indices...>) +template <typename... Args, typename T, index_t Dims, size_t Key, size_t... indices> +KFR_INTRINSIC bool substitute_helper(expression_with_arguments<Args...>& expr, + expression_pointer<T, Dims> new_pointer, csize_t<Key>, + csizes_t<indices...>) { - return (substitute(std::get<indices>(expr.args), std::move(new_pointer), csize_t<key>()) || ...); + return (substitute(std::get<indices>(expr.args), std::move(new_pointer), csize_t<Key>()) || ...); } -} // namespace internal - -namespace internal +template <typename Expression, typename T, index_t Dims, size_t Key> +KFR_INTRINSIC bool invoke_substitute(Expression& expr, expression_pointer<T, Dims> new_pointer, csize_t<Key>) { - -template <typename Expression, typename T, size_t key> -KFR_INTRINSIC bool invoke_substitute(Expression& expr, expression_pointer<T>&& new_pointer, csize_t<key>) -{ - return kfr::substitute(expr, std::move(new_pointer), csize_t<key>{}); + return kfr::substitute(expr, std::move(new_pointer), csize_t<Key>{}); } } // namespace internal -#endif + } // namespace CMT_ARCH_NAME -} +} // namespace kfr diff --git a/include/kfr/base/random.hpp b/include/kfr/base/random.hpp @@ -26,6 +26,7 @@ #pragma once #include "random_bits.hpp" +#include "state_holder.hpp" namespace kfr { @@ -34,128 +35,141 @@ inline namespace CMT_ARCH_NAME { template <typename T, size_t N, KFR_ENABLE_IF(is_integral<T>)> -KFR_INTRINSIC vec<T, N> random_uniform(random_bit_generator& gen) +KFR_INTRINSIC vec<T, N> random_uniform(random_state& state) { - return bitcast<T>(random_bits<N * sizeof(T)>(gen)); + return bitcast<T>(random_bits<N * sizeof(T)>(state)); } template <typename T, size_t N, KFR_ENABLE_IF(is_same<T, f32>)> -KFR_INTRINSIC vec<f32, N> randommantissa(random_bit_generator& gen) +KFR_INTRINSIC vec<f32, N> randommantissa(random_state& state) { - return bitcast<f32>((random_uniform<u32, N>(gen) & u32(0x7FFFFFu)) | u32(0x3f800000u)) + 0.0f; + return bitcast<f32>((random_uniform<u32, N>(state) & u32(0x7FFFFFu)) | u32(0x3f800000u)) + 0.0f; } template <typename T, size_t N, KFR_ENABLE_IF(is_same<T, f64>)> -KFR_INTRINSIC vec<f64, N> randommantissa(random_bit_generator& gen) +KFR_INTRINSIC vec<f64, N> randommantissa(random_state& state) { - return bitcast<f64>((random_uniform<u64, N>(gen) & u64(0x000FFFFFFFFFFFFFull)) | + return bitcast<f64>((random_uniform<u64, N>(state) & u64(0x000FFFFFFFFFFFFFull)) | u64(0x3FF0000000000000ull)) + 0.0; } template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>)> -KFR_INTRINSIC vec<T, N> random_uniform(random_bit_generator& gen) +KFR_INTRINSIC vec<T, N> random_uniform(random_state& state) { - return randommantissa<T, N>(gen) - 1.f; + return randommantissa<T, N>(state) - 1.f; } template <size_t N, typename T, KFR_ENABLE_IF(is_f_class<T>)> -KFR_INTRINSIC vec<T, N> random_range(random_bit_generator& gen, T min, T max) +KFR_INTRINSIC vec<T, N> random_range(random_state& state, T min, T max) { - return mix(random_uniform<T, N>(gen), min, max); + return mix(random_uniform<T, N>(state), min, max); } template <size_t N, typename T, KFR_ENABLE_IF(!is_f_class<T>)> -KFR_INTRINSIC vec<T, N> random_range(random_bit_generator& gen, T min, T max) +KFR_INTRINSIC vec<T, N> random_range(random_state& state, T min, T max) { using big_type = findinttype<sqr(std::numeric_limits<T>::min()), sqr(std::numeric_limits<T>::max())>; - vec<T, N> u = random_uniform<T, N>(gen); + vec<T, N> u = random_uniform<T, N>(state); const vec<big_type, N> tmp = u; return (tmp * (max - min) + min) >> typebits<T>::bits; } -namespace internal +template <typename T, index_t Dims, bool Reference = false> +struct expression_random_uniform : expression_traits_defaults { -template <typename T, typename Gen = random_bit_generator> -struct expression_random_uniform : input_expression -{ - using value_type = T; - constexpr expression_random_uniform(Gen gen) CMT_NOEXCEPT : gen(gen) {} - template <size_t N> - friend vec<T, N> get_elements(const expression_random_uniform& self, cinput_t, size_t, vec_shape<T, N>) + using value_type = T; + constexpr static size_t dims = Dims; + constexpr static shape<dims> shapeof(const expression_random_uniform&) + { + return shape<dims>(infinite_size); + } + constexpr static shape<dims> shapeof() { return shape<dims>(infinite_size); } + + mutable state_holder<random_state, Reference> state; + + template <size_t N, index_t VecAxis> + friend KFR_INTRINSIC vec<T, N> get_elements(const expression_random_uniform& self, shape<Dims>, + axis_params<VecAxis, N>) { - return random_uniform<T, N>(self.gen); + return random_uniform<N, T>(*self.state); } - mutable Gen gen; }; -template <typename T, typename Gen = random_bit_generator> -struct expression_random_range : input_expression +template <typename T, index_t Dims, bool Reference = false> +struct expression_random_range : expression_traits_defaults { - using value_type = T; - constexpr expression_random_range(Gen gen, T min, T max) CMT_NOEXCEPT : gen(gen), min(min), max(max) {} + using value_type = T; + constexpr static size_t dims = Dims; + constexpr static shape<dims> shapeof(const expression_random_range&) + { + return shape<dims>(infinite_size); + } + constexpr static shape<dims> shapeof() { return shape<dims>(infinite_size); } + + mutable state_holder<random_state, Reference> state; + T min; + T max; - template <size_t N> - friend vec<T, N> get_elements(const expression_random_range& self, cinput_t, size_t, vec_shape<T, N>) + template <size_t N, index_t VecAxis> + friend KFR_INTRINSIC vec<T, N> get_elements(const expression_random_range& self, shape<Dims>, + axis_params<VecAxis, N>) { - return random_range<N, T>(self.gen, self.min, self.max); + return random_range<N, T>(*self.state, self.min, self.max); } - mutable Gen gen; - const T min; - const T max; }; -} // namespace internal -/// @brief Returns expression that returns pseudo random values. Copies the given generator -template <typename T> -KFR_FUNCTION internal::expression_random_uniform<T> gen_random_uniform(const random_bit_generator& gen) +/// @brief Returns expression that returns pseudorandom values. Copies the given generator +template <typename T, index_t Dims = 1> +KFR_FUNCTION expression_random_uniform<T, Dims> gen_random_uniform(const random_state& state) { - return internal::expression_random_uniform<T>(gen); + return { {}, state }; } -/// @brief Returns expression that returns pseudo random values. References the given +/// @brief Returns expression that returns pseudorandom values. References the given /// generator. Use std::ref(gen) to force this overload -template <typename T> -KFR_FUNCTION internal::expression_random_uniform<T, std::reference_wrapper<random_bit_generator>> -gen_random_uniform(std::reference_wrapper<random_bit_generator> gen) +template <typename T, index_t Dims = 1> +KFR_FUNCTION expression_random_range<T, Dims, true> gen_random_uniform( + std::reference_wrapper<random_state> state) { - return internal::expression_random_uniform<T, std::reference_wrapper<random_bit_generator>>(gen); + return { {}, state }; } #ifndef KFR_DISABLE_READCYCLECOUNTER -/// @brief Returns expression that returns pseudo random values -template <typename T> -KFR_FUNCTION internal::expression_random_uniform<T> gen_random_uniform() +/// @brief Returns expression that returns pseudorandom values +template <typename T, index_t Dims = 1> +KFR_FUNCTION expression_random_range<T, Dims> gen_random_uniform() { - return internal::expression_random_uniform<T>(random_bit_generator(seed_from_rdtsc)); + return expression_random_uniform<T, Dims>{ random_init() }; } #endif -/// @brief Returns expression that returns pseudo random values of the given range. Copies the given generator -template <typename T> -KFR_FUNCTION internal::expression_random_range<T> gen_random_range(const random_bit_generator& gen, T min, - T max) +/// @brief Returns expression that returns pseudorandom values of the given range. Copies the given generator +template <typename T, index_t Dims = 1> +KFR_FUNCTION expression_random_range<T, Dims> gen_random_range(const random_state& state, T min, T max) { - return internal::expression_random_range<T>(gen, min, max); + return { {}, state, min, max }; } -/// @brief Returns expression that returns pseudo random values of the given range. References the given +/// @brief Returns expression that returns pseudorandom values of the given range. References the given /// generator. Use std::ref(gen) to force this overload -template <typename T> -KFR_FUNCTION internal::expression_random_range<T, std::reference_wrapper<random_bit_generator>> -gen_random_range(std::reference_wrapper<random_bit_generator> gen, T min, T max) +template <typename T, index_t Dims = 1> +KFR_FUNCTION expression_random_range<T, Dims, true> gen_random_range( + std::reference_wrapper<random_state> state, T min, T max) { - return internal::expression_random_range<T, std::reference_wrapper<random_bit_generator>>(gen, min, max); + return { {}, state, min, max }; } #ifndef KFR_DISABLE_READCYCLECOUNTER -/// @brief Returns expression that returns pseudo random values of the given range -template <typename T> -KFR_FUNCTION internal::expression_random_range<T> gen_random_range(T min, T max) +/// @brief Returns expression that returns pseudorandom values of the given range +template <typename T, index_t Dims = 1> +KFR_FUNCTION expression_random_range<T, Dims> gen_random_range(T min, T max) { - return internal::expression_random_range<T>(random_bit_generator(seed_from_rdtsc), min, max); + return { {}, random_init(), min, max }; } #endif + } // namespace CMT_ARCH_NAME + } // namespace kfr diff --git a/include/kfr/base/random_bits.hpp b/include/kfr/base/random_bits.hpp @@ -2,7 +2,7 @@ * @{ */ /* - Copyright (C) 2016 D Levin (https://www.kfrlib.com) + Copyright (C) 2016-2022 Fractalium Ltd (https://www.kfrlib.com) This file is part of KFR KFR is free software: you can redistribute it and/or modify @@ -47,10 +47,16 @@ struct seed_from_rdtsc_t constexpr seed_from_rdtsc_t seed_from_rdtsc{}; #endif -inline namespace CMT_ARCH_NAME +struct random_state { - -using random_state = u32x4; + constexpr random_state() : v{ 0, 0, 0, 0 } {} + constexpr random_state(random_state&&) = default; + constexpr random_state(const random_state&) = default; + constexpr random_state& operator=(random_state&&) = default; + constexpr random_state& operator=(const random_state&) = default; + // internal field + portable_vec<u32, 4> v; +}; #ifndef KFR_DISABLE_READCYCLECOUNTER #ifdef CMT_COMPILER_CLANG @@ -61,53 +67,57 @@ using random_state = u32x4; #endif #endif -struct random_bit_generator +static_assert(sizeof(random_state) == 16, "sizeof(random_state) == 16"); + +inline namespace CMT_ARCH_NAME +{ + +KFR_INTRINSIC void random_next(random_state& state) +{ + constexpr static portable_vec<u32, 4> mul{ 214013u, 17405u, 214013u, 69069u }; + constexpr static portable_vec<u32, 4> add{ 2531011u, 10395331u, 13737667u, 1u }; + state.v = bitcast<u32>(rotateright<3>( + bitcast<u8>(fmadd(static_cast<u32x4>(state.v), static_cast<u32x4>(mul), static_cast<u32x4>(add))))); +} +KFR_INTRINSIC random_state random_init() { -#ifndef KFR_DISABLE_READCYCLECOUNTER - KFR_MEM_INTRINSIC random_bit_generator(seed_from_rdtsc_t) CMT_NOEXCEPT - : state(bitcast<u32>(make_vector(KFR_builtin_readcyclecounter(), - (KFR_builtin_readcyclecounter() << 11) ^ 0x710686d615e2257bull))) - { - (void)operator()(); - } -#endif - KFR_MEM_INTRINSIC random_bit_generator(u32 x0, u32 x1, u32 x2, u32 x3) CMT_NOEXCEPT - : state(x0, x1, x2, x3) - { - (void)operator()(); - } - KFR_MEM_INTRINSIC random_bit_generator(u64 x0, u64 x1) CMT_NOEXCEPT - : state(bitcast<u32>(make_vector(x0, x1))) - { - (void)operator()(); - } - - KFR_MEM_INTRINSIC random_state operator()() - { - const static random_state mul{ 214013u, 17405u, 214013u, 69069u }; - const static random_state add{ 2531011u, 10395331u, 13737667u, 1u }; - state = bitcast<u32>(rotateright<3>(bitcast<u8>(fmadd(state, mul, add)))); - return state; - } - -protected: random_state state; -}; + state.v = portable_vec<u32, 4>{ bitcast<u32>(make_vector( + KFR_builtin_readcyclecounter(), (KFR_builtin_readcyclecounter() << 11) ^ 0x710686d615e2257bull)) }; + random_next(state); + return state; +} -static_assert(sizeof(random_state) == 16, "sizeof(random_state) == 16"); +KFR_INTRINSIC random_state random_init(u32 x0, u32 x1, u32 x2, u32 x3) +{ + random_state state; + state.v = portable_vec<u32, 4>{ x0, x1, x2, x3 }; + random_next(state); + return state; +} + +KFR_INTRINSIC random_state random_init(u64 x0, u64 x1) +{ + random_state state; + state.v = portable_vec<u32, 4>{ static_cast<u32>(x0), static_cast<u32>(x0 >> 32), static_cast<u32>(x1), + static_cast<u32>(x1 >> 32) }; + random_next(state); + return state; +} template <size_t N, KFR_ENABLE_IF(N <= sizeof(random_state))> -KFR_INTRINSIC vec<u8, N> random_bits(random_bit_generator& gen) +KFR_INTRINSIC vec<u8, N> random_bits(random_state& state) { - return narrow<N>(bitcast<u8>(gen())); + random_next(state); + return narrow<N>(bitcast<u8>(u32x4(state.v))); } template <size_t N, KFR_ENABLE_IF(N > sizeof(random_state))> -KFR_INTRINSIC vec<u8, N> random_bits(random_bit_generator& gen) +KFR_INTRINSIC vec<u8, N> random_bits(random_state& state) { constexpr size_t N2 = prev_poweroftwo(N - 1); - const vec<u8, N2> bits1 = random_bits<N2>(gen); - const vec<u8, N - N2> bits2 = random_bits<N - N2>(gen); + const vec<u8, N2> bits1 = random_bits<N2>(state); + const vec<u8, N - N2> bits2 = random_bits<N - N2>(state); return concat(bits1, bits2); } } // namespace CMT_ARCH_NAME diff --git a/include/kfr/base/reduce.hpp b/include/kfr/base/reduce.hpp @@ -25,11 +25,12 @@ */ #pragma once -#include "../simd/min_max.hpp" #include "../simd/horizontal.hpp" #include "../simd/impl/function.hpp" +#include "../simd/min_max.hpp" #include "../simd/operators.hpp" #include "../simd/vec.hpp" +#include "simd_expressions.hpp" #include "basic_expressions.hpp" namespace kfr @@ -47,12 +48,10 @@ KFR_FN(final_mean) template <typename T> KFR_INTRINSIC T final_rootmean(T value, size_t size) { - return builtin_sqrt(value / T(size)); + return sqrt(value / T(size)); } KFR_FN(final_rootmean) -namespace internal -{ template <typename FinalFn, typename T, KFR_ENABLE_IF(is_callable<FinalFn, T, size_t>)> KFR_INTRINSIC auto reduce_call_final(FinalFn&& finalfn, size_t size, T value) { @@ -64,10 +63,15 @@ KFR_INTRINSIC auto reduce_call_final(FinalFn&& finalfn, size_t, T value) return finalfn(value); } -template <typename Tout, typename Twork, typename Tin, typename ReduceFn, typename TransformFn, +template <typename Tout, index_t Dims, typename Twork, typename Tin, typename ReduceFn, typename TransformFn, typename FinalFn> -struct expression_reduce : output_expression +struct expression_reduce : public expression_traits_defaults { + using value_type = Tin; + constexpr static size_t dims = Dims; + constexpr static shape<dims> shapeof(const expression_reduce&) { return shape<dims>(infinite_size); } + constexpr static shape<dims> shapeof() { return shape<dims>(infinite_size); } + constexpr static size_t width = vector_width<Tin> * bitness_const(1, 2); using value_type = Tin; @@ -78,18 +82,16 @@ struct expression_reduce : output_expression { } - template <size_t N> - KFR_INTRINSIC friend void set_elements(expression_reduce& self, coutput_t, size_t, const vec<Tin, N>& x) + KFR_MEM_INTRINSIC Tout get() { return reduce_call_final(finalfn, counter, horizontal(value, reducefn)); } + + template <size_t N, index_t VecAxis> + friend KFR_INTRINSIC void set_elements(expression_reduce& self, shape<Dims>, axis_params<VecAxis, N>, + const identity<vec<Tin, N>>& x) { self.counter += N; self.process(x); } - KFR_MEM_INTRINSIC Tout get() - { - return internal::reduce_call_final(finalfn, counter, horizontal(value, reducefn)); - } - protected: void reset() { counter = 0; } KFR_MEM_INTRINSIC void process(const vec<Tin, width>& x) const @@ -116,21 +118,20 @@ protected: FinalFn finalfn; mutable vec<Twork, width> value; }; -} // namespace internal template <typename ReduceFn, typename TransformFn = fn_generic::pass_through, - typename FinalFn = fn_generic::pass_through, typename E1, typename Tin = value_type_of<E1>, + typename FinalFn = fn_generic::pass_through, typename E1, typename Tin = expression_value_type<E1>, typename Twork = decay<decltype(std::declval<TransformFn>()(std::declval<Tin>()))>, - typename Tout = decay<decltype(internal::reduce_call_final( - std::declval<FinalFn>(), std::declval<size_t>(), std::declval<Twork>()))>, + typename Tout = decay<decltype(reduce_call_final(std::declval<FinalFn>(), std::declval<size_t>(), + std::declval<Twork>()))>, KFR_ENABLE_IF(is_input_expression<E1>)> KFR_INTRINSIC Tout reduce(const E1& e1, ReduceFn&& reducefn, TransformFn&& transformfn = fn_generic::pass_through(), FinalFn&& finalfn = fn_generic::pass_through()) { static_assert(!is_infinite<E1>, "e1 must be a sized expression (use slice())"); - using reducer_t = - internal::expression_reduce<Tout, Twork, Tin, decay<ReduceFn>, decay<TransformFn>, decay<FinalFn>>; + using reducer_t = expression_reduce<Tout, expression_dims<E1>, Twork, Tin, decay<ReduceFn>, + decay<TransformFn>, decay<FinalFn>>; reducer_t red(std::forward<ReduceFn>(reducefn), std::forward<TransformFn>(transformfn), std::forward<FinalFn>(finalfn)); process(red, e1); @@ -139,10 +140,10 @@ KFR_INTRINSIC Tout reduce(const E1& e1, ReduceFn&& reducefn, } template <typename ReduceFn, typename TransformFn = fn_generic::pass_through, - typename FinalFn = fn_generic::pass_through, typename E1, typename Tin = value_type_of<E1>, + typename FinalFn = fn_generic::pass_through, typename E1, typename Tin = expression_value_type<E1>, typename Twork = decay<decltype(std::declval<TransformFn>()(std::declval<Tin>()))>, - typename Tout = decay<decltype(internal::reduce_call_final( - std::declval<FinalFn>(), std::declval<size_t>(), std::declval<Twork>()))>, + typename Tout = decay<decltype(reduce_call_final(std::declval<FinalFn>(), std::declval<size_t>(), + std::declval<Twork>()))>, KFR_ENABLE_IF(!is_input_expression<E1>)> KFR_INTRINSIC Tout reduce(const E1& e1, ReduceFn&& reducefn, TransformFn&& transformfn = fn_generic::pass_through(), @@ -158,8 +159,6 @@ KFR_INTRINSIC Tout reduce(const E1& e1, ReduceFn&& reducefn, return internal::reduce_call_final(finalfn, counter, result); } -KFR_FN(reduce) - /** * @brief Returns the sum of all the elements in x. * @@ -168,7 +167,7 @@ KFR_FN(reduce) * x_0 + x_1 + \ldots + x_{N-1} * \f] */ -template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>)> +template <typename E1, typename T = expression_value_type<E1>, KFR_ENABLE_IF(is_input_expression<E1>)> KFR_FUNCTION T sum(const E1& x) { static_assert(!is_infinite<E1>, "e1 must be a sized expression (use slice())"); @@ -183,7 +182,7 @@ KFR_FUNCTION T sum(const E1& x) * \frac{1}{N}(x_0 + x_1 + \ldots + x_{N-1}) * \f] */ -template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>)> +template <typename E1, typename T = expression_value_type<E1>, KFR_ENABLE_IF(is_input_expression<E1>)> KFR_FUNCTION T mean(const E1& x) { static_assert(!is_infinite<E1>, "e1 must be a sized expression (use slice())"); @@ -195,7 +194,7 @@ KFR_FUNCTION T mean(const E1& x) * * x must have its size and type specified. */ -template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>)> +template <typename E1, typename T = expression_value_type<E1>, KFR_ENABLE_IF(is_input_expression<E1>)> KFR_FUNCTION T minof(const E1& x) { static_assert(!is_infinite<E1>, "e1 must be a sized expression (use slice())"); @@ -207,7 +206,7 @@ KFR_FUNCTION T minof(const E1& x) * * x must have its size and type specified. */ -template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>)> +template <typename E1, typename T = expression_value_type<E1>, KFR_ENABLE_IF(is_input_expression<E1>)> KFR_FUNCTION T maxof(const E1& x) { static_assert(!is_infinite<E1>, "e1 must be a sized expression (use slice())"); @@ -219,7 +218,7 @@ KFR_FUNCTION T maxof(const E1& x) * * x must have its size and type specified. */ -template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>)> +template <typename E1, typename T = expression_value_type<E1>, KFR_ENABLE_IF(is_input_expression<E1>)> KFR_FUNCTION T absminof(const E1& x) { static_assert(!is_infinite<E1>, "e1 must be a sized expression (use slice())"); @@ -231,7 +230,7 @@ KFR_FUNCTION T absminof(const E1& x) * * x must have its size and type specified. */ -template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>)> +template <typename E1, typename T = expression_value_type<E1>, KFR_ENABLE_IF(is_input_expression<E1>)> KFR_FUNCTION T absmaxof(const E1& x) { static_assert(!is_infinite<E1>, "e1 must be a sized expression (use slice())"); @@ -247,8 +246,8 @@ KFR_FUNCTION T absmaxof(const E1& x) * \f] */ template <typename E1, typename E2, - typename T = value_type_of<decltype(std::declval<E1>() * std::declval<E2>())>, - KFR_ENABLE_IF(is_input_expressions<E1, E2>)> + typename T = expression_value_type<decltype(std::declval<E1>() * std::declval<E2>())>, + KFR_ACCEPT_EXPRESSIONS(E1, E2)> KFR_FUNCTION T dotproduct(E1&& x, E2&& y) { auto m = std::forward<E1>(x) * std::forward<E2>(y); @@ -265,7 +264,7 @@ KFR_FUNCTION T dotproduct(E1&& x, E2&& y) \sqrt{\frac{1}{N}( x_0^2 + x_1^2 + \ldots + x_{N-1}^2)} \f] */ -template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>)> +template <typename E1, typename T = expression_value_type<E1>, KFR_ENABLE_IF(is_input_expression<E1>)> KFR_FUNCTION T rms(const E1& x) { static_assert(!is_infinite<E1>, "e1 must be a sized expression (use slice())"); @@ -280,7 +279,7 @@ KFR_FUNCTION T rms(const E1& x) x_0^2 + x_1^2 + \ldots + x_{N-1}^2 \f] */ -template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>)> +template <typename E1, typename T = expression_value_type<E1>, KFR_ENABLE_IF(is_input_expression<E1>)> KFR_FUNCTION T sumsqr(const E1& x) { static_assert(!is_infinite<E1>, "e1 must be a sized expression (use slice())"); @@ -295,7 +294,7 @@ KFR_FUNCTION T sumsqr(const E1& x) x_0 \cdot x_1 \cdot \ldots \cdot x_{N-1} \f] */ -template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>)> +template <typename E1, typename T = expression_value_type<E1>, KFR_ENABLE_IF(is_input_expression<E1>)> KFR_FUNCTION T product(const E1& x) { static_assert(!is_infinite<E1>, "e1 must be a sized expression (use slice())"); diff --git a/include/kfr/base/shape.hpp b/include/kfr/base/shape.hpp @@ -2,7 +2,7 @@ * @{ */ /* - Copyright (C) 2016 D Levin (https://www.kfrlib.com) + Copyright (C) 2016-2022 Fractalium Ltd (https://www.kfrlib.com) This file is part of KFR KFR is free software: you can redistribute it and/or modify @@ -27,6 +27,8 @@ #include "impl/static_array.hpp" +#include "../cometa/string.hpp" +#include "../simd/logical.hpp" #include "../simd/min_max.hpp" #include "../simd/shuffle.hpp" #include "../simd/types.hpp" @@ -83,11 +85,17 @@ struct shape : static_array_base<index_t, csizeseq_t<dims>> static_assert(dims < maximum_dims); + template <int dummy = 0, KFR_ENABLE_IF(dummy == 0 && dims == 1)> + operator index_t() const + { + return this->front(); + } + bool ge(const shape& other) const { if constexpr (dims == 1) { - return front() >= other.front(); + return this->front() >= other.front(); } else { @@ -109,7 +117,7 @@ struct shape : static_array_base<index_t, csizeseq_t<dims>> { if constexpr (dims == 1) { - return front() <= other.front(); + return this->front() <= other.front(); } else { @@ -267,7 +275,17 @@ struct shape : static_array_base<index_t, csizeseq_t<dims>> return result; } - template <size_t odims> + template <index_t new_dims> + KFR_MEM_INTRINSIC shape<new_dims> extend(index_t value = infinite_size) const + { + static_assert(new_dims >= dims); + if constexpr (new_dims == dims) + return *this; + else + return concat(broadcast<new_dims - dims>(value), **this); + } + + template <index_t odims> shape<odims> trim() const { static_assert(odims <= dims); @@ -293,8 +311,6 @@ struct shape : static_array_base<index_t, csizeseq_t<dims>> } } - shape<dims + 1> extend() const { return concat(**this, vec{ index_t(0) }); } - KFR_MEM_INTRINSIC constexpr index_t revindex(size_t index) const { return index < dims ? this->operator[](dims - 1 - index) : 1; @@ -313,8 +329,8 @@ struct shape<0> static constexpr size_t size() { return static_size; } - shape() = default; - shape(index_t value) {} + constexpr shape() = default; + constexpr shape(index_t value) {} constexpr bool has_infinity() const { return false; } @@ -331,13 +347,20 @@ struct shape<0> KFR_MEM_INTRINSIC index_t dot(const shape& other) const { return 0; } - KFR_MEM_INTRINSIC size_t product() const { return 0; } + KFR_MEM_INTRINSIC index_t product() const { return 0; } KFR_MEM_INTRINSIC dimset tomask() const { return -1; } - shape<1> extend() const { return { 0 }; } + template <index_t new_dims> + KFR_MEM_INTRINSIC shape<new_dims> extend(index_t value = infinite_size) const + { + if constexpr (new_dims == 0) + return *this; + else + return shape<new_dims>{ value }; + } - template <size_t new_dims> + template <index_t new_dims> shape<new_dims> trim() const { static_assert(new_dims == 0); @@ -468,7 +491,7 @@ bool can_assign_from(const shape<dims1>& dst_shape, const shape<dims2>& src_shap vec<index_t, outdims> dst = padlow<outdims - dims1>(*dst_shape, 1); vec<index_t, outdims> src = padlow<outdims - dims2>(*src_shape, 1); - mask<index_t, outdims> match = src + 1 <= 2 || src == dst; + mask<index_t, outdims> match = src + 1 <= 2 || src == dst || dst == infinite_size; return all(match); } else @@ -477,7 +500,8 @@ bool can_assign_from(const shape<dims1>& dst_shape, const shape<dims2>& src_shap { index_t dst_size = dst_shape.revindex(i); index_t src_size = src_shape.revindex(i); - if (src_size == 1 || src_size == infinite_size || src_size == dst_size) + if (src_size == 1 || src_size == infinite_size || src_size == dst_size || + dst_size == infinite_size) { } else @@ -497,13 +521,20 @@ constexpr shape<dims> common_shape(const shape<dims>& shape) } template <index_t dims1, index_t dims2, index_t outdims = const_max(dims1, dims2)> -constexpr shape<outdims> common_shape(const shape<dims1>& shape1, const shape<dims2>& shape2) +KFR_MEM_INTRINSIC constexpr shape<outdims> common_shape(const shape<dims1>& shape1, + const shape<dims2>& shape2) { shape<outdims> result; for (size_t i = 0; i < outdims; ++i) { index_t size1 = shape1.revindex(i); index_t size2 = shape2.revindex(i); + if (!size1 || !size2) + { + result[outdims - 1 - i] = 0; + continue; + } + if (size1 == infinite_size) { if (size2 == infinite_size) @@ -512,14 +543,14 @@ constexpr shape<outdims> common_shape(const shape<dims1>& shape1, const shape<di } else { - result[outdims - 1 - i] = size2; + result[outdims - 1 - i] = size2 == 1 ? infinite_size : size2; } } else { if (size2 == infinite_size) { - result[outdims - 1 - i] = size1; + result[outdims - 1 - i] = size1 == 1 ? infinite_size : size1; } else { @@ -540,11 +571,19 @@ constexpr shape<outdims> common_shape(const shape<dims1>& shape1, const shape<di } template <> -KFR_MEM_INTRINSIC shape<0> common_shape(const shape<0>& shape1, const shape<0>& shape2) +KFR_MEM_INTRINSIC constexpr shape<0> common_shape(const shape<0>& shape1, const shape<0>& shape2) { return {}; } +template <index_t dims1, index_t dims2, index_t... dims, index_t outdims = const_max(dims1, dims2, dims...)> +KFR_MEM_INTRINSIC constexpr shape<outdims> common_shape(const shape<dims1>& shape1, + const shape<dims2>& shape2, + const shape<dims>&... shapes) +{ + return common_shape(shape1, common_shape(shape2, shapes...)); +} + template <index_t dims1, index_t dims2> KFR_MEM_INTRINSIC bool same_layout(const shape<dims1>& x, const shape<dims2>& y) { @@ -745,6 +784,7 @@ struct axis_params { constexpr static index_t axis = Axis; constexpr static index_t width = N; + constexpr static index_t value = N; constexpr axis_params() = default; }; @@ -753,3 +793,31 @@ template <index_t Axis, index_t N> constexpr inline const axis_params<Axis, N> axis_params_v{}; } // namespace kfr + +namespace cometa +{ +template <kfr::index_t dims> +struct representation<kfr::shape<dims>> +{ + using type = std::string; + static std::string get(const kfr::shape<dims>& value) + { + if constexpr (dims == 0) + { + return "shape{}"; + } + else + { + std::string s; + for (kfr::index_t i = 0; i < dims; ++i) + { + if (CMT_LIKELY(i > 0)) + s += ", "; + s += as_string(value[i]); + } + return "shape{" + s + "}"; + } + } +}; + +} // namespace cometa diff --git a/include/kfr/base/simd_expressions.hpp b/include/kfr/base/simd_expressions.hpp @@ -2,7 +2,7 @@ * @{ */ /* - Copyright (C) 2016 D Levin (https://www.kfrlib.com) + Copyright (C) 2016-2022 Fractalium Ltd (https://www.kfrlib.com) This file is part of KFR KFR is free software: you can redistribute it and/or modify @@ -25,9 +25,15 @@ */ #pragma once +#include "../simd/abs.hpp" +#include "../simd/clamp.hpp" #include "../simd/comparison.hpp" #include "../simd/complex.hpp" +#include "../simd/min_max.hpp" #include "../simd/operators.hpp" +#include "../simd/round.hpp" +#include "../simd/saturation.hpp" +#include "../simd/select.hpp" #include "../simd/vec.hpp" #include "expression.hpp" #include "univector.hpp" @@ -36,17 +42,20 @@ namespace kfr { +inline namespace CMT_ARCH_NAME +{ + /** * @brief Returns template expression that returns sum of all the arguments passed to a function. */ template <typename... E, KFR_ACCEPT_EXPRESSIONS(E...)> -KFR_INTRINSIC xfunction<fn::add, E...> add(E&&... x) +KFR_INTRINSIC expression_function<fn::add, E...> add(E&&... x) { return { fn::add(), std::forward<E>(x)... }; } template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)> -KFR_INTRINSIC xfunction<fn::sub, E1, E2> sub(E1&& x, E2&& y) +KFR_INTRINSIC expression_function<fn::sub, E1, E2> sub(E1&& x, E2&& y) { return { fn::sub(), std::forward<E1>(x), std::forward<E2>(y) }; } @@ -55,97 +64,102 @@ KFR_INTRINSIC xfunction<fn::sub, E1, E2> sub(E1&& x, E2&& y) * @brief Returns template expression that returns product of all the arguments passed to a function. */ template <typename... E, KFR_ACCEPT_EXPRESSIONS(E...)> -KFR_INTRINSIC xfunction<fn::mul, E...> mul(E&&... x) +KFR_INTRINSIC expression_function<fn::mul, E...> mul(E&&... x) { return { fn::mul(), std::forward<E>(x)... }; } template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)> -KFR_INTRINSIC xfunction<fn::ipow, E1, E2> ipow(E1&& x, E2&& b) +KFR_INTRINSIC expression_function<fn::ipow, E1, E2> ipow(E1&& x, E2&& b) { return { fn::ipow(), std::forward<E1>(x), std::forward<E2>(b) }; } template <typename E1, typename E2, typename E3, KFR_ACCEPT_EXPRESSIONS(E1, E2, E3)> -KFR_INTRINSIC xfunction<fn::mix, E1, E2, E3> mix(E1&& c, E2&& x, E3&& y) +KFR_INTRINSIC expression_function<fn::mix, E1, E2, E3> mix(E1&& c, E2&& x, E3&& y) { return { fn::mix(), std::forward<E1>(c), std::forward<E2>(x), std::forward<E3>(y) }; } template <typename E1, typename E2, typename E3, KFR_ACCEPT_EXPRESSIONS(E1, E2, E3)> -KFR_INTRINSIC xfunction<fn::mixs, E1, E2, E3> mixs(E1&& c, E2&& x, E3&& y) +KFR_INTRINSIC expression_function<fn::mixs, E1, E2, E3> mixs(E1&& c, E2&& x, E3&& y) { return { fn::mixs(), std::forward<E1>(c), std::forward<E2>(x), std::forward<E3>(y) }; } template <typename... E, KFR_ACCEPT_EXPRESSIONS(E...)> -KFR_INTRINSIC xfunction<fn::horner, E...> horner(E&&... x) +KFR_INTRINSIC expression_function<fn::horner, E...> horner(E&&... x) { return { fn::horner(), std::forward<E>(x)... }; } template <typename... E, KFR_ACCEPT_EXPRESSIONS(E...)> -KFR_INTRINSIC xfunction<fn::horner_even, E...> horner_even(E&&... x) +KFR_INTRINSIC expression_function<fn::horner_even, E...> horner_even(E&&... x) { return { fn::horner_even(), std::forward<E>(x)... }; } template <typename... E, KFR_ACCEPT_EXPRESSIONS(E...)> -KFR_INTRINSIC xfunction<fn::horner_odd, E...> horner_odd(E&&... x) +KFR_INTRINSIC expression_function<fn::horner_odd, E...> horner_odd(E&&... x) { return { fn::horner_odd(), std::forward<E>(x)... }; } template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)> -KFR_INTRINSIC xfunction<fn::add, E1, E2> operator+(E1&& e1, E2&& e2) +KFR_INTRINSIC expression_function<fn::add, E1, E2> operator+(E1&& e1, E2&& e2) { return { fn::add(), std::forward<E1>(e1), std::forward<E2>(e2) }; } template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)> -KFR_INTRINSIC xfunction<fn::sub, E1, E2> operator-(E1&& e1, E2&& e2) +KFR_INTRINSIC expression_function<fn::sub, E1, E2> operator-(E1&& e1, E2&& e2) { return { fn::sub(), std::forward<E1>(e1), std::forward<E2>(e2) }; } template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)> -KFR_INTRINSIC xfunction<fn::mul, E1, E2> operator*(E1&& e1, E2&& e2) +KFR_INTRINSIC expression_function<fn::mul, E1, E2> operator*(E1&& e1, E2&& e2) { return { fn::mul(), std::forward<E1>(e1), std::forward<E2>(e2) }; } template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)> -KFR_INTRINSIC xfunction<fn::div, E1, E2> operator/(E1&& e1, E2&& e2) +KFR_INTRINSIC expression_function<fn::div, E1, E2> operator/(E1&& e1, E2&& e2) { return { fn::div(), std::forward<E1>(e1), std::forward<E2>(e2) }; } +template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)> +KFR_INTRINSIC expression_function<fn::mod, E1, E2> operator%(E1&& e1, E2&& e2) +{ + return { fn::mod(), std::forward<E1>(e1), std::forward<E2>(e2) }; +} template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)> -KFR_INTRINSIC xfunction<fn::bitwiseand, E1, E2> operator&(E1&& e1, E2&& e2) +KFR_INTRINSIC expression_function<fn::bitwiseand, E1, E2> operator&(E1&& e1, E2&& e2) { return { fn::bitwiseand(), std::forward<E1>(e1), std::forward<E2>(e2) }; } template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)> -KFR_INTRINSIC xfunction<fn::bitwiseor, E1, E2> operator|(E1&& e1, E2&& e2) +KFR_INTRINSIC expression_function<fn::bitwiseor, E1, E2> operator|(E1&& e1, E2&& e2) { return { fn::bitwiseor(), std::forward<E1>(e1), std::forward<E2>(e2) }; } template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)> -KFR_INTRINSIC xfunction<fn::bitwisexor, E1, E2> operator^(E1&& e1, E2&& e2) +KFR_INTRINSIC expression_function<fn::bitwisexor, E1, E2> operator^(E1&& e1, E2&& e2) { return { fn::bitwisexor(), std::forward<E1>(e1), std::forward<E2>(e2) }; } template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)> -KFR_INTRINSIC xfunction<fn::shl, E1, E2> operator<<(E1&& e1, E2&& e2) +KFR_INTRINSIC expression_function<fn::shl, E1, E2> operator<<(E1&& e1, E2&& e2) { return { fn::shl(), std::forward<E1>(e1), std::forward<E2>(e2) }; } template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)> -KFR_INTRINSIC xfunction<fn::shr, E1, E2> operator>>(E1&& e1, E2&& e2) +KFR_INTRINSIC expression_function<fn::shr, E1, E2> operator>>(E1&& e1, E2&& e2) { return { fn::shr(), std::forward<E1>(e1), std::forward<E2>(e2) }; } @@ -154,7 +168,7 @@ KFR_INTRINSIC xfunction<fn::shr, E1, E2> operator>>(E1&& e1, E2&& e2) * @brief Returns template expression that returns square of x. */ template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_INTRINSIC xfunction<fn::sqr, E1> sqr(E1&& x) +KFR_INTRINSIC expression_function<fn::sqr, E1> sqr(E1&& x) { return { fn::sqr(), std::forward<E1>(x) }; } @@ -163,112 +177,330 @@ KFR_INTRINSIC xfunction<fn::sqr, E1> sqr(E1&& x) * @brief Returns template expression that returns cube of x. */ template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_INTRINSIC xfunction<fn::cub, E1> cub(E1&& x) +KFR_INTRINSIC expression_function<fn::cub, E1> cub(E1&& x) { return { fn::cub(), std::forward<E1>(x) }; } template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_INTRINSIC xfunction<fn::pow2, E1> pow2(E1&& x) +KFR_INTRINSIC expression_function<fn::pow2, E1> pow2(E1&& x) { return { fn::pow2(), std::forward<E1>(x) }; } template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_INTRINSIC xfunction<fn::pow3, E1> pow3(E1&& x) +KFR_INTRINSIC expression_function<fn::pow3, E1> pow3(E1&& x) { return { fn::pow3(), std::forward<E1>(x) }; } template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_INTRINSIC xfunction<fn::pow4, E1> pow4(E1&& x) +KFR_INTRINSIC expression_function<fn::pow4, E1> pow4(E1&& x) { return { fn::pow4(), std::forward<E1>(x) }; } template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_INTRINSIC xfunction<fn::pow5, E1> pow5(E1&& x) +KFR_INTRINSIC expression_function<fn::pow5, E1> pow5(E1&& x) { return { fn::pow5(), std::forward<E1>(x) }; } template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_INTRINSIC xfunction<fn::neg, E1> operator-(E1&& e1) +KFR_INTRINSIC expression_function<fn::neg, E1> operator-(E1&& e1) { return { fn::neg(), std::forward<E1>(e1) }; } template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_INTRINSIC xfunction<fn::bitwisenot, E1> operator~(E1&& e1) +KFR_INTRINSIC expression_function<fn::bitwisenot, E1> operator~(E1&& e1) { return { fn::bitwisenot(), std::forward<E1>(e1) }; } /// @brief Constructs complex value from real and imaginary parts template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)> -KFR_INTRINSIC xfunction<fn::make_complex, E1, E2> make_complex(E1&& re, E2&& im) +KFR_INTRINSIC expression_function<fn::make_complex, E1, E2> make_complex(E1&& re, E2&& im) { return { fn::make_complex{}, std::forward<E1>(re), std::forward<E2>(im) }; } template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)> -KFR_INTRINSIC xfunction<fn::equal, E1, E2> operator==(E1&& e1, E2&& e2) +KFR_INTRINSIC expression_function<fn::equal, E1, E2> operator==(E1&& e1, E2&& e2) { return { fn::equal(), std::forward<E1>(e1), std::forward<E2>(e2) }; } template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)> -KFR_INTRINSIC xfunction<fn::notequal, E1, E2> operator!=(E1&& e1, E2&& e2) +KFR_INTRINSIC expression_function<fn::notequal, E1, E2> operator!=(E1&& e1, E2&& e2) { return { fn::notequal(), std::forward<E1>(e1), std::forward<E2>(e2) }; } template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)> -KFR_INTRINSIC xfunction<fn::less, E1, E2> operator<(E1&& e1, E2&& e2) +KFR_INTRINSIC expression_function<fn::less, E1, E2> operator<(E1&& e1, E2&& e2) { return { fn::less(), std::forward<E1>(e1), std::forward<E2>(e2) }; } template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)> -KFR_INTRINSIC xfunction<fn::greater, E1, E2> operator>(E1&& e1, E2&& e2) +KFR_INTRINSIC expression_function<fn::greater, E1, E2> operator>(E1&& e1, E2&& e2) { return { fn::greater(), std::forward<E1>(e1), std::forward<E2>(e2) }; } template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)> -KFR_INTRINSIC xfunction<fn::lessorequal, E1, E2> operator<=(E1&& e1, E2&& e2) +KFR_INTRINSIC expression_function<fn::lessorequal, E1, E2> operator<=(E1&& e1, E2&& e2) { return { fn::lessorequal(), std::forward<E1>(e1), std::forward<E2>(e2) }; } template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)> -KFR_INTRINSIC xfunction<fn::greaterorequal, E1, E2> operator>=(E1&& e1, E2&& e2) +KFR_INTRINSIC expression_function<fn::greaterorequal, E1, E2> operator>=(E1&& e1, E2&& e2) { return { fn::greaterorequal(), std::forward<E1>(e1), std::forward<E2>(e2) }; } /// @brief Returns the real part of the complex value template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_INTRINSIC xfunction<fn::real, E1> real(E1&& x) +KFR_INTRINSIC expression_function<fn::real, E1> real(E1&& x) { return { fn::real{}, std::forward<E1>(x) }; } /// @brief Returns the imaginary part of the complex value template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_INTRINSIC xfunction<fn::imag, E1> imag(E1&& x) +KFR_INTRINSIC expression_function<fn::imag, E1> imag(E1&& x) { return { fn::imag{}, std::forward<E1>(x) }; } /// @brief Returns template expression that returns the complex conjugate of the complex number x template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> -KFR_FUNCTION xfunction<fn::cconj, E1> cconj(E1&& x) +KFR_FUNCTION expression_function<fn::cconj, E1> cconj(E1&& x) { return { fn::cconj(), std::forward<E1>(x) }; } template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)> -CMT_INTRINSIC xfunction<fn::interleave, E1, E2> interleave(E1&& x, E2&& y) +CMT_INTRINSIC expression_function<fn::interleave, E1, E2> interleave(E1&& x, E2&& y) { return { fn::interleave(), std::forward<E1>(x), std::forward<E2>(y) }; } +/** + * @brief Returns template expression that returns x if m is true, otherwise return y. Order of the arguments + * is same as in ternary operator. + */ +template <typename E1, typename E2, typename E3, KFR_ACCEPT_EXPRESSIONS(E1, E2, E3)> +KFR_FUNCTION expression_function<fn::select, E1, E2, E3> select(E1&& m, E2&& x, E3&& y) +{ + return { fn::select(), std::forward<E1>(m), std::forward<E2>(x), std::forward<E3>(y) }; +} + +/** + * @brief Returns template expression that returns the absolute value of x. + */ +template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> +KFR_FUNCTION expression_function<fn::abs, E1> abs(E1&& x) +{ + return { fn::abs(), std::forward<E1>(x) }; +} + +/** + * @brief Returns the smaller of two values. Accepts and returns expressions. + */ +template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)> +KFR_FUNCTION expression_function<fn::min, E1, E2> min(E1&& x, E2&& y) +{ + return { fn::min(), std::forward<E1>(x), std::forward<E2>(y) }; +} + +/** + * @brief Returns the greater of two values. Accepts and returns expressions. + */ +template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)> +KFR_FUNCTION expression_function<fn::max, E1, E2> max(E1&& x, E2&& y) +{ + return { fn::max(), std::forward<E1>(x), std::forward<E2>(y) }; +} + +/** + * @brief Returns the smaller in magnitude of two values. Accepts and returns expressions. + */ +template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)> +KFR_FUNCTION expression_function<fn::absmin, E1, E2> absmin(E1&& x, E2&& y) +{ + return { fn::absmin(), std::forward<E1>(x), std::forward<E2>(y) }; +} + +/** + * @brief Returns the greater in magnitude of two values. Accepts and returns expressions. + */ +template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)> +KFR_FUNCTION expression_function<fn::absmax, E1, E2> absmax(E1&& x, E2&& y) +{ + return { fn::absmax(), std::forward<E1>(x), std::forward<E2>(y) }; +} + +/// @brief Returns the largest integer value not greater than x. Accepts and returns expressions. +template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> +KFR_FUNCTION expression_function<fn::floor, E1> floor(E1&& x) +{ + return { fn::floor(), std::forward<E1>(x) }; +} + +template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> +KFR_FUNCTION expression_function<fn::ceil, E1> ceil(E1&& x) +{ + return { fn::ceil(), std::forward<E1>(x) }; +} + +template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> +KFR_FUNCTION expression_function<fn::round, E1> round(E1&& x) +{ + return { fn::round(), std::forward<E1>(x) }; +} + +template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> +KFR_FUNCTION expression_function<fn::trunc, E1> trunc(E1&& x) +{ + return { fn::trunc(), std::forward<E1>(x) }; +} + +template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> +KFR_FUNCTION expression_function<fn::fract, E1> fract(E1&& x) +{ + return { fn::fract(), std::forward<E1>(x) }; +} + +template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> +KFR_FUNCTION expression_function<fn::ifloor, E1> ifloor(E1&& x) +{ + return { fn::ifloor(), std::forward<E1>(x) }; +} + +template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> +KFR_FUNCTION expression_function<fn::iceil, E1> iceil(E1&& x) +{ + return { fn::iceil(), std::forward<E1>(x) }; +} + +template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> +KFR_FUNCTION expression_function<fn::iround, E1> iround(E1&& x) +{ + return { fn::iround(), std::forward<E1>(x) }; +} + +template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> +KFR_FUNCTION expression_function<fn::itrunc, E1> itrunc(E1&& x) +{ + return { fn::itrunc(), std::forward<E1>(x) }; +} + +/// @brief Creates an expression that returns the first argument clamped to a range [lo, hi] +template <typename E1, typename E2, typename E3, KFR_ACCEPT_EXPRESSIONS(E1, E2, E3)> +KFR_FUNCTION expression_function<fn::clamp, E1, E2, E3> clamp(E1&& x, E2&& lo, E3&& hi) +{ + return { fn::clamp(), std::forward<E1>(x), std::forward<E2>(lo), std::forward<E3>(hi) }; +} + +/// @brief Creates an expression that returns the first argument clamped to a range [0, hi] +template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)> +KFR_FUNCTION expression_function<fn::clamp, E1, E2> clamp(E1&& x, E2&& hi) +{ + return { fn::clamp(), std::forward<E1>(x), std::forward<E2>(hi) }; +} + +/// @brief Creates an expression that adds two arguments using saturation +template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)> +KFR_INTRINSIC expression_function<fn::satadd, E1, E2> satadd(E1&& x, E2&& y) +{ + return { fn::satadd(), std::forward<E1>(x), std::forward<E2>(y) }; +} + +/// @brief Creates an expression that subtracts two arguments using saturation +template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)> +KFR_INTRINSIC expression_function<fn::satsub, E1, E2> satsub(E1&& x, E2&& y) +{ + return { fn::satsub(), std::forward<E1>(x), std::forward<E2>(y) }; +} + +template <typename E1, typename E2, enable_if_input_output_expression<E1>* = nullptr, + enable_if_input_expression<E2>* = nullptr> +KFR_INTRINSIC E1& operator+=(E1& e1, E2&& e2) +{ + process(e1, operator+(e1, e2)); + return e1; +} +template <typename E1, typename E2, enable_if_input_output_expression<E1>* = nullptr, + enable_if_input_expression<E2>* = nullptr> +KFR_INTRINSIC E1& operator-=(E1& e1, E2&& e2) +{ + process(e1, operator-(e1, e2)); + return e1; +} +template <typename E1, typename E2, enable_if_input_output_expression<E1>* = nullptr, + enable_if_input_expression<E2>* = nullptr> +KFR_INTRINSIC E1& operator*=(E1& e1, E2&& e2) +{ + process(e1, operator*(e1, e2)); + return e1; +} +template <typename E1, typename E2, enable_if_input_output_expression<E1>* = nullptr, + enable_if_input_expression<E2>* = nullptr> +KFR_INTRINSIC E1& operator*=(E1&& e1, E2&& e2) +{ + process(e1, operator*(e1, e2)); + return e1; +} +template <typename E1, typename E2, enable_if_input_output_expression<E1>* = nullptr, + enable_if_input_expression<E2>* = nullptr> +KFR_INTRINSIC E1& operator/=(E1& e1, E2&& e2) +{ + process(e1, operator/(e1, e2)); + return e1; +} +template <typename E1, typename E2, enable_if_input_output_expression<E1>* = nullptr, + enable_if_input_expression<E2>* = nullptr> +KFR_INTRINSIC E1& operator%=(E1& e1, E2&& e2) +{ + process(e1, operator%(e1, e2)); + return e1; +} +template <typename E1, typename E2, enable_if_input_output_expression<E1>* = nullptr, + enable_if_input_expression<E2>* = nullptr> +KFR_INTRINSIC E1& operator|=(E1& e1, E2&& e2) +{ + process(e1, operator|(e1, e2)); + return e1; +} +template <typename E1, typename E2, enable_if_input_output_expression<E1>* = nullptr, + enable_if_input_expression<E2>* = nullptr> +KFR_INTRINSIC E1& operator&=(E1& e1, E2&& e2) +{ + process(e1, operator&(e1, e2)); + return e1; +} +template <typename E1, typename E2, enable_if_input_output_expression<E1>* = nullptr, + enable_if_input_expression<E2>* = nullptr> +KFR_INTRINSIC E1& operator^=(E1& e1, E2&& e2) +{ + process(e1, operator^(e1, e2)); + return e1; +} +template <typename E1, typename E2, enable_if_input_output_expression<E1>* = nullptr, + enable_if_input_expression<E2>* = nullptr> +KFR_INTRINSIC E1& operator<<=(E1& e1, E2&& e2) +{ + process(e1, operator<<(e1, e2)); + return e1; +} +template <typename E1, typename E2, enable_if_input_output_expression<E1>* = nullptr, + enable_if_input_expression<E2>* = nullptr> +KFR_INTRINSIC E1& operator>>=(E1& e1, E2&& e2) +{ + process(e1, operator>>(e1, e2)); + return e1; +} + +} // namespace CMT_ARCH_NAME + } // namespace kfr diff --git a/include/kfr/base/state_holder.hpp b/include/kfr/base/state_holder.hpp @@ -0,0 +1,49 @@ +/** @addtogroup fir + * @{ + */ +/** + * KFR (http://kfrlib.com) + * Copyright (C) 2016-2022 Fractalium Ltd + * See LICENSE.txt for details + */ +#pragma once + +#include "../cident.h" +#include <functional> + +namespace kfr +{ + +template <typename T, bool Stateless> +struct state_holder +{ + constexpr state_holder() = delete; + constexpr state_holder(const state_holder&) = default; + constexpr state_holder(state_holder&&) = default; + constexpr state_holder(const T& state) CMT_NOEXCEPT : s(state) {} + constexpr state_holder(std::reference_wrapper<const T> state) CMT_NOEXCEPT : s(state) {} + T s; + + const T* operator->() const { return &s; } + T* operator->() { return &s; } + const T& operator*() const { return s; } + T& operator*() { return s; } +}; + +template <typename T> +struct state_holder<T, true> +{ + constexpr state_holder() = delete; + constexpr state_holder(const state_holder&) = default; + constexpr state_holder(state_holder&&) = default; + constexpr state_holder(T& state) CMT_NOEXCEPT : s(state) {} + constexpr state_holder(std::reference_wrapper<T> state) CMT_NOEXCEPT : s(state) {} + T& s; + + const T* operator->() const { return &s; } + T* operator->() { return &s; } + const T& operator*() const { return s; } + T& operator*() { return s; } +}; + +} // namespace kfr diff --git a/include/kfr/base/tensor.hpp b/include/kfr/base/tensor.hpp @@ -2,7 +2,7 @@ * @{ */ /* - Copyright (C) 2016 D Levin (https://www.kfrlib.com) + Copyright (C) 2016-2022 Fractalium Ltd (https://www.kfrlib.com) This file is part of KFR KFR is free software: you can redistribute it and/or modify @@ -29,13 +29,15 @@ #include "../cometa/array.hpp" -#include "../simd/logical.hpp" -#include "../simd/min_max.hpp" #include "../simd/horizontal.hpp" #include "../simd/impl/function.hpp" +#include "../simd/logical.hpp" +#include "../simd/min_max.hpp" #include "../simd/read_write.hpp" #include "../simd/types.hpp" +#include "expression.hpp" #include "memory.hpp" +#include "shape.hpp" CMT_PRAGMA_MSVC(warning(push)) CMT_PRAGMA_MSVC(warning(disable : 4324)) @@ -822,7 +824,7 @@ public: shape_type index{ 0 }; std::string open_filler(open.size(), ' '); std::string separator_trimmed = separator.substr(0, 1 + separator.find_last_not_of(" \t")); - int columns = 0; + int columns = 0; do { std::string str = as_string(wrap_fmt(access(index), ctype<Fmt>)); @@ -999,30 +1001,6 @@ tensor<typename Traits::value_type, Traits::dims> trender(const E& expr, shape<T namespace cometa { -template <kfr::index_t dims> -struct representation<kfr::shape<dims>> -{ - using type = std::string; - static std::string get(const kfr::shape<dims>& value) - { - if constexpr (dims == 0) - { - return "()"; - } - else - { - std::string s; - for (kfr::index_t i = 0; i < dims; ++i) - { - if (CMT_LIKELY(i > 0)) - s += ", "; - s += as_string(value[i]); - } - return s; - } - } -}; - template <typename T, kfr::index_t dims> struct representation<kfr::tensor<T, dims>> { diff --git a/include/kfr/base/univector.hpp b/include/kfr/base/univector.hpp @@ -97,30 +97,12 @@ struct univector_base<T, Class, true> template <typename Input, KFR_ACCEPT_EXPRESSIONS(Input)> KFR_MEM_INTRINSIC Class& operator=(Input&& input) { + constexpr index_t dims = expression_dims<Input>; + static_assert(dims <= 1, "univector accepts only expressions with dims <= 1"); assign_expr(std::forward<Input>(input)); return *derived_cast<Class>(this); } -#define KFR_UVEC_ASGN_OP(aop, op) \ - template <typename Input> \ - KFR_MEM_INTRINSIC Class& aop(Input&& input) \ - { \ - assign_expr(*derived_cast<Class>(this) op std::forward<Input>(input)); \ - return *derived_cast<Class>(this); \ - } - KFR_UVEC_ASGN_OP(operator+=, +) - KFR_UVEC_ASGN_OP(operator-=, -) - KFR_UVEC_ASGN_OP(operator*=, *) - KFR_UVEC_ASGN_OP(operator/=, /) - KFR_UVEC_ASGN_OP(operator%=, %) - - KFR_UVEC_ASGN_OP(operator&=, &) - KFR_UVEC_ASGN_OP(operator|=, |) - KFR_UVEC_ASGN_OP(operator^=, ^) - - KFR_UVEC_ASGN_OP(operator<<=, <<) - KFR_UVEC_ASGN_OP(operator>>=, >>) - /// @brief Returns subrange of the vector. /// If start is greater or equal to this->size, returns empty univector /// If requested size is greater than this->size, returns only available elements @@ -336,13 +318,14 @@ struct alignas(platform<>::maximum_vector_alignment) univector constexpr univector() CMT_NOEXCEPT_SPEC(noexcept(std::array<T, Size>())) = default; constexpr univector(size_t, const T& value) { std::fill(this->begin(), this->end(), value); } - constexpr static bool size_known = true; - constexpr static bool is_array = true; - constexpr static bool is_array_ref = false; - constexpr static bool is_vector = false; - constexpr static bool is_aligned = true; - constexpr static bool is_pod = kfr::is_pod<T>; - using value_type = T; + constexpr static bool size_known = true; + constexpr static size_t static_size = Size; + constexpr static bool is_array = true; + constexpr static bool is_array_ref = false; + constexpr static bool is_vector = false; + constexpr static bool is_aligned = true; + constexpr static bool is_pod = kfr::is_pod<T>; + using value_type = T; value_type get(size_t index, value_type fallback_value) const CMT_NOEXCEPT { @@ -384,6 +367,10 @@ struct univector<T, tag_array_ref> : array_ref<T>, constexpr univector(univector<U, Tag>& other) : array_ref<T>(other.data(), other.size()) { } + template <typename U, univector_tag Tag, KFR_ENABLE_IF(is_same<remove_const<T>, U>&& is_const<T>)> + constexpr univector(univector<U, Tag>&& other) : array_ref<T>(other.data(), other.size()) + { + } void resize(size_t) CMT_NOEXCEPT {} constexpr static bool size_known = false; constexpr static bool is_array = false; @@ -417,7 +404,12 @@ struct univector<T, tag_dynamic_vector> univector(Input&& input) { static_assert(!is_infinite<Input>, "Dynamically sized vector requires finite input expression"); - this->resize(input.size()); + constexpr index_t dims = expression_dims<Input>; + static_assert(dims <= 1, "univector accepts only expressions with dims <= 1"); + if constexpr (dims > 0) + { + this->resize(shapeof(input).front()); + } this->assign_expr(std::forward<Input>(input)); } constexpr univector() CMT_NOEXCEPT_SPEC(noexcept(std::vector<T, allocator<T>>())) = default; @@ -452,13 +444,44 @@ struct univector<T, tag_dynamic_vector> template <typename Input, KFR_ACCEPT_EXPRESSIONS(Input)> KFR_MEM_INTRINSIC univector& operator=(Input&& input) { - if (input.size() != infinite_size) - this->resize(input.size()); + constexpr index_t dims = expression_dims<Input>; + static_assert(dims <= 1, "univector accepts only expressions with dims <= 1"); + if constexpr (dims > 0) + { + if (shapeof(input).front() != infinite_size) + this->resize(shapeof(input).front()); + } this->assign_expr(std::forward<Input>(input)); return *this; } }; +template <typename T, univector_tag Tag> +struct expression_traits<univector<T, Tag>> : public expression_traits_defaults +{ + using value_type = std::remove_const_t<T>; + constexpr static size_t dims = 1; + constexpr static shape<dims> shapeof(const univector<T, Tag>& u) { return shape<1>(u.size()); } + constexpr static shape<dims> shapeof() + { + if constexpr (univector<T, Tag>::size_known) + return shape<1>{ univector<T, Tag>::static_size }; + else + return shape<1>{ undefined_size }; + } +}; + +template <typename T, univector_tag T1, univector_tag T2> +KFR_FUNCTION bool operator==(const univector<T, T1>& x, const univector<T, T2>& y) +{ + return std::equal(x.begin(), x.end(), y.begin(), y.end()); +} +template <typename T, univector_tag T1, univector_tag T2> +KFR_FUNCTION bool operator!=(const univector<T, T1>& x, const univector<T, T2>& y) +{ + return !operator==(x, y); +} + /// @brief Alias for ``univector<T, tag_array_ref>``; template <typename T> using univector_ref = univector<T, tag_array_ref>; @@ -586,35 +609,36 @@ inline namespace CMT_ARCH_NAME { template <typename T, univector_tag Tag, size_t N> -KFR_INTRINSIC vec<T, N> get_elements(const univector<T, Tag>& self, const shape<1>& index, - const axis_params<0, N>&) +KFR_INTRINSIC vec<std::remove_const_t<T>, N> get_elements(const univector<T, Tag>& self, + const shape<1>& index, const axis_params<0, N>&) { const T* data = self.data(); - return static_cast<vec<U, N>>(read<N>(ptr_cast<T>(data) + index.front())); + return read<N>(ptr_cast<T>(data) + index.front()); } -template <typename T, univector_tag Tag, size_t N> +template <typename T, univector_tag Tag, size_t N, KFR_ENABLE_IF(!std::is_const_v<T>)> KFR_INTRINSIC void set_elements(univector<T, Tag>& self, const shape<1>& index, const axis_params<0, N>&, const identity<vec<T, N>>& value) { T* data = self.data(); - write(ptr_cast<T>(data) + index.front(), vec<T, N>(value)); + write(ptr_cast<T>(data) + index.front(), value); } /// @brief Converts an expression to univector -template <typename Expr, typename T = value_type_of<Expr>> +template <typename Expr, typename T = expression_value_type<Expr>> KFR_INTRINSIC univector<T> render(Expr&& expr) { + static_assert(expression_dims<Expr> == 1); static_assert(!is_infinite<Expr>, "render: Can't process infinite expressions. Pass size as a second argument to render."); univector<T> result; - result.resize(expr.size()); + result.resize(shapeof(expr).front()); result = expr; return result; } /// @brief Converts an expression to univector -template <typename Expr, typename T = value_type_of<Expr>> +template <typename Expr, typename T = expression_value_type<Expr>> KFR_INTRINSIC univector<T> render(Expr&& expr, size_t size, size_t offset = 0) { univector<T> result; @@ -624,7 +648,7 @@ KFR_INTRINSIC univector<T> render(Expr&& expr, size_t size, size_t offset = 0) } /// @brief Converts an expression to univector -template <typename Expr, size_t Size, typename T = value_type_of<Expr>> +template <typename Expr, size_t Size, typename T = expression_value_type<Expr>> KFR_INTRINSIC univector<T, Size> render(Expr&& expr, csize_t<Size>) { univector<T, Size> result; diff --git a/include/kfr/capi.h b/include/kfr/capi.h @@ -238,7 +238,7 @@ typedef double kfr_c64; size_t size); KFR_API_SPEC void kfr_filter_process_f64(KFR_FILTER_F64* plan, kfr_f64* output, const kfr_f64* input, size_t size); - + KFR_API_SPEC void kfr_filter_reset_f32(KFR_FILTER_F32* plan); KFR_API_SPEC void kfr_filter_reset_f64(KFR_FILTER_F64* plan); diff --git a/include/kfr/cometa.hpp b/include/kfr/cometa.hpp @@ -149,6 +149,9 @@ using remove_const = typename std::remove_const<T>::type; template <typename T> using underlying_type = typename std::underlying_type<T>::type; +template <typename T1, typename T2> +using or_type = std::conditional_t<std::is_same_v<T1, void>, T2, T1>; + template <typename T> constexpr inline bool is_pod = std::is_pod<T>::value || details::is_pod_impl<T>::value; @@ -267,7 +270,8 @@ struct compound_type_traits<std::pair<T, T>> using deep_rebind = std::pair<typename compound_type_traits<subtype>::template deep_rebind<U>, typename compound_type_traits<subtype>::template deep_rebind<U>>; - CMT_MEM_INTRINSIC static constexpr const subtype& at(const std::pair<subtype, subtype>& value, size_t index) + CMT_MEM_INTRINSIC static constexpr const subtype& at(const std::pair<subtype, subtype>& value, + size_t index) { return index == 0 ? value.first : value.second; } @@ -684,12 +688,12 @@ constexpr CMT_INTRINSIC Ret cfilter(cvals_t<T, vals...>, cvals_t<bool, flags...> #define CMT_UN_OP(op) \ template <typename T1, T1... vals1, \ typename Ret = cvals_t<decltype(op std::declval<T1>()), (op vals1)...>> \ - constexpr CMT_INTRINSIC Ret operator op(cvals_t<T1, vals1...>) \ + constexpr CMT_INTRINSIC Ret operator op(cvals_t<T1, vals1...>) \ { \ return Ret{}; \ } \ template <typename T1, T1 val1, typename Ret = cval_t<decltype(op std::declval<T1>()), (op val1)>> \ - constexpr CMT_INTRINSIC Ret operator op(cval_t<T1, val1>) \ + constexpr CMT_INTRINSIC Ret operator op(cval_t<T1, val1>) \ { \ return Ret{}; \ } @@ -698,21 +702,21 @@ constexpr CMT_INTRINSIC Ret cfilter(cvals_t<T, vals...>, cvals_t<bool, flags...> template <typename T1, T1... vals1, typename T2, T2... vals2, \ typename Ret = \ cvals_t<decltype(std::declval<T1>() op std::declval<T2>()), (vals1 op vals2)...>> \ - constexpr CMT_INTRINSIC Ret operator op(cvals_t<T1, vals1...>, cvals_t<T2, vals2...>) \ + constexpr CMT_INTRINSIC Ret operator op(cvals_t<T1, vals1...>, cvals_t<T2, vals2...>) \ { \ return Ret{}; \ } \ template <typename T1, T1... vals1, typename T2, T2 val2, \ typename Ret = \ cvals_t<decltype(std::declval<T1>() op std::declval<T2>()), (vals1 op val2)...>> \ - constexpr CMT_INTRINSIC Ret operator op(cvals_t<T1, vals1...>, cval_t<T2, val2>) \ + constexpr CMT_INTRINSIC Ret operator op(cvals_t<T1, vals1...>, cval_t<T2, val2>) \ { \ return Ret{}; \ } \ template <typename T1, T1 val1, typename T2, T2... vals2, \ typename Ret = \ cvals_t<decltype(std::declval<T1>() op std::declval<T2>()), (val1 op vals2)...>> \ - constexpr CMT_INTRINSIC Ret operator op(cval_t<T1, val1>, cvals_t<T2, vals2...>) \ + constexpr CMT_INTRINSIC Ret operator op(cval_t<T1, val1>, cvals_t<T2, vals2...>) \ { \ return Ret{}; \ } @@ -2120,6 +2124,9 @@ CMT_INTRINSIC constexpr Tout pack_elements(Arg x, Args... args) (pack_elements<Tout, Arg>(args...) << (sizeof(Arg) * 8)); } +template <typename T, bool reference> +using value_or_ref = std::conditional_t<reference, const T&, T>; + enum class special_constant { default_constructed, diff --git a/include/kfr/cometa/cstring.hpp b/include/kfr/cometa/cstring.hpp @@ -104,9 +104,9 @@ CMT_INTRINSIC cstring<N1 - Nfrom + Nto> str_replace_impl(size_t pos, const cstri { if (pos == size_t(-1)) stop_constexpr(); - return { { (indices < pos - ? str[indices] - : (indices < pos + Nto - 1) ? to[indices - pos] : str[indices - Nto + Nfrom])..., + return { { (indices < pos ? str[indices] + : (indices < pos + Nto - 1) ? to[indices - pos] + : str[indices - Nto + Nfrom])..., 0 } }; } } // namespace details diff --git a/include/kfr/cometa/function.hpp b/include/kfr/cometa/function.hpp @@ -145,8 +145,8 @@ inline function<Ret(Args...)> cdispatch(cvals_t<T, v0, values...>, identity<T> v { if (value == v0) { - return [=](Args... args) - CMT_INLINE_LAMBDA -> Ret { return fn(cval_t<T, v0>(), std::forward<Args>(args)...); }; + return [=](Args... args) CMT_INLINE_LAMBDA -> Ret + { return fn(cval_t<T, v0>(), std::forward<Args>(args)...); }; } else { diff --git a/include/kfr/cometa/numeric.hpp b/include/kfr/cometa/numeric.hpp @@ -124,13 +124,11 @@ constexpr inline datatype operator&(datatype x, datatype y) } template <typename T> -constexpr inline datatype typeclass = is_floating_point<typename compound_type_traits<T>::subtype> - ? datatype::f - : is_integral<typename compound_type_traits<T>::subtype> - ? (is_unsigned<typename compound_type_traits<T>::subtype> - ? datatype::u - : datatype::i) - : datatype(); +constexpr inline datatype typeclass = + is_floating_point<typename compound_type_traits<T>::subtype> ? datatype::f + : is_integral<typename compound_type_traits<T>::subtype> + ? (is_unsigned<typename compound_type_traits<T>::subtype> ? datatype::u : datatype::i) + : datatype(); template <typename T> constexpr inline bool is_f_class = typeclass<T> == datatype::f; diff --git a/include/kfr/dft/convolution.hpp b/include/kfr/dft/convolution.hpp @@ -80,18 +80,18 @@ namespace internal { /// @brief Utility class to abstract real/complex differences template <typename T> -struct dft_conv_plan: public dft_plan_real<T> +struct dft_conv_plan : public dft_plan_real<T> { dft_conv_plan(size_t size) : dft_plan_real<T>(size, dft_pack_format::Perm) {} - + size_t csize() const { return this->size / 2; } }; template <typename T> -struct dft_conv_plan<complex<T>>: public dft_plan<T> +struct dft_conv_plan<complex<T>> : public dft_plan<T> { dft_conv_plan(size_t size) : dft_plan<T>(size) {} - + size_t csize() const { return this->size; } }; } // namespace internal @@ -118,7 +118,7 @@ protected: using ST = subtype<T>; static constexpr auto real_fft = !std::is_same<T, complex<ST>>::value; - using plan_t = internal::dft_conv_plan<T>; + using plan_t = internal::dft_conv_plan<T>; // Length of filter data. size_t data_size; diff --git a/include/kfr/dft/fft.hpp b/include/kfr/dft/fft.hpp @@ -163,7 +163,7 @@ struct dft_plan } } explicit dft_plan(size_t size, dft_order order = dft_order::normal) - :dft_plan(cpu_t::runtime, size, order) + : dft_plan(cpu_t::runtime, size, order) { } #else @@ -174,7 +174,6 @@ struct dft_plan } #endif - void dump() const { for (const std::unique_ptr<dft_stage<T>>& s : stages) @@ -291,11 +290,11 @@ protected: } else { - size_t offset = 0; + size_t offset = 0; while (offset < this->size) { stages[depth]->execute(cbool<inverse>, select_out(depth, out, scratch) + offset, - select_in(depth, out, in, scratch, in_scratch) + offset, temp); + select_in(depth, out, in, scratch, in_scratch) + offset, temp); offset += stages[depth]->stage_size; } depth++; diff --git a/include/kfr/dft/impl/convolution-impl.cpp b/include/kfr/dft/impl/convolution-impl.cpp @@ -23,9 +23,9 @@ disclosing the source code of your own applications. See https://www.kfrlib.com for details. */ -#include "../convolution.hpp" -#include "../../simd/complex.hpp" #include "../../base/simd_expressions.hpp" +#include "../../simd/complex.hpp" +#include "../convolution.hpp" namespace kfr { diff --git a/include/kfr/dft/impl/dft-impl.hpp b/include/kfr/dft/impl/dft-impl.hpp @@ -25,9 +25,9 @@ */ #pragma once -#include "dft-fft.hpp" -#include "../../base/simd_expressions.hpp" #include "../../base/math_expressions.hpp" +#include "../../base/simd_expressions.hpp" +#include "dft-fft.hpp" CMT_PRAGMA_GNU(GCC diagnostic push) #if CMT_HAS_WARNING("-Wshadow") @@ -96,9 +96,9 @@ struct dft_stage_fixed_impl : dft_stage<T> constexpr static size_t rradix = fixed_radix; - constexpr static size_t width = fixed_radix >= 7 - ? fft_vector_width<T> / 2 - : fixed_radix >= 4 ? fft_vector_width<T> : fft_vector_width<T> * 2; + constexpr static size_t width = fixed_radix >= 7 ? fft_vector_width<T> / 2 + : fixed_radix >= 4 ? fft_vector_width<T> + : fft_vector_width<T> * 2; virtual void do_initialize(size_t) override final { dft_stage_fixed_initialize(this, width); } DFT_STAGE_FN @@ -132,9 +132,9 @@ struct dft_stage_fixed_final_impl : dft_stage<T> this->recursion = false; this->can_inplace = false; } - constexpr static size_t width = fixed_radix >= 7 - ? fft_vector_width<T> / 2 - : fixed_radix >= 4 ? fft_vector_width<T> : fft_vector_width<T> * 2; + constexpr static size_t width = fixed_radix >= 7 ? fft_vector_width<T> / 2 + : fixed_radix >= 4 ? fft_vector_width<T> + : fft_vector_width<T> * 2; DFT_STAGE_FN template <bool inverse> @@ -434,7 +434,8 @@ protected: { cswitch( dft_radices, radices[0], - [&](auto first_radix) { + [&](auto first_radix) + { if (count == 3) { dft_permute(out, in, radices[2], radices[1], first_radix); @@ -449,7 +450,8 @@ protected: } } }, - [&]() { + [&]() + { if (count == 3) { dft_permute(out, in, radices[2], radices[1], radices[0]); @@ -473,14 +475,14 @@ void prepare_dft_stage(dft_plan<T>* self, size_t radix, size_t iterations, size_ { return cswitch( dft_radices, radix, - [self, iterations, blocks](auto radix) CMT_INLINE_LAMBDA { + [self, iterations, blocks](auto radix) CMT_INLINE_LAMBDA + { add_stage<conditional<is_final, intrinsics::dft_stage_fixed_final_impl<T, val_of(radix)>, intrinsics::dft_stage_fixed_impl<T, val_of(radix)>>>(self, radix, iterations, blocks); }, - [self, radix, iterations, blocks]() { - add_stage<intrinsics::dft_stage_generic_impl<T, is_final>>(self, radix, iterations, blocks); - }); + [self, radix, iterations, blocks]() + { add_stage<intrinsics::dft_stage_generic_impl<T, is_final>>(self, radix, iterations, blocks); }); } template <typename T> @@ -502,13 +504,15 @@ void init_dft(dft_plan<T>* self, size_t size, dft_order) int radices[32] = { 0 }; size_t radices_size = 0; - cforeach(dft_radices[csizeseq<dft_radices.size(), dft_radices.size() - 1, -1>], [&](auto radix) { - while (cur_size && cur_size % val_of(radix) == 0) - { - count[val_of(radix)]++; - cur_size /= val_of(radix); - } - }); + cforeach(dft_radices[csizeseq<dft_radices.size(), dft_radices.size() - 1, -1>], + [&](auto radix) + { + while (cur_size && cur_size % val_of(radix) == 0) + { + count[val_of(radix)]++; + cur_size /= val_of(radix); + } + }); if (cur_size >= 101) { diff --git a/include/kfr/dft/impl/fft-impl.hpp b/include/kfr/dft/impl/fft-impl.hpp @@ -951,17 +951,21 @@ KFR_INTRINSIC void init_fft(dft_plan<T>* self, size_t size, dft_order) const size_t log2n = ilog2(size); cswitch( csizes_t<1, 2, 3, 4, 5, 6, 7, 8, 9, 10>(), log2n, - [&](auto log2n) { + [&](auto log2n) + { (void)log2n; constexpr size_t log2nv = val_of(decltype(log2n)()); add_stage<intrinsics::fft_specialization<T, log2nv>>(self, size); }, - [&]() { - cswitch(cfalse_true, is_even(log2n), [&](auto is_even) { - make_fft(self, size, is_even, ctrue); - constexpr size_t is_evenv = val_of(decltype(is_even)()); - add_stage<intrinsics::fft_reorder_stage_impl<T, is_evenv>>(self, size); - }); + [&]() + { + cswitch(cfalse_true, is_even(log2n), + [&](auto is_even) + { + make_fft(self, size, is_even, ctrue); + constexpr size_t is_evenv = val_of(decltype(is_even)()); + add_stage<intrinsics::fft_reorder_stage_impl<T, is_evenv>>(self, size); + }); }); } @@ -970,15 +974,19 @@ KFR_INTRINSIC void generate_real_twiddles(dft_plan_real<T>* self, size_t size) { using namespace intrinsics; constexpr size_t width = vector_width<T> * 2; - block_process(size / 4, csizes_t<width, 1>(), [=](size_t i, auto w) { - constexpr size_t width = val_of(decltype(w)()); - cwrite<width>(self->rtwiddle.data() + i, - cossin(dup(-constants<T>::pi * ((enumerate<T, width>() + i + size / 4) / (size / 2))))); - }); + block_process(size / 4, csizes_t<width, 1>(), + [=](size_t i, auto w) + { + constexpr size_t width = val_of(decltype(w)()); + cwrite<width>(self->rtwiddle.data() + i, + cossin(dup(-constants<T>::pi * + ((enumerate<T, width>() + i + size / 4) / (size / 2))))); + }); } template <typename T> -#if (defined CMT_ARCH_X32 && defined CMT_ARCH_X86 && defined __clang__) && ((defined __APPLE__) || (__clang_major__ == 8)) +#if (defined CMT_ARCH_X32 && defined CMT_ARCH_X86 && defined __clang__) && \ + ((defined __APPLE__) || (__clang_major__ == 8)) // Fix for Clang 8.0 bug (x32 with FMA instructions) // Xcode has different versions but x86 is very rare on macOS these days, // so disable inlining and FMA for x32 macOS and Clang 8.x @@ -997,20 +1005,22 @@ to_fmt(size_t real_size, const complex<T>* rtwiddle, complex<T>* out, const comp const cvec<T, 1> dc = cread<1>(out); const size_t count = csize / 2; - block_process(count - 1, csizes_t<width, 1>(), [&](size_t i, auto w) { - i++; - constexpr size_t width = val_of(decltype(w)()); - constexpr size_t widthm1 = width - 1; - const cvec<T, width> tw = cread<width>(rtwiddle + i); - const cvec<T, width> fpk = cread<width>(in + i); - const cvec<T, width> fpnk = reverse<2>(negodd(cread<width>(in + csize - i - widthm1))); + block_process(count - 1, csizes_t<width, 1>(), + [&](size_t i, auto w) + { + i++; + constexpr size_t width = val_of(decltype(w)()); + constexpr size_t widthm1 = width - 1; + const cvec<T, width> tw = cread<width>(rtwiddle + i); + const cvec<T, width> fpk = cread<width>(in + i); + const cvec<T, width> fpnk = reverse<2>(negodd(cread<width>(in + csize - i - widthm1))); - const cvec<T, width> f1k = fpk + fpnk; - const cvec<T, width> f2k = fpk - fpnk; - const cvec<T, width> t = cmul(f2k, tw); - cwrite<width>(out + i, T(0.5) * (f1k + t)); - cwrite<width>(out + csize - i - widthm1, reverse<2>(negodd(T(0.5) * (f1k - t)))); - }); + const cvec<T, width> f1k = fpk + fpnk; + const cvec<T, width> f2k = fpk - fpnk; + const cvec<T, width> t = cmul(f2k, tw); + cwrite<width>(out + i, T(0.5) * (f1k + t)); + cwrite<width>(out + csize - i - widthm1, reverse<2>(negodd(T(0.5) * (f1k - t)))); + }); { size_t k = csize / 2; @@ -1030,7 +1040,8 @@ to_fmt(size_t real_size, const complex<T>* rtwiddle, complex<T>* out, const comp } template <typename T> -#if (defined CMT_ARCH_X32 && defined CMT_ARCH_X86 && defined __clang__) && ((defined __APPLE__) || (__clang_major__ == 8)) +#if (defined CMT_ARCH_X32 && defined CMT_ARCH_X86 && defined __clang__) && \ + ((defined __APPLE__) || (__clang_major__ == 8)) // Fix for Clang 8.0 bug (x32 with FMA instructions) // Xcode has different versions but x86 is very rare on macOS these days, // so disable inlining and FMA for x32 macOS and Clang 8.x @@ -1059,20 +1070,22 @@ void from_fmt(size_t real_size, complex<T>* rtwiddle, complex<T>* out, const com constexpr size_t width = vector_width<T> * 2; const size_t count = csize / 2; - block_process(count - 1, csizes_t<width, 1>(), [&](size_t i, auto w) { - i++; - constexpr size_t width = val_of(decltype(w)()); - constexpr size_t widthm1 = width - 1; - const cvec<T, width> tw = cread<width>(rtwiddle + i); - const cvec<T, width> fpk = cread<width>(in + i); - const cvec<T, width> fpnk = reverse<2>(negodd(cread<width>(in + csize - i - widthm1))); + block_process(count - 1, csizes_t<width, 1>(), + [&](size_t i, auto w) + { + i++; + constexpr size_t width = val_of(decltype(w)()); + constexpr size_t widthm1 = width - 1; + const cvec<T, width> tw = cread<width>(rtwiddle + i); + const cvec<T, width> fpk = cread<width>(in + i); + const cvec<T, width> fpnk = reverse<2>(negodd(cread<width>(in + csize - i - widthm1))); - const cvec<T, width> f1k = fpk + fpnk; - const cvec<T, width> f2k = fpk - fpnk; - const cvec<T, width> t = cmul_conj(f2k, tw); - cwrite<width>(out + i, f1k + t); - cwrite<width>(out + csize - i - widthm1, reverse<2>(negodd(f1k - t))); - }); + const cvec<T, width> f1k = fpk + fpnk; + const cvec<T, width> f2k = fpk - fpnk; + const cvec<T, width> t = cmul_conj(f2k, tw); + cwrite<width>(out + i, f1k + t); + cwrite<width>(out + csize - i - widthm1, reverse<2>(negodd(f1k - t))); + }); { size_t k = csize / 2; @@ -1125,12 +1138,15 @@ public: constexpr size_t width = vector_width<T> * 2; size_t real_size = this->stage_size; complex<T>* rtwiddle = ptr_cast<complex<T>>(this->data); - block_process(real_size / 4, csizes_t<width, 1>(), [=](size_t i, auto w) { - constexpr size_t width = val_of(decltype(w)()); - cwrite<width>(rtwiddle + i, - cossin(dup(-constants<T>::pi * - ((enumerate<T, width>() + i + real_size / 4) / (real_size / 2))))); - }); + block_process(real_size / 4, csizes_t<width, 1>(), + [=](size_t i, auto w) + { + constexpr size_t width = val_of(decltype(w)()); + cwrite<width>( + rtwiddle + i, + cossin(dup(-constants<T>::pi * + ((enumerate<T, width>() + i + real_size / 4) / (real_size / 2))))); + }); } void do_execute(cdirect_t, complex<T>* out, const complex<T>* in, u8* temp) override { diff --git a/include/kfr/dft/impl/ft.hpp b/include/kfr/dft/impl/ft.hpp @@ -1092,19 +1092,19 @@ template <typename T, bool inverse = false> static constexpr KFR_INTRINSIC cvec<T, 1> tw9_1() { return { T(0.76604444311897803520239265055541), - (inverse ? -1 : 1) * T(-0.64278760968653932632264340990727) }; + (inverse ? -1 : 1) * T(-0.64278760968653932632264340990727) }; } template <typename T, bool inverse = false> static constexpr KFR_INTRINSIC cvec<T, 1> tw9_2() { return { T(0.17364817766693034885171662676931), - (inverse ? -1 : 1) * T(-0.98480775301220805936674302458952) }; + (inverse ? -1 : 1) * T(-0.98480775301220805936674302458952) }; } template <typename T, bool inverse = false> static constexpr KFR_INTRINSIC cvec<T, 1> tw9_4() { return { T(-0.93969262078590838405410927732473), - (inverse ? -1 : 1) * T(-0.34202014332566873304409961468226) }; + (inverse ? -1 : 1) * T(-0.34202014332566873304409961468226) }; } template <size_t N, bool inverse = false, typename T> @@ -1205,7 +1205,7 @@ KFR_INTRINSIC void butterfly7(cvec<T, N> a00, cvec<T, N> a01, cvec<T, N> a02, cv const cvec<T, N> d1 = dif1 * tw7i1<T, N, inverse>() + dif2 * tw7i2<T, N, inverse>() + dif3 * tw7i3<T, N, inverse>(); const cvec<T, N> d2 = - dif1 * tw7i2<T, N, inverse>() - dif2 * tw7i3<T, N, inverse>()- dif3 * tw7i1<T, N, inverse>(); + dif1 * tw7i2<T, N, inverse>() - dif2 * tw7i3<T, N, inverse>() - dif3 * tw7i1<T, N, inverse>(); const cvec<T, N> d3 = dif1 * tw7i3<T, N, inverse>() - dif2 * tw7i1<T, N, inverse>() + dif3 * tw7i2<T, N, inverse>(); @@ -1729,15 +1729,18 @@ template <typename T, bool inverse, typename Tstride = csize_t<1>> KFR_INTRINSIC void generic_butterfly(size_t radix, cbool_t<inverse>, complex<T>* out, const complex<T>* in, complex<T>*, const complex<T>* twiddle, Tstride ostride = {}) { - cswitch(csizes_t<11, 13>(), radix, - [&](auto radix_) CMT_INLINE_LAMBDA { - constexpr size_t width = vector_width<T>; - spec_generic_butterfly_w<width>(radix_, cbool_t<inverse>(), out, in, twiddle, ostride); - }, - [&]() CMT_INLINE_LAMBDA { - constexpr size_t width = vector_width<T>; - generic_butterfly_w<width>(radix, cbool_t<inverse>(), out, in, twiddle, ostride); - }); + cswitch( + csizes_t<11, 13>(), radix, + [&](auto radix_) CMT_INLINE_LAMBDA + { + constexpr size_t width = vector_width<T>; + spec_generic_butterfly_w<width>(radix_, cbool_t<inverse>(), out, in, twiddle, ostride); + }, + [&]() CMT_INLINE_LAMBDA + { + constexpr size_t width = vector_width<T>; + generic_butterfly_w<width>(radix, cbool_t<inverse>(), out, in, twiddle, ostride); + }); } template <typename T, size_t N> @@ -1809,4 +1812,3 @@ KFR_INTRINSIC void cdigitreverse4_write<false, f64, 32>(complex<f64>* dest, cons } // namespace kfr CMT_PRAGMA_MSVC(warning(pop)) - diff --git a/include/kfr/dsp/biquad.hpp b/include/kfr/dsp/biquad.hpp @@ -93,8 +93,6 @@ struct biquad_params inline namespace CMT_ARCH_NAME { -namespace internal -{ template <typename T, size_t filters> struct biquad_state { @@ -143,125 +141,126 @@ struct biquad_block }; template <size_t filters, typename T, typename E1> -struct expression_biquads_l : public expression_with_arguments<E1> +struct expression_biquads_l : public expression_with_traits<E1> { using value_type = T; expression_biquads_l(const biquad_block<T, filters>& bq, E1&& e1) - : expression_with_arguments<E1>(std::forward<E1>(e1)), bq(bq) + : expression_with_traits<E1>(std::forward<E1>(e1)), bq(bq) { } - template <size_t width> - friend KFR_INTRINSIC vec<T, width> get_elements(const expression_biquads_l& self, cinput_t cinput, - size_t index, vec_shape<T, width> t) - { - const vec<T, width> in = self.argument_first(cinput, index, t); - vec<T, width> out; + biquad_block<T, filters> bq; + mutable biquad_state<T, filters> state; +}; - CMT_LOOP_UNROLL - for (size_t i = 0; i < width; i++) - { - self.state.out = process(self.bq, self.state, insertleft(in[i], self.state.out)); - out[i] = self.state.out[filters - 1]; - } +template <size_t filters, typename T, typename E1> +struct expression_biquads : expression_with_traits<E1> +{ + using value_type = T; - return out; - } - static KFR_MEM_INTRINSIC vec<T, filters> process(const biquad_block<T, filters>& bq, - biquad_state<T, filters>& state, - const vec<T, filters>& in) + expression_biquads(const biquad_block<T, filters>& bq, E1&& e1) + : expression_with_traits<E1>(std::forward<E1>(e1)), bq(bq), block_end(0) { - const vec<T, filters> out = bq.b0 * in + state.s1; - state.s1 = state.s2 + bq.b1 * in - bq.a1 * out; - state.s2 = bq.b2 * in - bq.a2 * out; - return out; } + biquad_block<T, filters> bq; + mutable biquad_state<T, filters> state; + mutable biquad_state<T, filters> saved_state; + mutable size_t block_end; }; -template <size_t filters, typename T, typename E1> -struct expression_biquads : expression_with_arguments<E1> +template <size_t filters, typename T> +KFR_INTRINSIC vec<T, filters> biquad_process(const biquad_block<T, filters>& bq, + biquad_state<T, filters>& state, const vec<T, filters>& in) { - using value_type = T; + const vec<T, filters> out = bq.b0 * in + state.s1; + state.s1 = state.s2 + bq.b1 * in - bq.a1 * out; + state.s2 = bq.b2 * in - bq.a2 * out; + return out; +} - expression_biquads(const biquad_block<T, filters>& bq, E1&& e1) - : expression_with_arguments<E1>(std::forward<E1>(e1)), bq(bq), block_end(0) +template <size_t filters, typename T, typename E1, size_t N> +KFR_INTRINSIC vec<T, N> get_elements(const expression_biquads_l<filters, T, E1>& self, shape<1> index, + axis_params<0, N> t) +{ + const vec<T, N> in = get_elements(self.first(), index, t); + vec<T, N> out; + + CMT_LOOP_UNROLL + for (size_t i = 0; i < N; i++) { + self.state.out = biquad_process(self.bq, self.state, insertleft(in[i], self.state.out)); + out[i] = self.state.out[filters - 1]; } - void begin_block(cinput_t cinput, size_t size) const + return out; +} + +template <size_t filters, typename T, typename E1> +KFR_INTRINSIC void begin_pass(const expression_biquads<filters, T, E1>& self, shape<1> start, shape<1> stop) +{ + size_t size = stop.front(); + self.block_end = size; + for (size_t i = 0; i < filters - 1; i++) { - block_end = size; - for (size_t i = 0; i < filters - 1; i++) + const vec<T, 1> in = i < size ? get_elements(self.first(), shape<1>{ i }, axis_params_v<0, 1>) : 0; + self.state.out = biquad_process(self.bq, self.state, insertleft(in[0], self.state.out)); + } +} +template <size_t filters, typename T, typename E1> +KFR_INTRINSIC void end_pass(const expression_biquads<filters, T, E1>& self, shape<1> start, shape<1> stop) +{ + self.state = self.saved_state; +} + +template <size_t filters, typename T, typename E1, size_t N> +KFR_INTRINSIC vec<T, N> get_elements(const expression_biquads<filters, T, E1>& self, shape<1> index, + axis_params<0, N> t) +{ + index.front() += filters - 1; + vec<T, N> out{}; + if (index.front() + N <= self.block_end) + { + const vec<T, N> in = get_elements(self.first(), shape<1>{ index.front() }, t); + + CMT_LOOP_UNROLL + for (size_t i = 0; i < N; i++) { - const vec<T, 1> in = i < size ? this->argument_first(cinput, i, vec_shape<T, 1>()) : 0; - state.out = process(bq, state, insertleft(in[0], state.out)); + self.state.out = biquad_process(self.bq, self.state, insertleft(in[i], self.state.out)); + out[i] = self.state.out[filters - 1]; } + if (index.front() + N == self.block_end) + self.saved_state = self.state; } - void end_block(cinput_t, size_t) const { state = saved_state; } - - template <size_t width> - friend KFR_INTRINSIC vec<T, width> get_elements(const expression_biquads& self, cinput_t cinput, - size_t index, vec_shape<T, width> t) + else if (index.front() >= self.block_end) { - index += filters - 1; - vec<T, width> out{}; - if (index + width <= self.block_end) + CMT_LOOP_UNROLL + for (size_t i = 0; i < N; i++) { - const vec<T, width> in = self.argument_first(cinput, index, t); - - CMT_LOOP_UNROLL - for (size_t i = 0; i < width; i++) - { - self.state.out = process(self.bq, self.state, insertleft(in[i], self.state.out)); - out[i] = self.state.out[filters - 1]; - } - if (index + width == self.block_end) - self.saved_state = self.state; + self.state.out = biquad_process(self.bq, self.state, insertleft(T(0), self.state.out)); + out[i] = self.state.out[filters - 1]; } - else if (index >= self.block_end) + } + else + { + size_t i = 0; + for (; i < std::min(N, self.block_end - index.front()); i++) { - CMT_LOOP_UNROLL - for (size_t i = 0; i < width; i++) - { - self.state.out = process(self.bq, self.state, insertleft(T(0), self.state.out)); - out[i] = self.state.out[filters - 1]; - } + const vec<T, 1> in = + get_elements(self.first(), index.add_at(i, cval<index_t, 0>), axis_params_v<0, 1>); + self.state.out = biquad_process(self.bq, self.state, insertleft(in[0], self.state.out)); + out[i] = self.state.out[filters - 1]; } - else + self.saved_state = self.state; + for (; i < N; i++) { - size_t i = 0; - for (; i < std::min(width, self.block_end - index); i++) - { - const vec<T, 1> in = self.argument_first(cinput, index + i, vec_shape<T, 1>()); - self.state.out = process(self.bq, self.state, insertleft(in[0], self.state.out)); - out[i] = self.state.out[filters - 1]; - } - self.saved_state = self.state; - for (; i < width; i++) - { - self.state.out = process(self.bq, self.state, insertleft(T(0), self.state.out)); - out[i] = self.state.out[filters - 1]; - } + self.state.out = biquad_process(self.bq, self.state, insertleft(T(0), self.state.out)); + out[i] = self.state.out[filters - 1]; } - return out; - } - static KFR_MEM_INTRINSIC vec<T, filters> process(const biquad_block<T, filters>& bq, - biquad_state<T, filters>& state, vec<T, filters> in) - { - const vec<T, filters> out = bq.b0 * in + state.s1; - state.s1 = state.s2 + bq.b1 * in - bq.a1 * out; - state.s2 = bq.b2 * in - bq.a2 * out; - return out; } - biquad_block<T, filters> bq; - - mutable biquad_state<T, filters> state; - mutable biquad_state<T, filters> saved_state; - mutable size_t block_end; -}; -} // namespace internal + return out; +} /** * @brief Returns template expressions that applies biquad filter to the input. @@ -269,10 +268,10 @@ struct expression_biquads : expression_with_arguments<E1> * @param e1 Input expression */ template <typename T, typename E1> -KFR_FUNCTION internal::expression_biquads<1, T, E1> biquad(const biquad_params<T>& bq, E1&& e1) +KFR_FUNCTION expression_biquads<1, T, E1> biquad(const biquad_params<T>& bq, E1&& e1) { const biquad_params<T> bqs[1] = { bq }; - return internal::expression_biquads<1, T, E1>(bqs, std::forward<E1>(e1)); + return expression_biquads<1, T, E1>(bqs, std::forward<E1>(e1)); } /** @@ -282,10 +281,9 @@ KFR_FUNCTION internal::expression_biquads<1, T, E1> biquad(const biquad_params<T * @note This implementation introduces delay of N - 1 samples, where N is the filter count. */ template <size_t filters, typename T, typename E1> -KFR_FUNCTION internal::expression_biquads_l<filters, T, E1> biquad_l(const biquad_params<T> (&bq)[filters], - E1&& e1) +KFR_FUNCTION expression_biquads_l<filters, T, E1> biquad_l(const biquad_params<T> (&bq)[filters], E1&& e1) { - return internal::expression_biquads_l<filters, T, E1>(bq, std::forward<E1>(e1)); + return expression_biquads_l<filters, T, E1>(bq, std::forward<E1>(e1)); } /** @@ -295,10 +293,9 @@ KFR_FUNCTION internal::expression_biquads_l<filters, T, E1> biquad_l(const biqua * @note This implementation has zero latency */ template <size_t filters, typename T, typename E1> -KFR_FUNCTION internal::expression_biquads<filters, T, E1> biquad(const biquad_params<T> (&bq)[filters], - E1&& e1) +KFR_FUNCTION expression_biquads<filters, T, E1> biquad(const biquad_params<T> (&bq)[filters], E1&& e1) { - return internal::expression_biquads<filters, T, E1>(bq, std::forward<E1>(e1)); + return expression_biquads<filters, T, E1>(bq, std::forward<E1>(e1)); } /** @@ -308,21 +305,22 @@ KFR_FUNCTION internal::expression_biquads<filters, T, E1> biquad(const biquad_pa * @note This implementation has zero latency */ template <size_t maxfiltercount = 4, typename T, typename E1> -KFR_FUNCTION expression_pointer<T> biquad(const biquad_params<T>* bq, size_t count, E1&& e1) +KFR_FUNCTION expression_pointer<T, 1> biquad(const biquad_params<T>* bq, size_t count, E1&& e1) { constexpr csizes_t<1, 2, 4, 8, 16, 32, 64> sizes; return cswitch( cfilter(sizes, sizes <= csize_t<maxfiltercount>{}), next_poweroftwo(count), - [&](auto x) { + [&](auto x) + { constexpr size_t filters = x; - return to_pointer(internal::expression_biquads<filters, T, E1>( - internal::biquad_block<T, filters>(bq, count), std::forward<E1>(e1))); + return to_pointer(expression_biquads<filters, T, E1>(biquad_block<T, filters>(bq, count), + std::forward<E1>(e1))); }, - [&] { return to_pointer(zeros<T>()); }); + [&] { return to_pointer(fixshape(zeros<T>(), fixed_shape<infinite_size>)); }); } template <size_t maxfiltercount = 4, typename T, typename E1> -KFR_FUNCTION expression_pointer<T> biquad(const std::vector<biquad_params<T>>& bq, E1&& e1) +KFR_FUNCTION expression_pointer<T, 1> biquad(const std::vector<biquad_params<T>>& bq, E1&& e1) { return biquad<maxfiltercount>(bq.data(), bq.size(), std::forward<E1>(e1)); } @@ -341,11 +339,7 @@ public: { } - biquad_filter(const std::vector<biquad_params<T>>& bq) - : biquad_filter(bq.data(), bq.size()) - { - } - + biquad_filter(const std::vector<biquad_params<T>>& bq) : biquad_filter(bq.data(), bq.size()) {} }; } // namespace CMT_ARCH_NAME diff --git a/include/kfr/dsp/dcremove.hpp b/include/kfr/dsp/dcremove.hpp @@ -30,14 +30,12 @@ namespace kfr { -inline namespace CMT_ARCH_NAME -{ -template <typename E1, typename T = flt_type<value_type_of<E1>>> -KFR_INTRINSIC internal::expression_biquads<1, T, E1> dcremove(E1&& e1, double cutoff = 0.00025) +template <typename E1, typename T = flt_type<expression_value_type<E1>>> +KFR_INTRINSIC expression_biquads<1, T, E1> dcremove(E1&& e1, double cutoff = 0.00025) { const biquad_params<T> bqs[1] = { biquad_highpass(cutoff, 0.5) }; - return internal::expression_biquads<1, T, E1>(bqs, std::forward<E1>(e1)); + return expression_biquads<1, T, E1>(bqs, std::forward<E1>(e1)); } -} // namespace CMT_ARCH_NAME + } // namespace kfr diff --git a/include/kfr/dsp/delay.hpp b/include/kfr/dsp/delay.hpp @@ -27,8 +27,8 @@ #include "../base/basic_expressions.hpp" #include "../base/expression.hpp" +#include "../base/state_holder.hpp" #include "../base/univector.hpp" -#include "state_holder.hpp" namespace kfr { @@ -58,14 +58,21 @@ struct delay_state<T, 1, 1> mutable T data = T(0); }; -namespace internal -{ - template <size_t delay, typename E, bool stateless, univector_tag STag> -struct expression_delay : expression_with_arguments<E> +struct expression_delay : expression_with_arguments<E>, public expression_traits_defaults { - using value_type = value_type_of<E>; - using T = value_type; + using ArgTraits = expression_traits<E>; + static_assert(ArgTraits::dims == 1, "expression_delay requires argument with dims == 1"); + using value_type = typename ArgTraits::value_type; + constexpr static size_t dims = 1; + constexpr static shape<dims> shapeof(const expression_delay& self) + { + return ArgTraits::shapeof(self.first()); + } + constexpr static shape<dims> shapeof() { return ArgTraits::shapeof(); } + constexpr static inline bool random_access = false; + + using T = value_type; using expression_with_arguments<E>::expression_with_arguments; expression_delay(E&& e, const delay_state<T, delay, STag>& state) @@ -74,46 +81,54 @@ struct expression_delay : expression_with_arguments<E> } template <size_t N, KFR_ENABLE_IF(N <= delay)> - friend KFR_INTRINSIC vec<T, N> get_elements(const expression_delay& self, cinput_t cinput, size_t index, - vec_shape<T, N>) + friend KFR_INTRINSIC vec<T, N> get_elements(const expression_delay& self, shape<1> index, + axis_params<0, N> sh) { vec<T, N> out; - size_t c = self.state.s.cursor; - self.state.s.data.ringbuf_read(c, out); - const vec<T, N> in = self.argument_first(cinput, index, vec_shape<T, N>()); - self.state.s.data.ringbuf_write(self.state.s.cursor, in); + size_t c = self.state->cursor; + self.state->data.ringbuf_read(c, out); + const vec<T, N> in = get_elements(self.first(), index, sh); + self.state->data.ringbuf_write(self.state->cursor, in); return out; } - friend vec<T, 1> get_elements(const expression_delay& self, cinput_t cinput, size_t index, - vec_shape<T, 1>) + friend vec<T, 1> get_elements(const expression_delay& self, shape<1> index, axis_params<0, 1> sh) { T out; - size_t c = self.state.s.cursor; - self.state.s.data.ringbuf_read(c, out); - const T in = self.argument_first(cinput, index, vec_shape<T, 1>())[0]; - self.state.s.data.ringbuf_write(self.state.s.cursor, in); + size_t c = self.state->cursor; + self.state->data.ringbuf_read(c, out); + const T in = get_elements(self.first(), index, sh).front(); + self.state->data.ringbuf_write(self.state->cursor, in); return out; } template <size_t N, KFR_ENABLE_IF(N > delay)> - friend vec<T, N> get_elements(const expression_delay& self, cinput_t cinput, size_t index, - vec_shape<T, N>) + friend vec<T, N> get_elements(const expression_delay& self, shape<1> index, axis_params<0, N> sh) { vec<T, delay> out; - size_t c = self.state.s.cursor; - self.state.s.data.ringbuf_read(c, out); - const vec<T, N> in = self.argument_first(cinput, index, vec_shape<T, N>()); - self.state.s.data.ringbuf_write(self.state.s.cursor, slice<N - delay, delay>(in)); + size_t c = self.state->cursor; + self.state->data.ringbuf_read(c, out); + const vec<T, N> in = get_elements(self.first(), index, sh); + self.state->data.ringbuf_write(self.state->cursor, slice<N - delay, delay>(in)); return concat_and_slice<0, N>(out, in); } - state_holder<delay_state<T, delay, STag>, stateless> state; + state_holder<const delay_state<T, delay, STag>, stateless> state; }; template <typename E, bool stateless, univector_tag STag> -struct expression_delay<1, E, stateless, STag> : expression_with_arguments<E> +struct expression_delay<1, E, stateless, STag> : expression_with_arguments<E>, expression_traits_defaults { - using value_type = value_type_of<E>; - using T = value_type; + using ArgTraits = expression_traits<E>; + static_assert(ArgTraits::dims == 1, "expression_delay requires argument with dims == 1"); + using value_type = typename ArgTraits::value_type; + constexpr static size_t dims = 1; + constexpr static shape<dims> shapeof(const expression_delay& self) + { + return ArgTraits::shapeof(self.first()); + } + constexpr static shape<dims> shapeof() { return ArgTraits::shapeof(); } + constexpr static inline bool random_access = false; + + using T = value_type; using expression_with_arguments<E>::expression_with_arguments; expression_delay(E&& e, const delay_state<T, 1, STag>& state) @@ -122,17 +137,16 @@ struct expression_delay<1, E, stateless, STag> : expression_with_arguments<E> } template <size_t N> - friend KFR_INTRINSIC vec<T, N> get_elements(const expression_delay& self, cinput_t cinput, size_t index, - vec_shape<T, N>) + friend KFR_INTRINSIC vec<T, N> get_elements(const expression_delay& self, shape<1> index, + axis_params<0, N> sh) { - const vec<T, N> in = self.argument_first(cinput, index, vec_shape<T, N>()); - const vec<T, N> out = insertleft(self.state.s.data, in); - self.state.s.data = in[N - 1]; + const vec<T, N> in = get_elements(self.first(), index, sh); + const vec<T, N> out = insertleft(self.state->data, in); + self.state->data = in[N - 1]; return out; } - state_holder<delay_state<T, 1, STag>, stateless> state; + state_holder<const delay_state<T, 1, STag>, stateless> state; }; -} // namespace internal /** * @brief Returns template expression that applies delay to the input (uses ring buffer internally) @@ -143,12 +157,11 @@ struct expression_delay<1, E, stateless, STag> : expression_with_arguments<E> * auto d = delay(v, csize<4>); * @endcode */ -template <size_t samples = 1, typename E1, typename T = value_type_of<E1>> -KFR_INTRINSIC internal::expression_delay<samples, E1, false, samples> delay(E1&& e1) +template <size_t samples = 1, typename E1, typename T = expression_value_type<E1>> +KFR_INTRINSIC expression_delay<samples, E1, false, samples> delay(E1&& e1) { static_assert(samples >= 1 && samples < 1024, ""); - return internal::expression_delay<samples, E1, false, samples>(std::forward<E1>(e1), - delay_state<T, samples>()); + return expression_delay<samples, E1, false, samples>(std::forward<E1>(e1), delay_state<T, samples>()); } /** @@ -162,11 +175,10 @@ KFR_INTRINSIC internal::expression_delay<samples, E1, false, samples> delay(E1&& * @endcode */ template <size_t samples, typename T, typename E1, univector_tag STag> -KFR_INTRINSIC internal::expression_delay<samples, E1, true, STag> delay(delay_state<T, samples, STag>& state, - E1&& e1) +KFR_INTRINSIC expression_delay<samples, E1, true, STag> delay(delay_state<T, samples, STag>& state, E1&& e1) { static_assert(STag == tag_dynamic_vector || (samples >= 1 && samples < 1024), ""); - return internal::expression_delay<samples, E1, true, STag>(std::forward<E1>(e1), state); + return expression_delay<samples, E1, true, STag>(std::forward<E1>(e1), state); } } // namespace CMT_ARCH_NAME } // namespace kfr diff --git a/include/kfr/dsp/ebu.hpp b/include/kfr/dsp/ebu.hpp @@ -62,7 +62,7 @@ struct integrated_vec : public univector<T> private: void compute() const { - const T z_total = mean(*this); + const T z_total = mean(static_cast<const univector<T>&>(*this)); T relative_gate = energy_to_loudness(z_total) - 10; T z = 0; @@ -130,7 +130,7 @@ private: static const T PRC_LOW = T(0.10); static const T PRC_HIGH = T(0.95); - const T z_total = mean(*this); + const T z_total = mean(static_cast<const univector<T>&>(*this)); const T relative_gate = energy_to_loudness(z_total) - 20; if (this->size() < 2) @@ -184,7 +184,7 @@ private: }; template <typename T> -KFR_INTRINSIC expression_pointer<T> make_kfilter(int samplerate) +KFR_INTRINSIC expression_pointer<T, 1> make_kfilter(int samplerate) { const biquad_params<T> bq[] = { biquad_highshelf(T(1681.81 / samplerate), T(+4.0)), @@ -245,7 +245,7 @@ private: const Speaker m_speaker; const T m_input_gain; const size_t m_packet_size; - expression_pointer<T> m_kfilter; + expression_pointer<T, 1> m_kfilter; univector<T> m_short_sum_of_squares; univector<T> m_momentary_sum_of_squares; T m_output_energy_gain; @@ -311,6 +311,7 @@ public: T shortterm = 0; for (size_t ch = 0; ch < m_channels.size(); ch++) { + // println(ch, "=> ", source[ch][0], " ", source[ch][10], " ", source[ch][20] ); TESTO_ASSERT(source[ch].size() == m_packet_size); ebu_channel<T>& chan = m_channels[ch]; chan.process_packet(source[ch].data()); diff --git a/include/kfr/dsp/fir.hpp b/include/kfr/dsp/fir.hpp @@ -26,13 +26,13 @@ #pragma once #include "../base/basic_expressions.hpp" -#include "../base/simd_expressions.hpp" #include "../base/filter.hpp" #include "../base/memory.hpp" #include "../base/reduce.hpp" +#include "../base/simd_expressions.hpp" +#include "../base/state_holder.hpp" #include "../base/univector.hpp" #include "../simd/vec.hpp" -#include "state_holder.hpp" namespace kfr { @@ -87,115 +87,122 @@ struct moving_sum_state<U, tag_dynamic_vector> mutable size_t head_cursor, tail_cursor; }; -namespace internal -{ - template <size_t tapcount, typename T, typename U, typename E1, bool stateless = false> -struct expression_short_fir : expression_with_arguments<E1> +struct expression_short_fir : expression_with_traits<E1> { - using value_type = U; + using value_type = U; // override value_type + + static_assert(expression_traits<E1>::dims == 1, "expression_short_fir requires input with dims == 1"); + constexpr static inline bool random_access = false; expression_short_fir(E1&& e1, const short_fir_state<tapcount, T, U>& state) - : expression_with_arguments<E1>(std::forward<E1>(e1)), state(state) + : expression_with_traits<E1>(std::forward<E1>(e1)), state(state) { } template <size_t N> - KFR_INTRINSIC friend vec<U, N> get_elements(const expression_short_fir& self, cinput_t cinput, - size_t index, vec_shape<U, N> x) + KFR_INTRINSIC friend vec<U, N> get_elements(const expression_short_fir& self, shape<1> index, + axis_params<0, N> sh) { - vec<U, N> in = self.argument_first(cinput, index, x); + vec<U, N> in = get_elements(self.first(), index, sh); - vec<U, N> out = in * self.state.s.taps.front(); - cforeach(csizeseq<tapcount - 1, 1>, [&](auto I) { - out = out + - concat_and_slice<tapcount - 1 - I, N>(self.state.s.delayline, in) * self.state.s.taps[I]; - }); - self.state.s.delayline = concat_and_slice<N, tapcount - 1>(self.state.s.delayline, in); + vec<U, N> out = in * self.state->taps.front(); + cforeach(csizeseq<tapcount - 1, 1>, + [&](auto I) { + out = out + concat_and_slice<tapcount - 1 - I, N>(self.state->delayline, in) * + self.state->taps[I]; + }); + self.state->delayline = concat_and_slice<N, tapcount - 1>(self.state->delayline, in); return out; } - state_holder<short_fir_state<tapcount, T, U>, stateless> state; + state_holder<const short_fir_state<tapcount, T, U>, stateless> state; }; template <typename T, typename U, typename E1, bool stateless = false> -struct expression_fir : expression_with_arguments<E1> +struct expression_fir : expression_with_traits<E1> { - using value_type = U; + using value_type = U; // override value_type + + static_assert(expression_traits<E1>::dims == 1, "expression_fir requires input with dims == 1"); + constexpr static inline bool random_access = false; expression_fir(E1&& e1, const fir_state<T, U>& state) - : expression_with_arguments<E1>(std::forward<E1>(e1)), state(state) + : expression_with_traits<E1>(std::forward<E1>(e1)), state(state) { } template <size_t N> - KFR_INTRINSIC friend vec<U, N> get_elements(const expression_fir& self, cinput_t cinput, size_t index, - vec_shape<U, N> x) + KFR_INTRINSIC friend vec<U, N> get_elements(const expression_fir& self, shape<1> index, + axis_params<0, N> sh) { - const size_t tapcount = self.state.s.taps.size(); - const vec<U, N> input = self.argument_first(cinput, index, x); + const size_t tapcount = self.state->taps.size(); + const vec<U, N> input = get_elements(self.first(), index, sh); vec<U, N> output; - size_t cursor = self.state.s.delayline_cursor; + size_t cursor = self.state->delayline_cursor; CMT_LOOP_NOUNROLL for (size_t i = 0; i < N; i++) { - self.state.s.delayline.ringbuf_write(cursor, input[i]); - output[i] = - dotproduct(self.state.s.taps, self.state.s.delayline.slice(cursor) /*, tapcount - cursor*/) + - dotproduct(self.state.s.taps.slice(tapcount - cursor), self.state.s.delayline /*, cursor*/); + self.state->delayline.ringbuf_write(cursor, input[i]); + U v = + dotproduct(self.state->taps.slice(0, tapcount - cursor), self.state->delayline.slice(cursor)); + if (cursor > 0) + v = v + dotproduct(self.state->taps.slice(tapcount - cursor), + self.state->delayline.slice(0, cursor)); + output[i] = v; } - self.state.s.delayline_cursor = cursor; + self.state->delayline_cursor = cursor; return output; } - state_holder<fir_state<T, U>, stateless> state; + state_holder<const fir_state<T, U>, stateless> state; }; template <typename U, typename E1, univector_tag STag, bool stateless = false> -struct expression_moving_sum : expression_with_arguments<E1> +struct expression_moving_sum : expression_with_traits<E1> { - using value_type = U; + using value_type = U; // override value_type + + static_assert(expression_traits<E1>::dims == 1, "expression_moving_sum requires input with dims == 1"); + constexpr static inline bool random_access = false; expression_moving_sum(E1&& e1, const moving_sum_state<U, STag>& state) - : expression_with_arguments<E1>(std::forward<E1>(e1)), state(state) + : expression_with_traits<E1>(std::forward<E1>(e1)), state(state) { } template <size_t N> - KFR_INTRINSIC friend vec<U, N> get_elements(const expression_moving_sum& self, cinput_t cinput, - size_t index, vec_shape<U, N> x) + KFR_INTRINSIC friend vec<U, N> get_elements(const expression_moving_sum& self, shape<1> index, + axis_params<0, N> sh) { - static_assert(N >= 1, ""); - - const vec<U, N> input = self.argument_first(cinput, index, x); + const vec<U, N> input = get_elements(self.first(), index, sh); vec<U, N> output; - size_t wcursor = self.state.s.head_cursor; - size_t rcursor = self.state.s.tail_cursor; + size_t wcursor = self.state->head_cursor; + size_t rcursor = self.state->tail_cursor; // initial summation - self.state.s.delayline.ringbuf_write(wcursor, input[0]); - auto s = sum(self.state.s.delayline); + self.state->delayline.ringbuf_write(wcursor, input[0]); + auto s = sum(self.state->delayline); output[0] = s; CMT_LOOP_NOUNROLL for (size_t i = 1; i < N; i++) { U nextout; - self.state.s.delayline.ringbuf_read(rcursor, nextout); - U const nextin = input[i]; - self.state.s.delayline.ringbuf_write(wcursor, nextin); + self.state->delayline.ringbuf_read(rcursor, nextout); + const U nextin = input[i]; + self.state->delayline.ringbuf_write(wcursor, nextin); s += nextin - nextout; output[i] = s; } - self.state.s.delayline.ringbuf_step(rcursor, 1); - self.state.s.head_cursor = wcursor; - self.state.s.tail_cursor = rcursor; + self.state->delayline.ringbuf_step(rcursor, 1); + self.state->head_cursor = wcursor; + self.state->tail_cursor = rcursor; return output; } - state_holder<moving_sum_state<U, STag>, stateless> state; + state_holder<const moving_sum_state<U, STag>, stateless> state; }; -} // namespace internal /** * @brief Returns template expression that applies FIR filter to the input @@ -203,9 +210,9 @@ struct expression_moving_sum : expression_with_arguments<E1> * @param taps coefficients for the FIR filter */ template <typename T, typename E1, univector_tag Tag> -KFR_INTRINSIC internal::expression_fir<T, value_type_of<E1>, E1> fir(E1&& e1, const univector<T, Tag>& taps) +KFR_INTRINSIC expression_fir<T, expression_value_type<E1>, E1> fir(E1&& e1, const univector<T, Tag>& taps) { - return internal::expression_fir<T, value_type_of<E1>, E1>(std::forward<E1>(e1), taps.ref()); + return expression_fir<T, expression_value_type<E1>, E1>(std::forward<E1>(e1), taps.ref()); } /** @@ -214,21 +221,20 @@ KFR_INTRINSIC internal::expression_fir<T, value_type_of<E1>, E1> fir(E1&& e1, co * @param e1 an input expression */ template <typename T, typename U, typename E1> -KFR_INTRINSIC internal::expression_fir<T, U, E1, true> fir(fir_state<T, U>& state, E1&& e1) +KFR_INTRINSIC expression_fir<T, U, E1, true> fir(fir_state<T, U>& state, E1&& e1) { - return internal::expression_fir<T, U, E1, true>(std::forward<E1>(e1), state); + return expression_fir<T, U, E1, true>(std::forward<E1>(e1), state); } /** * @brief Returns template expression that performs moving sum on the input - * @param state moving sum state * @param e1 an input expression */ template <size_t sum_length, typename E1> -KFR_INTRINSIC internal::expression_moving_sum<value_type_of<E1>, E1, tag_dynamic_vector> moving_sum(E1&& e1) +KFR_INTRINSIC expression_moving_sum<expression_value_type<E1>, E1, tag_dynamic_vector> moving_sum(E1&& e1) { - return internal::expression_moving_sum<value_type_of<E1>, E1, tag_dynamic_vector>(std::forward<E1>(e1), - sum_length); + return expression_moving_sum<expression_value_type<E1>, E1, tag_dynamic_vector>(std::forward<E1>(e1), + sum_length); } /** @@ -237,10 +243,9 @@ KFR_INTRINSIC internal::expression_moving_sum<value_type_of<E1>, E1, tag_dynamic * @param e1 an input expression */ template <typename U, typename E1, univector_tag STag> -KFR_INTRINSIC internal::expression_moving_sum<U, E1, STag, true> moving_sum(moving_sum_state<U, STag>& state, - E1&& e1) +KFR_INTRINSIC expression_moving_sum<U, E1, STag, true> moving_sum(moving_sum_state<U, STag>& state, E1&& e1) { - return internal::expression_moving_sum<U, E1, STag, true>(std::forward<E1>(e1), state); + return expression_moving_sum<U, E1, STag, true>(std::forward<E1>(e1), state); } /** @@ -250,11 +255,11 @@ KFR_INTRINSIC internal::expression_moving_sum<U, E1, STag, true> moving_sum(movi * @param taps coefficients for the FIR filter */ template <typename T, size_t TapCount, typename E1> -KFR_INTRINSIC internal::expression_short_fir<next_poweroftwo(TapCount - 1) + 1, T, value_type_of<E1>, E1> +KFR_INTRINSIC expression_short_fir<next_poweroftwo(TapCount - 1) + 1, T, expression_value_type<E1>, E1> short_fir(E1&& e1, const univector<T, TapCount>& taps) { static_assert(TapCount >= 2 && TapCount <= 33, "Use short_fir only for small FIR filters"); - return internal::expression_short_fir<next_poweroftwo(TapCount - 1) + 1, T, value_type_of<E1>, E1>( + return expression_short_fir<next_poweroftwo(TapCount - 1) + 1, T, expression_value_type<E1>, E1>( std::forward<E1>(e1), taps); } @@ -265,12 +270,11 @@ short_fir(E1&& e1, const univector<T, TapCount>& taps) * @param e1 an input expression */ template <size_t TapCount, typename T, typename U, typename E1> -KFR_INTRINSIC internal::expression_short_fir<next_poweroftwo(TapCount - 1) + 1, T, value_type_of<E1>, E1, - true> - short_fir(short_fir_state<next_poweroftwo(TapCount - 1) + 1, T, U>& state, E1&& e1) +KFR_INTRINSIC expression_short_fir<next_poweroftwo(TapCount - 1) + 1, T, expression_value_type<E1>, E1, true> +short_fir(short_fir_state<next_poweroftwo(TapCount - 1) + 1, T, U>& state, E1&& e1) { static_assert(TapCount >= 2 && TapCount <= 33, "Use short_fir only for small FIR filters"); - return internal::expression_short_fir<next_poweroftwo(TapCount - 1) + 1, T, value_type_of<E1>, E1, true>( + return expression_short_fir<next_poweroftwo(TapCount - 1) + 1, T, expression_value_type<E1>, E1, true>( std::forward<E1>(e1), state); } @@ -294,7 +298,7 @@ protected: { make_univector(dest, size) = fir(state, make_univector(src, size)); } - void process_expression(U* dest, const expression_pointer<U>& src, size_t size) final + void process_expression(U* dest, const expression_pointer<U, 1>& src, size_t size) final { make_univector(dest, size) = fir(state, src); } diff --git a/include/kfr/dsp/fir_design.hpp b/include/kfr/dsp/fir_design.hpp @@ -39,8 +39,7 @@ template <typename T> void fir_lowpass(univector_ref<T> taps, T cutoff, const expression_pointer<T>& window, bool normalize = true) { const T scale = 2.0 * cutoff; - taps = bind_expression(fn::sinc(), - symmlinspace<T, true>((taps.size() - 1) * cutoff * c_pi<T>, taps.size(), true)) * + taps = bind_expression(fn::sinc(), symmlinspace<T>((taps.size() - 1) * cutoff * c_pi<T>, taps.size())) * scale * window; if (is_odd(taps.size())) @@ -57,7 +56,7 @@ void fir_highpass(univector_ref<T> taps, T cutoff, const expression_pointer<T>& { const T scale = 2.0 * -cutoff; taps = bind_expression(fn::sinc(), - symmlinspace<T, true>((taps.size() - 1) * cutoff * c_pi<T>, taps.size(), true)) * + symmlinspace<T>((taps.size() - 1) * cutoff * c_pi<T>, taps.size())) * scale * window; if (is_odd(taps.size())) @@ -80,8 +79,8 @@ void fir_bandpass(univector_ref<T> taps, T frequency1, T frequency2, const expre const T start1 = sc * frequency1; const T start2 = sc * frequency2; - taps = (bind_expression(fn::sinc(), symmlinspace<T, true>(start2, taps.size(), true)) * scale2 - - bind_expression(fn::sinc(), symmlinspace<T, true>(start1, taps.size(), true)) * scale1) * + taps = (bind_expression(fn::sinc(), symmlinspace<T>(start2, taps.size())) * scale2 - + bind_expression(fn::sinc(), symmlinspace<T>(start1, taps.size())) * scale1) * window; if (is_odd(taps.size())) @@ -104,8 +103,8 @@ void fir_bandstop(univector_ref<T> taps, T frequency1, T frequency2, const expre const T start1 = sc * frequency1; const T start2 = sc * frequency2; - taps = (bind_expression(fn::sinc(), symmlinspace<T, true>(start1, taps.size(), true)) * scale1 - - bind_expression(fn::sinc(), symmlinspace<T, true>(start2, taps.size(), true)) * scale2) * + taps = (bind_expression(fn::sinc(), symmlinspace<T>(start1, taps.size())) * scale1 - + bind_expression(fn::sinc(), symmlinspace<T>(start2, taps.size())) * scale2) * window; if (is_odd(taps.size())) diff --git a/include/kfr/dsp/fracdelay.hpp b/include/kfr/dsp/fracdelay.hpp @@ -34,12 +34,12 @@ inline namespace CMT_ARCH_NAME { template <typename T, typename E1> -KFR_INTRINSIC internal::expression_short_fir<2, T, value_type_of<E1>, E1> fracdelay(E1&& e1, T delay) +KFR_INTRINSIC expression_short_fir<2, T, expression_value_type<E1>, E1> fracdelay(E1&& e1, T delay) { if (CMT_UNLIKELY(delay < 0)) delay = 0; univector<T, 2> taps({ 1 - delay, delay }); - return internal::expression_short_fir<2, T, value_type_of<E1>, E1>(std::forward<E1>(e1), taps); + return expression_short_fir<2, T, expression_value_type<E1>, E1>(std::forward<E1>(e1), taps); } } // namespace CMT_ARCH_NAME } // namespace kfr diff --git a/include/kfr/dsp/goertzel.hpp b/include/kfr/dsp/goertzel.hpp @@ -35,12 +35,11 @@ namespace kfr inline namespace CMT_ARCH_NAME { -namespace internal -{ - template <typename T> -struct expression_goertzel : output_expression +struct expression_goertzel : expression_traits_defaults { + using value_type = accepts_any; + expression_goertzel(complex<T>& result, T omega) : result(result), omega(omega), coeff(2 * cos(omega)), q0(), q1(), q2() { @@ -51,7 +50,7 @@ struct expression_goertzel : output_expression result.imag(q2 * sin(omega)); } template <typename U, size_t N> - KFR_INTRINSIC friend void set_elements(expression_goertzel& self, coutput_t, size_t, const vec<U, N>& x) + KFR_INTRINSIC friend void set_elements(expression_goertzel& self, shape<1>, const vec<U, N>& x) { vec<T, N> in = x; CMT_LOOP_UNROLL @@ -71,8 +70,10 @@ struct expression_goertzel : output_expression }; template <typename T, size_t width> -struct expression_parallel_goertzel : output_expression +struct expression_parallel_goertzel : expression_traits_defaults { + using value_type = accepts_any; + expression_parallel_goertzel(complex<T> result[], vec<T, width> omega) : result(result), omega(omega), coeff(cos(omega)), q0(), q1(), q2() { @@ -88,8 +89,7 @@ struct expression_parallel_goertzel : output_expression } } template <typename U, size_t N> - KFR_INTRINSIC friend void set_elements(expression_parallel_goertzel& self, coutput_t, size_t, - const vec<U, N>& x) + KFR_INTRINSIC friend void set_elements(expression_parallel_goertzel& self, shape<1>, const vec<U, N>& x) { const vec<T, N> in = x; CMT_LOOP_UNROLL @@ -107,19 +107,18 @@ struct expression_parallel_goertzel : output_expression vec<T, width> q1; vec<T, width> q2; }; -} // namespace internal template <typename T> -KFR_INTRINSIC internal::expression_goertzel<T> goertzel(complex<T>& result, identity<T> omega) +KFR_INTRINSIC expression_goertzel<T> goertzel(complex<T>& result, identity<T> omega) { - return internal::expression_goertzel<T>(result, omega); + return expression_goertzel<T>(result, omega); } template <typename T, size_t width> -KFR_INTRINSIC internal::expression_parallel_goertzel<T, width> goertzel(complex<T> (&result)[width], - const T (&omega)[width]) +KFR_INTRINSIC expression_parallel_goertzel<T, width> goertzel(complex<T> (&result)[width], + const T (&omega)[width]) { - return internal::expression_parallel_goertzel<T, width>(result, read<width>(omega)); + return expression_parallel_goertzel<T, width>(result, read<width>(omega)); } } // namespace CMT_ARCH_NAME } // namespace kfr diff --git a/include/kfr/dsp/iir_design.hpp b/include/kfr/dsp/iir_design.hpp @@ -28,9 +28,9 @@ #include "../base/filter.hpp" #include "../base/pointer.hpp" #include "../math/hyperbolic.hpp" +#include "../simd/complex.hpp" #include "../simd/impl/function.hpp" #include "../simd/operators.hpp" -#include "../simd/complex.hpp" #include "../simd/vec.hpp" #include "../testo/assert.hpp" #include "biquad_design.hpp" @@ -60,7 +60,7 @@ KFR_FUNCTION zpk<T> chebyshev1(int N, identity<T> rp) T eps = sqrt(exp10(0.1 * rp) - 1.0); T mu = 1.0 / N * std::asinh(1 / eps); - univector<T> m = linspace(-N + 1, N + 1, N, false, true); + univector<T> m = linspace(-N + 1, N + 1, N, false, ctrue); univector<T> theta = c_pi<T> * m / (2 * N); univector<complex<T>> p = -csinh(make_complex(mu, theta)); @@ -85,17 +85,17 @@ KFR_FUNCTION zpk<T> chebyshev2(int N, identity<T> rs) if (N % 2) { - m = concatenate(linspace(-N + 1, -2, N / 2, true, true), linspace(2, N - 1, N / 2, true, true)); + m = concatenate(linspace(-N + 1, -2, N / 2, true, ctrue), linspace(2, N - 1, N / 2, true, ctrue)); } else { - m = linspace(-N + 1, N + 1, N, false, true); + m = linspace(-N + 1, N + 1, N, false, ctrue); } univector<complex<T>> z = -cconj(complex<T>(0, 1) / sin(m * c_pi<T> / (2.0 * N))); univector<complex<T>> p = - -cexp(complex<T>(0, 1) * c_pi<T> * linspace(-N + 1, N + 1, N, false, true) / (2 * N)); + -cexp(complex<T>(0, 1) * c_pi<T> * linspace(-N + 1, N + 1, N, false, ctrue) / (2 * N)); p = make_complex(sinh(mu) * real(p), cosh(mu) * imag(p)); p = 1.0 / p; @@ -898,7 +898,8 @@ template <typename T> KFR_FUNCTION univector<complex<T>> cplxreal(const univector<complex<T>>& list) { univector<complex<T>> x = list; - std::sort(x.begin(), x.end(), [](const complex<T>& a, const complex<T>& b) { return a.real() < b.real(); }); + std::sort(x.begin(), x.end(), + [](const complex<T>& a, const complex<T>& b) { return a.real() < b.real(); }); T tol = std::numeric_limits<T>::epsilon() * 100; univector<complex<T>> result = x; for (size_t i = result.size(); i > 1; i--) @@ -1214,4 +1215,4 @@ KFR_FUNCTION std::vector<biquad_params<T>> to_sos(const zpk<T>& filter) } } // namespace CMT_ARCH_NAME -} // namespace kfr -\ No newline at end of file +} // namespace kfr diff --git a/include/kfr/dsp/mixdown.hpp b/include/kfr/dsp/mixdown.hpp @@ -36,13 +36,11 @@ inline namespace CMT_ARCH_NAME * @brief Returns template expression that returns the sum of all the inputs */ template <typename... E> -internal::expression_function<fn::add, E...> mixdown(E&&... e) +expression_function<fn::add, E...> mixdown(E&&... e) { - return internal::expression_function<fn::add, E...>(fn::add(), std::forward<E>(e)...); + return expression_function<fn::add, E...>(fn::add(), std::forward<E>(e)...); } -namespace internal -{ struct stereo_matrix { template <typename T, size_t N> @@ -57,7 +55,6 @@ struct stereo_matrix } const f64x2x2 matrix; }; -} // namespace internal template <int = 0> CMT_GNU_CONSTEXPR f64x2x2 matrix_sum_diff() @@ -75,12 +72,10 @@ CMT_GNU_CONSTEXPR f64x2x2 matrix_halfsum_halfdiff() * channels */ template <typename Left, typename Right, - typename Result = - internal::expression_function<internal::stereo_matrix, internal::expression_pack<Left, Right>>> + typename Result = expression_function<stereo_matrix, expression_pack<Left, Right>>> Result mixdown_stereo(Left&& left, Right&& right, const f64x2x2& matrix) { - return Result(internal::stereo_matrix{ matrix }, - pack(std::forward<Left>(left), std::forward<Right>(right))); + return Result(stereo_matrix{ matrix }, pack(std::forward<Left>(left), std::forward<Right>(right))); } } // namespace CMT_ARCH_NAME } // namespace kfr diff --git a/include/kfr/dsp/oscillators.hpp b/include/kfr/dsp/oscillators.hpp @@ -26,7 +26,9 @@ #pragma once #include "../base/basic_expressions.hpp" +#include "../base/simd_expressions.hpp" #include "../math/sin_cos.hpp" +#include "../simd/round.hpp" namespace kfr { @@ -143,9 +145,9 @@ KFR_FUNCTION T1 rawsine(const T1& x) return intrinsics::rawsine(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)> -KFR_FUNCTION internal::expression_function<fn::rawsine, E1> rawsine(E1&& x) +KFR_FUNCTION expression_function<fn::rawsine, E1> rawsine(E1&& x) { - return { fn::rawsine(), std::forward<E1>(x) }; + return { std::forward<E1>(x) }; } template <typename T1, KFR_ENABLE_IF(is_numeric<T1>)> KFR_FUNCTION T1 sine(const T1& x) @@ -153,9 +155,9 @@ KFR_FUNCTION T1 sine(const T1& x) return intrinsics::sine(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)> -KFR_FUNCTION internal::expression_function<fn::sine, E1> sine(E1&& x) +KFR_FUNCTION expression_function<fn::sine, E1> sine(E1&& x) { - return { fn::sine(), std::forward<E1>(x) }; + return { std::forward<E1>(x) }; } template <typename T1, KFR_ENABLE_IF(is_numeric<T1>)> KFR_FUNCTION T1 sinenorm(const T1& x) @@ -163,7 +165,7 @@ KFR_FUNCTION T1 sinenorm(const T1& x) return intrinsics::sinenorm(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)> -KFR_FUNCTION internal::expression_function<fn::sinenorm, E1> sinenorm(E1&& x) +KFR_FUNCTION expression_function<fn::sinenorm, E1> sinenorm(E1&& x) { return { fn::sinenorm(), std::forward<E1>(x) }; } @@ -173,7 +175,7 @@ KFR_FUNCTION T1 rawsquare(const T1& x) return intrinsics::rawsquare(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)> -KFR_FUNCTION internal::expression_function<fn::rawsquare, E1> rawsquare(E1&& x) +KFR_FUNCTION expression_function<fn::rawsquare, E1> rawsquare(E1&& x) { return { fn::rawsquare(), std::forward<E1>(x) }; } @@ -183,7 +185,7 @@ KFR_FUNCTION T1 square(const T1& x) return intrinsics::square(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)> -KFR_FUNCTION internal::expression_function<fn::square, E1> square(E1&& x) +KFR_FUNCTION expression_function<fn::square, E1> square(E1&& x) { return { fn::square(), std::forward<E1>(x) }; } @@ -193,7 +195,7 @@ KFR_FUNCTION T1 squarenorm(const T1& x) return intrinsics::squarenorm(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)> -KFR_FUNCTION internal::expression_function<fn::squarenorm, E1> squarenorm(E1&& x) +KFR_FUNCTION expression_function<fn::squarenorm, E1> squarenorm(E1&& x) { return { fn::squarenorm(), std::forward<E1>(x) }; } @@ -203,7 +205,7 @@ KFR_FUNCTION T1 rawtriangle(const T1& x) return intrinsics::rawtriangle(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)> -KFR_FUNCTION internal::expression_function<fn::rawtriangle, E1> rawtriangle(E1&& x) +KFR_FUNCTION expression_function<fn::rawtriangle, E1> rawtriangle(E1&& x) { return { fn::rawtriangle(), std::forward<E1>(x) }; } @@ -213,7 +215,7 @@ KFR_FUNCTION T1 triangle(const T1& x) return intrinsics::triangle(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)> -KFR_FUNCTION internal::expression_function<fn::triangle, E1> triangle(E1&& x) +KFR_FUNCTION expression_function<fn::triangle, E1> triangle(E1&& x) { return { fn::triangle(), std::forward<E1>(x) }; } @@ -223,7 +225,7 @@ KFR_FUNCTION T1 trianglenorm(const T1& x) return intrinsics::trianglenorm(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)> -KFR_FUNCTION internal::expression_function<fn::trianglenorm, E1> trianglenorm(E1&& x) +KFR_FUNCTION expression_function<fn::trianglenorm, E1> trianglenorm(E1&& x) { return { fn::trianglenorm(), std::forward<E1>(x) }; } @@ -233,7 +235,7 @@ KFR_FUNCTION T1 rawsawtooth(const T1& x) return intrinsics::rawsawtooth(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)> -KFR_FUNCTION internal::expression_function<fn::rawsawtooth, E1> rawsawtooth(E1&& x) +KFR_FUNCTION expression_function<fn::rawsawtooth, E1> rawsawtooth(E1&& x) { return { fn::rawsawtooth(), std::forward<E1>(x) }; } @@ -243,7 +245,7 @@ KFR_FUNCTION T1 sawtooth(const T1& x) return intrinsics::sawtooth(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)> -KFR_FUNCTION internal::expression_function<fn::sawtooth, E1> sawtooth(E1&& x) +KFR_FUNCTION expression_function<fn::sawtooth, E1> sawtooth(E1&& x) { return { fn::sawtooth(), std::forward<E1>(x) }; } @@ -253,7 +255,7 @@ KFR_FUNCTION T1 sawtoothnorm(const T1& x) return intrinsics::sawtoothnorm(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)> -KFR_FUNCTION internal::expression_function<fn::sawtoothnorm, E1> sawtoothnorm(E1&& x) +KFR_FUNCTION expression_function<fn::sawtoothnorm, E1> sawtoothnorm(E1&& x) { return { fn::sawtoothnorm(), std::forward<E1>(x) }; } @@ -263,7 +265,7 @@ KFR_FUNCTION T1 isawtooth(const T1& x) return intrinsics::isawtooth(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)> -KFR_FUNCTION internal::expression_function<fn::isawtooth, E1> isawtooth(E1&& x) +KFR_FUNCTION expression_function<fn::isawtooth, E1> isawtooth(E1&& x) { return { fn::isawtooth(), std::forward<E1>(x) }; } @@ -273,7 +275,7 @@ KFR_FUNCTION T1 isawtoothnorm(const T1& x) return intrinsics::isawtoothnorm(x); } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)> -KFR_FUNCTION internal::expression_function<fn::isawtoothnorm, E1> isawtoothnorm(E1&& x) +KFR_FUNCTION expression_function<fn::isawtoothnorm, E1> isawtoothnorm(E1&& x) { return { fn::isawtoothnorm(), std::forward<E1>(x) }; } diff --git a/include/kfr/dsp/sample_rate_conversion.hpp b/include/kfr/dsp/sample_rate_conversion.hpp @@ -27,6 +27,9 @@ #include "../base/memory.hpp" #include "../base/reduce.hpp" +#include "../base/univector.hpp" +#include "../math/modzerobessel.hpp" +#include "../math/sqrt.hpp" #include "../simd/impl/function.hpp" #include "../simd/vec.hpp" #include "window.hpp" @@ -203,14 +206,16 @@ public: } else if (input_start >= input_position) { - output[i] = dotproduct(input.slice(input_start - input_position, depth), tap_ptr); + output[i] = + dotproduct(input.slice(input_start - input_position, depth), tap_ptr.truncate(depth)); } else { const itype prev_count = input_position - input_start; - output[i] = dotproduct(delay.slice(size_t(depth - prev_count)), tap_ptr) + - dotproduct(input.slice(0, size_t(depth - prev_count)), - tap_ptr.slice(size_t(prev_count), size_t(depth - prev_count))); + output[i] = + dotproduct(delay.slice(size_t(depth - prev_count)), tap_ptr.truncate(prev_count)) + + dotproduct(input.truncate(size_t(depth - prev_count)), + tap_ptr.slice(size_t(prev_count), size_t(depth - prev_count))); } } @@ -254,28 +259,28 @@ template <size_t factor, size_t offset, typename E> struct expression_downsample; template <typename E> -struct expression_upsample<2, E> : expression_with_arguments<E> +struct expression_upsample<2, E> : expression_with_arguments<E>, expression_traits_defaults { using expression_with_arguments<E>::expression_with_arguments; - using value_type = value_type_of<E>; + using value_type = expression_value_type<E>; using T = value_type; KFR_MEM_INTRINSIC size_t size() const CMT_NOEXCEPT { return expression_with_arguments<E>::size() * 2; } template <size_t N> - KFR_INTRINSIC friend vec<T, N> get_elements(const expression_upsample& self, cinput_t cinput, - size_t index, vec_shape<T, N>) + KFR_INTRINSIC friend vec<T, N> get_elements(const expression_upsample& self, shape<1> index, + axis_params<0, N>) { - const vec<T, N / 2> x = self.argument_first(cinput, index / 2, vec_shape<T, N / 2>()); + const vec<T, N / 2> x = get_elements(self.first() index / 2, axis_params<0, N / 2>()); return interleave(x, zerovector(x)); } - KFR_INTRINSIC friend vec<T, 1> get_elements(const expression_upsample& self, cinput_t cinput, - size_t index, vec_shape<T, 1>) + KFR_INTRINSIC friend vec<T, 1> get_elements(const expression_upsample& self, shape<1> index, + axis_params<0, 1>) { if (index & 1) return 0; else - return self.argument_first(cinput, index / 2, vec_shape<T, 1>()); + return get_elements(self.first(), index / 2, axis_params<0, 1>()); } }; @@ -283,39 +288,41 @@ template <typename E> struct expression_upsample<4, E> : expression_with_arguments<E> { using expression_with_arguments<E>::expression_with_arguments; - using value_type = value_type_of<E>; + using value_type = expression_value_type<E>; using T = value_type; KFR_MEM_INTRINSIC size_t size() const CMT_NOEXCEPT { return expression_with_arguments<E>::size() * 4; } template <size_t N> - KFR_INTRINSIC friend vec<T, N> get_elements(const expression_upsample& self, cinput_t cinput, - size_t index, vec_shape<T, N>) CMT_NOEXCEPT + KFR_INTRINSIC friend vec<T, N> get_elements(const expression_upsample& self, shape<1> index, + axis_params<0, N>) CMT_NOEXCEPT { - const vec<T, N / 4> x = self.argument_first(cinput, index / 4, vec_shape<T, N / 4>()); + const vec<T, N / 4> x = self.argument_first(cinput, index / 4, axis_params<0, N / 4>()); const vec<T, N / 2> xx = interleave(x, zerovector(x)); return interleave(xx, zerovector(xx)); } - KFR_INTRINSIC friend vec<T, 2> get_elements(const expression_upsample& self, cinput_t cinput, - size_t index, vec_shape<T, 2>) CMT_NOEXCEPT + KFR_INTRINSIC friend vec<T, 2> get_elements(const expression_upsample& self, shape<1> index, + axis_params<0, 2>) CMT_NOEXCEPT { switch (index & 3) { case 0: - return interleave(self.argument_first(cinput, index / 4, vec_shape<T, 1>()), zerovector<T, 1>()); + return interleave(self.argument_first(cinput, index / 4, axis_params<0, 1>()), + zerovector<T, 1>()); case 3: - return interleave(zerovector<T, 1>(), self.argument_first(cinput, index / 4, vec_shape<T, 1>())); + return interleave(zerovector<T, 1>(), + self.argument_first(cinput, index / 4, axis_params<0, 1>())); default: return 0; } } - KFR_INTRINSIC friend vec<T, 1> get_elements(const expression_upsample& self, cinput_t cinput, - size_t index, vec_shape<T, 1>) CMT_NOEXCEPT + KFR_INTRINSIC friend vec<T, 1> get_elements(const expression_upsample& self, shape<1> index, + axis_params<0, 1>) CMT_NOEXCEPT { if (index & 3) return 0; else - return self.argument_first(cinput, index / 4, vec_shape<T, 1>()); + return self.argument_first(cinput, index / 4, axis_params<0, 1>()); } }; @@ -323,16 +330,16 @@ template <typename E, size_t offset> struct expression_downsample<2, offset, E> : expression_with_arguments<E> { using expression_with_arguments<E>::expression_with_arguments; - using value_type = value_type_of<E>; + using value_type = expression_value_type<E>; using T = value_type; KFR_MEM_INTRINSIC size_t size() const CMT_NOEXCEPT { return expression_with_arguments<E>::size() / 2; } template <size_t N> - KFR_INTRINSIC friend vec<T, N> get_elements(const expression_downsample& self, cinput_t cinput, - size_t index, vec_shape<T, N>) CMT_NOEXCEPT + KFR_INTRINSIC friend vec<T, N> get_elements(const expression_downsample& self, size_t index, + axis_params<0, N>) CMT_NOEXCEPT { - const vec<T, N* 2> x = self.argument_first(cinput, index * 2, vec_shape<T, N * 2>()); + const vec<T, N* 2> x = self.argument_first(cinput, index * 2, axis_params<0, N * 2>()); return x.shuffle(csizeseq<N, offset, 2>); } }; @@ -341,16 +348,16 @@ template <typename E, size_t offset> struct expression_downsample<4, offset, E> : expression_with_arguments<E> { using expression_with_arguments<E>::expression_with_arguments; - using value_type = value_type_of<E>; + using value_type = expression_value_type<E>; using T = value_type; KFR_MEM_INTRINSIC size_t size() const CMT_NOEXCEPT { return expression_with_arguments<E>::size() / 4; } template <size_t N> - KFR_INTRINSIC friend vec<T, N> get_elements(const expression_downsample& self, cinput_t cinput, - size_t index, vec_shape<T, N>) CMT_NOEXCEPT + KFR_INTRINSIC friend vec<T, N> get_elements(const expression_downsample& self, shape<1> index, + axis_params<0, N>) CMT_NOEXCEPT { - const vec<T, N* 4> x = self.argument_first(cinput, index * 4, vec_shape<T, N * 4>()); + const vec<T, N* 4> x = self.argument_first(cinput, index * 4, axis_params<0, N * 4>()); return x.shuffle(csizeseq<N, offset, 4>); } }; diff --git a/include/kfr/dsp/special.hpp b/include/kfr/dsp/special.hpp @@ -40,12 +40,15 @@ inline namespace CMT_ARCH_NAME template <typename T = int> auto unitimpulse() { - return lambda<T>([](cinput_t, size_t index, auto x) { - if (CMT_UNLIKELY(index == 0)) - return onoff(x); - else - return zerovector(x); - }); + return lambda<T>( + [](shape<1> index, auto x) + { + vec_shape<T, decltype(x)::value> sh{}; + if (CMT_UNLIKELY(index[0] == 0)) + return onoff(sh); + else + return zerovector(sh); + }); } template <typename T = fbase> diff --git a/include/kfr/dsp/state_holder.hpp b/include/kfr/dsp/state_holder.hpp @@ -1,41 +0,0 @@ -/** @addtogroup fir - * @{ - */ -/** - * KFR (http://kfrlib.com) - * Copyright (C) 2016-2022 Fractalium Ltd - * See LICENSE.txt for details - */ -#pragma once - -#include "../cident.h" - -namespace kfr -{ -inline namespace CMT_ARCH_NAME -{ -namespace internal -{ - -template <typename T, bool stateless> -struct state_holder -{ - state_holder() = delete; - state_holder(const state_holder&) = default; - state_holder(state_holder&&) = default; - constexpr state_holder(const T& state) CMT_NOEXCEPT : s(state) {} - T s; -}; - -template <typename T> -struct state_holder<T, true> -{ - state_holder() = delete; - state_holder(const state_holder&) = default; - state_holder(state_holder&&) = default; - constexpr state_holder(const T& state) CMT_NOEXCEPT : s(state) {} - const T& s; -}; -} // namespace internal -} // namespace CMT_ARCH_NAME -} // namespace kfr diff --git a/include/kfr/dsp/units.hpp b/include/kfr/dsp/units.hpp @@ -26,8 +26,8 @@ #pragma once #include "../base/basic_expressions.hpp" -#include "../math/abs.hpp" #include "../math/log_exp.hpp" +#include "../simd/abs.hpp" #include "../simd/vec.hpp" namespace kfr @@ -137,8 +137,8 @@ KFR_FUNCTION flt_type<T1> note_to_hertz(const T1& x) return intrinsics::note_to_hertz(x); } -template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)> -KFR_FUNCTION internal::expression_function<fn::note_to_hertz, E1> note_to_hertz(E1&& x) +template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> +KFR_FUNCTION expression_function<fn::note_to_hertz, E1> note_to_hertz(E1&& x) { return { fn::note_to_hertz(), std::forward<E1>(x) }; } @@ -149,8 +149,8 @@ KFR_FUNCTION flt_type<T1> hertz_to_note(const T1& x) return intrinsics::hertz_to_note(x); } -template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)> -KFR_FUNCTION internal::expression_function<fn::hertz_to_note, E1> hertz_to_note(E1&& x) +template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> +KFR_FUNCTION expression_function<fn::hertz_to_note, E1> hertz_to_note(E1&& x) { return { fn::hertz_to_note(), std::forward<E1>(x) }; } @@ -161,8 +161,8 @@ KFR_FUNCTION flt_type<T1> amp_to_dB(const T1& x) return intrinsics::amp_to_dB(x); } -template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)> -KFR_FUNCTION internal::expression_function<fn::amp_to_dB, E1> amp_to_dB(E1&& x) +template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> +KFR_FUNCTION expression_function<fn::amp_to_dB, E1> amp_to_dB(E1&& x) { return { fn::amp_to_dB(), std::forward<E1>(x) }; } @@ -173,8 +173,8 @@ KFR_FUNCTION flt_type<T1> dB_to_amp(const T1& x) return intrinsics::dB_to_amp(x); } -template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)> -KFR_FUNCTION internal::expression_function<fn::dB_to_amp, E1> dB_to_amp(E1&& x) +template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> +KFR_FUNCTION expression_function<fn::dB_to_amp, E1> dB_to_amp(E1&& x) { return { fn::dB_to_amp(), std::forward<E1>(x) }; } @@ -185,8 +185,8 @@ KFR_FUNCTION flt_type<T1> power_to_dB(const T1& x) return intrinsics::power_to_dB(x); } -template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)> -KFR_FUNCTION internal::expression_function<fn::power_to_dB, E1> power_to_dB(E1&& x) +template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> +KFR_FUNCTION expression_function<fn::power_to_dB, E1> power_to_dB(E1&& x) { return { fn::power_to_dB(), std::forward<E1>(x) }; } @@ -197,8 +197,8 @@ KFR_FUNCTION flt_type<T1> dB_to_power(const T1& x) return intrinsics::dB_to_power(x); } -template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)> -KFR_FUNCTION internal::expression_function<fn::dB_to_power, E1> dB_to_power(E1&& x) +template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)> +KFR_FUNCTION expression_function<fn::dB_to_power, E1> dB_to_power(E1&& x) { return { fn::dB_to_power(), std::forward<E1>(x) }; } diff --git a/include/kfr/dsp/waveshaper.hpp b/include/kfr/dsp/waveshaper.hpp @@ -25,8 +25,8 @@ */ #pragma once -#include "../math/clamp.hpp" #include "../math/hyperbolic.hpp" +#include "../simd/clamp.hpp" #include "../simd/operators.hpp" namespace kfr @@ -63,13 +63,13 @@ KFR_FUNCTION flt_type<T1> saturate_II(const T1& x) KFR_FN(saturate_II) template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)> -KFR_FUNCTION internal::expression_function<fn::saturate_II, E1> saturate_I(E1&& x) +KFR_FUNCTION expression_function<fn::saturate_II, E1> saturate_I(E1&& x) { return { fn::saturate_I(), std::forward<E1>(x) }; } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)> -KFR_FUNCTION internal::expression_function<fn::saturate_II, E1> saturate_II(E1&& x) +KFR_FUNCTION expression_function<fn::saturate_II, E1> saturate_II(E1&& x) { return { fn::saturate_II(), std::forward<E1>(x) }; } diff --git a/include/kfr/dsp/weighting.hpp b/include/kfr/dsp/weighting.hpp @@ -103,7 +103,7 @@ KFR_INTRINSIC T1 aweighting(const T1& x) } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)> -KFR_INTRINSIC internal::expression_function<fn::aweighting, E1> aweighting(E1&& x) +KFR_INTRINSIC expression_function<fn::aweighting, E1> aweighting(E1&& x) { return { fn::aweighting(), std::forward<E1>(x) }; } @@ -115,7 +115,7 @@ KFR_INTRINSIC T1 bweighting(const T1& x) } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)> -KFR_INTRINSIC internal::expression_function<fn::bweighting, E1> bweighting(E1&& x) +KFR_INTRINSIC expression_function<fn::bweighting, E1> bweighting(E1&& x) { return { fn::bweighting(), std::forward<E1>(x) }; } @@ -127,7 +127,7 @@ KFR_INTRINSIC T1 cweighting(const T1& x) } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)> -KFR_INTRINSIC internal::expression_function<fn::cweighting, E1> cweighting(E1&& x) +KFR_INTRINSIC expression_function<fn::cweighting, E1> cweighting(E1&& x) { return { fn::cweighting(), std::forward<E1>(x) }; } diff --git a/include/kfr/dsp/window.hpp b/include/kfr/dsp/window.hpp @@ -51,6 +51,7 @@ enum class window_type flattop = 12, gaussian = 13, lanczos = 14, + cosine_np = 15, }; template <window_type type> @@ -68,324 +69,280 @@ enum class window_symmetry inline namespace CMT_ARCH_NAME { -namespace internal +enum class window_metrics { + metrics_0_1, + metrics_m1_1, + metrics_mpi_pi, + metrics_m1_1_trunc, + metrics_m1_1_trunc2, +}; template <typename T> -struct window_linspace_0_1 : expression_linspace<T> +struct window_linspace : expression_linspace<T> { - window_linspace_0_1(size_t size, window_symmetry symmetry) - : expression_linspace<T>(0, 1, size, symmetry == window_symmetry::symmetric) + window_linspace(cval_t<window_metrics, window_metrics::metrics_0_1>, size_t size, + window_symmetry symmetry) + : expression_linspace<T>{ 0, 1, size, symmetry == window_symmetry::symmetric } { } -}; - -template <typename T> -struct window_linspace_m1_1 : expression_linspace<T> -{ - window_linspace_m1_1(size_t size, window_symmetry symmetry) - : expression_linspace<T>(-1, 1, size, symmetry == window_symmetry::symmetric) + window_linspace(cval_t<window_metrics, window_metrics::metrics_m1_1>, size_t size, + window_symmetry symmetry) + : expression_linspace<T>{ -1, 1, size, symmetry == window_symmetry::symmetric } { } -}; - -template <typename T> -struct window_linspace_mpi_pi : expression_linspace<T> -{ - window_linspace_mpi_pi(size_t size, window_symmetry symmetry) - : expression_linspace<T>(-c_pi<T>, +c_pi<T>, size, symmetry == window_symmetry::symmetric) + window_linspace(cval_t<window_metrics, window_metrics::metrics_mpi_pi>, size_t size, + window_symmetry symmetry) + : expression_linspace<T>{ -c_pi<T>, +c_pi<T>, size, symmetry == window_symmetry::symmetric } { } -}; - -template <typename T> -struct window_linspace_m1_1_trunc : expression_linspace<T> -{ - window_linspace_m1_1_trunc(size_t size, window_symmetry symmetry) - : expression_linspace<T>(-T(size - 1) / size, T(size - 1) / size, size, - symmetry == window_symmetry::symmetric) + window_linspace(cval_t<window_metrics, window_metrics::metrics_m1_1_trunc>, size_t size, + window_symmetry symmetry) + : expression_linspace<T>{ symmetric_linspace, calc_p(size, symmetry == window_symmetry::symmetric), + size, symmetry == window_symmetry::symmetric } + { + } + window_linspace(cval_t<window_metrics, window_metrics::metrics_m1_1_trunc2>, size_t size, + window_symmetry symmetry) + : expression_linspace<T>{ symmetric_linspace, calc_p2(size, symmetry == window_symmetry::symmetric), + size, symmetry == window_symmetry::symmetric } { } + static T calc_p(size_t size, bool sym) + { + if (!sym) + ++size; + return T(size - 1) / (size); + } + static T calc_p2(size_t size, bool sym) + { + if (!sym) + ++size; + return (size & 1) ? T(size - 1) / T(size + 1) : T(size - 1) / (size); + } }; template <typename T> -struct window_linspace_m1_1_trunc2 : expression_linspace<T> +struct expression_window : expression_traits_defaults { - window_linspace_m1_1_trunc2(size_t size, window_symmetry symmetry) - : expression_linspace<T>(symmetric_linspace, - (size & 1) ? T(size - 1) / T(size + 1) : T(size - 1) / (size), size, - symmetry == window_symmetry::symmetric) + using value_type = T; + constexpr static size_t dims = 1; + constexpr static shape<dims> shapeof(const expression_window<T>& self) { + return shape<dims>(self.m_size); } + constexpr static shape<dims> shapeof() { return shape<1>(undefined_size); } + + constexpr expression_window(size_t size) : m_size(size) {} + + size_t m_size; + size_t size() const { return m_size; } }; template <typename T> -struct expression_rectangular : input_expression +struct expression_rectangular : expression_window<T> { - using value_type = T; - - expression_rectangular(size_t size, T = T(), window_symmetry = window_symmetry::symmetric) : m_size(size) + expression_rectangular(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric) + : expression_window<T>(size) { } template <size_t N> - KFR_INTRINSIC friend vec<T, N> get_elements(const expression_rectangular& self, cinput_t, size_t index, - vec_shape<T, N>) + KFR_INTRINSIC friend vec<T, N> get_elements(const expression_rectangular& self, shape<1> index, + axis_params<0, N>) { using TI = utype<T>; - const vec<TI, N> i = enumerate(vec_shape<TI, N>()) + static_cast<TI>(index); + const vec<TI, N> i = enumerate(vec_shape<TI, N>()) + static_cast<TI>(index.front()); return select(i < static_cast<TI>(self.m_size), T(1), T(0)); } - size_t size() const { return m_size; } - -private: - size_t m_size; }; -template <typename T> -struct expression_triangular : input_expression +template <typename T, window_metrics metrics> +struct expression_window_with_metrics : expression_window<T> { - using value_type = T; - - expression_triangular(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric) - : linspace(size, symmetry), m_size(size) - { - } - template <size_t N> - KFR_INTRINSIC friend vec<T, N> get_elements(const expression_triangular& self, cinput_t cinput, - size_t index, vec_shape<T, N> y) + expression_window_with_metrics(size_t size, T arg = T(), + window_symmetry symmetry = window_symmetry::symmetric) + : expression_window<T>(size), linspace(cval<window_metrics, metrics>, size, symmetry), arg(arg) { - return 1 - abs(get_elements(self.linspace, cinput, index, y)); } - size_t size() const { return m_size; } -private: - window_linspace_m1_1_trunc2<T> linspace; - size_t m_size; +protected: + window_linspace<T> linspace; + T arg; }; template <typename T> -struct expression_bartlett : input_expression +struct expression_triangular : expression_window_with_metrics<T, window_metrics::metrics_m1_1_trunc2> { - using value_type = T; + using expression_window_with_metrics<T, + window_metrics::metrics_m1_1_trunc2>::expression_window_with_metrics; - expression_bartlett(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric) - : linspace(size, symmetry), m_size(size) + template <size_t N> + KFR_INTRINSIC friend vec<T, N> get_elements(const expression_triangular& self, shape<1> index, + axis_params<0, N> sh) { + return 1 - abs(get_elements(self.linspace, index, sh)); } +}; + +template <typename T> +struct expression_bartlett : expression_window_with_metrics<T, window_metrics::metrics_m1_1> +{ + using expression_window_with_metrics<T, window_metrics::metrics_m1_1>::expression_window_with_metrics; template <size_t N> - KFR_INTRINSIC friend vec<T, N> get_elements(const expression_bartlett& self, cinput_t cinput, - size_t index, vec_shape<T, N> y) + KFR_INTRINSIC friend vec<T, N> get_elements(const expression_bartlett& self, shape<1> index, + axis_params<0, N> sh) { - return 1 - abs(get_elements(self.linspace, cinput, index, y)); + return 1 - abs(get_elements(self.linspace, index, sh)); } - size_t size() const { return m_size; } - -private: - window_linspace_m1_1<T> linspace; - size_t m_size; }; template <typename T> -struct expression_cosine : input_expression +struct expression_cosine : expression_window_with_metrics<T, window_metrics::metrics_0_1> { - using value_type = T; + using expression_window_with_metrics<T, window_metrics::metrics_0_1>::expression_window_with_metrics; - expression_cosine(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric) - : linspace(size, symmetry), m_size(size) + template <size_t N> + KFR_INTRINSIC friend vec<T, N> get_elements(const expression_cosine& self, shape<1> index, + axis_params<0, N> sh) { + return sin(c_pi<T> * (get_elements(self.linspace, index, sh))); } +}; +template <typename T> +struct expression_cosine_np : expression_window_with_metrics<T, window_metrics::metrics_m1_1_trunc> +{ + using expression_window_with_metrics<T, + window_metrics::metrics_m1_1_trunc>::expression_window_with_metrics; + template <size_t N> - KFR_INTRINSIC friend vec<T, N> get_elements(const expression_cosine& self, cinput_t cinput, size_t index, - vec_shape<T, N> y) + KFR_INTRINSIC friend vec<T, N> get_elements(const expression_cosine_np& self, shape<1> index, + axis_params<0, N> sh) { - return sin(c_pi<T> * get_elements(self.linspace, cinput, index, y)); + return sin(c_pi<T, 1, 2> * (1 + get_elements(self.linspace, index, sh))); } - size_t size() const { return m_size; } - -private: - window_linspace_0_1<T> linspace; - size_t m_size; }; template <typename T> -struct expression_hann : input_expression +struct expression_hann : expression_window_with_metrics<T, window_metrics::metrics_0_1> { - using value_type = T; + using expression_window_with_metrics<T, window_metrics::metrics_0_1>::expression_window_with_metrics; - expression_hann(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric) - : linspace(size, symmetry), m_size(size) - { - } template <size_t N> - KFR_INTRINSIC friend vec<T, N> get_elements(const expression_hann& self, cinput_t cinput, size_t index, - vec_shape<T, N> y) + KFR_INTRINSIC friend vec<T, N> get_elements(const expression_hann& self, shape<1> index, + axis_params<0, N> sh) { - return T(0.5) * (T(1) - cos(c_pi<T, 2> * get_elements(self.linspace, cinput, index, y))); + return T(0.5) * (T(1) - cos(c_pi<T, 2> * get_elements(self.linspace, index, sh))); } - size_t size() const { return m_size; } - -private: - window_linspace_0_1<T> linspace; - size_t m_size; }; template <typename T> -struct expression_bartlett_hann : input_expression +struct expression_bartlett_hann : expression_window_with_metrics<T, window_metrics::metrics_0_1> { - using value_type = T; + using expression_window_with_metrics<T, window_metrics::metrics_0_1>::expression_window_with_metrics; - expression_bartlett_hann(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric) - : linspace(size, symmetry), m_size(size) - { - } template <size_t N> - KFR_INTRINSIC friend vec<T, N> get_elements(const expression_bartlett_hann& self, cinput_t cinput, - size_t index, vec_shape<T, N> y) + KFR_INTRINSIC friend vec<T, N> get_elements(const expression_bartlett_hann& self, shape<1> index, + axis_params<0, N> sh) { - const vec<T, N> xx = get_elements(self.linspace, cinput, index, y); + const vec<T, N> xx = get_elements(self.linspace, index, sh); return T(0.62) - T(0.48) * abs(xx - T(0.5)) + T(0.38) * cos(c_pi<T, 2> * (xx - T(0.5))); } - size_t size() const { return m_size; } - -private: - window_linspace_0_1<T> linspace; - size_t m_size; }; template <typename T> -struct expression_hamming : input_expression +struct expression_hamming : expression_window_with_metrics<T, window_metrics::metrics_0_1> { - using value_type = T; - expression_hamming(size_t size, T alpha = 0.54, window_symmetry symmetry = window_symmetry::symmetric) - : linspace(size, symmetry), alpha(alpha), m_size(size) + : expression_window_with_metrics<T, window_metrics::metrics_0_1>(size, alpha, symmetry) { } template <size_t N> - KFR_INTRINSIC friend vec<T, N> get_elements(const expression_hamming& self, cinput_t cinput, size_t index, - vec_shape<T, N> y) + KFR_INTRINSIC friend vec<T, N> get_elements(const expression_hamming& self, shape<1> index, + axis_params<0, N> sh) { - return self.alpha - - (T(1.0) - self.alpha) * (cos(c_pi<T, 2> * get_elements(self.linspace, cinput, index, y))); + return self.arg - (T(1.0) - self.arg) * (cos(c_pi<T, 2> * get_elements(self.linspace, index, sh))); } - size_t size() const { return m_size; } - -private: - window_linspace_0_1<T> linspace; - T alpha; - size_t m_size; }; template <typename T> -struct expression_bohman : input_expression +struct expression_bohman : expression_window_with_metrics<T, window_metrics::metrics_m1_1> { - using value_type = T; + using expression_window_with_metrics<T, window_metrics::metrics_m1_1>::expression_window_with_metrics; - expression_bohman(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric) - : linspace(size, symmetry), m_size(size) - { - } template <size_t N> - KFR_INTRINSIC friend vec<T, N> get_elements(const expression_bohman& self, cinput_t cinput, size_t index, - vec_shape<T, N> y) + KFR_INTRINSIC friend vec<T, N> get_elements(const expression_bohman& self, shape<1> index, + axis_params<0, N> sh) { - const vec<T, N> n = abs(get_elements(self.linspace, cinput, index, y)); + const vec<T, N> n = abs(get_elements(self.linspace, index, sh)); return (T(1) - n) * cos(c_pi<T> * n) + (T(1) / c_pi<T>)*sin(c_pi<T> * n); } - size_t size() const { return m_size; } - -private: - window_linspace_m1_1<T> linspace; - size_t m_size; }; template <typename T> -struct expression_blackman : input_expression +struct expression_blackman : expression_window_with_metrics<T, window_metrics::metrics_0_1> { - using value_type = T; + using expression_window_with_metrics<T, window_metrics::metrics_0_1>::expression_window_with_metrics; expression_blackman(size_t size, T alpha = 0.16, window_symmetry symmetry = window_symmetry::symmetric) - : linspace(size, symmetry), a0((1 - alpha) * 0.5), a1(0.5), a2(alpha * 0.5), m_size(size) + : expression_window_with_metrics<T, window_metrics::metrics_0_1>(size, alpha, symmetry), + a0((1 - alpha) * 0.5), a1(0.5), a2(alpha * 0.5) { } template <size_t N> - KFR_INTRINSIC friend vec<T, N> get_elements(const expression_blackman& self, cinput_t cinput, - size_t index, vec_shape<T, N> y) + KFR_INTRINSIC friend vec<T, N> get_elements(const expression_blackman& self, shape<1> index, + axis_params<0, N> sh) { - const vec<T, N> n = get_elements(self.linspace, cinput, index, y); + const vec<T, N> n = get_elements(self.linspace, index, sh); return self.a0 - self.a1 * cos(c_pi<T, 2> * n) + self.a2 * cos(c_pi<T, 4> * n); } - size_t size() const { return m_size; } private: - window_linspace_0_1<T> linspace; T a0, a1, a2; - size_t m_size; }; template <typename T> -struct expression_blackman_harris : input_expression +struct expression_blackman_harris : expression_window_with_metrics<T, window_metrics::metrics_0_1> { - using value_type = T; + using expression_window_with_metrics<T, window_metrics::metrics_0_1>::expression_window_with_metrics; - expression_blackman_harris(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric) - : linspace(size, symmetry), m_size(size) - { - } template <size_t N> - KFR_INTRINSIC friend vec<T, N> get_elements(const expression_blackman_harris& self, cinput_t cinput, - size_t index, vec_shape<T, N> y) + KFR_INTRINSIC friend vec<T, N> get_elements(const expression_blackman_harris& self, shape<1> index, + axis_params<0, N> sh) { - const vec<T, N> n = get_elements(self.linspace, cinput, index, y) * c_pi<T, 2>; + const vec<T, N> n = get_elements(self.linspace, index, sh) * c_pi<T, 2>; return T(0.35875) - T(0.48829) * cos(n) + T(0.14128) * cos(2 * n) - T(0.01168) * cos(3 * n); } - size_t size() const { return m_size; } - -private: - window_linspace_0_1<T> linspace; - size_t m_size; }; template <typename T> -struct expression_kaiser : input_expression +struct expression_kaiser : expression_window_with_metrics<T, window_metrics::metrics_m1_1> { - using value_type = T; - expression_kaiser(size_t size, T beta = 0.5, window_symmetry symmetry = window_symmetry::symmetric) - : linspace(size, symmetry), beta(beta), m(reciprocal(modzerobessel(make_vector(beta))[0])), - m_size(size) + : expression_window_with_metrics<T, window_metrics::metrics_m1_1>(size, beta, symmetry), + m(reciprocal(modzerobessel(make_vector(beta))[0])) { } template <size_t N> - KFR_INTRINSIC friend vec<T, N> get_elements(const expression_kaiser& self, cinput_t cinput, size_t index, - vec_shape<T, N> y) + KFR_INTRINSIC friend vec<T, N> get_elements(const expression_kaiser& self, shape<1> index, + axis_params<0, N> sh) { - return modzerobessel(self.beta * sqrt(1 - sqr(get_elements(self.linspace, cinput, index, y)))) * - self.m; + return modzerobessel(self.arg * sqrt(1 - sqr(get_elements(self.linspace, index, sh)))) * self.m; } - size_t size() const { return m_size; } private: - window_linspace_m1_1<T> linspace; - T beta; T m; - size_t m_size; }; template <typename T> -struct expression_flattop : input_expression +struct expression_flattop : expression_window_with_metrics<T, window_metrics::metrics_0_1> { - using value_type = T; + using expression_window_with_metrics<T, window_metrics::metrics_0_1>::expression_window_with_metrics; - expression_flattop(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric) - : linspace(size, symmetry), m_size(size) - { - } template <size_t N> - KFR_INTRINSIC friend vec<T, N> get_elements(const expression_flattop& self, cinput_t cinput, size_t index, - vec_shape<T, N> y) + KFR_INTRINSIC friend vec<T, N> get_elements(const expression_flattop& self, shape<1> index, + axis_params<0, N> sh) { - const vec<T, N> n = get_elements(self.linspace, cinput, index, y) * c_pi<T, 2>; + const vec<T, N> n = get_elements(self.linspace, index, sh) * c_pi<T, 2>; constexpr T a0 = 0.21557895; constexpr T a1 = 0.41663158; constexpr T a2 = 0.277263158; @@ -393,58 +350,34 @@ struct expression_flattop : input_expression constexpr T a4 = 0.006947368; return a0 - a1 * cos(n) + a2 * cos(2 * n) - a3 * cos(3 * n) + a4 * cos(4 * n); } - size_t size() const { return m_size; } - -private: - window_linspace_0_1<T> linspace; - size_t m_size; }; template <typename T> -struct expression_gaussian : input_expression +struct expression_gaussian : expression_window_with_metrics<T, window_metrics::metrics_m1_1_trunc> { - using value_type = T; - + /// alpha = std / 2N expression_gaussian(size_t size, T alpha = 2.5, window_symmetry symmetry = window_symmetry::symmetric) - : linspace(size, symmetry), alpha(alpha), m_size(size) + : expression_window_with_metrics<T, window_metrics::metrics_m1_1_trunc>(size, alpha, symmetry) { } template <size_t N> - KFR_INTRINSIC friend vec<T, N> get_elements(const expression_gaussian& self, cinput_t cinput, - size_t index, vec_shape<T, N> y) + KFR_INTRINSIC friend vec<T, N> get_elements(const expression_gaussian& self, shape<1> index, + axis_params<0, N> sh) { - return exp(T(-0.5) * sqr(self.alpha * get_elements(self.linspace, cinput, index, y))); + return exp(T(-0.5) * sqr(self.arg * get_elements(self.linspace, index, sh))); } - - size_t size() const { return m_size; } - -private: - window_linspace_m1_1_trunc<T> linspace; - T alpha; - size_t m_size; }; template <typename T> -struct expression_lanczos : input_expression +struct expression_lanczos : expression_window_with_metrics<T, window_metrics::metrics_mpi_pi> { - using value_type = T; - - expression_lanczos(size_t size, T alpha = 2.5, window_symmetry symmetry = window_symmetry::symmetric) - : linspace(size, symmetry), alpha(alpha), m_size(size) - { - } + using expression_window_with_metrics<T, window_metrics::metrics_mpi_pi>::expression_window_with_metrics; template <size_t N> - KFR_INTRINSIC friend vec<T, N> get_elements(const expression_lanczos& self, cinput_t cinput, size_t index, - vec_shape<T, N> y) + KFR_INTRINSIC friend vec<T, N> get_elements(const expression_lanczos& self, shape<1> index, + axis_params<0, N> sh) { - return sinc(get_elements(self.linspace, cinput, index, y)); + return sinc(get_elements(self.linspace, index, sh)); } - size_t size() const { return m_size; } - -private: - window_linspace_mpi_pi<T> linspace; - T alpha; - size_t m_size; }; template <window_type> @@ -471,62 +404,70 @@ KFR_WINDOW_BY_TYPE(kaiser) KFR_WINDOW_BY_TYPE(flattop) KFR_WINDOW_BY_TYPE(gaussian) KFR_WINDOW_BY_TYPE(lanczos) +KFR_WINDOW_BY_TYPE(cosine_np) #undef KFR_WINDOW_BY_TYPE -} // namespace internal /** * @brief Returns template expression that generates Rrectangular window of length @c size */ template <typename T = fbase> -KFR_FUNCTION internal::expression_rectangular<T> window_rectangular(size_t size, ctype_t<T> = ctype_t<T>()) +KFR_FUNCTION expression_rectangular<T> window_rectangular(size_t size, ctype_t<T> = ctype_t<T>()) { - return internal::expression_rectangular<T>(size, T()); + return expression_rectangular<T>(size, T()); } /** * @brief Returns template expression that generates Triangular window of length @c size */ template <typename T = fbase> -KFR_FUNCTION internal::expression_triangular<T> window_triangular(size_t size, ctype_t<T> = ctype_t<T>()) +KFR_FUNCTION expression_triangular<T> window_triangular(size_t size, ctype_t<T> = ctype_t<T>()) { - return internal::expression_triangular<T>(size); + return expression_triangular<T>(size); } /** * @brief Returns template expression that generates Bartlett window of length @c size */ template <typename T = fbase> -KFR_FUNCTION internal::expression_bartlett<T> window_bartlett(size_t size, ctype_t<T> = ctype_t<T>()) +KFR_FUNCTION expression_bartlett<T> window_bartlett(size_t size, ctype_t<T> = ctype_t<T>()) { - return internal::expression_bartlett<T>(size); + return expression_bartlett<T>(size); } /** * @brief Returns template expression that generates Cosine window of length @c size */ template <typename T = fbase> -KFR_FUNCTION internal::expression_cosine<T> window_cosine(size_t size, ctype_t<T> = ctype_t<T>()) +KFR_FUNCTION expression_cosine<T> window_cosine(size_t size, ctype_t<T> = ctype_t<T>()) +{ + return expression_cosine<T>(size); +} + +/** + * @brief Returns template expression that generates Cosine window (numpy compatible) of length @c size + */ +template <typename T = fbase> +KFR_FUNCTION expression_cosine_np<T> window_cosine_np(size_t size, ctype_t<T> = ctype_t<T>()) { - return internal::expression_cosine<T>(size); + return expression_cosine_np<T>(size); } /** * @brief Returns template expression that generates Hann window of length @c size */ template <typename T = fbase> -KFR_FUNCTION internal::expression_hann<T> window_hann(size_t size, ctype_t<T> = ctype_t<T>()) +KFR_FUNCTION expression_hann<T> window_hann(size_t size, ctype_t<T> = ctype_t<T>()) { - return internal::expression_hann<T>(size); + return expression_hann<T>(size); } /** * @brief Returns template expression that generates Bartlett-Hann window of length @c size */ template <typename T = fbase> -KFR_FUNCTION internal::expression_bartlett_hann<T> window_bartlett_hann(size_t size, - ctype_t<T> = ctype_t<T>()) +KFR_FUNCTION expression_bartlett_hann<T> window_bartlett_hann(size_t size, ctype_t<T> = ctype_t<T>()) { - return internal::expression_bartlett_hann<T>(size); + return expression_bartlett_hann<T>(size); } /** @@ -534,19 +475,19 @@ KFR_FUNCTION internal::expression_bartlett_hann<T> window_bartlett_hann(size_t s * alpha */ template <typename T = fbase> -KFR_FUNCTION internal::expression_hamming<T> window_hamming(size_t size, identity<T> alpha = 0.54, - ctype_t<T> = ctype_t<T>()) +KFR_FUNCTION expression_hamming<T> window_hamming(size_t size, identity<T> alpha = 0.54, + ctype_t<T> = ctype_t<T>()) { - return internal::expression_hamming<T>(size, alpha); + return expression_hamming<T>(size, alpha); } /** * @brief Returns template expression that generates Bohman window of length @c size */ template <typename T = fbase> -KFR_FUNCTION internal::expression_bohman<T> window_bohman(size_t size, ctype_t<T> = ctype_t<T>()) +KFR_FUNCTION expression_bohman<T> window_bohman(size_t size, ctype_t<T> = ctype_t<T>()) { - return internal::expression_bohman<T>(size); + return expression_bohman<T>(size); } /** @@ -554,21 +495,21 @@ KFR_FUNCTION internal::expression_bohman<T> window_bohman(size_t size, ctype_t<T * alpha */ template <typename T = fbase> -KFR_FUNCTION internal::expression_blackman<T> window_blackman( - size_t size, identity<T> alpha = 0.16, window_symmetry symmetry = window_symmetry::symmetric, - ctype_t<T> = ctype_t<T>()) +KFR_FUNCTION expression_blackman<T> window_blackman(size_t size, identity<T> alpha = 0.16, + window_symmetry symmetry = window_symmetry::symmetric, + ctype_t<T> = ctype_t<T>()) { - return internal::expression_blackman<T>(size, alpha, symmetry); + return expression_blackman<T>(size, alpha, symmetry); } /** * @brief Returns template expression that generates Blackman-Harris window of length @c size */ template <typename T = fbase> -KFR_FUNCTION internal::expression_blackman_harris<T> window_blackman_harris( +KFR_FUNCTION expression_blackman_harris<T> window_blackman_harris( size_t size, window_symmetry symmetry = window_symmetry::symmetric, ctype_t<T> = ctype_t<T>()) { - return internal::expression_blackman_harris<T>(size, T(), symmetry); + return expression_blackman_harris<T>(size, T(), symmetry); } /** @@ -576,19 +517,19 @@ KFR_FUNCTION internal::expression_blackman_harris<T> window_blackman_harris( * beta */ template <typename T = fbase> -KFR_FUNCTION internal::expression_kaiser<T> window_kaiser(size_t size, identity<T> beta = T(0.5), - ctype_t<T> = ctype_t<T>()) +KFR_FUNCTION expression_kaiser<T> window_kaiser(size_t size, identity<T> beta = T(0.5), + ctype_t<T> = ctype_t<T>()) { - return internal::expression_kaiser<T>(size, beta); + return expression_kaiser<T>(size, beta); } /** * @brief Returns template expression that generates Flat top window of length @c size */ template <typename T = fbase> -KFR_FUNCTION internal::expression_flattop<T> window_flattop(size_t size, ctype_t<T> = ctype_t<T>()) +KFR_FUNCTION expression_flattop<T> window_flattop(size_t size, ctype_t<T> = ctype_t<T>()) { - return internal::expression_flattop<T>(size); + return expression_flattop<T>(size); } /** @@ -596,23 +537,23 @@ KFR_FUNCTION internal::expression_flattop<T> window_flattop(size_t size, ctype_t * alpha */ template <typename T = fbase> -KFR_FUNCTION internal::expression_gaussian<T> window_gaussian(size_t size, identity<T> alpha = 2.5, - ctype_t<T> = ctype_t<T>()) +KFR_FUNCTION expression_gaussian<T> window_gaussian(size_t size, identity<T> alpha = 2.5, + ctype_t<T> = ctype_t<T>()) { - return internal::expression_gaussian<T>(size, alpha); + return expression_gaussian<T>(size, alpha); } /** * @brief Returns template expression that generates Lanczos window of length @c size */ template <typename T = fbase> -KFR_FUNCTION internal::expression_lanczos<T> window_lanczos(size_t size, ctype_t<T> = ctype_t<T>()) +KFR_FUNCTION expression_lanczos<T> window_lanczos(size_t size, ctype_t<T> = ctype_t<T>()) { - return internal::expression_lanczos<T>(size); + return expression_lanczos<T>(size); } template <typename T = fbase, window_type type, - typename window_expr = typename internal::window_by_type<type>::template type<T>> + typename window_expr = typename window_by_type<type>::template type<T>> CMT_NOINLINE window_expr window(size_t size, cval_t<window_type, type>, identity<T> win_param = T(), window_symmetry symmetry = window_symmetry::symmetric, ctype_t<T> = ctype_t<T>()) @@ -629,12 +570,12 @@ CMT_NOINLINE expression_pointer<T> window(size_t size, window_type type, identit cvals_t<window_type, window_type::rectangular, window_type::triangular, window_type::bartlett, window_type::cosine, window_type::hann, window_type::bartlett_hann, window_type::hamming, window_type::bohman, window_type::blackman, window_type::blackman_harris, window_type::kaiser, - window_type::flattop, window_type::gaussian, window_type::lanczos>(), + window_type::flattop, window_type::gaussian, window_type::lanczos, window_type::cosine_np>(), type, - [size, win_param, symmetry](auto win) { + [size, win_param, symmetry](auto win) + { constexpr window_type window = val_of(decltype(win)()); - return to_pointer( - typename internal::window_by_type<window>::template type<T>(size, win_param, symmetry)); + return to_pointer(typename window_by_type<window>::template type<T>(size, win_param, symmetry)); }, fn_generic::returns<expression_pointer<T>>()); } diff --git a/include/kfr/graphics.hpp b/include/kfr/graphics.hpp @@ -25,4 +25,4 @@ #include "math.hpp" #include "graphics/color.hpp" -#include "graphics/geometry.hpp" -\ No newline at end of file +#include "graphics/geometry.hpp" diff --git a/include/kfr/graphics/color.hpp b/include/kfr/graphics/color.hpp @@ -25,7 +25,7 @@ */ #pragma once -#include "scaled.hpp" +#include "impl/scaled.hpp" namespace kfr { diff --git a/include/kfr/graphics/geometry.hpp b/include/kfr/graphics/geometry.hpp @@ -25,7 +25,7 @@ */ #pragma once -#include "scaled.hpp" +#include "impl/scaled.hpp" namespace kfr { @@ -64,7 +64,8 @@ struct point constexpr point ceil() const { return kfr::ceil(v); } constexpr point trunc() const { return kfr::trunc(v); } - union { + union + { struct { T x; @@ -114,7 +115,8 @@ struct size constexpr bool operator==(const size& c) const { return all(v == c.v); } constexpr bool operator!=(const size& c) const { return !operator==(c); } - union { + union + { struct { T x; @@ -159,7 +161,8 @@ struct border constexpr bool operator==(const border& c) const { return all(v == c.v); } constexpr bool operator!=(const border& c) const { return !(operator==(c)); } - union { + union + { struct { T x1; @@ -187,7 +190,8 @@ struct vector4 constexpr bool operator==(const vector4& c) const { return all(v == c.v); } constexpr bool operator!=(const vector4& c) const { return !operator==(c); } - union { + union + { struct { T x; @@ -378,7 +382,8 @@ struct rectangle rectangle& operator&=(const rectangle& c) { return *this = *this & c; } rectangle& operator|=(const rectangle& c) { return *this = *this | c; } - union { + union + { struct { T x1; @@ -422,7 +427,8 @@ CMT_INTRINSIC size<T> max(const size<T>& a, const size<T>& b) template <typename T> struct matrix { - union { + union + { vec<T, 6> v; struct { diff --git a/include/kfr/graphics/impl/scaled.hpp b/include/kfr/graphics/impl/scaled.hpp @@ -0,0 +1,58 @@ +/** @addtogroup basic_math + * @{ + */ +/* + Copyright (C) 2016-2022 Fractalium Ltd (https://www.kfrlib.com) + This file is part of KFR + + KFR is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + KFR is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with KFR. + + If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + Buying a commercial license is mandatory as soon as you develop commercial activities without + disclosing the source code of your own applications. + See https://www.kfrlib.com for details. + */ +#pragma once + +#include "../../cometa/string.hpp" +#include "../../math.hpp" + +namespace kfr +{ + +template <typename Tout, int Mout, int Min, typename Tin, size_t N, + KFR_ENABLE_IF(Mout != Min && + (std::is_floating_point<Tin>::value || std::is_floating_point<Tout>::value))> +KFR_INTRINSIC vec<Tout, N> convert_scaled(const vec<Tin, N>& value) +{ + using Tcommon = common_type<Tin, Tout>; + return static_cast<vec<Tout, N>>(static_cast<vec<Tcommon, N>>(value) * Mout / Min); +} + +template <typename Tout, int Mout, int Min, typename Tin, size_t N, + KFR_ENABLE_IF(Mout != Min && + !(std::is_floating_point<Tin>::value || std::is_floating_point<Tout>::value))> +KFR_INTRINSIC vec<Tout, N> convert_scaled(const vec<Tin, N>& value) +{ + using Tcommon = + findinttype<std::numeric_limits<Tin>::min() * Mout, std::numeric_limits<Tin>::max() * Mout>; + return static_cast<vec<Tout, N>>(static_cast<vec<Tcommon, N>>(value) * Mout / Min); +} + +template <typename Tout, int Mout, int Min, typename Tin, size_t N, KFR_ENABLE_IF(Mout == Min)> +KFR_INTRINSIC vec<Tout, N> convert_scaled(const vec<Tin, N>& value) +{ + return static_cast<vec<Tout, N>>(value); +} +} // namespace kfr diff --git a/include/kfr/graphics/scaled.hpp b/include/kfr/graphics/scaled.hpp @@ -1,58 +0,0 @@ -/** @addtogroup basic_math - * @{ - */ -/* - Copyright (C) 2016-2022 Fractalium Ltd (https://www.kfrlib.com) - This file is part of KFR - - KFR is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version. - - KFR is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with KFR. - - If GPL is not suitable for your project, you must purchase a commercial license to use KFR. - Buying a commercial license is mandatory as soon as you develop commercial activities without - disclosing the source code of your own applications. - See https://www.kfrlib.com for details. - */ -#pragma once - -#include "../cometa/string.hpp" -#include "../math.hpp" - -namespace kfr -{ - -template <typename Tout, int Mout, int Min, typename Tin, size_t N, - KFR_ENABLE_IF(Mout != Min && - (std::is_floating_point<Tin>::value || std::is_floating_point<Tout>::value))> -KFR_INTRINSIC vec<Tout, N> convert_scaled(const vec<Tin, N>& value) -{ - using Tcommon = common_type<Tin, Tout>; - return static_cast<vec<Tout, N>>(static_cast<vec<Tcommon, N>>(value) * Mout / Min); -} - -template <typename Tout, int Mout, int Min, typename Tin, size_t N, - KFR_ENABLE_IF(Mout != Min && - !(std::is_floating_point<Tin>::value || std::is_floating_point<Tout>::value))> -KFR_INTRINSIC vec<Tout, N> convert_scaled(const vec<Tin, N>& value) -{ - using Tcommon = - findinttype<std::numeric_limits<Tin>::min() * Mout, std::numeric_limits<Tin>::max() * Mout>; - return static_cast<vec<Tout, N>>(static_cast<vec<Tcommon, N>>(value) * Mout / Min); -} - -template <typename Tout, int Mout, int Min, typename Tin, size_t N, KFR_ENABLE_IF(Mout == Min)> -KFR_INTRINSIC vec<Tout, N> convert_scaled(const vec<Tin, N>& value) -{ - return static_cast<vec<Tout, N>>(value); -} -} // namespace kfr -\ No newline at end of file diff --git a/include/kfr/kfr.h b/include/kfr/kfr.h @@ -72,14 +72,15 @@ constexpr inline const char version_full[] = KFR_VERSION_FULL; #ifdef KFR_FUNCTION_IS_INTRINSIC #define KFR_FUNCTION CMT_INTRINSIC #else -#define KFR_FUNCTION +#define KFR_FUNCTION #endif #ifdef CMT_NATIVE_F64 #define KFR_NATIVE_F64 CMT_NATIVE_F64 #endif #if defined CMT_ARCH_ARM && !defined CMT_ARCH_NEON && !defined CMT_FORCE_GENERIC_CPU -#error "ARM builds require NEON support. Add -march=native for native build or skip the check with CMT_FORCE_GENERIC_CPU=1" +#error \ + "ARM builds require NEON support. Add -march=native for native build or skip the check with CMT_FORCE_GENERIC_CPU=1" #endif #if defined CMT_ARCH_ARM && !defined CMT_COMPILER_CLANG && !defined CMT_FORCE_NON_CLANG diff --git a/include/kfr/math/complex_math.hpp b/include/kfr/math/complex_math.hpp @@ -25,13 +25,13 @@ */ #pragma once -#include "../simd/complex.hpp" #include "../simd/abs.hpp" +#include "../simd/complex.hpp" +#include "../simd/min_max.hpp" +#include "../simd/select.hpp" #include "atan.hpp" #include "hyperbolic.hpp" #include "log_exp.hpp" -#include "../simd/min_max.hpp" -#include "../simd/select.hpp" #include "sin_cos.hpp" #include "sqrt.hpp" diff --git a/include/kfr/math/impl/atan.hpp b/include/kfr/math/impl/atan.hpp @@ -22,11 +22,11 @@ */ #pragma once #include "../../simd/abs.hpp" -#include "../../simd/select.hpp" -#include "../sin_cos.hpp" #include "../../simd/constants.hpp" #include "../../simd/impl/function.hpp" #include "../../simd/operators.hpp" +#include "../../simd/select.hpp" +#include "../sin_cos.hpp" namespace kfr { @@ -59,7 +59,7 @@ KFR_INTRINSIC vec<f32, N> atan2k(const vec<f32, N>& yy, const vec<f32, N>& xx) u = fmadd(u, t, 0.199926957488059997558594f); u = fmadd(u, t, -0.333331018686294555664062f); t = u * t * s + s; - t = innercast<f32>(q) * 1.5707963267948966192313216916398f + t; + t = broadcastto<f32>(q) * 1.5707963267948966192313216916398f + t; return t; } @@ -98,7 +98,7 @@ KFR_INTRINSIC vec<f64, N> atan2k(const vec<f64, N>& yy, const vec<f64, N>& xx) u = fmadd(u, t, 0.199999999996591265594148); u = fmadd(u, t, -0.333333333333311110369124); t = u * t * s + s; - t = innercast<f64>(q) * 1.5707963267948966192313216916398 + t; + t = broadcastto<f64>(q) * 1.5707963267948966192313216916398 + t; return t; } diff --git a/include/kfr/math/impl/gamma.hpp b/include/kfr/math/impl/gamma.hpp @@ -50,7 +50,7 @@ KFR_INTRINSIC vec<T, N> gamma(const vec<T, N>& z) vec<T, N> accm = gamma_precalc<T>[0]; CMT_LOOP_UNROLL for (size_t k = 1; k < Count; k++) - accm += gamma_precalc<T>[k] / (z + innercast<utype<T>>(k)); + accm += gamma_precalc<T>[k] / (z + broadcastto<utype<T>>(k)); accm *= exp(-(z + Count)) * pow(z + Count, z + 0.5); return accm / z; } diff --git a/include/kfr/math/impl/hyperbolic.hpp b/include/kfr/math/impl/hyperbolic.hpp @@ -23,12 +23,12 @@ #pragma once #include "../../simd/abs.hpp" -#include "../log_exp.hpp" -#include "../../simd/min_max.hpp" -#include "../../simd/select.hpp" #include "../../simd/constants.hpp" #include "../../simd/impl/function.hpp" +#include "../../simd/min_max.hpp" #include "../../simd/operators.hpp" +#include "../../simd/select.hpp" +#include "../log_exp.hpp" namespace kfr { diff --git a/include/kfr/math/impl/log_exp.hpp b/include/kfr/math/impl/log_exp.hpp @@ -24,12 +24,12 @@ #include "../../simd/abs.hpp" #include "../../simd/clamp.hpp" -#include "../../simd/min_max.hpp" -#include "../../simd/round.hpp" -#include "../../simd/select.hpp" #include "../../simd/constants.hpp" #include "../../simd/impl/function.hpp" +#include "../../simd/min_max.hpp" #include "../../simd/operators.hpp" +#include "../../simd/round.hpp" +#include "../../simd/select.hpp" #include "../../simd/shuffle.hpp" namespace kfr @@ -65,8 +65,8 @@ KFR_INTRINSIC vec<f32, N> vldexpk(const vec<f32, N>& x, const vec<i32, N>& q) m = (((m + q) >> 6) - m) << 4; const vec<i32, N> qq = q - (m << 2); m = clamp(m + 0x7f, vec<i32, N>(0xff)); - vec<f32, N> u = pow4(bitcast<f32>(innercast<i32>(m) << 23)); - return x * u * bitcast<f32>((innercast<i32>(qq + 0x7f)) << 23); + vec<f32, N> u = pow4(bitcast<f32>(broadcastto<i32>(m) << 23)); + return x * u * bitcast<f32>((broadcastto<i32>(qq + 0x7f)) << 23); } template <size_t N> @@ -76,8 +76,8 @@ KFR_INTRINSIC vec<f64, N> vldexpk(const vec<f64, N>& x, const vec<i64, N>& q) m = (((m + q) >> 9) - m) << 7; const vec<i64, N> qq = q - (m << 2); m = clamp(m + 0x3ff, i64(0x7ff)); - vec<f64, N> u = pow4(bitcast<f64>(innercast<i64>(m) << 52)); - return x * u * bitcast<f64>((innercast<i64>(qq + 0x3ff)) << 52); + vec<f64, N> u = pow4(bitcast<f64>(broadcastto<i64>(m) << 52)); + return x * u * bitcast<f64>((broadcastto<i64>(qq + 0x3ff)) << 52); } template <typename T, size_t N> @@ -98,12 +98,12 @@ KFR_INTRINSIC vec<f32, N> log(const vec<f32, N>& d) vec<f32, N> sp = select(d < 0, constants<f32>::qnan, constants<f32>::neginfinity); vec<f32, N> t; - t = fmadd(0.2371599674224853515625f, x2, 0.285279005765914916992188f); - t = fmadd(t, x2, 0.400005519390106201171875f); - t = fmadd(t, x2, 0.666666567325592041015625f); - t = fmadd(t, x2, 2.0f); + t = fmadd(0.2371599674224853515625f, x2, 0.285279005765914916992188f); + t = fmadd(t, x2, 0.400005519390106201171875f); + t = fmadd(t, x2, 0.666666567325592041015625f); + t = fmadd(t, x2, 2.0f); - x = x * t + c_log_2<f32> * innercast<f32>(e); + x = x * t + c_log_2<f32> * broadcastto<f32>(e); x = select(d > 0, x, sp); return x; @@ -120,16 +120,16 @@ KFR_INTRINSIC vec<f64, N> log(const vec<f64, N>& d) vec<f64, N> sp = select(d < 0, constants<f64>::qnan, constants<f64>::neginfinity); - vec<f64, N> t; - t = fmadd(0.148197055177935105296783, x2, 0.153108178020442575739679); - t = fmadd(t, x2, 0.181837339521549679055568); - t = fmadd(t, x2, 0.22222194152736701733275); - t = fmadd(t, x2, 0.285714288030134544449368); - t = fmadd(t, x2, 0.399999999989941956712869); - t = fmadd(t, x2, 0.666666666666685503450651); - t = fmadd(t, x2, 2); + vec<f64, N> t; + t = fmadd(0.148197055177935105296783, x2, 0.153108178020442575739679); + t = fmadd(t, x2, 0.181837339521549679055568); + t = fmadd(t, x2, 0.22222194152736701733275); + t = fmadd(t, x2, 0.285714288030134544449368); + t = fmadd(t, x2, 0.399999999989941956712869); + t = fmadd(t, x2, 0.666666666666685503450651); + t = fmadd(t, x2, 2); - x = x * t + constants<f64>::log_2 * innercast<f64>(e); + x = x * t + constants<f64>::log_2 * broadcastto<f64>(e); x = select(d > 0, x, sp); return x; @@ -138,12 +138,12 @@ KFR_INTRINSIC vec<f64, N> log(const vec<f64, N>& d) template <typename T, size_t N, typename Tout = flt_type<T>> KFR_INTRINSIC vec<Tout, N> log2(const vec<T, N>& x) { - return log(innercast<Tout>(x)) * constants<Tout>::recip_log_2; + return log(broadcastto<Tout>(x)) * constants<Tout>::recip_log_2; } template <typename T, size_t N, typename Tout = flt_type<T>> KFR_INTRINSIC vec<Tout, N> log10(const vec<T, N>& x) { - return log(innercast<Tout>(x)) * constants<Tout>::recip_log_10; + return log(broadcastto<Tout>(x)) * constants<Tout>::recip_log_10; } template <size_t N> @@ -152,11 +152,11 @@ KFR_INTRINSIC vec<f32, N> exp(const vec<f32, N>& d) const f32 ln2_part1 = 0.6931457519f; const f32 ln2_part2 = 1.4286067653e-6f; - vec<i32, N> q = innercast<i32>(floor(d * constants<f32>::recip_log_2)); + vec<i32, N> q = broadcastto<i32>(floor(d * constants<f32>::recip_log_2)); vec<f32, N> s, u; - s = fmadd(innercast<f32>(q), -ln2_part1, d); - s = fmadd(innercast<f32>(q), -ln2_part2, s); + s = fmadd(broadcastto<f32>(q), -ln2_part1, d); + s = fmadd(broadcastto<f32>(q), -ln2_part2, s); const f32 c2 = 0.4999999105930328369140625f; const f32 c3 = 0.166668415069580078125f; @@ -185,11 +185,11 @@ KFR_INTRINSIC vec<f64, N> exp(const vec<f64, N>& d) const f64 ln2_part1 = 0.69314717501401901245; const f64 ln2_part2 = 5.545926273775592108e-009; - vec<i64, N> q = innercast<i64>(floor(d * constants<f64>::recip_log_2)); + vec<i64, N> q = broadcastto<i64>(floor(d * constants<f64>::recip_log_2)); vec<f64, N> s, u; - s = fmadd(innercast<f64>(q), -ln2_part1, d); - s = fmadd(innercast<f64>(q), -ln2_part2, s); + s = fmadd(broadcastto<f64>(q), -ln2_part1, d); + s = fmadd(broadcastto<f64>(q), -ln2_part2, s); const f64 c2 = 0.499999999999994948485237955537741072475910186767578; const f64 c3 = 0.166666666667024204739888659787538927048444747924805; @@ -235,7 +235,7 @@ KFR_INTRINSIC vec<T, N> pow(const vec<T, N>& a, const vec<T, N>& b) { const vec<T, N> t = exp(b * log(abs(a))); const mask<T, N> isint = floor(b) == b; - const mask<T, N> iseven = (innercast<itype<T>>(b) & 1) == 0; + const mask<T, N> iseven = (broadcastto<itype<T>>(b) & 1) == 0; return select( a > T(), t, select(a == T(), T(), select(isint, select(iseven, t, -t), broadcast<N>(constants<T>::qnan)))); @@ -256,7 +256,7 @@ KFR_INTRINSIC vec<T, N> cbrt(const vec<T, N>& x) template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>), typename Tout = flt_type<T>> KFR_INTRINSIC vec<Tout, N> cbrt(const vec<T, N>& x) { - return cbrt(innercast<Tout>(x)); + return cbrt(broadcastto<Tout>(x)); } KFR_HANDLE_SCALAR_1_T(exp, flt_type<T>) diff --git a/include/kfr/math/impl/sin_cos.hpp b/include/kfr/math/impl/sin_cos.hpp @@ -23,12 +23,12 @@ #pragma once #include "../../simd/abs.hpp" -#include "../../simd/min_max.hpp" -#include "../../simd/round.hpp" -#include "../../simd/select.hpp" #include "../../simd/constants.hpp" #include "../../simd/impl/function.hpp" +#include "../../simd/min_max.hpp" #include "../../simd/operators.hpp" +#include "../../simd/round.hpp" +#include "../../simd/select.hpp" #include "../../simd/shuffle.hpp" #if CMT_HAS_WARNING("-Wc99-extensions") @@ -62,7 +62,7 @@ KFR_INTRINSIC vec<T, N> trig_fold(const vec<T, N>& x, vec<itype<T>, N>& quadrant const vec<T, N> xabs = abs(x); constexpr T div = constants<T>::fold_constant_div; vec<T, N> y = floor(xabs / div); - quadrant = innercast<itype<T>>(innercast<int>(y - floor(y * T(1.0 / 16.0)) * T(16.0))); + quadrant = broadcastto<itype<T>>(broadcastto<int>(y - floor(y * T(1.0 / 16.0)) * T(16.0))); const mask<T, N> msk = (quadrant & 1) != 0; quadrant = kfr::select(msk, quadrant + 1, quadrant); diff --git a/include/kfr/math/impl/tan.hpp b/include/kfr/math/impl/tan.hpp @@ -46,7 +46,7 @@ KFR_INTRINSIC vec<T, N> trig_fold_simple(const vec<T, N>& x_full, mask<T, N>& in vec<T, N> scaled = y / pi_14; vec<T, N> k_real = floor(scaled); - vec<IT, N> k = innercast<IT>(k_real); + vec<IT, N> k = broadcastto<IT>(k_real); vec<T, N> x = y - k_real * pi_14; @@ -143,6 +143,9 @@ KFR_INTRINSIC flt_type<T> tandeg(const T& x) return tan(x * c_degtorad<flt_type<T>>); } } // namespace intrinsics +namespace fn +{ +} KFR_I_FN(tan) KFR_I_FN(tandeg) } // namespace CMT_ARCH_NAME diff --git a/include/kfr/simd.hpp b/include/kfr/simd.hpp @@ -26,6 +26,7 @@ #include "simd/clamp.hpp" #include "simd/comparison.hpp" #include "simd/complex.hpp" +#include "simd/complex_type.hpp" #include "simd/constants.hpp" #include "simd/digitreverse.hpp" #include "simd/horizontal.hpp" diff --git a/include/kfr/simd/complex.hpp b/include/kfr/simd/complex.hpp @@ -57,6 +57,30 @@ KFR_INTRINSIC complex<T> operator/(const complex<T>& x, const complex<T>& y) { return (make_vector(x) / make_vector(y))[0]; } +template <typename T> +KFR_INTRINSIC complex<T>& operator+=(complex<T>& x, const complex<T>& y) +{ + x = x + y; + return x; +} +template <typename T> +KFR_INTRINSIC complex<T>& operator-=(complex<T>& x, const complex<T>& y) +{ + x = x - y; + return x; +} +template <typename T> +KFR_INTRINSIC complex<T>& operator*=(complex<T>& x, const complex<T>& y) +{ + x = x * y; + return x; +} +template <typename T> +KFR_INTRINSIC complex<T>& operator/=(complex<T>& x, const complex<T>& y) +{ + x = x / y; + return x; +} template <typename T, typename U, KFR_ENABLE_IF(is_number<U>), typename C = common_type<complex<T>, U>> KFR_INTRINSIC C operator+(const complex<T>& x, const U& y) @@ -78,6 +102,30 @@ KFR_INTRINSIC C operator/(const complex<T>& x, const U& y) { return static_cast<C>(x) / static_cast<C>(y); } +template <typename T, typename U, KFR_ENABLE_IF(std::is_convertible_v<U, T>)> +KFR_INTRINSIC complex<T>& operator+=(complex<T>& x, const U& y) +{ + x = x + y; + return x; +} +template <typename T, typename U, KFR_ENABLE_IF(std::is_convertible_v<U, T>)> +KFR_INTRINSIC complex<T>& operator-=(complex<T>& x, const U& y) +{ + x = x - y; + return x; +} +template <typename T, typename U, KFR_ENABLE_IF(std::is_convertible_v<U, T>)> +KFR_INTRINSIC complex<T>& operator*=(complex<T>& x, const U& y) +{ + x = x * y; + return x; +} +template <typename T, typename U, KFR_ENABLE_IF(std::is_convertible_v<U, T>)> +KFR_INTRINSIC complex<T>& operator/=(complex<T>& x, const U& y) +{ + x = x / y; + return x; +} template <typename T, typename U, KFR_ENABLE_IF(is_number<U>), typename C = common_type<complex<T>, U>> KFR_INTRINSIC C operator+(const U& x, const complex<T>& y) @@ -275,8 +323,8 @@ struct is_complex_impl<complex<T>> : std::true_type }; // vector<complex> to vector<complex> -template <typename To, typename From, size_t N> -struct conversion<vec<complex<To>, N>, vec<complex<From>, N>> +template <typename To, typename From, size_t N, conv_t conv> +struct conversion<1, 1, vec<complex<To>, N>, vec<complex<From>, N>, conv> { static_assert(!is_compound<To>, ""); static_assert(!is_compound<From>, ""); @@ -287,8 +335,8 @@ struct conversion<vec<complex<To>, N>, vec<complex<From>, N>> }; // vector to vector<complex> -template <typename To, typename From, size_t N> -struct conversion<vec<complex<To>, N>, vec<From, N>> +template <typename To, typename From, size_t N, conv_t conv> +struct conversion<1, 1, vec<complex<To>, N>, vec<From, N>, conv> { static_assert(!is_compound<To>, ""); static_assert(!is_compound<From>, ""); @@ -349,7 +397,7 @@ template <typename T1, typename T2 = T1, size_t N, typename T = common_type<T1, constexpr KFR_INTRINSIC vec<complex<T>, N> make_complex(const vec<T1, N>& real, const vec<T2, N>& imag = T2(0)) { - return ccomp(interleave(innercast<T>(real), innercast<T>(imag))); + return ccomp(interleave(promoteto<T>(real), promoteto<T>(imag))); } /// @brief Constructs complex value from real and imaginary parts @@ -357,7 +405,7 @@ template <typename T1, typename T2 = T1, typename T = common_type<T1, T2>, KFR_ENABLE_IF(is_numeric_args<T1, T2>)> constexpr KFR_INTRINSIC complex<T> make_complex(T1 real, T2 imag = T2(0)) { - return complex<T>(innercast<T>(real), innercast<T>(imag)); + return complex<T>(promoteto<T>(real), promoteto<T>(imag)); } KFR_FN(make_complex) diff --git a/include/kfr/simd/complex_type.hpp b/include/kfr/simd/complex_type.hpp @@ -2,7 +2,7 @@ * @{ */ /* - Copyright (C) 2016 D Levin (https://www.kfrlib.com) + Copyright (C) 2016-2022 Fractalium Ltd (https://www.kfrlib.com) This file is part of KFR KFR is free software: you can redistribute it and/or modify diff --git a/include/kfr/simd/impl/abs.hpp b/include/kfr/simd/impl/abs.hpp @@ -22,9 +22,9 @@ */ #pragma once +#include "../operators.hpp" #include "../select.hpp" #include "function.hpp" -#include "../operators.hpp" namespace kfr { diff --git a/include/kfr/simd/impl/backend_clang.hpp b/include/kfr/simd/impl/backend_clang.hpp @@ -135,11 +135,13 @@ KFR_INTRINSIC simd<T, Nout> simd_shuffle(simd2_t<T, N1, N2>, const simd<T, N1>& csizes_t<indices...>, overload_generic) { constexpr size_t Nmax = (N1 > N2 ? N1 : N2); - return simd_shuffle( - simd2_t<T, Nmax, Nmax>{}, simd_shuffle(simd_t<T, N1>{}, x, csizeseq<Nmax>, overload_auto), - simd_shuffle(simd_t<T, N2>{}, y, csizeseq<Nmax>, overload_auto), - csizes<(indices < N1 ? indices : indices < N1 + N2 ? indices + (Nmax - N1) : index_undefined)...>, - overload_auto); + return simd_shuffle(simd2_t<T, Nmax, Nmax>{}, + simd_shuffle(simd_t<T, N1>{}, x, csizeseq<Nmax>, overload_auto), + simd_shuffle(simd_t<T, N2>{}, y, csizeseq<Nmax>, overload_auto), + csizes<(indices < N1 ? indices + : indices < N1 + N2 ? indices + (Nmax - N1) + : index_undefined)...>, + overload_auto); } template <typename T, size_t N1> diff --git a/include/kfr/simd/impl/backend_generic.hpp b/include/kfr/simd/impl/backend_generic.hpp @@ -455,7 +455,8 @@ KFR_INTRINSIC __m128 KFR_swap_ps(__m128 x) { return _mm_shuffle_ps(x, x, _MM_SHU #ifndef KFR_f32x2_array // KFR_INTRIN_SHUFFLE_CONCAT(f32, 2, _mm_castpd_ps(_mm_setr_pd(x.whole, y.whole))) -KFR_INTRIN_SHUFFLE_SWAP(f32, 2, _mm_cvtsd_f64(_mm_castps_pd(KFR_swap_ps(_mm_castpd_ps(_mm_set1_pd(x.whole)))))) +KFR_INTRIN_SHUFFLE_SWAP(f32, 2, + _mm_cvtsd_f64(_mm_castps_pd(KFR_swap_ps(_mm_castpd_ps(_mm_set1_pd(x.whole)))))) #else KFR_INTRIN_SHUFFLE_CONCAT(f32, 2, _mm_setr_ps(x.low, x.high, y.low, y.high)) KFR_INTRIN_SHUFFLE_SWAP(f32, 2, simd<f32, 2>(x.high, x.low)) diff --git a/include/kfr/simd/impl/basicoperators_clang.hpp b/include/kfr/simd/impl/basicoperators_clang.hpp @@ -86,6 +86,12 @@ KFR_INTRINSIC vec<T, N> div(const vec<T, N>& x, const vec<T, N>& y) return x.v / y.v; } KFR_OP_SCALAR2(div, /, , , ) +template <typename T, size_t N, KFR_ENABLE_IF(is_simd_type<T>)> +KFR_INTRINSIC vec<T, N> mod(const vec<T, N>& x, const vec<T, N>& y) +{ + return x.v % y.v; +} +KFR_OP_SCALAR2(mod, %, , , ) template <typename T, size_t N, KFR_ENABLE_IF(is_simd_type<T>)> KFR_INTRINSIC vec<T, N> band(const vec<T, N>& x, const vec<T, N>& y) diff --git a/include/kfr/simd/impl/basicoperators_complex.hpp b/include/kfr/simd/impl/basicoperators_complex.hpp @@ -2,7 +2,7 @@ * @{ */ /* - Copyright (C) 2016 D Levin (https://www.kfrlib.com) + Copyright (C) 2016-2022 Fractalium Ltd (https://www.kfrlib.com) This file is part of KFR KFR is free software: you can redistribute it and/or modify diff --git a/include/kfr/simd/impl/basicoperators_generic.hpp b/include/kfr/simd/impl/basicoperators_generic.hpp @@ -39,6 +39,16 @@ inline namespace CMT_ARCH_NAME namespace intrinsics { +#define KFR_DIV_MOD_FN(ty) \ + KFR_INTRINSIC ty div(const ty& x, const ty& y) \ + { \ + KFR_COMPONENTWISE_RET_I(ty, result[i] = y[i] ? x[i] / y[i] : 0); \ + } \ + KFR_INTRINSIC ty mod(const ty& x, const ty& y) \ + { \ + KFR_COMPONENTWISE_RET_I(ty, result[i] = y[i] ? x[i] % y[i] : 0); \ + } + #if defined CMT_ARCH_SSE2 && defined KFR_NATIVE_INTRINSICS KFR_INTRINSIC __m128 _mm_allones_ps() @@ -76,17 +86,11 @@ KFR_INTRINSIC f64sse div(const f64sse& x, const f64sse& y) { return f64sse(_mm_d KFR_INTRINSIC u8sse add(const u8sse& x, const u8sse& y) { return _mm_add_epi8(x.v, y.v); } KFR_INTRINSIC u8sse sub(const u8sse& x, const u8sse& y) { return _mm_sub_epi8(x.v, y.v); } -KFR_INTRINSIC u8sse div(const u8sse& x, const u8sse& y) -{ - KFR_COMPONENTWISE_RET_I(u8sse, result[i] = y[i] ? x[i] / y[i] : 0); -} +KFR_DIV_MOD_FN(u8sse) KFR_INTRINSIC i8sse add(const i8sse& x, const i8sse& y) { return _mm_add_epi8(x.v, y.v); } KFR_INTRINSIC i8sse sub(const i8sse& x, const i8sse& y) { return _mm_sub_epi8(x.v, y.v); } -KFR_INTRINSIC i8sse div(const i8sse& x, const i8sse& y) -{ - KFR_COMPONENTWISE_RET_I(i8sse, result[i] = y[i] ? x[i] / y[i] : 0); -} +KFR_DIV_MOD_FN(i8sse) KFR_INTRINSIC __m128i mul_epi8(const __m128i& x, const __m128i& y) { @@ -102,18 +106,12 @@ KFR_INTRINSIC i8sse mul(const i8sse& x, const i8sse& y) { return mul_epi8(x.v, y KFR_INTRINSIC u16sse add(const u16sse& x, const u16sse& y) { return _mm_add_epi16(x.v, y.v); } KFR_INTRINSIC u16sse sub(const u16sse& x, const u16sse& y) { return _mm_sub_epi16(x.v, y.v); } KFR_INTRINSIC u16sse mul(const u16sse& x, const u16sse& y) { return _mm_mullo_epi16(x.v, y.v); } -KFR_INTRINSIC u16sse div(const u16sse& x, const u16sse& y) -{ - KFR_COMPONENTWISE_RET_I(u16sse, result[i] = y[i] ? x[i] / y[i] : 0); -} +KFR_DIV_MOD_FN(u16sse) KFR_INTRINSIC i16sse add(const i16sse& x, const i16sse& y) { return _mm_add_epi16(x.v, y.v); } KFR_INTRINSIC i16sse sub(const i16sse& x, const i16sse& y) { return _mm_sub_epi16(x.v, y.v); } KFR_INTRINSIC i16sse mul(const i16sse& x, const i16sse& y) { return _mm_mullo_epi16(x.v, y.v); } -KFR_INTRINSIC i16sse div(const i16sse& x, const i16sse& y) -{ - KFR_COMPONENTWISE_RET_I(i16sse, result[i] = y[i] ? x[i] / y[i] : 0); -} +KFR_DIV_MOD_FN(i16sse) KFR_INTRINSIC u32sse add(const u32sse& x, const u32sse& y) { return _mm_add_epi32(x.v, y.v); } KFR_INTRINSIC u32sse sub(const u32sse& x, const u32sse& y) { return _mm_sub_epi32(x.v, y.v); } @@ -140,14 +138,8 @@ KFR_INTRINSIC i32sse mul(const i32sse& x, const i32sse& y) _mm_shuffle_epi32(tmp2, _MM_SHUFFLE(0, 0, 2, 0))); } #endif -KFR_INTRINSIC u32sse div(const u32sse& x, const u32sse& y) -{ - KFR_COMPONENTWISE_RET_I(u32sse, result[i] = y[i] ? x[i] / y[i] : 0); -} -KFR_INTRINSIC i32sse div(const i32sse& x, const i32sse& y) -{ - KFR_COMPONENTWISE_RET_I(i32sse, result[i] = y[i] ? x[i] / y[i] : 0); -} +KFR_DIV_MOD_FN(u32sse) +KFR_DIV_MOD_FN(i32sse) KFR_INTRINSIC u64sse add(const u64sse& x, const u64sse& y) { return _mm_add_epi64(x.v, y.v); } KFR_INTRINSIC u64sse sub(const u64sse& x, const u64sse& y) { return _mm_sub_epi64(x.v, y.v); } @@ -155,10 +147,6 @@ KFR_INTRINSIC u64sse mul(const u64sse& x, const u64sse& y) { KFR_COMPONENTWISE_RET_I(u64sse, result[i] = x[i] * y[i]); } -KFR_INTRINSIC u64sse div(const u64sse& x, const u64sse& y) -{ - KFR_COMPONENTWISE_RET_I(u64sse, result[i] = y[i] ? x[i] / y[i] : 0); -} KFR_INTRINSIC i64sse add(const i64sse& x, const i64sse& y) { return _mm_add_epi64(x.v, y.v); } KFR_INTRINSIC i64sse sub(const i64sse& x, const i64sse& y) { return _mm_sub_epi64(x.v, y.v); } @@ -166,10 +154,8 @@ KFR_INTRINSIC i64sse mul(const i64sse& x, const i64sse& y) { KFR_COMPONENTWISE_RET_I(i64sse, result[i] = x[i] * y[i]); } -KFR_INTRINSIC i64sse div(const i64sse& x, const i64sse& y) -{ - KFR_COMPONENTWISE_RET_I(i64sse, result[i] = y[i] ? x[i] / y[i] : 0); -} +KFR_DIV_MOD_FN(u64sse) +KFR_DIV_MOD_FN(i64sse) KFR_INTRINSIC f32sse shl(const f32sse& x, unsigned y) { @@ -600,33 +586,21 @@ KFR_INTRINSIC f64avx shr(const f64avx& x, unsigned y) KFR_INTRINSIC u8avx add(const u8avx& x, const u8avx& y) { return _mm256_add_epi8(x.v, y.v); } KFR_INTRINSIC u8avx sub(const u8avx& x, const u8avx& y) { return _mm256_sub_epi8(x.v, y.v); } -KFR_INTRINSIC u8avx div(const u8avx& x, const u8avx& y) -{ - KFR_COMPONENTWISE_RET_I(u8avx, result[i] = x[i] / y[i]); -} +KFR_DIV_MOD_FN(u8avx) KFR_INTRINSIC i8avx add(const i8avx& x, const i8avx& y) { return _mm256_add_epi8(x.v, y.v); } KFR_INTRINSIC i8avx sub(const i8avx& x, const i8avx& y) { return _mm256_sub_epi8(x.v, y.v); } -KFR_INTRINSIC i8avx div(const i8avx& x, const i8avx& y) -{ - KFR_COMPONENTWISE_RET_I(i8avx, result[i] = x[i] / y[i]); -} +KFR_DIV_MOD_FN(i8avx) KFR_INTRINSIC u16avx add(const u16avx& x, const u16avx& y) { return _mm256_add_epi16(x.v, y.v); } KFR_INTRINSIC u16avx sub(const u16avx& x, const u16avx& y) { return _mm256_sub_epi16(x.v, y.v); } KFR_INTRINSIC u16avx mul(const u16avx& x, const u16avx& y) { return _mm256_mullo_epi16(x.v, y.v); } -KFR_INTRINSIC u16avx div(const u16avx& x, const u16avx& y) -{ - KFR_COMPONENTWISE_RET_I(u16avx, result[i] = x[i] / y[i]); -} +KFR_DIV_MOD_FN(u16avx) KFR_INTRINSIC i16avx add(const i16avx& x, const i16avx& y) { return _mm256_add_epi16(x.v, y.v); } KFR_INTRINSIC i16avx sub(const i16avx& x, const i16avx& y) { return _mm256_sub_epi16(x.v, y.v); } KFR_INTRINSIC i16avx mul(const i16avx& x, const i16avx& y) { return _mm256_mullo_epi16(x.v, y.v); } -KFR_INTRINSIC i16avx div(const i16avx& x, const i16avx& y) -{ - KFR_COMPONENTWISE_RET_I(i16avx, result[i] = x[i] / y[i]); -} +KFR_DIV_MOD_FN(i16avx) KFR_INTRINSIC u32avx add(const u32avx& x, const u32avx& y) { return _mm256_add_epi32(x.v, y.v); } KFR_INTRINSIC u32avx sub(const u32avx& x, const u32avx& y) { return _mm256_sub_epi32(x.v, y.v); } @@ -636,14 +610,8 @@ KFR_INTRINSIC i32avx sub(const i32avx& x, const i32avx& y) { return _mm256_sub_e KFR_INTRINSIC u32avx mul(const u32avx& x, const u32avx& y) { return _mm256_mullo_epi32(x.v, y.v); } KFR_INTRINSIC i32avx mul(const i32avx& x, const i32avx& y) { return _mm256_mullo_epi32(x.v, y.v); } -KFR_INTRINSIC u32avx div(const u32avx& x, const u32avx& y) -{ - KFR_COMPONENTWISE_RET_I(u32avx, result[i] = x[i] / y[i]); -} -KFR_INTRINSIC i32avx div(const i32avx& x, const i32avx& y) -{ - KFR_COMPONENTWISE_RET_I(i32avx, result[i] = x[i] / y[i]); -} +KFR_DIV_MOD_FN(u32avx) +KFR_DIV_MOD_FN(i32avx) KFR_INTRINSIC u64avx add(const u64avx& x, const u64avx& y) { return _mm256_add_epi64(x.v, y.v); } KFR_INTRINSIC u64avx sub(const u64avx& x, const u64avx& y) { return _mm256_sub_epi64(x.v, y.v); } @@ -651,10 +619,6 @@ KFR_INTRINSIC u64avx mul(const u64avx& x, const u64avx& y) { KFR_COMPONENTWISE_RET_I(u64avx, result[i] = x[i] * y[i]); } -KFR_INTRINSIC u64avx div(const u64avx& x, const u64avx& y) -{ - KFR_COMPONENTWISE_RET_I(u64avx, result[i] = y[i] ? x[i] / y[i] : 0); -} KFR_INTRINSIC i64avx add(const i64avx& x, const i64avx& y) { return _mm256_add_epi64(x.v, y.v); } KFR_INTRINSIC i64avx sub(const i64avx& x, const i64avx& y) { return _mm256_sub_epi64(x.v, y.v); } @@ -662,10 +626,8 @@ KFR_INTRINSIC i64avx mul(const i64avx& x, const i64avx& y) { KFR_COMPONENTWISE_RET_I(i64avx, result[i] = x[i] * y[i]); } -KFR_INTRINSIC i64avx div(const i64avx& x, const i64avx& y) -{ - KFR_COMPONENTWISE_RET_I(i64avx, result[i] = y[i] ? x[i] / y[i] : 0); -} +KFR_DIV_MOD_FN(u64avx) +KFR_DIV_MOD_FN(i64avx) KFR_INTRINSIC __m256i mul_epi8(const __m256i& x, const __m256i& y) { @@ -1319,38 +1281,14 @@ KFR_INTRINSIC u16avx512 mul(const u16avx512& x, const u16avx512& y) { return _mm KFR_INTRINSIC u32avx512 mul(const u32avx512& x, const u32avx512& y) { return _mm512_mullo_epi32(x.v, y.v); } KFR_INTRINSIC u64avx512 mul(const u64avx512& x, const u64avx512& y) { return _mm512_mullo_epi64(x.v, y.v); } -KFR_INTRINSIC i8avx512 div(const i8avx512& x, const i8avx512& y) -{ - KFR_COMPONENTWISE_RET_I(u8avx512, result[i] = y[i] ? x[i] / y[i] : 0); -} -KFR_INTRINSIC i16avx512 div(const i16avx512& x, const i16avx512& y) -{ - KFR_COMPONENTWISE_RET_I(u16avx512, result[i] = y[i] ? x[i] / y[i] : 0); -} -KFR_INTRINSIC i32avx512 div(const i32avx512& x, const i32avx512& y) -{ - KFR_COMPONENTWISE_RET_I(u32avx512, result[i] = y[i] ? x[i] / y[i] : 0); -} -KFR_INTRINSIC i64avx512 div(const i64avx512& x, const i64avx512& y) -{ - KFR_COMPONENTWISE_RET_I(u64avx512, result[i] = y[i] ? x[i] / y[i] : 0); -} -KFR_INTRINSIC u8avx512 div(const u8avx512& x, const u8avx512& y) -{ - KFR_COMPONENTWISE_RET_I(u8avx512, result[i] = y[i] ? x[i] / y[i] : 0); -} -KFR_INTRINSIC u16avx512 div(const u16avx512& x, const u16avx512& y) -{ - KFR_COMPONENTWISE_RET_I(u16avx512, result[i] = y[i] ? x[i] / y[i] : 0); -} -KFR_INTRINSIC u32avx512 div(const u32avx512& x, const u32avx512& y) -{ - KFR_COMPONENTWISE_RET_I(u32avx512, result[i] = y[i] ? x[i] / y[i] : 0); -} -KFR_INTRINSIC u64avx512 div(const u64avx512& x, const u64avx512& y) -{ - KFR_COMPONENTWISE_RET_I(u64avx512, result[i] = y[i] ? x[i] / y[i] : 0); -} +KFR_DIV_MOD_FN(i8avx512) +KFR_DIV_MOD_FN(i16avx512) +KFR_DIV_MOD_FN(i32avx512) +KFR_DIV_MOD_FN(i64avx512) +KFR_DIV_MOD_FN(u8avx512) +KFR_DIV_MOD_FN(u16avx512) +KFR_DIV_MOD_FN(u32avx512) +KFR_DIV_MOD_FN(u64avx512) KFR_INTRINSIC i8avx512 band(const i8avx512& x, const i8avx512& y) { return _mm512_and_si512(x.v, y.v); } KFR_INTRINSIC i16avx512 band(const i16avx512& x, const i16avx512& y) { return _mm512_and_si512(x.v, y.v); } @@ -1512,6 +1450,7 @@ KFR_HANDLE_ALL_SIZES_2(add) KFR_HANDLE_ALL_SIZES_2(sub) KFR_HANDLE_ALL_SIZES_2(mul) KFR_HANDLE_ALL_SIZES_2(div) +KFR_HANDLE_ALL_SIZES_2(mod) KFR_HANDLE_ALL_SIZES_2(eq) KFR_HANDLE_ALL_SIZES_2(ne) @@ -1619,6 +1558,11 @@ KFR_INTRINSIC vec<T, N> div(const vec<T, N>& x, const vec<T, N>& y) { KFR_COMPONENTWISE_RET(result[i] = x[i] / y[i]); } +template <typename T, size_t N, KFR_ENABLE_IF(is_simd_type<T>)> +KFR_INTRINSIC vec<T, N> mod(const vec<T, N>& x, const vec<T, N>& y) +{ + KFR_COMPONENTWISE_RET(result[i] = x[i] % y[i]); +} #define KFR_HANDLE_VEC_SCA(fn) \ template <typename T, size_t N, KFR_ENABLE_IF(is_simd_type<T>)> \ @@ -1636,6 +1580,7 @@ KFR_HANDLE_VEC_SCA(add) KFR_HANDLE_VEC_SCA(sub) KFR_HANDLE_VEC_SCA(mul) KFR_HANDLE_VEC_SCA(div) +KFR_HANDLE_VEC_SCA(mod) KFR_HANDLE_VEC_SCA(band) KFR_HANDLE_VEC_SCA(bor) KFR_HANDLE_VEC_SCA(bxor) diff --git a/include/kfr/simd/impl/function.hpp b/include/kfr/simd/impl/function.hpp @@ -38,7 +38,7 @@ inline namespace CMT_ARCH_NAME template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>)> \ KFR_INTRINSIC vec<flt_type<T>, N> fn(const vec<T, N>& a) CMT_NOEXCEPT \ { \ - return intrinsics::fn(elemcast<flt_type<T>>(a)); \ + return intrinsics::fn(promoteto<flt_type<T>>(a)); \ } #define KFR_HANDLE_SCALAR(fn) \ diff --git a/include/kfr/simd/impl/logical.hpp b/include/kfr/simd/impl/logical.hpp @@ -23,8 +23,8 @@ #pragma once #include "../abs.hpp" -#include "function.hpp" #include "../operators.hpp" +#include "function.hpp" namespace kfr { diff --git a/include/kfr/simd/impl/min_max.hpp b/include/kfr/simd/impl/min_max.hpp @@ -23,9 +23,9 @@ #pragma once #include "../abs.hpp" +#include "../operators.hpp" #include "../select.hpp" #include "function.hpp" -#include "../operators.hpp" namespace kfr { diff --git a/include/kfr/simd/impl/operators.hpp b/include/kfr/simd/impl/operators.hpp @@ -54,87 +54,31 @@ namespace intrinsics KFR_ENABLE_IF(is_simd_type<C>)> \ KFR_INTRINSIC vec<vec<C, N1>, N2> fn(const vec<vec<T1, N1>, N2>& x, const vec<vec<T2, N1>, N2>& y) \ { \ - return fn(innercast<C>(x.flatten()), innercast<C>(y.flatten())).v; \ + return fn(broadcastto<C>(x.flatten()), broadcastto<C>(y.flatten())).v; \ } \ template <typename T1, typename T2, size_t N1, size_t N2, typename C = common_type<T1, T2>, \ KFR_ENABLE_IF(is_simd_type<C>)> \ KFR_INTRINSIC vec<vec<C, N1>, N2> fn(const vec<vec<T1, N1>, N2>& x, const T2& y) \ { \ - return fn(innercast<C>(x.flatten()), innercast<C>(y)).v; \ + return fn(broadcastto<C>(x.flatten()), broadcastto<C>(y)).v; \ } \ template <typename T1, typename T2, size_t N1, size_t N2, typename C = common_type<T1, T2>, \ KFR_ENABLE_IF(is_simd_type<C>)> \ KFR_INTRINSIC vec<vec<C, N1>, N2> fn(const vec<vec<T1, N1>, N2>& x, const vec<T2, N1>& y) \ { \ - return fn(innercast<C>(x.flatten()), repeat<N2>(innercast<C>(y.flatten()))).v; \ + return fn(broadcastto<C>(x.flatten()), repeat<N2>(broadcastto<C>(y.flatten()))).v; \ } \ template <typename T1, typename T2, size_t N1, size_t N2, typename C = common_type<T1, T2>, \ KFR_ENABLE_IF(is_simd_type<C>)> \ KFR_INTRINSIC vec<vec<C, N1>, N2> fn(const T1& x, const vec<vec<T2, N1>, N2>& y) \ { \ - return fn(innercast<C>(x), innercast<C>(y.flatten())).v; \ + return fn(broadcastto<C>(x), broadcastto<C>(y.flatten())).v; \ } \ template <typename T1, typename T2, size_t N1, size_t N2, typename C = common_type<T1, T2>, \ KFR_ENABLE_IF(is_simd_type<C>)> \ KFR_INTRINSIC vec<vec<C, N1>, N2> fn(const vec<T1, N1>& x, const vec<vec<T2, N1>, N2>& y) \ { \ - return fn(repeat<N2>(innercast<C>(x.flatten())), innercast<C>(y.flatten())).v; \ - } - -#define KFR_VECVECVEC_OP1(fn) \ - template <typename T1, size_t N1, size_t N2, size_t N3> \ - KFR_INTRINSIC vec<vec<vec<T1, N1>, N2>, N3> fn(const vec<vec<vec<T1, N1>, N2>, N3>& x) \ - { \ - return fn(x.flatten()).v; \ - } - -#define KFR_VECVECVEC_OP2(fn) \ - template <typename T1, typename T2, size_t N1, size_t N2, size_t N3, typename C = common_type<T1, T2>, \ - KFR_ENABLE_IF(is_simd_type<C>)> \ - KFR_INTRINSIC vec<vec<vec<C, N1>, N2>, N3> fn(const vec<vec<vec<T1, N1>, N2>, N3>& x, \ - const vec<vec<vec<T2, N1>, N2>, N3>& y) \ - { /* VVV @ VVV */ \ - return fn(innercast<C>(x.flatten()), innercast<C>(y.flatten())).v; \ - } \ - template <typename T1, typename T2, size_t N1, size_t N2, size_t N3, typename C = common_type<T1, T2>, \ - KFR_ENABLE_IF(is_simd_type<C>)> \ - KFR_INTRINSIC vec<vec<vec<C, N1>, N2>, N3> fn(const vec<vec<vec<T1, N1>, N2>, N3>& x, \ - const vec<vec<T2, N1>, N2>& y) \ - { /* VVV @ VV */ \ - return fn(innercast<C>(x.flatten()), repeat<N3>(innercast<C>(y.flatten()))).v; \ - } \ - template <typename T1, typename T2, size_t N1, size_t N2, size_t N3, typename C = common_type<T1, T2>, \ - KFR_ENABLE_IF(is_simd_type<C>)> \ - KFR_INTRINSIC vec<vec<vec<C, N1>, N2>, N3> fn(const vec<vec<T1, N1>, N2>& x, \ - const vec<vec<vec<T2, N1>, N2>, N3>& y) \ - { /* VV @ VVV */ \ - return fn(repeat<N3>(innercast<C>(x.flatten())), innercast<C>(y.flatten())).v; \ - } \ - template <typename T1, typename T2, size_t N1, size_t N2, size_t N3, typename C = common_type<T1, T2>, \ - KFR_ENABLE_IF(is_simd_type<C>)> \ - KFR_INTRINSIC vec<vec<vec<C, N1>, N2>, N3> fn(const vec<vec<vec<T1, N1>, N2>, N3>& x, const T2& y) \ - { /* VVV @ S */ \ - return fn(innercast<C>(x.flatten()), innercast<C>(y)).v; \ - } \ - template <typename T1, typename T2, size_t N1, size_t N2, size_t N3, typename C = common_type<T1, T2>, \ - KFR_ENABLE_IF(is_simd_type<C>)> \ - KFR_INTRINSIC vec<vec<vec<C, N1>, N2>, N3> fn(const vec<vec<vec<T1, N1>, N2>, N3>& x, \ - const vec<T2, N1>& y) \ - { /* VVV @ V */ \ - return fn(innercast<C>(x.flatten()), repeat<N2>(innercast<C>(y.flatten()))).v; \ - } \ - template <typename T1, typename T2, size_t N1, size_t N2, size_t N3, typename C = common_type<T1, T2>, \ - KFR_ENABLE_IF(is_simd_type<C>)> \ - KFR_INTRINSIC vec<vec<vec<C, N1>, N2>, N3> fn(const T1& x, const vec<vec<vec<T2, N1>, N2>, N3>& y) \ - { /* S @ VVV */ \ - return fn(innercast<C>(x), innercast<C>(y.flatten())).v; \ - } \ - template <typename T1, typename T2, size_t N1, size_t N2, size_t N3, typename C = common_type<T1, T2>, \ - KFR_ENABLE_IF(is_simd_type<C>)> \ - KFR_INTRINSIC vec<vec<vec<C, N1>, N2>, N3> fn(const vec<T1, N1>& x, \ - const vec<vec<vec<T2, N1>, N2>, N3>& y) \ - { /* V @ VVV */ \ - return fn(repeat<N2>(innercast<C>(x.flatten())), innercast<C>(y.flatten())).v; \ + return fn(repeat<N2>(broadcastto<C>(x.flatten())), broadcastto<C>(y.flatten())).v; \ } KFR_VECVEC_OP1(neg) @@ -143,20 +87,11 @@ KFR_VECVEC_OP2(add) KFR_VECVEC_OP2(sub) KFR_VECVEC_OP2(mul) KFR_VECVEC_OP2(div) +KFR_VECVEC_OP2(mod) KFR_VECVEC_OP2(band) KFR_VECVEC_OP2(bor) KFR_VECVEC_OP2(bxor) -KFR_VECVECVEC_OP1(neg) -KFR_VECVECVEC_OP1(bnot) -KFR_VECVECVEC_OP2(add) -KFR_VECVECVEC_OP2(sub) -KFR_VECVECVEC_OP2(mul) -KFR_VECVECVEC_OP2(div) -KFR_VECVECVEC_OP2(band) -KFR_VECVECVEC_OP2(bor) -KFR_VECVECVEC_OP2(bxor) - } // namespace intrinsics } // namespace CMT_ARCH_NAME } // namespace kfr diff --git a/include/kfr/simd/impl/round.hpp b/include/kfr/simd/impl/round.hpp @@ -22,9 +22,9 @@ */ #pragma once -#include "function.hpp" #include "../operators.hpp" #include "abs.hpp" +#include "function.hpp" namespace kfr { @@ -140,48 +140,48 @@ constexpr inline f32 fp_precision_limit<f32> = 16777216.0f; template <size_t N> KFR_INTRINSIC vec<f32, N> floor(const vec<f32, N>& x) { - vec<f32, N> t = innercast<f32>(innercast<i32>(x)); + vec<f32, N> t = broadcastto<f32>(broadcastto<i32>(x)); return select(abs(x) >= fp_precision_limit<f32>, x, t - select(x < t, 1.f, 0.f)); } template <size_t N> KFR_INTRINSIC vec<f64, N> floor(const vec<f64, N>& x) { - vec<f64, N> t = innercast<f64>(innercast<i64>(x)); + vec<f64, N> t = broadcastto<f64>(broadcastto<i64>(x)); return select(abs(x) >= fp_precision_limit<f64>, x, t - select(x < t, 1., 0.)); } template <size_t N> KFR_INTRINSIC vec<f32, N> ceil(const vec<f32, N>& x) { - vec<f32, N> t = innercast<f32>(innercast<i32>(x)); + vec<f32, N> t = broadcastto<f32>(broadcastto<i32>(x)); return select(abs(x) >= fp_precision_limit<f32>, x, t + select(x > t, 1.f, 0.f)); } template <size_t N> KFR_INTRINSIC vec<f64, N> ceil(const vec<f64, N>& x) { - vec<f64, N> t = innercast<f64>(innercast<i64>(x)); + vec<f64, N> t = broadcastto<f64>(broadcastto<i64>(x)); return select(abs(x) >= fp_precision_limit<f64>, x, t + select(x > t, 1., 0.)); } template <size_t N> KFR_INTRINSIC vec<f32, N> round(const vec<f32, N>& x) { return select(abs(x) >= fp_precision_limit<f32>, x, - innercast<f32>(innercast<i32>(x + mulsign(broadcast<N>(0.5f), x)))); + broadcastto<f32>(broadcastto<i32>(x + mulsign(broadcast<N>(0.5f), x)))); } template <size_t N> KFR_INTRINSIC vec<f64, N> round(const vec<f64, N>& x) { return select(abs(x) >= fp_precision_limit<f64>, x, - innercast<f64>(innercast<i64>(x + mulsign(broadcast<N>(0.5), x)))); + broadcastto<f64>(broadcastto<i64>(x + mulsign(broadcast<N>(0.5), x)))); } template <size_t N> KFR_INTRINSIC vec<f32, N> trunc(const vec<f32, N>& x) { - return select(abs(x) >= fp_precision_limit<f32>, x, innercast<f32>(innercast<i32>(x))); + return select(abs(x) >= fp_precision_limit<f32>, x, broadcastto<f32>(broadcastto<i32>(x))); } template <size_t N> KFR_INTRINSIC vec<f64, N> trunc(const vec<f64, N>& x) { - return select(abs(x) >= fp_precision_limit<f64>, x, innercast<f64>(innercast<i64>(x))); + return select(abs(x) >= fp_precision_limit<f64>, x, broadcastto<f64>(broadcastto<i64>(x))); } template <size_t N> KFR_INTRINSIC vec<f32, N> fract(const vec<f32, N>& x) @@ -224,22 +224,22 @@ KFR_INTRINSIC vec<T, N> fract(const vec<T, N>&) template <typename T, size_t N, typename IT = itype<T>> KFR_INTRINSIC vec<IT, N> ifloor(const vec<T, N>& value) { - return innercast<IT>(floor(value)); + return broadcastto<IT>(floor(value)); } template <typename T, size_t N, typename IT = itype<T>> KFR_INTRINSIC vec<IT, N> iceil(const vec<T, N>& value) { - return innercast<IT>(ceil(value)); + return broadcastto<IT>(ceil(value)); } template <typename T, size_t N, typename IT = itype<T>> KFR_INTRINSIC vec<IT, N> itrunc(const vec<T, N>& value) { - return innercast<IT>(trunc(value)); + return broadcastto<IT>(trunc(value)); } template <typename T, size_t N, typename IT = itype<T>> KFR_INTRINSIC vec<IT, N> iround(const vec<T, N>& value) { - return innercast<IT>(round(value)); + return broadcastto<IT>(round(value)); } KFR_HANDLE_SCALAR(floor) diff --git a/include/kfr/simd/impl/saturation.hpp b/include/kfr/simd/impl/saturation.hpp @@ -22,9 +22,9 @@ */ #pragma once +#include "../operators.hpp" #include "../select.hpp" #include "function.hpp" -#include "../operators.hpp" namespace kfr { diff --git a/include/kfr/simd/impl/select.hpp b/include/kfr/simd/impl/select.hpp @@ -22,8 +22,8 @@ */ #pragma once -#include "function.hpp" #include "../operators.hpp" +#include "function.hpp" namespace kfr { diff --git a/include/kfr/simd/impl/specializations.i b/include/kfr/simd/impl/specializations.hpp diff --git a/include/kfr/simd/operators.hpp b/include/kfr/simd/operators.hpp @@ -36,39 +36,42 @@ inline namespace CMT_ARCH_NAME { #define KFR_VEC_OPERATOR1(op, fn) \ - template <typename T, size_t N> \ + template <typename T, size_t N /* , KFR_ENABLE_IF(!is_vec<T>) */> \ constexpr KFR_INTRINSIC vec<T, N> operator op(const vec<T, N>& x) \ { \ return intrinsics::fn(x); \ } #define KFR_VEC_OPERATOR2(op, asgnop, fn) \ - template <typename T1, typename T2, size_t N> \ + template <typename T1, typename T2, size_t N, KFR_ENABLE_IF(vec_rank<T1> == vec_rank<T2>)> \ constexpr KFR_INTRINSIC vec<T1, N>& operator asgnop(vec<T1, N>& x, const vec<T2, N>& y) \ { \ - x = intrinsics::fn(x, elemcast<T1>(y)); \ + x = intrinsics::fn(x, promoteto<T1>(y)); \ return x; \ } \ - template <typename T1, typename T2, size_t N> \ + template <typename T1, typename T2, size_t N, KFR_ENABLE_IF(1 + vec_rank<T1> > vec_rank<T2>)> \ constexpr KFR_INTRINSIC vec<T1, N>& operator asgnop(vec<T1, N>& x, const T2& y) \ { \ x = intrinsics::fn(x, T1(y)); \ return x; \ } \ - template <typename T1, typename T2, size_t N, typename C = common_type<T1, T2>> \ + template <typename T1, typename T2, size_t N, typename C = common_type<T1, T2>, \ + KFR_ENABLE_IF(1 + vec_rank<T1> > vec_rank<T2>)> \ constexpr KFR_INTRINSIC vec<C, N> operator op(const vec<T1, N>& x, const T2& y) \ { \ - return intrinsics::fn(elemcast<C>(x), C(y)); \ + return intrinsics::fn(promoteto<C>(x), C(y)); \ } \ - template <typename T1, typename T2, size_t N, typename C = common_type<T1, T2>> \ + template <typename T1, typename T2, size_t N, typename C = common_type<T1, T2>, \ + KFR_ENABLE_IF(vec_rank<T1> < 1 + vec_rank<T2>)> \ constexpr KFR_INTRINSIC vec<C, N> operator op(const T1& x, const vec<T2, N>& y) \ { \ - return intrinsics::fn(C(x), elemcast<C>(y)); \ + return intrinsics::fn(C(x), promoteto<C>(y)); \ } \ - template <typename T1, typename T2, size_t N, typename C = common_type<T1, T2>> \ + template <typename T1, typename T2, size_t N, typename C = common_type<T1, T2>, \ + KFR_ENABLE_IF(vec_rank<T1> == vec_rank<T2>)> \ constexpr KFR_INTRINSIC vec<C, N> operator op(const vec<T1, N>& x, const vec<T2, N>& y) \ { \ - return intrinsics::fn(elemcast<C>(x), elemcast<C>(y)); \ + return intrinsics::fn(promoteto<C>(x), promoteto<C>(y)); \ } #define KFR_VEC_SHIFT_OPERATOR(op, asgnop, fn) \ @@ -78,43 +81,46 @@ inline namespace CMT_ARCH_NAME x = intrinsics::fn(x, y); \ return x; \ } \ - template <typename T1, typename T2, size_t N> \ + template <typename T1, typename T2, size_t N, KFR_ENABLE_IF(vec_rank<T1> == vec_rank<T2>)> \ constexpr KFR_INTRINSIC vec<T1, N>& operator asgnop(vec<T1, N>& x, const vec<T2, N>& y) \ { \ - x = intrinsics::fn(x, elemcast<utype<T1>>(y)); \ + x = intrinsics::fn(x, promoteto<utype<T1>>(y)); \ return x; \ } \ - template <typename T, size_t N> \ - constexpr KFR_INTRINSIC vec<T, N> operator op(const vec<T, N>& x, unsigned y) \ + template <typename T1, size_t N> \ + constexpr KFR_INTRINSIC vec<T1, N> operator op(const vec<T1, N>& x, unsigned y) \ { \ return intrinsics::fn(x, y); \ } \ - template <typename T, typename T2, size_t N> \ - constexpr KFR_INTRINSIC vec<T, N> operator op(const T& x, const vec<T2, N>& y) \ + template <typename T1, typename T2, size_t N, KFR_ENABLE_IF(vec_rank<T1> < 1 + vec_rank<T2>)> \ + constexpr KFR_INTRINSIC vec<T1, N> operator op(const T1& x, const vec<T2, N>& y) \ { \ - return intrinsics::fn(innercast<T>(x), elemcast<utype<T>>(y)); \ + return intrinsics::fn(broadcastto<T1>(x), promoteto<utype<T1>>(y)); \ } \ - template <typename T, typename T2, size_t N> \ - constexpr KFR_INTRINSIC vec<T, N> operator op(const vec<T, N>& x, const vec<T2, N>& y) \ + template <typename T1, typename T2, size_t N, KFR_ENABLE_IF(vec_rank<T1> == vec_rank<T2>)> \ + constexpr KFR_INTRINSIC vec<T1, N> operator op(const vec<T1, N>& x, const vec<T2, N>& y) \ { \ - return intrinsics::fn(x, elemcast<utype<T>>(y)); \ + return intrinsics::fn(x, promoteto<utype<T1>>(y)); \ } #define KFR_VEC_CMP_OPERATOR(op, fn) \ - template <typename T1, typename T2, size_t N, typename C = common_type<T1, T2>> \ + template <typename T1, typename T2, size_t N, typename C = common_type<T1, T2>, \ + KFR_ENABLE_IF(1 + vec_rank<T1> > vec_rank<T2>)> \ constexpr KFR_INTRINSIC mask<C, N> operator op(const vec<T1, N>& x, const T2& y) \ { \ - return intrinsics::fn(elemcast<C>(x), vec<C, N>(y)).asmask(); \ + return intrinsics::fn(promoteto<C>(x), vec<C, N>(y)).asmask(); \ } \ - template <typename T1, typename T2, size_t N, typename C = common_type<T1, T2>> \ + template <typename T1, typename T2, size_t N, typename C = common_type<T1, T2>, \ + KFR_ENABLE_IF(vec_rank<T1> < 1 + vec_rank<T2>)> \ constexpr KFR_INTRINSIC mask<C, N> operator op(const T1& x, const vec<T2, N>& y) \ { \ - return intrinsics::fn(vec<C, N>(x), elemcast<C>(y)).asmask(); \ + return intrinsics::fn(vec<C, N>(x), promoteto<C>(y)).asmask(); \ } \ - template <typename T1, typename T2, size_t N, typename C = common_type<T1, T2>> \ + template <typename T1, typename T2, size_t N, typename C = common_type<T1, T2>, \ + KFR_ENABLE_IF(vec_rank<T1> == vec_rank<T2>)> \ constexpr KFR_INTRINSIC mask<C, N> operator op(const vec<T1, N>& x, const vec<T2, N>& y) \ { \ - return intrinsics::fn(elemcast<C>(x), elemcast<C>(y)).asmask(); \ + return intrinsics::fn(promoteto<C>(x), promoteto<C>(y)).asmask(); \ } KFR_VEC_OPERATOR1(-, neg) @@ -124,6 +130,7 @@ KFR_VEC_OPERATOR2(+, +=, add) KFR_VEC_OPERATOR2(-, -=, sub) KFR_VEC_OPERATOR2(*, *=, mul) KFR_VEC_OPERATOR2(/, /=, div) +KFR_VEC_OPERATOR2(%, %=, mod) KFR_VEC_OPERATOR2(&, &=, band) KFR_VEC_OPERATOR2(|, |=, bor) @@ -140,7 +147,7 @@ KFR_VEC_CMP_OPERATOR(<, lt) template <typename T1, typename T2, size_t N, typename C = common_type<T1, T2>, KFR_ENABLE_IF(sizeof(T1) == sizeof(T2))> -KFR_INTRINSIC mask<C, N> operator&(const mask<T1, N>& x, const mask<T2, N>& y)CMT_NOEXCEPT +KFR_INTRINSIC mask<C, N> operator&(const mask<T1, N>& x, const mask<T2, N>& y) CMT_NOEXCEPT { return mask<C, N>((bitcast<C>(vec<T1, N>(x.v)) & bitcast<C>(vec<T2, N>(y.v))).v); } @@ -433,6 +440,13 @@ KFR_INTRINSIC Tout div(const T1& x, const T2& y) } KFR_FN(div) +/// Modulo +template <typename T1, typename T2, typename Tout = common_type<T1, T2>> +KFR_INTRINSIC Tout mod(const T1& x, const T2& y) +{ + return static_cast<Tout>(x) % static_cast<Tout>(y); +} +KFR_FN(mod) /// Remainder template <typename T1, typename T2, typename Tout = common_type<T1, T2>> KFR_INTRINSIC Tout rem(const T1& x, const T2& y) diff --git a/include/kfr/simd/read_write.hpp b/include/kfr/simd/read_write.hpp @@ -26,6 +26,7 @@ #pragma once #include "impl/read_write.hpp" +#include <array> namespace kfr { @@ -95,9 +96,9 @@ KFR_INTRINSIC vec<T, Nout * groupsize> gather_stride(const T* base, size_t strid if constexpr (Nout > 2) { constexpr size_t Nlow = prev_poweroftwo(Nout - 1); - return concat( - internal::gather_stride_s<Nlow, groupsize>(base, stride, csizeseq<Nlow>), - internal::gather_stride_s<Nout - Nlow, groupsize>(base + Nlow * stride, stride, csizeseq<Nout - Nlow>)); + return concat(internal::gather_stride_s<Nlow, groupsize>(base, stride, csizeseq<Nlow>), + internal::gather_stride_s<Nout - Nlow, groupsize>(base + Nlow * stride, stride, + csizeseq<Nout - Nlow>)); } else return internal::gather_stride_s<Nout, groupsize>(base, stride, csizeseq<Nout>); @@ -117,7 +118,7 @@ KFR_INTRINSIC vec<T, N * groupsize> gather_helper(const T* base, const vec<IT, N { return concat(read<groupsize>(base + groupsize * offset[Indices])...); } -} +} // namespace internal template <size_t groupsize = 1, typename T, size_t N, typename IT> KFR_INTRINSIC vec<T, N * groupsize> gather(const T* base, const vec<IT, N>& offset) { @@ -185,6 +186,12 @@ struct stride_pointer<const T, groupsize> } }; +template <typename T, size_t N> +KFR_INTRINSIC vec<T, N> v(const std::array<T, N>& a) +{ + return read<N>(a.data()); +} + template <typename T> constexpr T partial_masks[] = { constants<T>::allones(), constants<T>::allones(), diff --git a/include/kfr/simd/select.hpp b/include/kfr/simd/select.hpp @@ -43,7 +43,7 @@ template <typename T1, size_t N, typename T2, typename T3, KFR_ENABLE_IF(is_nume KFR_INTRINSIC vec<Tout, N> select(const mask<T1, N>& m, const T2& x, const T3& y) { static_assert(sizeof(T1) == sizeof(Tout), "select: incompatible types"); - return intrinsics::select(bitcast<Tout>(m.asvec()).asmask(), innercast<Tout>(x), innercast<Tout>(y)); + return intrinsics::select(bitcast<Tout>(m.asvec()).asmask(), broadcastto<Tout>(x), broadcastto<Tout>(y)); } } // namespace CMT_ARCH_NAME } // namespace kfr diff --git a/include/kfr/simd/shuffle.hpp b/include/kfr/simd/shuffle.hpp @@ -630,6 +630,6 @@ KFR_FN(onoff) } // namespace CMT_ARCH_NAME } // namespace kfr #define KFR_SHUFFLE_SPECIALIZATIONS 1 -#include "impl/specializations.i" +#include "impl/specializations.hpp" -CMT_PRAGMA_MSVC(warning(pop)) -\ No newline at end of file +CMT_PRAGMA_MSVC(warning(pop)) diff --git a/include/kfr/simd/vec.hpp b/include/kfr/simd/vec.hpp @@ -93,6 +93,15 @@ struct alignas(next_poweroftwo(sizeof(T)) * next_poweroftwo(N)) portable_vec { static constexpr vec_shape<T, N> shape() CMT_NOEXCEPT { return {}; } + constexpr portable_vec() = default; + + constexpr portable_vec(T value) : portable_vec(csizeseq<N>, value) {} + + template <typename... Ts, size_t NN = N, KFR_ENABLE_IF(NN >= 2)> + constexpr portable_vec(T v1, T v2, Ts... args) : elem{ v1, v2, static_cast<T>(args)... } + { + } + static_assert(N > 0 && N <= 1024, "Invalid vector size"); static_assert(is_simd_type<T> || !compound_type_traits<T>::is_scalar, "Invalid vector type"); @@ -104,8 +113,18 @@ struct alignas(next_poweroftwo(sizeof(T)) * next_poweroftwo(N)) portable_vec T elem[N]; - T operator[](size_t index) const { return elem[index]; } - T& operator[](size_t index) { return elem[index]; } + constexpr T operator[](size_t index) const { return elem[index]; } + constexpr T& operator[](size_t index) { return elem[index]; } + constexpr T front() const { return elem[0]; } + constexpr T& front() { return elem[0]; } + constexpr T back() const { return elem[N - 1]; } + constexpr T& back() { return elem[N - 1]; } + +private: + template <size_t... indices> + constexpr portable_vec(csizes_t<indices...>, T value) : elem{ (static_cast<void>(indices), value)... } + { + } }; inline namespace CMT_ARCH_NAME @@ -114,6 +133,9 @@ inline namespace CMT_ARCH_NAME template <typename T, size_t N> struct vec; +template <typename T> +constexpr inline size_t vec_rank = 0; + template <typename T, size_t N> struct vec_halves { @@ -129,12 +151,22 @@ struct vec_halves<T, 1> namespace internal { +enum class conv_t +{ + promote, + broadcast, +}; // scalar to scalar -template <typename To, typename From> +template <size_t ToRank, size_t FromRank, typename To, typename From, conv_t conv> struct conversion { - static_assert(is_convertible<From, To>, ""); +}; + +template <typename To, typename From, conv_t conv> +struct conversion<0, 0, To, From, conv> +{ + static_assert(is_convertible<From, To>); static To cast(const From& value) { return value; } }; @@ -163,14 +195,6 @@ struct compoundcast<vec<vec<T, N1>, N2>> static vec<vec<T, N1>, N2> from_flat(const vec<T, N1 * N2>& x) { return x.v; } }; -template <typename T, size_t N1, size_t N2, size_t N3> -struct compoundcast<vec<vec<vec<T, N1>, N2>, N3>> -{ - static vec<T, N1 * N2 * N3> to_flat(const vec<vec<vec<T, N1>, N2>, N3>& x) { return x.v; } - - static vec<vec<vec<T, N1>, N2>, N3> from_flat(const vec<T, N1 * N2 * N3>& x) { return x.v; } -}; - template <typename T, size_t N_> inline constexpr size_t vec_alignment = const_max(alignof(intrinsics::simd<typename compound_type_traits<T>::deep_subtype, @@ -224,7 +248,7 @@ struct alignas(internal::vec_alignment<T, N_>) vec // default KFR_MEM_INTRINSIC constexpr vec() CMT_NOEXCEPT {} -#if defined(_MSC_VER) && !defined (__clang__) +#if defined(_MSC_VER) && !defined(__clang__) // MSVC Internal Compiler Error workaround // copy KFR_MEM_INTRINSIC constexpr vec(const vec& value) CMT_NOEXCEPT : v(value.v) {} @@ -294,19 +318,22 @@ struct alignas(internal::vec_alignment<T, N_>) vec } // from lambda - template <typename Fn, KFR_ENABLE_IF(is_callable<Fn, size_t>)> + template <typename Fn, KFR_ENABLE_IF(std::is_invocable_r_v<T, Fn, size_t>)> KFR_MEM_INTRINSIC vec(Fn&& fn) CMT_NOEXCEPT { for (size_t i = 0; i < N; ++i) { - set(i, fn(i)); + auto v = fn(i); + set(i, v); } } template <typename U, KFR_ENABLE_IF(is_convertible<U, value_type> && !(compound_type_traits<T>::is_scalar && !is_bit<U>))> KFR_MEM_INTRINSIC vec(const vec<U, N>& x) CMT_NOEXCEPT - : v(internal::conversion<vec<T, N>, vec<U, N>>::cast(x).v) + : v(internal::conversion<vec_rank<T> + 1, vec_rank<U> + 1, vec<T, N>, vec<U, N>, + internal::conv_t::promote>::cast(x) + .v) { } @@ -603,8 +630,8 @@ constexpr KFR_INTRINSIC vec<To, N> builtin_convertvector(const vec<From, N>& val } // vector to vector -template <typename To, typename From, size_t N, size_t N2> -struct conversion<vec<To, N>, vec<From, N2>> +template <typename To, typename From, size_t N, size_t N2, conv_t conv> +struct conversion<1, 1, vec<To, N>, vec<From, N2>, conv> { static_assert(N == N2, ""); static_assert(!is_compound<To>, ""); @@ -614,8 +641,8 @@ struct conversion<vec<To, N>, vec<From, N2>> }; // scalar to vector -template <typename To, typename From, size_t N> -struct conversion<vec<To, N>, From> +template <typename To, typename From, size_t N, conv_t conv> +struct conversion<1, 0, vec<To, N>, From, conv> { static_assert(is_convertible<From, To>, ""); @@ -655,13 +682,6 @@ constexpr KFR_INTRINSIC vec<vec<Tout, N1>, N2> cast(const vec<vec<Tin, N1>, N2>& return vec<vec<Tout, N1>, N2>(value); } -template <typename Tout, typename Tin, size_t N1, size_t N2, size_t N3, KFR_ENABLE_IF(!is_same<Tin, Tout>)> -constexpr KFR_INTRINSIC vec<vec<vec<Tout, N1>, N2>, N3> cast(const vec<vec<vec<Tin, N1>, N2>, N3>& value) - CMT_NOEXCEPT -{ - return vec<vec<vec<Tout, N1>, N2>, N3>(value); -} - template <typename Tout, typename Tin, size_t N, KFR_ENABLE_IF(is_same<Tin, Tout>)> constexpr KFR_INTRINSIC const vec<Tin, N>& cast(const vec<Tin, N>& value) CMT_NOEXCEPT { @@ -674,85 +694,67 @@ constexpr KFR_INTRINSIC const vec<vec<Tin, N1>, N2>& cast(const vec<vec<Tin, N1> return value; } -template <typename Tout, typename Tin, size_t N1, size_t N2, size_t N3, KFR_ENABLE_IF(is_same<Tin, Tout>)> -constexpr KFR_INTRINSIC const vec<vec<vec<Tin, N1>, N2>, N3>& cast( - const vec<vec<vec<Tin, N1>, N2>, N3>& value) CMT_NOEXCEPT -{ - return value; -} - // template <typename To, typename From, typename Tout = typename compound_type_traits<From>::template deep_rebind<To>> -constexpr KFR_INTRINSIC Tout innercast(const From& value) CMT_NOEXCEPT +constexpr KFR_INTRINSIC Tout broadcastto(const From& value) CMT_NOEXCEPT { return static_cast<Tout>(value); } template <typename Tout, typename Tin, size_t N, KFR_ENABLE_IF(!is_same<Tin, Tout>)> -constexpr KFR_INTRINSIC vec<Tout, N> innercast(const vec<Tin, N>& value) CMT_NOEXCEPT +constexpr KFR_INTRINSIC vec<Tout, N> broadcastto(const vec<Tin, N>& value) CMT_NOEXCEPT { - return vec<Tout, N>(value); + return internal::conversion<vec_rank<Tout> + 1, 1, vec<Tout, N>, vec<Tin, N>, + internal::conv_t::broadcast>::cast(value); } template <typename Tout, typename Tin, size_t N1, size_t N2, KFR_ENABLE_IF(!is_same<Tin, Tout>)> -constexpr KFR_INTRINSIC vec<vec<Tout, N1>, N2> innercast(const vec<vec<Tin, N1>, N2>& value) CMT_NOEXCEPT +constexpr KFR_INTRINSIC vec<vec<Tout, N1>, N2> broadcastto(const vec<vec<Tin, N1>, N2>& value) CMT_NOEXCEPT { - return vec<vec<Tout, N1>, N2>(value); -} - -template <typename Tout, typename Tin, size_t N1, size_t N2, size_t N3, KFR_ENABLE_IF(!is_same<Tin, Tout>)> -constexpr KFR_INTRINSIC vec<vec<vec<Tout, N1>, N2>, N3> innercast(const vec<vec<vec<Tin, N1>, N2>, N3>& value) - CMT_NOEXCEPT -{ - return vec<vec<vec<Tout, N1>, N2>, N3>(value); + return internal::conversion<vec_rank<Tout> + 2, 2, vec<vec<Tout, N1>, N2>, vec<vec<Tin, N1>, N2>, + internal::conv_t::broadcast>::cast(value); } template <typename Tout, typename Tin, size_t N, KFR_ENABLE_IF(is_same<Tin, Tout>)> -constexpr KFR_INTRINSIC const vec<Tin, N>& innercast(const vec<Tin, N>& value) CMT_NOEXCEPT +constexpr KFR_INTRINSIC const vec<Tin, N>& broadcastto(const vec<Tin, N>& value) CMT_NOEXCEPT { return value; } template <typename Tout, typename Tin, size_t N1, size_t N2, KFR_ENABLE_IF(is_same<Tin, Tout>)> -constexpr KFR_INTRINSIC const vec<vec<Tin, N1>, N2>& innercast(const vec<vec<Tin, N1>, N2>& value) +constexpr KFR_INTRINSIC const vec<vec<Tin, N1>, N2>& broadcastto(const vec<vec<Tin, N1>, N2>& value) CMT_NOEXCEPT { return value; } -template <typename Tout, typename Tin, size_t N1, size_t N2, size_t N3, KFR_ENABLE_IF(is_same<Tin, Tout>)> -constexpr KFR_INTRINSIC const vec<vec<vec<Tin, N1>, N2>, N3>& innercast( - const vec<vec<vec<Tin, N1>, N2>, N3>& value) CMT_NOEXCEPT -{ - return value; -} - // - -template <typename Tout, typename Tin, size_t N, KFR_ENABLE_IF(!is_same<Tin, Tout>)> -constexpr KFR_INTRINSIC vec<Tout, N> elemcast(const vec<Tin, N>& value) CMT_NOEXCEPT +template <typename Tout, typename Tin> +constexpr KFR_INTRINSIC Tout promoteto(const Tin& value) CMT_NOEXCEPT { - return vec<Tout, N>(value); -} - -template <typename Tout, typename Tin, size_t N, KFR_ENABLE_IF(is_same<Tin, Tout>)> -constexpr KFR_INTRINSIC const vec<Tin, N>& elemcast(const vec<Tin, N>& value) CMT_NOEXCEPT -{ - return value; + return static_cast<Tout>(value); } -template <typename Tout, typename Tin, size_t N1, size_t N2, KFR_ENABLE_IF(!is_same<Tin, Tout>)> -constexpr KFR_INTRINSIC vec<Tout, N2> elemcast(const vec<vec<Tin, N1>, N2>& value) CMT_NOEXCEPT +template <typename Tout, typename Tin, size_t N> +constexpr KFR_INTRINSIC vec<Tout, N> promoteto(const vec<Tin, N>& value) CMT_NOEXCEPT { - return vec<Tout, N2>(value); + if constexpr (std::is_same_v<Tin, Tout>) + return value; + else + return internal::conversion<vec_rank<Tout> + 1, 1, vec<Tout, N>, vec<Tin, N>, + internal::conv_t::promote>::cast(value); } -template <typename Tout, typename Tin, size_t N1, size_t N2, size_t N3, KFR_ENABLE_IF(!is_same<Tin, Tout>)> -constexpr KFR_INTRINSIC vec<Tout, N3> elemcast(const vec<vec<vec<Tin, N1>, N2>, N3>& value) CMT_NOEXCEPT +template <typename Tout, typename Tin, size_t N1, size_t N2> +constexpr KFR_INTRINSIC vec<Tout, N2> promoteto(const vec<vec<Tin, N1>, N2>& value) CMT_NOEXCEPT { - return vec<Tout, N3>(value); + if constexpr (std::is_same_v<Tin, Tout>) + return value; + else + return internal::conversion<vec_rank<Tout> + 1, 2, vec<Tout, N2>, vec<vec<Tin, N1>, N2>, + internal::conv_t::promote>::cast(value); } template <typename To, typename From> @@ -1226,7 +1228,8 @@ void test_function1(cint_t<Cat> cat, Fn&& fn, RefFn&& reffn, IsApplicable&& isap { testo::matrix( named("value") = special_values(), named("type") = test_catogories::types(cat), - [&](special_value value, auto type) { + [&](special_value value, auto type) + { using T = typename decltype(type)::type; if (isapplicable(ctype<T>, value)) { @@ -1239,7 +1242,8 @@ void test_function1(cint_t<Cat> cat, Fn&& fn, RefFn&& reffn, IsApplicable&& isap const auto fn_x = fn(x); const auto ref_x = apply(reffn, x); ::testo::active_test()->check(testo::deep_is_equal(ref_x, fn_x), - as_string(fn_x, " == ", ref_x), "fn(x) == apply(reffn, x)"); + as_string(fn_x, " == ", ref_x), "fn(x) == apply(reffn, x)", + __FILE__, __LINE__); // CHECK(fn(x) == apply(reffn, x)); } }); @@ -1253,22 +1257,29 @@ void test_function1(cint_t<Cat> cat, Fn&& fn, RefFn&& reffn, IsApplicable&& isap }); } -template <int Cat, typename Fn, typename RefFn, typename IsApplicable = fn_return_constant<bool, true>> -void test_function2(cint_t<Cat> cat, Fn&& fn, RefFn&& reffn, IsApplicable&& isapplicable = IsApplicable{}) +template <int Cat, typename Fn, typename RefFn, typename IsApplicable = fn_return_constant<bool, true>, + typename IsDefined = fn_return_constant<bool, true>> +void test_function2(cint_t<Cat> cat, Fn&& fn, RefFn&& reffn, IsApplicable&& isapplicable = IsApplicable{}, + IsDefined&& = IsDefined{}) { + constexpr IsDefined isdefined{}; + testo::matrix(named("value1") = special_values(), // named("value2") = special_values(), named("type") = test_catogories::types(cat), [&](special_value value1, special_value value2, auto type) { using T = typename decltype(type)::type; - const T x1(value1); - const T x2(value2); - if (isapplicable(ctype<T>, value1, value2)) + if constexpr (isdefined(ctype<T>)) { - CHECK(is_same<decltype(fn(x1, x2)), - typename compound_type_traits<T>::template rebind<decltype(reffn( - std::declval<subtype<T>>(), std::declval<subtype<T>>()))>>); - CHECK(fn(x1, x2) == apply(reffn, x1, x2)); + const T x1(value1); + const T x2(value2); + if (isapplicable(ctype<T>, value1, value2)) + { + CHECK(is_same<decltype(fn(x1, x2)), + typename compound_type_traits<T>::template rebind<decltype(reffn( + std::declval<subtype<T>>(), std::declval<subtype<T>>()))>>); + CHECK(fn(x1, x2) == apply(reffn, x1, x2)); + } } }); @@ -1278,7 +1289,10 @@ void test_function2(cint_t<Cat> cat, Fn&& fn, RefFn&& reffn, IsApplicable&& isap using T = typename decltype(type)::type; const T x1 = test_enumerate(T::shape(), csizeseq<T::size()>, 0, 1); const T x2 = test_enumerate(T::shape(), csizeseq<T::size()>, 100, -1); - CHECK(fn(x1, x2) == apply(reffn, x1, x2)); + if constexpr (isdefined(ctype<T>)) + { + CHECK(fn(x1, x2) == apply(reffn, x1, x2)); + } }); } @@ -1286,20 +1300,20 @@ void test_function2(cint_t<Cat> cat, Fn&& fn, RefFn&& reffn, IsApplicable&& isap namespace internal { -// vector to vector<vector> -template <typename To, typename From, size_t N> -struct conversion<vec<bit<To>, N>, vec<bit<From>, N>> +// mask to mask +template <typename To, typename From, size_t N, conv_t conv> +struct conversion<1, 1, vec<bit<To>, N>, vec<bit<From>, N>, conv> { static vec<bit<To>, N> cast(const vec<bit<From>, N>& value) { - return vec<To, N>::frombits(innercast<itype<To>>(vec<itype<From>, N>::frombits(value.asvec()))) + return vec<To, N>::frombits(broadcastto<itype<To>>(vec<itype<From>, N>::frombits(value.asvec()))) .asmask(); } }; // vector to vector<vector> template <typename To, typename From, size_t N1, size_t N2, size_t Ns1> -struct conversion<vec<vec<To, N1>, N2>, vec<From, Ns1>> +struct conversion<2, 1, vec<vec<To, N1>, N2>, vec<From, Ns1>, conv_t::broadcast> { static_assert(N1 == Ns1, ""); static_assert(!is_compound<To>, ""); @@ -1308,30 +1322,31 @@ struct conversion<vec<vec<To, N1>, N2>, vec<From, Ns1>> static vec<vec<To, N1>, N2> cast(const vec<From, N1>& value) { return vec<vec<To, N1>, N2>::from_flatten( - kfr::innercast<To>(value.flatten()) + kfr::broadcastto<To>(value.flatten()) .shuffle(csizeseq<N2 * vec<From, N1>::scalar_size()> % csize<N2>)); } }; -// vector to vector<vector<vector>> -template <typename To, typename From, size_t N1, size_t N2, size_t N3, size_t Ns1> -struct conversion<vec<vec<vec<To, N1>, N2>, N3>, vec<From, Ns1>> +// vector to vector<vector> +template <typename To, typename From, size_t N1, size_t N2, size_t Ns1> +struct conversion<2, 1, vec<vec<To, N1>, N2>, vec<From, Ns1>, conv_t::promote> { - static_assert(N1 == Ns1, ""); + static_assert(N2 == Ns1, ""); static_assert(!is_compound<To>, ""); static_assert(!is_compound<From>, ""); - static vec<vec<vec<To, N1>, N2>, N3> cast(const vec<From, N1>& value) + static vec<vec<To, N1>, N2> cast(const vec<From, N2>& value) { - return vec<vec<vec<To, N1>, N2>, N3>::from_flatten( - kfr::innercast<To>(value.flatten()) - .shuffle(csizeseq<N2 * vec<From, N1>::scalar_size()> % csize<N2>)); + return vec<vec<To, N1>, N2>::from_flatten( + kfr::broadcastto<To>(value.flatten()) + .shuffle(csizeseq<N2 * vec<From, N1>::scalar_size()> / csize<vec<From, N1>::scalar_size()> % + csize<N2>)); } }; // vector<vector> to vector<vector> -template <typename To, typename From, size_t N1, size_t N2, size_t NN1, size_t NN2> -struct conversion<vec<vec<To, N1>, N2>, vec<vec<From, NN1>, NN2>> +template <typename To, typename From, size_t N1, size_t N2, size_t NN1, size_t NN2, conv_t conv> +struct conversion<2, 2, vec<vec<To, N1>, N2>, vec<vec<From, NN1>, NN2>, conv> { static_assert(N1 == NN1, ""); static_assert(N2 == NN2, ""); @@ -1340,25 +1355,10 @@ struct conversion<vec<vec<To, N1>, N2>, vec<vec<From, NN1>, NN2>> static vec<vec<To, N1>, N2> cast(const vec<vec<From, N1>, N2>& value) { - return vec<vec<To, N1>, N2>::from_flatten(kfr::innercast<To>(value.flatten())); + return vec<vec<To, N1>, N2>::from_flatten(kfr::broadcastto<To>(value.flatten())); } }; -// vector<vector<vector>> to vector<vector<vector>> -template <typename To, typename From, size_t N1, size_t N2, size_t N3, size_t NN1, size_t NN2, size_t NN3> -struct conversion<vec<vec<vec<To, N1>, N2>, N3>, vec<vec<vec<From, NN1>, NN2>, NN3>> -{ - static_assert(N1 == NN1, ""); - static_assert(N2 == NN2, ""); - static_assert(N3 == NN3, ""); - static_assert(!is_compound<To>, ""); - static_assert(!is_compound<From>, ""); - - static vec<vec<vec<To, N1>, N2>, N3> cast(const vec<vec<vec<From, N1>, N2>, N3>& value) - { - return vec<vec<vec<To, N1>, N2>, N3>::from_flatten(kfr::innercast<To>(value.flatten())); - } -}; } // namespace internal template <typename T, size_t N1, size_t N2 = N1> @@ -1417,22 +1417,33 @@ struct vecx_t<T, N1, N2> using type = vec<vec<T, N1>, N2>; }; -template <typename T, size_t N1, size_t N2, size_t N3> -struct vecx_t<T, N1, N2, N3> -{ - using type = vec<vec<vec<T, N1>, N2>, N3>; -}; } // namespace internal template <typename T, size_t... Ns> using vecx = typename internal::vecx_t<T, Ns...>::type; +template <typename T, size_t N> +constexpr inline size_t vec_rank<vec<T, N>> = 1; + +template <typename T, size_t N1, size_t N2> +constexpr inline size_t vec_rank<vec<vec<T, N1>, N2>> = 2; + +template <typename T, size_t N> +KFR_INTRINSIC vec<T, N> v(const portable_vec<T, N>& pv) +{ + return pv; +} + } // namespace CMT_ARCH_NAME template <typename T1, typename T2, size_t N> struct common_type_impl<kfr::vec<T1, N>, kfr::vec<T2, N>> : common_type_from_subtypes<T1, T2, kfr::vec_template<N>::template type> { }; +template <typename T1, typename T2, size_t N1, size_t N2> +struct common_type_impl<kfr::vec<T1, N1>, kfr::vec<T2, N2>> +{ +}; template <typename T1, typename T2, size_t N> struct common_type_impl<kfr::vec<T1, N>, T2> : common_type_from_subtypes<T1, T2, kfr::vec_template<N>::template type> diff --git a/sources.cmake b/sources.cmake @@ -27,7 +27,6 @@ set( ${PROJECT_SOURCE_DIR}/include/kfr/base/generators.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/math_expressions.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/memory.hpp - ${PROJECT_SOURCE_DIR}/include/kfr/base/old_basic_expressions.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/pointer.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/random.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/random_bits.hpp @@ -35,6 +34,7 @@ set( ${PROJECT_SOURCE_DIR}/include/kfr/base/shape.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/simd_expressions.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/small_buffer.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/state_holder.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/tensor.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/univector.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/impl/static_array.hpp @@ -77,14 +77,13 @@ set( ${PROJECT_SOURCE_DIR}/include/kfr/dsp/sample_rate_conversion.hpp ${PROJECT_SOURCE_DIR}/include/kfr/dsp/speaker.hpp ${PROJECT_SOURCE_DIR}/include/kfr/dsp/special.hpp - ${PROJECT_SOURCE_DIR}/include/kfr/dsp/state_holder.hpp ${PROJECT_SOURCE_DIR}/include/kfr/dsp/units.hpp ${PROJECT_SOURCE_DIR}/include/kfr/dsp/waveshaper.hpp ${PROJECT_SOURCE_DIR}/include/kfr/dsp/weighting.hpp ${PROJECT_SOURCE_DIR}/include/kfr/dsp/window.hpp ${PROJECT_SOURCE_DIR}/include/kfr/graphics/color.hpp ${PROJECT_SOURCE_DIR}/include/kfr/graphics/geometry.hpp - ${PROJECT_SOURCE_DIR}/include/kfr/graphics/scaled.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/graphics/impl/scaled.hpp ${PROJECT_SOURCE_DIR}/include/kfr/io/audiofile.hpp ${PROJECT_SOURCE_DIR}/include/kfr/io/file.hpp ${PROJECT_SOURCE_DIR}/include/kfr/io/python_plot.hpp @@ -154,8 +153,316 @@ set( ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/select.hpp ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/simd.hpp ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/specialconstants.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/specializations.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/intrinsics.h + ${PROJECT_SOURCE_DIR}/include/kfr/testo/assert.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/testo/comparison.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/testo/console_colors.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/testo/double_double.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/testo/testo.hpp +) + + +set( + KFR_SIMD_SRC + ${PROJECT_SOURCE_DIR}/include/kfr/simd/abs.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/clamp.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/comparison.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/complex.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/complex_type.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/constants.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/digitreverse.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/horizontal.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/logical.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/mask.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/min_max.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/operators.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/platform.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/read_write.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/round.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/saturation.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/select.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/shuffle.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/sort.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/types.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/vec.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/abs.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/backend.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/backend_clang.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/backend_generic.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/basicoperators_clang.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/basicoperators_complex.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/basicoperators_generic.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/clamp.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/function.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/logical.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/min_max.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/operators.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/read_write.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/round.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/saturation.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/select.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/simd.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/specialconstants.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/specializations.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/intrinsics.h +) + + +set( + KFR_MATH_SRC + ${PROJECT_SOURCE_DIR}/include/kfr/math/asin_acos.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/math/atan.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/math/compiletime.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/math/complex_math.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/math/gamma.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/math/hyperbolic.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/math/interpolation.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/math/log_exp.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/math/modzerobessel.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/math/sin_cos.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/math/sqrt.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/math/tan.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/math/impl/asin_acos.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/math/impl/atan.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/math/impl/gamma.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/math/impl/hyperbolic.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/math/impl/log_exp.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/math/impl/modzerobessel.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/math/impl/sin_cos.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/math/impl/sqrt.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/math/impl/tan.hpp +) + + +set( + KFR_BASE_SRC + ${PROJECT_SOURCE_DIR}/include/kfr/base/basic_expressions.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/conversion.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/expression.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/filter.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/fraction.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/generators.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/math_expressions.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/memory.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/pointer.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/random.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/random_bits.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/reduce.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/shape.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/simd_expressions.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/small_buffer.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/state_holder.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/tensor.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/univector.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/impl/static_array.hpp +) + + +set( + KFR_DSP_SRC + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/biquad.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/biquad_design.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/dcremove.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/delay.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/ebu.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/fir.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/fir_design.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/fracdelay.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/goertzel.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/iir_design.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/mixdown.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/oscillators.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/sample_rate_conversion.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/speaker.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/special.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/units.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/waveshaper.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/weighting.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/window.hpp +) + + +set( + KFR_IO_SRC + ${PROJECT_SOURCE_DIR}/include/kfr/io/audiofile.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/io/file.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/io/python_plot.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/io/tostring.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/io/dr/dr_flac.h + ${PROJECT_SOURCE_DIR}/include/kfr/io/dr/dr_mp3.h + ${PROJECT_SOURCE_DIR}/include/kfr/io/dr/dr_wav.h +) + + +set( + KFR_RUNTIME_SRC + ${PROJECT_SOURCE_DIR}/include/kfr/runtime/cpuid.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/runtime/cpuid_auto.hpp +) + + +set( + KFR_GRAPHICS_SRC + ${PROJECT_SOURCE_DIR}/include/kfr/graphics/color.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/graphics/geometry.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/graphics/impl/scaled.hpp +) + + +set( + KFR_SRC + ${PROJECT_SOURCE_DIR}/include/kfr/all.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/cometa.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dft.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/graphics.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/io.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/math.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/runtime.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/version.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/capi.h + ${PROJECT_SOURCE_DIR}/include/kfr/cident.h + ${PROJECT_SOURCE_DIR}/include/kfr/config.h + ${PROJECT_SOURCE_DIR}/include/kfr/kfr.h + ${PROJECT_SOURCE_DIR}/include/kfr/base/basic_expressions.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/conversion.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/expression.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/filter.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/fraction.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/generators.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/math_expressions.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/memory.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/pointer.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/random.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/random_bits.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/reduce.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/shape.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/simd_expressions.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/small_buffer.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/state_holder.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/tensor.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/univector.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/impl/static_array.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/cometa/array.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/cometa/cstring.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/cometa/ctti.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/cometa/function.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/cometa/memory.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/cometa/named_arg.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/cometa/numeric.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/cometa/range.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/cometa/result.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/cometa/string.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/cometa/tuple.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dft/cache.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dft/convolution.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dft/fft.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dft/reference_dft.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dft/data/bitrev.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dft/data/sincos.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dft/impl/bitrev.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dft/impl/dft-fft.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dft/impl/dft-impl.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dft/impl/dft-templates.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dft/impl/fft-impl.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dft/impl/fft-templates.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dft/impl/ft.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/biquad.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/biquad_design.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/dcremove.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/delay.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/ebu.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/fir.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/fir_design.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/fracdelay.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/goertzel.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/iir_design.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/mixdown.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/oscillators.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/sample_rate_conversion.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/speaker.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/special.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/units.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/waveshaper.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/weighting.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/window.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/graphics/color.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/graphics/geometry.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/graphics/impl/scaled.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/io/audiofile.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/io/file.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/io/python_plot.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/io/tostring.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/io/dr/dr_flac.h + ${PROJECT_SOURCE_DIR}/include/kfr/io/dr/dr_mp3.h + ${PROJECT_SOURCE_DIR}/include/kfr/io/dr/dr_wav.h + ${PROJECT_SOURCE_DIR}/include/kfr/math/asin_acos.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/math/atan.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/math/compiletime.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/math/complex_math.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/math/gamma.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/math/hyperbolic.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/math/interpolation.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/math/log_exp.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/math/modzerobessel.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/math/sin_cos.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/math/sqrt.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/math/tan.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/math/impl/asin_acos.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/math/impl/atan.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/math/impl/gamma.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/math/impl/hyperbolic.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/math/impl/log_exp.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/math/impl/modzerobessel.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/math/impl/sin_cos.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/math/impl/sqrt.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/math/impl/tan.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/runtime/cpuid.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/runtime/cpuid_auto.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/abs.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/clamp.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/comparison.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/complex.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/complex_type.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/constants.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/digitreverse.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/horizontal.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/logical.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/mask.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/min_max.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/operators.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/platform.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/read_write.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/round.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/saturation.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/select.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/shuffle.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/sort.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/types.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/vec.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/abs.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/backend.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/backend_clang.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/backend_generic.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/basicoperators_clang.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/basicoperators_complex.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/basicoperators_generic.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/clamp.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/function.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/logical.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/min_max.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/operators.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/read_write.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/round.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/saturation.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/select.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/simd.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/specialconstants.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/specializations.hpp ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/intrinsics.h - ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/specializations.i ${PROJECT_SOURCE_DIR}/include/kfr/testo/assert.hpp ${PROJECT_SOURCE_DIR}/include/kfr/testo/comparison.hpp ${PROJECT_SOURCE_DIR}/include/kfr/testo/console_colors.hpp @@ -182,17 +489,32 @@ set( set( KFR_UNITTEST_SRC + ${PROJECT_SOURCE_DIR}/tests/unit/base/base.cpp + ${PROJECT_SOURCE_DIR}/tests/unit/base/basic_expressions.cpp ${PROJECT_SOURCE_DIR}/tests/unit/base/conversion.cpp ${PROJECT_SOURCE_DIR}/tests/unit/base/fraction.cpp + ${PROJECT_SOURCE_DIR}/tests/unit/base/generators.cpp + ${PROJECT_SOURCE_DIR}/tests/unit/base/pointer.cpp ${PROJECT_SOURCE_DIR}/tests/unit/base/random.cpp ${PROJECT_SOURCE_DIR}/tests/unit/base/reduce.cpp + ${PROJECT_SOURCE_DIR}/tests/unit/base/shape.cpp ${PROJECT_SOURCE_DIR}/tests/unit/base/tensor.cpp + ${PROJECT_SOURCE_DIR}/tests/unit/dsp/biquad.cpp + ${PROJECT_SOURCE_DIR}/tests/unit/dsp/biquad_design.cpp + ${PROJECT_SOURCE_DIR}/tests/unit/dsp/dsp.cpp + ${PROJECT_SOURCE_DIR}/tests/unit/dsp/ebu.cpp + ${PROJECT_SOURCE_DIR}/tests/unit/dsp/fir.cpp + ${PROJECT_SOURCE_DIR}/tests/unit/dsp/sample_rate_conversion.cpp + ${PROJECT_SOURCE_DIR}/tests/unit/dsp/units.cpp + ${PROJECT_SOURCE_DIR}/tests/unit/dsp/window.cpp ${PROJECT_SOURCE_DIR}/tests/unit/graphics/color.cpp ${PROJECT_SOURCE_DIR}/tests/unit/graphics/geometry.cpp + ${PROJECT_SOURCE_DIR}/tests/unit/graphics/graphics.cpp ${PROJECT_SOURCE_DIR}/tests/unit/math/asin_acos.cpp ${PROJECT_SOURCE_DIR}/tests/unit/math/atan.cpp ${PROJECT_SOURCE_DIR}/tests/unit/math/hyperbolic.cpp ${PROJECT_SOURCE_DIR}/tests/unit/math/log_exp.cpp + ${PROJECT_SOURCE_DIR}/tests/unit/math/math.cpp ${PROJECT_SOURCE_DIR}/tests/unit/math/sin_cos.cpp ${PROJECT_SOURCE_DIR}/tests/unit/math/tan.cpp ${PROJECT_SOURCE_DIR}/tests/unit/simd/abs.cpp @@ -202,6 +524,7 @@ set( ${PROJECT_SOURCE_DIR}/tests/unit/simd/round.cpp ${PROJECT_SOURCE_DIR}/tests/unit/simd/select.cpp ${PROJECT_SOURCE_DIR}/tests/unit/simd/shuffle.cpp + ${PROJECT_SOURCE_DIR}/tests/unit/simd/simd.cpp ${PROJECT_SOURCE_DIR}/tests/unit/simd/vec.cpp ) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt @@ -106,7 +106,8 @@ set(ALL_TESTS_CPP expression_test.cpp intrinsic_test.cpp io_test.cpp - ${KFR_UNITTEST_SRC}) + ${KFR_UNITTEST_SRC} + ) if (KFR_ENABLE_DFT) list(APPEND ALL_TESTS_CPP dft_test.cpp) diff --git a/tests/asm_test.cpp b/tests/asm_test.cpp @@ -143,7 +143,7 @@ using namespace kfr; { \ r = kfr::fn<n, true>(x); \ } \ - KFR_PUBLIC void asm__test__##fn##__##ty##__##n##__unaligned(vec<ty, n> & __restrict r, \ + KFR_PUBLIC void asm__test__##fn##__##ty##__##n##__unaligned(vec<ty, n>& __restrict r, \ const ty* __restrict x) \ { \ r = kfr::fn<n, false>(x); \ @@ -154,7 +154,7 @@ using namespace kfr; { \ kfr::fn<true>(p, x); \ } \ - KFR_PUBLIC void asm__test__##fn##__##ty##__##n##__unaligned(ty * __restrict p, const vec<ty, n>& x) \ + KFR_PUBLIC void asm__test__##fn##__##ty##__##n##__unaligned(ty* __restrict p, const vec<ty, n>& x) \ { \ kfr::fn<false>(p, x); \ } diff --git a/tests/base_test.cpp b/tests/base_test.cpp @@ -6,8 +6,8 @@ #include <kfr/testo/testo.hpp> -#include <kfr/io.hpp> -#include <kfr/simd.hpp> +// #include <kfr/io.hpp> +#include <kfr/base.hpp> using namespace kfr; @@ -92,23 +92,7 @@ TEST(test_basic) CHECK(inrange(pack(1, 2, 3), 1, 1) == make_mask<int>(true, false, false)); } -TEST(test_gen_expj) -{ - kfr::univector<cbase> v = kfr::truncate(kfr::gen_expj(0.f, constants<float>::pi_s(2) * 0.1f), 1000); - CHECK(rms(cabs(v.slice(990) - - univector<cbase>({ cbase(1., +0.00000000e+00), cbase(0.80901699, +5.87785252e-01), - cbase(0.30901699, +9.51056516e-01), cbase(-0.30901699, +9.51056516e-01), - cbase(-0.80901699, +5.87785252e-01), cbase(-1., +1.22464680e-16), - cbase(-0.80901699, -5.87785252e-01), - cbase(-0.30901699, -9.51056516e-01), cbase(0.30901699, -9.51056516e-01), - cbase(0.80901699, -5.87785252e-01) }))) < 0.00006); // error here depends on vector width - // In most cases error is much lower (less than 0.00001) -} - -TEST(ctti) -{ - CHECK(cometa::type_name<float>() == std::string("float")); -} +TEST(ctti) { CHECK(cometa::type_name<float>() == std::string("float")); } } // namespace CMT_ARCH_NAME diff --git a/tests/complex_test.cpp b/tests/complex_test.cpp @@ -192,9 +192,9 @@ TEST(complex_function_expressions) CHECK(uv3[1] == 2.f); CHECK(uv3[2] == 8.f); CHECK(uv3[3] == 18.f); - testo::assert_is_same<c32, value_type_of<decltype(uv2)>>(); - testo::assert_is_same<f32, value_type_of<decltype(uv3)>>(); - testo::assert_is_same<f32, value_type_of<decltype(real(uv2))>>(); + testo::assert_is_same<c32, expression_value_type<decltype(uv2)>>(); + testo::assert_is_same<f32, expression_value_type<decltype(uv3)>>(); + testo::assert_is_same<f32, expression_value_type<decltype(real(uv2))>>(); } TEST(static_tests) @@ -219,10 +219,6 @@ TEST(static_tests) testo::assert_is_same<ftype<vec<complex<i32>, 4>>, vec<complex<f32>, 4>>(); testo::assert_is_same<ftype<vec<complex<i64>, 8>>, vec<complex<f64>, 8>>(); - testo::assert_is_same<kfr::internal::arg<int>, kfr::internal::expression_scalar<int>>(); - testo::assert_is_same<kfr::internal::arg<complex<int>>, - kfr::internal::expression_scalar<kfr::complex<int>>>(); - testo::assert_is_same<kfr::common_type<complex<int>, double>, complex<double>>(); } } // namespace CMT_ARCH_NAME diff --git a/tests/dsp_test.cpp b/tests/dsp_test.cpp @@ -18,269 +18,6 @@ using namespace kfr; namespace CMT_ARCH_NAME { -struct TestFragment -{ - float gain; // dB - float duration; // seconds - float frequency; // Hz -}; - -struct TestFragmentMultichannel -{ - float gain_L_R; // dB - float gain_C; // dB - float gain_Ls_Rs; // dB - float duration; // seconds - float frequency; // Hz -}; - -template <typename T> -static void ebu_test_stereo(int sample_rate, const std::initializer_list<TestFragment>& fragments, T refM, - T refS, T refI, T refLRA) -{ - ebu_r128<T> loudness(sample_rate, { Speaker::Left, Speaker::Right }); - - size_t total_length = 0; - for (const TestFragment& f : fragments) - { - total_length += static_cast<size_t>(f.duration * sample_rate); - } - - univector<T> left_right(total_length); - size_t pos = 0; - for (const TestFragment& f : fragments) - { - const size_t len = static_cast<size_t>(f.duration * sample_rate); - left_right.slice(pos, len) = dB_to_amp(f.gain) * sinenorm(phasor<float>(f.frequency, sample_rate)); - pos += len; - } - - for (size_t i = 0; i < total_length / loudness.packet_size(); i++) - { - loudness.process_packet({ left_right.slice(i * loudness.packet_size(), loudness.packet_size()), - left_right.slice(i * loudness.packet_size(), loudness.packet_size()) }); - } - T M, S, I, RL, RH; - loudness.get_values(M, S, I, RL, RH); - if (!std::isnan(refM)) - CHECK(std::abs(M - refM) < 0.05f); - if (!std::isnan(refS)) - CHECK(std::abs(S - refS) < 0.05f); - if (!std::isnan(refI)) - CHECK(std::abs(I - refI) < 0.05f); - if (!std::isnan(refLRA)) - CHECK(std::abs((RH - RL) - refLRA) < 0.05f); -} - -template <typename T> -static void ebu_test_multichannel(int sample_rate, - const std::initializer_list<TestFragmentMultichannel>& fragments, T refM, - T refS, T refI, T refLRA) -{ - ebu_r128<T> loudness(sample_rate, { Speaker::Left, Speaker::Right, Speaker::Center, Speaker::LeftSurround, - Speaker::RightSurround }); - - size_t total_length = 0; - for (const TestFragmentMultichannel& f : fragments) - { - total_length += static_cast<size_t>(f.duration * sample_rate); - } - - univector<T> left_right(total_length); - univector<T> center(total_length); - univector<T> surround(total_length); - size_t pos = 0; - for (const TestFragmentMultichannel& f : fragments) - { - const size_t len = static_cast<size_t>(f.duration * sample_rate); - left_right.slice(pos, len) = - dB_to_amp(f.gain_L_R) * sinenorm(phasor<float>(f.frequency, sample_rate)); - center.slice(pos, len) = dB_to_amp(f.gain_C) * sinenorm(phasor<float>(f.frequency, sample_rate)); - surround.slice(pos, len) = - dB_to_amp(f.gain_Ls_Rs) * sinenorm(phasor<float>(f.frequency, sample_rate)); - pos += len; - } - - for (size_t i = 0; i < total_length / loudness.packet_size(); i++) - { - loudness.process_packet({ left_right.slice(i * loudness.packet_size(), loudness.packet_size()), - left_right.slice(i * loudness.packet_size(), loudness.packet_size()), - center.slice(i * loudness.packet_size(), loudness.packet_size()), - surround.slice(i * loudness.packet_size(), loudness.packet_size()), - surround.slice(i * loudness.packet_size(), loudness.packet_size()) }); - } - T M, S, I, RL, RH; - loudness.get_values(M, S, I, RL, RH); - if (!std::isnan(refM)) - CHECK(std::abs(M - refM) < 0.05f); - if (!std::isnan(refS)) - CHECK(std::abs(S - refS) < 0.05f); - if (!std::isnan(refI)) - CHECK(std::abs(I - refI) < 0.05f); - if (!std::isnan(refLRA)) - CHECK(std::abs((RH - RL) - refLRA) < 0.05f); -} - -TEST(ebu_stereo_1_and_2) -{ - testo::matrix(named("type") = ctypes_t<float, double>{}, - named("sample_rate") = std::vector<int>{ 44100, 48000 }, [](auto type, int sample_rate) { - using T = typename decltype(type)::type; - - ebu_test_stereo<T>(sample_rate, { { -23.f, 20.f, 1000.f } }, -23.f, -23.f, -23.f, NAN); - ebu_test_stereo<T>(sample_rate, { { -33.f, 20.f, 1000.f } }, -33.f, -33.f, -33.f, NAN); - }); -} - -TEST(ebu_stereo_3_4_and_5) -{ - testo::matrix(named("type") = ctypes_t<float, double>{}, - named("sample_rate") = std::vector<int>{ 44100, 48000 }, [](auto type, int sample_rate) { - using T = typename decltype(type)::type; - - ebu_test_stereo<T>( - sample_rate, - { { -36.f, 10.f, 1000.f }, { -23.f, 60.f, 1000.f }, { -36.f, 10.f, 1000.f } }, NAN, - NAN, -23.f, NAN); - ebu_test_stereo<T>(sample_rate, - { { -72.f, 10.f, 1000.f }, - { -36.f, 10.f, 1000.f }, - { -23.f, 60.f, 1000.f }, - { -36.f, 10.f, 1000.f }, - { -72.f, 10.f, 1000.f } }, - NAN, NAN, -23.f, NAN); - }); -} - -TEST(ebu_multichannel_6) -{ - testo::matrix(named("type") = ctypes_t<float, double>{}, - named("sample_rate") = std::vector<int>{ 44100, 48000 }, [](auto type, int sample_rate) { - using T = typename decltype(type)::type; - - ebu_test_multichannel<T>(sample_rate, { { -28.f, -24.f, -30.f, 20.f, 1000.f } }, NAN, - NAN, -23.f, NAN); - }); -} - -TEST(ebu_stereo_9) -{ - testo::matrix(named("type") = ctypes_t<float, double>{}, - named("sample_rate") = std::vector<int>{ 44100, 48000 }, [](auto type, int sample_rate) { - using T = typename decltype(type)::type; - - ebu_test_stereo<T>(sample_rate, - { { -20.f, 1.34f, 1000.f }, - { -30.f, 1.66f, 1000.f }, - { -20.f, 1.34f, 1000.f }, - { -30.f, 1.66f, 1000.f }, - { -20.f, 1.34f, 1000.f }, - { -30.f, 1.66f, 1000.f }, - { -20.f, 1.34f, 1000.f }, - { -30.f, 1.66f, 1000.f }, - { -20.f, 1.34f, 1000.f }, - { -30.f, 1.66f, 1000.f } }, - NAN, -23.f, NAN, NAN); - }); -} - -TEST(ebu_stereo_12) -{ - testo::matrix(named("type") = ctypes_t<float, double>{}, - named("sample_rate") = std::vector<int>{ 44100, 48000 }, [](auto type, int sample_rate) { - using T = typename decltype(type)::type; - - ebu_test_stereo<T>( - sample_rate, - { { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f }, - { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f }, - { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f }, - { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f }, - { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f }, - { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f }, - { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f }, - { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f }, - { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f }, - { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f }, - { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f }, - { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f }, - { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f }, - { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f }, - { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f }, - { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f }, - { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f } }, - -23.f, NAN, NAN, NAN); - }); -} - -TEST(ebu_lra_1_2_3_and_4) -{ - testo::matrix(named("type") = ctypes_t<float, double>{}, - named("sample_rate") = std::vector<int>{ 44100, 48000 }, [](auto type, int sample_rate) { - using T = typename decltype(type)::type; - - ebu_test_stereo<T>(sample_rate, { { -20.f, 20.f, 1000.f }, { -30.f, 20.f, 1000.f } }, - NAN, NAN, NAN, 10.f); - - ebu_test_stereo<T>(sample_rate, { { -20.f, 20.f, 1000.f }, { -15.f, 20.f, 1000.f } }, - NAN, NAN, NAN, 5.f); - - ebu_test_stereo<T>(sample_rate, { { -40.f, 20.f, 1000.f }, { -20.f, 20.f, 1000.f } }, - NAN, NAN, NAN, 20.f); - - ebu_test_stereo<T>(sample_rate, - { { -50.f, 20.f, 1000.f }, - { -35.f, 20.f, 1000.f }, - { -20.f, 20.f, 1000.f }, - { -35.f, 20.f, 1000.f }, - { -50.f, 20.f, 1000.f } }, - NAN, NAN, NAN, 15.f); - }); -} - -TEST(note_to_hertz) -{ - testo::eplison_scope<void> eps(2000); - CHECK(kfr::note_to_hertz(60) == fbase(261.6255653005986346778499935233)); - CHECK(kfr::note_to_hertz(pack(60)) == pack(fbase(261.6255653005986346778499935233))); - - CHECK(kfr::note_to_hertz(69) == fbase(440.0)); - CHECK(kfr::note_to_hertz(pack(69)) == pack(fbase(440))); -} - -TEST(hertz_to_note) -{ - testo::eplison_scope<void> eps(1000); - CHECK(kfr::hertz_to_note(261.6255653005986346778499935233) == fbase(60)); - CHECK(kfr::hertz_to_note(pack(261.6255653005986346778499935233)) == pack(fbase(60))); - - CHECK(kfr::hertz_to_note(440) == fbase(69)); - CHECK(kfr::hertz_to_note(pack(440)) == pack(fbase(69))); -} - -TEST(amp_to_dB) -{ - testo::eplison_scope<void> eps(1000); - - CHECK(kfr::amp_to_dB(fbase(2.0)) == fbase(6.0205999132796239042747778944899)); - CHECK(kfr::amp_to_dB(fbase(-2.0)) == fbase(6.0205999132796239042747778944899)); - CHECK(kfr::amp_to_dB(fbase(1.0)) == fbase(0)); - CHECK(kfr::amp_to_dB(fbase(-1.0)) == fbase(0)); - CHECK(kfr::amp_to_dB(fbase(0.5)) == fbase(-6.0205999132796239042747778944899)); - CHECK(kfr::amp_to_dB(fbase(-0.5)) == fbase(-6.0205999132796239042747778944899)); - CHECK(kfr::amp_to_dB(fbase(0.0)) == fbase(-HUGE_VAL)); -} - -TEST(dB_to_amp) -{ - testo::eplison_scope<void> eps(1000); - - CHECK(kfr::dB_to_amp(fbase(-HUGE_VAL)) == fbase(0.0)); - CHECK(kfr::dB_to_amp(fbase(0.0)) == fbase(1.0)); - CHECK(kfr::dB_to_amp(fbase(6.0205999132796239042747778944899)) == fbase(2.0)); - CHECK(kfr::dB_to_amp(fbase(-6.0205999132796239042747778944899)) == fbase(0.5)); -} - TEST(delay) { const univector<float, 33> v1 = counter() + 100; @@ -337,193 +74,6 @@ TEST(phasor) CHECK(rms(v1 - v2) < 1.e-5); } -TEST(fir) -{ -#ifdef CMT_COMPILER_IS_MSVC - // testo::matrix causes error in MSVC - { - using T = float; - - const univector<T, 100> data = counter() + sequence(1, 2, -10, 100) + sequence(0, -7, 0.5); - const univector<T, 6> taps{ 1, 2, -2, 0.5, 0.0625, 4 }; - - CHECK_EXPRESSION(fir(data, taps), 100, [&](size_t index) -> T { - T result = 0; - for (size_t i = 0; i < taps.size(); i++) - result += data.get(index - i, 0) * taps[i]; - return result; - }); - - CHECK_EXPRESSION(short_fir(data, taps), 100, [&](size_t index) -> T { - T result = 0; - for (size_t i = 0; i < taps.size(); i++) - result += data.get(index - i, 0) * taps[i]; - return result; - }); - } - { - using T = double; - - const univector<T, 100> data = counter() + sequence(1, 2, -10, 100) + sequence(0, -7, 0.5); - const univector<T, 6> taps{ 1, 2, -2, 0.5, 0.0625, 4 }; - - CHECK_EXPRESSION(fir(data, taps), 100, [&](size_t index) -> T { - T result = 0; - for (size_t i = 0; i < taps.size(); i++) - result += data.get(index - i, 0) * taps[i]; - return result; - }); - - CHECK_EXPRESSION(short_fir(data, taps), 100, [&](size_t index) -> T { - T result = 0; - for (size_t i = 0; i < taps.size(); i++) - result += data.get(index - i, 0) * taps[i]; - return result; - }); - } -#else - testo::matrix(named("type") = ctypes_t<float -#ifdef CMT_NATIVE_F64 - , - double -#endif - >{}, - [](auto type) { - using T = typename decltype(type)::type; - - const univector<T, 100> data = - counter() + sequence(1, 2, -10, 100) + sequence(0, -7, 0.5); - const univector<T, 6> taps{ 1, 2, -2, 0.5, 0.0625, 4 }; - - CHECK_EXPRESSION(fir(data, taps), 100, [&](size_t index) -> T { - T result = 0; - for (size_t i = 0; i < taps.size(); i++) - result += data.get(index - i, 0) * taps[i]; - return result; - }); - - fir_state<T> state(taps.ref()); - - CHECK_EXPRESSION(fir(state, data), 100, [&](size_t index) -> T { - T result = 0; - for (size_t i = 0; i < taps.size(); i++) - result += data.get(index - i, 0) * taps[i]; - return result; - }); - - CHECK_EXPRESSION(short_fir(data, taps), 100, [&](size_t index) -> T { - T result = 0; - for (size_t i = 0; i < taps.size(); i++) - result += data.get(index - i, 0) * taps[i]; - return result; - }); - - short_fir_state<9, T> state2(taps); - - CHECK_EXPRESSION(short_fir<taps.size()>(state2, data), 100, [&](size_t index) -> T { - T result = 0; - for (size_t i = 0; i < taps.size(); i++) - result += data.get(index - i, 0) * taps[i]; - return result; - }); - - CHECK_EXPRESSION(moving_sum<taps.size()>(data), 100, [&](size_t index) -> T { - T result = 0; - for (size_t i = 0; i < taps.size(); i++) - result += data.get(index - i, 0); - return result; - }); - - moving_sum_state<T, 131> msstate1; - - CHECK_EXPRESSION(moving_sum(msstate1, data), 100, [&](size_t index) -> T { - T result = 0; - for (size_t i = 0; i < msstate1.delayline.size(); i++) - result += data.get(index - i, 0); - return result; - }); - - moving_sum_state<T> msstate2(133); - - CHECK_EXPRESSION(moving_sum(msstate2, data), 100, [&](size_t index) -> T { - T result = 0; - for (size_t i = 0; i < msstate2.delayline.size(); i++) - result += data.get(index - i, 0); - return result; - }); - }); -#endif -} - -#ifdef CMT_NATIVE_F64 -TEST(fir_different) -{ - const univector<float, 100> data = counter() + sequence(1, 2, -10, 100) + sequence(0, -7, 0.5f); - // const univector<double, 6> taps{ 1, 2, -2, 0.5, 0.0625, 4 }; - const univector<double, 4> taps{ 1, 2, 3, 4 }; - - CHECK_EXPRESSION(fir(data, taps), 100, [&](size_t index) -> float { - double result = 0.0; - for (size_t i = 0; i < taps.size(); i++) - result += data.get(index - i, 0.0) * taps[i]; - return float(result); - }); - - CHECK_EXPRESSION(short_fir(data, taps), 100, [&](size_t index) -> float { - double result = 0.0; - for (size_t i = 0; i < taps.size(); i++) - result += data.get(index - i, 0.0) * taps[i]; - return float(result); - }); -} -#endif - -#ifdef KFR_STD_COMPLEX -template <typename T> -inline std::complex<T> to_std(const std::complex<T>& c) -{ - return c; -} -template <typename T> -inline std::complex<T> from_std(const std::complex<T>& c) -{ - return c; -} -#else -template <typename T> -inline std::complex<T> to_std(const kfr::complex<T>& c) -{ - return { c.real(), c.imag() }; -} - -template <typename T> -inline kfr::complex<T> from_std(const std::complex<T>& c) -{ - return { c.real(), c.imag() }; -} -#endif - -TEST(fir_complex) -{ - const univector<complex<float>, 100> data = - counter() * complex<float>{ 0.f, 1.f } + sequence(1, 2, -10, 100) + sequence(0, -7, 0.5f); - const univector<float, 6> taps{ 1, 2, -2, 0.5, 0.0625, 4 }; - - CHECK_EXPRESSION(fir(data, taps), 100, [&](size_t index) -> complex<float> { - std::complex<float> result = 0.0; - for (size_t i = 0; i < taps.size(); i++) - result = result + to_std(data.get(index - i, 0.0)) * taps[i]; - return from_std(result); - }); - - CHECK_EXPRESSION(short_fir(data, taps), 100, [&](size_t index) -> complex<float> { - std::complex<float> result = 0.0; - for (size_t i = 0; i < taps.size(); i++) - result = result + to_std(data.get(index - i, 0.0)) * taps[i]; - return from_std(result); - }); -} - template <typename E, typename T, size_t size> void test_ir(E&& e, const univector<T, size>& test_vector) { @@ -532,121 +82,6 @@ void test_ir(E&& e, const univector<T, size>& test_vector) println(absmaxof(ir - test_vector)); } -template <typename T, typename... Ts, univector_tag Tag> -inline const univector<T, Tag>& choose_array(const univector<T, Tag>& array, const univector<Ts, Tag>&...) -{ - return array; -} - -template <typename T, typename T2, typename... Ts, univector_tag Tag, KFR_ENABLE_IF(!is_same<T, T2>)> -inline const univector<T, Tag>& choose_array(const univector<T2, Tag>&, const univector<Ts, Tag>&... arrays) -{ - return choose_array<T>(arrays...); -} - -TEST(biquad_lowpass1) -{ - testo::matrix(named("type") = ctypes_t<float, double>{}, [](auto type) { - using T = typename decltype(type)::type; - - const biquad_params<T> bq = biquad_lowpass<T>(0.1, 0.7); - - constexpr size_t size = 32; - - const univector<float, size> test_vector_f32{ - +0x8.9bce2p-7, +0xd.8383ep-6, +0x8.f908dp-5, +0xe.edc21p-6, +0x9.ae104p-6, +0x9.dcc24p-7, - +0xd.50584p-9, -0xf.2668p-13, -0xd.09ca1p-10, -0xe.15995p-10, -0xa.b90d2p-10, -0xc.edea4p-11, - -0xb.f14eap-12, -0xc.2cb44p-14, +0xb.4a4dep-15, +0xb.685dap-14, +0xa.b181fp-14, +0xf.0cb2bp-15, - +0x8.695d6p-15, +0xd.bedd4p-17, +0xf.5474p-20, -0xd.bb266p-19, -0x9.63ca1p-18, -0xf.ca567p-19, - -0xa.5231p-19, -0xa.9e934p-20, -0xe.ab52p-22, +0xa.3c4cp-26, +0xd.721ffp-23, +0xe.ccc1ap-23, - +0xb.5f248p-23, +0xd.d2c9ap-24, - }; - - const univector<double, size> test_vector_f64{ - +0x8.9bce2bf3663e8p-7, +0xd.8384010fdf1dp-6, +0x8.f908e7a36df6p-5, +0xe.edc2332a6d0bp-6, - +0x9.ae104af1da9ap-6, +0x9.dcc235ef68e7p-7, +0xd.5057ee425e05p-9, -0xf.266e42a99aep-13, - -0xd.09cad73642208p-10, -0xe.1599f32a83dp-10, -0xa.b90d8910a117p-10, -0xc.edeaabb890948p-11, - -0xb.f14edbb55383p-12, -0xc.2cb39b86f2dap-14, +0xb.4a506ecff055p-15, +0xb.685edfdb55358p-14, - +0xa.b182e32f8e298p-14, +0xf.0cb3dfd894b2p-15, +0x8.695df725b4438p-15, +0xd.beddc3606b9p-17, - +0xf.547004d20874p-20, -0xd.bb29b25b49b6p-19, -0x9.63cb9187da1dp-18, -0xf.ca588634fc618p-19, - -0xa.52322d320da78p-19, -0xa.9e9420154e4p-20, -0xe.ab51f7b0335ap-22, +0xa.3c6479980e1p-26, - +0xd.7223836599fp-23, +0xe.ccc47ddd18678p-23, +0xb.5f265b1be1728p-23, +0xd.d2cb83f8483f8p-24, - }; - - const univector<T, size> ir = biquad(bq, unitimpulse<T>()); - - CHECK(absmaxof(choose_array<T>(test_vector_f32, test_vector_f64) - ir) == 0); - }); -} - -TEST(biquad_lowpass2) -{ - testo::matrix(named("type") = ctypes_t<float, double>{}, [](auto type) { - using T = typename decltype(type)::type; - - const biquad_params<T> bq = biquad_lowpass<T>(0.45, 0.2); - - constexpr size_t size = 32; - - const univector<float, size> test_vector_f32{ - +0x8.ce416p-4, +0x8.2979p-4, -0x8.a9d04p-7, +0xe.aeb3p-11, +0x8.204f8p-13, -0x8.20d78p-12, - +0x8.3379p-12, -0xf.83d81p-13, +0xe.8b5c4p-13, -0xd.9ddadp-13, +0xc.bedfcp-13, -0xb.ee123p-13, - +0xb.2a9e5p-13, -0xa.73ac4p-13, +0x9.c86f6p-13, -0x9.2828p-13, +0x8.92229p-13, -0x8.05b7p-13, - +0xf.048ffp-14, -0xe.0e849p-14, +0xd.28384p-14, -0xc.50a9p-14, +0xb.86e56p-14, -0xa.ca0b6p-14, - +0xa.19476p-14, -0x9.73d38p-14, +0x8.d8f64p-14, -0x8.48024p-14, +0xf.80aa2p-15, -0xe.82ad8p-15, - +0xd.94f22p-15, -0xc.b66d9p-15, - }; - - const univector<double, size> test_vector_f64{ - +0x8.ce416c0d31e88p-4, +0x8.2978efe51dafp-4, -0x8.a9d088b81da6p-7, +0xe.aeb56c029358p-11, - +0x8.20492639873ap-13, -0x8.20d4e21aab538p-12, +0x8.3376b2d53b4a8p-12, -0xf.83d3d1c17343p-13, - +0xe.8b584f0dd5ac8p-13, -0xd.9dd740ceaacf8p-13, +0xc.bedc85e7a621p-13, -0xb.ee0f472bf8968p-13, - +0xb.2a9baed1fe6cp-13, -0xa.73a9d1670f4ep-13, +0x9.c86d29d297798p-13, -0x9.2825f4d894088p-13, - +0x8.9220a956d651p-13, -0x8.05b539fdd79e8p-13, +0xf.048cb5194cfa8p-14, -0xe.0e819fa128938p-14, - +0xd.2835957d684cp-14, -0xc.50a69c2a8dc18p-14, +0xb.86e33bbaf3cbp-14, -0xa.ca097058af2cp-14, - +0xa.1945ad1703dcp-14, -0x9.73d1eef7d8b68p-14, +0x8.d8f4df1bb3efp-14, -0x8.48010323c6f7p-14, - +0xf.80a7f5baeeb2p-15, -0xe.82ab94bb68a8p-15, +0xd.94f05f80af008p-15, -0xc.b66c0799b21a8p-15, - }; - - const univector<T, size> ir = biquad(bq, unitimpulse<T>()); - - CHECK(absmaxof(choose_array<T>(test_vector_f32, test_vector_f64) - ir) == 0); - }); -} - -TEST(resampler_test) -{ - const int in_sr = 44100; - const int out_sr = 48000; - const int freq = 100; - auto resampler = sample_rate_converter<fbase>(resample_quality::draft, out_sr, in_sr); - double delay = resampler.get_fractional_delay(); - univector<fbase> out(out_sr / 10); - univector<fbase> in = truncate(sin(c_pi<fbase> * phasor<fbase>(freq, in_sr, 0)), in_sr / 10); - univector<fbase> ref = truncate( - sin(c_pi<fbase> * phasor<fbase>(freq, out_sr, -delay * (static_cast<double>(freq) / out_sr))), - out_sr / 10); - resampler.process(out, in); - - CHECK(rms(slice(out - ref, static_cast<size_t>(ceil(delay * 2)))) < 0.005f); -} -TEST(resampler_test_complex) -{ - using type = complex<fbase>; - const int in_sr = 44100; - const int out_sr = 48000; - const int freq = 100; - auto resampler = sample_rate_converter<type>(resample_quality::draft, out_sr, in_sr); - double delay = resampler.get_fractional_delay(); - univector<type> out(out_sr / 10); - univector<type> in = truncate(sin(c_pi<fbase> * phasor<fbase>(freq, in_sr, 0)), in_sr / 10); - univector<type> ref = truncate( - sin(c_pi<fbase> * phasor<fbase>(freq, out_sr, -delay * (static_cast<double>(freq) / out_sr))), - out_sr / 10); - resampler.process(out, in); - - CHECK(rms(cabs(slice(out - ref, static_cast<size_t>(ceil(delay * 2))))) < 0.005f); -} } // namespace CMT_ARCH_NAME #ifndef KFR_NO_MAIN diff --git a/tests/expression_test.cpp b/tests/expression_test.cpp @@ -16,95 +16,6 @@ using namespace kfr; namespace CMT_ARCH_NAME { -TEST(pack) -{ - static_assert(is_same<vec<f32x2, 1>, invoke_result<fn::reverse, vec<f32x2, 1>>>); - const univector<float, 21> v1 = 1 + counter(); - const univector<float, 21> v2 = v1 * 11; - - CHECK_EXPRESSION(pack(v1, v2), 21, [](float i) { return f32x2{ 1 + i, (1 + i) * 11 }; }); - - CHECK_EXPRESSION(bind_expression(fn::reverse(), pack(v1, v2)), 21, [](float i) { - return f32x2{ (1 + i) * 11, 1 + i }; - }); -} - -TEST(adjacent) -{ - CHECK_EXPRESSION(adjacent(fn::mul(), counter()), infinite_size, - [](size_t i) { return i > 0 ? i * (i - 1) : 0; }); -} - -TEST(padded) -{ - static_assert(is_infinite<decltype(padded(counter()))>, ""); - static_assert(is_infinite<decltype(padded(truncate(counter(), 100)))>, ""); - - CHECK_EXPRESSION(padded(truncate(counter(), 6), -1), infinite_size, - [](size_t i) { return i >= 6 ? -1 : i; }); - - CHECK_EXPRESSION(padded(truncate(counter(), 0), -1), infinite_size, [](size_t i) { return -1; }); - - CHECK_EXPRESSION(padded(truncate(counter(), 501), -1), infinite_size, - [](size_t i) { return i >= 501 ? -1 : i; }); -} - -TEST(rebind) -{ - auto c_minus_two = counter() - 2; - auto four_minus_c = rebind(c_minus_two, 4, counter()); - CHECK_EXPRESSION(c_minus_two, infinite_size, [](size_t i) { return i - 2; }); - CHECK_EXPRESSION(four_minus_c, infinite_size, [](size_t i) { return 4 - i; }); -} - -TEST(test_arg_access) -{ - univector<float> v1(10); - v1 = counter(); - auto e1 = std::move(v1) + 10; - std::get<0>(e1.args)[0] = 100; - std::get<1>(e1.args).val = 1; - - CHECK_EXPRESSION(e1, 10, [](size_t i) { return (i == 0 ? 100 : i) + 1; }); -} - -TEST(to_pointer) -{ - auto e1 = to_pointer(counter<float>()); - - CHECK_EXPRESSION(e1, infinite_size, [](size_t i) { return static_cast<float>(i); }); - - auto e2 = to_pointer(gen_linear(0.f, 1.f)); - - CHECK_EXPRESSION(e2, infinite_size, [](size_t i) { return static_cast<float>(i); }); -} - -TEST(test_arg_replace) -{ - univector<float, 10> v1 = counter(); - univector<float, 10> v2 = -counter(); - auto e1 = to_pointer(v1) * 10; - std::get<0>(e1.args) = to_pointer(v2); - - CHECK_EXPRESSION(e1, 10, [](size_t i) { return i * -10.0; }); -} - -TEST(placeholders) -{ - auto expr = 100 * placeholder<float>(); - CHECK_EXPRESSION(expr, infinite_size, [](size_t) { return 0.f; }); - substitute(expr, to_pointer(counter<float>())); - CHECK_EXPRESSION(expr, infinite_size, [](size_t i) { return 100.f * i; }); -} - -TEST(placeholders_pointer) -{ - expression_pointer<float> expr = to_pointer(10 * placeholder<float>()); - CHECK_EXPRESSION(expr, infinite_size, [](size_t) { return 0.f; }); - substitute(expr, to_pointer(counter<float>())); - CHECK_EXPRESSION(expr, infinite_size, [](size_t i) { return 10.f * i; }); -} - TEST(univector_assignment) { univector<int> x = truncate(counter(), 10); @@ -115,28 +26,12 @@ TEST(univector_assignment) CHECK(y.size() == 10u); } -TEST(size_calc) -{ - auto a = counter(); - CHECK(a.size() == infinite_size); - auto b = slice(counter(), 100); - CHECK(b.size() == infinite_size); - auto c = slice(counter(), 100, 1000); - CHECK(c.size() == 1000u); - auto d = slice(c, 100); - CHECK(d.size() == 900u); -} - -TEST(reverse) -{ - CHECK_EXPRESSION(reverse(truncate(counter(), 21)), 21, [](size_t i) { return 20 - i; }); -} - TEST(mix) { - CHECK_EXPRESSION(mix(sequence(0, 0.5f, 1, 0.5f), counter(), counter() * 10), infinite_size, [](size_t i) { - return mix(std::array<float, 4>{ 0, 0.5f, 1, 0.5f }[i % 4], i, i * 10); - }); + CHECK_EXPRESSION(mix(sequence(0, 0.5f, 1, 0.5f), counter(), counter() * 10), infinite_size, + [](size_t i) { + return mix(std::array<float, 4>{ 0, 0.5f, 1, 0.5f }[i % 4], i, i * 10); + }); } TEST(expression_mask) @@ -146,42 +41,6 @@ TEST(expression_mask) x = select(x > y, 0.5f, 0.1f) * (y - x) + x; } -constexpr inline size_t fast_range_sum(size_t stop) { return stop * (stop + 1) / 2; } - -TEST(partition) -{ - { - univector<double, 385> output = zeros(); - auto result = partition(output, counter(), 5, 1); - CHECK(result.count == 5u); - CHECK(result.chunk_size == 80u); - - result(0); - CHECK(sum(output) >= fast_range_sum(80 - 1)); - result(1); - CHECK(sum(output) >= fast_range_sum(160 - 1)); - result(2); - CHECK(sum(output) >= fast_range_sum(240 - 1)); - result(3); - CHECK(sum(output) >= fast_range_sum(320 - 1)); - result(4); - CHECK(sum(output) == fast_range_sum(385 - 1)); - } - - { - univector<double, 385> output = zeros(); - auto result = partition(output, counter(), 5, 160); - CHECK(result.count == 3u); - CHECK(result.chunk_size == 160u); - - result(0); - CHECK(sum(output) >= fast_range_sum(160 - 1)); - result(1); - CHECK(sum(output) >= fast_range_sum(320 - 1)); - result(2); - CHECK(sum(output) == fast_range_sum(385 - 1)); - } -} } // namespace CMT_ARCH_NAME #ifndef KFR_NO_MAIN diff --git a/tests/intrinsic_test.cpp b/tests/intrinsic_test.cpp @@ -107,7 +107,8 @@ TEST(intrin_sqrt) CHECK(kfr::sqrt(make_vector(9)) == make_vector<fbase>(3.0)); CHECK(kfr::sqrt(make_vector(-9)) == make_vector<fbase>(qnan)); testo::matrix(named("type") = float_vector_types<vec>, named("value") = std::vector<int>{ 0, 2, 65536 }, - [](auto type, int value) { + [](auto type, int value) + { using T = typename decltype(type)::type; const T x(value); CHECK(kfr::sqrt(x) == apply([](auto x) -> decltype(x) { return std::sqrt(x); }, x)); @@ -117,7 +118,8 @@ TEST(intrin_sqrt) TEST(intrin_satadd_satsub) { testo::matrix(named("type") = cconcat(signed_vector_types<vec>, unsigned_vector_types<vec>), - [](auto type) { + [](auto type) + { using T = typename decltype(type)::type; using Tsub = subtype<T>; const T min = std::numeric_limits<Tsub>::min(); @@ -144,23 +146,25 @@ TEST(intrin_satadd_satsub) TEST(intrin_any_all) { - testo::matrix(named("type") = unsigned_vector_types<vec>, [](auto type) { - using T = typename decltype(type)::type; - constexpr size_t width = widthof<T>(); - using Tsub = subtype<T>; - const auto x = enumerate<Tsub, width>() == Tsub(0); - CHECK(any(x) == true); - if (width == 1) - CHECK(all(x) == true); - else - CHECK(all(x) == false); - const auto y = zerovector<Tsub, width>() == Tsub(127); - CHECK(all(y) == false); - CHECK(any(y) == false); - const auto z = zerovector<Tsub, width>() == Tsub(0); - CHECK(all(z) == true); - CHECK(any(z) == true); - }); + testo::matrix(named("type") = unsigned_vector_types<vec>, + [](auto type) + { + using T = typename decltype(type)::type; + constexpr size_t width = widthof<T>(); + using Tsub = subtype<T>; + const auto x = enumerate<Tsub, width>() == Tsub(0); + CHECK(any(x) == true); + if (width == 1) + CHECK(all(x) == true); + else + CHECK(all(x) == false); + const auto y = zerovector<Tsub, width>() == Tsub(127); + CHECK(all(y) == false); + CHECK(any(y) == false); + const auto z = zerovector<Tsub, width>() == Tsub(0); + CHECK(all(z) == true); + CHECK(any(z) == true); + }); } } // namespace CMT_ARCH_NAME diff --git a/tests/numeric_tests.hpp b/tests/numeric_tests.hpp @@ -10,6 +10,8 @@ namespace kfr { +inline bool show_measured_accuracy = false; + using testo::test_data_entry; inline namespace CMT_ARCH_NAME @@ -55,7 +57,7 @@ uint64_t ulps(vec<T, N> x, vec<T, N> y) inline const char* tname(ctype_t<f32>) { return "float"; } inline const char* tname(ctype_t<f64>) { return "double"; } -#define CHECK_DIFF(x_arg, y_arg, threshold) \ +#define CHECK_DIFF(x_arg, y_arg, threshold, file, line) \ do \ { \ ++checks_count; \ @@ -67,57 +69,67 @@ inline const char* tname(ctype_t<f64>) { return "double"; } ::testo::active_test()->check( \ arg_diff <= threshold, \ ::cometa::as_string(x_arg_value, " ~= ", y_arg_value, " (", arg_diff, " <= ", threshold, ")"), \ - #x_arg " ~= " #y_arg); \ + #x_arg " ~= " #y_arg, file, line); \ } while (0) #define KFR_AUTO_TEST_1(fn, datafile, maxulps, avgulps) \ TEST(fn##_##datafile) \ { \ - testo::matrix(named("type") = vector_types(), [&](auto type) { \ - using T = typename decltype(type)::type; \ - using Tsub = subtype<T>; \ - double error_sum = 0.0; \ - uint64_t error_peak = 0; \ - uint64_t checks_count = 0; \ - std::shared_ptr<file_reader<test_data_entry<Tsub, 1>>> reader = \ - open_file_for_reading<test_data_entry<Tsub, 1>>( \ - std::string(KFR_SRC_DIR "/tests/data/" #fn "_") + tname(ctype<Tsub>) + "_" #datafile); \ - test_data_entry<Tsub, 1> entry; \ - while (reader->read(entry)) \ - { \ - testo::scope s(as_string(entry.arguments[0])); \ - CHECK_DIFF(kfr::fn(entry.arguments[0]), entry.result, maxulps); \ - } \ - CHECK(checks_count > 0u); \ - CHECK(error_sum / checks_count <= avgulps); \ - println("measured accuracy: ", tname(ctype<Tsub>), " ", error_sum / checks_count, "(peak ", \ - error_peak, ")"); \ - }); \ + testo::matrix(named("type") = vector_types(), \ + [&](auto type) \ + { \ + using T = typename decltype(type)::type; \ + using Tsub = subtype<T>; \ + double error_sum = 0.0; \ + uint64_t error_peak = 0; \ + uint64_t checks_count = 0; \ + std::shared_ptr<file_reader<test_data_entry<Tsub, 1>>> reader = \ + open_file_for_reading<test_data_entry<Tsub, 1>>( \ + std::string(KFR_SRC_DIR "/tests/data/" #fn "_") + tname(ctype<Tsub>) + \ + "_" #datafile); \ + test_data_entry<Tsub, 1> entry; \ + while (reader->read(entry)) \ + { \ + testo::scope s(as_string(entry.arguments[0])); \ + CHECK_DIFF(kfr::fn(entry.arguments[0]), entry.result, maxulps, __FILE__, \ + __LINE__); \ + } \ + CHECK(checks_count > 0u); \ + CHECK(error_sum / checks_count <= avgulps); \ + if (show_measured_accuracy) \ + println("measured accuracy: ", tname(ctype<Tsub>), " ", \ + error_sum / checks_count, "(peak ", error_peak, ")"); \ + }); \ } #define KFR_AUTO_TEST_2(fn, datafile, maxulps, avgulps) \ TEST(fn##_##datafile) \ { \ - testo::matrix(named("type") = vector_types(), [&](auto type) { \ - using T = typename decltype(type)::type; \ - using Tsub = subtype<T>; \ - double error_sum = 0.0; \ - uint64_t error_peak = 0; \ - uint64_t checks_count = 0; \ - std::shared_ptr<file_reader<test_data_entry<Tsub, 2>>> reader = \ - open_file_for_reading<test_data_entry<Tsub, 2>>( \ - std::string(KFR_SRC_DIR "/tests/data/" #fn "_") + tname(ctype<Tsub>) + "_" #datafile); \ - test_data_entry<Tsub, 2> entry; \ - while (reader->read(entry)) \ - { \ - testo::scope s(as_string(entry.arguments[0], entry.arguments[1])); \ - CHECK_DIFF(kfr::fn(entry.arguments[0], entry.arguments[1]), entry.result, maxulps); \ - } \ - CHECK(checks_count > 0u); \ - CHECK(error_sum / checks_count <= avgulps); \ - println("measured accuracy: ", tname(ctype<Tsub>), " ", error_sum / checks_count, "(peak ", \ - error_peak, ")"); \ - }); \ + testo::matrix(named("type") = vector_types(), \ + [&](auto type) \ + { \ + using T = typename decltype(type)::type; \ + using Tsub = subtype<T>; \ + double error_sum = 0.0; \ + uint64_t error_peak = 0; \ + uint64_t checks_count = 0; \ + std::shared_ptr<file_reader<test_data_entry<Tsub, 2>>> reader = \ + open_file_for_reading<test_data_entry<Tsub, 2>>( \ + std::string(KFR_SRC_DIR "/tests/data/" #fn "_") + tname(ctype<Tsub>) + \ + "_" #datafile); \ + test_data_entry<Tsub, 2> entry; \ + while (reader->read(entry)) \ + { \ + testo::scope s(as_string(entry.arguments[0], entry.arguments[1])); \ + CHECK_DIFF(kfr::fn(entry.arguments[0], entry.arguments[1]), entry.result, \ + maxulps, __FILE__, __LINE__); \ + } \ + CHECK(checks_count > 0u); \ + CHECK(error_sum / checks_count <= avgulps); \ + if (show_measured_accuracy) \ + println("measured accuracy: ", tname(ctype<Tsub>), " ", \ + error_sum / checks_count, "(peak ", error_peak, ")"); \ + }); \ } } // namespace CMT_ARCH_NAME } // namespace kfr diff --git a/tests/tensor_test.cpp b/tests/tensor_test.cpp @@ -1,6 +1,6 @@ -#include "kfr/version.hpp" #include "kfr/runtime.hpp" #include "kfr/testo/testo.hpp" +#include "kfr/version.hpp" using namespace kfr; diff --git a/tests/unit/base/base.cpp b/tests/unit/base/base.cpp @@ -0,0 +1,7 @@ +/** + * KFR (http://kfrlib.com) + * Copyright (C) 2016-2022 Fractalium Ltd + * See LICENSE.txt for details + */ + +#include <kfr/base.hpp> diff --git a/tests/unit/base/basic_expressions.cpp b/tests/unit/base/basic_expressions.cpp @@ -0,0 +1,131 @@ +/** + * KFR (http://kfrlib.com) + * Copyright (C) 2016-2022 Fractalium Ltd + * See LICENSE.txt for details + */ + +#include <kfr/base/basic_expressions.hpp> +#include <kfr/base/simd_expressions.hpp> +#include <kfr/base/univector.hpp> +#include <kfr/io/tostring.hpp> + +namespace kfr +{ +inline namespace CMT_ARCH_NAME +{ + +TEST(linspace) +{ + testo::eplison_scope<> eps(10); + CHECK_EXPRESSION(linspace(0.0, 1.0, 5, true, ctrue), { 0.0, 0.25, 0.50, 0.75, 1.0 }); + CHECK_EXPRESSION(linspace(0.0, 1.0, 4, false, ctrue), { 0.0, 0.25, 0.50, 0.75 }); + CHECK(shapeof(linspace(0.0, 1.0, 5, true, cfalse)) == shape{ infinite_size }); + CHECK_EXPRESSION(linspace(0.0, 1.0, 4, false, ctrue), { 0.0, 0.25, 0.50, 0.75 }); + CHECK_EXPRESSION(symmlinspace(3.0, 4, ctrue), { -3.0, -1.00, 1.00, 3.00 }); + + CHECK_EXPRESSION(linspace(1, 21, 4, false, ctrue), { 1, 6, 11, 16 }); + CHECK_EXPRESSION(linspace(1, 21, 4, true, ctrue), { 1, 7.66666667f, 14.3333333f, 21 }); +} + +TEST(counter_shape) +{ + CHECK(shapeof(1) == shape{}); + CHECK(shapeof(counter()) == shape{ infinite_size }); + CHECK(shapeof(counter() + 1) == shape{ infinite_size }); + CHECK(shapeof(counter(0, 1, 1)) == shape{ infinite_size, infinite_size }); +} + +TEST(pack) +{ + static_assert(is_same<vec<f32x2, 1>, invoke_result<fn::reverse, vec<f32x2, 1>>>); + const univector<float, 21> v1 = 1 + counter(); + const univector<float, 21> v2 = v1 * 11; + + CHECK_EXPRESSION(pack(v1, v2), 21, [](float i) { return f32x2{ 1 + i, (1 + i) * 11 }; }); + + CHECK_EXPRESSION(bind_expression(fn::reverse(), pack(v1, v2)), 21, + [](float i) { + return f32x2{ (1 + i) * 11, 1 + i }; + }); +} + +TEST(adjacent) +{ + CHECK_EXPRESSION(adjacent(fn::mul(), counter()), infinite_size, + [](size_t i) { return i > 0 ? i * (i - 1) : 0; }); +} + +TEST(padded) +{ + static_assert(is_infinite<decltype(padded(counter()))>, ""); + static_assert(is_infinite<decltype(padded(truncate(counter(), 100)))>, ""); + + CHECK_EXPRESSION(padded(truncate(counter(), 6), -1), infinite_size, + [](size_t i) { return i >= 6 ? -1 : i; }); + + CHECK_EXPRESSION(padded(truncate(counter(), 0), -1), infinite_size, [](size_t i) { return -1; }); + + CHECK_EXPRESSION(padded(truncate(counter(), 501), -1), infinite_size, + [](size_t i) { return i >= 501 ? -1 : i; }); +} + +TEST(rebind) +{ + auto c_minus_two = counter() - 2; + auto four_minus_c = rebind(c_minus_two, 4, counter()); + CHECK_EXPRESSION(counter(), infinite_size, [](size_t i) { return i; }); + CHECK_EXPRESSION(c_minus_two, infinite_size, [](size_t i) { return i - 2; }); + CHECK_EXPRESSION(four_minus_c, infinite_size, [](size_t i) { return 4 - i; }); +} + +TEST(test_arg_access) +{ + univector<float> v1(10); + v1 = counter(); + auto e1 = std::move(v1) + 10; + std::get<0>(e1.args)[0] = 100; + std::get<1>(e1.args) = 1; + + CHECK_EXPRESSION(e1, 10, [](size_t i) { return (i == 0 ? 100 : i) + 1; }); +} + +TEST(size_calc) +{ + auto a = counter(); + CHECK(shapeof(a) == shape{ infinite_size }); + auto b = slice(counter(), 100); + CHECK(shapeof(b) == shape{ infinite_size }); + auto c = slice(counter(), 100, 1000); + CHECK(shapeof(c) == shape{ 1000 }); + auto d = slice(c, 100); + CHECK(shapeof(d) == shape{ 900 }); +} + +TEST(reverse_expression) +{ + CHECK_EXPRESSION(reverse(truncate(counter(), 21)), 21, [](size_t i) { return 20 - i; }); +} + +TEST(sequence) +{ + CHECK_EXPRESSION(sequence(0, 0.5f, 1, 0.5f), infinite_size, + [](size_t i) { + return std::array<float, 4>{ 0, 0.5f, 1, 0.5f }[i % 4]; + }); +} + +TEST(assign_expression) +{ + univector<float> f = truncate(counter(0, 1), 10); + f *= 10; + CHECK_EXPRESSION(f, { 0, 10, 20, 30, 40, 50, 60, 70, 80, 90 }); + + univector<float> a = truncate(counter(0, 1), 10); + univector<float> b = truncate(counter(100, 1), 10); + pack(a, b) *= broadcast<2>(10.f); + CHECK_EXPRESSION(a, { 0, 10, 20, 30, 40, 50, 60, 70, 80, 90 }); + CHECK_EXPRESSION(b, { 1000, 1010, 1020, 1030, 1040, 1050, 1060, 1070, 1080, 1090 }); +} + +} // namespace CMT_ARCH_NAME +} // namespace kfr diff --git a/tests/unit/base/generators.cpp b/tests/unit/base/generators.cpp @@ -0,0 +1,33 @@ +/** + * KFR (http://kfrlib.com) + * Copyright (C) 2016-2022 Fractalium Ltd + * See LICENSE.txt for details + */ + +#include <kfr/base/basic_expressions.hpp> +#include <kfr/base/generators.hpp> +#include <kfr/base/math_expressions.hpp> +#include <kfr/base/reduce.hpp> +#include <kfr/base/simd_expressions.hpp> +#include <kfr/base/univector.hpp> + +using namespace kfr; + +namespace CMT_ARCH_NAME +{ + +TEST(test_gen_expj) +{ + univector<cbase> v = truncate(gen_expj(0.f, constants<float>::pi_s(2) * 0.1f), 1000); + CHECK(rms(cabs( + v.slice(990) - + univector<cbase>({ cbase(1., +0.00000000e+00), cbase(0.80901699, +5.87785252e-01), + cbase(0.30901699, +9.51056516e-01), cbase(-0.30901699, +9.51056516e-01), + cbase(-0.80901699, +5.87785252e-01), cbase(-1., +1.22464680e-16), + cbase(-0.80901699, -5.87785252e-01), cbase(-0.30901699, -9.51056516e-01), + cbase(0.30901699, -9.51056516e-01), cbase(0.80901699, -5.87785252e-01) }))) < + 0.00006); // error here depends on vector width + // In most cases error is much lower (less than 0.00001) +} + +} // namespace CMT_ARCH_NAME diff --git a/tests/unit/base/pointer.cpp b/tests/unit/base/pointer.cpp @@ -0,0 +1,57 @@ +/** + * KFR (http://kfrlib.com) + * Copyright (C) 2016-2022 Fractalium Ltd + * See LICENSE.txt for details + */ + +#include <kfr/base/pointer.hpp> +#include <kfr/base/simd_expressions.hpp> +#include <kfr/base/univector.hpp> +#include <kfr/base/generators.hpp> +#include <kfr/io/tostring.hpp> + +namespace kfr +{ +inline namespace CMT_ARCH_NAME +{ +TEST(to_pointer) +{ + auto e1 = to_pointer(counter<float>()); + + CHECK_EXPRESSION(e1, infinite_size, [](size_t i) { return static_cast<float>(i); }); + + auto e2 = to_pointer(gen_linear(0.f, 1.f)); + + CHECK_EXPRESSION(e2, infinite_size, [](size_t i) { return static_cast<float>(i); }); +} + +TEST(test_arg_replace) +{ + univector<float, 10> v1 = counter(); + univector<float, 10> v2 = -counter(); + auto e1 = to_pointer(v1) * 10; + std::get<0>(e1.args) = to_pointer(v2); + + CHECK_EXPRESSION(e1, 10, [](size_t i) { return i * -10.0; }); +} + +TEST(placeholders) +{ + auto expr1 = placeholder<float>(); + CHECK_EXPRESSION(expr1, infinite_size, [](size_t) { return 0.f; }); + auto expr2 = 100 * placeholder<float>(); + CHECK_EXPRESSION(expr2, infinite_size, [](size_t) { return 0.f; }); + substitute(expr2, to_pointer(counter<float>())); + CHECK_EXPRESSION(expr2, infinite_size, [](size_t i) { return 100.f * i; }); +} + +TEST(placeholders_pointer) +{ + expression_pointer<float> expr = to_pointer(10 * placeholder<float>()); + CHECK_EXPRESSION(expr, infinite_size, [](size_t) { return 0.f; }); + substitute(expr, to_pointer(counter<float>())); + CHECK_EXPRESSION(expr, infinite_size, [](size_t i) { return 10.f * i; }); +} + +} // namespace CMT_ARCH_NAME +} // namespace kfr diff --git a/tests/unit/base/random.cpp b/tests/unit/base/random.cpp @@ -14,15 +14,15 @@ inline namespace CMT_ARCH_NAME { template <typename T, size_t N> -static void test_random(kfr::random_bit_generator& gen, const vec<T, N>& value) +static void test_random(random_state& state, const vec<T, N>& value) { - const vec<T, N> r = kfr::random_uniform<T, N>(gen); + const vec<T, N> r = kfr::random_uniform<T, N>(state); CHECK(r == value); } TEST(random_bit_generator) { - kfr::random_bit_generator gen(1, 2, 3, 4); + random_state gen = random_init(1, 2, 3, 4); test_random(gen, pack<u8>(21, 62, 88, 30, 46, 234, 205, 29, 41, 190, 212, 81, 217, 135, 218, 227)); test_random(gen, pack<u16>(48589, 33814, 55928, 14799, 26904, 18521, 20808, 50888)); test_random(gen, pack<u32>(1554764222, 1538765785, 2072590063, 2837641155)); @@ -66,11 +66,11 @@ TEST(random_bit_generator) TEST(gen_random_range) { - random_bit_generator gen(1, 2, 3, 4); - univector<fbase, 1000> v = kfr::gen_random_range<fbase>(std::ref(gen), -1.0, 1.0); - CHECK(kfr::minof(v) >= fbase(-1.0)); - CHECK(kfr::maxof(v) <= fbase(1.0)); - println(kfr::mean(v)); + random_state gen = random_init(1, 2, 3, 4); + univector<fbase, 1000> v = gen_random_range<fbase>(std::ref(gen), -1.0, 1.0); + CHECK(minof(v) >= fbase(-1.0)); + CHECK(maxof(v) <= fbase(1.0)); + // println(mean(v)); } } // namespace CMT_ARCH_NAME } // namespace kfr diff --git a/tests/unit/base/reduce.cpp b/tests/unit/base/reduce.cpp @@ -5,6 +5,8 @@ */ #include <kfr/base/reduce.hpp> +#include <kfr/base/simd_expressions.hpp> +#include <kfr/base/univector.hpp> namespace kfr { @@ -50,5 +52,12 @@ TEST(reduce) CHECK(product(a) == -1080); } } + +TEST(dotproduct) +{ + univector<float, 177> v1 = counter(); + univector<float, 177> v2 = counter() * 2 + 10; + CHECK(dotproduct(v1, v2) == 3821312); +} } // namespace CMT_ARCH_NAME } // namespace kfr diff --git a/tests/unit/base/shape.cpp b/tests/unit/base/shape.cpp @@ -0,0 +1,72 @@ +/** + * KFR (http://kfrlib.com) + * Copyright (C) 2016-2022 Fractalium Ltd + * See LICENSE.txt for details + */ + +#include <kfr/base/shape.hpp> + +namespace kfr +{ +inline namespace CMT_ARCH_NAME +{ + +TEST(shape) +{ + using internal_generic::increment_indices_return; + using internal_generic::null_index; + CHECK(size_of_shape(shape{ 4, 3 }) == 12); + CHECK(size_of_shape(shape{ 1 }) == 1); + CHECK(size_of_shape<1>(1) == 1); + shape<1> sh1 = 1; + sh1 = 2; + + CHECK(internal_generic::strides_for_shape(shape{ 2, 3, 4 }) == shape{ 12, 4, 1 }); + + CHECK(internal_generic::strides_for_shape(shape{ 2, 3, 4 }, 10) == shape{ 120, 40, 10 }); + + CHECK(increment_indices_return(shape{ 0, 0, 0 }, shape{ 0, 0, 0 }, shape{ 2, 3, 4 }) == shape{ 0, 0, 1 }); + CHECK(increment_indices_return(shape{ 0, 0, 3 }, shape{ 0, 0, 0 }, shape{ 2, 3, 4 }) == shape{ 0, 1, 0 }); + CHECK(increment_indices_return(shape{ 0, 2, 0 }, shape{ 0, 0, 0 }, shape{ 2, 3, 4 }) == shape{ 0, 2, 1 }); + CHECK(increment_indices_return(shape{ 0, 2, 3 }, shape{ 0, 0, 0 }, shape{ 2, 3, 4 }) == shape{ 1, 0, 0 }); + CHECK(increment_indices_return(shape{ 1, 2, 3 }, shape{ 0, 0, 0 }, shape{ 2, 3, 4 }) == + shape{ null_index, null_index, null_index }); + + CHECK(shape{ 3, 4, 5 }.to_flat(shape{ 0, 0, 0 }) == 0); + CHECK(shape{ 3, 4, 5 }.to_flat(shape{ 2, 3, 4 }) == 59); + + CHECK(shape{ 3, 4, 5 }.from_flat(0) == shape{ 0, 0, 0 }); + CHECK(shape{ 3, 4, 5 }.from_flat(59) == shape{ 2, 3, 4 }); +} +TEST(shape_broadcast) +{ + using internal_generic::can_assign_from; + using internal_generic::common_shape; + using internal_generic::same_layout; + + CHECK(common_shape(shape{ 1, 5 }, shape{ 5, 1 }) == shape{ 5, 5 }); + CHECK(common_shape(shape{ 5 }, shape{ 5, 1 }) == shape{ 5, 5 }); + CHECK(common_shape(shape{ 1, 1, 1 }, shape{ 2, 5, 1 }) == shape{ 2, 5, 1 }); + CHECK(common_shape(shape{ 1 }, shape{ 2, 5, 7 }) == shape{ 2, 5, 7 }); + + CHECK(common_shape(shape{}, shape{ 0 }) == shape{ 0 }); + CHECK(common_shape(shape{}, shape{ 0, 0 }) == shape{ 0, 0 }); + CHECK(common_shape(shape{ 0 }, shape{ 0, 0 }) == shape{ 0, 0 }); + + CHECK(can_assign_from(shape{ 1, 4 }, shape{ 1, 4 })); + CHECK(!can_assign_from(shape{ 1, 4 }, shape{ 4, 1 })); + CHECK(can_assign_from(shape{ 1, 4 }, shape{ 1, 1 })); + CHECK(can_assign_from(shape{ 1, 4 }, shape{ 1 })); + CHECK(can_assign_from(shape{ 1, 4 }, shape{})); + + CHECK(same_layout(shape{ 2, 3, 4 }, shape{ 2, 3, 4 })); + CHECK(same_layout(shape{ 1, 2, 3, 4 }, shape{ 2, 3, 4 })); + CHECK(same_layout(shape{ 2, 3, 4 }, shape{ 2, 1, 1, 3, 4 })); + CHECK(same_layout(shape{ 2, 3, 4 }, shape{ 2, 3, 4, 1 })); + CHECK(same_layout(shape{ 2, 1, 3, 4 }, shape{ 1, 2, 3, 4, 1 })); + + CHECK(!same_layout(shape{ 2, 1, 3, 4 }, shape{ 1, 2, 4, 3, 1 })); + CHECK(!same_layout(shape{ 2, 1, 3, 4 }, shape{ 1, 2, 4, 3, 0 })); +} +} // namespace CMT_ARCH_NAME +} // namespace kfr diff --git a/tests/unit/base/tensor.cpp b/tests/unit/base/tensor.cpp @@ -1,12 +1,13 @@ /** * KFR (http://kfrlib.com) - * Copyright (C) 2016 D Levin + * Copyright (C) 2016-2022 Fractalium Ltd * See LICENSE.txt for details */ -#include <kfr/base/simd_expressions.hpp> -#include <kfr/base/math_expressions.hpp> #include <kfr/base/basic_expressions.hpp> +#include <kfr/base/math_expressions.hpp> +#include <kfr/base/reduce.hpp> +#include <kfr/base/simd_expressions.hpp> #include <kfr/base/tensor.hpp> #include <kfr/io/tostring.hpp> #include <kfr/simd.hpp> @@ -20,39 +21,6 @@ namespace kfr inline namespace CMT_ARCH_NAME { -TEST(vec_deduction) -{ - vec v{ 1, 2, 3 }; - static_assert(std::is_same_v<decltype(v), vec<int, 3>>); - - tensor<float, 2> t2{ shape{ 20, 40 } }; -} - -TEST(shape) -{ - using internal_generic::increment_indices_return; - using internal_generic::null_index; - CHECK(size_of_shape(shape{ 4, 3 }) == 12); - CHECK(size_of_shape(shape{ 1 }) == 1); - - CHECK(internal_generic::strides_for_shape(shape{ 2, 3, 4 }) == shape{ 12, 4, 1 }); - - CHECK(internal_generic::strides_for_shape(shape{ 2, 3, 4 }, 10) == shape{ 120, 40, 10 }); - - CHECK(increment_indices_return(shape{ 0, 0, 0 }, shape{ 0, 0, 0 }, shape{ 2, 3, 4 }) == shape{ 0, 0, 1 }); - CHECK(increment_indices_return(shape{ 0, 0, 3 }, shape{ 0, 0, 0 }, shape{ 2, 3, 4 }) == shape{ 0, 1, 0 }); - CHECK(increment_indices_return(shape{ 0, 2, 0 }, shape{ 0, 0, 0 }, shape{ 2, 3, 4 }) == shape{ 0, 2, 1 }); - CHECK(increment_indices_return(shape{ 0, 2, 3 }, shape{ 0, 0, 0 }, shape{ 2, 3, 4 }) == shape{ 1, 0, 0 }); - CHECK(increment_indices_return(shape{ 1, 2, 3 }, shape{ 0, 0, 0 }, shape{ 2, 3, 4 }) == - shape{ null_index, null_index, null_index }); - - CHECK(shape{ 3, 4, 5 }.to_flat(shape{ 0, 0, 0 }) == 0); - CHECK(shape{ 3, 4, 5 }.to_flat(shape{ 2, 3, 4 }) == 59); - - CHECK(shape{ 3, 4, 5 }.from_flat(0) == shape{ 0, 0, 0 }); - CHECK(shape{ 3, 4, 5 }.from_flat(59) == shape{ 2, 3, 4 }); -} - TEST(tensor_base) { tensor<float, 2> t{ shape{ 20, 40 } }; @@ -124,19 +92,7 @@ TEST(tensor_memory) CHECK(refs == 0); } -// TEST(tensor_expression_assign) -// { -// tensor<float, 1> t1{ shape{ 32 }, 0.f }; - -// t1 = counter(); - -// CHECK(t1.size() == 32); -// CHECK(t1(0) == 0.f); -// CHECK(t1(1) == 1.f); -// CHECK(t1(31) == 31.f); -// } - -DTEST(tensor_expression) +TEST(tensor_expression) { tensor<float, 1> t1{ shape{ 32 }, 0.f }; tensor<float, 1> t2{ shape{ 32 }, 100.f }; @@ -161,6 +117,8 @@ DTEST(tensor_expression) t4 = 1.f; CHECK(t4(0, 0) == 1.f); CHECK(t4(5, 5) == 1.f); + CHECK(minof(t4) == 1); + CHECK(maxof(t4) == 1); CHECK(sum(t4) == 36); t4(trange(2, 4), trange(2, 4)) = scalar(10); @@ -172,31 +130,17 @@ DTEST(tensor_expression) CHECK(t4(5, 5) == 1.f); CHECK(sum(t4) == 72); - t4(trange(2, 4), trange(2, 4)) = 10 + counter(); + t4(trange(2, 4), trange(2, 4)) = 10 + counter(0, 2, 1); CHECK(t4(2, 2) == 10.f); CHECK(t4(2, 3) == 11.f); CHECK(t4(3, 2) == 12.f); CHECK(t4(3, 3) == 13.f); + CHECK(sum(t4) == 78); } TEST(tensor_broadcast) { - using internal_generic::can_assign_from; - using internal_generic::common_shape; - using internal_generic::same_layout; - - CHECK(common_shape(shape{ 1, 5 }, shape{ 5, 1 }) == shape{ 5, 5 }); - CHECK(common_shape(shape{ 5 }, shape{ 5, 1 }) == shape{ 5, 5 }); - CHECK(common_shape(shape{ 1, 1, 1 }, shape{ 2, 5, 1 }) == shape{ 2, 5, 1 }); - CHECK(common_shape(shape{ 1 }, shape{ 2, 5, 7 }) == shape{ 2, 5, 7 }); - - CHECK(can_assign_from(shape{ 1, 4 }, shape{ 1, 4 })); - CHECK(!can_assign_from(shape{ 1, 4 }, shape{ 4, 1 })); - CHECK(can_assign_from(shape{ 1, 4 }, shape{ 1, 1 })); - CHECK(can_assign_from(shape{ 1, 4 }, shape{ 1 })); - CHECK(can_assign_from(shape{ 1, 4 }, shape{})); - tensor<float, 2> t1{ shape{ 1, 5 }, { 1.f, 2.f, 3.f, 4.f, 5.f } }; tensor<float, 2> t2{ shape{ 5, 1 }, { 10.f, 20.f, 30.f, 40.f, 50.f } }; tensor<float, 1> t4{ shape{ 5 }, { 1.f, 2.f, 3.f, 4.f, 5.f } }; @@ -211,15 +155,6 @@ TEST(tensor_broadcast) tensor<float, 2> t5 = tapply(t4, t2, fn::add{}); // tensor<float, 2> t5 = t4 + t2; CHECK(t5 == tresult); - - CHECK(same_layout(shape{ 2, 3, 4 }, shape{ 2, 3, 4 })); - CHECK(same_layout(shape{ 1, 2, 3, 4 }, shape{ 2, 3, 4 })); - CHECK(same_layout(shape{ 2, 3, 4 }, shape{ 2, 1, 1, 3, 4 })); - CHECK(same_layout(shape{ 2, 3, 4 }, shape{ 2, 3, 4, 1 })); - CHECK(same_layout(shape{ 2, 1, 3, 4 }, shape{ 1, 2, 3, 4, 1 })); - - CHECK(!same_layout(shape{ 2, 1, 3, 4 }, shape{ 1, 2, 4, 3, 1 })); - CHECK(!same_layout(shape{ 2, 1, 3, 4 }, shape{ 1, 2, 4, 3, 0 })); } } // namespace CMT_ARCH_NAME @@ -375,8 +310,9 @@ TEST(tensor_counter) { { 102.0, 112.0, 122.0, 132.0 } }, } }); } - -DTEST(tensor_dims) +namespace tests +{ +TEST(tensor_dims) { tensor<double, 6> t12{ shape{ 2, 3, 4, 5, 6, 7 } }; @@ -387,6 +323,7 @@ DTEST(tensor_dims) CHECK(t12.reduce(std::plus<>{}, 0) == 1648888920); } +} // namespace tests TEST(vec_from_cvals) { @@ -397,13 +334,14 @@ TEST(vec_from_cvals) TEST(xfunction_test) { - auto f = xfunction{ xwitharguments{ 3.f, 4.f }, std::plus<>{} }; + auto f = expression_function{ expression_with_arguments{ 3.f, 4.f }, std::plus<>{} }; float v; process(v, f); CHECK(v == 7.f); - static_assert(std::is_same_v<decltype(f), xfunction<std::plus<>, float, float>>); + static_assert(std::is_same_v<decltype(f), expression_function<std::plus<>, float, float>>); - auto f2 = xfunction{ xwitharguments{ 10.f, std::array{ 1.f, 2.f, 3.f, 4.f, 5.f } }, std::plus<>{} }; + auto f2 = expression_function{ expression_with_arguments{ 10.f, std::array{ 1.f, 2.f, 3.f, 4.f, 5.f } }, + std::plus<>{} }; std::array<float, 5> v2; process(v2, f2); CHECK(v2 == std::array{ 11.f, 12.f, 13.f, 14.f, 15.f }); @@ -413,9 +351,11 @@ TEST(xfunction_test) process(v3, f3); CHECK(v3 == std::array{ 11.f, 12.f, 13.f, 14.f, 15.f }); - auto f4 = scalar(0) + std::array<std::array<float, 1>, 5>{ - { { { 100.f } }, { { 200.f } }, { { 300.f } }, { { 400.f } }, { { 500.f } } } - } + std::array{ 1.f, 2.f, 3.f, 4.f, 5.f }; + auto f4 = scalar(0) + + std::array<std::array<float, 1>, 5>{ + { { { 100.f } }, { { 200.f } }, { { 300.f } }, { { 400.f } }, { { 500.f } } } + } + + std::array{ 1.f, 2.f, 3.f, 4.f, 5.f }; std::array<std::array<float, 5>, 5> v4; CHECK(expression_traits<decltype(f4)>::shapeof(f4) == shape{ 5, 5 }); @@ -435,9 +375,9 @@ TEST(xfunction_test2) } template <typename Type, index_t Dims> -KFR_FUNCTION xcounter<Type, Dims> debug_counter(uint64_t scale = 10) +KFR_FUNCTION expression_counter<Type, Dims> debug_counter(uint64_t scale = 10) { - xcounter<Type, Dims> result; + expression_counter<Type, Dims> result; result.start = 0; uint64_t val = 1; for (size_t i = 0; i < Dims; i++) @@ -554,10 +494,10 @@ static void test_reshape(const tensor<T, dims1>& t1, const tensor<T, dims>&... t test_reshape(ts...); } -TEST(xreshape) +TEST(expression_reshape) { std::array<float, 12> x; - process(reshape(x, shape{ 3, 4 }), xcounter<float, 2>{ 0, { 10, 1 } }); + process(reshape(x, shape{ 3, 4 }), expression_counter<float, 2>{ 0, { 10, 1 } }); CHECK(x == std::array<float, 12>{ { 0, 1, 2, 3, 10, 11, 12, 13, 20, 21, 22, 23 } }); test_reshape(tensor<float, 1>{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 }, // @@ -642,7 +582,7 @@ extern "C" __declspec(dllexport) void assembly_test9(int64_t* dst, size_t stride } constexpr inline index_t rank = 1; extern "C" __declspec(dllexport) void assembly_test10(tensor<double, rank>& t12, - const xcounter<double, rank>& ctr) + const expression_counter<double, rank>& ctr) { process(t12, ctr); } @@ -650,8 +590,8 @@ extern "C" __declspec(dllexport) void assembly_test11(f64x2& x, u64x2 y) { x = y extern "C" __declspec(dllexport) void assembly_test12( std::array<std::array<uint32_t, 4>, 4>& x, - const xfunction<std::plus<>, std::array<std::array<uint32_t, 1>, 4>&, - std::array<std::array<uint32_t, 4>, 1>&>& y) + const expression_function<std::plus<>, std::array<std::array<uint32_t, 1>, 4>&, + std::array<std::array<uint32_t, 4>, 1>&>& y) { process(x, y); } @@ -667,13 +607,13 @@ using array2d = std::array<std::array<T, N2>, N1>; extern "C" __declspec(dllexport) void assembly_test14(std::array<float, 32>& x, const std::array<float, 32>& y) { - process(x, x_reverse(y)); + process(x, reverse(y)); } extern "C" __declspec(dllexport) void assembly_test15(array2d<float, 32, 32>& x, const array2d<float, 32, 32>& y) { - process(x, x_reverse(y)); + process(x, reverse(y)); } extern "C" __declspec(dllexport) void assembly_test16a(array2d<double, 8, 2>& x, @@ -689,28 +629,28 @@ extern "C" __declspec(dllexport) void assembly_test16b(array2d<double, 8, 2>& x, extern "C" __declspec(dllexport) void assembly_test17a(const tensor<double, 2>& x, const tensor<double, 2>& y) { - xfunction ysqr = xfunction{ xwitharguments{ y }, fn::sqr{} }; + expression_function ysqr = expression_function{ expression_with_arguments{ y }, fn::sqr{} }; process<8, 0>(x, ysqr); } extern "C" __declspec(dllexport) void assembly_test17b(const tensor<double, 2>& x, const tensor<double, 2>& y) { - xfunction ysqr = xfunction{ xwitharguments{ y }, fn::sqr{} }; + expression_function ysqr = expression_function{ expression_with_arguments{ y }, fn::sqr{} }; process<2, 1>(x, ysqr); } extern "C" __declspec(dllexport) void assembly_test18a(const tensor<double, 2>& x, const tensor<double, 2>& y) { - xfunction ysqr = xfunction{ xwitharguments{ y }, fn::sqr{} }; - process<8, 0>(fixshape(x, fixed_shape<8, 2>{}), fixshape(ysqr, fixed_shape<8, 2>{})); + expression_function ysqr = expression_function{ expression_with_arguments{ y }, fn::sqr{} }; + process<8, 0>(fixshape(x, fixed_shape<8, 2>), fixshape(ysqr, fixed_shape<8, 2>)); } extern "C" __declspec(dllexport) void assembly_test18b(const tensor<double, 2>& x, const tensor<double, 2>& y) { - xfunction ysqr = xfunction{ xwitharguments{ y }, fn::sqr{} }; - process<2, 1>(fixshape(x, fixed_shape<8, 2>{}), fixshape(ysqr, fixed_shape<8, 2>{})); + expression_function ysqr = expression_function{ expression_with_arguments{ y }, fn::sqr{} }; + process<2, 1>(fixshape(x, fixed_shape<8, 2>), fixshape(ysqr, fixed_shape<8, 2>)); } extern "C" __declspec(dllexport) void assembly_test19(const tensor<double, 2>& x, - const xreshape<tensor<double, 2>, 2>& y) + const expression_reshape<tensor<double, 2>, 2>& y) { process(x, y); } @@ -728,6 +668,12 @@ extern "C" __declspec(dllexport) shape<4> assembly_test21(const shape<4>& x, siz { return x.from_flat(fl); } +extern "C" __declspec(dllexport) float assembly_test22(const std::array<float, 440>& x, + const std::array<float, 440>& y) +{ + return dotproduct(x, y); +} +extern "C" __declspec(dllexport) float assembly_test23(const std::array<float, 440>& x) { return rms(x); } #endif struct val @@ -750,15 +696,16 @@ val& lvint_func() static val v; return v; } -TEST(xwitharguments) +TEST(expression_with_arguments) { - xfunction fn1 = xfunction{ xwitharguments{ rvint_func() }, fn::add{} }; + expression_function fn1 = expression_function{ expression_with_arguments{ rvint_func() }, fn::add{} }; static_assert(std::is_same_v<decltype(fn1)::nth<0>, val>); - xfunction fn2 = xfunction{ xwitharguments{ lvint_func() }, fn::add{} }; + expression_function fn2 = expression_function{ expression_with_arguments{ lvint_func() }, fn::add{} }; static_assert(std::is_same_v<decltype(fn2)::nth<0>, val&>); - xfunction fn3 = xfunction{ xwitharguments{ std::as_const(lvint_func()) }, fn::add{} }; + expression_function fn3 = + expression_function{ expression_with_arguments{ std::as_const(lvint_func()) }, fn::add{} }; static_assert(std::is_same_v<decltype(fn3)::nth<0>, const val&>); } @@ -804,13 +751,13 @@ TEST(complex_tensors) tensor<complex<float>, 1> t1{ complex<float>(0, -1), }; - CHECK(trender(xfunction{ xwitharguments{ t1, complex<float>(0, 1) }, fn::mul{} }) == + CHECK(trender(expression_function{ expression_with_arguments{ t1, complex<float>(0, 1) }, fn::mul{} }) == tensor<complex<float>, 1>{ complex<float>(1, 0) }); - CHECK(trender(xfunction{ xwitharguments{ t1, complex<float>(1, 0) }, fn::mul{} }) == + CHECK(trender(expression_function{ expression_with_arguments{ t1, complex<float>(1, 0) }, fn::mul{} }) == tensor<complex<float>, 1>{ complex<float>(0, -1) }); - CHECK(trender(xfunction{ xwitharguments{ t1, complex<float>(0, -1) }, fn::mul{} }) == + CHECK(trender(expression_function{ expression_with_arguments{ t1, complex<float>(0, -1) }, fn::mul{} }) == tensor<complex<float>, 1>{ complex<float>(-1, 0) }); - CHECK(trender(xfunction{ xwitharguments{ t1, complex<float>(-1, 0) }, fn::mul{} }) == + CHECK(trender(expression_function{ expression_with_arguments{ t1, complex<float>(-1, 0) }, fn::mul{} }) == tensor<complex<float>, 1>{ complex<float>(0, 1) }); } @@ -829,12 +776,6 @@ TEST(from_ilist) CHECK(t4 == tensor<float, 3>(shape{ 2, 2, 2 }, { 10, 20, 30, 40, 50, 60, 70, 80 })); } -TEST(enumerate) -{ - CHECK(enumerate(vec_shape<int, 4>{}, 4) == vec{ 0, 4, 8, 12 }); - CHECK(enumerate(vec_shape<int, 8>{}, 3) == vec{ 0, 3, 6, 9, 12, 15, 18, 21 }); - CHECK(enumerate(vec_shape<int, 7>{}, 3) == vec{ 0, 3, 6, 9, 12, 15, 18 }); -} } // namespace CMT_ARCH_NAME } // namespace kfr diff --git a/tests/unit/dsp/biquad.cpp b/tests/unit/dsp/biquad.cpp @@ -0,0 +1,113 @@ +/** + * KFR (http://kfrlib.com) + * Copyright (C) 2016-2022 Fractalium Ltd + * See LICENSE.txt for details + */ + +#include <kfr/base/reduce.hpp> +#include <kfr/base/simd_expressions.hpp> +#include <kfr/base/univector.hpp> +#include <kfr/dsp/biquad.hpp> +#include <kfr/dsp/biquad_design.hpp> +#include <kfr/dsp/special.hpp> + +namespace kfr +{ +inline namespace CMT_ARCH_NAME +{ + +template <typename T, typename... Ts, univector_tag Tag> +inline const univector<T, Tag>& choose_array(const univector<T, Tag>& array, const univector<Ts, Tag>&...) +{ + return array; +} + +template <typename T, typename T2, typename... Ts, univector_tag Tag, KFR_ENABLE_IF(!is_same<T, T2>)> +inline const univector<T, Tag>& choose_array(const univector<T2, Tag>&, const univector<Ts, Tag>&... arrays) +{ + return choose_array<T>(arrays...); +} + +TEST(biquad_lowpass1) +{ + testo::matrix(named("type") = ctypes_t<float, double>{}, + [](auto type) + { + using T = typename decltype(type)::type; + + const biquad_params<T> bq = biquad_lowpass<T>(0.1, 0.7); + + constexpr size_t size = 32; + + const univector<float, size> test_vector_f32{ + +0x8.9bce2p-7, +0xd.8383ep-6, +0x8.f908dp-5, +0xe.edc21p-6, +0x9.ae104p-6, + +0x9.dcc24p-7, +0xd.50584p-9, -0xf.2668p-13, -0xd.09ca1p-10, -0xe.15995p-10, + -0xa.b90d2p-10, -0xc.edea4p-11, -0xb.f14eap-12, -0xc.2cb44p-14, +0xb.4a4dep-15, + +0xb.685dap-14, +0xa.b181fp-14, +0xf.0cb2bp-15, +0x8.695d6p-15, +0xd.bedd4p-17, + +0xf.5474p-20, -0xd.bb266p-19, -0x9.63ca1p-18, -0xf.ca567p-19, -0xa.5231p-19, + -0xa.9e934p-20, -0xe.ab52p-22, +0xa.3c4cp-26, +0xd.721ffp-23, +0xe.ccc1ap-23, + +0xb.5f248p-23, +0xd.d2c9ap-24, + }; + + const univector<double, size> test_vector_f64{ + +0x8.9bce2bf3663e8p-7, +0xd.8384010fdf1dp-6, +0x8.f908e7a36df6p-5, + +0xe.edc2332a6d0bp-6, +0x9.ae104af1da9ap-6, +0x9.dcc235ef68e7p-7, + +0xd.5057ee425e05p-9, -0xf.266e42a99aep-13, -0xd.09cad73642208p-10, + -0xe.1599f32a83dp-10, -0xa.b90d8910a117p-10, -0xc.edeaabb890948p-11, + -0xb.f14edbb55383p-12, -0xc.2cb39b86f2dap-14, +0xb.4a506ecff055p-15, + +0xb.685edfdb55358p-14, +0xa.b182e32f8e298p-14, +0xf.0cb3dfd894b2p-15, + +0x8.695df725b4438p-15, +0xd.beddc3606b9p-17, +0xf.547004d20874p-20, + -0xd.bb29b25b49b6p-19, -0x9.63cb9187da1dp-18, -0xf.ca588634fc618p-19, + -0xa.52322d320da78p-19, -0xa.9e9420154e4p-20, -0xe.ab51f7b0335ap-22, + +0xa.3c6479980e1p-26, +0xd.7223836599fp-23, +0xe.ccc47ddd18678p-23, + +0xb.5f265b1be1728p-23, +0xd.d2cb83f8483f8p-24, + }; + + const univector<T, size> ir = biquad(bq, unitimpulse<T>()); + + CHECK(absmaxof(choose_array<T>(test_vector_f32, test_vector_f64) - ir) == 0); + }); +} + +TEST(biquad_lowpass2) +{ + testo::matrix(named("type") = ctypes_t<float, double>{}, + [](auto type) + { + using T = typename decltype(type)::type; + + const biquad_params<T> bq = biquad_lowpass<T>(0.45, 0.2); + + constexpr size_t size = 32; + + const univector<float, size> test_vector_f32{ + +0x8.ce416p-4, +0x8.2979p-4, -0x8.a9d04p-7, +0xe.aeb3p-11, +0x8.204f8p-13, + -0x8.20d78p-12, +0x8.3379p-12, -0xf.83d81p-13, +0xe.8b5c4p-13, -0xd.9ddadp-13, + +0xc.bedfcp-13, -0xb.ee123p-13, +0xb.2a9e5p-13, -0xa.73ac4p-13, +0x9.c86f6p-13, + -0x9.2828p-13, +0x8.92229p-13, -0x8.05b7p-13, +0xf.048ffp-14, -0xe.0e849p-14, + +0xd.28384p-14, -0xc.50a9p-14, +0xb.86e56p-14, -0xa.ca0b6p-14, +0xa.19476p-14, + -0x9.73d38p-14, +0x8.d8f64p-14, -0x8.48024p-14, +0xf.80aa2p-15, -0xe.82ad8p-15, + +0xd.94f22p-15, -0xc.b66d9p-15, + }; + + const univector<double, size> test_vector_f64{ + +0x8.ce416c0d31e88p-4, +0x8.2978efe51dafp-4, -0x8.a9d088b81da6p-7, + +0xe.aeb56c029358p-11, +0x8.20492639873ap-13, -0x8.20d4e21aab538p-12, + +0x8.3376b2d53b4a8p-12, -0xf.83d3d1c17343p-13, +0xe.8b584f0dd5ac8p-13, + -0xd.9dd740ceaacf8p-13, +0xc.bedc85e7a621p-13, -0xb.ee0f472bf8968p-13, + +0xb.2a9baed1fe6cp-13, -0xa.73a9d1670f4ep-13, +0x9.c86d29d297798p-13, + -0x9.2825f4d894088p-13, +0x8.9220a956d651p-13, -0x8.05b539fdd79e8p-13, + +0xf.048cb5194cfa8p-14, -0xe.0e819fa128938p-14, +0xd.2835957d684cp-14, + -0xc.50a69c2a8dc18p-14, +0xb.86e33bbaf3cbp-14, -0xa.ca097058af2cp-14, + +0xa.1945ad1703dcp-14, -0x9.73d1eef7d8b68p-14, +0x8.d8f4df1bb3efp-14, + -0x8.48010323c6f7p-14, +0xf.80a7f5baeeb2p-15, -0xe.82ab94bb68a8p-15, + +0xd.94f05f80af008p-15, -0xc.b66c0799b21a8p-15, + }; + + const univector<T, size> ir = biquad(bq, unitimpulse<T>()); + + CHECK(absmaxof(choose_array<T>(test_vector_f32, test_vector_f64) - ir) == 0); + }); +} +} // namespace CMT_ARCH_NAME +} // namespace kfr diff --git a/tests/unit/dsp/biquad_design.cpp b/tests/unit/dsp/biquad_design.cpp @@ -0,0 +1,7 @@ +/** + * KFR (http://kfrlib.com) + * Copyright (C) 2016-2022 Fractalium Ltd + * See LICENSE.txt for details + */ + +#include <kfr/dsp/biquad_design.hpp> diff --git a/tests/unit/dsp/dsp.cpp b/tests/unit/dsp/dsp.cpp @@ -0,0 +1,7 @@ +/** + * KFR (http://kfrlib.com) + * Copyright (C) 2016-2022 Fractalium Ltd + * See LICENSE.txt for details + */ + +#include <kfr/dsp.hpp> diff --git a/tests/unit/dsp/ebu.cpp b/tests/unit/dsp/ebu.cpp @@ -0,0 +1,270 @@ +/** + * KFR (http://kfrlib.com) + * Copyright (C) 2016-2022 Fractalium Ltd + * See LICENSE.txt for details + */ + +#include <kfr/dsp/ebu.hpp> +#include <kfr/dsp/oscillators.hpp> + +namespace kfr +{ +inline namespace CMT_ARCH_NAME +{ + +struct TestFragment +{ + float gain; // dB + float duration; // seconds + float frequency; // Hz +}; + +struct TestFragmentMultichannel +{ + float gain_L_R; // dB + float gain_C; // dB + float gain_Ls_Rs; // dB + float duration; // seconds + float frequency; // Hz +}; + +template <typename T> +static void ebu_test_stereo(int sample_rate, const std::initializer_list<TestFragment>& fragments, T refM, + T refS, T refI, T refLRA) +{ + ebu_r128<T> loudness(sample_rate, { Speaker::Left, Speaker::Right }); + + size_t total_length = 0; + for (const TestFragment& f : fragments) + { + total_length += static_cast<size_t>(f.duration * sample_rate); + } + + univector<T> left_right(total_length); + size_t pos = 0; + for (const TestFragment& f : fragments) + { + const size_t len = static_cast<size_t>(f.duration * sample_rate); + left_right.slice(pos, len) = dB_to_amp(f.gain) * sinenorm(phasor<float>(f.frequency, sample_rate)); + pos += len; + } + + for (size_t i = 0; i < total_length / loudness.packet_size(); i++) + { + loudness.process_packet({ left_right.slice(i * loudness.packet_size(), loudness.packet_size()), + left_right.slice(i * loudness.packet_size(), loudness.packet_size()) }); + } + T M, S, I, RL, RH; + loudness.get_values(M, S, I, RL, RH); + if (!std::isnan(refM)) + { + testo::scope s(as_string("M = ", fmt<'f', -1, 2>(M))); + CHECK(std::abs(M - refM) < 0.05f); + } + if (!std::isnan(refS)) + { + testo::scope s(as_string("S = ", fmt<'f', -1, 2>(S))); + CHECK(std::abs(S - refS) < 0.05f); + } + if (!std::isnan(refI)) + { + testo::scope s(as_string("I = ", fmt<'f', -1, 2>(I))); + CHECK(std::abs(I - refI) < 0.05f); + } + if (!std::isnan(refLRA)) + { + testo::scope s(as_string("LRA = ", fmt<'f', -1, 2>((RH - RL)))); + CHECK(std::abs((RH - RL) - refLRA) < 0.05f); + } +} + +template <typename T> +static void ebu_test_multichannel(int sample_rate, + const std::initializer_list<TestFragmentMultichannel>& fragments, T refM, + T refS, T refI, T refLRA) +{ + ebu_r128<T> loudness(sample_rate, { Speaker::Left, Speaker::Right, Speaker::Center, Speaker::LeftSurround, + Speaker::RightSurround }); + + size_t total_length = 0; + for (const TestFragmentMultichannel& f : fragments) + { + total_length += static_cast<size_t>(f.duration * sample_rate); + } + + univector<T> left_right(total_length); + univector<T> center(total_length); + univector<T> surround(total_length); + size_t pos = 0; + for (const TestFragmentMultichannel& f : fragments) + { + const size_t len = static_cast<size_t>(f.duration * sample_rate); + left_right.slice(pos, len) = + dB_to_amp(f.gain_L_R) * sinenorm(phasor<float>(f.frequency, sample_rate)); + center.slice(pos, len) = dB_to_amp(f.gain_C) * sinenorm(phasor<float>(f.frequency, sample_rate)); + surround.slice(pos, len) = + dB_to_amp(f.gain_Ls_Rs) * sinenorm(phasor<float>(f.frequency, sample_rate)); + pos += len; + } + + for (size_t i = 0; i < total_length / loudness.packet_size(); i++) + { + loudness.process_packet({ left_right.slice(i * loudness.packet_size(), loudness.packet_size()), + left_right.slice(i * loudness.packet_size(), loudness.packet_size()), + center.slice(i * loudness.packet_size(), loudness.packet_size()), + surround.slice(i * loudness.packet_size(), loudness.packet_size()), + surround.slice(i * loudness.packet_size(), loudness.packet_size()) }); + } + T M, S, I, RL, RH; + loudness.get_values(M, S, I, RL, RH); + if (!std::isnan(refM)) + { + testo::scope s(as_string("M = ", fmt<'f', -1, 2>(M))); + CHECK(std::abs(M - refM) < 0.05f); + } + if (!std::isnan(refS)) + { + testo::scope s(as_string("S = ", fmt<'f', -1, 2>(S))); + CHECK(std::abs(S - refS) < 0.05f); + } + if (!std::isnan(refI)) + { + testo::scope s(as_string("I = ", fmt<'f', -1, 2>(I))); + CHECK(std::abs(I - refI) < 0.05f); + } + if (!std::isnan(refLRA)) + { + testo::scope s(as_string("LRA = ", fmt<'f', -1, 2>((RH - RL)))); + CHECK(std::abs((RH - RL) - refLRA) < 0.05f); + } +} + +TEST(ebu_stereo_1_and_2) +{ + testo::matrix(named("type") = ctypes_t<float, double>{}, + named("sample_rate") = std::vector<int>{ 44100, 48000 }, + [](auto type, int sample_rate) + { + using T = typename decltype(type)::type; + + ebu_test_stereo<T>(sample_rate, { { -23.f, 20.f, 1000.f } }, -23.f, -23.f, -23.f, NAN); + ebu_test_stereo<T>(sample_rate, { { -33.f, 20.f, 1000.f } }, -33.f, -33.f, -33.f, NAN); + }); +} + +TEST(ebu_stereo_3_4_and_5) +{ + testo::matrix( + named("type") = ctypes_t<float, double>{}, named("sample_rate") = std::vector<int>{ 44100, 48000 }, + [](auto type, int sample_rate) + { + using T = typename decltype(type)::type; + + ebu_test_stereo<T>(sample_rate, + { { -36.f, 10.f, 1000.f }, { -23.f, 60.f, 1000.f }, { -36.f, 10.f, 1000.f } }, + NAN, NAN, -23.f, NAN); + ebu_test_stereo<T>(sample_rate, + { { -72.f, 10.f, 1000.f }, + { -36.f, 10.f, 1000.f }, + { -23.f, 60.f, 1000.f }, + { -36.f, 10.f, 1000.f }, + { -72.f, 10.f, 1000.f } }, + NAN, NAN, -23.f, NAN); + }); +} + +TEST(ebu_multichannel_6) +{ + testo::matrix(named("type") = ctypes_t<float, double>{}, + named("sample_rate") = std::vector<int>{ 44100, 48000 }, + [](auto type, int sample_rate) + { + using T = typename decltype(type)::type; + + ebu_test_multichannel<T>(sample_rate, { { -28.f, -24.f, -30.f, 20.f, 1000.f } }, NAN, + NAN, -23.f, NAN); + }); +} + +TEST(ebu_stereo_9) +{ + testo::matrix(named("type") = ctypes_t<float, double>{}, + named("sample_rate") = std::vector<int>{ 44100, 48000 }, + [](auto type, int sample_rate) + { + using T = typename decltype(type)::type; + + ebu_test_stereo<T>(sample_rate, + { { -20.f, 1.34f, 1000.f }, + { -30.f, 1.66f, 1000.f }, + { -20.f, 1.34f, 1000.f }, + { -30.f, 1.66f, 1000.f }, + { -20.f, 1.34f, 1000.f }, + { -30.f, 1.66f, 1000.f }, + { -20.f, 1.34f, 1000.f }, + { -30.f, 1.66f, 1000.f }, + { -20.f, 1.34f, 1000.f }, + { -30.f, 1.66f, 1000.f } }, + NAN, -23.f, NAN, NAN); + }); +} + +TEST(ebu_stereo_12) +{ + testo::matrix( + named("type") = ctypes_t<float, double>{}, named("sample_rate") = std::vector<int>{ 44100, 48000 }, + [](auto type, int sample_rate) + { + using T = typename decltype(type)::type; + + ebu_test_stereo<T>(sample_rate, + { { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f }, + { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f }, + { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f }, + { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f }, + { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f }, + { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f }, + { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f }, + { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f }, + { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f }, + { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f }, + { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f }, + { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f }, + { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f }, + { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f }, + { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f }, + { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f }, + { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f } }, + -23.f, NAN, NAN, NAN); + }); +} + +TEST(ebu_lra_1_2_3_and_4) +{ + testo::matrix(named("type") = ctypes_t<float, double>{}, + named("sample_rate") = std::vector<int>{ 44100, 48000 }, + [](auto type, int sample_rate) + { + using T = typename decltype(type)::type; + + ebu_test_stereo<T>(sample_rate, { { -20.f, 20.f, 1000.f }, { -30.f, 20.f, 1000.f } }, + NAN, NAN, NAN, 10.f); + + ebu_test_stereo<T>(sample_rate, { { -20.f, 20.f, 1000.f }, { -15.f, 20.f, 1000.f } }, + NAN, NAN, NAN, 5.f); + + ebu_test_stereo<T>(sample_rate, { { -40.f, 20.f, 1000.f }, { -20.f, 20.f, 1000.f } }, + NAN, NAN, NAN, 20.f); + + ebu_test_stereo<T>(sample_rate, + { { -50.f, 20.f, 1000.f }, + { -35.f, 20.f, 1000.f }, + { -20.f, 20.f, 1000.f }, + { -35.f, 20.f, 1000.f }, + { -50.f, 20.f, 1000.f } }, + NAN, NAN, NAN, 15.f); + }); +} +} // namespace CMT_ARCH_NAME + +} // namespace kfr diff --git a/tests/unit/dsp/fir.cpp b/tests/unit/dsp/fir.cpp @@ -0,0 +1,234 @@ +/** + * KFR (http://kfrlib.com) + * Copyright (C) 2016-2022 Fractalium Ltd + * See LICENSE.txt for details + */ + +#include <complex> +#include <kfr/dsp/fir.hpp> + +namespace kfr +{ +inline namespace CMT_ARCH_NAME +{ + +TEST(fir) +{ +#ifdef CMT_COMPILER_IS_MSVC + // testo::matrix causes error in MSVC + { + using T = float; + + const univector<T, 100> data = counter() + sequence(1, 2, -10, 100) + sequence(0, -7, 0.5); + const univector<T, 6> taps{ 1, 2, -2, 0.5, 0.0625, 4 }; + + CHECK_EXPRESSION(fir(data, taps), 100, + [&](size_t index) -> T + { + T result = 0; + for (size_t i = 0; i < taps.size(); i++) + result += data.get(index - i, 0) * taps[i]; + return result; + }); + + CHECK_EXPRESSION(short_fir(data, taps), 100, + [&](size_t index) -> T + { + T result = 0; + for (size_t i = 0; i < taps.size(); i++) + result += data.get(index - i, 0) * taps[i]; + return result; + }); + } + { + using T = double; + + const univector<T, 100> data = counter() + sequence(1, 2, -10, 100) + sequence(0, -7, 0.5); + const univector<T, 6> taps{ 1, 2, -2, 0.5, 0.0625, 4 }; + + CHECK_EXPRESSION(fir(data, taps), 100, + [&](size_t index) -> T + { + T result = 0; + for (size_t i = 0; i < taps.size(); i++) + result += data.get(index - i, 0) * taps[i]; + return result; + }); + + CHECK_EXPRESSION(short_fir(data, taps), 100, + [&](size_t index) -> T + { + T result = 0; + for (size_t i = 0; i < taps.size(); i++) + result += data.get(index - i, 0) * taps[i]; + return result; + }); + } +#else + testo::matrix(named("type") = ctypes_t<float +#ifdef CMT_NATIVE_F64 + , + double +#endif + >{}, + [](auto type) + { + using T = typename decltype(type)::type; + + const univector<T, 100> data = + counter() + sequence(1, 2, -10, 100) + sequence(0, -7, 0.5); + const univector<T, 6> taps{ 1, 2, -2, 0.5, 0.0625, 4 }; + + CHECK_EXPRESSION(fir(data, taps), 100, + [&](size_t index) -> T + { + T result = 0; + for (size_t i = 0; i < taps.size(); i++) + result += data.get(index - i, 0) * taps[i]; + return result; + }); + + fir_state<T> state(taps.ref()); + + CHECK_EXPRESSION(fir(state, data), 100, + [&](size_t index) -> T + { + T result = 0; + for (size_t i = 0; i < taps.size(); i++) + result += data.get(index - i, 0) * taps[i]; + return result; + }); + + CHECK_EXPRESSION(short_fir(data, taps), 100, + [&](size_t index) -> T + { + T result = 0; + for (size_t i = 0; i < taps.size(); i++) + result += data.get(index - i, 0) * taps[i]; + return result; + }); + + short_fir_state<9, T> state2(taps); + + CHECK_EXPRESSION(short_fir<taps.size()>(state2, data), 100, + [&](size_t index) -> T + { + T result = 0; + for (size_t i = 0; i < taps.size(); i++) + result += data.get(index - i, 0) * taps[i]; + return result; + }); + + CHECK_EXPRESSION(moving_sum<taps.size()>(data), 100, + [&](size_t index) -> T + { + T result = 0; + for (size_t i = 0; i < taps.size(); i++) + result += data.get(index - i, 0); + return result; + }); + + moving_sum_state<T, 131> msstate1; + + CHECK_EXPRESSION(moving_sum(msstate1, data), 100, + [&](size_t index) -> T + { + T result = 0; + for (size_t i = 0; i < msstate1.delayline.size(); i++) + result += data.get(index - i, 0); + return result; + }); + + moving_sum_state<T> msstate2(133); + + CHECK_EXPRESSION(moving_sum(msstate2, data), 100, + [&](size_t index) -> T + { + T result = 0; + for (size_t i = 0; i < msstate2.delayline.size(); i++) + result += data.get(index - i, 0); + return result; + }); + }); +#endif +} + +#ifdef CMT_NATIVE_F64 +TEST(fir_different) +{ + const univector<float, 100> data = counter() + sequence(1, 2, -10, 100) + sequence(0, -7, 0.5f); + // const univector<double, 6> taps{ 1, 2, -2, 0.5, 0.0625, 4 }; + const univector<double, 4> taps{ 1, 2, 3, 4 }; + + CHECK_EXPRESSION(fir(data, taps), 100, + [&](size_t index) -> float + { + double result = 0.0; + for (size_t i = 0; i < taps.size(); i++) + result += data.get(index - i, 0.0) * taps[i]; + return float(result); + }); + + CHECK_EXPRESSION(short_fir(data, taps), 100, + [&](size_t index) -> float + { + double result = 0.0; + for (size_t i = 0; i < taps.size(); i++) + result += data.get(index - i, 0.0) * taps[i]; + return float(result); + }); +} +#endif + +#ifdef KFR_STD_COMPLEX +template <typename T> +inline std::complex<T> to_std(const std::complex<T>& c) +{ + return c; +} +template <typename T> +inline std::complex<T> from_std(const std::complex<T>& c) +{ + return c; +} +#else +template <typename T> +inline std::complex<T> to_std(const kfr::complex<T>& c) +{ + return { c.real(), c.imag() }; +} + +template <typename T> +inline kfr::complex<T> from_std(const std::complex<T>& c) +{ + return { c.real(), c.imag() }; +} +#endif + +TEST(fir_complex) +{ + const univector<complex<float>, 100> data = + counter() * complex<float>{ 0.f, 1.f } + sequence(1, 2, -10, 100) + sequence(0, -7, 0.5f); + const univector<float, 6> taps{ 1, 2, -2, 0.5, 0.0625, 4 }; + + CHECK_EXPRESSION(fir(data, taps), 100, + [&](size_t index) -> complex<float> + { + std::complex<float> result = 0.0; + for (size_t i = 0; i < taps.size(); i++) + result = result + to_std(data.get(index - i, 0.0)) * taps[i]; + return from_std(result); + }); + + CHECK_EXPRESSION(short_fir(data, taps), 100, + [&](size_t index) -> complex<float> + { + std::complex<float> result = 0.0; + for (size_t i = 0; i < taps.size(); i++) + result = result + to_std(data.get(index - i, 0.0)) * taps[i]; + return from_std(result); + }); +} +} // namespace CMT_ARCH_NAME + +} // namespace kfr diff --git a/tests/unit/dsp/sample_rate_conversion.cpp b/tests/unit/dsp/sample_rate_conversion.cpp @@ -0,0 +1,51 @@ +/** + * KFR (http://kfrlib.com) + * Copyright (C) 2016-2022 Fractalium Ltd + * See LICENSE.txt for details + */ + +#include <kfr/base/math_expressions.hpp> +#include <kfr/dsp/oscillators.hpp> +#include <kfr/dsp/sample_rate_conversion.hpp> +#include <kfr/math/sin_cos.hpp> + +namespace kfr +{ +inline namespace CMT_ARCH_NAME +{ + +TEST(resampler_test) +{ + const int in_sr = 44100; + const int out_sr = 48000; + const int freq = 100; + auto resampler = sample_rate_converter<fbase>(resample_quality::draft, out_sr, in_sr); + double delay = resampler.get_fractional_delay(); + univector<fbase> out(out_sr / 10); + univector<fbase> in = truncate(sin(c_pi<fbase> * phasor<fbase>(freq, in_sr, 0)), in_sr / 10); + univector<fbase> ref = truncate( + sin(c_pi<fbase> * phasor<fbase>(freq, out_sr, -delay * (static_cast<double>(freq) / out_sr))), + out_sr / 10); + resampler.process(out, in); + + CHECK(rms(slice(out - ref, static_cast<size_t>(ceil(delay * 2)))) < 0.005f); +} +TEST(resampler_test_complex) +{ + using type = complex<fbase>; + const int in_sr = 44100; + const int out_sr = 48000; + const int freq = 100; + auto resampler = sample_rate_converter<type>(resample_quality::draft, out_sr, in_sr); + double delay = resampler.get_fractional_delay(); + univector<type> out(out_sr / 10); + univector<type> in = truncate(sin(c_pi<fbase> * phasor<fbase>(freq, in_sr, 0)), in_sr / 10); + univector<type> ref = truncate( + sin(c_pi<fbase> * phasor<fbase>(freq, out_sr, -delay * (static_cast<double>(freq) / out_sr))), + out_sr / 10); + resampler.process(out, in); + + CHECK(rms(cabs(slice(out - ref, static_cast<size_t>(ceil(delay * 2))))) < 0.005f); +} +} // namespace CMT_ARCH_NAME +} // namespace kfr diff --git a/tests/unit/dsp/units.cpp b/tests/unit/dsp/units.cpp @@ -0,0 +1,58 @@ +/** + * KFR (http://kfrlib.com) + * Copyright (C) 2016-2022 Fractalium Ltd + * See LICENSE.txt for details + */ + +#include <kfr/dsp/units.hpp> + +namespace kfr +{ +inline namespace CMT_ARCH_NAME +{ + +TEST(note_to_hertz) +{ + testo::eplison_scope<void> eps(2000); + CHECK(kfr::note_to_hertz(60) == fbase(261.6255653005986346778499935233)); + CHECK(kfr::note_to_hertz(pack(60)) == pack(fbase(261.6255653005986346778499935233))); + + CHECK(kfr::note_to_hertz(69) == fbase(440.0)); + CHECK(kfr::note_to_hertz(pack(69)) == pack(fbase(440))); +} + +TEST(hertz_to_note) +{ + testo::eplison_scope<void> eps(1000); + CHECK(kfr::hertz_to_note(261.6255653005986346778499935233) == fbase(60)); + CHECK(kfr::hertz_to_note(pack(261.6255653005986346778499935233)) == pack(fbase(60))); + + CHECK(kfr::hertz_to_note(440) == fbase(69)); + CHECK(kfr::hertz_to_note(pack(440)) == pack(fbase(69))); +} + +TEST(amp_to_dB) +{ + testo::eplison_scope<void> eps(1000); + + CHECK(kfr::amp_to_dB(fbase(2.0)) == fbase(6.0205999132796239042747778944899)); + CHECK(kfr::amp_to_dB(fbase(-2.0)) == fbase(6.0205999132796239042747778944899)); + CHECK(kfr::amp_to_dB(fbase(1.0)) == fbase(0)); + CHECK(kfr::amp_to_dB(fbase(-1.0)) == fbase(0)); + CHECK(kfr::amp_to_dB(fbase(0.5)) == fbase(-6.0205999132796239042747778944899)); + CHECK(kfr::amp_to_dB(fbase(-0.5)) == fbase(-6.0205999132796239042747778944899)); + CHECK(kfr::amp_to_dB(fbase(0.0)) == fbase(-HUGE_VAL)); +} + +TEST(dB_to_amp) +{ + testo::eplison_scope<void> eps(1000); + + CHECK(kfr::dB_to_amp(fbase(-HUGE_VAL)) == fbase(0.0)); + CHECK(kfr::dB_to_amp(fbase(0.0)) == fbase(1.0)); + CHECK(kfr::dB_to_amp(fbase(6.0205999132796239042747778944899)) == fbase(2.0)); + CHECK(kfr::dB_to_amp(fbase(-6.0205999132796239042747778944899)) == fbase(0.5)); +} + +} // namespace CMT_ARCH_NAME +} // namespace kfr diff --git a/tests/unit/dsp/window.cpp b/tests/unit/dsp/window.cpp @@ -0,0 +1,189 @@ +/** + * KFR (http://kfrlib.com) + * Copyright (C) 2016-2022 Fractalium Ltd + * See LICENSE.txt for details + */ + +#include <kfr/base/reduce.hpp> +#include <kfr/base/simd_expressions.hpp> +#include <kfr/base/tensor.hpp> +#include <kfr/dsp/window.hpp> +#include <kfr/io/tostring.hpp> + +namespace kfr +{ +inline namespace CMT_ARCH_NAME +{ + +const char* wins[] = { + "", + "rectangular ", + "triangular ", + "bartlett ", + "cosine ", + "hann ", + "bartlett_hann ", + "hamming ", + "bohman ", + "blackman ", + "blackman_harris", + "kaiser ", + "flattop ", + "gaussian ", + "lanczos ", + "cosine_np ", +}; + +template <window_type type, typename T> +void win(size_t len, T arg, window_symmetry sym, univector<T> ref) +{ + univector<T> calc = render(window(len, cval_t<window_type, type>{}, arg, sym, ctype<T>)); + testo::scope sc(as_string("win=", wins[static_cast<int>(type)], " len=", len, " sym=", + sym == window_symmetry::symmetric, "\n calc=", calc, "\n ref =", ref)); + CHECK(rms(calc - ref) < 0.00001f); +} + +TEST(window) +{ + using w = window_type; + using s = window_symmetry; + using u = univector<f32>; + // clang-format + win<w::rectangular, f32>(7, 0.0, s::symmetric, u{ 1, 1, 1, 1, 1, 1, 1 }); + win<w::triangular, f32>(7, 0.0, s::symmetric, u{ 0.25, 0.5, 0.75, 1., 0.75, 0.5, 0.25 }); + win<w::bartlett, f32>(7, 0.0, s::symmetric, + u{ 0., 0.33333333, 0.66666667, 1., 0.66666667, 0.33333333, 0. }); + win<w::cosine, f32>(7, 0.0, s::symmetric, u{ 0, 0.5, 0.866025, 1, 0.866025, 0.5, 0 }); + win<w::hann, f32>(7, 0.0, s::symmetric, u{ 0., 0.25, 0.75, 1., 0.75, 0.25, 0. }); + win<w::bartlett_hann, f32>(7, 0.0, s::symmetric, u{ 0., 0.27, 0.73, 1., 0.73, 0.27, 0. }); + win<w::hamming, f32>(7, 0.54, s::symmetric, u{ 0.08, 0.31, 0.77, 1., 0.77, 0.31, 0.08 }); + win<w::bohman, f32>(7, 0.0, s::symmetric, + u{ 0., 0.10899778, 0.60899778, 1., 0.60899778, 0.10899778, 0. }); + win<w::blackman, f32>(7, 0.16, s::symmetric, + u{ -1.38777878e-17, 1.30000000e-01, 6.30000000e-01, 1.00000000e+00, 6.30000000e-01, + 1.30000000e-01, -1.38777878e-17 }); + win<w::blackman_harris, f32>( + 7, 0.0, s::symmetric, + u{ 6.00000e-05, 5.56450e-02, 5.20575e-01, 1.00000e+00, 5.20575e-01, 5.56450e-02, 6.00000e-05 }); + win<w::kaiser, f32>(7, 8.0, s::symmetric, + u{ 0.00233883, 0.1520107, 0.65247867, 1., 0.65247867, 0.1520107, 0.00233883 }); + win<w::flattop, f32>(7, 0.0, s::symmetric, + u{ -4.2105100e-04, -5.1263156e-02, 1.9821053e-01, 1.0000000e+00, 1.9821053e-01, + -5.1263156e-02, -4.2105100e-04 }); + win<w::gaussian, f32>(7, 2.5, s::symmetric, + u{ 0.1006689, 0.36044779, 0.77483743, 1., 0.77483743, 0.36044779, 0.1006689 }); + win<w::lanczos, f32>(7, 0.0, s::symmetric, + u{ -2.8e-08, 0.413497, 0.826993, 1, 0.826993, 0.413497, -2.8e-08 }); + win<w::cosine_np, f32>(7, 0.0, s::symmetric, + u{ 0.22252093, 0.6234898, 0.90096887, 1., 0.90096887, 0.6234898, 0.22252093 }); + + win<w::rectangular, f32>(8, 0.0, s::symmetric, u{ 1, 1, 1, 1, 1, 1, 1, 1 }); + win<w::triangular, f32>(8, 0.0, s::symmetric, + u{ 0.125, 0.375, 0.625, 0.875, 0.875, 0.625, 0.375, 0.125 }); + win<w::bartlett, f32>( + 8, 0.0, s::symmetric, + u{ 0., 0.28571429, 0.57142857, 0.85714286, 0.85714286, 0.57142857, 0.28571429, 0. }); + win<w::cosine, f32>(8, 0.0, s::symmetric, + u{ 0, 0.433884, 0.781832, 0.974928, 0.974928, 0.781831, 0.433883, 0 }); + win<w::hann, f32>(8, 0.0, s::symmetric, + u{ 0., 0.1882551, 0.61126047, 0.95048443, 0.95048443, 0.61126047, 0.1882551, 0. }); + win<w::bartlett_hann, f32>( + 8, 0.0, s::symmetric, + u{ 0., 0.2116453, 0.60170081, 0.92808246, 0.92808246, 0.60170081, 0.2116453, 0. }); + win<w::hamming, f32>( + 8, 0.54, s::symmetric, + u{ 0.08, 0.25319469, 0.64235963, 0.95444568, 0.95444568, 0.64235963, 0.25319469, 0.08 }); + win<w::bohman, f32>(8, 0.0, s::symmetric, + u{ 0., 0.07072475, 0.43748401, 0.91036851, 0.91036851, 0.43748401, 0.07072475, 0. }); + win<w::blackman, f32>(8, 0.16, s::symmetric, + u{ -1.38777878e-17, 9.04534244e-02, 4.59182958e-01, 9.20363618e-01, 9.20363618e-01, + 4.59182958e-01, 9.04534244e-02, -1.38777878e-17 }); + win<w::blackman_harris, f32>(8, 0.0, s::symmetric, + u{ 6.00000000e-05, 3.33917235e-02, 3.32833504e-01, 8.89369772e-01, + 8.89369772e-01, 3.32833504e-01, 3.33917235e-02, 6.00000000e-05 }); + win<w::kaiser, f32>( + 8, 8.0, s::symmetric, + u{ 0.00233883, 0.10919581, 0.48711868, 0.92615774, 0.92615774, 0.48711868, 0.10919581, 0.00233883 }); + win<w::flattop, f32>(8, 0.0, s::symmetric, + u{ -4.21051000e-04, -3.68407812e-02, 1.07037167e-02, 7.80873915e-01, 7.80873915e-01, + 1.07037167e-02, -3.68407812e-02, -4.21051000e-04 }); + win<w::gaussian, f32>( + 8, 2.5, s::symmetric, + u{ 0.09139376, 0.29502266, 0.64438872, 0.9523448, 0.9523448, 0.64438872, 0.29502266, 0.09139376 }); + win<w::lanczos, f32>(8, 0.0, s::symmetric, + u{ -2.8e-08, 0.348411, 0.724101, 0.966766, 0.966766, 0.724101, 0.34841, -2.8e-08 }); + win<w::cosine_np, f32>( + 8, 0.0, s::symmetric, + u{ 0.19509032, 0.55557023, 0.83146961, 0.98078528, 0.98078528, 0.83146961, 0.55557023, 0.19509032 }); + + win<w::rectangular, f32>(7, 0.0, s::periodic, u{ 1, 1, 1, 1, 1, 1, 1 }); + win<w::triangular, f32>(7, 0.0, s::periodic, u{ 0.125, 0.375, 0.625, 0.875, 0.875, 0.625, 0.375 }); + win<w::bartlett, f32>(7, 0.0, s::periodic, + u{ 0., 0.28571429, 0.57142857, 0.85714286, 0.85714286, 0.57142857, 0.28571429 }); + win<w::cosine, f32>(7, 0.0, s::periodic, + u{ 0, 0.433884, 0.781832, 0.974928, 0.974928, 0.781831, 0.433883 }); + win<w::hann, f32>(7, 0.0, s::periodic, + u{ 0., 0.1882551, 0.61126047, 0.95048443, 0.95048443, 0.61126047, 0.1882551 }); + win<w::bartlett_hann, f32>(7, 0.0, s::periodic, + u{ 0., 0.2116453, 0.60170081, 0.92808246, 0.92808246, 0.60170081, 0.2116453 }); + win<w::hamming, f32>(7, 0.54, s::periodic, + u{ 0.08, 0.25319469, 0.64235963, 0.95444568, 0.95444568, 0.64235963, 0.25319469 }); + win<w::bohman, f32>(7, 0.0, s::periodic, + u{ 0., 0.07072475, 0.43748401, 0.91036851, 0.91036851, 0.43748401, 0.07072475 }); + win<w::blackman, f32>(7, 0.16, s::periodic, + u{ -1.38777878e-17, 9.04534244e-02, 4.59182958e-01, 9.20363618e-01, 9.20363618e-01, + 4.59182958e-01, 9.04534244e-02 }); + win<w::blackman_harris, f32>(7, 0.0, s::periodic, + u{ 6.00000000e-05, 3.33917235e-02, 3.32833504e-01, 8.89369772e-01, + 8.89369772e-01, 3.32833504e-01, 3.33917235e-02 }); + win<w::kaiser, f32>( + 7, 8.0, s::periodic, + u{ 0.00233883, 0.10919581, 0.48711868, 0.92615774, 0.92615774, 0.48711868, 0.10919581 }); + win<w::flattop, f32>(7, 0.0, s::periodic, + u{ -4.21051000e-04, -3.68407812e-02, 1.07037167e-02, 7.80873915e-01, 7.80873915e-01, + 1.07037167e-02, -3.68407812e-02 }); + win<w::gaussian, f32>( + 7, 2.5, s::periodic, + u{ 0.09139376, 0.29502266, 0.64438872, 0.9523448, 0.9523448, 0.64438872, 0.29502266 }); + win<w::lanczos, f32>(7, 0.0, s::periodic, + u{ -2.8e-08, 0.348411, 0.724101, 0.966766, 0.966766, 0.724101, 0.34841 }); + win<w::cosine_np, f32>( + 7, 0.0, s::periodic, + u{ 0.19509032, 0.55557023, 0.83146961, 0.98078528, 0.98078528, 0.83146961, 0.55557023 }); + + win<w::rectangular, f32>(8, 0.0, s::periodic, u{ 1, 1, 1, 1, 1, 1, 1, 1 }); + win<w::triangular, f32>(8, 0.0, s::periodic, u{ 0.2, 0.4, 0.6, 0.8, 1., 0.8, 0.6, 0.4 }); + win<w::bartlett, f32>(8, 0.0, s::periodic, u{ 0., 0.25, 0.5, 0.75, 1., 0.75, 0.5, 0.25 }); + win<w::cosine, f32>(8, 0.0, s::periodic, + u{ 0, 0.382683, 0.707107, 0.92388, 1, 0.92388, 0.707107, 0.382683 }); + win<w::hann, f32>(8, 0.0, s::periodic, + u{ 0., 0.14644661, 0.5, 0.85355339, 1., 0.85355339, 0.5, 0.14644661 }); + win<w::bartlett_hann, f32>(8, 0.0, s::periodic, + u{ 0., 0.17129942, 0.5, 0.82870058, 1., 0.82870058, 0.5, 0.17129942 }); + win<w::hamming, f32>(8, 0.54, s::periodic, + u{ 0.08, 0.21473088, 0.54, 0.86526912, 1., 0.86526912, 0.54, 0.21473088 }); + win<w::bohman, f32>(8, 0.0, s::periodic, + u{ 0., 0.04830238, 0.31830989, 0.75540916, 1., 0.75540916, 0.31830989, 0.04830238 }); + win<w::blackman, f32>(8, 0.16, s::periodic, + u{ -1.38777878e-17, 6.64466094e-02, 3.40000000e-01, 7.73553391e-01, 1.00000000e+00, + 7.73553391e-01, 3.40000000e-01, 6.64466094e-02 }); + win<w::blackman_harris, f32>(8, 0.0, s::periodic, + u{ 6.00000000e-05, 2.17358370e-02, 2.17470000e-01, 6.95764163e-01, + 1.00000000e+00, 6.95764163e-01, 2.17470000e-01, 2.17358370e-02 }); + win<w::kaiser, f32>( + 8, 8.0, s::periodic, + u{ 0.00233883, 0.08273982, 0.36897272, 0.78875245, 1., 0.78875245, 0.36897272, 0.08273982 }); + win<w::flattop, f32>(8, 0.0, s::periodic, + u{ -4.21051000e-04, -2.68721933e-02, -5.47368400e-02, 4.44135357e-01, 1.00000000e+00, + 4.44135357e-01, -5.47368400e-02, -2.68721933e-02 }); + win<w::gaussian, f32>( + 8, 2.5, s::periodic, + u{ 0.08465799, 0.24935221, 0.53940751, 0.85699689, 1., 0.85699689, 0.53940751, 0.24935221 }); + win<w::lanczos, f32>(8, 0.0, s::periodic, + u{ -2.8e-08, 0.300105, 0.63662, 0.900316, 1, 0.900316, 0.63662, 0.300105 }); + win<w::cosine_np, f32>(8, 0.0, s::periodic, + u{ 0.17364818, 0.5, 0.76604444, 0.93969262, 1., 0.93969262, 0.76604444, 0.5 }); + // clang-format on +} +} // namespace CMT_ARCH_NAME +} // namespace kfr diff --git a/tests/unit/graphics/graphics.cpp b/tests/unit/graphics/graphics.cpp @@ -0,0 +1,7 @@ +/** + * KFR (http://kfrlib.com) + * Copyright (C) 2016-2022 Fractalium Ltd + * See LICENSE.txt for details + */ + +#include <kfr/graphics.hpp> diff --git a/tests/unit/math/math.cpp b/tests/unit/math/math.cpp @@ -0,0 +1,7 @@ +/** + * KFR (http://kfrlib.com) + * Copyright (C) 2016-2022 Fractalium Ltd + * See LICENSE.txt for details + */ + +#include <kfr/math.hpp> diff --git a/tests/unit/simd/abs.cpp b/tests/unit/simd/abs.cpp @@ -4,7 +4,7 @@ * See LICENSE.txt for details */ -#include <kfr/math/abs.hpp> +#include <kfr/simd/abs.hpp> #include <kfr/io.hpp> diff --git a/tests/unit/simd/min_max.cpp b/tests/unit/simd/min_max.cpp @@ -4,7 +4,7 @@ * See LICENSE.txt for details */ -#include <kfr/math/min_max.hpp> +#include <kfr/simd/min_max.hpp> #include <kfr/io.hpp> @@ -45,7 +45,8 @@ TEST(absmin) { test_function2( test_catogories::all, [](auto x, auto y) { return kfr::absmin(x, y); }, - [](auto x, auto y) -> common_type<decltype(x), decltype(y)> { + [](auto x, auto y) -> common_type<decltype(x), decltype(y)> + { x = x >= 0 ? x : -x; y = y >= 0 ? y : -y; return x <= y ? x : y; @@ -57,7 +58,8 @@ TEST(absmax) { test_function2( test_catogories::all, [](auto x, auto y) { return kfr::absmax(x, y); }, - [](auto x, auto y) -> common_type<decltype(x), decltype(y)> { + [](auto x, auto y) -> common_type<decltype(x), decltype(y)> + { x = x >= 0 ? x : -x; y = y >= 0 ? y : -y; return x >= y ? x : y; diff --git a/tests/unit/simd/operators.cpp b/tests/unit/simd/operators.cpp @@ -4,6 +4,7 @@ * See LICENSE.txt for details */ +#include <kfr/io/tostring.hpp> #include <kfr/simd/horizontal.hpp> #include <kfr/simd/operators.hpp> @@ -22,7 +23,8 @@ TEST(bnot) { test_function1( test_catogories::vectors, [](auto x) -> decltype(x) { return ~x; }, - [](auto x) -> decltype(x) { + [](auto x) -> decltype(x) + { utype<decltype(x)> u = ~ubitcast(x); return bitcast<decltype(x)>(u); }); @@ -59,19 +61,38 @@ TEST(div) { test_function2( test_catogories::vectors, - [](auto x, auto y) { - return is_safe_division<subtype<decltype(x)>>(x.front(), y.front()) ? x / y : 0; - }, + [](auto x, auto y) + { return is_safe_division<subtype<decltype(x)>>(x.front(), y.front()) ? x / y : 0; }, [](auto x, auto y) -> common_type<decltype(x), decltype(y)> { return is_safe_division(x, y) ? x / y : 0; }); } +struct not_f +{ + template <typename T> + constexpr bool operator()(ctype_t<T>) const + { + return !is_f_class<subtype<T>>; + } +}; +TEST(mod) +{ + test_function2( + test_catogories::vectors, + [](auto x, auto y) + { return is_safe_division<subtype<decltype(x)>>(x.front(), y.front()) ? x % y : 0; }, + [](auto x, auto y) -> common_type<decltype(x), decltype(y)> { + return is_safe_division(x, y) ? x % y : 0; + }, + fn_return_constant<bool, true>{}, not_f{}); +} TEST(bor) { test_function2( test_catogories::vectors, [](auto x, auto y) { return x | y; }, - [](auto x, auto y) -> common_type<decltype(x), decltype(y)> { + [](auto x, auto y) -> common_type<decltype(x), decltype(y)> + { using T = common_type<decltype(x), decltype(y)>; return bitcast<T>(static_cast<utype<T>>(ubitcast(T(x)) | ubitcast(T(y)))); }); @@ -81,7 +102,8 @@ TEST(bxor) { test_function2( test_catogories::vectors, [](auto x, auto y) { return x ^ y; }, - [](auto x, auto y) -> common_type<decltype(x), decltype(y)> { + [](auto x, auto y) -> common_type<decltype(x), decltype(y)> + { using T = common_type<decltype(x), decltype(y)>; return bitcast<T>(static_cast<utype<T>>(ubitcast(T(x)) ^ ubitcast(T(y)))); }); @@ -91,7 +113,8 @@ TEST(band) { test_function2( test_catogories::vectors, [](auto x, auto y) { return x & y; }, - [](auto x, auto y) -> common_type<decltype(x), decltype(y)> { + [](auto x, auto y) -> common_type<decltype(x), decltype(y)> + { using T = common_type<decltype(x), decltype(y)>; return bitcast<T>(static_cast<utype<T>>(ubitcast(T(x)) & ubitcast(T(y)))); }); @@ -102,7 +125,8 @@ TEST(shl) testo::matrix( named("type") = test_catogories::types(test_catogories::vectors), named("value1") = special_values(), named("shift") = std::vector<unsigned>{ 1, 2, 7, 8, 9, 15, 16, 31, 32, 63, 64 }, - [&](auto type, special_value value, unsigned shift) { + [&](auto type, special_value value, unsigned shift) + { using T = typename decltype(type)::type; if (shift < sizeof(subtype<T>)) { @@ -130,7 +154,8 @@ TEST(shr) testo::matrix( named("type") = test_catogories::types(test_catogories::vectors), named("value1") = special_values(), named("shift") = std::vector<unsigned>{ 1, 2, 7, 8, 9, 15, 16, 31, 32, 63, 64 }, - [&](auto type, special_value value, unsigned shift) { + [&](auto type, special_value value, unsigned shift) + { using T = typename decltype(type)::type; if (shift < sizeof(subtype<T>)) { @@ -222,6 +247,10 @@ TEST(matrix) CHECK(m22 * i32x2{ -1, 100 } == i32x2x2{ i32x2{ -1, 200 }, i32x2{ -3, 400 } }); + CHECK(vec{ vec{ 1, 1 }, vec{ 1, 1 } } * vec{ -1, 100 } == vec{ vec{ -1, 100 }, vec{ -1, 100 } }); + CHECK(vec{ vec{ 1, 1 }, vec{ 1, 1 }, vec{ 1, 1 } } * vec{ -1, 100 } == + vec{ vec{ -1, 100 }, vec{ -1, 100 }, vec{ -1, 100 } }); + i32x2 xy{ 10, 20 }; i32x2x2 m{ i32x2{ 1, 2 }, i32x2{ 3, 4 } }; xy = hadd(xy * m); diff --git a/tests/unit/simd/round.cpp b/tests/unit/simd/round.cpp @@ -4,7 +4,7 @@ * See LICENSE.txt for details */ -#include <kfr/math/round.hpp> +#include <kfr/simd/round.hpp> namespace kfr { @@ -14,45 +14,40 @@ TEST(floor) { test_function1( test_catogories::all, [](auto x) { return kfr::floor(x); }, - [](auto x) -> decltype(x) { - return std::is_integral<decltype(x)>::value ? x : static_cast<decltype(x)>(std::floor(x)); - }); + [](auto x) -> decltype(x) + { return std::is_integral<decltype(x)>::value ? x : static_cast<decltype(x)>(std::floor(x)); }); } TEST(ceil) { test_function1( test_catogories::all, [](auto x) { return kfr::ceil(x); }, - [](auto x) -> decltype(x) { - return std::is_integral<decltype(x)>::value ? x : static_cast<decltype(x)>(std::ceil(x)); - }); + [](auto x) -> decltype(x) + { return std::is_integral<decltype(x)>::value ? x : static_cast<decltype(x)>(std::ceil(x)); }); } TEST(trunc) { test_function1( test_catogories::all, [](auto x) { return kfr::trunc(x); }, - [](auto x) -> decltype(x) { - return std::is_integral<decltype(x)>::value ? x : static_cast<decltype(x)>(std::trunc(x)); - }); + [](auto x) -> decltype(x) + { return std::is_integral<decltype(x)>::value ? x : static_cast<decltype(x)>(std::trunc(x)); }); } TEST(round) { test_function1( test_catogories::all, [](auto x) { return kfr::round(x); }, - [](auto x) -> decltype(x) { - return std::is_integral<decltype(x)>::value ? x : static_cast<decltype(x)>(std::round(x)); - }); + [](auto x) -> decltype(x) + { return std::is_integral<decltype(x)>::value ? x : static_cast<decltype(x)>(std::round(x)); }); } TEST(fract) { test_function1( test_catogories::all, [](auto x) { return kfr::fract(x); }, - [](auto x) -> decltype(x) { - return std::is_integral<decltype(x)>::value ? 0 : static_cast<decltype(x)>(x - std::floor(x)); - }); + [](auto x) -> decltype(x) + { return std::is_integral<decltype(x)>::value ? 0 : static_cast<decltype(x)>(x - std::floor(x)); }); } } // namespace CMT_ARCH_NAME } // namespace kfr diff --git a/tests/unit/simd/select.cpp b/tests/unit/simd/select.cpp @@ -4,7 +4,7 @@ * See LICENSE.txt for details */ -#include <kfr/math/select.hpp> +#include <kfr/simd/select.hpp> namespace kfr { @@ -14,7 +14,8 @@ TEST(select_true) { test_function2( test_catogories::vectors, - [](auto x, auto y) { + [](auto x, auto y) + { mask<subtype<decltype(x)>, decltype(x)::scalar_size()> m(true); return kfr::select(m, x, y); }, @@ -25,7 +26,8 @@ TEST(select_false) { test_function2( test_catogories::vectors, - [](auto x, auto y) { + [](auto x, auto y) + { mask<subtype<decltype(x)>, decltype(x)::scalar_size()> m(false); return kfr::select(m, x, y); }, diff --git a/tests/unit/simd/shuffle.cpp b/tests/unit/simd/shuffle.cpp @@ -180,5 +180,11 @@ TEST(low_high) CHECK(low(vec<u8, 2>(1, 2)) == vec<u8, 1>(1)); CHECK(high(vec<u8, 2>(1, 2)) == vec<u8, 1>(2)); } +TEST(enumerate) +{ + CHECK(enumerate(vec_shape<int, 4>{}, 4) == vec{ 0, 4, 8, 12 }); + CHECK(enumerate(vec_shape<int, 8>{}, 3) == vec{ 0, 3, 6, 9, 12, 15, 18, 21 }); + CHECK(enumerate(vec_shape<int, 7>{}, 3) == vec{ 0, 3, 6, 9, 12, 15, 18 }); +} } // namespace CMT_ARCH_NAME } // namespace kfr diff --git a/tests/unit/simd/simd.cpp b/tests/unit/simd/simd.cpp @@ -0,0 +1,7 @@ +/** + * KFR (http://kfrlib.com) + * Copyright (C) 2016-2022 Fractalium Ltd + * See LICENSE.txt for details + */ + +#include <kfr/simd.hpp> diff --git a/tests/unit/simd/vec.cpp b/tests/unit/simd/vec.cpp @@ -71,78 +71,86 @@ TEST(cast) static_assert(!is_convertible<u16x4, i32x3>, ""); static_assert(!is_convertible<u16x1, u16x16>, ""); - static_assert(is_same<decltype(innercast<f64>(f32x4x4(1))), f64x4x4>, ""); - static_assert(is_same<decltype(innercast<f64>(f32x4(1))), f64x4>, ""); - static_assert(is_same<decltype(innercast<f64>(f32(1))), f64>, ""); + static_assert(is_convertible<float, vecx<float, 2>>, ""); + static_assert(is_convertible<float, vecx<float, 2, 2>>, ""); - // N/A static_assert(is_same<decltype(innercast<f64x4>(f32x4x4(1))), f64x4x4>, ""); - static_assert(is_same<decltype(innercast<f64x4>(f32x4(1))), f64x4x4>, ""); - static_assert(is_same<decltype(innercast<f64x4>(f32(1))), f64x4>, ""); + static_assert(is_same<decltype(broadcastto<f64>(f32x4x4(1))), f64x4x4>, ""); + static_assert(is_same<decltype(broadcastto<f64>(f32x4(1))), f64x4>, ""); + static_assert(is_same<decltype(broadcastto<f64>(f32(1))), f64>, ""); - // N/A static_assert(is_same<decltype(elemcast<f64>(f32x4x4(1))), f64x4>, ""); - static_assert(is_same<decltype(elemcast<f64>(f32x4(1))), f64x4>, ""); + // N/A static_assert(is_same<decltype(broadcastto<f64x4>(f32x4x4(1))), f64x4x4>, ""); + static_assert(is_same<decltype(broadcastto<f64x4>(f32x4(1))), f64x4x4>, ""); + static_assert(is_same<decltype(broadcastto<f64x4>(f32(1))), f64x4>, ""); - static_assert(is_same<decltype(elemcast<f64x4>(f32x4x4(1))), f64x4x4>, ""); - static_assert(is_same<decltype(elemcast<f64x4>(f32x4(1))), f64x4x4>, ""); + // N/A static_assert(is_same<decltype(promoteto<f64>(f32x4x4(1))), f64x4>, ""); + static_assert(is_same<decltype(promoteto<f64>(f32x4(1))), f64x4>, ""); + + static_assert(is_same<decltype(promoteto<f64x4>(f32x4x4(1))), f64x4x4>, ""); + static_assert(is_same<decltype(promoteto<f64x4>(f32x4(1))), f64x4x4>, ""); + + CHECK(cast<vecx<float, 2, 2>>(123.f) == vec{ vec{ 123.f, 123.f }, vec{ 123.f, 123.f } }); + + CHECK(promoteto<vecx<float, 2>>(vecx<float, 4>{ 1.f, 2.f, 3.f, 4.f }) == + vec{ vec{ 1.f, 1.f }, vec{ 2.f, 2.f }, vec{ 3.f, 3.f }, vec{ 4.f, 4.f } }); testo::scope s(""); s.text = ("target_type = u8"); test_function1( - test_catogories::all, [](auto x) { return kfr::innercast<u8>(x); }, + test_catogories::all, [](auto x) { return kfr::broadcastto<u8>(x); }, [](auto x) -> u8 { return static_cast<u8>(x); }, [](auto t, special_value x) { return is_in_range_of<u8>(x.get<subtype<typename decltype(t)::type>>()); }); s.text = ("target_type = i8"); test_function1( - test_catogories::all, [](auto x) { return kfr::innercast<i8>(x); }, + test_catogories::all, [](auto x) { return kfr::broadcastto<i8>(x); }, [](auto x) -> i8 { return static_cast<i8>(x); }, [](auto t, special_value x) { return is_in_range_of<i8>(x.get<subtype<typename decltype(t)::type>>()); }); s.text = ("target_type = u16"); test_function1( - test_catogories::all, [](auto x) { return kfr::innercast<u16>(x); }, + test_catogories::all, [](auto x) { return kfr::broadcastto<u16>(x); }, [](auto x) -> u16 { return static_cast<u16>(x); }, [](auto t, special_value x) { return is_in_range_of<u16>(x.get<subtype<typename decltype(t)::type>>()); }); s.text = ("target_type = i16"); test_function1( - test_catogories::all, [](auto x) { return kfr::innercast<i16>(x); }, + test_catogories::all, [](auto x) { return kfr::broadcastto<i16>(x); }, [](auto x) -> i16 { return static_cast<i16>(x); }, [](auto t, special_value x) { return is_in_range_of<i16>(x.get<subtype<typename decltype(t)::type>>()); }); s.text = ("target_type = u32"); test_function1( - test_catogories::all, [](auto x) { return kfr::innercast<u32>(x); }, + test_catogories::all, [](auto x) { return kfr::broadcastto<u32>(x); }, [](auto x) -> u32 { return static_cast<u32>(x); }, [](auto t, special_value x) { return is_in_range_of<u32>(x.get<subtype<typename decltype(t)::type>>()); }); s.text = ("target_type = i32"); test_function1( - test_catogories::all, [](auto x) { return kfr::innercast<i32>(x); }, + test_catogories::all, [](auto x) { return kfr::broadcastto<i32>(x); }, [](auto x) -> i32 { return static_cast<i32>(x); }, [](auto t, special_value x) { return is_in_range_of<i32>(x.get<subtype<typename decltype(t)::type>>()); }); s.text = ("target_type = u64"); test_function1( - test_catogories::all, [](auto x) { return kfr::innercast<u64>(x); }, + test_catogories::all, [](auto x) { return kfr::broadcastto<u64>(x); }, [](auto x) -> u64 { return static_cast<u64>(x); }, [](auto t, special_value x) { return is_in_range_of<u64>(x.get<subtype<typename decltype(t)::type>>()); }); s.text = ("target_type = i64"); test_function1( - test_catogories::all, [](auto x) { return kfr::innercast<i64>(x); }, + test_catogories::all, [](auto x) { return kfr::broadcastto<i64>(x); }, [](auto x) -> i64 { return static_cast<i64>(x); }, [](auto t, special_value x) { return is_in_range_of<i64>(x.get<subtype<typename decltype(t)::type>>()); }); s.text = ("target_type = f32"); test_function1( - test_catogories::all, [](auto x) { return kfr::innercast<f32>(x); }, + test_catogories::all, [](auto x) { return kfr::broadcastto<f32>(x); }, [](auto x) -> f32 { return static_cast<f32>(x); }, [](auto t, special_value x) { return is_in_range_of<f32>(x.get<subtype<typename decltype(t)::type>>()); }); s.text = ("target_type = f64"); test_function1( - test_catogories::all, [](auto x) { return kfr::innercast<f64>(x); }, + test_catogories::all, [](auto x) { return kfr::broadcastto<f64>(x); }, [](auto x) -> f64 { return static_cast<f64>(x); }, [](auto t, special_value x) { return is_in_range_of<f64>(x.get<subtype<typename decltype(t)::type>>()); }); @@ -198,5 +206,11 @@ TEST(masks) CHECK(float(v[3]) == maskbits<float>(true)); } +TEST(vec_deduction) +{ + vec v{ 1, 2, 3 }; + static_assert(std::is_same_v<decltype(v), vec<int, 3>>); +} + } // namespace CMT_ARCH_NAME } // namespace kfr diff --git a/update-sources.py b/update-sources.py @@ -30,7 +30,15 @@ cmake = """ # Use update-sources.py """ -list_sources("KFR_SRC", "include", ['*.hpp', '*.h', '*.i', '*.inc']) +list_sources("KFR_SRC", "include", ['*.hpp', '*.h']) +list_sources("KFR_SIMD_SRC", "include/kfr/simd", ['*.hpp', '*.h']) +list_sources("KFR_MATH_SRC", "include/kfr/math", ['*.hpp', '*.h']) +list_sources("KFR_BASE_SRC", "include/kfr/base", ['*.hpp', '*.h']) +list_sources("KFR_DSP_SRC", "include/kfr/dsp", ['*.hpp', '*.h']) +list_sources("KFR_IO_SRC", "include/kfr/io", ['*.hpp', '*.h']) +list_sources("KFR_RUNTIME_SRC", "include/kfr/runtime", ['*.hpp', '*.h']) +list_sources("KFR_GRAPHICS_SRC", "include/kfr/graphics", ['*.hpp', '*.h']) +list_sources("KFR_SRC", "include", ['*.hpp', '*.h']) list_sources("KFR_DFT_SRC", "include/kfr/dft", ['*.cpp'], ["dft-src.cpp"]) list_sources("KFR_IO_SRC", "include/kfr/io", ['*.cpp'])