Expressions refactoring - kfr - Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)

commit 1556f65ddc63e3da13bfc340a6bf3dbbb3512a4f
parent 336ba664c337c202ee5749091645f7aca106a861
Author: d.levin256@gmail.com <d.levin256@gmail.com>
Date:   Fri, 28 Oct 2022 10:26:34 +0100

Expressions refactoring

Diffstat:
M examples/biquads.cpp  | 4 +++-
M examples/ccv.cpp  | 41 +++++++++++++++++++++--------------------
M examples/fir.cpp  | 2 +-
M include/kfr/base.hpp  | 4 ++++
M include/kfr/base/basic_expressions.hpp  | 460 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------
M include/kfr/base/conversion.hpp  | 22 +++++++++++++---------
M include/kfr/base/expression.hpp  | 686 ++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------------
M include/kfr/base/filter.hpp  | 22 +++++++++++-----------
M include/kfr/base/generators.hpp  | 108 ++++++++++++++++++++++++++++++++++++++-----------------------------------------
M include/kfr/base/impl/static_array.hpp  | 2 +-
M include/kfr/base/math_expressions.hpp  | 264 +++++++++++++++++++------------------------------------------------------------
D include/kfr/base/old_basic_expressions.hpp  | 708 -------------------------------------------------------------------------------
M include/kfr/base/pointer.hpp  | 195 +++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------
M include/kfr/base/random.hpp  | 134 ++++++++++++++++++++++++++++++++++++++++++++-----------------------------------
M include/kfr/base/random_bits.hpp  | 90 ++++++++++++++++++++++++++++++++++++++++++++-----------------------------------
M include/kfr/base/reduce.hpp  | 69 ++++++++++++++++++++++++++++++++++-----------------------------------
M include/kfr/base/shape.hpp  | 102 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------
M include/kfr/base/simd_expressions.hpp  | 308 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------
A include/kfr/base/state_holder.hpp  | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
M include/kfr/base/tensor.hpp  | 34 ++++++----------------------------
M include/kfr/base/univector.hpp  | 102 +++++++++++++++++++++++++++++++++++++++++++++++++------------------------------
M include/kfr/capi.h  | 2 +-
M include/kfr/cometa.hpp  | 19 +++++++++++++------
M include/kfr/cometa/cstring.hpp  | 6 +++---
M include/kfr/cometa/function.hpp  | 4 ++--
M include/kfr/cometa/numeric.hpp  | 12 +++++-------
M include/kfr/dft/convolution.hpp  | 10 +++++-----
M include/kfr/dft/fft.hpp  | 7 +++----
M include/kfr/dft/impl/convolution-impl.cpp  | 4 ++--
M include/kfr/dft/impl/dft-impl.hpp  | 46 +++++++++++++++++++++++++---------------------
M include/kfr/dft/impl/fft-impl.hpp  | 108 +++++++++++++++++++++++++++++++++++++++++++++----------------------------------
M include/kfr/dft/impl/ft.hpp  | 30 ++++++++++++++++--------------
M include/kfr/dsp/biquad.hpp  | 212 ++++++++++++++++++++++++++++++++++++++-----------------------------------------
M include/kfr/dsp/dcremove.hpp  | 10 ++++------
M include/kfr/dsp/delay.hpp  | 98 ++++++++++++++++++++++++++++++++++++++++++++-----------------------------------
M include/kfr/dsp/ebu.hpp  | 9 +++++----
M include/kfr/dsp/fir.hpp  | 144 +++++++++++++++++++++++++++++++++++++++++--------------------------------------
M include/kfr/dsp/fir_design.hpp  | 13 ++++++-------
M include/kfr/dsp/fracdelay.hpp  | 4 ++--
M include/kfr/dsp/goertzel.hpp  | 27 +++++++++++++--------------
M include/kfr/dsp/iir_design.hpp  | 16 ++++++++--------
M include/kfr/dsp/mixdown.hpp  | 13 ++++---------
M include/kfr/dsp/oscillators.hpp  | 34 ++++++++++++++++++----------------
M include/kfr/dsp/sample_rate_conversion.hpp  | 69 ++++++++++++++++++++++++++++++++++++++-------------------------------
M include/kfr/dsp/special.hpp  | 15 +++++++++------
D include/kfr/dsp/state_holder.hpp  | 41 -----------------------------------------
M include/kfr/dsp/units.hpp  | 26 +++++++++++++-------------
M include/kfr/dsp/waveshaper.hpp  | 6 +++---
M include/kfr/dsp/weighting.hpp  | 6 +++---
M include/kfr/dsp/window.hpp  | 457 ++++++++++++++++++++++++++++++++++---------------------------------------------
M include/kfr/graphics.hpp  | 3 +--
M include/kfr/graphics/color.hpp  | 2 +-
M include/kfr/graphics/geometry.hpp  | 20 +++++++++++++-------
A include/kfr/graphics/impl/scaled.hpp  | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D include/kfr/graphics/scaled.hpp  | 59 -----------------------------------------------------------
M include/kfr/kfr.h  | 5 +++--
M include/kfr/math/complex_math.hpp  | 6 +++---
M include/kfr/math/impl/atan.hpp  | 8 ++++----
M include/kfr/math/impl/gamma.hpp  | 2 +-
M include/kfr/math/impl/hyperbolic.hpp  | 6 +++---
M include/kfr/math/impl/log_exp.hpp  | 62 +++++++++++++++++++++++++++++++-------------------------------
M include/kfr/math/impl/sin_cos.hpp  | 8 ++++----
M include/kfr/math/impl/tan.hpp  | 5 ++++-
M include/kfr/simd.hpp  | 1 +
M include/kfr/simd/complex.hpp  | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++------
M include/kfr/simd/complex_type.hpp  | 2 +-
M include/kfr/simd/impl/abs.hpp  | 2 +-
M include/kfr/simd/impl/backend_clang.hpp  | 12 +++++++-----
M include/kfr/simd/impl/backend_generic.hpp  | 3 ++-
M include/kfr/simd/impl/basicoperators_clang.hpp  | 6 ++++++
M include/kfr/simd/impl/basicoperators_complex.hpp  | 2 +-
M include/kfr/simd/impl/basicoperators_generic.hpp  | 137 ++++++++++++++++++++++++-------------------------------------------------------
M include/kfr/simd/impl/function.hpp  | 2 +-
M include/kfr/simd/impl/logical.hpp  | 2 +-
M include/kfr/simd/impl/min_max.hpp  | 2 +-
M include/kfr/simd/impl/operators.hpp  | 77 ++++++-----------------------------------------------------------------------
M include/kfr/simd/impl/round.hpp  | 26 +++++++++++++-------------
M include/kfr/simd/impl/saturation.hpp  | 2 +-
M include/kfr/simd/impl/select.hpp  | 2 +-
R include/kfr/simd/impl/specializations.i -> include/kfr/simd/impl/specializations.hpp  | 0 
M include/kfr/simd/operators.hpp  | 68 +++++++++++++++++++++++++++++++++++++++++---------------------------
M include/kfr/simd/read_write.hpp  | 15 +++++++++++----
M include/kfr/simd/select.hpp  | 2 +-
M include/kfr/simd/shuffle.hpp  | 5 ++---
M include/kfr/simd/vec.hpp  | 251 +++++++++++++++++++++++++++++++++++++++++--------------------------------------
M sources.cmake  | 331 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
M tests/CMakeLists.txt  | 3 ++-
M tests/asm_test.cpp  | 4 ++--
M tests/base_test.cpp  | 22 +++-------------------
M tests/complex_test.cpp  | 10 +++-------
M tests/dsp_test.cpp  | 565 -------------------------------------------------------------------------------
M tests/expression_test.cpp  | 149 +++----------------------------------------------------------------------------
M tests/intrinsic_test.cpp  | 42 +++++++++++++++++++++++-------------------
M tests/numeric_tests.hpp  | 96 ++++++++++++++++++++++++++++++++++++++++++++-----------------------------------
M tests/tensor_test.cpp  | 2 +-
A tests/unit/base/base.cpp  | 7 +++++++
A tests/unit/base/basic_expressions.cpp  | 131 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A tests/unit/base/generators.cpp  | 33 +++++++++++++++++++++++++++++++++
A tests/unit/base/pointer.cpp  | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M tests/unit/base/random.cpp  | 16 ++++++++--------
M tests/unit/base/reduce.cpp  | 9 +++++++++
A tests/unit/base/shape.cpp  | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M tests/unit/base/tensor.cpp  | 165 ++++++++++++++++++++++++++-----------------------------------------------------
A tests/unit/dsp/biquad.cpp  | 113 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A tests/unit/dsp/biquad_design.cpp  | 7 +++++++
A tests/unit/dsp/dsp.cpp  | 7 +++++++
A tests/unit/dsp/ebu.cpp  | 270 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A tests/unit/dsp/fir.cpp  | 234 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A tests/unit/dsp/sample_rate_conversion.cpp  | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
A tests/unit/dsp/units.cpp  | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A tests/unit/dsp/window.cpp  | 189 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A tests/unit/graphics/graphics.cpp  | 7 +++++++
A tests/unit/math/math.cpp  | 7 +++++++
M tests/unit/simd/abs.cpp  | 2 +-
M tests/unit/simd/min_max.cpp  | 8 +++++---
M tests/unit/simd/operators.cpp  | 47 ++++++++++++++++++++++++++++++++++++++---------
M tests/unit/simd/round.cpp  | 27 +++++++++++----------------
M tests/unit/simd/select.cpp  | 8 +++++---
M tests/unit/simd/shuffle.cpp  | 6 ++++++
A tests/unit/simd/simd.cpp  | 7 +++++++
M tests/unit/simd/vec.cpp  | 54 ++++++++++++++++++++++++++++++++++--------------------
M update-sources.py  | 10 +++++++++-

122 files changed, 4690 insertions(+), 3913 deletions(-)
diff --git a/examples/biquads.cpp b/examples/biquads.cpp
@@ -5,7 +5,9 @@
  */
 
 #include <kfr/base.hpp>
-#include <kfr/dsp.hpp>
+#include <kfr/dsp/biquad.hpp>
+#include <kfr/dsp/biquad_design.hpp>
+#include <kfr/dsp/special.hpp>
 #include <kfr/io.hpp>
 
 using namespace kfr;
diff --git a/examples/ccv.cpp b/examples/ccv.cpp
@@ -26,8 +26,8 @@ int main()
 
     // Create filters.
     size_t const block_size = 256;
-    convolve_filter<complex<fbase>> conv_filter_complex(univector<complex<fbase>>(make_complex(taps127, zeros())),
-                                                block_size);
+    convolve_filter<complex<fbase>> conv_filter_complex(
+        univector<complex<fbase>>(make_complex(taps127, zeros())), block_size);
     convolve_filter<fbase> conv_filter_real(taps127, block_size);
 
     // Create noise to filter.
@@ -35,8 +35,7 @@ int main()
     univector<complex<fbase>> cnoise =
         make_complex(truncate(gen_random_range(random_bit_generator{ 1, 2, 3, 4 }, -1.f, +1.f), size),
                      truncate(gen_random_range(random_bit_generator{ 3, 4, 9, 8 }, -1.f, +1.f), size));
-    univector<fbase> noise =
-                     truncate(gen_random_range(random_bit_generator{ 3, 4, 9, 8 }, -1.f, +1.f), size);
+    univector<fbase> noise = truncate(gen_random_range(random_bit_generator{ 3, 4, 9, 8 }, -1.f, +1.f), size);
 
     // Filter results.
     univector<complex<fbase>> filtered_cnoise_ccv(size), filtered_cnoise_fir(size);
@@ -45,27 +44,29 @@ int main()
     // Complex filtering (time and compare).
     auto tic = std::chrono::high_resolution_clock::now();
     conv_filter_complex.apply(filtered_cnoise_ccv, cnoise);
-    auto toc           = std::chrono::high_resolution_clock::now();
-    auto const ccv_time_complex      = std::chrono::duration_cast<std::chrono::duration<float>>(toc - tic);
-    tic                = toc;
-    filtered_cnoise_fir = kfr::fir(cnoise, taps127);
-    toc                = std::chrono::high_resolution_clock::now();
-    auto const fir_time_complex      = std::chrono::duration_cast<std::chrono::duration<float>>(toc - tic);
-    auto const cdiff = rms(cabs(filtered_cnoise_fir - filtered_cnoise_ccv));
+    auto toc                    = std::chrono::high_resolution_clock::now();
+    auto const ccv_time_complex = std::chrono::duration_cast<std::chrono::duration<float>>(toc - tic);
+    tic                         = toc;
+    filtered_cnoise_fir         = kfr::fir(cnoise, taps127);
+    toc                         = std::chrono::high_resolution_clock::now();
+    auto const fir_time_complex = std::chrono::duration_cast<std::chrono::duration<float>>(toc - tic);
+    auto const cdiff            = rms(cabs(filtered_cnoise_fir - filtered_cnoise_ccv));
 
     // Real filtering (time and compare).
     tic = std::chrono::high_resolution_clock::now();
     conv_filter_real.apply(filtered_noise_ccv, noise);
-    toc           = std::chrono::high_resolution_clock::now();
-    auto const ccv_time_real      = std::chrono::duration_cast<std::chrono::duration<float>>(toc - tic);
-    tic                = toc;
-    filtered_noise_fir = kfr::fir(noise, taps127);
-    toc                = std::chrono::high_resolution_clock::now();
-    auto const fir_time_real      = std::chrono::duration_cast<std::chrono::duration<float>>(toc - tic);
-    auto const diff = rms(filtered_noise_fir - filtered_noise_ccv);
+    toc                      = std::chrono::high_resolution_clock::now();
+    auto const ccv_time_real = std::chrono::duration_cast<std::chrono::duration<float>>(toc - tic);
+    tic                      = toc;
+    filtered_noise_fir       = kfr::fir(noise, taps127);
+    toc                      = std::chrono::high_resolution_clock::now();
+    auto const fir_time_real = std::chrono::duration_cast<std::chrono::duration<float>>(toc - tic);
+    auto const diff          = rms(filtered_noise_fir - filtered_noise_ccv);
 
-    println("complex: convolution_filter ", ccv_time_complex.count(), " fir ", fir_time_complex.count(), " diff=", cdiff);
-    println("real: convolution_filter ", ccv_time_real.count(), " fir ", fir_time_real.count(), " diff=", diff);
+    println("complex: convolution_filter ", ccv_time_complex.count(), " fir ", fir_time_complex.count(),
+            " diff=", cdiff);
+    println("real: convolution_filter ", ccv_time_real.count(), " fir ", fir_time_real.count(),
+            " diff=", diff);
 
     return 0;
 }
diff --git a/examples/fir.cpp b/examples/fir.cpp
@@ -103,7 +103,7 @@ int main()
 
     // Prepare 10000 samples of white noise
     univector<float> noise =
-        truncate(gen_random_range(random_bit_generator{ 1, 2, 3, 4 }, -1.f, +1.f), 10000);
+        truncate(gen_random_range(random_init(1, 2, 3, 4), -1.f, +1.f), 10000);
 
     // Apply band stop filter
     univector<float> filtered_noise = fir(noise, taps127);
diff --git a/include/kfr/base.hpp b/include/kfr/base.hpp
@@ -34,7 +34,11 @@
 #include "base/memory.hpp"
 #include "base/pointer.hpp"
 #include "base/random.hpp"
+#include "base/random_bits.hpp"
 #include "base/reduce.hpp"
+#include "base/shape.hpp"
 #include "base/simd_expressions.hpp"
 #include "base/small_buffer.hpp"
+#include "base/state_holder.hpp"
+#include "base/tensor.hpp"
 #include "base/univector.hpp"
diff --git a/include/kfr/base/basic_expressions.hpp b/include/kfr/base/basic_expressions.hpp
@@ -32,35 +32,35 @@ namespace kfr
 // ----------------------------------------------------------------------------
 
 template <typename T>
-struct xscalar
+struct expression_scalar
 {
     T value;
 };
 
 template <typename T>
-struct expression_traits<xscalar<T>> : expression_traits_defaults
+struct expression_traits<expression_scalar<T>> : expression_traits_defaults
 {
     using value_type             = T;
     constexpr static size_t dims = 0;
 
-    constexpr static shape<0> shapeof(const xscalar<T>& self) { return {}; }
+    constexpr static shape<0> shapeof(const expression_scalar<T>& self) { return {}; }
     constexpr static shape<0> shapeof() { return {}; }
 };
 
 template <typename T>
-KFR_INTRINSIC xscalar<T> scalar(T value)
+KFR_INTRINSIC expression_scalar<T> scalar(T value)
 {
     return { std::move(value) };
 }
 
 template <typename T = fbase>
-KFR_INTRINSIC xscalar<T> zeros()
+KFR_INTRINSIC expression_scalar<T> zeros()
 {
     return { static_cast<T>(0) };
 }
 
 template <typename T = fbase>
-KFR_INTRINSIC xscalar<T> ones()
+KFR_INTRINSIC expression_scalar<T> ones()
 {
     return { static_cast<T>(1) };
 }
@@ -68,7 +68,7 @@ KFR_INTRINSIC xscalar<T> ones()
 inline namespace CMT_ARCH_NAME
 {
 template <typename T, index_t Axis, size_t N>
-KFR_INTRINSIC vec<T, N> get_elements(const xscalar<T>& self, const shape<0>& index,
+KFR_INTRINSIC vec<T, N> get_elements(const expression_scalar<T>& self, const shape<0>& index,
                                      const axis_params<Axis, N>&)
 {
     return self.value;
@@ -78,7 +78,7 @@ KFR_INTRINSIC vec<T, N> get_elements(const xscalar<T>& self, const shape<0>& ind
 // ----------------------------------------------------------------------------
 
 template <typename T, index_t Dims = 1>
-struct xcounter
+struct expression_counter
 {
     T start;
     T steps[Dims];
@@ -88,26 +88,32 @@ struct xcounter
 };
 
 template <typename T, index_t Dims>
-struct expression_traits<xcounter<T, Dims>> : expression_traits_defaults
+struct expression_traits<expression_counter<T, Dims>> : expression_traits_defaults
 {
     using value_type             = T;
     constexpr static size_t dims = Dims;
 
-    constexpr static shape<dims> shapeof(const xcounter<T, Dims>& self) { return shape<dims>(infinite_size); }
+    constexpr static shape<dims> shapeof(const expression_counter<T, Dims>& self)
+    {
+        return shape<dims>(infinite_size);
+    }
     constexpr static shape<dims> shapeof() { return shape<dims>(infinite_size); }
 };
 
-template <typename T, typename... Args, typename Tout = std::common_type_t<T, Args...>>
-KFR_INTRINSIC xcounter<Tout, sizeof...(Args)> counter(T start, Args... steps)
+template <typename T = int, typename Arg = T, typename... Args,
+          typename Tout = std::common_type_t<T, Arg, Args...>>
+KFR_INTRINSIC expression_counter<Tout, 1 + sizeof...(Args)> counter(T start = T(0), Arg step = 1,
+                                                                    Args... steps)
 {
-    return { static_cast<Tout>(std::move(start)), { static_cast<Tout>(std::move(steps))... } };
+    return { static_cast<Tout>(std::move(start)),
+             { static_cast<Tout>(std::move(step)), static_cast<Tout>(std::move(steps))... } };
 }
 
 inline namespace CMT_ARCH_NAME
 {
 
 template <typename T, index_t Axis, size_t N>
-KFR_INTRINSIC vec<T, N> get_elements(const xcounter<T, 1>& self, const shape<1>& index,
+KFR_INTRINSIC vec<T, N> get_elements(const expression_counter<T, 1>& self, const shape<1>& index,
                                      const axis_params<Axis, N>&)
 {
     T acc = self.start;
@@ -115,7 +121,7 @@ KFR_INTRINSIC vec<T, N> get_elements(const xcounter<T, 1>& self, const shape<1>&
     return acc + enumerate(vec_shape<T, N>(), self.back());
 }
 template <typename T, index_t dims, index_t Axis, size_t N>
-KFR_INTRINSIC vec<T, N> get_elements(const xcounter<T, dims>& self, const shape<dims>& index,
+KFR_INTRINSIC vec<T, N> get_elements(const expression_counter<T, dims>& self, const shape<dims>& index,
                                      const axis_params<Axis, N>&)
 {
     T acc                 = self.start;
@@ -128,20 +134,20 @@ KFR_INTRINSIC vec<T, N> get_elements(const xcounter<T, dims>& self, const shape<
 // ----------------------------------------------------------------------------
 
 template <typename Arg>
-struct xslice : public xwitharguments<Arg>
+struct expression_slice : public expression_with_arguments<Arg>
 {
     constexpr static index_t dims = expression_dims<Arg>;
     shape<dims> start;
     shape<dims> size;
 
-    KFR_MEM_INTRINSIC xslice(Arg&& arg, shape<dims> start, shape<dims> size)
-        : xwitharguments<Arg>{ std::forward<Arg>(arg) }, start(start), size(size)
+    KFR_MEM_INTRINSIC expression_slice(Arg&& arg, shape<dims> start, shape<dims> size)
+        : expression_with_arguments<Arg>{ std::forward<Arg>(arg) }, start(start), size(size)
     {
     }
 };
 
 template <typename Arg>
-struct expression_traits<xslice<Arg>> : expression_traits_defaults
+struct expression_traits<expression_slice<Arg>> : expression_traits_defaults
 {
     using ArgTraits = expression_traits<Arg>;
 
@@ -149,7 +155,7 @@ struct expression_traits<xslice<Arg>> : expression_traits_defaults
     constexpr static size_t dims        = ArgTraits::dims;
     constexpr static bool random_access = ArgTraits::random_access;
 
-    KFR_MEM_INTRINSIC constexpr static shape<dims> shapeof(const xslice<Arg>& self)
+    KFR_MEM_INTRINSIC constexpr static shape<dims> shapeof(const expression_slice<Arg>& self)
     {
         return min(sub_shape(ArgTraits::shapeof(self.first()), self.start), self.size);
     }
@@ -157,13 +163,14 @@ struct expression_traits<xslice<Arg>> : expression_traits_defaults
 };
 
 template <typename Arg, KFR_ACCEPT_EXPRESSIONS(Arg), index_t Dims = expression_dims<Arg>>
-KFR_INTRINSIC xslice<Arg> slice(Arg&& arg, shape<Dims> start, shape<Dims> size)
+KFR_INTRINSIC expression_slice<Arg> slice(Arg&& arg, identity<shape<Dims>> start,
+                                          identity<shape<Dims>> size = shape<Dims>(infinite_size))
 {
     return { std::forward<Arg>(arg), start, size };
 }
 
 template <typename Arg, KFR_ACCEPT_EXPRESSIONS(Arg), index_t Dims = expression_dims<Arg>>
-KFR_INTRINSIC xslice<Arg> truncate(Arg&& arg, shape<Dims> size)
+KFR_INTRINSIC expression_slice<Arg> truncate(Arg&& arg, identity<shape<Dims>> size)
 {
     return { std::forward<Arg>(arg), shape<Dims>{ 0 }, size };
 }
@@ -172,16 +179,16 @@ inline namespace CMT_ARCH_NAME
 {
 
 template <typename Arg, index_t NDims, index_t Axis, size_t N,
-          typename T = typename expression_traits<xslice<Arg>>::value_type>
-KFR_INTRINSIC vec<T, N> get_elements(const xslice<Arg>& self, const shape<NDims>& index,
+          typename T = typename expression_traits<expression_slice<Arg>>::value_type>
+KFR_INTRINSIC vec<T, N> get_elements(const expression_slice<Arg>& self, const shape<NDims>& index,
                                      const axis_params<Axis, N>& sh)
 {
     return static_cast<vec<T, N>>(get_elements(self.first(), index.add(self.start), sh));
 }
 
 template <typename Arg, index_t NDims, index_t Axis, size_t N,
-          typename T = typename expression_traits<xslice<Arg>>::value_type>
-KFR_INTRINSIC void set_elements(const xslice<Arg>& self, const shape<NDims>& index,
+          typename T = typename expression_traits<expression_slice<Arg>>::value_type>
+KFR_INTRINSIC void set_elements(const expression_slice<Arg>& self, const shape<NDims>& index,
                                 const axis_params<Axis, N>& sh, const identity<vec<T, N>>& value)
 {
     set_elements(self.first(), index.add(self.start), sh, value);
@@ -191,13 +198,13 @@ KFR_INTRINSIC void set_elements(const xslice<Arg>& self, const shape<NDims>& ind
 // ----------------------------------------------------------------------------
 
 template <typename T, typename Arg>
-struct xcast : public xwitharguments<Arg>
+struct expression_cast : public expression_with_arguments<Arg>
 {
-    using xwitharguments<Arg>::xwitharguments;
+    using expression_with_arguments<Arg>::expression_with_arguments;
 };
 
 template <typename T, typename Arg>
-struct expression_traits<xcast<T, Arg>> : expression_traits_defaults
+struct expression_traits<expression_cast<T, Arg>> : expression_traits_defaults
 {
     using ArgTraits = expression_traits<Arg>;
 
@@ -205,7 +212,7 @@ struct expression_traits<xcast<T, Arg>> : expression_traits_defaults
     constexpr static size_t dims        = ArgTraits::dims;
     constexpr static bool random_access = ArgTraits::random_access;
 
-    KFR_MEM_INTRINSIC constexpr static shape<dims> shapeof(const xcast<T, Arg>& self)
+    KFR_MEM_INTRINSIC constexpr static shape<dims> shapeof(const expression_cast<T, Arg>& self)
     {
         return ArgTraits::shapeof(self.first());
     }
@@ -213,13 +220,13 @@ struct expression_traits<xcast<T, Arg>> : expression_traits_defaults
 };
 
 template <typename T, typename Arg, KFR_ACCEPT_EXPRESSIONS(Arg)>
-KFR_INTRINSIC xcast<T, Arg> cast(Arg&& arg)
+KFR_INTRINSIC expression_cast<T, Arg> cast(Arg&& arg)
 {
     return { std::forward<Arg>(arg) };
 }
 
 template <typename T, typename Arg, KFR_ACCEPT_EXPRESSIONS(Arg)>
-KFR_INTRINSIC xcast<T, Arg> cast(Arg&& arg, ctype_t<T>)
+KFR_INTRINSIC expression_cast<T, Arg> cast(Arg&& arg, ctype_t<T>)
 {
     return { std::forward<Arg>(arg) };
 }
@@ -228,14 +235,14 @@ inline namespace CMT_ARCH_NAME
 {
 
 template <typename T, typename Arg, index_t NDims, index_t Axis, size_t N>
-KFR_INTRINSIC vec<T, N> get_elements(const xcast<T, Arg>& self, const shape<NDims>& index,
+KFR_INTRINSIC vec<T, N> get_elements(const expression_cast<T, Arg>& self, const shape<NDims>& index,
                                      const axis_params<Axis, N>& sh)
 {
     return static_cast<vec<T, N>>(get_elements(self.first(), index, sh));
 }
 
 template <typename T, typename Arg, index_t NDims, index_t Axis, size_t N>
-KFR_INTRINSIC void set_elements(const xcast<T, Arg>& self, const shape<NDims>& index,
+KFR_INTRINSIC void set_elements(const expression_cast<T, Arg>& self, const shape<NDims>& index,
                                 const axis_params<Axis, N>& sh, const identity<vec<T, N>>& value)
 {
     set_elements(self.first(), index, sh, value);
@@ -245,19 +252,19 @@ KFR_INTRINSIC void set_elements(const xcast<T, Arg>& self, const shape<NDims>& i
 // ----------------------------------------------------------------------------
 
 template <typename T, index_t Dims, typename Fn, bool Rnd>
-struct xlambda
+struct expression_lambda
 {
-    Fn&& fn;
+    Fn fn;
 };
 
 template <typename T, index_t Dims, typename Fn, bool Rnd>
-struct expression_traits<xlambda<T, Dims, Fn, Rnd>> : expression_traits_defaults
+struct expression_traits<expression_lambda<T, Dims, Fn, Rnd>> : expression_traits_defaults
 {
     using value_type                           = T;
     constexpr static size_t dims               = Dims;
     constexpr static inline bool random_access = Rnd;
 
-    KFR_MEM_INTRINSIC constexpr static shape<Dims> shapeof(const xlambda<T, Dims, Fn, Rnd>& self)
+    KFR_MEM_INTRINSIC constexpr static shape<Dims> shapeof(const expression_lambda<T, Dims, Fn, Rnd>& self)
     {
         return shape<Dims>(infinite_size);
     }
@@ -265,12 +272,12 @@ struct expression_traits<xlambda<T, Dims, Fn, Rnd>> : expression_traits_defaults
 };
 
 template <typename T, index_t Dims = 1, typename Fn, bool RandomAccess = true>
-KFR_INTRINSIC xlambda<T, Dims, Fn, RandomAccess> lambda(Fn&& fn, cbool_t<RandomAccess> = {})
+KFR_INTRINSIC expression_lambda<T, Dims, Fn, RandomAccess> lambda(Fn&& fn, cbool_t<RandomAccess> = {})
 {
     return { std::forward<Fn>(fn) };
 }
 template <typename T, index_t Dims = 1, typename Fn>
-KFR_INTRINSIC xlambda<T, Dims, Fn, false> lambda_generator(Fn&& fn)
+KFR_INTRINSIC expression_lambda<T, Dims, Fn, false> lambda_generator(Fn&& fn)
 {
     return { std::forward<Fn>(fn) };
 }
@@ -278,25 +285,46 @@ KFR_INTRINSIC xlambda<T, Dims, Fn, false> lambda_generator(Fn&& fn)
 template <typename... Ts, typename T = std::common_type_t<Ts...>>
 KFR_INTRINSIC auto sequence(const Ts&... list)
 {
-    return lambda<T>([seq = std::array<T, sizeof...(Ts)>{ { static_cast<T>(list)... } }](size_t index)
-                     { return seq[index % seq.size()]; });
+    return lambda<T>([seq = std::array<T, sizeof...(Ts)>{ { static_cast<T>(list)... } }](size_t index) { //
+        return seq[index % seq.size()];
+    });
 }
 
 inline namespace CMT_ARCH_NAME
 {
 
 template <typename T, index_t Dims, typename Fn, bool Rnd, index_t Axis, size_t N>
-KFR_INTRINSIC vec<T, N> get_elements(const xlambda<T, Dims, Fn, Rnd>& self, const shape<Dims>& index,
-                                     const axis_params<Axis, N>& sh)
+KFR_INTRINSIC vec<T, N> get_elements(const expression_lambda<T, Dims, Fn, Rnd>& self,
+                                     const shape<Dims>& index, const axis_params<Axis, N>& sh)
 {
-    if constexpr (std::is_invocable_v<Fn, shape<Dims>, csize_t<N>>)
+    if constexpr (std::is_invocable_v<Fn, shape<Dims>, axis_params<Axis, N>>)
         return self.fn(index, sh);
+    else if constexpr (std::is_invocable_v<Fn, shape<Dims>, csize_t<N>>)
+        return self.fn(index, csize<N>);
     else if constexpr (std::is_invocable_v<Fn, shape<Dims>>)
-        return vec<T, N>{ [&](size_t idx) { return self.fn(index.add(idx)); } };
+    {
+        portable_vec<T, N> result;
+        shape<Dims> cur_index = index;
+        for (index_t i = 0; i < N; ++i)
+        {
+            result[i] = self.fn(cur_index);
+            ++cur_index.back();
+        }
+        return result;
+        // return vec<T, N>{ [&](size_t idx) { //
+        // return self.fn(index.add(idx));
+        // } };
+    }
     else if constexpr (std::is_invocable_v<Fn>)
         return apply<N>(self.fn);
     else
+    {
+        static_assert(std::is_invocable_v<Fn, shape<Dims>, axis_params<Axis, N>> ||
+                          std::is_invocable_v<Fn, shape<Dims>, csize_t<N>> ||
+                          std::is_invocable_v<Fn, shape<Dims>> || std::is_invocable_v<Fn>,
+                      "Lambda must be callable");
         return czeros;
+    }
 }
 
 } // namespace CMT_ARCH_NAME
@@ -304,28 +332,28 @@ KFR_INTRINSIC vec<T, N> get_elements(const xlambda<T, Dims, Fn, Rnd>& self, cons
 // ----------------------------------------------------------------------------
 
 template <typename Arg>
-struct xpadded : public xwitharguments<Arg>
+struct expression_padded : public expression_with_arguments<Arg>
 {
-    using ArgTraits = typename xwitharguments<Arg>::first_arg_trait;
+    using ArgTraits = typename expression_with_arguments<Arg>::first_arg_traits;
     typename ArgTraits::value_type fill_value;
     shape<ArgTraits::dims> input_shape;
 
-    KFR_MEM_INTRINSIC xpadded(Arg&& arg, typename ArgTraits::value_type fill_value)
-        : xwitharguments<Arg>{ std::forward<Arg>(arg) }, fill_value(std::move(fill_value)),
+    KFR_MEM_INTRINSIC expression_padded(Arg&& arg, typename ArgTraits::value_type fill_value)
+        : expression_with_arguments<Arg>{ std::forward<Arg>(arg) }, fill_value(std::move(fill_value)),
           input_shape(ArgTraits::shapeof(this->first()))
     {
     }
 };
 
-template <typename Arg, typename T = expression_value_type<Arg>>
-KFR_INTRINSIC xpadded<Arg> padded(Arg&& arg, T fill_value = T{})
+template <typename Arg, KFR_ACCEPT_EXPRESSIONS(Arg), typename T = expression_value_type<Arg>>
+KFR_INTRINSIC expression_padded<Arg> padded(Arg&& arg, T fill_value = T{})
 {
     static_assert(expression_dims<Arg> >= 1);
     return { std::forward<Arg>(arg), std::move(fill_value) };
 }
 
 template <typename Arg>
-struct expression_traits<xpadded<Arg>> : expression_traits_defaults
+struct expression_traits<expression_padded<Arg>> : expression_traits_defaults
 {
     using ArgTraits = expression_traits<Arg>;
 
@@ -333,7 +361,7 @@ struct expression_traits<xpadded<Arg>> : expression_traits_defaults
     constexpr static size_t dims        = ArgTraits::dims;
     constexpr static bool random_access = ArgTraits::random_access;
 
-    KFR_MEM_INTRINSIC constexpr static shape<dims> shapeof(const xpadded<Arg>& self)
+    KFR_MEM_INTRINSIC constexpr static shape<dims> shapeof(const expression_padded<Arg>& self)
     {
         return shape<dims>(infinite_size);
     }
@@ -343,16 +371,16 @@ struct expression_traits<xpadded<Arg>> : expression_traits_defaults
 inline namespace CMT_ARCH_NAME
 {
 
-template <typename Arg, index_t Axis, size_t N, typename Traits = expression_traits<xpadded<Arg>>,
+template <typename Arg, index_t Axis, size_t N, typename Traits = expression_traits<expression_padded<Arg>>,
           typename T = typename Traits::value_type>
-KFR_INTRINSIC vec<T, N> get_elements(const xpadded<Arg>& self, const shape<Traits::dims>& index,
+KFR_INTRINSIC vec<T, N> get_elements(const expression_padded<Arg>& self, const shape<Traits::dims>& index,
                                      const axis_params<Axis, N>& sh)
 {
-    if (index.ge(self.input_size))
+    if (index.ge(self.input_shape))
     {
         return self.fill_value;
     }
-    else if (CMT_LIKELY(index.add(N).le(self.input_size)))
+    else if (CMT_LIKELY(index.add(N).le(self.input_shape)))
     {
         return get_elements(self.first(), index, sh);
     }
@@ -362,8 +390,8 @@ KFR_INTRINSIC vec<T, N> get_elements(const xpadded<Arg>& self, const shape<Trait
         for (size_t i = 0; i < N; i++)
         {
             shape ish = index.add(i);
-            if (ish.back() < self.input_size.back())
-                x[i] = get_elements(self.first(), ish, csize_t<1>()).front();
+            if (ish.back() < self.input_shape.back())
+                x[i] = get_elements(self.first(), ish, axis_params_v<Axis, 1>).front();
         }
         return x;
     }
@@ -374,34 +402,35 @@ KFR_INTRINSIC vec<T, N> get_elements(const xpadded<Arg>& self, const shape<Trait
 // ----------------------------------------------------------------------------
 
+/// Expression wrapper around a single argument expression. The argument's shape is
+/// captured once at construction time and kept in `input_shape`.
 template <typename Arg>
-struct xreverse : public xwitharguments<Arg>
+struct expression_reverse : public expression_with_arguments<Arg>
 {
-    using ArgTraits = typename xwitharguments<Arg>::first_arg_trait;
+    using ArgTraits = typename expression_with_arguments<Arg>::first_arg_traits;
+    // Shape of the wrapped argument as reported by its traits in the constructor.
     shape<ArgTraits::dims> input_shape;
 
-    KFR_MEM_INTRINSIC xreverse(Arg&& arg)
-        : xwitharguments<Arg>{ std::forward<Arg>(arg) }, input_shape(ArgTraits::shapeof(this->first()))
+    // The shape is queried from the stored argument (this->first()), not from `arg`,
+    // which has already been forwarded into the base class at that point.
+    KFR_MEM_INTRINSIC expression_reverse(Arg&& arg)
+        : expression_with_arguments<Arg>{ std::forward<Arg>(arg) },
+          input_shape(ArgTraits::shapeof(this->first()))
     {
     }
 };
 
-template <typename Arg>
-KFR_INTRINSIC xreverse<Arg> x_reverse(Arg&& arg)
+/// Wraps `arg` in an expression_reverse. The argument must have at least one
+/// dimension; this is enforced at compile time by the static_assert below.
+template <typename Arg, KFR_ACCEPT_EXPRESSIONS(Arg)>
+KFR_INTRINSIC expression_reverse<Arg> reverse(Arg&& arg)
 {
     static_assert(expression_dims<Arg> >= 1);
     return { std::forward<Arg>(arg) };
 }
 
 template <typename Arg>
-struct expression_traits<xreverse<Arg>> : expression_traits_defaults
+struct expression_traits<expression_reverse<Arg>> : expression_traits_defaults
 {
     using ArgTraits = expression_traits<Arg>;
 
     using value_type             = typename ArgTraits::value_type;
     constexpr static size_t dims = ArgTraits::dims;
-    static_assert(ArgTraits::random_access, "xreverse requires an expression with random access");
+    static_assert(ArgTraits::random_access, "expression_reverse requires an expression with random access");
 
-    KFR_MEM_INTRINSIC constexpr static shape<dims> shapeof(const xreverse<Arg>& self)
+    KFR_MEM_INTRINSIC constexpr static shape<dims> shapeof(const expression_reverse<Arg>& self)
     {
         return ArgTraits::shapeof(self.first());
     }
@@ -411,9 +440,9 @@ struct expression_traits<xreverse<Arg>> : expression_traits_defaults
 inline namespace CMT_ARCH_NAME
 {
 
-template <typename Arg, index_t Axis, size_t N, typename Traits = expression_traits<xreverse<Arg>>,
+template <typename Arg, index_t Axis, size_t N, typename Traits = expression_traits<expression_reverse<Arg>>,
           typename T = typename Traits::value_type>
-KFR_INTRINSIC vec<T, N> get_elements(const xreverse<Arg>& self, const shape<Traits::dims>& index,
+KFR_INTRINSIC vec<T, N> get_elements(const expression_reverse<Arg>& self, const shape<Traits::dims>& index,
                                      const axis_params<Axis, N>& sh)
 {
     return reverse(get_elements(self.first(), self.input_shape.sub(index).sub(shape<Traits::dims>(N)), sh));
@@ -424,62 +453,77 @@ KFR_INTRINSIC vec<T, N> get_elements(const xreverse<Arg>& self, const shape<Trai
 // ----------------------------------------------------------------------------
 
+/// Tag type that carries a shape in its template arguments. `get()` converts the
+/// compile-time values into a runtime `shape` with one entry per value.
 template <index_t... Values>
-struct fixed_shape
+struct fixed_shape_t
 {
+    constexpr fixed_shape_t() = default;
     constexpr static shape<sizeof...(Values)> get() { return { Values... }; }
 };
 
+/// Variable template providing a ready-made instance of fixed_shape_t<Values...>.
+template <index_t... Values>
+constexpr inline fixed_shape_t<Values...> fixed_shape{};
+
+/// Expression wrapper that pairs an argument expression with a `Shape` type.
+/// It stores only the argument; the shape lives entirely in the type.
 template <typename Arg, typename Shape>
-struct xfixshape : public xwitharguments<Arg>
+struct expression_fixshape : public expression_with_arguments<Arg>
 {
-    using ArgTraits = typename xwitharguments<Arg>::first_arg_trait;
+    using ArgTraits = typename expression_with_arguments<Arg>::first_arg_traits;
 
-    KFR_MEM_INTRINSIC xfixshape(Arg&& arg) : xwitharguments<Arg>{ std::forward<Arg>(arg) } {}
+    // Forwards the argument into the base class; no other state is kept.
+    KFR_MEM_INTRINSIC expression_fixshape(Arg&& arg)
+        : expression_with_arguments<Arg>{ std::forward<Arg>(arg) }
+    {
+    }
 };
 
-template <typename Arg, index_t... ShapeValues>
-KFR_INTRINSIC xfixshape<Arg, fixed_shape<ShapeValues...>> fixshape(Arg&& arg,
-                                                                   const fixed_shape<ShapeValues...>&)
+/// Wraps `arg` in an expression_fixshape whose shape is given by the type of the
+/// second parameter. The second argument is used only for template deduction.
+template <typename Arg, index_t... ShapeValues, KFR_ACCEPT_EXPRESSIONS(Arg)>
+KFR_INTRINSIC expression_fixshape<Arg, fixed_shape_t<ShapeValues...>> fixshape(
+    Arg&& arg, const fixed_shape_t<ShapeValues...>&)
 {
     return { std::forward<Arg>(arg) };
 }
 
+/// Traits for expression_fixshape. The shape (and therefore the rank) comes from
+/// ShapeValues; value_type and random_access are taken from the argument's traits.
 template <typename Arg, index_t... ShapeValues>
-struct expression_traits<xfixshape<Arg, fixed_shape<ShapeValues...>>> : expression_traits_defaults
+struct expression_traits<expression_fixshape<Arg, fixed_shape_t<ShapeValues...>>> : expression_traits_defaults
 {
     using ArgTraits = expression_traits<Arg>;
 
     using value_type                    = typename ArgTraits::value_type;
-    constexpr static size_t dims        = ArgTraits::dims;
+    constexpr static size_t dims        = sizeof...(ShapeValues); // rank of the fixed shape (was ArgTraits::dims)
     constexpr static bool random_access = ArgTraits::random_access;
 
+    // Both overloads return the same compile-time shape; `self` is not inspected.
     KFR_MEM_INTRINSIC constexpr static shape<dims> shapeof(
-        const xfixshape<Arg, fixed_shape<ShapeValues...>>& self)
+        const expression_fixshape<Arg, fixed_shape_t<ShapeValues...>>& self)
     {
-        return fixed_shape<ShapeValues...>::get();
+        return fixed_shape_t<ShapeValues...>::get();
     }
-    KFR_MEM_INTRINSIC constexpr static shape<dims> shapeof() { return fixed_shape<ShapeValues...>::get(); }
+    KFR_MEM_INTRINSIC constexpr static shape<dims> shapeof() { return fixed_shape_t<ShapeValues...>::get(); }
 };
 
 inline namespace CMT_ARCH_NAME
 {
 
+/// Reads N elements from an expression_fixshape by delegating to the wrapped
+/// argument. The index is converted to the argument's own rank with `trim` first.
 template <typename Arg, typename Shape, index_t Axis, size_t N,
-          typename Traits = expression_traits<xfixshape<Arg, Shape>>,
+          typename Traits = expression_traits<expression_fixshape<Arg, Shape>>,
           typename T      = typename Traits::value_type>
-KFR_INTRINSIC vec<T, N> get_elements(const xfixshape<Arg, Shape>& self, const shape<Traits::dims>& index,
-                                     const axis_params<Axis, N>& sh)
+KFR_INTRINSIC vec<T, N> get_elements(const expression_fixshape<Arg, Shape>& self,
+                                     const shape<Traits::dims>& index, const axis_params<Axis, N>& sh)
 {
-    return get_elements(self.first(), index, sh);
+    using ArgTraits = expression_traits<Arg>;
+    return get_elements(self.first(), index.template trim<ArgTraits::dims>(), sh);
 }
 
+/// Writes N elements through an expression_fixshape into the wrapped argument.
+/// The index is converted to the argument's own rank with `trim` first.
 template <typename Arg, typename Shape, index_t Axis, size_t N,
-          typename Traits = expression_traits<xfixshape<Arg, Shape>>,
+          typename Traits = expression_traits<expression_fixshape<Arg, Shape>>,
           typename T      = typename Traits::value_type>
-KFR_INTRINSIC void set_elements(xfixshape<Arg, Shape>& self, const shape<Traits::dims>& index,
+KFR_INTRINSIC void set_elements(expression_fixshape<Arg, Shape>& self, const shape<Traits::dims>& index,
                                 const axis_params<Axis, N>& sh, const identity<vec<T, N>>& value)
 {
-    set_elements(self.first(), index, sh, value);
+    using ArgTraits = expression_traits<Arg>;
+    if constexpr (is_output_expression<Arg>)
+    {
+        set_elements(self.first(), index.template trim<ArgTraits::dims>(), sh, value);
+    }
+    else
+    {
+        // Intentionally empty: when Arg is not an output expression the write is dropped.
+    }
 }
 
 } // namespace CMT_ARCH_NAME
@@ -487,27 +531,27 @@ KFR_INTRINSIC void set_elements(xfixshape<Arg, Shape>& self, const shape<Traits:
 // ----------------------------------------------------------------------------
 
 template <typename Arg, index_t OutDims>
-struct xreshape : public xwitharguments<Arg>
+struct expression_reshape : public expression_with_arguments<Arg>
 {
-    using ArgTraits = typename xwitharguments<Arg>::first_arg_trait;
+    using ArgTraits = typename expression_with_arguments<Arg>::first_arg_traits;
     shape<ArgTraits::dims> in_shape;
     shape<OutDims> out_shape;
 
-    KFR_MEM_INTRINSIC xreshape(Arg&& arg, const shape<OutDims>& out_shape)
-        : xwitharguments<Arg>{ std::forward<Arg>(arg) }, in_shape(ArgTraits::shapeof(arg)),
+    KFR_MEM_INTRINSIC expression_reshape(Arg&& arg, const shape<OutDims>& out_shape)
+        : expression_with_arguments<Arg>{ std::forward<Arg>(arg) }, in_shape(ArgTraits::shapeof(arg)),
           out_shape(out_shape)
     {
     }
 };
 
-template <typename Arg, index_t OutDims>
-KFR_INTRINSIC xreshape<Arg, OutDims> reshape(Arg&& arg, const shape<OutDims>& out_shape)
+/// Wraps `arg` in an expression_reshape that reports `out_shape` as its shape.
+template <typename Arg, index_t OutDims, KFR_ACCEPT_EXPRESSIONS(Arg)>
+KFR_INTRINSIC expression_reshape<Arg, OutDims> reshape(Arg&& arg, const shape<OutDims>& out_shape)
 {
     return { std::forward<Arg>(arg), out_shape };
 }
 
 template <typename Arg, index_t OutDims>
-struct expression_traits<xreshape<Arg, OutDims>> : expression_traits_defaults
+struct expression_traits<expression_reshape<Arg, OutDims>> : expression_traits_defaults
 {
     using ArgTraits = expression_traits<Arg>;
 
@@ -515,7 +559,7 @@ struct expression_traits<xreshape<Arg, OutDims>> : expression_traits_defaults
     constexpr static size_t dims        = OutDims;
     constexpr static bool random_access = ArgTraits::random_access;
 
-    KFR_MEM_INTRINSIC constexpr static shape<dims> shapeof(const xreshape<Arg, OutDims>& self)
+    KFR_MEM_INTRINSIC constexpr static shape<dims> shapeof(const expression_reshape<Arg, OutDims>& self)
     {
         return self.out_shape;
     }
@@ -526,10 +570,10 @@ inline namespace CMT_ARCH_NAME
 {
 
 template <typename Arg, index_t outdims, index_t Axis, size_t N,
-          typename Traits = expression_traits<xreshape<Arg, outdims>>,
+          typename Traits = expression_traits<expression_reshape<Arg, outdims>>,
           typename T      = typename Traits::value_type>
-KFR_INTRINSIC vec<T, N> get_elements(const xreshape<Arg, outdims>& self, const shape<Traits::dims>& index,
-                                     const axis_params<Axis, N>& sh)
+KFR_INTRINSIC vec<T, N> get_elements(const expression_reshape<Arg, outdims>& self,
+                                     const shape<Traits::dims>& index, const axis_params<Axis, N>& sh)
 {
     using ArgTraits          = typename Traits::ArgTraits;
     constexpr index_t indims = ArgTraits::dims;
@@ -581,9 +625,9 @@ KFR_INTRINSIC vec<T, N> get_elements(const xreshape<Arg, outdims>& self, const s
 }
 
 template <typename Arg, index_t outdims, index_t Axis, size_t N,
-          typename Traits = expression_traits<xreshape<Arg, outdims>>,
+          typename Traits = expression_traits<expression_reshape<Arg, outdims>>,
           typename T      = typename Traits::value_type>
-KFR_INTRINSIC void set_elements(xreshape<Arg, outdims>& self, const shape<Traits::dims>& index,
+KFR_INTRINSIC void set_elements(expression_reshape<Arg, outdims>& self, const shape<Traits::dims>& index,
                                 const axis_params<Axis, N>& sh, const identity<vec<T, N>>& value)
 {
     using ArgTraits          = typename Traits::ArgTraits;
@@ -631,44 +675,84 @@ KFR_INTRINSIC void set_elements(xreshape<Arg, outdims>& self, const shape<Traits
 
 // ----------------------------------------------------------------------------
 
+/// Tag type used to select the expression_linspace constructor that takes a single
+/// `symsize` and spans the interval from -symsize to +symsize.
+struct symmetric_linspace_t
+{
+};
+/// Tag value to pass where a symmetric_linspace_t is expected.
+inline constexpr symmetric_linspace_t symmetric_linspace{};
+
 template <typename T, bool truncated = true>
-struct xlinspace
+struct expression_linspace
 {
     T start;
     T stop;
     index_t size;
     bool endpoint;
+    T invsize;
+
+    expression_linspace(T start, T stop, size_t size, bool endpoint = false)
+        : start(start), stop(stop), size(size), invsize(1.0 / T(endpoint ? size - 1 : size))
+    {
+    }
+
+    expression_linspace(symmetric_linspace_t, T symsize, size_t size, bool endpoint = false)
+        : expression_linspace(-symsize, +symsize, size, endpoint)
+    {
+    }
 };
 
+/// Traits for expression_linspace: a one-dimensional expression of T. When
+/// `truncated` is set its length is `self.size`; otherwise it reports infinite_size.
 template <typename T, bool truncated>
-struct expression_traits<xlinspace<T, truncated>> : expression_traits_defaults
+struct expression_traits<expression_linspace<T, truncated>> : expression_traits_defaults
 {
     using value_type             = T;
     constexpr static size_t dims = 1;
 
-    constexpr static shape<dims> shapeof(const xlinspace<T, truncated>& self)
+    constexpr static shape<dims> shapeof(const expression_linspace<T, truncated>& self)
     {
         return shape<dims>(truncated ? self.size : infinite_size);
     }
+    // Without an instance the truncated length is not known, hence undefined_size.
     constexpr static shape<dims> shapeof() { return shape<dims>(truncated ? undefined_size : infinite_size); }
 };
 
-template <bool truncated = false, typename T1, typename T2, typename Tout = std::common_type_t<T1, T2>>
-KFR_INTRINSIC xlinspace<Tout, truncated> linspace(T1 start, T2 stop, size_t size, bool endpoint = false)
+/** @brief Returns evenly spaced numbers over a specified interval.
+ *
+ * @param start The starting value of the sequence
+ * @param stop The end value of the sequence. If ``endpoint`` is ``false``, the last value is excluded
+ * @param size Number of samples to generate
+ * @param endpoint If ``true``, ``stop`` is the last sample. Otherwise, it is not included
+ * @tparam T Requested element type; defaults to ``void``. NOTE(review): ``or_type`` presumably falls
+ *           back to the floating-point common type of ``T1`` and ``T2`` when ``T`` is ``void`` - confirm
+ * @tparam truncated If ``true``, linspace returns exactly size elements, otherwise, returns infinite sequence
+ * @tparam precise No longer used since KFR5, calculations are always precise
+ */
+template <typename T = void, bool precise = false, bool truncated = false, typename T1, typename T2,
+          typename Tout = or_type<T, ftype<std::common_type_t<T1, T2>>>>
+KFR_INTRINSIC expression_linspace<Tout, truncated> linspace(T1 start, T2 stop, size_t size,
+                                                            bool endpoint = false, cbool_t<truncated> = {})
 {
     return { static_cast<Tout>(start), static_cast<Tout>(stop), size, endpoint };
 }
 
+/** @brief Returns evenly spaced numbers over the symmetric interval [-symsize..symsize].
+ *
+ * The end point is always included: the expression is constructed with ``endpoint`` set to ``true``.
+ *
+ * @param symsize The sequence will have interval [-symsize..symsize]
+ * @param size Number of samples to generate
+ * @tparam T Type of ``symsize``; the element type of the result is ``ftype<T>`` by default
+ * @tparam truncated If ``true``, linspace returns exactly size elements, otherwise, returns infinite sequence
+ * @tparam precise No longer used since KFR5, calculations are always precise
+ */
+template <typename T, bool precise = false, bool truncated = false, typename Tout = ftype<T>>
+KFR_INTRINSIC expression_linspace<Tout, truncated> symmlinspace(T symsize, size_t size,
+                                                                cbool_t<truncated> = {})
+{
+    const Tout half_range       = static_cast<Tout>(symsize);
+    constexpr bool with_endpoint = true;
+    return { symmetric_linspace, half_range, size, with_endpoint };
+}
+
 inline namespace CMT_ARCH_NAME
 {
 
+/// Produces N consecutive linspace values starting at position index.front().
+/// Each position is scaled by `invsize` and passed to `mix` together with
+/// `start` and `stop`.
 template <typename T, bool truncated, size_t N>
-KFR_INTRINSIC vec<T, N> get_elements(const xlinspace<T, truncated>& self, const shape<1>& index,
+KFR_INTRINSIC vec<T, N> get_elements(const expression_linspace<T, truncated>& self, const shape<1>& index,
                                      const axis_params<0, N>&)
 {
-    T acc    = self.start;
     using TI = itype<T>;
-    return mix(enumerate(vec_shape<T, N>(), static_cast<T>(static_cast<TI>(index))) * self.invsize,
+    // The index is converted through the integer type TI before being converted to T.
+    return mix((enumerate(vec_shape<T, N>()) + static_cast<T>(static_cast<TI>(index.front()))) * self.invsize,
                self.start, self.stop);
 }
 } // namespace CMT_ARCH_NAME
@@ -676,22 +760,22 @@ KFR_INTRINSIC vec<T, N> get_elements(const xlinspace<T, truncated>& self, const 
 // ----------------------------------------------------------------------------
 
 template <typename Arg1, typename Arg2, index_t ConcatAxis>
-struct xconcatenate : public xwitharguments<Arg1, Arg2>
+struct expression_concatenate : public expression_with_arguments<Arg1, Arg2>
 {
     static_assert(expression_dims<Arg1> == expression_dims<Arg2>);
     static_assert(std::is_same_v<expression_value_type<Arg1>, expression_value_type<Arg2>>);
     constexpr static index_t dims = expression_dims<Arg1>;
     shape<dims> size1;
 
-    KFR_MEM_INTRINSIC xconcatenate(Arg1&& arg1, Arg2&& arg2)
-        : xwitharguments<Arg1, Arg2>{ std::forward<Arg1>(arg1), std::forward<Arg2>(arg2) },
+    KFR_MEM_INTRINSIC expression_concatenate(Arg1&& arg1, Arg2&& arg2)
+        : expression_with_arguments<Arg1, Arg2>{ std::forward<Arg1>(arg1), std::forward<Arg2>(arg2) },
           size1(expression_traits<Arg1>::shapeof(arg1))
     {
     }
 };
 
 template <typename Arg1, typename Arg2, index_t ConcatAxis>
-struct expression_traits<xconcatenate<Arg1, Arg2, ConcatAxis>> : expression_traits_defaults
+struct expression_traits<expression_concatenate<Arg1, Arg2, ConcatAxis>> : expression_traits_defaults
 {
     using ArgTraits1 = expression_traits<Arg1>;
     using ArgTraits2 = expression_traits<Arg2>;
@@ -708,9 +792,11 @@ struct expression_traits<xconcatenate<Arg1, Arg2, ConcatAxis>> : expression_trai
         return result;
     }
 
-    KFR_MEM_INTRINSIC constexpr static shape<dims> shapeof(const xconcatenate<Arg1, Arg2, ConcatAxis>& self)
+    KFR_MEM_INTRINSIC constexpr static shape<dims> shapeof(
+        const expression_concatenate<Arg1, Arg2, ConcatAxis>& self)
     {
-        return concat_shape(ArgTraits1::shapeof(std::get<0>(self)), ArgTraits2::shapeof(std::get<1>(self)));
+        return concat_shape(ArgTraits1::shapeof(std::get<0>(self.args)),
+                            ArgTraits2::shapeof(std::get<1>(self.args)));
     }
     KFR_MEM_INTRINSIC constexpr static shape<dims> shapeof()
     {
@@ -719,16 +805,15 @@ struct expression_traits<xconcatenate<Arg1, Arg2, ConcatAxis>> : expression_trai
 };
 
+/// Builds an expression_concatenate from two expressions. ConcatAxis defaults to 0.
 template <index_t ConcatAxis = 0, typename Arg1, typename Arg2, KFR_ACCEPT_EXPRESSIONS(Arg1, Arg2)>
-KFR_INTRINSIC xconcatenate<Arg1, Arg2, ConcatAxis> concatenate(Arg1&& arg1, Arg2&& arg2)
+KFR_INTRINSIC expression_concatenate<Arg1, Arg2, ConcatAxis> concatenate(Arg1&& arg1, Arg2&& arg2)
 {
     return { std::forward<Arg1>(arg1), std::forward<Arg2>(arg2) };
 }
 
+/// Three-argument form: the second and third arguments are concatenated first and
+/// the result becomes the second operand of an outer expression_concatenate.
 template <index_t ConcatAxis = 0, typename Arg1, typename Arg2, typename Arg3,
           KFR_ACCEPT_EXPRESSIONS(Arg1, Arg2, Arg3)>
-KFR_INTRINSIC xconcatenate<Arg1, xconcatenate<Arg2, Arg3, ConcatAxis>, ConcatAxis> concatenate(Arg1&& arg1,
-                                                                                               Arg2&& arg2,
-                                                                                               Arg3&& arg3)
+KFR_INTRINSIC expression_concatenate<Arg1, expression_concatenate<Arg2, Arg3, ConcatAxis>, ConcatAxis>
+concatenate(Arg1&& arg1, Arg2&& arg2, Arg3&& arg3)
 {
     return { std::forward<Arg1>(arg1), { std::forward<Arg2>(arg2), std::forward<Arg3>(arg3) } };
 }
@@ -737,11 +822,11 @@ inline namespace CMT_ARCH_NAME
 {
 
 template <typename Arg1, typename Arg2, index_t ConcatAxis, index_t NDims, index_t Axis, size_t N,
-          typename T = typename expression_traits<xconcatenate<Arg1, Arg2, ConcatAxis>>::value_type>
-KFR_INTRINSIC vec<T, N> get_elements(const xconcatenate<Arg1, Arg2, ConcatAxis>& self,
+          typename T = typename expression_traits<expression_concatenate<Arg1, Arg2, ConcatAxis>>::value_type>
+KFR_INTRINSIC vec<T, N> get_elements(const expression_concatenate<Arg1, Arg2, ConcatAxis>& self,
                                      const shape<NDims>& index, const axis_params<Axis, N>& sh)
 {
-    const index_t size1     = self.size1;
+    const shape<NDims> size1     = self.size1;
     constexpr index_t Naxis = ConcatAxis == Axis ? N : 1;
     if (index[ConcatAxis] >= size1[ConcatAxis])
     {
@@ -773,8 +858,133 @@ KFR_INTRINSIC vec<T, N> get_elements(const xconcatenate<Arg1, Arg2, ConcatAxis>&
     }
 }
 
-} // namespace CMT_ARCH_NAME
+// ----------------------------------------------------------------------------
+
+/// Alias for an expression_function that applies fn::packtranspose to its arguments.
+template <typename... Args>
+using expression_pack = expression_function<fn::packtranspose, Args...>;
+
+/// Builds an expression_pack from the given expressions, forwarding each argument.
+template <typename... Args, KFR_ACCEPT_EXPRESSIONS(Args...)>
+KFR_INTRINSIC expression_pack<Args...> pack(Args&&... args)
+{
+    return { std::forward<Args>(args)... };
+}
+
+namespace internal
+{
+/// Helper for writing into a pack expression: rearranges `x` into one vector per
+/// argument and forwards each vector to the matching argument's set_elements.
+/// `Indices` is expected to enumerate the arguments (0 .. sizeof...(Args) - 1).
+template <typename... Args, index_t Axis, size_t N,
+          typename Tr = expression_traits<expression_function<fn::packtranspose, Args...>>, size_t... Indices>
+KFR_INTRINSIC void set_elements_packed(expression_function<fn::packtranspose, Args...>& self,
+                                       shape<Tr::dims> index, axis_params<Axis, N> sh,
+                                       const vec<typename Tr::value_type, N>& x, csizes_t<Indices...>)
+{
+    constexpr size_t count          = sizeof...(Args);
+    using ST                        = subtype<typename Tr::value_type>;
+    // Flatten, transpose and regroup so that xx[i] holds the N values destined for argument i.
+    const vec<vec<ST, N>, count> xx = vec<vec<ST, N>, count>::from_flatten(transpose<count>(flatten(x)));
+    // Fold expression: one set_elements call per argument.
+    (set_elements(std::get<Indices>(self.args), index, sh, xx[Indices]), ...);
+}
+} // namespace internal
+
+/// set_elements overload for pack expressions. Supplies the index sequence for
+/// the arguments and delegates to internal::set_elements_packed.
+template <typename... Args, index_t Axis, size_t N,
+          typename Tr = expression_traits<expression_function<fn::packtranspose, Args...>>>
+KFR_INTRINSIC void set_elements(expression_function<fn::packtranspose, Args...>& self, shape<Tr::dims> index,
+                                axis_params<Axis, N> sh, const identity<vec<typename Tr::value_type, N>>& x)
+{
+    internal::set_elements_packed(self, index, sh, x, csizeseq<sizeof...(Args)>);
+}
 
 // ----------------------------------------------------------------------------
 
+/// Output expression that distributes vector-valued input across several argument
+/// expressions. Its value_type is a vec with one lane per argument; all arguments
+/// must share the same rank and value type (checked by the static_asserts).
+/// Its shape is taken from the first argument.
+template <typename... E>
+struct expression_unpack : expression_with_arguments<E...>, expression_traits_defaults
+{
+    constexpr static size_t count = sizeof...(E);
+
+    using first_arg_traits = typename expression_with_arguments<E...>::first_arg_traits;
+
+    constexpr static index_t dims = first_arg_traits::dims;
+    using first_value_type        = typename first_arg_traits::value_type;
+
+    using value_type = vec<first_value_type, count>;
+
+    static_assert(((expression_dims<E> == dims) && ...));
+    static_assert(((std::is_same_v<expression_value_type<E>, first_value_type>)&&...));
+
+    constexpr static shape<dims> shapeof(const expression_unpack& self)
+    {
+        return first_arg_traits::shapeof(self.first());
+    }
+    constexpr static shape<dims> shapeof() { return first_arg_traits::shapeof(); }
+
+    expression_unpack(E&&... e) : expression_with_arguments<E...>(std::forward<E>(e)...) {}
+
+    // Entry point used when writing into this expression; forwards to output().
+    template <index_t Axis, size_t N>
+    KFR_INTRINSIC friend void set_elements(expression_unpack& self, shape<dims> index,
+                                           axis_params<Axis, N> sh, const identity<vec<value_type, N>>& x)
+    {
+        self.output(index, sh, x, csizeseq<count>);
+    }
+
+    // Assigning an input expression passes it, together with this object, to process().
+    template <typename Input, KFR_ACCEPT_EXPRESSIONS(Input)>
+    KFR_MEM_INTRINSIC expression_unpack& operator=(Input&& input)
+    {
+        process(*this, std::forward<Input>(input));
+        return *this;
+    }
+
+private:
+    // Rearranges `x` into one vector per argument (xx[i] is destined for argument i)
+    // and writes each one to the matching argument via a fold expression.
+    template <index_t Axis, size_t N, size_t... indices>
+    KFR_MEM_INTRINSIC void output(shape<dims> index, axis_params<Axis, N> sh, const vec<value_type, N>& x,
+                                  csizes_t<indices...>)
+    {
+        const vec<vec<first_value_type, N>, count> xx =
+            vec<vec<first_value_type, N>, count>::from_flatten(transpose<count>(flatten(x)));
+        (set_elements(std::get<indices>(this->args), index, sh, xx[indices]), ...);
+    }
+};
+
+// ----------------------------------------------------------------------------
+
+/// Builds an expression_unpack over the given output expressions, forwarding each one.
+template <typename... E, enable_if_output_expressions<E...>* = nullptr>
+KFR_FUNCTION expression_unpack<E...> unpack(E&&... e)
+{
+    using result_type = expression_unpack<E...>;
+    return result_type(std::forward<E>(e)...);
+}
+
+// ----------------------------------------------------------------------------
+
+/// Expression that calls `fn(in, delayed)`, where `in` is the block just read from
+/// the wrapped expression and `delayed` is `in` with the last value of the
+/// previous block inserted on the left.
+/// The carried value lives in the mutable member `data`, so results depend on the
+/// order of get_elements calls; random_access is reported as false accordingly.
+template <typename Fn, typename E>
+struct expression_adjacent : expression_with_traits<E>
+{
+    using value_type                           = typename expression_with_traits<E>::value_type;
+    constexpr static inline index_t dims       = expression_with_traits<E>::dims;
+    constexpr static inline bool random_access = false;
+
+    expression_adjacent(Fn&& fn, E&& e)
+        : expression_with_traits<E>(std::forward<E>(e)), fn(std::forward<Fn>(fn))
+    {
+    }
+
+    template <size_t N, index_t VecAxis>
+    KFR_INTRINSIC friend vec<value_type, N> get_elements(const expression_adjacent& self, shape<dims> index,
+                                                         axis_params<VecAxis, N> sh)
+    {
+        const vec<value_type, N> in      = get_elements(self.first(), index, sh);
+        const vec<value_type, N> delayed = insertleft(self.data, in);
+        // Remember the last element of this block for the next call.
+        self.data                        = in.back();
+        return self.fn(in, delayed);
+    }
+    Fn fn;
+    // Last element of the previously read block; starts at zero. Mutable because
+    // get_elements receives a const reference.
+    mutable value_type data = value_type(0);
+};
+
+/**
+ * @brief Returns template expression that returns the result of calling \f$ fn(x_i, x_{i-1}) \f$
+ * @param fn Callable invoked with the current and the previous values
+ * @param e1 Source expression
+ */
+template <typename Fn, typename E1>
+KFR_INTRINSIC expression_adjacent<Fn, E1> adjacent(Fn&& fn, E1&& e1)
+{
+    using result_type = expression_adjacent<Fn, E1>;
+    return result_type(std::forward<Fn>(fn), std::forward<E1>(e1));
+}
+
+} // namespace CMT_ARCH_NAME
 } // namespace kfr
diff --git a/include/kfr/base/conversion.hpp b/include/kfr/base/conversion.hpp
@@ -183,7 +183,7 @@ template <typename Tout, typename Tin, typename Tout_traits = audio_sample_trait
 inline Tout convert_sample(const Tin& in)
 {
     constexpr auto scale = Tout_traits::scale / Tin_traits::scale;
-    return innercast<Tout>(clamp(in * scale, -Tout_traits::scale, +Tout_traits::scale));
+    return broadcastto<Tout>(clamp(in * scale, -Tout_traits::scale, +Tout_traits::scale));
 }
 
 /// @brief Deinterleaves and converts audio samples
@@ -264,20 +264,24 @@ void convert(Tout* out, const Tin* in, size_t size)
 template <typename Tout, typename Tout_traits = audio_sample_traits<Tout>>
 void convert(Tout* out, const void* in, audio_sample_type in_type, size_t size)
 {
-    cswitch(audio_sample_type_clist{}, in_type, [&](auto t) {
-        using type = typename audio_sample_get_type<val_of(decltype(t)())>::type;
-        convert(out, reinterpret_cast<const type*>(in), size);
-    });
+    // Select the concrete input sample type from the runtime `in_type`, then call
+    // the typed convert overload with `in` reinterpreted as that type.
+    cswitch(audio_sample_type_clist{}, in_type,
+            [&](auto t)
+            {
+                using type = typename audio_sample_get_type<val_of(decltype(t)())>::type;
+                convert(out, reinterpret_cast<const type*>(in), size);
+            });
 }
 
 /// @brief Converts audio samples (output format is known at runtime)
 template <typename Tin, typename Tin_traits = audio_sample_traits<Tin>>
 void convert(void* out, audio_sample_type out_type, const Tin* in, size_t size)
 {
-    cswitch(audio_sample_type_clist{}, out_type, [&](auto t) {
-        using type = typename audio_sample_get_type<val_of(decltype(t)())>::type;
-        convert(reinterpret_cast<type*>(out), in, size);
-    });
+    // Select the concrete output sample type from the runtime `out_type`, then call
+    // the typed convert overload with `out` reinterpreted as that type.
+    cswitch(audio_sample_type_clist{}, out_type,
+            [&](auto t)
+            {
+                using type = typename audio_sample_get_type<val_of(decltype(t)())>::type;
+                convert(reinterpret_cast<type*>(out), in, size);
+            });
 }
 } // namespace CMT_ARCH_NAME
 } // namespace kfr
diff --git a/include/kfr/base/expression.hpp b/include/kfr/base/expression.hpp
@@ -81,6 +81,10 @@ constexpr inline shape<expression_dims<T>> shapeof()
 }
 
+// const-qualified and lvalue-reference expression types inherit the traits of the
+// underlying type T. Each specialization applies only when expression_value_type<T>
+// is well-formed.
 template <typename T>
+struct expression_traits<const T, std::void_t<expression_value_type<T>>> : expression_traits<T>
+{
+};
+template <typename T>
 struct expression_traits<T&, std::void_t<expression_value_type<T>>> : expression_traits<T>
 {
 };
@@ -98,6 +102,19 @@ struct expression_traits<const T&&, std::void_t<typename expression_traits<T>::v
 {
 };
 
+// This allows old style expressions+traits
+// Adapter for types that carry their traits as static members: it is selected when
+// T has a static `random_access` member and a static, argument-less `shapeof()`,
+// and forwards everything to T. Note that T must also provide `value_type`, `dims`,
+// `shapeof(const T&)` and `explicit_operand`, which the selection condition does
+// not check.
+template <typename T>
+struct expression_traits<T, std::void_t<decltype(T::random_access), decltype(T::shapeof())>>
+{
+    using value_type             = typename T::value_type;
+    constexpr static size_t dims = T::dims;
+    constexpr static shape<dims> shapeof(const T& self) { return T::shapeof(self); }
+    constexpr static shape<dims> shapeof() { return T::shapeof(); }
+
+    constexpr static inline bool explicit_operand = T::explicit_operand;
+    constexpr static inline bool random_access    = T::random_access;
+};
+
 struct expression_traits_defaults
 {
     // using value_type = accepts_any;
@@ -166,6 +183,9 @@ constexpr inline bool is_input_output_expression<E, enable_if_input_output_expre
 template <typename T>
 constexpr inline bool is_expr_element = std::is_same_v<std::remove_cv_t<T>, T>&& is_vec_element<T>;
 
+/// True when the static (instance-free) shape of E reports infinity via has_infinity().
+template <typename E>
+constexpr inline bool is_infinite = expression_traits<E>::shapeof().has_infinity();
+
 template <typename T>
 struct expression_traits<T, std::enable_if_t<is_expr_element<T>>> : expression_traits_defaults
 {
@@ -177,8 +197,29 @@ struct expression_traits<T, std::enable_if_t<is_expr_element<T>>> : expression_t
     KFR_MEM_INTRINSIC constexpr static shape<0> shapeof() { return {}; }
 };
 
+namespace internal_generic
+{
+/// Catch-all parameter type: implicitly constructible from any value, which it ignores.
+struct anything
+{
+    template <typename Expr>
+    constexpr anything(Expr&&)
+    {
+    }
+};
+} // namespace internal_generic
+
 inline namespace CMT_ARCH_NAME
 {
+
+/// No-op begin_pass overload. Because the first parameter is `anything`, it accepts
+/// any argument type.
+template <index_t Dims>
+KFR_INTRINSIC void begin_pass(const internal_generic::anything&, shape<Dims> start, shape<Dims> stop)
+{
+}
+/// No-op end_pass overload, the counterpart of the begin_pass overload above.
+template <index_t Dims>
+KFR_INTRINSIC void end_pass(const internal_generic::anything&, shape<Dims> start, shape<Dims> stop)
+{
+}
+
 template <typename T, index_t Axis, size_t N, KFR_ENABLE_IF(is_expr_element<std::decay_t<T>>)>
 KFR_INTRINSIC vec<std::decay_t<T>, N> get_elements(T&& self, const shape<0>& index,
                                                    const axis_params<Axis, N>&)
@@ -193,10 +234,313 @@ KFR_INTRINSIC void set_elements(T& self, const shape<0>& index, const axis_param
     static_assert(!std::is_const_v<T>);
     self = val.front();
 }
-} // namespace CMT_ARCH_NAME
 
-inline namespace CMT_ARCH_NAME
+template <typename... Args> // holds a pack of arguments in a tuple plus one dimset mask per argument
+struct expression_with_arguments
+{
+    constexpr static size_t count = sizeof...(Args);
+
+    using type_list = ctypes_t<Args...>;
+
+    template <size_t idx>
+    using nth = typename type_list::template nth<idx>;
+
+    using first_arg = typename type_list::template nth<0>;
+
+    template <size_t idx>
+    using nth_trait = expression_traits<typename type_list::template nth<idx>>;
+
+    using first_arg_traits = expression_traits<first_arg>;
+
+    std::tuple<Args...> args;
+    std::array<dimset, count> masks; // filled by the constructor from each argument's runtime shape (sh.tomask())
+
+    KFR_INTRINSIC auto& first() { return std::get<0>(args); }
+    KFR_INTRINSIC const auto& first() const { return std::get<0>(args); }
+
+    template <size_t idx> // returns the mask used for argument idx
+    KFR_INTRINSIC dimset getmask(csize_t<idx> = {}) const
+    {
+        static_assert(idx < count);
+        using Traits = expression_traits<nth<idx>>;
+        if constexpr (sizeof...(Args) <= 1 || Traits::dims == 0)
+        {
+            return -1; // single argument or 0-dim argument: fixed mask (presumably "all dimensions set" - confirm against dimset)
+        }
+        else
+        {
+            constexpr shape<Traits::dims> sh = Traits::shapeof();
+            if constexpr (sh.cproduct() > 0)
+            {
+                return sh.tomask(); // compile-time shape is usable: derive the mask from it
+            }
+            else
+            {
+                return std::get<idx>(masks); // otherwise use the mask recorded at construction
+            }
+        }
+    }
+
+    template <typename Fn> // calls fn with all stored arguments
+    KFR_INTRINSIC constexpr auto fold(Fn&& fn) const
+    {
+        return fold_impl(std::forward<Fn>(fn), csizeseq<count>);
+    }
+    template <typename Fn> // calls fn with one csize<i> constant per index in csizeseq<count>; needs no instance
+    KFR_INTRINSIC constexpr static auto fold_idx(Fn&& fn)
+    {
+        return fold_idx_impl(std::forward<Fn>(fn), csizeseq<count>);
+    }
+
+    KFR_INTRINSIC expression_with_arguments(Args&&... args) : args{ std::forward<Args>(args)... } // stores the arguments, then records each one's mask
+    {
+        cforeach(csizeseq<count>,
+                 [&](auto idx_) CMT_INLINE_LAMBDA
+                 {
+                     constexpr size_t idx = val_of(decltype(idx_)());
+                     shape sh             = expression_traits<nth<idx>>::shapeof(std::get<idx>(this->args));
+                     masks[idx]           = sh.tomask();
+                 });
+    }
+
+private:
+    template <typename Fn, size_t... indices>
+    KFR_INTRINSIC constexpr auto fold_impl(Fn&& fn, csizes_t<indices...>) const
+    {
+        return fn(std::get<indices>(args)...);
+    }
+    template <typename Fn, size_t... indices>
+    KFR_INTRINSIC constexpr static auto fold_idx_impl(Fn&& fn, csizes_t<indices...>)
+    {
+        return fn(csize<indices>...);
+    }
+};
+
+template <typename Arg> // single-argument specialization: same interface as the primary template but no masks array
+struct expression_with_arguments<Arg>
+{
+    constexpr static size_t count = 1;
+
+    using type_list = ctypes_t<Arg>;
+
+    template <size_t idx>
+    using nth = Arg; // every index maps to the only argument
+
+    using first_arg = Arg;
+
+    template <size_t idx>
+    using nth_trait = expression_traits<Arg>;
+
+    using first_arg_traits = expression_traits<first_arg>;
+
+    std::tuple<Arg> args;
+
+    KFR_MEM_INTRINSIC auto& first() { return std::get<0>(args); }
+    KFR_MEM_INTRINSIC const auto& first() const { return std::get<0>(args); }
+
+    template <size_t idx>
+    KFR_MEM_INTRINSIC dimset getmask(csize_t<idx> = {}) const
+    {
+        return -1; // always the fixed mask; nothing is recorded per argument here
+    }
+
+    template <typename Fn> // calls fn with the stored argument
+    KFR_MEM_INTRINSIC constexpr auto fold(Fn&& fn) const
+    {
+        return fold_impl(std::forward<Fn>(fn), csizeseq<count>);
+    }
+    template <typename Fn> // calls fn with one csize<i> constant per index in csizeseq<count>; needs no instance
+    KFR_INTRINSIC constexpr static auto fold_idx(Fn&& fn)
+    {
+        return fold_idx_impl(std::forward<Fn>(fn), csizeseq<count>);
+    }
+
+    KFR_MEM_INTRINSIC expression_with_arguments(Arg&& arg) : args{ std::forward<Arg>(arg) } {}
+
+private:
+    template <typename Fn, size_t... indices>
+    KFR_MEM_INTRINSIC constexpr auto fold_impl(Fn&& fn, csizes_t<indices...>) const
+    {
+        return fn(std::get<indices>(args)...);
+    }
+    template <typename Fn, size_t... indices>
+    KFR_INTRINSIC constexpr static auto fold_idx_impl(Fn&& fn, csizes_t<indices...>)
+    {
+        return fn(csize<indices>...);
+    }
+};
+
+template <typename... Args> // deduction guide: Args are deduced from the constructor arguments as forwarding references
+expression_with_arguments(Args&&... args) -> expression_with_arguments<Args...>;
+
+template <typename Arg> // single-argument base whose value_type, dims and shape are taken from the argument's traits
+struct expression_with_traits : expression_with_arguments<Arg>, expression_traits_defaults
+{
+    using first_arg_traits       = expression_traits<Arg>;
+    using value_type             = typename first_arg_traits::value_type;
+    constexpr static size_t dims = first_arg_traits::dims;
+    constexpr static shape<dims> shapeof(const expression_with_traits& self) // runtime shape of the wrapped argument
+    {
+        return first_arg_traits::shapeof(self.first());
+    }
+    constexpr static shape<dims> shapeof() { return first_arg_traits::shapeof(); } // shape known from the argument type alone
+
+    using expression_with_arguments<Arg>::expression_with_arguments; // inherit the base constructor
+};
+
+template <typename Fn, typename... Args> // stores a callable fn with its arguments and declares the expression traits as members
+struct expression_function : expression_with_arguments<Args...>, expression_traits_defaults
+{
+    using value_type = // element type of what Fn returns when invoked with 1-element vectors of the argument value types
+        typename std::invoke_result_t<Fn,
+                                      vec<typename expression_traits<Args>::value_type, 1>...>::value_type;
+    constexpr static size_t dims = const_max(expression_traits<Args>::dims...); // largest dims among the arguments
+
+    constexpr static shape<dims> shapeof(const expression_function& self) // common_shape of the arguments' runtime shapes
+    {
+        return self.fold([&](auto&&... args) CMT_INLINE_LAMBDA constexpr->auto {
+            return internal_generic::common_shape(expression_traits<decltype(args)>::shapeof(args)...);
+        });
+    }
+    constexpr static shape<dims> shapeof() // common_shape of the arguments' type-level shapes
+    {
+        return expression_function<Fn,
+                                   Args...>::fold_idx([&](auto... args) CMT_INLINE_LAMBDA constexpr->auto {
+            return internal_generic::common_shape(
+                expression_traits<
+                    typename expression_function::template nth<val_of(decltype(args)())>>::shapeof()...);
+        });
+    }
+
+    constexpr static inline bool random_access = (expression_traits<Args>::random_access && ...); // true only if every argument is random-access
+
+    Fn fn;
+
+    KFR_MEM_INTRINSIC expression_function(expression_with_arguments<Args...> args, Fn&& fn) // from an already-built argument pack
+        : expression_with_arguments<Args...>{ std::move(args) }, fn(std::forward<Fn>(fn))
+    {
+        check_shapes();
+    }
+    KFR_MEM_INTRINSIC expression_function(Fn&& fn, Args&&... args) // from a callable and individual arguments
+        : expression_with_arguments<Args...>{ std::forward<Args>(args)... }, fn(std::forward<Fn>(fn))
+    {
+        check_shapes();
+    }
+    KFR_MEM_INTRINSIC expression_function(Args&&... args) // from arguments only; fn is value-initialized
+        : expression_with_arguments<Args...>{ std::forward<Args>(args)... }, fn{}
+    {
+        check_shapes();
+    }
+    KFR_MEM_INTRINSIC void check_shapes() // computes the shape; currently has no effect because the throw below is commented out
+    {
+        if constexpr (dims > 0)
+        {
+            shape<dims> sh = shapeof(*this);
+            if (sh == shape<dims>(0))
+            {
+                // throw std::runtime_error("KFR: Invalid shapes in expression_function");
+            }
+        }
+    }
+
+    template <typename In, enable_if_input_expression<In>* = nullptr> // assignment from an input expression
+    expression_function& operator=(In&& in)
+    {
+        static_assert(is_output_expression<expression_function>);
+        process(*this, std::forward<In>(in)); // writes `in` into this expression via process()
+        return *this;
+    }
+};
+
+template <typename... Args, typename Fn> // deduction guides: take Args from an expression_with_arguments (const&, && or &) and Fn from the callable
+expression_function(const expression_with_arguments<Args...>& args, Fn&& fn)
+    -> expression_function<Fn, Args...>;
+template <typename... Args, typename Fn>
+expression_function(expression_with_arguments<Args...>&& args, Fn&& fn) -> expression_function<Fn, Args...>;
+template <typename... Args, typename Fn>
+expression_function(expression_with_arguments<Args...>& args, Fn&& fn) -> expression_function<Fn, Args...>;
+
+namespace internal
+{
+
+template <typename... Args, index_t Dims, size_t... idx> // calls begin_pass on each stored argument, in index order
+KFR_INTRINSIC void begin_pass_args(const expression_with_arguments<Args...>& self, shape<Dims> start,
+                                   shape<Dims> stop, csizes_t<idx...>)
+{
+    (begin_pass(std::get<idx>(self.args), start, stop), ...);
+}
+
+template <typename... Args, index_t Dims, size_t... idx> // calls end_pass on each stored argument, in index order
+KFR_INTRINSIC void end_pass_args(const expression_with_arguments<Args...>& self, shape<Dims> start,
+                                 shape<Dims> stop, csizes_t<idx...>)
+{
+    (end_pass(std::get<idx>(self.args), start, stop), ...);
+}
+
+template <index_t outdims, typename Fn, typename... Args, index_t Axis, size_t N, index_t Dims, size_t idx, // reads argument idx as an N-wide vector
+          typename Traits = expression_traits<typename expression_function<Fn, Args...>::template nth<idx>>>
+KFR_MEM_INTRINSIC vec<typename Traits::value_type, N> get_arg(const expression_function<Fn, Args...>& self,
+                                                              const shape<Dims>& index,
+                                                              const axis_params<Axis, N>& sh, csize_t<idx>)
+{
+    if constexpr (Traits::dims == 0)
+    {
+        return repeat<N>(get_elements(std::get<idx>(self.args), {}, axis_params<Axis, 1>{})); // 0-dim argument: read one element and repeat it N times
+    }
+    else
+    {
+        auto indices               = internal_generic::adapt<Traits::dims>(index, self.getmask(csize<idx>)); // map the output index to this argument's dims using its mask
+        constexpr index_t last_dim = Traits::shapeof().back();
+        if constexpr (last_dim != undefined_size)
+        {
+            constexpr index_t last_dim_pot = prev_poweroftwo(last_dim); // last dimension known at compile time: read min(last_dim_pot, N) elements and repeat up to N
+            return repeat<N / std::min(last_dim_pot, N)>(get_elements(
+                std::get<idx>(self.args), indices, axis_params<Axis, std::min(last_dim_pot, N)>{}));
+        }
+        else
+        {
+            if constexpr (sizeof...(Args) > 1 && N > 1)
+            {
+                if (CMT_UNLIKELY(self.masks[idx].back() == 0)) // last mask entry is 0: read a single element and return it as the N-wide result (presumably a broadcast along the last axis)
+                    return get_elements(std::get<idx>(self.args), indices, axis_params<Axis, 1>{}).front();
+                else
+                    return get_elements(std::get<idx>(self.args), indices, sh);
+            }
+            else
+            {
+                return get_elements(std::get<idx>(self.args), indices, sh); // single argument or N == 1: plain read
+            }
+        }
+    }
+}
+} // namespace internal
+
+template <typename... Args, index_t Dims> // begin_pass for argument holders: forwards to every stored argument via internal::begin_pass_args
+KFR_INTRINSIC void begin_pass(const expression_with_arguments<Args...>& self, shape<Dims> start,
+                              shape<Dims> stop)
 {
+    internal::begin_pass_args(self, start, stop, indicesfor<Args...>);
+}
+
+template <typename... Args, index_t Dims> // end_pass for argument holders: forwards to every stored argument via internal::end_pass_args
+KFR_INTRINSIC void end_pass(const expression_with_arguments<Args...>& self, shape<Dims> start,
+                            shape<Dims> stop)
+{
+    internal::end_pass_args(self, start, stop, indicesfor<Args...>);
+}
+
+template <typename Fn, typename... Args, index_t Axis, size_t N, index_t Dims, // reads N elements of an expression_function at `index`
+          typename Tr = expression_traits<expression_function<Fn, Args...>>,
+          typename T  = typename Tr::value_type>
+KFR_INTRINSIC vec<T, N> get_elements(const expression_function<Fn, Args...>& self, const shape<Dims>& index,
+                                     const axis_params<Axis, N>& sh)
+{
+    constexpr index_t outdims = Tr::dims;
+    return self.fold_idx(
+        [&](auto... idx) CMT_INLINE_LAMBDA -> vec<T, N> {
+            return self.fn(internal::get_arg<outdims>(self, index, sh, idx)...); // fetch each argument with get_arg, then apply fn to them
+        });
+}
 
 template <typename Out, typename In, index_t OutAxis, size_t w, size_t gw, typename Tin, index_t outdims,
           index_t indims>
@@ -234,8 +578,10 @@ KFR_INTRINSIC static void tprocess_body(Out&& out, In&& in, size_t start, size_t
             {
                 outidx[OutAxis] = x;
                 inidx[InAxis]   = std::min(x, insize - 1);
+                auto v = get_elements(in, inidx, axis_params_v<InAxis, w>);
+                // println("## i=", x, "\n", v);
                 set_elements(out, outidx, axis_params_v<OutAxis, w>,
-                             get_elements(in, inidx, axis_params_v<InAxis, w>));
+                             v );
             }
         }
         CMT_LOOP_NOUNROLL
@@ -253,7 +599,14 @@ template <size_t width = 0, index_t Axis = 0, typename Out, typename In, size_t 
           CMT_ENABLE_IF(expression_traits<Out>::dims == 0)>
 static auto process(Out&& out, In&& in, shape<0> = {}, shape<0> = {}, csize_t<gw> = {}) -> shape<0>
 {
+    static_assert(is_input_expression<In>, "In must be an input expression");
+    static_assert(is_output_expression<Out>, "Out must be an output expression");
+    static_assert(expression_traits<In>::dims == 0);
+    begin_pass(out, shape{}, shape{});
+    begin_pass(in, shape{}, shape{});
     set_elements(out, shape<0>{}, axis_params_v<0, 1>, get_elements(in, shape<0>{}, axis_params_v<0, 1>));
+    end_pass(in, shape{}, shape{});
+    end_pass(out, shape{}, shape{});
     return {};
 }
 
@@ -305,6 +658,9 @@ template <size_t width = 0, index_t Axis = infinite_size, typename Out, typename
 static auto process(Out&& out, In&& in, shape<outdims> start = shape<outdims>(0),
                     shape<outdims> size = shape<outdims>(infinite_size), csize_t<gw> = {}) -> shape<outdims>
 {
+    static_assert(is_input_expression<In>, "In must be an input expression");
+    static_assert(is_output_expression<Out>, "Out must be an output expression");
+
     using Trin  = expression_traits<In>;
     using Trout = expression_traits<Out>;
     using Tin   = typename Trin::value_type;
@@ -332,12 +688,15 @@ static auto process(Out&& out, In&& in, shape<outdims> start = shape<outdims>(0)
     const shape<indims> inshape   = Trin::shapeof(in);
     if (CMT_UNLIKELY(!internal_generic::can_assign_from(outshape, inshape)))
         return shape<outdims>{ 0 };
-    shape<outdims> stop = min(add_shape(start, size), outshape);
+    shape<outdims> stop = min(min(add_shape(start, size), outshape), inshape.template extend<outdims>());
 
     index_t in_size = 0;
     if constexpr (indims > 0)
         in_size = inshape[in_axis];
 
+    begin_pass(out, start, stop);
+    begin_pass(in, inshape.adapt(start), inshape.adapt(stop));
+
     shape<outdims> outidx;
     if constexpr (outdims == 1)
     {
@@ -407,255 +766,15 @@ static auto process(Out&& out, In&& in, shape<outdims> start = shape<outdims>(0)
             outidx[out_axis] = stop[out_axis] - 1;
         } while (internal_generic::increment_indices(outidx, start, stop));
     }
+    end_pass(in, inshape.adapt(start), inshape.adapt(stop));
+    end_pass(out, start, stop);
     return stop;
 }
-} // namespace CMT_ARCH_NAME
-
-template <typename... Args>
-struct xwitharguments
-{
-    constexpr static size_t count = sizeof...(Args);
-
-    using type_list = ctypes_t<Args...>;
-
-    template <size_t idx>
-    using nth = typename type_list::template nth<idx>;
-
-    using first_arg = typename type_list::template nth<0>;
-
-    template <size_t idx>
-    using nth_trait = expression_traits<typename type_list::template nth<idx>>;
-
-    using first_arg_trait = expression_traits<first_arg>;
-
-    std::tuple<Args...> args;
-    std::array<dimset, count> masks;
-
-    KFR_INTRINSIC auto& first() { return std::get<0>(args); }
-    KFR_INTRINSIC const auto& first() const { return std::get<0>(args); }
-
-    template <size_t idx>
-    KFR_INTRINSIC dimset getmask(csize_t<idx> = {}) const
-    {
-        static_assert(idx < count);
-        using Traits = expression_traits<nth<idx>>;
-        if constexpr (sizeof...(Args) <= 1 || Traits::dims == 0)
-        {
-            return -1;
-        }
-        else
-        {
-            constexpr shape<Traits::dims> sh = Traits::shapeof();
-            if constexpr (sh.cproduct() > 0)
-            {
-                return sh.tomask();
-            }
-            else
-            {
-                return std::get<idx>(masks);
-            }
-        }
-    }
-
-    template <typename Fn>
-    KFR_INTRINSIC constexpr auto fold(Fn&& fn) const
-    {
-        return fold_impl(std::forward<Fn>(fn), csizeseq<count>);
-    }
-    template <typename Fn>
-    KFR_INTRINSIC constexpr static auto fold_idx(Fn&& fn)
-    {
-        return fold_idx_impl(std::forward<Fn>(fn), csizeseq<count>);
-    }
-
-    KFR_INTRINSIC xwitharguments(Args&&... args) : args{ std::forward<Args>(args)... }
-    {
-        cforeach(csizeseq<count>,
-                 [&](auto idx_) CMT_INLINE_LAMBDA
-                 {
-                     constexpr size_t idx = val_of(decltype(idx_)());
-                     shape sh             = expression_traits<nth<idx>>::shapeof(std::get<idx>(this->args));
-                     masks[idx]           = sh.tomask();
-                 });
-    }
-
-private:
-    template <typename Fn, size_t... indices>
-    KFR_INTRINSIC constexpr auto fold_impl(Fn&& fn, csizes_t<indices...>) const
-    {
-        return fn(std::get<indices>(args)...);
-    }
-    template <typename Fn, size_t... indices>
-    KFR_INTRINSIC constexpr static auto fold_idx_impl(Fn&& fn, csizes_t<indices...>)
-    {
-        return fn(csize<indices>...);
-    }
-};
-
-template <typename Arg>
-struct xwitharguments<Arg>
-{
-    constexpr static size_t count = 1;
-
-    using type_list = ctypes_t<Arg>;
-
-    template <size_t idx>
-    using nth = Arg;
-
-    using first_arg = Arg;
-
-    template <size_t idx>
-    using nth_trait = expression_traits<Arg>;
-
-    using first_arg_trait = expression_traits<first_arg>;
-
-    std::tuple<Arg> args;
-
-    KFR_MEM_INTRINSIC auto& first() { return std::get<0>(args); }
-    KFR_MEM_INTRINSIC const auto& first() const { return std::get<0>(args); }
-
-    template <size_t idx>
-    KFR_MEM_INTRINSIC dimset getmask(csize_t<idx> = {}) const
-    {
-        return -1;
-    }
-
-    template <typename Fn>
-    KFR_MEM_INTRINSIC constexpr auto fold(Fn&& fn) const
-    {
-        return fold_impl(std::forward<Fn>(fn), csizeseq<count>);
-    }
-    template <typename Fn>
-    KFR_INTRINSIC constexpr static auto fold_idx(Fn&& fn)
-    {
-        return fold_idx_impl(std::forward<Fn>(fn), csizeseq<count>);
-    }
-
-    KFR_MEM_INTRINSIC xwitharguments(Arg&& arg) : args{ std::forward<Arg>(arg) } {}
-
-private:
-    template <typename Fn, size_t... indices>
-    KFR_MEM_INTRINSIC constexpr auto fold_impl(Fn&& fn, csizes_t<indices...>) const
-    {
-        return fn(std::get<indices>(args)...);
-    }
-    template <typename Fn, size_t... indices>
-    KFR_INTRINSIC constexpr static auto fold_idx_impl(Fn&& fn, csizes_t<indices...>)
-    {
-        return fn(csize<indices>...);
-    }
-};
-
-template <typename... Args>
-xwitharguments(Args&&... args) -> xwitharguments<Args...>;
-
-template <typename Fn, typename... Args>
-struct xfunction : public xwitharguments<Args...>
-{
-    Fn fn;
-
-    KFR_MEM_INTRINSIC xfunction(xwitharguments<Args...> args, Fn&& fn)
-        : xwitharguments<Args...>{ std::move(args) }, fn(std::move(fn))
-    {
-    }
-    KFR_MEM_INTRINSIC xfunction(Fn&& fn, Args&&... args)
-        : xwitharguments<Args...>{ std::forward<Args>(args)... }, fn(std::move(fn))
-    {
-    }
-};
-
-template <typename... Args, typename Fn>
-xfunction(const xwitharguments<Args...>& args, Fn&& fn) -> xfunction<Fn, Args...>;
-template <typename... Args, typename Fn>
-xfunction(xwitharguments<Args...>&& args, Fn&& fn) -> xfunction<Fn, Args...>;
-template <typename... Args, typename Fn>
-xfunction(xwitharguments<Args...>& args, Fn&& fn) -> xfunction<Fn, Args...>;
 
 template <typename Fn, typename... Args>
-struct expression_traits<xfunction<Fn, Args...>> : expression_traits_defaults
-{
-    using E = xfunction<Fn, Args...>;
-
-    using value_type =
-        typename std::invoke_result_t<Fn,
-                                      vec<typename expression_traits<Args>::value_type, 1>...>::value_type;
-    constexpr static size_t dims = const_max(expression_traits<Args>::dims...);
-
-    constexpr static shape<dims> shapeof(const E& self)
-    {
-        return self.fold([&](auto&&... args) CMT_INLINE_LAMBDA -> auto {
-            return internal_generic::common_shape(expression_traits<decltype(args)>::shapeof(args)...);
-        });
-    }
-    constexpr static shape<dims> shapeof()
-    {
-        return xfunction<Fn, Args...>::fold_idx([&](auto... args) CMT_INLINE_LAMBDA -> auto {
-            return internal_generic::common_shape(
-                expression_traits<typename E::template nth<val_of(decltype(args)())>>::shapeof()...);
-        });
-    }
-
-    constexpr static inline bool random_access = (expression_traits<Args>::random_access && ...);
-};
-
-inline namespace CMT_ARCH_NAME
-{
-
-namespace internal
+KFR_FUNCTION expression_function<decay<Fn>, Args...> bind_expression(Fn&& fn, Args&&... args)
 {
-template <index_t outdims, typename Fn, typename... Args, index_t Axis, size_t N, index_t Dims, size_t idx,
-          typename Traits = expression_traits<typename xfunction<Fn, Args...>::template nth<idx>>>
-KFR_MEM_INTRINSIC vec<typename Traits::value_type, N> get_arg(const xfunction<Fn, Args...>& self,
-                                                              const shape<Dims>& index,
-                                                              const axis_params<Axis, N>& sh, csize_t<idx>)
-{
-    if constexpr (Traits::dims == 0)
-    {
-        return repeat<N>(get_elements(std::get<idx>(self.args), {}, axis_params<Axis, 1>{}));
-    }
-    else
-    {
-        auto indices               = internal_generic::adapt<Traits::dims>(index, self.getmask(csize<idx>));
-        constexpr index_t last_dim = Traits::shapeof().back();
-        if constexpr (last_dim > 0)
-        {
-            return repeat<N / std::min(last_dim, N)>(
-                get_elements(std::get<idx>(self.args), indices, axis_params<Axis, std::min(last_dim, N)>{}));
-        }
-        else
-        {
-            if constexpr (sizeof...(Args) > 1 && N > 1)
-            {
-                if (CMT_UNLIKELY(self.masks[idx].back() == 0))
-                    return get_elements(std::get<idx>(self.args), indices, axis_params<Axis, 1>{}).front();
-                else
-                    return get_elements(std::get<idx>(self.args), indices, sh);
-            }
-            else
-            {
-                return get_elements(std::get<idx>(self.args), indices, sh);
-            }
-        }
-    }
-}
-} // namespace internal
-
-template <typename Fn, typename... Args, index_t Axis, size_t N, index_t Dims,
-          typename Tr = expression_traits<xfunction<Fn, Args...>>, typename T = typename Tr::value_type>
-KFR_INTRINSIC vec<T, N> get_elements(const xfunction<Fn, Args...>& self, const shape<Dims>& index,
-                                     const axis_params<Axis, N>& sh)
-{
-    constexpr index_t outdims = Tr::dims;
-    return self.fold_idx(
-        [&](auto... idx) CMT_INLINE_LAMBDA -> vec<T, N> {
-            return self.fn(internal::get_arg<outdims>(self, index, sh, idx)...);
-        });
-}
-
-template <typename Fn, typename... Args>
-KFR_FUNCTION xfunction<decay<Fn>, Args...> bind_expression(Fn&& fn, Args&&... args)
-{
-    return xfunction<decay<Fn>, Args...>(std::forward<Fn>(fn), std::forward<Args>(args)...);
+    return expression_function<decay<Fn>, Args...>(std::forward<Fn>(fn), std::forward<Args>(args)...);
 }
 /**
  * @brief Construct a new expression using the same function as in @c e and new arguments
@@ -663,22 +782,79 @@ KFR_FUNCTION xfunction<decay<Fn>, Args...> bind_expression(Fn&& fn, Args&&... ar
  * @param args new arguments for the function
  */
 template <typename Fn, typename... OldArgs, typename... NewArgs>
-KFR_FUNCTION xfunction<Fn, NewArgs...> rebind(const xfunction<Fn, OldArgs...>& e, NewArgs&&... args)
+KFR_FUNCTION expression_function<Fn, NewArgs...> rebind(const expression_function<Fn, OldArgs...>& e,
+                                                        NewArgs&&... args)
 {
-    return xfunction<Fn, NewArgs...>(e.fn, std::forward<NewArgs>(args)...);
+    return expression_function<Fn, NewArgs...>(Fn{ e.fn }, std::forward<NewArgs>(args)...);
 }
 template <typename Fn, typename... OldArgs, typename... NewArgs>
-KFR_FUNCTION xfunction<Fn, NewArgs...> rebind(xfunction<Fn, OldArgs...>&& e, NewArgs&&... args)
+KFR_FUNCTION expression_function<Fn, NewArgs...> rebind(expression_function<Fn, OldArgs...>&& e,
+                                                        NewArgs&&... args)
+{
+    return expression_function<Fn, NewArgs...>(std::move(e.fn), std::forward<NewArgs>(args)...);
+}
+
+#ifdef KFR_TESTING
+namespace internal
 {
-    return xfunction<Fn, NewArgs...>(std::move(e.fn), std::forward<NewArgs>(args)...);
+template <typename T, size_t N, typename Fn> // reference values for the test: applies fn over enumerate<size_t, N>() + index (presumably N consecutive indices starting at index)
+inline vec<T, N> get_fn_value(size_t index, Fn&& fn)
+{
+    return apply(fn, enumerate<size_t, N>() + index);
 }
+} // namespace internal
 
-template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)>
-KFR_FUNCTION xfunction<fn::interleave, E1, E2> interleave(E1&& x, E2&& y)
+template <typename E, typename Fn, KFR_ENABLE_IF(std::is_invocable_v<Fn, size_t>)> // checks a 1-dim expression against fn(index), reading it in chunks of varying width
+void test_expression(const E& expr, size_t size, Fn&& fn, const char* expression = nullptr,
+                     const char* file = nullptr, int line = 0)
+{
+    static_assert(expression_dims<E> == 1, "CHECK_EXPRESSION supports only 1-dim expressions");
+    using T                  = expression_value_type<E>;
+    size_t expr_size         = shapeof(expr).front();
+    ::testo::test_case* test = ::testo::active_test();
+    auto&& c                 = ::testo::make_comparison();
+    test->check(c <= expr_size == size, expression, file, line); // the expression must report the expected size
+    if (expr_size != size)
+        return; // sizes differ: skip the element comparison
+    size                     = min(shape{ size }, shape{ 200 }).front(); // compare at most the first 200 elements
+    constexpr size_t maxsize = 2 + ilog2(vector_width<T> * 2); // number of widths offered to cswitch below
+    size_t g                 = 1; // upper bound for the next chunk width
+    for (size_t i = 0; i < size;)
+    {
+        const size_t next_size = std::min(prev_poweroftwo(size - i), g); // chunk width: limited by the remaining elements and by g
+        g *= 2;
+        if (g > (1 << (maxsize - 1)))
+            g = 1; // restart from width 1 after exceeding 1 << (maxsize - 1)
+
+        cswitch(csize<1> << csizeseq<maxsize>, next_size, // dispatch the runtime width to a compile-time nsize
+                [&](auto x)
+                {
+                    constexpr size_t nsize = val_of(decltype(x)());
+                    ::testo::scope s(as_string("i = ", i, " width = ", nsize));
+                    test->check(c <= get_elements(expr, shape<1>(i), axis_params_v<0, nsize>) ==
+                                    internal::get_fn_value<T, nsize>(i, fn),
+                                expression, file, line);
+                });
+        i += next_size;
+    }
+}
+
+template <typename E, typename T = expression_value_type<E>> // overload taking the expected values as a list; delegates to the callable overload
+void test_expression(const E& expr, std::initializer_list<cometa::identity<T>> list,
+                     const char* expression = nullptr, const char* file = nullptr, int line = 0)
 {
+    test_expression(
+        expr, list.size(), [&](size_t i) { return list.begin()[i]; }, expression, file, line); // expected value at index i is the i-th list element
 }
+#define TESTO_CHECK_EXPRESSION(expr, ...) ::kfr::test_expression(expr, __VA_ARGS__, #expr, __FILE__, __LINE__)
+
+#ifndef TESTO_NO_SHORT_MACROS
+#define CHECK_EXPRESSION TESTO_CHECK_EXPRESSION
+#endif
+#endif
+
 } // namespace CMT_ARCH_NAME
+
 } // namespace kfr
 
 CMT_PRAGMA_GNU(GCC diagnostic pop)
diff --git a/include/kfr/base/filter.hpp b/include/kfr/base/filter.hpp
@@ -78,12 +78,12 @@ public:
     void apply(T* dest, const T* src, size_t size) { process_buffer(dest, src, size); }
 
     template <univector_tag Tag>
-    void apply(univector<T, Tag>& dest, const expression_pointer<T>& src)
+    void apply(univector<T, Tag>& dest, const expression_pointer<T, 1>& src)
     {
         process_expression(dest.data(), src, size_min(dest.size(), src.size()));
     }
 
-    void apply(T* dest, const expression_pointer<T>& src, size_t size)
+    void apply(T* dest, const expression_pointer<T, 1>& src, size_t size)
     {
         process_expression(dest, src, size_min(size, src.size()));
     }
@@ -101,36 +101,36 @@ public:
     }
 
 protected:
-    virtual void process_buffer(T* dest, const T* src, size_t size)                         = 0;
-    virtual void process_expression(T* dest, const expression_pointer<T>& src, size_t size) = 0;
+    virtual void process_buffer(T* dest, const T* src, size_t size)                            = 0;
+    virtual void process_expression(T* dest, const expression_pointer<T, 1>& src, size_t size) = 0;
 };
 
 template <typename T>
 class expression_filter : public filter<T>
 {
 public:
-    explicit expression_filter(expression_pointer<T>&& filter_expr) : filter_expr(std::move(filter_expr)) {}
+    explicit expression_filter(expression_pointer<T, 1> filter_expr) : filter_expr(std::move(filter_expr)) {}
 
 protected:
     void process_buffer(T* dest, const T* src, size_t size) override
     {
         substitute(filter_expr, to_pointer(make_univector(src, size)));
-        process(make_univector(dest, size), filter_expr, 0, size);
+        process(make_univector(dest, size), filter_expr, shape<1>(0), shape<1>(size));
     }
-    void process_expression(T* dest, const expression_pointer<T>& src, size_t size) override
+    void process_expression(T* dest, const expression_pointer<T, 1>& src, size_t size) override
     {
         substitute(filter_expr, src);
-        process(make_univector(dest, size), filter_expr, 0, size);
+        process(make_univector(dest, size), filter_expr, shape<1>(0), shape<1>(size));
     }
 
-    expression_pointer<T> filter_expr;
+    expression_pointer<T, 1> filter_expr;
 };
 
 inline namespace CMT_ARCH_NAME
 {
 
 /// @brief Converts expression with placeholder to filter. Placeholder and filter must have the same type
-template <typename E, typename T = value_type_of<E>>
+template <typename E, typename T = expression_value_type<E>>
 KFR_INTRINSIC expression_filter<T> to_filter(E&& e)
 {
     return expression_filter<T>(to_pointer(std::move(e)));
@@ -139,7 +139,7 @@ KFR_INTRINSIC expression_filter<T> to_filter(E&& e)
 
 /// @brief Converts expression with placeholder to filter. Placeholder and filter must have the same type
 template <typename T, typename E>
-KFR_INTRINSIC expression_filter<T> to_filter(expression_pointer<T>&& e)
+KFR_INTRINSIC expression_filter<T> to_filter(expression_pointer<T, 1>&& e)
 {
     return expression_filter<T>(std::move(e));
 }
diff --git a/include/kfr/base/generators.hpp b/include/kfr/base/generators.hpp
@@ -27,17 +27,29 @@
 
 #include "../math/log_exp.hpp"
 #include "../math/sin_cos.hpp"
+#include "../simd/complex.hpp"
 #include "../simd/impl/function.hpp"
 #include "../simd/select.hpp"
 #include "../simd/vec.hpp"
+#include "expression.hpp"
 #include "shape.hpp"
 
 namespace kfr
 {
 
+inline namespace CMT_ARCH_NAME
+{
+
 template <typename T, size_t VecWidth, typename Class, typename Twork = T>
-struct xgenerator
+struct generator : public expression_traits_defaults
 {
+    using value_type             = T;
+    constexpr static size_t dims = 1;
+    constexpr static shape<1> shapeof(const Class&) { return infinite_size; }
+    constexpr static shape<1> shapeof() { return infinite_size; }
+
+    constexpr static inline bool random_access = false;
+
     constexpr static size_t width = VecWidth;
 
     void resync(T start) const { ptr_cast<Class>(this)->sync(start); }
@@ -53,10 +65,11 @@ struct xgenerator
             value = slice<N, width>(oldvalue, value);
             return result;
         }
-        else if (N > width)
+        else if constexpr (N > width)
         {
-            const vec lo = generate(low(x));
-            const vec hi = generate(high(x));
+            constexpr size_t Nlow = prev_poweroftwo(N - 1);
+            const vec lo          = generate<Nlow>();
+            const vec hi          = generate<N - Nlow>();
             return concat(lo, hi);
         }
         else // N == width
@@ -68,6 +81,13 @@ struct xgenerator
     }
     mutable vec<Twork, width> value;
 
+    template <size_t N>
+    friend KFR_INTRINSIC vec<T, N> get_elements(const generator& self, const shape<1>& index,
+                                                const axis_params<0, N>&)
+    {
+        return self.template generate<N>();
+    }
+
 private:
     KFR_MEM_INTRINSIC void call_next() const { ptr_cast<Class>(this)->next(); }
 
@@ -79,32 +99,10 @@ private:
     }
 };
 
-template <typename T, size_t VecWidth, typename Class>
-struct expression_traits<xgenerator<T, VecWidth, Class>> : public expression_traits_defaults
-{
-    using value_type             = T;
-    constexpr static size_t dims = 1;
-    constexpr static shape<1> shapeof(const T&) { return shape<1>(infinite_size); }
-    constexpr static shape<1> shapeof() { return shape<1>(infinite_size); }
-
-    constexpr static inline bool explicit_operand = true;
-    constexpr static inline bool random_access    = false;
-};
-
-inline namespace CMT_ARCH_NAME
-{
-template <typename T, size_t VecWidth, typename Class, size_t N>
-KFR_INTRINSIC vec<T, N> get_elements(const xgenerator<T, VecWidth, Class>& self, const shape<1>& index,
-                                     const axis_params<0, N>&)
-{
-    return self.template generate<N>();
-}
-} // namespace CMT_ARCH_NAME
-
 template <typename T, size_t VecWidth = vector_capacity<T> / 8>
-struct xgenlinear : public xgenerator<T, VecWidth, xgenlinear<T, VecWidth>>
+struct generator_linear : public generator<T, VecWidth, generator_linear<T, VecWidth>>
 {
-    xgenlinear(T start, T step) CMT_NOEXCEPT : step{ step }, vstep{ step * VecWidth } { sync(start); }
+    generator_linear(T start, T step) CMT_NOEXCEPT : vstep{ step * VecWidth } { sync(start); }
 
     KFR_MEM_INTRINSIC void sync(T start) const CMT_NOEXCEPT
     {
@@ -117,10 +115,10 @@ struct xgenlinear : public xgenerator<T, VecWidth, xgenlinear<T, VecWidth>>
 };
 
 template <typename T, size_t VecWidth = vector_capacity<T> / 8>
-struct xgenexp : public xgenerator<T, VecWidth, xgenexp<T, VecWidth>>
+struct generator_exp : public generator<T, VecWidth, generator_exp<T, VecWidth>>
 {
-    xgenexp(T start, T step) CMT_NOEXCEPT : step{ step },
-                                            vstep{ exp(make_vector(step * VecWidth)).front() - 1 }
+    generator_exp(T start, T step) CMT_NOEXCEPT : step{ step },
+                                                  vstep{ exp(make_vector(step * VecWidth)).front() - 1 }
     {
         this->resync(start);
     }
@@ -138,12 +136,12 @@ protected:
 };
 
 template <typename T, size_t VecWidth = vector_capacity<deep_subtype<T>> / 8 / 2>
-struct xgenexpj : public xgenerator<T, VecWidth, xgenexpj<T, VecWidth>>
+struct generator_expj : public generator<T, VecWidth, generator_expj<T, VecWidth>>
 {
     using ST = deep_subtype<T>;
-    static_assert(std::is_same_v<complex<deep_subtype<T>>, T>, "xgenexpj requires complex type");
+    static_assert(std::is_same_v<complex<deep_subtype<T>>, T>, "generator_expj requires complex type");
 
-    xgenexpj(ST start_, ST step_)
+    generator_expj(ST start_, ST step_)
         : step(step_), alpha(2 * sqr(sin(VecWidth * step / 2))), beta(-sin(VecWidth * step))
     {
         this->resync(T(start_));
@@ -162,14 +160,15 @@ protected:
     ST beta;
     CMT_NOINLINE static vec<T, VecWidth> init_cossin(ST w, ST phase)
     {
-        return ccomp(cossin(dup(phase + enumerate<ST, width>() * w)));
+        return ccomp(cossin(dup(phase + enumerate<ST, VecWidth>() * w)));
     }
 };
 
 template <typename T, size_t VecWidth = vector_capacity<T> / 8>
-struct xgenexp2 : public xgenerator<T, VecWidth, xgenexp2<T, VecWidth>>
+struct generator_exp2 : public generator<T, VecWidth, generator_exp2<T, VecWidth>>
 {
-    xgenexp2(T start, T step) CMT_NOEXCEPT : step{ step }, vstep{ exp2(make_vector(step * VecWidth))[0] - 1 }
+    generator_exp2(T start, T step) CMT_NOEXCEPT : step{ step },
+                                                   vstep{ exp2(make_vector(step * VecWidth))[0] - 1 }
     {
         this->resync(start);
     }
@@ -187,10 +186,10 @@ protected:
 };
 
 template <typename T, size_t VecWidth = vector_capacity<T> / 8>
-struct xgencossin : public xgenerator<T, VecWidth, xgencossin<T, VecWidth>>
+struct generator_cossin : public generator<T, VecWidth, generator_cossin<T, VecWidth>>
 {
     static_assert(VecWidth % 2 == 0);
-    xgencossin(T start, T step)
+    generator_cossin(T start, T step)
         : step(step), alpha(2 * sqr(sin(VecWidth / 2 * step / 2))), beta(-sin(VecWidth / 2 * step))
     {
         this->resync(start);
@@ -213,9 +212,9 @@ protected:
 };
 
 template <typename T, size_t VecWidth = vector_capacity<T> / 8 / 2>
-struct xgensin : public xgenerator<T, VecWidth, xgensin<T, VecWidth>, vec<T, 2>>
+struct generator_sin : public generator<T, VecWidth, generator_sin<T, VecWidth>, vec<T, 2>>
 {
-    xgensin(T start, T step)
+    generator_sin(T start, T step)
         : step(step), alpha(2 * sqr(sin(VecWidth * step / 2))), beta(sin(VecWidth * step))
     {
         this->resync(start);
@@ -248,9 +247,6 @@ protected:
     T beta;
 };
 
-inline namespace CMT_ARCH_NAME
-{
-
 /**
  * @brief Returns template expression that generates values starting from the start and using the step as the
  * increment between numbers.
@@ -260,9 +256,9 @@ inline namespace CMT_ARCH_NAME
    \f]
  */
 template <typename T1, typename T2, typename TF = ftype<common_type<T1, T2>>>
-KFR_FUNCTION xgenlinear<TF> gen_linear(T1 start, T2 step)
+KFR_FUNCTION generator_linear<TF> gen_linear(T1 start, T2 step)
 {
-    return xgenlinear<TF>(start, step);
+    return generator_linear<TF>(start, step);
 }
 
 /**
@@ -272,9 +268,9 @@ KFR_FUNCTION xgenlinear<TF> gen_linear(T1 start, T2 step)
    \f]
  */
 template <typename T1, typename T2, typename TF = ftype<common_type<T1, T2>>>
-KFR_FUNCTION xgenexp<TF> gen_exp(T1 start, T2 step)
+KFR_FUNCTION generator_exp<TF> gen_exp(T1 start, T2 step)
 {
-    return xgenexp<TF>(start, step);
+    return generator_exp<TF>(start, step);
 }
 
 /**
@@ -284,9 +280,9 @@ KFR_FUNCTION xgenexp<TF> gen_exp(T1 start, T2 step)
    \f]
  */
 template <typename T1, typename T2, typename TF = complex<ftype<common_type<T1, T2>>>>
-KFR_FUNCTION xgenexpj<TF> gen_expj(T1 start, T2 step)
+KFR_FUNCTION generator_expj<TF> gen_expj(T1 start, T2 step)
 {
-    return xgenexpj<TF>(start, step);
+    return generator_expj<TF>(start, step);
 }
 
 /**
@@ -296,9 +292,9 @@ KFR_FUNCTION xgenexpj<TF> gen_expj(T1 start, T2 step)
    \f]
  */
 template <typename T1, typename T2, typename TF = ftype<common_type<T1, T2>>>
-KFR_FUNCTION xgenexp2<TF> gen_exp2(T1 start, T2 step)
+KFR_FUNCTION generator_exp2<TF> gen_exp2(T1 start, T2 step)
 {
-    return xgenexp2<TF>(start, step);
+    return generator_exp2<TF>(start, step);
 }
 
 /**
@@ -312,9 +308,9 @@ KFR_FUNCTION xgenexp2<TF> gen_exp2(T1 start, T2 step)
    \f]
  */
 template <typename T1, typename T2, typename TF = ftype<common_type<T1, T2>>>
-KFR_FUNCTION xgencossin<TF> gen_cossin(T1 start, T2 step)
+KFR_FUNCTION generator_cossin<TF> gen_cossin(T1 start, T2 step)
 {
-    return xgencossin<TF>(start, step);
+    return generator_cossin<TF>(start, step);
 }
 
 /**
@@ -324,9 +320,9 @@ KFR_FUNCTION xgencossin<TF> gen_cossin(T1 start, T2 step)
    \f]
  */
 template <typename T1, typename T2, typename TF = ftype<common_type<T1, T2>>>
-KFR_FUNCTION xgensin<TF> gen_sin(T1 start, T2 step)
+KFR_FUNCTION generator_sin<TF> gen_sin(T1 start, T2 step)
 {
-    return xgensin<TF>(start, step);
+    return generator_sin<TF>(start, step);
 }
 } // namespace CMT_ARCH_NAME
 } // namespace kfr
diff --git a/include/kfr/base/impl/static_array.hpp b/include/kfr/base/impl/static_array.hpp
@@ -2,7 +2,7 @@
  *  @{
  */
 /*
-  Copyright (C) 2016 D Levin (https://www.kfrlib.com)
+  Copyright (C) 2016-2022 Fractalium Ltd (https://www.kfrlib.com)
   This file is part of KFR
 
   KFR is free software: you can redistribute it and/or modify
diff --git a/include/kfr/base/math_expressions.hpp b/include/kfr/base/math_expressions.hpp
@@ -2,7 +2,7 @@
  *  @{
  */
 /*
-  Copyright (C) 2016 D Levin (https://www.kfrlib.com)
+  Copyright (C) 2016-2022 Fractalium Ltd (https://www.kfrlib.com)
   This file is part of KFR
 
   KFR is free software: you can redistribute it and/or modify
@@ -33,120 +33,10 @@ namespace kfr
 {
 
 /**
- * @brief Returns template expression that returns x if m is true, otherwise return y. Order of the arguments
- * is same as in ternary operator.
- */
-template <typename E1, typename E2, typename E3, KFR_ACCEPT_EXPRESSIONS(E1, E2, E3)>
-KFR_FUNCTION xfunction<fn::select, E1, E2, E3> select(E1&& m, E2&& x, E3&& y)
-{
-    return { fn::select(), std::forward<E1>(m), std::forward<E2>(x), std::forward<E3>(y) };
-}
-
-/**
- * @brief Returns template expression that returns the absolute value of x.
- */
-template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::abs, E1> abs(E1&& x)
-{
-    return { fn::abs(), std::forward<E1>(x) };
-}
-
-/**
- * @brief Returns the smaller of two values. Accepts and returns expressions.
- */
-template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)>
-KFR_FUNCTION xfunction<fn::min, E1, E2> min(E1&& x, E2&& y)
-{
-    return { fn::min(), std::forward<E1>(x), std::forward<E2>(y) };
-}
-
-/**
- * @brief Returns the greater of two values. Accepts and returns expressions.
- */
-template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)>
-KFR_FUNCTION xfunction<fn::max, E1, E2> max(E1&& x, E2&& y)
-{
-    return { fn::max(), std::forward<E1>(x), std::forward<E2>(y) };
-}
-
-/**
- * @brief Returns the smaller in magnitude of two values. Accepts and returns expressions.
- */
-template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)>
-KFR_FUNCTION xfunction<fn::absmin, E1, E2> absmin(E1&& x, E2&& y)
-{
-    return { fn::absmin(), std::forward<E1>(x), std::forward<E2>(y) };
-}
-
-/**
- * @brief Returns the greater in magnitude of two values. Accepts and returns expressions.
- */
-template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)>
-KFR_FUNCTION xfunction<fn::absmax, E1, E2> absmax(E1&& x, E2&& y)
-{
-    return { fn::absmax(), std::forward<E1>(x), std::forward<E2>(y) };
-}
-
-/// @brief Returns the largest integer value not greater than x. Accepts and returns expressions.
-template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::floor, E1> floor(E1&& x)
-{
-    return { fn::floor(), std::forward<E1>(x) };
-}
-
-template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::ceil, E1> ceil(E1&& x)
-{
-    return { fn::ceil(), std::forward<E1>(x) };
-}
-
-template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::round, E1> round(E1&& x)
-{
-    return { fn::round(), std::forward<E1>(x) };
-}
-
-template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::trunc, E1> trunc(E1&& x)
-{
-    return { fn::trunc(), std::forward<E1>(x) };
-}
-
-template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::fract, E1> fract(E1&& x)
-{
-    return { fn::fract(), std::forward<E1>(x) };
-}
-
-template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::ifloor, E1> ifloor(E1&& x)
-{
-    return { fn::ifloor(), std::forward<E1>(x) };
-}
-
-template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::iceil, E1> iceil(E1&& x)
-{
-    return { fn::iceil(), std::forward<E1>(x) };
-}
-
-template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::iround, E1> iround(E1&& x)
-{
-    return { fn::iround(), std::forward<E1>(x) };
-}
-
-template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::itrunc, E1> itrunc(E1&& x)
-{
-    return { fn::itrunc(), std::forward<E1>(x) };
-}
-
-/**
  * @brief Returns the trigonometric sine of x. Accepts and returns expressions.
  */
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::sin, E1> sin(E1&& x)
+KFR_FUNCTION expression_function<fn::sin, E1> sin(E1&& x)
 {
     return { fn::sin(), std::forward<E1>(x) };
 }
@@ -155,7 +45,7 @@ KFR_FUNCTION xfunction<fn::sin, E1> sin(E1&& x)
  * @brief Returns the trigonometric cosine of x. Accepts and returns expressions.
  */
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::cos, E1> cos(E1&& x)
+KFR_FUNCTION expression_function<fn::cos, E1> cos(E1&& x)
 {
     return { fn::cos(), std::forward<E1>(x) };
 }
@@ -164,7 +54,7 @@ KFR_FUNCTION xfunction<fn::cos, E1> cos(E1&& x)
  * @brief Returns an approximation of the trigonometric sine of x. Accepts and returns expressions.
  */
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::fastsin, E1> fastsin(E1&& x)
+KFR_FUNCTION expression_function<fn::fastsin, E1> fastsin(E1&& x)
 {
     return { fn::fastsin(), std::forward<E1>(x) };
 }
@@ -173,7 +63,7 @@ KFR_FUNCTION xfunction<fn::fastsin, E1> fastsin(E1&& x)
  * @brief Returns an approximation of the trigonometric cosine of x. Accepts and returns expressions.
  */
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::fastcos, E1> fastcos(E1&& x)
+KFR_FUNCTION expression_function<fn::fastcos, E1> fastcos(E1&& x)
 {
     return { fn::fastcos(), std::forward<E1>(x) };
 }
@@ -183,7 +73,7 @@ KFR_FUNCTION xfunction<fn::fastcos, E1> fastcos(E1&& x)
  * cosine of the odd elements. x must be a vector. Accepts and returns expressions.
  */
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::sincos, E1> sincos(E1&& x)
+KFR_FUNCTION expression_function<fn::sincos, E1> sincos(E1&& x)
 {
     return { fn::sincos(), std::forward<E1>(x) };
 }
@@ -193,7 +83,7 @@ KFR_FUNCTION xfunction<fn::sincos, E1> sincos(E1&& x)
  * sine of the odd elements. x must be a vector. Accepts and returns expressions.
  */
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::cossin, E1> cossin(E1&& x)
+KFR_FUNCTION expression_function<fn::cossin, E1> cossin(E1&& x)
 {
     return { fn::cossin(), std::forward<E1>(x) };
 }
@@ -202,7 +92,7 @@ KFR_FUNCTION xfunction<fn::cossin, E1> cossin(E1&& x)
  * @brief Returns the trigonometric sine of the x (expressed in degrees). Accepts and returns expressions.
  */
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::sindeg, E1> sindeg(E1&& x)
+KFR_FUNCTION expression_function<fn::sindeg, E1> sindeg(E1&& x)
 {
     return { fn::sindeg(), std::forward<E1>(x) };
 }
@@ -211,7 +101,7 @@ KFR_FUNCTION xfunction<fn::sindeg, E1> sindeg(E1&& x)
  * @brief Returns the trigonometric cosine of the x (expressed in degrees). Accepts and returns expressions.
  */
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::cosdeg, E1> cosdeg(E1&& x)
+KFR_FUNCTION expression_function<fn::cosdeg, E1> cosdeg(E1&& x)
 {
     return { fn::cosdeg(), std::forward<E1>(x) };
 }
@@ -221,7 +111,7 @@ KFR_FUNCTION xfunction<fn::cosdeg, E1> cosdeg(E1&& x)
  * (expressed in degrees). Accepts and returns expressions.
  */
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::fastsindeg, E1> fastsindeg(E1&& x)
+KFR_FUNCTION expression_function<fn::fastsindeg, E1> fastsindeg(E1&& x)
 {
     return { fn::fastsindeg(), std::forward<E1>(x) };
 }
@@ -231,7 +121,7 @@ KFR_FUNCTION xfunction<fn::fastsindeg, E1> fastsindeg(E1&& x)
  * (expressed in degrees). Accepts and returns expressions.
  */
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::fastcosdeg, E1> fastcosdeg(E1&& x)
+KFR_FUNCTION expression_function<fn::fastcosdeg, E1> fastcosdeg(E1&& x)
 {
     return { fn::fastcosdeg(), std::forward<E1>(x) };
 }
@@ -241,7 +131,7 @@ KFR_FUNCTION xfunction<fn::fastcosdeg, E1> fastcosdeg(E1&& x)
  * cosine of the odd elements. x must be expressed in degrees. Accepts and returns expressions.
  */
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::sincosdeg, E1> sincosdeg(E1&& x)
+KFR_FUNCTION expression_function<fn::sincosdeg, E1> sincosdeg(E1&& x)
 {
     return { fn::sincosdeg(), std::forward<E1>(x) };
 }
@@ -251,7 +141,7 @@ KFR_FUNCTION xfunction<fn::sincosdeg, E1> sincosdeg(E1&& x)
  * sine of the odd elements. x must be expressed in degrees. Accepts and returns expressions.
  */
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::cossindeg, E1> cossindeg(E1&& x)
+KFR_FUNCTION expression_function<fn::cossindeg, E1> cossindeg(E1&& x)
 {
     return { fn::cossindeg(), std::forward<E1>(x) };
 }
@@ -260,35 +150,21 @@ KFR_FUNCTION xfunction<fn::cossindeg, E1> cossindeg(E1&& x)
  * @brief Returns the sinc function of x. Accepts and returns expressions.
  */
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::sinc, E1> sinc(E1&& x)
+KFR_FUNCTION expression_function<fn::sinc, E1> sinc(E1&& x)
 {
     return { fn::sinc(), std::forward<E1>(x) };
 }
 
-/// @brief Creates an expression that returns the first argument clamped to a range [lo, hi]
-template <typename E1, typename E2, typename E3, KFR_ACCEPT_EXPRESSIONS(E1, E2, E3)>
-KFR_FUNCTION xfunction<fn::clamp, E1, E2, E3> clamp(E1&& x, E2&& lo, E3&& hi)
-{
-    return { fn::clamp(), std::forward<E1>(x), std::forward<E2>(lo), std::forward<E3>(hi) };
-}
-
-/// @brief Creates an expression that returns the first argument clamped to a range [0, hi]
-template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)>
-KFR_FUNCTION xfunction<fn::clamp, E1, E2> clamp(E1&& x, E2&& hi)
-{
-    return { fn::clamp(), std::forward<E1>(x), std::forward<E2>(hi) };
-}
-
 /// @brief Creates expression that returns the approximate gamma function of an argument
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::gamma, E1> gamma(E1&& x)
+KFR_FUNCTION expression_function<fn::gamma, E1> gamma(E1&& x)
 {
     return { fn::gamma(), std::forward<E1>(x) };
 }
 
 /// @brief Creates expression that returns the approximate factorial of an argument
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::factorial_approx, E1> factorial_approx(E1&& x)
+KFR_FUNCTION expression_function<fn::factorial_approx, E1> factorial_approx(E1&& x)
 {
     return { fn::factorial_approx(), std::forward<E1>(x) };
 }
@@ -297,19 +173,19 @@ KFR_FUNCTION xfunction<fn::factorial_approx, E1> factorial_approx(E1&& x)
  * @brief Returns template expression that returns the positive square root of the x. \f$\sqrt{x}\f$
  */
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::sqrt, E1> sqrt(E1&& x)
+KFR_FUNCTION expression_function<fn::sqrt, E1> sqrt(E1&& x)
 {
     return { fn::sqrt(), std::forward<E1>(x) };
 }
 
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::tan, E1> tan(E1&& x)
+KFR_FUNCTION expression_function<fn::tan, E1> tan(E1&& x)
 {
     return { fn::tan(), std::forward<E1>(x) };
 }
 
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::tandeg, E1> tandeg(E1&& x)
+KFR_FUNCTION expression_function<fn::tandeg, E1> tandeg(E1&& x)
 {
     return { fn::tandeg(), std::forward<E1>(x) };
 }
@@ -318,7 +194,7 @@ KFR_FUNCTION xfunction<fn::tandeg, E1> tandeg(E1&& x)
  * @brief Returns template expression that returns the arc sine of x.
  */
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_INTRINSIC xfunction<fn::asin, E1> asin(E1&& x)
+KFR_INTRINSIC expression_function<fn::asin, E1> asin(E1&& x)
 {
     return { fn::asin(), std::forward<E1>(x) };
 }
@@ -327,35 +203,35 @@ KFR_INTRINSIC xfunction<fn::asin, E1> asin(E1&& x)
  * @brief Returns template expression that returns the arc cosine of x.
  */
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_INTRINSIC xfunction<fn::acos, E1> acos(E1&& x)
+KFR_INTRINSIC expression_function<fn::acos, E1> acos(E1&& x)
 {
     return { fn::acos(), std::forward<E1>(x) };
 }
 
 /// @brief Returns template expression that returns the sine of the complex value x
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::csin, E1> csin(E1&& x)
+KFR_FUNCTION expression_function<fn::csin, E1> csin(E1&& x)
 {
     return { fn::csin(), std::forward<E1>(x) };
 }
 
 /// @brief Returns template expression that returns the hyperbolic sine of the complex number x
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::csinh, E1> csinh(E1&& x)
+KFR_FUNCTION expression_function<fn::csinh, E1> csinh(E1&& x)
 {
     return { fn::csinh(), std::forward<E1>(x) };
 }
 
 /// @brief Returns template expression that returns the cosine of the complex value x
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::ccos, E1> ccos(E1&& x)
+KFR_FUNCTION expression_function<fn::ccos, E1> ccos(E1&& x)
 {
     return { fn::ccos(), std::forward<E1>(x) };
 }
 
 /// @brief Returns template expression that returns the hyperbolic cosine of the complex value x
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::ccosh, E1> ccosh(E1&& x)
+KFR_FUNCTION expression_function<fn::ccosh, E1> ccosh(E1&& x)
 {
     return { fn::ccosh(), std::forward<E1>(x) };
 }
@@ -363,91 +239,91 @@ KFR_FUNCTION xfunction<fn::ccosh, E1> ccosh(E1&& x)
 /// @brief Returns template expression that returns the squared absolute value (magnitude squared) of the
 /// complex number x
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::cabssqr, E1> cabssqr(E1&& x)
+KFR_FUNCTION expression_function<fn::cabssqr, E1> cabssqr(E1&& x)
 {
     return { fn::cabssqr(), std::forward<E1>(x) };
 }
 
 /// @brief Returns template expression that returns the absolute value (magnitude) of the complex number x
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::cabs, E1> cabs(E1&& x)
+KFR_FUNCTION expression_function<fn::cabs, E1> cabs(E1&& x)
 {
     return { fn::cabs(), std::forward<E1>(x) };
 }
 
 /// @brief Returns template expression that returns the phase angle (argument) of the complex number x
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::carg, E1> carg(E1&& x)
+KFR_FUNCTION expression_function<fn::carg, E1> carg(E1&& x)
 {
     return { fn::carg(), std::forward<E1>(x) };
 }
 
 /// @brief Returns template expression that returns the natural logarithm of the complex number x
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::clog, E1> clog(E1&& x)
+KFR_FUNCTION expression_function<fn::clog, E1> clog(E1&& x)
 {
     return { fn::clog(), std::forward<E1>(x) };
 }
 
 /// @brief Returns template expression that returns the binary (base-2) logarithm of the complex number x
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::clog2, E1> clog2(E1&& x)
+KFR_FUNCTION expression_function<fn::clog2, E1> clog2(E1&& x)
 {
     return { fn::clog2(), std::forward<E1>(x) };
 }
 
 /// @brief Returns template expression that returns the common (base-10) logarithm of the complex number x
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::clog10, E1> clog10(E1&& x)
+KFR_FUNCTION expression_function<fn::clog10, E1> clog10(E1&& x)
 {
     return { fn::clog10(), std::forward<E1>(x) };
 }
 
 /// @brief Returns template expression that returns \f$e\f$ raised to the complex number x
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::cexp, E1> cexp(E1&& x)
+KFR_FUNCTION expression_function<fn::cexp, E1> cexp(E1&& x)
 {
     return { fn::cexp(), std::forward<E1>(x) };
 }
 
 /// @brief Returns template expression that returns 2 raised to the complex number x
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::cexp2, E1> cexp2(E1&& x)
+KFR_FUNCTION expression_function<fn::cexp2, E1> cexp2(E1&& x)
 {
     return { fn::cexp2(), std::forward<E1>(x) };
 }
 
 /// @brief Returns template expression that returns 10 raised to the complex number x
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::cexp10, E1> cexp10(E1&& x)
+KFR_FUNCTION expression_function<fn::cexp10, E1> cexp10(E1&& x)
 {
     return { fn::cexp10(), std::forward<E1>(x) };
 }
 
 /// @brief Returns template expression that converts complex number to polar
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::polar, E1> polar(E1&& x)
+KFR_FUNCTION expression_function<fn::polar, E1> polar(E1&& x)
 {
     return { fn::polar(), std::forward<E1>(x) };
 }
 
 /// @brief Returns template expression that converts complex number to cartesian
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::cartesian, E1> cartesian(E1&& x)
+KFR_FUNCTION expression_function<fn::cartesian, E1> cartesian(E1&& x)
 {
     return { fn::cartesian(), std::forward<E1>(x) };
 }
 
 /// @brief Returns template expression that returns square root of the complex number x
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::csqrt, E1> csqrt(E1&& x)
+KFR_FUNCTION expression_function<fn::csqrt, E1> csqrt(E1&& x)
 {
     return { fn::csqrt(), std::forward<E1>(x) };
 }
 
 /// @brief Returns template expression that returns square of the complex number x
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::csqr, E1> csqr(E1&& x)
+KFR_FUNCTION expression_function<fn::csqr, E1> csqr(E1&& x)
 {
     return { fn::csqr(), std::forward<E1>(x) };
 }
@@ -456,7 +332,7 @@ KFR_FUNCTION xfunction<fn::csqr, E1> csqr(E1&& x)
  * @brief Returns template expression that returns the arc tangent of x.
  */
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::atan, E1> atan(E1&& x)
+KFR_FUNCTION expression_function<fn::atan, E1> atan(E1&& x)
 {
     return { fn::atan(), std::forward<E1>(x) };
 }
@@ -465,7 +341,7 @@ KFR_FUNCTION xfunction<fn::atan, E1> atan(E1&& x)
  * @brief Returns template expression that returns the arc tangent of the x, expressed in degrees.
  */
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::atandeg, E1> atandeg(E1&& x)
+KFR_FUNCTION expression_function<fn::atandeg, E1> atandeg(E1&& x)
 {
     return { fn::atandeg(), std::forward<E1>(x) };
 }
@@ -474,7 +350,7 @@ KFR_FUNCTION xfunction<fn::atandeg, E1> atandeg(E1&& x)
  * @brief Returns template expression that returns the arc tangent of y/x.
  */
 template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)>
-KFR_FUNCTION xfunction<fn::atan2, E1, E2> atan2(E1&& x, E2&& y)
+KFR_FUNCTION expression_function<fn::atan2, E1, E2> atan2(E1&& x, E2&& y)
 {
     return { fn::atan2(), std::forward<E1>(x), std::forward<E2>(y) };
 }
@@ -483,55 +359,41 @@ KFR_FUNCTION xfunction<fn::atan2, E1, E2> atan2(E1&& x, E2&& y)
  * @brief Returns template expression that returns the arc tangent of y/x (expressed in degrees).
  */
 template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)>
-KFR_FUNCTION xfunction<fn::atan2deg, E1, E2> atan2deg(E1&& x, E2&& y)
+KFR_FUNCTION expression_function<fn::atan2deg, E1, E2> atan2deg(E1&& x, E2&& y)
 {
     return { fn::atan2deg(), std::forward<E1>(x), std::forward<E2>(y) };
 }
 
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::modzerobessel, E1> modzerobessel(E1&& x)
+KFR_FUNCTION expression_function<fn::modzerobessel, E1> modzerobessel(E1&& x)
 {
     return { fn::modzerobessel(), std::forward<E1>(x) };
 }
 
-/// @brief Creates an expression that adds two arguments using saturation
-template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)>
-KFR_INTRINSIC xfunction<fn::satadd, E1, E2> satadd(E1&& x, E2&& y)
-{
-    return { fn::satadd(), std::forward<E1>(x), std::forward<E2>(y) };
-}
-
-/// @brief Creates an expression that subtracts two arguments using saturation
-template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)>
-KFR_INTRINSIC xfunction<fn::satsub, E1, E2> satsub(E1&& x, E2&& y)
-{
-    return { fn::satsub(), std::forward<E1>(x), std::forward<E2>(y) };
-}
-
 /// @brief Returns template expression that returns the hyperbolic sine of the x
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::sinh, E1> sinh(E1&& x)
+KFR_FUNCTION expression_function<fn::sinh, E1> sinh(E1&& x)
 {
     return { fn::sinh(), std::forward<E1>(x) };
 }
 
 /// @brief Returns template expression that returns the hyperbolic cosine of the x
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::cosh, E1> cosh(E1&& x)
+KFR_FUNCTION expression_function<fn::cosh, E1> cosh(E1&& x)
 {
     return { fn::cosh(), std::forward<E1>(x) };
 }
 
 /// @brief Returns template expression that returns the hyperbolic tangent of the x
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::tanh, E1> tanh(E1&& x)
+KFR_FUNCTION expression_function<fn::tanh, E1> tanh(E1&& x)
 {
     return { fn::tanh(), std::forward<E1>(x) };
 }
 
 /// @brief Returns template expression that returns the hyperbolic cotangent of the x
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::coth, E1> coth(E1&& x)
+KFR_FUNCTION expression_function<fn::coth, E1> coth(E1&& x)
 {
     return { fn::coth(), std::forward<E1>(x) };
 }
@@ -539,7 +401,7 @@ KFR_FUNCTION xfunction<fn::coth, E1> coth(E1&& x)
 /// @brief Returns template expression that returns the hyperbolic sine of the even elements of the x and the
 /// hyperbolic cosine of the odd elements of the x
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::sinhcosh, E1> sinhcosh(E1&& x)
+KFR_FUNCTION expression_function<fn::sinhcosh, E1> sinhcosh(E1&& x)
 {
     return { fn::sinhcosh(), std::forward<E1>(x) };
 }
@@ -547,49 +409,49 @@ KFR_FUNCTION xfunction<fn::sinhcosh, E1> sinhcosh(E1&& x)
 /// @brief Returns template expression that returns the hyperbolic cosine of the even elements of the x and
 /// the hyperbolic sine of the odd elements of the x
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::coshsinh, E1> coshsinh(E1&& x)
+KFR_FUNCTION expression_function<fn::coshsinh, E1> coshsinh(E1&& x)
 {
     return { fn::coshsinh(), std::forward<E1>(x) };
 }
 
 /// @brief Returns e raised to the given power x. Accepts and returns expressions.
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::exp, E1> exp(E1&& x)
+KFR_FUNCTION expression_function<fn::exp, E1> exp(E1&& x)
 {
     return { fn::exp(), std::forward<E1>(x) };
 }
 
 /// @brief Returns 2 raised to the given power x. Accepts and returns expressions.
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::exp2, E1> exp2(E1&& x)
+KFR_FUNCTION expression_function<fn::exp2, E1> exp2(E1&& x)
 {
     return { fn::exp2(), std::forward<E1>(x) };
 }
 
 /// @brief Returns 10 raised to the given power x. Accepts and returns expressions.
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::exp10, E1> exp10(E1&& x)
+KFR_FUNCTION expression_function<fn::exp10, E1> exp10(E1&& x)
 {
     return { fn::exp10(), std::forward<E1>(x) };
 }
 
 /// @brief Returns the natural logarithm of the x. Accepts and returns expressions.
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::log, E1> log(E1&& x)
+KFR_FUNCTION expression_function<fn::log, E1> log(E1&& x)
 {
     return { fn::log(), std::forward<E1>(x) };
 }
 
 /// @brief Returns the binary (base-2) logarithm of the x. Accepts and returns expressions.
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::log2, E1> log2(E1&& x)
+KFR_FUNCTION expression_function<fn::log2, E1> log2(E1&& x)
 {
     return { fn::log2(), std::forward<E1>(x) };
 }
 
 /// @brief Returns the common (base-10) logarithm of the x. Accepts and returns expressions.
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::log10, E1> log10(E1&& x)
+KFR_FUNCTION expression_function<fn::log10, E1> log10(E1&& x)
 {
     return { fn::log10(), std::forward<E1>(x) };
 }
@@ -597,56 +459,56 @@ KFR_FUNCTION xfunction<fn::log10, E1> log10(E1&& x)
 /// @brief Returns the rounded binary (base-2) logarithm of the x. Version that accepts and returns
 /// expressions.
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::logb, E1> logb(E1&& x)
+KFR_FUNCTION expression_function<fn::logb, E1> logb(E1&& x)
 {
     return { fn::logb(), std::forward<E1>(x) };
 }
 
 /// @brief Returns the logarithm of the x with base y. Accepts and returns expressions.
 template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)>
-KFR_FUNCTION xfunction<fn::logn, E1, E2> logn(E1&& x, E2&& y)
+KFR_FUNCTION expression_function<fn::logn, E1, E2> logn(E1&& x, E2&& y)
 {
     return { fn::logn(), std::forward<E1>(x), std::forward<E2>(y) };
 }
 
 /// @brief Returns log(x) * y. Accepts and returns expressions.
 template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)>
-KFR_FUNCTION xfunction<fn::logm, E1, E2> logm(E1&& x, E2&& y)
+KFR_FUNCTION expression_function<fn::logm, E1, E2> logm(E1&& x, E2&& y)
 {
     return { fn::logm(), std::forward<E1>(x), std::forward<E2>(y) };
 }
 
 /// @brief Returns exp(x * m + a). Accepts and returns expressions.
 template <typename E1, typename E2, typename E3, KFR_ACCEPT_EXPRESSIONS(E1, E2, E3)>
-KFR_FUNCTION xfunction<fn::exp_fmadd, E1, E2, E3> exp_fmadd(E1&& x, E2&& y, E3&& z)
+KFR_FUNCTION expression_function<fn::exp_fmadd, E1, E2, E3> exp_fmadd(E1&& x, E2&& y, E3&& z)
 {
     return { fn::exp_fmadd(), std::forward<E1>(x), std::forward<E2>(y), std::forward<E3>(z) };
 }
 
 /// @brief Returns log(x) * m + a. Accepts and returns expressions.
 template <typename E1, typename E2, typename E3, KFR_ACCEPT_EXPRESSIONS(E1, E2, E3)>
-KFR_FUNCTION xfunction<fn::log_fmadd, E1, E2, E3> log_fmadd(E1&& x, E2&& y, E3&& z)
+KFR_FUNCTION expression_function<fn::log_fmadd, E1, E2, E3> log_fmadd(E1&& x, E2&& y, E3&& z)
 {
     return { fn::log_fmadd(), std::forward<E1>(x), std::forward<E2>(y), std::forward<E3>(z) };
 }
 
 /// @brief Returns the x raised to the given power y. Accepts and returns expressions.
 template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)>
-KFR_FUNCTION xfunction<fn::pow, E1, E2> pow(E1&& x, E2&& y)
+KFR_FUNCTION expression_function<fn::pow, E1, E2> pow(E1&& x, E2&& y)
 {
     return { fn::pow(), std::forward<E1>(x), std::forward<E2>(y) };
 }
 
 /// @brief Returns the real nth root of the x. Accepts and returns expressions.
 template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)>
-KFR_FUNCTION xfunction<fn::root, E1, E2> root(E1&& x, E2&& y)
+KFR_FUNCTION expression_function<fn::root, E1, E2> root(E1&& x, E2&& y)
 {
     return { fn::root(), std::forward<E1>(x), std::forward<E2>(y) };
 }
 
 /// @brief Returns the cube root of the x. Accepts and returns expressions.
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::cbrt, E1> cbrt(E1&& x)
+KFR_FUNCTION expression_function<fn::cbrt, E1> cbrt(E1&& x)
 {
     return { fn::cbrt(), std::forward<E1>(x) };
 }
diff --git a/include/kfr/base/old_basic_expressions.hpp b/include/kfr/base/old_basic_expressions.hpp
@@ -1,708 +0,0 @@
-/** @addtogroup expressions
- *  @{
- */
-/*
-  Copyright (C) 2016 D Levin (https://www.kfrlib.com)
-  This file is part of KFR
-
-  KFR is free software: you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation, either version 2 of the License, or
-  (at your option) any later version.
-
-  KFR is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with KFR.
-
-  If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
-  Buying a commercial license is mandatory as soon as you develop commercial activities without
-  disclosing the source code of your own applications.
-  See https://www.kfrlib.com for details.
- */
-#pragma once
-
-#include "../simd/operators.hpp"
-#include "../simd/vec.hpp"
-#include "univector.hpp"
-#include <algorithm>
-
-namespace kfr
-{
-inline namespace CMT_ARCH_NAME
-{
-
-namespace internal
-{
-template <size_t width, typename Fn>
-KFR_INTRINSIC void block_process_impl(size_t& i, size_t size, Fn&& fn)
-{
-    CMT_LOOP_NOUNROLL
-    for (; i < size / width * width; i += width)
-        fn(i, csize_t<width>());
-}
-} // namespace internal
-
-template <size_t... widths, typename Fn>
-KFR_INTRINSIC void block_process(size_t size, csizes_t<widths...>, Fn&& fn)
-{
-    size_t i = 0;
-    swallow{ (internal::block_process_impl<widths>(i, size, std::forward<Fn>(fn)), 0)... };
-}
-
-namespace internal
-{
-
-template <typename To, typename E>
-struct expression_cast : expression_with_arguments<E>
-{
-    using value_type = To;
-    KFR_MEM_INTRINSIC expression_cast(E&& expr) CMT_NOEXCEPT
-        : expression_with_arguments<E>(std::forward<E>(expr))
-    {
-    }
-
-    template <size_t N>
-    friend KFR_INTRINSIC vec<To, N> get_elements(const expression_cast& self, cinput_t input, size_t index,
-                                                 vec_shape<To, N>)
-    {
-        return self.argument_first(input, index, vec_shape<To, N>());
-    }
-};
-
-template <typename T, typename E1>
-struct expression_iterator
-{
-    constexpr expression_iterator(E1&& e1) : e1(std::forward<E1>(e1)) {}
-    struct iterator
-    {
-        T operator*() const { return get(); }
-        T get() const { return get_elements(expr.e1, cinput, position, vec_shape<T, 1>()).front(); }
-        iterator& operator++()
-        {
-            ++position;
-            return *this;
-        }
-        iterator operator++(int)
-        {
-            iterator copy = *this;
-            ++(*this);
-            return copy;
-        }
-        bool operator!=(const iterator& other) const { return position != other.position; }
-        const expression_iterator& expr;
-        size_t position;
-    };
-    iterator begin() const { return { *this, 0 }; }
-    iterator end() const { return { *this, e1.size() }; }
-    E1 e1;
-};
-} // namespace internal
-
-template <typename To, typename E, KFR_ENABLE_IF(is_input_expression<E>)>
-KFR_INTRINSIC internal::expression_cast<To, E> cast(E&& expr)
-{
-    return internal::expression_cast<To, E>(std::forward<E>(expr));
-}
-
-template <typename E1, typename T = value_type_of<E1>>
-KFR_INTRINSIC internal::expression_iterator<T, E1> to_iterator(E1&& e1)
-{
-    return internal::expression_iterator<T, E1>(std::forward<E1>(e1));
-}
-
-template <typename... Ts, typename T = common_type<Ts...>>
-inline auto sequence(const Ts&... list)
-{
-    return lambda<T>([seq = std::array<T, sizeof...(Ts)>{ { static_cast<T>(list)... } }](size_t index)
-                     { return seq[index % seq.size()]; });
-}
-
-template <typename T = int>
-KFR_INTRINSIC auto zeros()
-{
-    return lambda<T>([](cinput_t, size_t, auto x) { return zerovector(x); });
-}
-
-template <typename T = int>
-KFR_INTRINSIC auto ones()
-{
-    return lambda<T>([](cinput_t, size_t, auto) { return 1; });
-}
-
-template <typename T = int>
-KFR_INTRINSIC auto counter()
-{
-    return lambda<T>([](cinput_t, size_t index, auto x) { return enumerate(x) + index; });
-}
-
-template <typename T1>
-KFR_INTRINSIC auto counter(T1 start)
-{
-    return lambda<T1>([start](cinput_t, size_t index, auto x) { return enumerate(x) + index + start; });
-}
-template <typename T1, typename T2>
-KFR_INTRINSIC auto counter(T1 start, T2 step)
-{
-    return lambda<common_type<T1, T2>>([start, step](cinput_t, size_t index, auto x)
-                                       { return (enumerate(x) + index) * step + start; });
-}
-
-template <typename Gen>
-struct segment
-{
-    template <typename Gen_>
-    constexpr segment(size_t start, Gen_&& gen) : start(start), gen(std::forward<Gen_>(gen))
-    {
-    }
-    size_t start;
-    Gen gen;
-};
-
-enum symmetric_linspace_t
-{
-    symmetric_linspace
-};
-
-namespace internal
-{
-template <typename T, typename E1>
-struct expression_reader
-{
-    constexpr expression_reader(E1&& e1) CMT_NOEXCEPT : e1(std::forward<E1>(e1)) {}
-    T read() const
-    {
-        const T result = get_elements(e1, cinput, m_position, vec_shape<T, 1>());
-        m_position++;
-        return result;
-    }
-    mutable size_t m_position = 0;
-    E1 e1;
-};
-template <typename T, typename E1>
-struct expression_writer
-{
-    constexpr expression_writer(E1&& e1) CMT_NOEXCEPT : e1(std::forward<E1>(e1)) {}
-    template <typename U>
-    void write(U value)
-    {
-        e1(coutput, m_position, vec<U, 1>(value));
-        m_position++;
-    }
-    size_t m_position = 0;
-    E1 e1;
-};
-} // namespace internal
-
-template <typename T, typename E1>
-internal::expression_reader<T, E1> reader(E1&& e1)
-{
-    static_assert(is_input_expression<E1>, "E1 must be an expression");
-    return internal::expression_reader<T, E1>(std::forward<E1>(e1));
-}
-
-template <typename T, typename E1>
-internal::expression_writer<T, E1> writer(E1&& e1)
-{
-    static_assert(is_output_expression<E1>, "E1 must be an output expression");
-    return internal::expression_writer<T, E1>(std::forward<E1>(e1));
-}
-
-namespace internal
-{
-
-template <typename E1>
-struct expression_slice : expression_with_arguments<E1>
-{
-    using value_type = value_type_of<E1>;
-    using T          = value_type;
-    expression_slice(E1&& e1, size_t start, size_t size)
-        : expression_with_arguments<E1>(std::forward<E1>(e1)), start(start),
-          new_size(size_min(size, size_sub(std::get<0>(this->args).size(), start)))
-    {
-    }
-    template <size_t N>
-    friend KFR_INTRINSIC vec<T, N> get_elements(const expression_slice& self, cinput_t cinput, size_t index,
-                                                vec_shape<T, N> y)
-    {
-        return self.argument_first(cinput, index + self.start, y);
-    }
-    size_t size() const { return new_size; }
-    size_t start;
-    size_t new_size;
-};
-
-template <typename E1>
-struct expression_reverse : expression_with_arguments<E1>
-{
-    using value_type = value_type_of<E1>;
-    using T          = value_type;
-    expression_reverse(E1&& e1) : expression_with_arguments<E1>(std::forward<E1>(e1)), expr_size(e1.size()) {}
-    template <size_t N>
-    friend KFR_INTRINSIC vec<T, N> get_elements(const expression_reverse& self, cinput_t cinput, size_t index,
-                                                vec_shape<T, N> y)
-    {
-        return reverse(self.argument_first(cinput, self.expr_size - index - N, y));
-    }
-    size_t size() const { return expr_size; }
-    size_t expr_size;
-};
-
-template <typename T, bool precise = false>
-struct expression_linspace;
-
-template <typename T>
-struct expression_linspace<T, false> : input_expression
-{
-    using value_type = T;
-
-    KFR_MEM_INTRINSIC constexpr size_t size() const CMT_NOEXCEPT { return truncate_size; }
-
-    expression_linspace(T start, T stop, size_t size, bool endpoint = false, bool truncate = false)
-        : start(start), offset((stop - start) / T(endpoint ? size - 1 : size)),
-          truncate_size(truncate ? size : infinite_size)
-    {
-    }
-
-    expression_linspace(symmetric_linspace_t, T symsize, size_t size, bool endpoint = false)
-        : expression_linspace(-symsize, +symsize, size, endpoint)
-    {
-    }
-
-    template <size_t N>
-    friend KFR_INTRINSIC vec<T, N> get_elements(const expression_linspace& self, cinput_t, size_t index,
-                                                vec_shape<T, N> x)
-    {
-        using TI = itype<T>;
-        return T(self.start) + (enumerate(x) + static_cast<T>(static_cast<TI>(index))) * T(self.offset);
-    }
-
-    T start;
-    T offset;
-    size_t truncate_size;
-};
-
-template <typename T>
-struct expression_linspace<T, true> : input_expression
-{
-    using value_type = T;
-
-    KFR_MEM_INTRINSIC constexpr size_t size() const CMT_NOEXCEPT { return truncate_size; }
-
-    expression_linspace(T start, T stop, size_t size, bool endpoint = false, bool truncate = false)
-        : start(start), stop(stop), invsize(1.0 / T(endpoint ? size - 1 : size)),
-          truncate_size(truncate ? size : infinite_size)
-    {
-    }
-
-    expression_linspace(symmetric_linspace_t, T symsize, size_t size, bool endpoint = false)
-        : expression_linspace(-symsize, +symsize, size, endpoint)
-    {
-    }
-
-    template <size_t N>
-    friend KFR_INTRINSIC vec<T, N> get_elements(const expression_linspace& self, cinput_t, size_t index,
-                                                vec_shape<T, N> x)
-    {
-        using TI = itype<T>;
-        return mix((enumerate(x) + static_cast<T>(static_cast<TI>(index))) * self.invsize, self.start,
-                   self.stop);
-    }
-    template <typename U, size_t N>
-    KFR_MEM_INTRINSIC static vec<U, N> mix(const vec<U, N>& t, U x, U y)
-    {
-        return (U(1.0) - t) * x + t * y;
-    }
-
-    T start;
-    T stop;
-    T invsize;
-    size_t truncate_size;
-};
-
-template <typename... E>
-struct expression_sequence : expression_with_arguments<E...>
-{
-public:
-    using base = expression_with_arguments<E...>;
-
-    using value_type = common_type<value_type_of<E>...>;
-    using T          = value_type;
-
-    template <typename... Expr_>
-    KFR_MEM_INTRINSIC expression_sequence(const size_t (&segments)[base::count], Expr_&&... expr) CMT_NOEXCEPT
-        : base(std::forward<Expr_>(expr)...)
-    {
-        std::copy(std::begin(segments), std::end(segments), this->segments.begin() + 1);
-        this->segments[0]               = 0;
-        this->segments[base::count + 1] = size_t(-1);
-    }
-
-    template <size_t N>
-    KFR_INTRINSIC friend vec<T, N> get_elements(const expression_sequence& self, cinput_t cinput,
-                                                size_t index, vec_shape<T, N> y)
-    {
-        std::size_t sindex =
-            size_t(std::upper_bound(std::begin(self.segments), std::end(self.segments), index) - 1 -
-                   std::begin(self.segments));
-        if (CMT_LIKELY(self.segments[sindex + 1] - index >= N))
-            return get_elements(self, cinput, index, sindex - 1, y);
-        else
-        {
-            vec<T, N> result;
-            CMT_PRAGMA_CLANG(clang loop unroll_count(4))
-            for (size_t i = 0; i < N; i++)
-            {
-                sindex           = self.segments[sindex + 1] == index ? sindex + 1 : sindex;
-                result.data()[i] = get_elements(self, cinput, index, sindex - 1, vec_shape<T, 1>()).front();
-                index++;
-            }
-            return result;
-        }
-    }
-
-protected:
-    template <size_t N>
-    KFR_INTRINSIC friend vec<T, N> get_elements(const expression_sequence& self, cinput_t cinput,
-                                                size_t index, size_t expr_index, vec_shape<T, N> y)
-    {
-        return cswitch(
-            indicesfor_t<E...>(), expr_index, [&](auto val) { return self.argument(cinput, val, index, y); },
-            [&]() { return zerovector(y); });
-    }
-
-    std::array<size_t, base::count + 2> segments;
-};
-
-template <typename Fn, typename E>
-struct expression_adjacent : expression_with_arguments<E>
-{
-    using value_type = value_type_of<E>;
-    using T          = value_type;
-
-    expression_adjacent(Fn&& fn, E&& e)
-        : expression_with_arguments<E>(std::forward<E>(e)), fn(std::forward<Fn>(fn))
-    {
-    }
-
-    template <size_t N>
-    KFR_INTRINSIC friend vec<T, N> get_elements(const expression_adjacent& self, cinput_t cinput,
-                                                size_t index, vec_shape<T, N>)
-    {
-        const vec<T, N> in      = self.argument_first(cinput, index, vec_shape<T, N>());
-        const vec<T, N> delayed = insertleft(self.data, in);
-        self.data               = in[N - 1];
-        return self.fn(in, delayed);
-    }
-    Fn fn;
-    mutable value_type data = value_type(0);
-};
-} // namespace internal
-
-/** @brief Returns the subrange of the given expression
- */
-template <typename E1>
-KFR_INTRINSIC internal::expression_slice<E1> slice(E1&& e1, size_t start, size_t size = infinite_size)
-{
-    return internal::expression_slice<E1>(std::forward<E1>(e1), start, size);
-}
-
-/** @brief Returns the expression truncated to the given size
- */
-template <typename E1>
-KFR_INTRINSIC internal::expression_slice<E1> truncate(E1&& e1, size_t size)
-{
-    return internal::expression_slice<E1>(std::forward<E1>(e1), 0, size);
-}
-
-/** @brief Returns the reversed expression
- */
-template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)>
-KFR_INTRINSIC internal::expression_reverse<E1> reverse(E1&& e1)
-{
-    static_assert(!is_infinite<E1>, "e1 must be a sized expression (use slice())");
-    return internal::expression_reverse<E1>(std::forward<E1>(e1));
-}
-
-/** @brief Returns evenly spaced numbers over a specified interval.
- *
- * @param start The starting value of the sequence
- * @param stop The end value of the sequence. if ``endpoint`` is ``false``, the last value is excluded
- * @param size Number of samples to generate
- * @param endpoint If ``true``, ``stop`` is the last sample. Otherwise, it is not included
- * @param truncate If ``true``, linspace returns exactly size elements, otherwise, returns infinite sequence
- */
-template <typename T1, typename T2, bool precise = false, typename TF = ftype<common_type<T1, T2>>>
-KFR_INTRINSIC internal::expression_linspace<TF, precise> linspace(T1 start, T2 stop, size_t size,
-                                                                  bool endpoint = false,
-                                                                  bool truncate = false)
-{
-    return internal::expression_linspace<TF, precise>(start, stop, size, endpoint, truncate);
-}
-KFR_FN(linspace)
-
-template <typename T, bool precise = false, typename TF = ftype<T>>
-KFR_INTRINSIC internal::expression_linspace<TF, precise> symmlinspace(T symsize, size_t size,
-                                                                      bool endpoint = false)
-{
-    return internal::expression_linspace<TF, precise>(symmetric_linspace, symsize, size, endpoint);
-}
-KFR_FN(symmlinspace)
-
-template <size_t size, typename... E>
-KFR_INTRINSIC internal::expression_sequence<decay<E>...> gen_sequence(const size_t (&list)[size], E&&... gens)
-{
-    static_assert(size == sizeof...(E), "Lists must be of equal length");
-    return internal::expression_sequence<decay<E>...>(list, std::forward<E>(gens)...);
-}
-KFR_FN(gen_sequence)
-
-/**
- * @brief Returns template expression that returns the result of calling \f$ fn(x_i, x_{i-1}) \f$
- */
-template <typename Fn, typename E1>
-KFR_INTRINSIC internal::expression_adjacent<Fn, E1> adjacent(Fn&& fn, E1&& e1)
-{
-    return internal::expression_adjacent<Fn, E1>(std::forward<Fn>(fn), std::forward<E1>(e1));
-}
-
-namespace internal
-{
-template <typename E>
-struct expression_padded : expression_with_arguments<E>
-{
-    using value_type = value_type_of<E>;
-
-    KFR_MEM_INTRINSIC constexpr static size_t size() CMT_NOEXCEPT { return infinite_size; }
-
-    expression_padded(value_type fill_value, E&& e)
-        : expression_with_arguments<E>(std::forward<E>(e)), fill_value(fill_value), input_size(e.size())
-    {
-    }
-
-    template <size_t N>
-    KFR_INTRINSIC friend vec<value_type, N> get_elements(const expression_padded& self, cinput_t cinput,
-                                                         size_t index, vec_shape<value_type, N> y)
-    {
-        if (CMT_UNLIKELY(index >= self.input_size))
-        {
-            return self.fill_value;
-        }
-        else if (CMT_LIKELY(index + N <= self.input_size))
-        {
-            return self.argument_first(cinput, index, y);
-        }
-        else
-        {
-            vec<value_type, N> x{};
-            for (size_t i = 0; i < N; i++)
-            {
-                if (CMT_LIKELY(index + i < self.input_size))
-                    x[i] = self.argument_first(cinput, index + i, vec_shape<value_type, 1>()).front();
-                else
-                    x[i] = self.fill_value;
-            }
-            return x;
-        }
-    }
-    value_type fill_value;
-    const size_t input_size;
-};
-} // namespace internal
-
-/**
- * @brief Returns infinite template expression that pads e with fill_value (default value = 0)
- */
-template <typename E, typename T = value_type_of<E>>
-internal::expression_padded<E> padded(E&& e, const T& fill_value = T(0))
-{
-    static_assert(is_input_expression<E>, "E must be an input expression");
-    return internal::expression_padded<E>(fill_value, std::forward<E>(e));
-}
-
-namespace internal
-{
-template <typename... E>
-struct multioutput : output_expression
-{
-    template <typename... E_>
-    multioutput(E_&&... e) : outputs(std::forward<E_>(e)...)
-    {
-    }
-    template <typename T, size_t N>
-    KFR_INTRINSIC friend void set_elements(multioutput& self, coutput_t coutput, size_t index,
-                                           const vec<T, N>& x)
-    {
-        cfor(csize_t<0>(), csize_t<sizeof...(E)>(),
-             [&](auto n) { set_elements(std::get<val_of(decltype(n)())>(self.outputs), coutput, index, x); });
-    }
-    std::tuple<E...> outputs;
-
-private:
-};
-
-template <typename... E>
-struct expression_pack : expression_with_arguments<E...>
-{
-    constexpr static size_t count = sizeof...(E);
-
-    expression_pack(E&&... e) : expression_with_arguments<E...>(std::forward<E>(e)...) {}
-    using value_type = vec<common_type<value_type_of<E>...>, count>;
-    using T          = value_type;
-
-    using expression_with_arguments<E...>::size;
-
-    template <size_t N>
-    friend KFR_INTRINSIC vec<T, N> get_elements(const expression_pack& self, cinput_t cinput, size_t index,
-                                                vec_shape<T, N> y)
-    {
-        return self.call(cinput, fn::packtranspose(), index, y);
-    }
-};
-
-template <typename... E>
-struct expression_unpack : private expression_with_arguments<E...>, output_expression
-{
-    using expression_with_arguments<E...>::begin_block;
-    using expression_with_arguments<E...>::end_block;
-    using output_expression::begin_block;
-    using output_expression::end_block;
-    constexpr static size_t count = sizeof...(E);
-
-    expression_unpack(E&&... e) : expression_with_arguments<E...>(std::forward<E>(e)...) {}
-
-    using expression_with_arguments<E...>::size;
-
-    template <typename U, size_t N>
-    KFR_INTRINSIC friend void set_elements(expression_unpack& self, coutput_t coutput, size_t index,
-                                           const vec<vec<U, count>, N>& x)
-    {
-        self.output(coutput, index, x, csizeseq<count>);
-    }
-
-    template <typename Input, KFR_ENABLE_IF(is_input_expression<Input>)>
-    KFR_MEM_INTRINSIC expression_unpack& operator=(Input&& input)
-    {
-        process(*this, std::forward<Input>(input));
-        return *this;
-    }
-
-private:
-    template <typename U, size_t N, size_t... indices>
-    void output(coutput_t coutput, size_t index, const vec<vec<U, count>, N>& x, csizes_t<indices...>)
-    {
-        const vec<vec<U, N>, count> xx = vec<vec<U, N>, count>::from_flatten(transpose<count>(flatten(x)));
-        swallow{ (set_elements(std::get<indices>(this->args), coutput, index, xx[indices]), void(), 0)... };
-    }
-};
-} // namespace internal
-
-template <typename... E, KFR_ENABLE_IF(is_output_expressions<E...>)>
-internal::expression_unpack<E...> unpack(E&&... e)
-{
-    return internal::expression_unpack<E...>(std::forward<E>(e)...);
-}
-
-template <typename... E, KFR_ENABLE_IF(is_input_expressions<E...>)>
-internal::expression_pack<internal::arg<E>...> pack(E&&... e)
-{
-    return internal::expression_pack<internal::arg<E>...>(std::forward<E>(e)...);
-}
-
-template <typename OutExpr, typename InExpr>
-struct task_partition
-{
-    task_partition(OutExpr&& output, InExpr&& input, size_t size, size_t chunk_size, size_t count)
-        : output(std::forward<OutExpr>(output)), input(std::forward<InExpr>(input)), size(size),
-          chunk_size(chunk_size), count(count)
-    {
-    }
-    OutExpr output;
-    InExpr input;
-    size_t size;
-    size_t chunk_size;
-    size_t count;
-    size_t operator()(size_t index)
-    {
-        if (CMT_UNLIKELY(index >= count))
-            return 0;
-        return process(output, input, index * chunk_size,
-                       index == count - 1 ? size - (count - 1) * chunk_size : chunk_size);
-    }
-};
-
-template <typename OutExpr, typename InExpr, typename T = value_type_of<InExpr>>
-task_partition<OutExpr, InExpr> partition(OutExpr&& output, InExpr&& input, size_t count,
-                                          size_t minimum_size = 0)
-{
-    static_assert(!is_infinite<OutExpr> || !is_infinite<InExpr>, "");
-
-    minimum_size            = minimum_size == 0 ? vector_width<T> * 8 : minimum_size;
-    const size_t size       = size_min(output.size(), input.size());
-    const size_t chunk_size = align_up(std::max(size / count, minimum_size), vector_width<T>);
-
-    task_partition<OutExpr, InExpr> result(std::forward<OutExpr>(output), std::forward<InExpr>(input), size,
-                                           chunk_size, (size + chunk_size - 1) / chunk_size);
-    return result;
-}
-
-namespace internal
-{
-
-template <typename E1, typename E2>
-struct concatenate_expression : expression_with_arguments<E1, E2>
-{
-    using value_type = common_type<value_type_of<E1>, value_type_of<E2>>;
-    using T          = value_type;
-
-    KFR_MEM_INTRINSIC constexpr size_t size() const CMT_NOEXCEPT
-    {
-        return size_add(std::get<0>(this->args).size(), std::get<1>(this->args).size());
-    }
-    template <typename E1_, typename E2_>
-    concatenate_expression(E1_&& e1, E2_&& e2)
-        : expression_with_arguments<E1, E2>(std::forward<E1_>(e1), std::forward<E2_>(e2))
-    {
-    }
-
-    template <size_t N>
-    KFR_INTRINSIC friend vec<T, N> get_elements(const concatenate_expression& self, cinput_t cinput,
-                                                size_t index, vec_shape<T, N> y)
-    {
-        const size_t size0 = std::get<0>(self.args).size();
-        if (index >= size0)
-        {
-            return self.argument(cinput, csize<1>, index - size0, y);
-        }
-        else if (CMT_LIKELY(index + N <= size0))
-        {
-            return self.argument(cinput, csize<0>, index, y);
-        }
-        else // (index < size0) && (index + N > size0)
-        {
-            vec<T, N> result;
-            for (size_t i = 0; i < size0 - index; ++i)
-            {
-                result[i] = self.argument(cinput, csize<0>, index + i, vec_shape<T, 1>{})[0];
-            }
-            for (size_t i = size0 - index; i < N; ++i)
-            {
-                result[i] = self.argument(cinput, csize<1>, index + i - size0, vec_shape<T, 1>{})[0];
-            }
-            return result;
-        }
-    }
-};
-} // namespace internal
-
-template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expression<E1>&& is_input_expression<E2>)>
-internal::concatenate_expression<E1, E2> concatenate(E1&& e1, E2&& e2)
-{
-    return { std::forward<E1>(e1), std::forward<E2>(e2) };
-}
-
-} // namespace CMT_ARCH_NAME
-} // namespace kfr
diff --git a/include/kfr/base/pointer.hpp b/include/kfr/base/pointer.hpp
@@ -32,7 +32,7 @@
 namespace kfr
 {
 
-template <typename T, bool enable_resource = true>
+template <typename T, index_t Dims>
 struct expression_pointer;
 
 template <typename T>
@@ -53,8 +53,8 @@ inline namespace CMT_ARCH_NAME
 namespace internal
 {
 
-template <typename Expression, typename T, size_t key = 0>
-KFR_INTRINSIC bool invoke_substitute(Expression& expr, expression_pointer<T>&& new_pointer,
+template <typename Expression, typename T, index_t Dims, size_t key = 0>
+KFR_INTRINSIC bool invoke_substitute(Expression& expr, expression_pointer<T, Dims> new_pointer,
                                      csize_t<key> = {});
 }
 } // namespace CMT_ARCH_NAME
@@ -63,32 +63,40 @@ template <typename T, index_t Dims>
 struct expression_vtable
 {
     constexpr static const size_t Nsizes = 1 + ilog2(maximum_expression_width<T>);
+    constexpr static const size_t Nmax   = 1 << Nsizes; // NOTE(review): widest thunk built in the ctor is 1 << (Nsizes - 1) - confirm the extra factor of 2
 
     using func_get        = void (*)(void*, shape<Dims>, T*);
     using func_set        = void (*)(void*, shape<Dims>, const T*);
     using func_shapeof    = void (*)(void*, shape<Dims>&);
-    using func_substitute = bool (*)(void*, expression_pointer<T>&&);
+    using func_substitute = bool (*)(void*, expression_pointer<T, Dims>); // pointer passed by value; static_substitute moves it on
+    using func_pass       = void (*)(void*, shape<Dims>, shape<Dims>); // (instance, start, stop), shared by begin/end pass
 
     func_shapeof fn_shapeof;
     func_substitute fn_substitute;
+    func_pass fn_begin_pass; // set to static_begin_pass<Expression> in the constructor
+    func_pass fn_end_pass; // set to static_end_pass<Expression> in the constructor
     std::array<std::array<func_get, Nsizes>, Dims> fn_get_elements;
     std::array<std::array<func_set, Nsizes>, Dims> fn_set_elements;
 
     template <typename Expression>
-    expression_vtable(ctype_t<Expression> t)
+    KFR_MEM_INTRINSIC expression_vtable(ctype_t<Expression> t) // t only selects Expression; its value is not read
     {
         fn_shapeof    = &static_shapeof<Expression>;
         fn_substitute = &static_substitute<Expression>;
+        fn_begin_pass = &static_begin_pass<Expression>; // type-erased begin_pass hook
+        fn_end_pass   = &static_end_pass<Expression>; // type-erased end_pass hook
         cforeach(csizeseq<Nsizes>,
-                 [&](size_t size) CMT_INLINE_LAMBDA
+                 [&](auto size_) CMT_INLINE_LAMBDA // auto: the argument's type carries the value (read via ::value below)
                  {
                      cforeach(csizeseq<Dims>,
-                              [&](size_t axis) CMT_INLINE_LAMBDA
+                              [&](auto axis_) CMT_INLINE_LAMBDA // same for the axis index
                               {
+                                  constexpr size_t size = decltype(size_)::value; // constexpr so it can form the template argument 1 << size
+                                  constexpr size_t axis = decltype(axis_)::value; // constexpr so it can be the VecAxis template argument
                                   fn_get_elements[axis][size] =
-                                      &expression_vtable::static_get_elements<Expression, 1 << size, axis>;
+                                      &static_get_elements<Expression, 1 << size, axis>; // slot [axis][size] reads 1 << size elements
                                   fn_set_elements[axis][size] =
-                                      &expression_vtable::static_set_elements<Expression, 1 << size, axis>;
+                                      &static_set_elements<Expression, 1 << size, axis>; // slot [axis][size] writes 1 << size elements
                               });
                  });
     }
@@ -96,12 +104,24 @@ struct expression_vtable
     template <typename Expression, size_t N, index_t VecAxis>
     static void static_get_elements(void* instance, shape<Dims> index, T* dest)
     {
-        write(dest, get_elements(*static_cast<Expression*>(instance), index, axis_params_v<VecAxis, N>));
+        if constexpr (is_input_expression<Expression>) // get_elements is only instantiated for input expressions
+        {
+            write(dest, get_elements(*static_cast<Expression*>(instance), index, axis_params_v<VecAxis, N>));
+        }
+        else // not an input expression: the thunk is a no-op and dest is left unwritten
+        {
+        }
     }
     template <typename Expression, size_t N, index_t VecAxis>
     static void static_set_elements(void* instance, shape<Dims> index, const T* src)
     {
-        set_elements(*static_cast<Expression*>(instance), index, axis_params_v<VecAxis, N>, read<N>(src));
+        if constexpr (is_output_expression<Expression>) // set_elements is only instantiated for output expressions
+        {
+            set_elements(*static_cast<Expression*>(instance), index, axis_params_v<VecAxis, N>, read<N>(src));
+        }
+        else // not an output expression: the thunk is a no-op and src is ignored
+        {
+        }
     }
     template <typename Expression>
     static void static_shapeof(void* instance, shape<Dims>& result)
@@ -109,10 +129,20 @@ struct expression_vtable
         result = expression_traits<Expression>::shapeof(*static_cast<Expression*>(instance));
     }
     template <typename Expression>
-    static bool static_substitute(void* instance, expression_pointer<T> ptr)
+    static bool static_substitute(void* instance, expression_pointer<T, Dims> ptr)
     {
         return internal::invoke_substitute(*static_cast<Expression*>(instance), std::move(ptr));
     }
+    template <typename Expression>
+    static void static_begin_pass(void* instance, shape<Dims> start, shape<Dims> stop)
+    {
+        begin_pass(*static_cast<Expression*>(instance), start, stop);
+    }
+    template <typename Expression>
+    static void static_end_pass(void* instance, shape<Dims> start, shape<Dims> stop)
+    {
+        end_pass(*static_cast<Expression*>(instance), start, stop);
+    }
 };
 
 struct expression_resource
@@ -126,7 +156,7 @@ struct expression_resource_impl : expression_resource
 {
     expression_resource_impl(E&& e) CMT_NOEXCEPT : e(std::move(e)) {}
     virtual ~expression_resource_impl() {}
-    virtual void* instance() override final { return &e; }
+    KFR_INTRINSIC virtual void* instance() override final { return &e; }
 
 public:
 #ifdef __cpp_aligned_new
@@ -145,32 +175,38 @@ KFR_INTRINSIC std::shared_ptr<expression_resource> make_resource(E&& e)
         new (aligned_allocate<T>()) T(std::move(e)), [](T* pi) { aligned_deallocate<T>(pi); }));
 }
 
-template <typename T, index_t Dims>
-struct xpointer
+template <typename T, index_t Dims = 1>
+struct expression_pointer
 {
     void* instance;
     const expression_vtable<T, Dims>* vtable;
     std::shared_ptr<expression_resource> resource;
 
-    xpointer() CMT_NOEXCEPT : instance(nullptr), vtable(nullptr) {}
-    xpointer(const void* instance, const expression_vtable<T, Dims>* vtable,
-             std::shared_ptr<expression_resource> resource = nullptr)
+    expression_pointer() CMT_NOEXCEPT : instance(nullptr), vtable(nullptr) {}
+    expression_pointer(const void* instance, const expression_vtable<T, Dims>* vtable,
+                       std::shared_ptr<expression_resource> resource = nullptr)
         : instance(const_cast<void*>(instance)), vtable(vtable), resource(std::move(resource))
     {
     }
 
     explicit operator bool() const { return instance != nullptr; }
+
+    bool substitute(expression_pointer<T, Dims> new_pointer)
+    {
+        return vtable->fn_substitute(instance, std::move(new_pointer));
+    }
 };
 
 template <typename T, index_t Dims>
-struct expression_traits<xpointer<T, Dims>> : expression_traits_defaults
+struct expression_traits<expression_pointer<T, Dims>> : expression_traits_defaults
 {
     using value_type             = T;
     constexpr static size_t dims = Dims;
-    constexpr static shape<dims> shapeof(const xpointer<T, Dims>& self)
+    constexpr static shape<dims> shapeof(const expression_pointer<T, Dims>& self)
     {
         shape<dims> result;
         self.vtable->fn_shapeof(self.instance, result);
+        return result;
     }
     constexpr static shape<dims> shapeof() { return shape<dims>(undefined_size); }
 
@@ -180,8 +216,19 @@ struct expression_traits<xpointer<T, Dims>> : expression_traits_defaults
 inline namespace CMT_ARCH_NAME
 {
 
+template <typename T, index_t NDims>
+KFR_INTRINSIC void begin_pass(const expression_pointer<T, NDims>& self, shape<NDims> start, shape<NDims> stop)
+{
+    self.vtable->fn_begin_pass(self.instance, start, stop);
+}
+template <typename T, index_t NDims>
+KFR_INTRINSIC void end_pass(const expression_pointer<T, NDims>& self, shape<NDims> start, shape<NDims> stop)
+{
+    self.vtable->fn_end_pass(self.instance, start, stop);
+}
+
 template <typename T, index_t NDims, index_t Axis, size_t N>
-KFR_INTRINSIC vec<T, N> get_elements(const xpointer<T, NDims>& self, const shape<NDims>& index,
+KFR_INTRINSIC vec<T, N> get_elements(const expression_pointer<T, NDims>& self, const shape<NDims>& index,
                                      const axis_params<Axis, N>& sh)
 {
     static_assert(is_poweroftwo(N) && N >= 1);
@@ -200,7 +247,7 @@ KFR_INTRINSIC vec<T, N> get_elements(const xpointer<T, NDims>& self, const shape
 }
 
 template <typename T, index_t NDims, index_t Axis, size_t N>
-KFR_INTRINSIC void set_elements(const xpointer<T, NDims>& self, const shape<NDims>& index,
+KFR_INTRINSIC void set_elements(const expression_pointer<T, NDims>& self, const shape<NDims>& index,
                                 const axis_params<Axis, N>& sh, const identity<vec<T, N>>& value)
 {
     static_assert(is_poweroftwo(N) && N >= 1);
@@ -231,6 +278,8 @@ KFR_INTRINSIC expression_vtable<T, Dims>* make_expression_vtable()
 }
 } // namespace internal
 
+} // namespace CMT_ARCH_NAME
+
 /** @brief Converts the given expression into an opaque object.
  *  This overload takes reference to the expression.
  *  @warning Use with caution with local variables.
@@ -250,89 +299,109 @@ KFR_INTRINSIC expression_pointer<T, Dims> to_pointer(E&& expr)
 {
     std::shared_ptr<expression_resource> ptr = make_resource(std::move(expr));
     void* instance                           = ptr->instance();
-    return expression_pointer<T, Dims>(instance, internal::make_expression_vtable<T, E>(), std::move(ptr));
+    return expression_pointer<T, Dims>(instance, internal::make_expression_vtable<T, Dims, E>(),
+                                       std::move(ptr));
 }
 
-#if 0
-template <typename T, size_t key>
-class expression_placeholder : public input_expression
+template <typename T, index_t Dims = 1, size_t Key = 0>
+struct expression_placeholder
 {
 public:
     using value_type                      = T;
     expression_placeholder() CMT_NOEXCEPT = default;
-    template <typename U, size_t N>
-    friend KFR_INTRINSIC vec<U, N> get_elements(const expression_placeholder& self, cinput_t, size_t index,
-                                                vec_shape<U, N>)
+    expression_pointer<T, Dims> pointer;
+};
+
+template <typename T, index_t Dims, size_t Key>
+struct expression_traits<expression_placeholder<T, Dims, Key>> : public expression_traits_defaults
+{
+    using value_type             = T;
+    constexpr static size_t dims = Dims;
+    constexpr static shape<dims> shapeof(const expression_placeholder<T, Dims, Key>& self)
     {
-        return self.pointer ? elemcast<U>(get_elements(self.pointer, cinput, index, vec_shape<T, N>())) : 0;
+        return self.pointer ? ::kfr::shapeof(self.pointer) : shape<dims>(infinite_size);
     }
-    expression_pointer<T> pointer;
+    constexpr static shape<dims> shapeof() { return shape<dims>(undefined_size); }
 };
 
-template <typename T, size_t key = 0>
-KFR_INTRINSIC expression_placeholder<T, key> placeholder(csize_t<key> = csize_t<key>{})
+inline namespace CMT_ARCH_NAME
+{
+
+template <typename T, index_t Dims, size_t Key, index_t VecAxis, size_t N>
+KFR_INTRINSIC vec<T, N> get_elements(const expression_placeholder<T, Dims, Key>& self, shape<Dims> index,
+                                     axis_params<VecAxis, N> sh)
 {
-    return expression_placeholder<T, key>();
+    return self.pointer ? get_elements(self.pointer, index, sh) : 0;
+}
+} // namespace CMT_ARCH_NAME
+
+template <typename T, index_t Dims = 1, size_t Key = 0>
+KFR_INTRINSIC expression_placeholder<T, Dims, Key> placeholder(csize_t<Key> = csize_t<Key>{})
+{
+    return expression_placeholder<T, Dims, Key>();
 }
 
 template <typename... Args>
-KFR_INTRINSIC bool substitute(input_expression&, Args&&...)
+KFR_INTRINSIC bool substitute(const internal_generic::anything&, Args&&...)
 {
     return false;
 }
 
+inline namespace CMT_ARCH_NAME
+{
 namespace internal
 {
-template <typename... Args, typename T, size_t key, size_t... indices>
-KFR_INTRINSIC bool substitute(internal::expression_with_arguments<Args...>& expr,
-                              expression_pointer<T>&& new_pointer, csize_t<key>, csizes_t<indices...>);
+template <typename... Args, typename T, index_t Dims, size_t Key, size_t... indices>
+KFR_INTRINSIC bool substitute_helper(expression_with_arguments<Args...>& expr,
+                                     expression_pointer<T, Dims> new_pointer, csize_t<Key>,
+                                     csizes_t<indices...>);
 }
+} // namespace CMT_ARCH_NAME
 
-template <typename T, size_t key = 0>
-KFR_INTRINSIC bool substitute(expression_placeholder<T, key>& expr, expression_pointer<T>&& new_pointer,
-                              csize_t<key> = csize_t<key>{})
+template <typename T, index_t Dims, size_t Key = 0>
+KFR_INTRINSIC bool substitute(expression_placeholder<T, Dims, Key>& expr,
+                              expression_pointer<T, Dims> new_pointer, csize_t<Key> = csize_t<Key>{})
 {
     expr.pointer = std::move(new_pointer);
     return true;
 }
 
-template <typename... Args, typename T, size_t key = 0>
-KFR_INTRINSIC bool substitute(internal::expression_with_arguments<Args...>& expr,
-                              expression_pointer<T>&& new_pointer, csize_t<key> = csize_t<key>{})
+template <typename... Args, typename T, index_t Dims, size_t Key = 0>
+KFR_INTRINSIC bool substitute(expression_with_arguments<Args...>& expr,
+                              expression_pointer<T, Dims> new_pointer, csize_t<Key> = csize_t<Key>{})
 {
-    return internal::substitute(expr, std::move(new_pointer), csize_t<key>{}, indicesfor_t<Args...>{});
+    return internal::substitute_helper(expr, std::move(new_pointer), csize_t<Key>{}, indicesfor_t<Args...>{});
 }
 
-template <typename T, size_t key = 0>
-KFR_INTRINSIC bool substitute(expression_pointer<T>& expr, expression_pointer<T>&& new_pointer,
-                              csize_t<key> = csize_t<key>{})
+template <typename T, index_t Dims, size_t Key = 0>
+KFR_INTRINSIC bool substitute(expression_pointer<T, Dims>& expr, expression_pointer<T, Dims> new_pointer,
+                              csize_t<Key> = csize_t<Key>{})
 {
-    return expr.substitute(std::move(new_pointer), csize_t<key>{});
+    static_assert(Key == 0, "expression_pointer supports only Key = 0");
+    return expr.substitute(std::move(new_pointer));
 }
 
+inline namespace CMT_ARCH_NAME
+{
 namespace internal
 {
 
-template <typename... Args, typename T, size_t key, size_t... indices>
-KFR_INTRINSIC bool substitute(internal::expression_with_arguments<Args...>& expr,
-                              expression_pointer<T>&& new_pointer, csize_t<key>, csizes_t<indices...>)
+template <typename... Args, typename T, index_t Dims, size_t Key, size_t... indices>
+KFR_INTRINSIC bool substitute_helper(expression_with_arguments<Args...>& expr,
+                                     expression_pointer<T, Dims> new_pointer, csize_t<Key>,
+                                     csizes_t<indices...>) // each argument receives its own copy of new_pointer
 {
-    return (substitute(std::get<indices>(expr.args), std::move(new_pointer), csize_t<key>()) || ...);
+    return (substitute(std::get<indices>(expr.args), new_pointer, csize_t<Key>()) || ...);
 }
 
-} // namespace internal
-
-namespace internal
+template <typename Expression, typename T, index_t Dims, size_t Key>
+KFR_INTRINSIC bool invoke_substitute(Expression& expr, expression_pointer<T, Dims> new_pointer, csize_t<Key>)
 {
-
-template <typename Expression, typename T, size_t key>
-KFR_INTRINSIC bool invoke_substitute(Expression& expr, expression_pointer<T>&& new_pointer, csize_t<key>)
-{
-    return kfr::substitute(expr, std::move(new_pointer), csize_t<key>{});
+    return kfr::substitute(expr, std::move(new_pointer), csize_t<Key>{});
 }
 
 } // namespace internal
-#endif
+
 } // namespace CMT_ARCH_NAME
 
-}
+} // namespace kfr
diff --git a/include/kfr/base/random.hpp b/include/kfr/base/random.hpp
@@ -26,6 +26,7 @@
 #pragma once
 
 #include "random_bits.hpp"
+#include "state_holder.hpp"
 
 namespace kfr
 {
@@ -34,128 +35,141 @@ inline namespace CMT_ARCH_NAME
 {
 
 template <typename T, size_t N, KFR_ENABLE_IF(is_integral<T>)>
-KFR_INTRINSIC vec<T, N> random_uniform(random_bit_generator& gen)
+KFR_INTRINSIC vec<T, N> random_uniform(random_state& state)
 {
-    return bitcast<T>(random_bits<N * sizeof(T)>(gen));
+    return bitcast<T>(random_bits<N * sizeof(T)>(state));
 }
 
 template <typename T, size_t N, KFR_ENABLE_IF(is_same<T, f32>)>
-KFR_INTRINSIC vec<f32, N> randommantissa(random_bit_generator& gen)
+KFR_INTRINSIC vec<f32, N> randommantissa(random_state& state)
 {
-    return bitcast<f32>((random_uniform<u32, N>(gen) & u32(0x7FFFFFu)) | u32(0x3f800000u)) + 0.0f;
+    return bitcast<f32>((random_uniform<u32, N>(state) & u32(0x7FFFFFu)) | u32(0x3f800000u)) + 0.0f;
 }
 
 template <typename T, size_t N, KFR_ENABLE_IF(is_same<T, f64>)>
-KFR_INTRINSIC vec<f64, N> randommantissa(random_bit_generator& gen)
+KFR_INTRINSIC vec<f64, N> randommantissa(random_state& state)
 {
-    return bitcast<f64>((random_uniform<u64, N>(gen) & u64(0x000FFFFFFFFFFFFFull)) |
+    return bitcast<f64>((random_uniform<u64, N>(state) & u64(0x000FFFFFFFFFFFFFull)) |
                         u64(0x3FF0000000000000ull)) +
            0.0;
 }
 
 template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>)>
-KFR_INTRINSIC vec<T, N> random_uniform(random_bit_generator& gen)
+KFR_INTRINSIC vec<T, N> random_uniform(random_state& state)
 {
-    return randommantissa<T, N>(gen) - 1.f;
+    return randommantissa<T, N>(state) - 1.f;
 }
 
 template <size_t N, typename T, KFR_ENABLE_IF(is_f_class<T>)>
-KFR_INTRINSIC vec<T, N> random_range(random_bit_generator& gen, T min, T max)
+KFR_INTRINSIC vec<T, N> random_range(random_state& state, T min, T max)
 {
-    return mix(random_uniform<T, N>(gen), min, max);
+    return mix(random_uniform<T, N>(state), min, max);
 }
 
 template <size_t N, typename T, KFR_ENABLE_IF(!is_f_class<T>)>
-KFR_INTRINSIC vec<T, N> random_range(random_bit_generator& gen, T min, T max)
+KFR_INTRINSIC vec<T, N> random_range(random_state& state, T min, T max)
 {
     using big_type = findinttype<sqr(std::numeric_limits<T>::min()), sqr(std::numeric_limits<T>::max())>;
 
-    vec<T, N> u                = random_uniform<T, N>(gen);
+    vec<T, N> u                = random_uniform<T, N>(state);
     const vec<big_type, N> tmp = u;
     return (tmp * (max - min) + min) >> typebits<T>::bits;
 }
 
-namespace internal
+template <typename T, index_t Dims, bool Reference = false>
+struct expression_random_uniform : expression_traits_defaults
 {
-template <typename T, typename Gen = random_bit_generator>
-struct expression_random_uniform : input_expression
-{
-    using value_type = T;
-    constexpr expression_random_uniform(Gen gen) CMT_NOEXCEPT : gen(gen) {}
-    template <size_t N>
-    friend vec<T, N> get_elements(const expression_random_uniform& self, cinput_t, size_t, vec_shape<T, N>)
+    using value_type             = T;
+    constexpr static size_t dims = Dims;
+    constexpr static shape<dims> shapeof(const expression_random_uniform&)
+    {
+        return shape<dims>(infinite_size);
+    }
+    constexpr static shape<dims> shapeof() { return shape<dims>(infinite_size); }
+
+    mutable state_holder<random_state, Reference> state;
+
+    template <size_t N, index_t VecAxis>
+    friend KFR_INTRINSIC vec<T, N> get_elements(const expression_random_uniform& self, shape<Dims>,
+                                                axis_params<VecAxis, N>)
     {
-        return random_uniform<T, N>(self.gen);
+        return random_uniform<T, N>(*self.state); // random_uniform is declared <T, N>
     }
-    mutable Gen gen;
 };
 
-template <typename T, typename Gen = random_bit_generator>
-struct expression_random_range : input_expression
+template <typename T, index_t Dims, bool Reference = false>
+struct expression_random_range : expression_traits_defaults
 {
-    using value_type = T;
-    constexpr expression_random_range(Gen gen, T min, T max) CMT_NOEXCEPT : gen(gen), min(min), max(max) {}
+    using value_type             = T;
+    constexpr static size_t dims = Dims;
+    constexpr static shape<dims> shapeof(const expression_random_range&)
+    {
+        return shape<dims>(infinite_size);
+    }
+    constexpr static shape<dims> shapeof() { return shape<dims>(infinite_size); }
+
+    mutable state_holder<random_state, Reference> state;
+    T min;
+    T max;
 
-    template <size_t N>
-    friend vec<T, N> get_elements(const expression_random_range& self, cinput_t, size_t, vec_shape<T, N>)
+    template <size_t N, index_t VecAxis>
+    friend KFR_INTRINSIC vec<T, N> get_elements(const expression_random_range& self, shape<Dims>,
+                                                axis_params<VecAxis, N>)
     {
-        return random_range<N, T>(self.gen, self.min, self.max);
+        return random_range<N, T>(*self.state, self.min, self.max);
     }
-    mutable Gen gen;
-    const T min;
-    const T max;
 };
-} // namespace internal
 
-/// @brief Returns expression that returns pseudo random values. Copies the given generator
-template <typename T>
-KFR_FUNCTION internal::expression_random_uniform<T> gen_random_uniform(const random_bit_generator& gen)
+/// @brief Returns expression that returns pseudorandom values. Copies the given generator
+template <typename T, index_t Dims = 1>
+KFR_FUNCTION expression_random_uniform<T, Dims> gen_random_uniform(const random_state& state)
 {
-    return internal::expression_random_uniform<T>(gen);
+    return { {}, state };
 }
 
-/// @brief Returns expression that returns pseudo random values. References the given
+/// @brief Returns expression that returns pseudorandom values. References the given
 /// generator. Use std::ref(gen) to force this overload
-template <typename T>
-KFR_FUNCTION internal::expression_random_uniform<T, std::reference_wrapper<random_bit_generator>>
-gen_random_uniform(std::reference_wrapper<random_bit_generator> gen)
+template <typename T, index_t Dims = 1>
+KFR_FUNCTION expression_random_uniform<T, Dims, true> gen_random_uniform(
+    std::reference_wrapper<random_state> state)
 {
-    return internal::expression_random_uniform<T, std::reference_wrapper<random_bit_generator>>(gen);
+    return { {}, state };
 }
 
 #ifndef KFR_DISABLE_READCYCLECOUNTER
-/// @brief Returns expression that returns pseudo random values
-template <typename T>
-KFR_FUNCTION internal::expression_random_uniform<T> gen_random_uniform()
+/// @brief Returns expression that returns pseudorandom values
+template <typename T, index_t Dims = 1>
+KFR_FUNCTION expression_random_uniform<T, Dims> gen_random_uniform()
 {
-    return internal::expression_random_uniform<T>(random_bit_generator(seed_from_rdtsc));
+    return { {}, random_init() }; // leading {} initialises the empty traits base, as in the sibling factories
 }
 #endif
 
-/// @brief Returns expression that returns pseudo random values of the given range. Copies the given generator
-template <typename T>
-KFR_FUNCTION internal::expression_random_range<T> gen_random_range(const random_bit_generator& gen, T min,
-                                                                   T max)
+/// @brief Returns expression that returns pseudorandom values of the given range. Copies the given generator
+template <typename T, index_t Dims = 1>
+KFR_FUNCTION expression_random_range<T, Dims> gen_random_range(const random_state& state, T min, T max)
 {
-    return internal::expression_random_range<T>(gen, min, max);
+    return { {}, state, min, max };
 }
 
-/// @brief Returns expression that returns pseudo random values of the given range. References the given
+/// @brief Returns expression that returns pseudorandom values of the given range. References the given
 /// generator. Use std::ref(gen) to force this overload
-template <typename T>
-KFR_FUNCTION internal::expression_random_range<T, std::reference_wrapper<random_bit_generator>>
-gen_random_range(std::reference_wrapper<random_bit_generator> gen, T min, T max)
+template <typename T, index_t Dims = 1>
+KFR_FUNCTION expression_random_range<T, Dims, true> gen_random_range(
+    std::reference_wrapper<random_state> state, T min, T max)
 {
-    return internal::expression_random_range<T, std::reference_wrapper<random_bit_generator>>(gen, min, max);
+    return { {}, state, min, max };
 }
 
 #ifndef KFR_DISABLE_READCYCLECOUNTER
-/// @brief Returns expression that returns pseudo random values of the given range
-template <typename T>
-KFR_FUNCTION internal::expression_random_range<T> gen_random_range(T min, T max)
+/// @brief Returns expression that returns pseudorandom values of the given range
+template <typename T, index_t Dims = 1>
+KFR_FUNCTION expression_random_range<T, Dims> gen_random_range(T min, T max)
 {
-    return internal::expression_random_range<T>(random_bit_generator(seed_from_rdtsc), min, max);
+    return { {}, random_init(), min, max };
 }
 #endif
+
 } // namespace CMT_ARCH_NAME
+
 } // namespace kfr
diff --git a/include/kfr/base/random_bits.hpp b/include/kfr/base/random_bits.hpp
@@ -2,7 +2,7 @@
  *  @{
  */
 /*
-  Copyright (C) 2016 D Levin (https://www.kfrlib.com)
+  Copyright (C) 2016-2022 Fractalium Ltd (https://www.kfrlib.com)
   This file is part of KFR
 
   KFR is free software: you can redistribute it and/or modify
@@ -47,10 +47,16 @@ struct seed_from_rdtsc_t
 constexpr seed_from_rdtsc_t seed_from_rdtsc{};
 #endif
 
-inline namespace CMT_ARCH_NAME
+struct random_state
 {
-
-using random_state = u32x4;
+    constexpr random_state() : v{ 0, 0, 0, 0 } {}
+    constexpr random_state(random_state&&)      = default;
+    constexpr random_state(const random_state&) = default;
+    constexpr random_state& operator=(random_state&&) = default;
+    constexpr random_state& operator=(const random_state&) = default;
+    // internal field
+    portable_vec<u32, 4> v;
+};
 
 #ifndef KFR_DISABLE_READCYCLECOUNTER
 #ifdef CMT_COMPILER_CLANG
@@ -61,53 +67,59 @@ using random_state = u32x4;
 #endif
 #endif
 
-struct random_bit_generator
+static_assert(sizeof(random_state) == 16, "sizeof(random_state) == 16");
+
+inline namespace CMT_ARCH_NAME
+{
+
+KFR_INTRINSIC void random_next(random_state& state)
+{
+    constexpr static portable_vec<u32, 4> mul{ 214013u, 17405u, 214013u, 69069u };
+    constexpr static portable_vec<u32, 4> add{ 2531011u, 10395331u, 13737667u, 1u };
+    state.v = bitcast<u32>(rotateright<3>(
+        bitcast<u8>(fmadd(static_cast<u32x4>(state.v), static_cast<u32x4>(mul), static_cast<u32x4>(add)))));
+}
+#ifndef KFR_DISABLE_READCYCLECOUNTER
+KFR_INTRINSIC random_state random_init()
 {
-#ifndef KFR_DISABLE_READCYCLECOUNTER
-    KFR_MEM_INTRINSIC random_bit_generator(seed_from_rdtsc_t) CMT_NOEXCEPT
-        : state(bitcast<u32>(make_vector(KFR_builtin_readcyclecounter(),
-                                         (KFR_builtin_readcyclecounter() << 11) ^ 0x710686d615e2257bull)))
-    {
-        (void)operator()();
-    }
-#endif
-    KFR_MEM_INTRINSIC random_bit_generator(u32 x0, u32 x1, u32 x2, u32 x3) CMT_NOEXCEPT
-        : state(x0, x1, x2, x3)
-    {
-        (void)operator()();
-    }
-    KFR_MEM_INTRINSIC random_bit_generator(u64 x0, u64 x1) CMT_NOEXCEPT
-        : state(bitcast<u32>(make_vector(x0, x1)))
-    {
-        (void)operator()();
-    }
-
-    KFR_MEM_INTRINSIC random_state operator()()
-    {
-        const static random_state mul{ 214013u, 17405u, 214013u, 69069u };
-        const static random_state add{ 2531011u, 10395331u, 13737667u, 1u };
-        state = bitcast<u32>(rotateright<3>(bitcast<u8>(fmadd(state, mul, add))));
-        return state;
-    }
-
-protected:
     random_state state;
-};
+    state.v = portable_vec<u32, 4>{ bitcast<u32>(make_vector(
+        KFR_builtin_readcyclecounter(), (KFR_builtin_readcyclecounter() << 11) ^ 0x710686d615e2257bull)) };
+    random_next(state);
+    return state;
+}
+#endif // KFR_DISABLE_READCYCLECOUNTER
 
-static_assert(sizeof(random_state) == 16, "sizeof(random_state) == 16");
+KFR_INTRINSIC random_state random_init(u32 x0, u32 x1, u32 x2, u32 x3)
+{
+    random_state state;
+    state.v = portable_vec<u32, 4>{ x0, x1, x2, x3 };
+    random_next(state);
+    return state;
+}
+
+KFR_INTRINSIC random_state random_init(u64 x0, u64 x1)
+{
+    random_state state;
+    state.v = portable_vec<u32, 4>{ static_cast<u32>(x0), static_cast<u32>(x0 >> 32), static_cast<u32>(x1),
+                                    static_cast<u32>(x1 >> 32) };
+    random_next(state);
+    return state;
+}
 
 template <size_t N, KFR_ENABLE_IF(N <= sizeof(random_state))>
-KFR_INTRINSIC vec<u8, N> random_bits(random_bit_generator& gen)
+KFR_INTRINSIC vec<u8, N> random_bits(random_state& state)
 {
-    return narrow<N>(bitcast<u8>(gen()));
+    random_next(state);
+    return narrow<N>(bitcast<u8>(u32x4(state.v)));
 }
 
 template <size_t N, KFR_ENABLE_IF(N > sizeof(random_state))>
-KFR_INTRINSIC vec<u8, N> random_bits(random_bit_generator& gen)
+KFR_INTRINSIC vec<u8, N> random_bits(random_state& state)
 {
     constexpr size_t N2         = prev_poweroftwo(N - 1);
-    const vec<u8, N2> bits1     = random_bits<N2>(gen);
-    const vec<u8, N - N2> bits2 = random_bits<N - N2>(gen);
+    const vec<u8, N2> bits1     = random_bits<N2>(state);
+    const vec<u8, N - N2> bits2 = random_bits<N - N2>(state);
     return concat(bits1, bits2);
 }
 } // namespace CMT_ARCH_NAME
diff --git a/include/kfr/base/reduce.hpp b/include/kfr/base/reduce.hpp
@@ -25,11 +25,12 @@
  */
 #pragma once
 
-#include "../simd/min_max.hpp"
 #include "../simd/horizontal.hpp"
 #include "../simd/impl/function.hpp"
+#include "../simd/min_max.hpp"
 #include "../simd/operators.hpp"
 #include "../simd/vec.hpp"
+#include "simd_expressions.hpp"
 #include "basic_expressions.hpp"
 
 namespace kfr
@@ -47,12 +48,10 @@ KFR_FN(final_mean)
 template <typename T>
 KFR_INTRINSIC T final_rootmean(T value, size_t size)
 {
-    return builtin_sqrt(value / T(size));
+    return sqrt(value / T(size));
 }
 KFR_FN(final_rootmean)
 
-namespace internal
-{
 template <typename FinalFn, typename T, KFR_ENABLE_IF(is_callable<FinalFn, T, size_t>)>
 KFR_INTRINSIC auto reduce_call_final(FinalFn&& finalfn, size_t size, T value)
 {
@@ -64,10 +63,15 @@ KFR_INTRINSIC auto reduce_call_final(FinalFn&& finalfn, size_t, T value)
     return finalfn(value);
 }
 
-template <typename Tout, typename Twork, typename Tin, typename ReduceFn, typename TransformFn,
+template <typename Tout, index_t Dims, typename Twork, typename Tin, typename ReduceFn, typename TransformFn,
           typename FinalFn>
-struct expression_reduce : output_expression
+struct expression_reduce : public expression_traits_defaults
 {
+    using value_type             = Tin;
+    constexpr static size_t dims = Dims;
+    constexpr static shape<dims> shapeof(const expression_reduce&) { return shape<dims>(infinite_size); }
+    constexpr static shape<dims> shapeof() { return shape<dims>(infinite_size); }
+
     constexpr static size_t width = vector_width<Tin> * bitness_const(1, 2);
 
     using value_type = Tin;
@@ -78,18 +82,16 @@ struct expression_reduce : output_expression
     {
     }
 
-    template <size_t N>
-    KFR_INTRINSIC friend void set_elements(expression_reduce& self, coutput_t, size_t, const vec<Tin, N>& x)
+    KFR_MEM_INTRINSIC Tout get() { return reduce_call_final(finalfn, counter, horizontal(value, reducefn)); }
+
+    template <size_t N, index_t VecAxis>
+    friend KFR_INTRINSIC void set_elements(expression_reduce& self, shape<Dims>, axis_params<VecAxis, N>,
+                                           const identity<vec<Tin, N>>& x)
     {
         self.counter += N;
         self.process(x);
     }
 
-    KFR_MEM_INTRINSIC Tout get()
-    {
-        return internal::reduce_call_final(finalfn, counter, horizontal(value, reducefn));
-    }
-
 protected:
     void reset() { counter = 0; }
     KFR_MEM_INTRINSIC void process(const vec<Tin, width>& x) const
@@ -116,21 +118,20 @@ protected:
     FinalFn finalfn;
     mutable vec<Twork, width> value;
 };
-} // namespace internal
 
 template <typename ReduceFn, typename TransformFn = fn_generic::pass_through,
-          typename FinalFn = fn_generic::pass_through, typename E1, typename Tin = value_type_of<E1>,
+          typename FinalFn = fn_generic::pass_through, typename E1, typename Tin = expression_value_type<E1>,
           typename Twork = decay<decltype(std::declval<TransformFn>()(std::declval<Tin>()))>,
-          typename Tout  = decay<decltype(internal::reduce_call_final(
-              std::declval<FinalFn>(), std::declval<size_t>(), std::declval<Twork>()))>,
+          typename Tout  = decay<decltype(reduce_call_final(std::declval<FinalFn>(), std::declval<size_t>(),
+                                                           std::declval<Twork>()))>,
           KFR_ENABLE_IF(is_input_expression<E1>)>
 KFR_INTRINSIC Tout reduce(const E1& e1, ReduceFn&& reducefn,
                           TransformFn&& transformfn = fn_generic::pass_through(),
                           FinalFn&& finalfn         = fn_generic::pass_through())
 {
     static_assert(!is_infinite<E1>, "e1 must be a sized expression (use slice())");
-    using reducer_t =
-        internal::expression_reduce<Tout, Twork, Tin, decay<ReduceFn>, decay<TransformFn>, decay<FinalFn>>;
+    using reducer_t = expression_reduce<Tout, expression_dims<E1>, Twork, Tin, decay<ReduceFn>,
+                                        decay<TransformFn>, decay<FinalFn>>;
     reducer_t red(std::forward<ReduceFn>(reducefn), std::forward<TransformFn>(transformfn),
                   std::forward<FinalFn>(finalfn));
     process(red, e1);
@@ -139,10 +140,10 @@ KFR_INTRINSIC Tout reduce(const E1& e1, ReduceFn&& reducefn,
 }
 
 template <typename ReduceFn, typename TransformFn = fn_generic::pass_through,
-          typename FinalFn = fn_generic::pass_through, typename E1, typename Tin = value_type_of<E1>,
+          typename FinalFn = fn_generic::pass_through, typename E1, typename Tin = expression_value_type<E1>,
           typename Twork = decay<decltype(std::declval<TransformFn>()(std::declval<Tin>()))>,
-          typename Tout  = decay<decltype(internal::reduce_call_final(
-              std::declval<FinalFn>(), std::declval<size_t>(), std::declval<Twork>()))>,
+          typename Tout  = decay<decltype(reduce_call_final(std::declval<FinalFn>(), std::declval<size_t>(),
+                                                           std::declval<Twork>()))>,
           KFR_ENABLE_IF(!is_input_expression<E1>)>
 KFR_INTRINSIC Tout reduce(const E1& e1, ReduceFn&& reducefn,
                           TransformFn&& transformfn = fn_generic::pass_through(),
@@ -158,8 +159,6 @@ KFR_INTRINSIC Tout reduce(const E1& e1, ReduceFn&& reducefn,
     return internal::reduce_call_final(finalfn, counter, result);
 }
 
-KFR_FN(reduce)
-
 /**
  * @brief Returns the sum of all the elements in x.
  *
@@ -168,7 +167,7 @@ KFR_FN(reduce)
  *  x_0 + x_1 + \ldots + x_{N-1}
  * \f]
  */
-template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>)>
+template <typename E1, typename T = expression_value_type<E1>, KFR_ENABLE_IF(is_input_expression<E1>)>
 KFR_FUNCTION T sum(const E1& x)
 {
     static_assert(!is_infinite<E1>, "e1 must be a sized expression (use slice())");
@@ -183,7 +182,7 @@ KFR_FUNCTION T sum(const E1& x)
  *  \frac{1}{N}(x_0 + x_1 + \ldots + x_{N-1})
  * \f]
  */
-template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>)>
+template <typename E1, typename T = expression_value_type<E1>, KFR_ENABLE_IF(is_input_expression<E1>)>
 KFR_FUNCTION T mean(const E1& x)
 {
     static_assert(!is_infinite<E1>, "e1 must be a sized expression (use slice())");
@@ -195,7 +194,7 @@ KFR_FUNCTION T mean(const E1& x)
  *
  * x must have its size and type specified.
  */
-template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>)>
+template <typename E1, typename T = expression_value_type<E1>, KFR_ENABLE_IF(is_input_expression<E1>)>
 KFR_FUNCTION T minof(const E1& x)
 {
     static_assert(!is_infinite<E1>, "e1 must be a sized expression (use slice())");
@@ -207,7 +206,7 @@ KFR_FUNCTION T minof(const E1& x)
  *
  * x must have its size and type specified.
  */
-template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>)>
+template <typename E1, typename T = expression_value_type<E1>, KFR_ENABLE_IF(is_input_expression<E1>)>
 KFR_FUNCTION T maxof(const E1& x)
 {
     static_assert(!is_infinite<E1>, "e1 must be a sized expression (use slice())");
@@ -219,7 +218,7 @@ KFR_FUNCTION T maxof(const E1& x)
  *
  * x must have its size and type specified.
  */
-template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>)>
+template <typename E1, typename T = expression_value_type<E1>, KFR_ENABLE_IF(is_input_expression<E1>)>
 KFR_FUNCTION T absminof(const E1& x)
 {
     static_assert(!is_infinite<E1>, "e1 must be a sized expression (use slice())");
@@ -231,7 +230,7 @@ KFR_FUNCTION T absminof(const E1& x)
  *
  * x must have its size and type specified.
  */
-template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>)>
+template <typename E1, typename T = expression_value_type<E1>, KFR_ENABLE_IF(is_input_expression<E1>)>
 KFR_FUNCTION T absmaxof(const E1& x)
 {
     static_assert(!is_infinite<E1>, "e1 must be a sized expression (use slice())");
@@ -247,8 +246,8 @@ KFR_FUNCTION T absmaxof(const E1& x)
  * \f]
  */
 template <typename E1, typename E2,
-          typename T = value_type_of<decltype(std::declval<E1>() * std::declval<E2>())>,
-          KFR_ENABLE_IF(is_input_expressions<E1, E2>)>
+          typename T = expression_value_type<decltype(std::declval<E1>() * std::declval<E2>())>,
+          KFR_ACCEPT_EXPRESSIONS(E1, E2)>
 KFR_FUNCTION T dotproduct(E1&& x, E2&& y)
 {
     auto m    = std::forward<E1>(x) * std::forward<E2>(y);
@@ -265,7 +264,7 @@ KFR_FUNCTION T dotproduct(E1&& x, E2&& y)
    \sqrt{\frac{1}{N}( x_0^2 + x_1^2 + \ldots + x_{N-1}^2)}
    \f]
  */
-template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>)>
+template <typename E1, typename T = expression_value_type<E1>, KFR_ENABLE_IF(is_input_expression<E1>)>
 KFR_FUNCTION T rms(const E1& x)
 {
     static_assert(!is_infinite<E1>, "e1 must be a sized expression (use slice())");
@@ -280,7 +279,7 @@ KFR_FUNCTION T rms(const E1& x)
     x_0^2 + x_1^2 + \ldots + x_{N-1}^2
    \f]
  */
-template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>)>
+template <typename E1, typename T = expression_value_type<E1>, KFR_ENABLE_IF(is_input_expression<E1>)>
 KFR_FUNCTION T sumsqr(const E1& x)
 {
     static_assert(!is_infinite<E1>, "e1 must be a sized expression (use slice())");
@@ -295,7 +294,7 @@ KFR_FUNCTION T sumsqr(const E1& x)
     x_0 \cdot x_1 \cdot \ldots \cdot x_{N-1}
    \f]
  */
-template <typename E1, typename T = value_type_of<E1>, KFR_ENABLE_IF(is_input_expression<E1>)>
+template <typename E1, typename T = expression_value_type<E1>, KFR_ENABLE_IF(is_input_expression<E1>)>
 KFR_FUNCTION T product(const E1& x)
 {
     static_assert(!is_infinite<E1>, "e1 must be a sized expression (use slice())");
diff --git a/include/kfr/base/shape.hpp b/include/kfr/base/shape.hpp
@@ -2,7 +2,7 @@
  *  @{
  */
 /*
-  Copyright (C) 2016 D Levin (https://www.kfrlib.com)
+  Copyright (C) 2016-2022 Fractalium Ltd (https://www.kfrlib.com)
   This file is part of KFR
 
   KFR is free software: you can redistribute it and/or modify
@@ -27,6 +27,8 @@
 
 #include "impl/static_array.hpp"
 
+#include "../cometa/string.hpp"
+#include "../simd/logical.hpp"
 #include "../simd/min_max.hpp"
 #include "../simd/shuffle.hpp"
 #include "../simd/types.hpp"
@@ -83,11 +85,17 @@ struct shape : static_array_base<index_t, csizeseq_t<dims>>
 
     static_assert(dims < maximum_dims);
 
+    /// @brief Implicit conversion to index_t, enabled only for one-dimensional shapes (dims == 1).
+    /// Returns the first element of the shape (this->front()).
+    /// NOTE(review): the defaulted `dummy` parameter appears to exist only so that the
+    /// KFR_ENABLE_IF condition depends on a template parameter of this member -- confirm
+    /// against the definition of KFR_ENABLE_IF.
+    template <int dummy = 0, KFR_ENABLE_IF(dummy == 0 && dims == 1)>
+    operator index_t() const
+    {
+        return this->front();
+    }
+
     bool ge(const shape& other) const
     {
         if constexpr (dims == 1)
         {
-            return front() >= other.front();
+            return this->front() >= other.front();
         }
         else
         {
@@ -109,7 +117,7 @@ struct shape : static_array_base<index_t, csizeseq_t<dims>>
     {
         if constexpr (dims == 1)
         {
-            return front() <= other.front();
+            return this->front() <= other.front();
         }
         else
         {
@@ -267,7 +275,17 @@ struct shape : static_array_base<index_t, csizeseq_t<dims>>
         return result;
     }
 
-    template <size_t odims>
+    /// @brief Returns this shape widened to new_dims dimensions.
+    /// @tparam new_dims target number of dimensions; must be >= dims (enforced by static_assert)
+    /// @param value size given to every added dimension (defaults to infinite_size)
+    /// @return *this unchanged when new_dims == dims; otherwise the concatenation of
+    /// (new_dims - dims) copies of value, passed first, with the existing sizes
+    template <index_t new_dims>
+    KFR_MEM_INTRINSIC shape<new_dims> extend(index_t value = infinite_size) const
+    {
+        static_assert(new_dims >= dims);
+        if constexpr (new_dims == dims)
+            return *this;
+        else
+            return concat(broadcast<new_dims - dims>(value), **this);
+    }
+
+    template <index_t odims>
     shape<odims> trim() const
     {
         static_assert(odims <= dims);
@@ -293,8 +311,6 @@ struct shape : static_array_base<index_t, csizeseq_t<dims>>
         }
     }
 
-    shape<dims + 1> extend() const { return concat(**this, vec{ index_t(0) }); }
-
     KFR_MEM_INTRINSIC constexpr index_t revindex(size_t index) const
     {
         return index < dims ? this->operator[](dims - 1 - index) : 1;
@@ -313,8 +329,8 @@ struct shape<0>
 
     static constexpr size_t size() { return static_size; }
 
-    shape() = default;
-    shape(index_t value) {}
+    constexpr shape() = default;
+    constexpr shape(index_t value) {}
 
     constexpr bool has_infinity() const { return false; }
 
@@ -331,13 +347,20 @@ struct shape<0>
 
     KFR_MEM_INTRINSIC index_t dot(const shape& other) const { return 0; }
 
-    KFR_MEM_INTRINSIC size_t product() const { return 0; }
+    KFR_MEM_INTRINSIC index_t product() const { return 0; }
 
     KFR_MEM_INTRINSIC dimset tomask() const { return -1; }
 
-    shape<1> extend() const { return { 0 }; }
+    /// @brief Returns this zero-dimensional shape widened to new_dims dimensions.
+    /// @tparam new_dims target number of dimensions
+    /// @param value size passed to the resulting shape (defaults to infinite_size)
+    /// @return *this when new_dims == 0; otherwise shape<new_dims>{ value }
+    /// (presumably every dimension is set to value -- TODO confirm against the
+    /// shape(index_t) constructor)
+    template <index_t new_dims>
+    KFR_MEM_INTRINSIC shape<new_dims> extend(index_t value = infinite_size) const
+    {
+        if constexpr (new_dims == 0)
+            return *this;
+        else
+            return shape<new_dims>{ value };
+    }
 
-    template <size_t new_dims>
+    template <index_t new_dims>
     shape<new_dims> trim() const
     {
         static_assert(new_dims == 0);
@@ -468,7 +491,7 @@ bool can_assign_from(const shape<dims1>& dst_shape, const shape<dims2>& src_shap
             vec<index_t, outdims> dst = padlow<outdims - dims1>(*dst_shape, 1);
             vec<index_t, outdims> src = padlow<outdims - dims2>(*src_shape, 1);
 
-            mask<index_t, outdims> match = src + 1 <= 2 || src == dst;
+            mask<index_t, outdims> match = src + 1 <= 2 || src == dst || dst == infinite_size;
             return all(match);
         }
         else
@@ -477,7 +500,8 @@ bool can_assign_from(const shape<dims1>& dst_shape, const shape<dims2>& src_shap
             {
                 index_t dst_size = dst_shape.revindex(i);
                 index_t src_size = src_shape.revindex(i);
-                if (src_size == 1 || src_size == infinite_size || src_size == dst_size)
+                if (src_size == 1 || src_size == infinite_size || src_size == dst_size ||
+                    dst_size == infinite_size)
                 {
                 }
                 else
@@ -497,13 +521,20 @@ constexpr shape<dims> common_shape(const shape<dims>& shape)
 }
 
 template <index_t dims1, index_t dims2, index_t outdims = const_max(dims1, dims2)>
-constexpr shape<outdims> common_shape(const shape<dims1>& shape1, const shape<dims2>& shape2)
+KFR_MEM_INTRINSIC constexpr shape<outdims> common_shape(const shape<dims1>& shape1,
+                                                        const shape<dims2>& shape2)
 {
     shape<outdims> result;
     for (size_t i = 0; i < outdims; ++i)
     {
         index_t size1 = shape1.revindex(i);
         index_t size2 = shape2.revindex(i);
+        if (!size1 || !size2)
+        {
+            result[outdims - 1 - i] = 0;
+            continue;
+        }
+
         if (size1 == infinite_size)
         {
             if (size2 == infinite_size)
@@ -512,14 +543,14 @@ constexpr shape<outdims> common_shape(const shape<dims1>& shape1, const shape<di
             }
             else
             {
-                result[outdims - 1 - i] = size2;
+                result[outdims - 1 - i] = size2 == 1 ? infinite_size : size2;
             }
         }
         else
         {
             if (size2 == infinite_size)
             {
-                result[outdims - 1 - i] = size1;
+                result[outdims - 1 - i] = size1 == 1 ? infinite_size : size1;
             }
             else
             {
@@ -540,11 +571,19 @@ constexpr shape<outdims> common_shape(const shape<dims1>& shape1, const shape<di
 }
 
 template <>
-KFR_MEM_INTRINSIC shape<0> common_shape(const shape<0>& shape1, const shape<0>& shape2)
+KFR_MEM_INTRINSIC constexpr shape<0> common_shape(const shape<0>& shape1, const shape<0>& shape2)
 {
     return {};
 }
 
+/// @brief Computes the common shape of three or more shapes.
+/// The shapes after the first are combined first (recursively), and that result is then
+/// combined with shape1: common_shape(shape1, common_shape(shape2, shapes...)).
+/// @tparam outdims number of dimensions of the result; defaults to the maximum of all input dims
+template <index_t dims1, index_t dims2, index_t... dims, index_t outdims = const_max(dims1, dims2, dims...)>
+KFR_MEM_INTRINSIC constexpr shape<outdims> common_shape(const shape<dims1>& shape1,
+                                                        const shape<dims2>& shape2,
+                                                        const shape<dims>&... shapes)
+{
+    return common_shape(shape1, common_shape(shape2, shapes...));
+}
+
 template <index_t dims1, index_t dims2>
 KFR_MEM_INTRINSIC bool same_layout(const shape<dims1>& x, const shape<dims2>& y)
 {
@@ -745,6 +784,7 @@ struct axis_params
 {
     constexpr static index_t axis  = Axis;
     constexpr static index_t width = N;
+    constexpr static index_t value = N;
 
     constexpr axis_params() = default;
 };
@@ -753,3 +793,31 @@ template <index_t Axis, index_t N>
 constexpr inline const axis_params<Axis, N> axis_params_v{};
 
 } // namespace kfr
+
+namespace cometa
+{
+/// @brief Specialization of cometa::representation for kfr::shape<dims>.
+/// get() renders the shape as a std::string: "shape{}" for a zero-dimensional shape,
+/// otherwise "shape{a, b, ...}" where each element is converted with as_string.
+template <kfr::index_t dims>
+struct representation<kfr::shape<dims>>
+{
+    using type = std::string;
+    /// @brief Builds the textual form of value.
+    /// @param value shape to render
+    /// @return the rendered string
+    static std::string get(const kfr::shape<dims>& value)
+    {
+        if constexpr (dims == 0)
+        {
+            return "shape{}";
+        }
+        else
+        {
+            std::string s;
+            for (kfr::index_t i = 0; i < dims; ++i)
+            {
+                // A ", " separator goes before every element except the first
+                if (CMT_LIKELY(i > 0))
+                    s += ", ";
+                s += as_string(value[i]);
+            }
+            return "shape{" + s + "}";
+        }
+    }
+};
+
+} // namespace cometa
diff --git a/include/kfr/base/simd_expressions.hpp b/include/kfr/base/simd_expressions.hpp
@@ -2,7 +2,7 @@
  *  @{
  */
 /*
-  Copyright (C) 2016 D Levin (https://www.kfrlib.com)
+  Copyright (C) 2016-2022 Fractalium Ltd (https://www.kfrlib.com)
   This file is part of KFR
 
   KFR is free software: you can redistribute it and/or modify
@@ -25,9 +25,15 @@
  */
 #pragma once
 
+#include "../simd/abs.hpp"
+#include "../simd/clamp.hpp"
 #include "../simd/comparison.hpp"
 #include "../simd/complex.hpp"
+#include "../simd/min_max.hpp"
 #include "../simd/operators.hpp"
+#include "../simd/round.hpp"
+#include "../simd/saturation.hpp"
+#include "../simd/select.hpp"
 #include "../simd/vec.hpp"
 #include "expression.hpp"
 #include "univector.hpp"
@@ -36,17 +42,20 @@
 namespace kfr
 {
 
+inline namespace CMT_ARCH_NAME
+{
+
 /**
  * @brief Returns template expression that returns sum of all the arguments passed to a function.
  */
 template <typename... E, KFR_ACCEPT_EXPRESSIONS(E...)>
-KFR_INTRINSIC xfunction<fn::add, E...> add(E&&... x)
+KFR_INTRINSIC expression_function<fn::add, E...> add(E&&... x)
 {
     return { fn::add(), std::forward<E>(x)... };
 }
 
 template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)>
-KFR_INTRINSIC xfunction<fn::sub, E1, E2> sub(E1&& x, E2&& y)
+KFR_INTRINSIC expression_function<fn::sub, E1, E2> sub(E1&& x, E2&& y)
 {
     return { fn::sub(), std::forward<E1>(x), std::forward<E2>(y) };
 }
@@ -55,97 +64,102 @@ KFR_INTRINSIC xfunction<fn::sub, E1, E2> sub(E1&& x, E2&& y)
  * @brief Returns template expression that returns product of all the arguments passed to a function.
  */
 template <typename... E, KFR_ACCEPT_EXPRESSIONS(E...)>
-KFR_INTRINSIC xfunction<fn::mul, E...> mul(E&&... x)
+KFR_INTRINSIC expression_function<fn::mul, E...> mul(E&&... x)
 {
     return { fn::mul(), std::forward<E>(x)... };
 }
 
 template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)>
-KFR_INTRINSIC xfunction<fn::ipow, E1, E2> ipow(E1&& x, E2&& b)
+KFR_INTRINSIC expression_function<fn::ipow, E1, E2> ipow(E1&& x, E2&& b)
 {
     return { fn::ipow(), std::forward<E1>(x), std::forward<E2>(b) };
 }
 
 template <typename E1, typename E2, typename E3, KFR_ACCEPT_EXPRESSIONS(E1, E2, E3)>
-KFR_INTRINSIC xfunction<fn::mix, E1, E2, E3> mix(E1&& c, E2&& x, E3&& y)
+KFR_INTRINSIC expression_function<fn::mix, E1, E2, E3> mix(E1&& c, E2&& x, E3&& y)
 {
     return { fn::mix(), std::forward<E1>(c), std::forward<E2>(x), std::forward<E3>(y) };
 }
 
 template <typename E1, typename E2, typename E3, KFR_ACCEPT_EXPRESSIONS(E1, E2, E3)>
-KFR_INTRINSIC xfunction<fn::mixs, E1, E2, E3> mixs(E1&& c, E2&& x, E3&& y)
+KFR_INTRINSIC expression_function<fn::mixs, E1, E2, E3> mixs(E1&& c, E2&& x, E3&& y)
 {
     return { fn::mixs(), std::forward<E1>(c), std::forward<E2>(x), std::forward<E3>(y) };
 }
 
 template <typename... E, KFR_ACCEPT_EXPRESSIONS(E...)>
-KFR_INTRINSIC xfunction<fn::horner, E...> horner(E&&... x)
+KFR_INTRINSIC expression_function<fn::horner, E...> horner(E&&... x)
 {
     return { fn::horner(), std::forward<E>(x)... };
 }
 
 template <typename... E, KFR_ACCEPT_EXPRESSIONS(E...)>
-KFR_INTRINSIC xfunction<fn::horner_even, E...> horner_even(E&&... x)
+KFR_INTRINSIC expression_function<fn::horner_even, E...> horner_even(E&&... x)
 {
     return { fn::horner_even(), std::forward<E>(x)... };
 }
 
 template <typename... E, KFR_ACCEPT_EXPRESSIONS(E...)>
-KFR_INTRINSIC xfunction<fn::horner_odd, E...> horner_odd(E&&... x)
+KFR_INTRINSIC expression_function<fn::horner_odd, E...> horner_odd(E&&... x)
 {
     return { fn::horner_odd(), std::forward<E>(x)... };
 }
 
 template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)>
-KFR_INTRINSIC xfunction<fn::add, E1, E2> operator+(E1&& e1, E2&& e2)
+KFR_INTRINSIC expression_function<fn::add, E1, E2> operator+(E1&& e1, E2&& e2)
 {
     return { fn::add(), std::forward<E1>(e1), std::forward<E2>(e2) };
 }
 
 template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)>
-KFR_INTRINSIC xfunction<fn::sub, E1, E2> operator-(E1&& e1, E2&& e2)
+KFR_INTRINSIC expression_function<fn::sub, E1, E2> operator-(E1&& e1, E2&& e2)
 {
     return { fn::sub(), std::forward<E1>(e1), std::forward<E2>(e2) };
 }
 
 template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)>
-KFR_INTRINSIC xfunction<fn::mul, E1, E2> operator*(E1&& e1, E2&& e2)
+KFR_INTRINSIC expression_function<fn::mul, E1, E2> operator*(E1&& e1, E2&& e2)
 {
     return { fn::mul(), std::forward<E1>(e1), std::forward<E2>(e2) };
 }
 
 template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)>
-KFR_INTRINSIC xfunction<fn::div, E1, E2> operator/(E1&& e1, E2&& e2)
+KFR_INTRINSIC expression_function<fn::div, E1, E2> operator/(E1&& e1, E2&& e2)
 {
     return { fn::div(), std::forward<E1>(e1), std::forward<E2>(e2) };
 }
+template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)>
+KFR_INTRINSIC expression_function<fn::mod, E1, E2> operator%(E1&& e1, E2&& e2)
+{
+    return { fn::mod(), std::forward<E1>(e1), std::forward<E2>(e2) };
+}
 
 template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)>
-KFR_INTRINSIC xfunction<fn::bitwiseand, E1, E2> operator&(E1&& e1, E2&& e2)
+KFR_INTRINSIC expression_function<fn::bitwiseand, E1, E2> operator&(E1&& e1, E2&& e2)
 {
     return { fn::bitwiseand(), std::forward<E1>(e1), std::forward<E2>(e2) };
 }
 
 template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)>
-KFR_INTRINSIC xfunction<fn::bitwiseor, E1, E2> operator|(E1&& e1, E2&& e2)
+KFR_INTRINSIC expression_function<fn::bitwiseor, E1, E2> operator|(E1&& e1, E2&& e2)
 {
     return { fn::bitwiseor(), std::forward<E1>(e1), std::forward<E2>(e2) };
 }
 
 template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)>
-KFR_INTRINSIC xfunction<fn::bitwisexor, E1, E2> operator^(E1&& e1, E2&& e2)
+KFR_INTRINSIC expression_function<fn::bitwisexor, E1, E2> operator^(E1&& e1, E2&& e2)
 {
     return { fn::bitwisexor(), std::forward<E1>(e1), std::forward<E2>(e2) };
 }
 
 template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)>
-KFR_INTRINSIC xfunction<fn::shl, E1, E2> operator<<(E1&& e1, E2&& e2)
+KFR_INTRINSIC expression_function<fn::shl, E1, E2> operator<<(E1&& e1, E2&& e2)
 {
     return { fn::shl(), std::forward<E1>(e1), std::forward<E2>(e2) };
 }
 
 template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)>
-KFR_INTRINSIC xfunction<fn::shr, E1, E2> operator>>(E1&& e1, E2&& e2)
+KFR_INTRINSIC expression_function<fn::shr, E1, E2> operator>>(E1&& e1, E2&& e2)
 {
     return { fn::shr(), std::forward<E1>(e1), std::forward<E2>(e2) };
 }
@@ -154,7 +168,7 @@ KFR_INTRINSIC xfunction<fn::shr, E1, E2> operator>>(E1&& e1, E2&& e2)
  * @brief Returns template expression that returns square of x.
  */
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_INTRINSIC xfunction<fn::sqr, E1> sqr(E1&& x)
+KFR_INTRINSIC expression_function<fn::sqr, E1> sqr(E1&& x)
 {
     return { fn::sqr(), std::forward<E1>(x) };
 }
@@ -163,112 +177,330 @@ KFR_INTRINSIC xfunction<fn::sqr, E1> sqr(E1&& x)
  * @brief Returns template expression that returns cube of x.
  */
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_INTRINSIC xfunction<fn::cub, E1> cub(E1&& x)
+KFR_INTRINSIC expression_function<fn::cub, E1> cub(E1&& x)
 {
     return { fn::cub(), std::forward<E1>(x) };
 }
 
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_INTRINSIC xfunction<fn::pow2, E1> pow2(E1&& x)
+KFR_INTRINSIC expression_function<fn::pow2, E1> pow2(E1&& x)
 {
     return { fn::pow2(), std::forward<E1>(x) };
 }
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_INTRINSIC xfunction<fn::pow3, E1> pow3(E1&& x)
+KFR_INTRINSIC expression_function<fn::pow3, E1> pow3(E1&& x)
 {
     return { fn::pow3(), std::forward<E1>(x) };
 }
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_INTRINSIC xfunction<fn::pow4, E1> pow4(E1&& x)
+KFR_INTRINSIC expression_function<fn::pow4, E1> pow4(E1&& x)
 {
     return { fn::pow4(), std::forward<E1>(x) };
 }
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_INTRINSIC xfunction<fn::pow5, E1> pow5(E1&& x)
+KFR_INTRINSIC expression_function<fn::pow5, E1> pow5(E1&& x)
 {
     return { fn::pow5(), std::forward<E1>(x) };
 }
 
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_INTRINSIC xfunction<fn::neg, E1> operator-(E1&& e1)
+KFR_INTRINSIC expression_function<fn::neg, E1> operator-(E1&& e1)
 {
     return { fn::neg(), std::forward<E1>(e1) };
 }
 
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_INTRINSIC xfunction<fn::bitwisenot, E1> operator~(E1&& e1)
+KFR_INTRINSIC expression_function<fn::bitwisenot, E1> operator~(E1&& e1)
 {
     return { fn::bitwisenot(), std::forward<E1>(e1) };
 }
 
 /// @brief Constructs complex value from real and imaginary parts
 template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)>
-KFR_INTRINSIC xfunction<fn::make_complex, E1, E2> make_complex(E1&& re, E2&& im)
+KFR_INTRINSIC expression_function<fn::make_complex, E1, E2> make_complex(E1&& re, E2&& im)
 {
     return { fn::make_complex{}, std::forward<E1>(re), std::forward<E2>(im) };
 }
 
 template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)>
-KFR_INTRINSIC xfunction<fn::equal, E1, E2> operator==(E1&& e1, E2&& e2)
+KFR_INTRINSIC expression_function<fn::equal, E1, E2> operator==(E1&& e1, E2&& e2)
 {
     return { fn::equal(), std::forward<E1>(e1), std::forward<E2>(e2) };
 }
 
 template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)>
-KFR_INTRINSIC xfunction<fn::notequal, E1, E2> operator!=(E1&& e1, E2&& e2)
+KFR_INTRINSIC expression_function<fn::notequal, E1, E2> operator!=(E1&& e1, E2&& e2)
 {
     return { fn::notequal(), std::forward<E1>(e1), std::forward<E2>(e2) };
 }
 
 template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)>
-KFR_INTRINSIC xfunction<fn::less, E1, E2> operator<(E1&& e1, E2&& e2)
+KFR_INTRINSIC expression_function<fn::less, E1, E2> operator<(E1&& e1, E2&& e2)
 {
     return { fn::less(), std::forward<E1>(e1), std::forward<E2>(e2) };
 }
 
 template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)>
-KFR_INTRINSIC xfunction<fn::greater, E1, E2> operator>(E1&& e1, E2&& e2)
+KFR_INTRINSIC expression_function<fn::greater, E1, E2> operator>(E1&& e1, E2&& e2)
 {
     return { fn::greater(), std::forward<E1>(e1), std::forward<E2>(e2) };
 }
 
 template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)>
-KFR_INTRINSIC xfunction<fn::lessorequal, E1, E2> operator<=(E1&& e1, E2&& e2)
+KFR_INTRINSIC expression_function<fn::lessorequal, E1, E2> operator<=(E1&& e1, E2&& e2)
 {
     return { fn::lessorequal(), std::forward<E1>(e1), std::forward<E2>(e2) };
 }
 
 template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)>
-KFR_INTRINSIC xfunction<fn::greaterorequal, E1, E2> operator>=(E1&& e1, E2&& e2)
+KFR_INTRINSIC expression_function<fn::greaterorequal, E1, E2> operator>=(E1&& e1, E2&& e2)
 {
     return { fn::greaterorequal(), std::forward<E1>(e1), std::forward<E2>(e2) };
 }
 
 /// @brief Returns the real part of the complex value
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_INTRINSIC xfunction<fn::real, E1> real(E1&& x)
+KFR_INTRINSIC expression_function<fn::real, E1> real(E1&& x)
 {
     return { fn::real{}, std::forward<E1>(x) };
 }
 
 /// @brief Returns the imaginary part of the complex value
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_INTRINSIC xfunction<fn::imag, E1> imag(E1&& x)
+KFR_INTRINSIC expression_function<fn::imag, E1> imag(E1&& x)
 {
     return { fn::imag{}, std::forward<E1>(x) };
 }
 
 /// @brief Returns template expression that returns the complex conjugate of the complex number x
 template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
-KFR_FUNCTION xfunction<fn::cconj, E1> cconj(E1&& x)
+KFR_FUNCTION expression_function<fn::cconj, E1> cconj(E1&& x)
 {
     return { fn::cconj(), std::forward<E1>(x) };
 }
 
 template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)>
-CMT_INTRINSIC xfunction<fn::interleave, E1, E2> interleave(E1&& x, E2&& y)
+CMT_INTRINSIC expression_function<fn::interleave, E1, E2> interleave(E1&& x, E2&& y)
 {
     return { fn::interleave(), std::forward<E1>(x), std::forward<E2>(y) };
 }
 
+/**
+ * @brief Returns template expression that returns x if m is true, otherwise returns y. The order of the
+ * arguments is the same as in the ternary operator.
+ */
+template <typename E1, typename E2, typename E3, KFR_ACCEPT_EXPRESSIONS(E1, E2, E3)>
+KFR_FUNCTION expression_function<fn::select, E1, E2, E3> select(E1&& m, E2&& x, E3&& y)
+{
+    return { fn::select(), std::forward<E1>(m), std::forward<E2>(x), std::forward<E3>(y) };
+}
+
+/**
+ * @brief Returns template expression that returns the absolute value of x.
+ */
+template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
+KFR_FUNCTION expression_function<fn::abs, E1> abs(E1&& x)
+{
+    return { fn::abs(), std::forward<E1>(x) };
+}
+
+/**
+ * @brief Returns the smaller of two values. Accepts and returns expressions.
+ */
+template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)>
+KFR_FUNCTION expression_function<fn::min, E1, E2> min(E1&& x, E2&& y)
+{
+    return { fn::min(), std::forward<E1>(x), std::forward<E2>(y) };
+}
+
+/**
+ * @brief Returns the greater of two values. Accepts and returns expressions.
+ */
+template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)>
+KFR_FUNCTION expression_function<fn::max, E1, E2> max(E1&& x, E2&& y)
+{
+    return { fn::max(), std::forward<E1>(x), std::forward<E2>(y) };
+}
+
+/**
+ * @brief Returns the smaller in magnitude of two values. Accepts and returns expressions.
+ */
+template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)>
+KFR_FUNCTION expression_function<fn::absmin, E1, E2> absmin(E1&& x, E2&& y)
+{
+    return { fn::absmin(), std::forward<E1>(x), std::forward<E2>(y) };
+}
+
+/**
+ * @brief Returns the greater in magnitude of two values. Accepts and returns expressions.
+ */
+template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)>
+KFR_FUNCTION expression_function<fn::absmax, E1, E2> absmax(E1&& x, E2&& y)
+{
+    return { fn::absmax(), std::forward<E1>(x), std::forward<E2>(y) };
+}
+
+/// @brief Returns the largest integer value not greater than x. Accepts and returns expressions.
+template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
+KFR_FUNCTION expression_function<fn::floor, E1> floor(E1&& x)
+{
+    return { fn::floor(), std::forward<E1>(x) };
+}
+
+/// @brief Creates an expression that applies fn::ceil to x. Accepts and returns expressions.
+template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
+KFR_FUNCTION expression_function<fn::ceil, E1> ceil(E1&& x)
+{
+    return { fn::ceil(), std::forward<E1>(x) };
+}
+
+/// @brief Creates an expression that applies fn::round to x. Accepts and returns expressions.
+template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
+KFR_FUNCTION expression_function<fn::round, E1> round(E1&& x)
+{
+    return { fn::round(), std::forward<E1>(x) };
+}
+
+/// @brief Creates an expression that applies fn::trunc to x. Accepts and returns expressions.
+template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
+KFR_FUNCTION expression_function<fn::trunc, E1> trunc(E1&& x)
+{
+    return { fn::trunc(), std::forward<E1>(x) };
+}
+
+/// @brief Creates an expression that applies fn::fract to x. Accepts and returns expressions.
+template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
+KFR_FUNCTION expression_function<fn::fract, E1> fract(E1&& x)
+{
+    return { fn::fract(), std::forward<E1>(x) };
+}
+
+/// @brief Creates an expression that applies fn::ifloor to x. Accepts and returns expressions.
+template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
+KFR_FUNCTION expression_function<fn::ifloor, E1> ifloor(E1&& x)
+{
+    return { fn::ifloor(), std::forward<E1>(x) };
+}
+
+/// @brief Creates an expression that applies fn::iceil to x. Accepts and returns expressions.
+template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
+KFR_FUNCTION expression_function<fn::iceil, E1> iceil(E1&& x)
+{
+    return { fn::iceil(), std::forward<E1>(x) };
+}
+
+/// @brief Creates an expression that applies fn::iround to x. Accepts and returns expressions.
+template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
+KFR_FUNCTION expression_function<fn::iround, E1> iround(E1&& x)
+{
+    return { fn::iround(), std::forward<E1>(x) };
+}
+
+/// @brief Creates an expression that applies fn::itrunc to x. Accepts and returns expressions.
+template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
+KFR_FUNCTION expression_function<fn::itrunc, E1> itrunc(E1&& x)
+{
+    return { fn::itrunc(), std::forward<E1>(x) };
+}
+
+/// @brief Creates an expression that returns the first argument clamped to a range [lo, hi]
+template <typename E1, typename E2, typename E3, KFR_ACCEPT_EXPRESSIONS(E1, E2, E3)>
+KFR_FUNCTION expression_function<fn::clamp, E1, E2, E3> clamp(E1&& x, E2&& lo, E3&& hi)
+{
+    return { fn::clamp(), std::forward<E1>(x), std::forward<E2>(lo), std::forward<E3>(hi) };
+}
+
+/// @brief Creates an expression that returns the first argument clamped to a range [0, hi]
+template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)>
+KFR_FUNCTION expression_function<fn::clamp, E1, E2> clamp(E1&& x, E2&& hi)
+{
+    return { fn::clamp(), std::forward<E1>(x), std::forward<E2>(hi) };
+}
+
+/// @brief Creates an expression that adds two arguments using saturation
+template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)>
+KFR_INTRINSIC expression_function<fn::satadd, E1, E2> satadd(E1&& x, E2&& y)
+{
+    return { fn::satadd(), std::forward<E1>(x), std::forward<E2>(y) };
+}
+
+/// @brief Creates an expression that subtracts two arguments using saturation
+template <typename E1, typename E2, KFR_ACCEPT_EXPRESSIONS(E1, E2)>
+KFR_INTRINSIC expression_function<fn::satsub, E1, E2> satsub(E1&& x, E2&& y)
+{
+    return { fn::satsub(), std::forward<E1>(x), std::forward<E2>(y) };
+}
+
+/// @brief Compound addition for expressions: calls process(e1, e1 + e2) and returns e1.
+/// Constrained by enable_if_input_output_expression<E1> and enable_if_input_expression<E2>.
+/// e2 is passed on as an lvalue (it is not forwarded).
+template <typename E1, typename E2, enable_if_input_output_expression<E1>* = nullptr,
+          enable_if_input_expression<E2>* = nullptr>
+KFR_INTRINSIC E1& operator+=(E1& e1, E2&& e2)
+{
+    process(e1, operator+(e1, e2));
+    return e1;
+}
+/// @brief Compound subtraction for expressions: calls process(e1, e1 - e2) and returns e1.
+/// Constrained by enable_if_input_output_expression<E1> and enable_if_input_expression<E2>.
+/// e2 is passed on as an lvalue (it is not forwarded).
+template <typename E1, typename E2, enable_if_input_output_expression<E1>* = nullptr,
+          enable_if_input_expression<E2>* = nullptr>
+KFR_INTRINSIC E1& operator-=(E1& e1, E2&& e2)
+{
+    process(e1, operator-(e1, e2));
+    return e1;
+}
+/// @brief Compound multiplication for expressions: calls process(e1, e1 * e2) and returns e1.
+/// This overload takes e1 as an lvalue reference.
+/// Constrained by enable_if_input_output_expression<E1> and enable_if_input_expression<E2>.
+/// e2 is passed on as an lvalue (it is not forwarded).
+template <typename E1, typename E2, enable_if_input_output_expression<E1>* = nullptr,
+          enable_if_input_expression<E2>* = nullptr>
+KFR_INTRINSIC E1& operator*=(E1& e1, E2&& e2)
+{
+    process(e1, operator*(e1, e2));
+    return e1;
+}
+/// @brief Overload of operator*= taking e1 as a forwarding reference, so it can also bind to
+/// rvalue (temporary) expressions. Calls process(e1, e1 * e2) and returns e1.
+/// NOTE(review): this is the only compound operator in this group with an E1&& overload.
+/// When e1 is an rvalue, E1 deduces to a non-reference type and the returned E1& refers to
+/// the caller's temporary -- confirm that callers do not keep that reference.
+template <typename E1, typename E2, enable_if_input_output_expression<E1>* = nullptr,
+          enable_if_input_expression<E2>* = nullptr>
+KFR_INTRINSIC E1& operator*=(E1&& e1, E2&& e2)
+{
+    process(e1, operator*(e1, e2));
+    return e1;
+}
+/// @brief Compound division for expressions: calls process(e1, e1 / e2) and returns e1.
+/// Constrained by enable_if_input_output_expression<E1> and enable_if_input_expression<E2>.
+/// e2 is passed on as an lvalue (it is not forwarded).
+template <typename E1, typename E2, enable_if_input_output_expression<E1>* = nullptr,
+          enable_if_input_expression<E2>* = nullptr>
+KFR_INTRINSIC E1& operator/=(E1& e1, E2&& e2)
+{
+    process(e1, operator/(e1, e2));
+    return e1;
+}
+/// @brief Compound modulo for expressions: calls process(e1, e1 % e2) and returns e1.
+/// Constrained by enable_if_input_output_expression<E1> and enable_if_input_expression<E2>.
+/// e2 is passed on as an lvalue (it is not forwarded).
+template <typename E1, typename E2, enable_if_input_output_expression<E1>* = nullptr,
+          enable_if_input_expression<E2>* = nullptr>
+KFR_INTRINSIC E1& operator%=(E1& e1, E2&& e2)
+{
+    process(e1, operator%(e1, e2));
+    return e1;
+}
+/// @brief Compound bitwise OR for expressions: calls process(e1, e1 | e2) and returns e1.
+/// Constrained by enable_if_input_output_expression<E1> and enable_if_input_expression<E2>.
+/// e2 is passed on as an lvalue (it is not forwarded).
+template <typename E1, typename E2, enable_if_input_output_expression<E1>* = nullptr,
+          enable_if_input_expression<E2>* = nullptr>
+KFR_INTRINSIC E1& operator|=(E1& e1, E2&& e2)
+{
+    process(e1, operator|(e1, e2));
+    return e1;
+}
+/// @brief Compound bitwise AND for expressions: calls process(e1, e1 & e2) and returns e1.
+/// Constrained by enable_if_input_output_expression<E1> and enable_if_input_expression<E2>.
+/// e2 is passed on as an lvalue (it is not forwarded).
+template <typename E1, typename E2, enable_if_input_output_expression<E1>* = nullptr,
+          enable_if_input_expression<E2>* = nullptr>
+KFR_INTRINSIC E1& operator&=(E1& e1, E2&& e2)
+{
+    process(e1, operator&(e1, e2));
+    return e1;
+}
+/// @brief Compound bitwise XOR for expressions: calls process(e1, e1 ^ e2) and returns e1.
+/// Constrained by enable_if_input_output_expression<E1> and enable_if_input_expression<E2>.
+/// e2 is passed on as an lvalue (it is not forwarded).
+template <typename E1, typename E2, enable_if_input_output_expression<E1>* = nullptr,
+          enable_if_input_expression<E2>* = nullptr>
+KFR_INTRINSIC E1& operator^=(E1& e1, E2&& e2)
+{
+    process(e1, operator^(e1, e2));
+    return e1;
+}
+/// @brief Compound left shift for expressions: calls process(e1, e1 << e2) and returns e1.
+/// Constrained by enable_if_input_output_expression<E1> and enable_if_input_expression<E2>.
+/// e2 is passed on as an lvalue (it is not forwarded).
+template <typename E1, typename E2, enable_if_input_output_expression<E1>* = nullptr,
+          enable_if_input_expression<E2>* = nullptr>
+KFR_INTRINSIC E1& operator<<=(E1& e1, E2&& e2)
+{
+    process(e1, operator<<(e1, e2));
+    return e1;
+}
+/// @brief Compound right shift for expressions: calls process(e1, e1 >> e2) and returns e1.
+/// Constrained by enable_if_input_output_expression<E1> and enable_if_input_expression<E2>.
+/// e2 is passed on as an lvalue (it is not forwarded).
+template <typename E1, typename E2, enable_if_input_output_expression<E1>* = nullptr,
+          enable_if_input_expression<E2>* = nullptr>
+KFR_INTRINSIC E1& operator>>=(E1& e1, E2&& e2)
+{
+    process(e1, operator>>(e1, e2));
+    return e1;
+}
+
+} // namespace CMT_ARCH_NAME
+
 } // namespace kfr
diff --git a/include/kfr/base/state_holder.hpp b/include/kfr/base/state_holder.hpp
@@ -0,0 +1,49 @@
+/** @addtogroup fir
+ *  @{
+ */
+/**
+ * KFR (http://kfrlib.com)
+ * Copyright (C) 2016-2022 Fractalium Ltd
+ * See LICENSE.txt for details
+ */
+#pragma once
+
+#include "../cident.h"
+#include <functional>
+
+namespace kfr
+{
+
+/// @brief Wrapper that stores a state object of type T by value.
+/// This primary template is the one used when Stateless is false; a partial specialization
+/// for Stateless == true stores a reference instead.
+/// Default construction is deleted, so a holder always starts from an existing state.
+template <typename T, bool Stateless>
+struct state_holder
+{
+    constexpr state_holder()                    = delete;
+    constexpr state_holder(const state_holder&) = default;
+    constexpr state_holder(state_holder&&)      = default;
+    /// Copies state into the holder.
+    constexpr state_holder(const T& state) CMT_NOEXCEPT : s(state) {}
+    /// Copies the object referred to by the reference_wrapper into the holder.
+    constexpr state_holder(std::reference_wrapper<const T> state) CMT_NOEXCEPT : s(state) {}
+    /// The stored state (held by value).
+    T s;
+
+    // Pointer-like access to the stored state
+    const T* operator->() const { return &s; }
+    T* operator->() { return &s; }
+    const T& operator*() const { return s; }
+    T& operator*() { return s; }
+};
+
+/// @brief Specialization of state_holder for Stateless == true: stores a reference (T&) to an
+/// external state object instead of a copy.
+/// Default construction is deleted, so the reference is always bound.
+/// NOTE(review): the holder does not manage the lifetime of the referenced object; the
+/// object must outlive the holder.
+template <typename T>
+struct state_holder<T, true>
+{
+    constexpr state_holder()                    = delete;
+    constexpr state_holder(const state_holder&) = default;
+    constexpr state_holder(state_holder&&)      = default;
+    /// Binds the holder to state.
+    constexpr state_holder(T& state) CMT_NOEXCEPT : s(state) {}
+    /// Binds the holder to the object referred to by the reference_wrapper.
+    constexpr state_holder(std::reference_wrapper<T> state) CMT_NOEXCEPT : s(state) {}
+    /// Reference to the external state.
+    T& s;
+
+    // Pointer-like access to the referenced state
+    const T* operator->() const { return &s; }
+    T* operator->() { return &s; }
+    const T& operator*() const { return s; }
+    T& operator*() { return s; }
+};
+
+} // namespace kfr
diff --git a/include/kfr/base/tensor.hpp b/include/kfr/base/tensor.hpp
@@ -2,7 +2,7 @@
  *  @{
  */
 /*
-  Copyright (C) 2016 D Levin (https://www.kfrlib.com)
+  Copyright (C) 2016-2022 Fractalium Ltd (https://www.kfrlib.com)
   This file is part of KFR
 
   KFR is free software: you can redistribute it and/or modify
@@ -29,13 +29,15 @@
 
 #include "../cometa/array.hpp"
 
-#include "../simd/logical.hpp"
-#include "../simd/min_max.hpp"
 #include "../simd/horizontal.hpp"
 #include "../simd/impl/function.hpp"
+#include "../simd/logical.hpp"
+#include "../simd/min_max.hpp"
 #include "../simd/read_write.hpp"
 #include "../simd/types.hpp"
+#include "expression.hpp"
 #include "memory.hpp"
+#include "shape.hpp"
 
 CMT_PRAGMA_MSVC(warning(push))
 CMT_PRAGMA_MSVC(warning(disable : 4324))
@@ -822,7 +824,7 @@ public:
             shape_type index{ 0 };
             std::string open_filler(open.size(), ' ');
             std::string separator_trimmed = separator.substr(0, 1 + separator.find_last_not_of(" \t"));
-            int columns = 0;
+            int columns                   = 0;
             do
             {
                 std::string str = as_string(wrap_fmt(access(index), ctype<Fmt>));
@@ -999,30 +1001,6 @@ tensor<typename Traits::value_type, Traits::dims> trender(const E& expr, shape<T
 
 namespace cometa
 {
-template <kfr::index_t dims>
-struct representation<kfr::shape<dims>>
-{
-    using type = std::string;
-    static std::string get(const kfr::shape<dims>& value)
-    {
-        if constexpr (dims == 0)
-        {
-            return "()";
-        }
-        else
-        {
-            std::string s;
-            for (kfr::index_t i = 0; i < dims; ++i)
-            {
-                if (CMT_LIKELY(i > 0))
-                    s += ", ";
-                s += as_string(value[i]);
-            }
-            return s;
-        }
-    }
-};
-
 template <typename T, kfr::index_t dims>
 struct representation<kfr::tensor<T, dims>>
 {
diff --git a/include/kfr/base/univector.hpp b/include/kfr/base/univector.hpp
@@ -97,30 +97,12 @@ struct univector_base<T, Class, true>
     template <typename Input, KFR_ACCEPT_EXPRESSIONS(Input)>
     KFR_MEM_INTRINSIC Class& operator=(Input&& input)
     {
+        constexpr index_t dims = expression_dims<Input>; // number of dimensions of the assigned expression
+        static_assert(dims <= 1, "univector accepts only expressions with dims <= 1"); // multi-dimensional sources are rejected at compile time
         assign_expr(std::forward<Input>(input));
         return *derived_cast<Class>(this);
     }
 
-#define KFR_UVEC_ASGN_OP(aop, op)                                                                            \
-    template <typename Input>                                                                                \
-    KFR_MEM_INTRINSIC Class& aop(Input&& input)                                                              \
-    {                                                                                                        \
-        assign_expr(*derived_cast<Class>(this) op std::forward<Input>(input));                               \
-        return *derived_cast<Class>(this);                                                                   \
-    }
-    KFR_UVEC_ASGN_OP(operator+=, +)
-    KFR_UVEC_ASGN_OP(operator-=, -)
-    KFR_UVEC_ASGN_OP(operator*=, *)
-    KFR_UVEC_ASGN_OP(operator/=, /)
-    KFR_UVEC_ASGN_OP(operator%=, %)
-
-    KFR_UVEC_ASGN_OP(operator&=, &)
-    KFR_UVEC_ASGN_OP(operator|=, |)
-    KFR_UVEC_ASGN_OP(operator^=, ^)
-
-    KFR_UVEC_ASGN_OP(operator<<=, <<)
-    KFR_UVEC_ASGN_OP(operator>>=, >>)
-
     /// @brief Returns subrange of the vector.
     /// If start is greater or equal to this->size, returns empty univector
     /// If requested size is greater than this->size, returns only available elements
@@ -336,13 +318,14 @@ struct alignas(platform<>::maximum_vector_alignment) univector
 
     constexpr univector() CMT_NOEXCEPT_SPEC(noexcept(std::array<T, Size>())) = default;
     constexpr univector(size_t, const T& value) { std::fill(this->begin(), this->end(), value); }
-    constexpr static bool size_known   = true;
-    constexpr static bool is_array     = true;
-    constexpr static bool is_array_ref = false;
-    constexpr static bool is_vector    = false;
-    constexpr static bool is_aligned   = true;
-    constexpr static bool is_pod       = kfr::is_pod<T>;
-    using value_type                   = T;
+    constexpr static bool size_known    = true;
+    constexpr static size_t static_size = Size;
+    constexpr static bool is_array      = true;
+    constexpr static bool is_array_ref  = false;
+    constexpr static bool is_vector     = false;
+    constexpr static bool is_aligned    = true;
+    constexpr static bool is_pod        = kfr::is_pod<T>;
+    using value_type                    = T;
 
     value_type get(size_t index, value_type fallback_value) const CMT_NOEXCEPT
     {
@@ -384,6 +367,10 @@ struct univector<T, tag_array_ref> : array_ref<T>,
     constexpr univector(univector<U, Tag>& other) : array_ref<T>(other.data(), other.size())
     {
     }
+    template <typename U, univector_tag Tag, KFR_ENABLE_IF(is_same<remove_const<T>, U>&& is_const<T>)> // enabled only when T is const and U is T without const
+    constexpr univector(univector<U, Tag>&& other) : array_ref<T>(other.data(), other.size()) // refers to other's data()/size(); NOTE(review): dangles if `other` owns its storage and is destroyed first - confirm callers
+    {
+    }
     void resize(size_t) CMT_NOEXCEPT {}
     constexpr static bool size_known   = false;
     constexpr static bool is_array     = false;
@@ -417,7 +404,12 @@ struct univector<T, tag_dynamic_vector>
     univector(Input&& input)
     {
         static_assert(!is_infinite<Input>, "Dynamically sized vector requires finite input expression");
-        this->resize(input.size());
+        constexpr index_t dims = expression_dims<Input>;
+        static_assert(dims <= 1, "univector accepts only expressions with dims <= 1");
+        if constexpr (dims > 0)
+        {
+            this->resize(shapeof(input).front());
+        }
         this->assign_expr(std::forward<Input>(input));
     }
     constexpr univector() CMT_NOEXCEPT_SPEC(noexcept(std::vector<T, allocator<T>>())) = default;
@@ -452,13 +444,44 @@ struct univector<T, tag_dynamic_vector>
     template <typename Input, KFR_ACCEPT_EXPRESSIONS(Input)>
     KFR_MEM_INTRINSIC univector& operator=(Input&& input)
     {
-        if (input.size() != infinite_size)
-            this->resize(input.size());
+        constexpr index_t dims = expression_dims<Input>; // number of dimensions of the assigned expression
+        static_assert(dims <= 1, "univector accepts only expressions with dims <= 1"); // multi-dimensional sources are rejected at compile time
+        if constexpr (dims > 0) // for dims == 0 the resize step is skipped entirely
+        {
+            if (shapeof(input).front() != infinite_size) // an infinite source leaves the current size unchanged
+                this->resize(shapeof(input).front()); // otherwise adopt the source's length before assigning
+        }
         this->assign_expr(std::forward<Input>(input));
         return *this;
     }
 };
 
+template <typename T, univector_tag Tag> // expression traits shared by every univector storage tag
+struct expression_traits<univector<T, Tag>> : public expression_traits_defaults
+{
+    using value_type             = std::remove_const_t<T>; // element type with const stripped
+    constexpr static size_t dims = 1; // a univector is treated as a one-dimensional expression
+    constexpr static shape<dims> shapeof(const univector<T, Tag>& u) { return shape<1>(u.size()); } // shape of a concrete instance: its current size
+    constexpr static shape<dims> shapeof() // shape derived from the type alone, without an instance
+    {
+        if constexpr (univector<T, Tag>::size_known)
+            return shape<1>{ univector<T, Tag>::static_size }; // size is part of the type
+        else
+            return shape<1>{ undefined_size }; // size cannot be told from the type
+    }
+};
+
+template <typename T, univector_tag T1, univector_tag T2> // both operands must have the same T; storage tags may differ
+KFR_FUNCTION bool operator==(const univector<T, T1>& x, const univector<T, T2>& y)
+{
+    return std::equal(x.begin(), x.end(), y.begin(), y.end()); // four-iterator form: ranges of different length compare unequal
+}
+template <typename T, univector_tag T1, univector_tag T2> // same constraints as the matching operator==
+KFR_FUNCTION bool operator!=(const univector<T, T1>& x, const univector<T, T2>& y)
+{
+    return !operator==(x, y); // defined as the exact negation of operator==
+}
+
 /// @brief Alias for ``univector<T, tag_array_ref>``;
 template <typename T>
 using univector_ref = univector<T, tag_array_ref>;
@@ -586,35 +609,36 @@ inline namespace CMT_ARCH_NAME
 {
 
 template <typename T, univector_tag Tag, size_t N>
-KFR_INTRINSIC vec<T, N> get_elements(const univector<T, Tag>& self, const shape<1>& index,
-                                     const axis_params<0, N>&)
+KFR_INTRINSIC vec<std::remove_const_t<T>, N> get_elements(const univector<T, Tag>& self, // returned element type has const removed
+                                                          const shape<1>& index, const axis_params<0, N>&)
 {
     const T* data = self.data();
-    return static_cast<vec<U, N>>(read<N>(ptr_cast<T>(data) + index.front()));
+    return read<N>(ptr_cast<T>(data) + index.front()); // read<N> from the element at offset index.front(); no cast (the old one named an undeclared U)
 }
 
-template <typename T, univector_tag Tag, size_t N>
+template <typename T, univector_tag Tag, size_t N, KFR_ENABLE_IF(!std::is_const_v<T>)> // overload is removed for const element types
 KFR_INTRINSIC void set_elements(univector<T, Tag>& self, const shape<1>& index, const axis_params<0, N>&,
                                 const identity<vec<T, N>>& value)
 {
     T* data = self.data();
-    write(ptr_cast<T>(data) + index.front(), vec<T, N>(value));
+    write(ptr_cast<T>(data) + index.front(), value); // writes `value` at offset index.front(); passed as-is, without the extra vec<T, N> construction
 }
 
 /// @brief Converts an expression to univector
-template <typename Expr, typename T = value_type_of<Expr>>
+template <typename Expr, typename T = expression_value_type<Expr>> // T defaults to the expression's value type
 KFR_INTRINSIC univector<T> render(Expr&& expr)
 {
+    static_assert(expression_dims<Expr> == 1); // this overload accepts one-dimensional expressions only
     static_assert(!is_infinite<Expr>,
                   "render: Can't process infinite expressions. Pass size as a second argument to render.");
     univector<T> result;
-    result.resize(expr.size());
+    result.resize(shapeof(expr).front()); // size the result from the first (only) extent of the expression's shape
     result = expr;
     return result;
 }
 
 /// @brief Converts an expression to univector
-template <typename Expr, typename T = value_type_of<Expr>>
+template <typename Expr, typename T = expression_value_type<Expr>>
 KFR_INTRINSIC univector<T> render(Expr&& expr, size_t size, size_t offset = 0)
 {
     univector<T> result;
@@ -624,7 +648,7 @@ KFR_INTRINSIC univector<T> render(Expr&& expr, size_t size, size_t offset = 0)
 }
 
 /// @brief Converts an expression to univector
-template <typename Expr, size_t Size, typename T = value_type_of<Expr>>
+template <typename Expr, size_t Size, typename T = expression_value_type<Expr>>
 KFR_INTRINSIC univector<T, Size> render(Expr&& expr, csize_t<Size>)
 {
     univector<T, Size> result;
diff --git a/include/kfr/capi.h b/include/kfr/capi.h
@@ -238,7 +238,7 @@ typedef double kfr_c64;
                                              size_t size);
     KFR_API_SPEC void kfr_filter_process_f64(KFR_FILTER_F64* plan, kfr_f64* output, const kfr_f64* input,
                                              size_t size);
-                                             
+
     KFR_API_SPEC void kfr_filter_reset_f32(KFR_FILTER_F32* plan);
     KFR_API_SPEC void kfr_filter_reset_f64(KFR_FILTER_F64* plan);
 
diff --git a/include/kfr/cometa.hpp b/include/kfr/cometa.hpp
@@ -149,6 +149,9 @@ using remove_const = typename std::remove_const<T>::type;
 template <typename T>
 using underlying_type = typename std::underlying_type<T>::type;
 
+template <typename T1, typename T2> // type-level fallback: T2 substitutes for a void T1
+using or_type = std::conditional_t<std::is_same_v<T1, void>, T2, T1>; // T1 unless T1 is exactly void, in which case T2
+
 template <typename T>
 constexpr inline bool is_pod = std::is_pod<T>::value || details::is_pod_impl<T>::value;
 
@@ -267,7 +270,8 @@ struct compound_type_traits<std::pair<T, T>>
     using deep_rebind = std::pair<typename compound_type_traits<subtype>::template deep_rebind<U>,
                                   typename compound_type_traits<subtype>::template deep_rebind<U>>;
 
-    CMT_MEM_INTRINSIC static constexpr const subtype& at(const std::pair<subtype, subtype>& value, size_t index)
+    CMT_MEM_INTRINSIC static constexpr const subtype& at(const std::pair<subtype, subtype>& value,
+                                                         size_t index)
     {
         return index == 0 ? value.first : value.second;
     }
@@ -684,12 +688,12 @@ constexpr CMT_INTRINSIC Ret cfilter(cvals_t<T, vals...>, cvals_t<bool, flags...>
 #define CMT_UN_OP(op)                                                                                        \
     template <typename T1, T1... vals1,                                                                      \
               typename Ret = cvals_t<decltype(op std::declval<T1>()), (op vals1)...>>                        \
-    constexpr CMT_INTRINSIC Ret operator op(cvals_t<T1, vals1...>)                                                  \
+    constexpr CMT_INTRINSIC Ret operator op(cvals_t<T1, vals1...>)                                           \
     {                                                                                                        \
         return Ret{};                                                                                        \
     }                                                                                                        \
     template <typename T1, T1 val1, typename Ret = cval_t<decltype(op std::declval<T1>()), (op val1)>>       \
-    constexpr CMT_INTRINSIC Ret operator op(cval_t<T1, val1>)                                                       \
+    constexpr CMT_INTRINSIC Ret operator op(cval_t<T1, val1>)                                                \
     {                                                                                                        \
         return Ret{};                                                                                        \
     }
@@ -698,21 +702,21 @@ constexpr CMT_INTRINSIC Ret cfilter(cvals_t<T, vals...>, cvals_t<bool, flags...>
     template <typename T1, T1... vals1, typename T2, T2... vals2,                                            \
               typename Ret =                                                                                 \
                   cvals_t<decltype(std::declval<T1>() op std::declval<T2>()), (vals1 op vals2)...>>          \
-    constexpr CMT_INTRINSIC Ret operator op(cvals_t<T1, vals1...>, cvals_t<T2, vals2...>)                           \
+    constexpr CMT_INTRINSIC Ret operator op(cvals_t<T1, vals1...>, cvals_t<T2, vals2...>)                    \
     {                                                                                                        \
         return Ret{};                                                                                        \
     }                                                                                                        \
     template <typename T1, T1... vals1, typename T2, T2 val2,                                                \
               typename Ret =                                                                                 \
                   cvals_t<decltype(std::declval<T1>() op std::declval<T2>()), (vals1 op val2)...>>           \
-    constexpr CMT_INTRINSIC Ret operator op(cvals_t<T1, vals1...>, cval_t<T2, val2>)                                \
+    constexpr CMT_INTRINSIC Ret operator op(cvals_t<T1, vals1...>, cval_t<T2, val2>)                         \
     {                                                                                                        \
         return Ret{};                                                                                        \
     }                                                                                                        \
     template <typename T1, T1 val1, typename T2, T2... vals2,                                                \
               typename Ret =                                                                                 \
                   cvals_t<decltype(std::declval<T1>() op std::declval<T2>()), (val1 op vals2)...>>           \
-    constexpr CMT_INTRINSIC Ret operator op(cval_t<T1, val1>, cvals_t<T2, vals2...>)                                \
+    constexpr CMT_INTRINSIC Ret operator op(cval_t<T1, val1>, cvals_t<T2, vals2...>)                         \
     {                                                                                                        \
         return Ret{};                                                                                        \
     }
@@ -2120,6 +2124,9 @@ CMT_INTRINSIC constexpr Tout pack_elements(Arg x, Args... args)
            (pack_elements<Tout, Arg>(args...) << (sizeof(Arg) * 8));
 }
 
+template <typename T, bool reference> // compile-time choice between T and const T&
+using value_or_ref = std::conditional_t<reference, const T&, T>; // const T& when `reference` is true, plain T otherwise
+
 enum class special_constant
 {
     default_constructed,
diff --git a/include/kfr/cometa/cstring.hpp b/include/kfr/cometa/cstring.hpp
@@ -104,9 +104,9 @@ CMT_INTRINSIC cstring<N1 - Nfrom + Nto> str_replace_impl(size_t pos, const cstri
 {
     if (pos == size_t(-1))
         stop_constexpr();
-    return { { (indices < pos
-                    ? str[indices]
-                    : (indices < pos + Nto - 1) ? to[indices - pos] : str[indices - Nto + Nfrom])...,
+    return { { (indices < pos               ? str[indices]
+                : (indices < pos + Nto - 1) ? to[indices - pos]
+                                            : str[indices - Nto + Nfrom])...,
                0 } };
 }
 } // namespace details
diff --git a/include/kfr/cometa/function.hpp b/include/kfr/cometa/function.hpp
@@ -145,8 +145,8 @@ inline function<Ret(Args...)> cdispatch(cvals_t<T, v0, values...>, identity<T> v
 {
     if (value == v0)
     {
-        return [=](Args... args)
-                   CMT_INLINE_LAMBDA -> Ret { return fn(cval_t<T, v0>(), std::forward<Args>(args)...); };
+        return [=](Args... args) CMT_INLINE_LAMBDA -> Ret
+        { return fn(cval_t<T, v0>(), std::forward<Args>(args)...); };
     }
     else
     {
diff --git a/include/kfr/cometa/numeric.hpp b/include/kfr/cometa/numeric.hpp
@@ -124,13 +124,11 @@ constexpr inline datatype operator&(datatype x, datatype y)
 }
 
 template <typename T>
-constexpr inline datatype typeclass = is_floating_point<typename compound_type_traits<T>::subtype>
-                                          ? datatype::f
-                                          : is_integral<typename compound_type_traits<T>::subtype>
-                                                ? (is_unsigned<typename compound_type_traits<T>::subtype>
-                                                       ? datatype::u
-                                                       : datatype::i)
-                                                : datatype();
+constexpr inline datatype typeclass = // classification is based on compound_type_traits<T>::subtype, not on T itself
+    is_floating_point<typename compound_type_traits<T>::subtype> ? datatype::f // floating-point subtype
+    : is_integral<typename compound_type_traits<T>::subtype>
+        ? (is_unsigned<typename compound_type_traits<T>::subtype> ? datatype::u : datatype::i) // integral subtype: unsigned or signed
+        : datatype(); // neither floating-point nor integral: value-initialized datatype
 
 template <typename T>
 constexpr inline bool is_f_class = typeclass<T> == datatype::f;
diff --git a/include/kfr/dft/convolution.hpp b/include/kfr/dft/convolution.hpp
@@ -80,18 +80,18 @@ namespace internal
 {
 /// @brief Utility class to abstract real/complex differences
 template <typename T>
-struct dft_conv_plan: public dft_plan_real<T>
+struct dft_conv_plan : public dft_plan_real<T> // primary template: built on dft_plan_real<T>; complex<T> is specialized separately
 {
     dft_conv_plan(size_t size) : dft_plan_real<T>(size, dft_pack_format::Perm) {}
-    
+
     size_t csize() const { return this->size / 2; }
 };
 
 template <typename T>
-struct dft_conv_plan<complex<T>>: public dft_plan<T>
+struct dft_conv_plan<complex<T>> : public dft_plan<T> // complex specialization: built on dft_plan<T>; csize() is the full size
 {
     dft_conv_plan(size_t size) : dft_plan<T>(size) {}
-    
+
     size_t csize() const { return this->size; }
 };
 } // namespace internal
@@ -118,7 +118,7 @@ protected:
 
     using ST                       = subtype<T>;
     static constexpr auto real_fft = !std::is_same<T, complex<ST>>::value;
-    using plan_t = internal::dft_conv_plan<T>;
+    using plan_t                   = internal::dft_conv_plan<T>;
 
     // Length of filter data.
     size_t data_size;
diff --git a/include/kfr/dft/fft.hpp b/include/kfr/dft/fft.hpp
@@ -163,7 +163,7 @@ struct dft_plan
         }
     }
     explicit dft_plan(size_t size, dft_order order = dft_order::normal)
-        :dft_plan(cpu_t::runtime, size, order)
+        : dft_plan(cpu_t::runtime, size, order)
     {
     }
 #else
@@ -174,7 +174,6 @@ struct dft_plan
     }
 #endif
 
-
     void dump() const
     {
         for (const std::unique_ptr<dft_stage<T>>& s : stages)
@@ -291,11 +290,11 @@ protected:
             }
             else
             {
-                size_t offset   = 0;
+                size_t offset = 0;
                 while (offset < this->size)
                 {
                     stages[depth]->execute(cbool<inverse>, select_out(depth, out, scratch) + offset,
-                                       select_in(depth, out, in, scratch, in_scratch) + offset, temp);
+                                           select_in(depth, out, in, scratch, in_scratch) + offset, temp);
                     offset += stages[depth]->stage_size;
                 }
                 depth++;
diff --git a/include/kfr/dft/impl/convolution-impl.cpp b/include/kfr/dft/impl/convolution-impl.cpp
@@ -23,9 +23,9 @@
   disclosing the source code of your own applications.
   See https://www.kfrlib.com for details.
  */
-#include "../convolution.hpp"
-#include "../../simd/complex.hpp"
 #include "../../base/simd_expressions.hpp"
+#include "../../simd/complex.hpp"
+#include "../convolution.hpp"
 
 namespace kfr
 {
diff --git a/include/kfr/dft/impl/dft-impl.hpp b/include/kfr/dft/impl/dft-impl.hpp
@@ -25,9 +25,9 @@
  */
 #pragma once
 
-#include "dft-fft.hpp"
-#include "../../base/simd_expressions.hpp"
 #include "../../base/math_expressions.hpp"
+#include "../../base/simd_expressions.hpp"
+#include "dft-fft.hpp"
 
 CMT_PRAGMA_GNU(GCC diagnostic push)
 #if CMT_HAS_WARNING("-Wshadow")
@@ -96,9 +96,9 @@ struct dft_stage_fixed_impl : dft_stage<T>
 
     constexpr static size_t rradix = fixed_radix;
 
-    constexpr static size_t width = fixed_radix >= 7
-                                        ? fft_vector_width<T> / 2
-                                        : fixed_radix >= 4 ? fft_vector_width<T> : fft_vector_width<T> * 2;
+    constexpr static size_t width = fixed_radix >= 7   ? fft_vector_width<T> / 2
+                                    : fixed_radix >= 4 ? fft_vector_width<T>
+                                                       : fft_vector_width<T> * 2;
     virtual void do_initialize(size_t) override final { dft_stage_fixed_initialize(this, width); }
 
     DFT_STAGE_FN
@@ -132,9 +132,9 @@ struct dft_stage_fixed_final_impl : dft_stage<T>
         this->recursion   = false;
         this->can_inplace = false;
     }
-    constexpr static size_t width = fixed_radix >= 7
-                                        ? fft_vector_width<T> / 2
-                                        : fixed_radix >= 4 ? fft_vector_width<T> : fft_vector_width<T> * 2;
+    constexpr static size_t width = fixed_radix >= 7   ? fft_vector_width<T> / 2
+                                    : fixed_radix >= 4 ? fft_vector_width<T>
+                                                       : fft_vector_width<T> * 2;
 
     DFT_STAGE_FN
     template <bool inverse>
@@ -434,7 +434,8 @@ protected:
     {
         cswitch(
             dft_radices, radices[0],
-            [&](auto first_radix) {
+            [&](auto first_radix)
+            {
                 if (count == 3)
                 {
                     dft_permute(out, in, radices[2], radices[1], first_radix);
@@ -449,7 +450,8 @@ protected:
                     }
                 }
             },
-            [&]() {
+            [&]()
+            {
                 if (count == 3)
                 {
                     dft_permute(out, in, radices[2], radices[1], radices[0]);
@@ -473,14 +475,14 @@ void prepare_dft_stage(dft_plan<T>* self, size_t radix, size_t iterations, size_
 {
     return cswitch(
         dft_radices, radix,
-        [self, iterations, blocks](auto radix) CMT_INLINE_LAMBDA {
+        [self, iterations, blocks](auto radix) CMT_INLINE_LAMBDA
+        {
             add_stage<conditional<is_final, intrinsics::dft_stage_fixed_final_impl<T, val_of(radix)>,
                                   intrinsics::dft_stage_fixed_impl<T, val_of(radix)>>>(self, radix,
                                                                                        iterations, blocks);
         },
-        [self, radix, iterations, blocks]() {
-            add_stage<intrinsics::dft_stage_generic_impl<T, is_final>>(self, radix, iterations, blocks);
-        });
+        [self, radix, iterations, blocks]()
+        { add_stage<intrinsics::dft_stage_generic_impl<T, is_final>>(self, radix, iterations, blocks); });
 }
 
 template <typename T>
@@ -502,13 +504,15 @@ void init_dft(dft_plan<T>* self, size_t size, dft_order)
         int radices[32]                = { 0 };
         size_t radices_size            = 0;
 
-        cforeach(dft_radices[csizeseq<dft_radices.size(), dft_radices.size() - 1, -1>], [&](auto radix) {
-            while (cur_size && cur_size % val_of(radix) == 0)
-            {
-                count[val_of(radix)]++;
-                cur_size /= val_of(radix);
-            }
-        });
+        cforeach(dft_radices[csizeseq<dft_radices.size(), dft_radices.size() - 1, -1>],
+                 [&](auto radix)
+                 {
+                     while (cur_size && cur_size % val_of(radix) == 0)
+                     {
+                         count[val_of(radix)]++;
+                         cur_size /= val_of(radix);
+                     }
+                 });
 
         if (cur_size >= 101)
         {
diff --git a/include/kfr/dft/impl/fft-impl.hpp b/include/kfr/dft/impl/fft-impl.hpp
@@ -951,17 +951,21 @@ KFR_INTRINSIC void init_fft(dft_plan<T>* self, size_t size, dft_order)
     const size_t log2n = ilog2(size);
     cswitch(
         csizes_t<1, 2, 3, 4, 5, 6, 7, 8, 9, 10>(), log2n,
-        [&](auto log2n) {
+        [&](auto log2n)
+        {
             (void)log2n;
             constexpr size_t log2nv = val_of(decltype(log2n)());
             add_stage<intrinsics::fft_specialization<T, log2nv>>(self, size);
         },
-        [&]() {
-            cswitch(cfalse_true, is_even(log2n), [&](auto is_even) {
-                make_fft(self, size, is_even, ctrue);
-                constexpr size_t is_evenv = val_of(decltype(is_even)());
-                add_stage<intrinsics::fft_reorder_stage_impl<T, is_evenv>>(self, size);
-            });
+        [&]()
+        {
+            cswitch(cfalse_true, is_even(log2n),
+                    [&](auto is_even)
+                    {
+                        make_fft(self, size, is_even, ctrue);
+                        constexpr size_t is_evenv = val_of(decltype(is_even)());
+                        add_stage<intrinsics::fft_reorder_stage_impl<T, is_evenv>>(self, size);
+                    });
         });
 }
 
@@ -970,15 +974,19 @@ KFR_INTRINSIC void generate_real_twiddles(dft_plan_real<T>* self, size_t size)
 {
     using namespace intrinsics;
     constexpr size_t width = vector_width<T> * 2;
-    block_process(size / 4, csizes_t<width, 1>(), [=](size_t i, auto w) {
-        constexpr size_t width = val_of(decltype(w)());
-        cwrite<width>(self->rtwiddle.data() + i,
-                      cossin(dup(-constants<T>::pi * ((enumerate<T, width>() + i + size / 4) / (size / 2)))));
-    });
+    block_process(size / 4, csizes_t<width, 1>(),
+                  [=](size_t i, auto w)
+                  {
+                      constexpr size_t width = val_of(decltype(w)());
+                      cwrite<width>(self->rtwiddle.data() + i,
+                                    cossin(dup(-constants<T>::pi *
+                                               ((enumerate<T, width>() + i + size / 4) / (size / 2)))));
+                  });
 }
 
 template <typename T>
-#if (defined CMT_ARCH_X32 && defined CMT_ARCH_X86 && defined __clang__) && ((defined __APPLE__) || (__clang_major__ == 8))
+#if (defined CMT_ARCH_X32 && defined CMT_ARCH_X86 && defined __clang__) &&                                   \
+    ((defined __APPLE__) || (__clang_major__ == 8))
 // Fix for Clang 8.0 bug (x32 with FMA instructions)
 // Xcode has different versions but x86 is very rare on macOS these days, 
 // so disable inlining and FMA for x32 macOS and Clang 8.x
@@ -997,20 +1005,22 @@ to_fmt(size_t real_size, const complex<T>* rtwiddle, complex<T>* out, const comp
     const cvec<T, 1> dc    = cread<1>(out);
     const size_t count     = csize / 2;
 
-    block_process(count - 1, csizes_t<width, 1>(), [&](size_t i, auto w) {
-        i++;
-        constexpr size_t width    = val_of(decltype(w)());
-        constexpr size_t widthm1  = width - 1;
-        const cvec<T, width> tw   = cread<width>(rtwiddle + i);
-        const cvec<T, width> fpk  = cread<width>(in + i);
-        const cvec<T, width> fpnk = reverse<2>(negodd(cread<width>(in + csize - i - widthm1)));
+    block_process(count - 1, csizes_t<width, 1>(),
+                  [&](size_t i, auto w)
+                  {
+                      i++;
+                      constexpr size_t width    = val_of(decltype(w)());
+                      constexpr size_t widthm1  = width - 1;
+                      const cvec<T, width> tw   = cread<width>(rtwiddle + i);
+                      const cvec<T, width> fpk  = cread<width>(in + i);
+                      const cvec<T, width> fpnk = reverse<2>(negodd(cread<width>(in + csize - i - widthm1)));
 
-        const cvec<T, width> f1k = fpk + fpnk;
-        const cvec<T, width> f2k = fpk - fpnk;
-        const cvec<T, width> t   = cmul(f2k, tw);
-        cwrite<width>(out + i, T(0.5) * (f1k + t));
-        cwrite<width>(out + csize - i - widthm1, reverse<2>(negodd(T(0.5) * (f1k - t))));
-    });
+                      const cvec<T, width> f1k = fpk + fpnk;
+                      const cvec<T, width> f2k = fpk - fpnk;
+                      const cvec<T, width> t   = cmul(f2k, tw);
+                      cwrite<width>(out + i, T(0.5) * (f1k + t));
+                      cwrite<width>(out + csize - i - widthm1, reverse<2>(negodd(T(0.5) * (f1k - t))));
+                  });
 
     {
         size_t k              = csize / 2;
@@ -1030,7 +1040,8 @@ to_fmt(size_t real_size, const complex<T>* rtwiddle, complex<T>* out, const comp
 }
 
 template <typename T>
-#if (defined CMT_ARCH_X32 && defined CMT_ARCH_X86 && defined __clang__) && ((defined __APPLE__) || (__clang_major__ == 8))
+#if (defined CMT_ARCH_X32 && defined CMT_ARCH_X86 && defined __clang__) &&                                   \
+    ((defined __APPLE__) || (__clang_major__ == 8))
 // Fix for Clang 8.0 bug (x32 with FMA instructions)
 // Xcode has different versions but x86 is very rare on macOS these days, 
 // so disable inlining and FMA for x32 macOS and Clang 8.x
@@ -1059,20 +1070,22 @@ void from_fmt(size_t real_size, complex<T>* rtwiddle, complex<T>* out, const com
     constexpr size_t width = vector_width<T> * 2;
     const size_t count     = csize / 2;
 
-    block_process(count - 1, csizes_t<width, 1>(), [&](size_t i, auto w) {
-        i++;
-        constexpr size_t width    = val_of(decltype(w)());
-        constexpr size_t widthm1  = width - 1;
-        const cvec<T, width> tw   = cread<width>(rtwiddle + i);
-        const cvec<T, width> fpk  = cread<width>(in + i);
-        const cvec<T, width> fpnk = reverse<2>(negodd(cread<width>(in + csize - i - widthm1)));
+    block_process(count - 1, csizes_t<width, 1>(),
+                  [&](size_t i, auto w)
+                  {
+                      i++;
+                      constexpr size_t width    = val_of(decltype(w)());
+                      constexpr size_t widthm1  = width - 1;
+                      const cvec<T, width> tw   = cread<width>(rtwiddle + i);
+                      const cvec<T, width> fpk  = cread<width>(in + i);
+                      const cvec<T, width> fpnk = reverse<2>(negodd(cread<width>(in + csize - i - widthm1)));
 
-        const cvec<T, width> f1k = fpk + fpnk;
-        const cvec<T, width> f2k = fpk - fpnk;
-        const cvec<T, width> t   = cmul_conj(f2k, tw);
-        cwrite<width>(out + i, f1k + t);
-        cwrite<width>(out + csize - i - widthm1, reverse<2>(negodd(f1k - t)));
-    });
+                      const cvec<T, width> f1k = fpk + fpnk;
+                      const cvec<T, width> f2k = fpk - fpnk;
+                      const cvec<T, width> t   = cmul_conj(f2k, tw);
+                      cwrite<width>(out + i, f1k + t);
+                      cwrite<width>(out + csize - i - widthm1, reverse<2>(negodd(f1k - t)));
+                  });
 
     {
         size_t k              = csize / 2;
@@ -1125,12 +1138,15 @@ public:
         constexpr size_t width = vector_width<T> * 2;
         size_t real_size       = this->stage_size;
         complex<T>* rtwiddle   = ptr_cast<complex<T>>(this->data);
-        block_process(real_size / 4, csizes_t<width, 1>(), [=](size_t i, auto w) {
-            constexpr size_t width = val_of(decltype(w)());
-            cwrite<width>(rtwiddle + i,
-                          cossin(dup(-constants<T>::pi *
-                                     ((enumerate<T, width>() + i + real_size / 4) / (real_size / 2)))));
-        });
+        block_process(real_size / 4, csizes_t<width, 1>(),
+                      [=](size_t i, auto w)
+                      {
+                          constexpr size_t width = val_of(decltype(w)());
+                          cwrite<width>(
+                              rtwiddle + i,
+                              cossin(dup(-constants<T>::pi *
+                                         ((enumerate<T, width>() + i + real_size / 4) / (real_size / 2)))));
+                      });
     }
     void do_execute(cdirect_t, complex<T>* out, const complex<T>* in, u8* temp) override
     {
diff --git a/include/kfr/dft/impl/ft.hpp b/include/kfr/dft/impl/ft.hpp
@@ -1092,19 +1092,19 @@ template <typename T, bool inverse = false>
 static constexpr KFR_INTRINSIC cvec<T, 1> tw9_1()
 {
     return { T(0.76604444311897803520239265055541),
-                                  (inverse ? -1 : 1) * T(-0.64278760968653932632264340990727) };
+             (inverse ? -1 : 1) * T(-0.64278760968653932632264340990727) };
 }
 template <typename T, bool inverse = false>
 static constexpr KFR_INTRINSIC cvec<T, 1> tw9_2()
 {
     return { T(0.17364817766693034885171662676931),
-                                  (inverse ? -1 : 1) * T(-0.98480775301220805936674302458952) };
+             (inverse ? -1 : 1) * T(-0.98480775301220805936674302458952) };
 }
 template <typename T, bool inverse = false>
 static constexpr KFR_INTRINSIC cvec<T, 1> tw9_4()
 {
     return { T(-0.93969262078590838405410927732473),
-                                  (inverse ? -1 : 1) * T(-0.34202014332566873304409961468226) };
+             (inverse ? -1 : 1) * T(-0.34202014332566873304409961468226) };
 }
 
 template <size_t N, bool inverse = false, typename T>
@@ -1205,7 +1205,7 @@ KFR_INTRINSIC void butterfly7(cvec<T, N> a00, cvec<T, N> a01, cvec<T, N> a02, cv
     const cvec<T, N> d1 =
         dif1 * tw7i1<T, N, inverse>() + dif2 * tw7i2<T, N, inverse>() + dif3 * tw7i3<T, N, inverse>();
     const cvec<T, N> d2 =
-        dif1 * tw7i2<T, N, inverse>() - dif2 * tw7i3<T, N, inverse>()- dif3 * tw7i1<T, N, inverse>();
+        dif1 * tw7i2<T, N, inverse>() - dif2 * tw7i3<T, N, inverse>() - dif3 * tw7i1<T, N, inverse>();
     const cvec<T, N> d3 =
         dif1 * tw7i3<T, N, inverse>() - dif2 * tw7i1<T, N, inverse>() + dif3 * tw7i2<T, N, inverse>();
 
@@ -1729,15 +1729,18 @@ template <typename T, bool inverse, typename Tstride = csize_t<1>>
 KFR_INTRINSIC void generic_butterfly(size_t radix, cbool_t<inverse>, complex<T>* out, const complex<T>* in,
                                      complex<T>*, const complex<T>* twiddle, Tstride ostride = {})
 {
-    cswitch(csizes_t<11, 13>(), radix,
-            [&](auto radix_) CMT_INLINE_LAMBDA {
-                constexpr size_t width = vector_width<T>;
-                spec_generic_butterfly_w<width>(radix_, cbool_t<inverse>(), out, in, twiddle, ostride);
-            },
-            [&]() CMT_INLINE_LAMBDA {
-                constexpr size_t width = vector_width<T>;
-                generic_butterfly_w<width>(radix, cbool_t<inverse>(), out, in, twiddle, ostride);
-            });
+    cswitch(
+        csizes_t<11, 13>(), radix,
+        [&](auto radix_) CMT_INLINE_LAMBDA
+        {
+            constexpr size_t width = vector_width<T>;
+            spec_generic_butterfly_w<width>(radix_, cbool_t<inverse>(), out, in, twiddle, ostride);
+        },
+        [&]() CMT_INLINE_LAMBDA
+        {
+            constexpr size_t width = vector_width<T>;
+            generic_butterfly_w<width>(radix, cbool_t<inverse>(), out, in, twiddle, ostride);
+        });
 }
 
 template <typename T, size_t N>
@@ -1809,4 +1812,3 @@ KFR_INTRINSIC void cdigitreverse4_write<false, f64, 32>(complex<f64>* dest, cons
 } // namespace kfr
 
 CMT_PRAGMA_MSVC(warning(pop))
-
diff --git a/include/kfr/dsp/biquad.hpp b/include/kfr/dsp/biquad.hpp
@@ -93,8 +93,6 @@ struct biquad_params
 inline namespace CMT_ARCH_NAME
 {
 
-namespace internal
-{
 template <typename T, size_t filters>
 struct biquad_state
 {
@@ -143,125 +141,126 @@ struct biquad_block
 };
 
 template <size_t filters, typename T, typename E1>
-struct expression_biquads_l : public expression_with_arguments<E1>
+struct expression_biquads_l : public expression_with_traits<E1>
 {
     using value_type = T;
 
     expression_biquads_l(const biquad_block<T, filters>& bq, E1&& e1)
-        : expression_with_arguments<E1>(std::forward<E1>(e1)), bq(bq)
+        : expression_with_traits<E1>(std::forward<E1>(e1)), bq(bq)
     {
     }
-    template <size_t width>
-    friend KFR_INTRINSIC vec<T, width> get_elements(const expression_biquads_l& self, cinput_t cinput,
-                                                    size_t index, vec_shape<T, width> t)
-    {
-        const vec<T, width> in = self.argument_first(cinput, index, t);
-        vec<T, width> out;
+    biquad_block<T, filters> bq;
+    mutable biquad_state<T, filters> state;
+};
 
-        CMT_LOOP_UNROLL
-        for (size_t i = 0; i < width; i++)
-        {
-            self.state.out = process(self.bq, self.state, insertleft(in[i], self.state.out));
-            out[i]         = self.state.out[filters - 1];
-        }
+template <size_t filters, typename T, typename E1>
+struct expression_biquads : expression_with_traits<E1>
+{
+    using value_type = T;
 
-        return out;
-    }
-    static KFR_MEM_INTRINSIC vec<T, filters> process(const biquad_block<T, filters>& bq,
-                                                     biquad_state<T, filters>& state,
-                                                     const vec<T, filters>& in)
+    expression_biquads(const biquad_block<T, filters>& bq, E1&& e1)
+        : expression_with_traits<E1>(std::forward<E1>(e1)), bq(bq), block_end(0)
     {
-        const vec<T, filters> out = bq.b0 * in + state.s1;
-        state.s1                  = state.s2 + bq.b1 * in - bq.a1 * out;
-        state.s2                  = bq.b2 * in - bq.a2 * out;
-        return out;
     }
+
     biquad_block<T, filters> bq;
+
     mutable biquad_state<T, filters> state;
+    mutable biquad_state<T, filters> saved_state;
+    mutable size_t block_end;
 };
 
-template <size_t filters, typename T, typename E1>
-struct expression_biquads : expression_with_arguments<E1>
+template <size_t filters, typename T>
+KFR_INTRINSIC vec<T, filters> biquad_process(const biquad_block<T, filters>& bq,
+                                             biquad_state<T, filters>& state, const vec<T, filters>& in)
 {
-    using value_type = T;
+    const vec<T, filters> out = bq.b0 * in + state.s1;
+    state.s1                  = state.s2 + bq.b1 * in - bq.a1 * out;
+    state.s2                  = bq.b2 * in - bq.a2 * out;
+    return out;
+}
 
-    expression_biquads(const biquad_block<T, filters>& bq, E1&& e1)
-        : expression_with_arguments<E1>(std::forward<E1>(e1)), bq(bq), block_end(0)
+template <size_t filters, typename T, typename E1, size_t N>
+KFR_INTRINSIC vec<T, N> get_elements(const expression_biquads_l<filters, T, E1>& self, shape<1> index,
+                                     axis_params<0, N> t)
+{
+    const vec<T, N> in = get_elements(self.first(), index, t);
+    vec<T, N> out;
+
+    CMT_LOOP_UNROLL
+    for (size_t i = 0; i < N; i++)
     {
+        self.state.out = biquad_process(self.bq, self.state, insertleft(in[i], self.state.out));
+        out[i]         = self.state.out[filters - 1];
     }
 
-    void begin_block(cinput_t cinput, size_t size) const
+    return out;
+}
+
+template <size_t filters, typename T, typename E1>
+KFR_INTRINSIC void begin_pass(const expression_biquads<filters, T, E1>& self, shape<1> start, shape<1> stop)
+{
+    size_t size    = stop.front();
+    self.block_end = size;
+    for (size_t i = 0; i < filters - 1; i++)
     {
-        block_end = size;
-        for (size_t i = 0; i < filters - 1; i++)
+        const vec<T, 1> in = i < size ? get_elements(self.first(), shape<1>{ i }, axis_params_v<0, 1>) : 0;
+        self.state.out     = biquad_process(self.bq, self.state, insertleft(in[0], self.state.out));
+    }
+}
+template <size_t filters, typename T, typename E1>
+KFR_INTRINSIC void end_pass(const expression_biquads<filters, T, E1>& self, shape<1> start, shape<1> stop)
+{
+    self.state = self.saved_state;
+}
+
+template <size_t filters, typename T, typename E1, size_t N>
+KFR_INTRINSIC vec<T, N> get_elements(const expression_biquads<filters, T, E1>& self, shape<1> index,
+                                     axis_params<0, N> t)
+{
+    index.front() += filters - 1;
+    vec<T, N> out{};
+    if (index.front() + N <= self.block_end)
+    {
+        const vec<T, N> in = get_elements(self.first(), shape<1>{ index.front() }, t);
+
+        CMT_LOOP_UNROLL
+        for (size_t i = 0; i < N; i++)
         {
-            const vec<T, 1> in = i < size ? this->argument_first(cinput, i, vec_shape<T, 1>()) : 0;
-            state.out          = process(bq, state, insertleft(in[0], state.out));
+            self.state.out = biquad_process(self.bq, self.state, insertleft(in[i], self.state.out));
+            out[i]         = self.state.out[filters - 1];
         }
+        if (index.front() + N == self.block_end)
+            self.saved_state = self.state;
     }
-    void end_block(cinput_t, size_t) const { state = saved_state; }
-
-    template <size_t width>
-    friend KFR_INTRINSIC vec<T, width> get_elements(const expression_biquads& self, cinput_t cinput,
-                                                    size_t index, vec_shape<T, width> t)
+    else if (index.front() >= self.block_end)
     {
-        index += filters - 1;
-        vec<T, width> out{};
-        if (index + width <= self.block_end)
+        CMT_LOOP_UNROLL
+        for (size_t i = 0; i < N; i++)
         {
-            const vec<T, width> in = self.argument_first(cinput, index, t);
-
-            CMT_LOOP_UNROLL
-            for (size_t i = 0; i < width; i++)
-            {
-                self.state.out = process(self.bq, self.state, insertleft(in[i], self.state.out));
-                out[i]         = self.state.out[filters - 1];
-            }
-            if (index + width == self.block_end)
-                self.saved_state = self.state;
+            self.state.out = biquad_process(self.bq, self.state, insertleft(T(0), self.state.out));
+            out[i]         = self.state.out[filters - 1];
         }
-        else if (index >= self.block_end)
+    }
+    else
+    {
+        size_t i = 0;
+        for (; i < std::min(N, self.block_end - index.front()); i++)
         {
-            CMT_LOOP_UNROLL
-            for (size_t i = 0; i < width; i++)
-            {
-                self.state.out = process(self.bq, self.state, insertleft(T(0), self.state.out));
-                out[i]         = self.state.out[filters - 1];
-            }
+            const vec<T, 1> in =
+                get_elements(self.first(), index.add_at(i, cval<index_t, 0>), axis_params_v<0, 1>);
+            self.state.out = biquad_process(self.bq, self.state, insertleft(in[0], self.state.out));
+            out[i]         = self.state.out[filters - 1];
         }
-        else
+        self.saved_state = self.state;
+        for (; i < N; i++)
         {
-            size_t i = 0;
-            for (; i < std::min(width, self.block_end - index); i++)
-            {
-                const vec<T, 1> in = self.argument_first(cinput, index + i, vec_shape<T, 1>());
-                self.state.out     = process(self.bq, self.state, insertleft(in[0], self.state.out));
-                out[i]             = self.state.out[filters - 1];
-            }
-            self.saved_state = self.state;
-            for (; i < width; i++)
-            {
-                self.state.out = process(self.bq, self.state, insertleft(T(0), self.state.out));
-                out[i]         = self.state.out[filters - 1];
-            }
+            self.state.out = biquad_process(self.bq, self.state, insertleft(T(0), self.state.out));
+            out[i]         = self.state.out[filters - 1];
         }
-        return out;
-    }
-    static KFR_MEM_INTRINSIC vec<T, filters> process(const biquad_block<T, filters>& bq,
-                                                     biquad_state<T, filters>& state, vec<T, filters> in)
-    {
-        const vec<T, filters> out = bq.b0 * in + state.s1;
-        state.s1                  = state.s2 + bq.b1 * in - bq.a1 * out;
-        state.s2                  = bq.b2 * in - bq.a2 * out;
-        return out;
     }
-    biquad_block<T, filters> bq;
-
-    mutable biquad_state<T, filters> state;
-    mutable biquad_state<T, filters> saved_state;
-    mutable size_t block_end;
-};
-} // namespace internal
+    return out;
+}
 
 /**
  * @brief Returns template expressions that applies biquad filter to the input.
@@ -269,10 +268,10 @@ struct expression_biquads : expression_with_arguments<E1>
  * @param e1 Input expression
  */
 template <typename T, typename E1>
-KFR_FUNCTION internal::expression_biquads<1, T, E1> biquad(const biquad_params<T>& bq, E1&& e1)
+KFR_FUNCTION expression_biquads<1, T, E1> biquad(const biquad_params<T>& bq, E1&& e1)
 {
     const biquad_params<T> bqs[1] = { bq };
-    return internal::expression_biquads<1, T, E1>(bqs, std::forward<E1>(e1));
+    return expression_biquads<1, T, E1>(bqs, std::forward<E1>(e1));
 }
 
 /**
@@ -282,10 +281,9 @@ KFR_FUNCTION internal::expression_biquads<1, T, E1> biquad(const biquad_params<T
  * @note This implementation introduces delay of N - 1 samples, where N is the filter count.
  */
 template <size_t filters, typename T, typename E1>
-KFR_FUNCTION internal::expression_biquads_l<filters, T, E1> biquad_l(const biquad_params<T> (&bq)[filters],
-                                                                     E1&& e1)
+KFR_FUNCTION expression_biquads_l<filters, T, E1> biquad_l(const biquad_params<T> (&bq)[filters], E1&& e1)
 {
-    return internal::expression_biquads_l<filters, T, E1>(bq, std::forward<E1>(e1));
+    return expression_biquads_l<filters, T, E1>(bq, std::forward<E1>(e1));
 }
 
 /**
@@ -295,10 +293,9 @@ KFR_FUNCTION internal::expression_biquads_l<filters, T, E1> biquad_l(const biqua
  * @note This implementation has zero latency
  */
 template <size_t filters, typename T, typename E1>
-KFR_FUNCTION internal::expression_biquads<filters, T, E1> biquad(const biquad_params<T> (&bq)[filters],
-                                                                 E1&& e1)
+KFR_FUNCTION expression_biquads<filters, T, E1> biquad(const biquad_params<T> (&bq)[filters], E1&& e1)
 {
-    return internal::expression_biquads<filters, T, E1>(bq, std::forward<E1>(e1));
+    return expression_biquads<filters, T, E1>(bq, std::forward<E1>(e1));
 }
 
 /**
@@ -308,21 +305,22 @@ KFR_FUNCTION internal::expression_biquads<filters, T, E1> biquad(const biquad_pa
  * @note This implementation has zero latency
  */
 template <size_t maxfiltercount = 4, typename T, typename E1>
-KFR_FUNCTION expression_pointer<T> biquad(const biquad_params<T>* bq, size_t count, E1&& e1)
+KFR_FUNCTION expression_pointer<T, 1> biquad(const biquad_params<T>* bq, size_t count, E1&& e1)
 {
     constexpr csizes_t<1, 2, 4, 8, 16, 32, 64> sizes;
     return cswitch(
         cfilter(sizes, sizes <= csize_t<maxfiltercount>{}), next_poweroftwo(count),
-        [&](auto x) {
+        [&](auto x)
+        {
             constexpr size_t filters = x;
-            return to_pointer(internal::expression_biquads<filters, T, E1>(
-                internal::biquad_block<T, filters>(bq, count), std::forward<E1>(e1)));
+            return to_pointer(expression_biquads<filters, T, E1>(biquad_block<T, filters>(bq, count),
+                                                                 std::forward<E1>(e1)));
         },
-        [&] { return to_pointer(zeros<T>()); });
+        [&] { return to_pointer(fixshape(zeros<T>(), fixed_shape<infinite_size>)); });
 }
 
 template <size_t maxfiltercount = 4, typename T, typename E1>
-KFR_FUNCTION expression_pointer<T> biquad(const std::vector<biquad_params<T>>& bq, E1&& e1)
+KFR_FUNCTION expression_pointer<T, 1> biquad(const std::vector<biquad_params<T>>& bq, E1&& e1)
 {
     return biquad<maxfiltercount>(bq.data(), bq.size(), std::forward<E1>(e1));
 }
@@ -341,11 +339,7 @@ public:
     {
     }
 
-    biquad_filter(const std::vector<biquad_params<T>>& bq)
-        : biquad_filter(bq.data(), bq.size()) 
-    {
-    }
-
+    biquad_filter(const std::vector<biquad_params<T>>& bq) : biquad_filter(bq.data(), bq.size()) {}
 };
 
 } // namespace CMT_ARCH_NAME
diff --git a/include/kfr/dsp/dcremove.hpp b/include/kfr/dsp/dcremove.hpp
@@ -30,14 +30,12 @@
 
 namespace kfr
 {
-inline namespace CMT_ARCH_NAME
-{
 
-template <typename E1, typename T = flt_type<value_type_of<E1>>>
-KFR_INTRINSIC internal::expression_biquads<1, T, E1> dcremove(E1&& e1, double cutoff = 0.00025)
+template <typename E1, typename T = flt_type<expression_value_type<E1>>>
+KFR_INTRINSIC expression_biquads<1, T, E1> dcremove(E1&& e1, double cutoff = 0.00025)
 {
     const biquad_params<T> bqs[1] = { biquad_highpass(cutoff, 0.5) };
-    return internal::expression_biquads<1, T, E1>(bqs, std::forward<E1>(e1));
+    return expression_biquads<1, T, E1>(bqs, std::forward<E1>(e1));
 }
-} // namespace CMT_ARCH_NAME
+
 } // namespace kfr
diff --git a/include/kfr/dsp/delay.hpp b/include/kfr/dsp/delay.hpp
@@ -27,8 +27,8 @@
 
 #include "../base/basic_expressions.hpp"
 #include "../base/expression.hpp"
+#include "../base/state_holder.hpp"
 #include "../base/univector.hpp"
-#include "state_holder.hpp"
 
 namespace kfr
 {
@@ -58,14 +58,21 @@ struct delay_state<T, 1, 1>
     mutable T data = T(0);
 };
 
-namespace internal
-{
-
 template <size_t delay, typename E, bool stateless, univector_tag STag>
-struct expression_delay : expression_with_arguments<E>
+struct expression_delay : expression_with_arguments<E>, public expression_traits_defaults
 {
-    using value_type = value_type_of<E>;
-    using T          = value_type;
+    using ArgTraits = expression_traits<E>;
+    static_assert(ArgTraits::dims == 1, "expression_delay requires argument with dims == 1");
+    using value_type             = typename ArgTraits::value_type;
+    constexpr static size_t dims = 1;
+    constexpr static shape<dims> shapeof(const expression_delay& self)
+    {
+        return ArgTraits::shapeof(self.first());
+    }
+    constexpr static shape<dims> shapeof() { return ArgTraits::shapeof(); }
+    constexpr static inline bool random_access = false;
+
+    using T = value_type;
     using expression_with_arguments<E>::expression_with_arguments;
 
     expression_delay(E&& e, const delay_state<T, delay, STag>& state)
@@ -74,46 +81,54 @@ struct expression_delay : expression_with_arguments<E>
     }
 
     template <size_t N, KFR_ENABLE_IF(N <= delay)>
-    friend KFR_INTRINSIC vec<T, N> get_elements(const expression_delay& self, cinput_t cinput, size_t index,
-                                                vec_shape<T, N>)
+    friend KFR_INTRINSIC vec<T, N> get_elements(const expression_delay& self, shape<1> index,
+                                                axis_params<0, N> sh)
     {
         vec<T, N> out;
-        size_t c = self.state.s.cursor;
-        self.state.s.data.ringbuf_read(c, out);
-        const vec<T, N> in = self.argument_first(cinput, index, vec_shape<T, N>());
-        self.state.s.data.ringbuf_write(self.state.s.cursor, in);
+        size_t c = self.state->cursor;
+        self.state->data.ringbuf_read(c, out);
+        const vec<T, N> in = get_elements(self.first(), index, sh);
+        self.state->data.ringbuf_write(self.state->cursor, in);
         return out;
     }
-    friend vec<T, 1> get_elements(const expression_delay& self, cinput_t cinput, size_t index,
-                                  vec_shape<T, 1>)
+    friend vec<T, 1> get_elements(const expression_delay& self, shape<1> index, axis_params<0, 1> sh)
     {
         T out;
-        size_t c = self.state.s.cursor;
-        self.state.s.data.ringbuf_read(c, out);
-        const T in = self.argument_first(cinput, index, vec_shape<T, 1>())[0];
-        self.state.s.data.ringbuf_write(self.state.s.cursor, in);
+        size_t c = self.state->cursor;
+        self.state->data.ringbuf_read(c, out);
+        const T in = get_elements(self.first(), index, sh).front();
+        self.state->data.ringbuf_write(self.state->cursor, in);
         return out;
     }
     template <size_t N, KFR_ENABLE_IF(N > delay)>
-    friend vec<T, N> get_elements(const expression_delay& self, cinput_t cinput, size_t index,
-                                  vec_shape<T, N>)
+    friend vec<T, N> get_elements(const expression_delay& self, shape<1> index, axis_params<0, N> sh)
     {
         vec<T, delay> out;
-        size_t c = self.state.s.cursor;
-        self.state.s.data.ringbuf_read(c, out);
-        const vec<T, N> in = self.argument_first(cinput, index, vec_shape<T, N>());
-        self.state.s.data.ringbuf_write(self.state.s.cursor, slice<N - delay, delay>(in));
+        size_t c = self.state->cursor;
+        self.state->data.ringbuf_read(c, out);
+        const vec<T, N> in = get_elements(self.first(), index, sh);
+        self.state->data.ringbuf_write(self.state->cursor, slice<N - delay, delay>(in));
         return concat_and_slice<0, N>(out, in);
     }
 
-    state_holder<delay_state<T, delay, STag>, stateless> state;
+    state_holder<const delay_state<T, delay, STag>, stateless> state;
 };
 
 template <typename E, bool stateless, univector_tag STag>
-struct expression_delay<1, E, stateless, STag> : expression_with_arguments<E>
+struct expression_delay<1, E, stateless, STag> : expression_with_arguments<E>, expression_traits_defaults
 {
-    using value_type = value_type_of<E>;
-    using T          = value_type;
+    using ArgTraits = expression_traits<E>;
+    static_assert(ArgTraits::dims == 1, "expression_delay requires argument with dims == 1");
+    using value_type             = typename ArgTraits::value_type;
+    constexpr static size_t dims = 1;
+    constexpr static shape<dims> shapeof(const expression_delay& self)
+    {
+        return ArgTraits::shapeof(self.first());
+    }
+    constexpr static shape<dims> shapeof() { return ArgTraits::shapeof(); }
+    constexpr static inline bool random_access = false;
+
+    using T = value_type;
     using expression_with_arguments<E>::expression_with_arguments;
 
     expression_delay(E&& e, const delay_state<T, 1, STag>& state)
@@ -122,17 +137,16 @@ struct expression_delay<1, E, stateless, STag> : expression_with_arguments<E>
     }
 
     template <size_t N>
-    friend KFR_INTRINSIC vec<T, N> get_elements(const expression_delay& self, cinput_t cinput, size_t index,
-                                                vec_shape<T, N>)
+    friend KFR_INTRINSIC vec<T, N> get_elements(const expression_delay& self, shape<1> index,
+                                                axis_params<0, N> sh)
     {
-        const vec<T, N> in  = self.argument_first(cinput, index, vec_shape<T, N>());
-        const vec<T, N> out = insertleft(self.state.s.data, in);
-        self.state.s.data   = in[N - 1];
+        const vec<T, N> in  = get_elements(self.first(), index, sh);
+        const vec<T, N> out = insertleft(self.state->data, in);
+        self.state->data    = in[N - 1];
         return out;
     }
-    state_holder<delay_state<T, 1, STag>, stateless> state;
+    state_holder<const delay_state<T, 1, STag>, stateless> state;
 };
-} // namespace internal
 
 /**
  * @brief Returns template expression that applies delay to the input (uses ring buffer internally)
@@ -143,12 +157,11 @@ struct expression_delay<1, E, stateless, STag> : expression_with_arguments<E>
  * auto d = delay(v, csize<4>);
  * @endcode
  */
-template <size_t samples = 1, typename E1, typename T = value_type_of<E1>>
-KFR_INTRINSIC internal::expression_delay<samples, E1, false, samples> delay(E1&& e1)
+template <size_t samples = 1, typename E1, typename T = expression_value_type<E1>>
+KFR_INTRINSIC expression_delay<samples, E1, false, samples> delay(E1&& e1)
 {
     static_assert(samples >= 1 && samples < 1024, "");
-    return internal::expression_delay<samples, E1, false, samples>(std::forward<E1>(e1),
-                                                                   delay_state<T, samples>());
+    return expression_delay<samples, E1, false, samples>(std::forward<E1>(e1), delay_state<T, samples>());
 }
 
 /**
@@ -162,11 +175,10 @@ KFR_INTRINSIC internal::expression_delay<samples, E1, false, samples> delay(E1&&
  * @endcode
  */
 template <size_t samples, typename T, typename E1, univector_tag STag>
-KFR_INTRINSIC internal::expression_delay<samples, E1, true, STag> delay(delay_state<T, samples, STag>& state,
-                                                                        E1&& e1)
+KFR_INTRINSIC expression_delay<samples, E1, true, STag> delay(delay_state<T, samples, STag>& state, E1&& e1)
 {
     static_assert(STag == tag_dynamic_vector || (samples >= 1 && samples < 1024), "");
-    return internal::expression_delay<samples, E1, true, STag>(std::forward<E1>(e1), state);
+    return expression_delay<samples, E1, true, STag>(std::forward<E1>(e1), state);
 }
 } // namespace CMT_ARCH_NAME
 } // namespace kfr
diff --git a/include/kfr/dsp/ebu.hpp b/include/kfr/dsp/ebu.hpp
@@ -62,7 +62,7 @@ struct integrated_vec : public univector<T>
 private:
     void compute() const
     {
-        const T z_total = mean(*this);
+        const T z_total = mean(static_cast<const univector<T>&>(*this));
         T relative_gate = energy_to_loudness(z_total) - 10;
 
         T z        = 0;
@@ -130,7 +130,7 @@ private:
         static const T PRC_LOW  = T(0.10);
         static const T PRC_HIGH = T(0.95);
 
-        const T z_total       = mean(*this);
+        const T z_total       = mean(static_cast<const univector<T>&>(*this));
         const T relative_gate = energy_to_loudness(z_total) - 20;
 
         if (this->size() < 2)
@@ -184,7 +184,7 @@ private:
 };
 
 template <typename T>
-KFR_INTRINSIC expression_pointer<T> make_kfilter(int samplerate)
+KFR_INTRINSIC expression_pointer<T, 1> make_kfilter(int samplerate)
 {
     const biquad_params<T> bq[] = {
         biquad_highshelf(T(1681.81 / samplerate), T(+4.0)),
@@ -245,7 +245,7 @@ private:
     const Speaker m_speaker;
     const T m_input_gain;
     const size_t m_packet_size;
-    expression_pointer<T> m_kfilter;
+    expression_pointer<T, 1> m_kfilter;
     univector<T> m_short_sum_of_squares;
     univector<T> m_momentary_sum_of_squares;
     T m_output_energy_gain;
@@ -311,6 +311,7 @@ public:
         T shortterm = 0;
         for (size_t ch = 0; ch < m_channels.size(); ch++)
         {
+            // println(ch, "=> ", source[ch][0], " ", source[ch][10], " ", source[ch][20] );
             TESTO_ASSERT(source[ch].size() == m_packet_size);
             ebu_channel<T>& chan = m_channels[ch];
             chan.process_packet(source[ch].data());
diff --git a/include/kfr/dsp/fir.hpp b/include/kfr/dsp/fir.hpp
@@ -26,13 +26,13 @@
 #pragma once
 
 #include "../base/basic_expressions.hpp"
-#include "../base/simd_expressions.hpp"
 #include "../base/filter.hpp"
 #include "../base/memory.hpp"
 #include "../base/reduce.hpp"
+#include "../base/simd_expressions.hpp"
+#include "../base/state_holder.hpp"
 #include "../base/univector.hpp"
 #include "../simd/vec.hpp"
-#include "state_holder.hpp"
 
 namespace kfr
 {
@@ -87,115 +87,122 @@ struct moving_sum_state<U, tag_dynamic_vector>
     mutable size_t head_cursor, tail_cursor;
 };
 
-namespace internal
-{
-
 template <size_t tapcount, typename T, typename U, typename E1, bool stateless = false>
-struct expression_short_fir : expression_with_arguments<E1>
+struct expression_short_fir : expression_with_traits<E1>
 {
-    using value_type = U;
+    using value_type = U; // override value_type
+
+    static_assert(expression_traits<E1>::dims == 1, "expression_short_fir requires input with dims == 1");
+    constexpr static inline bool random_access = false;
 
     expression_short_fir(E1&& e1, const short_fir_state<tapcount, T, U>& state)
-        : expression_with_arguments<E1>(std::forward<E1>(e1)), state(state)
+        : expression_with_traits<E1>(std::forward<E1>(e1)), state(state)
     {
     }
 
     template <size_t N>
-    KFR_INTRINSIC friend vec<U, N> get_elements(const expression_short_fir& self, cinput_t cinput,
-                                                size_t index, vec_shape<U, N> x)
+    KFR_INTRINSIC friend vec<U, N> get_elements(const expression_short_fir& self, shape<1> index,
+                                                axis_params<0, N> sh)
     {
-        vec<U, N> in = self.argument_first(cinput, index, x);
+        vec<U, N> in = get_elements(self.first(), index, sh);
 
-        vec<U, N> out = in * self.state.s.taps.front();
-        cforeach(csizeseq<tapcount - 1, 1>, [&](auto I) {
-            out = out +
-                  concat_and_slice<tapcount - 1 - I, N>(self.state.s.delayline, in) * self.state.s.taps[I];
-        });
-        self.state.s.delayline = concat_and_slice<N, tapcount - 1>(self.state.s.delayline, in);
+        vec<U, N> out = in * self.state->taps.front();
+        cforeach(csizeseq<tapcount - 1, 1>,
+                 [&](auto I) {
+                     out = out + concat_and_slice<tapcount - 1 - I, N>(self.state->delayline, in) *
+                                     self.state->taps[I];
+                 });
+        self.state->delayline = concat_and_slice<N, tapcount - 1>(self.state->delayline, in);
 
         return out;
     }
-    state_holder<short_fir_state<tapcount, T, U>, stateless> state;
+    state_holder<const short_fir_state<tapcount, T, U>, stateless> state;
 };
 
 template <typename T, typename U, typename E1, bool stateless = false>
-struct expression_fir : expression_with_arguments<E1>
+struct expression_fir : expression_with_traits<E1>
 {
-    using value_type = U;
+    using value_type = U; // override value_type
+
+    static_assert(expression_traits<E1>::dims == 1, "expression_fir requires input with dims == 1");
+    constexpr static inline bool random_access = false;
 
     expression_fir(E1&& e1, const fir_state<T, U>& state)
-        : expression_with_arguments<E1>(std::forward<E1>(e1)), state(state)
+        : expression_with_traits<E1>(std::forward<E1>(e1)), state(state)
     {
     }
 
     template <size_t N>
-    KFR_INTRINSIC friend vec<U, N> get_elements(const expression_fir& self, cinput_t cinput, size_t index,
-                                                vec_shape<U, N> x)
+    KFR_INTRINSIC friend vec<U, N> get_elements(const expression_fir& self, shape<1> index,
+                                                axis_params<0, N> sh)
     {
-        const size_t tapcount = self.state.s.taps.size();
-        const vec<U, N> input = self.argument_first(cinput, index, x);
+        const size_t tapcount = self.state->taps.size();
+        const vec<U, N> input = get_elements(self.first(), index, sh);
 
         vec<U, N> output;
-        size_t cursor = self.state.s.delayline_cursor;
+        size_t cursor = self.state->delayline_cursor;
         CMT_LOOP_NOUNROLL
         for (size_t i = 0; i < N; i++)
         {
-            self.state.s.delayline.ringbuf_write(cursor, input[i]);
-            output[i] =
-                dotproduct(self.state.s.taps, self.state.s.delayline.slice(cursor) /*, tapcount - cursor*/) +
-                dotproduct(self.state.s.taps.slice(tapcount - cursor), self.state.s.delayline /*, cursor*/);
+            self.state->delayline.ringbuf_write(cursor, input[i]);
+            U v =
+                dotproduct(self.state->taps.slice(0, tapcount - cursor), self.state->delayline.slice(cursor));
+            if (cursor > 0)
+                v = v + dotproduct(self.state->taps.slice(tapcount - cursor),
+                                   self.state->delayline.slice(0, cursor));
+            output[i] = v;
         }
-        self.state.s.delayline_cursor = cursor;
+        self.state->delayline_cursor = cursor;
         return output;
     }
-    state_holder<fir_state<T, U>, stateless> state;
+    state_holder<const fir_state<T, U>, stateless> state;
 };
 
 template <typename U, typename E1, univector_tag STag, bool stateless = false>
-struct expression_moving_sum : expression_with_arguments<E1>
+struct expression_moving_sum : expression_with_traits<E1>
 {
-    using value_type = U;
+    using value_type = U; // override value_type
+
+    static_assert(expression_traits<E1>::dims == 1, "expression_moving_sum requires input with dims == 1");
+    constexpr static inline bool random_access = false;
 
     expression_moving_sum(E1&& e1, const moving_sum_state<U, STag>& state)
-        : expression_with_arguments<E1>(std::forward<E1>(e1)), state(state)
+        : expression_with_traits<E1>(std::forward<E1>(e1)), state(state)
     {
     }
 
     template <size_t N>
-    KFR_INTRINSIC friend vec<U, N> get_elements(const expression_moving_sum& self, cinput_t cinput,
-                                                size_t index, vec_shape<U, N> x)
+    KFR_INTRINSIC friend vec<U, N> get_elements(const expression_moving_sum& self, shape<1> index,
+                                                axis_params<0, N> sh)
     {
-        static_assert(N >= 1, "");
-
-        const vec<U, N> input = self.argument_first(cinput, index, x);
+        const vec<U, N> input = get_elements(self.first(), index, sh);
 
         vec<U, N> output;
-        size_t wcursor = self.state.s.head_cursor;
-        size_t rcursor = self.state.s.tail_cursor;
+        size_t wcursor = self.state->head_cursor;
+        size_t rcursor = self.state->tail_cursor;
 
         // initial summation
-        self.state.s.delayline.ringbuf_write(wcursor, input[0]);
-        auto s    = sum(self.state.s.delayline);
+        self.state->delayline.ringbuf_write(wcursor, input[0]);
+        auto s    = sum(self.state->delayline);
         output[0] = s;
 
         CMT_LOOP_NOUNROLL
         for (size_t i = 1; i < N; i++)
         {
             U nextout;
-            self.state.s.delayline.ringbuf_read(rcursor, nextout);
-            U const nextin = input[i];
-            self.state.s.delayline.ringbuf_write(wcursor, nextin);
+            self.state->delayline.ringbuf_read(rcursor, nextout);
+            const U nextin = input[i];
+            self.state->delayline.ringbuf_write(wcursor, nextin);
             s += nextin - nextout;
             output[i] = s;
         }
-        self.state.s.delayline.ringbuf_step(rcursor, 1);
-        self.state.s.head_cursor = wcursor;
-        self.state.s.tail_cursor = rcursor;
+        self.state->delayline.ringbuf_step(rcursor, 1);
+        self.state->head_cursor = wcursor;
+        self.state->tail_cursor = rcursor;
         return output;
     }
-    state_holder<moving_sum_state<U, STag>, stateless> state;
+    state_holder<const moving_sum_state<U, STag>, stateless> state;
 };
-} // namespace internal
 
 /**
  * @brief Returns template expression that applies FIR filter to the input
@@ -203,9 +210,9 @@ struct expression_moving_sum : expression_with_arguments<E1>
  * @param taps coefficients for the FIR filter
  */
 template <typename T, typename E1, univector_tag Tag>
-KFR_INTRINSIC internal::expression_fir<T, value_type_of<E1>, E1> fir(E1&& e1, const univector<T, Tag>& taps)
+KFR_INTRINSIC expression_fir<T, expression_value_type<E1>, E1> fir(E1&& e1, const univector<T, Tag>& taps)
 {
-    return internal::expression_fir<T, value_type_of<E1>, E1>(std::forward<E1>(e1), taps.ref());
+    return expression_fir<T, expression_value_type<E1>, E1>(std::forward<E1>(e1), taps.ref());
 }
 
 /**
@@ -214,21 +221,20 @@ KFR_INTRINSIC internal::expression_fir<T, value_type_of<E1>, E1> fir(E1&& e1, co
  * @param e1 an input expression
  */
 template <typename T, typename U, typename E1>
-KFR_INTRINSIC internal::expression_fir<T, U, E1, true> fir(fir_state<T, U>& state, E1&& e1)
+KFR_INTRINSIC expression_fir<T, U, E1, true> fir(fir_state<T, U>& state, E1&& e1)
 {
-    return internal::expression_fir<T, U, E1, true>(std::forward<E1>(e1), state);
+    return expression_fir<T, U, E1, true>(std::forward<E1>(e1), state);
 }
 
 /**
  * @brief Returns template expression that performs moving sum on the input
- * @param state moving sum state
  * @param e1 an input expression
  */
 template <size_t sum_length, typename E1>
-KFR_INTRINSIC internal::expression_moving_sum<value_type_of<E1>, E1, tag_dynamic_vector> moving_sum(E1&& e1)
+KFR_INTRINSIC expression_moving_sum<expression_value_type<E1>, E1, tag_dynamic_vector> moving_sum(E1&& e1)
 {
-    return internal::expression_moving_sum<value_type_of<E1>, E1, tag_dynamic_vector>(std::forward<E1>(e1),
-                                                                                      sum_length);
+    return expression_moving_sum<expression_value_type<E1>, E1, tag_dynamic_vector>(std::forward<E1>(e1),
+                                                                                    sum_length);
 }
 
 /**
@@ -237,10 +243,9 @@ KFR_INTRINSIC internal::expression_moving_sum<value_type_of<E1>, E1, tag_dynamic
  * @param e1 an input expression
  */
 template <typename U, typename E1, univector_tag STag>
-KFR_INTRINSIC internal::expression_moving_sum<U, E1, STag, true> moving_sum(moving_sum_state<U, STag>& state,
-                                                                            E1&& e1)
+KFR_INTRINSIC expression_moving_sum<U, E1, STag, true> moving_sum(moving_sum_state<U, STag>& state, E1&& e1)
 {
-    return internal::expression_moving_sum<U, E1, STag, true>(std::forward<E1>(e1), state);
+    return expression_moving_sum<U, E1, STag, true>(std::forward<E1>(e1), state);
 }
 
 /**
@@ -250,11 +255,11 @@ KFR_INTRINSIC internal::expression_moving_sum<U, E1, STag, true> moving_sum(movi
  * @param taps coefficients for the FIR filter
  */
 template <typename T, size_t TapCount, typename E1>
-KFR_INTRINSIC internal::expression_short_fir<next_poweroftwo(TapCount - 1) + 1, T, value_type_of<E1>, E1>
+KFR_INTRINSIC expression_short_fir<next_poweroftwo(TapCount - 1) + 1, T, expression_value_type<E1>, E1>
 short_fir(E1&& e1, const univector<T, TapCount>& taps)
 {
     static_assert(TapCount >= 2 && TapCount <= 33, "Use short_fir only for small FIR filters");
-    return internal::expression_short_fir<next_poweroftwo(TapCount - 1) + 1, T, value_type_of<E1>, E1>(
+    return expression_short_fir<next_poweroftwo(TapCount - 1) + 1, T, expression_value_type<E1>, E1>(
         std::forward<E1>(e1), taps);
 }
 
@@ -265,12 +270,11 @@ short_fir(E1&& e1, const univector<T, TapCount>& taps)
  * @param e1 an input expression
  */
 template <size_t TapCount, typename T, typename U, typename E1>
-KFR_INTRINSIC internal::expression_short_fir<next_poweroftwo(TapCount - 1) + 1, T, value_type_of<E1>, E1,
-                                             true>
-    short_fir(short_fir_state<next_poweroftwo(TapCount - 1) + 1, T, U>& state, E1&& e1)
+KFR_INTRINSIC expression_short_fir<next_poweroftwo(TapCount - 1) + 1, T, expression_value_type<E1>, E1, true>
+short_fir(short_fir_state<next_poweroftwo(TapCount - 1) + 1, T, U>& state, E1&& e1)
 {
     static_assert(TapCount >= 2 && TapCount <= 33, "Use short_fir only for small FIR filters");
-    return internal::expression_short_fir<next_poweroftwo(TapCount - 1) + 1, T, value_type_of<E1>, E1, true>(
+    return expression_short_fir<next_poweroftwo(TapCount - 1) + 1, T, expression_value_type<E1>, E1, true>(
         std::forward<E1>(e1), state);
 }
 
@@ -294,7 +298,7 @@ protected:
     {
         make_univector(dest, size) = fir(state, make_univector(src, size));
     }
-    void process_expression(U* dest, const expression_pointer<U>& src, size_t size) final
+    void process_expression(U* dest, const expression_pointer<U, 1>& src, size_t size) final
     {
         make_univector(dest, size) = fir(state, src);
     }
diff --git a/include/kfr/dsp/fir_design.hpp b/include/kfr/dsp/fir_design.hpp
@@ -39,8 +39,7 @@ template <typename T>
 void fir_lowpass(univector_ref<T> taps, T cutoff, const expression_pointer<T>& window, bool normalize = true)
 {
     const T scale = 2.0 * cutoff;
-    taps          = bind_expression(fn::sinc(),
-                           symmlinspace<T, true>((taps.size() - 1) * cutoff * c_pi<T>, taps.size(), true)) *
+    taps = bind_expression(fn::sinc(), symmlinspace<T>((taps.size() - 1) * cutoff * c_pi<T>, taps.size())) *
            scale * window;
 
     if (is_odd(taps.size()))
@@ -57,7 +56,7 @@ void fir_highpass(univector_ref<T> taps, T cutoff, const expression_pointer<T>& 
 {
     const T scale = 2.0 * -cutoff;
     taps          = bind_expression(fn::sinc(),
-                           symmlinspace<T, true>((taps.size() - 1) * cutoff * c_pi<T>, taps.size(), true)) *
+                           symmlinspace<T>((taps.size() - 1) * cutoff * c_pi<T>, taps.size())) *
            scale * window;
 
     if (is_odd(taps.size()))
@@ -80,8 +79,8 @@ void fir_bandpass(univector_ref<T> taps, T frequency1, T frequency2, const expre
     const T start1 = sc * frequency1;
     const T start2 = sc * frequency2;
 
-    taps = (bind_expression(fn::sinc(), symmlinspace<T, true>(start2, taps.size(), true)) * scale2 -
-            bind_expression(fn::sinc(), symmlinspace<T, true>(start1, taps.size(), true)) * scale1) *
+    taps = (bind_expression(fn::sinc(), symmlinspace<T>(start2, taps.size())) * scale2 -
+            bind_expression(fn::sinc(), symmlinspace<T>(start1, taps.size())) * scale1) *
            window;
 
     if (is_odd(taps.size()))
@@ -104,8 +103,8 @@ void fir_bandstop(univector_ref<T> taps, T frequency1, T frequency2, const expre
     const T start1 = sc * frequency1;
     const T start2 = sc * frequency2;
 
-    taps = (bind_expression(fn::sinc(), symmlinspace<T, true>(start1, taps.size(), true)) * scale1 -
-            bind_expression(fn::sinc(), symmlinspace<T, true>(start2, taps.size(), true)) * scale2) *
+    taps = (bind_expression(fn::sinc(), symmlinspace<T>(start1, taps.size())) * scale1 -
+            bind_expression(fn::sinc(), symmlinspace<T>(start2, taps.size())) * scale2) *
            window;
 
     if (is_odd(taps.size()))
diff --git a/include/kfr/dsp/fracdelay.hpp b/include/kfr/dsp/fracdelay.hpp
@@ -34,12 +34,12 @@ inline namespace CMT_ARCH_NAME
 {
 
 template <typename T, typename E1>
-KFR_INTRINSIC internal::expression_short_fir<2, T, value_type_of<E1>, E1> fracdelay(E1&& e1, T delay)
+KFR_INTRINSIC expression_short_fir<2, T, expression_value_type<E1>, E1> fracdelay(E1&& e1, T delay)
 {
     if (CMT_UNLIKELY(delay < 0))
         delay = 0;
     univector<T, 2> taps({ 1 - delay, delay });
-    return internal::expression_short_fir<2, T, value_type_of<E1>, E1>(std::forward<E1>(e1), taps);
+    return expression_short_fir<2, T, expression_value_type<E1>, E1>(std::forward<E1>(e1), taps);
 }
 } // namespace CMT_ARCH_NAME
 } // namespace kfr
diff --git a/include/kfr/dsp/goertzel.hpp b/include/kfr/dsp/goertzel.hpp
@@ -35,12 +35,11 @@ namespace kfr
 inline namespace CMT_ARCH_NAME
 {
 
-namespace internal
-{
-
 template <typename T>
-struct expression_goertzel : output_expression
+struct expression_goertzel : expression_traits_defaults
 {
+    using value_type = accepts_any;
+
     expression_goertzel(complex<T>& result, T omega)
         : result(result), omega(omega), coeff(2 * cos(omega)), q0(), q1(), q2()
     {
@@ -51,7 +50,7 @@ struct expression_goertzel : output_expression
         result.imag(q2 * sin(omega));
     }
     template <typename U, size_t N>
-    KFR_INTRINSIC friend void set_elements(expression_goertzel& self, coutput_t, size_t, const vec<U, N>& x)
+    KFR_INTRINSIC friend void set_elements(expression_goertzel& self, shape<1>, const vec<U, N>& x)
     {
         vec<T, N> in = x;
         CMT_LOOP_UNROLL
@@ -71,8 +70,10 @@ struct expression_goertzel : output_expression
 };
 
 template <typename T, size_t width>
-struct expression_parallel_goertzel : output_expression
+struct expression_parallel_goertzel : expression_traits_defaults
 {
+    using value_type = accepts_any;
+
     expression_parallel_goertzel(complex<T> result[], vec<T, width> omega)
         : result(result), omega(omega), coeff(cos(omega)), q0(), q1(), q2()
     {
@@ -88,8 +89,7 @@ struct expression_parallel_goertzel : output_expression
         }
     }
     template <typename U, size_t N>
-    KFR_INTRINSIC friend void set_elements(expression_parallel_goertzel& self, coutput_t, size_t,
-                                           const vec<U, N>& x)
+    KFR_INTRINSIC friend void set_elements(expression_parallel_goertzel& self, shape<1>, const vec<U, N>& x)
     {
         const vec<T, N> in = x;
         CMT_LOOP_UNROLL
@@ -107,19 +107,18 @@ struct expression_parallel_goertzel : output_expression
     vec<T, width> q1;
     vec<T, width> q2;
 };
-} // namespace internal
 
 template <typename T>
-KFR_INTRINSIC internal::expression_goertzel<T> goertzel(complex<T>& result, identity<T> omega)
+KFR_INTRINSIC expression_goertzel<T> goertzel(complex<T>& result, identity<T> omega)
 {
-    return internal::expression_goertzel<T>(result, omega);
+    return expression_goertzel<T>(result, omega);
 }
 
 template <typename T, size_t width>
-KFR_INTRINSIC internal::expression_parallel_goertzel<T, width> goertzel(complex<T> (&result)[width],
-                                                                        const T (&omega)[width])
+KFR_INTRINSIC expression_parallel_goertzel<T, width> goertzel(complex<T> (&result)[width],
+                                                              const T (&omega)[width])
 {
-    return internal::expression_parallel_goertzel<T, width>(result, read<width>(omega));
+    return expression_parallel_goertzel<T, width>(result, read<width>(omega));
 }
 } // namespace CMT_ARCH_NAME
 } // namespace kfr
diff --git a/include/kfr/dsp/iir_design.hpp b/include/kfr/dsp/iir_design.hpp
@@ -28,9 +28,9 @@
 #include "../base/filter.hpp"
 #include "../base/pointer.hpp"
 #include "../math/hyperbolic.hpp"
+#include "../simd/complex.hpp"
 #include "../simd/impl/function.hpp"
 #include "../simd/operators.hpp"
-#include "../simd/complex.hpp"
 #include "../simd/vec.hpp"
 #include "../testo/assert.hpp"
 #include "biquad_design.hpp"
@@ -60,7 +60,7 @@ KFR_FUNCTION zpk<T> chebyshev1(int N, identity<T> rp)
     T eps = sqrt(exp10(0.1 * rp) - 1.0);
     T mu  = 1.0 / N * std::asinh(1 / eps);
 
-    univector<T> m = linspace(-N + 1, N + 1, N, false, true);
+    univector<T> m = linspace(-N + 1, N + 1, N, false, ctrue);
 
     univector<T> theta      = c_pi<T> * m / (2 * N);
     univector<complex<T>> p = -csinh(make_complex(mu, theta));
@@ -85,17 +85,17 @@ KFR_FUNCTION zpk<T> chebyshev2(int N, identity<T> rs)
 
     if (N % 2)
     {
-        m = concatenate(linspace(-N + 1, -2, N / 2, true, true), linspace(2, N - 1, N / 2, true, true));
+        m = concatenate(linspace(-N + 1, -2, N / 2, true, ctrue), linspace(2, N - 1, N / 2, true, ctrue));
     }
     else
     {
-        m = linspace(-N + 1, N + 1, N, false, true);
+        m = linspace(-N + 1, N + 1, N, false, ctrue);
     }
 
     univector<complex<T>> z = -cconj(complex<T>(0, 1) / sin(m * c_pi<T> / (2.0 * N)));
 
     univector<complex<T>> p =
-        -cexp(complex<T>(0, 1) * c_pi<T> * linspace(-N + 1, N + 1, N, false, true) / (2 * N));
+        -cexp(complex<T>(0, 1) * c_pi<T> * linspace(-N + 1, N + 1, N, false, ctrue) / (2 * N));
     p = make_complex(sinh(mu) * real(p), cosh(mu) * imag(p));
     p = 1.0 / p;
 
@@ -898,7 +898,8 @@ template <typename T>
 KFR_FUNCTION univector<complex<T>> cplxreal(const univector<complex<T>>& list)
 {
     univector<complex<T>> x = list;
-    std::sort(x.begin(), x.end(), [](const complex<T>& a, const complex<T>& b) { return a.real() < b.real(); });
+    std::sort(x.begin(), x.end(),
+              [](const complex<T>& a, const complex<T>& b) { return a.real() < b.real(); });
     T tol                        = std::numeric_limits<T>::epsilon() * 100;
     univector<complex<T>> result = x;
     for (size_t i = result.size(); i > 1; i--)
@@ -1214,4 +1215,4 @@ KFR_FUNCTION std::vector<biquad_params<T>> to_sos(const zpk<T>& filter)
 }
 } // namespace CMT_ARCH_NAME
 
-} // namespace kfr
-\ No newline at end of file
+} // namespace kfr
diff --git a/include/kfr/dsp/mixdown.hpp b/include/kfr/dsp/mixdown.hpp
@@ -36,13 +36,11 @@ inline namespace CMT_ARCH_NAME
  * @brief Returns template expression that returns the sum of all the inputs
  */
 template <typename... E>
-internal::expression_function<fn::add, E...> mixdown(E&&... e)
+expression_function<fn::add, E...> mixdown(E&&... e)
 {
-    return internal::expression_function<fn::add, E...>(fn::add(), std::forward<E>(e)...);
+    return expression_function<fn::add, E...>(fn::add(), std::forward<E>(e)...);
 }
 
-namespace internal
-{
 struct stereo_matrix
 {
     template <typename T, size_t N>
@@ -57,7 +55,6 @@ struct stereo_matrix
     }
     const f64x2x2 matrix;
 };
-} // namespace internal
 
 template <int = 0>
 CMT_GNU_CONSTEXPR f64x2x2 matrix_sum_diff()
@@ -75,12 +72,10 @@ CMT_GNU_CONSTEXPR f64x2x2 matrix_halfsum_halfdiff()
  * channels
  */
 template <typename Left, typename Right,
-          typename Result =
-              internal::expression_function<internal::stereo_matrix, internal::expression_pack<Left, Right>>>
+          typename Result = expression_function<stereo_matrix, expression_pack<Left, Right>>>
 Result mixdown_stereo(Left&& left, Right&& right, const f64x2x2& matrix)
 {
-    return Result(internal::stereo_matrix{ matrix },
-                  pack(std::forward<Left>(left), std::forward<Right>(right)));
+    return Result(stereo_matrix{ matrix }, pack(std::forward<Left>(left), std::forward<Right>(right)));
 }
 } // namespace CMT_ARCH_NAME
 } // namespace kfr
diff --git a/include/kfr/dsp/oscillators.hpp b/include/kfr/dsp/oscillators.hpp
@@ -26,7 +26,9 @@
 #pragma once
 
 #include "../base/basic_expressions.hpp"
+#include "../base/simd_expressions.hpp"
 #include "../math/sin_cos.hpp"
+#include "../simd/round.hpp"
 
 namespace kfr
 {
@@ -143,9 +145,9 @@ KFR_FUNCTION T1 rawsine(const T1& x)
     return intrinsics::rawsine(x);
 }
 template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)>
-KFR_FUNCTION internal::expression_function<fn::rawsine, E1> rawsine(E1&& x)
+KFR_FUNCTION expression_function<fn::rawsine, E1> rawsine(E1&& x)
 {
-    return { fn::rawsine(), std::forward<E1>(x) };
+    return { std::forward<E1>(x) };
 }
 template <typename T1, KFR_ENABLE_IF(is_numeric<T1>)>
 KFR_FUNCTION T1 sine(const T1& x)
@@ -153,9 +155,9 @@ KFR_FUNCTION T1 sine(const T1& x)
     return intrinsics::sine(x);
 }
 template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)>
-KFR_FUNCTION internal::expression_function<fn::sine, E1> sine(E1&& x)
+KFR_FUNCTION expression_function<fn::sine, E1> sine(E1&& x)
 {
-    return { fn::sine(), std::forward<E1>(x) };
+    return { std::forward<E1>(x) };
 }
 template <typename T1, KFR_ENABLE_IF(is_numeric<T1>)>
 KFR_FUNCTION T1 sinenorm(const T1& x)
@@ -163,7 +165,7 @@ KFR_FUNCTION T1 sinenorm(const T1& x)
     return intrinsics::sinenorm(x);
 }
 template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)>
-KFR_FUNCTION internal::expression_function<fn::sinenorm, E1> sinenorm(E1&& x)
+KFR_FUNCTION expression_function<fn::sinenorm, E1> sinenorm(E1&& x)
 {
     return { fn::sinenorm(), std::forward<E1>(x) };
 }
@@ -173,7 +175,7 @@ KFR_FUNCTION T1 rawsquare(const T1& x)
     return intrinsics::rawsquare(x);
 }
 template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)>
-KFR_FUNCTION internal::expression_function<fn::rawsquare, E1> rawsquare(E1&& x)
+KFR_FUNCTION expression_function<fn::rawsquare, E1> rawsquare(E1&& x)
 {
     return { fn::rawsquare(), std::forward<E1>(x) };
 }
@@ -183,7 +185,7 @@ KFR_FUNCTION T1 square(const T1& x)
     return intrinsics::square(x);
 }
 template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)>
-KFR_FUNCTION internal::expression_function<fn::square, E1> square(E1&& x)
+KFR_FUNCTION expression_function<fn::square, E1> square(E1&& x)
 {
     return { fn::square(), std::forward<E1>(x) };
 }
@@ -193,7 +195,7 @@ KFR_FUNCTION T1 squarenorm(const T1& x)
     return intrinsics::squarenorm(x);
 }
 template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)>
-KFR_FUNCTION internal::expression_function<fn::squarenorm, E1> squarenorm(E1&& x)
+KFR_FUNCTION expression_function<fn::squarenorm, E1> squarenorm(E1&& x)
 {
     return { fn::squarenorm(), std::forward<E1>(x) };
 }
@@ -203,7 +205,7 @@ KFR_FUNCTION T1 rawtriangle(const T1& x)
     return intrinsics::rawtriangle(x);
 }
 template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)>
-KFR_FUNCTION internal::expression_function<fn::rawtriangle, E1> rawtriangle(E1&& x)
+KFR_FUNCTION expression_function<fn::rawtriangle, E1> rawtriangle(E1&& x)
 {
     return { fn::rawtriangle(), std::forward<E1>(x) };
 }
@@ -213,7 +215,7 @@ KFR_FUNCTION T1 triangle(const T1& x)
     return intrinsics::triangle(x);
 }
 template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)>
-KFR_FUNCTION internal::expression_function<fn::triangle, E1> triangle(E1&& x)
+KFR_FUNCTION expression_function<fn::triangle, E1> triangle(E1&& x)
 {
     return { fn::triangle(), std::forward<E1>(x) };
 }
@@ -223,7 +225,7 @@ KFR_FUNCTION T1 trianglenorm(const T1& x)
     return intrinsics::trianglenorm(x);
 }
 template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)>
-KFR_FUNCTION internal::expression_function<fn::trianglenorm, E1> trianglenorm(E1&& x)
+KFR_FUNCTION expression_function<fn::trianglenorm, E1> trianglenorm(E1&& x)
 {
     return { fn::trianglenorm(), std::forward<E1>(x) };
 }
@@ -233,7 +235,7 @@ KFR_FUNCTION T1 rawsawtooth(const T1& x)
     return intrinsics::rawsawtooth(x);
 }
 template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)>
-KFR_FUNCTION internal::expression_function<fn::rawsawtooth, E1> rawsawtooth(E1&& x)
+KFR_FUNCTION expression_function<fn::rawsawtooth, E1> rawsawtooth(E1&& x)
 {
     return { fn::rawsawtooth(), std::forward<E1>(x) };
 }
@@ -243,7 +245,7 @@ KFR_FUNCTION T1 sawtooth(const T1& x)
     return intrinsics::sawtooth(x);
 }
 template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)>
-KFR_FUNCTION internal::expression_function<fn::sawtooth, E1> sawtooth(E1&& x)
+KFR_FUNCTION expression_function<fn::sawtooth, E1> sawtooth(E1&& x)
 {
     return { fn::sawtooth(), std::forward<E1>(x) };
 }
@@ -253,7 +255,7 @@ KFR_FUNCTION T1 sawtoothnorm(const T1& x)
     return intrinsics::sawtoothnorm(x);
 }
 template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)>
-KFR_FUNCTION internal::expression_function<fn::sawtoothnorm, E1> sawtoothnorm(E1&& x)
+KFR_FUNCTION expression_function<fn::sawtoothnorm, E1> sawtoothnorm(E1&& x)
 {
     return { fn::sawtoothnorm(), std::forward<E1>(x) };
 }
@@ -263,7 +265,7 @@ KFR_FUNCTION T1 isawtooth(const T1& x)
     return intrinsics::isawtooth(x);
 }
 template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)>
-KFR_FUNCTION internal::expression_function<fn::isawtooth, E1> isawtooth(E1&& x)
+KFR_FUNCTION expression_function<fn::isawtooth, E1> isawtooth(E1&& x)
 {
     return { fn::isawtooth(), std::forward<E1>(x) };
 }
@@ -273,7 +275,7 @@ KFR_FUNCTION T1 isawtoothnorm(const T1& x)
     return intrinsics::isawtoothnorm(x);
 }
 template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)>
-KFR_FUNCTION internal::expression_function<fn::isawtoothnorm, E1> isawtoothnorm(E1&& x)
+KFR_FUNCTION expression_function<fn::isawtoothnorm, E1> isawtoothnorm(E1&& x)
 {
     return { fn::isawtoothnorm(), std::forward<E1>(x) };
 }
diff --git a/include/kfr/dsp/sample_rate_conversion.hpp b/include/kfr/dsp/sample_rate_conversion.hpp
@@ -27,6 +27,9 @@
 
 #include "../base/memory.hpp"
 #include "../base/reduce.hpp"
+#include "../base/univector.hpp"
+#include "../math/modzerobessel.hpp"
+#include "../math/sqrt.hpp"
 #include "../simd/impl/function.hpp"
 #include "../simd/vec.hpp"
 #include "window.hpp"
@@ -203,14 +206,16 @@ public:
             }
             else if (input_start >= input_position)
             {
-                output[i] = dotproduct(input.slice(input_start - input_position, depth), tap_ptr);
+                output[i] =
+                    dotproduct(input.slice(input_start - input_position, depth), tap_ptr.truncate(depth));
             }
             else
             {
                 const itype prev_count = input_position - input_start;
-                output[i]              = dotproduct(delay.slice(size_t(depth - prev_count)), tap_ptr) +
-                            dotproduct(input.slice(0, size_t(depth - prev_count)),
-                                       tap_ptr.slice(size_t(prev_count), size_t(depth - prev_count)));
+                output[i] =
+                    dotproduct(delay.slice(size_t(depth - prev_count)), tap_ptr.truncate(prev_count)) +
+                    dotproduct(input.truncate(size_t(depth - prev_count)),
+                               tap_ptr.slice(size_t(prev_count), size_t(depth - prev_count)));
             }
         }
 
@@ -254,28 +259,28 @@ template <size_t factor, size_t offset, typename E>
 struct expression_downsample;
 
 template <typename E>
-struct expression_upsample<2, E> : expression_with_arguments<E>
+struct expression_upsample<2, E> : expression_with_arguments<E>, expression_traits_defaults
 {
     using expression_with_arguments<E>::expression_with_arguments;
-    using value_type = value_type_of<E>;
+    using value_type = expression_value_type<E>;
     using T          = value_type;
 
     KFR_MEM_INTRINSIC size_t size() const CMT_NOEXCEPT { return expression_with_arguments<E>::size() * 2; }
 
     template <size_t N>
-    KFR_INTRINSIC friend vec<T, N> get_elements(const expression_upsample& self, cinput_t cinput,
-                                                size_t index, vec_shape<T, N>)
+    KFR_INTRINSIC friend vec<T, N> get_elements(const expression_upsample& self, shape<1> index,
+                                                axis_params<0, N>)
     {
-        const vec<T, N / 2> x = self.argument_first(cinput, index / 2, vec_shape<T, N / 2>());
+        const vec<T, N / 2> x = get_elements(self.first(), index / 2, axis_params<0, N / 2>());
         return interleave(x, zerovector(x));
     }
-    KFR_INTRINSIC friend vec<T, 1> get_elements(const expression_upsample& self, cinput_t cinput,
-                                                size_t index, vec_shape<T, 1>)
+    KFR_INTRINSIC friend vec<T, 1> get_elements(const expression_upsample& self, shape<1> index,
+                                                axis_params<0, 1>)
     {
         if (index & 1)
             return 0;
         else
-            return self.argument_first(cinput, index / 2, vec_shape<T, 1>());
+            return get_elements(self.first(), index / 2, axis_params<0, 1>());
     }
 };
 
@@ -283,39 +288,41 @@ template <typename E>
 struct expression_upsample<4, E> : expression_with_arguments<E>
 {
     using expression_with_arguments<E>::expression_with_arguments;
-    using value_type = value_type_of<E>;
+    using value_type = expression_value_type<E>;
     using T          = value_type;
 
     KFR_MEM_INTRINSIC size_t size() const CMT_NOEXCEPT { return expression_with_arguments<E>::size() * 4; }
 
     template <size_t N>
-    KFR_INTRINSIC friend vec<T, N> get_elements(const expression_upsample& self, cinput_t cinput,
-                                                size_t index, vec_shape<T, N>) CMT_NOEXCEPT
+    KFR_INTRINSIC friend vec<T, N> get_elements(const expression_upsample& self, shape<1> index,
+                                                axis_params<0, N>) CMT_NOEXCEPT
     {
-        const vec<T, N / 4> x  = self.argument_first(cinput, index / 4, vec_shape<T, N / 4>());
+        const vec<T, N / 4> x  = get_elements(self.first(), index / 4, axis_params<0, N / 4>());
         const vec<T, N / 2> xx = interleave(x, zerovector(x));
         return interleave(xx, zerovector(xx));
     }
-    KFR_INTRINSIC friend vec<T, 2> get_elements(const expression_upsample& self, cinput_t cinput,
-                                                size_t index, vec_shape<T, 2>) CMT_NOEXCEPT
+    KFR_INTRINSIC friend vec<T, 2> get_elements(const expression_upsample& self, shape<1> index,
+                                                axis_params<0, 2>) CMT_NOEXCEPT
     {
         switch (index & 3)
         {
         case 0:
-            return interleave(self.argument_first(cinput, index / 4, vec_shape<T, 1>()), zerovector<T, 1>());
+            return interleave(get_elements(self.first(), index / 4, axis_params<0, 1>()),
+                              zerovector<T, 1>());
         case 3:
-            return interleave(zerovector<T, 1>(), self.argument_first(cinput, index / 4, vec_shape<T, 1>()));
+            return interleave(zerovector<T, 1>(),
+                              get_elements(self.first(), index / 4, axis_params<0, 1>()));
         default:
             return 0;
         }
     }
-    KFR_INTRINSIC friend vec<T, 1> get_elements(const expression_upsample& self, cinput_t cinput,
-                                                size_t index, vec_shape<T, 1>) CMT_NOEXCEPT
+    KFR_INTRINSIC friend vec<T, 1> get_elements(const expression_upsample& self, shape<1> index,
+                                                axis_params<0, 1>) CMT_NOEXCEPT
     {
         if (index & 3)
             return 0;
         else
-            return self.argument_first(cinput, index / 4, vec_shape<T, 1>());
+            return get_elements(self.first(), index / 4, axis_params<0, 1>());
     }
 };
 
@@ -323,16 +330,16 @@ template <typename E, size_t offset>
 struct expression_downsample<2, offset, E> : expression_with_arguments<E>
 {
     using expression_with_arguments<E>::expression_with_arguments;
-    using value_type = value_type_of<E>;
+    using value_type = expression_value_type<E>;
     using T          = value_type;
 
     KFR_MEM_INTRINSIC size_t size() const CMT_NOEXCEPT { return expression_with_arguments<E>::size() / 2; }
 
     template <size_t N>
-    KFR_INTRINSIC friend vec<T, N> get_elements(const expression_downsample& self, cinput_t cinput,
-                                                size_t index, vec_shape<T, N>) CMT_NOEXCEPT
+    KFR_INTRINSIC friend vec<T, N> get_elements(const expression_downsample& self, shape<1> index,
+                                                axis_params<0, N>) CMT_NOEXCEPT
     {
-        const vec<T, N* 2> x = self.argument_first(cinput, index * 2, vec_shape<T, N * 2>());
+        const vec<T, N* 2> x = get_elements(self.first(), index * 2, axis_params<0, N * 2>());
         return x.shuffle(csizeseq<N, offset, 2>);
     }
 };
@@ -341,16 +348,16 @@ template <typename E, size_t offset>
 struct expression_downsample<4, offset, E> : expression_with_arguments<E>
 {
     using expression_with_arguments<E>::expression_with_arguments;
-    using value_type = value_type_of<E>;
+    using value_type = expression_value_type<E>;
     using T          = value_type;
 
     KFR_MEM_INTRINSIC size_t size() const CMT_NOEXCEPT { return expression_with_arguments<E>::size() / 4; }
 
     template <size_t N>
-    KFR_INTRINSIC friend vec<T, N> get_elements(const expression_downsample& self, cinput_t cinput,
-                                                size_t index, vec_shape<T, N>) CMT_NOEXCEPT
+    KFR_INTRINSIC friend vec<T, N> get_elements(const expression_downsample& self, shape<1> index,
+                                                axis_params<0, N>) CMT_NOEXCEPT
     {
-        const vec<T, N* 4> x = self.argument_first(cinput, index * 4, vec_shape<T, N * 4>());
+        const vec<T, N* 4> x = get_elements(self.first(), index * 4, axis_params<0, N * 4>());
         return x.shuffle(csizeseq<N, offset, 4>);
     }
 };
diff --git a/include/kfr/dsp/special.hpp b/include/kfr/dsp/special.hpp
@@ -40,12 +40,15 @@ inline namespace CMT_ARCH_NAME
 template <typename T = int>
 auto unitimpulse()
 {
-    return lambda<T>([](cinput_t, size_t index, auto x) {
-        if (CMT_UNLIKELY(index == 0))
-            return onoff(x);
-        else
-            return zerovector(x);
-    });
+    return lambda<T>(
+        [](shape<1> index, auto x)
+        {
+            vec_shape<T, decltype(x)::value> sh{};
+            if (CMT_UNLIKELY(index[0] == 0))
+                return onoff(sh);
+            else
+                return zerovector(sh);
+        });
 }
 
 template <typename T = fbase>
diff --git a/include/kfr/dsp/state_holder.hpp b/include/kfr/dsp/state_holder.hpp
@@ -1,41 +0,0 @@
-/** @addtogroup fir
- *  @{
- */
-/**
- * KFR (http://kfrlib.com)
- * Copyright (C) 2016-2022 Fractalium Ltd
- * See LICENSE.txt for details
- */
-#pragma once
-
-#include "../cident.h"
-
-namespace kfr
-{
-inline namespace CMT_ARCH_NAME
-{
-namespace internal
-{
-
-template <typename T, bool stateless>
-struct state_holder
-{
-    state_holder()                    = delete;
-    state_holder(const state_holder&) = default;
-    state_holder(state_holder&&)      = default;
-    constexpr state_holder(const T& state) CMT_NOEXCEPT : s(state) {}
-    T s;
-};
-
-template <typename T>
-struct state_holder<T, true>
-{
-    state_holder()                    = delete;
-    state_holder(const state_holder&) = default;
-    state_holder(state_holder&&)      = default;
-    constexpr state_holder(const T& state) CMT_NOEXCEPT : s(state) {}
-    const T& s;
-};
-} // namespace internal
-} // namespace CMT_ARCH_NAME
-} // namespace kfr
diff --git a/include/kfr/dsp/units.hpp b/include/kfr/dsp/units.hpp
@@ -26,8 +26,8 @@
 #pragma once
 
 #include "../base/basic_expressions.hpp"
-#include "../math/abs.hpp"
 #include "../math/log_exp.hpp"
+#include "../simd/abs.hpp"
 #include "../simd/vec.hpp"
 
 namespace kfr
@@ -137,8 +137,8 @@ KFR_FUNCTION flt_type<T1> note_to_hertz(const T1& x)
     return intrinsics::note_to_hertz(x);
 }
 
-template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)>
-KFR_FUNCTION internal::expression_function<fn::note_to_hertz, E1> note_to_hertz(E1&& x)
+template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
+KFR_FUNCTION expression_function<fn::note_to_hertz, E1> note_to_hertz(E1&& x)
 {
     return { fn::note_to_hertz(), std::forward<E1>(x) };
 }
@@ -149,8 +149,8 @@ KFR_FUNCTION flt_type<T1> hertz_to_note(const T1& x)
     return intrinsics::hertz_to_note(x);
 }
 
-template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)>
-KFR_FUNCTION internal::expression_function<fn::hertz_to_note, E1> hertz_to_note(E1&& x)
+template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
+KFR_FUNCTION expression_function<fn::hertz_to_note, E1> hertz_to_note(E1&& x)
 {
     return { fn::hertz_to_note(), std::forward<E1>(x) };
 }
@@ -161,8 +161,8 @@ KFR_FUNCTION flt_type<T1> amp_to_dB(const T1& x)
     return intrinsics::amp_to_dB(x);
 }
 
-template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)>
-KFR_FUNCTION internal::expression_function<fn::amp_to_dB, E1> amp_to_dB(E1&& x)
+template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
+KFR_FUNCTION expression_function<fn::amp_to_dB, E1> amp_to_dB(E1&& x)
 {
     return { fn::amp_to_dB(), std::forward<E1>(x) };
 }
@@ -173,8 +173,8 @@ KFR_FUNCTION flt_type<T1> dB_to_amp(const T1& x)
     return intrinsics::dB_to_amp(x);
 }
 
-template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)>
-KFR_FUNCTION internal::expression_function<fn::dB_to_amp, E1> dB_to_amp(E1&& x)
+template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
+KFR_FUNCTION expression_function<fn::dB_to_amp, E1> dB_to_amp(E1&& x)
 {
     return { fn::dB_to_amp(), std::forward<E1>(x) };
 }
@@ -185,8 +185,8 @@ KFR_FUNCTION flt_type<T1> power_to_dB(const T1& x)
     return intrinsics::power_to_dB(x);
 }
 
-template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)>
-KFR_FUNCTION internal::expression_function<fn::power_to_dB, E1> power_to_dB(E1&& x)
+template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
+KFR_FUNCTION expression_function<fn::power_to_dB, E1> power_to_dB(E1&& x)
 {
     return { fn::power_to_dB(), std::forward<E1>(x) };
 }
@@ -197,8 +197,8 @@ KFR_FUNCTION flt_type<T1> dB_to_power(const T1& x)
     return intrinsics::dB_to_power(x);
 }
 
-template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)>
-KFR_FUNCTION internal::expression_function<fn::dB_to_power, E1> dB_to_power(E1&& x)
+template <typename E1, KFR_ACCEPT_EXPRESSIONS(E1)>
+KFR_FUNCTION expression_function<fn::dB_to_power, E1> dB_to_power(E1&& x)
 {
     return { fn::dB_to_power(), std::forward<E1>(x) };
 }
diff --git a/include/kfr/dsp/waveshaper.hpp b/include/kfr/dsp/waveshaper.hpp
@@ -25,8 +25,8 @@
  */
 #pragma once
 
-#include "../math/clamp.hpp"
 #include "../math/hyperbolic.hpp"
+#include "../simd/clamp.hpp"
 #include "../simd/operators.hpp"
 
 namespace kfr
@@ -63,13 +63,13 @@ KFR_FUNCTION flt_type<T1> saturate_II(const T1& x)
 KFR_FN(saturate_II)
 
 template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)>
-KFR_FUNCTION internal::expression_function<fn::saturate_II, E1> saturate_I(E1&& x)
+KFR_FUNCTION expression_function<fn::saturate_I, E1> saturate_I(E1&& x)
 {
     return { fn::saturate_I(), std::forward<E1>(x) };
 }
 
 template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)>
-KFR_FUNCTION internal::expression_function<fn::saturate_II, E1> saturate_II(E1&& x)
+KFR_FUNCTION expression_function<fn::saturate_II, E1> saturate_II(E1&& x)
 {
     return { fn::saturate_II(), std::forward<E1>(x) };
 }
diff --git a/include/kfr/dsp/weighting.hpp b/include/kfr/dsp/weighting.hpp
@@ -103,7 +103,7 @@ KFR_INTRINSIC T1 aweighting(const T1& x)
 }
 
 template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)>
-KFR_INTRINSIC internal::expression_function<fn::aweighting, E1> aweighting(E1&& x)
+KFR_INTRINSIC expression_function<fn::aweighting, E1> aweighting(E1&& x)
 {
     return { fn::aweighting(), std::forward<E1>(x) };
 }
@@ -115,7 +115,7 @@ KFR_INTRINSIC T1 bweighting(const T1& x)
 }
 
 template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)>
-KFR_INTRINSIC internal::expression_function<fn::bweighting, E1> bweighting(E1&& x)
+KFR_INTRINSIC expression_function<fn::bweighting, E1> bweighting(E1&& x)
 {
     return { fn::bweighting(), std::forward<E1>(x) };
 }
@@ -127,7 +127,7 @@ KFR_INTRINSIC T1 cweighting(const T1& x)
 }
 
 template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>)>
-KFR_INTRINSIC internal::expression_function<fn::cweighting, E1> cweighting(E1&& x)
+KFR_INTRINSIC expression_function<fn::cweighting, E1> cweighting(E1&& x)
 {
     return { fn::cweighting(), std::forward<E1>(x) };
 }
diff --git a/include/kfr/dsp/window.hpp b/include/kfr/dsp/window.hpp
@@ -51,6 +51,7 @@ enum class window_type
     flattop         = 12,
     gaussian        = 13,
     lanczos         = 14,
+    cosine_np       = 15,
 };
 
 template <window_type type>
@@ -68,324 +69,280 @@ enum class window_symmetry
 inline namespace CMT_ARCH_NAME
 {
 
-namespace internal
+enum class window_metrics
 {
+    metrics_0_1,
+    metrics_m1_1,
+    metrics_mpi_pi,
+    metrics_m1_1_trunc,
+    metrics_m1_1_trunc2,
+};
 
 template <typename T>
-struct window_linspace_0_1 : expression_linspace<T>
+struct window_linspace : expression_linspace<T>
 {
-    window_linspace_0_1(size_t size, window_symmetry symmetry)
-        : expression_linspace<T>(0, 1, size, symmetry == window_symmetry::symmetric)
+    window_linspace(cval_t<window_metrics, window_metrics::metrics_0_1>, size_t size,
+                    window_symmetry symmetry)
+        : expression_linspace<T>{ 0, 1, size, symmetry == window_symmetry::symmetric }
     {
     }
-};
-
-template <typename T>
-struct window_linspace_m1_1 : expression_linspace<T>
-{
-    window_linspace_m1_1(size_t size, window_symmetry symmetry)
-        : expression_linspace<T>(-1, 1, size, symmetry == window_symmetry::symmetric)
+    window_linspace(cval_t<window_metrics, window_metrics::metrics_m1_1>, size_t size,
+                    window_symmetry symmetry)
+        : expression_linspace<T>{ -1, 1, size, symmetry == window_symmetry::symmetric }
     {
     }
-};
-
-template <typename T>
-struct window_linspace_mpi_pi : expression_linspace<T>
-{
-    window_linspace_mpi_pi(size_t size, window_symmetry symmetry)
-        : expression_linspace<T>(-c_pi<T>, +c_pi<T>, size, symmetry == window_symmetry::symmetric)
+    window_linspace(cval_t<window_metrics, window_metrics::metrics_mpi_pi>, size_t size,
+                    window_symmetry symmetry)
+        : expression_linspace<T>{ -c_pi<T>, +c_pi<T>, size, symmetry == window_symmetry::symmetric }
     {
     }
-};
-
-template <typename T>
-struct window_linspace_m1_1_trunc : expression_linspace<T>
-{
-    window_linspace_m1_1_trunc(size_t size, window_symmetry symmetry)
-        : expression_linspace<T>(-T(size - 1) / size, T(size - 1) / size, size,
-                                 symmetry == window_symmetry::symmetric)
+    window_linspace(cval_t<window_metrics, window_metrics::metrics_m1_1_trunc>, size_t size,
+                    window_symmetry symmetry)
+        : expression_linspace<T>{ symmetric_linspace, calc_p(size, symmetry == window_symmetry::symmetric),
+                                  size, symmetry == window_symmetry::symmetric }
+    {
+    }
+    window_linspace(cval_t<window_metrics, window_metrics::metrics_m1_1_trunc2>, size_t size,
+                    window_symmetry symmetry)
+        : expression_linspace<T>{ symmetric_linspace, calc_p2(size, symmetry == window_symmetry::symmetric),
+                                  size, symmetry == window_symmetry::symmetric }
     {
     }
+    static T calc_p(size_t size, bool sym)
+    {
+        if (!sym)
+            ++size;
+        return T(size - 1) / (size);
+    }
+    static T calc_p2(size_t size, bool sym)
+    {
+        if (!sym)
+            ++size;
+        return (size & 1) ? T(size - 1) / T(size + 1) : T(size - 1) / (size);
+    }
 };
 
 template <typename T>
-struct window_linspace_m1_1_trunc2 : expression_linspace<T>
+struct expression_window : expression_traits_defaults
 {
-    window_linspace_m1_1_trunc2(size_t size, window_symmetry symmetry)
-        : expression_linspace<T>(symmetric_linspace,
-                                 (size & 1) ? T(size - 1) / T(size + 1) : T(size - 1) / (size), size,
-                                 symmetry == window_symmetry::symmetric)
+    using value_type             = T;
+    constexpr static size_t dims = 1;
+    constexpr static shape<dims> shapeof(const expression_window<T>& self)
     {
+        return shape<dims>(self.m_size);
     }
+    constexpr static shape<dims> shapeof() { return shape<1>(undefined_size); }
+
+    constexpr expression_window(size_t size) : m_size(size) {}
+
+    size_t m_size;
+    size_t size() const { return m_size; }
 };
 
 template <typename T>
-struct expression_rectangular : input_expression
+struct expression_rectangular : expression_window<T>
 {
-    using value_type = T;
-
-    expression_rectangular(size_t size, T = T(), window_symmetry = window_symmetry::symmetric) : m_size(size)
+    expression_rectangular(size_t size, T = T(), window_symmetry = window_symmetry::symmetric)
+        : expression_window<T>(size)
     {
     }
     template <size_t N>
-    KFR_INTRINSIC friend vec<T, N> get_elements(const expression_rectangular& self, cinput_t, size_t index,
-                                                vec_shape<T, N>)
+    KFR_INTRINSIC friend vec<T, N> get_elements(const expression_rectangular& self, shape<1> index,
+                                                axis_params<0, N>)
     {
         using TI           = utype<T>;
-        const vec<TI, N> i = enumerate(vec_shape<TI, N>()) + static_cast<TI>(index);
+        const vec<TI, N> i = enumerate(vec_shape<TI, N>()) + static_cast<TI>(index.front());
         return select(i < static_cast<TI>(self.m_size), T(1), T(0));
     }
-    size_t size() const { return m_size; }
-
-private:
-    size_t m_size;
 };
 
-template <typename T>
-struct expression_triangular : input_expression
+template <typename T, window_metrics metrics>
+struct expression_window_with_metrics : expression_window<T>
 {
-    using value_type = T;
-
-    expression_triangular(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric)
-        : linspace(size, symmetry), m_size(size)
-    {
-    }
-    template <size_t N>
-    KFR_INTRINSIC friend vec<T, N> get_elements(const expression_triangular& self, cinput_t cinput,
-                                                size_t index, vec_shape<T, N> y)
+    expression_window_with_metrics(size_t size, T arg = T(),
+                                   window_symmetry symmetry = window_symmetry::symmetric)
+        : expression_window<T>(size), linspace(cval<window_metrics, metrics>, size, symmetry), arg(arg)
     {
-        return 1 - abs(get_elements(self.linspace, cinput, index, y));
     }
-    size_t size() const { return m_size; }
 
-private:
-    window_linspace_m1_1_trunc2<T> linspace;
-    size_t m_size;
+protected:
+    window_linspace<T> linspace;
+    T arg;
 };
 
 template <typename T>
-struct expression_bartlett : input_expression
+struct expression_triangular : expression_window_with_metrics<T, window_metrics::metrics_m1_1_trunc2>
 {
-    using value_type = T;
+    using expression_window_with_metrics<T,
+                                         window_metrics::metrics_m1_1_trunc2>::expression_window_with_metrics;
 
-    expression_bartlett(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric)
-        : linspace(size, symmetry), m_size(size)
+    template <size_t N>
+    KFR_INTRINSIC friend vec<T, N> get_elements(const expression_triangular& self, shape<1> index,
+                                                axis_params<0, N> sh)
     {
+        return 1 - abs(get_elements(self.linspace, index, sh));
     }
+};
+
+template <typename T>
+struct expression_bartlett : expression_window_with_metrics<T, window_metrics::metrics_m1_1>
+{
+    using expression_window_with_metrics<T, window_metrics::metrics_m1_1>::expression_window_with_metrics;
     template <size_t N>
-    KFR_INTRINSIC friend vec<T, N> get_elements(const expression_bartlett& self, cinput_t cinput,
-                                                size_t index, vec_shape<T, N> y)
+    KFR_INTRINSIC friend vec<T, N> get_elements(const expression_bartlett& self, shape<1> index,
+                                                axis_params<0, N> sh)
     {
-        return 1 - abs(get_elements(self.linspace, cinput, index, y));
+        return 1 - abs(get_elements(self.linspace, index, sh));
     }
-    size_t size() const { return m_size; }
-
-private:
-    window_linspace_m1_1<T> linspace;
-    size_t m_size;
 };
 
 template <typename T>
-struct expression_cosine : input_expression
+struct expression_cosine : expression_window_with_metrics<T, window_metrics::metrics_0_1>
 {
-    using value_type = T;
+    using expression_window_with_metrics<T, window_metrics::metrics_0_1>::expression_window_with_metrics;
 
-    expression_cosine(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric)
-        : linspace(size, symmetry), m_size(size)
+    template <size_t N>
+    KFR_INTRINSIC friend vec<T, N> get_elements(const expression_cosine& self, shape<1> index,
+                                                axis_params<0, N> sh)
     {
+        return sin(c_pi<T> * (get_elements(self.linspace, index, sh)));
     }
+};
+template <typename T>
+struct expression_cosine_np : expression_window_with_metrics<T, window_metrics::metrics_m1_1_trunc>
+{
+    using expression_window_with_metrics<T,
+                                         window_metrics::metrics_m1_1_trunc>::expression_window_with_metrics;
+
     template <size_t N>
-    KFR_INTRINSIC friend vec<T, N> get_elements(const expression_cosine& self, cinput_t cinput, size_t index,
-                                                vec_shape<T, N> y)
+    KFR_INTRINSIC friend vec<T, N> get_elements(const expression_cosine_np& self, shape<1> index,
+                                                axis_params<0, N> sh)
     {
-        return sin(c_pi<T> * get_elements(self.linspace, cinput, index, y));
+        return sin(c_pi<T, 1, 2> * (1 + get_elements(self.linspace, index, sh)));
     }
-    size_t size() const { return m_size; }
-
-private:
-    window_linspace_0_1<T> linspace;
-    size_t m_size;
 };
 
 template <typename T>
-struct expression_hann : input_expression
+struct expression_hann : expression_window_with_metrics<T, window_metrics::metrics_0_1>
 {
-    using value_type = T;
+    using expression_window_with_metrics<T, window_metrics::metrics_0_1>::expression_window_with_metrics;
 
-    expression_hann(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric)
-        : linspace(size, symmetry), m_size(size)
-    {
-    }
     template <size_t N>
-    KFR_INTRINSIC friend vec<T, N> get_elements(const expression_hann& self, cinput_t cinput, size_t index,
-                                                vec_shape<T, N> y)
+    KFR_INTRINSIC friend vec<T, N> get_elements(const expression_hann& self, shape<1> index,
+                                                axis_params<0, N> sh)
     {
-        return T(0.5) * (T(1) - cos(c_pi<T, 2> * get_elements(self.linspace, cinput, index, y)));
+        return T(0.5) * (T(1) - cos(c_pi<T, 2> * get_elements(self.linspace, index, sh)));
     }
-    size_t size() const { return m_size; }
-
-private:
-    window_linspace_0_1<T> linspace;
-    size_t m_size;
 };
 
 template <typename T>
-struct expression_bartlett_hann : input_expression
+struct expression_bartlett_hann : expression_window_with_metrics<T, window_metrics::metrics_0_1>
 {
-    using value_type = T;
+    using expression_window_with_metrics<T, window_metrics::metrics_0_1>::expression_window_with_metrics;
 
-    expression_bartlett_hann(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric)
-        : linspace(size, symmetry), m_size(size)
-    {
-    }
     template <size_t N>
-    KFR_INTRINSIC friend vec<T, N> get_elements(const expression_bartlett_hann& self, cinput_t cinput,
-                                                size_t index, vec_shape<T, N> y)
+    KFR_INTRINSIC friend vec<T, N> get_elements(const expression_bartlett_hann& self, shape<1> index,
+                                                axis_params<0, N> sh)
     {
-        const vec<T, N> xx = get_elements(self.linspace, cinput, index, y);
+        const vec<T, N> xx = get_elements(self.linspace, index, sh);
         return T(0.62) - T(0.48) * abs(xx - T(0.5)) + T(0.38) * cos(c_pi<T, 2> * (xx - T(0.5)));
     }
-    size_t size() const { return m_size; }
-
-private:
-    window_linspace_0_1<T> linspace;
-    size_t m_size;
 };
 
 template <typename T>
-struct expression_hamming : input_expression
+struct expression_hamming : expression_window_with_metrics<T, window_metrics::metrics_0_1>
 {
-    using value_type = T;
-
     expression_hamming(size_t size, T alpha = 0.54, window_symmetry symmetry = window_symmetry::symmetric)
-        : linspace(size, symmetry), alpha(alpha), m_size(size)
+        : expression_window_with_metrics<T, window_metrics::metrics_0_1>(size, alpha, symmetry)
     {
     }
     template <size_t N>
-    KFR_INTRINSIC friend vec<T, N> get_elements(const expression_hamming& self, cinput_t cinput, size_t index,
-                                                vec_shape<T, N> y)
+    KFR_INTRINSIC friend vec<T, N> get_elements(const expression_hamming& self, shape<1> index,
+                                                axis_params<0, N> sh)
     {
-        return self.alpha -
-               (T(1.0) - self.alpha) * (cos(c_pi<T, 2> * get_elements(self.linspace, cinput, index, y)));
+        return self.arg - (T(1.0) - self.arg) * (cos(c_pi<T, 2> * get_elements(self.linspace, index, sh)));
     }
-    size_t size() const { return m_size; }
-
-private:
-    window_linspace_0_1<T> linspace;
-    T alpha;
-    size_t m_size;
 };
 
 template <typename T>
-struct expression_bohman : input_expression
+struct expression_bohman : expression_window_with_metrics<T, window_metrics::metrics_m1_1>
 {
-    using value_type = T;
+    using expression_window_with_metrics<T, window_metrics::metrics_m1_1>::expression_window_with_metrics;
 
-    expression_bohman(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric)
-        : linspace(size, symmetry), m_size(size)
-    {
-    }
     template <size_t N>
-    KFR_INTRINSIC friend vec<T, N> get_elements(const expression_bohman& self, cinput_t cinput, size_t index,
-                                                vec_shape<T, N> y)
+    KFR_INTRINSIC friend vec<T, N> get_elements(const expression_bohman& self, shape<1> index,
+                                                axis_params<0, N> sh)
     {
-        const vec<T, N> n = abs(get_elements(self.linspace, cinput, index, y));
+        const vec<T, N> n = abs(get_elements(self.linspace, index, sh));
         return (T(1) - n) * cos(c_pi<T> * n) + (T(1) / c_pi<T>)*sin(c_pi<T> * n);
     }
-    size_t size() const { return m_size; }
-
-private:
-    window_linspace_m1_1<T> linspace;
-    size_t m_size;
 };
 
 template <typename T>
-struct expression_blackman : input_expression
+struct expression_blackman : expression_window_with_metrics<T, window_metrics::metrics_0_1>
 {
-    using value_type = T;
+    using expression_window_with_metrics<T, window_metrics::metrics_0_1>::expression_window_with_metrics;
 
     expression_blackman(size_t size, T alpha = 0.16, window_symmetry symmetry = window_symmetry::symmetric)
-        : linspace(size, symmetry), a0((1 - alpha) * 0.5), a1(0.5), a2(alpha * 0.5), m_size(size)
+        : expression_window_with_metrics<T, window_metrics::metrics_0_1>(size, alpha, symmetry),
+          a0((1 - alpha) * 0.5), a1(0.5), a2(alpha * 0.5)
     {
     }
     template <size_t N>
-    KFR_INTRINSIC friend vec<T, N> get_elements(const expression_blackman& self, cinput_t cinput,
-                                                size_t index, vec_shape<T, N> y)
+    KFR_INTRINSIC friend vec<T, N> get_elements(const expression_blackman& self, shape<1> index,
+                                                axis_params<0, N> sh)
     {
-        const vec<T, N> n = get_elements(self.linspace, cinput, index, y);
+        const vec<T, N> n = get_elements(self.linspace, index, sh);
         return self.a0 - self.a1 * cos(c_pi<T, 2> * n) + self.a2 * cos(c_pi<T, 4> * n);
     }
-    size_t size() const { return m_size; }
 
 private:
-    window_linspace_0_1<T> linspace;
     T a0, a1, a2;
-    size_t m_size;
 };
 
 template <typename T>
-struct expression_blackman_harris : input_expression
+struct expression_blackman_harris : expression_window_with_metrics<T, window_metrics::metrics_0_1>
 {
-    using value_type = T;
+    using expression_window_with_metrics<T, window_metrics::metrics_0_1>::expression_window_with_metrics;
 
-    expression_blackman_harris(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric)
-        : linspace(size, symmetry), m_size(size)
-    {
-    }
     template <size_t N>
-    KFR_INTRINSIC friend vec<T, N> get_elements(const expression_blackman_harris& self, cinput_t cinput,
-                                                size_t index, vec_shape<T, N> y)
+    KFR_INTRINSIC friend vec<T, N> get_elements(const expression_blackman_harris& self, shape<1> index,
+                                                axis_params<0, N> sh)
     {
-        const vec<T, N> n = get_elements(self.linspace, cinput, index, y) * c_pi<T, 2>;
+        const vec<T, N> n = get_elements(self.linspace, index, sh) * c_pi<T, 2>;
         return T(0.35875) - T(0.48829) * cos(n) + T(0.14128) * cos(2 * n) - T(0.01168) * cos(3 * n);
     }
-    size_t size() const { return m_size; }
-
-private:
-    window_linspace_0_1<T> linspace;
-    size_t m_size;
 };
 
 template <typename T>
-struct expression_kaiser : input_expression
+struct expression_kaiser : expression_window_with_metrics<T, window_metrics::metrics_m1_1>
 {
-    using value_type = T;
-
     expression_kaiser(size_t size, T beta = 0.5, window_symmetry symmetry = window_symmetry::symmetric)
-        : linspace(size, symmetry), beta(beta), m(reciprocal(modzerobessel(make_vector(beta))[0])),
-          m_size(size)
+        : expression_window_with_metrics<T, window_metrics::metrics_m1_1>(size, beta, symmetry),
+          m(reciprocal(modzerobessel(make_vector(beta))[0]))
     {
     }
     template <size_t N>
-    KFR_INTRINSIC friend vec<T, N> get_elements(const expression_kaiser& self, cinput_t cinput, size_t index,
-                                                vec_shape<T, N> y)
+    KFR_INTRINSIC friend vec<T, N> get_elements(const expression_kaiser& self, shape<1> index,
+                                                axis_params<0, N> sh)
     {
-        return modzerobessel(self.beta * sqrt(1 - sqr(get_elements(self.linspace, cinput, index, y)))) *
-               self.m;
+        return modzerobessel(self.arg * sqrt(1 - sqr(get_elements(self.linspace, index, sh)))) * self.m;
     }
-    size_t size() const { return m_size; }
 
 private:
-    window_linspace_m1_1<T> linspace;
-    T beta;
     T m;
-    size_t m_size;
 };
 
 template <typename T>
-struct expression_flattop : input_expression
+struct expression_flattop : expression_window_with_metrics<T, window_metrics::metrics_0_1>
 {
-    using value_type = T;
+    using expression_window_with_metrics<T, window_metrics::metrics_0_1>::expression_window_with_metrics;
 
-    expression_flattop(size_t size, T = T(), window_symmetry symmetry = window_symmetry::symmetric)
-        : linspace(size, symmetry), m_size(size)
-    {
-    }
     template <size_t N>
-    KFR_INTRINSIC friend vec<T, N> get_elements(const expression_flattop& self, cinput_t cinput, size_t index,
-                                                vec_shape<T, N> y)
+    KFR_INTRINSIC friend vec<T, N> get_elements(const expression_flattop& self, shape<1> index,
+                                                axis_params<0, N> sh)
     {
-        const vec<T, N> n = get_elements(self.linspace, cinput, index, y) * c_pi<T, 2>;
+        const vec<T, N> n = get_elements(self.linspace, index, sh) * c_pi<T, 2>;
         constexpr T a0    = 0.21557895;
         constexpr T a1    = 0.41663158;
         constexpr T a2    = 0.277263158;
@@ -393,58 +350,34 @@ struct expression_flattop : input_expression
         constexpr T a4    = 0.006947368;
         return a0 - a1 * cos(n) + a2 * cos(2 * n) - a3 * cos(3 * n) + a4 * cos(4 * n);
     }
-    size_t size() const { return m_size; }
-
-private:
-    window_linspace_0_1<T> linspace;
-    size_t m_size;
 };
 
 template <typename T>
-struct expression_gaussian : input_expression
+struct expression_gaussian : expression_window_with_metrics<T, window_metrics::metrics_m1_1_trunc>
 {
-    using value_type = T;
-
+    /// alpha ~ N / (2 * std): it scales inversely with the standard deviation (N = window size)
     expression_gaussian(size_t size, T alpha = 2.5, window_symmetry symmetry = window_symmetry::symmetric)
-        : linspace(size, symmetry), alpha(alpha), m_size(size)
+        : expression_window_with_metrics<T, window_metrics::metrics_m1_1_trunc>(size, alpha, symmetry)
     {
     }
     template <size_t N>
-    KFR_INTRINSIC friend vec<T, N> get_elements(const expression_gaussian& self, cinput_t cinput,
-                                                size_t index, vec_shape<T, N> y)
+    KFR_INTRINSIC friend vec<T, N> get_elements(const expression_gaussian& self, shape<1> index,
+                                                axis_params<0, N> sh)
     {
-        return exp(T(-0.5) * sqr(self.alpha * get_elements(self.linspace, cinput, index, y)));
+        return exp(T(-0.5) * sqr(self.arg * get_elements(self.linspace, index, sh)));
     }
-
-    size_t size() const { return m_size; }
-
-private:
-    window_linspace_m1_1_trunc<T> linspace;
-    T alpha;
-    size_t m_size;
 };
 
 template <typename T>
-struct expression_lanczos : input_expression
+struct expression_lanczos : expression_window_with_metrics<T, window_metrics::metrics_mpi_pi>
 {
-    using value_type = T;
-
-    expression_lanczos(size_t size, T alpha = 2.5, window_symmetry symmetry = window_symmetry::symmetric)
-        : linspace(size, symmetry), alpha(alpha), m_size(size)
-    {
-    }
+    using expression_window_with_metrics<T, window_metrics::metrics_mpi_pi>::expression_window_with_metrics;
     template <size_t N>
-    KFR_INTRINSIC friend vec<T, N> get_elements(const expression_lanczos& self, cinput_t cinput, size_t index,
-                                                vec_shape<T, N> y)
+    KFR_INTRINSIC friend vec<T, N> get_elements(const expression_lanczos& self, shape<1> index,
+                                                axis_params<0, N> sh)
     {
-        return sinc(get_elements(self.linspace, cinput, index, y));
+        return sinc(get_elements(self.linspace, index, sh));
     }
-    size_t size() const { return m_size; }
-
-private:
-    window_linspace_mpi_pi<T> linspace;
-    T alpha;
-    size_t m_size;
 };
 
 template <window_type>
@@ -471,62 +404,70 @@ KFR_WINDOW_BY_TYPE(kaiser)
 KFR_WINDOW_BY_TYPE(flattop)
 KFR_WINDOW_BY_TYPE(gaussian)
 KFR_WINDOW_BY_TYPE(lanczos)
+KFR_WINDOW_BY_TYPE(cosine_np)
 #undef KFR_WINDOW_BY_TYPE
-} // namespace internal
 
 /**
  * @brief Returns template expression that generates Rrectangular window of length @c size
  */
 template <typename T = fbase>
-KFR_FUNCTION internal::expression_rectangular<T> window_rectangular(size_t size, ctype_t<T> = ctype_t<T>())
+KFR_FUNCTION expression_rectangular<T> window_rectangular(size_t size, ctype_t<T> = ctype_t<T>())
 {
-    return internal::expression_rectangular<T>(size, T());
+    return expression_rectangular<T>(size, T());
 }
 
 /**
  * @brief Returns template expression that generates Triangular window of length @c size
  */
 template <typename T = fbase>
-KFR_FUNCTION internal::expression_triangular<T> window_triangular(size_t size, ctype_t<T> = ctype_t<T>())
+KFR_FUNCTION expression_triangular<T> window_triangular(size_t size, ctype_t<T> = ctype_t<T>())
 {
-    return internal::expression_triangular<T>(size);
+    return expression_triangular<T>(size);
 }
 
 /**
  * @brief Returns template expression that generates Bartlett window of length @c size
  */
 template <typename T = fbase>
-KFR_FUNCTION internal::expression_bartlett<T> window_bartlett(size_t size, ctype_t<T> = ctype_t<T>())
+KFR_FUNCTION expression_bartlett<T> window_bartlett(size_t size, ctype_t<T> = ctype_t<T>())
 {
-    return internal::expression_bartlett<T>(size);
+    return expression_bartlett<T>(size);
 }
 
 /**
  * @brief Returns template expression that generates Cosine window of length @c size
  */
 template <typename T = fbase>
-KFR_FUNCTION internal::expression_cosine<T> window_cosine(size_t size, ctype_t<T> = ctype_t<T>())
+KFR_FUNCTION expression_cosine<T> window_cosine(size_t size, ctype_t<T> = ctype_t<T>())
+{
+    return expression_cosine<T>(size);
+}
+
+/**
+ * @brief Returns template expression that generates Cosine window (numpy compatible) of length @c size
+ */
+template <typename T = fbase>
+KFR_FUNCTION expression_cosine_np<T> window_cosine_np(size_t size, ctype_t<T> = ctype_t<T>())
 {
-    return internal::expression_cosine<T>(size);
+    return expression_cosine_np<T>(size);
 }
 
 /**
  * @brief Returns template expression that generates Hann window of length @c size
  */
 template <typename T = fbase>
-KFR_FUNCTION internal::expression_hann<T> window_hann(size_t size, ctype_t<T> = ctype_t<T>())
+KFR_FUNCTION expression_hann<T> window_hann(size_t size, ctype_t<T> = ctype_t<T>())
 {
-    return internal::expression_hann<T>(size);
+    return expression_hann<T>(size);
 }
 
 /**
  * @brief Returns template expression that generates Bartlett-Hann window of length @c size
  */
 template <typename T = fbase>
-KFR_FUNCTION internal::expression_bartlett_hann<T> window_bartlett_hann(size_t size,
-                                                                        ctype_t<T> = ctype_t<T>())
+KFR_FUNCTION expression_bartlett_hann<T> window_bartlett_hann(size_t size, ctype_t<T> = ctype_t<T>())
 {
-    return internal::expression_bartlett_hann<T>(size);
+    return expression_bartlett_hann<T>(size);
 }
 
 /**
@@ -534,19 +475,19 @@ KFR_FUNCTION internal::expression_bartlett_hann<T> window_bartlett_hann(size_t s
  * alpha
  */
 template <typename T = fbase>
-KFR_FUNCTION internal::expression_hamming<T> window_hamming(size_t size, identity<T> alpha = 0.54,
-                                                            ctype_t<T> = ctype_t<T>())
+KFR_FUNCTION expression_hamming<T> window_hamming(size_t size, identity<T> alpha = 0.54,
+                                                  ctype_t<T> = ctype_t<T>())
 {
-    return internal::expression_hamming<T>(size, alpha);
+    return expression_hamming<T>(size, alpha);
 }
 
 /**
  * @brief Returns template expression that generates Bohman window of length @c size
  */
 template <typename T = fbase>
-KFR_FUNCTION internal::expression_bohman<T> window_bohman(size_t size, ctype_t<T> = ctype_t<T>())
+KFR_FUNCTION expression_bohman<T> window_bohman(size_t size, ctype_t<T> = ctype_t<T>())
 {
-    return internal::expression_bohman<T>(size);
+    return expression_bohman<T>(size);
 }
 
 /**
@@ -554,21 +495,21 @@ KFR_FUNCTION internal::expression_bohman<T> window_bohman(size_t size, ctype_t<T
  * alpha
  */
 template <typename T = fbase>
-KFR_FUNCTION internal::expression_blackman<T> window_blackman(
-    size_t size, identity<T> alpha = 0.16, window_symmetry symmetry = window_symmetry::symmetric,
-    ctype_t<T> = ctype_t<T>())
+KFR_FUNCTION expression_blackman<T> window_blackman(size_t size, identity<T> alpha = 0.16,
+                                                    window_symmetry symmetry = window_symmetry::symmetric,
+                                                    ctype_t<T>               = ctype_t<T>())
 {
-    return internal::expression_blackman<T>(size, alpha, symmetry);
+    return expression_blackman<T>(size, alpha, symmetry);
 }
 
 /**
  * @brief Returns template expression that generates Blackman-Harris window of length @c size
  */
 template <typename T = fbase>
-KFR_FUNCTION internal::expression_blackman_harris<T> window_blackman_harris(
+KFR_FUNCTION expression_blackman_harris<T> window_blackman_harris(
     size_t size, window_symmetry symmetry = window_symmetry::symmetric, ctype_t<T> = ctype_t<T>())
 {
-    return internal::expression_blackman_harris<T>(size, T(), symmetry);
+    return expression_blackman_harris<T>(size, T(), symmetry);
 }
 
 /**
@@ -576,19 +517,19 @@ KFR_FUNCTION internal::expression_blackman_harris<T> window_blackman_harris(
  * beta
  */
 template <typename T = fbase>
-KFR_FUNCTION internal::expression_kaiser<T> window_kaiser(size_t size, identity<T> beta = T(0.5),
-                                                          ctype_t<T> = ctype_t<T>())
+KFR_FUNCTION expression_kaiser<T> window_kaiser(size_t size, identity<T> beta = T(0.5),
+                                                ctype_t<T> = ctype_t<T>())
 {
-    return internal::expression_kaiser<T>(size, beta);
+    return expression_kaiser<T>(size, beta);
 }
 
 /**
  * @brief Returns template expression that generates Flat top window of length @c size
  */
 template <typename T = fbase>
-KFR_FUNCTION internal::expression_flattop<T> window_flattop(size_t size, ctype_t<T> = ctype_t<T>())
+KFR_FUNCTION expression_flattop<T> window_flattop(size_t size, ctype_t<T> = ctype_t<T>())
 {
-    return internal::expression_flattop<T>(size);
+    return expression_flattop<T>(size);
 }
 
 /**
@@ -596,23 +537,23 @@ KFR_FUNCTION internal::expression_flattop<T> window_flattop(size_t size, ctype_t
  * alpha
  */
 template <typename T = fbase>
-KFR_FUNCTION internal::expression_gaussian<T> window_gaussian(size_t size, identity<T> alpha = 2.5,
-                                                              ctype_t<T> = ctype_t<T>())
+KFR_FUNCTION expression_gaussian<T> window_gaussian(size_t size, identity<T> alpha = 2.5,
+                                                    ctype_t<T> = ctype_t<T>())
 {
-    return internal::expression_gaussian<T>(size, alpha);
+    return expression_gaussian<T>(size, alpha);
 }
 
 /**
  * @brief Returns template expression that generates Lanczos window of length @c size
  */
 template <typename T = fbase>
-KFR_FUNCTION internal::expression_lanczos<T> window_lanczos(size_t size, ctype_t<T> = ctype_t<T>())
+KFR_FUNCTION expression_lanczos<T> window_lanczos(size_t size, ctype_t<T> = ctype_t<T>())
 {
-    return internal::expression_lanczos<T>(size);
+    return expression_lanczos<T>(size);
 }
 
 template <typename T           = fbase, window_type type,
-          typename window_expr = typename internal::window_by_type<type>::template type<T>>
+          typename window_expr = typename window_by_type<type>::template type<T>>
 CMT_NOINLINE window_expr window(size_t size, cval_t<window_type, type>, identity<T> win_param = T(),
                                 window_symmetry symmetry = window_symmetry::symmetric,
                                 ctype_t<T>               = ctype_t<T>())
@@ -629,12 +570,12 @@ CMT_NOINLINE expression_pointer<T> window(size_t size, window_type type, identit
         cvals_t<window_type, window_type::rectangular, window_type::triangular, window_type::bartlett,
                 window_type::cosine, window_type::hann, window_type::bartlett_hann, window_type::hamming,
                 window_type::bohman, window_type::blackman, window_type::blackman_harris, window_type::kaiser,
-                window_type::flattop, window_type::gaussian, window_type::lanczos>(),
+                window_type::flattop, window_type::gaussian, window_type::lanczos, window_type::cosine_np>(),
         type,
-        [size, win_param, symmetry](auto win) {
+        [size, win_param, symmetry](auto win)
+        {
             constexpr window_type window = val_of(decltype(win)());
-            return to_pointer(
-                typename internal::window_by_type<window>::template type<T>(size, win_param, symmetry));
+            return to_pointer(typename window_by_type<window>::template type<T>(size, win_param, symmetry));
         },
         fn_generic::returns<expression_pointer<T>>());
 }
diff --git a/include/kfr/graphics.hpp b/include/kfr/graphics.hpp
@@ -25,4 +25,4 @@
 #include "math.hpp"
 
 #include "graphics/color.hpp"
-#include "graphics/geometry.hpp"
-\ No newline at end of file
+#include "graphics/geometry.hpp"
diff --git a/include/kfr/graphics/color.hpp b/include/kfr/graphics/color.hpp
@@ -25,7 +25,7 @@
  */
 #pragma once
 
-#include "scaled.hpp"
+#include "impl/scaled.hpp"
 
 namespace kfr
 {
diff --git a/include/kfr/graphics/geometry.hpp b/include/kfr/graphics/geometry.hpp
@@ -25,7 +25,7 @@
  */
 #pragma once
 
-#include "scaled.hpp"
+#include "impl/scaled.hpp"
 
 namespace kfr
 {
@@ -64,7 +64,8 @@ struct point
     constexpr point ceil() const { return kfr::ceil(v); }
     constexpr point trunc() const { return kfr::trunc(v); }
 
-    union {
+    union
+    {
         struct
         {
             T x;
@@ -114,7 +115,8 @@ struct size
 
     constexpr bool operator==(const size& c) const { return all(v == c.v); }
     constexpr bool operator!=(const size& c) const { return !operator==(c); }
-    union {
+    union
+    {
         struct
         {
             T x;
@@ -159,7 +161,8 @@ struct border
 
     constexpr bool operator==(const border& c) const { return all(v == c.v); }
     constexpr bool operator!=(const border& c) const { return !(operator==(c)); }
-    union {
+    union
+    {
         struct
         {
             T x1;
@@ -187,7 +190,8 @@ struct vector4
     constexpr bool operator==(const vector4& c) const { return all(v == c.v); }
     constexpr bool operator!=(const vector4& c) const { return !operator==(c); }
 
-    union {
+    union
+    {
         struct
         {
             T x;
@@ -378,7 +382,8 @@ struct rectangle
     rectangle& operator&=(const rectangle& c) { return *this = *this & c; }
     rectangle& operator|=(const rectangle& c) { return *this = *this | c; }
 
-    union {
+    union
+    {
         struct
         {
             T x1;
@@ -422,7 +427,8 @@ CMT_INTRINSIC size<T> max(const size<T>& a, const size<T>& b)
 template <typename T>
 struct matrix
 {
-    union {
+    union
+    {
         vec<T, 6> v;
         struct
         {
diff --git a/include/kfr/graphics/impl/scaled.hpp b/include/kfr/graphics/impl/scaled.hpp
@@ -0,0 +1,58 @@
+/** @addtogroup basic_math
+ *  @{
+ */
+/*
+  Copyright (C) 2016-2022 Fractalium Ltd (https://www.kfrlib.com)
+  This file is part of KFR
+
+  KFR is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 2 of the License, or
+  (at your option) any later version.
+
+  KFR is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with KFR.
+
+  If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
+  Buying a commercial license is mandatory as soon as you develop commercial activities without
+  disclosing the source code of your own applications.
+  See https://www.kfrlib.com for details.
+ */
+#pragma once
+
+#include "../../cometa/string.hpp"
+#include "../../math.hpp"
+
+namespace kfr
+{
+
+template <typename Tout, int Mout, int Min, typename Tin, size_t N,
+          KFR_ENABLE_IF(Mout != Min &&
+                        (std::is_floating_point<Tin>::value || std::is_floating_point<Tout>::value))>
+KFR_INTRINSIC vec<Tout, N> convert_scaled(const vec<Tin, N>& value)
+{
+    using Tcommon = common_type<Tin, Tout>;
+    return static_cast<vec<Tout, N>>(static_cast<vec<Tcommon, N>>(value) * Mout / Min);
+}
+
+template <typename Tout, int Mout, int Min, typename Tin, size_t N,
+          KFR_ENABLE_IF(Mout != Min &&
+                        !(std::is_floating_point<Tin>::value || std::is_floating_point<Tout>::value))>
+KFR_INTRINSIC vec<Tout, N> convert_scaled(const vec<Tin, N>& value)
+{
+    using Tcommon =
+        findinttype<std::numeric_limits<Tin>::min() * Mout, std::numeric_limits<Tin>::max() * Mout>;
+    return static_cast<vec<Tout, N>>(static_cast<vec<Tcommon, N>>(value) * Mout / Min);
+}
+
+template <typename Tout, int Mout, int Min, typename Tin, size_t N, KFR_ENABLE_IF(Mout == Min)>
+KFR_INTRINSIC vec<Tout, N> convert_scaled(const vec<Tin, N>& value)
+{
+    return static_cast<vec<Tout, N>>(value);
+}
+} // namespace kfr
diff --git a/include/kfr/graphics/scaled.hpp b/include/kfr/graphics/scaled.hpp
@@ -1,58 +0,0 @@
-/** @addtogroup basic_math
- *  @{
- */
-/*
-  Copyright (C) 2016-2022 Fractalium Ltd (https://www.kfrlib.com)
-  This file is part of KFR
-
-  KFR is free software: you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation, either version 2 of the License, or
-  (at your option) any later version.
-
-  KFR is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with KFR.
-
-  If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
-  Buying a commercial license is mandatory as soon as you develop commercial activities without
-  disclosing the source code of your own applications.
-  See https://www.kfrlib.com for details.
- */
-#pragma once
-
-#include "../cometa/string.hpp"
-#include "../math.hpp"
-
-namespace kfr
-{
-
-template <typename Tout, int Mout, int Min, typename Tin, size_t N,
-          KFR_ENABLE_IF(Mout != Min &&
-                        (std::is_floating_point<Tin>::value || std::is_floating_point<Tout>::value))>
-KFR_INTRINSIC vec<Tout, N> convert_scaled(const vec<Tin, N>& value)
-{
-    using Tcommon = common_type<Tin, Tout>;
-    return static_cast<vec<Tout, N>>(static_cast<vec<Tcommon, N>>(value) * Mout / Min);
-}
-
-template <typename Tout, int Mout, int Min, typename Tin, size_t N,
-          KFR_ENABLE_IF(Mout != Min &&
-                        !(std::is_floating_point<Tin>::value || std::is_floating_point<Tout>::value))>
-KFR_INTRINSIC vec<Tout, N> convert_scaled(const vec<Tin, N>& value)
-{
-    using Tcommon =
-        findinttype<std::numeric_limits<Tin>::min() * Mout, std::numeric_limits<Tin>::max() * Mout>;
-    return static_cast<vec<Tout, N>>(static_cast<vec<Tcommon, N>>(value) * Mout / Min);
-}
-
-template <typename Tout, int Mout, int Min, typename Tin, size_t N, KFR_ENABLE_IF(Mout == Min)>
-KFR_INTRINSIC vec<Tout, N> convert_scaled(const vec<Tin, N>& value)
-{
-    return static_cast<vec<Tout, N>>(value);
-}
-} // namespace kfr
-\ No newline at end of file
diff --git a/include/kfr/kfr.h b/include/kfr/kfr.h
@@ -72,14 +72,15 @@ constexpr inline const char version_full[] = KFR_VERSION_FULL;
 #ifdef KFR_FUNCTION_IS_INTRINSIC
 #define KFR_FUNCTION CMT_INTRINSIC
 #else
-#define KFR_FUNCTION 
+#define KFR_FUNCTION
 #endif
 #ifdef CMT_NATIVE_F64
 #define KFR_NATIVE_F64 CMT_NATIVE_F64
 #endif
 
 #if defined CMT_ARCH_ARM && !defined CMT_ARCH_NEON && !defined CMT_FORCE_GENERIC_CPU
-#error "ARM builds require NEON support. Add -march=native for native build or skip the check with CMT_FORCE_GENERIC_CPU=1"
+#error                                                                                                       \
+    "ARM builds require NEON support. Add -march=native for native build or skip the check with CMT_FORCE_GENERIC_CPU=1"
 #endif
 
 #if defined CMT_ARCH_ARM && !defined CMT_COMPILER_CLANG && !defined CMT_FORCE_NON_CLANG
diff --git a/include/kfr/math/complex_math.hpp b/include/kfr/math/complex_math.hpp
@@ -25,13 +25,13 @@
  */
 #pragma once
 
-#include "../simd/complex.hpp"
 #include "../simd/abs.hpp"
+#include "../simd/complex.hpp"
+#include "../simd/min_max.hpp"
+#include "../simd/select.hpp"
 #include "atan.hpp"
 #include "hyperbolic.hpp"
 #include "log_exp.hpp"
-#include "../simd/min_max.hpp"
-#include "../simd/select.hpp"
 #include "sin_cos.hpp"
 #include "sqrt.hpp"
 
diff --git a/include/kfr/math/impl/atan.hpp b/include/kfr/math/impl/atan.hpp
@@ -22,11 +22,11 @@
  */
 #pragma once
 #include "../../simd/abs.hpp"
-#include "../../simd/select.hpp"
-#include "../sin_cos.hpp"
 #include "../../simd/constants.hpp"
 #include "../../simd/impl/function.hpp"
 #include "../../simd/operators.hpp"
+#include "../../simd/select.hpp"
+#include "../sin_cos.hpp"
 
 namespace kfr
 {
@@ -59,7 +59,7 @@ KFR_INTRINSIC vec<f32, N> atan2k(const vec<f32, N>& yy, const vec<f32, N>& xx)
     u = fmadd(u, t, 0.199926957488059997558594f);
     u = fmadd(u, t, -0.333331018686294555664062f);
     t = u * t * s + s;
-    t = innercast<f32>(q) * 1.5707963267948966192313216916398f + t;
+    t = broadcastto<f32>(q) * 1.5707963267948966192313216916398f + t;
     return t;
 }
 
@@ -98,7 +98,7 @@ KFR_INTRINSIC vec<f64, N> atan2k(const vec<f64, N>& yy, const vec<f64, N>& xx)
     u = fmadd(u, t, 0.199999999996591265594148);
     u = fmadd(u, t, -0.333333333333311110369124);
     t = u * t * s + s;
-    t = innercast<f64>(q) * 1.5707963267948966192313216916398 + t;
+    t = broadcastto<f64>(q) * 1.5707963267948966192313216916398 + t;
     return t;
 }
 
diff --git a/include/kfr/math/impl/gamma.hpp b/include/kfr/math/impl/gamma.hpp
@@ -50,7 +50,7 @@ KFR_INTRINSIC vec<T, N> gamma(const vec<T, N>& z)
     vec<T, N> accm         = gamma_precalc<T>[0];
     CMT_LOOP_UNROLL
     for (size_t k = 1; k < Count; k++)
-        accm += gamma_precalc<T>[k] / (z + innercast<utype<T>>(k));
+        accm += gamma_precalc<T>[k] / (z + broadcastto<utype<T>>(k));
     accm *= exp(-(z + Count)) * pow(z + Count, z + 0.5);
     return accm / z;
 }
diff --git a/include/kfr/math/impl/hyperbolic.hpp b/include/kfr/math/impl/hyperbolic.hpp
@@ -23,12 +23,12 @@
 #pragma once
 
 #include "../../simd/abs.hpp"
-#include "../log_exp.hpp"
-#include "../../simd/min_max.hpp"
-#include "../../simd/select.hpp"
 #include "../../simd/constants.hpp"
 #include "../../simd/impl/function.hpp"
+#include "../../simd/min_max.hpp"
 #include "../../simd/operators.hpp"
+#include "../../simd/select.hpp"
+#include "../log_exp.hpp"
 
 namespace kfr
 {
diff --git a/include/kfr/math/impl/log_exp.hpp b/include/kfr/math/impl/log_exp.hpp
@@ -24,12 +24,12 @@
 
 #include "../../simd/abs.hpp"
 #include "../../simd/clamp.hpp"
-#include "../../simd/min_max.hpp"
-#include "../../simd/round.hpp"
-#include "../../simd/select.hpp"
 #include "../../simd/constants.hpp"
 #include "../../simd/impl/function.hpp"
+#include "../../simd/min_max.hpp"
 #include "../../simd/operators.hpp"
+#include "../../simd/round.hpp"
+#include "../../simd/select.hpp"
 #include "../../simd/shuffle.hpp"
 
 namespace kfr
@@ -65,8 +65,8 @@ KFR_INTRINSIC vec<f32, N> vldexpk(const vec<f32, N>& x, const vec<i32, N>& q)
     m                    = (((m + q) >> 6) - m) << 4;
     const vec<i32, N> qq = q - (m << 2);
     m                    = clamp(m + 0x7f, vec<i32, N>(0xff));
-    vec<f32, N> u        = pow4(bitcast<f32>(innercast<i32>(m) << 23));
-    return x * u * bitcast<f32>((innercast<i32>(qq + 0x7f)) << 23);
+    vec<f32, N> u        = pow4(bitcast<f32>(broadcastto<i32>(m) << 23));
+    return x * u * bitcast<f32>((broadcastto<i32>(qq + 0x7f)) << 23);
 }
 
 template <size_t N>
@@ -76,8 +76,8 @@ KFR_INTRINSIC vec<f64, N> vldexpk(const vec<f64, N>& x, const vec<i64, N>& q)
     m                    = (((m + q) >> 9) - m) << 7;
     const vec<i64, N> qq = q - (m << 2);
     m                    = clamp(m + 0x3ff, i64(0x7ff));
-    vec<f64, N> u        = pow4(bitcast<f64>(innercast<i64>(m) << 52));
-    return x * u * bitcast<f64>((innercast<i64>(qq + 0x3ff)) << 52);
+    vec<f64, N> u        = pow4(bitcast<f64>(broadcastto<i64>(m) << 52));
+    return x * u * bitcast<f64>((broadcastto<i64>(qq + 0x3ff)) << 52);
 }
 
 template <typename T, size_t N>
@@ -98,12 +98,12 @@ KFR_INTRINSIC vec<f32, N> log(const vec<f32, N>& d)
     vec<f32, N> sp = select(d < 0, constants<f32>::qnan, constants<f32>::neginfinity);
 
     vec<f32, N> t;
-    t             = fmadd(0.2371599674224853515625f, x2, 0.285279005765914916992188f);
-    t             = fmadd(t, x2, 0.400005519390106201171875f);
-    t             = fmadd(t, x2, 0.666666567325592041015625f);
-    t             = fmadd(t, x2, 2.0f);
+    t = fmadd(0.2371599674224853515625f, x2, 0.285279005765914916992188f);
+    t = fmadd(t, x2, 0.400005519390106201171875f);
+    t = fmadd(t, x2, 0.666666567325592041015625f);
+    t = fmadd(t, x2, 2.0f);
 
-    x = x * t + c_log_2<f32> * innercast<f32>(e);
+    x = x * t + c_log_2<f32> * broadcastto<f32>(e);
     x = select(d > 0, x, sp);
 
     return x;
@@ -120,16 +120,16 @@ KFR_INTRINSIC vec<f64, N> log(const vec<f64, N>& d)
 
     vec<f64, N> sp = select(d < 0, constants<f64>::qnan, constants<f64>::neginfinity);
 
-    vec<f64, N> t; 
-    t             = fmadd(0.148197055177935105296783, x2, 0.153108178020442575739679);
-    t             = fmadd(t, x2, 0.181837339521549679055568);
-    t             = fmadd(t, x2, 0.22222194152736701733275);
-    t             = fmadd(t, x2, 0.285714288030134544449368);
-    t             = fmadd(t, x2, 0.399999999989941956712869);
-    t             = fmadd(t, x2, 0.666666666666685503450651);
-    t             = fmadd(t, x2, 2);
+    vec<f64, N> t;
+    t = fmadd(0.148197055177935105296783, x2, 0.153108178020442575739679);
+    t = fmadd(t, x2, 0.181837339521549679055568);
+    t = fmadd(t, x2, 0.22222194152736701733275);
+    t = fmadd(t, x2, 0.285714288030134544449368);
+    t = fmadd(t, x2, 0.399999999989941956712869);
+    t = fmadd(t, x2, 0.666666666666685503450651);
+    t = fmadd(t, x2, 2);
 
-    x = x * t + constants<f64>::log_2 * innercast<f64>(e);
+    x = x * t + constants<f64>::log_2 * broadcastto<f64>(e);
     x = select(d > 0, x, sp);
 
     return x;
@@ -138,12 +138,12 @@ KFR_INTRINSIC vec<f64, N> log(const vec<f64, N>& d)
 template <typename T, size_t N, typename Tout = flt_type<T>>
 KFR_INTRINSIC vec<Tout, N> log2(const vec<T, N>& x)
 {
-    return log(innercast<Tout>(x)) * constants<Tout>::recip_log_2;
+    return log(broadcastto<Tout>(x)) * constants<Tout>::recip_log_2;
 }
 template <typename T, size_t N, typename Tout = flt_type<T>>
 KFR_INTRINSIC vec<Tout, N> log10(const vec<T, N>& x)
 {
-    return log(innercast<Tout>(x)) * constants<Tout>::recip_log_10;
+    return log(broadcastto<Tout>(x)) * constants<Tout>::recip_log_10;
 }
 
 template <size_t N>
@@ -152,11 +152,11 @@ KFR_INTRINSIC vec<f32, N> exp(const vec<f32, N>& d)
     const f32 ln2_part1 = 0.6931457519f;
     const f32 ln2_part2 = 1.4286067653e-6f;
 
-    vec<i32, N> q = innercast<i32>(floor(d * constants<f32>::recip_log_2));
+    vec<i32, N> q = broadcastto<i32>(floor(d * constants<f32>::recip_log_2));
     vec<f32, N> s, u;
 
-    s = fmadd(innercast<f32>(q), -ln2_part1, d);
-    s = fmadd(innercast<f32>(q), -ln2_part2, s);
+    s = fmadd(broadcastto<f32>(q), -ln2_part1, d);
+    s = fmadd(broadcastto<f32>(q), -ln2_part2, s);
 
     const f32 c2 = 0.4999999105930328369140625f;
     const f32 c3 = 0.166668415069580078125f;
@@ -185,11 +185,11 @@ KFR_INTRINSIC vec<f64, N> exp(const vec<f64, N>& d)
     const f64 ln2_part1 = 0.69314717501401901245;
     const f64 ln2_part2 = 5.545926273775592108e-009;
 
-    vec<i64, N> q = innercast<i64>(floor(d * constants<f64>::recip_log_2));
+    vec<i64, N> q = broadcastto<i64>(floor(d * constants<f64>::recip_log_2));
     vec<f64, N> s, u;
 
-    s = fmadd(innercast<f64>(q), -ln2_part1, d);
-    s = fmadd(innercast<f64>(q), -ln2_part2, s);
+    s = fmadd(broadcastto<f64>(q), -ln2_part1, d);
+    s = fmadd(broadcastto<f64>(q), -ln2_part2, s);
 
     const f64 c2  = 0.499999999999994948485237955537741072475910186767578;
     const f64 c3  = 0.166666666667024204739888659787538927048444747924805;
@@ -235,7 +235,7 @@ KFR_INTRINSIC vec<T, N> pow(const vec<T, N>& a, const vec<T, N>& b)
 {
     const vec<T, N> t       = exp(b * log(abs(a)));
     const mask<T, N> isint  = floor(b) == b;
-    const mask<T, N> iseven = (innercast<itype<T>>(b) & 1) == 0;
+    const mask<T, N> iseven = (broadcastto<itype<T>>(b) & 1) == 0;
     return select(
         a > T(), t,
         select(a == T(), T(), select(isint, select(iseven, t, -t), broadcast<N>(constants<T>::qnan))));
@@ -256,7 +256,7 @@ KFR_INTRINSIC vec<T, N> cbrt(const vec<T, N>& x)
 template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>), typename Tout = flt_type<T>>
 KFR_INTRINSIC vec<Tout, N> cbrt(const vec<T, N>& x)
 {
-    return cbrt(innercast<Tout>(x));
+    return cbrt(broadcastto<Tout>(x));
 }
 
 KFR_HANDLE_SCALAR_1_T(exp, flt_type<T>)
diff --git a/include/kfr/math/impl/sin_cos.hpp b/include/kfr/math/impl/sin_cos.hpp
@@ -23,12 +23,12 @@
 #pragma once
 
 #include "../../simd/abs.hpp"
-#include "../../simd/min_max.hpp"
-#include "../../simd/round.hpp"
-#include "../../simd/select.hpp"
 #include "../../simd/constants.hpp"
 #include "../../simd/impl/function.hpp"
+#include "../../simd/min_max.hpp"
 #include "../../simd/operators.hpp"
+#include "../../simd/round.hpp"
+#include "../../simd/select.hpp"
 #include "../../simd/shuffle.hpp"
 
 #if CMT_HAS_WARNING("-Wc99-extensions")
@@ -62,7 +62,7 @@ KFR_INTRINSIC vec<T, N> trig_fold(const vec<T, N>& x, vec<itype<T>, N>& quadrant
     const vec<T, N> xabs = abs(x);
     constexpr T div      = constants<T>::fold_constant_div;
     vec<T, N> y          = floor(xabs / div);
-    quadrant             = innercast<itype<T>>(innercast<int>(y - floor(y * T(1.0 / 16.0)) * T(16.0)));
+    quadrant             = broadcastto<itype<T>>(broadcastto<int>(y - floor(y * T(1.0 / 16.0)) * T(16.0)));
 
     const mask<T, N> msk = (quadrant & 1) != 0;
     quadrant             = kfr::select(msk, quadrant + 1, quadrant);
diff --git a/include/kfr/math/impl/tan.hpp b/include/kfr/math/impl/tan.hpp
@@ -46,7 +46,7 @@ KFR_INTRINSIC vec<T, N> trig_fold_simple(const vec<T, N>& x_full, mask<T, N>& in
     vec<T, N> scaled = y / pi_14;
 
     vec<T, N> k_real = floor(scaled);
-    vec<IT, N> k     = innercast<IT>(k_real);
+    vec<IT, N> k     = broadcastto<IT>(k_real);
 
     vec<T, N> x = y - k_real * pi_14;
 
@@ -143,6 +143,9 @@ KFR_INTRINSIC flt_type<T> tandeg(const T& x)
     return tan(x * c_degtorad<flt_type<T>>);
 }
 } // namespace intrinsics
+namespace fn
+{
+}
 KFR_I_FN(tan)
 KFR_I_FN(tandeg)
 } // namespace CMT_ARCH_NAME
diff --git a/include/kfr/simd.hpp b/include/kfr/simd.hpp
@@ -26,6 +26,7 @@
 #include "simd/clamp.hpp"
 #include "simd/comparison.hpp"
 #include "simd/complex.hpp"
+#include "simd/complex_type.hpp"
 #include "simd/constants.hpp"
 #include "simd/digitreverse.hpp"
 #include "simd/horizontal.hpp"
diff --git a/include/kfr/simd/complex.hpp b/include/kfr/simd/complex.hpp
@@ -57,6 +57,30 @@ KFR_INTRINSIC complex<T> operator/(const complex<T>& x, const complex<T>& y)
 {
     return (make_vector(x) / make_vector(y))[0];
 }
+template <typename T>
+KFR_INTRINSIC complex<T>& operator+=(complex<T>& x, const complex<T>& y)
+{
+    x = x + y;
+    return x;
+}
+template <typename T>
+KFR_INTRINSIC complex<T>& operator-=(complex<T>& x, const complex<T>& y)
+{
+    x = x - y;
+    return x;
+}
+template <typename T>
+KFR_INTRINSIC complex<T>& operator*=(complex<T>& x, const complex<T>& y)
+{
+    x = x * y;
+    return x;
+}
+template <typename T>
+KFR_INTRINSIC complex<T>& operator/=(complex<T>& x, const complex<T>& y)
+{
+    x = x / y;
+    return x;
+}
 
 template <typename T, typename U, KFR_ENABLE_IF(is_number<U>), typename C = common_type<complex<T>, U>>
 KFR_INTRINSIC C operator+(const complex<T>& x, const U& y)
@@ -78,6 +102,30 @@ KFR_INTRINSIC C operator/(const complex<T>& x, const U& y)
 {
     return static_cast<C>(x) / static_cast<C>(y);
 }
+template <typename T, typename U, KFR_ENABLE_IF(std::is_convertible_v<U, T>)>
+KFR_INTRINSIC complex<T>& operator+=(complex<T>& x, const U& y)
+{
+    x = x + y;
+    return x;
+}
+template <typename T, typename U, KFR_ENABLE_IF(std::is_convertible_v<U, T>)>
+KFR_INTRINSIC complex<T>& operator-=(complex<T>& x, const U& y)
+{
+    x = x - y;
+    return x;
+}
+template <typename T, typename U, KFR_ENABLE_IF(std::is_convertible_v<U, T>)>
+KFR_INTRINSIC complex<T>& operator*=(complex<T>& x, const U& y)
+{
+    x = x * y;
+    return x;
+}
+template <typename T, typename U, KFR_ENABLE_IF(std::is_convertible_v<U, T>)>
+KFR_INTRINSIC complex<T>& operator/=(complex<T>& x, const U& y)
+{
+    x = x / y;
+    return x;
+}
 
 template <typename T, typename U, KFR_ENABLE_IF(is_number<U>), typename C = common_type<complex<T>, U>>
 KFR_INTRINSIC C operator+(const U& x, const complex<T>& y)
@@ -275,8 +323,8 @@ struct is_complex_impl<complex<T>> : std::true_type
 };
 
 // vector<complex> to vector<complex>
-template <typename To, typename From, size_t N>
-struct conversion<vec<complex<To>, N>, vec<complex<From>, N>>
+template <typename To, typename From, size_t N, conv_t conv>
+struct conversion<1, 1, vec<complex<To>, N>, vec<complex<From>, N>, conv>
 {
     static_assert(!is_compound<To>, "");
     static_assert(!is_compound<From>, "");
@@ -287,8 +335,8 @@ struct conversion<vec<complex<To>, N>, vec<complex<From>, N>>
 };
 
 // vector to vector<complex>
-template <typename To, typename From, size_t N>
-struct conversion<vec<complex<To>, N>, vec<From, N>>
+template <typename To, typename From, size_t N, conv_t conv>
+struct conversion<1, 1, vec<complex<To>, N>, vec<From, N>, conv>
 {
     static_assert(!is_compound<To>, "");
     static_assert(!is_compound<From>, "");
@@ -349,7 +397,7 @@ template <typename T1, typename T2 = T1, size_t N, typename T = common_type<T1, 
 constexpr KFR_INTRINSIC vec<complex<T>, N> make_complex(const vec<T1, N>& real,
                                                         const vec<T2, N>& imag = T2(0))
 {
-    return ccomp(interleave(innercast<T>(real), innercast<T>(imag)));
+    return ccomp(interleave(promoteto<T>(real), promoteto<T>(imag)));
 }
 
 /// @brief Constructs complex value from real and imaginary parts
@@ -357,7 +405,7 @@ template <typename T1, typename T2 = T1, typename T = common_type<T1, T2>,
           KFR_ENABLE_IF(is_numeric_args<T1, T2>)>
 constexpr KFR_INTRINSIC complex<T> make_complex(T1 real, T2 imag = T2(0))
 {
-    return complex<T>(innercast<T>(real), innercast<T>(imag));
+    return complex<T>(promoteto<T>(real), promoteto<T>(imag));
 }
 KFR_FN(make_complex)
 
diff --git a/include/kfr/simd/complex_type.hpp b/include/kfr/simd/complex_type.hpp
@@ -2,7 +2,7 @@
  *  @{
  */
 /*
-  Copyright (C) 2016 D Levin (https://www.kfrlib.com)
+  Copyright (C) 2016-2022 Fractalium Ltd (https://www.kfrlib.com)
   This file is part of KFR
 
   KFR is free software: you can redistribute it and/or modify
diff --git a/include/kfr/simd/impl/abs.hpp b/include/kfr/simd/impl/abs.hpp
@@ -22,9 +22,9 @@
  */
 #pragma once
 
+#include "../operators.hpp"
 #include "../select.hpp"
 #include "function.hpp"
-#include "../operators.hpp"
 
 namespace kfr
 {
diff --git a/include/kfr/simd/impl/backend_clang.hpp b/include/kfr/simd/impl/backend_clang.hpp
@@ -135,11 +135,13 @@ KFR_INTRINSIC simd<T, Nout> simd_shuffle(simd2_t<T, N1, N2>, const simd<T, N1>& 
                                          csizes_t<indices...>, overload_generic)
 {
     constexpr size_t Nmax = (N1 > N2 ? N1 : N2);
-    return simd_shuffle(
-        simd2_t<T, Nmax, Nmax>{}, simd_shuffle(simd_t<T, N1>{}, x, csizeseq<Nmax>, overload_auto),
-        simd_shuffle(simd_t<T, N2>{}, y, csizeseq<Nmax>, overload_auto),
-        csizes<(indices < N1 ? indices : indices < N1 + N2 ? indices + (Nmax - N1) : index_undefined)...>,
-        overload_auto);
+    return simd_shuffle(simd2_t<T, Nmax, Nmax>{},
+                        simd_shuffle(simd_t<T, N1>{}, x, csizeseq<Nmax>, overload_auto),
+                        simd_shuffle(simd_t<T, N2>{}, y, csizeseq<Nmax>, overload_auto),
+                        csizes<(indices < N1        ? indices
+                                : indices < N1 + N2 ? indices + (Nmax - N1)
+                                                    : index_undefined)...>,
+                        overload_auto);
 }
 
 template <typename T, size_t N1>
diff --git a/include/kfr/simd/impl/backend_generic.hpp b/include/kfr/simd/impl/backend_generic.hpp
@@ -455,7 +455,8 @@ KFR_INTRINSIC __m128 KFR_swap_ps(__m128 x) { return _mm_shuffle_ps(x, x, _MM_SHU
 
 #ifndef KFR_f32x2_array
 // KFR_INTRIN_SHUFFLE_CONCAT(f32, 2, _mm_castpd_ps(_mm_setr_pd(x.whole, y.whole)))
-KFR_INTRIN_SHUFFLE_SWAP(f32, 2, _mm_cvtsd_f64(_mm_castps_pd(KFR_swap_ps(_mm_castpd_ps(_mm_set1_pd(x.whole))))))
+KFR_INTRIN_SHUFFLE_SWAP(f32, 2,
+                        _mm_cvtsd_f64(_mm_castps_pd(KFR_swap_ps(_mm_castpd_ps(_mm_set1_pd(x.whole))))))
 #else
 KFR_INTRIN_SHUFFLE_CONCAT(f32, 2, _mm_setr_ps(x.low, x.high, y.low, y.high))
 KFR_INTRIN_SHUFFLE_SWAP(f32, 2, simd<f32, 2>(x.high, x.low))
diff --git a/include/kfr/simd/impl/basicoperators_clang.hpp b/include/kfr/simd/impl/basicoperators_clang.hpp
@@ -86,6 +86,12 @@ KFR_INTRINSIC vec<T, N> div(const vec<T, N>& x, const vec<T, N>& y)
     return x.v / y.v;
 }
 KFR_OP_SCALAR2(div, /, , , )
+template <typename T, size_t N, KFR_ENABLE_IF(is_simd_type<T>)>
+KFR_INTRINSIC vec<T, N> mod(const vec<T, N>& x, const vec<T, N>& y)
+{
+    return x.v % y.v;
+}
+KFR_OP_SCALAR2(mod, %, , , )
 
 template <typename T, size_t N, KFR_ENABLE_IF(is_simd_type<T>)>
 KFR_INTRINSIC vec<T, N> band(const vec<T, N>& x, const vec<T, N>& y)
diff --git a/include/kfr/simd/impl/basicoperators_complex.hpp b/include/kfr/simd/impl/basicoperators_complex.hpp
@@ -2,7 +2,7 @@
  *  @{
  */
 /*
-  Copyright (C) 2016 D Levin (https://www.kfrlib.com)
+  Copyright (C) 2016-2022 Fractalium Ltd (https://www.kfrlib.com)
   This file is part of KFR
 
   KFR is free software: you can redistribute it and/or modify
diff --git a/include/kfr/simd/impl/basicoperators_generic.hpp b/include/kfr/simd/impl/basicoperators_generic.hpp
@@ -39,6 +39,16 @@ inline namespace CMT_ARCH_NAME
 namespace intrinsics
 {
 
+#define KFR_DIV_MOD_FN(ty)                                                                                   \
+    KFR_INTRINSIC ty div(const ty& x, const ty& y)                                                           \
+    {                                                                                                        \
+        KFR_COMPONENTWISE_RET_I(ty, result[i] = y[i] ? x[i] / y[i] : 0);                                     \
+    }                                                                                                        \
+    KFR_INTRINSIC ty mod(const ty& x, const ty& y)                                                           \
+    {                                                                                                        \
+        KFR_COMPONENTWISE_RET_I(ty, result[i] = y[i] ? x[i] % y[i] : 0);                                     \
+    }
+
 #if defined CMT_ARCH_SSE2 && defined KFR_NATIVE_INTRINSICS
 
 KFR_INTRINSIC __m128 _mm_allones_ps()
@@ -76,17 +86,11 @@ KFR_INTRINSIC f64sse div(const f64sse& x, const f64sse& y) { return f64sse(_mm_d
 
 KFR_INTRINSIC u8sse add(const u8sse& x, const u8sse& y) { return _mm_add_epi8(x.v, y.v); }
 KFR_INTRINSIC u8sse sub(const u8sse& x, const u8sse& y) { return _mm_sub_epi8(x.v, y.v); }
-KFR_INTRINSIC u8sse div(const u8sse& x, const u8sse& y)
-{
-    KFR_COMPONENTWISE_RET_I(u8sse, result[i] = y[i] ? x[i] / y[i] : 0);
-}
+KFR_DIV_MOD_FN(u8sse)
 
 KFR_INTRINSIC i8sse add(const i8sse& x, const i8sse& y) { return _mm_add_epi8(x.v, y.v); }
 KFR_INTRINSIC i8sse sub(const i8sse& x, const i8sse& y) { return _mm_sub_epi8(x.v, y.v); }
-KFR_INTRINSIC i8sse div(const i8sse& x, const i8sse& y)
-{
-    KFR_COMPONENTWISE_RET_I(i8sse, result[i] = y[i] ? x[i] / y[i] : 0);
-}
+KFR_DIV_MOD_FN(i8sse)
 
 KFR_INTRINSIC __m128i mul_epi8(const __m128i& x, const __m128i& y)
 {
@@ -102,18 +106,12 @@ KFR_INTRINSIC i8sse mul(const i8sse& x, const i8sse& y) { return mul_epi8(x.v, y
 KFR_INTRINSIC u16sse add(const u16sse& x, const u16sse& y) { return _mm_add_epi16(x.v, y.v); }
 KFR_INTRINSIC u16sse sub(const u16sse& x, const u16sse& y) { return _mm_sub_epi16(x.v, y.v); }
 KFR_INTRINSIC u16sse mul(const u16sse& x, const u16sse& y) { return _mm_mullo_epi16(x.v, y.v); }
-KFR_INTRINSIC u16sse div(const u16sse& x, const u16sse& y)
-{
-    KFR_COMPONENTWISE_RET_I(u16sse, result[i] = y[i] ? x[i] / y[i] : 0);
-}
+KFR_DIV_MOD_FN(u16sse)
 
 KFR_INTRINSIC i16sse add(const i16sse& x, const i16sse& y) { return _mm_add_epi16(x.v, y.v); }
 KFR_INTRINSIC i16sse sub(const i16sse& x, const i16sse& y) { return _mm_sub_epi16(x.v, y.v); }
 KFR_INTRINSIC i16sse mul(const i16sse& x, const i16sse& y) { return _mm_mullo_epi16(x.v, y.v); }
-KFR_INTRINSIC i16sse div(const i16sse& x, const i16sse& y)
-{
-    KFR_COMPONENTWISE_RET_I(i16sse, result[i] = y[i] ? x[i] / y[i] : 0);
-}
+KFR_DIV_MOD_FN(i16sse)
 
 KFR_INTRINSIC u32sse add(const u32sse& x, const u32sse& y) { return _mm_add_epi32(x.v, y.v); }
 KFR_INTRINSIC u32sse sub(const u32sse& x, const u32sse& y) { return _mm_sub_epi32(x.v, y.v); }
@@ -140,14 +138,8 @@ KFR_INTRINSIC i32sse mul(const i32sse& x, const i32sse& y)
                               _mm_shuffle_epi32(tmp2, _MM_SHUFFLE(0, 0, 2, 0)));
 }
 #endif
-KFR_INTRINSIC u32sse div(const u32sse& x, const u32sse& y)
-{
-    KFR_COMPONENTWISE_RET_I(u32sse, result[i] = y[i] ? x[i] / y[i] : 0);
-}
-KFR_INTRINSIC i32sse div(const i32sse& x, const i32sse& y)
-{
-    KFR_COMPONENTWISE_RET_I(i32sse, result[i] = y[i] ? x[i] / y[i] : 0);
-}
+KFR_DIV_MOD_FN(u32sse)
+KFR_DIV_MOD_FN(i32sse)
 
 KFR_INTRINSIC u64sse add(const u64sse& x, const u64sse& y) { return _mm_add_epi64(x.v, y.v); }
 KFR_INTRINSIC u64sse sub(const u64sse& x, const u64sse& y) { return _mm_sub_epi64(x.v, y.v); }
@@ -155,10 +147,6 @@ KFR_INTRINSIC u64sse mul(const u64sse& x, const u64sse& y)
 {
     KFR_COMPONENTWISE_RET_I(u64sse, result[i] = x[i] * y[i]);
 }
-KFR_INTRINSIC u64sse div(const u64sse& x, const u64sse& y)
-{
-    KFR_COMPONENTWISE_RET_I(u64sse, result[i] = y[i] ? x[i] / y[i] : 0);
-}
 
 KFR_INTRINSIC i64sse add(const i64sse& x, const i64sse& y) { return _mm_add_epi64(x.v, y.v); }
 KFR_INTRINSIC i64sse sub(const i64sse& x, const i64sse& y) { return _mm_sub_epi64(x.v, y.v); }
@@ -166,10 +154,8 @@ KFR_INTRINSIC i64sse mul(const i64sse& x, const i64sse& y)
 {
     KFR_COMPONENTWISE_RET_I(i64sse, result[i] = x[i] * y[i]);
 }
-KFR_INTRINSIC i64sse div(const i64sse& x, const i64sse& y)
-{
-    KFR_COMPONENTWISE_RET_I(i64sse, result[i] = y[i] ? x[i] / y[i] : 0);
-}
+KFR_DIV_MOD_FN(u64sse)
+KFR_DIV_MOD_FN(i64sse)
 
 KFR_INTRINSIC f32sse shl(const f32sse& x, unsigned y)
 {
@@ -600,33 +586,21 @@ KFR_INTRINSIC f64avx shr(const f64avx& x, unsigned y)
 
 KFR_INTRINSIC u8avx add(const u8avx& x, const u8avx& y) { return _mm256_add_epi8(x.v, y.v); }
 KFR_INTRINSIC u8avx sub(const u8avx& x, const u8avx& y) { return _mm256_sub_epi8(x.v, y.v); }
-KFR_INTRINSIC u8avx div(const u8avx& x, const u8avx& y)
-{
-    KFR_COMPONENTWISE_RET_I(u8avx, result[i] = x[i] / y[i]);
-}
+KFR_DIV_MOD_FN(u8avx)
 
 KFR_INTRINSIC i8avx add(const i8avx& x, const i8avx& y) { return _mm256_add_epi8(x.v, y.v); }
 KFR_INTRINSIC i8avx sub(const i8avx& x, const i8avx& y) { return _mm256_sub_epi8(x.v, y.v); }
-KFR_INTRINSIC i8avx div(const i8avx& x, const i8avx& y)
-{
-    KFR_COMPONENTWISE_RET_I(i8avx, result[i] = x[i] / y[i]);
-}
+KFR_DIV_MOD_FN(i8avx)
 
 KFR_INTRINSIC u16avx add(const u16avx& x, const u16avx& y) { return _mm256_add_epi16(x.v, y.v); }
 KFR_INTRINSIC u16avx sub(const u16avx& x, const u16avx& y) { return _mm256_sub_epi16(x.v, y.v); }
 KFR_INTRINSIC u16avx mul(const u16avx& x, const u16avx& y) { return _mm256_mullo_epi16(x.v, y.v); }
-KFR_INTRINSIC u16avx div(const u16avx& x, const u16avx& y)
-{
-    KFR_COMPONENTWISE_RET_I(u16avx, result[i] = x[i] / y[i]);
-}
+KFR_DIV_MOD_FN(u16avx)
 
 KFR_INTRINSIC i16avx add(const i16avx& x, const i16avx& y) { return _mm256_add_epi16(x.v, y.v); }
 KFR_INTRINSIC i16avx sub(const i16avx& x, const i16avx& y) { return _mm256_sub_epi16(x.v, y.v); }
 KFR_INTRINSIC i16avx mul(const i16avx& x, const i16avx& y) { return _mm256_mullo_epi16(x.v, y.v); }
-KFR_INTRINSIC i16avx div(const i16avx& x, const i16avx& y)
-{
-    KFR_COMPONENTWISE_RET_I(i16avx, result[i] = x[i] / y[i]);
-}
+KFR_DIV_MOD_FN(i16avx)
 
 KFR_INTRINSIC u32avx add(const u32avx& x, const u32avx& y) { return _mm256_add_epi32(x.v, y.v); }
 KFR_INTRINSIC u32avx sub(const u32avx& x, const u32avx& y) { return _mm256_sub_epi32(x.v, y.v); }
@@ -636,14 +610,8 @@ KFR_INTRINSIC i32avx sub(const i32avx& x, const i32avx& y) { return _mm256_sub_e
 
 KFR_INTRINSIC u32avx mul(const u32avx& x, const u32avx& y) { return _mm256_mullo_epi32(x.v, y.v); }
 KFR_INTRINSIC i32avx mul(const i32avx& x, const i32avx& y) { return _mm256_mullo_epi32(x.v, y.v); }
-KFR_INTRINSIC u32avx div(const u32avx& x, const u32avx& y)
-{
-    KFR_COMPONENTWISE_RET_I(u32avx, result[i] = x[i] / y[i]);
-}
-KFR_INTRINSIC i32avx div(const i32avx& x, const i32avx& y)
-{
-    KFR_COMPONENTWISE_RET_I(i32avx, result[i] = x[i] / y[i]);
-}
+KFR_DIV_MOD_FN(u32avx)
+KFR_DIV_MOD_FN(i32avx)
 
 KFR_INTRINSIC u64avx add(const u64avx& x, const u64avx& y) { return _mm256_add_epi64(x.v, y.v); }
 KFR_INTRINSIC u64avx sub(const u64avx& x, const u64avx& y) { return _mm256_sub_epi64(x.v, y.v); }
@@ -651,10 +619,6 @@ KFR_INTRINSIC u64avx mul(const u64avx& x, const u64avx& y)
 {
     KFR_COMPONENTWISE_RET_I(u64avx, result[i] = x[i] * y[i]);
 }
-KFR_INTRINSIC u64avx div(const u64avx& x, const u64avx& y)
-{
-    KFR_COMPONENTWISE_RET_I(u64avx, result[i] = y[i] ? x[i] / y[i] : 0);
-}
 
 KFR_INTRINSIC i64avx add(const i64avx& x, const i64avx& y) { return _mm256_add_epi64(x.v, y.v); }
 KFR_INTRINSIC i64avx sub(const i64avx& x, const i64avx& y) { return _mm256_sub_epi64(x.v, y.v); }
@@ -662,10 +626,8 @@ KFR_INTRINSIC i64avx mul(const i64avx& x, const i64avx& y)
 {
     KFR_COMPONENTWISE_RET_I(i64avx, result[i] = x[i] * y[i]);
 }
-KFR_INTRINSIC i64avx div(const i64avx& x, const i64avx& y)
-{
-    KFR_COMPONENTWISE_RET_I(i64avx, result[i] = y[i] ? x[i] / y[i] : 0);
-}
+KFR_DIV_MOD_FN(u64avx)
+KFR_DIV_MOD_FN(i64avx)
 
 KFR_INTRINSIC __m256i mul_epi8(const __m256i& x, const __m256i& y)
 {
@@ -1319,38 +1281,14 @@ KFR_INTRINSIC u16avx512 mul(const u16avx512& x, const u16avx512& y) { return _mm
 KFR_INTRINSIC u32avx512 mul(const u32avx512& x, const u32avx512& y) { return _mm512_mullo_epi32(x.v, y.v); }
 KFR_INTRINSIC u64avx512 mul(const u64avx512& x, const u64avx512& y) { return _mm512_mullo_epi64(x.v, y.v); }
 
-KFR_INTRINSIC i8avx512 div(const i8avx512& x, const i8avx512& y)
-{
-    KFR_COMPONENTWISE_RET_I(u8avx512, result[i] = y[i] ? x[i] / y[i] : 0);
-}
-KFR_INTRINSIC i16avx512 div(const i16avx512& x, const i16avx512& y)
-{
-    KFR_COMPONENTWISE_RET_I(u16avx512, result[i] = y[i] ? x[i] / y[i] : 0);
-}
-KFR_INTRINSIC i32avx512 div(const i32avx512& x, const i32avx512& y)
-{
-    KFR_COMPONENTWISE_RET_I(u32avx512, result[i] = y[i] ? x[i] / y[i] : 0);
-}
-KFR_INTRINSIC i64avx512 div(const i64avx512& x, const i64avx512& y)
-{
-    KFR_COMPONENTWISE_RET_I(u64avx512, result[i] = y[i] ? x[i] / y[i] : 0);
-}
-KFR_INTRINSIC u8avx512 div(const u8avx512& x, const u8avx512& y)
-{
-    KFR_COMPONENTWISE_RET_I(u8avx512, result[i] = y[i] ? x[i] / y[i] : 0);
-}
-KFR_INTRINSIC u16avx512 div(const u16avx512& x, const u16avx512& y)
-{
-    KFR_COMPONENTWISE_RET_I(u16avx512, result[i] = y[i] ? x[i] / y[i] : 0);
-}
-KFR_INTRINSIC u32avx512 div(const u32avx512& x, const u32avx512& y)
-{
-    KFR_COMPONENTWISE_RET_I(u32avx512, result[i] = y[i] ? x[i] / y[i] : 0);
-}
-KFR_INTRINSIC u64avx512 div(const u64avx512& x, const u64avx512& y)
-{
-    KFR_COMPONENTWISE_RET_I(u64avx512, result[i] = y[i] ? x[i] / y[i] : 0);
-}
+KFR_DIV_MOD_FN(i8avx512)
+KFR_DIV_MOD_FN(i16avx512)
+KFR_DIV_MOD_FN(i32avx512)
+KFR_DIV_MOD_FN(i64avx512)
+KFR_DIV_MOD_FN(u8avx512)
+KFR_DIV_MOD_FN(u16avx512)
+KFR_DIV_MOD_FN(u32avx512)
+KFR_DIV_MOD_FN(u64avx512)
 
 KFR_INTRINSIC i8avx512 band(const i8avx512& x, const i8avx512& y) { return _mm512_and_si512(x.v, y.v); }
 KFR_INTRINSIC i16avx512 band(const i16avx512& x, const i16avx512& y) { return _mm512_and_si512(x.v, y.v); }
@@ -1512,6 +1450,7 @@ KFR_HANDLE_ALL_SIZES_2(add)
 KFR_HANDLE_ALL_SIZES_2(sub)
 KFR_HANDLE_ALL_SIZES_2(mul)
 KFR_HANDLE_ALL_SIZES_2(div)
+KFR_HANDLE_ALL_SIZES_2(mod)
 
 KFR_HANDLE_ALL_SIZES_2(eq)
 KFR_HANDLE_ALL_SIZES_2(ne)
@@ -1619,6 +1558,11 @@ KFR_INTRINSIC vec<T, N> div(const vec<T, N>& x, const vec<T, N>& y)
 {
     KFR_COMPONENTWISE_RET(result[i] = x[i] / y[i]);
 }
+template <typename T, size_t N, KFR_ENABLE_IF(is_simd_type<T>)>
+KFR_INTRINSIC vec<T, N> mod(const vec<T, N>& x, const vec<T, N>& y)
+{
+    KFR_COMPONENTWISE_RET(result[i] = x[i] % y[i]);
+}
 
 #define KFR_HANDLE_VEC_SCA(fn)                                                                               \
     template <typename T, size_t N, KFR_ENABLE_IF(is_simd_type<T>)>                                          \
@@ -1636,6 +1580,7 @@ KFR_HANDLE_VEC_SCA(add)
 KFR_HANDLE_VEC_SCA(sub)
 KFR_HANDLE_VEC_SCA(mul)
 KFR_HANDLE_VEC_SCA(div)
+KFR_HANDLE_VEC_SCA(mod)
 KFR_HANDLE_VEC_SCA(band)
 KFR_HANDLE_VEC_SCA(bor)
 KFR_HANDLE_VEC_SCA(bxor)
diff --git a/include/kfr/simd/impl/function.hpp b/include/kfr/simd/impl/function.hpp
@@ -38,7 +38,7 @@ inline namespace CMT_ARCH_NAME
     template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>)>                                           \
     KFR_INTRINSIC vec<flt_type<T>, N> fn(const vec<T, N>& a) CMT_NOEXCEPT                                    \
     {                                                                                                        \
-        return intrinsics::fn(elemcast<flt_type<T>>(a));                                                     \
+        return intrinsics::fn(promoteto<flt_type<T>>(a));                                                     \
     }
 
 #define KFR_HANDLE_SCALAR(fn)                                                                                \
diff --git a/include/kfr/simd/impl/logical.hpp b/include/kfr/simd/impl/logical.hpp
@@ -23,8 +23,8 @@
 #pragma once
 
 #include "../abs.hpp"
-#include "function.hpp"
 #include "../operators.hpp"
+#include "function.hpp"
 
 namespace kfr
 {
diff --git a/include/kfr/simd/impl/min_max.hpp b/include/kfr/simd/impl/min_max.hpp
@@ -23,9 +23,9 @@
 #pragma once
 
 #include "../abs.hpp"
+#include "../operators.hpp"
 #include "../select.hpp"
 #include "function.hpp"
-#include "../operators.hpp"
 
 namespace kfr
 {
diff --git a/include/kfr/simd/impl/operators.hpp b/include/kfr/simd/impl/operators.hpp
@@ -54,87 +54,31 @@ namespace intrinsics
               KFR_ENABLE_IF(is_simd_type<C>)>                                                                \
     KFR_INTRINSIC vec<vec<C, N1>, N2> fn(const vec<vec<T1, N1>, N2>& x, const vec<vec<T2, N1>, N2>& y)       \
     {                                                                                                        \
-        return fn(innercast<C>(x.flatten()), innercast<C>(y.flatten())).v;                                   \
+        return fn(broadcastto<C>(x.flatten()), broadcastto<C>(y.flatten())).v;                                   \
     }                                                                                                        \
     template <typename T1, typename T2, size_t N1, size_t N2, typename C = common_type<T1, T2>,              \
               KFR_ENABLE_IF(is_simd_type<C>)>                                                                \
     KFR_INTRINSIC vec<vec<C, N1>, N2> fn(const vec<vec<T1, N1>, N2>& x, const T2& y)                         \
     {                                                                                                        \
-        return fn(innercast<C>(x.flatten()), innercast<C>(y)).v;                                             \
+        return fn(broadcastto<C>(x.flatten()), broadcastto<C>(y)).v;                                             \
     }                                                                                                        \
     template <typename T1, typename T2, size_t N1, size_t N2, typename C = common_type<T1, T2>,              \
               KFR_ENABLE_IF(is_simd_type<C>)>                                                                \
     KFR_INTRINSIC vec<vec<C, N1>, N2> fn(const vec<vec<T1, N1>, N2>& x, const vec<T2, N1>& y)                \
     {                                                                                                        \
-        return fn(innercast<C>(x.flatten()), repeat<N2>(innercast<C>(y.flatten()))).v;                       \
+        return fn(broadcastto<C>(x.flatten()), repeat<N2>(broadcastto<C>(y.flatten()))).v;                       \
     }                                                                                                        \
     template <typename T1, typename T2, size_t N1, size_t N2, typename C = common_type<T1, T2>,              \
               KFR_ENABLE_IF(is_simd_type<C>)>                                                                \
     KFR_INTRINSIC vec<vec<C, N1>, N2> fn(const T1& x, const vec<vec<T2, N1>, N2>& y)                         \
     {                                                                                                        \
-        return fn(innercast<C>(x), innercast<C>(y.flatten())).v;                                             \
+        return fn(broadcastto<C>(x), broadcastto<C>(y.flatten())).v;                                             \
     }                                                                                                        \
     template <typename T1, typename T2, size_t N1, size_t N2, typename C = common_type<T1, T2>,              \
               KFR_ENABLE_IF(is_simd_type<C>)>                                                                \
     KFR_INTRINSIC vec<vec<C, N1>, N2> fn(const vec<T1, N1>& x, const vec<vec<T2, N1>, N2>& y)                \
     {                                                                                                        \
-        return fn(repeat<N2>(innercast<C>(x.flatten())), innercast<C>(y.flatten())).v;                       \
-    }
-
-#define KFR_VECVECVEC_OP1(fn)                                                                                \
-    template <typename T1, size_t N1, size_t N2, size_t N3>                                                  \
-    KFR_INTRINSIC vec<vec<vec<T1, N1>, N2>, N3> fn(const vec<vec<vec<T1, N1>, N2>, N3>& x)                   \
-    {                                                                                                        \
-        return fn(x.flatten()).v;                                                                            \
-    }
-
-#define KFR_VECVECVEC_OP2(fn)                                                                                \
-    template <typename T1, typename T2, size_t N1, size_t N2, size_t N3, typename C = common_type<T1, T2>,   \
-              KFR_ENABLE_IF(is_simd_type<C>)>                                                                \
-    KFR_INTRINSIC vec<vec<vec<C, N1>, N2>, N3> fn(const vec<vec<vec<T1, N1>, N2>, N3>& x,                    \
-                                                  const vec<vec<vec<T2, N1>, N2>, N3>& y)                    \
-    { /* VVV @ VVV */                                                                                        \
-        return fn(innercast<C>(x.flatten()), innercast<C>(y.flatten())).v;                                   \
-    }                                                                                                        \
-    template <typename T1, typename T2, size_t N1, size_t N2, size_t N3, typename C = common_type<T1, T2>,   \
-              KFR_ENABLE_IF(is_simd_type<C>)>                                                                \
-    KFR_INTRINSIC vec<vec<vec<C, N1>, N2>, N3> fn(const vec<vec<vec<T1, N1>, N2>, N3>& x,                    \
-                                                  const vec<vec<T2, N1>, N2>& y)                             \
-    { /* VVV @ VV */                                                                                         \
-        return fn(innercast<C>(x.flatten()), repeat<N3>(innercast<C>(y.flatten()))).v;                       \
-    }                                                                                                        \
-    template <typename T1, typename T2, size_t N1, size_t N2, size_t N3, typename C = common_type<T1, T2>,   \
-              KFR_ENABLE_IF(is_simd_type<C>)>                                                                \
-    KFR_INTRINSIC vec<vec<vec<C, N1>, N2>, N3> fn(const vec<vec<T1, N1>, N2>& x,                             \
-                                                  const vec<vec<vec<T2, N1>, N2>, N3>& y)                    \
-    { /* VV @ VVV */                                                                                         \
-        return fn(repeat<N3>(innercast<C>(x.flatten())), innercast<C>(y.flatten())).v;                       \
-    }                                                                                                        \
-    template <typename T1, typename T2, size_t N1, size_t N2, size_t N3, typename C = common_type<T1, T2>,   \
-              KFR_ENABLE_IF(is_simd_type<C>)>                                                                \
-    KFR_INTRINSIC vec<vec<vec<C, N1>, N2>, N3> fn(const vec<vec<vec<T1, N1>, N2>, N3>& x, const T2& y)       \
-    { /* VVV @ S */                                                                                          \
-        return fn(innercast<C>(x.flatten()), innercast<C>(y)).v;                                             \
-    }                                                                                                        \
-    template <typename T1, typename T2, size_t N1, size_t N2, size_t N3, typename C = common_type<T1, T2>,   \
-              KFR_ENABLE_IF(is_simd_type<C>)>                                                                \
-    KFR_INTRINSIC vec<vec<vec<C, N1>, N2>, N3> fn(const vec<vec<vec<T1, N1>, N2>, N3>& x,                    \
-                                                  const vec<T2, N1>& y)                                      \
-    { /* VVV @ V */                                                                                          \
-        return fn(innercast<C>(x.flatten()), repeat<N2>(innercast<C>(y.flatten()))).v;                       \
-    }                                                                                                        \
-    template <typename T1, typename T2, size_t N1, size_t N2, size_t N3, typename C = common_type<T1, T2>,   \
-              KFR_ENABLE_IF(is_simd_type<C>)>                                                                \
-    KFR_INTRINSIC vec<vec<vec<C, N1>, N2>, N3> fn(const T1& x, const vec<vec<vec<T2, N1>, N2>, N3>& y)       \
-    { /* S @ VVV */                                                                                          \
-        return fn(innercast<C>(x), innercast<C>(y.flatten())).v;                                             \
-    }                                                                                                        \
-    template <typename T1, typename T2, size_t N1, size_t N2, size_t N3, typename C = common_type<T1, T2>,   \
-              KFR_ENABLE_IF(is_simd_type<C>)>                                                                \
-    KFR_INTRINSIC vec<vec<vec<C, N1>, N2>, N3> fn(const vec<T1, N1>& x,                                      \
-                                                  const vec<vec<vec<T2, N1>, N2>, N3>& y)                    \
-    { /* V @ VVV */                                                                                          \
-        return fn(repeat<N2>(innercast<C>(x.flatten())), innercast<C>(y.flatten())).v;                       \
+        return fn(repeat<N2>(broadcastto<C>(x.flatten())), broadcastto<C>(y.flatten())).v;                       \
     }
 
 KFR_VECVEC_OP1(neg)
@@ -143,20 +87,11 @@ KFR_VECVEC_OP2(add)
 KFR_VECVEC_OP2(sub)
 KFR_VECVEC_OP2(mul)
 KFR_VECVEC_OP2(div)
+KFR_VECVEC_OP2(mod)
 KFR_VECVEC_OP2(band)
 KFR_VECVEC_OP2(bor)
 KFR_VECVEC_OP2(bxor)
 
-KFR_VECVECVEC_OP1(neg)
-KFR_VECVECVEC_OP1(bnot)
-KFR_VECVECVEC_OP2(add)
-KFR_VECVECVEC_OP2(sub)
-KFR_VECVECVEC_OP2(mul)
-KFR_VECVECVEC_OP2(div)
-KFR_VECVECVEC_OP2(band)
-KFR_VECVECVEC_OP2(bor)
-KFR_VECVECVEC_OP2(bxor)
-
 } // namespace intrinsics
 } // namespace CMT_ARCH_NAME
 } // namespace kfr
diff --git a/include/kfr/simd/impl/round.hpp b/include/kfr/simd/impl/round.hpp
@@ -22,9 +22,9 @@
  */
 #pragma once
 
-#include "function.hpp"
 #include "../operators.hpp"
 #include "abs.hpp"
+#include "function.hpp"
 
 namespace kfr
 {
@@ -140,48 +140,48 @@ constexpr inline f32 fp_precision_limit<f32> = 16777216.0f;
 template <size_t N>
 KFR_INTRINSIC vec<f32, N> floor(const vec<f32, N>& x)
 {
-    vec<f32, N> t = innercast<f32>(innercast<i32>(x));
+    vec<f32, N> t = broadcastto<f32>(broadcastto<i32>(x));
     return select(abs(x) >= fp_precision_limit<f32>, x, t - select(x < t, 1.f, 0.f));
 }
 template <size_t N>
 KFR_INTRINSIC vec<f64, N> floor(const vec<f64, N>& x)
 {
-    vec<f64, N> t = innercast<f64>(innercast<i64>(x));
+    vec<f64, N> t = broadcastto<f64>(broadcastto<i64>(x));
     return select(abs(x) >= fp_precision_limit<f64>, x, t - select(x < t, 1., 0.));
 }
 template <size_t N>
 KFR_INTRINSIC vec<f32, N> ceil(const vec<f32, N>& x)
 {
-    vec<f32, N> t = innercast<f32>(innercast<i32>(x));
+    vec<f32, N> t = broadcastto<f32>(broadcastto<i32>(x));
     return select(abs(x) >= fp_precision_limit<f32>, x, t + select(x > t, 1.f, 0.f));
 }
 template <size_t N>
 KFR_INTRINSIC vec<f64, N> ceil(const vec<f64, N>& x)
 {
-    vec<f64, N> t = innercast<f64>(innercast<i64>(x));
+    vec<f64, N> t = broadcastto<f64>(broadcastto<i64>(x));
     return select(abs(x) >= fp_precision_limit<f64>, x, t + select(x > t, 1., 0.));
 }
 template <size_t N>
 KFR_INTRINSIC vec<f32, N> round(const vec<f32, N>& x)
 {
     return select(abs(x) >= fp_precision_limit<f32>, x,
-                  innercast<f32>(innercast<i32>(x + mulsign(broadcast<N>(0.5f), x))));
+                  broadcastto<f32>(broadcastto<i32>(x + mulsign(broadcast<N>(0.5f), x))));
 }
 template <size_t N>
 KFR_INTRINSIC vec<f64, N> round(const vec<f64, N>& x)
 {
     return select(abs(x) >= fp_precision_limit<f64>, x,
-                  innercast<f64>(innercast<i64>(x + mulsign(broadcast<N>(0.5), x))));
+                  broadcastto<f64>(broadcastto<i64>(x + mulsign(broadcast<N>(0.5), x))));
 }
 template <size_t N>
 KFR_INTRINSIC vec<f32, N> trunc(const vec<f32, N>& x)
 {
-    return select(abs(x) >= fp_precision_limit<f32>, x, innercast<f32>(innercast<i32>(x)));
+    return select(abs(x) >= fp_precision_limit<f32>, x, broadcastto<f32>(broadcastto<i32>(x)));
 }
 template <size_t N>
 KFR_INTRINSIC vec<f64, N> trunc(const vec<f64, N>& x)
 {
-    return select(abs(x) >= fp_precision_limit<f64>, x, innercast<f64>(innercast<i64>(x)));
+    return select(abs(x) >= fp_precision_limit<f64>, x, broadcastto<f64>(broadcastto<i64>(x)));
 }
 template <size_t N>
 KFR_INTRINSIC vec<f32, N> fract(const vec<f32, N>& x)
@@ -224,22 +224,22 @@ KFR_INTRINSIC vec<T, N> fract(const vec<T, N>&)
 template <typename T, size_t N, typename IT = itype<T>>
 KFR_INTRINSIC vec<IT, N> ifloor(const vec<T, N>& value)
 {
-    return innercast<IT>(floor(value));
+    return broadcastto<IT>(floor(value));
 }
 template <typename T, size_t N, typename IT = itype<T>>
 KFR_INTRINSIC vec<IT, N> iceil(const vec<T, N>& value)
 {
-    return innercast<IT>(ceil(value));
+    return broadcastto<IT>(ceil(value));
 }
 template <typename T, size_t N, typename IT = itype<T>>
 KFR_INTRINSIC vec<IT, N> itrunc(const vec<T, N>& value)
 {
-    return innercast<IT>(trunc(value));
+    return broadcastto<IT>(trunc(value));
 }
 template <typename T, size_t N, typename IT = itype<T>>
 KFR_INTRINSIC vec<IT, N> iround(const vec<T, N>& value)
 {
-    return innercast<IT>(round(value));
+    return broadcastto<IT>(round(value));
 }
 
 KFR_HANDLE_SCALAR(floor)
diff --git a/include/kfr/simd/impl/saturation.hpp b/include/kfr/simd/impl/saturation.hpp
@@ -22,9 +22,9 @@
  */
 #pragma once
 
+#include "../operators.hpp"
 #include "../select.hpp"
 #include "function.hpp"
-#include "../operators.hpp"
 
 namespace kfr
 {
diff --git a/include/kfr/simd/impl/select.hpp b/include/kfr/simd/impl/select.hpp
@@ -22,8 +22,8 @@
  */
 #pragma once
 
-#include "function.hpp"
 #include "../operators.hpp"
+#include "function.hpp"
 
 namespace kfr
 {
diff --git a/include/kfr/simd/impl/specializations.i b/include/kfr/simd/impl/specializations.hpp
diff --git a/include/kfr/simd/operators.hpp b/include/kfr/simd/operators.hpp
@@ -36,39 +36,42 @@ inline namespace CMT_ARCH_NAME
 {
 
 #define KFR_VEC_OPERATOR1(op, fn)                                                                            \
-    template <typename T, size_t N>                                                                          \
+    template <typename T, size_t N /* , KFR_ENABLE_IF(!is_vec<T>) */>                                        \
     constexpr KFR_INTRINSIC vec<T, N> operator op(const vec<T, N>& x)                                        \
     {                                                                                                        \
         return intrinsics::fn(x);                                                                            \
     }
 
 #define KFR_VEC_OPERATOR2(op, asgnop, fn)                                                                    \
-    template <typename T1, typename T2, size_t N>                                                            \
+    template <typename T1, typename T2, size_t N, KFR_ENABLE_IF(vec_rank<T1> == vec_rank<T2>)>               \
     constexpr KFR_INTRINSIC vec<T1, N>& operator asgnop(vec<T1, N>& x, const vec<T2, N>& y)                  \
     {                                                                                                        \
-        x = intrinsics::fn(x, elemcast<T1>(y));                                                              \
+        x = intrinsics::fn(x, promoteto<T1>(y));                                                             \
         return x;                                                                                            \
     }                                                                                                        \
-    template <typename T1, typename T2, size_t N>                                                            \
+    template <typename T1, typename T2, size_t N, KFR_ENABLE_IF(1 + vec_rank<T1> > vec_rank<T2>)>            \
     constexpr KFR_INTRINSIC vec<T1, N>& operator asgnop(vec<T1, N>& x, const T2& y)                          \
     {                                                                                                        \
         x = intrinsics::fn(x, T1(y));                                                                        \
         return x;                                                                                            \
     }                                                                                                        \
-    template <typename T1, typename T2, size_t N, typename C = common_type<T1, T2>>                          \
+    template <typename T1, typename T2, size_t N, typename C = common_type<T1, T2>,                          \
+              KFR_ENABLE_IF(1 + vec_rank<T1> > vec_rank<T2>)>                                                \
     constexpr KFR_INTRINSIC vec<C, N> operator op(const vec<T1, N>& x, const T2& y)                          \
     {                                                                                                        \
-        return intrinsics::fn(elemcast<C>(x), C(y));                                                         \
+        return intrinsics::fn(promoteto<C>(x), C(y));                                                        \
     }                                                                                                        \
-    template <typename T1, typename T2, size_t N, typename C = common_type<T1, T2>>                          \
+    template <typename T1, typename T2, size_t N, typename C = common_type<T1, T2>,                          \
+              KFR_ENABLE_IF(vec_rank<T1> < 1 + vec_rank<T2>)>                                                \
     constexpr KFR_INTRINSIC vec<C, N> operator op(const T1& x, const vec<T2, N>& y)                          \
     {                                                                                                        \
-        return intrinsics::fn(C(x), elemcast<C>(y));                                                         \
+        return intrinsics::fn(C(x), promoteto<C>(y));                                                        \
     }                                                                                                        \
-    template <typename T1, typename T2, size_t N, typename C = common_type<T1, T2>>                          \
+    template <typename T1, typename T2, size_t N, typename C = common_type<T1, T2>,                          \
+              KFR_ENABLE_IF(vec_rank<T1> == vec_rank<T2>)>                                                   \
     constexpr KFR_INTRINSIC vec<C, N> operator op(const vec<T1, N>& x, const vec<T2, N>& y)                  \
     {                                                                                                        \
-        return intrinsics::fn(elemcast<C>(x), elemcast<C>(y));                                               \
+        return intrinsics::fn(promoteto<C>(x), promoteto<C>(y));                                             \
     }
 
 #define KFR_VEC_SHIFT_OPERATOR(op, asgnop, fn)                                                               \
@@ -78,43 +81,46 @@ inline namespace CMT_ARCH_NAME
         x = intrinsics::fn(x, y);                                                                            \
         return x;                                                                                            \
     }                                                                                                        \
-    template <typename T1, typename T2, size_t N>                                                            \
+    template <typename T1, typename T2, size_t N, KFR_ENABLE_IF(vec_rank<T1> == vec_rank<T2>)>               \
     constexpr KFR_INTRINSIC vec<T1, N>& operator asgnop(vec<T1, N>& x, const vec<T2, N>& y)                  \
     {                                                                                                        \
-        x = intrinsics::fn(x, elemcast<utype<T1>>(y));                                                       \
+        x = intrinsics::fn(x, promoteto<utype<T1>>(y));                                                      \
         return x;                                                                                            \
     }                                                                                                        \
-    template <typename T, size_t N>                                                                          \
-    constexpr KFR_INTRINSIC vec<T, N> operator op(const vec<T, N>& x, unsigned y)                            \
+    template <typename T1, size_t N>                                                                         \
+    constexpr KFR_INTRINSIC vec<T1, N> operator op(const vec<T1, N>& x, unsigned y)                          \
     {                                                                                                        \
         return intrinsics::fn(x, y);                                                                         \
     }                                                                                                        \
-    template <typename T, typename T2, size_t N>                                                             \
-    constexpr KFR_INTRINSIC vec<T, N> operator op(const T& x, const vec<T2, N>& y)                           \
+    template <typename T1, typename T2, size_t N, KFR_ENABLE_IF(vec_rank<T1> < 1 + vec_rank<T2>)>            \
+    constexpr KFR_INTRINSIC vec<T1, N> operator op(const T1& x, const vec<T2, N>& y)                         \
     {                                                                                                        \
-        return intrinsics::fn(innercast<T>(x), elemcast<utype<T>>(y));                                       \
+        return intrinsics::fn(broadcastto<T1>(x), promoteto<utype<T1>>(y));                                  \
     }                                                                                                        \
-    template <typename T, typename T2, size_t N>                                                             \
-    constexpr KFR_INTRINSIC vec<T, N> operator op(const vec<T, N>& x, const vec<T2, N>& y)                   \
+    template <typename T1, typename T2, size_t N, KFR_ENABLE_IF(vec_rank<T1> == vec_rank<T2>)>               \
+    constexpr KFR_INTRINSIC vec<T1, N> operator op(const vec<T1, N>& x, const vec<T2, N>& y)                 \
     {                                                                                                        \
-        return intrinsics::fn(x, elemcast<utype<T>>(y));                                                     \
+        return intrinsics::fn(x, promoteto<utype<T1>>(y));                                                   \
     }
 
 #define KFR_VEC_CMP_OPERATOR(op, fn)                                                                         \
-    template <typename T1, typename T2, size_t N, typename C = common_type<T1, T2>>                          \
+    template <typename T1, typename T2, size_t N, typename C = common_type<T1, T2>,                          \
+              KFR_ENABLE_IF(1 + vec_rank<T1> > vec_rank<T2>)>                                                \
     constexpr KFR_INTRINSIC mask<C, N> operator op(const vec<T1, N>& x, const T2& y)                         \
     {                                                                                                        \
-        return intrinsics::fn(elemcast<C>(x), vec<C, N>(y)).asmask();                                        \
+        return intrinsics::fn(promoteto<C>(x), vec<C, N>(y)).asmask();                                       \
     }                                                                                                        \
-    template <typename T1, typename T2, size_t N, typename C = common_type<T1, T2>>                          \
+    template <typename T1, typename T2, size_t N, typename C = common_type<T1, T2>,                          \
+              KFR_ENABLE_IF(vec_rank<T1> < 1 + vec_rank<T2>)>                                                \
     constexpr KFR_INTRINSIC mask<C, N> operator op(const T1& x, const vec<T2, N>& y)                         \
     {                                                                                                        \
-        return intrinsics::fn(vec<C, N>(x), elemcast<C>(y)).asmask();                                        \
+        return intrinsics::fn(vec<C, N>(x), promoteto<C>(y)).asmask();                                       \
     }                                                                                                        \
-    template <typename T1, typename T2, size_t N, typename C = common_type<T1, T2>>                          \
+    template <typename T1, typename T2, size_t N, typename C = common_type<T1, T2>,                          \
+              KFR_ENABLE_IF(vec_rank<T1> == vec_rank<T2>)>                                                   \
     constexpr KFR_INTRINSIC mask<C, N> operator op(const vec<T1, N>& x, const vec<T2, N>& y)                 \
     {                                                                                                        \
-        return intrinsics::fn(elemcast<C>(x), elemcast<C>(y)).asmask();                                      \
+        return intrinsics::fn(promoteto<C>(x), promoteto<C>(y)).asmask();                                    \
     }
 
 KFR_VEC_OPERATOR1(-, neg)
@@ -124,6 +130,7 @@ KFR_VEC_OPERATOR2(+, +=, add)
 KFR_VEC_OPERATOR2(-, -=, sub)
 KFR_VEC_OPERATOR2(*, *=, mul)
 KFR_VEC_OPERATOR2(/, /=, div)
+KFR_VEC_OPERATOR2(%, %=, mod)
 
 KFR_VEC_OPERATOR2(&, &=, band)
 KFR_VEC_OPERATOR2(|, |=, bor)
@@ -140,7 +147,7 @@ KFR_VEC_CMP_OPERATOR(<, lt)
 
 template <typename T1, typename T2, size_t N, typename C = common_type<T1, T2>,
           KFR_ENABLE_IF(sizeof(T1) == sizeof(T2))>
-KFR_INTRINSIC mask<C, N> operator&(const mask<T1, N>& x, const mask<T2, N>& y)CMT_NOEXCEPT
+KFR_INTRINSIC mask<C, N> operator&(const mask<T1, N>& x, const mask<T2, N>& y) CMT_NOEXCEPT
 {
     return mask<C, N>((bitcast<C>(vec<T1, N>(x.v)) & bitcast<C>(vec<T2, N>(y.v))).v);
 }
@@ -433,6 +440,13 @@ KFR_INTRINSIC Tout div(const T1& x, const T2& y)
 }
 KFR_FN(div)
 
+/// Modulo
+template <typename T1, typename T2, typename Tout = common_type<T1, T2>>
+KFR_INTRINSIC Tout mod(const T1& x, const T2& y)
+{
+    return static_cast<Tout>(x) % static_cast<Tout>(y);
+}
+KFR_FN(mod)
 /// Remainder
 template <typename T1, typename T2, typename Tout = common_type<T1, T2>>
 KFR_INTRINSIC Tout rem(const T1& x, const T2& y)
diff --git a/include/kfr/simd/read_write.hpp b/include/kfr/simd/read_write.hpp
@@ -26,6 +26,7 @@
 #pragma once
 
 #include "impl/read_write.hpp"
+#include <array>
 
 namespace kfr
 {
@@ -95,9 +96,9 @@ KFR_INTRINSIC vec<T, Nout * groupsize> gather_stride(const T* base, size_t strid
     if constexpr (Nout > 2)
     {
         constexpr size_t Nlow = prev_poweroftwo(Nout - 1);
-        return concat(
-            internal::gather_stride_s<Nlow, groupsize>(base, stride, csizeseq<Nlow>),
-            internal::gather_stride_s<Nout - Nlow, groupsize>(base + Nlow * stride, stride, csizeseq<Nout - Nlow>));
+        return concat(internal::gather_stride_s<Nlow, groupsize>(base, stride, csizeseq<Nlow>),
+                      internal::gather_stride_s<Nout - Nlow, groupsize>(base + Nlow * stride, stride,
+                                                                        csizeseq<Nout - Nlow>));
     }
     else
         return internal::gather_stride_s<Nout, groupsize>(base, stride, csizeseq<Nout>);
@@ -117,7 +118,7 @@ KFR_INTRINSIC vec<T, N * groupsize> gather_helper(const T* base, const vec<IT, N
 {
     return concat(read<groupsize>(base + groupsize * offset[Indices])...);
 }
-}
+} // namespace internal
 template <size_t groupsize = 1, typename T, size_t N, typename IT>
 KFR_INTRINSIC vec<T, N * groupsize> gather(const T* base, const vec<IT, N>& offset)
 {
@@ -185,6 +186,12 @@ struct stride_pointer<const T, groupsize>
     }
 };
 
+template <typename T, size_t N>
+KFR_INTRINSIC vec<T, N> v(const std::array<T, N>& a)
+{
+    return read<N>(a.data());
+}
+
 template <typename T>
 constexpr T partial_masks[] = { constants<T>::allones(),
                                 constants<T>::allones(),
diff --git a/include/kfr/simd/select.hpp b/include/kfr/simd/select.hpp
@@ -43,7 +43,7 @@ template <typename T1, size_t N, typename T2, typename T3, KFR_ENABLE_IF(is_nume
 KFR_INTRINSIC vec<Tout, N> select(const mask<T1, N>& m, const T2& x, const T3& y)
 {
     static_assert(sizeof(T1) == sizeof(Tout), "select: incompatible types");
-    return intrinsics::select(bitcast<Tout>(m.asvec()).asmask(), innercast<Tout>(x), innercast<Tout>(y));
+    return intrinsics::select(bitcast<Tout>(m.asvec()).asmask(), broadcastto<Tout>(x), broadcastto<Tout>(y));
 }
 } // namespace CMT_ARCH_NAME
 } // namespace kfr
diff --git a/include/kfr/simd/shuffle.hpp b/include/kfr/simd/shuffle.hpp
@@ -630,6 +630,6 @@ KFR_FN(onoff)
 } // namespace CMT_ARCH_NAME
 } // namespace kfr
 #define KFR_SHUFFLE_SPECIALIZATIONS 1
-#include "impl/specializations.i"
+#include "impl/specializations.hpp"
 
-CMT_PRAGMA_MSVC(warning(pop))
-\ No newline at end of file
+CMT_PRAGMA_MSVC(warning(pop))
diff --git a/include/kfr/simd/vec.hpp b/include/kfr/simd/vec.hpp
@@ -93,6 +93,15 @@ struct alignas(next_poweroftwo(sizeof(T)) * next_poweroftwo(N)) portable_vec
 {
     static constexpr vec_shape<T, N> shape() CMT_NOEXCEPT { return {}; }
 
+    constexpr portable_vec() = default;
+
+    constexpr portable_vec(T value) : portable_vec(csizeseq<N>, value) {}
+
+    template <typename... Ts, size_t NN = N, KFR_ENABLE_IF(NN >= 2)>
+    constexpr portable_vec(T v1, T v2, Ts... args) : elem{ v1, v2, static_cast<T>(args)... }
+    {
+    }
+
     static_assert(N > 0 && N <= 1024, "Invalid vector size");
 
     static_assert(is_simd_type<T> || !compound_type_traits<T>::is_scalar, "Invalid vector type");
@@ -104,8 +113,18 @@ struct alignas(next_poweroftwo(sizeof(T)) * next_poweroftwo(N)) portable_vec
 
     T elem[N];
 
-    T operator[](size_t index) const { return elem[index]; }
-    T& operator[](size_t index) { return elem[index]; }
+    constexpr T operator[](size_t index) const { return elem[index]; }
+    constexpr T& operator[](size_t index) { return elem[index]; }
+    constexpr T front() const { return elem[0]; }
+    constexpr T& front() { return elem[0]; }
+    constexpr T back() const { return elem[N - 1]; }
+    constexpr T& back() { return elem[N - 1]; }
+
+private:
+    template <size_t... indices>
+    constexpr portable_vec(csizes_t<indices...>, T value) : elem{ (static_cast<void>(indices), value)... }
+    {
+    }
 };
 
 inline namespace CMT_ARCH_NAME
@@ -114,6 +133,9 @@ inline namespace CMT_ARCH_NAME
 template <typename T, size_t N>
 struct vec;
 
+template <typename T>
+constexpr inline size_t vec_rank = 0;
+
 template <typename T, size_t N>
 struct vec_halves
 {
@@ -129,12 +151,22 @@ struct vec_halves<T, 1>
 
 namespace internal
 {
+enum class conv_t
+{
+    promote,
+    broadcast,
+};
 
 // scalar to scalar
-template <typename To, typename From>
+template <size_t ToRank, size_t FromRank, typename To, typename From, conv_t conv>
 struct conversion
 {
-    static_assert(is_convertible<From, To>, "");
+};
+
+template <typename To, typename From, conv_t conv>
+struct conversion<0, 0, To, From, conv>
+{
+    static_assert(is_convertible<From, To>);
 
     static To cast(const From& value) { return value; }
 };
@@ -163,14 +195,6 @@ struct compoundcast<vec<vec<T, N1>, N2>>
     static vec<vec<T, N1>, N2> from_flat(const vec<T, N1 * N2>& x) { return x.v; }
 };
 
-template <typename T, size_t N1, size_t N2, size_t N3>
-struct compoundcast<vec<vec<vec<T, N1>, N2>, N3>>
-{
-    static vec<T, N1 * N2 * N3> to_flat(const vec<vec<vec<T, N1>, N2>, N3>& x) { return x.v; }
-
-    static vec<vec<vec<T, N1>, N2>, N3> from_flat(const vec<T, N1 * N2 * N3>& x) { return x.v; }
-};
-
 template <typename T, size_t N_>
 inline constexpr size_t vec_alignment =
     const_max(alignof(intrinsics::simd<typename compound_type_traits<T>::deep_subtype,
@@ -224,7 +248,7 @@ struct alignas(internal::vec_alignment<T, N_>) vec
     // default
     KFR_MEM_INTRINSIC constexpr vec() CMT_NOEXCEPT {}
 
-#if defined(_MSC_VER) && !defined (__clang__)
+#if defined(_MSC_VER) && !defined(__clang__)
     // MSVC Internal Compiler Error workaround
     // copy
     KFR_MEM_INTRINSIC constexpr vec(const vec& value) CMT_NOEXCEPT : v(value.v) {}
@@ -294,19 +318,22 @@ struct alignas(internal::vec_alignment<T, N_>) vec
     }
 
     // from lambda
-    template <typename Fn, KFR_ENABLE_IF(is_callable<Fn, size_t>)>
+    template <typename Fn, KFR_ENABLE_IF(std::is_invocable_r_v<T, Fn, size_t>)>
     KFR_MEM_INTRINSIC vec(Fn&& fn) CMT_NOEXCEPT
     {
         for (size_t i = 0; i < N; ++i)
         {
-            set(i, fn(i));
+            auto v = fn(i);
+            set(i, v);
         }
     }
 
     template <typename U, KFR_ENABLE_IF(is_convertible<U, value_type> &&
                                         !(compound_type_traits<T>::is_scalar && !is_bit<U>))>
     KFR_MEM_INTRINSIC vec(const vec<U, N>& x) CMT_NOEXCEPT
-        : v(internal::conversion<vec<T, N>, vec<U, N>>::cast(x).v)
+        : v(internal::conversion<vec_rank<T> + 1, vec_rank<U> + 1, vec<T, N>, vec<U, N>,
+                                 internal::conv_t::promote>::cast(x)
+                .v)
     {
     }
 
@@ -603,8 +630,8 @@ constexpr KFR_INTRINSIC vec<To, N> builtin_convertvector(const vec<From, N>& val
 }
 
 // vector to vector
-template <typename To, typename From, size_t N, size_t N2>
-struct conversion<vec<To, N>, vec<From, N2>>
+template <typename To, typename From, size_t N, size_t N2, conv_t conv>
+struct conversion<1, 1, vec<To, N>, vec<From, N2>, conv>
 {
     static_assert(N == N2, "");
     static_assert(!is_compound<To>, "");
@@ -614,8 +641,8 @@ struct conversion<vec<To, N>, vec<From, N2>>
 };
 
 // scalar to vector
-template <typename To, typename From, size_t N>
-struct conversion<vec<To, N>, From>
+template <typename To, typename From, size_t N, conv_t conv>
+struct conversion<1, 0, vec<To, N>, From, conv>
 {
     static_assert(is_convertible<From, To>, "");
 
@@ -655,13 +682,6 @@ constexpr KFR_INTRINSIC vec<vec<Tout, N1>, N2> cast(const vec<vec<Tin, N1>, N2>&
     return vec<vec<Tout, N1>, N2>(value);
 }
 
-template <typename Tout, typename Tin, size_t N1, size_t N2, size_t N3, KFR_ENABLE_IF(!is_same<Tin, Tout>)>
-constexpr KFR_INTRINSIC vec<vec<vec<Tout, N1>, N2>, N3> cast(const vec<vec<vec<Tin, N1>, N2>, N3>& value)
-    CMT_NOEXCEPT
-{
-    return vec<vec<vec<Tout, N1>, N2>, N3>(value);
-}
-
 template <typename Tout, typename Tin, size_t N, KFR_ENABLE_IF(is_same<Tin, Tout>)>
 constexpr KFR_INTRINSIC const vec<Tin, N>& cast(const vec<Tin, N>& value) CMT_NOEXCEPT
 {
@@ -674,85 +694,67 @@ constexpr KFR_INTRINSIC const vec<vec<Tin, N1>, N2>& cast(const vec<vec<Tin, N1>
     return value;
 }
 
-template <typename Tout, typename Tin, size_t N1, size_t N2, size_t N3, KFR_ENABLE_IF(is_same<Tin, Tout>)>
-constexpr KFR_INTRINSIC const vec<vec<vec<Tin, N1>, N2>, N3>& cast(
-    const vec<vec<vec<Tin, N1>, N2>, N3>& value) CMT_NOEXCEPT
-{
-    return value;
-}
-
 //
 
 template <typename To, typename From,
           typename Tout = typename compound_type_traits<From>::template deep_rebind<To>>
-constexpr KFR_INTRINSIC Tout innercast(const From& value) CMT_NOEXCEPT
+constexpr KFR_INTRINSIC Tout broadcastto(const From& value) CMT_NOEXCEPT
 {
     return static_cast<Tout>(value);
 }
 
 template <typename Tout, typename Tin, size_t N, KFR_ENABLE_IF(!is_same<Tin, Tout>)>
-constexpr KFR_INTRINSIC vec<Tout, N> innercast(const vec<Tin, N>& value) CMT_NOEXCEPT
+constexpr KFR_INTRINSIC vec<Tout, N> broadcastto(const vec<Tin, N>& value) CMT_NOEXCEPT
 {
-    return vec<Tout, N>(value);
+    return internal::conversion<vec_rank<Tout> + 1, 1, vec<Tout, N>, vec<Tin, N>,
+                                internal::conv_t::broadcast>::cast(value);
 }
 
 template <typename Tout, typename Tin, size_t N1, size_t N2, KFR_ENABLE_IF(!is_same<Tin, Tout>)>
-constexpr KFR_INTRINSIC vec<vec<Tout, N1>, N2> innercast(const vec<vec<Tin, N1>, N2>& value) CMT_NOEXCEPT
+constexpr KFR_INTRINSIC vec<vec<Tout, N1>, N2> broadcastto(const vec<vec<Tin, N1>, N2>& value) CMT_NOEXCEPT
 {
-    return vec<vec<Tout, N1>, N2>(value);
-}
-
-template <typename Tout, typename Tin, size_t N1, size_t N2, size_t N3, KFR_ENABLE_IF(!is_same<Tin, Tout>)>
-constexpr KFR_INTRINSIC vec<vec<vec<Tout, N1>, N2>, N3> innercast(const vec<vec<vec<Tin, N1>, N2>, N3>& value)
-    CMT_NOEXCEPT
-{
-    return vec<vec<vec<Tout, N1>, N2>, N3>(value);
+    return internal::conversion<vec_rank<Tout> + 2, 2, vec<vec<Tout, N1>, N2>, vec<vec<Tin, N1>, N2>,
+                                internal::conv_t::broadcast>::cast(value);
 }
 
 template <typename Tout, typename Tin, size_t N, KFR_ENABLE_IF(is_same<Tin, Tout>)>
-constexpr KFR_INTRINSIC const vec<Tin, N>& innercast(const vec<Tin, N>& value) CMT_NOEXCEPT
+constexpr KFR_INTRINSIC const vec<Tin, N>& broadcastto(const vec<Tin, N>& value) CMT_NOEXCEPT
 {
     return value;
 }
 
 template <typename Tout, typename Tin, size_t N1, size_t N2, KFR_ENABLE_IF(is_same<Tin, Tout>)>
-constexpr KFR_INTRINSIC const vec<vec<Tin, N1>, N2>& innercast(const vec<vec<Tin, N1>, N2>& value)
+constexpr KFR_INTRINSIC const vec<vec<Tin, N1>, N2>& broadcastto(const vec<vec<Tin, N1>, N2>& value)
     CMT_NOEXCEPT
 {
     return value;
 }
 
-template <typename Tout, typename Tin, size_t N1, size_t N2, size_t N3, KFR_ENABLE_IF(is_same<Tin, Tout>)>
-constexpr KFR_INTRINSIC const vec<vec<vec<Tin, N1>, N2>, N3>& innercast(
-    const vec<vec<vec<Tin, N1>, N2>, N3>& value) CMT_NOEXCEPT
-{
-    return value;
-}
-
 //
-
-template <typename Tout, typename Tin, size_t N, KFR_ENABLE_IF(!is_same<Tin, Tout>)>
-constexpr KFR_INTRINSIC vec<Tout, N> elemcast(const vec<Tin, N>& value) CMT_NOEXCEPT
+template <typename Tout, typename Tin>
+constexpr KFR_INTRINSIC Tout promoteto(const Tin& value) CMT_NOEXCEPT
 {
-    return vec<Tout, N>(value);
-}
-
-template <typename Tout, typename Tin, size_t N, KFR_ENABLE_IF(is_same<Tin, Tout>)>
-constexpr KFR_INTRINSIC const vec<Tin, N>& elemcast(const vec<Tin, N>& value) CMT_NOEXCEPT
-{
-    return value;
+    return static_cast<Tout>(value);
 }
 
-template <typename Tout, typename Tin, size_t N1, size_t N2, KFR_ENABLE_IF(!is_same<Tin, Tout>)>
-constexpr KFR_INTRINSIC vec<Tout, N2> elemcast(const vec<vec<Tin, N1>, N2>& value) CMT_NOEXCEPT
+template <typename Tout, typename Tin, size_t N>
+constexpr KFR_INTRINSIC vec<Tout, N> promoteto(const vec<Tin, N>& value) CMT_NOEXCEPT
 {
-    return vec<Tout, N2>(value);
+    if constexpr (std::is_same_v<Tin, Tout>)
+        return value;
+    else
+        return internal::conversion<vec_rank<Tout> + 1, 1, vec<Tout, N>, vec<Tin, N>,
+                                    internal::conv_t::promote>::cast(value);
 }
 
-template <typename Tout, typename Tin, size_t N1, size_t N2, size_t N3, KFR_ENABLE_IF(!is_same<Tin, Tout>)>
-constexpr KFR_INTRINSIC vec<Tout, N3> elemcast(const vec<vec<vec<Tin, N1>, N2>, N3>& value) CMT_NOEXCEPT
+template <typename Tout, typename Tin, size_t N1, size_t N2>
+constexpr KFR_INTRINSIC vec<Tout, N2> promoteto(const vec<vec<Tin, N1>, N2>& value) CMT_NOEXCEPT
 {
-    return vec<Tout, N3>(value);
+    if constexpr (std::is_same_v<Tin, Tout>)
+        return value;
+    else
+        return internal::conversion<vec_rank<Tout> + 1, 2, vec<Tout, N2>, vec<vec<Tin, N1>, N2>,
+                                    internal::conv_t::promote>::cast(value);
 }
 
 template <typename To, typename From>
@@ -1226,7 +1228,8 @@ void test_function1(cint_t<Cat> cat, Fn&& fn, RefFn&& reffn, IsApplicable&& isap
 {
     testo::matrix(
         named("value") = special_values(), named("type") = test_catogories::types(cat),
-        [&](special_value value, auto type) {
+        [&](special_value value, auto type)
+        {
             using T = typename decltype(type)::type;
             if (isapplicable(ctype<T>, value))
             {
@@ -1239,7 +1242,8 @@ void test_function1(cint_t<Cat> cat, Fn&& fn, RefFn&& reffn, IsApplicable&& isap
                 const auto fn_x  = fn(x);
                 const auto ref_x = apply(reffn, x);
                 ::testo::active_test()->check(testo::deep_is_equal(ref_x, fn_x),
-                                              as_string(fn_x, " == ", ref_x), "fn(x) == apply(reffn, x)");
+                                              as_string(fn_x, " == ", ref_x), "fn(x) == apply(reffn, x)",
+                                              __FILE__, __LINE__);
                 //   CHECK(fn(x) == apply(reffn, x));
             }
         });
@@ -1253,22 +1257,29 @@ void test_function1(cint_t<Cat> cat, Fn&& fn, RefFn&& reffn, IsApplicable&& isap
                   });
 }
 
-template <int Cat, typename Fn, typename RefFn, typename IsApplicable = fn_return_constant<bool, true>>
-void test_function2(cint_t<Cat> cat, Fn&& fn, RefFn&& reffn, IsApplicable&& isapplicable = IsApplicable{})
+template <int Cat, typename Fn, typename RefFn, typename IsApplicable = fn_return_constant<bool, true>,
+          typename IsDefined = fn_return_constant<bool, true>>
+void test_function2(cint_t<Cat> cat, Fn&& fn, RefFn&& reffn, IsApplicable&& isapplicable = IsApplicable{},
+                    IsDefined&& = IsDefined{})
 {
+    constexpr IsDefined isdefined{};
+
     testo::matrix(named("value1") = special_values(), //
                   named("value2") = special_values(), named("type") = test_catogories::types(cat),
                   [&](special_value value1, special_value value2, auto type)
                   {
                       using T = typename decltype(type)::type;
-                      const T x1(value1);
-                      const T x2(value2);
-                      if (isapplicable(ctype<T>, value1, value2))
+                      if constexpr (isdefined(ctype<T>))
                       {
-                          CHECK(is_same<decltype(fn(x1, x2)),
-                                        typename compound_type_traits<T>::template rebind<decltype(reffn(
-                                            std::declval<subtype<T>>(), std::declval<subtype<T>>()))>>);
-                          CHECK(fn(x1, x2) == apply(reffn, x1, x2));
+                          const T x1(value1);
+                          const T x2(value2);
+                          if (isapplicable(ctype<T>, value1, value2))
+                          {
+                              CHECK(is_same<decltype(fn(x1, x2)),
+                                            typename compound_type_traits<T>::template rebind<decltype(reffn(
+                                                std::declval<subtype<T>>(), std::declval<subtype<T>>()))>>);
+                              CHECK(fn(x1, x2) == apply(reffn, x1, x2));
+                          }
                       }
                   });
 
@@ -1278,7 +1289,10 @@ void test_function2(cint_t<Cat> cat, Fn&& fn, RefFn&& reffn, IsApplicable&& isap
                       using T    = typename decltype(type)::type;
                       const T x1 = test_enumerate(T::shape(), csizeseq<T::size()>, 0, 1);
                       const T x2 = test_enumerate(T::shape(), csizeseq<T::size()>, 100, -1);
-                      CHECK(fn(x1, x2) == apply(reffn, x1, x2));
+                      if constexpr (isdefined(ctype<T>))
+                      {
+                          CHECK(fn(x1, x2) == apply(reffn, x1, x2));
+                      }
                   });
 }
 
@@ -1286,20 +1300,20 @@ void test_function2(cint_t<Cat> cat, Fn&& fn, RefFn&& reffn, IsApplicable&& isap
 
 namespace internal
 {
-// vector to vector<vector>
-template <typename To, typename From, size_t N>
-struct conversion<vec<bit<To>, N>, vec<bit<From>, N>>
+// mask to mask
+template <typename To, typename From, size_t N, conv_t conv>
+struct conversion<1, 1, vec<bit<To>, N>, vec<bit<From>, N>, conv>
 {
     static vec<bit<To>, N> cast(const vec<bit<From>, N>& value)
     {
-        return vec<To, N>::frombits(innercast<itype<To>>(vec<itype<From>, N>::frombits(value.asvec())))
+        return vec<To, N>::frombits(broadcastto<itype<To>>(vec<itype<From>, N>::frombits(value.asvec())))
             .asmask();
     }
 };
 
 // vector to vector<vector>
 template <typename To, typename From, size_t N1, size_t N2, size_t Ns1>
-struct conversion<vec<vec<To, N1>, N2>, vec<From, Ns1>>
+struct conversion<2, 1, vec<vec<To, N1>, N2>, vec<From, Ns1>, conv_t::broadcast>
 {
     static_assert(N1 == Ns1, "");
     static_assert(!is_compound<To>, "");
@@ -1308,30 +1322,31 @@ struct conversion<vec<vec<To, N1>, N2>, vec<From, Ns1>>
     static vec<vec<To, N1>, N2> cast(const vec<From, N1>& value)
     {
         return vec<vec<To, N1>, N2>::from_flatten(
-            kfr::innercast<To>(value.flatten())
+            kfr::broadcastto<To>(value.flatten())
                 .shuffle(csizeseq<N2 * vec<From, N1>::scalar_size()> % csize<N2>));
     }
 };
 
-// vector to vector<vector<vector>>
-template <typename To, typename From, size_t N1, size_t N2, size_t N3, size_t Ns1>
-struct conversion<vec<vec<vec<To, N1>, N2>, N3>, vec<From, Ns1>>
+// vector to vector<vector>
+template <typename To, typename From, size_t N1, size_t N2, size_t Ns1>
+struct conversion<2, 1, vec<vec<To, N1>, N2>, vec<From, Ns1>, conv_t::promote>
 {
-    static_assert(N1 == Ns1, "");
+    static_assert(N2 == Ns1, "");
     static_assert(!is_compound<To>, "");
     static_assert(!is_compound<From>, "");
 
-    static vec<vec<vec<To, N1>, N2>, N3> cast(const vec<From, N1>& value)
+    static vec<vec<To, N1>, N2> cast(const vec<From, N2>& value)
     {
-        return vec<vec<vec<To, N1>, N2>, N3>::from_flatten(
-            kfr::innercast<To>(value.flatten())
-                .shuffle(csizeseq<N2 * vec<From, N1>::scalar_size()> % csize<N2>));
+        return vec<vec<To, N1>, N2>::from_flatten(
+            kfr::broadcastto<To>(value.flatten())
+                .shuffle(csizeseq<N2 * vec<From, N1>::scalar_size()> / csize<vec<From, N1>::scalar_size()> %
+                         csize<N2>));
     }
 };
 
 // vector<vector> to vector<vector>
-template <typename To, typename From, size_t N1, size_t N2, size_t NN1, size_t NN2>
-struct conversion<vec<vec<To, N1>, N2>, vec<vec<From, NN1>, NN2>>
+template <typename To, typename From, size_t N1, size_t N2, size_t NN1, size_t NN2, conv_t conv>
+struct conversion<2, 2, vec<vec<To, N1>, N2>, vec<vec<From, NN1>, NN2>, conv>
 {
     static_assert(N1 == NN1, "");
     static_assert(N2 == NN2, "");
@@ -1340,25 +1355,10 @@ struct conversion<vec<vec<To, N1>, N2>, vec<vec<From, NN1>, NN2>>
 
     static vec<vec<To, N1>, N2> cast(const vec<vec<From, N1>, N2>& value)
     {
-        return vec<vec<To, N1>, N2>::from_flatten(kfr::innercast<To>(value.flatten()));
+        return vec<vec<To, N1>, N2>::from_flatten(kfr::broadcastto<To>(value.flatten()));
     }
 };
 
-// vector<vector<vector>> to vector<vector<vector>>
-template <typename To, typename From, size_t N1, size_t N2, size_t N3, size_t NN1, size_t NN2, size_t NN3>
-struct conversion<vec<vec<vec<To, N1>, N2>, N3>, vec<vec<vec<From, NN1>, NN2>, NN3>>
-{
-    static_assert(N1 == NN1, "");
-    static_assert(N2 == NN2, "");
-    static_assert(N3 == NN3, "");
-    static_assert(!is_compound<To>, "");
-    static_assert(!is_compound<From>, "");
-
-    static vec<vec<vec<To, N1>, N2>, N3> cast(const vec<vec<vec<From, N1>, N2>, N3>& value)
-    {
-        return vec<vec<vec<To, N1>, N2>, N3>::from_flatten(kfr::innercast<To>(value.flatten()));
-    }
-};
 } // namespace internal
 
 template <typename T, size_t N1, size_t N2 = N1>
@@ -1417,22 +1417,33 @@ struct vecx_t<T, N1, N2>
     using type = vec<vec<T, N1>, N2>;
 };
 
-template <typename T, size_t N1, size_t N2, size_t N3>
-struct vecx_t<T, N1, N2, N3>
-{
-    using type = vec<vec<vec<T, N1>, N2>, N3>;
-};
 } // namespace internal
 
 template <typename T, size_t... Ns>
 using vecx = typename internal::vecx_t<T, Ns...>::type;
 
+template <typename T, size_t N>
+constexpr inline size_t vec_rank<vec<T, N>> = 1;
+
+template <typename T, size_t N1, size_t N2>
+constexpr inline size_t vec_rank<vec<vec<T, N1>, N2>> = 2;
+
+template <typename T, size_t N>
+KFR_INTRINSIC vec<T, N> v(const portable_vec<T, N>& pv)
+{
+    return pv;
+}
+
 } // namespace CMT_ARCH_NAME
 template <typename T1, typename T2, size_t N>
 struct common_type_impl<kfr::vec<T1, N>, kfr::vec<T2, N>>
     : common_type_from_subtypes<T1, T2, kfr::vec_template<N>::template type>
 {
 };
+template <typename T1, typename T2, size_t N1, size_t N2>
+struct common_type_impl<kfr::vec<T1, N1>, kfr::vec<T2, N2>>
+{
+};
 template <typename T1, typename T2, size_t N>
 struct common_type_impl<kfr::vec<T1, N>, T2>
     : common_type_from_subtypes<T1, T2, kfr::vec_template<N>::template type>
diff --git a/sources.cmake b/sources.cmake
@@ -27,7 +27,6 @@ set(
     ${PROJECT_SOURCE_DIR}/include/kfr/base/generators.hpp
     ${PROJECT_SOURCE_DIR}/include/kfr/base/math_expressions.hpp
     ${PROJECT_SOURCE_DIR}/include/kfr/base/memory.hpp
-    ${PROJECT_SOURCE_DIR}/include/kfr/base/old_basic_expressions.hpp
     ${PROJECT_SOURCE_DIR}/include/kfr/base/pointer.hpp
     ${PROJECT_SOURCE_DIR}/include/kfr/base/random.hpp
     ${PROJECT_SOURCE_DIR}/include/kfr/base/random_bits.hpp
@@ -35,6 +34,7 @@ set(
     ${PROJECT_SOURCE_DIR}/include/kfr/base/shape.hpp
     ${PROJECT_SOURCE_DIR}/include/kfr/base/simd_expressions.hpp
     ${PROJECT_SOURCE_DIR}/include/kfr/base/small_buffer.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/base/state_holder.hpp
     ${PROJECT_SOURCE_DIR}/include/kfr/base/tensor.hpp
     ${PROJECT_SOURCE_DIR}/include/kfr/base/univector.hpp
     ${PROJECT_SOURCE_DIR}/include/kfr/base/impl/static_array.hpp
@@ -77,14 +77,13 @@ set(
     ${PROJECT_SOURCE_DIR}/include/kfr/dsp/sample_rate_conversion.hpp
     ${PROJECT_SOURCE_DIR}/include/kfr/dsp/speaker.hpp
     ${PROJECT_SOURCE_DIR}/include/kfr/dsp/special.hpp
-    ${PROJECT_SOURCE_DIR}/include/kfr/dsp/state_holder.hpp
     ${PROJECT_SOURCE_DIR}/include/kfr/dsp/units.hpp
     ${PROJECT_SOURCE_DIR}/include/kfr/dsp/waveshaper.hpp
     ${PROJECT_SOURCE_DIR}/include/kfr/dsp/weighting.hpp
     ${PROJECT_SOURCE_DIR}/include/kfr/dsp/window.hpp
     ${PROJECT_SOURCE_DIR}/include/kfr/graphics/color.hpp
     ${PROJECT_SOURCE_DIR}/include/kfr/graphics/geometry.hpp
-    ${PROJECT_SOURCE_DIR}/include/kfr/graphics/scaled.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/graphics/impl/scaled.hpp
     ${PROJECT_SOURCE_DIR}/include/kfr/io/audiofile.hpp
     ${PROJECT_SOURCE_DIR}/include/kfr/io/file.hpp
     ${PROJECT_SOURCE_DIR}/include/kfr/io/python_plot.hpp
@@ -154,8 +153,316 @@ set(
     ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/select.hpp
     ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/simd.hpp
     ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/specialconstants.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/specializations.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/intrinsics.h
+    ${PROJECT_SOURCE_DIR}/include/kfr/testo/assert.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/testo/comparison.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/testo/console_colors.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/testo/double_double.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/testo/testo.hpp
+)
+
+    
+set(
+    KFR_SIMD_SRC
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/abs.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/clamp.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/comparison.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/complex.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/complex_type.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/constants.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/digitreverse.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/horizontal.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/logical.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/mask.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/min_max.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/operators.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/platform.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/read_write.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/round.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/saturation.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/select.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/shuffle.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/sort.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/types.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/vec.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/abs.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/backend.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/backend_clang.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/backend_generic.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/basicoperators_clang.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/basicoperators_complex.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/basicoperators_generic.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/clamp.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/function.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/logical.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/min_max.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/operators.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/read_write.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/round.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/saturation.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/select.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/simd.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/specialconstants.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/specializations.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/intrinsics.h
+)
+
+    
+set(
+    KFR_MATH_SRC
+    ${PROJECT_SOURCE_DIR}/include/kfr/math/asin_acos.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/math/atan.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/math/compiletime.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/math/complex_math.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/math/gamma.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/math/hyperbolic.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/math/interpolation.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/math/log_exp.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/math/modzerobessel.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/math/sin_cos.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/math/sqrt.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/math/tan.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/math/impl/asin_acos.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/math/impl/atan.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/math/impl/gamma.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/math/impl/hyperbolic.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/math/impl/log_exp.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/math/impl/modzerobessel.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/math/impl/sin_cos.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/math/impl/sqrt.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/math/impl/tan.hpp
+)
+
+    
+set(
+    KFR_BASE_SRC
+    ${PROJECT_SOURCE_DIR}/include/kfr/base/basic_expressions.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/base/conversion.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/base/expression.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/base/filter.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/base/fraction.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/base/generators.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/base/math_expressions.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/base/memory.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/base/pointer.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/base/random.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/base/random_bits.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/base/reduce.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/base/shape.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/base/simd_expressions.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/base/small_buffer.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/base/state_holder.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/base/tensor.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/base/univector.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/base/impl/static_array.hpp
+)
+
+    
+set(
+    KFR_DSP_SRC
+    ${PROJECT_SOURCE_DIR}/include/kfr/dsp/biquad.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dsp/biquad_design.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dsp/dcremove.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dsp/delay.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dsp/ebu.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dsp/fir.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dsp/fir_design.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dsp/fracdelay.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dsp/goertzel.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dsp/iir_design.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dsp/mixdown.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dsp/oscillators.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dsp/sample_rate_conversion.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dsp/speaker.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dsp/special.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dsp/units.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dsp/waveshaper.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dsp/weighting.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dsp/window.hpp
+)
+
+    
+set(
+    KFR_IO_SRC
+    ${PROJECT_SOURCE_DIR}/include/kfr/io/audiofile.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/io/file.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/io/python_plot.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/io/tostring.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/io/dr/dr_flac.h
+    ${PROJECT_SOURCE_DIR}/include/kfr/io/dr/dr_mp3.h
+    ${PROJECT_SOURCE_DIR}/include/kfr/io/dr/dr_wav.h
+)
+
+    
+set(
+    KFR_RUNTIME_SRC
+    ${PROJECT_SOURCE_DIR}/include/kfr/runtime/cpuid.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/runtime/cpuid_auto.hpp
+)
+
+    
+set(
+    KFR_GRAPHICS_SRC
+    ${PROJECT_SOURCE_DIR}/include/kfr/graphics/color.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/graphics/geometry.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/graphics/impl/scaled.hpp
+)
+
+    
+set(
+    KFR_SRC
+    ${PROJECT_SOURCE_DIR}/include/kfr/all.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/base.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/cometa.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dft.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dsp.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/graphics.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/io.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/math.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/runtime.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/version.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/capi.h
+    ${PROJECT_SOURCE_DIR}/include/kfr/cident.h
+    ${PROJECT_SOURCE_DIR}/include/kfr/config.h
+    ${PROJECT_SOURCE_DIR}/include/kfr/kfr.h
+    ${PROJECT_SOURCE_DIR}/include/kfr/base/basic_expressions.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/base/conversion.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/base/expression.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/base/filter.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/base/fraction.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/base/generators.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/base/math_expressions.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/base/memory.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/base/pointer.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/base/random.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/base/random_bits.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/base/reduce.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/base/shape.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/base/simd_expressions.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/base/small_buffer.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/base/state_holder.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/base/tensor.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/base/univector.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/base/impl/static_array.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/cometa/array.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/cometa/cstring.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/cometa/ctti.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/cometa/function.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/cometa/memory.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/cometa/named_arg.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/cometa/numeric.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/cometa/range.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/cometa/result.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/cometa/string.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/cometa/tuple.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dft/cache.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dft/convolution.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dft/fft.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dft/reference_dft.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dft/data/bitrev.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dft/data/sincos.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dft/impl/bitrev.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dft/impl/dft-fft.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dft/impl/dft-impl.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dft/impl/dft-templates.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dft/impl/fft-impl.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dft/impl/fft-templates.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dft/impl/ft.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dsp/biquad.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dsp/biquad_design.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dsp/dcremove.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dsp/delay.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dsp/ebu.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dsp/fir.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dsp/fir_design.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dsp/fracdelay.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dsp/goertzel.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dsp/iir_design.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dsp/mixdown.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dsp/oscillators.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dsp/sample_rate_conversion.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dsp/speaker.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dsp/special.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dsp/units.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dsp/waveshaper.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dsp/weighting.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/dsp/window.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/graphics/color.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/graphics/geometry.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/graphics/impl/scaled.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/io/audiofile.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/io/file.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/io/python_plot.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/io/tostring.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/io/dr/dr_flac.h
+    ${PROJECT_SOURCE_DIR}/include/kfr/io/dr/dr_mp3.h
+    ${PROJECT_SOURCE_DIR}/include/kfr/io/dr/dr_wav.h
+    ${PROJECT_SOURCE_DIR}/include/kfr/math/asin_acos.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/math/atan.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/math/compiletime.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/math/complex_math.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/math/gamma.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/math/hyperbolic.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/math/interpolation.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/math/log_exp.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/math/modzerobessel.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/math/sin_cos.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/math/sqrt.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/math/tan.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/math/impl/asin_acos.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/math/impl/atan.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/math/impl/gamma.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/math/impl/hyperbolic.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/math/impl/log_exp.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/math/impl/modzerobessel.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/math/impl/sin_cos.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/math/impl/sqrt.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/math/impl/tan.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/runtime/cpuid.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/runtime/cpuid_auto.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/abs.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/clamp.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/comparison.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/complex.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/complex_type.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/constants.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/digitreverse.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/horizontal.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/logical.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/mask.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/min_max.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/operators.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/platform.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/read_write.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/round.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/saturation.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/select.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/shuffle.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/sort.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/types.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/vec.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/abs.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/backend.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/backend_clang.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/backend_generic.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/basicoperators_clang.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/basicoperators_complex.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/basicoperators_generic.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/clamp.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/function.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/logical.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/min_max.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/operators.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/read_write.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/round.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/saturation.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/select.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/simd.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/specialconstants.hpp
+    ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/specializations.hpp
     ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/intrinsics.h
-    ${PROJECT_SOURCE_DIR}/include/kfr/simd/impl/specializations.i
     ${PROJECT_SOURCE_DIR}/include/kfr/testo/assert.hpp
     ${PROJECT_SOURCE_DIR}/include/kfr/testo/comparison.hpp
     ${PROJECT_SOURCE_DIR}/include/kfr/testo/console_colors.hpp
@@ -182,17 +489,32 @@ set(
     
 set(
     KFR_UNITTEST_SRC
+    ${PROJECT_SOURCE_DIR}/tests/unit/base/base.cpp
+    ${PROJECT_SOURCE_DIR}/tests/unit/base/basic_expressions.cpp
     ${PROJECT_SOURCE_DIR}/tests/unit/base/conversion.cpp
     ${PROJECT_SOURCE_DIR}/tests/unit/base/fraction.cpp
+    ${PROJECT_SOURCE_DIR}/tests/unit/base/generators.cpp
+    ${PROJECT_SOURCE_DIR}/tests/unit/base/pointer.cpp
     ${PROJECT_SOURCE_DIR}/tests/unit/base/random.cpp
     ${PROJECT_SOURCE_DIR}/tests/unit/base/reduce.cpp
+    ${PROJECT_SOURCE_DIR}/tests/unit/base/shape.cpp
     ${PROJECT_SOURCE_DIR}/tests/unit/base/tensor.cpp
+    ${PROJECT_SOURCE_DIR}/tests/unit/dsp/biquad.cpp
+    ${PROJECT_SOURCE_DIR}/tests/unit/dsp/biquad_design.cpp
+    ${PROJECT_SOURCE_DIR}/tests/unit/dsp/dsp.cpp
+    ${PROJECT_SOURCE_DIR}/tests/unit/dsp/ebu.cpp
+    ${PROJECT_SOURCE_DIR}/tests/unit/dsp/fir.cpp
+    ${PROJECT_SOURCE_DIR}/tests/unit/dsp/sample_rate_conversion.cpp
+    ${PROJECT_SOURCE_DIR}/tests/unit/dsp/units.cpp
+    ${PROJECT_SOURCE_DIR}/tests/unit/dsp/window.cpp
     ${PROJECT_SOURCE_DIR}/tests/unit/graphics/color.cpp
     ${PROJECT_SOURCE_DIR}/tests/unit/graphics/geometry.cpp
+    ${PROJECT_SOURCE_DIR}/tests/unit/graphics/graphics.cpp
     ${PROJECT_SOURCE_DIR}/tests/unit/math/asin_acos.cpp
     ${PROJECT_SOURCE_DIR}/tests/unit/math/atan.cpp
     ${PROJECT_SOURCE_DIR}/tests/unit/math/hyperbolic.cpp
     ${PROJECT_SOURCE_DIR}/tests/unit/math/log_exp.cpp
+    ${PROJECT_SOURCE_DIR}/tests/unit/math/math.cpp
     ${PROJECT_SOURCE_DIR}/tests/unit/math/sin_cos.cpp
     ${PROJECT_SOURCE_DIR}/tests/unit/math/tan.cpp
     ${PROJECT_SOURCE_DIR}/tests/unit/simd/abs.cpp
@@ -202,6 +524,7 @@ set(
     ${PROJECT_SOURCE_DIR}/tests/unit/simd/round.cpp
     ${PROJECT_SOURCE_DIR}/tests/unit/simd/select.cpp
     ${PROJECT_SOURCE_DIR}/tests/unit/simd/shuffle.cpp
+    ${PROJECT_SOURCE_DIR}/tests/unit/simd/simd.cpp
     ${PROJECT_SOURCE_DIR}/tests/unit/simd/vec.cpp
 )
 
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
@@ -106,7 +106,8 @@ set(ALL_TESTS_CPP
     expression_test.cpp
     intrinsic_test.cpp
     io_test.cpp
-    ${KFR_UNITTEST_SRC})
+    ${KFR_UNITTEST_SRC}
+    )
 
 if (KFR_ENABLE_DFT)
     list(APPEND ALL_TESTS_CPP dft_test.cpp)
diff --git a/tests/asm_test.cpp b/tests/asm_test.cpp
@@ -143,7 +143,7 @@ using namespace kfr;
     {                                                                                                        \
         r = kfr::fn<n, true>(x);                                                                             \
     }                                                                                                        \
-    KFR_PUBLIC void asm__test__##fn##__##ty##__##n##__unaligned(vec<ty, n> & __restrict r,                   \
+    KFR_PUBLIC void asm__test__##fn##__##ty##__##n##__unaligned(vec<ty, n>& __restrict r,                    \
                                                                 const ty* __restrict x)                      \
     {                                                                                                        \
         r = kfr::fn<n, false>(x);                                                                            \
@@ -154,7 +154,7 @@ using namespace kfr;
     {                                                                                                        \
         kfr::fn<true>(p, x);                                                                                 \
     }                                                                                                        \
-    KFR_PUBLIC void asm__test__##fn##__##ty##__##n##__unaligned(ty * __restrict p, const vec<ty, n>& x)      \
+    KFR_PUBLIC void asm__test__##fn##__##ty##__##n##__unaligned(ty* __restrict p, const vec<ty, n>& x)       \
     {                                                                                                        \
         kfr::fn<false>(p, x);                                                                                \
     }
diff --git a/tests/base_test.cpp b/tests/base_test.cpp
@@ -6,8 +6,8 @@
 
 #include <kfr/testo/testo.hpp>
 
-#include <kfr/io.hpp>
-#include <kfr/simd.hpp>
+// #include <kfr/io.hpp>
+#include <kfr/base.hpp>
 
 using namespace kfr;
 
@@ -92,23 +92,7 @@ TEST(test_basic)
     CHECK(inrange(pack(1, 2, 3), 1, 1) == make_mask<int>(true, false, false));
 }
 
-TEST(test_gen_expj)
-{
-    kfr::univector<cbase> v = kfr::truncate(kfr::gen_expj(0.f, constants<float>::pi_s(2) * 0.1f), 1000);
-    CHECK(rms(cabs(v.slice(990) -
-                   univector<cbase>({ cbase(1., +0.00000000e+00), cbase(0.80901699, +5.87785252e-01),
-                                      cbase(0.30901699, +9.51056516e-01), cbase(-0.30901699, +9.51056516e-01),
-                                      cbase(-0.80901699, +5.87785252e-01), cbase(-1., +1.22464680e-16),
-                                      cbase(-0.80901699, -5.87785252e-01),
-                                      cbase(-0.30901699, -9.51056516e-01), cbase(0.30901699, -9.51056516e-01),
-                                      cbase(0.80901699, -5.87785252e-01) }))) < 0.00006); // error here depends on vector width
-                                      // In most cases error is much lower (less than 0.00001)
-}
-
-TEST(ctti)
-{
-    CHECK(cometa::type_name<float>() == std::string("float"));
-}
+TEST(ctti) { CHECK(cometa::type_name<float>() == std::string("float")); }
 
 } // namespace CMT_ARCH_NAME
 
diff --git a/tests/complex_test.cpp b/tests/complex_test.cpp
@@ -192,9 +192,9 @@ TEST(complex_function_expressions)
     CHECK(uv3[1] == 2.f);
     CHECK(uv3[2] == 8.f);
     CHECK(uv3[3] == 18.f);
-    testo::assert_is_same<c32, value_type_of<decltype(uv2)>>();
-    testo::assert_is_same<f32, value_type_of<decltype(uv3)>>();
-    testo::assert_is_same<f32, value_type_of<decltype(real(uv2))>>();
+    testo::assert_is_same<c32, expression_value_type<decltype(uv2)>>();
+    testo::assert_is_same<f32, expression_value_type<decltype(uv3)>>();
+    testo::assert_is_same<f32, expression_value_type<decltype(real(uv2))>>();
 }
 
 TEST(static_tests)
@@ -219,10 +219,6 @@ TEST(static_tests)
     testo::assert_is_same<ftype<vec<complex<i32>, 4>>, vec<complex<f32>, 4>>();
     testo::assert_is_same<ftype<vec<complex<i64>, 8>>, vec<complex<f64>, 8>>();
 
-    testo::assert_is_same<kfr::internal::arg<int>, kfr::internal::expression_scalar<int>>();
-    testo::assert_is_same<kfr::internal::arg<complex<int>>,
-                          kfr::internal::expression_scalar<kfr::complex<int>>>();
-
     testo::assert_is_same<kfr::common_type<complex<int>, double>, complex<double>>();
 }
 } // namespace CMT_ARCH_NAME
diff --git a/tests/dsp_test.cpp b/tests/dsp_test.cpp
@@ -18,269 +18,6 @@ using namespace kfr;
 namespace CMT_ARCH_NAME
 {
 
-struct TestFragment
-{
-    float gain; // dB
-    float duration; // seconds
-    float frequency; // Hz
-};
-
-struct TestFragmentMultichannel
-{
-    float gain_L_R; // dB
-    float gain_C; // dB
-    float gain_Ls_Rs; // dB
-    float duration; // seconds
-    float frequency; // Hz
-};
-
-template <typename T>
-static void ebu_test_stereo(int sample_rate, const std::initializer_list<TestFragment>& fragments, T refM,
-                            T refS, T refI, T refLRA)
-{
-    ebu_r128<T> loudness(sample_rate, { Speaker::Left, Speaker::Right });
-
-    size_t total_length = 0;
-    for (const TestFragment& f : fragments)
-    {
-        total_length += static_cast<size_t>(f.duration * sample_rate);
-    }
-
-    univector<T> left_right(total_length);
-    size_t pos = 0;
-    for (const TestFragment& f : fragments)
-    {
-        const size_t len           = static_cast<size_t>(f.duration * sample_rate);
-        left_right.slice(pos, len) = dB_to_amp(f.gain) * sinenorm(phasor<float>(f.frequency, sample_rate));
-        pos += len;
-    }
-
-    for (size_t i = 0; i < total_length / loudness.packet_size(); i++)
-    {
-        loudness.process_packet({ left_right.slice(i * loudness.packet_size(), loudness.packet_size()),
-                                  left_right.slice(i * loudness.packet_size(), loudness.packet_size()) });
-    }
-    T M, S, I, RL, RH;
-    loudness.get_values(M, S, I, RL, RH);
-    if (!std::isnan(refM))
-        CHECK(std::abs(M - refM) < 0.05f);
-    if (!std::isnan(refS))
-        CHECK(std::abs(S - refS) < 0.05f);
-    if (!std::isnan(refI))
-        CHECK(std::abs(I - refI) < 0.05f);
-    if (!std::isnan(refLRA))
-        CHECK(std::abs((RH - RL) - refLRA) < 0.05f);
-}
-
-template <typename T>
-static void ebu_test_multichannel(int sample_rate,
-                                  const std::initializer_list<TestFragmentMultichannel>& fragments, T refM,
-                                  T refS, T refI, T refLRA)
-{
-    ebu_r128<T> loudness(sample_rate, { Speaker::Left, Speaker::Right, Speaker::Center, Speaker::LeftSurround,
-                                        Speaker::RightSurround });
-
-    size_t total_length = 0;
-    for (const TestFragmentMultichannel& f : fragments)
-    {
-        total_length += static_cast<size_t>(f.duration * sample_rate);
-    }
-
-    univector<T> left_right(total_length);
-    univector<T> center(total_length);
-    univector<T> surround(total_length);
-    size_t pos = 0;
-    for (const TestFragmentMultichannel& f : fragments)
-    {
-        const size_t len = static_cast<size_t>(f.duration * sample_rate);
-        left_right.slice(pos, len) =
-            dB_to_amp(f.gain_L_R) * sinenorm(phasor<float>(f.frequency, sample_rate));
-        center.slice(pos, len) = dB_to_amp(f.gain_C) * sinenorm(phasor<float>(f.frequency, sample_rate));
-        surround.slice(pos, len) =
-            dB_to_amp(f.gain_Ls_Rs) * sinenorm(phasor<float>(f.frequency, sample_rate));
-        pos += len;
-    }
-
-    for (size_t i = 0; i < total_length / loudness.packet_size(); i++)
-    {
-        loudness.process_packet({ left_right.slice(i * loudness.packet_size(), loudness.packet_size()),
-                                  left_right.slice(i * loudness.packet_size(), loudness.packet_size()),
-                                  center.slice(i * loudness.packet_size(), loudness.packet_size()),
-                                  surround.slice(i * loudness.packet_size(), loudness.packet_size()),
-                                  surround.slice(i * loudness.packet_size(), loudness.packet_size()) });
-    }
-    T M, S, I, RL, RH;
-    loudness.get_values(M, S, I, RL, RH);
-    if (!std::isnan(refM))
-        CHECK(std::abs(M - refM) < 0.05f);
-    if (!std::isnan(refS))
-        CHECK(std::abs(S - refS) < 0.05f);
-    if (!std::isnan(refI))
-        CHECK(std::abs(I - refI) < 0.05f);
-    if (!std::isnan(refLRA))
-        CHECK(std::abs((RH - RL) - refLRA) < 0.05f);
-}
-
-TEST(ebu_stereo_1_and_2)
-{
-    testo::matrix(named("type")        = ctypes_t<float, double>{},
-                  named("sample_rate") = std::vector<int>{ 44100, 48000 }, [](auto type, int sample_rate) {
-                      using T = typename decltype(type)::type;
-
-                      ebu_test_stereo<T>(sample_rate, { { -23.f, 20.f, 1000.f } }, -23.f, -23.f, -23.f, NAN);
-                      ebu_test_stereo<T>(sample_rate, { { -33.f, 20.f, 1000.f } }, -33.f, -33.f, -33.f, NAN);
-                  });
-}
-
-TEST(ebu_stereo_3_4_and_5)
-{
-    testo::matrix(named("type")        = ctypes_t<float, double>{},
-                  named("sample_rate") = std::vector<int>{ 44100, 48000 }, [](auto type, int sample_rate) {
-                      using T = typename decltype(type)::type;
-
-                      ebu_test_stereo<T>(
-                          sample_rate,
-                          { { -36.f, 10.f, 1000.f }, { -23.f, 60.f, 1000.f }, { -36.f, 10.f, 1000.f } }, NAN,
-                          NAN, -23.f, NAN);
-                      ebu_test_stereo<T>(sample_rate,
-                                         { { -72.f, 10.f, 1000.f },
-                                           { -36.f, 10.f, 1000.f },
-                                           { -23.f, 60.f, 1000.f },
-                                           { -36.f, 10.f, 1000.f },
-                                           { -72.f, 10.f, 1000.f } },
-                                         NAN, NAN, -23.f, NAN);
-                  });
-}
-
-TEST(ebu_multichannel_6)
-{
-    testo::matrix(named("type")        = ctypes_t<float, double>{},
-                  named("sample_rate") = std::vector<int>{ 44100, 48000 }, [](auto type, int sample_rate) {
-                      using T = typename decltype(type)::type;
-
-                      ebu_test_multichannel<T>(sample_rate, { { -28.f, -24.f, -30.f, 20.f, 1000.f } }, NAN,
-                                               NAN, -23.f, NAN);
-                  });
-}
-
-TEST(ebu_stereo_9)
-{
-    testo::matrix(named("type")        = ctypes_t<float, double>{},
-                  named("sample_rate") = std::vector<int>{ 44100, 48000 }, [](auto type, int sample_rate) {
-                      using T = typename decltype(type)::type;
-
-                      ebu_test_stereo<T>(sample_rate,
-                                         { { -20.f, 1.34f, 1000.f },
-                                           { -30.f, 1.66f, 1000.f },
-                                           { -20.f, 1.34f, 1000.f },
-                                           { -30.f, 1.66f, 1000.f },
-                                           { -20.f, 1.34f, 1000.f },
-                                           { -30.f, 1.66f, 1000.f },
-                                           { -20.f, 1.34f, 1000.f },
-                                           { -30.f, 1.66f, 1000.f },
-                                           { -20.f, 1.34f, 1000.f },
-                                           { -30.f, 1.66f, 1000.f } },
-                                         NAN, -23.f, NAN, NAN);
-                  });
-}
-
-TEST(ebu_stereo_12)
-{
-    testo::matrix(named("type")        = ctypes_t<float, double>{},
-                  named("sample_rate") = std::vector<int>{ 44100, 48000 }, [](auto type, int sample_rate) {
-                      using T = typename decltype(type)::type;
-
-                      ebu_test_stereo<T>(
-                          sample_rate,
-                          { { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f },
-                            { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f },
-                            { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f },
-                            { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f },
-                            { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f },
-                            { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f },
-                            { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f },
-                            { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f },
-                            { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f },
-                            { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f },
-                            { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f },
-                            { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f },
-                            { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f },
-                            { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f },
-                            { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f },
-                            { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f },
-                            { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f } },
-                          -23.f, NAN, NAN, NAN);
-                  });
-}
-
-TEST(ebu_lra_1_2_3_and_4)
-{
-    testo::matrix(named("type")        = ctypes_t<float, double>{},
-                  named("sample_rate") = std::vector<int>{ 44100, 48000 }, [](auto type, int sample_rate) {
-                      using T = typename decltype(type)::type;
-
-                      ebu_test_stereo<T>(sample_rate, { { -20.f, 20.f, 1000.f }, { -30.f, 20.f, 1000.f } },
-                                         NAN, NAN, NAN, 10.f);
-
-                      ebu_test_stereo<T>(sample_rate, { { -20.f, 20.f, 1000.f }, { -15.f, 20.f, 1000.f } },
-                                         NAN, NAN, NAN, 5.f);
-
-                      ebu_test_stereo<T>(sample_rate, { { -40.f, 20.f, 1000.f }, { -20.f, 20.f, 1000.f } },
-                                         NAN, NAN, NAN, 20.f);
-
-                      ebu_test_stereo<T>(sample_rate,
-                                         { { -50.f, 20.f, 1000.f },
-                                           { -35.f, 20.f, 1000.f },
-                                           { -20.f, 20.f, 1000.f },
-                                           { -35.f, 20.f, 1000.f },
-                                           { -50.f, 20.f, 1000.f } },
-                                         NAN, NAN, NAN, 15.f);
-                  });
-}
-
-TEST(note_to_hertz)
-{
-    testo::eplison_scope<void> eps(2000);
-    CHECK(kfr::note_to_hertz(60) == fbase(261.6255653005986346778499935233));
-    CHECK(kfr::note_to_hertz(pack(60)) == pack(fbase(261.6255653005986346778499935233)));
-
-    CHECK(kfr::note_to_hertz(69) == fbase(440.0));
-    CHECK(kfr::note_to_hertz(pack(69)) == pack(fbase(440)));
-}
-
-TEST(hertz_to_note)
-{
-    testo::eplison_scope<void> eps(1000);
-    CHECK(kfr::hertz_to_note(261.6255653005986346778499935233) == fbase(60));
-    CHECK(kfr::hertz_to_note(pack(261.6255653005986346778499935233)) == pack(fbase(60)));
-
-    CHECK(kfr::hertz_to_note(440) == fbase(69));
-    CHECK(kfr::hertz_to_note(pack(440)) == pack(fbase(69)));
-}
-
-TEST(amp_to_dB)
-{
-    testo::eplison_scope<void> eps(1000);
-
-    CHECK(kfr::amp_to_dB(fbase(2.0)) == fbase(6.0205999132796239042747778944899));
-    CHECK(kfr::amp_to_dB(fbase(-2.0)) == fbase(6.0205999132796239042747778944899));
-    CHECK(kfr::amp_to_dB(fbase(1.0)) == fbase(0));
-    CHECK(kfr::amp_to_dB(fbase(-1.0)) == fbase(0));
-    CHECK(kfr::amp_to_dB(fbase(0.5)) == fbase(-6.0205999132796239042747778944899));
-    CHECK(kfr::amp_to_dB(fbase(-0.5)) == fbase(-6.0205999132796239042747778944899));
-    CHECK(kfr::amp_to_dB(fbase(0.0)) == fbase(-HUGE_VAL));
-}
-
-TEST(dB_to_amp)
-{
-    testo::eplison_scope<void> eps(1000);
-
-    CHECK(kfr::dB_to_amp(fbase(-HUGE_VAL)) == fbase(0.0));
-    CHECK(kfr::dB_to_amp(fbase(0.0)) == fbase(1.0));
-    CHECK(kfr::dB_to_amp(fbase(6.0205999132796239042747778944899)) == fbase(2.0));
-    CHECK(kfr::dB_to_amp(fbase(-6.0205999132796239042747778944899)) == fbase(0.5));
-}
-
 TEST(delay)
 {
     const univector<float, 33> v1 = counter() + 100;
@@ -337,193 +74,6 @@ TEST(phasor)
     CHECK(rms(v1 - v2) < 1.e-5);
 }
 
-TEST(fir)
-{
-#ifdef CMT_COMPILER_IS_MSVC
-    // testo::matrix causes error in MSVC
-    {
-        using T = float;
-
-        const univector<T, 100> data = counter() + sequence(1, 2, -10, 100) + sequence(0, -7, 0.5);
-        const univector<T, 6> taps{ 1, 2, -2, 0.5, 0.0625, 4 };
-
-        CHECK_EXPRESSION(fir(data, taps), 100, [&](size_t index) -> T {
-            T result = 0;
-            for (size_t i = 0; i < taps.size(); i++)
-                result += data.get(index - i, 0) * taps[i];
-            return result;
-        });
-
-        CHECK_EXPRESSION(short_fir(data, taps), 100, [&](size_t index) -> T {
-            T result = 0;
-            for (size_t i = 0; i < taps.size(); i++)
-                result += data.get(index - i, 0) * taps[i];
-            return result;
-        });
-    }
-    {
-        using T = double;
-
-        const univector<T, 100> data = counter() + sequence(1, 2, -10, 100) + sequence(0, -7, 0.5);
-        const univector<T, 6> taps{ 1, 2, -2, 0.5, 0.0625, 4 };
-
-        CHECK_EXPRESSION(fir(data, taps), 100, [&](size_t index) -> T {
-            T result = 0;
-            for (size_t i = 0; i < taps.size(); i++)
-                result += data.get(index - i, 0) * taps[i];
-            return result;
-        });
-
-        CHECK_EXPRESSION(short_fir(data, taps), 100, [&](size_t index) -> T {
-            T result = 0;
-            for (size_t i = 0; i < taps.size(); i++)
-                result += data.get(index - i, 0) * taps[i];
-            return result;
-        });
-    }
-#else
-    testo::matrix(named("type") = ctypes_t<float
-#ifdef CMT_NATIVE_F64
-                                           ,
-                                           double
-#endif
-                                           >{},
-                  [](auto type) {
-                      using T = typename decltype(type)::type;
-
-                      const univector<T, 100> data =
-                          counter() + sequence(1, 2, -10, 100) + sequence(0, -7, 0.5);
-                      const univector<T, 6> taps{ 1, 2, -2, 0.5, 0.0625, 4 };
-
-                      CHECK_EXPRESSION(fir(data, taps), 100, [&](size_t index) -> T {
-                          T result = 0;
-                          for (size_t i = 0; i < taps.size(); i++)
-                              result += data.get(index - i, 0) * taps[i];
-                          return result;
-                      });
-
-                      fir_state<T> state(taps.ref());
-
-                      CHECK_EXPRESSION(fir(state, data), 100, [&](size_t index) -> T {
-                          T result = 0;
-                          for (size_t i = 0; i < taps.size(); i++)
-                              result += data.get(index - i, 0) * taps[i];
-                          return result;
-                      });
-
-                      CHECK_EXPRESSION(short_fir(data, taps), 100, [&](size_t index) -> T {
-                          T result = 0;
-                          for (size_t i = 0; i < taps.size(); i++)
-                              result += data.get(index - i, 0) * taps[i];
-                          return result;
-                      });
-
-                      short_fir_state<9, T> state2(taps);
-
-                      CHECK_EXPRESSION(short_fir<taps.size()>(state2, data), 100, [&](size_t index) -> T {
-                          T result = 0;
-                          for (size_t i = 0; i < taps.size(); i++)
-                              result += data.get(index - i, 0) * taps[i];
-                          return result;
-                      });
-
-                      CHECK_EXPRESSION(moving_sum<taps.size()>(data), 100, [&](size_t index) -> T {
-                          T result = 0;
-                          for (size_t i = 0; i < taps.size(); i++)
-                              result += data.get(index - i, 0);
-                          return result;
-                      });
-
-                      moving_sum_state<T, 131> msstate1;
-
-                      CHECK_EXPRESSION(moving_sum(msstate1, data), 100, [&](size_t index) -> T {
-                          T result = 0;
-                          for (size_t i = 0; i < msstate1.delayline.size(); i++)
-                              result += data.get(index - i, 0);
-                          return result;
-                      });
-
-                      moving_sum_state<T> msstate2(133);
-
-                      CHECK_EXPRESSION(moving_sum(msstate2, data), 100, [&](size_t index) -> T {
-                          T result = 0;
-                          for (size_t i = 0; i < msstate2.delayline.size(); i++)
-                              result += data.get(index - i, 0);
-                          return result;
-                      });
-                  });
-#endif
-}
-
-#ifdef CMT_NATIVE_F64
-TEST(fir_different)
-{
-    const univector<float, 100> data = counter() + sequence(1, 2, -10, 100) + sequence(0, -7, 0.5f);
-    //    const univector<double, 6> taps{ 1, 2, -2, 0.5, 0.0625, 4 };
-    const univector<double, 4> taps{ 1, 2, 3, 4 };
-
-    CHECK_EXPRESSION(fir(data, taps), 100, [&](size_t index) -> float {
-        double result = 0.0;
-        for (size_t i = 0; i < taps.size(); i++)
-            result += data.get(index - i, 0.0) * taps[i];
-        return float(result);
-    });
-
-    CHECK_EXPRESSION(short_fir(data, taps), 100, [&](size_t index) -> float {
-        double result = 0.0;
-        for (size_t i = 0; i < taps.size(); i++)
-            result += data.get(index - i, 0.0) * taps[i];
-        return float(result);
-    });
-}
-#endif
-
-#ifdef KFR_STD_COMPLEX
-template <typename T>
-inline std::complex<T> to_std(const std::complex<T>& c)
-{
-    return c;
-}
-template <typename T>
-inline std::complex<T> from_std(const std::complex<T>& c)
-{
-    return c;
-}
-#else
-template <typename T>
-inline std::complex<T> to_std(const kfr::complex<T>& c)
-{
-    return { c.real(), c.imag() };
-}
-
-template <typename T>
-inline kfr::complex<T> from_std(const std::complex<T>& c)
-{
-    return { c.real(), c.imag() };
-}
-#endif
-
-TEST(fir_complex)
-{
-    const univector<complex<float>, 100> data =
-        counter() * complex<float>{ 0.f, 1.f } + sequence(1, 2, -10, 100) + sequence(0, -7, 0.5f);
-    const univector<float, 6> taps{ 1, 2, -2, 0.5, 0.0625, 4 };
-
-    CHECK_EXPRESSION(fir(data, taps), 100, [&](size_t index) -> complex<float> {
-        std::complex<float> result = 0.0;
-        for (size_t i = 0; i < taps.size(); i++)
-            result = result + to_std(data.get(index - i, 0.0)) * taps[i];
-        return from_std(result);
-    });
-
-    CHECK_EXPRESSION(short_fir(data, taps), 100, [&](size_t index) -> complex<float> {
-        std::complex<float> result = 0.0;
-        for (size_t i = 0; i < taps.size(); i++)
-            result = result + to_std(data.get(index - i, 0.0)) * taps[i];
-        return from_std(result);
-    });
-}
-
 template <typename E, typename T, size_t size>
 void test_ir(E&& e, const univector<T, size>& test_vector)
 {
@@ -532,121 +82,6 @@ void test_ir(E&& e, const univector<T, size>& test_vector)
     println(absmaxof(ir - test_vector));
 }
 
-template <typename T, typename... Ts, univector_tag Tag>
-inline const univector<T, Tag>& choose_array(const univector<T, Tag>& array, const univector<Ts, Tag>&...)
-{
-    return array;
-}
-
-template <typename T, typename T2, typename... Ts, univector_tag Tag, KFR_ENABLE_IF(!is_same<T, T2>)>
-inline const univector<T, Tag>& choose_array(const univector<T2, Tag>&, const univector<Ts, Tag>&... arrays)
-{
-    return choose_array<T>(arrays...);
-}
-
-TEST(biquad_lowpass1)
-{
-    testo::matrix(named("type") = ctypes_t<float, double>{}, [](auto type) {
-        using T = typename decltype(type)::type;
-
-        const biquad_params<T> bq = biquad_lowpass<T>(0.1, 0.7);
-
-        constexpr size_t size = 32;
-
-        const univector<float, size> test_vector_f32{
-            +0x8.9bce2p-7,  +0xd.8383ep-6,  +0x8.f908dp-5,  +0xe.edc21p-6,  +0x9.ae104p-6,  +0x9.dcc24p-7,
-            +0xd.50584p-9,  -0xf.2668p-13,  -0xd.09ca1p-10, -0xe.15995p-10, -0xa.b90d2p-10, -0xc.edea4p-11,
-            -0xb.f14eap-12, -0xc.2cb44p-14, +0xb.4a4dep-15, +0xb.685dap-14, +0xa.b181fp-14, +0xf.0cb2bp-15,
-            +0x8.695d6p-15, +0xd.bedd4p-17, +0xf.5474p-20,  -0xd.bb266p-19, -0x9.63ca1p-18, -0xf.ca567p-19,
-            -0xa.5231p-19,  -0xa.9e934p-20, -0xe.ab52p-22,  +0xa.3c4cp-26,  +0xd.721ffp-23, +0xe.ccc1ap-23,
-            +0xb.5f248p-23, +0xd.d2c9ap-24,
-        };
-
-        const univector<double, size> test_vector_f64{
-            +0x8.9bce2bf3663e8p-7,  +0xd.8384010fdf1dp-6,   +0x8.f908e7a36df6p-5,   +0xe.edc2332a6d0bp-6,
-            +0x9.ae104af1da9ap-6,   +0x9.dcc235ef68e7p-7,   +0xd.5057ee425e05p-9,   -0xf.266e42a99aep-13,
-            -0xd.09cad73642208p-10, -0xe.1599f32a83dp-10,   -0xa.b90d8910a117p-10,  -0xc.edeaabb890948p-11,
-            -0xb.f14edbb55383p-12,  -0xc.2cb39b86f2dap-14,  +0xb.4a506ecff055p-15,  +0xb.685edfdb55358p-14,
-            +0xa.b182e32f8e298p-14, +0xf.0cb3dfd894b2p-15,  +0x8.695df725b4438p-15, +0xd.beddc3606b9p-17,
-            +0xf.547004d20874p-20,  -0xd.bb29b25b49b6p-19,  -0x9.63cb9187da1dp-18,  -0xf.ca588634fc618p-19,
-            -0xa.52322d320da78p-19, -0xa.9e9420154e4p-20,   -0xe.ab51f7b0335ap-22,  +0xa.3c6479980e1p-26,
-            +0xd.7223836599fp-23,   +0xe.ccc47ddd18678p-23, +0xb.5f265b1be1728p-23, +0xd.d2cb83f8483f8p-24,
-        };
-
-        const univector<T, size> ir = biquad(bq, unitimpulse<T>());
-
-        CHECK(absmaxof(choose_array<T>(test_vector_f32, test_vector_f64) - ir) == 0);
-    });
-}
-
-TEST(biquad_lowpass2)
-{
-    testo::matrix(named("type") = ctypes_t<float, double>{}, [](auto type) {
-        using T = typename decltype(type)::type;
-
-        const biquad_params<T> bq = biquad_lowpass<T>(0.45, 0.2);
-
-        constexpr size_t size = 32;
-
-        const univector<float, size> test_vector_f32{
-            +0x8.ce416p-4,  +0x8.2979p-4,   -0x8.a9d04p-7,  +0xe.aeb3p-11,  +0x8.204f8p-13, -0x8.20d78p-12,
-            +0x8.3379p-12,  -0xf.83d81p-13, +0xe.8b5c4p-13, -0xd.9ddadp-13, +0xc.bedfcp-13, -0xb.ee123p-13,
-            +0xb.2a9e5p-13, -0xa.73ac4p-13, +0x9.c86f6p-13, -0x9.2828p-13,  +0x8.92229p-13, -0x8.05b7p-13,
-            +0xf.048ffp-14, -0xe.0e849p-14, +0xd.28384p-14, -0xc.50a9p-14,  +0xb.86e56p-14, -0xa.ca0b6p-14,
-            +0xa.19476p-14, -0x9.73d38p-14, +0x8.d8f64p-14, -0x8.48024p-14, +0xf.80aa2p-15, -0xe.82ad8p-15,
-            +0xd.94f22p-15, -0xc.b66d9p-15,
-        };
-
-        const univector<double, size> test_vector_f64{
-            +0x8.ce416c0d31e88p-4,  +0x8.2978efe51dafp-4,   -0x8.a9d088b81da6p-7,   +0xe.aeb56c029358p-11,
-            +0x8.20492639873ap-13,  -0x8.20d4e21aab538p-12, +0x8.3376b2d53b4a8p-12, -0xf.83d3d1c17343p-13,
-            +0xe.8b584f0dd5ac8p-13, -0xd.9dd740ceaacf8p-13, +0xc.bedc85e7a621p-13,  -0xb.ee0f472bf8968p-13,
-            +0xb.2a9baed1fe6cp-13,  -0xa.73a9d1670f4ep-13,  +0x9.c86d29d297798p-13, -0x9.2825f4d894088p-13,
-            +0x8.9220a956d651p-13,  -0x8.05b539fdd79e8p-13, +0xf.048cb5194cfa8p-14, -0xe.0e819fa128938p-14,
-            +0xd.2835957d684cp-14,  -0xc.50a69c2a8dc18p-14, +0xb.86e33bbaf3cbp-14,  -0xa.ca097058af2cp-14,
-            +0xa.1945ad1703dcp-14,  -0x9.73d1eef7d8b68p-14, +0x8.d8f4df1bb3efp-14,  -0x8.48010323c6f7p-14,
-            +0xf.80a7f5baeeb2p-15,  -0xe.82ab94bb68a8p-15,  +0xd.94f05f80af008p-15, -0xc.b66c0799b21a8p-15,
-        };
-
-        const univector<T, size> ir = biquad(bq, unitimpulse<T>());
-
-        CHECK(absmaxof(choose_array<T>(test_vector_f32, test_vector_f64) - ir) == 0);
-    });
-}
-
-TEST(resampler_test)
-{
-    const int in_sr  = 44100;
-    const int out_sr = 48000;
-    const int freq   = 100;
-    auto resampler   = sample_rate_converter<fbase>(resample_quality::draft, out_sr, in_sr);
-    double delay     = resampler.get_fractional_delay();
-    univector<fbase> out(out_sr / 10);
-    univector<fbase> in  = truncate(sin(c_pi<fbase> * phasor<fbase>(freq, in_sr, 0)), in_sr / 10);
-    univector<fbase> ref = truncate(
-        sin(c_pi<fbase> * phasor<fbase>(freq, out_sr, -delay * (static_cast<double>(freq) / out_sr))),
-        out_sr / 10);
-    resampler.process(out, in);
-
-    CHECK(rms(slice(out - ref, static_cast<size_t>(ceil(delay * 2)))) < 0.005f);
-}
-TEST(resampler_test_complex)
-{
-    using type = complex<fbase>;
-    const int in_sr  = 44100;
-    const int out_sr = 48000;
-    const int freq   = 100;
-    auto resampler   = sample_rate_converter<type>(resample_quality::draft, out_sr, in_sr);
-    double delay     = resampler.get_fractional_delay();
-    univector<type> out(out_sr / 10);
-    univector<type> in  = truncate(sin(c_pi<fbase> * phasor<fbase>(freq, in_sr, 0)), in_sr / 10);
-    univector<type> ref = truncate(
-        sin(c_pi<fbase> * phasor<fbase>(freq, out_sr, -delay * (static_cast<double>(freq) / out_sr))),
-        out_sr / 10);
-    resampler.process(out, in);
-
-    CHECK(rms(cabs(slice(out - ref, static_cast<size_t>(ceil(delay * 2))))) < 0.005f);
-}
 } // namespace CMT_ARCH_NAME
 
 #ifndef KFR_NO_MAIN
diff --git a/tests/expression_test.cpp b/tests/expression_test.cpp
@@ -16,95 +16,6 @@ using namespace kfr;
 namespace CMT_ARCH_NAME
 {
 
-TEST(pack)
-{
-    static_assert(is_same<vec<f32x2, 1>, invoke_result<fn::reverse, vec<f32x2, 1>>>);
-    const univector<float, 21> v1 = 1 + counter();
-    const univector<float, 21> v2 = v1 * 11;
-
-    CHECK_EXPRESSION(pack(v1, v2), 21, [](float i) { return f32x2{ 1 + i, (1 + i) * 11 }; });
-
-    CHECK_EXPRESSION(bind_expression(fn::reverse(), pack(v1, v2)), 21, [](float i) {
-        return f32x2{ (1 + i) * 11, 1 + i };
-    });
-}
-
-TEST(adjacent)
-{
-    CHECK_EXPRESSION(adjacent(fn::mul(), counter()), infinite_size,
-                     [](size_t i) { return i > 0 ? i * (i - 1) : 0; });
-}
-
-TEST(padded)
-{
-    static_assert(is_infinite<decltype(padded(counter()))>, "");
-    static_assert(is_infinite<decltype(padded(truncate(counter(), 100)))>, "");
-
-    CHECK_EXPRESSION(padded(truncate(counter(), 6), -1), infinite_size,
-                     [](size_t i) { return i >= 6 ? -1 : i; });
-
-    CHECK_EXPRESSION(padded(truncate(counter(), 0), -1), infinite_size, [](size_t i) { return -1; });
-
-    CHECK_EXPRESSION(padded(truncate(counter(), 501), -1), infinite_size,
-                     [](size_t i) { return i >= 501 ? -1 : i; });
-}
-
-TEST(rebind)
-{
-    auto c_minus_two  = counter() - 2;
-    auto four_minus_c = rebind(c_minus_two, 4, counter());
-    CHECK_EXPRESSION(c_minus_two, infinite_size, [](size_t i) { return i - 2; });
-    CHECK_EXPRESSION(four_minus_c, infinite_size, [](size_t i) { return 4 - i; });
-}
-
-TEST(test_arg_access)
-{
-    univector<float> v1(10);
-    v1                       = counter();
-    auto e1                  = std::move(v1) + 10;
-    std::get<0>(e1.args)[0]  = 100;
-    std::get<1>(e1.args).val = 1;
-
-    CHECK_EXPRESSION(e1, 10, [](size_t i) { return (i == 0 ? 100 : i) + 1; });
-}
-
-TEST(to_pointer)
-{
-    auto e1 = to_pointer(counter<float>());
-
-    CHECK_EXPRESSION(e1, infinite_size, [](size_t i) { return static_cast<float>(i); });
-
-    auto e2 = to_pointer(gen_linear(0.f, 1.f));
-
-    CHECK_EXPRESSION(e2, infinite_size, [](size_t i) { return static_cast<float>(i); });
-}
-
-TEST(test_arg_replace)
-{
-    univector<float, 10> v1 = counter();
-    univector<float, 10> v2 = -counter();
-    auto e1                 = to_pointer(v1) * 10;
-    std::get<0>(e1.args)    = to_pointer(v2);
-
-    CHECK_EXPRESSION(e1, 10, [](size_t i) { return i * -10.0; });
-}
-
-TEST(placeholders)
-{
-    auto expr = 100 * placeholder<float>();
-    CHECK_EXPRESSION(expr, infinite_size, [](size_t) { return 0.f; });
-    substitute(expr, to_pointer(counter<float>()));
-    CHECK_EXPRESSION(expr, infinite_size, [](size_t i) { return 100.f * i; });
-}
-
-TEST(placeholders_pointer)
-{
-    expression_pointer<float> expr = to_pointer(10 * placeholder<float>());
-    CHECK_EXPRESSION(expr, infinite_size, [](size_t) { return 0.f; });
-    substitute(expr, to_pointer(counter<float>()));
-    CHECK_EXPRESSION(expr, infinite_size, [](size_t i) { return 10.f * i; });
-}
-
 TEST(univector_assignment)
 {
     univector<int> x = truncate(counter(), 10);
@@ -115,28 +26,12 @@ TEST(univector_assignment)
     CHECK(y.size() == 10u);
 }
 
-TEST(size_calc)
-{
-    auto a = counter();
-    CHECK(a.size() == infinite_size);
-    auto b = slice(counter(), 100);
-    CHECK(b.size() == infinite_size);
-    auto c = slice(counter(), 100, 1000);
-    CHECK(c.size() == 1000u);
-    auto d = slice(c, 100);
-    CHECK(d.size() == 900u);
-}
-
-TEST(reverse)
-{
-    CHECK_EXPRESSION(reverse(truncate(counter(), 21)), 21, [](size_t i) { return 20 - i; });
-}
-
 TEST(mix)
 {
-    CHECK_EXPRESSION(mix(sequence(0, 0.5f, 1, 0.5f), counter(), counter() * 10), infinite_size, [](size_t i) {
-        return mix(std::array<float, 4>{ 0, 0.5f, 1, 0.5f }[i % 4], i, i * 10);
-    });
+    CHECK_EXPRESSION(mix(sequence(0, 0.5f, 1, 0.5f), counter(), counter() * 10), infinite_size,
+                     [](size_t i) {
+                         return mix(std::array<float, 4>{ 0, 0.5f, 1, 0.5f }[i % 4], i, i * 10);
+                     });
 }
 
 TEST(expression_mask)
@@ -146,42 +41,6 @@ TEST(expression_mask)
     x = select(x > y, 0.5f, 0.1f) * (y - x) + x;
 }
 
-constexpr inline size_t fast_range_sum(size_t stop) { return stop * (stop + 1) / 2; }
-
-TEST(partition)
-{
-    {
-        univector<double, 385> output = zeros();
-        auto result                   = partition(output, counter(), 5, 1);
-        CHECK(result.count == 5u);
-        CHECK(result.chunk_size == 80u);
-
-        result(0);
-        CHECK(sum(output) >= fast_range_sum(80 - 1));
-        result(1);
-        CHECK(sum(output) >= fast_range_sum(160 - 1));
-        result(2);
-        CHECK(sum(output) >= fast_range_sum(240 - 1));
-        result(3);
-        CHECK(sum(output) >= fast_range_sum(320 - 1));
-        result(4);
-        CHECK(sum(output) == fast_range_sum(385 - 1));
-    }
-
-    {
-        univector<double, 385> output = zeros();
-        auto result                   = partition(output, counter(), 5, 160);
-        CHECK(result.count == 3u);
-        CHECK(result.chunk_size == 160u);
-
-        result(0);
-        CHECK(sum(output) >= fast_range_sum(160 - 1));
-        result(1);
-        CHECK(sum(output) >= fast_range_sum(320 - 1));
-        result(2);
-        CHECK(sum(output) == fast_range_sum(385 - 1));
-    }
-}
 } // namespace CMT_ARCH_NAME
 
 #ifndef KFR_NO_MAIN
diff --git a/tests/intrinsic_test.cpp b/tests/intrinsic_test.cpp
@@ -107,7 +107,8 @@ TEST(intrin_sqrt)
     CHECK(kfr::sqrt(make_vector(9)) == make_vector<fbase>(3.0));
     CHECK(kfr::sqrt(make_vector(-9)) == make_vector<fbase>(qnan));
     testo::matrix(named("type") = float_vector_types<vec>, named("value") = std::vector<int>{ 0, 2, 65536 },
-                  [](auto type, int value) {
+                  [](auto type, int value)
+                  {
                       using T = typename decltype(type)::type;
                       const T x(value);
                       CHECK(kfr::sqrt(x) == apply([](auto x) -> decltype(x) { return std::sqrt(x); }, x));
@@ -117,7 +118,8 @@ TEST(intrin_sqrt)
 TEST(intrin_satadd_satsub)
 {
     testo::matrix(named("type") = cconcat(signed_vector_types<vec>, unsigned_vector_types<vec>),
-                  [](auto type) {
+                  [](auto type)
+                  {
                       using T     = typename decltype(type)::type;
                       using Tsub  = subtype<T>;
                       const T min = std::numeric_limits<Tsub>::min();
@@ -144,23 +146,25 @@ TEST(intrin_satadd_satsub)
 
 TEST(intrin_any_all)
 {
-    testo::matrix(named("type") = unsigned_vector_types<vec>, [](auto type) {
-        using T                = typename decltype(type)::type;
-        constexpr size_t width = widthof<T>();
-        using Tsub             = subtype<T>;
-        const auto x           = enumerate<Tsub, width>() == Tsub(0);
-        CHECK(any(x) == true);
-        if (width == 1)
-            CHECK(all(x) == true);
-        else
-            CHECK(all(x) == false);
-        const auto y = zerovector<Tsub, width>() == Tsub(127);
-        CHECK(all(y) == false);
-        CHECK(any(y) == false);
-        const auto z = zerovector<Tsub, width>() == Tsub(0);
-        CHECK(all(z) == true);
-        CHECK(any(z) == true);
-    });
+    testo::matrix(named("type") = unsigned_vector_types<vec>,
+                  [](auto type)
+                  {
+                      using T                = typename decltype(type)::type;
+                      constexpr size_t width = widthof<T>();
+                      using Tsub             = subtype<T>;
+                      const auto x           = enumerate<Tsub, width>() == Tsub(0);
+                      CHECK(any(x) == true);
+                      if (width == 1)
+                          CHECK(all(x) == true);
+                      else
+                          CHECK(all(x) == false);
+                      const auto y = zerovector<Tsub, width>() == Tsub(127);
+                      CHECK(all(y) == false);
+                      CHECK(any(y) == false);
+                      const auto z = zerovector<Tsub, width>() == Tsub(0);
+                      CHECK(all(z) == true);
+                      CHECK(any(z) == true);
+                  });
 }
 
 } // namespace CMT_ARCH_NAME
diff --git a/tests/numeric_tests.hpp b/tests/numeric_tests.hpp
@@ -10,6 +10,8 @@
 namespace kfr
 {
 
+inline bool show_measured_accuracy = false;
+
 using testo::test_data_entry;
 
 inline namespace CMT_ARCH_NAME
@@ -55,7 +57,7 @@ uint64_t ulps(vec<T, N> x, vec<T, N> y)
 inline const char* tname(ctype_t<f32>) { return "float"; }
 inline const char* tname(ctype_t<f64>) { return "double"; }
 
-#define CHECK_DIFF(x_arg, y_arg, threshold)                                                                  \
+#define CHECK_DIFF(x_arg, y_arg, threshold, file, line)                                                      \
     do                                                                                                       \
     {                                                                                                        \
         ++checks_count;                                                                                      \
@@ -67,57 +69,67 @@ inline const char* tname(ctype_t<f64>) { return "double"; }
         ::testo::active_test()->check(                                                                       \
             arg_diff <= threshold,                                                                           \
             ::cometa::as_string(x_arg_value, " ~= ", y_arg_value, " (", arg_diff, " <= ", threshold, ")"),   \
-            #x_arg " ~= " #y_arg);                                                                           \
+            #x_arg " ~= " #y_arg, file, line);                                                               \
     } while (0)
 
 #define KFR_AUTO_TEST_1(fn, datafile, maxulps, avgulps)                                                      \
     TEST(fn##_##datafile)                                                                                    \
     {                                                                                                        \
-        testo::matrix(named("type") = vector_types(), [&](auto type) {                                       \
-            using T               = typename decltype(type)::type;                                           \
-            using Tsub            = subtype<T>;                                                              \
-            double error_sum      = 0.0;                                                                     \
-            uint64_t error_peak   = 0;                                                                       \
-            uint64_t checks_count = 0;                                                                       \
-            std::shared_ptr<file_reader<test_data_entry<Tsub, 1>>> reader =                                  \
-                open_file_for_reading<test_data_entry<Tsub, 1>>(                                             \
-                    std::string(KFR_SRC_DIR "/tests/data/" #fn "_") + tname(ctype<Tsub>) + "_" #datafile);   \
-            test_data_entry<Tsub, 1> entry;                                                                  \
-            while (reader->read(entry))                                                                      \
-            {                                                                                                \
-                testo::scope s(as_string(entry.arguments[0]));                                               \
-                CHECK_DIFF(kfr::fn(entry.arguments[0]), entry.result, maxulps);                              \
-            }                                                                                                \
-            CHECK(checks_count > 0u);                                                                        \
-            CHECK(error_sum / checks_count <= avgulps);                                                      \
-            println("measured accuracy: ", tname(ctype<Tsub>), " ", error_sum / checks_count, "(peak ",      \
-                    error_peak, ")");                                                                        \
-        });                                                                                                  \
+        testo::matrix(named("type") = vector_types(),                                                        \
+                      [&](auto type)                                                                         \
+                      {                                                                                      \
+                          using T               = typename decltype(type)::type;                             \
+                          using Tsub            = subtype<T>;                                                \
+                          double error_sum      = 0.0;                                                       \
+                          uint64_t error_peak   = 0;                                                         \
+                          uint64_t checks_count = 0;                                                         \
+                          std::shared_ptr<file_reader<test_data_entry<Tsub, 1>>> reader =                    \
+                              open_file_for_reading<test_data_entry<Tsub, 1>>(                               \
+                                  std::string(KFR_SRC_DIR "/tests/data/" #fn "_") + tname(ctype<Tsub>) +     \
+                                  "_" #datafile);                                                            \
+                          test_data_entry<Tsub, 1> entry;                                                    \
+                          while (reader->read(entry))                                                        \
+                          {                                                                                  \
+                              testo::scope s(as_string(entry.arguments[0]));                                 \
+                              CHECK_DIFF(kfr::fn(entry.arguments[0]), entry.result, maxulps, __FILE__,       \
+                                         __LINE__);                                                          \
+                          }                                                                                  \
+                          CHECK(checks_count > 0u);                                                          \
+                          CHECK(error_sum / checks_count <= avgulps);                                        \
+                          if (show_measured_accuracy)                                                        \
+                              println("measured accuracy: ", tname(ctype<Tsub>), " ",                        \
+                                      error_sum / checks_count, "(peak ", error_peak, ")");                  \
+                      });                                                                                    \
     }
 
 #define KFR_AUTO_TEST_2(fn, datafile, maxulps, avgulps)                                                      \
     TEST(fn##_##datafile)                                                                                    \
     {                                                                                                        \
-        testo::matrix(named("type") = vector_types(), [&](auto type) {                                       \
-            using T               = typename decltype(type)::type;                                           \
-            using Tsub            = subtype<T>;                                                              \
-            double error_sum      = 0.0;                                                                     \
-            uint64_t error_peak   = 0;                                                                       \
-            uint64_t checks_count = 0;                                                                       \
-            std::shared_ptr<file_reader<test_data_entry<Tsub, 2>>> reader =                                  \
-                open_file_for_reading<test_data_entry<Tsub, 2>>(                                             \
-                    std::string(KFR_SRC_DIR "/tests/data/" #fn "_") + tname(ctype<Tsub>) + "_" #datafile);   \
-            test_data_entry<Tsub, 2> entry;                                                                  \
-            while (reader->read(entry))                                                                      \
-            {                                                                                                \
-                testo::scope s(as_string(entry.arguments[0], entry.arguments[1]));                           \
-                CHECK_DIFF(kfr::fn(entry.arguments[0], entry.arguments[1]), entry.result, maxulps);          \
-            }                                                                                                \
-            CHECK(checks_count > 0u);                                                                        \
-            CHECK(error_sum / checks_count <= avgulps);                                                      \
-            println("measured accuracy: ", tname(ctype<Tsub>), " ", error_sum / checks_count, "(peak ",      \
-                    error_peak, ")");                                                                        \
-        });                                                                                                  \
+        testo::matrix(named("type") = vector_types(),                                                        \
+                      [&](auto type)                                                                         \
+                      {                                                                                      \
+                          using T               = typename decltype(type)::type;                             \
+                          using Tsub            = subtype<T>;                                                \
+                          double error_sum      = 0.0;                                                       \
+                          uint64_t error_peak   = 0;                                                         \
+                          uint64_t checks_count = 0;                                                         \
+                          std::shared_ptr<file_reader<test_data_entry<Tsub, 2>>> reader =                    \
+                              open_file_for_reading<test_data_entry<Tsub, 2>>(                               \
+                                  std::string(KFR_SRC_DIR "/tests/data/" #fn "_") + tname(ctype<Tsub>) +     \
+                                  "_" #datafile);                                                            \
+                          test_data_entry<Tsub, 2> entry;                                                    \
+                          while (reader->read(entry))                                                        \
+                          {                                                                                  \
+                              testo::scope s(as_string(entry.arguments[0], entry.arguments[1]));             \
+                              CHECK_DIFF(kfr::fn(entry.arguments[0], entry.arguments[1]), entry.result,      \
+                                         maxulps, __FILE__, __LINE__);                                       \
+                          }                                                                                  \
+                          CHECK(checks_count > 0u);                                                          \
+                          CHECK(error_sum / checks_count <= avgulps);                                        \
+                          if (show_measured_accuracy)                                                        \
+                              println("measured accuracy: ", tname(ctype<Tsub>), " ",                        \
+                                      error_sum / checks_count, "(peak ", error_peak, ")");                  \
+                      });                                                                                    \
     }
 } // namespace CMT_ARCH_NAME
 } // namespace kfr
diff --git a/tests/tensor_test.cpp b/tests/tensor_test.cpp
@@ -1,6 +1,6 @@
-#include "kfr/version.hpp"
 #include "kfr/runtime.hpp"
 #include "kfr/testo/testo.hpp"
+#include "kfr/version.hpp"
 
 using namespace kfr;
 
diff --git a/tests/unit/base/base.cpp b/tests/unit/base/base.cpp
@@ -0,0 +1,7 @@
+/**
+ * KFR (http://kfrlib.com)
+ * Copyright (C) 2016-2022 Fractalium Ltd
+ * See LICENSE.txt for details
+ */
+
+#include <kfr/base.hpp>
diff --git a/tests/unit/base/basic_expressions.cpp b/tests/unit/base/basic_expressions.cpp
@@ -0,0 +1,131 @@
+/**
+ * KFR (http://kfrlib.com)
+ * Copyright (C) 2016-2022 Fractalium Ltd
+ * See LICENSE.txt for details
+ */
+
+#include <kfr/base/basic_expressions.hpp>
+#include <kfr/base/simd_expressions.hpp>
+#include <kfr/base/univector.hpp>
+#include <kfr/io/tostring.hpp>
+
+namespace kfr
+{
+inline namespace CMT_ARCH_NAME
+{
+
+TEST(linspace) // Value and shape checks for linspace() and symmlinspace().
+{
+    testo::eplison_scope<> eps(10); // presumably sets the comparison tolerance for this scope - confirm against testo
+    CHECK_EXPRESSION(linspace(0.0, 1.0, 5, true, ctrue), { 0.0, 0.25, 0.50, 0.75, 1.0 }); // expected list includes the stop value 1.0
+    CHECK_EXPRESSION(linspace(0.0, 1.0, 4, false, ctrue), { 0.0, 0.25, 0.50, 0.75 }); // expected list stops short of 1.0
+    CHECK(shapeof(linspace(0.0, 1.0, 5, true, cfalse)) == shape{ infinite_size }); // with cfalse the reported shape is infinite
+    CHECK_EXPRESSION(linspace(0.0, 1.0, 4, false, ctrue), { 0.0, 0.25, 0.50, 0.75 }); // NOTE(review): identical to the check two lines above
+    CHECK_EXPRESSION(symmlinspace(3.0, 4, ctrue), { -3.0, -1.00, 1.00, 3.00 }); // expected values are symmetric around zero
+
+    CHECK_EXPRESSION(linspace(1, 21, 4, false, ctrue), { 1, 6, 11, 16 }); // integer start/stop arguments
+    CHECK_EXPRESSION(linspace(1, 21, 4, true, ctrue), { 1, 7.66666667f, 14.3333333f, 21 }); // integer arguments, fractional expected values
+}
+
+TEST(counter_shape) // Checks the shapes reported by shapeof() for a scalar and for counter expressions.
+{
+    CHECK(shapeof(1) == shape{}); // a plain scalar has an empty shape
+    CHECK(shapeof(counter()) == shape{ infinite_size }); // default counter: one infinite dimension
+    CHECK(shapeof(counter() + 1) == shape{ infinite_size }); // adding a scalar keeps the counter's shape
+    CHECK(shapeof(counter(0, 1, 1)) == shape{ infinite_size, infinite_size }); // three-argument form: two infinite dimensions
+}
+
+TEST(pack) // Checks that pack() combines two vectors into f32x2 elements and that fn::reverse applies per element.
+{
+    static_assert(is_same<vec<f32x2, 1>, invoke_result<fn::reverse, vec<f32x2, 1>>>); // fn::reverse keeps the vec<f32x2, 1> type
+    const univector<float, 21> v1 = 1 + counter(); // expected contents: 1 + i (see the lambda below)
+    const univector<float, 21> v2 = v1 * 11; // expected contents: (1 + i) * 11
+
+    CHECK_EXPRESSION(pack(v1, v2), 21, [](float i) { return f32x2{ 1 + i, (1 + i) * 11 }; }); // element i is the pair { v1[i], v2[i] }
+
+    CHECK_EXPRESSION(bind_expression(fn::reverse(), pack(v1, v2)), 21, // reversed: the two components of each pair swap places
+                     [](float i) {
+                         return f32x2{ (1 + i) * 11, 1 + i };
+                     });
+}
+
+TEST(adjacent) // Checks adjacent() with fn::mul over counter() against a closed-form expectation.
+{
+    CHECK_EXPRESSION(adjacent(fn::mul(), counter()), infinite_size,
+                     [](size_t i) { return i > 0 ? i * (i - 1) : 0; }); // element i is i * (i - 1); element 0 is expected to be 0
+}
+
+TEST(padded) // Checks that padded() produces an infinite expression and fills past the source length with the given value.
+{
+    static_assert(is_infinite<decltype(padded(counter()))>, ""); // padding an infinite source stays infinite
+    static_assert(is_infinite<decltype(padded(truncate(counter(), 100)))>, ""); // padding a truncated source is infinite as well
+
+    CHECK_EXPRESSION(padded(truncate(counter(), 6), -1), infinite_size,
+                     [](size_t i) { return i >= 6 ? -1 : i; }); // source values for i < 6, fill value -1 afterwards
+
+    CHECK_EXPRESSION(padded(truncate(counter(), 0), -1), infinite_size, [](size_t i) { return -1; }); // empty source: every element is the fill value
+
+    CHECK_EXPRESSION(padded(truncate(counter(), 501), -1), infinite_size,
+                     [](size_t i) { return i >= 501 ? -1 : i; }); // same check with a source length of 501
+}
+
+TEST(rebind) // Checks that rebind() replaces the arguments of an existing expression while keeping its operation.
+{
+    auto c_minus_two  = counter() - 2;
+    auto four_minus_c = rebind(c_minus_two, 4, counter()); // same subtraction, new arguments: 4 and counter()
+    CHECK_EXPRESSION(counter(), infinite_size, [](size_t i) { return i; });
+    CHECK_EXPRESSION(c_minus_two, infinite_size, [](size_t i) { return i - 2; }); // NOTE(review): i - 2 is unsigned and wraps for i < 2; presumably converted to the expression's value type by the check - confirm
+    CHECK_EXPRESSION(four_minus_c, infinite_size, [](size_t i) { return 4 - i; }); // NOTE(review): same unsigned wrap for i > 4 - confirm
+}
+
+TEST(test_arg_access) // Checks that an expression's stored arguments can be modified in place through its `args` tuple.
+{
+    univector<float> v1(10);
+    v1                      = counter();
+    auto e1                 = std::move(v1) + 10; // v1 is moved into the expression; 10 is the second argument
+    std::get<0>(e1.args)[0] = 100; // overwrite element 0 of the first argument
+    std::get<1>(e1.args)    = 1; // replace the second argument (10) with 1
+
+    CHECK_EXPRESSION(e1, 10, [](size_t i) { return (i == 0 ? 100 : i) + 1; }); // result reflects both modifications
+}
+
+TEST(size_calc) // Checks the shapes reported for slice() applied to infinite and finite expressions.
+{
+    auto a = counter();
+    CHECK(shapeof(a) == shape{ infinite_size });
+    auto b = slice(counter(), 100); // two-argument slice of an infinite source
+    CHECK(shapeof(b) == shape{ infinite_size }); // remains infinite
+    auto c = slice(counter(), 100, 1000); // three-argument slice
+    CHECK(shapeof(c) == shape{ 1000 }); // the third argument becomes the reported size
+    auto d = slice(c, 100); // two-argument slice of the finite expression c
+    CHECK(shapeof(d) == shape{ 900 }); // 1000 - 100 elements remain
+}
+
+TEST(reverse_expression) // Checks reverse() on a 21-element truncated counter.
+{
+    CHECK_EXPRESSION(reverse(truncate(counter(), 21)), 21, [](size_t i) { return 20 - i; }); // element i is expected to be 20 - i
+}
+
+TEST(sequence) // Checks that sequence() repeats its arguments cyclically over an infinite range.
+{
+    CHECK_EXPRESSION(sequence(0, 0.5f, 1, 0.5f), infinite_size,
+                     [](size_t i) {
+                         return std::array<float, 4>{ 0, 0.5f, 1, 0.5f }[i % 4]; // expected value: the (i mod 4)-th argument
+                     });
+}
+
+TEST(assign_expression) // Checks compound assignment (*=) on a univector and on a pack() of two univectors.
+{
+    univector<float> f = truncate(counter(0, 1), 10);
+    f *= 10;
+    CHECK_EXPRESSION(f, { 0, 10, 20, 30, 40, 50, 60, 70, 80, 90 }); // every element scaled by 10
+    
+    univector<float> a = truncate(counter(0, 1), 10);
+    univector<float> b = truncate(counter(100, 1), 10);
+    pack(a, b) *= broadcast<2>(10.f); // assigning through pack() is expected to update both a and b
+    CHECK_EXPRESSION(a, { 0, 10, 20, 30, 40, 50, 60, 70, 80, 90 });
+    CHECK_EXPRESSION(b, { 1000, 1010, 1020, 1030, 1040, 1050, 1060, 1070, 1080, 1090 });
+}
+
+} // namespace CMT_ARCH_NAME
+} // namespace kfr
diff --git a/tests/unit/base/generators.cpp b/tests/unit/base/generators.cpp
@@ -0,0 +1,33 @@
+/**
+ * KFR (http://kfrlib.com)
+ * Copyright (C) 2016-2022 Fractalium Ltd
+ * See LICENSE.txt for details
+ */
+
+#include <kfr/base/basic_expressions.hpp>
+#include <kfr/base/generators.hpp>
+#include <kfr/base/math_expressions.hpp>
+#include <kfr/base/reduce.hpp>
+#include <kfr/base/simd_expressions.hpp>
+#include <kfr/base/univector.hpp>
+
+using namespace kfr;
+
+namespace CMT_ARCH_NAME
+{
+
+TEST(test_gen_expj) // Compares the tail of a 1000-sample gen_expj() sequence against reference complex values.
+{
+    univector<cbase> v = truncate(gen_expj(0.f, constants<float>::pi_s(2) * 0.1f), 1000); // presumably a phase step of 0.1 * 2*pi per sample - confirm pi_s(2)
+    CHECK(rms(cabs( // RMS of the magnitude of the difference must stay below the threshold
+              v.slice(990) - // elements from index 990 onward, compared with the 10 reference values below
+              univector<cbase>({ cbase(1., +0.00000000e+00), cbase(0.80901699, +5.87785252e-01),
+                                 cbase(0.30901699, +9.51056516e-01), cbase(-0.30901699, +9.51056516e-01),
+                                 cbase(-0.80901699, +5.87785252e-01), cbase(-1., +1.22464680e-16),
+                                 cbase(-0.80901699, -5.87785252e-01), cbase(-0.30901699, -9.51056516e-01),
+                                 cbase(0.30901699, -9.51056516e-01), cbase(0.80901699, -5.87785252e-01) }))) <
+          0.00006); // error here depends on vector width
+    // In most cases error is much lower (less than 0.00001)
+}
+
+} // namespace CMT_ARCH_NAME
diff --git a/tests/unit/base/pointer.cpp b/tests/unit/base/pointer.cpp
@@ -0,0 +1,57 @@
+/**
+ * KFR (http://kfrlib.com)
+ * Copyright (C) 2016-2022 Fractalium Ltd
+ * See LICENSE.txt for details
+ */
+
+#include <kfr/base/pointer.hpp>
+#include <kfr/base/simd_expressions.hpp>
+#include <kfr/base/univector.hpp>
+#include <kfr/base/generators.hpp>
+#include <kfr/io/tostring.hpp>
+
+namespace kfr
+{
+inline namespace CMT_ARCH_NAME
+{
+TEST(to_pointer) // Checks that wrapping an expression with to_pointer() preserves the values it produces.
+{
+    auto e1 = to_pointer(counter<float>());
+
+    CHECK_EXPRESSION(e1, infinite_size, [](size_t i) { return static_cast<float>(i); }); // wrapped counter still yields i
+
+    auto e2 = to_pointer(gen_linear(0.f, 1.f));
+
+    CHECK_EXPRESSION(e2, infinite_size, [](size_t i) { return static_cast<float>(i); }); // gen_linear(0, 1) is expected to yield the same sequence
+}
+
+TEST(test_arg_replace) // Checks that a to_pointer() argument stored in an expression can be swapped for another one.
+{
+    univector<float, 10> v1 = counter();
+    univector<float, 10> v2 = -counter();
+    auto e1                 = to_pointer(v1) * 10;
+    std::get<0>(e1.args)    = to_pointer(v2); // replace the first argument: v1 -> v2
+
+    CHECK_EXPRESSION(e1, 10, [](size_t i) { return i * -10.0; }); // result now follows v2 (-i) times 10
+}
+
+TEST(placeholders) // Checks placeholder<float>() before and after substitute() inside a plain expression.
+{
+    auto expr1 = placeholder<float>();
+    CHECK_EXPRESSION(expr1, infinite_size, [](size_t) { return 0.f; }); // an unsubstituted placeholder is expected to yield 0
+    auto expr2 = 100 * placeholder<float>();
+    CHECK_EXPRESSION(expr2, infinite_size, [](size_t) { return 0.f; }); // 100 * 0 before substitution
+    substitute(expr2, to_pointer(counter<float>())); // bind the placeholder to a counter
+    CHECK_EXPRESSION(expr2, infinite_size, [](size_t i) { return 100.f * i; }); // after substitution: 100 * i
+}
+
+TEST(placeholders_pointer) // Same as the placeholders test, but the expression is held as an expression_pointer<float>.
+{
+    expression_pointer<float> expr = to_pointer(10 * placeholder<float>());
+    CHECK_EXPRESSION(expr, infinite_size, [](size_t) { return 0.f; }); // 10 * 0 before substitution
+    substitute(expr, to_pointer(counter<float>())); // substitution is applied through the expression_pointer
+    CHECK_EXPRESSION(expr, infinite_size, [](size_t i) { return 10.f * i; }); // after substitution: 10 * i
+}
+    
+} // namespace CMT_ARCH_NAME
+} // namespace kfr
diff --git a/tests/unit/base/random.cpp b/tests/unit/base/random.cpp
@@ -14,15 +14,15 @@ inline namespace CMT_ARCH_NAME
 {
 
 template <typename T, size_t N>
-static void test_random(kfr::random_bit_generator& gen, const vec<T, N>& value)
+static void test_random(random_state& state, const vec<T, N>& value)
 {
-    const vec<T, N> r = kfr::random_uniform<T, N>(gen);
+    const vec<T, N> r = kfr::random_uniform<T, N>(state);
     CHECK(r == value);
 }
 
 TEST(random_bit_generator)
 {
-    kfr::random_bit_generator gen(1, 2, 3, 4);
+    random_state gen = random_init(1, 2, 3, 4);
     test_random(gen, pack<u8>(21, 62, 88, 30, 46, 234, 205, 29, 41, 190, 212, 81, 217, 135, 218, 227));
     test_random(gen, pack<u16>(48589, 33814, 55928, 14799, 26904, 18521, 20808, 50888));
     test_random(gen, pack<u32>(1554764222, 1538765785, 2072590063, 2837641155));
@@ -66,11 +66,11 @@ TEST(random_bit_generator)
 
 TEST(gen_random_range)
 {
-    random_bit_generator gen(1, 2, 3, 4);
-    univector<fbase, 1000> v = kfr::gen_random_range<fbase>(std::ref(gen), -1.0, 1.0);
-    CHECK(kfr::minof(v) >= fbase(-1.0));
-    CHECK(kfr::maxof(v) <= fbase(1.0));
-    println(kfr::mean(v));
+    random_state gen         = random_init(1, 2, 3, 4);
+    univector<fbase, 1000> v = gen_random_range<fbase>(std::ref(gen), -1.0, 1.0);
+    CHECK(minof(v) >= fbase(-1.0));
+    CHECK(maxof(v) <= fbase(1.0));
+    // println(mean(v));
 }
 } // namespace CMT_ARCH_NAME
 } // namespace kfr
diff --git a/tests/unit/base/reduce.cpp b/tests/unit/base/reduce.cpp
@@ -5,6 +5,8 @@
  */
 
 #include <kfr/base/reduce.hpp>
+#include <kfr/base/simd_expressions.hpp>
+#include <kfr/base/univector.hpp>
 
 namespace kfr
 {
@@ -50,5 +52,12 @@ TEST(reduce)
         CHECK(product(a) == -1080);
     }
 }
+
+TEST(dotproduct) // Checks dotproduct() of two 177-element vectors against a precomputed integer value.
+{
+    univector<float, 177> v1 = counter();
+    univector<float, 177> v2 = counter() * 2 + 10;
+    CHECK(dotproduct(v1, v2) == 3821312); // 3821312 is the sum of i * (2 * i + 10) for i = 0..176
+}
 } // namespace CMT_ARCH_NAME
 } // namespace kfr
diff --git a/tests/unit/base/shape.cpp b/tests/unit/base/shape.cpp
@@ -0,0 +1,72 @@
+/**
+ * KFR (http://kfrlib.com)
+ * Copyright (C) 2016-2022 Fractalium Ltd
+ * See LICENSE.txt for details
+ */
+
+#include <kfr/base/shape.hpp>
+
+namespace kfr
+{
+inline namespace CMT_ARCH_NAME
+{
+
+TEST(shape) // Checks shape size, stride computation, index incrementing and flat <-> multi-dimensional index conversion.
+{
+    using internal_generic::increment_indices_return;
+    using internal_generic::null_index;
+    CHECK(size_of_shape(shape{ 4, 3 }) == 12); // product of the dimensions
+    CHECK(size_of_shape(shape{ 1 }) == 1);
+    CHECK(size_of_shape<1>(1) == 1); // a plain integer is accepted where shape<1> is expected
+    shape<1> sh1 = 1; // shape<1> can be initialized from an integer ...
+    sh1          = 2; // ... and assigned from one
+
+    CHECK(internal_generic::strides_for_shape(shape{ 2, 3, 4 }) == shape{ 12, 4, 1 }); // last dimension has stride 1
+
+    CHECK(internal_generic::strides_for_shape(shape{ 2, 3, 4 }, 10) == shape{ 120, 40, 10 }); // second argument scales every stride by 10
+
+    CHECK(increment_indices_return(shape{ 0, 0, 0 }, shape{ 0, 0, 0 }, shape{ 2, 3, 4 }) == shape{ 0, 0, 1 }); // last index advances first
+    CHECK(increment_indices_return(shape{ 0, 0, 3 }, shape{ 0, 0, 0 }, shape{ 2, 3, 4 }) == shape{ 0, 1, 0 }); // last index wraps and carries
+    CHECK(increment_indices_return(shape{ 0, 2, 0 }, shape{ 0, 0, 0 }, shape{ 2, 3, 4 }) == shape{ 0, 2, 1 });
+    CHECK(increment_indices_return(shape{ 0, 2, 3 }, shape{ 0, 0, 0 }, shape{ 2, 3, 4 }) == shape{ 1, 0, 0 }); // carry through two dimensions
+    CHECK(increment_indices_return(shape{ 1, 2, 3 }, shape{ 0, 0, 0 }, shape{ 2, 3, 4 }) == // incrementing the last valid index ...
+          shape{ null_index, null_index, null_index }); // ... yields null_index in every position
+
+    CHECK(shape{ 3, 4, 5 }.to_flat(shape{ 0, 0, 0 }) == 0);
+    CHECK(shape{ 3, 4, 5 }.to_flat(shape{ 2, 3, 4 }) == 59); // 2 * 20 + 3 * 5 + 4
+
+    CHECK(shape{ 3, 4, 5 }.from_flat(0) == shape{ 0, 0, 0 }); // from_flat inverts the two to_flat cases above
+    CHECK(shape{ 3, 4, 5 }.from_flat(59) == shape{ 2, 3, 4 });
+}
+TEST(shape_broadcast) // Checks the broadcasting helpers common_shape, can_assign_from and same_layout.
+{
+    using internal_generic::can_assign_from;
+    using internal_generic::common_shape;
+    using internal_generic::same_layout;
+
+    CHECK(common_shape(shape{ 1, 5 }, shape{ 5, 1 }) == shape{ 5, 5 }); // size-1 dimensions take the other operand's size
+    CHECK(common_shape(shape{ 5 }, shape{ 5, 1 }) == shape{ 5, 5 }); // operands of different rank
+    CHECK(common_shape(shape{ 1, 1, 1 }, shape{ 2, 5, 1 }) == shape{ 2, 5, 1 });
+    CHECK(common_shape(shape{ 1 }, shape{ 2, 5, 7 }) == shape{ 2, 5, 7 });
+
+    CHECK(common_shape(shape{}, shape{ 0 }) == shape{ 0 }); // empty shape combined with zero-sized dimensions
+    CHECK(common_shape(shape{}, shape{ 0, 0 }) == shape{ 0, 0 });
+    CHECK(common_shape(shape{ 0 }, shape{ 0, 0 }) == shape{ 0, 0 });
+
+    CHECK(can_assign_from(shape{ 1, 4 }, shape{ 1, 4 })); // identical shapes
+    CHECK(!can_assign_from(shape{ 1, 4 }, shape{ 4, 1 })); // source would have to grow the first dimension: rejected
+    CHECK(can_assign_from(shape{ 1, 4 }, shape{ 1, 1 })); // size-1 source dimensions are accepted
+    CHECK(can_assign_from(shape{ 1, 4 }, shape{ 1 })); // lower-rank source
+    CHECK(can_assign_from(shape{ 1, 4 }, shape{})); // empty-shape source
+
+    CHECK(same_layout(shape{ 2, 3, 4 }, shape{ 2, 3, 4 }));
+    CHECK(same_layout(shape{ 1, 2, 3, 4 }, shape{ 2, 3, 4 })); // the positive cases below differ only by dimensions of size 1
+    CHECK(same_layout(shape{ 2, 3, 4 }, shape{ 2, 1, 1, 3, 4 }));
+    CHECK(same_layout(shape{ 2, 3, 4 }, shape{ 2, 3, 4, 1 }));
+    CHECK(same_layout(shape{ 2, 1, 3, 4 }, shape{ 1, 2, 3, 4, 1 }));
+
+    CHECK(!same_layout(shape{ 2, 1, 3, 4 }, shape{ 1, 2, 4, 3, 1 })); // dimensions 3 and 4 swapped
+    CHECK(!same_layout(shape{ 2, 1, 3, 4 }, shape{ 1, 2, 4, 3, 0 })); // swapped dimensions plus a zero-sized one
+}
+} // namespace CMT_ARCH_NAME
+} // namespace kfr
diff --git a/tests/unit/base/tensor.cpp b/tests/unit/base/tensor.cpp
@@ -1,12 +1,13 @@
 /**
  * KFR (http://kfrlib.com)
- * Copyright (C) 2016  D Levin
+ * Copyright (C) 2016-2022 Fractalium Ltd
  * See LICENSE.txt for details
  */
 
-#include <kfr/base/simd_expressions.hpp>
-#include <kfr/base/math_expressions.hpp>
 #include <kfr/base/basic_expressions.hpp>
+#include <kfr/base/math_expressions.hpp>
+#include <kfr/base/reduce.hpp>
+#include <kfr/base/simd_expressions.hpp>
 #include <kfr/base/tensor.hpp>
 #include <kfr/io/tostring.hpp>
 #include <kfr/simd.hpp>
@@ -20,39 +21,6 @@ namespace kfr
 inline namespace CMT_ARCH_NAME
 {
 
-TEST(vec_deduction)
-{
-    vec v{ 1, 2, 3 };
-    static_assert(std::is_same_v<decltype(v), vec<int, 3>>);
-
-    tensor<float, 2> t2{ shape{ 20, 40 } };
-}
-
-TEST(shape)
-{
-    using internal_generic::increment_indices_return;
-    using internal_generic::null_index;
-    CHECK(size_of_shape(shape{ 4, 3 }) == 12);
-    CHECK(size_of_shape(shape{ 1 }) == 1);
-
-    CHECK(internal_generic::strides_for_shape(shape{ 2, 3, 4 }) == shape{ 12, 4, 1 });
-
-    CHECK(internal_generic::strides_for_shape(shape{ 2, 3, 4 }, 10) == shape{ 120, 40, 10 });
-
-    CHECK(increment_indices_return(shape{ 0, 0, 0 }, shape{ 0, 0, 0 }, shape{ 2, 3, 4 }) == shape{ 0, 0, 1 });
-    CHECK(increment_indices_return(shape{ 0, 0, 3 }, shape{ 0, 0, 0 }, shape{ 2, 3, 4 }) == shape{ 0, 1, 0 });
-    CHECK(increment_indices_return(shape{ 0, 2, 0 }, shape{ 0, 0, 0 }, shape{ 2, 3, 4 }) == shape{ 0, 2, 1 });
-    CHECK(increment_indices_return(shape{ 0, 2, 3 }, shape{ 0, 0, 0 }, shape{ 2, 3, 4 }) == shape{ 1, 0, 0 });
-    CHECK(increment_indices_return(shape{ 1, 2, 3 }, shape{ 0, 0, 0 }, shape{ 2, 3, 4 }) ==
-          shape{ null_index, null_index, null_index });
-
-    CHECK(shape{ 3, 4, 5 }.to_flat(shape{ 0, 0, 0 }) == 0);
-    CHECK(shape{ 3, 4, 5 }.to_flat(shape{ 2, 3, 4 }) == 59);
-
-    CHECK(shape{ 3, 4, 5 }.from_flat(0) == shape{ 0, 0, 0 });
-    CHECK(shape{ 3, 4, 5 }.from_flat(59) == shape{ 2, 3, 4 });
-}
-
 TEST(tensor_base)
 {
     tensor<float, 2> t{ shape{ 20, 40 } };
@@ -124,19 +92,7 @@ TEST(tensor_memory)
     CHECK(refs == 0);
 }
 
-// TEST(tensor_expression_assign)
-// {
-//     tensor<float, 1> t1{ shape{ 32 }, 0.f };
-
-//     t1 = counter();
-
-//     CHECK(t1.size() == 32);
-//     CHECK(t1(0) == 0.f);
-//     CHECK(t1(1) == 1.f);
-//     CHECK(t1(31) == 31.f);
-// }
-
-DTEST(tensor_expression)
+TEST(tensor_expression)
 {
     tensor<float, 1> t1{ shape{ 32 }, 0.f };
     tensor<float, 1> t2{ shape{ 32 }, 100.f };
@@ -161,6 +117,8 @@ DTEST(tensor_expression)
     t4 = 1.f;
     CHECK(t4(0, 0) == 1.f);
     CHECK(t4(5, 5) == 1.f);
+    CHECK(minof(t4) == 1);
+    CHECK(maxof(t4) == 1);
     CHECK(sum(t4) == 36);
 
     t4(trange(2, 4), trange(2, 4)) = scalar(10);
@@ -172,31 +130,17 @@ DTEST(tensor_expression)
     CHECK(t4(5, 5) == 1.f);
     CHECK(sum(t4) == 72);
 
-    t4(trange(2, 4), trange(2, 4)) = 10 + counter();
+    t4(trange(2, 4), trange(2, 4)) = 10 + counter(0, 2, 1);
 
     CHECK(t4(2, 2) == 10.f);
     CHECK(t4(2, 3) == 11.f);
     CHECK(t4(3, 2) == 12.f);
     CHECK(t4(3, 3) == 13.f);
+    CHECK(sum(t4) == 78);
 }
 
 TEST(tensor_broadcast)
 {
-    using internal_generic::can_assign_from;
-    using internal_generic::common_shape;
-    using internal_generic::same_layout;
-
-    CHECK(common_shape(shape{ 1, 5 }, shape{ 5, 1 }) == shape{ 5, 5 });
-    CHECK(common_shape(shape{ 5 }, shape{ 5, 1 }) == shape{ 5, 5 });
-    CHECK(common_shape(shape{ 1, 1, 1 }, shape{ 2, 5, 1 }) == shape{ 2, 5, 1 });
-    CHECK(common_shape(shape{ 1 }, shape{ 2, 5, 7 }) == shape{ 2, 5, 7 });
-
-    CHECK(can_assign_from(shape{ 1, 4 }, shape{ 1, 4 }));
-    CHECK(!can_assign_from(shape{ 1, 4 }, shape{ 4, 1 }));
-    CHECK(can_assign_from(shape{ 1, 4 }, shape{ 1, 1 }));
-    CHECK(can_assign_from(shape{ 1, 4 }, shape{ 1 }));
-    CHECK(can_assign_from(shape{ 1, 4 }, shape{}));
-
     tensor<float, 2> t1{ shape{ 1, 5 }, { 1.f, 2.f, 3.f, 4.f, 5.f } };
     tensor<float, 2> t2{ shape{ 5, 1 }, { 10.f, 20.f, 30.f, 40.f, 50.f } };
     tensor<float, 1> t4{ shape{ 5 }, { 1.f, 2.f, 3.f, 4.f, 5.f } };
@@ -211,15 +155,6 @@ TEST(tensor_broadcast)
     tensor<float, 2> t5 = tapply(t4, t2, fn::add{});
     // tensor<float, 2> t5 = t4 + t2;
     CHECK(t5 == tresult);
-
-    CHECK(same_layout(shape{ 2, 3, 4 }, shape{ 2, 3, 4 }));
-    CHECK(same_layout(shape{ 1, 2, 3, 4 }, shape{ 2, 3, 4 }));
-    CHECK(same_layout(shape{ 2, 3, 4 }, shape{ 2, 1, 1, 3, 4 }));
-    CHECK(same_layout(shape{ 2, 3, 4 }, shape{ 2, 3, 4, 1 }));
-    CHECK(same_layout(shape{ 2, 1, 3, 4 }, shape{ 1, 2, 3, 4, 1 }));
-
-    CHECK(!same_layout(shape{ 2, 1, 3, 4 }, shape{ 1, 2, 4, 3, 1 }));
-    CHECK(!same_layout(shape{ 2, 1, 3, 4 }, shape{ 1, 2, 4, 3, 0 }));
 }
 } // namespace CMT_ARCH_NAME
 
@@ -375,8 +310,9 @@ TEST(tensor_counter)
                    { { 102.0, 112.0, 122.0, 132.0 } },
                } });
 }
-
-DTEST(tensor_dims)
+namespace tests
+{
+TEST(tensor_dims)
 {
     tensor<double, 6> t12{ shape{ 2, 3, 4, 5, 6, 7 } };
 
@@ -387,6 +323,7 @@ DTEST(tensor_dims)
 
     CHECK(t12.reduce(std::plus<>{}, 0) == 1648888920);
 }
+} // namespace tests
 
 TEST(vec_from_cvals)
 {
@@ -397,13 +334,14 @@ TEST(vec_from_cvals)
 
 TEST(xfunction_test)
 {
-    auto f = xfunction{ xwitharguments{ 3.f, 4.f }, std::plus<>{} };
+    auto f = expression_function{ expression_with_arguments{ 3.f, 4.f }, std::plus<>{} };
     float v;
     process(v, f);
     CHECK(v == 7.f);
-    static_assert(std::is_same_v<decltype(f), xfunction<std::plus<>, float, float>>);
+    static_assert(std::is_same_v<decltype(f), expression_function<std::plus<>, float, float>>);
 
-    auto f2 = xfunction{ xwitharguments{ 10.f, std::array{ 1.f, 2.f, 3.f, 4.f, 5.f } }, std::plus<>{} };
+    auto f2 = expression_function{ expression_with_arguments{ 10.f, std::array{ 1.f, 2.f, 3.f, 4.f, 5.f } },
+                                   std::plus<>{} };
     std::array<float, 5> v2;
     process(v2, f2);
     CHECK(v2 == std::array{ 11.f, 12.f, 13.f, 14.f, 15.f });
@@ -413,9 +351,11 @@ TEST(xfunction_test)
     process(v3, f3);
     CHECK(v3 == std::array{ 11.f, 12.f, 13.f, 14.f, 15.f });
 
-    auto f4 = scalar(0) + std::array<std::array<float, 1>, 5>{
-        { { { 100.f } }, { { 200.f } }, { { 300.f } }, { { 400.f } }, { { 500.f } } }
-    } + std::array{ 1.f, 2.f, 3.f, 4.f, 5.f };
+    auto f4 = scalar(0) +
+              std::array<std::array<float, 1>, 5>{
+                  { { { 100.f } }, { { 200.f } }, { { 300.f } }, { { 400.f } }, { { 500.f } } }
+              } +
+              std::array{ 1.f, 2.f, 3.f, 4.f, 5.f };
     std::array<std::array<float, 5>, 5> v4;
 
     CHECK(expression_traits<decltype(f4)>::shapeof(f4) == shape{ 5, 5 });
@@ -435,9 +375,9 @@ TEST(xfunction_test2)
 }
 
 template <typename Type, index_t Dims>
-KFR_FUNCTION xcounter<Type, Dims> debug_counter(uint64_t scale = 10)
+KFR_FUNCTION expression_counter<Type, Dims> debug_counter(uint64_t scale = 10)
 {
-    xcounter<Type, Dims> result;
+    expression_counter<Type, Dims> result;
     result.start = 0;
     uint64_t val = 1;
     for (size_t i = 0; i < Dims; i++)
@@ -554,10 +494,10 @@ static void test_reshape(const tensor<T, dims1>& t1, const tensor<T, dims>&... t
     test_reshape(ts...);
 }
 
-TEST(xreshape)
+TEST(expression_reshape)
 {
     std::array<float, 12> x;
-    process(reshape(x, shape{ 3, 4 }), xcounter<float, 2>{ 0, { 10, 1 } });
+    process(reshape(x, shape{ 3, 4 }), expression_counter<float, 2>{ 0, { 10, 1 } });
     CHECK(x == std::array<float, 12>{ { 0, 1, 2, 3, 10, 11, 12, 13, 20, 21, 22, 23 } });
 
     test_reshape(tensor<float, 1>{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 }, //
@@ -642,7 +582,7 @@ extern "C" __declspec(dllexport) void assembly_test9(int64_t* dst, size_t stride
 }
 constexpr inline index_t rank = 1;
 extern "C" __declspec(dllexport) void assembly_test10(tensor<double, rank>& t12,
-                                                      const xcounter<double, rank>& ctr)
+                                                      const expression_counter<double, rank>& ctr)
 {
     process(t12, ctr);
 }
@@ -650,8 +590,8 @@ extern "C" __declspec(dllexport) void assembly_test11(f64x2& x, u64x2 y) { x = y
 
 extern "C" __declspec(dllexport) void assembly_test12(
     std::array<std::array<uint32_t, 4>, 4>& x,
-    const xfunction<std::plus<>, std::array<std::array<uint32_t, 1>, 4>&,
-                    std::array<std::array<uint32_t, 4>, 1>&>& y)
+    const expression_function<std::plus<>, std::array<std::array<uint32_t, 1>, 4>&,
+                              std::array<std::array<uint32_t, 4>, 1>&>& y)
 {
     process(x, y);
 }
@@ -667,13 +607,13 @@ using array2d = std::array<std::array<T, N2>, N1>;
 extern "C" __declspec(dllexport) void assembly_test14(std::array<float, 32>& x,
                                                       const std::array<float, 32>& y)
 {
-    process(x, x_reverse(y));
+    process(x, reverse(y));
 }
 
 extern "C" __declspec(dllexport) void assembly_test15(array2d<float, 32, 32>& x,
                                                       const array2d<float, 32, 32>& y)
 {
-    process(x, x_reverse(y));
+    process(x, reverse(y));
 }
 
 extern "C" __declspec(dllexport) void assembly_test16a(array2d<double, 8, 2>& x,
@@ -689,28 +629,28 @@ extern "C" __declspec(dllexport) void assembly_test16b(array2d<double, 8, 2>& x,
 
 extern "C" __declspec(dllexport) void assembly_test17a(const tensor<double, 2>& x, const tensor<double, 2>& y)
 {
-    xfunction ysqr = xfunction{ xwitharguments{ y }, fn::sqr{} };
+    expression_function ysqr = expression_function{ expression_with_arguments{ y }, fn::sqr{} };
     process<8, 0>(x, ysqr);
 }
 extern "C" __declspec(dllexport) void assembly_test17b(const tensor<double, 2>& x, const tensor<double, 2>& y)
 {
-    xfunction ysqr = xfunction{ xwitharguments{ y }, fn::sqr{} };
+    expression_function ysqr = expression_function{ expression_with_arguments{ y }, fn::sqr{} };
     process<2, 1>(x, ysqr);
 }
 
 extern "C" __declspec(dllexport) void assembly_test18a(const tensor<double, 2>& x, const tensor<double, 2>& y)
 {
-    xfunction ysqr = xfunction{ xwitharguments{ y }, fn::sqr{} };
-    process<8, 0>(fixshape(x, fixed_shape<8, 2>{}), fixshape(ysqr, fixed_shape<8, 2>{}));
+    expression_function ysqr = expression_function{ expression_with_arguments{ y }, fn::sqr{} };
+    process<8, 0>(fixshape(x, fixed_shape<8, 2>), fixshape(ysqr, fixed_shape<8, 2>));
 }
 extern "C" __declspec(dllexport) void assembly_test18b(const tensor<double, 2>& x, const tensor<double, 2>& y)
 {
-    xfunction ysqr = xfunction{ xwitharguments{ y }, fn::sqr{} };
-    process<2, 1>(fixshape(x, fixed_shape<8, 2>{}), fixshape(ysqr, fixed_shape<8, 2>{}));
+    expression_function ysqr = expression_function{ expression_with_arguments{ y }, fn::sqr{} };
+    process<2, 1>(fixshape(x, fixed_shape<8, 2>), fixshape(ysqr, fixed_shape<8, 2>));
 }
 
 extern "C" __declspec(dllexport) void assembly_test19(const tensor<double, 2>& x,
-                                                      const xreshape<tensor<double, 2>, 2>& y)
+                                                      const expression_reshape<tensor<double, 2>, 2>& y)
 {
     process(x, y);
 }
@@ -728,6 +668,12 @@ extern "C" __declspec(dllexport) shape<4> assembly_test21(const shape<4>& x, siz
 {
     return x.from_flat(fl);
 }
+extern "C" __declspec(dllexport) float assembly_test22(const std::array<float, 440>& x,
+                                                       const std::array<float, 440>& y)
+{
+    return dotproduct(x, y);
+}
+extern "C" __declspec(dllexport) float assembly_test23(const std::array<float, 440>& x) { return rms(x); }
 #endif
 
 struct val
@@ -750,15 +696,16 @@ val& lvint_func()
     static val v;
     return v;
 }
-TEST(xwitharguments)
+TEST(expression_with_arguments)
 {
-    xfunction fn1 = xfunction{ xwitharguments{ rvint_func() }, fn::add{} };
+    expression_function fn1 = expression_function{ expression_with_arguments{ rvint_func() }, fn::add{} };
     static_assert(std::is_same_v<decltype(fn1)::nth<0>, val>);
 
-    xfunction fn2 = xfunction{ xwitharguments{ lvint_func() }, fn::add{} };
+    expression_function fn2 = expression_function{ expression_with_arguments{ lvint_func() }, fn::add{} };
     static_assert(std::is_same_v<decltype(fn2)::nth<0>, val&>);
 
-    xfunction fn3 = xfunction{ xwitharguments{ std::as_const(lvint_func()) }, fn::add{} };
+    expression_function fn3 =
+        expression_function{ expression_with_arguments{ std::as_const(lvint_func()) }, fn::add{} };
     static_assert(std::is_same_v<decltype(fn3)::nth<0>, const val&>);
 }
 
@@ -804,13 +751,13 @@ TEST(complex_tensors)
     tensor<complex<float>, 1> t1{
         complex<float>(0, -1),
     };
-    CHECK(trender(xfunction{ xwitharguments{ t1, complex<float>(0, 1) }, fn::mul{} }) ==
+    CHECK(trender(expression_function{ expression_with_arguments{ t1, complex<float>(0, 1) }, fn::mul{} }) ==
           tensor<complex<float>, 1>{ complex<float>(1, 0) });
-    CHECK(trender(xfunction{ xwitharguments{ t1, complex<float>(1, 0) }, fn::mul{} }) ==
+    CHECK(trender(expression_function{ expression_with_arguments{ t1, complex<float>(1, 0) }, fn::mul{} }) ==
           tensor<complex<float>, 1>{ complex<float>(0, -1) });
-    CHECK(trender(xfunction{ xwitharguments{ t1, complex<float>(0, -1) }, fn::mul{} }) ==
+    CHECK(trender(expression_function{ expression_with_arguments{ t1, complex<float>(0, -1) }, fn::mul{} }) ==
           tensor<complex<float>, 1>{ complex<float>(-1, 0) });
-    CHECK(trender(xfunction{ xwitharguments{ t1, complex<float>(-1, 0) }, fn::mul{} }) ==
+    CHECK(trender(expression_function{ expression_with_arguments{ t1, complex<float>(-1, 0) }, fn::mul{} }) ==
           tensor<complex<float>, 1>{ complex<float>(0, 1) });
 }
 
@@ -829,12 +776,6 @@ TEST(from_ilist)
     CHECK(t4 == tensor<float, 3>(shape{ 2, 2, 2 }, { 10, 20, 30, 40, 50, 60, 70, 80 }));
 }
 
-TEST(enumerate)
-{
-    CHECK(enumerate(vec_shape<int, 4>{}, 4) == vec{ 0, 4, 8, 12 });
-    CHECK(enumerate(vec_shape<int, 8>{}, 3) == vec{ 0, 3, 6, 9, 12, 15, 18, 21 });
-    CHECK(enumerate(vec_shape<int, 7>{}, 3) == vec{ 0, 3, 6, 9, 12, 15, 18 });
-}
 } // namespace CMT_ARCH_NAME
 
 } // namespace kfr
diff --git a/tests/unit/dsp/biquad.cpp b/tests/unit/dsp/biquad.cpp
@@ -0,0 +1,113 @@
+/**
+ * KFR (http://kfrlib.com)
+ * Copyright (C) 2016-2022 Fractalium Ltd
+ * See LICENSE.txt for details
+ */
+
+#include <kfr/base/reduce.hpp>
+#include <kfr/base/simd_expressions.hpp>
+#include <kfr/base/univector.hpp>
+#include <kfr/dsp/biquad.hpp>
+#include <kfr/dsp/biquad_design.hpp>
+#include <kfr/dsp/special.hpp>
+
+namespace kfr
+{
+inline namespace CMT_ARCH_NAME
+{
+
+template <typename T, typename... Ts, univector_tag Tag> // Base case: the first array already has element type T.
+inline const univector<T, Tag>& choose_array(const univector<T, Tag>& array, const univector<Ts, Tag>&...)
+{
+    return array; // the remaining arrays are ignored
+}
+
+template <typename T, typename T2, typename... Ts, univector_tag Tag, KFR_ENABLE_IF(!is_same<T, T2>)> // Recursive case: enabled only when the first array's element type T2 differs from T.
+inline const univector<T, Tag>& choose_array(const univector<T2, Tag>&, const univector<Ts, Tag>&... arrays)
+{
+    return choose_array<T>(arrays...); // drop the first array and search the rest
+}
+
+TEST(biquad_lowpass1) // Compares the impulse response of biquad_lowpass(0.1, 0.7) with stored reference values for float and double.
+{
+    testo::matrix(named("type") = ctypes_t<float, double>{}, // the lambda below is invoked for each listed type
+                  [](auto type)
+                  {
+                      using T = typename decltype(type)::type;
+
+                      const biquad_params<T> bq = biquad_lowpass<T>(0.1, 0.7);
+
+                      constexpr size_t size = 32; // number of impulse-response samples compared
+
+                      const univector<float, size> test_vector_f32{ // reference values for T = float (hex float literals)
+                          +0x8.9bce2p-7,  +0xd.8383ep-6,  +0x8.f908dp-5,  +0xe.edc21p-6,  +0x9.ae104p-6,
+                          +0x9.dcc24p-7,  +0xd.50584p-9,  -0xf.2668p-13,  -0xd.09ca1p-10, -0xe.15995p-10,
+                          -0xa.b90d2p-10, -0xc.edea4p-11, -0xb.f14eap-12, -0xc.2cb44p-14, +0xb.4a4dep-15,
+                          +0xb.685dap-14, +0xa.b181fp-14, +0xf.0cb2bp-15, +0x8.695d6p-15, +0xd.bedd4p-17,
+                          +0xf.5474p-20,  -0xd.bb266p-19, -0x9.63ca1p-18, -0xf.ca567p-19, -0xa.5231p-19,
+                          -0xa.9e934p-20, -0xe.ab52p-22,  +0xa.3c4cp-26,  +0xd.721ffp-23, +0xe.ccc1ap-23,
+                          +0xb.5f248p-23, +0xd.d2c9ap-24,
+                      };
+
+                      const univector<double, size> test_vector_f64{ // reference values for T = double
+                          +0x8.9bce2bf3663e8p-7,  +0xd.8384010fdf1dp-6,   +0x8.f908e7a36df6p-5,
+                          +0xe.edc2332a6d0bp-6,   +0x9.ae104af1da9ap-6,   +0x9.dcc235ef68e7p-7,
+                          +0xd.5057ee425e05p-9,   -0xf.266e42a99aep-13,   -0xd.09cad73642208p-10,
+                          -0xe.1599f32a83dp-10,   -0xa.b90d8910a117p-10,  -0xc.edeaabb890948p-11,
+                          -0xb.f14edbb55383p-12,  -0xc.2cb39b86f2dap-14,  +0xb.4a506ecff055p-15,
+                          +0xb.685edfdb55358p-14, +0xa.b182e32f8e298p-14, +0xf.0cb3dfd894b2p-15,
+                          +0x8.695df725b4438p-15, +0xd.beddc3606b9p-17,   +0xf.547004d20874p-20,
+                          -0xd.bb29b25b49b6p-19,  -0x9.63cb9187da1dp-18,  -0xf.ca588634fc618p-19,
+                          -0xa.52322d320da78p-19, -0xa.9e9420154e4p-20,   -0xe.ab51f7b0335ap-22,
+                          +0xa.3c6479980e1p-26,   +0xd.7223836599fp-23,   +0xe.ccc47ddd18678p-23,
+                          +0xb.5f265b1be1728p-23, +0xd.d2cb83f8483f8p-24,
+                      };
+
+                      const univector<T, size> ir = biquad(bq, unitimpulse<T>()); // filter a unit impulse; the result is stored in 32 elements
+
+                      CHECK(absmaxof(choose_array<T>(test_vector_f32, test_vector_f64) - ir) == 0); // exact match required against the table whose element type is T
+                  });
+}
+
+TEST(biquad_lowpass2) // Compares the impulse response of biquad_lowpass(0.45, 0.2) with stored reference values for float and double.
+{
+    testo::matrix(named("type") = ctypes_t<float, double>{}, // the lambda below is invoked for each listed type
+                  [](auto type)
+                  {
+                      using T = typename decltype(type)::type;
+
+                      const biquad_params<T> bq = biquad_lowpass<T>(0.45, 0.2);
+
+                      constexpr size_t size = 32; // number of impulse-response samples compared
+
+                      const univector<float, size> test_vector_f32{ // reference values for T = float (hex float literals)
+                          +0x8.ce416p-4,  +0x8.2979p-4,   -0x8.a9d04p-7,  +0xe.aeb3p-11,  +0x8.204f8p-13,
+                          -0x8.20d78p-12, +0x8.3379p-12,  -0xf.83d81p-13, +0xe.8b5c4p-13, -0xd.9ddadp-13,
+                          +0xc.bedfcp-13, -0xb.ee123p-13, +0xb.2a9e5p-13, -0xa.73ac4p-13, +0x9.c86f6p-13,
+                          -0x9.2828p-13,  +0x8.92229p-13, -0x8.05b7p-13,  +0xf.048ffp-14, -0xe.0e849p-14,
+                          +0xd.28384p-14, -0xc.50a9p-14,  +0xb.86e56p-14, -0xa.ca0b6p-14, +0xa.19476p-14,
+                          -0x9.73d38p-14, +0x8.d8f64p-14, -0x8.48024p-14, +0xf.80aa2p-15, -0xe.82ad8p-15,
+                          +0xd.94f22p-15, -0xc.b66d9p-15,
+                      };
+
+                      const univector<double, size> test_vector_f64{ // reference values for T = double
+                          +0x8.ce416c0d31e88p-4,  +0x8.2978efe51dafp-4,   -0x8.a9d088b81da6p-7,
+                          +0xe.aeb56c029358p-11,  +0x8.20492639873ap-13,  -0x8.20d4e21aab538p-12,
+                          +0x8.3376b2d53b4a8p-12, -0xf.83d3d1c17343p-13,  +0xe.8b584f0dd5ac8p-13,
+                          -0xd.9dd740ceaacf8p-13, +0xc.bedc85e7a621p-13,  -0xb.ee0f472bf8968p-13,
+                          +0xb.2a9baed1fe6cp-13,  -0xa.73a9d1670f4ep-13,  +0x9.c86d29d297798p-13,
+                          -0x9.2825f4d894088p-13, +0x8.9220a956d651p-13,  -0x8.05b539fdd79e8p-13,
+                          +0xf.048cb5194cfa8p-14, -0xe.0e819fa128938p-14, +0xd.2835957d684cp-14,
+                          -0xc.50a69c2a8dc18p-14, +0xb.86e33bbaf3cbp-14,  -0xa.ca097058af2cp-14,
+                          +0xa.1945ad1703dcp-14,  -0x9.73d1eef7d8b68p-14, +0x8.d8f4df1bb3efp-14,
+                          -0x8.48010323c6f7p-14,  +0xf.80a7f5baeeb2p-15,  -0xe.82ab94bb68a8p-15,
+                          +0xd.94f05f80af008p-15, -0xc.b66c0799b21a8p-15,
+                      };
+
+                      const univector<T, size> ir = biquad(bq, unitimpulse<T>()); // filter a unit impulse; the result is stored in 32 elements
+
+                      CHECK(absmaxof(choose_array<T>(test_vector_f32, test_vector_f64) - ir) == 0); // exact match required against the table whose element type is T
+                  });
+}
+} // namespace CMT_ARCH_NAME
+} // namespace kfr
diff --git a/tests/unit/dsp/biquad_design.cpp b/tests/unit/dsp/biquad_design.cpp
@@ -0,0 +1,7 @@
+/**
+ * KFR (http://kfrlib.com)
+ * Copyright (C) 2016-2022 Fractalium Ltd
+ * See LICENSE.txt for details
+ */
+
+#include <kfr/dsp/biquad_design.hpp>
diff --git a/tests/unit/dsp/dsp.cpp b/tests/unit/dsp/dsp.cpp
@@ -0,0 +1,7 @@
+/**
+ * KFR (http://kfrlib.com)
+ * Copyright (C) 2016-2022 Fractalium Ltd
+ * See LICENSE.txt for details
+ */
+
+#include <kfr/dsp.hpp>
diff --git a/tests/unit/dsp/ebu.cpp b/tests/unit/dsp/ebu.cpp
@@ -0,0 +1,270 @@
+/**
+ * KFR (http://kfrlib.com)
+ * Copyright (C) 2016-2022 Fractalium Ltd
+ * See LICENSE.txt for details
+ */
+
+#include <kfr/dsp/ebu.hpp>
+#include <kfr/dsp/oscillators.hpp>
+
+namespace kfr
+{
+inline namespace CMT_ARCH_NAME
+{
+
+struct TestFragment // one segment of the test signal rendered by ebu_test_stereo
+{
+    float gain; // dB, converted with dB_to_amp() before scaling the tone
+    float duration; // seconds, multiplied by the sample rate to get a sample count
+    float frequency; // Hz, passed to phasor() together with the sample rate
+};
+
+struct TestFragmentMultichannel // one segment of the test signal rendered by ebu_test_multichannel
+{
+    float gain_L_R; // dB, one buffer fed to both the Left and Right inputs
+    float gain_C; // dB, Center input
+    float gain_Ls_Rs; // dB, one buffer fed to both the LeftSurround and RightSurround inputs
+    float duration; // seconds, multiplied by the sample rate to get a sample count
+    float frequency; // Hz, the same tone frequency is used for every channel
+};
+
+template <typename T>
+static void ebu_test_stereo(int sample_rate, const std::initializer_list<TestFragment>& fragments, T refM,
+                            T refS, T refI, T refLRA)
+{
+    // Feeds one tone sequence to both inputs of a stereo ebu_r128; a NaN reference skips its check.
+    ebu_r128<T> loudness(sample_rate, { Speaker::Left, Speaker::Right });
+
+    size_t total_length = 0;
+    for (const TestFragment& fragment : fragments)
+        total_length += static_cast<size_t>(fragment.duration * sample_rate);
+
+    univector<T> left_right(total_length);
+    size_t offset = 0;
+    for (const TestFragment& fragment : fragments)
+    {
+        const size_t count = static_cast<size_t>(fragment.duration * sample_rate);
+        left_right.slice(offset, count) =
+            dB_to_amp(fragment.gain) * sinenorm(phasor<float>(fragment.frequency, sample_rate));
+        offset += count;
+    }
+
+    // Only whole packets are processed; a trailing partial packet is ignored.
+    const auto packet = loudness.packet_size();
+    for (size_t index = 0; index < total_length / packet; ++index)
+        loudness.process_packet(
+            { left_right.slice(index * packet, packet), left_right.slice(index * packet, packet) });
+    T M, S, I, RL, RH;
+    loudness.get_values(M, S, I, RL, RH);
+    if (!std::isnan(refM))
+    {
+        testo::scope note(as_string("M = ", fmt<'f', -1, 2>(M)));
+        CHECK(std::abs(M - refM) < 0.05f);
+    }
+    if (!std::isnan(refS))
+    {
+        testo::scope note(as_string("S = ", fmt<'f', -1, 2>(S)));
+        CHECK(std::abs(S - refS) < 0.05f);
+    }
+    if (!std::isnan(refI))
+    {
+        testo::scope note(as_string("I = ", fmt<'f', -1, 2>(I)));
+        CHECK(std::abs(I - refI) < 0.05f);
+    }
+    if (!std::isnan(refLRA))
+    {
+        testo::scope note(as_string("LRA = ", fmt<'f', -1, 2>((RH - RL))));
+        CHECK(std::abs((RH - RL) - refLRA) < 0.05f);
+    }
+}
+
+template <typename T>
+static void ebu_test_multichannel(int sample_rate,
+                                  const std::initializer_list<TestFragmentMultichannel>& fragments, T refM,
+                                  T refS, T refI, T refLRA)
+{
+    // Five-input variant of the stereo helper; a NaN reference skips the corresponding check.
+    ebu_r128<T> loudness(sample_rate, { Speaker::Left, Speaker::Right, Speaker::Center, Speaker::LeftSurround,
+                                        Speaker::RightSurround });
+
+    size_t total_length = 0;
+    for (const TestFragmentMultichannel& fragment : fragments)
+        total_length += static_cast<size_t>(fragment.duration * sample_rate);
+
+    univector<T> left_right(total_length);
+    univector<T> center(total_length);
+    univector<T> surround(total_length);
+    size_t offset = 0;
+    for (const TestFragmentMultichannel& fragment : fragments)
+    {
+        const size_t count = static_cast<size_t>(fragment.duration * sample_rate);
+        left_right.slice(offset, count) =
+            dB_to_amp(fragment.gain_L_R) * sinenorm(phasor<float>(fragment.frequency, sample_rate));
+        center.slice(offset, count) =
+            dB_to_amp(fragment.gain_C) * sinenorm(phasor<float>(fragment.frequency, sample_rate));
+        surround.slice(offset, count) =
+            dB_to_amp(fragment.gain_Ls_Rs) * sinenorm(phasor<float>(fragment.frequency, sample_rate));
+        offset += count;
+    }
+
+    // Left/Right share one buffer, as do the two surround inputs; only whole packets are processed.
+    const auto packet = loudness.packet_size();
+    for (size_t index = 0; index < total_length / packet; ++index)
+        loudness.process_packet(
+            { left_right.slice(index * packet, packet), left_right.slice(index * packet, packet),
+              center.slice(index * packet, packet), surround.slice(index * packet, packet),
+              surround.slice(index * packet, packet) });
+    // Compare each meter value only when its reference is not NaN.
+    T M, S, I, RL, RH;
+    loudness.get_values(M, S, I, RL, RH);
+    if (!std::isnan(refM))
+    {
+        testo::scope note(as_string("M = ", fmt<'f', -1, 2>(M)));
+        CHECK(std::abs(M - refM) < 0.05f);
+    }
+    if (!std::isnan(refS))
+    {
+        testo::scope note(as_string("S = ", fmt<'f', -1, 2>(S)));
+        CHECK(std::abs(S - refS) < 0.05f);
+    }
+    if (!std::isnan(refI))
+    {
+        testo::scope note(as_string("I = ", fmt<'f', -1, 2>(I)));
+        CHECK(std::abs(I - refI) < 0.05f);
+    }
+    if (!std::isnan(refLRA))
+    {
+        testo::scope note(as_string("LRA = ", fmt<'f', -1, 2>((RH - RL))));
+        CHECK(std::abs((RH - RL) - refLRA) < 0.05f);
+    }
+}
+
+TEST(ebu_stereo_1_and_2) // one 20 s, 1000 Hz tone per call; M, S and I must each match the tone's gain
+{
+    testo::matrix(named("type")        = ctypes_t<float, double>{}, // run for float and double
+                  named("sample_rate") = std::vector<int>{ 44100, 48000 }, // and for both sample rates
+                  [](auto type, int sample_rate)
+                  {
+                      using T = typename decltype(type)::type;
+
+                      ebu_test_stereo<T>(sample_rate, { { -23.f, 20.f, 1000.f } }, -23.f, -23.f, -23.f, NAN);
+                      ebu_test_stereo<T>(sample_rate, { { -33.f, 20.f, 1000.f } }, -33.f, -33.f, -33.f, NAN);
+                  });
+}
+
+TEST(ebu_stereo_3_4_and_5) // multi-level tone sequences; only I is compared (M, S and LRA are NaN)
+{
+    testo::matrix(
+        named("type") = ctypes_t<float, double>{}, named("sample_rate") = std::vector<int>{ 44100, 48000 },
+        [](auto type, int sample_rate)
+        {
+            using T = typename decltype(type)::type;
+
+            ebu_test_stereo<T>(sample_rate,
+                               { { -36.f, 10.f, 1000.f }, { -23.f, 60.f, 1000.f }, { -36.f, 10.f, 1000.f } },
+                               NAN, NAN, -23.f, NAN); // refM, refS, refI, refLRA
+            ebu_test_stereo<T>(sample_rate,
+                               { { -72.f, 10.f, 1000.f },
+                                 { -36.f, 10.f, 1000.f },
+                                 { -23.f, 60.f, 1000.f },
+                                 { -36.f, 10.f, 1000.f },
+                                 { -72.f, 10.f, 1000.f } },
+                               NAN, NAN, -23.f, NAN); // refM, refS, refI, refLRA
+        });
+}
+
+TEST(ebu_multichannel_6) // five-input signal with per-group gains; only I is compared against -23
+{
+    testo::matrix(named("type")        = ctypes_t<float, double>{},
+                  named("sample_rate") = std::vector<int>{ 44100, 48000 },
+                  [](auto type, int sample_rate)
+                  {
+                      using T = typename decltype(type)::type;
+
+                      // fragment fields: gain_L_R, gain_C, gain_Ls_Rs, duration, frequency
+                      ebu_test_multichannel<T>(sample_rate, { { -28.f, -24.f, -30.f, 20.f, 1000.f } }, NAN,
+                                               NAN, -23.f, NAN);
+                  });
+}
+
+TEST(ebu_stereo_9) // five repetitions of -20 dB for 1.34 s then -30 dB for 1.66 s; only S is compared
+{
+    testo::matrix(named("type")        = ctypes_t<float, double>{},
+                  named("sample_rate") = std::vector<int>{ 44100, 48000 },
+                  [](auto type, int sample_rate)
+                  {
+                      using T = typename decltype(type)::type;
+
+                      ebu_test_stereo<T>(sample_rate,
+                                         { { -20.f, 1.34f, 1000.f },
+                                           { -30.f, 1.66f, 1000.f },
+                                           { -20.f, 1.34f, 1000.f },
+                                           { -30.f, 1.66f, 1000.f },
+                                           { -20.f, 1.34f, 1000.f },
+                                           { -30.f, 1.66f, 1000.f },
+                                           { -20.f, 1.34f, 1000.f },
+                                           { -30.f, 1.66f, 1000.f },
+                                           { -20.f, 1.34f, 1000.f },
+                                           { -30.f, 1.66f, 1000.f } },
+                                         NAN, -23.f, NAN, NAN); // refM, refS, refI, refLRA
+                  });
+}
+
+TEST(ebu_stereo_12) // 25 repetitions of -20 dB for 0.18 s then -30 dB for 0.22 s; only M is compared
+{
+    testo::matrix(
+        named("type") = ctypes_t<float, double>{}, named("sample_rate") = std::vector<int>{ 44100, 48000 },
+        [](auto type, int sample_rate)
+        {
+            using T = typename decltype(type)::type;
+
+            ebu_test_stereo<T>(sample_rate,
+                               { { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f },
+                                 { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f },
+                                 { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f },
+                                 { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f },
+                                 { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f },
+                                 { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f },
+                                 { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f },
+                                 { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f },
+                                 { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f },
+                                 { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f },
+                                 { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f },
+                                 { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f },
+                                 { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f },
+                                 { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f },
+                                 { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f },
+                                 { -30.f, 0.22f, 1000.f }, { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f },
+                                 { -20.f, 0.18f, 1000.f }, { -30.f, 0.22f, 1000.f } },
+                               -23.f, NAN, NAN, NAN); // refM, refS, refI, refLRA
+        });
+}
+
+TEST(ebu_lra_1_2_3_and_4) // only LRA (RH - RL) is compared: expected 10, 5, 20 and 15 in turn
+{
+    testo::matrix(named("type")        = ctypes_t<float, double>{},
+                  named("sample_rate") = std::vector<int>{ 44100, 48000 },
+                  [](auto type, int sample_rate)
+                  {
+                      using T = typename decltype(type)::type;
+
+                      ebu_test_stereo<T>(sample_rate, { { -20.f, 20.f, 1000.f }, { -30.f, 20.f, 1000.f } },
+                                         NAN, NAN, NAN, 10.f); // refM, refS, refI, refLRA
+
+                      ebu_test_stereo<T>(sample_rate, { { -20.f, 20.f, 1000.f }, { -15.f, 20.f, 1000.f } },
+                                         NAN, NAN, NAN, 5.f);
+
+                      ebu_test_stereo<T>(sample_rate, { { -40.f, 20.f, 1000.f }, { -20.f, 20.f, 1000.f } },
+                                         NAN, NAN, NAN, 20.f);
+
+                      ebu_test_stereo<T>(sample_rate,
+                                         { { -50.f, 20.f, 1000.f },
+                                           { -35.f, 20.f, 1000.f },
+                                           { -20.f, 20.f, 1000.f },
+                                           { -35.f, 20.f, 1000.f },
+                                           { -50.f, 20.f, 1000.f } },
+                                         NAN, NAN, NAN, 15.f);
+                  });
+}
+} // namespace CMT_ARCH_NAME
+
+} // namespace kfr
diff --git a/tests/unit/dsp/fir.cpp b/tests/unit/dsp/fir.cpp
@@ -0,0 +1,234 @@
+/**
+ * KFR (http://kfrlib.com)
+ * Copyright (C) 2016-2022 Fractalium Ltd
+ * See LICENSE.txt for details
+ */
+
+#include <complex>
+#include <kfr/dsp/fir.hpp>
+
+namespace kfr
+{
+inline namespace CMT_ARCH_NAME
+{
+
+TEST(fir) // compares fir(), short_fir() and moving_sum() with reference sums computed per output index
+{
+#ifdef CMT_COMPILER_IS_MSVC
+    // testo::matrix causes an error in MSVC, so the float and double cases are written out by hand
+    {
+        using T = float;
+
+        const univector<T, 100> data = counter() + sequence(1, 2, -10, 100) + sequence(0, -7, 0.5);
+        const univector<T, 6> taps{ 1, 2, -2, 0.5, 0.0625, 4 };
+
+        CHECK_EXPRESSION(fir(data, taps), 100,
+                         [&](size_t index) -> T
+                         {
+                             T result = 0;
+                             for (size_t i = 0; i < taps.size(); i++)
+                                 result += data.get(index - i, 0) * taps[i]; // 0 is get()'s fallback argument
+                             return result;
+                         });
+
+        CHECK_EXPRESSION(short_fir(data, taps), 100,
+                         [&](size_t index) -> T
+                         {
+                             T result = 0;
+                             for (size_t i = 0; i < taps.size(); i++)
+                                 result += data.get(index - i, 0) * taps[i];
+                             return result;
+                         });
+    }
+    {
+        using T = double;
+
+        const univector<T, 100> data = counter() + sequence(1, 2, -10, 100) + sequence(0, -7, 0.5);
+        const univector<T, 6> taps{ 1, 2, -2, 0.5, 0.0625, 4 };
+
+        CHECK_EXPRESSION(fir(data, taps), 100,
+                         [&](size_t index) -> T
+                         {
+                             T result = 0;
+                             for (size_t i = 0; i < taps.size(); i++)
+                                 result += data.get(index - i, 0) * taps[i];
+                             return result;
+                         });
+
+        CHECK_EXPRESSION(short_fir(data, taps), 100,
+                         [&](size_t index) -> T
+                         {
+                             T result = 0;
+                             for (size_t i = 0; i < taps.size(); i++)
+                                 result += data.get(index - i, 0) * taps[i];
+                             return result;
+                         });
+    }
+#else // !CMT_COMPILER_IS_MSVC: same checks via testo::matrix, plus the overloads taking a state object
+    testo::matrix(named("type") = ctypes_t<float
+#ifdef CMT_NATIVE_F64
+                                           ,
+                                           double
+#endif
+                                           >{},
+                  [](auto type)
+                  {
+                      using T = typename decltype(type)::type;
+
+                      const univector<T, 100> data =
+                          counter() + sequence(1, 2, -10, 100) + sequence(0, -7, 0.5);
+                      const univector<T, 6> taps{ 1, 2, -2, 0.5, 0.0625, 4 };
+
+                      CHECK_EXPRESSION(fir(data, taps), 100,
+                                       [&](size_t index) -> T
+                                       {
+                                           T result = 0;
+                                           for (size_t i = 0; i < taps.size(); i++)
+                                               result += data.get(index - i, 0) * taps[i]; // 0 = fallback value
+                                           return result;
+                                       });
+
+                      fir_state<T> state(taps.ref()); // explicit state object for the next check
+
+                      CHECK_EXPRESSION(fir(state, data), 100,
+                                       [&](size_t index) -> T
+                                       {
+                                           T result = 0;
+                                           for (size_t i = 0; i < taps.size(); i++)
+                                               result += data.get(index - i, 0) * taps[i];
+                                           return result;
+                                       });
+
+                      CHECK_EXPRESSION(short_fir(data, taps), 100,
+                                       [&](size_t index) -> T
+                                       {
+                                           T result = 0;
+                                           for (size_t i = 0; i < taps.size(); i++)
+                                               result += data.get(index - i, 0) * taps[i];
+                                           return result;
+                                       });
+
+                      short_fir_state<9, T> state2(taps); // NOTE(review): 9 differs from taps.size() (6) - confirm intent
+
+                      CHECK_EXPRESSION(short_fir<taps.size()>(state2, data), 100,
+                                       [&](size_t index) -> T
+                                       {
+                                           T result = 0;
+                                           for (size_t i = 0; i < taps.size(); i++)
+                                               result += data.get(index - i, 0) * taps[i];
+                                           return result;
+                                       });
+
+                      CHECK_EXPRESSION(moving_sum<taps.size()>(data), 100,
+                                       [&](size_t index) -> T
+                                       {
+                                           T result = 0;
+                                           for (size_t i = 0; i < taps.size(); i++)
+                                               result += data.get(index - i, 0); // plain sum, no tap weights
+                                           return result;
+                                       });
+
+                      moving_sum_state<T, 131> msstate1; // length given as a template argument
+
+                      CHECK_EXPRESSION(moving_sum(msstate1, data), 100,
+                                       [&](size_t index) -> T
+                                       {
+                                           T result = 0;
+                                           for (size_t i = 0; i < msstate1.delayline.size(); i++)
+                                               result += data.get(index - i, 0);
+                                           return result;
+                                       });
+
+                      moving_sum_state<T> msstate2(133); // length given as a constructor argument
+
+                      CHECK_EXPRESSION(moving_sum(msstate2, data), 100,
+                                       [&](size_t index) -> T
+                                       {
+                                           T result = 0;
+                                           for (size_t i = 0; i < msstate2.delayline.size(); i++)
+                                               result += data.get(index - i, 0);
+                                           return result;
+                                       });
+                  });
+#endif // CMT_COMPILER_IS_MSVC
+}
+
+#ifdef CMT_NATIVE_F64
+TEST(fir_different) // float data filtered with double taps; the reference accumulates in double
+{
+    const univector<float, 100> data = counter() + sequence(1, 2, -10, 100) + sequence(0, -7, 0.5f);
+    //    const univector<double, 6> taps{ 1, 2, -2, 0.5, 0.0625, 4 };
+    const univector<double, 4> taps{ 1, 2, 3, 4 };
+
+    CHECK_EXPRESSION(fir(data, taps), 100,
+                     [&](size_t index) -> float
+                     {
+                         double result = 0.0;
+                         for (size_t i = 0; i < taps.size(); i++)
+                             result += data.get(index - i, 0.0) * taps[i]; // 0.0 is get()'s fallback argument
+                         return float(result); // narrowed to float only after the full sum
+                     });
+
+    CHECK_EXPRESSION(short_fir(data, taps), 100,
+                     [&](size_t index) -> float
+                     {
+                         double result = 0.0;
+                         for (size_t i = 0; i < taps.size(); i++)
+                             result += data.get(index - i, 0.0) * taps[i];
+                         return float(result);
+                     });
+}
+#endif
+
+#ifdef KFR_STD_COMPLEX // both helpers take and return std::complex in this branch, so they only copy
+template <typename T>
+inline std::complex<T> to_std(const std::complex<T>& c)
+{
+    return std::complex<T>(c);
+}
+template <typename T>
+inline std::complex<T> from_std(const std::complex<T>& c)
+{
+    return std::complex<T>(c);
+}
+#else // convert between kfr::complex and std::complex component by component
+template <typename T>
+inline std::complex<T> to_std(const kfr::complex<T>& c)
+{
+    return std::complex<T>(c.real(), c.imag());
+}
+
+template <typename T>
+inline kfr::complex<T> from_std(const std::complex<T>& c)
+{
+    return kfr::complex<T>(c.real(), c.imag());
+}
+#endif // KFR_STD_COMPLEX
+
+TEST(fir_complex) // complex<float> data with real float taps; the reference sums in std::complex<float>
+{
+    const univector<complex<float>, 100> data =
+        counter() * complex<float>{ 0.f, 1.f } + sequence(1, 2, -10, 100) + sequence(0, -7, 0.5f);
+    const univector<float, 6> taps{ 1, 2, -2, 0.5, 0.0625, 4 };
+
+    CHECK_EXPRESSION(fir(data, taps), 100,
+                     [&](size_t index) -> complex<float>
+                     {
+                         std::complex<float> result = 0.0;
+                         for (size_t i = 0; i < taps.size(); i++)
+                             result = result + to_std(data.get(index - i, 0.0)) * taps[i]; // 0.0 = fallback
+                         return from_std(result); // back to the lambda's complex<float> return type
+                     });
+
+    CHECK_EXPRESSION(short_fir(data, taps), 100,
+                     [&](size_t index) -> complex<float>
+                     {
+                         std::complex<float> result = 0.0;
+                         for (size_t i = 0; i < taps.size(); i++)
+                             result = result + to_std(data.get(index - i, 0.0)) * taps[i];
+                         return from_std(result);
+                     });
+}
+} // namespace CMT_ARCH_NAME
+
+} // namespace kfr
diff --git a/tests/unit/dsp/sample_rate_conversion.cpp b/tests/unit/dsp/sample_rate_conversion.cpp
@@ -0,0 +1,51 @@
+/**
+ * KFR (http://kfrlib.com)
+ * Copyright (C) 2016-2022 Fractalium Ltd
+ * See LICENSE.txt for details
+ */
+
+#include <kfr/base/math_expressions.hpp>
+#include <kfr/dsp/oscillators.hpp>
+#include <kfr/dsp/sample_rate_conversion.hpp>
+#include <kfr/math/sin_cos.hpp>
+
+namespace kfr
+{
+inline namespace CMT_ARCH_NAME
+{
+
+TEST(resampler_test) // resamples a generated tone and compares it with a delay-shifted reference
+{
+    const int in_sr  = 44100;
+    const int out_sr = 48000;
+    const int freq   = 100;
+    auto resampler   = sample_rate_converter<fbase>(resample_quality::draft, out_sr, in_sr);
+    double delay     = resampler.get_fractional_delay();
+    univector<fbase> out(out_sr / 10); // out_sr / 10 output samples
+    univector<fbase> in  = truncate(sin(c_pi<fbase> * phasor<fbase>(freq, in_sr, 0)), in_sr / 10);
+    univector<fbase> ref = truncate( // same tone at out_sr, phase offset derived from the reported delay
+        sin(c_pi<fbase> * phasor<fbase>(freq, out_sr, -delay * (static_cast<double>(freq) / out_sr))),
+        out_sr / 10);
+    resampler.process(out, in);
+
+    // NOTE(review): slice() appears to drop the first ceil(delay * 2) samples before the RMS - confirm
+    CHECK(rms(slice(out - ref, static_cast<size_t>(ceil(delay * 2)))) < 0.005f);
+}
+TEST(resampler_test_complex) // same check as resampler_test, with complex<fbase> samples
+{
+    using type       = complex<fbase>;
+    const int in_sr  = 44100;
+    const int out_sr = 48000;
+    const int freq   = 100;
+    auto resampler   = sample_rate_converter<type>(resample_quality::draft, out_sr, in_sr);
+    double delay     = resampler.get_fractional_delay();
+    univector<type> out(out_sr / 10);
+    univector<type> in  = truncate(sin(c_pi<fbase> * phasor<fbase>(freq, in_sr, 0)), in_sr / 10);
+    univector<type> ref = truncate( // the generated tone itself is real-valued (fbase phasor)
+        sin(c_pi<fbase> * phasor<fbase>(freq, out_sr, -delay * (static_cast<double>(freq) / out_sr))),
+        out_sr / 10);
+    resampler.process(out, in);
+
+    // cabs() is applied to the complex difference before rms()
+    CHECK(rms(cabs(slice(out - ref, static_cast<size_t>(ceil(delay * 2))))) < 0.005f);
+}
+} // namespace CMT_ARCH_NAME
+} // namespace kfr
diff --git a/tests/unit/dsp/units.cpp b/tests/unit/dsp/units.cpp
@@ -0,0 +1,58 @@
+/**
+ * KFR (http://kfrlib.com)
+ * Copyright (C) 2016-2022 Fractalium Ltd
+ * See LICENSE.txt for details
+ */
+
+#include <kfr/dsp/units.hpp>
+
+namespace kfr
+{
+inline namespace CMT_ARCH_NAME
+{
+
+TEST(note_to_hertz) // note 60 -> ~261.63 Hz and note 69 -> 440 Hz, for scalar and pack() arguments
+{
+    testo::eplison_scope<void> eps(2000); // presumably sets the tolerance used by CHECK's == below - confirm
+    CHECK(kfr::note_to_hertz(60) == fbase(261.6255653005986346778499935233));
+    CHECK(kfr::note_to_hertz(pack(60)) == pack(fbase(261.6255653005986346778499935233)));
+
+    CHECK(kfr::note_to_hertz(69) == fbase(440.0));
+    CHECK(kfr::note_to_hertz(pack(69)) == pack(fbase(440)));
+}
+
+TEST(hertz_to_note) // inverse mapping: ~261.63 Hz -> note 60 and 440 Hz -> note 69, scalar and pack()
+{
+    testo::eplison_scope<void> eps(1000); // presumably sets the tolerance used by CHECK's == below - confirm
+    CHECK(kfr::hertz_to_note(261.6255653005986346778499935233) == fbase(60));
+    CHECK(kfr::hertz_to_note(pack(261.6255653005986346778499935233)) == pack(fbase(60)));
+
+    CHECK(kfr::hertz_to_note(440) == fbase(69));
+    CHECK(kfr::hertz_to_note(pack(440)) == pack(fbase(69)));
+}
+
+TEST(amp_to_dB) // expects the same dB value for +x and -x, and -HUGE_VAL for an amplitude of 0
+{
+    testo::eplison_scope<void> eps(1000); // presumably sets the tolerance used by CHECK's == below - confirm
+
+    CHECK(kfr::amp_to_dB(fbase(2.0)) == fbase(6.0205999132796239042747778944899));
+    CHECK(kfr::amp_to_dB(fbase(-2.0)) == fbase(6.0205999132796239042747778944899));
+    CHECK(kfr::amp_to_dB(fbase(1.0)) == fbase(0));
+    CHECK(kfr::amp_to_dB(fbase(-1.0)) == fbase(0));
+    CHECK(kfr::amp_to_dB(fbase(0.5)) == fbase(-6.0205999132796239042747778944899));
+    CHECK(kfr::amp_to_dB(fbase(-0.5)) == fbase(-6.0205999132796239042747778944899));
+    CHECK(kfr::amp_to_dB(fbase(0.0)) == fbase(-HUGE_VAL));
+}
+
+TEST(dB_to_amp) // expects -HUGE_VAL dB -> 0, 0 dB -> 1, and about +/-6.02 dB -> 2 and 0.5
+{
+    testo::eplison_scope<void> eps(1000); // presumably sets the tolerance used by CHECK's == below - confirm
+
+    CHECK(kfr::dB_to_amp(fbase(-HUGE_VAL)) == fbase(0.0));
+    CHECK(kfr::dB_to_amp(fbase(0.0)) == fbase(1.0));
+    CHECK(kfr::dB_to_amp(fbase(6.0205999132796239042747778944899)) == fbase(2.0));
+    CHECK(kfr::dB_to_amp(fbase(-6.0205999132796239042747778944899)) == fbase(0.5));
+}
+
+} // namespace CMT_ARCH_NAME
+} // namespace kfr
diff --git a/tests/unit/dsp/window.cpp b/tests/unit/dsp/window.cpp
@@ -0,0 +1,189 @@
+/**
+ * KFR (http://kfrlib.com)
+ * Copyright (C) 2016-2022 Fractalium Ltd
+ * See LICENSE.txt for details
+ */
+
+#include <kfr/base/reduce.hpp>
+#include <kfr/base/simd_expressions.hpp>
+#include <kfr/base/tensor.hpp>
+#include <kfr/dsp/window.hpp>
+#include <kfr/io/tostring.hpp>
+
+namespace kfr
+{
+inline namespace CMT_ARCH_NAME
+{
+
+const char* wins[] = { // labels looked up in win() as wins[static_cast<int>(type)]
+    "", // index 0 is an empty label; presumably no window_type has the value 0 - confirm
+    "rectangular    ", // labels are space-padded to a common width
+    "triangular     ",
+    "bartlett       ",
+    "cosine         ",
+    "hann           ",
+    "bartlett_hann  ",
+    "hamming        ",
+    "bohman         ",
+    "blackman       ",
+    "blackman_harris",
+    "kaiser         ",
+    "flattop        ",
+    "gaussian       ",
+    "lanczos        ",
+    "cosine_np      ",
+};
+
+template <window_type type, typename T> // type: window under test; T: sample type
+void win(size_t len, T arg, window_symmetry sym, univector<T> ref) // ref: expected window values
+{
+    univector<T> calc = render(window(len, cval_t<window_type, type>{}, arg, sym, ctype<T>));
+    testo::scope sc(as_string("win=", wins[static_cast<int>(type)], " len=", len, " sym=", // test context
+                              sym == window_symmetry::symmetric, "\n    calc=", calc, "\n    ref =", ref));
+    CHECK(rms(calc - ref) < 0.00001f); // RMS of the difference must stay below 1e-5
+}
+
+TEST(window)
+{
+    using w = window_type;
+    using s = window_symmetry;
+    using u = univector<f32>;
+    // clang-format
+    win<w::rectangular, f32>(7, 0.0, s::symmetric, u{ 1, 1, 1, 1, 1, 1, 1 });
+    win<w::triangular, f32>(7, 0.0, s::symmetric, u{ 0.25, 0.5, 0.75, 1., 0.75, 0.5, 0.25 });
+    win<w::bartlett, f32>(7, 0.0, s::symmetric,
+                          u{ 0., 0.33333333, 0.66666667, 1., 0.66666667, 0.33333333, 0. });
+    win<w::cosine, f32>(7, 0.0, s::symmetric, u{ 0, 0.5, 0.866025, 1, 0.866025, 0.5, 0 });
+    win<w::hann, f32>(7, 0.0, s::symmetric, u{ 0., 0.25, 0.75, 1., 0.75, 0.25, 0. });
+    win<w::bartlett_hann, f32>(7, 0.0, s::symmetric, u{ 0., 0.27, 0.73, 1., 0.73, 0.27, 0. });
+    win<w::hamming, f32>(7, 0.54, s::symmetric, u{ 0.08, 0.31, 0.77, 1., 0.77, 0.31, 0.08 });
+    win<w::bohman, f32>(7, 0.0, s::symmetric,
+                        u{ 0., 0.10899778, 0.60899778, 1., 0.60899778, 0.10899778, 0. });
+    win<w::blackman, f32>(7, 0.16, s::symmetric,
+                          u{ -1.38777878e-17, 1.30000000e-01, 6.30000000e-01, 1.00000000e+00, 6.30000000e-01,
+                             1.30000000e-01, -1.38777878e-17 });
+    win<w::blackman_harris, f32>(
+        7, 0.0, s::symmetric,
+        u{ 6.00000e-05, 5.56450e-02, 5.20575e-01, 1.00000e+00, 5.20575e-01, 5.56450e-02, 6.00000e-05 });
+    win<w::kaiser, f32>(7, 8.0, s::symmetric,
+                        u{ 0.00233883, 0.1520107, 0.65247867, 1., 0.65247867, 0.1520107, 0.00233883 });
+    win<w::flattop, f32>(7, 0.0, s::symmetric,
+                         u{ -4.2105100e-04, -5.1263156e-02, 1.9821053e-01, 1.0000000e+00, 1.9821053e-01,
+                            -5.1263156e-02, -4.2105100e-04 });
+    win<w::gaussian, f32>(7, 2.5, s::symmetric,
+                          u{ 0.1006689, 0.36044779, 0.77483743, 1., 0.77483743, 0.36044779, 0.1006689 });
+    win<w::lanczos, f32>(7, 0.0, s::symmetric,
+                         u{ -2.8e-08, 0.413497, 0.826993, 1, 0.826993, 0.413497, -2.8e-08 });
+    win<w::cosine_np, f32>(7, 0.0, s::symmetric,
+                           u{ 0.22252093, 0.6234898, 0.90096887, 1., 0.90096887, 0.6234898, 0.22252093 });
+
+    win<w::rectangular, f32>(8, 0.0, s::symmetric, u{ 1, 1, 1, 1, 1, 1, 1, 1 });
+    win<w::triangular, f32>(8, 0.0, s::symmetric,
+                            u{ 0.125, 0.375, 0.625, 0.875, 0.875, 0.625, 0.375, 0.125 });
+    win<w::bartlett, f32>(
+        8, 0.0, s::symmetric,
+        u{ 0., 0.28571429, 0.57142857, 0.85714286, 0.85714286, 0.57142857, 0.28571429, 0. });
+    win<w::cosine, f32>(8, 0.0, s::symmetric,
+                        u{ 0, 0.433884, 0.781832, 0.974928, 0.974928, 0.781831, 0.433883, 0 });
+    win<w::hann, f32>(8, 0.0, s::symmetric,
+                      u{ 0., 0.1882551, 0.61126047, 0.95048443, 0.95048443, 0.61126047, 0.1882551, 0. });
+    win<w::bartlett_hann, f32>(
+        8, 0.0, s::symmetric,
+        u{ 0., 0.2116453, 0.60170081, 0.92808246, 0.92808246, 0.60170081, 0.2116453, 0. });
+    win<w::hamming, f32>(
+        8, 0.54, s::symmetric,
+        u{ 0.08, 0.25319469, 0.64235963, 0.95444568, 0.95444568, 0.64235963, 0.25319469, 0.08 });
+    win<w::bohman, f32>(8, 0.0, s::symmetric,
+                        u{ 0., 0.07072475, 0.43748401, 0.91036851, 0.91036851, 0.43748401, 0.07072475, 0. });
+    win<w::blackman, f32>(8, 0.16, s::symmetric,
+                          u{ -1.38777878e-17, 9.04534244e-02, 4.59182958e-01, 9.20363618e-01, 9.20363618e-01,
+                             4.59182958e-01, 9.04534244e-02, -1.38777878e-17 });
+    win<w::blackman_harris, f32>(8, 0.0, s::symmetric,
+                                 u{ 6.00000000e-05, 3.33917235e-02, 3.32833504e-01, 8.89369772e-01,
+                                    8.89369772e-01, 3.32833504e-01, 3.33917235e-02, 6.00000000e-05 });
+    win<w::kaiser, f32>(
+        8, 8.0, s::symmetric,
+        u{ 0.00233883, 0.10919581, 0.48711868, 0.92615774, 0.92615774, 0.48711868, 0.10919581, 0.00233883 });
+    win<w::flattop, f32>(8, 0.0, s::symmetric,
+                         u{ -4.21051000e-04, -3.68407812e-02, 1.07037167e-02, 7.80873915e-01, 7.80873915e-01,
+                            1.07037167e-02, -3.68407812e-02, -4.21051000e-04 });
+    win<w::gaussian, f32>(
+        8, 2.5, s::symmetric,
+        u{ 0.09139376, 0.29502266, 0.64438872, 0.9523448, 0.9523448, 0.64438872, 0.29502266, 0.09139376 });
+    win<w::lanczos, f32>(8, 0.0, s::symmetric,
+                         u{ -2.8e-08, 0.348411, 0.724101, 0.966766, 0.966766, 0.724101, 0.34841, -2.8e-08 });
+    win<w::cosine_np, f32>(
+        8, 0.0, s::symmetric,
+        u{ 0.19509032, 0.55557023, 0.83146961, 0.98078528, 0.98078528, 0.83146961, 0.55557023, 0.19509032 });
+
+    win<w::rectangular, f32>(7, 0.0, s::periodic, u{ 1, 1, 1, 1, 1, 1, 1 });
+    win<w::triangular, f32>(7, 0.0, s::periodic, u{ 0.125, 0.375, 0.625, 0.875, 0.875, 0.625, 0.375 });
+    win<w::bartlett, f32>(7, 0.0, s::periodic,
+                          u{ 0., 0.28571429, 0.57142857, 0.85714286, 0.85714286, 0.57142857, 0.28571429 });
+    win<w::cosine, f32>(7, 0.0, s::periodic,
+                        u{ 0, 0.433884, 0.781832, 0.974928, 0.974928, 0.781831, 0.433883 });
+    win<w::hann, f32>(7, 0.0, s::periodic,
+                      u{ 0., 0.1882551, 0.61126047, 0.95048443, 0.95048443, 0.61126047, 0.1882551 });
+    win<w::bartlett_hann, f32>(7, 0.0, s::periodic,
+                               u{ 0., 0.2116453, 0.60170081, 0.92808246, 0.92808246, 0.60170081, 0.2116453 });
+    win<w::hamming, f32>(7, 0.54, s::periodic,
+                         u{ 0.08, 0.25319469, 0.64235963, 0.95444568, 0.95444568, 0.64235963, 0.25319469 });
+    win<w::bohman, f32>(7, 0.0, s::periodic,
+                        u{ 0., 0.07072475, 0.43748401, 0.91036851, 0.91036851, 0.43748401, 0.07072475 });
+    win<w::blackman, f32>(7, 0.16, s::periodic,
+                          u{ -1.38777878e-17, 9.04534244e-02, 4.59182958e-01, 9.20363618e-01, 9.20363618e-01,
+                             4.59182958e-01, 9.04534244e-02 });
+    win<w::blackman_harris, f32>(7, 0.0, s::periodic,
+                                 u{ 6.00000000e-05, 3.33917235e-02, 3.32833504e-01, 8.89369772e-01,
+                                    8.89369772e-01, 3.32833504e-01, 3.33917235e-02 });
+    win<w::kaiser, f32>(
+        7, 8.0, s::periodic,
+        u{ 0.00233883, 0.10919581, 0.48711868, 0.92615774, 0.92615774, 0.48711868, 0.10919581 });
+    win<w::flattop, f32>(7, 0.0, s::periodic,
+                         u{ -4.21051000e-04, -3.68407812e-02, 1.07037167e-02, 7.80873915e-01, 7.80873915e-01,
+                            1.07037167e-02, -3.68407812e-02 });
+    win<w::gaussian, f32>(
+        7, 2.5, s::periodic,
+        u{ 0.09139376, 0.29502266, 0.64438872, 0.9523448, 0.9523448, 0.64438872, 0.29502266 });
+    win<w::lanczos, f32>(7, 0.0, s::periodic,
+                         u{ -2.8e-08, 0.348411, 0.724101, 0.966766, 0.966766, 0.724101, 0.34841 });
+    win<w::cosine_np, f32>(
+        7, 0.0, s::periodic,
+        u{ 0.19509032, 0.55557023, 0.83146961, 0.98078528, 0.98078528, 0.83146961, 0.55557023 });
+
+    win<w::rectangular, f32>(8, 0.0, s::periodic, u{ 1, 1, 1, 1, 1, 1, 1, 1 });
+    win<w::triangular, f32>(8, 0.0, s::periodic, u{ 0.2, 0.4, 0.6, 0.8, 1., 0.8, 0.6, 0.4 });
+    win<w::bartlett, f32>(8, 0.0, s::periodic, u{ 0., 0.25, 0.5, 0.75, 1., 0.75, 0.5, 0.25 });
+    win<w::cosine, f32>(8, 0.0, s::periodic,
+                        u{ 0, 0.382683, 0.707107, 0.92388, 1, 0.92388, 0.707107, 0.382683 });
+    win<w::hann, f32>(8, 0.0, s::periodic,
+                      u{ 0., 0.14644661, 0.5, 0.85355339, 1., 0.85355339, 0.5, 0.14644661 });
+    win<w::bartlett_hann, f32>(8, 0.0, s::periodic,
+                               u{ 0., 0.17129942, 0.5, 0.82870058, 1., 0.82870058, 0.5, 0.17129942 });
+    win<w::hamming, f32>(8, 0.54, s::periodic,
+                         u{ 0.08, 0.21473088, 0.54, 0.86526912, 1., 0.86526912, 0.54, 0.21473088 });
+    win<w::bohman, f32>(8, 0.0, s::periodic,
+                        u{ 0., 0.04830238, 0.31830989, 0.75540916, 1., 0.75540916, 0.31830989, 0.04830238 });
+    win<w::blackman, f32>(8, 0.16, s::periodic,
+                          u{ -1.38777878e-17, 6.64466094e-02, 3.40000000e-01, 7.73553391e-01, 1.00000000e+00,
+                             7.73553391e-01, 3.40000000e-01, 6.64466094e-02 });
+    win<w::blackman_harris, f32>(8, 0.0, s::periodic,
+                                 u{ 6.00000000e-05, 2.17358370e-02, 2.17470000e-01, 6.95764163e-01,
+                                    1.00000000e+00, 6.95764163e-01, 2.17470000e-01, 2.17358370e-02 });
+    win<w::kaiser, f32>(
+        8, 8.0, s::periodic,
+        u{ 0.00233883, 0.08273982, 0.36897272, 0.78875245, 1., 0.78875245, 0.36897272, 0.08273982 });
+    win<w::flattop, f32>(8, 0.0, s::periodic,
+                         u{ -4.21051000e-04, -2.68721933e-02, -5.47368400e-02, 4.44135357e-01, 1.00000000e+00,
+                            4.44135357e-01, -5.47368400e-02, -2.68721933e-02 });
+    win<w::gaussian, f32>(
+        8, 2.5, s::periodic,
+        u{ 0.08465799, 0.24935221, 0.53940751, 0.85699689, 1., 0.85699689, 0.53940751, 0.24935221 });
+    win<w::lanczos, f32>(8, 0.0, s::periodic,
+                         u{ -2.8e-08, 0.300105, 0.63662, 0.900316, 1, 0.900316, 0.63662, 0.300105 });
+    win<w::cosine_np, f32>(8, 0.0, s::periodic,
+                           u{ 0.17364818, 0.5, 0.76604444, 0.93969262, 1., 0.93969262, 0.76604444, 0.5 });
+    // clang-format on
+}
+} // namespace CMT_ARCH_NAME
+} // namespace kfr
diff --git a/tests/unit/graphics/graphics.cpp b/tests/unit/graphics/graphics.cpp
@@ -0,0 +1,7 @@
+/**
+ * KFR (http://kfrlib.com)
+ * Copyright (C) 2016-2022 Fractalium Ltd
+ * See LICENSE.txt for details
+ */
+
+#include <kfr/graphics.hpp>
diff --git a/tests/unit/math/math.cpp b/tests/unit/math/math.cpp
@@ -0,0 +1,7 @@
+/**
+ * KFR (http://kfrlib.com)
+ * Copyright (C) 2016-2022 Fractalium Ltd
+ * See LICENSE.txt for details
+ */
+
+#include <kfr/math.hpp>
diff --git a/tests/unit/simd/abs.cpp b/tests/unit/simd/abs.cpp
@@ -4,7 +4,7 @@
  * See LICENSE.txt for details
  */
 
-#include <kfr/math/abs.hpp>
+#include <kfr/simd/abs.hpp>
 
 #include <kfr/io.hpp>
 
diff --git a/tests/unit/simd/min_max.cpp b/tests/unit/simd/min_max.cpp
@@ -4,7 +4,7 @@
  * See LICENSE.txt for details
  */
 
-#include <kfr/math/min_max.hpp>
+#include <kfr/simd/min_max.hpp>
 
 #include <kfr/io.hpp>
 
@@ -45,7 +45,8 @@ TEST(absmin)
 {
     test_function2(
         test_catogories::all, [](auto x, auto y) { return kfr::absmin(x, y); },
-        [](auto x, auto y) -> common_type<decltype(x), decltype(y)> {
+        [](auto x, auto y) -> common_type<decltype(x), decltype(y)>
+        {
             x = x >= 0 ? x : -x;
             y = y >= 0 ? y : -y;
             return x <= y ? x : y;
@@ -57,7 +58,8 @@ TEST(absmax)
 {
     test_function2(
         test_catogories::all, [](auto x, auto y) { return kfr::absmax(x, y); },
-        [](auto x, auto y) -> common_type<decltype(x), decltype(y)> {
+        [](auto x, auto y) -> common_type<decltype(x), decltype(y)>
+        {
             x = x >= 0 ? x : -x;
             y = y >= 0 ? y : -y;
             return x >= y ? x : y;
diff --git a/tests/unit/simd/operators.cpp b/tests/unit/simd/operators.cpp
@@ -4,6 +4,7 @@
  * See LICENSE.txt for details
  */
 
+#include <kfr/io/tostring.hpp>
 #include <kfr/simd/horizontal.hpp>
 #include <kfr/simd/operators.hpp>
 
@@ -22,7 +23,8 @@ TEST(bnot)
 {
     test_function1(
         test_catogories::vectors, [](auto x) -> decltype(x) { return ~x; },
-        [](auto x) -> decltype(x) {
+        [](auto x) -> decltype(x)
+        {
             utype<decltype(x)> u = ~ubitcast(x);
             return bitcast<decltype(x)>(u);
         });
@@ -59,19 +61,38 @@ TEST(div)
 {
     test_function2(
         test_catogories::vectors,
-        [](auto x, auto y) {
-            return is_safe_division<subtype<decltype(x)>>(x.front(), y.front()) ? x / y : 0;
-        },
+        [](auto x, auto y)
+        { return is_safe_division<subtype<decltype(x)>>(x.front(), y.front()) ? x / y : 0; },
         [](auto x, auto y) -> common_type<decltype(x), decltype(y)> {
             return is_safe_division(x, y) ? x / y : 0;
         });
 }
+struct not_f
+{
+    template <typename T>
+    constexpr bool operator()(ctype_t<T>) const
+    {
+        return !is_f_class<subtype<T>>;
+    }
+};
+TEST(mod)
+{
+    test_function2(
+        test_catogories::vectors,
+        [](auto x, auto y)
+        { return is_safe_division<subtype<decltype(x)>>(x.front(), y.front()) ? x % y : 0; },
+        [](auto x, auto y) -> common_type<decltype(x), decltype(y)> {
+            return is_safe_division(x, y) ? x % y : 0;
+        },
+        fn_return_constant<bool, true>{}, not_f{});
+}
 
 TEST(bor)
 {
     test_function2(
         test_catogories::vectors, [](auto x, auto y) { return x | y; },
-        [](auto x, auto y) -> common_type<decltype(x), decltype(y)> {
+        [](auto x, auto y) -> common_type<decltype(x), decltype(y)>
+        {
             using T = common_type<decltype(x), decltype(y)>;
             return bitcast<T>(static_cast<utype<T>>(ubitcast(T(x)) | ubitcast(T(y))));
         });
@@ -81,7 +102,8 @@ TEST(bxor)
 {
     test_function2(
         test_catogories::vectors, [](auto x, auto y) { return x ^ y; },
-        [](auto x, auto y) -> common_type<decltype(x), decltype(y)> {
+        [](auto x, auto y) -> common_type<decltype(x), decltype(y)>
+        {
             using T = common_type<decltype(x), decltype(y)>;
             return bitcast<T>(static_cast<utype<T>>(ubitcast(T(x)) ^ ubitcast(T(y))));
         });
@@ -91,7 +113,8 @@ TEST(band)
 {
     test_function2(
         test_catogories::vectors, [](auto x, auto y) { return x & y; },
-        [](auto x, auto y) -> common_type<decltype(x), decltype(y)> {
+        [](auto x, auto y) -> common_type<decltype(x), decltype(y)>
+        {
             using T = common_type<decltype(x), decltype(y)>;
             return bitcast<T>(static_cast<utype<T>>(ubitcast(T(x)) & ubitcast(T(y))));
         });
@@ -102,7 +125,8 @@ TEST(shl)
     testo::matrix(
         named("type") = test_catogories::types(test_catogories::vectors), named("value1") = special_values(),
         named("shift") = std::vector<unsigned>{ 1, 2, 7, 8, 9, 15, 16, 31, 32, 63, 64 },
-        [&](auto type, special_value value, unsigned shift) {
+        [&](auto type, special_value value, unsigned shift)
+        {
             using T = typename decltype(type)::type;
             if (shift < sizeof(subtype<T>))
             {
@@ -130,7 +154,8 @@ TEST(shr)
     testo::matrix(
         named("type") = test_catogories::types(test_catogories::vectors), named("value1") = special_values(),
         named("shift") = std::vector<unsigned>{ 1, 2, 7, 8, 9, 15, 16, 31, 32, 63, 64 },
-        [&](auto type, special_value value, unsigned shift) {
+        [&](auto type, special_value value, unsigned shift)
+        {
             using T = typename decltype(type)::type;
             if (shift < sizeof(subtype<T>))
             {
@@ -222,6 +247,10 @@ TEST(matrix)
 
     CHECK(m22 * i32x2{ -1, 100 } == i32x2x2{ i32x2{ -1, 200 }, i32x2{ -3, 400 } });
 
+    CHECK(vec{ vec{ 1, 1 }, vec{ 1, 1 } } * vec{ -1, 100 } == vec{ vec{ -1, 100 }, vec{ -1, 100 } });
+    CHECK(vec{ vec{ 1, 1 }, vec{ 1, 1 }, vec{ 1, 1 } } * vec{ -1, 100 } ==
+          vec{ vec{ -1, 100 }, vec{ -1, 100 }, vec{ -1, 100 } });
+
     i32x2 xy{ 10, 20 };
     i32x2x2 m{ i32x2{ 1, 2 }, i32x2{ 3, 4 } };
     xy = hadd(xy * m);
diff --git a/tests/unit/simd/round.cpp b/tests/unit/simd/round.cpp
@@ -4,7 +4,7 @@
  * See LICENSE.txt for details
  */
 
-#include <kfr/math/round.hpp>
+#include <kfr/simd/round.hpp>
 
 namespace kfr
 {
@@ -14,45 +14,40 @@ TEST(floor)
 {
     test_function1(
         test_catogories::all, [](auto x) { return kfr::floor(x); },
-        [](auto x) -> decltype(x) {
-            return std::is_integral<decltype(x)>::value ? x : static_cast<decltype(x)>(std::floor(x));
-        });
+        [](auto x) -> decltype(x)
+        { return std::is_integral<decltype(x)>::value ? x : static_cast<decltype(x)>(std::floor(x)); });
 }
 
 TEST(ceil)
 {
     test_function1(
         test_catogories::all, [](auto x) { return kfr::ceil(x); },
-        [](auto x) -> decltype(x) {
-            return std::is_integral<decltype(x)>::value ? x : static_cast<decltype(x)>(std::ceil(x));
-        });
+        [](auto x) -> decltype(x)
+        { return std::is_integral<decltype(x)>::value ? x : static_cast<decltype(x)>(std::ceil(x)); });
 }
 
 TEST(trunc)
 {
     test_function1(
         test_catogories::all, [](auto x) { return kfr::trunc(x); },
-        [](auto x) -> decltype(x) {
-            return std::is_integral<decltype(x)>::value ? x : static_cast<decltype(x)>(std::trunc(x));
-        });
+        [](auto x) -> decltype(x)
+        { return std::is_integral<decltype(x)>::value ? x : static_cast<decltype(x)>(std::trunc(x)); });
 }
 
 TEST(round)
 {
     test_function1(
         test_catogories::all, [](auto x) { return kfr::round(x); },
-        [](auto x) -> decltype(x) {
-            return std::is_integral<decltype(x)>::value ? x : static_cast<decltype(x)>(std::round(x));
-        });
+        [](auto x) -> decltype(x)
+        { return std::is_integral<decltype(x)>::value ? x : static_cast<decltype(x)>(std::round(x)); });
 }
 
 TEST(fract)
 {
     test_function1(
         test_catogories::all, [](auto x) { return kfr::fract(x); },
-        [](auto x) -> decltype(x) {
-            return std::is_integral<decltype(x)>::value ? 0 : static_cast<decltype(x)>(x - std::floor(x));
-        });
+        [](auto x) -> decltype(x)
+        { return std::is_integral<decltype(x)>::value ? 0 : static_cast<decltype(x)>(x - std::floor(x)); });
 }
 } // namespace CMT_ARCH_NAME
 } // namespace kfr
diff --git a/tests/unit/simd/select.cpp b/tests/unit/simd/select.cpp
@@ -4,7 +4,7 @@
  * See LICENSE.txt for details
  */
 
-#include <kfr/math/select.hpp>
+#include <kfr/simd/select.hpp>
 
 namespace kfr
 {
@@ -14,7 +14,8 @@ TEST(select_true)
 {
     test_function2(
         test_catogories::vectors,
-        [](auto x, auto y) {
+        [](auto x, auto y)
+        {
             mask<subtype<decltype(x)>, decltype(x)::scalar_size()> m(true);
             return kfr::select(m, x, y);
         },
@@ -25,7 +26,8 @@ TEST(select_false)
 {
     test_function2(
         test_catogories::vectors,
-        [](auto x, auto y) {
+        [](auto x, auto y)
+        {
             mask<subtype<decltype(x)>, decltype(x)::scalar_size()> m(false);
             return kfr::select(m, x, y);
         },
diff --git a/tests/unit/simd/shuffle.cpp b/tests/unit/simd/shuffle.cpp
@@ -180,5 +180,11 @@ TEST(low_high)
     CHECK(low(vec<u8, 2>(1, 2)) == vec<u8, 1>(1));
     CHECK(high(vec<u8, 2>(1, 2)) == vec<u8, 1>(2));
 }
+TEST(enumerate)
+{
+    CHECK(enumerate(vec_shape<int, 4>{}, 4) == vec{ 0, 4, 8, 12 });
+    CHECK(enumerate(vec_shape<int, 8>{}, 3) == vec{ 0, 3, 6, 9, 12, 15, 18, 21 });
+    CHECK(enumerate(vec_shape<int, 7>{}, 3) == vec{ 0, 3, 6, 9, 12, 15, 18 });
+}
 } // namespace CMT_ARCH_NAME
 } // namespace kfr
diff --git a/tests/unit/simd/simd.cpp b/tests/unit/simd/simd.cpp
@@ -0,0 +1,7 @@
+/**
+ * KFR (http://kfrlib.com)
+ * Copyright (C) 2016-2022 Fractalium Ltd
+ * See LICENSE.txt for details
+ */
+
+#include <kfr/simd.hpp>
diff --git a/tests/unit/simd/vec.cpp b/tests/unit/simd/vec.cpp
@@ -71,78 +71,86 @@ TEST(cast)
     static_assert(!is_convertible<u16x4, i32x3>, "");
     static_assert(!is_convertible<u16x1, u16x16>, "");
 
-    static_assert(is_same<decltype(innercast<f64>(f32x4x4(1))), f64x4x4>, "");
-    static_assert(is_same<decltype(innercast<f64>(f32x4(1))), f64x4>, "");
-    static_assert(is_same<decltype(innercast<f64>(f32(1))), f64>, "");
+    static_assert(is_convertible<float, vecx<float, 2>>, "");
+    static_assert(is_convertible<float, vecx<float, 2, 2>>, "");
 
-    // N/A static_assert(is_same<decltype(innercast<f64x4>(f32x4x4(1))), f64x4x4>, "");
-    static_assert(is_same<decltype(innercast<f64x4>(f32x4(1))), f64x4x4>, "");
-    static_assert(is_same<decltype(innercast<f64x4>(f32(1))), f64x4>, "");
+    static_assert(is_same<decltype(broadcastto<f64>(f32x4x4(1))), f64x4x4>, "");
+    static_assert(is_same<decltype(broadcastto<f64>(f32x4(1))), f64x4>, "");
+    static_assert(is_same<decltype(broadcastto<f64>(f32(1))), f64>, "");
 
-    // N/A static_assert(is_same<decltype(elemcast<f64>(f32x4x4(1))), f64x4>, "");
-    static_assert(is_same<decltype(elemcast<f64>(f32x4(1))), f64x4>, "");
+    // N/A static_assert(is_same<decltype(broadcastto<f64x4>(f32x4x4(1))), f64x4x4>, "");
+    static_assert(is_same<decltype(broadcastto<f64x4>(f32x4(1))), f64x4x4>, "");
+    static_assert(is_same<decltype(broadcastto<f64x4>(f32(1))), f64x4>, "");
 
-    static_assert(is_same<decltype(elemcast<f64x4>(f32x4x4(1))), f64x4x4>, "");
-    static_assert(is_same<decltype(elemcast<f64x4>(f32x4(1))), f64x4x4>, "");
+    // N/A static_assert(is_same<decltype(promoteto<f64>(f32x4x4(1))), f64x4>, "");
+    static_assert(is_same<decltype(promoteto<f64>(f32x4(1))), f64x4>, "");
+
+    static_assert(is_same<decltype(promoteto<f64x4>(f32x4x4(1))), f64x4x4>, "");
+    static_assert(is_same<decltype(promoteto<f64x4>(f32x4(1))), f64x4x4>, "");
+
+    CHECK(cast<vecx<float, 2, 2>>(123.f) == vec{ vec{ 123.f, 123.f }, vec{ 123.f, 123.f } });
+
+    CHECK(promoteto<vecx<float, 2>>(vecx<float, 4>{ 1.f, 2.f, 3.f, 4.f }) ==
+          vec{ vec{ 1.f, 1.f }, vec{ 2.f, 2.f }, vec{ 3.f, 3.f }, vec{ 4.f, 4.f } });
 
     testo::scope s("");
     s.text = ("target_type = u8");
     test_function1(
-        test_catogories::all, [](auto x) { return kfr::innercast<u8>(x); },
+        test_catogories::all, [](auto x) { return kfr::broadcastto<u8>(x); },
         [](auto x) -> u8 { return static_cast<u8>(x); },
         [](auto t, special_value x)
         { return is_in_range_of<u8>(x.get<subtype<typename decltype(t)::type>>()); });
     s.text = ("target_type = i8");
     test_function1(
-        test_catogories::all, [](auto x) { return kfr::innercast<i8>(x); },
+        test_catogories::all, [](auto x) { return kfr::broadcastto<i8>(x); },
         [](auto x) -> i8 { return static_cast<i8>(x); },
         [](auto t, special_value x)
         { return is_in_range_of<i8>(x.get<subtype<typename decltype(t)::type>>()); });
     s.text = ("target_type = u16");
     test_function1(
-        test_catogories::all, [](auto x) { return kfr::innercast<u16>(x); },
+        test_catogories::all, [](auto x) { return kfr::broadcastto<u16>(x); },
         [](auto x) -> u16 { return static_cast<u16>(x); },
         [](auto t, special_value x)
         { return is_in_range_of<u16>(x.get<subtype<typename decltype(t)::type>>()); });
     s.text = ("target_type = i16");
     test_function1(
-        test_catogories::all, [](auto x) { return kfr::innercast<i16>(x); },
+        test_catogories::all, [](auto x) { return kfr::broadcastto<i16>(x); },
         [](auto x) -> i16 { return static_cast<i16>(x); },
         [](auto t, special_value x)
         { return is_in_range_of<i16>(x.get<subtype<typename decltype(t)::type>>()); });
     s.text = ("target_type = u32");
     test_function1(
-        test_catogories::all, [](auto x) { return kfr::innercast<u32>(x); },
+        test_catogories::all, [](auto x) { return kfr::broadcastto<u32>(x); },
         [](auto x) -> u32 { return static_cast<u32>(x); },
         [](auto t, special_value x)
         { return is_in_range_of<u32>(x.get<subtype<typename decltype(t)::type>>()); });
     s.text = ("target_type = i32");
     test_function1(
-        test_catogories::all, [](auto x) { return kfr::innercast<i32>(x); },
+        test_catogories::all, [](auto x) { return kfr::broadcastto<i32>(x); },
         [](auto x) -> i32 { return static_cast<i32>(x); },
         [](auto t, special_value x)
         { return is_in_range_of<i32>(x.get<subtype<typename decltype(t)::type>>()); });
     s.text = ("target_type = u64");
     test_function1(
-        test_catogories::all, [](auto x) { return kfr::innercast<u64>(x); },
+        test_catogories::all, [](auto x) { return kfr::broadcastto<u64>(x); },
         [](auto x) -> u64 { return static_cast<u64>(x); },
         [](auto t, special_value x)
         { return is_in_range_of<u64>(x.get<subtype<typename decltype(t)::type>>()); });
     s.text = ("target_type = i64");
     test_function1(
-        test_catogories::all, [](auto x) { return kfr::innercast<i64>(x); },
+        test_catogories::all, [](auto x) { return kfr::broadcastto<i64>(x); },
         [](auto x) -> i64 { return static_cast<i64>(x); },
         [](auto t, special_value x)
         { return is_in_range_of<i64>(x.get<subtype<typename decltype(t)::type>>()); });
     s.text = ("target_type = f32");
     test_function1(
-        test_catogories::all, [](auto x) { return kfr::innercast<f32>(x); },
+        test_catogories::all, [](auto x) { return kfr::broadcastto<f32>(x); },
         [](auto x) -> f32 { return static_cast<f32>(x); },
         [](auto t, special_value x)
         { return is_in_range_of<f32>(x.get<subtype<typename decltype(t)::type>>()); });
     s.text = ("target_type = f64");
     test_function1(
-        test_catogories::all, [](auto x) { return kfr::innercast<f64>(x); },
+        test_catogories::all, [](auto x) { return kfr::broadcastto<f64>(x); },
         [](auto x) -> f64 { return static_cast<f64>(x); },
         [](auto t, special_value x)
         { return is_in_range_of<f64>(x.get<subtype<typename decltype(t)::type>>()); });
@@ -198,5 +206,11 @@ TEST(masks)
     CHECK(float(v[3]) == maskbits<float>(true));
 }
 
+TEST(vec_deduction)
+{
+    vec v{ 1, 2, 3 };
+    static_assert(std::is_same_v<decltype(v), vec<int, 3>>);
+}
+
 } // namespace CMT_ARCH_NAME
 } // namespace kfr
diff --git a/update-sources.py b/update-sources.py
@@ -30,7 +30,15 @@ cmake = """
 # Use update-sources.py
 """
 
-list_sources("KFR_SRC", "include", ['*.hpp', '*.h', '*.i', '*.inc'])
+list_sources("KFR_SRC", "include", ['*.hpp', '*.h'])
+list_sources("KFR_SIMD_SRC", "include/kfr/simd", ['*.hpp', '*.h'])
+list_sources("KFR_MATH_SRC", "include/kfr/math", ['*.hpp', '*.h'])
+list_sources("KFR_BASE_SRC", "include/kfr/base", ['*.hpp', '*.h'])
+list_sources("KFR_DSP_SRC", "include/kfr/dsp", ['*.hpp', '*.h'])
+list_sources("KFR_IO_SRC", "include/kfr/io", ['*.hpp', '*.h'])
+list_sources("KFR_RUNTIME_SRC", "include/kfr/runtime", ['*.hpp', '*.h'])
+list_sources("KFR_GRAPHICS_SRC", "include/kfr/graphics", ['*.hpp', '*.h'])
+list_sources("KFR_SRC", "include", ['*.hpp', '*.h'])
 list_sources("KFR_DFT_SRC", "include/kfr/dft", ['*.cpp'], ["dft-src.cpp"])
 list_sources("KFR_IO_SRC", "include/kfr/io", ['*.cpp'])

	kfr Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)
	Log \| Files \| Refs \| README

M	examples/biquads.cpp	\|	4	+++-
M	examples/ccv.cpp	\|	41	+++++++++++++++++++++--------------------
M	examples/fir.cpp	\|	2	+-
M	include/kfr/base.hpp	\|	4	++++
M	include/kfr/base/basic_expressions.hpp	\|	460	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------
M	include/kfr/base/conversion.hpp	\|	22	+++++++++++++---------
M	include/kfr/base/expression.hpp	\|	686	++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------------
M	include/kfr/base/filter.hpp	\|	22	+++++++++++-----------
M	include/kfr/base/generators.hpp	\|	108	++++++++++++++++++++++++++++++++++++++-----------------------------------------
M	include/kfr/base/impl/static_array.hpp	\|	2	+-
M	include/kfr/base/math_expressions.hpp	\|	264	+++++++++++++++++++------------------------------------------------------------
D	include/kfr/base/old_basic_expressions.hpp	\|	708	-------------------------------------------------------------------------------
M	include/kfr/base/pointer.hpp	\|	195	+++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------
M	include/kfr/base/random.hpp	\|	134	++++++++++++++++++++++++++++++++++++++++++++-----------------------------------
M	include/kfr/base/random_bits.hpp	\|	90	++++++++++++++++++++++++++++++++++++++++++++-----------------------------------
M	include/kfr/base/reduce.hpp	\|	69	++++++++++++++++++++++++++++++++++-----------------------------------
M	include/kfr/base/shape.hpp	\|	102	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------
M	include/kfr/base/simd_expressions.hpp	\|	308	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------
A	include/kfr/base/state_holder.hpp	\|	49	+++++++++++++++++++++++++++++++++++++++++++++++++
M	include/kfr/base/tensor.hpp	\|	34	++++++----------------------------
M	include/kfr/base/univector.hpp	\|	102	+++++++++++++++++++++++++++++++++++++++++++++++++------------------------------
M	include/kfr/capi.h	\|	2	+-
M	include/kfr/cometa.hpp	\|	19	+++++++++++++------
M	include/kfr/cometa/cstring.hpp	\|	6	+++---
M	include/kfr/cometa/function.hpp	\|	4	++--
M	include/kfr/cometa/numeric.hpp	\|	12	+++++-------
M	include/kfr/dft/convolution.hpp	\|	10	+++++-----
M	include/kfr/dft/fft.hpp	\|	7	+++----
M	include/kfr/dft/impl/convolution-impl.cpp	\|	4	++--
M	include/kfr/dft/impl/dft-impl.hpp	\|	46	+++++++++++++++++++++++++---------------------
M	include/kfr/dft/impl/fft-impl.hpp	\|	108	+++++++++++++++++++++++++++++++++++++++++++++----------------------------------
M	include/kfr/dft/impl/ft.hpp	\|	30	++++++++++++++++--------------
M	include/kfr/dsp/biquad.hpp	\|	212	++++++++++++++++++++++++++++++++++++++-----------------------------------------
M	include/kfr/dsp/dcremove.hpp	\|	10	++++------
M	include/kfr/dsp/delay.hpp	\|	98	++++++++++++++++++++++++++++++++++++++++++++-----------------------------------
M	include/kfr/dsp/ebu.hpp	\|	9	+++++----
M	include/kfr/dsp/fir.hpp	\|	144	+++++++++++++++++++++++++++++++++++++++++--------------------------------------
M	include/kfr/dsp/fir_design.hpp	\|	13	++++++-------
M	include/kfr/dsp/fracdelay.hpp	\|	4	++--
M	include/kfr/dsp/goertzel.hpp	\|	27	+++++++++++++--------------
M	include/kfr/dsp/iir_design.hpp	\|	16	++++++++--------
M	include/kfr/dsp/mixdown.hpp	\|	13	++++---------
M	include/kfr/dsp/oscillators.hpp	\|	34	++++++++++++++++++----------------
M	include/kfr/dsp/sample_rate_conversion.hpp	\|	69	++++++++++++++++++++++++++++++++++++++-------------------------------
M	include/kfr/dsp/special.hpp	\|	15	+++++++++------
D	include/kfr/dsp/state_holder.hpp	\|	41	-----------------------------------------
M	include/kfr/dsp/units.hpp	\|	26	+++++++++++++-------------
M	include/kfr/dsp/waveshaper.hpp	\|	6	+++---
M	include/kfr/dsp/weighting.hpp	\|	6	+++---
M	include/kfr/dsp/window.hpp	\|	457	++++++++++++++++++++++++++++++++++---------------------------------------------
M	include/kfr/graphics.hpp	\|	3	+--
M	include/kfr/graphics/color.hpp	\|	2	+-
M	include/kfr/graphics/geometry.hpp	\|	20	+++++++++++++-------
A	include/kfr/graphics/impl/scaled.hpp	\|	58	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D	include/kfr/graphics/scaled.hpp	\|	59	-----------------------------------------------------------
M	include/kfr/kfr.h	\|	5	+++--
M	include/kfr/math/complex_math.hpp	\|	6	+++---
M	include/kfr/math/impl/atan.hpp	\|	8	++++----
M	include/kfr/math/impl/gamma.hpp	\|	2	+-
M	include/kfr/math/impl/hyperbolic.hpp	\|	6	+++---
M	include/kfr/math/impl/log_exp.hpp	\|	62	+++++++++++++++++++++++++++++++-------------------------------
M	include/kfr/math/impl/sin_cos.hpp	\|	8	++++----
M	include/kfr/math/impl/tan.hpp	\|	5	++++-
M	include/kfr/simd.hpp	\|	1	+
M	include/kfr/simd/complex.hpp	\|	60	++++++++++++++++++++++++++++++++++++++++++++++++++++++------
M	include/kfr/simd/complex_type.hpp	\|	2	+-
M	include/kfr/simd/impl/abs.hpp	\|	2	+-
M	include/kfr/simd/impl/backend_clang.hpp	\|	12	+++++++-----
M	include/kfr/simd/impl/backend_generic.hpp	\|	3	++-
M	include/kfr/simd/impl/basicoperators_clang.hpp	\|	6	++++++
M	include/kfr/simd/impl/basicoperators_complex.hpp	\|	2	+-
M	include/kfr/simd/impl/basicoperators_generic.hpp	\|	137	++++++++++++++++++++++++-------------------------------------------------------
M	include/kfr/simd/impl/function.hpp	\|	2	+-
M	include/kfr/simd/impl/logical.hpp	\|	2	+-
M	include/kfr/simd/impl/min_max.hpp	\|	2	+-
M	include/kfr/simd/impl/operators.hpp	\|	77	++++++-----------------------------------------------------------------------
M	include/kfr/simd/impl/round.hpp	\|	26	+++++++++++++-------------
M	include/kfr/simd/impl/saturation.hpp	\|	2	+-
M	include/kfr/simd/impl/select.hpp	\|	2	+-
R	include/kfr/simd/impl/specializations.i -> include/kfr/simd/impl/specializations.hpp	\|	0
M	include/kfr/simd/operators.hpp	\|	68	+++++++++++++++++++++++++++++++++++++++++---------------------------
M	include/kfr/simd/read_write.hpp	\|	15	+++++++++++----
M	include/kfr/simd/select.hpp	\|	2	+-
M	include/kfr/simd/shuffle.hpp	\|	5	++---
M	include/kfr/simd/vec.hpp	\|	251	+++++++++++++++++++++++++++++++++++++++++--------------------------------------
M	sources.cmake	\|	331	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
M	tests/CMakeLists.txt	\|	3	++-
M	tests/asm_test.cpp	\|	4	++--
M	tests/base_test.cpp	\|	22	+++-------------------
M	tests/complex_test.cpp	\|	10	+++-------
M	tests/dsp_test.cpp	\|	565	-------------------------------------------------------------------------------
M	tests/expression_test.cpp	\|	149	+++----------------------------------------------------------------------------
M	tests/intrinsic_test.cpp	\|	42	+++++++++++++++++++++++-------------------
M	tests/numeric_tests.hpp	\|	96	++++++++++++++++++++++++++++++++++++++++++++-----------------------------------
M	tests/tensor_test.cpp	\|	2	+-
A	tests/unit/base/base.cpp	\|	7	+++++++
A	tests/unit/base/basic_expressions.cpp	\|	131	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	tests/unit/base/generators.cpp	\|	33	+++++++++++++++++++++++++++++++++
A	tests/unit/base/pointer.cpp	\|	57	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	tests/unit/base/random.cpp	\|	16	++++++++--------
M	tests/unit/base/reduce.cpp	\|	9	+++++++++
A	tests/unit/base/shape.cpp	\|	72	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	tests/unit/base/tensor.cpp	\|	165	++++++++++++++++++++++++++-----------------------------------------------------
A	tests/unit/dsp/biquad.cpp	\|	113	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	tests/unit/dsp/biquad_design.cpp	\|	7	+++++++
A	tests/unit/dsp/dsp.cpp	\|	7	+++++++
A	tests/unit/dsp/ebu.cpp	\|	270	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	tests/unit/dsp/fir.cpp	\|	234	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	tests/unit/dsp/sample_rate_conversion.cpp	\|	51	+++++++++++++++++++++++++++++++++++++++++++++++++++
A	tests/unit/dsp/units.cpp	\|	58	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	tests/unit/dsp/window.cpp	\|	189	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	tests/unit/graphics/graphics.cpp	\|	7	+++++++
A	tests/unit/math/math.cpp	\|	7	+++++++
M	tests/unit/simd/abs.cpp	\|	2	+-
M	tests/unit/simd/min_max.cpp	\|	8	+++++---
M	tests/unit/simd/operators.cpp	\|	47	++++++++++++++++++++++++++++++++++++++---------
M	tests/unit/simd/round.cpp	\|	27	+++++++++++----------------
M	tests/unit/simd/select.cpp	\|	8	+++++---
M	tests/unit/simd/shuffle.cpp	\|	6	++++++
A	tests/unit/simd/simd.cpp	\|	7	+++++++
M	tests/unit/simd/vec.cpp	\|	54	++++++++++++++++++++++++++++++++++--------------------
M	update-sources.py	\|	10	+++++++++-