Pass vec by const reference - kfr - Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)

commit 75ba81ba8421d474003470cd1254654d8cd8c14d
parent 116d49965b5bb4fe38b853559c5976c0f1e7dd2f
Author: d.levin256@gmail.com <d.levin256@gmail.com>
Date:   Sat, 30 Jul 2016 20:54:49 +0300

Pass vec by const reference

Diffstat:
M include/kfr/base/shuffle.hpp  | 84 ++++++++++++++++++++++++++++++++++++++++----------------------------------------
M include/kfr/base/specializations.i  | 35 ++++++++++++++---------------------
M include/kfr/base/vec.hpp  | 154 +++++++++++++++++++++++++++++++++++++++++++++++--------------------------------

3 files changed, 148 insertions(+), 125 deletions(-)
diff --git a/include/kfr/base/shuffle.hpp b/include/kfr/base/shuffle.hpp
@@ -63,39 +63,39 @@ constexpr KFR_INLINE vec<T, Nout> broadcast(T x, T y, Ts... rest)
 KFR_FN(broadcast)
 
 template <size_t Ncount, typename T, size_t N>
-KFR_INLINE vec<T, N + Ncount> padhigh(vec<T, N> x)
+KFR_INLINE vec<T, N + Ncount> padhigh(const vec<T, N>& x)
 {
     return shufflevector<N + Ncount, internal::shuffle_index_extend<0, N>>(x);
 }
 KFR_FN(padhigh)
 
 template <size_t Ncount, typename T, size_t N>
-KFR_INLINE vec<T, N + Ncount> padlow(vec<T, N> x)
+KFR_INLINE vec<T, N + Ncount> padlow(const vec<T, N>& x)
 {
     return shufflevector<N + Ncount, internal::shuffle_index_extend<Ncount, N>>(x);
 }
 KFR_FN(padlow)
 
 template <size_t Nout, typename T, size_t N, KFR_ENABLE_IF(N != Nout)>
-KFR_INLINE vec<T, Nout> extend(vec<T, N> x)
+KFR_INLINE vec<T, Nout> extend(const vec<T, N>& x)
 {
     return shufflevector<Nout, internal::shuffle_index_extend<0, N>>(x);
 }
 template <size_t Nout, typename T, size_t N, KFR_ENABLE_IF(N == Nout)>
-constexpr KFR_INLINE vec<T, Nout> extend(vec<T, N> x)
+constexpr KFR_INLINE vec<T, Nout> extend(const vec<T, N>& x)
 {
     return x;
 }
 KFR_FN(extend)
 
 template <size_t start, size_t count, typename T, size_t N>
-KFR_INLINE vec<T, count> slice(vec<T, N> x)
+KFR_INLINE vec<T, count> slice(const vec<T, N>& x)
 {
     static_assert(start + count <= N, "start + count <= N");
     return shufflevector<count, internal::shuffle_index<start>>(x);
 }
 template <size_t start, size_t count, typename T, size_t N>
-KFR_INLINE vec<T, count> slice(vec<T, N> x, vec<T, N> y)
+KFR_INLINE vec<T, count> slice(const vec<T, N>& x, const vec<T, N>& y)
 {
     static_assert(start + count <= N * 2, "start + count <= N * 2");
     return shufflevector<count, internal::shuffle_index<start>>(x, y);
@@ -103,11 +103,11 @@ KFR_INLINE vec<T, count> slice(vec<T, N> x, vec<T, N> y)
 KFR_FN(slice)
 
 template <size_t, typename T, size_t N>
-KFR_INLINE void split(vec<T, N>)
+KFR_INLINE void split(const vec<T, N>&)
 {
 }
 template <size_t start = 0, typename T, size_t N, size_t Nout, typename... Args>
-KFR_INLINE void split(vec<T, N> x, vec<T, Nout>& out, Args&&... args)
+KFR_INLINE void split(const vec<T, N>& x, vec<T, Nout>& out, Args&&... args)
 {
     out = slice<start, Nout>(x);
     split<start + Nout>(x, std::forward<Args>(args)...);
@@ -115,7 +115,7 @@ KFR_INLINE void split(vec<T, N> x, vec<T, Nout>& out, Args&&... args)
 KFR_FN(split)
 
 template <size_t total, size_t number, typename T, size_t N, size_t Nout = N / total>
-KFR_INLINE vec<T, Nout> part(vec<T, N> x)
+KFR_INLINE vec<T, Nout> part(const vec<T, N>& x)
 {
     static_assert(N % total == 0, "N % total == 0");
     return shufflevector<Nout, internal::shuffle_index<number * Nout>>(x);
@@ -123,27 +123,27 @@ KFR_INLINE vec<T, Nout> part(vec<T, N> x)
 KFR_FN(part)
 
 template <size_t start, size_t count, typename T, size_t N1, size_t N2>
-KFR_INLINE vec<T, count> concat_and_slice(vec<T, N1> x, vec<T, N2> y)
+KFR_INLINE vec<T, count> concat_and_slice(const vec<T, N1>& x, const vec<T, N2>& y)
 {
     return internal::concattwo<start, count>(x, y);
 }
 KFR_FN(concat_and_slice)
 
 template <size_t Nout, typename T, size_t N>
-KFR_INLINE vec<T, Nout> widen(vec<T, N> x, identity<T> newvalue = T())
+KFR_INLINE vec<T, Nout> widen(const vec<T, N>& x, identity<T> newvalue = T())
 {
     static_assert(Nout > N, "Nout > N");
     return concat(x, broadcast<Nout - N>(newvalue));
 }
 template <size_t Nout, typename T, typename TS>
-constexpr KFR_INLINE vec<T, Nout> widen(vec<T, Nout> x, TS)
+constexpr KFR_INLINE vec<T, Nout> widen(const vec<T, Nout>& x, TS)
 {
     return x;
 }
 KFR_FN(widen)
 
 template <size_t Nout, typename T, size_t N>
-KFR_INLINE vec<T, Nout> narrow(vec<T, N> x)
+KFR_INLINE vec<T, Nout> narrow(const vec<T, N>& x)
 {
     static_assert(Nout <= N, "Nout <= N");
     return slice<0, Nout>(x);
@@ -152,7 +152,7 @@ KFR_FN(narrow)
 
 template <size_t groupsize = 1, typename T, size_t N, size_t Nout = N / 2,
           KFR_ENABLE_IF(N >= 2 && (N & 1) == 0)>
-KFR_INLINE vec<T, Nout> even(vec<T, N> x)
+KFR_INLINE vec<T, Nout> even(const vec<T, N>& x)
 {
     return shufflevector<Nout, internal::shuffle_index<0, 2>, groupsize>(x);
 }
@@ -160,7 +160,7 @@ KFR_FNR(even, 2, 1)
 
 template <size_t groupsize = 1, typename T, size_t N, size_t Nout = N / 2,
           KFR_ENABLE_IF(N >= 2 && (N & 1) == 0)>
-KFR_INLINE vec<T, Nout> odd(vec<T, N> x)
+KFR_INLINE vec<T, Nout> odd(const vec<T, N>& x)
 {
     return shufflevector<Nout, internal::shuffle_index<1, 2>, groupsize>(x);
 }
@@ -182,7 +182,7 @@ struct shuffle_index_dup
 }
 
 template <typename T, size_t N>
-KFR_INLINE vec<T, N> dupeven(vec<T, N> x)
+KFR_INLINE vec<T, N> dupeven(const vec<T, N>& x)
 {
     static_assert(N % 2 == 0, "N must be even");
     return shufflevector<N, internal::shuffle_index_dup<2, 0>>(x);
@@ -190,7 +190,7 @@ KFR_INLINE vec<T, N> dupeven(vec<T, N> x)
 KFR_FN(dupeven)
 
 template <typename T, size_t N>
-KFR_INLINE vec<T, N> dupodd(vec<T, N> x)
+KFR_INLINE vec<T, N> dupodd(const vec<T, N>& x)
 {
     static_assert(N % 2 == 0, "N must be even");
     return shufflevector<N, internal::shuffle_index_dup<2, 1>>(x);
@@ -198,7 +198,7 @@ KFR_INLINE vec<T, N> dupodd(vec<T, N> x)
 KFR_FN(dupodd)
 
 template <typename T, size_t N>
-KFR_INLINE vec<T, N * 2> duphalfs(vec<T, N> x)
+KFR_INLINE vec<T, N * 2> duphalfs(const vec<T, N>& x)
 {
     return concat(x, x);
 }
@@ -221,14 +221,14 @@ struct shuffle_index_shuffle
 }
 
 template <size_t... Indices, typename T, size_t N>
-KFR_INLINE vec<T, N> shuffle(vec<T, N> x, vec<T, N> y, elements_t<Indices...> = elements_t<Indices...>())
+KFR_INLINE vec<T, N> shuffle(const vec<T, N>& x, const vec<T, N>& y, elements_t<Indices...> = elements_t<Indices...>())
 {
     return shufflevector<N, internal::shuffle_index_shuffle<N, Indices...>>(x, y);
 }
 KFR_FN(shuffle)
 
 template <size_t groupsize, size_t... Indices, typename T, size_t N>
-KFR_INLINE vec<T, N> shufflegroups(vec<T, N> x, vec<T, N> y,
+KFR_INLINE vec<T, N> shufflegroups(const vec<T, N>& x, const vec<T, N>& y,
                                    elements_t<Indices...> = elements_t<Indices...>())
 {
     return shufflevector<N, internal::shuffle_index_shuffle<N, Indices...>, groupsize>(x, y);
@@ -253,14 +253,14 @@ struct shuffle_index_permute
 }
 
 template <size_t... Indices, typename T, size_t N>
-KFR_INLINE vec<T, N> permute(vec<T, N> x, elements_t<Indices...> = elements_t<Indices...>())
+KFR_INLINE vec<T, N> permute(const vec<T, N>& x, elements_t<Indices...> = elements_t<Indices...>())
 {
     return shufflevector<N, internal::shuffle_index_permute<N, Indices...>>(x);
 }
 KFR_FN(permute)
 
 template <size_t groupsize, size_t... Indices, typename T, size_t N>
-KFR_INLINE vec<T, N> permutegroups(vec<T, N> x, elements_t<Indices...> = elements_t<Indices...>())
+KFR_INLINE vec<T, N> permutegroups(const vec<T, N>& x, elements_t<Indices...> = elements_t<Indices...>())
 {
     return shufflevector<N, internal::shuffle_index_permute<N, Indices...>, groupsize>(x);
 }
@@ -299,7 +299,7 @@ constexpr KFR_INLINE mask<T, N> oddmask()
 }
 
 template <typename T, size_t N, size_t Nout = N * 2>
-KFR_INLINE vec<T, Nout> dup(vec<T, N> x)
+KFR_INLINE vec<T, Nout> dup(const vec<T, N>& x)
 {
     return shufflevector<Nout, internal::shuffle_index_dup1<2>>(x, x);
 }
@@ -315,7 +315,7 @@ struct shuffle_index_duphalf
 }
 
 template <typename T, size_t N>
-KFR_INLINE vec<T, N> duplow(vec<T, N> x)
+KFR_INLINE vec<T, N> duplow(const vec<T, N>& x)
 {
     static_assert(N % 2 == 0, "N must be even");
     return shufflevector<N, internal::shuffle_index_duphalf<N / 2, 0>>(x);
@@ -346,7 +346,7 @@ struct shuffle_index_blend
 }
 
 template <size_t... Indices, typename T, size_t N>
-KFR_INLINE vec<T, N> blend(vec<T, N> x, vec<T, N> y, elements_t<Indices...> = elements_t<Indices...>())
+KFR_INLINE vec<T, N> blend(const vec<T, N>& x, const vec<T, N>& y, elements_t<Indices...> = elements_t<Indices...>())
 {
     return shufflevector<N, internal::shuffle_index_blend<N, Indices...>, 1>(x, y);
 }
@@ -381,13 +381,13 @@ KFR_INLINE vec<T, N> swap(vec<T, N> x)
 KFR_FN(swap)
 
 template <size_t shift, typename T, size_t N>
-KFR_INLINE vec<T, N> rotatetwo(vec<T, N> lo, vec<T, N> hi)
+KFR_INLINE vec<T, N> rotatetwo(const vec<T, N>& lo, const vec<T, N>& hi)
 {
     return shift == 0 ? lo : (shift == N ? hi : shufflevector<N, internal::shuffle_index<N - shift>>(hi, lo));
 }
 
 template <size_t amount, typename T, size_t N>
-KFR_INLINE vec<T, N> rotateright(vec<T, N> x, csize_t<amount> = csize_t<amount>())
+KFR_INLINE vec<T, N> rotateright(const vec<T, N>& x, csize_t<amount> = csize_t<amount>())
 {
     static_assert(amount >= 0 && amount < N, "amount >= 0 && amount < N");
     return shufflevector<N, internal::shuffle_index_wrap<N, N - amount>>(x);
@@ -395,7 +395,7 @@ KFR_INLINE vec<T, N> rotateright(vec<T, N> x, csize_t<amount> = csize_t<amount>(
 KFR_FN(rotateright)
 
 template <size_t amount, typename T, size_t N>
-KFR_INLINE vec<T, N> rotateleft(vec<T, N> x, csize_t<amount> = csize_t<amount>())
+KFR_INLINE vec<T, N> rotateleft(const vec<T, N>& x, csize_t<amount> = csize_t<amount>())
 {
     static_assert(amount >= 0 && amount < N, "amount >= 0 && amount < N");
     return shufflevector<N, internal::shuffle_index_wrap<N, amount>>(x);
@@ -403,21 +403,21 @@ KFR_INLINE vec<T, N> rotateleft(vec<T, N> x, csize_t<amount> = csize_t<amount>()
 KFR_FN(rotateleft)
 
 template <typename T, size_t N>
-KFR_INLINE vec<T, N> insertright(T x, vec<T, N> y)
+KFR_INLINE vec<T, N> insertright(T x, const vec<T, N>& y)
 {
     return concat_and_slice<1, N>(y, vec<T, 1>(x));
 }
 KFR_FN(insertright)
 
 template <typename T, size_t N>
-KFR_INLINE vec<T, N> insertleft(T x, vec<T, N> y)
+KFR_INLINE vec<T, N> insertleft(T x, const vec<T, N>& y)
 {
     return concat_and_slice<0, N>(vec<T, 1>(x), y);
 }
 KFR_FN(insertleft)
 
 template <typename T, size_t N, size_t N2>
-KFR_INLINE vec<T, N> outputright(vec<T, N> x, vec<T, N2> y)
+KFR_INLINE vec<T, N> outputright(const vec<T, N>& x, const vec<T, N2>& y)
 {
     return shufflevector<N, internal::shuffle_index_outputright<N2, N>>(x, extend<N>(y));
 }
@@ -437,46 +437,46 @@ struct shuffle_index_transpose
 }
 
 template <size_t side, size_t groupsize = 1, typename T, size_t N, KFR_ENABLE_IF(N / groupsize > 3)>
-KFR_INLINE vec<T, N> transpose(vec<T, N> x)
+KFR_INLINE vec<T, N> transpose(const vec<T, N>& x)
 {
     return shufflevector<N, internal::shuffle_index_transpose<N / groupsize, side>, groupsize>(x);
 }
 template <size_t side, size_t groupsize = 1, typename T, size_t N, KFR_ENABLE_IF(N / groupsize <= 3)>
-KFR_INLINE vec<T, N> transpose(vec<T, N> x)
+KFR_INLINE vec<T, N> transpose(const vec<T, N>& x)
 {
     return x;
 }
 KFR_FN(transpose)
 
 template <size_t side, size_t groupsize = 1, typename T, size_t N, KFR_ENABLE_IF(N / groupsize > 3)>
-KFR_INLINE vec<T, N> transposeinverse(vec<T, N> x)
+KFR_INLINE vec<T, N> transposeinverse(const vec<T, N>& x)
 {
     return shufflevector<N, internal::shuffle_index_transpose<N / groupsize, N / groupsize / side>,
                          groupsize>(x);
 }
 template <size_t side, size_t groupsize = 1, typename T, size_t N, KFR_ENABLE_IF(N / groupsize <= 3)>
-KFR_INLINE vec<T, N> transposeinverse(vec<T, N> x)
+KFR_INLINE vec<T, N> transposeinverse(const vec<T, N>& x)
 {
     return x;
 }
 KFR_FN(transposeinverse)
 
 template <size_t side, typename T, size_t N>
-KFR_INLINE vec<T, N> ctranspose(vec<T, N> x)
+KFR_INLINE vec<T, N> ctranspose(const vec<T, N>& x)
 {
     return transpose<side, 2>(x);
 }
 KFR_FN(ctranspose)
 
 template <size_t side, typename T, size_t N>
-KFR_INLINE vec<T, N> ctransposeinverse(vec<T, N> x)
+KFR_INLINE vec<T, N> ctransposeinverse(const vec<T, N>& x)
 {
     return transposeinverse<side, 2>(x);
 }
 KFR_FN(ctransposeinverse)
 
 template <size_t groupsize = 1, typename T, size_t N, size_t Nout = N * 2>
-KFR_INLINE vec<T, Nout> interleave(vec<T, N> x, vec<T, N> y)
+KFR_INLINE vec<T, Nout> interleave(const vec<T, N>& x, const vec<T, N>& y)
 {
     return shufflevector<Nout, internal::shuffle_index_transpose<Nout / groupsize, Nout / groupsize / 2>,
                          groupsize>(x, y);
@@ -490,7 +490,7 @@ KFR_INLINE expr_func<fn_interleave, E1, E2> interleave(E1&& x, E2&& y)
 }
 
 template <size_t groupsize = 1, typename T, size_t N>
-KFR_INLINE vec<T, N> interleavehalfs(vec<T, N> x)
+KFR_INLINE vec<T, N> interleavehalfs(const vec<T, N>& x)
 {
     return shufflevector<N, internal::shuffle_index_transpose<N / groupsize, N / groupsize / 2>, groupsize>(
         x);
@@ -498,7 +498,7 @@ KFR_INLINE vec<T, N> interleavehalfs(vec<T, N> x)
 KFR_FN(interleavehalfs)
 
 template <size_t groupsize = 1, typename T, size_t N>
-KFR_INLINE vec<T, N> splitpairs(vec<T, N> x)
+KFR_INLINE vec<T, N> splitpairs(const vec<T, N>& x)
 {
     return shufflevector<N, internal::shuffle_index_transpose<N / groupsize, 2>, groupsize>(x);
 }
@@ -514,7 +514,7 @@ struct shuffle_index_reverse
 }
 
 template <size_t groupsize = 1, typename T, size_t N>
-KFR_INLINE vec<T, N> reverse(vec<T, N> x)
+KFR_INLINE vec<T, N> reverse(const vec<T, N>& x)
 {
     return shufflevector<N, internal::shuffle_index_reverse<N / groupsize>, groupsize>(x);
 }
@@ -530,7 +530,7 @@ struct shuffle_index_combine
 }
 
 template <typename T, size_t N1, size_t N2>
-KFR_INLINE vec<T, N1> combine(vec<T, N1> x, vec<T, N2> y)
+KFR_INLINE vec<T, N1> combine(const vec<T, N1>& x, const vec<T, N2>& y)
 {
     static_assert(N2 <= N1, "N2 <= N1");
     return shufflevector<N1, internal::shuffle_index_combine<N1, N2>>(x, extend<N1>(y));
diff --git a/include/kfr/base/specializations.i b/include/kfr/base/specializations.i
@@ -30,7 +30,7 @@ template <>
 inline vec<f32, 32> shufflevector<f32, 32>(
     csizes_t<0, 1, 8, 9, 16, 17, 24, 25, 2, 3, 10, 11, 18, 19, 26, 27, 4, 5, 12, 13, 20, 21, 28, 29, 6, 7, 14,
              15, 22, 23, 30, 31>,
-    vec<f32, 32> x, vec<f32, 32>)
+    const vec<f32, 32>& x, const vec<f32, 32>&)
 {
     f32x32 w = x;
 
@@ -45,7 +45,7 @@ template <>
 inline vec<f32, 32> shufflevector<f32, 32>(
     csizes_t<0, 1, 16, 17, 8, 9, 24, 25, 4, 5, 20, 21, 12, 13, 28, 29, 2, 3, 18, 19, 10, 11, 26, 27, 6, 7, 22,
              23, 14, 15, 30, 31>,
-    vec<f32, 32> x, vec<f32, 32>)
+    const vec<f32, 32>& x, const vec<f32, 32>&)
 {
     f32x32 w = x;
 
@@ -56,7 +56,7 @@ inline vec<f32, 32> shufflevector<f32, 32>(
     return w;
 }
 
-inline vec<f32, 32> bitreverse_2(vec<f32, 32> x)
+inline vec<f32, 32> bitreverse_2(const vec<f32, 32>& x)
 {
     return shufflevector<f32, 32>(csizes<0, 1, 16, 17, 8, 9, 24, 25, 4, 5, 20, 21, 12, 13, 28, 29, 2, 3, 18,
                                          19, 10, 11, 26, 27, 6, 7, 22, 23, 14, 15, 30, 31>,
@@ -68,46 +68,39 @@ inline vec<f32, 64> shufflevector<f32, 64>(
     csizes_t<0, 1, 32, 33, 16, 17, 48, 49, 8, 9, 40, 41, 24, 25, 56, 57, 4, 5, 36, 37, 20, 21, 52, 53, 12, 13,
              44, 45, 28, 29, 60, 61, 2, 3, 34, 35, 18, 19, 50, 51, 10, 11, 42, 43, 26, 27, 58, 59, 6, 7, 38,
              39, 22, 23, 54, 55, 14, 15, 46, 47, 30, 31, 62, 63>,
-    vec<f32, 64> x, vec<f32, 64>)
+    const vec<f32, 64>& x, const vec<f32, 64>&)
 {
-    x = concat(bitreverse_2(even<8>(x)), bitreverse_2(odd<8>(x)));
-    return permutegroups<(8), 0, 4, 1, 5, 2, 6, 3, 7>(x);
+    return permutegroups<(8), 0, 4, 1, 5, 2, 6, 3, 7>(concat(bitreverse_2(even<8>(x)), bitreverse_2(odd<8>(x))));
 }
 
 template <>
 inline vec<f32, 16> shufflevector<f32, 16>(csizes_t<0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15>,
-                                           vec<f32, 16> x, vec<f32, 16>)
+                                           const vec<f32, 16>& x, const vec<f32, 16>&)
 {
 //    asm volatile("int $3");
-    x = permutegroups<(4), 0, 2, 1, 3>(x);
+    const vec<f32, 16> xx = permutegroups<(4), 0, 2, 1, 3>(x);
 
-    x = concat(shuffle<0, 2, 8 + 0, 8 + 2>(low(x), high(x)), shuffle<1, 3, 8 + 1, 8 + 3>(low(x), high(x)));
-
-    return x;
+    return concat(shuffle<0, 2, 8 + 0, 8 + 2>(low(xx), high(xx)), shuffle<1, 3, 8 + 1, 8 + 3>(low(xx), high(xx)));
 }
 
 template <>
 inline vec<f32, 16> shufflevector<f32, 16>(csizes_t<0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15>,
-                                           vec<f32, 16> x, vec<f32, 16>)
+                                           const vec<f32, 16>& x, const vec<f32, 16>&)
 {
-    x = concat(shuffle<0, 8 + 0, 1, 8 + 1>(low(x), high(x)), shuffle<2, 8 + 2, 3, 8 + 3>(low(x), high(x)));
-
-    x = permutegroups<(4), 0, 2, 1, 3>(x);
+    const vec<f32, 16> xx = concat(shuffle<0, 8 + 0, 1, 8 + 1>(low(x), high(x)), shuffle<2, 8 + 2, 3, 8 + 3>(low(x), high(x)));
 
-    return x;
+    return permutegroups<(4), 0, 2, 1, 3>(xx);
 }
 
 template <>
 inline vec<f32, 32> shufflevector<f32, 32>(
     csizes_t<0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13,
              29, 14, 30, 15, 31>,
-    vec<f32, 32> x, vec<f32, 32>)
+    const vec<f32, 32>& x, const vec<f32, 32>&)
 {
-    x = permutegroups<(8), 0, 2, 1, 3>(x);
-
-    x = concat(interleavehalfs(low(x)), interleavehalfs(high(x)));
+    const vec<f32, 32> xx = permutegroups<(8), 0, 2, 1, 3>(x);
 
-    return x;
+    return concat(interleavehalfs(low(xx)), interleavehalfs(high(xx)));
 }
 }
 }
diff --git a/include/kfr/base/vec.hpp b/include/kfr/base/vec.hpp
@@ -99,7 +99,7 @@ struct vec_ptr
 template <typename To, typename From, size_t N,
           KFR_ENABLE_IF(std::is_same<subtype<From>, subtype<To>>::value),
           size_t Nout = N* compound_type_traits<From>::width / compound_type_traits<To>::width>
-constexpr KFR_INLINE vec<To, Nout> subcast(vec<From, N> value) noexcept
+constexpr KFR_INLINE vec<To, Nout> subcast(const vec<From, N>& value) noexcept
 {
     return *value;
 }
@@ -127,7 +127,7 @@ get_vec_index(int = 0)
 constexpr size_t index_undefined = static_cast<size_t>(-1);
 
 template <typename T, size_t N, size_t... Indices, KFR_ENABLE_IF(!is_compound<T>::value)>
-KFR_INLINE vec<T, sizeof...(Indices)> shufflevector(csizes_t<Indices...>, vec<T, N> x, vec<T, N> y)
+KFR_INLINE vec<T, sizeof...(Indices)> shufflevector(csizes_t<Indices...>, const vec<T, N>& x, const vec<T, N>& y)
 {
     vec<T, sizeof...(Indices)> result = __builtin_shufflevector(
         *x, *y, static_cast<intptr_t>(Indices == index_undefined ? -1 : static_cast<intptr_t>(Indices))...);
@@ -150,21 +150,21 @@ constexpr auto inflate(csize_t<groupsize>, csizes_t<indices...>)
 }
 
 template <typename T, size_t N, size_t... Indices, KFR_ENABLE_IF(is_compound<T>::value)>
-KFR_INLINE vec<T, sizeof...(Indices)> shufflevector(csizes_t<Indices...> indices, vec<T, N> x, vec<T, N> y)
+KFR_INLINE vec<T, sizeof...(Indices)> shufflevector(csizes_t<Indices...> indices, const vec<T, N>& x, const vec<T, N>& y)
 {
     return subcast<T>(
         shufflevector(inflate(csize<widthof<T>()>, indices), subcast<subtype<T>>(x), subcast<subtype<T>>(y)));
 }
 
 template <size_t... Indices, size_t Nout = sizeof...(Indices), typename T, size_t N>
-KFR_INLINE vec<T, Nout> shufflevector(csizes_t<Indices...>, vec<T, N> x)
+KFR_INLINE vec<T, Nout> shufflevector(csizes_t<Indices...>, const vec<T, N>& x)
 {
     return internal::shufflevector<T, N>(csizes<Indices...>, x, x);
 }
 
 template <typename Fn, size_t groupsize, typename T, size_t N, size_t... Indices,
           size_t Nout = sizeof...(Indices)>
-KFR_INLINE vec<T, Nout> shufflevector(vec<T, N> x, vec<T, N> y, cvals_t<size_t, Indices...>)
+KFR_INLINE vec<T, Nout> shufflevector(const vec<T, N>& x, const vec<T, N>& y, cvals_t<size_t, Indices...>)
 {
     static_assert(N % groupsize == 0, "N % groupsize == 0");
     return internal::shufflevector<T, N>(
@@ -173,13 +173,13 @@ KFR_INLINE vec<T, Nout> shufflevector(vec<T, N> x, vec<T, N> y, cvals_t<size_t, 
 }
 
 template <size_t Nout, typename Fn, size_t groupsize = 1, typename T, size_t N>
-KFR_INLINE vec<T, Nout> shufflevector(vec<T, N> x, vec<T, N> y)
+KFR_INLINE vec<T, Nout> shufflevector(const vec<T, N>& x, const vec<T, N>& y)
 {
     return internal::shufflevector<Fn, groupsize>(x, y, csizeseq<Nout>);
 }
 
 template <size_t Nout, typename Fn, size_t groupsize = 1, typename T, size_t N>
-KFR_INLINE vec<T, Nout> shufflevector(vec<T, N> x)
+KFR_INLINE vec<T, Nout> shufflevector(const vec<T, N>& x)
 {
     return internal::shufflevector<Fn, groupsize>(x, x, csizeseq<Nout>);
 }
@@ -257,61 +257,61 @@ constexpr KFR_INLINE To fbitcast(From value) noexcept
 }
 
 template <typename To, typename From, size_t N, KFR_ENABLE_IF(!is_compound<To>::value)>
-constexpr KFR_INLINE vec<To, N> cast(vec<From, N> value) noexcept
+constexpr KFR_INLINE vec<To, N> cast(const vec<From, N>& value) noexcept
 {
     return __builtin_convertvector(*value, simd<To, N>);
 }
 template <typename To, typename From, simdindex N>
-constexpr KFR_INLINE simd<To, N> cast(simd<From, N> value) noexcept
+constexpr KFR_INLINE simd<To, N> cast(const simd<From, N>& value) noexcept
 {
     return __builtin_convertvector(value, simd<To, N>);
 }
 template <typename To, typename From, size_t N, size_t Nout = sizeof(From) * N / sizeof(To)>
-constexpr KFR_INLINE vec<To, Nout> bitcast(vec<From, N> value) noexcept
+constexpr KFR_INLINE vec<To, Nout> bitcast(const vec<From, N>& value) noexcept
 {
     return reinterpret_cast<simd<To, Nout>>(*value);
 }
 template <typename To, typename From, simdindex N, simdindex Nout = sizeof(From) * N / sizeof(To)>
-constexpr KFR_INLINE simd<To, Nout> bitcast(simd<From, N> value) noexcept
+constexpr KFR_INLINE simd<To, Nout> bitcast(const simd<From, N>& value) noexcept
 {
     return reinterpret_cast<simd<To, Nout>>(value);
 }
 
 template <typename From, size_t N, typename To = utype<From>, size_t Nout = sizeof(From) * N / sizeof(To)>
-constexpr KFR_INLINE vec<To, Nout> ubitcast(vec<From, N> value) noexcept
+constexpr KFR_INLINE vec<To, Nout> ubitcast(const vec<From, N>& value) noexcept
 {
     return reinterpret_cast<simd<To, Nout>>(*value);
 }
 
 template <typename From, size_t N, typename To = itype<From>, size_t Nout = sizeof(From) * N / sizeof(To)>
-constexpr KFR_INLINE vec<To, Nout> ibitcast(vec<From, N> value) noexcept
+constexpr KFR_INLINE vec<To, Nout> ibitcast(const vec<From, N>& value) noexcept
 {
     return reinterpret_cast<simd<To, Nout>>(*value);
 }
 
 template <typename From, size_t N, typename To = ftype<From>, size_t Nout = sizeof(From) * N / sizeof(To)>
-constexpr KFR_INLINE vec<To, Nout> fbitcast(vec<From, N> value) noexcept
+constexpr KFR_INLINE vec<To, Nout> fbitcast(const vec<From, N>& value) noexcept
 {
     return reinterpret_cast<simd<To, Nout>>(*value);
 }
 
 template <typename From, simdindex N, typename To = utype<From>,
           simdindex Nout = sizeof(From) * N / sizeof(To)>
-constexpr KFR_INLINE simd<To, Nout> ubitcast(simd<From, N> value) noexcept
+constexpr KFR_INLINE simd<To, Nout> ubitcast(const simd<From, N>& value) noexcept
 {
     return reinterpret_cast<simd<To, Nout>>(value);
 }
 
 template <typename From, simdindex N, typename To = itype<From>,
           simdindex Nout = sizeof(From) * N / sizeof(To)>
-constexpr KFR_INLINE simd<To, Nout> ibitcast(simd<From, N> value) noexcept
+constexpr KFR_INLINE simd<To, Nout> ibitcast(const simd<From, N>& value) noexcept
 {
     return reinterpret_cast<simd<To, Nout>>(value);
 }
 
 template <typename From, simdindex N, typename To = ftype<From>,
           simdindex Nout = sizeof(From) * N / sizeof(To)>
-constexpr KFR_INLINE simd<To, Nout> fbitcast(simd<From, N> value) noexcept
+constexpr KFR_INLINE simd<To, Nout> fbitcast(const simd<From, N>& value) noexcept
 {
     return reinterpret_cast<simd<To, Nout>>(value);
 }
@@ -319,7 +319,7 @@ constexpr KFR_INLINE simd<To, Nout> fbitcast(simd<From, N> value) noexcept
 constexpr KFR_INLINE size_t vector_alignment(size_t size) { return next_poweroftwo(size); }
 
 template <typename T, size_t N, size_t... Sizes, size_t Nout = N + csum(csizes<Sizes...>)>
-KFR_INLINE vec<T, Nout> concat(vec<T, N> x, vec<T, Sizes>... rest);
+KFR_INLINE vec<T, Nout> concat(const vec<T, N>& x, const vec<T, Sizes>&... rest);
 
 namespace internal
 {
@@ -337,7 +337,7 @@ struct shuffle_index_wrap
 }
 
 template <size_t count, typename T, size_t N, size_t Nout = N* count>
-KFR_INLINE vec<T, Nout> repeat(vec<T, N> x)
+KFR_INLINE vec<T, Nout> repeat(const vec<T, N>& x)
 {
     return shufflevector<Nout, internal::shuffle_index_wrap<N, 0, 1>>(x);
 }
@@ -355,12 +355,12 @@ constexpr KFR_INLINE vec<T, N> broadcast(T x)
 #pragma clang diagnostic pop
 
 template <size_t Nout, typename T, size_t N, KFR_ENABLE_IF(Nout != N)>
-KFR_INLINE vec<T, Nout> resize(vec<T, N> x)
+KFR_INLINE vec<T, Nout> resize(const vec<T, N>& x)
 {
     return shufflevector<Nout, internal::shuffle_index_wrap<N, 0, 1>>(x);
 }
 template <size_t Nout, typename T, size_t N, KFR_ENABLE_IF(Nout == N)>
-constexpr KFR_INLINE vec<T, Nout> resize(vec<T, N> x)
+constexpr KFR_INLINE vec<T, Nout> resize(const vec<T, N>& x)
 {
     return x;
 }
@@ -385,13 +385,13 @@ KFR_INLINE vec<T, N> read(const T* src)
 }
 
 template <bool A = false, size_t N, typename T, KFR_ENABLE_IF(is_poweroftwo(N))>
-KFR_INLINE void write(T* dest, vec<T, N> value)
+KFR_INLINE void write(T* dest, const vec<T, N>& value)
 {
-    ptr_cast<vec_algn<subtype<T>, value.scalar_size(), A>>(dest)->value = *value;
+    ptr_cast<vec_algn<subtype<T>, vec<T, N>::scalar_size(), A>>(dest)->value = *value;
 }
 
 template <bool A = false, size_t N, typename T, KFR_ENABLE_IF(!is_poweroftwo(N))>
-KFR_INLINE void write(T* dest, vec<T, N> value)
+KFR_INLINE void write(T* dest, const vec<T, N>& value)
 {
     constexpr size_t first = prev_poweroftwo(N);
     constexpr size_t rest  = N - first;
@@ -540,7 +540,7 @@ constexpr KFR_INLINE vec<SubType, N> make_vector(const Arg& x, const Args&... re
                                                static_cast<SubType>(rest)...);
 }
 template <typename T, size_t N>
-constexpr KFR_INLINE vec<T, N> make_vector(vec<T, N> x)
+constexpr KFR_INLINE vec<T, N> make_vector(const vec<T, N>& x)
 {
     return x;
 }
@@ -608,30 +608,60 @@ struct vec : vec_t<T, N>
     constexpr KFR_INLINE vec& operator=(const vec&) noexcept = default;
     constexpr KFR_INLINE vec& operator=(vec&&) noexcept = default;
 
-    friend constexpr KFR_INLINE vec operator+(vec x, vec y) { return vec_op<T>::add(x.v, y.v); }
-    friend constexpr KFR_INLINE vec operator-(vec x, vec y) { return vec_op<T>::sub(x.v, y.v); }
-    friend constexpr KFR_INLINE vec operator*(vec x, vec y) { return vec_op<T>::mul(x.v, y.v); }
-    friend constexpr KFR_INLINE vec operator/(vec x, vec y) { return vec_op<T>::div(x.v, y.v); }
-    friend constexpr KFR_INLINE vec operator%(vec x, vec y) { return vec_op<T>::rem(x.v, y.v); }
-    friend constexpr KFR_INLINE vec operator-(vec x) { return vec_op<T>::neg(x.v); }
+    friend constexpr KFR_INLINE vec operator+(const vec& x, const vec& y) { return vec_op<T>::add(x.v, y.v); }
+    friend constexpr KFR_INLINE vec operator-(const vec& x, const vec& y) { return vec_op<T>::sub(x.v, y.v); }
+    friend constexpr KFR_INLINE vec operator*(const vec& x, const vec& y) { return vec_op<T>::mul(x.v, y.v); }
+    friend constexpr KFR_INLINE vec operator/(const vec& x, const vec& y) { return vec_op<T>::div(x.v, y.v); }
+    friend constexpr KFR_INLINE vec operator%(const vec& x, const vec& y) { return vec_op<T>::rem(x.v, y.v); }
+    friend constexpr KFR_INLINE vec operator-(const vec& x) { return vec_op<T>::neg(x.v); }
 
-    friend constexpr KFR_INLINE vec operator&(vec x, vec y) { return vec_op<T>::band(x.v, y.v); }
-    friend constexpr KFR_INLINE vec operator|(vec x, vec y) { return vec_op<T>::bor(x.v, y.v); }
-    friend constexpr KFR_INLINE vec operator^(vec x, vec y) { return vec_op<T>::bxor(x.v, y.v); }
-    friend constexpr KFR_INLINE vec operator~(vec x) { return vec_op<T>::bnot(x.v); }
+    friend constexpr KFR_INLINE vec operator&(const vec& x, const vec& y)
+    {
+        return vec_op<T>::band(x.v, y.v);
+    }
+    friend constexpr KFR_INLINE vec operator|(const vec& x, const vec& y) { return vec_op<T>::bor(x.v, y.v); }
+    friend constexpr KFR_INLINE vec operator^(const vec& x, const vec& y)
+    {
+        return vec_op<T>::bxor(x.v, y.v);
+    }
+    friend constexpr KFR_INLINE vec operator~(const vec& x) { return vec_op<T>::bnot(x.v); }
 
-    friend constexpr KFR_INLINE vec operator<<(vec x, vec y) { return vec_op<T>::shl(x.v, y.v); }
-    friend constexpr KFR_INLINE vec operator>>(vec x, vec y) { return vec_op<T>::shr(x.v, y.v); }
+    friend constexpr KFR_INLINE vec operator<<(const vec& x, const vec& y)
+    {
+        return vec_op<T>::shl(x.v, y.v);
+    }
+    friend constexpr KFR_INLINE vec operator>>(const vec& x, const vec& y)
+    {
+        return vec_op<T>::shr(x.v, y.v);
+    }
 
-    friend constexpr KFR_INLINE mask<T, N> operator==(vec x, vec y) { return vec_op<T>::eq(x.v, y.v); }
-    friend constexpr KFR_INLINE mask<T, N> operator!=(vec x, vec y) { return vec_op<T>::ne(x.v, y.v); }
-    friend constexpr KFR_INLINE mask<T, N> operator<(vec x, vec y) { return vec_op<T>::lt(x.v, y.v); }
-    friend constexpr KFR_INLINE mask<T, N> operator>(vec x, vec y) { return vec_op<T>::gt(x.v, y.v); }
-    friend constexpr KFR_INLINE mask<T, N> operator<=(vec x, vec y) { return vec_op<T>::le(x.v, y.v); }
-    friend constexpr KFR_INLINE mask<T, N> operator>=(vec x, vec y) { return vec_op<T>::ge(x.v, y.v); }
+    friend constexpr KFR_INLINE mask<T, N> operator==(const vec& x, const vec& y)
+    {
+        return vec_op<T>::eq(x.v, y.v);
+    }
+    friend constexpr KFR_INLINE mask<T, N> operator!=(const vec& x, const vec& y)
+    {
+        return vec_op<T>::ne(x.v, y.v);
+    }
+    friend constexpr KFR_INLINE mask<T, N> operator<(const vec& x, const vec& y)
+    {
+        return vec_op<T>::lt(x.v, y.v);
+    }
+    friend constexpr KFR_INLINE mask<T, N> operator>(const vec& x, const vec& y)
+    {
+        return vec_op<T>::gt(x.v, y.v);
+    }
+    friend constexpr KFR_INLINE mask<T, N> operator<=(const vec& x, const vec& y)
+    {
+        return vec_op<T>::le(x.v, y.v);
+    }
+    friend constexpr KFR_INLINE mask<T, N> operator>=(const vec& x, const vec& y)
+    {
+        return vec_op<T>::ge(x.v, y.v);
+    }
 
 #define KFR_ASGN_OP(aop, op)                                                                                 \
-    friend KFR_INLINE vec& operator aop(vec& x, vec y)                                                       \
+    friend KFR_INLINE vec& operator aop(vec& x, const vec& y)                                                \
     {                                                                                                        \
         x = x op y;                                                                                          \
         return x;                                                                                            \
@@ -721,8 +751,8 @@ struct mask : public vec<T, N>
     KFR_INLINE mask& operator=(const mask&) noexcept = default;
     KFR_INLINE mask& operator=(mask&&) noexcept = default;
 
-    template <typename M, typename = u8[sizeof(T) == sizeof(M)]>
-    constexpr KFR_INLINE mask(vec<M, N> value) : base(reinterpret_cast<const vec<T, N>&>(value))
+    template <typename M, KFR_ENABLE_IF(sizeof(T) == sizeof(M))>
+    constexpr KFR_INLINE mask(const vec<M, N>& value) : base(bitcast<T>(value))
     {
     }
 
@@ -731,21 +761,21 @@ struct mask : public vec<T, N>
     //    {
     //    }
     constexpr KFR_INLINE mask operator~() const { return bitcast<T>(~ubitcast(this->v)); }
-    constexpr KFR_INLINE mask operator&(vec<T, N> x) const
+    constexpr KFR_INLINE mask operator&(const vec<T, N>& x) const
     {
         return bitcast<T>(ubitcast(this->v) & ubitcast(x.v));
     }
-    constexpr KFR_INLINE mask operator|(vec<T, N> x) const
+    constexpr KFR_INLINE mask operator|(const vec<T, N>& x) const
     {
         return bitcast<T>(ubitcast(this->v) | ubitcast(x.v));
     }
-    constexpr KFR_INLINE mask operator^(vec<T, N> x) const
+    constexpr KFR_INLINE mask operator^(const vec<T, N>& x) const
     {
         return bitcast<T>(ubitcast(this->v) ^ ubitcast(x.v));
     }
 
-    constexpr KFR_INLINE mask operator&&(mask x) const { return *this & x; }
-    constexpr KFR_INLINE mask operator||(mask x) const { return *this | x; }
+    constexpr KFR_INLINE mask operator&&(const mask& x) const { return *this & x; }
+    constexpr KFR_INLINE mask operator||(const mask& x) const { return *this | x; }
     constexpr KFR_INLINE mask operator!() const { return ~*this; }
 
     constexpr KFR_INLINE simd<T, N> operator*() const { return this->v; }
@@ -778,24 +808,24 @@ struct shuffle_index_extend
 };
 
 template <size_t start, size_t count, typename T, size_t N>
-KFR_INLINE vec<T, count> concatexact(vec<T, N> x, vec<T, N> y)
+KFR_INLINE vec<T, count> concatexact(const vec<T, N>& x, const vec<T, N>& y)
 {
     return kfr::shufflevector<count, internal::shuffle_index<start>>(x, y);
 }
 
 template <size_t start, size_t count, typename T, size_t N1, size_t N2>
-KFR_INLINE enable_if<(N1 == N2), vec<T, count>> concattwo(vec<T, N1> x, vec<T, N2> y)
+KFR_INLINE enable_if<(N1 == N2), vec<T, count>> concattwo(const vec<T, N1>& x, const vec<T, N2>& y)
 {
     return concatexact<start, count>(x, y);
 }
 
 template <size_t start, size_t count, typename T, size_t N1, size_t N2>
-KFR_INLINE enable_if<(N1 > N2), vec<T, count>> concattwo(vec<T, N1> x, vec<T, N2> y)
+KFR_INLINE enable_if<(N1 > N2), vec<T, count>> concattwo(const vec<T, N1>& x, const vec<T, N2>& y)
 {
     return concatexact<start, count>(x, shufflevector<N1, internal::shuffle_index_extend<0, N2>>(y));
 }
 template <size_t start, size_t count, typename T, size_t N1, size_t N2>
-KFR_INLINE enable_if<(N1 < N2), vec<T, count>> concattwo(vec<T, N1> x, vec<T, N2> y)
+KFR_INLINE enable_if<(N1 < N2), vec<T, count>> concattwo(const vec<T, N1>& x, const vec<T, N2>& y)
 {
     return concatexact<N2 - N1 + start, count>(
         shufflevector<N2, internal::shuffle_index_extend<N2 - N1, N1>>(x), y);
@@ -813,26 +843,26 @@ constexpr mask<T, Nout> partial_mask()
 }
 
 template <typename T, size_t N>
-KFR_INLINE vec<T, N> concat(vec<T, N> x)
+KFR_INLINE vec<T, N> concat(const vec<T, N>& x)
 {
     return x;
 }
 
 template <typename T, size_t N1, size_t N2>
-KFR_INLINE vec<T, N1 + N2> concat(vec<T, N1> x, vec<T, N2> y)
+KFR_INLINE vec<T, N1 + N2> concat(const vec<T, N1>& x, const vec<T, N2>& y)
 {
     return concattwo<0, N1 + N2>(x, y);
 }
 
 template <typename T, size_t N1, size_t N2, size_t... Sizes>
-KFR_INLINE auto concat(vec<T, N1> x, vec<T, N2> y, vec<T, Sizes>... args)
+KFR_INLINE auto concat(const vec<T, N1>& x, const vec<T, N2>& y, const vec<T, Sizes>&... args)
 {
     return concat(x, concat(y, args...));
 }
 }
 
 template <typename T, size_t N, size_t... Sizes, size_t Nout>
-KFR_INLINE vec<T, Nout> concat(vec<T, N> x, vec<T, Sizes>... rest)
+KFR_INLINE vec<T, Nout> concat(const vec<T, N>& x, const vec<T, Sizes>&... rest)
 {
     return internal::concat(x, rest...);
 }
@@ -1105,7 +1135,7 @@ constexpr KFR_INLINE vec<T, N> apply0_helper(Fn&& fn, csizes_t<Indices...>)
 
 template <typename T, size_t N, typename Fn, typename... Args,
           typename Tout = result_of<Fn(T, subtype<decay<Args>>...)>>
-constexpr KFR_INLINE vec<Tout, N> apply(Fn&& fn, vec<T, N> arg, Args&&... args)
+constexpr KFR_INLINE vec<Tout, N> apply(Fn&& fn, const vec<T, N>& arg, Args&&... args)
 {
     return internal::apply_helper<T, N>(std::forward<Fn>(fn), csizeseq<N>, arg, std::forward<Args>(args)...);
 }
@@ -1174,7 +1204,7 @@ constexpr KFR_INLINE vec<T, N> undefinedvector(vec_t<T, N>)
 KFR_FN(undefinedvector)
 
 template <typename T, size_t N, size_t Nout = prev_poweroftwo(N - 1)>
-KFR_INLINE vec<T, Nout> low(vec<T, N> x)
+KFR_INLINE vec<T, Nout> low(const vec<T, N>& x)
 {
     return shufflevector<Nout, internal::shuffle_index<>>(x);
 }
@@ -1186,7 +1216,7 @@ KFR_INLINE vec_t<T, Nout> low(vec_t<T, N>)
 }
 
 template <typename T, size_t N, size_t Nout = N - prev_poweroftwo(N - 1)>
-KFR_INLINE vec<T, Nout> high(vec<T, N> x)
+KFR_INLINE vec<T, Nout> high(const vec<T, N>& x)
 {
     return shufflevector<Nout, internal::shuffle_index<prev_poweroftwo(N - 1)>>(x);
 }

	kfr - Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)
	Log | Files | Refs | README

M	include/kfr/base/shuffle.hpp	|	84	++++++++++++++++++++++++++++++++++++++++----------------------------------------
M	include/kfr/base/specializations.i	|	35	++++++++++++++---------------------
M	include/kfr/base/vec.hpp	|	154	+++++++++++++++++++++++++++++++++++++++++++++++--------------------------------