kfr

Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)
Log | Files | Refs | README

commit 75ba81ba8421d474003470cd1254654d8cd8c14d
parent 116d49965b5bb4fe38b853559c5976c0f1e7dd2f
Author: d.levin256@gmail.com <d.levin256@gmail.com>
Date:   Sat, 30 Jul 2016 20:54:49 +0300

Pass vec by const reference

Diffstat:
Minclude/kfr/base/shuffle.hpp | 84++++++++++++++++++++++++++++++++++++++++----------------------------------------
Minclude/kfr/base/specializations.i | 35++++++++++++++---------------------
Minclude/kfr/base/vec.hpp | 154+++++++++++++++++++++++++++++++++++++++++++++++--------------------------------
3 files changed, 148 insertions(+), 125 deletions(-)

diff --git a/include/kfr/base/shuffle.hpp b/include/kfr/base/shuffle.hpp @@ -63,39 +63,39 @@ constexpr KFR_INLINE vec<T, Nout> broadcast(T x, T y, Ts... rest) KFR_FN(broadcast) template <size_t Ncount, typename T, size_t N> -KFR_INLINE vec<T, N + Ncount> padhigh(vec<T, N> x) +KFR_INLINE vec<T, N + Ncount> padhigh(const vec<T, N>& x) { return shufflevector<N + Ncount, internal::shuffle_index_extend<0, N>>(x); } KFR_FN(padhigh) template <size_t Ncount, typename T, size_t N> -KFR_INLINE vec<T, N + Ncount> padlow(vec<T, N> x) +KFR_INLINE vec<T, N + Ncount> padlow(const vec<T, N>& x) { return shufflevector<N + Ncount, internal::shuffle_index_extend<Ncount, N>>(x); } KFR_FN(padlow) template <size_t Nout, typename T, size_t N, KFR_ENABLE_IF(N != Nout)> -KFR_INLINE vec<T, Nout> extend(vec<T, N> x) +KFR_INLINE vec<T, Nout> extend(const vec<T, N>& x) { return shufflevector<Nout, internal::shuffle_index_extend<0, N>>(x); } template <size_t Nout, typename T, size_t N, KFR_ENABLE_IF(N == Nout)> -constexpr KFR_INLINE vec<T, Nout> extend(vec<T, N> x) +constexpr KFR_INLINE vec<T, Nout> extend(const vec<T, N>& x) { return x; } KFR_FN(extend) template <size_t start, size_t count, typename T, size_t N> -KFR_INLINE vec<T, count> slice(vec<T, N> x) +KFR_INLINE vec<T, count> slice(const vec<T, N>& x) { static_assert(start + count <= N, "start + count <= N"); return shufflevector<count, internal::shuffle_index<start>>(x); } template <size_t start, size_t count, typename T, size_t N> -KFR_INLINE vec<T, count> slice(vec<T, N> x, vec<T, N> y) +KFR_INLINE vec<T, count> slice(const vec<T, N>& x, const vec<T, N>& y) { static_assert(start + count <= N * 2, "start + count <= N * 2"); return shufflevector<count, internal::shuffle_index<start>>(x, y); @@ -103,11 +103,11 @@ KFR_INLINE vec<T, count> slice(vec<T, N> x, vec<T, N> y) KFR_FN(slice) template <size_t, typename T, size_t N> -KFR_INLINE void split(vec<T, N>) +KFR_INLINE void split(const vec<T, N>&) { } template <size_t start = 0, typename T, size_t N, size_t Nout, typename... Args> -KFR_INLINE void split(vec<T, N> x, vec<T, Nout>& out, Args&&... args) +KFR_INLINE void split(const vec<T, N>& x, vec<T, Nout>& out, Args&&... args) { out = slice<start, Nout>(x); split<start + Nout>(x, std::forward<Args>(args)...); @@ -115,7 +115,7 @@ KFR_INLINE void split(vec<T, N> x, vec<T, Nout>& out, Args&&... args) KFR_FN(split) template <size_t total, size_t number, typename T, size_t N, size_t Nout = N / total> -KFR_INLINE vec<T, Nout> part(vec<T, N> x) +KFR_INLINE vec<T, Nout> part(const vec<T, N>& x) { static_assert(N % total == 0, "N % total == 0"); return shufflevector<Nout, internal::shuffle_index<number * Nout>>(x); @@ -123,27 +123,27 @@ KFR_INLINE vec<T, Nout> part(vec<T, N> x) KFR_FN(part) template <size_t start, size_t count, typename T, size_t N1, size_t N2> -KFR_INLINE vec<T, count> concat_and_slice(vec<T, N1> x, vec<T, N2> y) +KFR_INLINE vec<T, count> concat_and_slice(const vec<T, N1>& x, const vec<T, N2>& y) { return internal::concattwo<start, count>(x, y); } KFR_FN(concat_and_slice) template <size_t Nout, typename T, size_t N> -KFR_INLINE vec<T, Nout> widen(vec<T, N> x, identity<T> newvalue = T()) +KFR_INLINE vec<T, Nout> widen(const vec<T, N>& x, identity<T> newvalue = T()) { static_assert(Nout > N, "Nout > N"); return concat(x, broadcast<Nout - N>(newvalue)); } template <size_t Nout, typename T, typename TS> -constexpr KFR_INLINE vec<T, Nout> widen(vec<T, Nout> x, TS) +constexpr KFR_INLINE vec<T, Nout> widen(const vec<T, Nout>& x, TS) { return x; } KFR_FN(widen) template <size_t Nout, typename T, size_t N> -KFR_INLINE vec<T, Nout> narrow(vec<T, N> x) +KFR_INLINE vec<T, Nout> narrow(const vec<T, N>& x) { static_assert(Nout <= N, "Nout <= N"); return slice<0, Nout>(x); @@ -152,7 +152,7 @@ KFR_FN(narrow) template <size_t groupsize = 1, typename T, size_t N, size_t Nout = N / 2, KFR_ENABLE_IF(N >= 2 && (N & 1) == 0)> -KFR_INLINE vec<T, Nout> even(vec<T, N> x) +KFR_INLINE vec<T, Nout> even(const vec<T, N>& x) { return shufflevector<Nout, internal::shuffle_index<0, 2>, groupsize>(x); } @@ -160,7 +160,7 @@ KFR_FNR(even, 2, 1) template <size_t groupsize = 1, typename T, size_t N, size_t Nout = N / 2, KFR_ENABLE_IF(N >= 2 && (N & 1) == 0)> -KFR_INLINE vec<T, Nout> odd(vec<T, N> x) +KFR_INLINE vec<T, Nout> odd(const vec<T, N>& x) { return shufflevector<Nout, internal::shuffle_index<1, 2>, groupsize>(x); } @@ -182,7 +182,7 @@ struct shuffle_index_dup } template <typename T, size_t N> -KFR_INLINE vec<T, N> dupeven(vec<T, N> x) +KFR_INLINE vec<T, N> dupeven(const vec<T, N>& x) { static_assert(N % 2 == 0, "N must be even"); return shufflevector<N, internal::shuffle_index_dup<2, 0>>(x); @@ -190,7 +190,7 @@ KFR_INLINE vec<T, N> dupeven(vec<T, N> x) KFR_FN(dupeven) template <typename T, size_t N> -KFR_INLINE vec<T, N> dupodd(vec<T, N> x) +KFR_INLINE vec<T, N> dupodd(const vec<T, N>& x) { static_assert(N % 2 == 0, "N must be even"); return shufflevector<N, internal::shuffle_index_dup<2, 1>>(x); @@ -198,7 +198,7 @@ KFR_INLINE vec<T, N> dupodd(vec<T, N> x) KFR_FN(dupodd) template <typename T, size_t N> -KFR_INLINE vec<T, N * 2> duphalfs(vec<T, N> x) +KFR_INLINE vec<T, N * 2> duphalfs(const vec<T, N>& x) { return concat(x, x); } @@ -221,14 +221,14 @@ struct shuffle_index_shuffle } template <size_t... Indices, typename T, size_t N> -KFR_INLINE vec<T, N> shuffle(vec<T, N> x, vec<T, N> y, elements_t<Indices...> = elements_t<Indices...>()) +KFR_INLINE vec<T, N> shuffle(const vec<T, N>& x, const vec<T, N>& y, elements_t<Indices...> = elements_t<Indices...>()) { return shufflevector<N, internal::shuffle_index_shuffle<N, Indices...>>(x, y); } KFR_FN(shuffle) template <size_t groupsize, size_t... Indices, typename T, size_t N> -KFR_INLINE vec<T, N> shufflegroups(vec<T, N> x, vec<T, N> y, +KFR_INLINE vec<T, N> shufflegroups(const vec<T, N>& x, const vec<T, N>& y, elements_t<Indices...> = elements_t<Indices...>()) { return shufflevector<N, internal::shuffle_index_shuffle<N, Indices...>, groupsize>(x, y); @@ -253,14 +253,14 @@ struct shuffle_index_permute } template <size_t... Indices, typename T, size_t N> -KFR_INLINE vec<T, N> permute(vec<T, N> x, elements_t<Indices...> = elements_t<Indices...>()) +KFR_INLINE vec<T, N> permute(const vec<T, N>& x, elements_t<Indices...> = elements_t<Indices...>()) { return shufflevector<N, internal::shuffle_index_permute<N, Indices...>>(x); } KFR_FN(permute) template <size_t groupsize, size_t... Indices, typename T, size_t N> -KFR_INLINE vec<T, N> permutegroups(vec<T, N> x, elements_t<Indices...> = elements_t<Indices...>()) +KFR_INLINE vec<T, N> permutegroups(const vec<T, N>& x, elements_t<Indices...> = elements_t<Indices...>()) { return shufflevector<N, internal::shuffle_index_permute<N, Indices...>, groupsize>(x); } @@ -299,7 +299,7 @@ constexpr KFR_INLINE mask<T, N> oddmask() } template <typename T, size_t N, size_t Nout = N * 2> -KFR_INLINE vec<T, Nout> dup(vec<T, N> x) +KFR_INLINE vec<T, Nout> dup(const vec<T, N>& x) { return shufflevector<Nout, internal::shuffle_index_dup1<2>>(x, x); } @@ -315,7 +315,7 @@ struct shuffle_index_duphalf } template <typename T, size_t N> -KFR_INLINE vec<T, N> duplow(vec<T, N> x) +KFR_INLINE vec<T, N> duplow(const vec<T, N>& x) { static_assert(N % 2 == 0, "N must be even"); return shufflevector<N, internal::shuffle_index_duphalf<N / 2, 0>>(x); @@ -346,7 +346,7 @@ struct shuffle_index_blend } template <size_t... Indices, typename T, size_t N> -KFR_INLINE vec<T, N> blend(vec<T, N> x, vec<T, N> y, elements_t<Indices...> = elements_t<Indices...>()) +KFR_INLINE vec<T, N> blend(const vec<T, N>& x, const vec<T, N>& y, elements_t<Indices...> = elements_t<Indices...>()) { return shufflevector<N, internal::shuffle_index_blend<N, Indices...>, 1>(x, y); } @@ -381,13 +381,13 @@ KFR_INLINE vec<T, N> swap(vec<T, N> x) KFR_FN(swap) template <size_t shift, typename T, size_t N> -KFR_INLINE vec<T, N> rotatetwo(vec<T, N> lo, vec<T, N> hi) +KFR_INLINE vec<T, N> rotatetwo(const vec<T, N>& lo, const vec<T, N>& hi) { return shift == 0 ? lo : (shift == N ? hi : shufflevector<N, internal::shuffle_index<N - shift>>(hi, lo)); } template <size_t amount, typename T, size_t N> -KFR_INLINE vec<T, N> rotateright(vec<T, N> x, csize_t<amount> = csize_t<amount>()) +KFR_INLINE vec<T, N> rotateright(const vec<T, N>& x, csize_t<amount> = csize_t<amount>()) { static_assert(amount >= 0 && amount < N, "amount >= 0 && amount < N"); return shufflevector<N, internal::shuffle_index_wrap<N, N - amount>>(x); @@ -395,7 +395,7 @@ KFR_INLINE vec<T, N> rotateright(vec<T, N> x, csize_t<amount> = csize_t<amount>( KFR_FN(rotateright) template <size_t amount, typename T, size_t N> -KFR_INLINE vec<T, N> rotateleft(vec<T, N> x, csize_t<amount> = csize_t<amount>()) +KFR_INLINE vec<T, N> rotateleft(const vec<T, N>& x, csize_t<amount> = csize_t<amount>()) { static_assert(amount >= 0 && amount < N, "amount >= 0 && amount < N"); return shufflevector<N, internal::shuffle_index_wrap<N, amount>>(x); @@ -403,21 +403,21 @@ KFR_INLINE vec<T, N> rotateleft(vec<T, N> x, csize_t<amount> = csize_t<amount>() KFR_FN(rotateleft) template <typename T, size_t N> -KFR_INLINE vec<T, N> insertright(T x, vec<T, N> y) +KFR_INLINE vec<T, N> insertright(T x, const vec<T, N>& y) { return concat_and_slice<1, N>(y, vec<T, 1>(x)); } KFR_FN(insertright) template <typename T, size_t N> -KFR_INLINE vec<T, N> insertleft(T x, vec<T, N> y) +KFR_INLINE vec<T, N> insertleft(T x, const vec<T, N>& y) { return concat_and_slice<0, N>(vec<T, 1>(x), y); } KFR_FN(insertleft) template <typename T, size_t N, size_t N2> -KFR_INLINE vec<T, N> outputright(vec<T, N> x, vec<T, N2> y) +KFR_INLINE vec<T, N> outputright(const vec<T, N>& x, const vec<T, N2>& y) { return shufflevector<N, internal::shuffle_index_outputright<N2, N>>(x, extend<N>(y)); } @@ -437,46 +437,46 @@ struct shuffle_index_transpose } template <size_t side, size_t groupsize = 1, typename T, size_t N, KFR_ENABLE_IF(N / groupsize > 3)> -KFR_INLINE vec<T, N> transpose(vec<T, N> x) +KFR_INLINE vec<T, N> transpose(const vec<T, N>& x) { return shufflevector<N, internal::shuffle_index_transpose<N / groupsize, side>, groupsize>(x); } template <size_t side, size_t groupsize = 1, typename T, size_t N, KFR_ENABLE_IF(N / groupsize <= 3)> -KFR_INLINE vec<T, N> transpose(vec<T, N> x) +KFR_INLINE vec<T, N> transpose(const vec<T, N>& x) { return x; } KFR_FN(transpose) template <size_t side, size_t groupsize = 1, typename T, size_t N, KFR_ENABLE_IF(N / groupsize > 3)> -KFR_INLINE vec<T, N> transposeinverse(vec<T, N> x) +KFR_INLINE vec<T, N> transposeinverse(const vec<T, N>& x) { return shufflevector<N, internal::shuffle_index_transpose<N / groupsize, N / groupsize / side>, groupsize>(x); } template <size_t side, size_t groupsize = 1, typename T, size_t N, KFR_ENABLE_IF(N / groupsize <= 3)> -KFR_INLINE vec<T, N> transposeinverse(vec<T, N> x) +KFR_INLINE vec<T, N> transposeinverse(const vec<T, N>& x) { return x; } KFR_FN(transposeinverse) template <size_t side, typename T, size_t N> -KFR_INLINE vec<T, N> ctranspose(vec<T, N> x) +KFR_INLINE vec<T, N> ctranspose(const vec<T, N>& x) { return transpose<side, 2>(x); } KFR_FN(ctranspose) template <size_t side, typename T, size_t N> -KFR_INLINE vec<T, N> ctransposeinverse(vec<T, N> x) +KFR_INLINE vec<T, N> ctransposeinverse(const vec<T, N>& x) { return transposeinverse<side, 2>(x); } KFR_FN(ctransposeinverse) template <size_t groupsize = 1, typename T, size_t N, size_t Nout = N * 2> -KFR_INLINE vec<T, Nout> interleave(vec<T, N> x, vec<T, N> y) +KFR_INLINE vec<T, Nout> interleave(const vec<T, N>& x, const vec<T, N>& y) { return shufflevector<Nout, internal::shuffle_index_transpose<Nout / groupsize, Nout / groupsize / 2>, groupsize>(x, y); @@ -490,7 +490,7 @@ KFR_INLINE expr_func<fn_interleave, E1, E2> interleave(E1&& x, E2&& y) } template <size_t groupsize = 1, typename T, size_t N> -KFR_INLINE vec<T, N> interleavehalfs(vec<T, N> x) +KFR_INLINE vec<T, N> interleavehalfs(const vec<T, N>& x) { return shufflevector<N, internal::shuffle_index_transpose<N / groupsize, N / groupsize / 2>, groupsize>( x); @@ -498,7 +498,7 @@ KFR_INLINE vec<T, N> interleavehalfs(vec<T, N> x) KFR_FN(interleavehalfs) template <size_t groupsize = 1, typename T, size_t N> -KFR_INLINE vec<T, N> splitpairs(vec<T, N> x) +KFR_INLINE vec<T, N> splitpairs(const vec<T, N>& x) { return shufflevector<N, internal::shuffle_index_transpose<N / groupsize, 2>, groupsize>(x); } @@ -514,7 +514,7 @@ struct shuffle_index_reverse } template <size_t groupsize = 1, typename T, size_t N> -KFR_INLINE vec<T, N> reverse(vec<T, N> x) +KFR_INLINE vec<T, N> reverse(const vec<T, N>& x) { return shufflevector<N, internal::shuffle_index_reverse<N / groupsize>, groupsize>(x); } @@ -530,7 +530,7 @@ struct shuffle_index_combine } template <typename T, size_t N1, size_t N2> -KFR_INLINE vec<T, N1> combine(vec<T, N1> x, vec<T, N2> y) +KFR_INLINE vec<T, N1> combine(const vec<T, N1>& x, const vec<T, N2>& y) { static_assert(N2 <= N1, "N2 <= N1"); return shufflevector<N1, internal::shuffle_index_combine<N1, N2>>(x, extend<N1>(y)); diff --git a/include/kfr/base/specializations.i b/include/kfr/base/specializations.i @@ -30,7 +30,7 @@ template <> inline vec<f32, 32> shufflevector<f32, 32>( csizes_t<0, 1, 8, 9, 16, 17, 24, 25, 2, 3, 10, 11, 18, 19, 26, 27, 4, 5, 12, 13, 20, 21, 28, 29, 6, 7, 14, 15, 22, 23, 30, 31>, - vec<f32, 32> x, vec<f32, 32>) + const vec<f32, 32>& x, const vec<f32, 32>&) { f32x32 w = x; @@ -45,7 +45,7 @@ template <> inline vec<f32, 32> shufflevector<f32, 32>( csizes_t<0, 1, 16, 17, 8, 9, 24, 25, 4, 5, 20, 21, 12, 13, 28, 29, 2, 3, 18, 19, 10, 11, 26, 27, 6, 7, 22, 23, 14, 15, 30, 31>, - vec<f32, 32> x, vec<f32, 32>) + const vec<f32, 32>& x, const vec<f32, 32>&) { f32x32 w = x; @@ -56,7 +56,7 @@ inline vec<f32, 32> shufflevector<f32, 32>( return w; } -inline vec<f32, 32> bitreverse_2(vec<f32, 32> x) +inline vec<f32, 32> bitreverse_2(const vec<f32, 32>& x) { return shufflevector<f32, 32>(csizes<0, 1, 16, 17, 8, 9, 24, 25, 4, 5, 20, 21, 12, 13, 28, 29, 2, 3, 18, 19, 10, 11, 26, 27, 6, 7, 22, 23, 14, 15, 30, 31>, @@ -68,46 +68,39 @@ inline vec<f32, 64> shufflevector<f32, 64>( csizes_t<0, 1, 32, 33, 16, 17, 48, 49, 8, 9, 40, 41, 24, 25, 56, 57, 4, 5, 36, 37, 20, 21, 52, 53, 12, 13, 44, 45, 28, 29, 60, 61, 2, 3, 34, 35, 18, 19, 50, 51, 10, 11, 42, 43, 26, 27, 58, 59, 6, 7, 38, 39, 22, 23, 54, 55, 14, 15, 46, 47, 30, 31, 62, 63>, - vec<f32, 64> x, vec<f32, 64>) + const vec<f32, 64>& x, const vec<f32, 64>&) { - x = concat(bitreverse_2(even<8>(x)), bitreverse_2(odd<8>(x))); - return permutegroups<(8), 0, 4, 1, 5, 2, 6, 3, 7>(x); + return permutegroups<(8), 0, 4, 1, 5, 2, 6, 3, 7>(concat(bitreverse_2(even<8>(x)), bitreverse_2(odd<8>(x)))); } template <> inline vec<f32, 16> shufflevector<f32, 16>(csizes_t<0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15>, - vec<f32, 16> x, vec<f32, 16>) + const vec<f32, 16>& x, const vec<f32, 16>&) { // asm volatile("int $3"); - x = permutegroups<(4), 0, 2, 1, 3>(x); + const vec<f32, 16> xx = permutegroups<(4), 0, 2, 1, 3>(x); - x = concat(shuffle<0, 2, 8 + 0, 8 + 2>(low(x), high(x)), shuffle<1, 3, 8 + 1, 8 + 3>(low(x), high(x))); - - return x; + return concat(shuffle<0, 2, 8 + 0, 8 + 2>(low(xx), high(xx)), shuffle<1, 3, 8 + 1, 8 + 3>(low(xx), high(xx))); } template <> inline vec<f32, 16> shufflevector<f32, 16>(csizes_t<0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15>, - vec<f32, 16> x, vec<f32, 16>) + const vec<f32, 16>& x, const vec<f32, 16>&) { - x = concat(shuffle<0, 8 + 0, 1, 8 + 1>(low(x), high(x)), shuffle<2, 8 + 2, 3, 8 + 3>(low(x), high(x))); - - x = permutegroups<(4), 0, 2, 1, 3>(x); + const vec<f32, 16> xx = concat(shuffle<0, 8 + 0, 1, 8 + 1>(low(x), high(x)), shuffle<2, 8 + 2, 3, 8 + 3>(low(x), high(x))); - return x; + return permutegroups<(4), 0, 2, 1, 3>(xx); } template <> inline vec<f32, 32> shufflevector<f32, 32>( csizes_t<0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31>, - vec<f32, 32> x, vec<f32, 32>) + const vec<f32, 32>& x, const vec<f32, 32>&) { - x = permutegroups<(8), 0, 2, 1, 3>(x); - - x = concat(interleavehalfs(low(x)), interleavehalfs(high(x))); + const vec<f32, 32> xx = permutegroups<(8), 0, 2, 1, 3>(x); - return x; + return concat(interleavehalfs(low(xx)), interleavehalfs(high(xx))); } } } diff --git a/include/kfr/base/vec.hpp b/include/kfr/base/vec.hpp @@ -99,7 +99,7 @@ struct vec_ptr template <typename To, typename From, size_t N, KFR_ENABLE_IF(std::is_same<subtype<From>, subtype<To>>::value), size_t Nout = N* compound_type_traits<From>::width / compound_type_traits<To>::width> -constexpr KFR_INLINE vec<To, Nout> subcast(vec<From, N> value) noexcept +constexpr KFR_INLINE vec<To, Nout> subcast(const vec<From, N>& value) noexcept { return *value; } @@ -127,7 +127,7 @@ get_vec_index(int = 0) constexpr size_t index_undefined = static_cast<size_t>(-1); template <typename T, size_t N, size_t... Indices, KFR_ENABLE_IF(!is_compound<T>::value)> -KFR_INLINE vec<T, sizeof...(Indices)> shufflevector(csizes_t<Indices...>, vec<T, N> x, vec<T, N> y) +KFR_INLINE vec<T, sizeof...(Indices)> shufflevector(csizes_t<Indices...>, const vec<T, N>& x, const vec<T, N>& y) { vec<T, sizeof...(Indices)> result = __builtin_shufflevector( *x, *y, static_cast<intptr_t>(Indices == index_undefined ? -1 : static_cast<intptr_t>(Indices))...); @@ -150,21 +150,21 @@ constexpr auto inflate(csize_t<groupsize>, csizes_t<indices...>) } template <typename T, size_t N, size_t... Indices, KFR_ENABLE_IF(is_compound<T>::value)> -KFR_INLINE vec<T, sizeof...(Indices)> shufflevector(csizes_t<Indices...> indices, vec<T, N> x, vec<T, N> y) +KFR_INLINE vec<T, sizeof...(Indices)> shufflevector(csizes_t<Indices...> indices, const vec<T, N>& x, const vec<T, N>& y) { return subcast<T>( shufflevector(inflate(csize<widthof<T>()>, indices), subcast<subtype<T>>(x), subcast<subtype<T>>(y))); } template <size_t... Indices, size_t Nout = sizeof...(Indices), typename T, size_t N> -KFR_INLINE vec<T, Nout> shufflevector(csizes_t<Indices...>, vec<T, N> x) +KFR_INLINE vec<T, Nout> shufflevector(csizes_t<Indices...>, const vec<T, N>& x) { return internal::shufflevector<T, N>(csizes<Indices...>, x, x); } template <typename Fn, size_t groupsize, typename T, size_t N, size_t... Indices, size_t Nout = sizeof...(Indices)> -KFR_INLINE vec<T, Nout> shufflevector(vec<T, N> x, vec<T, N> y, cvals_t<size_t, Indices...>) +KFR_INLINE vec<T, Nout> shufflevector(const vec<T, N>& x, const vec<T, N>& y, cvals_t<size_t, Indices...>) { static_assert(N % groupsize == 0, "N % groupsize == 0"); return internal::shufflevector<T, N>( @@ -173,13 +173,13 @@ KFR_INLINE vec<T, Nout> shufflevector(vec<T, N> x, vec<T, N> y, cvals_t<size_t, } template <size_t Nout, typename Fn, size_t groupsize = 1, typename T, size_t N> -KFR_INLINE vec<T, Nout> shufflevector(vec<T, N> x, vec<T, N> y) +KFR_INLINE vec<T, Nout> shufflevector(const vec<T, N>& x, const vec<T, N>& y) { return internal::shufflevector<Fn, groupsize>(x, y, csizeseq<Nout>); } template <size_t Nout, typename Fn, size_t groupsize = 1, typename T, size_t N> -KFR_INLINE vec<T, Nout> shufflevector(vec<T, N> x) +KFR_INLINE vec<T, Nout> shufflevector(const vec<T, N>& x) { return internal::shufflevector<Fn, groupsize>(x, x, csizeseq<Nout>); } @@ -257,61 +257,61 @@ constexpr KFR_INLINE To fbitcast(From value) noexcept } template <typename To, typename From, size_t N, KFR_ENABLE_IF(!is_compound<To>::value)> -constexpr KFR_INLINE vec<To, N> cast(vec<From, N> value) noexcept +constexpr KFR_INLINE vec<To, N> cast(const vec<From, N>& value) noexcept { return __builtin_convertvector(*value, simd<To, N>); } template <typename To, typename From, simdindex N> -constexpr KFR_INLINE simd<To, N> cast(simd<From, N> value) noexcept +constexpr KFR_INLINE simd<To, N> cast(const simd<From, N>& value) noexcept { return __builtin_convertvector(value, simd<To, N>); } template <typename To, typename From, size_t N, size_t Nout = sizeof(From) * N / sizeof(To)> -constexpr KFR_INLINE vec<To, Nout> bitcast(vec<From, N> value) noexcept +constexpr KFR_INLINE vec<To, Nout> bitcast(const vec<From, N>& value) noexcept { return reinterpret_cast<simd<To, Nout>>(*value); } template <typename To, typename From, simdindex N, simdindex Nout = sizeof(From) * N / sizeof(To)> -constexpr KFR_INLINE simd<To, Nout> bitcast(simd<From, N> value) noexcept +constexpr KFR_INLINE simd<To, Nout> bitcast(const simd<From, N>& value) noexcept { return reinterpret_cast<simd<To, Nout>>(value); } template <typename From, size_t N, typename To = utype<From>, size_t Nout = sizeof(From) * N / sizeof(To)> -constexpr KFR_INLINE vec<To, Nout> ubitcast(vec<From, N> value) noexcept +constexpr KFR_INLINE vec<To, Nout> ubitcast(const vec<From, N>& value) noexcept { return reinterpret_cast<simd<To, Nout>>(*value); } template <typename From, size_t N, typename To = itype<From>, size_t Nout = sizeof(From) * N / sizeof(To)> -constexpr KFR_INLINE vec<To, Nout> ibitcast(vec<From, N> value) noexcept +constexpr KFR_INLINE vec<To, Nout> ibitcast(const vec<From, N>& value) noexcept { return reinterpret_cast<simd<To, Nout>>(*value); } template <typename From, size_t N, typename To = ftype<From>, size_t Nout = sizeof(From) * N / sizeof(To)> -constexpr KFR_INLINE vec<To, Nout> fbitcast(vec<From, N> value) noexcept +constexpr KFR_INLINE vec<To, Nout> fbitcast(const vec<From, N>& value) noexcept { return reinterpret_cast<simd<To, Nout>>(*value); } template <typename From, simdindex N, typename To = utype<From>, simdindex Nout = sizeof(From) * N / sizeof(To)> -constexpr KFR_INLINE simd<To, Nout> ubitcast(simd<From, N> value) noexcept +constexpr KFR_INLINE simd<To, Nout> ubitcast(const simd<From, N>& value) noexcept { return reinterpret_cast<simd<To, Nout>>(value); } template <typename From, simdindex N, typename To = itype<From>, simdindex Nout = sizeof(From) * N / sizeof(To)> -constexpr KFR_INLINE simd<To, Nout> ibitcast(simd<From, N> value) noexcept +constexpr KFR_INLINE simd<To, Nout> ibitcast(const simd<From, N>& value) noexcept { return reinterpret_cast<simd<To, Nout>>(value); } template <typename From, simdindex N, typename To = ftype<From>, simdindex Nout = sizeof(From) * N / sizeof(To)> -constexpr KFR_INLINE simd<To, Nout> fbitcast(simd<From, N> value) noexcept +constexpr KFR_INLINE simd<To, Nout> fbitcast(const simd<From, N>& value) noexcept { return reinterpret_cast<simd<To, Nout>>(value); } @@ -319,7 +319,7 @@ constexpr KFR_INLINE simd<To, Nout> fbitcast(simd<From, N> value) noexcept constexpr KFR_INLINE size_t vector_alignment(size_t size) { return next_poweroftwo(size); } template <typename T, size_t N, size_t... Sizes, size_t Nout = N + csum(csizes<Sizes...>)> -KFR_INLINE vec<T, Nout> concat(vec<T, N> x, vec<T, Sizes>... rest); +KFR_INLINE vec<T, Nout> concat(const vec<T, N>& x, const vec<T, Sizes>&... rest); namespace internal { @@ -337,7 +337,7 @@ struct shuffle_index_wrap } template <size_t count, typename T, size_t N, size_t Nout = N* count> -KFR_INLINE vec<T, Nout> repeat(vec<T, N> x) +KFR_INLINE vec<T, Nout> repeat(const vec<T, N>& x) { return shufflevector<Nout, internal::shuffle_index_wrap<N, 0, 1>>(x); } @@ -355,12 +355,12 @@ constexpr KFR_INLINE vec<T, N> broadcast(T x) #pragma clang diagnostic pop template <size_t Nout, typename T, size_t N, KFR_ENABLE_IF(Nout != N)> -KFR_INLINE vec<T, Nout> resize(vec<T, N> x) +KFR_INLINE vec<T, Nout> resize(const vec<T, N>& x) { return shufflevector<Nout, internal::shuffle_index_wrap<N, 0, 1>>(x); } template <size_t Nout, typename T, size_t N, KFR_ENABLE_IF(Nout == N)> -constexpr KFR_INLINE vec<T, Nout> resize(vec<T, N> x) +constexpr KFR_INLINE vec<T, Nout> resize(const vec<T, N>& x) { return x; } @@ -385,13 +385,13 @@ KFR_INLINE vec<T, N> read(const T* src) } template <bool A = false, size_t N, typename T, KFR_ENABLE_IF(is_poweroftwo(N))> -KFR_INLINE void write(T* dest, vec<T, N> value) +KFR_INLINE void write(T* dest, const vec<T, N>& value) { - ptr_cast<vec_algn<subtype<T>, value.scalar_size(), A>>(dest)->value = *value; + ptr_cast<vec_algn<subtype<T>, vec<T, N>::scalar_size(), A>>(dest)->value = *value; } template <bool A = false, size_t N, typename T, KFR_ENABLE_IF(!is_poweroftwo(N))> -KFR_INLINE void write(T* dest, vec<T, N> value) +KFR_INLINE void write(T* dest, const vec<T, N>& value) { constexpr size_t first = prev_poweroftwo(N); constexpr size_t rest = N - first; @@ -540,7 +540,7 @@ constexpr KFR_INLINE vec<SubType, N> make_vector(const Arg& x, const Args&... re static_cast<SubType>(rest)...); } template <typename T, size_t N> -constexpr KFR_INLINE vec<T, N> make_vector(vec<T, N> x) +constexpr KFR_INLINE vec<T, N> make_vector(const vec<T, N>& x) { return x; } @@ -608,30 +608,60 @@ struct vec : vec_t<T, N> constexpr KFR_INLINE vec& operator=(const vec&) noexcept = default; constexpr KFR_INLINE vec& operator=(vec&&) noexcept = default; - friend constexpr KFR_INLINE vec operator+(vec x, vec y) { return vec_op<T>::add(x.v, y.v); } - friend constexpr KFR_INLINE vec operator-(vec x, vec y) { return vec_op<T>::sub(x.v, y.v); } - friend constexpr KFR_INLINE vec operator*(vec x, vec y) { return vec_op<T>::mul(x.v, y.v); } - friend constexpr KFR_INLINE vec operator/(vec x, vec y) { return vec_op<T>::div(x.v, y.v); } - friend constexpr KFR_INLINE vec operator%(vec x, vec y) { return vec_op<T>::rem(x.v, y.v); } - friend constexpr KFR_INLINE vec operator-(vec x) { return vec_op<T>::neg(x.v); } + friend constexpr KFR_INLINE vec operator+(const vec& x, const vec& y) { return vec_op<T>::add(x.v, y.v); } + friend constexpr KFR_INLINE vec operator-(const vec& x, const vec& y) { return vec_op<T>::sub(x.v, y.v); } + friend constexpr KFR_INLINE vec operator*(const vec& x, const vec& y) { return vec_op<T>::mul(x.v, y.v); } + friend constexpr KFR_INLINE vec operator/(const vec& x, const vec& y) { return vec_op<T>::div(x.v, y.v); } + friend constexpr KFR_INLINE vec operator%(const vec& x, const vec& y) { return vec_op<T>::rem(x.v, y.v); } + friend constexpr KFR_INLINE vec operator-(const vec& x) { return vec_op<T>::neg(x.v); } - friend constexpr KFR_INLINE vec operator&(vec x, vec y) { return vec_op<T>::band(x.v, y.v); } - friend constexpr KFR_INLINE vec operator|(vec x, vec y) { return vec_op<T>::bor(x.v, y.v); } - friend constexpr KFR_INLINE vec operator^(vec x, vec y) { return vec_op<T>::bxor(x.v, y.v); } - friend constexpr KFR_INLINE vec operator~(vec x) { return vec_op<T>::bnot(x.v); } + friend constexpr KFR_INLINE vec operator&(const vec& x, const vec& y) + { + return vec_op<T>::band(x.v, y.v); + } + friend constexpr KFR_INLINE vec operator|(const vec& x, const vec& y) { return vec_op<T>::bor(x.v, y.v); } + friend constexpr KFR_INLINE vec operator^(const vec& x, const vec& y) + { + return vec_op<T>::bxor(x.v, y.v); + } + friend constexpr KFR_INLINE vec operator~(const vec& x) { return vec_op<T>::bnot(x.v); } - friend constexpr KFR_INLINE vec operator<<(vec x, vec y) { return vec_op<T>::shl(x.v, y.v); } - friend constexpr KFR_INLINE vec operator>>(vec x, vec y) { return vec_op<T>::shr(x.v, y.v); } + friend constexpr KFR_INLINE vec operator<<(const vec& x, const vec& y) + { + return vec_op<T>::shl(x.v, y.v); + } + friend constexpr KFR_INLINE vec operator>>(const vec& x, const vec& y) + { + return vec_op<T>::shr(x.v, y.v); + } - friend constexpr KFR_INLINE mask<T, N> operator==(vec x, vec y) { return vec_op<T>::eq(x.v, y.v); } - friend constexpr KFR_INLINE mask<T, N> operator!=(vec x, vec y) { return vec_op<T>::ne(x.v, y.v); } - friend constexpr KFR_INLINE mask<T, N> operator<(vec x, vec y) { return vec_op<T>::lt(x.v, y.v); } - friend constexpr KFR_INLINE mask<T, N> operator>(vec x, vec y) { return vec_op<T>::gt(x.v, y.v); } - friend constexpr KFR_INLINE mask<T, N> operator<=(vec x, vec y) { return vec_op<T>::le(x.v, y.v); } - friend constexpr KFR_INLINE mask<T, N> operator>=(vec x, vec y) { return vec_op<T>::ge(x.v, y.v); } + friend constexpr KFR_INLINE mask<T, N> operator==(const vec& x, const vec& y) + { + return vec_op<T>::eq(x.v, y.v); + } + friend constexpr KFR_INLINE mask<T, N> operator!=(const vec& x, const vec& y) + { + return vec_op<T>::ne(x.v, y.v); + } + friend constexpr KFR_INLINE mask<T, N> operator<(const vec& x, const vec& y) + { + return vec_op<T>::lt(x.v, y.v); + } + friend constexpr KFR_INLINE mask<T, N> operator>(const vec& x, const vec& y) + { + return vec_op<T>::gt(x.v, y.v); + } + friend constexpr KFR_INLINE mask<T, N> operator<=(const vec& x, const vec& y) + { + return vec_op<T>::le(x.v, y.v); + } + friend constexpr KFR_INLINE mask<T, N> operator>=(const vec& x, const vec& y) + { + return vec_op<T>::ge(x.v, y.v); + } #define KFR_ASGN_OP(aop, op) \ - friend KFR_INLINE vec& operator aop(vec& x, vec y) \ + friend KFR_INLINE vec& operator aop(vec& x, const vec& y) \ { \ x = x op y; \ return x; \ @@ -721,8 +751,8 @@ struct mask : public vec<T, N> KFR_INLINE mask& operator=(const mask&) noexcept = default; KFR_INLINE mask& operator=(mask&&) noexcept = default; - template <typename M, typename = u8[sizeof(T) == sizeof(M)]> - constexpr KFR_INLINE mask(vec<M, N> value) : base(reinterpret_cast<const vec<T, N>&>(value)) + template <typename M, KFR_ENABLE_IF(sizeof(T) == sizeof(M))> + constexpr KFR_INLINE mask(const vec<M, N>& value) : base(bitcast<T>(value)) { } @@ -731,21 +761,21 @@ struct mask : public vec<T, N> // { // } constexpr KFR_INLINE mask operator~() const { return bitcast<T>(~ubitcast(this->v)); } - constexpr KFR_INLINE mask operator&(vec<T, N> x) const + constexpr KFR_INLINE mask operator&(const vec<T, N>& x) const { return bitcast<T>(ubitcast(this->v) & ubitcast(x.v)); } - constexpr KFR_INLINE mask operator|(vec<T, N> x) const + constexpr KFR_INLINE mask operator|(const vec<T, N>& x) const { return bitcast<T>(ubitcast(this->v) | ubitcast(x.v)); } - constexpr KFR_INLINE mask operator^(vec<T, N> x) const + constexpr KFR_INLINE mask operator^(const vec<T, N>& x) const { return bitcast<T>(ubitcast(this->v) ^ ubitcast(x.v)); } - constexpr KFR_INLINE mask operator&&(mask x) const { return *this & x; } - constexpr KFR_INLINE mask operator||(mask x) const { return *this | x; } + constexpr KFR_INLINE mask operator&&(const mask& x) const { return *this & x; } + constexpr KFR_INLINE mask operator||(const mask& x) const { return *this | x; } constexpr KFR_INLINE mask operator!() const { return ~*this; } constexpr KFR_INLINE simd<T, N> operator*() const { return this->v; } @@ -778,24 +808,24 @@ struct shuffle_index_extend }; template <size_t start, size_t count, typename T, size_t N> -KFR_INLINE vec<T, count> concatexact(vec<T, N> x, vec<T, N> y) +KFR_INLINE vec<T, count> concatexact(const vec<T, N>& x, const vec<T, N>& y) { return kfr::shufflevector<count, internal::shuffle_index<start>>(x, y); } template <size_t start, size_t count, typename T, size_t N1, size_t N2> -KFR_INLINE enable_if<(N1 == N2), vec<T, count>> concattwo(vec<T, N1> x, vec<T, N2> y) +KFR_INLINE enable_if<(N1 == N2), vec<T, count>> concattwo(const vec<T, N1>& x, const vec<T, N2>& y) { return concatexact<start, count>(x, y); } template <size_t start, size_t count, typename T, size_t N1, size_t N2> -KFR_INLINE enable_if<(N1 > N2), vec<T, count>> concattwo(vec<T, N1> x, vec<T, N2> y) +KFR_INLINE enable_if<(N1 > N2), vec<T, count>> concattwo(const vec<T, N1>& x, const vec<T, N2>& y) { return concatexact<start, count>(x, shufflevector<N1, internal::shuffle_index_extend<0, N2>>(y)); } template <size_t start, size_t count, typename T, size_t N1, size_t N2> -KFR_INLINE enable_if<(N1 < N2), vec<T, count>> concattwo(vec<T, N1> x, vec<T, N2> y) +KFR_INLINE enable_if<(N1 < N2), vec<T, count>> concattwo(const vec<T, N1>& x, const vec<T, N2>& y) { return concatexact<N2 - N1 + start, count>( shufflevector<N2, internal::shuffle_index_extend<N2 - N1, N1>>(x), y); @@ -813,26 +843,26 @@ constexpr mask<T, Nout> partial_mask() } template <typename T, size_t N> -KFR_INLINE vec<T, N> concat(vec<T, N> x) +KFR_INLINE vec<T, N> concat(const vec<T, N>& x) { return x; } template <typename T, size_t N1, size_t N2> -KFR_INLINE vec<T, N1 + N2> concat(vec<T, N1> x, vec<T, N2> y) +KFR_INLINE vec<T, N1 + N2> concat(const vec<T, N1>& x, const vec<T, N2>& y) { return concattwo<0, N1 + N2>(x, y); } template <typename T, size_t N1, size_t N2, size_t... Sizes> -KFR_INLINE auto concat(vec<T, N1> x, vec<T, N2> y, vec<T, Sizes>... args) +KFR_INLINE auto concat(const vec<T, N1>& x, const vec<T, N2>& y, const vec<T, Sizes>&... args) { return concat(x, concat(y, args...)); } } template <typename T, size_t N, size_t... Sizes, size_t Nout> -KFR_INLINE vec<T, Nout> concat(vec<T, N> x, vec<T, Sizes>... rest) +KFR_INLINE vec<T, Nout> concat(const vec<T, N>& x, const vec<T, Sizes>&... rest) { return internal::concat(x, rest...); } @@ -1105,7 +1135,7 @@ constexpr KFR_INLINE vec<T, N> apply0_helper(Fn&& fn, csizes_t<Indices...>) template <typename T, size_t N, typename Fn, typename... Args, typename Tout = result_of<Fn(T, subtype<decay<Args>>...)>> -constexpr KFR_INLINE vec<Tout, N> apply(Fn&& fn, vec<T, N> arg, Args&&... args) +constexpr KFR_INLINE vec<Tout, N> apply(Fn&& fn, const vec<T, N>& arg, Args&&... args) { return internal::apply_helper<T, N>(std::forward<Fn>(fn), csizeseq<N>, arg, std::forward<Args>(args)...); } @@ -1174,7 +1204,7 @@ constexpr KFR_INLINE vec<T, N> undefinedvector(vec_t<T, N>) KFR_FN(undefinedvector) template <typename T, size_t N, size_t Nout = prev_poweroftwo(N - 1)> -KFR_INLINE vec<T, Nout> low(vec<T, N> x) +KFR_INLINE vec<T, Nout> low(const vec<T, N>& x) { return shufflevector<Nout, internal::shuffle_index<>>(x); } @@ -1186,7 +1216,7 @@ KFR_INLINE vec_t<T, Nout> low(vec_t<T, N>) } template <typename T, size_t N, size_t Nout = N - prev_poweroftwo(N - 1)> -KFR_INLINE vec<T, Nout> high(vec<T, N> x) +KFR_INLINE vec<T, Nout> high(const vec<T, N>& x) { return shufflevector<Nout, internal::shuffle_index<prev_poweroftwo(N - 1)>>(x); }