commit 73c5446bd8040f030b87da4392e41643e54b6eeb
parent 8de4b0fe7e8c96548de8dd62c698399fdd58f8e6
Author: samuriddle@gmail.com <samuriddle@gmail.com>
Date: Sun, 7 Aug 2016 07:09:20 +0300
New conversion functions and refactoring
Diffstat:
22 files changed, 417 insertions(+), 322 deletions(-)
diff --git a/include/kfr/base/asin_acos.hpp b/include/kfr/base/asin_acos.hpp
@@ -36,14 +36,14 @@ namespace intrinsics
template <typename T, size_t N, typename Tout = flt_type<T>>
KFR_SINTRIN vec<Tout, N> asin(const vec<T, N>& x)
{
- const vec<Tout, N> xx = cast<Tout>(x);
+ const vec<Tout, N> xx = x;
return atan2(xx, sqrt(Tout(1) - xx * xx));
}
template <typename T, size_t N, typename Tout = flt_type<T>>
KFR_SINTRIN vec<Tout, N> acos(const vec<T, N>& x)
{
- const vec<Tout, N> xx = cast<Tout>(x);
+ const vec<Tout, N> xx = x;
return atan2(sqrt(Tout(1) - xx * xx), xx);
}
KFR_I_CONVERTER(asin)
diff --git a/include/kfr/base/complex.hpp b/include/kfr/base/complex.hpp
@@ -100,10 +100,12 @@ namespace cometa
template <typename T>
struct compound_type_traits<kfr::complex<T>>
{
- constexpr static size_t width = 2;
- using subtype = T;
- using deep_subtype = cometa::deep_subtype<T>;
- constexpr static bool is_scalar = false;
+ constexpr static size_t width = 2;
+ constexpr static size_t deep_width = width * compound_type_traits<T>::width;
+ using subtype = T;
+ using deep_subtype = cometa::deep_subtype<T>;
+ constexpr static bool is_scalar = false;
+ constexpr static size_t depth = cometa::compound_type_traits<T>::depth + 1;
template <typename U>
using rebind = kfr::complex<U>;
template <typename U>
@@ -157,21 +159,21 @@ struct vec_op<complex<T>> : private vec_op<T>
template <typename T, size_t N>
KFR_INLINE vec<complex<T>, N> cdupreal(const vec<complex<T>, N>& x)
{
- return subcast<complex<T>>(dupeven(subcast<T>(x)));
+ return compcast<complex<T>>(dupeven(compcast<T>(x)));
}
KFR_FN(cdupreal)
template <typename T, size_t N>
KFR_INLINE vec<complex<T>, N> cdupimag(const vec<complex<T>, N>& x)
{
- return subcast<complex<T>>(dupodd(subcast<T>(x)));
+ return compcast<complex<T>>(dupodd(compcast<T>(x)));
}
KFR_FN(cdupimag)
template <typename T, size_t N>
KFR_INLINE vec<complex<T>, N> cswapreim(const vec<complex<T>, N>& x)
{
- return subcast<complex<T>>(swap<2>(subcast<T>(x)));
+ return compcast<complex<T>>(swap<2>(compcast<T>(x)));
}
KFR_FN(cswapreim)
@@ -205,41 +207,43 @@ template <typename T>
struct is_complex_impl<complex<T>> : std::true_type
{
};
-}
-
-// real to complex
-template <typename To, typename From, size_t N, KFR_ENABLE_IF(internal::is_complex_impl<To>::value)>
-constexpr KFR_INLINE vec<To, N> cast(const vec<From, N>& value) noexcept
-{
- const vec<subtype<To>, N> casted = cast<subtype<To>>(value);
- return subcast<To>(interleave(casted, zerovector(casted)));
-}
-// complex to complex
-template <typename To, typename From, size_t N, KFR_ENABLE_IF(internal::is_complex_impl<To>::value)>
-constexpr KFR_INLINE vec<To, N> cast(const vec<complex<From>, N>& value) noexcept
+// vector<complex> to vector<complex>
+template <typename To, typename From, size_t N>
+struct conversion<vec<complex<To>, N>, vec<complex<From>, N>>
{
- return subcast<To>(cast<subtype<To>>(subcast<From>(value)));
-}
+ static_assert(!is_compound<To>::value, "");
+ static_assert(!is_compound<From>::value, "");
+ static vec<complex<To>, N> cast(const vec<complex<From>, N>& value)
+ {
+ return builtin_convertvector<complex<To>>(value);
+ }
+};
-// complex to real
-template <typename To, typename From, size_t N, KFR_ENABLE_IF(!internal::is_complex_impl<To>::value)>
-constexpr KFR_INLINE vec<To, N> cast(const vec<complex<From>, N>& value) noexcept
+// vector to vector<complex>
+template <typename To, typename From, size_t N>
+struct conversion<vec<complex<To>, N>, vec<From, N>>
{
- static_assert(sizeof(To) == 0, "Can't cast complex to real");
- return {};
+ static_assert(!is_compound<To>::value, "");
+ static_assert(!is_compound<From>::value, "");
+ static vec<complex<To>, N> cast(const vec<From, N>& value)
+ {
+ const vec<To, N> casted = static_cast<vec<To, N>>(value);
+ return *interleave(casted, zerovector(casted));
+ }
+};
}
template <typename T, size_t N>
constexpr KFR_INLINE vec<complex<T>, N / 2> ccomp(const vec<T, N>& x)
{
- return subcast<complex<T>>(x);
+ return compcast<complex<T>>(x);
}
template <typename T, size_t N>
constexpr KFR_INLINE vec<T, N * 2> cdecom(const vec<complex<T>, N>& x)
{
- return subcast<T>(x);
+ return compcast<T>(x);
}
template <typename T>
@@ -250,7 +254,7 @@ constexpr KFR_INLINE T real(const complex<T>& value)
template <typename T, size_t N>
constexpr KFR_INLINE vec<T, N> real(const vec<complex<T>, N>& value)
{
- return even(subcast<T>(value));
+ return even(compcast<T>(value));
}
template <typename T>
@@ -273,7 +277,7 @@ constexpr KFR_INLINE T imag(const complex<T>& value)
template <typename T, size_t N>
constexpr KFR_INLINE vec<T, N> imag(const vec<complex<T>, N>& value)
{
- return odd(subcast<T>(value));
+ return odd(compcast<T>(value));
}
KFR_FN(imag)
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
@@ -285,7 +289,7 @@ KFR_INLINE internal::expression_function<fn_imag, E1> imag(E1&& x)
template <typename T1, typename T2 = T1, size_t N, typename T = common_type<T1, T2>>
constexpr KFR_INLINE vec<complex<T>, N> make_complex(const vec<T1, N>& real, const vec<T2, N>& imag = T2(0))
{
- return subcast<complex<T>>(interleave(cast<T>(real), cast<T>(imag)));
+ return compcast<complex<T>>(interleave(cast<T>(real), cast<T>(imag)));
}
template <typename T1, typename T2 = T1, typename T = common_type<T1, T2>>
diff --git a/include/kfr/base/expression.hpp b/include/kfr/base/expression.hpp
@@ -99,8 +99,7 @@ protected:
private:
template <typename Arg, size_t N, typename Tin,
- typename Tout1 = conditional<is_generic<Arg>::value, Tin, typename decay<Arg>::value_type>,
- typename Tout = Tout1>
+ typename Tout = conditional<is_generic<Arg>::value, Tin, value_type_of<Arg>>>
KFR_INLINE vec_t<Tout, N> vec_t_for() const
{
return {};
@@ -112,8 +111,8 @@ private:
constexpr size_t Nin = N * ratio::input / ratio::output;
using Tout = conditional<is_same<generic, value_type>::value, T, common_type<T, value_type>>;
- return cast<T>(fn(cast<Tout>(std::get<indices>(this->args)(
- cinput, index * ratio::input / ratio::output, vec_t_for<Args, Nin, Tout>()))...));
+ return fn(std::get<indices>(this->args)(cinput, index * ratio::input / ratio::output,
+ vec_t_for<Args, Nin, Tout>())...);
}
template <size_t... indices>
KFR_INLINE void begin_block_impl(size_t size, csizes_t<indices...>)
@@ -149,7 +148,7 @@ struct expression_scalar : input_expression
template <typename U, size_t N>
KFR_INLINE vec<U, N> operator()(cinput_t, size_t, vec_t<U, N>) const
{
- return resize<N>(cast<U>(val));
+ return resize<N>(static_cast<vec<U, width>>(val));
}
};
@@ -204,7 +203,7 @@ KFR_INLINE void process_cycle(OutFn&& outfn, const Fn& fn, size_t& i, size_t siz
KFR_LOOP_NOUNROLL
for (; i < count; i += width)
{
- outfn(coutput, i, cast<Tout>(fn(cinput, i, vec_t<Tin, width>())));
+ outfn(coutput, i, fn(cinput, i, vec_t<Tin, width>()));
}
}
}
@@ -269,7 +268,7 @@ struct expressoin_typed : input_expression
template <typename U, size_t N>
KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
{
- return cast<U>(e1(cinput, index, vec_t<T, N>()));
+ return e1(cinput, index, vec_t<T, N>());
}
E1 e1;
};
@@ -286,7 +285,7 @@ struct expressoin_sized : input_expression
KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
{
auto val = e1(cinput, index, vec_t<T, N>());
- return cast<U>(val);
+ return val;
}
constexpr size_t size() const noexcept { return m_size; }
diff --git a/include/kfr/base/generators.hpp b/include/kfr/base/generators.hpp
@@ -43,7 +43,7 @@ struct generator : input_expression
template <typename U, size_t N>
KFR_INLINE vec<U, N> operator()(cinput_t, size_t, vec_t<U, N> t) const
{
- return cast<U>(generate(t));
+ return generate(t);
}
void resync(T start) const { ptr_cast<Class>(this)->sync(start); }
@@ -108,7 +108,7 @@ protected:
T vstep;
};
-template <typename T, size_t width = get_vector_width<T, cpu_t::native>(1, 2)>
+template <typename T, size_t width = get_vector_width<T, cpu_t::native>(1, 2), KFR_ARCH_DEP>
struct generator_exp : generator<T, width, generator_exp<T, width>>
{
generator_exp(T start, T step) noexcept : step(step), vstep(exp(make_vector(step* width))[0] - 1)
@@ -125,7 +125,7 @@ protected:
T vstep;
};
-template <typename T, size_t width = get_vector_width<T, cpu_t::native>(1, 2)>
+template <typename T, size_t width = get_vector_width<T, cpu_t::native>(1, 2), KFR_ARCH_DEP>
struct generator_exp2 : generator<T, width, generator_exp2<T, width>>
{
generator_exp2(T start, T step) noexcept : step(step), vstep(exp2(make_vector(step* width))[0] - 1)
@@ -142,7 +142,7 @@ protected:
T vstep;
};
-template <typename T, size_t width = get_vector_width<T, cpu_t::native>(1, 2)>
+template <typename T, size_t width = get_vector_width<T, cpu_t::native>(1, 2), KFR_ARCH_DEP>
struct generator_cossin : generator<T, width, generator_cossin<T, width>>
{
generator_cossin(T start, T step)
@@ -167,7 +167,7 @@ protected:
}
};
-template <typename T, size_t width = get_vector_width<T, cpu_t::native>(2, 4)>
+template <typename T, size_t width = get_vector_width<T, cpu_t::native>(2, 4), KFR_ARCH_DEP>
struct generator_sin : generator<T, width, generator_sin<T, width>>
{
generator_sin(T start, T step)
diff --git a/include/kfr/base/log_exp.hpp b/include/kfr/base/log_exp.hpp
@@ -81,7 +81,7 @@ KFR_SINTRIN vec<f64, N> vldexpk(const vec<f64, N>& x, const vec<i64, N>& q)
template <typename T, size_t N>
KFR_SINTRIN vec<T, N> logb(const vec<T, N>& x)
{
- return select(x == T(), -c_infinity<T>, cast<T>(vilogbp1(x) - 1));
+ return select(x == T(), -c_infinity<T>, static_cast<vec<T, N>>(vilogbp1(x) - 1));
}
template <size_t N>
diff --git a/include/kfr/base/operators.hpp b/include/kfr/base/operators.hpp
@@ -81,9 +81,7 @@ KFR_INLINE internal::expression_function<fn_add, E1, E2> add(E1&& x, E2&& y)
template <typename E1, typename E2, typename E3, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)>
KFR_INLINE internal::expression_function<fn_add, E1> add(E1&& x, E2&& y, E3&& z)
{
- return { fn_add(), std::forward<E1>(x), std::forward<E2>(y), std::forward<E3>(z)
-
- };
+ return { fn_add(), std::forward<E1>(x), std::forward<E2>(y), std::forward<E3>(z) };
}
template <typename T1, typename T2>
@@ -101,9 +99,7 @@ KFR_FN(sub)
template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)>
KFR_INLINE internal::expression_function<fn_sub, E1, E2> sub(E1&& x, E2&& y)
{
- return { fn_sub(), std::forward<E1>(x), std::forward<E2>(y)
-
- };
+ return { fn_sub(), std::forward<E1>(x), std::forward<E2>(y) };
}
template <typename T1>
@@ -111,10 +107,10 @@ constexpr inline T1 mul(T1 x)
{
return x;
}
-template <typename T1, typename T2, typename... Ts>
-constexpr inline common_type<T1, T2, Ts...> mul(T1 x, T2 y, Ts... rest)
+template <typename T1, typename T2, typename... Ts, typename Tout = common_type<T1, T2, Ts...>>
+constexpr inline Tout mul(T1 x, T2 y, Ts... rest)
{
- return x * mul(std::forward<T2>(y), std::forward<Ts>(rest)...);
+ return static_cast<Tout>(x) * static_cast<Tout>(mul(std::forward<T2>(y), std::forward<Ts>(rest)...));
}
template <typename T>
@@ -156,9 +152,7 @@ KFR_FN(cub)
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
KFR_INLINE internal::expression_function<fn_cub, E1> cub(E1&& x)
{
- return { fn_cub(), std::forward<E1>(x)
-
- };
+ return { fn_cub(), std::forward<E1>(x) };
}
template <typename T>
@@ -192,30 +186,22 @@ KFR_FN(pow5)
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
KFR_INLINE internal::expression_function<fn_pow2, E1> pow2(E1&& x)
{
- return { fn_pow2(), std::forward<E1>(x)
-
- };
+ return { fn_pow2(), std::forward<E1>(x) };
}
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
KFR_INLINE internal::expression_function<fn_pow3, E1> pow3(E1&& x)
{
- return { fn_pow3(), std::forward<E1>(x)
-
- };
+ return { fn_pow3(), std::forward<E1>(x) };
}
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
KFR_INLINE internal::expression_function<fn_pow4, E1> pow4(E1&& x)
{
- return { fn_pow4(), std::forward<E1>(x)
-
- };
+ return { fn_pow4(), std::forward<E1>(x) };
}
template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)>
KFR_INLINE internal::expression_function<fn_pow5, E1> pow5(E1&& x)
{
- return { fn_pow5(), std::forward<E1>(x)
-
- };
+ return { fn_pow5(), std::forward<E1>(x) };
}
/// Raise x to the power base $x^{base}$
@@ -265,24 +251,24 @@ KFR_FN(sqrsum)
KFR_FN(sqrdiff)
/// Division
-template <typename T1, typename T2>
-inline common_type<T1, T2> div(T1 x, T2 y)
+template <typename T1, typename T2, typename Tout = common_type<T1, T2>>
+inline Tout div(const T1& x, const T2& y)
{
- return x / y;
+ return static_cast<Tout>(x) / static_cast<Tout>(y);
}
KFR_FN(div)
/// Remainder
-template <typename T1, typename T2>
-inline common_type<T1, T2> rem(T1 x, T2 y)
+template <typename T1, typename T2, typename Tout = common_type<T1, T2>>
+inline Tout rem(const T1& x, const T2& y)
{
- return x % y;
+ return static_cast<Tout>(x) % static_cast<Tout>(y);
}
KFR_FN(rem)
/// Negation
template <typename T1>
-inline T1 neg(T1 x)
+inline T1 neg(const T1& x)
{
return -x;
}
@@ -290,7 +276,7 @@ KFR_FN(neg)
/// Bitwise Not
template <typename T1>
-inline T1 bitwisenot(T1 x)
+inline T1 bitwisenot(const T1& x)
{
return ~x;
}
@@ -499,26 +485,6 @@ constexpr KFR_INLINE vec<T, N> copysign(const vec<T, N>& x, const vec<T, N>& y)
return (x & internal::highbitmask<T>) | (y & internal::highbitmask<T>);
}
-template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)>
-KFR_INLINE vec<T, N> fmod(const vec<T, N>& x, const vec<T, N>& y)
-{
- return x - cast<itype<T>>(x / y) * y;
-}
-
-KFR_FN_S(fmod)
-KFR_FN(fmod)
-
-template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)>
-constexpr KFR_INLINE vec<T, N> rem(const vec<T, N>& x, const vec<T, N>& y)
-{
- return x % y;
-}
-template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)>
-KFR_INLINE vec<T, N> rem(const vec<T, N>& x, const vec<T, N>& y)
-{
- return fmod(x, y);
-}
-
template <typename T, size_t N>
KFR_INLINE mask<T, N> isnan(const vec<T, N>& x)
{
@@ -695,4 +661,49 @@ KFR_EXPR_BINARY(fn_less, <)
KFR_EXPR_BINARY(fn_greater, >)
KFR_EXPR_BINARY(fn_lessorequal, <=)
KFR_EXPR_BINARY(fn_greaterorequal, >=)
+
+template <typename T, size_t N1, size_t... Ns>
+vec<vec<T, sizeof...(Ns) + 1>, N1> packtranspose(const vec<T, N1>& x, const vec<T, Ns>&... rest)
+{
+ const vec<T, N1*(sizeof...(Ns) + 1)> t = transpose<N1>(concat(x, rest...));
+ return compcast<vec<T, sizeof...(Ns) + 1>>(t);
+}
+
+KFR_FN(packtranspose)
+
+namespace internal
+{
+template <typename... E>
+struct expression_pack : expression<E...>, output_expression
+{
+ expression_pack(E&&... e) : expression<E...>(std::forward<E>(e)...) {}
+ using value_type = vec<common_type<value_type_of<E>...>, sizeof...(E)>;
+ using size_type = typename expression<E...>::size_type;
+ constexpr size_type size() const noexcept { return expression<E...>::size(); }
+
+ template <typename U, size_t N>
+ KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N> x) const
+ {
+ return this->call(fn_packtranspose(), index, x);
+ }
+ template <typename U, size_t N>
+ KFR_INLINE void operator()(coutput_t, size_t index, const vec<U, N>& x)
+ {
+ output(index, x, csizeseq<sizeof...(E)>);
+ }
+
+private:
+ template <typename U, size_t N, size_t... indices>
+ void output(size_t index, const vec<U, N>& x, csizes_t<indices...>)
+ {
+ swallow{ (std::get<indices>(this->args)(coutput, index, x[indices]), void(), 0)... };
+ }
+};
+}
+
+template <typename... E, KFR_ENABLE_IF(is_input_expressions<E...>::value)>
+internal::expression_pack<internal::arg<E>...> pack(E&&... e)
+{
+ return internal::expression_pack<internal::arg<E>...>(std::forward<E>(e)...);
+}
}
diff --git a/include/kfr/base/pointer.hpp b/include/kfr/base/pointer.hpp
@@ -82,7 +82,7 @@ struct expression_pointer : input_expression
constexpr size_t findex = ilog2(N);
static_assert(N <= maxwidth, "N is greater than maxwidth");
func_t func = reinterpret_cast<func_t>(vtable->get(csize<2 + findex>));
- vec<U, N> result = cast<U>(func(instance, index));
+ vec<U, N> result = func(instance, index);
return result;
}
KFR_INLINE void begin_block(size_t size) const
diff --git a/include/kfr/base/random.hpp b/include/kfr/base/random.hpp
@@ -114,8 +114,8 @@ inline enable_if_not_f<vec<T, N>> random_range(random_bit_generator& gen, T min,
using big_type = findinttype<sqr(std::numeric_limits<T>::min()), sqr(std::numeric_limits<T>::max())>;
vec<T, N> u = random_uniform<T, N>(gen);
- const vec<big_type, N> tmp = cast<big_type>(u);
- return cast<T>((tmp * (max - min) + min) >> typebits<T>::bits);
+ const vec<big_type, N> tmp = u;
+ return (tmp * (max - min) + min) >> typebits<T>::bits;
}
namespace internal
@@ -128,7 +128,7 @@ struct expression_random_uniform : input_expression
template <typename U, size_t N>
vec<U, N> operator()(cinput_t, size_t, vec_t<U, N>) const
{
- return cast<U>(random_uniform<T, N>(gen));
+ return random_uniform<T, N>(gen);
}
mutable random_bit_generator gen;
};
@@ -146,7 +146,7 @@ struct expression_random_range : input_expression
template <typename U, size_t N>
vec<U, N> operator()(cinput_t, size_t, vec_t<U, N>) const
{
- return cast<U>(random_range<N, T>(gen, min, max));
+ return random_range<N, T>(gen, min, max);
}
mutable random_bit_generator gen;
const T min;
diff --git a/include/kfr/base/round.hpp b/include/kfr/base/round.hpp
@@ -318,6 +318,26 @@ KFR_INTRIN internal::expression_function<fn::itrunc, E1> itrunc(E1&& x)
{
return { fn::itrunc(), std::forward<E1>(x) };
}
+
+template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)>
+KFR_INLINE vec<T, N> fmod(const vec<T, N>& x, const vec<T, N>& y)
+{
+ return x - trunc(x / y) * y;
+}
+
+KFR_FN_S(fmod)
+KFR_FN(fmod)
+
+template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)>
+constexpr KFR_INLINE vec<T, N> rem(const vec<T, N>& x, const vec<T, N>& y)
+{
+ return x % y;
+}
+template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)>
+KFR_INLINE vec<T, N> rem(const vec<T, N>& x, const vec<T, N>& y)
+{
+ return fmod(x, y);
+}
}
#undef KFR_mm_trunc_ps
diff --git a/include/kfr/base/select.hpp b/include/kfr/base/select.hpp
@@ -180,9 +180,9 @@ KFR_SINTRIN vec<T, N> select(const mask<T, N>& m, const vec<T, N>& x, const vec<
// fallback
template <typename T, size_t N>
-KFR_SINTRIN vec<T, N> select(mask<T, N> m, const vec<T, N>& x, const vec<T, N>& y)
+KFR_SINTRIN vec<T, N> select(const mask<T, N>& m, const vec<T, N>& x, const vec<T, N>& y)
{
- return y ^ ((x ^ y) & m);
+ return y ^ ((x ^ y) & m.asvec());
}
#endif
}
@@ -193,8 +193,7 @@ template <typename T1, size_t N, typename T2, typename T3, KFR_ENABLE_IF(is_nume
KFR_INTRIN vec<Tout, N> select(const mask<T1, N>& m, const T2& x, const T3& y)
{
static_assert(sizeof(T1) == sizeof(Tout), "select: incompatible types");
- return intrinsics::select(bitcast<Tout>(m).asmask(), static_cast<vec<Tout, N>>(x),
- static_cast<vec<Tout, N>>(y));
+ return intrinsics::select(bitcast<Tout>(m), static_cast<vec<Tout, N>>(x), static_cast<vec<Tout, N>>(y));
}
template <typename E1, typename E2, typename E3, KFR_ENABLE_IF(is_input_expressions<E1, E2, E3>::value)>
diff --git a/include/kfr/base/types.hpp b/include/kfr/base/types.hpp
@@ -349,6 +349,8 @@ enum class cpu_t : int
runtime = -1,
};
+#define KFR_ARCH_DEP cpu_t cpu = cpu_t::native
+
template <cpu_t cpu>
using ccpu_t = cval_t<cpu_t, cpu>;
@@ -578,6 +580,12 @@ constexpr inline static const T* derived_cast(const U* ptr)
return static_cast<const T*>(ptr);
}
+template <typename T, typename U>
+constexpr inline static T implicit_cast(U&& value)
+{
+ return std::forward<T>(value);
+}
+
#pragma clang diagnostic pop
__attribute__((unused)) static const char* cpu_name(cpu_t set)
@@ -781,10 +789,12 @@ namespace cometa
template <typename T, size_t N>
struct compound_type_traits<kfr::vec_t<T, N>>
{
- constexpr static size_t width = N;
- using subtype = T;
- using deep_subtype = cometa::deep_subtype<T>;
- constexpr static bool is_scalar = false;
+ constexpr static size_t width = N;
+ constexpr static size_t deep_width = width * compound_type_traits<T>::width;
+ using subtype = T;
+ using deep_subtype = cometa::deep_subtype<T>;
+ constexpr static bool is_scalar = false;
+ constexpr static size_t depth = cometa::compound_type_traits<T>::depth + 1;
template <typename U>
using rebind = kfr::vec_t<U, N>;
diff --git a/include/kfr/base/univector.hpp b/include/kfr/base/univector.hpp
@@ -43,13 +43,13 @@ struct univector_base : input_expression, output_expression
KFR_INLINE void operator()(coutput_t, size_t index, const vec<U, N>& value)
{
T* data = derived_cast<Class>(this)->data();
- write(ptr_cast<T>(data) + index, cast<T>(value));
+ write(ptr_cast<T>(data) + index, vec<T, N>(value));
}
template <typename U, size_t N>
KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
{
const T* data = derived_cast<Class>(this)->data();
- return cast<U>(read<N>(ptr_cast<T>(data) + index));
+ return static_cast<vec<U, N>>(read<N>(ptr_cast<T>(data) + index));
}
template <typename Input, KFR_ENABLE_IF(is_input_expression<Input>::value)>
@@ -197,7 +197,7 @@ struct univector<T, tag_array_ref> : array_ref<T>, univector_base<T, univector<T
constexpr static bool is_array_ref = true;
constexpr static bool is_vector = false;
constexpr static bool is_aligned = false;
- using value_type = T;
+ using value_type = remove_const<T>;
using univector_base<T, univector>::operator=;
};
diff --git a/include/kfr/base/vec.hpp b/include/kfr/base/vec.hpp
@@ -99,7 +99,7 @@ struct vec_ptr
template <typename To, typename From, size_t N,
KFR_ENABLE_IF(std::is_same<subtype<From>, subtype<To>>::value),
size_t Nout = N* compound_type_traits<From>::width / compound_type_traits<To>::width>
-constexpr KFR_INLINE vec<To, Nout> subcast(const vec<From, N>& value) noexcept
+constexpr KFR_INLINE vec<To, Nout> compcast(const vec<From, N>& value) noexcept
{
return *value;
}
@@ -154,8 +154,8 @@ template <typename T, size_t N, size_t... Indices, KFR_ENABLE_IF(is_compound<T>:
KFR_INLINE vec<T, sizeof...(Indices)> shufflevector(csizes_t<Indices...> indices, const vec<T, N>& x,
const vec<T, N>& y)
{
- return subcast<T>(
- shufflevector(inflate(csize<widthof<T>()>, indices), subcast<subtype<T>>(x), subcast<subtype<T>>(y)));
+ return compcast<T>(shufflevector(inflate(csize<widthof<T>()>, indices), compcast<subtype<T>>(x),
+ compcast<subtype<T>>(y)));
}
template <size_t... Indices, size_t Nout = sizeof...(Indices), typename T, size_t N>
@@ -225,14 +225,90 @@ constexpr swiz<14> s14{};
constexpr swiz<15> s15{};
}
-template <typename To, typename From, KFR_ENABLE_IF(!is_compound<From>::value)>
-constexpr KFR_INLINE To cast(From value) noexcept
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wold-style-cast"
+
+template <size_t N, typename T>
+constexpr KFR_INLINE vec<T, N> broadcast(T x)
+{
+ return (simd<T, N>)(x);
+}
+
+#pragma clang diagnostic pop
+
+namespace internal
+{
+
+template <typename To, typename From, size_t N, typename Tsub = deep_subtype<To>,
+ size_t Nout = N* compound_type_traits<To>::deep_width>
+constexpr KFR_INLINE vec<To, N> builtin_convertvector(const vec<From, N>& value) noexcept
{
- return static_cast<To>(value);
+ return __builtin_convertvector(*value, simd<Tsub, Nout>);
}
-template <typename To, typename From, KFR_ENABLE_IF(!is_compound<From>::value)>
-constexpr KFR_INLINE To bitcast(From value) noexcept
+
+// scalar to scalar
+template <typename To, typename From>
+struct conversion
{
+ static_assert(std::is_convertible<From, To>::value, "");
+ static To cast(const From& value) { return value; }
+};
+
+// vector to vector
+template <typename To, typename From, size_t N>
+struct conversion<vec<To, N>, vec<From, N>>
+{
+ static_assert(!is_compound<To>::value, "");
+ static_assert(!is_compound<From>::value, "");
+ static vec<To, N> cast(const vec<From, N>& value) { return builtin_convertvector<To>(value); }
+};
+
+// vector<vector> to vector<vector>
+template <typename To, typename From, size_t N1, size_t N2>
+struct conversion<vec<vec<To, N1>, N2>, vec<vec<From, N1>, N2>>
+{
+ static_assert(!is_compound<To>::value, "");
+ static_assert(!is_compound<From>::value, "");
+ static vec<vec<To, N1>, N2> cast(const vec<vec<From, N1>, N2>& value)
+ {
+ return builtin_convertvector<vec<To, N1>>(value);
+ }
+};
+
+// scalar to vector
+template <typename To, typename From, size_t N>
+struct conversion<vec<To, N>, From>
+{
+ static_assert(std::is_convertible<From, To>::value, "");
+ static vec<To, N> cast(const From& value) { return broadcast<N>(static_cast<To>(value)); }
+};
+
+// mask to mask
+template <typename To, typename From, size_t N>
+struct conversion<mask<To, N>, mask<From, N>>
+{
+ static_assert(sizeof(To) == sizeof(From), "");
+ static mask<To, N> cast(const mask<From, N>& value) { return reinterpret_cast<simd<To, N>>(*value); }
+};
+}
+
+template <typename From, size_t N, typename Tsub = deep_subtype<From>,
+ size_t Nout = N * sizeof(From) / sizeof(Tsub)>
+constexpr KFR_INLINE vec<Tsub, Nout> flatten(const vec<From, N>& value) noexcept
+{
+ return *value;
+}
+
+template <typename To, typename From, typename Tout = deep_rebind<From, To>>
+constexpr KFR_INLINE Tout cast(const From& value) noexcept
+{
+ return static_cast<Tout>(value);
+}
+
+template <typename To, typename From>
+constexpr KFR_INLINE To bitcast(const From& value) noexcept
+{
+ static_assert(sizeof(From) == sizeof(To), "bitcast: Incompatible types");
union {
From from;
To to;
@@ -240,43 +316,34 @@ constexpr KFR_INLINE To bitcast(From value) noexcept
return u.to;
}
-template <typename From, typename To = utype<From>, KFR_ENABLE_IF(!is_compound<From>::value)>
-constexpr KFR_INLINE To ubitcast(From value) noexcept
+template <typename To, typename From, size_t N, size_t Nout = N * sizeof(From) / sizeof(To)>
+constexpr KFR_INLINE vec<To, Nout> bitcast(const vec<From, N>& value) noexcept
{
- return bitcast<To>(value);
+ return reinterpret_cast<typename vec<To, Nout>::simd_t>(*value);
}
-template <typename From, typename To = itype<From>, KFR_ENABLE_IF(!is_compound<From>::value)>
-constexpr KFR_INLINE To ibitcast(From value) noexcept
+template <typename To, typename From, size_t N, size_t Nout = N * sizeof(From) / sizeof(To)>
+constexpr KFR_INLINE mask<To, Nout> bitcast(const mask<From, N>& value) noexcept
{
- return bitcast<To>(value);
+ return reinterpret_cast<typename mask<To, Nout>::simd_t>(*value);
}
-template <typename From, typename To = ftype<From>, KFR_ENABLE_IF(!is_compound<From>::value)>
-constexpr KFR_INLINE To fbitcast(From value) noexcept
+template <typename From, typename To = utype<From>, KFR_ENABLE_IF(!is_compound<From>::value)>
+constexpr KFR_INLINE To ubitcast(const From& value) noexcept
{
return bitcast<To>(value);
}
-template <typename To, typename From, size_t N, KFR_ENABLE_IF(!is_compound<To>::value)>
-constexpr KFR_INLINE vec<To, N> cast(const vec<From, N>& value) noexcept
-{
- return __builtin_convertvector(*value, simd<To, N>);
-}
-template <typename To, typename From, simdindex N>
-constexpr KFR_INLINE simd<To, N> cast(const simd<From, N>& value) noexcept
-{
- return __builtin_convertvector(value, simd<To, N>);
-}
-template <typename To, typename From, size_t N, size_t Nout = sizeof(From) * N / sizeof(To)>
-constexpr KFR_INLINE vec<To, Nout> bitcast(const vec<From, N>& value) noexcept
+template <typename From, typename To = itype<From>, KFR_ENABLE_IF(!is_compound<From>::value)>
+constexpr KFR_INLINE To ibitcast(const From& value) noexcept
{
- return reinterpret_cast<simd<To, Nout>>(*value);
+ return bitcast<To>(value);
}
-template <typename To, typename From, simdindex N, simdindex Nout = sizeof(From) * N / sizeof(To)>
-constexpr KFR_INLINE simd<To, Nout> bitcast(const simd<From, N>& value) noexcept
+
+template <typename From, typename To = ftype<From>, KFR_ENABLE_IF(!is_compound<From>::value)>
+constexpr KFR_INLINE To fbitcast(const From& value) noexcept
{
- return reinterpret_cast<simd<To, Nout>>(value);
+ return bitcast<To>(value);
}
template <typename From, size_t N, typename To = utype<From>, size_t Nout = sizeof(From) * N / sizeof(To)>
@@ -297,27 +364,6 @@ constexpr KFR_INLINE vec<To, Nout> fbitcast(const vec<From, N>& value) noexcept
return reinterpret_cast<simd<To, Nout>>(*value);
}
-template <typename From, simdindex N, typename To = utype<From>,
- simdindex Nout = sizeof(From) * N / sizeof(To)>
-constexpr KFR_INLINE simd<To, Nout> ubitcast(const simd<From, N>& value) noexcept
-{
- return reinterpret_cast<simd<To, Nout>>(value);
-}
-
-template <typename From, simdindex N, typename To = itype<From>,
- simdindex Nout = sizeof(From) * N / sizeof(To)>
-constexpr KFR_INLINE simd<To, Nout> ibitcast(const simd<From, N>& value) noexcept
-{
- return reinterpret_cast<simd<To, Nout>>(value);
-}
-
-template <typename From, simdindex N, typename To = ftype<From>,
- simdindex Nout = sizeof(From) * N / sizeof(To)>
-constexpr KFR_INLINE simd<To, Nout> fbitcast(const simd<From, N>& value) noexcept
-{
- return reinterpret_cast<simd<To, Nout>>(value);
-}
-
constexpr KFR_INLINE size_t vector_alignment(size_t size) { return next_poweroftwo(size); }
template <typename T, size_t N, size_t... Sizes, size_t Nout = N + csum(csizes<Sizes...>)>
@@ -345,17 +391,6 @@ KFR_INLINE vec<T, Nout> repeat(const vec<T, N>& x)
}
KFR_FN(repeat)
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wold-style-cast"
-
-template <size_t N, typename T>
-constexpr KFR_INLINE vec<T, N> broadcast(T x)
-{
- return (simd<T, N>)(x);
-}
-
-#pragma clang diagnostic pop
-
template <size_t Nout, typename T, size_t N, KFR_ENABLE_IF(Nout != N)>
KFR_INLINE vec<T, Nout> resize(const vec<T, N>& x)
{
@@ -422,7 +457,8 @@ private:
template <typename T>
struct vec_op
{
- using scalar_type = subtype<T>;
+ using scalar_type = subtype<T>;
+ using uscalar_type = utype<scalar_type>;
template <simdindex N>
constexpr static simd<scalar_type, N> add(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept
@@ -467,53 +503,56 @@ struct vec_op
template <simdindex N>
constexpr static simd<scalar_type, N> band(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept
{
- return bitcast<scalar_type>(ubitcast(x) & ubitcast(y));
+ return reinterpret_cast<simd<scalar_type, N>>(reinterpret_cast<simd<uscalar_type, N>>(x) &
+ reinterpret_cast<simd<uscalar_type, N>>(y));
}
template <simdindex N>
constexpr static simd<scalar_type, N> bor(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept
{
- return bitcast<scalar_type>(ubitcast(x) | ubitcast(y));
+ return reinterpret_cast<simd<scalar_type, N>>(reinterpret_cast<simd<uscalar_type, N>>(x) |
+ reinterpret_cast<simd<uscalar_type, N>>(y));
}
template <simdindex N>
constexpr static simd<scalar_type, N> bxor(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept
{
- return bitcast<scalar_type>(ubitcast(x) ^ ubitcast(y));
+ return reinterpret_cast<simd<scalar_type, N>>(reinterpret_cast<simd<uscalar_type, N>>(x) ^
+ reinterpret_cast<simd<uscalar_type, N>>(y));
}
template <simdindex N>
constexpr static simd<scalar_type, N> bnot(simd<scalar_type, N> x) noexcept
{
- return bitcast<scalar_type>(~ubitcast(x));
+ return reinterpret_cast<simd<scalar_type, N>>(~reinterpret_cast<simd<uscalar_type, N>>(x));
}
template <simdindex N>
constexpr static simd<scalar_type, N> eq(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept
{
- return bitcast<scalar_type>(x == y);
+ return reinterpret_cast<simd<scalar_type, N>>(x == y);
}
template <simdindex N>
constexpr static simd<scalar_type, N> ne(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept
{
- return bitcast<scalar_type>(x != y);
+ return reinterpret_cast<simd<scalar_type, N>>(x != y);
}
template <simdindex N>
constexpr static simd<scalar_type, N> lt(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept
{
- return bitcast<scalar_type>(x < y);
+ return reinterpret_cast<simd<scalar_type, N>>(x < y);
}
template <simdindex N>
constexpr static simd<scalar_type, N> gt(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept
{
- return bitcast<scalar_type>(x > y);
+ return reinterpret_cast<simd<scalar_type, N>>(x > y);
}
template <simdindex N>
constexpr static simd<scalar_type, N> le(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept
{
- return bitcast<scalar_type>(x <= y);
+ return reinterpret_cast<simd<scalar_type, N>>(x <= y);
}
template <simdindex N>
constexpr static simd<scalar_type, N> ge(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept
{
- return bitcast<scalar_type>(x >= y);
+ return reinterpret_cast<simd<scalar_type, N>>(x >= y);
}
};
@@ -554,7 +593,8 @@ constexpr KFR_INLINE vec<T, N> make_vector(cvals_t<T, Values...>)
KFR_FN(make_vector)
template <typename Type = void, typename Arg, typename... Args, size_t N = (sizeof...(Args) + 1),
- typename SubType = conditional<is_void<Type>::value, common_type<Arg, Args...>, Type>>
+ typename SubType = conditional<is_void<Type>::value, common_type<Arg, Args...>, Type>,
+ KFR_ENABLE_IF(is_numeric<SubType>::value)>
constexpr KFR_INLINE vec<SubType, N> pack(const Arg& x, const Args&... rest)
{
return internal::make_vector_impl<SubType>(csizeseq<N * widthof<SubType>()>, static_cast<SubType>(x),
@@ -567,6 +607,7 @@ struct vec : vec_t<T, N>
{
static_assert(N > 0 && N <= 256, "Invalid vector size");
+ using UT = utype<T>;
using value_type = T;
using scalar_type = subtype<T>;
constexpr static size_t scalar_size() noexcept { return N * compound_type_traits<T>::width; }
@@ -582,6 +623,10 @@ struct vec : vec_t<T, N>
: v(*internal_read_write::read<N, false>(value.data()))
{
}
+ constexpr KFR_INLINE vec(const array_ref<const T>& value) noexcept
+ : v(*internal_read_write::read<N, false>(value.data()))
+ {
+ }
template <typename U,
KFR_ENABLE_IF(std::is_convertible<U, T>::value&& compound_type_traits<T>::width > 1)>
constexpr KFR_INLINE vec(const U& value) noexcept
@@ -690,10 +735,10 @@ struct vec : vec_t<T, N>
using array_t = T (&)[N];
KFR_INLINE array_t arr() { return ref_cast<array_t>(v); }
- template <typename U, KFR_ENABLE_IF(std::is_convertible<T, U>::value)>
+ template <typename U, KFR_ENABLE_IF(std::is_convertible<T, U>::value && !std::is_same<U, vec>::value)>
constexpr operator vec<U, N>() const noexcept
{
- return cast<U>(*this);
+ return internal::conversion<vec<U, N>, vec<T, N>>::cast(*this);
}
private:
@@ -730,6 +775,7 @@ private:
template <typename T, size_t N>
struct mask : public vec<T, N>
{
+ using UT = utype<T>;
using type = T;
constexpr static size_t width = N;
@@ -758,23 +804,19 @@ struct mask : public vec<T, N>
{
}
- // template <typename M, typename = u8[sizeof(T) == sizeof(M)]>
- // constexpr KFR_INLINE mask(mask<M, N> value) : base(reinterpret_cast<const vec<T, N>&>(value))
- // {
- // }
- constexpr KFR_INLINE mask operator~() const { return bitcast<T>(~ubitcast(this->v)); }
- constexpr KFR_INLINE mask operator&(const vec<T, N>& x) const
+ friend constexpr KFR_INLINE mask operator&(const mask& x, const mask& y)
{
- return bitcast<T>(ubitcast(this->v) & ubitcast(x.v));
+ return vec_op<T>::band(x.v, y.v);
}
- constexpr KFR_INLINE mask operator|(const vec<T, N>& x) const
+ friend constexpr KFR_INLINE mask operator|(const mask& x, const mask& y)
{
- return bitcast<T>(ubitcast(this->v) | ubitcast(x.v));
+ return vec_op<T>::bor(x.v, y.v);
}
- constexpr KFR_INLINE mask operator^(const vec<T, N>& x) const
+ friend constexpr KFR_INLINE mask operator^(const mask& x, const mask& y)
{
- return bitcast<T>(ubitcast(this->v) ^ ubitcast(x.v));
+ return vec_op<T>::bxor(x.v, y.v);
}
+ friend constexpr KFR_INLINE mask operator~(const mask& x) { return vec_op<T>::bnot(x.v); }
constexpr KFR_INLINE mask operator&&(const mask& x) const { return *this & x; }
constexpr KFR_INLINE mask operator||(const mask& x) const { return *this | x; }
@@ -794,8 +836,8 @@ struct mask : public vec<T, N>
KFR_INLINE bool operator[](size_t index) const { return ibitcast(this->v[index]) < 0; }
};
-template <typename T, size_t N>
-using cvec = vec<T, N * 2>;
+template <typename T, size_t N1, size_t N2 = N1>
+using mat = vec<vec<T, N1>, N2>;
namespace internal
{
@@ -1171,7 +1213,7 @@ template <typename T, size_t N>
constexpr KFR_INLINE vec<T, N> zerovector()
{
constexpr size_t width = N * compound_type_traits<T>::width;
- return subcast<T>(vec<subtype<T>, width>(simd<subtype<T>, width>()));
+ return compcast<T>(vec<subtype<T>, width>(simd<subtype<T>, width>()));
}
template <typename T, size_t N>
@@ -1285,10 +1327,12 @@ namespace cometa
template <typename T, size_t N>
struct compound_type_traits<kfr::simd<T, N>>
{
- using subtype = T;
- using deep_subtype = cometa::deep_subtype<T>;
- constexpr static size_t width = N;
- constexpr static bool is_scalar = false;
+ using subtype = T;
+ using deep_subtype = cometa::deep_subtype<T>;
+ constexpr static size_t width = N;
+ constexpr static size_t deep_width = width * compound_type_traits<T>::width;
+ constexpr static bool is_scalar = false;
+ constexpr static size_t depth = cometa::compound_type_traits<T>::depth + 1;
template <typename U>
using rebind = kfr::simd<U, N>;
template <typename U>
@@ -1300,10 +1344,12 @@ struct compound_type_traits<kfr::simd<T, N>>
template <typename T, size_t N>
struct compound_type_traits<kfr::vec<T, N>>
{
- using subtype = T;
- using deep_subtype = cometa::deep_subtype<T>;
- constexpr static size_t width = N;
- constexpr static bool is_scalar = false;
+ using subtype = T;
+ using deep_subtype = cometa::deep_subtype<T>;
+ constexpr static size_t width = N;
+ constexpr static size_t deep_width = width * compound_type_traits<T>::width;
+ constexpr static bool is_scalar = false;
+ constexpr static size_t depth = cometa::compound_type_traits<T>::depth + 1;
template <typename U>
using rebind = kfr::vec<U, N>;
template <typename U>
@@ -1315,10 +1361,12 @@ struct compound_type_traits<kfr::vec<T, N>>
template <typename T, size_t N>
struct compound_type_traits<kfr::mask<T, N>>
{
- using subtype = T;
- using deep_subtype = cometa::deep_subtype<T>;
- constexpr static size_t width = N;
- constexpr static bool is_scalar = false;
+ using subtype = T;
+ using deep_subtype = cometa::deep_subtype<T>;
+ constexpr static size_t width = N;
+ constexpr static size_t deep_width = width * compound_type_traits<T>::width;
+ constexpr static bool is_scalar = false;
+ constexpr static size_t depth = cometa::compound_type_traits<T>::depth + 1;
template <typename U>
using rebind = kfr::mask<U, N>;
template <typename U>
diff --git a/include/kfr/cometa.hpp b/include/kfr/cometa.hpp
@@ -135,10 +135,12 @@ constexpr size_t typeindex()
template <typename T>
struct compound_type_traits
{
- constexpr static size_t width = 1;
- using subtype = T;
- using deep_subtype = T;
- constexpr static bool is_scalar = true;
+ constexpr static size_t width = 1;
+ constexpr static size_t deep_width = width;
+ using subtype = T;
+ using deep_subtype = T;
+ constexpr static size_t depth = 0;
+ constexpr static bool is_scalar = true;
template <typename U>
using rebind = U;
@@ -166,10 +168,12 @@ using deep_rebind = typename compound_type_traits<T>::template deep_rebind<SubTy
template <typename T>
struct compound_type_traits<std::pair<T, T>>
{
- constexpr static size_t width = 2;
- using subtype = T;
- using deep_subtype = cometa::deep_subtype<T>;
- constexpr static bool is_scalar = false;
+ constexpr static size_t width = 2;
+ constexpr static size_t deep_width = width * compound_type_traits<T>::width;
+ using subtype = T;
+ using deep_subtype = cometa::deep_subtype<T>;
+ constexpr static bool is_scalar = false;
+ constexpr static size_t depth = cometa::compound_type_traits<T>::depth + 1;
template <typename U>
using rebind = std::pair<U, U>;
diff --git a/include/kfr/dft/ft.hpp b/include/kfr/dft/ft.hpp
@@ -103,6 +103,9 @@ KFR_INLINE vec<T, N> cmul_conj(vec<T, 2> x, vec<T, N> y)
KFR_FN(cmul_conj)
KFR_FN(cmul_2conj)
+template <typename T, size_t N>
+using cvec = vec<T, N * 2>;
+
template <size_t N, bool A = false, typename T>
KFR_INLINE cvec<T, N> cread(const complex<T>* src)
{
diff --git a/include/kfr/dsp/biquad.hpp b/include/kfr/dsp/biquad.hpp
@@ -74,99 +74,92 @@ struct biquad_params
namespace internal
{
-template <cpu_t cpu = cpu_t::native>
-struct in_biquad
+template <typename T, size_t filters, KFR_ARCH_DEP>
+struct biquad_block
{
-private:
-public:
- template <typename T, size_t filters>
- struct biquad_block
- {
- vec<T, filters> s1;
- vec<T, filters> s2;
- vec<T, filters> a1;
- vec<T, filters> a2;
- vec<T, filters> b0;
- vec<T, filters> b1;
- vec<T, filters> b2;
+ vec<T, filters> s1;
+ vec<T, filters> s2;
+ vec<T, filters> a1;
+ vec<T, filters> a2;
+ vec<T, filters> b0;
+ vec<T, filters> b1;
+ vec<T, filters> b2;
- vec<T, filters> out;
- biquad_block() : s1(0), s2(0), a1(0), a2(0), b0(1), b1(0), b2(0), out(0) {}
- biquad_block(const biquad_params<T>* bq, size_t count) : s1(0), s2(0), out(0)
+ vec<T, filters> out;
+ biquad_block() : s1(0), s2(0), a1(0), a2(0), b0(1), b1(0), b2(0), out(0) {}
+ biquad_block(const biquad_params<T>* bq, size_t count) : s1(0), s2(0), out(0)
+ {
+ count = count > filters ? filters : count;
+ for (size_t i = 0; i < count; i++)
{
- count = count > filters ? filters : count;
- for (size_t i = 0; i < count; i++)
- {
- a1(i) = bq[i].a1;
- a2(i) = bq[i].a2;
- b0(i) = bq[i].b0;
- b1(i) = bq[i].b1;
- b2(i) = bq[i].b2;
- }
- for (size_t i = count; i < filters; i++)
- {
- a1(i) = T(0);
- a2(i) = T(0);
- b0(i) = T(1);
- b1(i) = T(0);
- b2(i) = T(0);
- }
+ a1(i) = bq[i].a1;
+ a2(i) = bq[i].a2;
+ b0(i) = bq[i].b0;
+ b1(i) = bq[i].b1;
+ b2(i) = bq[i].b2;
}
-
- template <size_t count>
- biquad_block(const biquad_params<T> (&bq)[count]) : biquad_block(bq, count)
+ for (size_t i = count; i < filters; i++)
{
- static_assert(count <= filters, "count > filters");
+ a1(i) = T(0);
+ a2(i) = T(0);
+ b0(i) = T(1);
+ b1(i) = T(0);
+ b2(i) = T(0);
}
- };
+ }
- template <size_t filters, typename T, typename E1>
- struct expression_biquads : public expression<E1>
+ template <size_t count>
+ biquad_block(const biquad_params<T> (&bq)[count]) : biquad_block(bq, count)
{
- using value_type = T;
+ static_assert(count <= filters, "count > filters");
+ }
+};
- expression_biquads(const biquad_block<T, filters>& bq, E1&& e1)
- : expression<E1>(std::forward<E1>(e1)), bq(bq)
- {
- }
- template <size_t width>
- KFR_INTRIN vec<T, width> operator()(cinput_t, size_t index, vec_t<T, width> t) const
- {
- const vec<T, width> in = this->argument_first(index, t);
- vec<T, width> out;
+template <size_t filters, typename T, typename E1, KFR_ARCH_DEP>
+struct expression_biquads : public expression<E1>
+{
+ using value_type = T;
- KFR_LOOP_UNROLL
- for (size_t i = 0; i < width; i++)
- {
- bq.out = process(bq, insertleft(in[i], bq.out));
- out(i) = bq.out[filters - 1];
- }
+ expression_biquads(const biquad_block<T, filters>& bq, E1&& e1)
+ : expression<E1>(std::forward<E1>(e1)), bq(bq)
+ {
+ }
+ template <size_t width>
+ KFR_INTRIN vec<T, width> operator()(cinput_t, size_t index, vec_t<T, width> t) const
+ {
+ const vec<T, width> in = this->argument_first(index, t);
+ vec<T, width> out;
- return out;
- }
- KFR_SINTRIN vec<T, filters> process(biquad_block<T, filters>& bq, vec<T, filters> in)
+ KFR_LOOP_UNROLL
+ for (size_t i = 0; i < width; i++)
{
- const vec<T, filters> out = bq.b0 * in + bq.s1;
- bq.s1 = bq.s2 + bq.b1 * in - bq.a1 * out;
- bq.s2 = bq.b2 * in - bq.a2 * out;
- return out;
+ bq.out = process(bq, insertleft(in[i], bq.out));
+ out(i) = bq.out[filters - 1];
}
- mutable biquad_block<T, filters> bq;
- };
+
+ return out;
+ }
+ KFR_SINTRIN vec<T, filters> process(biquad_block<T, filters>& bq, vec<T, filters> in)
+ {
+ const vec<T, filters> out = bq.b0 * in + bq.s1;
+ bq.s1 = bq.s2 + bq.b1 * in - bq.a1 * out;
+ bq.s2 = bq.b2 * in - bq.a2 * out;
+ return out;
+ }
+ mutable biquad_block<T, filters> bq;
};
}
template <typename T, typename E1>
-KFR_INLINE internal::in_biquad<>::expression_biquads<1, T, internal::arg<E1>> biquad(
- const biquad_params<T>& bq, E1&& e1)
+KFR_INLINE internal::expression_biquads<1, T, internal::arg<E1>> biquad(const biquad_params<T>& bq, E1&& e1)
{
const biquad_params<T> bqs[1] = { bq };
- return internal::in_biquad<>::expression_biquads<1, T, internal::arg<E1>>(bqs, std::forward<E1>(e1));
+ return internal::expression_biquads<1, T, internal::arg<E1>>(bqs, std::forward<E1>(e1));
}
template <size_t filters, typename T, typename E1>
-KFR_INLINE internal::in_biquad<>::expression_biquads<filters, T, internal::arg<E1>> biquad(
+KFR_INLINE internal::expression_biquads<filters, T, internal::arg<E1>> biquad(
const biquad_params<T> (&bq)[filters], E1&& e1)
{
- return internal::in_biquad<>::expression_biquads<filters, T, internal::arg<E1>>(bq, std::forward<E1>(e1));
+ return internal::expression_biquads<filters, T, internal::arg<E1>>(bq, std::forward<E1>(e1));
}
}
diff --git a/include/kfr/dsp/fir.hpp b/include/kfr/dsp/fir.hpp
@@ -38,7 +38,7 @@ using fir_taps = univector<T, Size>;
namespace internal
{
-template <size_t tapcount, typename T, typename E1>
+template <size_t tapcount, typename T, typename E1, KFR_ARCH_DEP>
struct expression_short_fir : expression<E1>
{
static_assert(is_poweroftwo(tapcount), "tapcount must be a power of two");
@@ -47,6 +47,10 @@ struct expression_short_fir : expression<E1>
: expression<E1>(std::forward<E1>(e1)), taps(taps), delayline(0)
{
}
+ expression_short_fir(E1&& e1, const array_ref<const T>& taps)
+ : expression<E1>(std::forward<E1>(e1)), taps(taps), delayline(0)
+ {
+ }
template <typename U, size_t N>
KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N> x) const
{
@@ -63,7 +67,7 @@ struct expression_short_fir : expression<E1>
mutable vec<T, tapcount - 1> delayline;
};
-template <typename T, typename E1>
+template <typename T, typename E1, KFR_ARCH_DEP>
struct expression_fir : expression<E1>
{
expression_fir(E1&& e1, const array_ref<const T>& taps)
diff --git a/include/kfr/dsp/goertzel.hpp b/include/kfr/dsp/goertzel.hpp
@@ -32,7 +32,7 @@ namespace kfr
namespace internal
{
-template <typename T>
+template <typename T, KFR_ARCH_DEP>
struct expression_goertzel : output_expression
{
expression_goertzel(complex<T>& result, T omega)
@@ -47,7 +47,7 @@ struct expression_goertzel : output_expression
template <typename U, size_t N>
KFR_INLINE void operator()(coutput_t, size_t index, const vec<U, N>& x)
{
- vec<T, N> in = cast<T>(x);
+ vec<T, N> in = x;
KFR_LOOP_UNROLL
for (size_t i = 0; i < N; i++)
{
@@ -84,7 +84,7 @@ struct expression_parallel_goertzel : output_expression
template <typename U, size_t N>
KFR_INLINE void operator()(coutput_t, size_t index, const vec<U, N>& x)
{
- const vec<T, N> in = cast<T>(x);
+ const vec<T, N> in = x;
KFR_LOOP_UNROLL
for (size_t i = 0; i < N; i++)
{
diff --git a/include/kfr/dsp/units.hpp b/include/kfr/dsp/units.hpp
@@ -41,7 +41,7 @@ namespace intrinsics
template <typename T, typename TF = ftype<T>>
KFR_SINTRIN TF amp_to_dB(T amp)
{
- return log(cast<subtype<TF>>(amp)) * subtype<TF>(8.6858896380650365530225783783322);
+ return log(static_cast<TF>(amp)) * subtype<TF>(8.6858896380650365530225783783322);
// return T( 20.0 ) * log10( level );
}
diff --git a/include/kfr/dsp/window.hpp b/include/kfr/dsp/window.hpp
@@ -147,7 +147,7 @@ struct expression_triangular : input_expression
KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
{
constexpr vec_t<T, N> y{};
- return cast<U>(1 - abs(linspace(cinput, index, y)));
+ return 1 - abs(linspace(cinput, index, y));
}
size_t size() const { return m_size; }
@@ -169,7 +169,7 @@ struct expression_bartlett : input_expression
KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
{
constexpr vec_t<T, N> y{};
- return cast<U>(1 - abs(linspace(cinput, index, y)));
+ return 1 - abs(linspace(cinput, index, y));
}
size_t size() const { return m_size; }
@@ -191,7 +191,7 @@ struct expression_cosine : input_expression
KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
{
constexpr vec_t<T, N> y{};
- return cast<U>(sin(c_pi<T> * linspace(cinput, index, y)));
+ return sin(c_pi<T> * linspace(cinput, index, y));
}
size_t size() const { return m_size; }
@@ -213,7 +213,7 @@ struct expression_hann : input_expression
KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
{
constexpr vec_t<T, N> y{};
- return cast<U>(T(0.5) * (T(1) - cos(c_pi<T, 2> * linspace(cinput, index, y))));
+ return T(0.5) * (T(1) - cos(c_pi<T, 2> * linspace(cinput, index, y)));
}
size_t size() const { return m_size; }
@@ -236,7 +236,7 @@ struct expression_bartlett_hann : input_expression
{
constexpr vec_t<T, N> y{};
const vec<T, N> xx = linspace(cinput, index, y);
- return cast<U>(T(0.62) - T(0.48) * abs(xx - T(0.5)) + T(0.38) * cos(c_pi<T, 2> * (xx - T(0.5))));
+ return T(0.62) - T(0.48) * abs(xx - T(0.5)) + T(0.38) * cos(c_pi<T, 2> * (xx - T(0.5)));
}
size_t size() const { return m_size; }
@@ -258,7 +258,7 @@ struct expression_hamming : input_expression
KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
{
constexpr vec_t<T, N> y{};
- return cast<U>(alpha - (1.0 - alpha) * (cos(c_pi<T, 2> * linspace(cinput, index, y))));
+ return alpha - (1.0 - alpha) * (cos(c_pi<T, 2> * linspace(cinput, index, y)));
}
size_t size() const { return m_size; }
@@ -282,7 +282,7 @@ struct expression_bohman : input_expression
{
constexpr vec_t<T, N> y{};
const vec<U, N> n = abs(linspace(cinput, index, y));
- return cast<U>((T(1) - n) * cos(c_pi<T> * n) + (T(1) / c_pi<T>)*sin(c_pi<T> * n));
+ return (T(1) - n) * cos(c_pi<T> * n) + (T(1) / c_pi<T>)*sin(c_pi<T> * n);
}
size_t size() const { return m_size; }
@@ -305,7 +305,7 @@ struct expression_blackman : input_expression
{
constexpr vec_t<T, N> y{};
const vec<T, N> n = linspace(cinput, index, y);
- return cast<U>(a0 - a1 * cos(c_pi<T, 2> * n) + a2 * cos(c_pi<T, 4> * n));
+ return a0 - a1 * cos(c_pi<T, 2> * n) + a2 * cos(c_pi<T, 4> * n);
}
size_t size() const { return m_size; }
@@ -330,7 +330,7 @@ struct expression_blackman_harris : input_expression
constexpr vec_t<T, N> y{};
const vec<T, N> n = linspace(cinput, index, y) * c_pi<T, 2>;
- return cast<U>(T(0.35875) - T(0.48829) * cos(n) + T(0.14128) * cos(2 * n) - T(0.01168) * cos(3 * n));
+ return T(0.35875) - T(0.48829) * cos(n) + T(0.14128) * cos(2 * n) - T(0.01168) * cos(3 * n);
}
size_t size() const { return m_size; }
@@ -353,7 +353,7 @@ struct expression_kaiser : input_expression
KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
{
constexpr vec_t<T, N> y{};
- return cast<U>(modzerobessel(beta * sqrt(1 - sqr(linspace(cinput, index, y)))) * m);
+ return modzerobessel(beta * sqrt(1 - sqr(linspace(cinput, index, y)))) * m;
}
size_t size() const { return m_size; }
@@ -383,7 +383,7 @@ struct expression_flattop : input_expression
constexpr T a2 = 1.29;
constexpr T a3 = 0.388;
constexpr T a4 = 0.028;
- return cast<U>(a0 - a1 * cos(n) + a2 * cos(2 * n) - a3 * cos(3 * n) + a4 * cos(4 * n));
+ return a0 - a1 * cos(n) + a2 * cos(2 * n) - a3 * cos(3 * n) + a4 * cos(4 * n);
}
size_t size() const { return m_size; }
@@ -405,7 +405,7 @@ struct expression_gaussian : input_expression
KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
{
constexpr vec_t<T, N> y{};
- return cast<U>(exp(-0.5 * sqr(alpha * linspace(cinput, index, y))));
+ return exp(-0.5 * sqr(alpha * linspace(cinput, index, y)));
}
size_t size() const { return m_size; }
@@ -428,7 +428,7 @@ struct expression_lanczos : input_expression
KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
{
constexpr vec_t<T, N> y{};
- return cast<U>(sinc(linspace(cinput, index, y)));
+ return sinc(linspace(cinput, index, y));
}
size_t size() const { return m_size; }
diff --git a/include/kfr/io/file.hpp b/include/kfr/io/file.hpp
@@ -85,7 +85,7 @@ struct expression_file_writer : expression_file_base, output_expression
{
if (position != index)
fseeko(file, static_cast<off_t>(index * sizeof(T)), SEEK_SET);
- const vec<T, N> output = cast<T>(value);
+ const vec<T, N> output = value;
fwrite(output.data(), sizeof(T), output.size(), file);
position = index + N;
}
@@ -104,7 +104,7 @@ struct expression_file_reader : expression_file_base, input_expression
vec<T, N> input = qnan;
fread(input.data(), sizeof(T), input.size(), file);
position = index + N;
- return cast<U>(input);
+ return input;
}
mutable size_t position = 0;
};
diff --git a/tests/complex_test.cpp b/tests/complex_test.cpp
@@ -44,17 +44,17 @@ TEST(complex_vector)
TEST(complex_cast)
{
- const vec<f32, 4> v1 = subcast<f32>(make_vector(c32{ 0, 1 }, c32{ 2, 3 }));
+ const vec<f32, 4> v1 = bitcast<f32>(make_vector(c32{ 0, 1 }, c32{ 2, 3 }));
CHECK(v1(0) == 0.f);
CHECK(v1(1) == 1.f);
CHECK(v1(2) == 2.f);
CHECK(v1(3) == 3.f);
- const vec<c32, 1> v2 = subcast<c32>(make_vector(1.f, 2.f));
+ const vec<c32, 1> v2 = bitcast<c32>(make_vector(1.f, 2.f));
CHECK(v2(0) == 1.f);
CHECK(v2(1) == 2.f);
- const vec<c32, 2> v3 = cast<c32>(make_vector(1.f, 2.f));
+ const vec<c32, 2> v3 = make_vector(1.f, 2.f);
CHECK(v3(0) == 1.f);
CHECK(v3(1) == 0.f);
CHECK(v3(2) == 2.f);