commit b53919a2c1c7f8306f8dc9295219361174da34d8
parent beb35972bef7c83468209a0f7c854173dc2f230d
Author: d.levin256@gmail.com <d.levin256@gmail.com>
Date: Wed, 27 Jul 2016 19:06:57 +0300
Hardcode vector widths (for build speed)
Diffstat:
1 file changed, 36 insertions(+), 36 deletions(-)
diff --git a/include/kfr/base/function.hpp b/include/kfr/base/function.hpp
@@ -49,47 +49,47 @@ namespace intrinsics
#ifdef CID_ARCH_X86
using f32sse = vec<f32, 4>;
using f64sse = vec<f64, 2>;
-using i8sse = vec<i8, vector_width<i8, cpu_t::sse2>>;
-using i16sse = vec<i16, vector_width<i16, cpu_t::sse2>>;
-using i32sse = vec<i32, vector_width<i32, cpu_t::sse2>>;
-using i64sse = vec<i64, vector_width<i64, cpu_t::sse2>>;
-using u8sse = vec<u8, vector_width<u8, cpu_t::sse2>>;
-using u16sse = vec<u16, vector_width<u16, cpu_t::sse2>>;
-using u32sse = vec<u32, vector_width<u32, cpu_t::sse2>>;
-using u64sse = vec<u64, vector_width<u64, cpu_t::sse2>>;
+using i8sse = vec<i8, 16>;
+using i16sse = vec<i16, 8>;
+using i32sse = vec<i32, 4>;
+using i64sse = vec<i64, 2>;
+using u8sse = vec<u8, 16>;
+using u16sse = vec<u16, 8>;
+using u32sse = vec<u32, 4>;
+using u64sse = vec<u64, 2>;
-using mf32sse = mask<f32, vector_width<f32, cpu_t::sse2>>;
-using mf64sse = mask<f64, vector_width<f64, cpu_t::sse2>>;
-using mi8sse = mask<i8, vector_width<i8, cpu_t::sse2>>;
-using mi16sse = mask<i16, vector_width<i16, cpu_t::sse2>>;
-using mi32sse = mask<i32, vector_width<i32, cpu_t::sse2>>;
-using mi64sse = mask<i64, vector_width<i64, cpu_t::sse2>>;
-using mu8sse = mask<u8, vector_width<u8, cpu_t::sse2>>;
-using mu16sse = mask<u16, vector_width<u16, cpu_t::sse2>>;
-using mu32sse = mask<u32, vector_width<u32, cpu_t::sse2>>;
-using mu64sse = mask<u64, vector_width<u64, cpu_t::sse2>>;
+using mf32sse = mask<f32, 4>;
+using mf64sse = mask<f64, 2>;
+using mi8sse = mask<i8, 16>;
+using mi16sse = mask<i16, 8>;
+using mi32sse = mask<i32, 4>;
+using mi64sse = mask<i64, 2>;
+using mu8sse = mask<u8, 16>;
+using mu16sse = mask<u16, 8>;
+using mu32sse = mask<u32, 4>;
+using mu64sse = mask<u64, 2>;
using f32avx = vec<f32, 8>;
using f64avx = vec<f64, 4>;
-using i8avx = vec<i8, vector_width<i8, cpu_t::avx2>>;
-using i16avx = vec<i16, vector_width<i16, cpu_t::avx2>>;
-using i32avx = vec<i32, vector_width<i32, cpu_t::avx2>>;
-using i64avx = vec<i64, vector_width<i64, cpu_t::avx2>>;
-using u8avx = vec<u8, vector_width<u8, cpu_t::avx2>>;
-using u16avx = vec<u16, vector_width<u16, cpu_t::avx2>>;
-using u32avx = vec<u32, vector_width<u32, cpu_t::avx2>>;
-using u64avx = vec<u64, vector_width<u64, cpu_t::avx2>>;
+using i8avx = vec<i8, 32>;
+using i16avx = vec<i16, 16>;
+using i32avx = vec<i32, 8>;
+using i64avx = vec<i64, 4>;
+using u8avx = vec<u8, 32>;
+using u16avx = vec<u16, 16>;
+using u32avx = vec<u32, 8>;
+using u64avx = vec<u64, 4>;
-using mf32avx = mask<f32, vector_width<f32, cpu_t::avx1>>;
-using mf64avx = mask<f64, vector_width<f64, cpu_t::avx1>>;
-using mi8avx = mask<i8, vector_width<i8, cpu_t::avx2>>;
-using mi16avx = mask<i16, vector_width<i16, cpu_t::avx2>>;
-using mi32avx = mask<i32, vector_width<i32, cpu_t::avx2>>;
-using mi64avx = mask<i64, vector_width<i64, cpu_t::avx2>>;
-using mu8avx = mask<u8, vector_width<u8, cpu_t::avx2>>;
-using mu16avx = mask<u16, vector_width<u16, cpu_t::avx2>>;
-using mu32avx = mask<u32, vector_width<u32, cpu_t::avx2>>;
-using mu64avx = mask<u64, vector_width<u64, cpu_t::avx2>>;
+using mf32avx = mask<f32, 8>;
+using mf64avx = mask<f64, 4>;
+using mi8avx = mask<i8, 32>;
+using mi16avx = mask<i16, 16>;
+using mi32avx = mask<i32, 8>;
+using mi64avx = mask<i64, 4>;
+using mu8avx = mask<u8, 32>;
+using mu16avx = mask<u16, 16>;
+using mu32avx = mask<u32, 8>;
+using mu64avx = mask<u64, 4>;
#else
using f32neon = vec<f32, 4>;
using f64neon = vec<f64, 2>;