kfr

Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)

commit b53919a2c1c7f8306f8dc9295219361174da34d8
parent beb35972bef7c83468209a0f7c854173dc2f230d
Author: d.levin256@gmail.com <d.levin256@gmail.com>
Date:   Wed, 27 Jul 2016 19:06:57 +0300

Hardcode vector widths (for build speed)

Diffstat:
M include/kfr/base/function.hpp | 72 ++++++++++++++++++++++++++++++++++++------------------------------------
1 file changed, 36 insertions(+), 36 deletions(-)

diff --git a/include/kfr/base/function.hpp b/include/kfr/base/function.hpp
@@ -49,47 +49,47 @@ namespace intrinsics
 #ifdef CID_ARCH_X86
 using f32sse = vec<f32, 4>;
 using f64sse = vec<f64, 2>;
-using i8sse = vec<i8, vector_width<i8, cpu_t::sse2>>;
-using i16sse = vec<i16, vector_width<i16, cpu_t::sse2>>;
-using i32sse = vec<i32, vector_width<i32, cpu_t::sse2>>;
-using i64sse = vec<i64, vector_width<i64, cpu_t::sse2>>;
-using u8sse = vec<u8, vector_width<u8, cpu_t::sse2>>;
-using u16sse = vec<u16, vector_width<u16, cpu_t::sse2>>;
-using u32sse = vec<u32, vector_width<u32, cpu_t::sse2>>;
-using u64sse = vec<u64, vector_width<u64, cpu_t::sse2>>;
+using i8sse = vec<i8, 16>;
+using i16sse = vec<i16, 8>;
+using i32sse = vec<i32, 4>;
+using i64sse = vec<i64, 2>;
+using u8sse = vec<u8, 16>;
+using u16sse = vec<u16, 8>;
+using u32sse = vec<u32, 4>;
+using u64sse = vec<u64, 2>;
 
-using mf32sse = mask<f32, vector_width<f32, cpu_t::sse2>>;
-using mf64sse = mask<f64, vector_width<f64, cpu_t::sse2>>;
-using mi8sse = mask<i8, vector_width<i8, cpu_t::sse2>>;
-using mi16sse = mask<i16, vector_width<i16, cpu_t::sse2>>;
-using mi32sse = mask<i32, vector_width<i32, cpu_t::sse2>>;
-using mi64sse = mask<i64, vector_width<i64, cpu_t::sse2>>;
-using mu8sse = mask<u8, vector_width<u8, cpu_t::sse2>>;
-using mu16sse = mask<u16, vector_width<u16, cpu_t::sse2>>;
-using mu32sse = mask<u32, vector_width<u32, cpu_t::sse2>>;
-using mu64sse = mask<u64, vector_width<u64, cpu_t::sse2>>;
+using mf32sse = mask<f32, 4>;
+using mf64sse = mask<f64, 2>;
+using mi8sse = mask<i8, 16>;
+using mi16sse = mask<i16, 8>;
+using mi32sse = mask<i32, 4>;
+using mi64sse = mask<i64, 2>;
+using mu8sse = mask<u8, 16>;
+using mu16sse = mask<u16, 8>;
+using mu32sse = mask<u32, 4>;
+using mu64sse = mask<u64, 2>;
 
 using f32avx = vec<f32, 8>;
 using f64avx = vec<f64, 4>;
-using i8avx = vec<i8, vector_width<i8, cpu_t::avx2>>;
-using i16avx = vec<i16, vector_width<i16, cpu_t::avx2>>;
-using i32avx = vec<i32, vector_width<i32, cpu_t::avx2>>;
-using i64avx = vec<i64, vector_width<i64, cpu_t::avx2>>;
-using u8avx = vec<u8, vector_width<u8, cpu_t::avx2>>;
-using u16avx = vec<u16, vector_width<u16, cpu_t::avx2>>;
-using u32avx = vec<u32, vector_width<u32, cpu_t::avx2>>;
-using u64avx = vec<u64, vector_width<u64, cpu_t::avx2>>;
+using i8avx = vec<i8, 32>;
+using i16avx = vec<i16, 16>;
+using i32avx = vec<i32, 8>;
+using i64avx = vec<i64, 4>;
+using u8avx = vec<u8, 32>;
+using u16avx = vec<u16, 16>;
+using u32avx = vec<u32, 8>;
+using u64avx = vec<u64, 4>;
 
-using mf32avx = mask<f32, vector_width<f32, cpu_t::avx1>>;
-using mf64avx = mask<f64, vector_width<f64, cpu_t::avx1>>;
-using mi8avx = mask<i8, vector_width<i8, cpu_t::avx2>>;
-using mi16avx = mask<i16, vector_width<i16, cpu_t::avx2>>;
-using mi32avx = mask<i32, vector_width<i32, cpu_t::avx2>>;
-using mi64avx = mask<i64, vector_width<i64, cpu_t::avx2>>;
-using mu8avx = mask<u8, vector_width<u8, cpu_t::avx2>>;
-using mu16avx = mask<u16, vector_width<u16, cpu_t::avx2>>;
-using mu32avx = mask<u32, vector_width<u32, cpu_t::avx2>>;
-using mu64avx = mask<u64, vector_width<u64, cpu_t::avx2>>;
+using mf32avx = mask<f32, 8>;
+using mf64avx = mask<f64, 4>;
+using mi8avx = mask<i8, 32>;
+using mi16avx = mask<i16, 16>;
+using mi32avx = mask<i32, 8>;
+using mi64avx = mask<i64, 4>;
+using mu8avx = mask<u8, 32>;
+using mu16avx = mask<u16, 16>;
+using mu32avx = mask<u32, 8>;
+using mu64avx = mask<u64, 4>;
 #else
 using f32neon = vec<f32, 4>;
 using f64neon = vec<f64, 2>;
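
For reference, each hardcoded width is the SIMD register size divided by the element size: SSE registers are 128 bits and AVX/AVX2 registers are 256 bits, so i8sse gets 128/8 = 16 lanes and i8avx gets 256/8 = 32 lanes. The sketch below verifies that arithmetic at compile time with a hypothetical lanes() helper; it is not KFR's actual vector_width implementation, only an illustration of the values the template was computing before this commit.

#include <cstddef>
#include <cstdint>

// Hypothetical helper (not part of KFR): lanes per register
// = register bits / element bits.
constexpr std::size_t lanes(std::size_t register_bits, std::size_t element_bytes)
{
    return register_bits / (element_bytes * 8);
}

// SSE: 128-bit registers.
static_assert(lanes(128, sizeof(std::int8_t)) == 16, "i8sse");
static_assert(lanes(128, sizeof(std::int16_t)) == 8, "i16sse");
static_assert(lanes(128, sizeof(std::int32_t)) == 4, "i32sse");
static_assert(lanes(128, sizeof(std::int64_t)) == 2, "i64sse");
static_assert(lanes(128, sizeof(float)) == 4, "f32sse");
static_assert(lanes(128, sizeof(double)) == 2, "f64sse");

// AVX/AVX2: 256-bit registers.
static_assert(lanes(256, sizeof(std::int8_t)) == 32, "i8avx");
static_assert(lanes(256, sizeof(std::int16_t)) == 16, "i16avx");
static_assert(lanes(256, sizeof(std::int32_t)) == 8, "i32avx");
static_assert(lanes(256, sizeof(std::int64_t)) == 4, "i64avx");
static_assert(lanes(256, sizeof(float)) == 8, "f32avx");
static_assert(lanes(256, sizeof(double)) == 4, "f64avx");

int main() { return 0; }

Substituting the literals avoids instantiating the vector_width template for every type/architecture pair in every translation unit, which is presumably the build-speed win the commit message refers to. Note also that the old mf32avx/mf64avx masks used cpu_t::avx1 while the integer masks used cpu_t::avx2; both map to 256-bit registers, so the hardcoded literals unify them without changing any width.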