kfr

Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)
Log | Files | Refs | README

commit 25eceffebe4b2dc4520b4926d7323551b0928691
parent a5f9f835996f75ec7ad39645f5c2e4420b526fa4
Author: d.levin256@gmail.com <d.levin256@gmail.com>
Date:   Tue, 24 Mar 2020 05:56:36 +0000

Arch tests and fixes for multi arch DFT

Diffstat:
M CMakeLists.txt | 1 +
M azure-pipelines.yml | 6 +++---
M include/kfr/dft/impl/ft.hpp | 111 ++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------------
M tests/CMakeLists.txt | 5 ++---
4 files changed, 76 insertions(+), 47 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt @@ -196,6 +196,7 @@ function (add_arch_library NAME ARCH SRCS DEFS) target_set_arch(${NAME}_${ARCH} PRIVATE ${ARCH}) target_compile_options(${NAME}_${ARCH} PRIVATE ${DEFS}) target_link_libraries(${NAME}_all INTERFACE ${NAME}_${ARCH}) + target_compile_options(${NAME}_${ARCH} PRIVATE -flto) endfunction () if (ENABLE_DFT) diff --git a/azure-pipelines.yml b/azure-pipelines.yml @@ -118,7 +118,7 @@ jobs: /bin/bash -c "sudo xcode-select -s /Applications/Xcode_$(XCODE_VER).app/Contents/Developer" brew install ninja - ci/run.sh build-release -DENABLE_CAPI_BUILD=ON -DUSE_SDE=ON -DARCH_TESTS=sse2,ssse3,sse41,avx,avx2,avx512 -DCMAKE_BUILD_TYPE=Release + ci/run.sh build-release -DENABLE_CAPI_BUILD=ON -DUSE_SDE=ON -DARCH_TESTS=sse2,ssse3,sse41,avx,avx2,avx512 -DCPU_ARCH=sse2 -DENABLE_DFT_MULTIARCH=ON -DCMAKE_BUILD_TYPE=Release - job: iOS_ARM_Clang_Release timeoutInMinutes: 120 @@ -217,7 +217,7 @@ jobs: set PATH=%PATH:C:\Program Files\Git\mingw64\bin;=% set PATH=%PATH:C:\Strawberry\c\bin;=% set PATH=C:\sde;%PATH% - ci\run.cmd build-release -DENABLE_CAPI_BUILD=ON -DARCH_TESTS=ON -DCMAKE_CXX_COMPILER="C:/Program Files/LLVM/bin/clang-cl.exe" -DUSE_SDE=ON -DCPU_ARCH=avx512 -DCMAKE_CXX_FLAGS=-m64 -DCMAKE_BUILD_TYPE=Release + ci\run.cmd build-release -DENABLE_CAPI_BUILD=ON -DARCH_TESTS=ON -DENABLE_DFT_MULTIARCH=ON -DCMAKE_CXX_COMPILER="C:/Program Files/LLVM/bin/clang-cl.exe" -DUSE_SDE=ON -DCPU_ARCH=sse2 -DCMAKE_CXX_FLAGS=-m64 -DCMAKE_BUILD_TYPE=Release - job: Windows_MSVC_x86_AVX512_Clang9_Release timeoutInMinutes: 120 @@ -236,7 +236,7 @@ jobs: set PATH=%PATH:C:\Program Files\Git\mingw64\bin;=% set PATH=%PATH:C:\Strawberry\c\bin;=% set PATH=C:\sde;%PATH% - ci\run.cmd build-release -DARCH_TESTS=ON -DCMAKE_CXX_COMPILER="C:/Program Files/LLVM/bin/clang-cl.exe" -DUSE_SDE=ON -DCPU_ARCH=avx512 -DCMAKE_CXX_FLAGS=-m32 -DCMAKE_BUILD_TYPE=Release + ci\run.cmd build-release -DENABLE_CAPI_BUILD=ON -DARCH_TESTS=ON -DENABLE_DFT_MULTIARCH=ON 
-DCMAKE_CXX_COMPILER="C:/Program Files/LLVM/bin/clang-cl.exe" -DUSE_SDE=ON -DCPU_ARCH=sse2 -DCMAKE_CXX_FLAGS=-m32 -DCMAKE_BUILD_TYPE=Release - job: Windows_MSVC_x86_64_AVX512_MSVC2017_Release timeoutInMinutes: 120 diff --git a/include/kfr/dft/impl/ft.hpp b/include/kfr/dft/impl/ft.hpp @@ -483,7 +483,7 @@ constexpr KFR_INTRINSIC cvec<T, width> fixed_twiddle(size_t size, size_t start, // constexpr cvec<T, N> fixed_twiddle = get_fixed_twiddle<T, N, size, start, step, inverse>(); template <typename T, size_t N, bool inverse> -constexpr KFR_INTRINSIC cvec<T, N> twiddleimagmask() +constexpr static inline cvec<T, N> twiddleimagmask() { return inverse ? broadcast<N * 2, T>(-1, +1) : broadcast<N * 2, T>(+1, -1); } @@ -1022,11 +1022,16 @@ KFR_INTRINSIC void apply_twiddles2(cvec<T, N>& a1) } template <typename T, size_t N, bool inverse> -static const cvec<T, N> tw3r1 = static_cast<T>(-0.5 - 1.0); +static constexpr KFR_INTRINSIC cvec<T, N> tw3r1() +{ + return static_cast<T>(-0.5 - 1.0); +} template <typename T, size_t N, bool inverse> -static const cvec<T, N> tw3i1 = - static_cast<T>(0.86602540378443864676372317075) * twiddleimagmask<T, N, inverse>(); +static constexpr KFR_INTRINSIC cvec<T, N> tw3i1() +{ + return static_cast<T>(0.86602540378443864676372317075) * twiddleimagmask<T, N, inverse>(); +} template <size_t N, bool inverse = false, typename T> KFR_INTRINSIC void butterfly3(cvec<T, N> a00, cvec<T, N> a01, cvec<T, N> a02, cvec<T, N>& w00, @@ -1037,9 +1042,9 @@ KFR_INTRINSIC void butterfly3(cvec<T, N> a00, cvec<T, N> a01, cvec<T, N> a02, cv const cvec<T, N> dif1 = swap<2>(a01 - a02); w00 = a00 + sum1; - const cvec<T, N> s1 = w00 + sum1 * tw3r1<T, N, inverse>; + const cvec<T, N> s1 = w00 + sum1 * tw3r1<T, N, inverse>(); - const cvec<T, N> d1 = dif1 * tw3i1<T, N, inverse>; + const cvec<T, N> d1 = dif1 * tw3i1<T, N, inverse>(); w01 = s1 + d1; w02 = s1 - d1; @@ -1132,25 +1137,40 @@ KFR_INTRINSIC void butterfly9(cvec<T, N>& a0, cvec<T, N>& a1, cvec<T, N>& a2, cv } template 
<typename T, size_t N, bool inverse> -static const cvec<T, N> tw7r1 = static_cast<T>(0.623489801858733530525004884 - 1.0); +static constexpr KFR_INTRINSIC cvec<T, N> tw7r1() +{ + return static_cast<T>(0.623489801858733530525004884 - 1.0); +} template <typename T, size_t N, bool inverse> -static const cvec<T, N> tw7i1 = - static_cast<T>(0.78183148246802980870844452667) * twiddleimagmask<T, N, inverse>(); +static constexpr KFR_INTRINSIC cvec<T, N> tw7i1() +{ + return static_cast<T>(0.78183148246802980870844452667) * twiddleimagmask<T, N, inverse>(); +} template <typename T, size_t N, bool inverse> -static const cvec<T, N> tw7r2 = static_cast<T>(-0.2225209339563144042889025645 - 1.0); +static constexpr KFR_INTRINSIC cvec<T, N> tw7r2() +{ + return static_cast<T>(-0.2225209339563144042889025645 - 1.0); +} template <typename T, size_t N, bool inverse> -static const cvec<T, N> tw7i2 = - static_cast<T>(0.97492791218182360701813168299) * twiddleimagmask<T, N, inverse>(); +static constexpr KFR_INTRINSIC cvec<T, N> tw7i2() +{ + return static_cast<T>(0.97492791218182360701813168299) * twiddleimagmask<T, N, inverse>(); +} template <typename T, size_t N, bool inverse> -static const cvec<T, N> tw7r3 = static_cast<T>(-0.90096886790241912623610231951 - 1.0); +static constexpr KFR_INTRINSIC cvec<T, N> tw7r3() +{ + return static_cast<T>(-0.90096886790241912623610231951 - 1.0); +} template <typename T, size_t N, bool inverse> -static const cvec<T, N> tw7i3 = - static_cast<T>(0.43388373911755812047576833285) * twiddleimagmask<T, N, inverse>(); +static constexpr KFR_INTRINSIC cvec<T, N> tw7i3() +{ + return static_cast<T>(0.43388373911755812047576833285) * twiddleimagmask<T, N, inverse>(); +} template <size_t N, bool inverse = false, typename T> KFR_INTRINSIC void butterfly7(cvec<T, N> a00, cvec<T, N> a01, cvec<T, N> a02, cvec<T, N> a03, cvec<T, N> a04, @@ -1167,18 +1187,18 @@ KFR_INTRINSIC void butterfly7(cvec<T, N> a00, cvec<T, N> a01, cvec<T, N> a02, cv w00 = a00 + sum1 + sum2 + sum3; 
const cvec<T, N> s1 = - w00 + sum1 * tw7r1<T, N, inverse> + sum2 * tw7r2<T, N, inverse> + sum3 * tw7r3<T, N, inverse>; + w00 + sum1 * tw7r1<T, N, inverse>() + sum2 * tw7r2<T, N, inverse>() + sum3 * tw7r3<T, N, inverse>(); const cvec<T, N> s2 = - w00 + sum1 * tw7r2<T, N, inverse> + sum2 * tw7r3<T, N, inverse> + sum3 * tw7r1<T, N, inverse>; + w00 + sum1 * tw7r2<T, N, inverse>() + sum2 * tw7r3<T, N, inverse>() + sum3 * tw7r1<T, N, inverse>(); const cvec<T, N> s3 = - w00 + sum1 * tw7r3<T, N, inverse> + sum2 * tw7r1<T, N, inverse> + sum3 * tw7r2<T, N, inverse>; + w00 + sum1 * tw7r3<T, N, inverse>() + sum2 * tw7r1<T, N, inverse>() + sum3 * tw7r2<T, N, inverse>(); const cvec<T, N> d1 = - dif1 * tw7i1<T, N, inverse> + dif2 * tw7i2<T, N, inverse> + dif3 * tw7i3<T, N, inverse>; + dif1 * tw7i1<T, N, inverse>() + dif2 * tw7i2<T, N, inverse>() + dif3 * tw7i3<T, N, inverse>(); const cvec<T, N> d2 = - dif1 * tw7i2<T, N, inverse> - dif2 * tw7i3<T, N, inverse> - dif3 * tw7i1<T, N, inverse>; + dif1 * tw7i2<T, N, inverse>() - dif2 * tw7i3<T, N, inverse>()- dif3 * tw7i1<T, N, inverse>(); const cvec<T, N> d3 = - dif1 * tw7i3<T, N, inverse> - dif2 * tw7i1<T, N, inverse> + dif3 * tw7i2<T, N, inverse>; + dif1 * tw7i3<T, N, inverse>() - dif2 * tw7i1<T, N, inverse>() + dif3 * tw7i2<T, N, inverse>(); w01 = s1 + d1; w06 = s1 - d1; @@ -1294,15 +1314,25 @@ KFR_INTRINSIC void butterfly11(cvec<T, N> a00, cvec<T, N> a01, cvec<T, N> a02, c } template <typename T, size_t N, bool inverse> -const static cvec<T, N> tw5r1 = static_cast<T>(0.30901699437494742410229341718 - 1.0); +static constexpr KFR_INTRINSIC cvec<T, N> tw5r1() +{ + return static_cast<T>(0.30901699437494742410229341718 - 1.0); +} template <typename T, size_t N, bool inverse> -const static cvec<T, N> tw5i1 = - static_cast<T>(0.95105651629515357211643933338) * twiddleimagmask<T, N, inverse>(); +static constexpr KFR_INTRINSIC cvec<T, N> tw5i1() +{ + return static_cast<T>(0.95105651629515357211643933338) * twiddleimagmask<T, N, inverse>(); 
+} template <typename T, size_t N, bool inverse> -const static cvec<T, N> tw5r2 = static_cast<T>(-0.80901699437494742410229341718 - 1.0); +static constexpr KFR_INTRINSIC cvec<T, N> tw5r2() +{ + return static_cast<T>(-0.80901699437494742410229341718 - 1.0); +} template <typename T, size_t N, bool inverse> -const static cvec<T, N> tw5i2 = - static_cast<T>(0.58778525229247312916870595464) * twiddleimagmask<T, N, inverse>(); +static constexpr KFR_INTRINSIC cvec<T, N> tw5i2() +{ + return static_cast<T>(0.58778525229247312916870595464) * twiddleimagmask<T, N, inverse>(); +} template <size_t N, bool inverse = false, typename T> KFR_INTRINSIC void butterfly5(const cvec<T, N>& a00, const cvec<T, N>& a01, const cvec<T, N>& a02, @@ -1315,11 +1345,11 @@ KFR_INTRINSIC void butterfly5(const cvec<T, N>& a00, const cvec<T, N>& a01, cons const cvec<T, N> dif2 = swap<2>(a02 - a03); w00 = a00 + sum1 + sum2; - const cvec<T, N> s1 = w00 + sum1 * tw5r1<T, N, inverse> + sum2 * tw5r2<T, N, inverse>; - const cvec<T, N> s2 = w00 + sum1 * tw5r2<T, N, inverse> + sum2 * tw5r1<T, N, inverse>; + const cvec<T, N> s1 = w00 + sum1 * tw5r1<T, N, inverse>() + sum2 * tw5r2<T, N, inverse>(); + const cvec<T, N> s2 = w00 + sum1 * tw5r2<T, N, inverse>() + sum2 * tw5r1<T, N, inverse>(); - const cvec<T, N> d1 = dif1 * tw5i1<T, N, inverse> + dif2 * tw5i2<T, N, inverse>; - const cvec<T, N> d2 = dif1 * tw5i2<T, N, inverse> - dif2 * tw5i1<T, N, inverse>; + const cvec<T, N> d1 = dif1 * tw5i1<T, N, inverse>() + dif2 * tw5i2<T, N, inverse>(); + const cvec<T, N> d2 = dif1 * tw5i2<T, N, inverse>() - dif2 * tw5i1<T, N, inverse>(); w01 = s1 + d1; w04 = s1 - d1; @@ -1690,16 +1720,15 @@ template <typename T, bool inverse, typename Tstride = csize_t<1>> KFR_INTRINSIC void generic_butterfly(size_t radix, cbool_t<inverse>, complex<T>* out, const complex<T>* in, complex<T>*, const complex<T>* twiddle, Tstride ostride = {}) { - cswitch( - csizes_t<11, 13>(), radix, - [&](auto radix_) CMT_INLINE_LAMBDA { - constexpr size_t 
width = vector_width<T>; - spec_generic_butterfly_w<width>(radix_, cbool_t<inverse>(), out, in, twiddle, ostride); - }, - [&]() CMT_INLINE_LAMBDA { - constexpr size_t width = vector_width<T>; - generic_butterfly_w<width>(radix, cbool_t<inverse>(), out, in, twiddle, ostride); - }); + cswitch(csizes_t<11, 13>(), radix, + [&](auto radix_) CMT_INLINE_LAMBDA { + constexpr size_t width = vector_width<T>; + spec_generic_butterfly_w<width>(radix_, cbool_t<inverse>(), out, in, twiddle, ostride); + }, + [&]() CMT_INLINE_LAMBDA { + constexpr size_t width = vector_width<T>; + generic_butterfly_w<width>(radix, cbool_t<inverse>(), out, in, twiddle, ostride); + }); } template <typename T, size_t N> diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt @@ -253,8 +253,7 @@ if (NOT SKIP_TESTS) ${PROJECT_BINARY_DIR}/bin/all_tests_${A}) endif () endforeach () - else () - add_test(NAME all_tests COMMAND ${EMULATOR} - ${PROJECT_BINARY_DIR}/bin/all_tests) endif () + add_test(NAME all_tests COMMAND ${SDE} ${SDE_ARCH_${CPU_ARCH}} -chip_check_exe_only + -- ${PROJECT_BINARY_DIR}/bin/all_tests) endif ()