commit 25eceffebe4b2dc4520b4926d7323551b0928691
parent a5f9f835996f75ec7ad39645f5c2e4420b526fa4
Author: d.levin256@gmail.com <d.levin256@gmail.com>
Date: Tue, 24 Mar 2020 05:56:36 +0000
Arch tests and fixes for multi arch DFT
Diffstat:
4 files changed, 76 insertions(+), 47 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -196,6 +196,7 @@ function (add_arch_library NAME ARCH SRCS DEFS)
target_set_arch(${NAME}_${ARCH} PRIVATE ${ARCH})
target_compile_options(${NAME}_${ARCH} PRIVATE ${DEFS})
target_link_libraries(${NAME}_all INTERFACE ${NAME}_${ARCH})
+ target_compile_options(${NAME}_${ARCH} PRIVATE $<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-flto>) # -flto is a GCC/Clang (incl. clang-cl) flag; skip it for MSVC cl.exe
endfunction ()
if (ENABLE_DFT)
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
@@ -118,7 +118,7 @@ jobs:
/bin/bash -c "sudo xcode-select -s /Applications/Xcode_$(XCODE_VER).app/Contents/Developer"
brew install ninja
- ci/run.sh build-release -DENABLE_CAPI_BUILD=ON -DUSE_SDE=ON -DARCH_TESTS=sse2,ssse3,sse41,avx,avx2,avx512 -DCMAKE_BUILD_TYPE=Release
+ ci/run.sh build-release -DENABLE_CAPI_BUILD=ON -DUSE_SDE=ON -DARCH_TESTS=sse2,ssse3,sse41,avx,avx2,avx512 -DCPU_ARCH=sse2 -DENABLE_DFT_MULTIARCH=ON -DCMAKE_BUILD_TYPE=Release
- job: iOS_ARM_Clang_Release
timeoutInMinutes: 120
@@ -217,7 +217,7 @@ jobs:
set PATH=%PATH:C:\Program Files\Git\mingw64\bin;=%
set PATH=%PATH:C:\Strawberry\c\bin;=%
set PATH=C:\sde;%PATH%
- ci\run.cmd build-release -DENABLE_CAPI_BUILD=ON -DARCH_TESTS=ON -DCMAKE_CXX_COMPILER="C:/Program Files/LLVM/bin/clang-cl.exe" -DUSE_SDE=ON -DCPU_ARCH=avx512 -DCMAKE_CXX_FLAGS=-m64 -DCMAKE_BUILD_TYPE=Release
+ ci\run.cmd build-release -DENABLE_CAPI_BUILD=ON -DARCH_TESTS=ON -DENABLE_DFT_MULTIARCH=ON -DCMAKE_CXX_COMPILER="C:/Program Files/LLVM/bin/clang-cl.exe" -DUSE_SDE=ON -DCPU_ARCH=sse2 -DCMAKE_CXX_FLAGS=-m64 -DCMAKE_BUILD_TYPE=Release
- job: Windows_MSVC_x86_AVX512_Clang9_Release
timeoutInMinutes: 120
@@ -236,7 +236,7 @@ jobs:
set PATH=%PATH:C:\Program Files\Git\mingw64\bin;=%
set PATH=%PATH:C:\Strawberry\c\bin;=%
set PATH=C:\sde;%PATH%
- ci\run.cmd build-release -DARCH_TESTS=ON -DCMAKE_CXX_COMPILER="C:/Program Files/LLVM/bin/clang-cl.exe" -DUSE_SDE=ON -DCPU_ARCH=avx512 -DCMAKE_CXX_FLAGS=-m32 -DCMAKE_BUILD_TYPE=Release
+ ci\run.cmd build-release -DENABLE_CAPI_BUILD=ON -DARCH_TESTS=ON -DENABLE_DFT_MULTIARCH=ON -DCMAKE_CXX_COMPILER="C:/Program Files/LLVM/bin/clang-cl.exe" -DUSE_SDE=ON -DCPU_ARCH=sse2 -DCMAKE_CXX_FLAGS=-m32 -DCMAKE_BUILD_TYPE=Release
- job: Windows_MSVC_x86_64_AVX512_MSVC2017_Release
timeoutInMinutes: 120
diff --git a/include/kfr/dft/impl/ft.hpp b/include/kfr/dft/impl/ft.hpp
@@ -483,7 +483,7 @@ constexpr KFR_INTRINSIC cvec<T, width> fixed_twiddle(size_t size, size_t start,
// constexpr cvec<T, N> fixed_twiddle = get_fixed_twiddle<T, N, size, start, step, inverse>();
template <typename T, size_t N, bool inverse>
-constexpr KFR_INTRINSIC cvec<T, N> twiddleimagmask()
+constexpr static inline cvec<T, N> twiddleimagmask()
{
return inverse ? broadcast<N * 2, T>(-1, +1) : broadcast<N * 2, T>(+1, -1);
}
@@ -1022,11 +1022,16 @@ KFR_INTRINSIC void apply_twiddles2(cvec<T, N>& a1)
}
template <typename T, size_t N, bool inverse>
-static const cvec<T, N> tw3r1 = static_cast<T>(-0.5 - 1.0);
+static constexpr KFR_INTRINSIC cvec<T, N> tw3r1()
+{
+ return static_cast<T>(-0.5 - 1.0);
+}
template <typename T, size_t N, bool inverse>
-static const cvec<T, N> tw3i1 =
- static_cast<T>(0.86602540378443864676372317075) * twiddleimagmask<T, N, inverse>();
+static constexpr KFR_INTRINSIC cvec<T, N> tw3i1()
+{
+ return static_cast<T>(0.86602540378443864676372317075) * twiddleimagmask<T, N, inverse>();
+}
template <size_t N, bool inverse = false, typename T>
KFR_INTRINSIC void butterfly3(cvec<T, N> a00, cvec<T, N> a01, cvec<T, N> a02, cvec<T, N>& w00,
@@ -1037,9 +1042,9 @@ KFR_INTRINSIC void butterfly3(cvec<T, N> a00, cvec<T, N> a01, cvec<T, N> a02, cv
const cvec<T, N> dif1 = swap<2>(a01 - a02);
w00 = a00 + sum1;
- const cvec<T, N> s1 = w00 + sum1 * tw3r1<T, N, inverse>;
+ const cvec<T, N> s1 = w00 + sum1 * tw3r1<T, N, inverse>();
- const cvec<T, N> d1 = dif1 * tw3i1<T, N, inverse>;
+ const cvec<T, N> d1 = dif1 * tw3i1<T, N, inverse>();
w01 = s1 + d1;
w02 = s1 - d1;
@@ -1132,25 +1137,40 @@ KFR_INTRINSIC void butterfly9(cvec<T, N>& a0, cvec<T, N>& a1, cvec<T, N>& a2, cv
}
template <typename T, size_t N, bool inverse>
-static const cvec<T, N> tw7r1 = static_cast<T>(0.623489801858733530525004884 - 1.0);
+static constexpr KFR_INTRINSIC cvec<T, N> tw7r1()
+{
+ return static_cast<T>(0.623489801858733530525004884 - 1.0);
+}
template <typename T, size_t N, bool inverse>
-static const cvec<T, N> tw7i1 =
- static_cast<T>(0.78183148246802980870844452667) * twiddleimagmask<T, N, inverse>();
+static constexpr KFR_INTRINSIC cvec<T, N> tw7i1()
+{
+ return static_cast<T>(0.78183148246802980870844452667) * twiddleimagmask<T, N, inverse>();
+}
template <typename T, size_t N, bool inverse>
-static const cvec<T, N> tw7r2 = static_cast<T>(-0.2225209339563144042889025645 - 1.0);
+static constexpr KFR_INTRINSIC cvec<T, N> tw7r2()
+{
+ return static_cast<T>(-0.2225209339563144042889025645 - 1.0);
+}
template <typename T, size_t N, bool inverse>
-static const cvec<T, N> tw7i2 =
- static_cast<T>(0.97492791218182360701813168299) * twiddleimagmask<T, N, inverse>();
+static constexpr KFR_INTRINSIC cvec<T, N> tw7i2()
+{
+ return static_cast<T>(0.97492791218182360701813168299) * twiddleimagmask<T, N, inverse>();
+}
template <typename T, size_t N, bool inverse>
-static const cvec<T, N> tw7r3 = static_cast<T>(-0.90096886790241912623610231951 - 1.0);
+static constexpr KFR_INTRINSIC cvec<T, N> tw7r3()
+{
+ return static_cast<T>(-0.90096886790241912623610231951 - 1.0);
+}
template <typename T, size_t N, bool inverse>
-static const cvec<T, N> tw7i3 =
- static_cast<T>(0.43388373911755812047576833285) * twiddleimagmask<T, N, inverse>();
+static constexpr KFR_INTRINSIC cvec<T, N> tw7i3()
+{
+ return static_cast<T>(0.43388373911755812047576833285) * twiddleimagmask<T, N, inverse>();
+}
template <size_t N, bool inverse = false, typename T>
KFR_INTRINSIC void butterfly7(cvec<T, N> a00, cvec<T, N> a01, cvec<T, N> a02, cvec<T, N> a03, cvec<T, N> a04,
@@ -1167,18 +1187,18 @@ KFR_INTRINSIC void butterfly7(cvec<T, N> a00, cvec<T, N> a01, cvec<T, N> a02, cv
w00 = a00 + sum1 + sum2 + sum3;
const cvec<T, N> s1 =
- w00 + sum1 * tw7r1<T, N, inverse> + sum2 * tw7r2<T, N, inverse> + sum3 * tw7r3<T, N, inverse>;
+ w00 + sum1 * tw7r1<T, N, inverse>() + sum2 * tw7r2<T, N, inverse>() + sum3 * tw7r3<T, N, inverse>();
const cvec<T, N> s2 =
- w00 + sum1 * tw7r2<T, N, inverse> + sum2 * tw7r3<T, N, inverse> + sum3 * tw7r1<T, N, inverse>;
+ w00 + sum1 * tw7r2<T, N, inverse>() + sum2 * tw7r3<T, N, inverse>() + sum3 * tw7r1<T, N, inverse>();
const cvec<T, N> s3 =
- w00 + sum1 * tw7r3<T, N, inverse> + sum2 * tw7r1<T, N, inverse> + sum3 * tw7r2<T, N, inverse>;
+ w00 + sum1 * tw7r3<T, N, inverse>() + sum2 * tw7r1<T, N, inverse>() + sum3 * tw7r2<T, N, inverse>();
const cvec<T, N> d1 =
- dif1 * tw7i1<T, N, inverse> + dif2 * tw7i2<T, N, inverse> + dif3 * tw7i3<T, N, inverse>;
+ dif1 * tw7i1<T, N, inverse>() + dif2 * tw7i2<T, N, inverse>() + dif3 * tw7i3<T, N, inverse>();
const cvec<T, N> d2 =
- dif1 * tw7i2<T, N, inverse> - dif2 * tw7i3<T, N, inverse> - dif3 * tw7i1<T, N, inverse>;
+ dif1 * tw7i2<T, N, inverse>() - dif2 * tw7i3<T, N, inverse>() - dif3 * tw7i1<T, N, inverse>();
const cvec<T, N> d3 =
- dif1 * tw7i3<T, N, inverse> - dif2 * tw7i1<T, N, inverse> + dif3 * tw7i2<T, N, inverse>;
+ dif1 * tw7i3<T, N, inverse>() - dif2 * tw7i1<T, N, inverse>() + dif3 * tw7i2<T, N, inverse>();
w01 = s1 + d1;
w06 = s1 - d1;
@@ -1294,15 +1314,25 @@ KFR_INTRINSIC void butterfly11(cvec<T, N> a00, cvec<T, N> a01, cvec<T, N> a02, c
}
template <typename T, size_t N, bool inverse>
-const static cvec<T, N> tw5r1 = static_cast<T>(0.30901699437494742410229341718 - 1.0);
+static constexpr KFR_INTRINSIC cvec<T, N> tw5r1()
+{
+ return static_cast<T>(0.30901699437494742410229341718 - 1.0);
+}
template <typename T, size_t N, bool inverse>
-const static cvec<T, N> tw5i1 =
- static_cast<T>(0.95105651629515357211643933338) * twiddleimagmask<T, N, inverse>();
+static constexpr KFR_INTRINSIC cvec<T, N> tw5i1()
+{
+ return static_cast<T>(0.95105651629515357211643933338) * twiddleimagmask<T, N, inverse>();
+}
template <typename T, size_t N, bool inverse>
-const static cvec<T, N> tw5r2 = static_cast<T>(-0.80901699437494742410229341718 - 1.0);
+static constexpr KFR_INTRINSIC cvec<T, N> tw5r2()
+{
+ return static_cast<T>(-0.80901699437494742410229341718 - 1.0);
+}
template <typename T, size_t N, bool inverse>
-const static cvec<T, N> tw5i2 =
- static_cast<T>(0.58778525229247312916870595464) * twiddleimagmask<T, N, inverse>();
+static constexpr KFR_INTRINSIC cvec<T, N> tw5i2()
+{
+ return static_cast<T>(0.58778525229247312916870595464) * twiddleimagmask<T, N, inverse>();
+}
template <size_t N, bool inverse = false, typename T>
KFR_INTRINSIC void butterfly5(const cvec<T, N>& a00, const cvec<T, N>& a01, const cvec<T, N>& a02,
@@ -1315,11 +1345,11 @@ KFR_INTRINSIC void butterfly5(const cvec<T, N>& a00, const cvec<T, N>& a01, cons
const cvec<T, N> dif2 = swap<2>(a02 - a03);
w00 = a00 + sum1 + sum2;
- const cvec<T, N> s1 = w00 + sum1 * tw5r1<T, N, inverse> + sum2 * tw5r2<T, N, inverse>;
- const cvec<T, N> s2 = w00 + sum1 * tw5r2<T, N, inverse> + sum2 * tw5r1<T, N, inverse>;
+ const cvec<T, N> s1 = w00 + sum1 * tw5r1<T, N, inverse>() + sum2 * tw5r2<T, N, inverse>();
+ const cvec<T, N> s2 = w00 + sum1 * tw5r2<T, N, inverse>() + sum2 * tw5r1<T, N, inverse>();
- const cvec<T, N> d1 = dif1 * tw5i1<T, N, inverse> + dif2 * tw5i2<T, N, inverse>;
- const cvec<T, N> d2 = dif1 * tw5i2<T, N, inverse> - dif2 * tw5i1<T, N, inverse>;
+ const cvec<T, N> d1 = dif1 * tw5i1<T, N, inverse>() + dif2 * tw5i2<T, N, inverse>();
+ const cvec<T, N> d2 = dif1 * tw5i2<T, N, inverse>() - dif2 * tw5i1<T, N, inverse>();
w01 = s1 + d1;
w04 = s1 - d1;
@@ -1690,16 +1720,15 @@ template <typename T, bool inverse, typename Tstride = csize_t<1>>
KFR_INTRINSIC void generic_butterfly(size_t radix, cbool_t<inverse>, complex<T>* out, const complex<T>* in,
complex<T>*, const complex<T>* twiddle, Tstride ostride = {})
{
- cswitch(
- csizes_t<11, 13>(), radix,
- [&](auto radix_) CMT_INLINE_LAMBDA {
- constexpr size_t width = vector_width<T>;
- spec_generic_butterfly_w<width>(radix_, cbool_t<inverse>(), out, in, twiddle, ostride);
- },
- [&]() CMT_INLINE_LAMBDA {
- constexpr size_t width = vector_width<T>;
- generic_butterfly_w<width>(radix, cbool_t<inverse>(), out, in, twiddle, ostride);
- });
+ cswitch(csizes_t<11, 13>(), radix,
+ [&](auto radix_) CMT_INLINE_LAMBDA {
+ constexpr size_t width = vector_width<T>;
+ spec_generic_butterfly_w<width>(radix_, cbool_t<inverse>(), out, in, twiddle, ostride);
+ },
+ [&]() CMT_INLINE_LAMBDA {
+ constexpr size_t width = vector_width<T>;
+ generic_butterfly_w<width>(radix, cbool_t<inverse>(), out, in, twiddle, ostride);
+ });
}
template <typename T, size_t N>
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
@@ -253,8 +253,14 @@ if (NOT SKIP_TESTS)
${PROJECT_BINARY_DIR}/bin/all_tests_${A})
endif ()
endforeach ()
- else ()
- add_test(NAME all_tests COMMAND ${EMULATOR}
- ${PROJECT_BINARY_DIR}/bin/all_tests)
endif ()
+ # Always register all_tests, but only launch it through SDE when USE_SDE is on;
+ # NOTE(review): assumes ${SDE} is only set when USE_SDE is enabled - confirm in the top-level CMakeLists.
+ if (USE_SDE)
+ add_test(NAME all_tests COMMAND ${SDE} ${SDE_ARCH_${CPU_ARCH}} -chip_check_exe_only
+ -- ${PROJECT_BINARY_DIR}/bin/all_tests)
+ else ()
+ add_test(NAME all_tests COMMAND ${EMULATOR}
+ ${PROJECT_BINARY_DIR}/bin/all_tests)
+ endif ()
endif ()