kfr

Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)
Log | Files | Refs | README

commit 3b5b14e92b4358e223aa5c04297aedc2507f0d0f
parent 250a3ac4d769e0404c3f6558bc66807eed034c95
Author: d.levin256@gmail.com <d.levin256@gmail.com>
Date:   Sat, 12 Nov 2022 23:21:30 +0000

Fixes for MSVC2019 and MSVC2022

Diffstat:
M azure-pipelines.yml | 112 +++++++++++++++++++++++++++++++++----------------------------------------------
M include/kfr/simd/impl/backend_generic.hpp | 6 ++++++
2 files changed, 52 insertions(+), 66 deletions(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml @@ -1,5 +1,5 @@ jobs: -- job: Windows_MSVC_x86_64_AVX512_Clang13_Release +- job: Windows_MSVC_x86_64_AVX512_Clang14_Release timeoutInMinutes: 180 pool: vmImage: 'windows-2019' @@ -18,7 +18,7 @@ jobs: set PATH=C:\sde;%PATH% ci\run.cmd build-release -DKFR_ENABLE_CAPI_BUILD=ON -DKFR_ARCH_TESTS=ON -DKFR_ENABLE_DFT_MULTIARCH=ON -DCMAKE_CXX_COMPILER="C:/Program Files/LLVM/bin/clang-cl.exe" -DCMAKE_LINKER="C:/Program Files/LLVM/bin/lld-link.exe" -DKFR_USE_SDE=ON -DKFR_ARCH=sse2 -DCMAKE_CXX_FLAGS=-m64 -DCMAKE_BUILD_TYPE=Release -DCMAKE_AR="C:/Program Files/LLVM/bin/llvm-lib.exe" -- job: Windows_MSVC_x86_AVX512_Clang13_Release +- job: Windows_MSVC_x86_AVX512_Clang14_Release timeoutInMinutes: 180 pool: vmImage: 'windows-2019' @@ -44,23 +44,13 @@ jobs: steps: - bash: | set -e - sudo apt-get update && sudo apt-get install -y ninja-build libmpfr-dev clang-8 + sudo apt-get update && sudo apt-get install -y ninja-build clang-8 curl -o "$(Agent.TempDirectory)/sde.tar.bz2" -L $(SDE_URL_LINUX) mkdir "$(Agent.TempDirectory)/sde-bin" tar -C "$(Agent.TempDirectory)/sde-bin" -xjf "$(Agent.TempDirectory)/sde.tar.bz2" --strip 1 sudo ln -s $(Agent.TempDirectory)/sde-bin/sde64 /usr/bin/sde ci/run.sh build-release -DKFR_ENABLE_CAPI_BUILD=ON -DKFR_ARCH_TESTS=sse2,sse3,sse41,avx,avx2,avx512 -DKFR_ARCH=avx2 -DKFR_USE_SDE=ON -DCMAKE_CXX_COMPILER=clang++-8 -DCMAKE_BUILD_TYPE=Release -- job: Linux_x86_64_Clang9 - timeoutInMinutes: 180 - pool: - vmImage: 'ubuntu-18.04' - steps: - - bash: | - set -e - sudo apt-get update && sudo apt-get install -y ninja-build libmpfr-dev clang-9 - ci/run.sh build-release -DKFR_ARCH=avx2 -DCMAKE_CXX_COMPILER=clang++-9 -DCMAKE_BUILD_TYPE=Release - - job: Linux_x86_64_Clang10 timeoutInMinutes: 180 pool: @@ -68,19 +58,9 @@ jobs: steps: - bash: | set -e - sudo apt-get update && sudo apt-get install -y ninja-build libmpfr-dev clang-10 + sudo apt-get update && sudo apt-get install -y ninja-build clang-10 ci/run.sh 
build-release -DKFR_ARCH=avx2 -DCMAKE_CXX_COMPILER=clang++-10 -DCMAKE_BUILD_TYPE=Release -- job: Linux_x86_64_Clang11 - timeoutInMinutes: 180 - pool: - vmImage: 'ubuntu-20.04' - steps: - - bash: | - set -e - sudo apt-get update && sudo apt-get install -y ninja-build libmpfr-dev clang-11 - ci/run.sh build-release -DKFR_ARCH=avx2 -DCMAKE_CXX_COMPILER=clang++-11 -DCMAKE_BUILD_TYPE=Release - - job: Linux_x86_64_Clang12 timeoutInMinutes: 180 pool: @@ -88,24 +68,26 @@ jobs: steps: - bash: | set -e - sudo apt-get update && sudo apt-get install -y ninja-build libmpfr-dev clang-12 + sudo apt-get update && sudo apt-get install -y ninja-build clang-12 curl -o "$(Agent.TempDirectory)/sde.tar.bz2" -L $(SDE_URL_LINUX) mkdir "$(Agent.TempDirectory)/sde-bin" tar -C "$(Agent.TempDirectory)/sde-bin" -xjf "$(Agent.TempDirectory)/sde.tar.bz2" --strip 1 sudo ln -s $(Agent.TempDirectory)/sde-bin/sde64 /usr/bin/sde ci/run.sh build-release -DKFR_ENABLE_CAPI_BUILD=ON -DKFR_ARCH_TESTS=sse2,sse3,sse41,avx,avx2,avx512 -DKFR_ARCH=avx2 -DKFR_USE_SDE=ON -DCMAKE_CXX_COMPILER=clang++-12 -DCMAKE_BUILD_TYPE=Release -- job: Linux_x86_64_Clang13 +- job: Linux_x86_64_Clang14 timeoutInMinutes: 180 pool: - vmImage: 'ubuntu-20.04' + vmImage: 'ubuntu-22.04' steps: - bash: | set -e - wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add - - echo "deb http://apt.llvm.org/focal/ llvm-toolchain-focal-13 main" | sudo tee /etc/apt/sources.list.d/llvm.list - sudo apt-get update && sudo apt-get install -y ninja-build libmpfr-dev clang-13 - ci/run.sh build-release -DKFR_ARCH=avx2 -DCMAKE_CXX_COMPILER=clang++-13 -DCMAKE_BUILD_TYPE=Release + sudo apt-get update && sudo apt-get install -y ninja-build clang-14 + curl -o "$(Agent.TempDirectory)/sde.tar.bz2" -L $(SDE_URL_LINUX) + mkdir "$(Agent.TempDirectory)/sde-bin" + tar -C "$(Agent.TempDirectory)/sde-bin" -xjf "$(Agent.TempDirectory)/sde.tar.bz2" --strip 1 + sudo ln -s $(Agent.TempDirectory)/sde-bin/sde64 /usr/bin/sde + ci/run.sh build-release 
-DKFR_ENABLE_CAPI_BUILD=ON -DKFR_ARCH_TESTS=sse2,sse3,sse41,avx,avx2,avx512 -DKFR_ARCH=avx2 -DKFR_USE_SDE=ON -DCMAKE_CXX_COMPILER=clang++-14 -DCMAKE_BUILD_TYPE=Release - job: Linux_x86_64_GCC7 timeoutInMinutes: 180 @@ -114,7 +96,7 @@ jobs: steps: - bash: | set -e - sudo apt-get update && sudo apt-get install -y ninja-build libmpfr-dev gcc-7 g++-7 + sudo apt-get update && sudo apt-get install -y ninja-build gcc-7 g++-7 ci/run.sh build-release -DKFR_ARCH=avx -DKFR_ENABLE_DFT=OFF -DCMAKE_CXX_COMPILER=g++-7 -DCMAKE_BUILD_TYPE=Release - job: Linux_x86_64_GCC8 @@ -124,40 +106,40 @@ jobs: steps: - bash: | set -e - sudo apt-get update && sudo apt-get install -y ninja-build libmpfr-dev gcc-8 g++-8 + sudo apt-get update && sudo apt-get install -y ninja-build gcc-8 g++-8 curl -o "$(Agent.TempDirectory)/sde.tar.bz2" -L $(SDE_URL_LINUX) mkdir "$(Agent.TempDirectory)/sde-bin" tar -C "$(Agent.TempDirectory)/sde-bin" -xjf "$(Agent.TempDirectory)/sde.tar.bz2" --strip 1 sudo ln -s $(Agent.TempDirectory)/sde-bin/sde64 /usr/bin/sde ci/run.sh build-release -DKFR_ARCH_TESTS=sse2,sse3,sse41,avx,avx2 -DKFR_USE_SDE=ON -DKFR_ARCH=avx -DKFR_ENABLE_DFT=OFF -DCMAKE_CXX_COMPILER=g++-8 -DCMAKE_BUILD_TYPE=Release -- job: Linux_x86_64_GCC9 +- job: Linux_x86_64_GCC10 timeoutInMinutes: 180 pool: - vmImage: 'ubuntu-20.04' + vmImage: 'ubuntu-22.04' steps: - bash: | set -e - sudo apt-get update && sudo apt-get install -y ninja-build libmpfr-dev gcc-9 g++-9 + sudo apt-get update && sudo apt-get install -y ninja-build gcc-10 g++-10 curl -o "$(Agent.TempDirectory)/sde.tar.bz2" -L $(SDE_URL_LINUX) mkdir "$(Agent.TempDirectory)/sde-bin" tar -C "$(Agent.TempDirectory)/sde-bin" -xjf "$(Agent.TempDirectory)/sde.tar.bz2" --strip 1 sudo ln -s $(Agent.TempDirectory)/sde-bin/sde64 /usr/bin/sde - ci/run.sh build-release -DKFR_ARCH_TESTS=sse2,sse3,sse41,avx,avx2 -DKFR_USE_SDE=ON -DKFR_ARCH=avx2 -DKFR_ENABLE_DFT=OFF -DCMAKE_CXX_COMPILER=g++-9 -DCMAKE_BUILD_TYPE=Release + ci/run.sh build-release 
-DKFR_ARCH_TESTS=sse2,sse3,sse41,avx,avx2,avx512 -DKFR_USE_SDE=ON -DKFR_ARCH=avx2 -DKFR_ENABLE_DFT=OFF -DCMAKE_CXX_COMPILER=g++-10 -DCMAKE_BUILD_TYPE=Release -- job: Linux_x86_64_GCC10 +- job: Linux_x86_64_GCC12 timeoutInMinutes: 180 pool: - vmImage: 'ubuntu-20.04' + vmImage: 'ubuntu-22.04' steps: - bash: | set -e - sudo apt-get update && sudo apt-get install -y ninja-build libmpfr-dev gcc-10 g++-10 + sudo apt-get update && sudo apt-get install -y ninja-build gcc-12 g++-12 curl -o "$(Agent.TempDirectory)/sde.tar.bz2" -L $(SDE_URL_LINUX) mkdir "$(Agent.TempDirectory)/sde-bin" tar -C "$(Agent.TempDirectory)/sde-bin" -xjf "$(Agent.TempDirectory)/sde.tar.bz2" --strip 1 sudo ln -s $(Agent.TempDirectory)/sde-bin/sde64 /usr/bin/sde - ci/run.sh build-release -DKFR_ARCH_TESTS=sse2,sse3,sse41,avx,avx2 -DKFR_USE_SDE=ON -DKFR_ARCH=avx2 -DKFR_ENABLE_DFT=OFF -DCMAKE_CXX_COMPILER=g++-10 -DCMAKE_BUILD_TYPE=Release + ci/run.sh build-release -DKFR_ARCH_TESTS=sse2,sse3,sse41,avx,avx2,avx512 -DKFR_USE_SDE=ON -DKFR_ARCH=avx2 -DKFR_ENABLE_DFT=OFF -DCMAKE_CXX_COMPILER=g++-12 -DCMAKE_BUILD_TYPE=Release - job: Linux_ARM_Clang9_Release timeoutInMinutes: 180 @@ -231,8 +213,6 @@ jobs: export PATH=$PATH:$(Agent.TempDirectory)/sde-bin sde64 -help || true - # sudo spctl --master-disable - # sudo /usr/sbin/DevToolsSecurity –enable sudo security authorizationdb write system.privilege.taskport allow sde64 -chip_check_exe_only -- $(Agent.TempDirectory)/sde-bin/intel64/nullapp @@ -263,7 +243,7 @@ jobs: brew install ninja ci/run.sh build-release -DCMAKE_TOOLCHAIN_FILE=../cmake/ios.cmake -DCMAKE_BUILD_TYPE=Release -DIOS_PLATFORM=OS64 -DIOS_ARCH=arm64 -DCMAKE_MAKE_PROGRAM=/usr/local/bin/ninja -DKFR_SKIP_TESTS=ON -- job: Android_ARMv7_Clang_Release +- job: Android_ARMv7_Clang11_Release timeoutInMinutes: 180 pool: vmImage: 'windows-2019' @@ -277,7 +257,7 @@ jobs: call "%ANDROID_HOME%\tools\bin\sdkmanager.bat" "ndk-bundle" < %TMP%\always_yes ci\run.cmd build-release 
-DCMAKE_TOOLCHAIN_FILE="%ANDROID_HOME%\ndk-bundle\build\cmake\android.toolchain.cmake" -DANDROID_ABI=armeabi-v7a -DANDROID_ARM_NEON=TRUE -DKFR_SKIP_TESTS=ON -DCMAKE_BUILD_TYPE=Release -- job: Android_AArch64_Clang_Release +- job: Android_AArch64_Clang11_Release timeoutInMinutes: 180 pool: vmImage: 'windows-2019' @@ -291,7 +271,7 @@ jobs: call "%ANDROID_HOME%\tools\bin\sdkmanager.bat" "ndk-bundle" < %TMP%\always_yes ci\run.cmd build-release -DCMAKE_TOOLCHAIN_FILE="%ANDROID_HOME%\ndk-bundle\build\cmake\android.toolchain.cmake" -DANDROID_ABI=arm64-v8a -DANDROID_ARM_NEON=TRUE -DKFR_SKIP_TESTS=ON -DCMAKE_BUILD_TYPE=Release -- job: Windows_MinGW_x86_Clang13_Release +- job: Windows_MinGW_x86_Clang14_Release timeoutInMinutes: 180 pool: vmImage: 'windows-2019' @@ -306,7 +286,7 @@ jobs: call C:\tools\msys64\msys2_shell.cmd -defterm -mingw32 -no-start -full-path -here -c "pacman -S --noconfirm mingw32/mingw-w64-i686-ninja" call C:\tools\msys64\msys2_shell.cmd -defterm -mingw32 -no-start -full-path -here -c "ci/run.sh build-release -DCMAKE_CXX_COMPILER=""C:/Program Files/LLVM/bin/clang++.exe"" -DKFR_ARCH=avx -DCMAKE_CXX_FLAGS=--target=i686-w64-windows-gnu -DCMAKE_BUILD_TYPE=Release" -- job: Windows_MinGW_x86_64_Clang13_Release +- job: Windows_MinGW_x86_64_Clang14_Release timeoutInMinutes: 180 pool: vmImage: 'windows-2019' @@ -341,25 +321,25 @@ jobs: set PATH=C:\sde;%PATH% ci\run.cmd build-release -DKFR_ARCH_TESTS=ON -DKFR_USE_SDE=ON -DKFR_ARCH=avx512 -DKFR_ENABLE_DFT=OFF -DCMAKE_BUILD_TYPE=Release -# - job: Windows_MSVC_x86_AVX512_MSVC2019_Release -# timeoutInMinutes: 180 -# pool: -# vmImage: 'windows-2019' -# steps: -# - script: | -# choco uninstall mingw -# choco install ninja - -# curl -o "$(Agent.TempDirectory)/sde.zip" -L $(SDE_URL_WINDOWS) -# "C:\Program Files\7-Zip\7z.exe" x -oC:\sde "$(Agent.TempDirectory)/sde.zip" - -# call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars32.bat" -# set PATH=%PATH:C:\tools\mingw64\bin;=% -# set 
PATH=%PATH:C:\Program Files\Git\mingw64\bin;=% -# set PATH=%PATH:C:\Program Files\LLVM\bin;=% -# set PATH=%PATH:C:\Strawberry\c\bin;=% -# set PATH=C:\sde;%PATH% -# ci\run.cmd build-release -DKFR_ARCH_TESTS=OFF -DKFR_USE_SDE=ON -DKFR_ARCH=avx512 -DKFR_ENABLE_DFT=OFF -DCMAKE_BUILD_TYPE=Release +- job: Windows_MSVC_x86_AVX512_MSVC2019_Release + timeoutInMinutes: 180 + pool: + vmImage: 'windows-2019' + steps: + - script: | + choco uninstall mingw + choco install ninja + + curl -o "$(Agent.TempDirectory)/sde.zip" -L $(SDE_URL_WINDOWS) + "C:\Program Files\7-Zip\7z.exe" x -oC:\sde "$(Agent.TempDirectory)/sde.zip" + + call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars32.bat" + set PATH=%PATH:C:\tools\mingw64\bin;=% + set PATH=%PATH:C:\Program Files\Git\mingw64\bin;=% + set PATH=%PATH:C:\Program Files\LLVM\bin;=% + set PATH=%PATH:C:\Strawberry\c\bin;=% + set PATH=C:\sde;%PATH% + ci\run.cmd build-release -DKFR_ARCH_TESTS=OFF -DKFR_USE_SDE=ON -DKFR_ARCH=avx512 -DKFR_ENABLE_DFT=OFF -DCMAKE_BUILD_TYPE=Release - job: Windows_MSVC2022_x86_64_Release timeoutInMinutes: 180 diff --git a/include/kfr/simd/impl/backend_generic.hpp b/include/kfr/simd/impl/backend_generic.hpp @@ -581,7 +581,13 @@ KFR_INTRIN_SHUFFLE_LINEAR_START(f32, 2, 4, 2, KFR_INTRIN_CONVERT(f32, i32, 4, _mm_cvtepi32_ps(x)) KFR_INTRIN_CONVERT(i32, f32, 4, _mm_cvttps_epi32(x)) KFR_INTRIN_CONVERT(i32, f64, 2, simd<i32, 2>::from(_mm_cvtsi128_si64(_mm_cvttpd_epi32(x)))) +#ifdef CMT_COMPILER_IS_MSVC +KFR_INTRIN_CONVERT(f64, i32, 2, + _mm_cvtepi32_pd(_mm_setr_epi32(bitcast_anything<simd_array<i32, 2>>(x).val[0], + bitcast_anything<simd_array<i32, 2>>(x).val[1], 0, 0))) +#else KFR_INTRIN_CONVERT(f64, i32, 2, _mm_cvtepi32_pd(KFR_mm_setr_epi64x(x.whole, 0))) +#endif KFR_INTRIN_CONVERT(i64, f64, 2, KFR_mm_setr_epi64x(_mm_cvttsd_si64(x), _mm_cvttsd_si64(_mm_unpackhi_pd(x, x)))) KFR_INTRIN_CONVERT(f64, i64, 2,