commit 3b5b14e92b4358e223aa5c04297aedc2507f0d0f
parent 250a3ac4d769e0404c3f6558bc66807eed034c95
Author: d.levin256@gmail.com <d.levin256@gmail.com>
Date: Sat, 12 Nov 2022 23:21:30 +0000
Fixes for MSVC2019 and MSVC2022
Diffstat:
2 files changed, 52 insertions(+), 66 deletions(-)
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
@@ -1,5 +1,5 @@
jobs:
-- job: Windows_MSVC_x86_64_AVX512_Clang13_Release
+- job: Windows_MSVC_x86_64_AVX512_Clang14_Release
timeoutInMinutes: 180
pool:
vmImage: 'windows-2019'
@@ -18,7 +18,7 @@ jobs:
set PATH=C:\sde;%PATH%
ci\run.cmd build-release -DKFR_ENABLE_CAPI_BUILD=ON -DKFR_ARCH_TESTS=ON -DKFR_ENABLE_DFT_MULTIARCH=ON -DCMAKE_CXX_COMPILER="C:/Program Files/LLVM/bin/clang-cl.exe" -DCMAKE_LINKER="C:/Program Files/LLVM/bin/lld-link.exe" -DKFR_USE_SDE=ON -DKFR_ARCH=sse2 -DCMAKE_CXX_FLAGS=-m64 -DCMAKE_BUILD_TYPE=Release -DCMAKE_AR="C:/Program Files/LLVM/bin/llvm-lib.exe"
-- job: Windows_MSVC_x86_AVX512_Clang13_Release
+- job: Windows_MSVC_x86_AVX512_Clang14_Release
timeoutInMinutes: 180
pool:
vmImage: 'windows-2019'
@@ -44,23 +44,13 @@ jobs:
steps:
- bash: |
set -e
- sudo apt-get update && sudo apt-get install -y ninja-build libmpfr-dev clang-8
+ sudo apt-get update && sudo apt-get install -y ninja-build clang-8
curl -o "$(Agent.TempDirectory)/sde.tar.bz2" -L $(SDE_URL_LINUX)
mkdir "$(Agent.TempDirectory)/sde-bin"
tar -C "$(Agent.TempDirectory)/sde-bin" -xjf "$(Agent.TempDirectory)/sde.tar.bz2" --strip 1
sudo ln -s $(Agent.TempDirectory)/sde-bin/sde64 /usr/bin/sde
ci/run.sh build-release -DKFR_ENABLE_CAPI_BUILD=ON -DKFR_ARCH_TESTS=sse2,sse3,sse41,avx,avx2,avx512 -DKFR_ARCH=avx2 -DKFR_USE_SDE=ON -DCMAKE_CXX_COMPILER=clang++-8 -DCMAKE_BUILD_TYPE=Release
-- job: Linux_x86_64_Clang9
- timeoutInMinutes: 180
- pool:
- vmImage: 'ubuntu-18.04'
- steps:
- - bash: |
- set -e
- sudo apt-get update && sudo apt-get install -y ninja-build libmpfr-dev clang-9
- ci/run.sh build-release -DKFR_ARCH=avx2 -DCMAKE_CXX_COMPILER=clang++-9 -DCMAKE_BUILD_TYPE=Release
-
- job: Linux_x86_64_Clang10
timeoutInMinutes: 180
pool:
@@ -68,19 +58,9 @@ jobs:
steps:
- bash: |
set -e
- sudo apt-get update && sudo apt-get install -y ninja-build libmpfr-dev clang-10
+ sudo apt-get update && sudo apt-get install -y ninja-build clang-10
ci/run.sh build-release -DKFR_ARCH=avx2 -DCMAKE_CXX_COMPILER=clang++-10 -DCMAKE_BUILD_TYPE=Release
-- job: Linux_x86_64_Clang11
- timeoutInMinutes: 180
- pool:
- vmImage: 'ubuntu-20.04'
- steps:
- - bash: |
- set -e
- sudo apt-get update && sudo apt-get install -y ninja-build libmpfr-dev clang-11
- ci/run.sh build-release -DKFR_ARCH=avx2 -DCMAKE_CXX_COMPILER=clang++-11 -DCMAKE_BUILD_TYPE=Release
-
- job: Linux_x86_64_Clang12
timeoutInMinutes: 180
pool:
@@ -88,24 +68,26 @@ jobs:
steps:
- bash: |
set -e
- sudo apt-get update && sudo apt-get install -y ninja-build libmpfr-dev clang-12
+ sudo apt-get update && sudo apt-get install -y ninja-build clang-12
curl -o "$(Agent.TempDirectory)/sde.tar.bz2" -L $(SDE_URL_LINUX)
mkdir "$(Agent.TempDirectory)/sde-bin"
tar -C "$(Agent.TempDirectory)/sde-bin" -xjf "$(Agent.TempDirectory)/sde.tar.bz2" --strip 1
sudo ln -s $(Agent.TempDirectory)/sde-bin/sde64 /usr/bin/sde
ci/run.sh build-release -DKFR_ENABLE_CAPI_BUILD=ON -DKFR_ARCH_TESTS=sse2,sse3,sse41,avx,avx2,avx512 -DKFR_ARCH=avx2 -DKFR_USE_SDE=ON -DCMAKE_CXX_COMPILER=clang++-12 -DCMAKE_BUILD_TYPE=Release
-- job: Linux_x86_64_Clang13
+- job: Linux_x86_64_Clang14
timeoutInMinutes: 180
pool:
- vmImage: 'ubuntu-20.04'
+ vmImage: 'ubuntu-22.04'
steps:
- bash: |
set -e
- wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
- echo "deb http://apt.llvm.org/focal/ llvm-toolchain-focal-13 main" | sudo tee /etc/apt/sources.list.d/llvm.list
- sudo apt-get update && sudo apt-get install -y ninja-build libmpfr-dev clang-13
- ci/run.sh build-release -DKFR_ARCH=avx2 -DCMAKE_CXX_COMPILER=clang++-13 -DCMAKE_BUILD_TYPE=Release
+ sudo apt-get update && sudo apt-get install -y ninja-build clang-14
+ curl -o "$(Agent.TempDirectory)/sde.tar.bz2" -L $(SDE_URL_LINUX)
+ mkdir "$(Agent.TempDirectory)/sde-bin"
+ tar -C "$(Agent.TempDirectory)/sde-bin" -xjf "$(Agent.TempDirectory)/sde.tar.bz2" --strip 1
+ sudo ln -s $(Agent.TempDirectory)/sde-bin/sde64 /usr/bin/sde
+ ci/run.sh build-release -DKFR_ENABLE_CAPI_BUILD=ON -DKFR_ARCH_TESTS=sse2,sse3,sse41,avx,avx2,avx512 -DKFR_ARCH=avx2 -DKFR_USE_SDE=ON -DCMAKE_CXX_COMPILER=clang++-14 -DCMAKE_BUILD_TYPE=Release
- job: Linux_x86_64_GCC7
timeoutInMinutes: 180
@@ -114,7 +96,7 @@ jobs:
steps:
- bash: |
set -e
- sudo apt-get update && sudo apt-get install -y ninja-build libmpfr-dev gcc-7 g++-7
+ sudo apt-get update && sudo apt-get install -y ninja-build gcc-7 g++-7
ci/run.sh build-release -DKFR_ARCH=avx -DKFR_ENABLE_DFT=OFF -DCMAKE_CXX_COMPILER=g++-7 -DCMAKE_BUILD_TYPE=Release
- job: Linux_x86_64_GCC8
@@ -124,40 +106,40 @@ jobs:
steps:
- bash: |
set -e
- sudo apt-get update && sudo apt-get install -y ninja-build libmpfr-dev gcc-8 g++-8
+ sudo apt-get update && sudo apt-get install -y ninja-build gcc-8 g++-8
curl -o "$(Agent.TempDirectory)/sde.tar.bz2" -L $(SDE_URL_LINUX)
mkdir "$(Agent.TempDirectory)/sde-bin"
tar -C "$(Agent.TempDirectory)/sde-bin" -xjf "$(Agent.TempDirectory)/sde.tar.bz2" --strip 1
sudo ln -s $(Agent.TempDirectory)/sde-bin/sde64 /usr/bin/sde
ci/run.sh build-release -DKFR_ARCH_TESTS=sse2,sse3,sse41,avx,avx2 -DKFR_USE_SDE=ON -DKFR_ARCH=avx -DKFR_ENABLE_DFT=OFF -DCMAKE_CXX_COMPILER=g++-8 -DCMAKE_BUILD_TYPE=Release
-- job: Linux_x86_64_GCC9
+- job: Linux_x86_64_GCC10
timeoutInMinutes: 180
pool:
- vmImage: 'ubuntu-20.04'
+ vmImage: 'ubuntu-22.04'
steps:
- bash: |
set -e
- sudo apt-get update && sudo apt-get install -y ninja-build libmpfr-dev gcc-9 g++-9
+ sudo apt-get update && sudo apt-get install -y ninja-build gcc-10 g++-10
curl -o "$(Agent.TempDirectory)/sde.tar.bz2" -L $(SDE_URL_LINUX)
mkdir "$(Agent.TempDirectory)/sde-bin"
tar -C "$(Agent.TempDirectory)/sde-bin" -xjf "$(Agent.TempDirectory)/sde.tar.bz2" --strip 1
sudo ln -s $(Agent.TempDirectory)/sde-bin/sde64 /usr/bin/sde
- ci/run.sh build-release -DKFR_ARCH_TESTS=sse2,sse3,sse41,avx,avx2 -DKFR_USE_SDE=ON -DKFR_ARCH=avx2 -DKFR_ENABLE_DFT=OFF -DCMAKE_CXX_COMPILER=g++-9 -DCMAKE_BUILD_TYPE=Release
+ ci/run.sh build-release -DKFR_ARCH_TESTS=sse2,sse3,sse41,avx,avx2,avx512 -DKFR_USE_SDE=ON -DKFR_ARCH=avx2 -DKFR_ENABLE_DFT=OFF -DCMAKE_CXX_COMPILER=g++-10 -DCMAKE_BUILD_TYPE=Release
-- job: Linux_x86_64_GCC10
+- job: Linux_x86_64_GCC12
timeoutInMinutes: 180
pool:
- vmImage: 'ubuntu-20.04'
+ vmImage: 'ubuntu-22.04'
steps:
- bash: |
set -e
- sudo apt-get update && sudo apt-get install -y ninja-build libmpfr-dev gcc-10 g++-10
+ sudo apt-get update && sudo apt-get install -y ninja-build gcc-12 g++-12
curl -o "$(Agent.TempDirectory)/sde.tar.bz2" -L $(SDE_URL_LINUX)
mkdir "$(Agent.TempDirectory)/sde-bin"
tar -C "$(Agent.TempDirectory)/sde-bin" -xjf "$(Agent.TempDirectory)/sde.tar.bz2" --strip 1
sudo ln -s $(Agent.TempDirectory)/sde-bin/sde64 /usr/bin/sde
- ci/run.sh build-release -DKFR_ARCH_TESTS=sse2,sse3,sse41,avx,avx2 -DKFR_USE_SDE=ON -DKFR_ARCH=avx2 -DKFR_ENABLE_DFT=OFF -DCMAKE_CXX_COMPILER=g++-10 -DCMAKE_BUILD_TYPE=Release
+ ci/run.sh build-release -DKFR_ARCH_TESTS=sse2,sse3,sse41,avx,avx2,avx512 -DKFR_USE_SDE=ON -DKFR_ARCH=avx2 -DKFR_ENABLE_DFT=OFF -DCMAKE_CXX_COMPILER=g++-12 -DCMAKE_BUILD_TYPE=Release
- job: Linux_ARM_Clang9_Release
timeoutInMinutes: 180
@@ -231,8 +213,6 @@ jobs:
export PATH=$PATH:$(Agent.TempDirectory)/sde-bin
sde64 -help || true
- # sudo spctl --master-disable
- # sudo /usr/sbin/DevToolsSecurity –enable
sudo security authorizationdb write system.privilege.taskport allow
sde64 -chip_check_exe_only -- $(Agent.TempDirectory)/sde-bin/intel64/nullapp
@@ -263,7 +243,7 @@ jobs:
brew install ninja
ci/run.sh build-release -DCMAKE_TOOLCHAIN_FILE=../cmake/ios.cmake -DCMAKE_BUILD_TYPE=Release -DIOS_PLATFORM=OS64 -DIOS_ARCH=arm64 -DCMAKE_MAKE_PROGRAM=/usr/local/bin/ninja -DKFR_SKIP_TESTS=ON
-- job: Android_ARMv7_Clang_Release
+- job: Android_ARMv7_Clang11_Release
timeoutInMinutes: 180
pool:
vmImage: 'windows-2019'
@@ -277,7 +257,7 @@ jobs:
call "%ANDROID_HOME%\tools\bin\sdkmanager.bat" "ndk-bundle" < %TMP%\always_yes
ci\run.cmd build-release -DCMAKE_TOOLCHAIN_FILE="%ANDROID_HOME%\ndk-bundle\build\cmake\android.toolchain.cmake" -DANDROID_ABI=armeabi-v7a -DANDROID_ARM_NEON=TRUE -DKFR_SKIP_TESTS=ON -DCMAKE_BUILD_TYPE=Release
-- job: Android_AArch64_Clang_Release
+- job: Android_AArch64_Clang11_Release
timeoutInMinutes: 180
pool:
vmImage: 'windows-2019'
@@ -291,7 +271,7 @@ jobs:
call "%ANDROID_HOME%\tools\bin\sdkmanager.bat" "ndk-bundle" < %TMP%\always_yes
ci\run.cmd build-release -DCMAKE_TOOLCHAIN_FILE="%ANDROID_HOME%\ndk-bundle\build\cmake\android.toolchain.cmake" -DANDROID_ABI=arm64-v8a -DANDROID_ARM_NEON=TRUE -DKFR_SKIP_TESTS=ON -DCMAKE_BUILD_TYPE=Release
-- job: Windows_MinGW_x86_Clang13_Release
+- job: Windows_MinGW_x86_Clang14_Release
timeoutInMinutes: 180
pool:
vmImage: 'windows-2019'
@@ -306,7 +286,7 @@ jobs:
call C:\tools\msys64\msys2_shell.cmd -defterm -mingw32 -no-start -full-path -here -c "pacman -S --noconfirm mingw32/mingw-w64-i686-ninja"
call C:\tools\msys64\msys2_shell.cmd -defterm -mingw32 -no-start -full-path -here -c "ci/run.sh build-release -DCMAKE_CXX_COMPILER=""C:/Program Files/LLVM/bin/clang++.exe"" -DKFR_ARCH=avx -DCMAKE_CXX_FLAGS=--target=i686-w64-windows-gnu -DCMAKE_BUILD_TYPE=Release"
-- job: Windows_MinGW_x86_64_Clang13_Release
+- job: Windows_MinGW_x86_64_Clang14_Release
timeoutInMinutes: 180
pool:
vmImage: 'windows-2019'
@@ -341,25 +321,25 @@ jobs:
set PATH=C:\sde;%PATH%
ci\run.cmd build-release -DKFR_ARCH_TESTS=ON -DKFR_USE_SDE=ON -DKFR_ARCH=avx512 -DKFR_ENABLE_DFT=OFF -DCMAKE_BUILD_TYPE=Release
-# - job: Windows_MSVC_x86_AVX512_MSVC2019_Release
-# timeoutInMinutes: 180
-# pool:
-# vmImage: 'windows-2019'
-# steps:
-# - script: |
-# choco uninstall mingw
-# choco install ninja
-
-# curl -o "$(Agent.TempDirectory)/sde.zip" -L $(SDE_URL_WINDOWS)
-# "C:\Program Files\7-Zip\7z.exe" x -oC:\sde "$(Agent.TempDirectory)/sde.zip"
-
-# call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars32.bat"
-# set PATH=%PATH:C:\tools\mingw64\bin;=%
-# set PATH=%PATH:C:\Program Files\Git\mingw64\bin;=%
-# set PATH=%PATH:C:\Program Files\LLVM\bin;=%
-# set PATH=%PATH:C:\Strawberry\c\bin;=%
-# set PATH=C:\sde;%PATH%
-# ci\run.cmd build-release -DKFR_ARCH_TESTS=OFF -DKFR_USE_SDE=ON -DKFR_ARCH=avx512 -DKFR_ENABLE_DFT=OFF -DCMAKE_BUILD_TYPE=Release
+- job: Windows_MSVC_x86_AVX512_MSVC2019_Release
+ timeoutInMinutes: 180
+ pool:
+ vmImage: 'windows-2019'
+ steps:
+ - script: |
+ choco uninstall mingw
+ choco install ninja
+
+ curl -o "$(Agent.TempDirectory)/sde.zip" -L $(SDE_URL_WINDOWS)
+ "C:\Program Files\7-Zip\7z.exe" x -oC:\sde "$(Agent.TempDirectory)/sde.zip"
+
+ call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars32.bat"
+ set PATH=%PATH:C:\tools\mingw64\bin;=%
+ set PATH=%PATH:C:\Program Files\Git\mingw64\bin;=%
+ set PATH=%PATH:C:\Program Files\LLVM\bin;=%
+ set PATH=%PATH:C:\Strawberry\c\bin;=%
+ set PATH=C:\sde;%PATH%
+ ci\run.cmd build-release -DKFR_ARCH_TESTS=OFF -DKFR_USE_SDE=ON -DKFR_ARCH=avx512 -DKFR_ENABLE_DFT=OFF -DCMAKE_BUILD_TYPE=Release
- job: Windows_MSVC2022_x86_64_Release
timeoutInMinutes: 180
diff --git a/include/kfr/simd/impl/backend_generic.hpp b/include/kfr/simd/impl/backend_generic.hpp
@@ -581,7 +581,13 @@ KFR_INTRIN_SHUFFLE_LINEAR_START(f32, 2, 4, 2,
KFR_INTRIN_CONVERT(f32, i32, 4, _mm_cvtepi32_ps(x))
KFR_INTRIN_CONVERT(i32, f32, 4, _mm_cvttps_epi32(x))
KFR_INTRIN_CONVERT(i32, f64, 2, simd<i32, 2>::from(_mm_cvtsi128_si64(_mm_cvttpd_epi32(x))))
+#ifdef CMT_COMPILER_IS_MSVC
+KFR_INTRIN_CONVERT(f64, i32, 2,
+ _mm_cvtepi32_pd(_mm_setr_epi32(bitcast_anything<simd_array<i32, 2>>(x).val[0],
+ bitcast_anything<simd_array<i32, 2>>(x).val[1], 0, 0)))
+#else
KFR_INTRIN_CONVERT(f64, i32, 2, _mm_cvtepi32_pd(KFR_mm_setr_epi64x(x.whole, 0)))
+#endif
KFR_INTRIN_CONVERT(i64, f64, 2,
KFR_mm_setr_epi64x(_mm_cvttsd_si64(x), _mm_cvttsd_si64(_mm_unpackhi_pd(x, x))))
KFR_INTRIN_CONVERT(f64, i64, 2,