saturation.cpp (4340B)
1 /** 2 * KFR (https://www.kfrlib.com) 3 * Copyright (C) 2016-2023 Dan Cazarin 4 * See LICENSE.txt for details 5 */ 6 7 #include <kfr/simd/saturation.hpp> 8 9 namespace kfr 10 { 11 inline namespace CMT_ARCH_NAME 12 { 13 14 template <typename T> 15 bool builtin_add_overflow(T x, T y, T* r) 16 { 17 #if CMT_HAS_BUILTIN(__builtin_add_overflow) || defined CMT_COMPILER_GCC 18 return __builtin_add_overflow(x, y, r); 19 #else 20 *r = x + y; 21 return static_cast<long long>(x) + static_cast<long long>(y) != static_cast<long long>(*r); 22 #endif 23 } 24 template <> 25 bool builtin_add_overflow<u64>(u64 x, u64 y, u64* r) 26 { 27 #if CMT_HAS_BUILTIN(__builtin_uaddll_overflow) || defined CMT_COMPILER_GCC 28 return __builtin_uaddll_overflow(x, y, reinterpret_cast<unsigned long long*>(r)); 29 #else 30 *r = x + y; 31 return x > 0xFFFFFFFFFFFFFFFFull - y; 32 #endif 33 } 34 template <> 35 bool builtin_add_overflow<i64>(i64 x, i64 y, i64* r) 36 { 37 #if CMT_HAS_BUILTIN(__builtin_saddll_overflow) || defined CMT_COMPILER_GCC 38 return __builtin_saddll_overflow(x, y, reinterpret_cast<long long*>(r)); 39 #else 40 *r = x + y; 41 return !((x ^ y) & 0x8000000000000000ull) && ((*r ^ x) & 0x8000000000000000ull); 42 #endif 43 } 44 template <typename T> 45 bool builtin_sub_overflow(T x, T y, T* r) 46 { 47 #if CMT_HAS_BUILTIN(__builtin_sub_overflow) || defined CMT_COMPILER_GCC 48 return __builtin_sub_overflow(x, y, r); 49 #else 50 *r = x - y; 51 return static_cast<long long>(x) - static_cast<long long>(y) != static_cast<long long>(*r); 52 #endif 53 } 54 template <> 55 bool builtin_sub_overflow<u64>(u64 x, u64 y, u64* r) 56 { 57 #if CMT_HAS_BUILTIN(__builtin_usubll_overflow) || defined CMT_COMPILER_GCC 58 return __builtin_usubll_overflow(x, y, reinterpret_cast<unsigned long long*>(r)); 59 #else 60 *r = x - y; 61 return x < y; 62 #endif 63 } 64 template <> 65 bool builtin_sub_overflow<i64>(i64 x, i64 y, i64* r) 66 { 67 #if CMT_HAS_BUILTIN(__builtin_ssubll_overflow) || defined CMT_COMPILER_GCC 68 return __builtin_ssubll_overflow(x, y, reinterpret_cast<long long*>(r)); 69 #else 70 *r = x - y; 71 return ((x ^ y) & 0x8000000000000000ull) && ((*r ^ x) & 0x8000000000000000ull); 72 #endif 73 } 74 template <typename T> 75 inline T ref_satadd(T x, T y) 76 { 77 T result; 78 if (builtin_add_overflow(x, y, &result)) 79 return x < 0 ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); 80 else 81 return result; 82 } 83 84 template <typename T> 85 inline T ref_satsub(T x, T y) 86 { 87 T result; 88 if (builtin_sub_overflow(x, y, &result)) 89 return x < y ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); 90 else 91 return result; 92 } 93 94 TEST(intrin_satadd_satsub) 95 { 96 testo::matrix(named("type") = cconcat(signed_vector_types<vec>, unsigned_vector_types<vec>), 97 [](auto type) 98 { 99 using T = typename decltype(type)::type; 100 using Tsub = subtype<T>; 101 const T min = std::numeric_limits<Tsub>::min(); 102 const T max = std::numeric_limits<Tsub>::max(); 103 CHECK(kfr::satadd(min, min) == 104 apply([](auto x, auto y) -> decltype(x) { return ref_satadd(x, y); }, min, min)); 105 CHECK(kfr::satadd(max, max) == 106 apply([](auto x, auto y) -> decltype(x) { return ref_satadd(x, y); }, max, max)); 107 CHECK(kfr::satadd(min, max) == 108 apply([](auto x, auto y) -> decltype(x) { return ref_satadd(x, y); }, min, max)); 109 CHECK(kfr::satadd(max, min) == 110 apply([](auto x, auto y) -> decltype(x) { return ref_satadd(x, y); }, max, min)); 111 112 CHECK(kfr::satsub(min, min) == 113 apply([](auto x, auto y) -> decltype(x) { return ref_satsub(x, y); }, min, min)); 114 CHECK(kfr::satsub(max, max) == 115 apply([](auto x, auto y) -> decltype(x) { return ref_satsub(x, y); }, max, max)); 116 CHECK(kfr::satsub(min, max) == 117 apply([](auto x, auto y) -> decltype(x) { return ref_satsub(x, y); }, min, max)); 118 CHECK(kfr::satsub(max, min) == 119 apply([](auto x, auto y) -> decltype(x) { return ref_satsub(x, y); }, max, min)); 120 }); 121 } 122 } // namespace CMT_ARCH_NAME 123 } // namespace kfr