kfr

Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)
Log | Files | Refs | README

saturation.cpp (4340B)


      1 /**
      2  * KFR (https://www.kfrlib.com)
      3  * Copyright (C) 2016-2023 Dan Cazarin
      4  * See LICENSE.txt for details
      5  */
      6 
      7 #include <kfr/simd/saturation.hpp>
      8 
      9 namespace kfr
     10 {
     11 inline namespace CMT_ARCH_NAME
     12 {
     13 
     14 template <typename T>
     15 bool builtin_add_overflow(T x, T y, T* r)
     16 {
     17 #if CMT_HAS_BUILTIN(__builtin_add_overflow) || defined CMT_COMPILER_GCC
     18     return __builtin_add_overflow(x, y, r);
     19 #else
     20     *r = x + y;
     21     return static_cast<long long>(x) + static_cast<long long>(y) != static_cast<long long>(*r);
     22 #endif
     23 }
     24 template <>
     25 bool builtin_add_overflow<u64>(u64 x, u64 y, u64* r)
     26 {
     27 #if CMT_HAS_BUILTIN(__builtin_uaddll_overflow) || defined CMT_COMPILER_GCC
     28     return __builtin_uaddll_overflow(x, y, reinterpret_cast<unsigned long long*>(r));
     29 #else
     30     *r = x + y;
     31     return x > 0xFFFFFFFFFFFFFFFFull - y;
     32 #endif
     33 }
     34 template <>
     35 bool builtin_add_overflow<i64>(i64 x, i64 y, i64* r)
     36 {
     37 #if CMT_HAS_BUILTIN(__builtin_saddll_overflow) || defined CMT_COMPILER_GCC
     38     return __builtin_saddll_overflow(x, y, reinterpret_cast<long long*>(r));
     39 #else
     40     *r = x + y;
     41     return !((x ^ y) & 0x8000000000000000ull) && ((*r ^ x) & 0x8000000000000000ull);
     42 #endif
     43 }
     44 template <typename T>
     45 bool builtin_sub_overflow(T x, T y, T* r)
     46 {
     47 #if CMT_HAS_BUILTIN(__builtin_sub_overflow) || defined CMT_COMPILER_GCC
     48     return __builtin_sub_overflow(x, y, r);
     49 #else
     50     *r = x - y;
     51     return static_cast<long long>(x) - static_cast<long long>(y) != static_cast<long long>(*r);
     52 #endif
     53 }
     54 template <>
     55 bool builtin_sub_overflow<u64>(u64 x, u64 y, u64* r)
     56 {
     57 #if CMT_HAS_BUILTIN(__builtin_usubll_overflow) || defined CMT_COMPILER_GCC
     58     return __builtin_usubll_overflow(x, y, reinterpret_cast<unsigned long long*>(r));
     59 #else
     60     *r = x - y;
     61     return x < y;
     62 #endif
     63 }
     64 template <>
     65 bool builtin_sub_overflow<i64>(i64 x, i64 y, i64* r)
     66 {
     67 #if CMT_HAS_BUILTIN(__builtin_ssubll_overflow) || defined CMT_COMPILER_GCC
     68     return __builtin_ssubll_overflow(x, y, reinterpret_cast<long long*>(r));
     69 #else
     70     *r = x - y;
     71     return ((x ^ y) & 0x8000000000000000ull) && ((*r ^ x) & 0x8000000000000000ull);
     72 #endif
     73 }
     74 template <typename T>
     75 inline T ref_satadd(T x, T y)
     76 {
     77     T result;
     78     if (builtin_add_overflow(x, y, &result))
     79         return x < 0 ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
     80     else
     81         return result;
     82 }
     83 
     84 template <typename T>
     85 inline T ref_satsub(T x, T y)
     86 {
     87     T result;
     88     if (builtin_sub_overflow(x, y, &result))
     89         return x < y ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
     90     else
     91         return result;
     92 }
     93 
     94 TEST(intrin_satadd_satsub)
     95 {
     96     testo::matrix(named("type") = cconcat(signed_vector_types<vec>, unsigned_vector_types<vec>),
     97                   [](auto type)
     98                   {
     99                       using T     = typename decltype(type)::type;
    100                       using Tsub  = subtype<T>;
    101                       const T min = std::numeric_limits<Tsub>::min();
    102                       const T max = std::numeric_limits<Tsub>::max();
    103                       CHECK(kfr::satadd(min, min) ==
    104                             apply([](auto x, auto y) -> decltype(x) { return ref_satadd(x, y); }, min, min));
    105                       CHECK(kfr::satadd(max, max) ==
    106                             apply([](auto x, auto y) -> decltype(x) { return ref_satadd(x, y); }, max, max));
    107                       CHECK(kfr::satadd(min, max) ==
    108                             apply([](auto x, auto y) -> decltype(x) { return ref_satadd(x, y); }, min, max));
    109                       CHECK(kfr::satadd(max, min) ==
    110                             apply([](auto x, auto y) -> decltype(x) { return ref_satadd(x, y); }, max, min));
    111 
    112                       CHECK(kfr::satsub(min, min) ==
    113                             apply([](auto x, auto y) -> decltype(x) { return ref_satsub(x, y); }, min, min));
    114                       CHECK(kfr::satsub(max, max) ==
    115                             apply([](auto x, auto y) -> decltype(x) { return ref_satsub(x, y); }, max, max));
    116                       CHECK(kfr::satsub(min, max) ==
    117                             apply([](auto x, auto y) -> decltype(x) { return ref_satsub(x, y); }, min, max));
    118                       CHECK(kfr::satsub(max, min) ==
    119                             apply([](auto x, auto y) -> decltype(x) { return ref_satsub(x, y); }, max, min));
    120                   });
    121 }
    122 } // namespace CMT_ARCH_NAME
    123 } // namespace kfr