Use CMT_LIKELY where it's profitable - kfr - Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)

commit 2dc71ae8ac32aaec0b5e00fdb37d9a1c0e26db90
parent bd8ccf749266b671ed17b63d9113abf3599b7cde
Author: d.levin256@gmail.com <d.levin256@gmail.com>
Date:   Mon, 18 Jul 2022 19:16:21 +0100

Use CMT_LIKELY where it's profitable

Diffstat:
M include/kfr/base/basic_expressions.hpp  | 12 ++++++------
M include/kfr/base/conversion.hpp  | 6 +++---
M include/kfr/base/expression.hpp  | 2 +-
M include/kfr/base/fraction.hpp  | 2 +-
M include/kfr/base/univector.hpp  | 8 ++++----
M include/kfr/cident.h  | 16 ++++++++--------
M include/kfr/dsp/fracdelay.hpp  | 2 +-
M include/kfr/dsp/iir_design.hpp  | 3 ++-
M include/kfr/dsp/special.hpp  | 2 +-
M include/kfr/dsp/units.hpp  | 2 +-
M include/kfr/math/gamma.hpp  | 2 +-
M include/kfr/simd/platform.hpp  | 2 +-

12 files changed, 30 insertions(+), 29 deletions(-)
diff --git a/include/kfr/base/basic_expressions.hpp b/include/kfr/base/basic_expressions.hpp
@@ -348,7 +348,7 @@ public:
         std::size_t sindex =
             size_t(std::upper_bound(std::begin(self.segments), std::end(self.segments), index) - 1 -
                    std::begin(self.segments));
-        if (self.segments[sindex + 1] - index >= N)
+        if (CMT_LIKELY(self.segments[sindex + 1] - index >= N))
             return get_elements(self, cinput, index, sindex - 1, y);
         else
         {
@@ -487,11 +487,11 @@ struct expression_padded : expression_with_arguments<E>
     KFR_INTRINSIC friend vec<value_type, N> get_elements(const expression_padded& self, cinput_t cinput,
                                                          size_t index, vec_shape<value_type, N> y)
     {
-        if (index >= self.input_size)
+        if (CMT_UNLIKELY(index >= self.input_size))
         {
             return self.fill_value;
         }
-        else if (index + N <= self.input_size)
+        else if (CMT_LIKELY(index + N <= self.input_size))
         {
             return self.argument_first(cinput, index, y);
         }
@@ -500,7 +500,7 @@ struct expression_padded : expression_with_arguments<E>
             vec<value_type, N> x{};
             for (size_t i = 0; i < N; i++)
             {
-                if (index + i < self.input_size)
+                if (CMT_LIKELY(index + i < self.input_size))
                     x[i] = self.argument_first(cinput, index + i, vec_shape<value_type, 1>()).front();
                 else
                     x[i] = self.fill_value;
@@ -627,7 +627,7 @@ struct task_partition
     size_t count;
     size_t operator()(size_t index)
     {
-        if (index >= count)
+        if (CMT_UNLIKELY(index >= count))
             return 0;
         return process(output, input, index * chunk_size,
                        index == count - 1 ? size - (count - 1) * chunk_size : chunk_size);
@@ -677,7 +677,7 @@ struct concatenate_expression : expression_with_arguments<E1, E2>
         {
             return self.argument(cinput, csize<1>, index - size0, y);
         }
-        else if (index + N <= size0)
+        else if (CMT_LIKELY(index + N <= size0))
         {
             return self.argument(cinput, csize<0>, index, y);
         }
diff --git a/include/kfr/base/conversion.hpp b/include/kfr/base/conversion.hpp
@@ -202,7 +202,7 @@ void deinterleave(Tout* out[], const Tin* in, size_t channels, size_t size)
 template <typename Tout, univector_tag Tag1, univector_tag Tag2, typename Tin, univector_tag Tag3>
 void deinterleave(univector2d<Tout, Tag1, Tag2>& out, const univector<Tin, Tag3>& in)
 {
-    if (in.empty() || out.empty())
+    if (CMT_UNLIKELY(in.empty() || out.empty()))
         return;
     std::vector<Tout*> ptrs(out.size());
     for (size_t i = 0; i < out.size(); ++i)
@@ -228,7 +228,7 @@ void interleave(Tout* out, const Tin* in[], size_t channels, size_t size)
 template <typename Tout, univector_tag Tag1, typename Tin, univector_tag Tag2, univector_tag Tag3>
 void interleave(univector<Tout, Tag1>& out, const univector2d<Tin, Tag2, Tag3>& in)
 {
-    if (in.empty() || out.empty())
+    if (CMT_UNLIKELY(in.empty() || out.empty()))
         return;
     std::vector<const Tin*> ptrs(in.size());
     for (size_t i = 0; i < in.size(); ++i)
@@ -242,7 +242,7 @@ void interleave(univector<Tout, Tag1>& out, const univector2d<Tin, Tag2, Tag3>& 
 template <typename Tin, univector_tag Tag1, univector_tag Tag2>
 univector<Tin> interleave(const univector2d<Tin, Tag1, Tag2>& in)
 {
-    if (in.empty())
+    if (CMT_UNLIKELY(in.empty()))
         return {};
     univector<Tin> result(in.size() * in[0].size());
     interleave(result, in);
diff --git a/include/kfr/base/expression.hpp b/include/kfr/base/expression.hpp
@@ -436,7 +436,7 @@ static size_t process(OutputExpr&& out, const InputExpr& in, size_t start = 0,
     static_assert(is_input_expression<InputExpr>, "Fn must be an expression");
 
     size = size_sub(size_min(out.size(), in.size(), size_add(size, start)), start);
-    if (size == 0 || size == infinite_size)
+    if (CMT_UNLIKELY(size == 0 || size == infinite_size))
         return size;
     out.begin_block(coutput, size);
     in.begin_block(cinput, size);
diff --git a/include/kfr/base/fraction.hpp b/include/kfr/base/fraction.hpp
@@ -36,7 +36,7 @@ struct fraction
     fraction(i64 num = 0, i64 den = 1) : numerator(num), denominator(den) { normalize(); }
     void normalize()
     {
-        if (denominator < 0)
+        if (CMT_UNLIKELY(denominator < 0))
         {
             denominator = -denominator;
             numerator   = -numerator;
diff --git a/include/kfr/base/univector.hpp b/include/kfr/base/univector.hpp
@@ -185,7 +185,7 @@ struct univector_base<T, Class, true> : input_expression, output_expression
 
     void ringbuf_write(size_t& cursor, const T* src, size_t srcsize)
     {
-        if (srcsize == 0)
+        if (CMT_UNLIKELY(srcsize == 0))
             return;
         // skip redundant data
         const size_t size = get_size();
@@ -197,7 +197,7 @@ struct univector_base<T, Class, true> : input_expression, output_expression
         }
         const size_t fsize = size - cursor;
         // one fragment
-        if (srcsize <= fsize)
+        if (CMT_LIKELY(srcsize <= fsize))
         {
             copy(data + cursor, src, srcsize);
         }
@@ -238,7 +238,7 @@ struct univector_base<T, Class, true> : input_expression, output_expression
     }
     void ringbuf_read(size_t& cursor, T* dest, size_t destsize) const
     {
-        if (destsize == 0)
+        if (CMT_UNLIKELY(destsize == 0))
             return;
         // skip redundant data
         const size_t size = get_size();
@@ -250,7 +250,7 @@ struct univector_base<T, Class, true> : input_expression, output_expression
         }
         const size_t fsize = size - cursor;
         // one fragment
-        if (destsize <= fsize)
+        if (CMT_LIKELY(destsize <= fsize))
         {
             copy(dest, data + cursor, destsize);
         }
diff --git a/include/kfr/cident.h b/include/kfr/cident.h
@@ -346,23 +346,23 @@ extern char* gets(char* __s);
 #if defined(CMT_GNU_ATTRIBUTES)
 
 #define CMT_NODEBUG
-// __attribute__((__nodebug__))
 
-// GCC 9 broke attributes on lambdas.
-#if defined(NDEBUG) && (!defined(__GNUC__) || __GNUC__ != 9)
 #define CMT_ALWAYS_INLINE __attribute__((__always_inline__))
-#else
-#define CMT_ALWAYS_INLINE
-#endif
 #define CMT_INLINE __inline__ CMT_ALWAYS_INLINE
 #define CMT_INLINE_MEMBER CMT_ALWAYS_INLINE
+#if defined(CMT_COMPILER_GCC) &&                                                                             \
+    (CMT_GCC_VERSION >= 900 && CMT_GCC_VERSION < 904 || CMT_GCC_VERSION >= 1000 && CMT_GCC_VERSION < 1002)
+// Workaround for GCC 9/10 bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90333
+#define CMT_INLINE_LAMBDA
+#else
 #define CMT_INLINE_LAMBDA CMT_INLINE_MEMBER
+#endif
 #define CMT_NOINLINE __attribute__((__noinline__))
 #define CMT_FLATTEN __attribute__((__flatten__))
 #define CMT_RESTRICT __restrict__
 
-#define CMT_LIKELY(...) __builtin_expect(__VA_ARGS__, 1)
-#define CMT_UNLIKELY(...) __builtin_expect(__VA_ARGS__, 0)
+#define CMT_LIKELY(...) __builtin_expect(!!(__VA_ARGS__), 1)
+#define CMT_UNLIKELY(...) __builtin_expect(!!(__VA_ARGS__), 0)
 
 #elif defined(CMT_MSVC_ATTRIBUTES)
 
diff --git a/include/kfr/dsp/fracdelay.hpp b/include/kfr/dsp/fracdelay.hpp
@@ -36,7 +36,7 @@ inline namespace CMT_ARCH_NAME
 template <typename T, typename E1>
 KFR_INTRINSIC internal::expression_short_fir<2, T, value_type_of<E1>, E1> fracdelay(E1&& e1, T delay)
 {
-    if (delay < 0)
+    if (CMT_UNLIKELY(delay < 0))
         delay = 0;
     univector<T, 2> taps({ 1 - delay, delay });
     return internal::expression_short_fir<2, T, value_type_of<E1>, E1>(std::forward<E1>(e1), taps);
diff --git a/include/kfr/dsp/iir_design.hpp b/include/kfr/dsp/iir_design.hpp
@@ -897,7 +897,8 @@ template <typename T>
 KFR_FUNCTION univector<complex<T>> cplxreal(const univector<complex<T>>& list)
 {
     univector<complex<T>> x = list;
-    std::sort(x.begin(), x.end(), [](const complex<T>& a, const complex<T>& b) { return a.real() < b.real(); });
+    std::sort(x.begin(), x.end(),
+              [](const complex<T>& a, const complex<T>& b) { return a.real() < b.real(); });
     T tol                        = std::numeric_limits<T>::epsilon() * 100;
     univector<complex<T>> result = x;
     for (size_t i = result.size(); i > 1; i--)
diff --git a/include/kfr/dsp/special.hpp b/include/kfr/dsp/special.hpp
@@ -41,7 +41,7 @@ template <typename T = int>
 auto unitimpulse()
 {
     return lambda<T>([](cinput_t, size_t index, auto x) {
-        if (index == 0)
+        if (CMT_UNLIKELY(index == 0))
             return onoff(x);
         else
             return zerovector(x);
diff --git a/include/kfr/dsp/units.hpp b/include/kfr/dsp/units.hpp
@@ -86,7 +86,7 @@ KFR_INTRINSIC Tout power_to_dB(const T& x)
 template <typename T, typename Tout = flt_type<T>>
 KFR_INTRINSIC Tout dB_to_power(const T& x)
 {
-    if (x == -c_infinity<Tout>)
+    if (CMT_UNLIKELY(x == -c_infinity<Tout>))
         return 0.0;
     else
         return exp(x * (c_log_10<Tout> / 10.0));
diff --git a/include/kfr/math/gamma.hpp b/include/kfr/math/gamma.hpp
@@ -87,7 +87,7 @@ constexpr inline uint64_t factorial_table[21] = {
 /// @brief Returns the factorial of an argument. Returns max(uint64_t) if does not fit to uint64_t
 constexpr uint64_t factorial(int n)
 {
-    if (n < 0 || n > 20)
+    if (CMT_LIKELY(n < 0 || n > 20))
         return std::numeric_limits<uint64_t>::max();
     return factorial_table[n];
 }
diff --git a/include/kfr/simd/platform.hpp b/include/kfr/simd/platform.hpp
@@ -103,7 +103,7 @@ CMT_UNUSED static const char* cpu_name(cpu_t set)
 #ifdef CMT_ARCH_ARM
     static const char* names[] = { "generic", "neon", "neon64" };
 #endif
-    if (set >= cpu_t::lowest && set <= cpu_t::highest)
+    if (CMT_LIKELY(set >= cpu_t::lowest && set <= cpu_t::highest))
         return names[static_cast<size_t>(set)];
     return "-";
 }

	kfr Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)
	Log | Files | Refs | README

M include/kfr/base/basic_expressions.hpp  | 12 ++++++------
M include/kfr/base/conversion.hpp  | 6 +++---
M include/kfr/base/expression.hpp  | 2 +-
M include/kfr/base/fraction.hpp  | 2 +-
M include/kfr/base/univector.hpp  | 8 ++++----
M include/kfr/cident.h  | 16 ++++++++--------
M include/kfr/dsp/fracdelay.hpp  | 2 +-
M include/kfr/dsp/iir_design.hpp  | 3 ++-
M include/kfr/dsp/special.hpp  | 2 +-
M include/kfr/dsp/units.hpp  | 2 +-
M include/kfr/math/gamma.hpp  | 2 +-
M include/kfr/simd/platform.hpp  | 2 +-