kfr

Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)
Log | Files | Refs | README

commit 2dc71ae8ac32aaec0b5e00fdb37d9a1c0e26db90
parent bd8ccf749266b671ed17b63d9113abf3599b7cde
Author: d.levin256@gmail.com <d.levin256@gmail.com>
Date:   Mon, 18 Jul 2022 19:16:21 +0100

Use CMT_LIKELY where it's profitable

Diffstat:
M include/kfr/base/basic_expressions.hpp | 12 ++++++------
M include/kfr/base/conversion.hpp | 6 +++---
M include/kfr/base/expression.hpp | 2 +-
M include/kfr/base/fraction.hpp | 2 +-
M include/kfr/base/univector.hpp | 8 ++++----
M include/kfr/cident.h | 16 ++++++++--------
M include/kfr/dsp/fracdelay.hpp | 2 +-
M include/kfr/dsp/iir_design.hpp | 3 ++-
M include/kfr/dsp/special.hpp | 2 +-
M include/kfr/dsp/units.hpp | 2 +-
M include/kfr/math/gamma.hpp | 2 +-
M include/kfr/simd/platform.hpp | 2 +-
12 files changed, 30 insertions(+), 29 deletions(-)

diff --git a/include/kfr/base/basic_expressions.hpp b/include/kfr/base/basic_expressions.hpp @@ -348,7 +348,7 @@ public: std::size_t sindex = size_t(std::upper_bound(std::begin(self.segments), std::end(self.segments), index) - 1 - std::begin(self.segments)); - if (self.segments[sindex + 1] - index >= N) + if (CMT_LIKELY(self.segments[sindex + 1] - index >= N)) return get_elements(self, cinput, index, sindex - 1, y); else { @@ -487,11 +487,11 @@ struct expression_padded : expression_with_arguments<E> KFR_INTRINSIC friend vec<value_type, N> get_elements(const expression_padded& self, cinput_t cinput, size_t index, vec_shape<value_type, N> y) { - if (index >= self.input_size) + if (CMT_UNLIKELY(index >= self.input_size)) { return self.fill_value; } - else if (index + N <= self.input_size) + else if (CMT_LIKELY(index + N <= self.input_size)) { return self.argument_first(cinput, index, y); } @@ -500,7 +500,7 @@ struct expression_padded : expression_with_arguments<E> vec<value_type, N> x{}; for (size_t i = 0; i < N; i++) { - if (index + i < self.input_size) + if (CMT_LIKELY(index + i < self.input_size)) x[i] = self.argument_first(cinput, index + i, vec_shape<value_type, 1>()).front(); else x[i] = self.fill_value; @@ -627,7 +627,7 @@ struct task_partition size_t count; size_t operator()(size_t index) { - if (index >= count) + if (CMT_UNLIKELY(index >= count)) return 0; return process(output, input, index * chunk_size, index == count - 1 ? 
size - (count - 1) * chunk_size : chunk_size); @@ -677,7 +677,7 @@ struct concatenate_expression : expression_with_arguments<E1, E2> { return self.argument(cinput, csize<1>, index - size0, y); } - else if (index + N <= size0) + else if (CMT_LIKELY(index + N <= size0)) { return self.argument(cinput, csize<0>, index, y); } diff --git a/include/kfr/base/conversion.hpp b/include/kfr/base/conversion.hpp @@ -202,7 +202,7 @@ void deinterleave(Tout* out[], const Tin* in, size_t channels, size_t size) template <typename Tout, univector_tag Tag1, univector_tag Tag2, typename Tin, univector_tag Tag3> void deinterleave(univector2d<Tout, Tag1, Tag2>& out, const univector<Tin, Tag3>& in) { - if (in.empty() || out.empty()) + if (CMT_UNLIKELY(in.empty() || out.empty())) return; std::vector<Tout*> ptrs(out.size()); for (size_t i = 0; i < out.size(); ++i) @@ -228,7 +228,7 @@ void interleave(Tout* out, const Tin* in[], size_t channels, size_t size) template <typename Tout, univector_tag Tag1, typename Tin, univector_tag Tag2, univector_tag Tag3> void interleave(univector<Tout, Tag1>& out, const univector2d<Tin, Tag2, Tag3>& in) { - if (in.empty() || out.empty()) + if (CMT_UNLIKELY(in.empty() || out.empty())) return; std::vector<const Tin*> ptrs(in.size()); for (size_t i = 0; i < in.size(); ++i) @@ -242,7 +242,7 @@ void interleave(univector<Tout, Tag1>& out, const univector2d<Tin, Tag2, Tag3>& template <typename Tin, univector_tag Tag1, univector_tag Tag2> univector<Tin> interleave(const univector2d<Tin, Tag1, Tag2>& in) { - if (in.empty()) + if (CMT_UNLIKELY(in.empty())) return {}; univector<Tin> result(in.size() * in[0].size()); interleave(result, in); diff --git a/include/kfr/base/expression.hpp b/include/kfr/base/expression.hpp @@ -436,7 +436,7 @@ static size_t process(OutputExpr&& out, const InputExpr& in, size_t start = 0, static_assert(is_input_expression<InputExpr>, "Fn must be an expression"); size = size_sub(size_min(out.size(), in.size(), size_add(size, start)), start); - 
if (size == 0 || size == infinite_size) + if (CMT_UNLIKELY(size == 0 || size == infinite_size)) return size; out.begin_block(coutput, size); in.begin_block(cinput, size); diff --git a/include/kfr/base/fraction.hpp b/include/kfr/base/fraction.hpp @@ -36,7 +36,7 @@ struct fraction fraction(i64 num = 0, i64 den = 1) : numerator(num), denominator(den) { normalize(); } void normalize() { - if (denominator < 0) + if (CMT_UNLIKELY(denominator < 0)) { denominator = -denominator; numerator = -numerator; diff --git a/include/kfr/base/univector.hpp b/include/kfr/base/univector.hpp @@ -185,7 +185,7 @@ struct univector_base<T, Class, true> : input_expression, output_expression void ringbuf_write(size_t& cursor, const T* src, size_t srcsize) { - if (srcsize == 0) + if (CMT_UNLIKELY(srcsize == 0)) return; // skip redundant data const size_t size = get_size(); @@ -197,7 +197,7 @@ struct univector_base<T, Class, true> : input_expression, output_expression } const size_t fsize = size - cursor; // one fragment - if (srcsize <= fsize) + if (CMT_LIKELY(srcsize <= fsize)) { copy(data + cursor, src, srcsize); } @@ -238,7 +238,7 @@ struct univector_base<T, Class, true> : input_expression, output_expression } void ringbuf_read(size_t& cursor, T* dest, size_t destsize) const { - if (destsize == 0) + if (CMT_UNLIKELY(destsize == 0)) return; // skip redundant data const size_t size = get_size(); @@ -250,7 +250,7 @@ struct univector_base<T, Class, true> : input_expression, output_expression } const size_t fsize = size - cursor; // one fragment - if (destsize <= fsize) + if (CMT_LIKELY(destsize <= fsize)) { copy(dest, data + cursor, destsize); } diff --git a/include/kfr/cident.h b/include/kfr/cident.h @@ -346,23 +346,23 @@ extern char* gets(char* __s); #if defined(CMT_GNU_ATTRIBUTES) #define CMT_NODEBUG -// __attribute__((__nodebug__)) -// GCC 9 broke attributes on lambdas. 
-#if defined(NDEBUG) && (!defined(__GNUC__) || __GNUC__ != 9) #define CMT_ALWAYS_INLINE __attribute__((__always_inline__)) -#else -#define CMT_ALWAYS_INLINE -#endif #define CMT_INLINE __inline__ CMT_ALWAYS_INLINE #define CMT_INLINE_MEMBER CMT_ALWAYS_INLINE +#if defined(CMT_COMPILER_GCC) && \ + (CMT_GCC_VERSION >= 900 && CMT_GCC_VERSION < 904 || CMT_GCC_VERSION >= 1000 && CMT_GCC_VERSION < 1002) +// Workaround for GCC 9/10 bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90333 +#define CMT_INLINE_LAMBDA +#else #define CMT_INLINE_LAMBDA CMT_INLINE_MEMBER +#endif #define CMT_NOINLINE __attribute__((__noinline__)) #define CMT_FLATTEN __attribute__((__flatten__)) #define CMT_RESTRICT __restrict__ -#define CMT_LIKELY(...) __builtin_expect(__VA_ARGS__, 1) -#define CMT_UNLIKELY(...) __builtin_expect(__VA_ARGS__, 0) +#define CMT_LIKELY(...) __builtin_expect(!!(__VA_ARGS__), 1) +#define CMT_UNLIKELY(...) __builtin_expect(!!(__VA_ARGS__), 0) #elif defined(CMT_MSVC_ATTRIBUTES) diff --git a/include/kfr/dsp/fracdelay.hpp b/include/kfr/dsp/fracdelay.hpp @@ -36,7 +36,7 @@ inline namespace CMT_ARCH_NAME template <typename T, typename E1> KFR_INTRINSIC internal::expression_short_fir<2, T, value_type_of<E1>, E1> fracdelay(E1&& e1, T delay) { - if (delay < 0) + if (CMT_UNLIKELY(delay < 0)) delay = 0; univector<T, 2> taps({ 1 - delay, delay }); return internal::expression_short_fir<2, T, value_type_of<E1>, E1>(std::forward<E1>(e1), taps); diff --git a/include/kfr/dsp/iir_design.hpp b/include/kfr/dsp/iir_design.hpp @@ -897,7 +897,8 @@ template <typename T> KFR_FUNCTION univector<complex<T>> cplxreal(const univector<complex<T>>& list) { univector<complex<T>> x = list; - std::sort(x.begin(), x.end(), [](const complex<T>& a, const complex<T>& b) { return a.real() < b.real(); }); + std::sort(x.begin(), x.end(), + [](const complex<T>& a, const complex<T>& b) { return a.real() < b.real(); }); T tol = std::numeric_limits<T>::epsilon() * 100; univector<complex<T>> result = x; for (size_t i = 
result.size(); i > 1; i--) diff --git a/include/kfr/dsp/special.hpp b/include/kfr/dsp/special.hpp @@ -41,7 +41,7 @@ template <typename T = int> auto unitimpulse() { return lambda<T>([](cinput_t, size_t index, auto x) { - if (index == 0) + if (CMT_UNLIKELY(index == 0)) return onoff(x); else return zerovector(x); diff --git a/include/kfr/dsp/units.hpp b/include/kfr/dsp/units.hpp @@ -86,7 +86,7 @@ KFR_INTRINSIC Tout power_to_dB(const T& x) template <typename T, typename Tout = flt_type<T>> KFR_INTRINSIC Tout dB_to_power(const T& x) { - if (x == -c_infinity<Tout>) + if (CMT_UNLIKELY(x == -c_infinity<Tout>)) return 0.0; else return exp(x * (c_log_10<Tout> / 10.0)); diff --git a/include/kfr/math/gamma.hpp b/include/kfr/math/gamma.hpp @@ -87,7 +87,7 @@ constexpr inline uint64_t factorial_table[21] = { /// @brief Returns the factorial of an argument. Returns max(uint64_t) if does not fit to uint64_t constexpr uint64_t factorial(int n) { - if (n < 0 || n > 20) + if (CMT_LIKELY(n < 0 || n > 20)) return std::numeric_limits<uint64_t>::max(); return factorial_table[n]; } diff --git a/include/kfr/simd/platform.hpp b/include/kfr/simd/platform.hpp @@ -103,7 +103,7 @@ CMT_UNUSED static const char* cpu_name(cpu_t set) #ifdef CMT_ARCH_ARM static const char* names[] = { "generic", "neon", "neon64" }; #endif - if (set >= cpu_t::lowest && set <= cpu_t::highest) + if (CMT_LIKELY(set >= cpu_t::lowest && set <= cpu_t::highest)) return names[static_cast<size_t>(set)]; return "-"; }