commit 87de5cb2381c9afb9b6ccef1ba9a16954d4838f2
parent 38e819b1d0de99641e0fdbddafb776df165a2b21
Author: jatinchowdhury18 <jatinchowdhury18@gmail.com>
Date: Wed, 29 Jun 2022 07:23:05 +0100
Small math optimizations for hysteresis processing (#279)
Diffstat:
2 files changed, 45 insertions(+), 23 deletions(-)
diff --git a/Plugin/Source/Headless/UnitTests/HysteresisOpsTest.cpp b/Plugin/Source/Headless/UnitTests/HysteresisOpsTest.cpp
@@ -20,16 +20,26 @@ public:
auto near_zero = x_val < 0.001 && x_val > -0.001;
auto near_zero_vec = ((Vec2) x_val < 0.001) && ((Vec2) x_val > -0.001);
+ using namespace HysteresisOps;
+ HysteresisState hp;
+ hp.Q = (Vec2) x_val;
+ hp.oneOverQ = 1.0 / hp.Q;
+ hp.oneOverQSq = hp.oneOverQ * hp.oneOverQ;
+ hp.oneOverQCubed = hp.oneOverQ * hp.oneOverQSq;
+ hp.coth = (Vec2) coth;
+ hp.cothSq = hp.coth * hp.coth;
+ hp.nearZero = near_zero_vec;
+
auto double_val = ! near_zero ? (coth) - (1.0 / x_val) : x_val / 3.0;
- auto vec_val = HysteresisOps::langevin ((Vec2) x_val, (Vec2) coth, near_zero_vec).get (0);
+ auto vec_val = HysteresisOps::langevin<Vec2> (hp).get (0);
expectWithinAbsoluteError (double_val, vec_val, 1.0e-12);
double_val = ! near_zero ? (1.0 / (x_val * x_val)) - (coth * coth) + 1.0 : (1.0 / 3.0);
- vec_val = HysteresisOps::langevinD ((Vec2) x_val, (Vec2) coth, near_zero_vec).get (0);
+ vec_val = HysteresisOps::langevinD<Vec2> (hp).get (0);
expectWithinAbsoluteError (double_val, vec_val, 1.0e-12);
double_val = ! near_zero ? 2.0 * coth * (coth * coth - 1.0) - (2.0 / (x_val * x_val * x_val)) : (-2.0 / 15.0) * x_val;
- vec_val = HysteresisOps::langevinD2 ((Vec2) x_val, (Vec2) coth, near_zero_vec).get (0);
+ vec_val = HysteresisOps::langevinD2<Vec2> (hp).get (0);
expectWithinAbsoluteError (double_val, vec_val, 1.0e-12);
}
#endif
diff --git a/Plugin/Source/Processors/Hysteresis/HysteresisOps.h b/Plugin/Source/Processors/Hysteresis/HysteresisOps.h
@@ -32,10 +32,12 @@ struct HysteresisState
xsimd::batch<double> Q, M_diff, L_prime, kap1, f1Denom, f1, f2, f3;
xsimd::batch<double> coth = 0.0;
xsimd::batch_bool<double> nearZero;
+ xsimd::batch<double> oneOverQ, oneOverQSq, oneOverQCubed, cothSq, oneOverF3, oneOverF1Denom;
#else
double Q, M_diff, L_prime, kap1, f1Denom, f1, f2, f3;
double coth = 0.0;
bool nearZero = false;
+ double oneOverQ, oneOverQSq, oneOverQCubed, cothSq, oneOverF3, oneOverF1Denom;
#endif
};
@@ -48,37 +50,39 @@ constexpr inline int sign (double x)
}
/** Langevin function */
-template <typename Float, typename Bool>
-static inline Float langevin (Float x, Float coth, Bool nearZero) noexcept
+template <typename Float>
+static inline Float langevin (const HysteresisState& hp) noexcept
{
#if HYSTERESIS_USE_SIMD
- return xsimd::select (nearZero, x / 3.0, coth - ((Float) 1.0 / x));
+ return xsimd::select (hp.nearZero, hp.Q * ONE_THIRD, hp.coth - hp.oneOverQ);
#else
- return ! nearZero ? (coth) - (1.0 / x) : x / 3.0;
+ return ! hp.nearZero ? (hp.coth) - (hp.oneOverQ) : hp.Q * ONE_THIRD;
#endif
}
/** Derivative of Langevin function */
-template <typename Float, typename Bool>
-static inline Float langevinD (Float x, Float coth, Bool nearZero) noexcept
+template <typename Float>
+static inline Float langevinD (const HysteresisState& hp) noexcept
{
#if HYSTERESIS_USE_SIMD
- return xsimd::select (nearZero, (Float) ONE_THIRD, ((Float) 1.0 / (x * x)) - (coth * coth) + 1.0);
+ return xsimd::select (hp.nearZero, (Float) ONE_THIRD, hp.oneOverQSq - hp.cothSq + (Float) 1.0);
#else
- return ! nearZero ? (1.0 / (x * x)) - (coth * coth) + 1.0 : ONE_THIRD;
+ return ! hp.nearZero ? hp.oneOverQSq - hp.cothSq + 1.0 : ONE_THIRD;
#endif
}
/** 2nd derivative of Langevin function */
-template <typename Float, typename Bool>
-static inline Float langevinD2 (Float x, Float coth, Bool nearZero) noexcept
+template <typename Float>
+static inline Float langevinD2 (const HysteresisState& hp) noexcept
{
#if HYSTERESIS_USE_SIMD
- return xsimd::select (nearZero, x * NEG_TWO_OVER_15, (Float) 2.0 * coth * (coth * coth - 1.0) - ((Float) 2.0 / (x * x * x)));
+ return xsimd::select (hp.nearZero,
+ NEG_TWO_OVER_15 * hp.Q,
+ (Float) 2.0 * hp.coth * (hp.cothSq - (Float) 1.0) - ((Float) 2.0 * hp.oneOverQCubed));
#else
- return ! nearZero
- ? 2.0 * coth * (coth * coth - 1.0) - (2.0 / (x * x * x))
- : NEG_TWO_OVER_15 * x;
+ return ! hp.nearZero
+ ? 2.0 * hp.coth * (hp.cothSq - 1.0) - (2.0 * hp.oneOverQCubed)
+ : NEG_TWO_OVER_15 * hp.Q;
#endif
}
@@ -95,6 +99,9 @@ template <typename Float>
static inline Float hysteresisFunc (Float M, Float H, Float H_d, HysteresisState& hp) noexcept
{
hp.Q = (H + M * HysteresisState::alpha) * (1.0 / hp.a);
+ hp.oneOverQ = (Float) 1.0 / hp.Q;
+ hp.oneOverQSq = hp.oneOverQ * hp.oneOverQ;
+ hp.oneOverQCubed = hp.oneOverQ * hp.oneOverQSq;
#if HYSTERESIS_USE_SIMD
hp.coth = (Float) 1.0 / xsimd::tanh (hp.Q);
@@ -104,7 +111,8 @@ static inline Float hysteresisFunc (Float M, Float H, Float H_d, HysteresisState
hp.nearZero = hp.Q < 0.001 && hp.Q > -0.001;
#endif
- hp.M_diff = langevin (hp.Q, hp.coth, hp.nearZero) * hp.M_s - M;
+ hp.cothSq = hp.coth * hp.coth;
+ hp.M_diff = langevin<Float> (hp) * hp.M_s - M;
#if HYSTERESIS_USE_SIMD
const auto delta = xsimd::select (H_d >= 0.0, (Float) 1, (Float) -1);
@@ -116,28 +124,32 @@ static inline Float hysteresisFunc (Float M, Float H, Float H_d, HysteresisState
hp.kap1 = (Float) hp.nc * delta_M;
#endif
- hp.L_prime = langevinD (hp.Q, hp.coth, hp.nearZero);
+ hp.L_prime = langevinD<Float> (hp);
hp.f1Denom = ((Float) hp.nc * delta) * hp.k - (Float) HysteresisState::alpha * hp.M_diff;
+ hp.oneOverF1Denom = (Float) 1.0 / hp.f1Denom;
hp.f1 = hp.kap1 * hp.M_diff / hp.f1Denom;
hp.f2 = hp.L_prime * hp.M_s_oa_tc;
hp.f3 = (Float) 1.0 - (hp.L_prime * hp.M_s_oa_tc_talpha);
+ hp.oneOverF3 = (Float) 1.0 / hp.f3;
- return H_d * (hp.f1 + hp.f2) / hp.f3;
+ return H_d * (hp.f1 + hp.f2) * hp.oneOverF3;
}
// derivative of hysteresis func w.r.t M (depends on cached values from computing hysteresisFunc)
template <typename Float>
static inline Float hysteresisFuncPrime (Float H_d, Float dMdt, HysteresisState& hp) noexcept
{
- const Float L_prime2 = langevinD2 (hp.Q, hp.coth, hp.nearZero);
+ const Float L_prime2 = langevinD2<Float> (hp);
const Float M_diff2 = hp.L_prime * hp.M_s_oa_talpha - 1.0;
- const Float f1_p = hp.kap1 * ((M_diff2 / hp.f1Denom) + hp.M_diff * HysteresisState::alpha * M_diff2 / (hp.f1Denom * hp.f1Denom));
+ Float f1_p = (M_diff2 * hp.oneOverF1Denom);
+ f1_p += hp.M_diff * HysteresisState::alpha * M_diff2 * (hp.oneOverF1Denom * hp.oneOverF1Denom);
+ f1_p *= hp.kap1;
const Float f2_p = L_prime2 * hp.M_s_oaSq_tc_talpha;
const Float f3_p = L_prime2 * (-hp.M_s_oaSq_tc_talphaSq);
- return H_d * (f1_p + f2_p) / hp.f3 - dMdt * f3_p / hp.f3;
+ return (H_d * (f1_p + f2_p) - dMdt * f3_p) * hp.oneOverF3;
}
} // namespace HysteresisOps