Small tracker scope optimization + code cleanup - ft2-clone

commit c76bf4cb46d92930413d34ee8b47dc2d0c2b856c
parent 22a8376772b6e264828cbde7d478483288fa867d
Author: Olav Sørensen <olav.sorensen@live.no>
Date:   Sat, 28 Dec 2024 17:03:26 +0100

Small tracker scope optimization + code cleanup

Diffstat:
M src/ft2_audio.c  | 66 ++++++++++++++++++++++--------------------------------------------
M src/ft2_audio.h  | 3 +--
M src/ft2_header.h  | 2 +-
M src/ft2_replayer.c  | 3 +--
M src/ft2_replayer.h  | 5 ++---
M src/ft2_sample_ed.c  | 2 +-
M src/scopes/ft2_scope_macros.h  | 295 ++++++++++++++++++++++++++++++++++++++++++-------------------------------------
M src/scopes/ft2_scopedraw.c  | 144 +++++++++++++++++++++++++++++++++++++++++++++++--------------------------------
M src/scopes/ft2_scopedraw.h  | 1 -
M src/scopes/ft2_scopes.c  | 31 +++++++++++++++----------------
M src/scopes/ft2_scopes.h  | 16 +++++++++++-----

11 files changed, 295 insertions(+), 273 deletions(-)
diff --git a/src/ft2_audio.c b/src/ft2_audio.c
@@ -37,17 +37,6 @@ chSync_t chSync;
 pattSync_t pattSync;
 volatile bool pattQueueClearing, chQueueClearing;
 
-void resetCachedMixerVars(void)
-{
-	channel_t *ch = channel;
-	for (int32_t i = 0; i < MAX_CHANNELS; i++, ch++)
-		ch->oldFinalPeriod = -1;
-
-	voice_t *v = voice;
-	for (int32_t i = 0; i < MAX_CHANNELS*2; i++, v++)
-		v->oldDelta = 0;
-}
-
 void stopVoice(int32_t i)
 {
 	voice_t *v;
@@ -332,7 +321,7 @@ static void voiceTrigger(int32_t ch, sample_t *s, int32_t position)
 		v->leftEdgeTaps8 = s->leftEdgeTapSamples8 + MAX_LEFT_TAPS;
 	}
 
-	v->hasLooped = false; // for sinc interpolation special case
+	v->hasLooped = false; // for cubic/sinc interpolation special case
 	v->samplingBackwards = false;
 	v->loopType = loopType;
 	v->sampleEnd = (loopType == LOOP_OFF) ? length : loopEnd;
@@ -380,8 +369,8 @@ void updateVoices(void)
 		{
 			v->fVolume = ch->fFinalVol;
 
-			// scale volume for scopes (0..128)
-			const int32_t scopeVolume = (int32_t)((ch->fFinalVol * 128.0f) + 0.5f); // rounded
+			// set scope volume (scaled)
+			const int32_t scopeVolume = (int32_t)((ch->fFinalVol * (SCOPE_HEIGHT*(1<<2))) + 0.5f); // rounded
 			v->scopeVolume = (uint8_t)scopeVolume;
 		}
 
@@ -393,32 +382,25 @@ void updateVoices(void)
 
 		if (status & IS_Period)
 		{
-			// use cached values when possible
-			if (ch->finalPeriod != ch->oldFinalPeriod)
+			const double dVoiceHz = dPeriod2Hz(ch->finalPeriod);
+
+			// set voice delta
+			v->delta = (int64_t)((dVoiceHz * audio.dHz2MixDeltaMul) + 0.5); // Hz -> fixed-point delta (rounded)
+
+			// set scope delta
+			const double dHz2ScopeDeltaMul = SCOPE_FRAC_SCALE / (double)SCOPE_HZ;
+			v->scopeDelta = (int64_t)((dVoiceHz * dHz2ScopeDeltaMul) + 0.5); // Hz -> fixed-point delta (rounded)
+
+			if (audio.sincInterpolation)
 			{
-				ch->oldFinalPeriod = ch->finalPeriod;
-
-				const double dHz = dPeriod2Hz(ch->finalPeriod);
-
-				// set voice delta
-				const uint64_t delta = v->oldDelta = (int64_t)((dHz * audio.dHz2MixDeltaMul) + 0.5); // Hz -> fixed-point delta (rounded)
-
-				if (audio.sincInterpolation) // decide which sinc LUT to use according to the resampling ratio
-				{
-					if (delta <= sincDownsample1Ratio)
-						v->fSincLUT = fKaiserSinc;
-					else if (delta <= sincDownsample2Ratio)
-						v->fSincLUT = fDownSample1;
-					else
-						v->fSincLUT = fDownSample2;
-				}
-
-				// set scope delta
-				const double dHz2ScopeDeltaMul = SCOPE_FRAC_SCALE / (double)SCOPE_HZ;
-				v->scopeDelta = (int64_t)((dHz * dHz2ScopeDeltaMul) + 0.5); // Hz -> fixed-point delta (rounded)
+				// decide which sinc LUT to use according to the resampling ratio
+				if (v->delta <= sincDownsample1Ratio)
+					v->fSincLUT = fKaiserSinc;
+				else if (v->delta <= sincDownsample2Ratio)
+					v->fSincLUT = fDownSample1;
+				else
+					v->fSincLUT = fDownSample2;
 			}
-
-			v->delta = v->oldDelta;
 		}
 
 		if (status & IS_Trigger)
@@ -431,8 +413,6 @@ static void sendSamples16BitStereo(void *stream, uint32_t sampleBlockLength)
 	int16_t *streamPtr16 = (int16_t *)stream;
 	for (uint32_t i = 0; i < sampleBlockLength; i++)
 	{
-		// TODO: This could use dithering (a proper implementation, that is...)
-
 		int32_t L = (int32_t)(audio.fMixBufferL[i] * fAudioNormalizeMul);
 		int32_t R = (int32_t)(audio.fMixBufferR[i] * fAudioNormalizeMul);
 
@@ -443,8 +423,7 @@ static void sendSamples16BitStereo(void *stream, uint32_t sampleBlockLength)
 		*streamPtr16++ = (int16_t)R;
 
 		// clear what we read from the mixing buffer
-		audio.fMixBufferL[i] = 0.0f;
-		audio.fMixBufferR[i] = 0.0f;
+		audio.fMixBufferL[i] = audio.fMixBufferR[i] = 0.0f;
 	}
 }
 
@@ -460,8 +439,7 @@ static void sendSamples32BitFloatStereo(void *stream, uint32_t sampleBlockLength
 		*fStreamPtr32++ = CLAMP(fR, -1.0f, 1.0f);
 
 		// clear what we read from the mixing buffer
-		audio.fMixBufferL[i] = 0.0f;
-		audio.fMixBufferR[i] = 0.0f;
+		audio.fMixBufferL[i] = audio.fMixBufferR[i] = 0.0f;
 	}
 }
 
diff --git a/src/ft2_audio.h b/src/ft2_audio.h
@@ -64,7 +64,7 @@ typedef struct
 	uint8_t mixFuncOffset, panning, loopType, scopeVolume;
 	int32_t position, sampleEnd, loopStart, loopLength, oldPeriod;
 	uint32_t volumeRampLength;
-	uint64_t positionFrac, delta, oldDelta, scopeDelta;
+	uint64_t positionFrac, delta, scopeDelta;
 
 	// if (loopEnabled && hasLooped && samplingPos <= loopStart+MAX_LEFT_TAPS) readFixedTapsFromThisPointer();
 	const int8_t *leftEdgeTaps8;
@@ -109,7 +109,6 @@ typedef struct chSync_t
 	chSyncData_t data[SYNC_QUEUE_LEN+1];
 } chSync_t;
 
-void resetCachedMixerVars(void);
 int32_t pattQueueReadSize(void);
 int32_t pattQueueWriteSize(void);
 bool pattQueuePush(pattSyncData_t t);
diff --git a/src/ft2_header.h b/src/ft2_header.h
@@ -12,7 +12,7 @@
 #endif
 #include "ft2_replayer.h"
 
-#define PROG_VER_STR "1.93"
+#define PROG_VER_STR "1.94"
 
 // do NOT change these! It will only mess things up...
 
diff --git a/src/ft2_replayer.c b/src/ft2_replayer.c
@@ -403,7 +403,7 @@ void calcReplayerVars(int32_t audioFreq)
 
 	audio.dHz2MixDeltaMul = (double)MIXER_FRAC_SCALE / audioFreq;
 	audio.quickVolRampSamples = (uint32_t)round(audioFreq / (1000.0 / FT2_QUICK_VOLRAMP_MILLISECONDS));
-	audio.fQuickVolRampSamplesMul = (float)(1.0 / (double)audio.quickVolRampSamples);
+	audio.fQuickVolRampSamplesMul = (float)(1.0 / audio.quickVolRampSamples);
 
 	for (int32_t bpm = MIN_BPM; bpm <= MAX_BPM; bpm++)
 	{
@@ -3131,7 +3131,6 @@ void stopVoices(void)
 	editor.curPlaySmp = 255;
 
 	stopAllScopes();
-	resetCachedMixerVars();
 
 	// wait for scope thread to finish, making sure pointers aren't illegal
 	while (editor.scopeThreadBusy);
diff --git a/src/ft2_replayer.h b/src/ft2_replayer.h
@@ -194,9 +194,8 @@ note_t;
 
 typedef struct syncedChannel_t // used for audio/video sync queue (pack to save RAM)
 {
-	uint8_t status, pianoNoteNum, smpNum, instrNum;
+	uint8_t status, pianoNoteNum, smpNum, instrNum, scopeVolume;
 	int32_t smpStartPos;
-	uint8_t scopeVolume;
 	uint64_t scopeDelta;
 }
 #ifdef __GNUC__
@@ -256,7 +255,7 @@ typedef struct channel_t
 	uint16_t volEnvTick, panEnvTick, autoVibAmp, autoVibSweep;
 	uint16_t midiVibDepth;
 	int32_t fadeoutVol, fadeoutSpeed;
-	int32_t oldFinalPeriod, smpStartPos;
+	int32_t smpStartPos;
 
 	float fFinalVol, fVolEnvDelta, fPanEnvDelta, fVolEnvValue, fPanEnvValue;
 
diff --git a/src/ft2_sample_ed.c b/src/ft2_sample_ed.c
@@ -3068,7 +3068,7 @@ static void writeSamplePosLine(void)
 
 	if (editor.curInstr == ins && editor.curSmp == smp)
 	{
-		const int32_t smpPos = getSamplePosition(editor.curSmpChannel);
+		const int32_t smpPos = getSamplePositionFromScopes(editor.curSmpChannel);
 		if (smpPos != -1)
 		{
 			// convert sample position to screen position
diff --git a/src/scopes/ft2_scope_macros.h b/src/scopes/ft2_scope_macros.h
@@ -2,41 +2,20 @@
 
 #include <stdint.h>
 #include "../ft2_header.h"
-#include "../mixer/ft2_windowed_sinc.h"
 #include "ft2_scopes.h"
 
 /* ----------------------------------------------------------------------- */
 /*                          SCOPE DRAWING MACROS                           */
 /* ----------------------------------------------------------------------- */
 
-#define SCOPE_REGS_NO_LOOP \
-	const int32_t volume = s->volume * SCOPE_HEIGHT; \
-	const int32_t sampleEnd = s->sampleEnd; \
-	const uint64_t delta = s->drawDelta; \
+#define SCOPE_INIT \
 	const uint32_t color = video.palette[PAL_PATTEXT]; \
 	uint32_t width = x + w; \
 	int32_t sample; \
 	int32_t position = s->position; \
 	uint64_t positionFrac = 0;
 
-#define SCOPE_REGS_LOOP \
-	const int32_t volume = s->volume * SCOPE_HEIGHT; \
-	const int32_t sampleEnd = s->sampleEnd; \
-	const int32_t loopStart = s->loopStart; \
-	const int32_t loopLength = s->loopLength; \
-	const uint64_t delta = s->drawDelta; \
-	const uint32_t color = video.palette[PAL_PATTEXT]; \
-	uint32_t width = x + w; \
-	int32_t sample; \
-	int32_t position = s->position; \
-	uint64_t positionFrac = 0;
-
-#define SCOPE_REGS_BIDI \
-	const int32_t volume = s->volume * SCOPE_HEIGHT; \
-	const int32_t sampleEnd = s->sampleEnd; \
-	const int32_t loopStart = s->loopStart; \
-	const int32_t loopLength = s->loopLength; \
-	const uint64_t delta = s->drawDelta; \
+#define SCOPE_INIT_BIDI \
 	const uint32_t color = video.palette[PAL_PATTEXT]; \
 	uint32_t width = x + w; \
 	int32_t sample; \
@@ -44,18 +23,13 @@
 	uint64_t positionFrac = 0; \
 	bool samplingBackwards = s->samplingBackwards;
 
-#define LINED_SCOPE_REGS_NO_LOOP \
-	SCOPE_REGS_NO_LOOP \
+#define LINED_SCOPE_INIT \
+	SCOPE_INIT \
 	int32_t smpY1, smpY2; \
 	width--;
 
-#define LINED_SCOPE_REGS_LOOP \
-	SCOPE_REGS_LOOP \
-	int32_t smpY1, smpY2; \
-	width--;
-
-#define LINED_SCOPE_REGS_BIDI \
-	SCOPE_REGS_BIDI \
+#define LINED_SCOPE_INIT_BIDI \
+	SCOPE_INIT_BIDI \
 	int32_t smpY1, smpY2; \
 	width--;
 
@@ -63,107 +37,123 @@
 ** so that out-of-bounds reads get the correct interpolation tap data.
 */
 
+#define NEAREST_NEIGHGBOR8 \
+{ \
+	sample = s8[0] << 8; \
+} \
+
+#define LINEAR_INTERPOLATION8(frac) \
+{ \
+	const int32_t f = (frac) >> (SCOPE_FRAC_BITS-15); \
+	sample = (s8[0] << 8) + ((((s8[1] - s8[0]) << 8) * f) >> 15); \
+} \
+
+#define NEAREST_NEIGHGBOR16 \
+{ \
+	sample = s16[0]; \
+} \
+
+#define LINEAR_INTERPOLATION16(frac) \
+{ \
+	const int32_t f = (frac) >> (SCOPE_FRAC_BITS-15); \
+	sample = s16[0] + (((s16[1] - s16[0]) * f) >> 15); \
+} \
+
+#define CUBIC_SMP8(frac) \
+	const int16_t *t = scopeIntrpLUT + (((frac) >> (SCOPE_FRAC_BITS-SCOPE_INTRP_PHASES_BITS)) * SCOPE_INTRP_TAPS); \
+	\
+	sample = ((s8[-2] * t[0]) + \
+	          (s8[-1] * t[1]) + \
+	          ( s8[0] * t[2]) + \
+	          ( s8[1] * t[3]) + \
+	          ( s8[2] * t[4]) + \
+	          ( s8[3] * t[5])) >> (SCOPE_INTRP_SCALE_BITS-8);
+
+#define CUBIC_SMP16(frac) \
+	const int16_t *t = scopeIntrpLUT + (((frac) >> (SCOPE_FRAC_BITS-SCOPE_INTRP_PHASES_BITS)) * SCOPE_INTRP_TAPS); \
+	\
+	sample = ((s16[-2] * t[0]) + \
+	          (s16[-1] * t[1]) + \
+	          ( s16[0] * t[2]) + \
+	          ( s16[1] * t[3]) + \
+	          ( s16[2] * t[4]) + \
+	          ( s16[3] * t[5])) >> SCOPE_INTRP_SCALE_BITS;
+
+#define CUBIC_INTERPOLATION8(frac) \
+{ \
+	CUBIC_SMP8(frac) \
+} \
+
+#define CUBIC_INTERPOLATION16(frac) \
+{ \
+	CUBIC_SMP16(frac) \
+} \
+
+#define CUBIC_INTERPOLATION8_LOOP(pos, frac) \
+{ \
+	if (s->hasLooped && pos <= s->loopStart+MAX_LEFT_TAPS) \
+		s8 = s->leftEdgeTaps8 + (pos - s->loopStart); \
+	\
+	CUBIC_SMP8(frac) \
+} \
+
+#define CUBIC_INTERPOLATION16_LOOP(pos, frac) \
+{ \
+	if (s->hasLooped && pos <= s->loopStart+MAX_LEFT_TAPS) \
+		s16 = s->leftEdgeTaps16 + (pos - s->loopStart); \
+	\
+	CUBIC_SMP16(frac) \
+} \
+
 #define INTERPOLATE_SMP8(pos, frac) \
 	const int8_t *s8 = s->base8 + pos; \
 	if (config.interpolation == INTERPOLATION_DISABLED) \
-	{ \
-		sample = s8[0] << 8; \
-	} \
+		NEAREST_NEIGHGBOR8 \
 	else if (config.interpolation == INTERPOLATION_LINEAR) \
-	{ \
-		const int32_t f = (frac) >> (SCOPE_FRAC_BITS-15); \
-		sample = (s8[0] << 8) + ((((s8[1] - s8[0]) << 8) * f) >> 15); \
-	} \
-	else /* interpolate scopes using 6-tap cubic B-spline */ \
-	{ \
-		const float *t = fScopeIntrpLUT + (((frac) >> (SCOPE_FRAC_BITS-SCOPE_INTRP_PHASES_BITS)) * SCOPE_INTRP_TAPS); \
-		\
-		/* get correct negative tap sample points */ \
-		int32_t p1 = pos - 2; \
-		int32_t p2 = pos - 1; \
-		float fSample; \
-		if (s->loopType != LOOP_DISABLED && s->hasLooped && (int32_t)pos-2 < (int32_t)s->loopStart) \
-		{ \
-			const int32_t overflow1 = (int32_t)s->loopStart - p1; \
-			const int32_t overflow2 = (int32_t)s->loopStart - p2; \
-			if (s->loopType == LOOP_BIDI) /* direction is always backwards at this point */ \
-			{ \
-				p1 = s->loopStart + overflow1; \
-				if (overflow2 > 0) \
-					p2 = s->loopStart + overflow2; \
-			} \
-			else \
-			{ \
-				p1 = s->loopEnd - overflow1; \
-				if (overflow2 > 0) \
-					p2 = s->loopEnd - overflow2; \
-			} \
-		} \
-		\
-		fSample = (s->base8[p1] * t[0]) + \
-		          (s->base8[p2] * t[1]) + \
-		          (       s8[0] * t[2]) + \
-		          (       s8[1] * t[3]) + \
-		          (       s8[2] * t[4]) + \
-		          (       s8[3] * t[5]); \
-		sample = (int32_t)(fSample * 256.0f); \
-	}
+		LINEAR_INTERPOLATION8(frac) \
+	else \
+		CUBIC_INTERPOLATION8(frac) \
+	sample = (sample * s->volume) >> (16+2);
 
 #define INTERPOLATE_SMP16(pos, frac) \
 	const int16_t *s16 = s->base16 + pos; \
 	if (config.interpolation == INTERPOLATION_DISABLED) \
-	{ \
-		sample = s16[0]; \
-	} \
+		NEAREST_NEIGHGBOR16 \
 	else if (config.interpolation == INTERPOLATION_LINEAR) \
-	{ \
-		const int32_t f = (frac) >> (SCOPE_FRAC_BITS-15); \
-		sample = s16[0] + (((s16[1] - s16[0]) * f) >> 15); \
-	} \
-	else /* interpolate scopes using 6-tap cubic B-spline */ \
-	{ \
-		const float *t = fScopeIntrpLUT + (((frac) >> (SCOPE_FRAC_BITS-SCOPE_INTRP_PHASES_BITS)) * SCOPE_INTRP_TAPS); \
-		\
-		/* get correct negative tap sample points */ \
-		int32_t p1 = pos - 2; \
-		int32_t p2 = pos - 1; \
-		float fSample; \
-		if (s->loopType != LOOP_DISABLED && s->hasLooped && (int32_t)pos-2 < (int32_t)s->loopStart) \
-		{ \
-			const int32_t overflow1 = (int32_t)s->loopStart - p1; \
-			const int32_t overflow2 = (int32_t)s->loopStart - p2; \
-			if (s->loopType == LOOP_BIDI) /* direction is always backwards at this point */ \
-			{ \
-				p1 = s->loopStart + overflow1; \
-				if (overflow2 > 0) \
-					p2 = s->loopStart + overflow2; \
-			} \
-			else \
-			{ \
-				p1 = s->loopEnd - overflow1; \
-				if (overflow2 > 0) \
-					p2 = s->loopEnd - overflow2; \
-			} \
-		} \
-		\
-		fSample = (s->base16[p1] * t[0]) + \
-		          (s->base16[p2] * t[1]) + \
-		          (       s16[0] * t[2]) + \
-		          (       s16[1] * t[3]) + \
-		          (       s16[2] * t[4]) + \
-		          (       s16[3] * t[5]); \
-		\
-		sample = (int32_t)fSample; \
-	}
+		LINEAR_INTERPOLATION16(frac) \
+	else \
+		CUBIC_INTERPOLATION16(frac) \
+	sample = (sample * s->volume) >> (16+2);
+
+#define INTERPOLATE_SMP8_LOOP(pos, frac) \
+	const int8_t *s8 = s->base8 + pos; \
+	if (config.interpolation == INTERPOLATION_DISABLED) \
+		NEAREST_NEIGHGBOR8 \
+	else if (config.interpolation == INTERPOLATION_LINEAR) \
+		LINEAR_INTERPOLATION8(frac) \
+	else \
+		CUBIC_INTERPOLATION8_LOOP(pos, frac) \
+	sample = (sample * s->volume) >> (16+2);
+
+#define INTERPOLATE_SMP16_LOOP(pos, frac) \
+	const int16_t *s16 = s->base16 + pos; \
+	if (config.interpolation == INTERPOLATION_DISABLED) \
+		NEAREST_NEIGHGBOR16 \
+	else if (config.interpolation == INTERPOLATION_LINEAR) \
+		LINEAR_INTERPOLATION16(frac) \
+	else \
+		CUBIC_INTERPOLATION16_LOOP(pos, frac) \
+	sample = (sample * s->volume) >> (16+2);
+
 #define SCOPE_GET_SMP8 \
 	if (s->active) \
-		sample = (s->base8[position] * volume) >> (8+7); \
+		sample = (s->base8[position] * s->volume) >> (8+2); \
 	else \
 		sample = 0;
 
 #define SCOPE_GET_SMP16 \
 	if (s->active) \
-		sample = (s->base16[position] * volume) >> (16+7); \
+		sample = (s->base16[position] * s->volume) >> (16+2); \
 	else \
 		sample = 0;
 
@@ -171,7 +161,7 @@
 	if (s->active) \
 	{ \
 		GET_BIDI_POSITION \
-		sample = (s->base8[actualPos] * volume) >> (8+7); \
+		sample = (s->base8[actualPos] * s->volume) >> (8+2); \
 	} \
 	else \
 	{ \
@@ -182,7 +172,7 @@
 	if (s->active) \
 	{ \
 		GET_BIDI_POSITION \
-		sample = (s->base16[actualPos] * volume) >> (16+7); \
+		sample = (s->base16[actualPos] * s->volume) >> (16+2); \
 	} \
 	else \
 	{ \
@@ -193,7 +183,6 @@
 	if (s->active) \
 	{ \
 		INTERPOLATE_SMP8(position, (uint32_t)positionFrac) \
-		sample = (sample * volume) >> (16+7); \
 	} \
 	else \
 	{ \
@@ -204,7 +193,26 @@
 	if (s->active) \
 	{ \
 		INTERPOLATE_SMP16(position, (uint32_t)positionFrac) \
-		sample = (sample * volume) >> (16+7); \
+	} \
+	else \
+	{ \
+		sample = 0; \
+	}
+
+#define SCOPE_GET_INTERPOLATED_SMP8_LOOP \
+	if (s->active) \
+	{ \
+		INTERPOLATE_SMP8_LOOP(position, (uint32_t)positionFrac) \
+	} \
+	else \
+	{ \
+		sample = 0; \
+	}
+
+#define SCOPE_GET_INTERPOLATED_SMP16_LOOP \
+	if (s->active) \
+	{ \
+		INTERPOLATE_SMP16_LOOP(position, (uint32_t)positionFrac) \
 	} \
 	else \
 	{ \
@@ -213,7 +221,7 @@
 
 #define GET_BIDI_POSITION \
 	if (samplingBackwards) \
-		actualPos = (sampleEnd - 1) - (position - loopStart); \
+		actualPos = (s->sampleEnd - 1) - (position - s->loopStart); \
 	else \
 		actualPos = position;
 
@@ -221,8 +229,7 @@
 	if (s->active) \
 	{ \
 		GET_BIDI_POSITION \
-		INTERPOLATE_SMP8(actualPos, samplingBackwards ? ((uint32_t)positionFrac ^ UINT32_MAX) : (uint32_t)positionFrac) \
-		sample = (sample * volume) >> (16+7); \
+		INTERPOLATE_SMP8_LOOP(actualPos, samplingBackwards ? ((uint32_t)positionFrac ^ UINT32_MAX) : (uint32_t)positionFrac) \
 	} \
 	else \
 	{ \
@@ -233,8 +240,7 @@
 	if (s->active) \
 	{ \
 		GET_BIDI_POSITION \
-		INTERPOLATE_SMP16(actualPos, samplingBackwards ? ((uint32_t)positionFrac ^ UINT32_MAX) : (uint32_t)positionFrac) \
-		sample = (sample * volume) >> (16+7); \
+		INTERPOLATE_SMP16_LOOP(actualPos, samplingBackwards ? ((uint32_t)positionFrac ^ UINT32_MAX) : (uint32_t)positionFrac) \
 	} \
 	else \
 	{ \
@@ -242,7 +248,7 @@
 	}
 
 #define SCOPE_UPDATE_READPOS \
-	positionFrac += delta; \
+	positionFrac += s->drawDelta; \
 	position += positionFrac >> 32; \
 	positionFrac &= UINT32_MAX;
 
@@ -259,6 +265,16 @@
 	smpY1 = lineY - sample; \
 	SCOPE_UPDATE_READPOS
 
+#define LINED_SCOPE_PREPARE_SMP8_LOOP \
+	SCOPE_GET_INTERPOLATED_SMP8_LOOP \
+	smpY1 = lineY - sample; \
+	SCOPE_UPDATE_READPOS
+
+#define LINED_SCOPE_PREPARE_SMP16_LOOP \
+	SCOPE_GET_INTERPOLATED_SMP16_LOOP \
+	smpY1 = lineY - sample; \
+	SCOPE_UPDATE_READPOS
+
 #define LINED_SCOPE_PREPARE_SMP8_BIDI \
 	SCOPE_GET_INTERPOLATED_SMP8_BIDI \
 	smpY1 = lineY - sample; \
@@ -275,34 +291,35 @@
 	smpY1 = smpY2;
 
 #define SCOPE_HANDLE_POS_NO_LOOP \
-	if (position >= sampleEnd) \
+	if (position >= s->sampleEnd) \
 		s->active = false;
 
 #define SCOPE_HANDLE_POS_LOOP \
-	if (position >= sampleEnd) \
+	if (position >= s->sampleEnd) \
 	{ \
-		if (loopLength >= 2) \
-			position = loopStart + ((position - sampleEnd) % loopLength); \
+		if (s->loopLength >= 2) \
+			position = s->loopStart + ((uint32_t)(position - s->sampleEnd) % (uint32_t)s->loopLength); \
 		else \
-			position = loopStart; \
+			position = s->loopStart; \
 		\
 		s->hasLooped = true; \
 	}
 
 #define SCOPE_HANDLE_POS_BIDI \
-	if (position >= sampleEnd) \
+	if (position >= s->sampleEnd) \
 	{ \
-		if (loopLength >= 2) \
+		if (s->loopLength >= 2) \
 		{ \
-			const uint32_t overflow = position - sampleEnd; \
-			const uint32_t cycles = overflow / loopLength; \
-			const uint32_t phase = overflow % loopLength; \
-			position = loopStart + phase; \
+			const uint32_t overflow = position - s->sampleEnd; \
+			const uint32_t cycles = overflow / (uint32_t)s->loopLength; \
+			const uint32_t phase = overflow % (uint32_t)s->loopLength; \
+			\
+			position = s->loopStart + phase; \
 			samplingBackwards ^= !(cycles & 1); \
 		} \
 		else \
 		{ \
-			position = loopStart; \
+			position = s->loopStart; \
 		} \
 		\
 		s->hasLooped = true; \
diff --git a/src/scopes/ft2_scopedraw.c b/src/scopes/ft2_scopedraw.c
@@ -10,18 +10,25 @@
 #include "ft2_scopedraw.h"
 #include "ft2_scope_macros.h"
 
-static float *fScopeIntrpLUT;
+static int16_t *scopeIntrpLUT;
 
-static void scopeLine(int32_t x1, int32_t y1, int32_t y2, uint32_t color);
+static void scopeLine(int32_t x1, int32_t y1, int32_t y2, const uint32_t color);
 
 bool calcScopeIntrpLUT(void)
 {
-	fScopeIntrpLUT = (float *)malloc(SCOPE_INTRP_TAPS * SCOPE_INTRP_PHASES * sizeof (float));
-	if (fScopeIntrpLUT == NULL)
+	scopeIntrpLUT = (int16_t *)malloc(SCOPE_INTRP_TAPS * SCOPE_INTRP_PHASES * sizeof (int16_t));
+	if (scopeIntrpLUT == NULL)
 		return false;
 
-	// 6-point cubic B-spline (No overshoot w/ low filter cut-off. Very suitable for scopes.)
-	float *fPtr = fScopeIntrpLUT;
+	/* Several tests have been done to figure out what interpolation method is most suitable
+	** for the tracker scopes. After testing linear, cubic, Gaussian and windowed-sinc
+	** interpolation, I have come to the conclusion that 6-point cubic B-spline is the best.
+	** This interpolation method also has no overshoot.
+	*/
+
+	// 6-point cubic B-spline (no overshoot)
+
+	int16_t *ptr16 = scopeIntrpLUT;
 	for (int32_t i = 0; i < SCOPE_INTRP_PHASES; i++)
 	{
 		const double x1 = i * (1.0 / SCOPE_INTRP_PHASES);
@@ -37,12 +44,13 @@ bool calcScopeIntrpLUT(void)
 		double t5 = (-(1.0/ 24.0) * x5) + ( (1.0/24.0) * x4) + ( (1.0/12.0) * x3) + ( (1.0/12.0) * x2) + ( (1.0/24.0) * x1) + ( 1.0/120.0);
 		double t6 =   (1.0/120.0) * x5;
 
-		*fPtr++ = (float)t1;
-		*fPtr++ = (float)t2;
-		*fPtr++ = (float)t3;
-		*fPtr++ = (float)t4;
-		*fPtr++ = (float)t5;
-		*fPtr++ = (float)t6;
+		// important: truncate, do not round (would cause scope overflow)
+		*ptr16++ = (int16_t)(t1 * SCOPE_INTRP_SCALE);
+		*ptr16++ = (int16_t)(t2 * SCOPE_INTRP_SCALE);
+		*ptr16++ = (int16_t)(t3 * SCOPE_INTRP_SCALE);
+		*ptr16++ = (int16_t)(t4 * SCOPE_INTRP_SCALE);
+		*ptr16++ = (int16_t)(t5 * SCOPE_INTRP_SCALE);
+		*ptr16++ = (int16_t)(t6 * SCOPE_INTRP_SCALE);
 	}
 
 	return true;
@@ -50,20 +58,20 @@ bool calcScopeIntrpLUT(void)
 
 void freeScopeIntrpLUT(void)
 {
-	if (fScopeIntrpLUT != NULL)
+	if (scopeIntrpLUT != NULL)
 	{
-		free(fScopeIntrpLUT);
-		fScopeIntrpLUT = NULL;
+		free(scopeIntrpLUT);
+		scopeIntrpLUT = NULL;
 	}
 }
 
 /* ----------------------------------------------------------------------- */
-/*                         SCOPE DRAWING ROUTINES                          */
+/*                    NON-LINED SCOPE DRAWING ROUTINES                     */
 /* ----------------------------------------------------------------------- */
 
 static void scopeDrawNoLoop_8bit(scope_t *s, uint32_t x, uint32_t lineY, uint32_t w)
 {
-	SCOPE_REGS_NO_LOOP
+	SCOPE_INIT
 
 	for (; x < width; x++)
 	{
@@ -76,7 +84,7 @@ static void scopeDrawNoLoop_8bit(scope_t *s, uint32_t x, uint32_t lineY, uint32_
 
 static void scopeDrawLoop_8bit(scope_t *s, uint32_t x, uint32_t lineY, uint32_t w)
 {
-	SCOPE_REGS_LOOP
+	SCOPE_INIT
 
 	for (; x < width; x++)
 	{
@@ -87,9 +95,9 @@ static void scopeDrawLoop_8bit(scope_t *s, uint32_t x, uint32_t lineY, uint32_t 
 	}
 }
 
-static void scopeDrawPingPong_8bit(scope_t *s, uint32_t x, uint32_t lineY, uint32_t w)
+static void scopeDrawBidiLoop_8bit(scope_t *s, uint32_t x, uint32_t lineY, uint32_t w)
 {
-	SCOPE_REGS_BIDI
+	SCOPE_INIT_BIDI
 
 	for (; x < width; x++)
 	{
@@ -102,7 +110,7 @@ static void scopeDrawPingPong_8bit(scope_t *s, uint32_t x, uint32_t lineY, uint3
 
 static void scopeDrawNoLoop_16bit(scope_t *s, uint32_t x, uint32_t lineY, uint32_t w)
 {
-	SCOPE_REGS_NO_LOOP
+	SCOPE_INIT
 
 	for (; x < width; x++)
 	{
@@ -115,7 +123,7 @@ static void scopeDrawNoLoop_16bit(scope_t *s, uint32_t x, uint32_t lineY, uint32
 
 static void scopeDrawLoop_16bit(scope_t *s, uint32_t x, uint32_t lineY, uint32_t w)
 {
-	SCOPE_REGS_LOOP
+	SCOPE_INIT
 
 	for (; x < width; x++)
 	{
@@ -126,9 +134,9 @@ static void scopeDrawLoop_16bit(scope_t *s, uint32_t x, uint32_t lineY, uint32_t
 	}
 }
 
-static void scopeDrawPingPong_16bit(scope_t *s, uint32_t x, uint32_t lineY, uint32_t w)
+static void scopeDrawBidiLoop_16bit(scope_t *s, uint32_t x, uint32_t lineY, uint32_t w)
 {
-	SCOPE_REGS_BIDI
+	SCOPE_INIT_BIDI
 
 	for (; x < width; x++)
 	{
@@ -140,12 +148,12 @@ static void scopeDrawPingPong_16bit(scope_t *s, uint32_t x, uint32_t lineY, uint
 }
 
 /* ----------------------------------------------------------------------- */
-/*                   INTERPOLATED SCOPE DRAWING ROUTINES                   */
+/*                       LINED SCOPE DRAWING ROUTINES                      */
 /* ----------------------------------------------------------------------- */
 
 static void linedScopeDrawNoLoop_8bit(scope_t *s, uint32_t x, uint32_t lineY, uint32_t w)
 {
-	LINED_SCOPE_REGS_NO_LOOP
+	LINED_SCOPE_INIT
 	LINED_SCOPE_PREPARE_SMP8
 	SCOPE_HANDLE_POS_NO_LOOP
 
@@ -160,22 +168,22 @@ static void linedScopeDrawNoLoop_8bit(scope_t *s, uint32_t x, uint32_t lineY, ui
 
 static void linedScopeDrawLoop_8bit(scope_t *s, uint32_t x, uint32_t lineY, uint32_t w)
 {
-	LINED_SCOPE_REGS_LOOP
-	LINED_SCOPE_PREPARE_SMP8
+	LINED_SCOPE_INIT
+	LINED_SCOPE_PREPARE_SMP8_LOOP
 	SCOPE_HANDLE_POS_LOOP
 
 	for (; x < width; x++)
 	{
-		SCOPE_GET_INTERPOLATED_SMP8
+		SCOPE_GET_INTERPOLATED_SMP8_LOOP
 		LINED_SCOPE_DRAW_SMP
 		SCOPE_UPDATE_READPOS
 		SCOPE_HANDLE_POS_LOOP
 	}
 }
 
-static void linedScopeDrawPingPong_8bit(scope_t *s, uint32_t x, uint32_t lineY, uint32_t w)
+static void linedScopeDrawBidiLoop_8bit(scope_t *s, uint32_t x, uint32_t lineY, uint32_t w)
 {
-	LINED_SCOPE_REGS_BIDI
+	LINED_SCOPE_INIT_BIDI
 	LINED_SCOPE_PREPARE_SMP8_BIDI
 	SCOPE_HANDLE_POS_BIDI
 
@@ -190,7 +198,7 @@ static void linedScopeDrawPingPong_8bit(scope_t *s, uint32_t x, uint32_t lineY, 
 
 static void linedScopeDrawNoLoop_16bit(scope_t *s, uint32_t x, uint32_t lineY, uint32_t w)
 {
-	LINED_SCOPE_REGS_NO_LOOP
+	LINED_SCOPE_INIT
 	LINED_SCOPE_PREPARE_SMP16
 	SCOPE_HANDLE_POS_NO_LOOP
 
@@ -205,22 +213,22 @@ static void linedScopeDrawNoLoop_16bit(scope_t *s, uint32_t x, uint32_t lineY, u
 
 static void linedScopeDrawLoop_16bit(scope_t *s, uint32_t x, uint32_t lineY, uint32_t w)
 {
-	LINED_SCOPE_REGS_LOOP
-	LINED_SCOPE_PREPARE_SMP16
+	LINED_SCOPE_INIT
+	LINED_SCOPE_PREPARE_SMP16_LOOP
 	SCOPE_HANDLE_POS_LOOP
 
 	for (; x < width; x++)
 	{
-		SCOPE_GET_INTERPOLATED_SMP16
+		SCOPE_GET_INTERPOLATED_SMP16_LOOP
 		LINED_SCOPE_DRAW_SMP
 		SCOPE_UPDATE_READPOS
 		SCOPE_HANDLE_POS_LOOP
 	}
 }
 
-static void linedScopeDrawPingPong_16bit(scope_t *s, uint32_t x, uint32_t lineY, uint32_t w)
+static void linedScopeDrawBidiLoop_16bit(scope_t *s, uint32_t x, uint32_t lineY, uint32_t w)
 {
-	LINED_SCOPE_REGS_BIDI
+	LINED_SCOPE_INIT_BIDI
 	LINED_SCOPE_PREPARE_SMP16_BIDI
 	SCOPE_HANDLE_POS_BIDI
 
@@ -235,42 +243,60 @@ static void linedScopeDrawPingPong_16bit(scope_t *s, uint32_t x, uint32_t lineY,
 
 // -----------------------------------------------------------------------
 
-static void scopeLine(int32_t x1, int32_t y1, int32_t y2, uint32_t color)
+static void scopeLine(int32_t x1, int32_t y1, int32_t y2, const uint32_t color)
 {
-	const int32_t dy = y2 - y1;
-	const int32_t sy = SGN(dy);
-	const int32_t pitch = sy * SCREEN_W;
+#ifdef _DEBUG
+	if (x1 < 0 || x1 >= SCREEN_W || y1 < 0 || y1 >= SCREEN_H || y2 < 0 || y2 >= SCREEN_H)
+		return;
+#endif
 
 	uint32_t *dst32 = &video.frameBuffer[(y1 * SCREEN_W) + x1];
 
 	*dst32 = color; // set first pixel
 
-	int32_t ay = ABS(dy);
-	if (ay <= 1)
+	const int32_t dy = y2 - y1;
+	if (dy == 0) // y1 == y2
 	{
-		if (ay != 0)
-			dst32 += pitch;
-
-		*++dst32 = color;
+		dst32[1] = color;
 		return;
 	}
 
+	uint32_t ay = ABS(dy);
 	int32_t d = 1 - ay;
 
 	ay <<= 1;
-	while (y1 != y2)
+
+	if (y1 > y2)
 	{
-		if (d >= 0)
+		for (; y1 != y2; y1--)
 		{
-			d -= ay;
-			dst32++;
+			if (d >= 0)
+			{
+				d -= ay;
+				dst32++;
+			}
+
+			d += 2;
+
+			dst32 -= SCREEN_W;
+			*dst32 = color;
 		}
+	}
+	else
+	{
+		for (; y1 != y2; y1++)
+		{
+			if (d >= 0)
+			{
+				d -= ay;
+				dst32++;
+			}
 
-		y1 += sy;
-		d += 2;
+			d += 2;
 
-		dst32 += pitch;
-		*dst32 = color;
+			dst32 += SCREEN_W;
+			*dst32 = color;
+		}
 	}
 }
 
@@ -280,14 +306,14 @@ const scopeDrawRoutine scopeDrawRoutineTable[12] =
 {
 	(scopeDrawRoutine)scopeDrawNoLoop_8bit,
 	(scopeDrawRoutine)scopeDrawLoop_8bit,
-	(scopeDrawRoutine)scopeDrawPingPong_8bit,
+	(scopeDrawRoutine)scopeDrawBidiLoop_8bit,
 	(scopeDrawRoutine)scopeDrawNoLoop_16bit,
 	(scopeDrawRoutine)scopeDrawLoop_16bit,
-	(scopeDrawRoutine)scopeDrawPingPong_16bit,
+	(scopeDrawRoutine)scopeDrawBidiLoop_16bit,
 	(scopeDrawRoutine)linedScopeDrawNoLoop_8bit,
 	(scopeDrawRoutine)linedScopeDrawLoop_8bit,
-	(scopeDrawRoutine)linedScopeDrawPingPong_8bit,
+	(scopeDrawRoutine)linedScopeDrawBidiLoop_8bit,
 	(scopeDrawRoutine)linedScopeDrawNoLoop_16bit,
 	(scopeDrawRoutine)linedScopeDrawLoop_16bit,
-	(scopeDrawRoutine)linedScopeDrawPingPong_16bit
+	(scopeDrawRoutine)linedScopeDrawBidiLoop_16bit
 };
diff --git a/src/scopes/ft2_scopedraw.h b/src/scopes/ft2_scopedraw.h
@@ -9,4 +9,3 @@ extern const scopeDrawRoutine scopeDrawRoutineTable[12]; // ft2_scopedraw.c
 
 bool calcScopeIntrpLUT(void);
 void freeScopeIntrpLUT(void);
-
diff --git a/src/scopes/ft2_scopes.c b/src/scopes/ft2_scopes.c
@@ -31,29 +31,22 @@ static SDL_Thread *scopeThread;
 
 lastChInstr_t lastChInstr[MAX_CHANNELS]; // global
 
-int32_t getSamplePosition(uint8_t ch)
+int32_t getSamplePositionFromScopes(uint8_t ch)
 {
 	if (ch >= song.numChannels)
 		return -1;
 
-	volatile scope_t *sc = &scope[ch];
-
-	// cache some stuff
-	volatile bool active = sc->active;
-	volatile bool samplingBackwards = sc->samplingBackwards;
-	volatile int32_t position = sc->position;
-	volatile int32_t loopStart = sc->loopStart;
-	volatile int32_t sampleEnd = sc->sampleEnd;
+	volatile scope_t sc = scope[ch]; // cache it
 
-	if (!active || sampleEnd == 0)
+	if (!sc.active || sc.sampleEnd == 0)
 		return -1;
 
-	if (position >= 0 && position < sampleEnd)
+	if (sc.position >= 0 && sc.position < sc.sampleEnd)
 	{
-		if (samplingBackwards) // get actual bidi pos when in backwards mode
-			position = (sampleEnd - 1) - (position - loopStart);
+		if (sc.samplingBackwards) // get actual bidi pos when in backwards mode
+			sc.position = (sc.sampleEnd - 1) - (sc.position - sc.loopStart);
 
-		return position;
+		return sc.position;
 	}
 
 	return -1; // not active or overflown
@@ -308,9 +301,15 @@ static void scopeTrigger(int32_t ch, const sample_t *s, int32_t playOffset)
 		loopType = 0;
 
 	if (sample16Bit)
+	{
 		tempState.base16 = (const int16_t *)s->dataPtr;
+		tempState.leftEdgeTaps16 = s->leftEdgeTapSamples16 + MAX_LEFT_TAPS;
+	}
 	else
+	{
 		tempState.base8 = s->dataPtr;
+		tempState.leftEdgeTaps8 = s->leftEdgeTapSamples8 + MAX_LEFT_TAPS;
+	}
 
 	tempState.sample16Bit = sample16Bit;
 	tempState.loopType = loopType;
@@ -428,7 +427,7 @@ void drawScopes(void)
 			continue;
 		}
 
-		scope_t s = scope[i]; // cache scope to lower thread race condition issues
+		volatile scope_t s = scope[i]; // cache scope to lower thread race condition issues
 		if (s.active && s.volume > 0 && !audio.locked)
 		{
 			// scope is active
@@ -442,7 +441,7 @@ void drawScopes(void)
 
 			// draw scope
 			bool linedScopesFlag = !!(config.specialFlags & LINED_SCOPES);
-			scopeDrawRoutineTable[(linedScopesFlag * 6) + (s.sample16Bit * 3) + s.loopType](&s, scopeXOffs, scopeLineY, scopeDrawLen);
+			scopeDrawRoutineTable[(linedScopesFlag * 6) + (s.sample16Bit * 3) + s.loopType]((const scope_t *)&s, scopeXOffs, scopeLineY, scopeDrawLen);
 		}
 		else
 		{
diff --git a/src/scopes/ft2_scopes.h b/src/scopes/ft2_scopes.h
@@ -11,17 +11,19 @@
 */
 #define SCOPE_HZ 64
 
-#define SCOPE_INTRP_TAPS 6
-#define SCOPE_INTRP_PHASES 512 /* plentiful for a small scope window */
-#define SCOPE_INTRP_PHASES_BITS 9 /* log2(SCOPE_INTRP_PHASES) */
-
 #define SCOPE_HEIGHT 36
 
 #define SCOPE_FRAC_BITS 32
 #define SCOPE_FRAC_SCALE ((int64_t)1 << SCOPE_FRAC_BITS)
 #define SCOPE_FRAC_MASK (SCOPE_FRAC_SCALE-1)
 
-int32_t getSamplePosition(uint8_t ch);
+#define SCOPE_INTRP_TAPS 6
+#define SCOPE_INTRP_SCALE 32768
+#define SCOPE_INTRP_SCALE_BITS 15 /* log2(SCOPE_INTRP_SCALE) */
+#define SCOPE_INTRP_PHASES 512 /* plentiful for FT2-styled scopes */
+#define SCOPE_INTRP_PHASES_BITS 9 /* log2(SCOPE_INTRP_PHASES) */
+
+int32_t getSamplePositionFromScopes(uint8_t ch);
 void stopAllScopes(void);
 void refreshScopes(void);
 bool testScopesMouseDown(void);
@@ -39,6 +41,10 @@ typedef struct scope_t
 	uint8_t loopType;
 	int32_t volume, loopStart, loopLength, loopEnd, sampleEnd, position;
 	uint64_t delta, drawDelta, positionFrac;
+
+	// if (loopEnabled && hasLooped && samplingPos <= loopStart+MAX_LEFT_TAPS) readFixedTapsFromThisPointer();
+	const int8_t *leftEdgeTaps8;
+	const int16_t *leftEdgeTaps16;
 } scope_t;
 
 typedef struct lastChInstr_t

	ft2-clone Fasttracker 2 clone
	Log | Files | Refs | README | LICENSE

M src/ft2_audio.c  | 66 ++++++++++++++++++++++--------------------------------------------
M src/ft2_audio.h  | 3 +--
M src/ft2_header.h  | 2 +-
M src/ft2_replayer.c  | 3 +--
M src/ft2_replayer.h  | 5 ++---
M src/ft2_sample_ed.c  | 2 +-
M src/scopes/ft2_scope_macros.h  | 295 ++++++++++++++++++++++++++++++++++++++++++-------------------------------------
M src/scopes/ft2_scopedraw.c  | 144 +++++++++++++++++++++++++++++++++++++++++++++++--------------------------------
M src/scopes/ft2_scopedraw.h  | 1 -
M src/scopes/ft2_scopes.c  | 31 +++++++++++++++----------------
M src/scopes/ft2_scopes.h  | 16 +++++++++++-----