Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More audio buffering fixes (primarily affects SDL) #12916

Merged
merged 6 commits into from
May 17, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
198 changes: 127 additions & 71 deletions Core/HW/StereoResampler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,23 +19,27 @@

// 16 bit Stereo

#define MAX_SAMPLES_DEFAULT (4096) // 2*64ms - had to double it for nVidia Shield which has huge buffers
#define MAX_SAMPLES_EXTRA (8192)
// These must be powers of 2.
#define MAX_BUFSIZE_DEFAULT (4096) // 2*64ms - had to double it for nVidia Shield which has huge buffers
#define MAX_BUFSIZE_EXTRA (8192)

#define LOW_WATERMARK_DEFAULT 1680 // 40 ms
#define LOW_WATERMARK_EXTRA 3360 // 80 ms
#define TARGET_BUFSIZE_MARGIN 512

#define MAX_FREQ_SHIFT 200 // per 32000 Hz
#define TARGET_BUFSIZE_DEFAULT 1680 // 40 ms
#define TARGET_BUFSIZE_EXTRA 3360 // 80 ms

#define MAX_FREQ_SHIFT 600.0f // how far off can we be from 44100 Hz
#define CONTROL_FACTOR 0.2f // in freq_shift per fifo size offset
#define CONTROL_AVG 32
#define CONTROL_AVG 32.0f

#include <cstring>
#include <atomic>

#include "base/logging.h"
#include "base/timeutil.h"
#include "base/NativeApp.h"
#include "Common/ChunkFile.h"
#include "Common/MathUtil.h"
#include "Common/Atomics.h"
#include "Core/Config.h"
#include "Core/ConfigValues.h"
#include "Core/HW/StereoResampler.h"
Expand All @@ -55,10 +59,10 @@
#endif

StereoResampler::StereoResampler()
: m_bufsize(MAX_SAMPLES_DEFAULT)
, m_lowwatermark(LOW_WATERMARK_DEFAULT) {
: m_maxBufsize(MAX_BUFSIZE_DEFAULT)
, m_targetBufsize(TARGET_BUFSIZE_DEFAULT) {
// Need to have space for the worst case in case it changes.
m_buffer = new int16_t[MAX_SAMPLES_EXTRA * 2]();
m_buffer = new int16_t[MAX_BUFSIZE_EXTRA * 2]();

// Some Android devices are v-synced to non-60Hz framerates. We simply timestretch audio to fit.
// TODO: should only do this if auto frameskip is off?
Expand All @@ -68,7 +72,7 @@ StereoResampler::StereoResampler()
if (refresh != 60.0f && refresh > 50.0f && refresh < 70.0f) {
int input_sample_rate = (int)(44100 * (refresh / 60.0f));
ILOG("StereoResampler: Adjusting target sample rate to %dHz", input_sample_rate);
SetInputSampleRate(input_sample_rate);
m_input_sample_rate = input_sample_rate;
}

UpdateBufferSize();
Expand All @@ -81,11 +85,18 @@ StereoResampler::~StereoResampler() {

void StereoResampler::UpdateBufferSize() {
// Selects the maximum and target buffer sizes from g_Config.bExtraAudioBuffering.
// Called from the constructor and again at the start of every PushSamples().
if (g_Config.bExtraAudioBuffering) {
m_bufsize = MAX_SAMPLES_EXTRA;
m_lowwatermark = LOW_WATERMARK_EXTRA;
m_maxBufsize = MAX_BUFSIZE_EXTRA;
m_targetBufsize = TARGET_BUFSIZE_EXTRA;
} else {
m_bufsize = MAX_SAMPLES_DEFAULT;
m_lowwatermark = LOW_WATERMARK_DEFAULT;
m_maxBufsize = MAX_BUFSIZE_DEFAULT;
m_targetBufsize = TARGET_BUFSIZE_DEFAULT;

// If the system reports a positive value for SYSPROP_AUDIO_FRAMES_PER_BUFFER and the
// default target is smaller than that value plus TARGET_BUFSIZE_MARGIN, raise the
// target to that sum, capped at 4096.
// NOTE(review): std::min needs <algorithm>, which is not among the includes visible
// here - confirm it is pulled in by another header.
int systemBufsize = System_GetPropertyInt(SYSPROP_AUDIO_FRAMES_PER_BUFFER);
if (systemBufsize > 0 && m_targetBufsize < systemBufsize + TARGET_BUFSIZE_MARGIN) {
m_targetBufsize = std::min(4096, systemBufsize + TARGET_BUFSIZE_MARGIN);
// Switch to the larger maximum when twice the raised target exceeds the default maximum.
if (m_targetBufsize * 2 > MAX_BUFSIZE_DEFAULT)
m_maxBufsize = MAX_BUFSIZE_EXTRA;
}
}
}

Expand Down Expand Up @@ -147,70 +158,98 @@ inline void ClampBufferToS16WithVolume(s16 *out, const s32 *in, size_t size) {
}

void StereoResampler::Clear() {
// Zeroes the first m_maxBufsize * 2 int16_t entries of m_buffer. The read and write
// indices are not modified here.
// NOTE(review): m_buffer is allocated with MAX_BUFSIZE_EXTRA * 2 entries, so when
// m_maxBufsize is the default size only part of the allocation is cleared - confirm
// that this is intended.
memset(m_buffer, 0, m_bufsize * 2 * sizeof(int16_t));
memset(m_buffer, 0, m_maxBufsize * 2 * sizeof(int16_t));
}

// Executed from sound stream thread
// Executed from sound stream thread, pulling sound out of the buffer.
unsigned int StereoResampler::Mix(short* samples, unsigned int numSamples, bool consider_framelimit, int sample_rate) {
if (!samples)
return 0;

unsigned int currentSample = 0;
unsigned int currentSample;

// Cache access in non-volatile variable
// This is the only function changing the read value, so it's safe to
// cache it locally although it's written here.
// The writing pointer will be modified outside, but it will only increase,
// so we will just ignore new written data while interpolating.
// so we will just ignore new written data while interpolating (until it wraps...).
// Without this cache, the compiler wouldn't be allowed to optimize the
// interpolation loop.
u32 indexR = Common::AtomicLoad(m_indexR);
u32 indexW = Common::AtomicLoad(m_indexW);
u32 indexR = m_indexR.load();
u32 indexW = m_indexW.load();

const int INDEX_MASK = (m_bufsize * 2 - 1);
const int INDEX_MASK = (m_maxBufsize * 2 - 1);
lastBufSize_ = (indexR - m_indexW) & INDEX_MASK;

// We force on the audio resampler if the output sample rate doesn't match the input.
if (!g_Config.bAudioResampler && sample_rate == (int)m_input_sample_rate) {
for (; currentSample < numSamples * 2 && ((indexW - indexR) & INDEX_MASK) > 2; currentSample += 2) {
for (currentSample = 0; currentSample < numSamples * 2; currentSample += 2) {
s16 l1 = m_buffer[indexR & INDEX_MASK]; //current
s16 r1 = m_buffer[(indexR + 1) & INDEX_MASK]; //current
samples[currentSample] = l1;
samples[currentSample + 1] = r1;
indexR += 2;
if (((indexW - indexR) & INDEX_MASK) == 0) {
// Ran out!
underrunCount_++;
break;
}
}
sample_rate_ = (float)sample_rate;
output_sample_rate_ = (float)sample_rate;
droppedSamples_ = 0;
} else {
// Drift prevention mechanism
// Drift prevention mechanism.
float numLeft = (float)(((indexW - indexR) & INDEX_MASK) / 2);
m_numLeftI = (numLeft + m_numLeftI*(CONTROL_AVG - 1)) / CONTROL_AVG;
float offset = (m_numLeftI - m_lowwatermark) * CONTROL_FACTOR;
// If we had to discard samples the last frame due to underrun,
// apply an adjustment here. Otherwise we'll overestimate how many
// samples we need.
numLeft -= droppedSamples_;
droppedSamples_ = 0;

// m_numLeftI here becomes a lowpass filtered version of numLeft.
m_numLeftI = (numLeft + m_numLeftI * (CONTROL_AVG - 1.0f)) / CONTROL_AVG;

// Here we try to keep the buffer size around m_targetBufsize (the desired
// buffer size) by adjusting the speed.
// Note that the speed of adjustment here does not take the buffer size into
// account. Since this is called once per "output frame", the frame size
// will affect how fast this algorithm reacts, which can't be a good thing.
float offset = (m_numLeftI - (float)m_targetBufsize) * CONTROL_FACTOR;
if (offset > MAX_FREQ_SHIFT) offset = MAX_FREQ_SHIFT;
if (offset < -MAX_FREQ_SHIFT) offset = -MAX_FREQ_SHIFT;

sample_rate_ = (float)(m_input_sample_rate + offset);
const u32 ratio = (u32)(65536.0 * sample_rate_ / (double)sample_rate);

output_sample_rate_ = (float)(m_input_sample_rate + offset);
const u32 ratio = (u32)(65536.0 * output_sample_rate_ / (double)sample_rate);
ratio_ = ratio;
// TODO: consider a higher-quality resampling algorithm.
// TODO: Add a fast path for 1:1.
for (; currentSample < numSamples * 2 && ((indexW - indexR) & INDEX_MASK) > 2; currentSample += 2) {
u32 frac = m_frac;
for (currentSample = 0; currentSample < numSamples * 2; currentSample += 2) {
u32 indexR2 = indexR + 2; //next sample
s16 l1 = m_buffer[indexR & INDEX_MASK]; //current
s16 r1 = m_buffer[(indexR + 1) & INDEX_MASK]; //current
s16 l2 = m_buffer[indexR2 & INDEX_MASK]; //next
s16 r2 = m_buffer[(indexR2 + 1) & INDEX_MASK]; //next
int sampleL = ((l1 << 16) + (l2 - l1) * (u16)m_frac) >> 16;
int sampleR = ((r1 << 16) + (r2 - r1) * (u16)m_frac) >> 16;
int sampleL = ((l1 << 16) + (l2 - l1) * (u16)frac) >> 16;
int sampleR = ((r1 << 16) + (r2 - r1) * (u16)frac) >> 16;
samples[currentSample] = sampleL;
samples[currentSample + 1] = sampleR;
m_frac += ratio;
indexR += 2 * (u16)(m_frac >> 16);
m_frac &= 0xffff;
frac += ratio;
indexR += 2 * (frac >> 16);
frac &= 0xffff;
if (((indexW - indexR) & INDEX_MASK) == 0) {
// Ran out!
// int missing = numSamples * 2 - currentSample;
// ILOG("Resampler underrun: %d (numSamples: %d, currentSample: %d)", missing, numSamples, currentSample / 2);
underrunCount_++;
break;
}
}
m_frac = frac;
}

int realSamples = currentSample;
if (currentSample < numSamples * 2)
underrunCount_++;
// Let's not count the underrun padding here.
outputSampleCount_ += currentSample / 2;

// Padding with the last value to reduce clicking
short s[2];
Expand All @@ -222,80 +261,97 @@ unsigned int StereoResampler::Mix(short* samples, unsigned int numSamples, bool
}

// Flush cached variable
Common::AtomicStore(m_indexR, indexR);
m_indexR.store(indexR);

//if (realSamples != numSamples * 2) {
// ILOG("Underrun! %i / %i", realSamples / 2, numSamples);
//}
lastBufSize_ = (m_indexW - m_indexR) & INDEX_MASK;

return realSamples / 2;
// TODO: What should we actually return here?
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do think returning the number of samples actually produced is best; padding can then be decided per platform.

-[Unknown]

return currentSample / 2;
}

void StereoResampler::PushSamples(const s32 *samples, unsigned int num_samples) {
// Executes on the emulator thread, pushing sound into the buffer.
// `samples` is read as numSamples * 2 s32 values (the total size handed to
// ClampBufferToS16WithVolume below) and written into m_buffer as s16.
// If the push does not fit under `cap`, the whole push is dropped rather than truncated;
// inputSampleCount_ is incremented either way.
void StereoResampler::PushSamples(const s32 *samples, unsigned int numSamples) {
inputSampleCount_ += numSamples;

// Re-evaluate the buffer size settings on every push.
UpdateBufferSize();
const int INDEX_MASK = (m_bufsize * 2 - 1);
// Mask used to wrap indices into m_buffer; only correct while the buffer size is a power of 2.
const int INDEX_MASK = (m_maxBufsize * 2 - 1);
// Cache access in non-volatile variable
// indexR isn't allowed to cache in the audio throttling loop as it
// needs to get updates to not deadlock.
u32 indexW = Common::AtomicLoad(m_indexW);
u32 indexW = m_indexW.load();

u32 cap = m_bufsize * 2;
// If unthottling, no need to fill up the entire buffer, just screws up timing after releasing unthrottle.
if (PSP_CoreParameter().unthrottle)
cap = m_lowwatermark * 2;
u32 cap = m_maxBufsize * 2;
// If unthrottling, no need to fill up the entire buffer, just screws up timing after releasing unthrottle.
if (PSP_CoreParameter().unthrottle) {
cap = m_targetBufsize * 2;
}

// Check if we have enough free space
// indexW == m_indexR results in empty buffer, so indexR must always be smaller than indexW
// The masked difference (indexW - m_indexR) is the number of s16 entries currently queued.
if (num_samples * 2 + ((indexW - Common::AtomicLoad(m_indexR)) & INDEX_MASK) >= cap) {
if (!PSP_CoreParameter().unthrottle)
if (numSamples * 2 + ((indexW - m_indexR.load()) & INDEX_MASK) >= cap) {
// Only drops that happen while not unthrottled are counted as overruns.
if (!PSP_CoreParameter().unthrottle) {
overrunCount_++;
}
// TODO: "Timestretch" by doing a windowed overlap with existing buffer content?
return;
}

// over_bytes: number of output bytes (2 per s16 entry) that do not fit between the
// write position and the end of the ring (m_maxBufsize * 2 entries), and therefore
// have to wrap around to index 0.
int over_bytes = num_samples * 4 - (m_bufsize * 2 - (indexW & INDEX_MASK)) * sizeof(short);
int over_bytes = numSamples * 4 - (m_maxBufsize * 2 - (indexW & INDEX_MASK)) * sizeof(short);
if (over_bytes > 0) {
// Split write: first the part that fits before the end of the ring, then the
// remainder starting at index 0.
ClampBufferToS16WithVolume(&m_buffer[indexW & INDEX_MASK], samples, (num_samples * 4 - over_bytes) / 2);
ClampBufferToS16WithVolume(&m_buffer[0], samples + (num_samples * 4 - over_bytes) / sizeof(short), over_bytes / 2);
ClampBufferToS16WithVolume(&m_buffer[indexW & INDEX_MASK], samples, (numSamples * 4 - over_bytes) / 2);
ClampBufferToS16WithVolume(&m_buffer[0], samples + (numSamples * 4 - over_bytes) / sizeof(short), over_bytes / 2);
} else {
// Everything fits without wrapping.
ClampBufferToS16WithVolume(&m_buffer[indexW & INDEX_MASK], samples, num_samples * 2);
ClampBufferToS16WithVolume(&m_buffer[indexW & INDEX_MASK], samples, numSamples * 2);
}

// Advance the write index. It is stored unmasked; INDEX_MASK is applied wherever it is used.
Common::AtomicAdd(m_indexW, num_samples * 2);
lastPushSize_ = num_samples;
m_indexW += numSamples * 2;
lastPushSize_ = numSamples;
}

void StereoResampler::GetAudioDebugStats(char *buf, size_t bufSize) {
// Formats the resampler statistics as text into `buf` (snprintf, limited to bufSize),
// then adds the pending underrun/overrun counts to the running totals and zeroes the
// pending counts. The totals are printed before the pending counts are added, so the
// text reflects the totals as of the previous call.
// The "effective" rates are sample counts divided by the time elapsed since startTime_.
double elapsed = real_time_now() - startTime_;

double effective_input_sample_rate = (double)inputSampleCount_ / elapsed;
double effective_output_sample_rate = (double)outputSampleCount_ / elapsed;
snprintf(buf, bufSize,
"Audio buffer: %d/%d (low watermark: %d)\n"
"Audio buffer: %d/%d (target: %d)\n"
"Filtered: %0.2f\n"
"Underruns: %d\n"
"Overruns: %d\n"
"Sample rate: %d (input: %d)\n"
"Push size: %d\n",
"Effective input sample rate: %0.2f\n"
"Effective output sample rate: %0.2f\n"
"Push size: %d\n"
"Ratio: %0.6f\n",
lastBufSize_,
m_bufsize * 2,
m_lowwatermark,
m_maxBufsize,
m_targetBufsize,
m_numLeftI,
underrunCountTotal_,
overrunCountTotal_,
(int)sample_rate_,
(int)output_sample_rate_,
m_input_sample_rate,
lastPushSize_);
effective_input_sample_rate,
effective_output_sample_rate,
lastPushSize_,
(float)ratio_ / 65536.0f);
// Fold the counts gathered since the previous call into the totals, then restart them.
underrunCountTotal_ += underrunCount_;
overrunCountTotal_ += overrunCount_;
underrunCount_ = 0;
overrunCount_ = 0;

// Use this to remove the bias from the startup.
// if (elapsed > 3.0) {
//ResetStatCounters();
// }
}

// Zeroes both the pending and the cumulative underrun/overrun counters.
void StereoResampler::ResetStatCounters() {
underrunCount_ = 0;
overrunCount_ = 0;
underrunCountTotal_ = 0;
overrunCountTotal_ = 0;
}

// As shown here: stores `rate` as the input sample rate, zeroes the input/output sample
// counters and restarts startTime_ (the values GetAudioDebugStats uses for its
// effective-rate figures).
// NOTE(review): this text comes from a diff view, and the constructor assigns
// m_input_sample_rate directly - confirm that the counter/start-time resets belong to
// this function rather than to ResetStatCounters, and which callers remain.
void StereoResampler::SetInputSampleRate(unsigned int rate) {
m_input_sample_rate = rate;
inputSampleCount_ = 0;
outputSampleCount_ = 0;
startTime_ = real_time_now();
}

void StereoResampler::DoState(PointerWrap &p) {
Expand Down
31 changes: 21 additions & 10 deletions Core/HW/StereoResampler.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@

#pragma once

#include <string>
#include <cstdint>
#include <atomic>

#include "Common/ChunkFile.h"
#include "Common/CommonTypes.h"
Expand All @@ -45,23 +46,33 @@ class StereoResampler {
void GetAudioDebugStats(char *buf, size_t bufSize);
void ResetStatCounters();

protected:
private:
void UpdateBufferSize();
void SetInputSampleRate(unsigned int rate);

int m_bufsize;
int m_lowwatermark;
int m_maxBufsize;
int m_targetBufsize;

unsigned int m_input_sample_rate = 44100;
int16_t *m_buffer;
volatile u32 m_indexW = 0;
volatile u32 m_indexR = 0;
std::atomic<u32> m_indexW;
std::atomic<u32> m_indexR;
float m_numLeftI = 0.0f;

u32 m_frac = 0;
float output_sample_rate_ = 0.0;
int lastBufSize_ = 0;
int lastPushSize_ = 0;
u32 ratio_ = 0;

int underrunCount_ = 0;
int overrunCount_ = 0;
int underrunCountTotal_ = 0;
int overrunCountTotal_ = 0;
float sample_rate_ = 0.0;
int lastBufSize_ = 0;
int lastPushSize_ = 0;

int droppedSamples_ = 0;

int64_t inputSampleCount_ = 0;
int64_t outputSampleCount_ = 0;

double startTime_ = 0.0;
};
Loading