I am trying to mix user's voice with music and save it to a file.
I created 2 Decoders - 1 for voice and 1 for music and put them into Mixer's input. I decode each frame and save it to file using FILE/createWAV/fwrite.
Everything works perfectly when my song is a .wav file and has the same sampleRate and samplesPerFrame as the recorded voice (48000/1024).
However, when I use an .mp3 file with different parameters (44100/1152), the final file is incorrect - it is stretched or has crackling sounds. I think this is because each decoder returns a different samplesDecoded value, and when the buffers are put into the Mixer or saved to the file, the samples that make up the difference between the two counts are lost.
As far as I understand, when we call voiceDecoder->decode(buffer, &samplesDecoded), it advances samplePosition by samplesDecoded.
What I tried was to use the minimum of the two decoders' samplesDecoded values. However, given the behaviour described above, on every loop iteration the song will lose 1152 - 1024 = 128 samples, so I also tried to seek songDecoder to the same position as voiceDecoder with songDecoder->seek(voiceDecoder->samplePosition, true), but that led to a totally incorrect file.
To summarize: how should I handle the mixer/offline processing with 2 decoders when they have different sampleRate and samplesPerFrame values?
Code:
void AudioProcessor::startProcessing() {
SuperpoweredStereoMixer *mixer = new SuperpoweredStereoMixer();
float *mixerInputs_[] = {0,0,0,0};
float *mixerOutputs_[] = {0,0};
float inputLevels_[]= {0.5f, 0.5f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f};
float outputLevels_[] = { 1.0f, 1.0f };
SuperpoweredDecoder *voiceDecoder = new SuperpoweredDecoder();
SuperpoweredDecoder *songDecoder = new SuperpoweredDecoder();
if (voiceDecoder->open(voiceInputPath, false) || songDecoder->open(songInputPath, false, songOffset, songLength)) {
delete voiceDecoder;
delete songDecoder;
delete mixer;
callJavaVoidMethodWithBoolParam(jvm, jObject, processingFinishedMethodId, false);
return;
};
FILE *fd = createWAV(outputPath, songDecoder->samplerate, 2);
if (!fd) {
delete voiceDecoder;
delete songDecoder;
delete mixer;
callJavaVoidMethodWithBoolParam(jvm, jObject, processingFinishedMethodId, false);
return;
};
// Create a buffer for the 16-bit integer samples coming from the decoder.
short int *voiceIntBuffer = (short int *)malloc(voiceDecoder->samplesPerFrame * 4 * sizeof(short int) + 32768);
short int *songIntBuffer = (short int *)malloc(songDecoder->samplesPerFrame * 4 * sizeof(short int) + 32768);
short int *outputIntBuffer = (short int *)malloc(voiceDecoder->samplesPerFrame * 4 * sizeof(short int) + 32768);
// Create a buffer for the 32-bit floating point samples required by the effect.
float *voiceFloatBuffer = (float *)malloc(voiceDecoder->samplesPerFrame * 4 * sizeof(float) + 32768);
float *songFloatBuffer = (float *)malloc(songDecoder->samplesPerFrame * 4 * sizeof(float) + 32768);
float *outputFloatBuffer = (float *)malloc(voiceDecoder->samplesPerFrame * 4 * sizeof(float) + 32768);
bool isError = false;
// Processing.
while (true) {
if (isCanceled) {
isError = true;
break;
}
// Decode one frame. samplesDecoded will be overwritten with the actual decoded number of samples.
unsigned int voiceSamplesDecoded = voiceDecoder->samplesPerFrame;
if (voiceDecoder->decode(voiceIntBuffer, &voiceSamplesDecoded) == SUPERPOWEREDDECODER_ERROR) {
break;
}
if (voiceSamplesDecoded < 1) {
break;
}
//
// Decode one frame. samplesDecoded will be overwritten with the actual decoded number of samples.
unsigned int songSamplesDecoded = songDecoder->samplesPerFrame;
if (songDecoder->decode(songIntBuffer, &songSamplesDecoded) == SUPERPOWEREDDECODER_ERROR) {
break;
}
if (songSamplesDecoded < 1) {
break;
}
unsigned int samplesDecoded = static_cast<unsigned int>(fmin(voiceSamplesDecoded, songSamplesDecoded));
// Convert the decoded PCM samples from 16-bit integer to 32-bit floating point.
SuperpoweredShortIntToFloat(voiceIntBuffer, voiceFloatBuffer, samplesDecoded);
SuperpoweredShortIntToFloat(songIntBuffer, songFloatBuffer, samplesDecoded);
//setup mixer inputs
mixerInputs_[0] = voiceFloatBuffer;
mixerInputs_[1] = songFloatBuffer;
mixerInputs_[2] = NULL;
mixerInputs_[3] = NULL;
// setup mixer outputs, might have two separate outputs (L/R) if second not null
mixerOutputs_[0] = outputFloatBuffer;
mixerOutputs_[1] = NULL;
mixer->process(mixerInputs_, mixerOutputs_, inputLevels_, outputLevels_, NULL, NULL, samplesDecoded);
// Convert the PCM samples from 32-bit floating point to 16-bit integer.
SuperpoweredFloatToShortInt(outputFloatBuffer, outputIntBuffer, samplesDecoded);
// Write the audio to disk.
fwrite(outputIntBuffer, 1, samplesDecoded * 4, fd);
// songDecoder->seek(voiceDecoder->samplePosition, true);
}
// Cleanup.
closeWAV(fd);
delete voiceDecoder;
delete songDecoder;
delete mixer;
free(voiceIntBuffer);
free(voiceFloatBuffer);
free(songIntBuffer);
free(songFloatBuffer);
free(outputFloatBuffer);
free(outputIntBuffer);
}
Thanks in advance!
You need to match the sample rates using the SuperpoweredResampler class. You'll also need a circular buffer for each input, because the number of samples available from the two decoders will not match in many cases.