The following code is an attempt to use AAC on Android to encode a floating point sine tone signal into an mp4 file. However, it fails. Either I get a distorted tone or nothing at all. The length of the mp4 file is also wrong.
I would appreciate some help solving this problem. Thanks!
public class MyAudioTest {
MediaMuxer mux = null;
int SAMPLING_RATE = 44100;
int BUFFER_SIZE = 4096;
int timeOutMicroSeconds = 10000;
double sampleRateHz = 44100;
double pha = 0.0;
double deltapha = 2.0 * Math.PI * 440.0 / 44100;
double maxpha = 2.0 * Math.PI;
int SIZE_OF_FLOAT = 4;
int NUM_SAMPLES = 441;
int numBlocks = 0;
private byte[] GetFloatArrayAsByteArray(float[] sampleBlock)
{
ByteBuffer buffer = ByteBuffer.allocate(sampleBlock.length * SIZE_OF_FLOAT);
buffer.order(ByteOrder.nativeOrder());
buffer.asFloatBuffer().put(sampleBlock);
return buffer.array();
}
private short NextSineSample() {
float val = (float) Math.sin(pha);
pha = pha + deltapha;
if (pha >= maxpha) pha = pha - maxpha;
return val;
}
public void AudioEncoderTest() {
Log.i("AUDIO", "Starting audio encoder test");
try {
mux = new MediaMuxer("/mnt/sdcard/testing.mp4", MediaMuxer.OutputFormat.MUXER_OUTPUT_MPEG_4);
MediaFormat outputFormat = MediaFormat.createAudioFormat(MediaFormat.MIMETYPE_AUDIO_AAC, SAMPLING_RATE, 1);
outputFormat.setInteger(MediaFormat.KEY_AAC_PROFILE, MediaCodecInfo.CodecProfileLevel.AACObjectLC);
outputFormat.setInteger(MediaFormat.KEY_BIT_RATE, 96000);
outputFormat.setString(MediaFormat.KEY_MIME, MediaFormat.MIMETYPE_AUDIO_AAC);
outputFormat.setInteger(MediaFormat.KEY_CHANNEL_COUNT, 1);
outputFormat.setInteger(MediaFormat.KEY_SAMPLE_RATE, SAMPLING_RATE);
outputFormat.setInteger(MediaFormat.KEY_PCM_ENCODING, AudioFormat.ENCODING_PCM_FLOAT);
MediaCodec codec = MediaCodec.createEncoderByType(MediaFormat.MIMETYPE_AUDIO_AAC);
codec.configure(outputFormat, null, null, MediaCodec.CONFIGURE_FLAG_ENCODE);
codec.start();
ByteBuffer[] codecInputBuffers = codec.getInputBuffers();
ByteBuffer[] codecOutputBuffers = codec.getOutputBuffers();
MediaCodec.BufferInfo outBuffInfo = new MediaCodec.BufferInfo();
byte[] tempBuffer;
float[] sampleBlock = new float[NUM_SAMPLES];
boolean hasMoreData = true;
double presentationTimeUs = 0;
int audioTrackIdx = 0;
int totalBytesRead = 0;
int totalSamplesProcessed = 0;
do {
int inputBufIndex = 0;
while (inputBufIndex != -1 && hasMoreData) {
inputBufIndex = codec.dequeueInputBuffer(timeOutMicroSeconds);
if (inputBufIndex >= 0) {
ByteBuffer dstBuf = codecInputBuffers[inputBufIndex];
dstBuf.clear();
for (int i = 0; i < NUM_SAMPLES; i++) {
sampleBlock[i] = NextSineSample();
}
numBlocks++;
int bytesRead = NUM_SAMPLES * SIZE_OF_FLOAT;
tempBuffer = GetFloatArrayAsByteArray(sampleBlock);
totalSamplesProcessed = totalSamplesProcessed + NUM_SAMPLES;
if (totalSamplesProcessed >= SAMPLING_RATE * 2) {
bytesRead = -1;
}
if (bytesRead == -1) { // -1 implies EOS
hasMoreData = false;
Log.i("AUDIO", "No more input for encoder");
codec.queueInputBuffer(inputBufIndex, 0, 0, (long) presentationTimeUs, MediaCodec.BUFFER_FLAG_END_OF_STREAM);
} else {
totalBytesRead += bytesRead;
dstBuf.put(tempBuffer, 0, bytesRead);
Log.i("AUDIO", "Encoding data. Block: " + numBlocks);
codec.queueInputBuffer(inputBufIndex, 0, bytesRead, (long) presentationTimeUs, 0);
presentationTimeUs = 1000000l * (totalBytesRead / SIZE_OF_FLOAT) / SAMPLING_RATE;
Log.i("AUDIOTIME", "Block: " + numBlocks + " | Presentation time: " + presentationTimeUs);
}
}
}
// Drain audio
int outputBufIndex = 0;
while (outputBufIndex != MediaCodec.INFO_TRY_AGAIN_LATER)
{
outputBufIndex = codec.dequeueOutputBuffer(outBuffInfo, timeOutMicroSeconds);
if (outputBufIndex >= 0)
{
ByteBuffer encodedData = codecOutputBuffers[outputBufIndex];
encodedData.position(outBuffInfo.offset);
encodedData.limit(outBuffInfo.offset + outBuffInfo.size);
if ((outBuffInfo.flags & MediaCodec.BUFFER_FLAG_CODEC_CONFIG) != 0 && outBuffInfo.size != 0) {
codec.releaseOutputBuffer(outputBufIndex, false);
} else {
Log.i("AUDIO", "Writing encoded data to mux");
mux.writeSampleData(audioTrackIdx, codecOutputBuffers[outputBufIndex], outBuffInfo);
codec.releaseOutputBuffer(outputBufIndex, false);
}
} else if (outputBufIndex == MediaCodec.INFO_OUTPUT_FORMAT_CHANGED) {
outputFormat = codec.getOutputFormat();
Log.v("AUDIO", "Output format changed - " + outputFormat);
audioTrackIdx = mux.addTrack(outputFormat);
Log.i("AUDIO", "Start mux");
mux.start();
} else if (outputBufIndex == MediaCodec.INFO_OUTPUT_BUFFERS_CHANGED) {
Log.e("AUDIO", "Output buffers changed during encode!");
} else if (outputBufIndex != MediaCodec.INFO_TRY_AGAIN_LATER) {
Log.e("AUDIO", "Unknown return code from dequeueOutputBuffer - " + outputBufIndex);
}
}
} while (outBuffInfo.flags != MediaCodec.BUFFER_FLAG_END_OF_STREAM);
mux.stop();
mux.release();
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
Log.i("AUDIO", "Finished audio encoder test. Blocks: " + numBlocks);
}
}
Maybe endian matters. The byte-arrays to be queued in MediaCodec's input-buffer have to store all the PCM sample values in native byte order
as follows.
//private byte[] GetFloatArrayAsByteArray(float[] samples) throws IOException {
// ByteArrayOutputStream bas = new ByteArrayOutputStream();
// DataOutputStream ds = new DataOutputStream(bas);
// for (float f : samples)
// ds.writeFloat(f);
// return bas.toByteArray();
//}
private byte[] GetFloatArrayAsByteArray(float[] sampleBlock)
{
ByteBuffer buffer = ByteBuffer.allocate(sampleBlock.length * SIZE_OF_FLOAT);
buffer.order(ByteOrder.nativeOrder());
buffer.asFloatBuffer().put(sampleBlock);
return buffer.array();
}
And since valid range of PCM_FLOAT sample-value is between -1.0F and 1.0F, you would need to normalize the values.
float[] sampleBlock = new float[NUM_SAMPLES];
:
for (int i = 0; i < NUM_SAMPLES; i++) {
sampleBlock[i] = NextSineSample() / 32767.0F;
}
It's not clear how the length of the mp4 file can get wrong. If it plays the tone for 2 sec properly, it would be ok.