Search code examples
android · mp4 · android-mediacodec · aac · mediamuxer

Android audio encoding fails


The following code is an attempt to use AAC on Android to encode a floating point sine tone signal into an mp4 file. However, it fails. Either I get a distorted tone or nothing at all. The length of the mp4 file is also wrong.

I would appreciate some help solving this problem. Thanks!

public class MyAudioTest {

    // Muxer that packages the encoded AAC frames into an MP4 container.
    MediaMuxer mux = null;
    int SAMPLING_RATE = 44100;       // PCM sample rate in Hz
    int BUFFER_SIZE = 4096;          // unused here; kept for compatibility
    int timeOutMicroSeconds = 10000; // MediaCodec dequeue timeout

    double sampleRateHz = 44100;
    double pha = 0.0;                                // current oscillator phase, [0, 2*pi)
    double deltapha = 2.0 * Math.PI * 440.0 / 44100; // phase increment for a 440 Hz tone
    double maxpha = 2.0 * Math.PI;
    int SIZE_OF_FLOAT = 4;   // bytes per float PCM sample
    int NUM_SAMPLES = 441;   // samples per input block (10 ms at 44.1 kHz)

    int numBlocks = 0;       // blocks queued to the encoder (diagnostics)

    /**
     * Serializes float PCM samples into a byte array in native byte order,
     * which is the layout MediaCodec expects for ENCODING_PCM_FLOAT input.
     */
    private byte[] GetFloatArrayAsByteArray(float[] sampleBlock) {
        ByteBuffer buffer = ByteBuffer.allocate(sampleBlock.length * SIZE_OF_FLOAT);
        buffer.order(ByteOrder.nativeOrder());
        buffer.asFloatBuffer().put(sampleBlock);
        return buffer.array();
    }

    /**
     * Returns the next 440 Hz sine sample in [-1.0, 1.0].
     *
     * Bug fix: the original declared a {@code short} return type while the
     * computed value is a {@code float} — that does not compile, and a
     * narrowing cast would truncate every sample to 0 (distortion/silence).
     */
    private float NextSineSample() {
        float val = (float) Math.sin(pha);
        pha = pha + deltapha;
        if (pha >= maxpha) pha = pha - maxpha;
        return val;
    }

    /**
     * Encodes two seconds of a float-PCM 440 Hz sine tone to AAC-LC and
     * muxes the result into /mnt/sdcard/testing.mp4.
     */
    public void AudioEncoderTest() {

        Log.i("AUDIO", "Starting audio encoder test");

        MediaCodec codec = null;
        boolean muxerStarted = false; // writeSampleData()/stop() before start() throws

        try {
            mux = new MediaMuxer("/mnt/sdcard/testing.mp4", MediaMuxer.OutputFormat.MUXER_OUTPUT_MPEG_4);

            // createAudioFormat() already sets the MIME type, sample rate and
            // channel count, so the original's redundant setters are dropped.
            MediaFormat outputFormat = MediaFormat.createAudioFormat(MediaFormat.MIMETYPE_AUDIO_AAC, SAMPLING_RATE, 1);
            outputFormat.setInteger(MediaFormat.KEY_AAC_PROFILE, MediaCodecInfo.CodecProfileLevel.AACObjectLC);
            outputFormat.setInteger(MediaFormat.KEY_BIT_RATE, 96000);
            // NOTE(review): KEY_PCM_ENCODING with ENCODING_PCM_FLOAT needs
            // API 24+ and encoder support; an encoder that ignores it reads
            // the bytes as 16-bit PCM and produces noise — verify on device.
            outputFormat.setInteger(MediaFormat.KEY_PCM_ENCODING, AudioFormat.ENCODING_PCM_FLOAT);

            codec = MediaCodec.createEncoderByType(MediaFormat.MIMETYPE_AUDIO_AAC);
            codec.configure(outputFormat, null, null, MediaCodec.CONFIGURE_FLAG_ENCODE);
            codec.start();

            ByteBuffer[] codecInputBuffers = codec.getInputBuffers();
            ByteBuffer[] codecOutputBuffers = codec.getOutputBuffers();

            MediaCodec.BufferInfo outBuffInfo = new MediaCodec.BufferInfo();

            float[] sampleBlock = new float[NUM_SAMPLES];

            boolean hasMoreData = true;
            long presentationTimeUs = 0; // was double; MediaCodec timestamps are long microseconds
            int audioTrackIdx = 0;
            int totalBytesRead = 0;
            int totalSamplesProcessed = 0;

            do {
                // ---- Feed PCM input to the encoder --------------------------
                int inputBufIndex = 0;
                while (inputBufIndex != -1 && hasMoreData) {
                    inputBufIndex = codec.dequeueInputBuffer(timeOutMicroSeconds);

                    if (inputBufIndex >= 0) {
                        ByteBuffer dstBuf = codecInputBuffers[inputBufIndex];
                        dstBuf.clear();

                        // Bug fix: the original checked the 2-second limit AFTER
                        // generating a block and then discarded that block, so
                        // the file came up one block short. Check first instead.
                        if (totalSamplesProcessed >= SAMPLING_RATE * 2) {
                            hasMoreData = false;
                            Log.i("AUDIO", "No more input for encoder");
                            codec.queueInputBuffer(inputBufIndex, 0, 0, presentationTimeUs, MediaCodec.BUFFER_FLAG_END_OF_STREAM);
                        } else {
                            for (int i = 0; i < NUM_SAMPLES; i++) {
                                sampleBlock[i] = NextSineSample();
                            }
                            numBlocks++;
                            totalSamplesProcessed += NUM_SAMPLES;

                            byte[] tempBuffer = GetFloatArrayAsByteArray(sampleBlock);
                            int bytesRead = tempBuffer.length;
                            totalBytesRead += bytesRead;
                            dstBuf.put(tempBuffer, 0, bytesRead);
                            Log.i("AUDIO", "Encoding data. Block: " + numBlocks);

                            codec.queueInputBuffer(inputBufIndex, 0, bytesRead, presentationTimeUs, 0);
                            // Timestamp of the NEXT block = total samples queued / rate.
                            presentationTimeUs = 1000000L * (totalBytesRead / SIZE_OF_FLOAT) / SAMPLING_RATE;
                            Log.i("AUDIOTIME", "Block: " + numBlocks + " | Presentation time: " + presentationTimeUs);
                        }
                    }
                }

                // ---- Drain encoded AAC output -------------------------------
                int outputBufIndex = 0;
                while (outputBufIndex != MediaCodec.INFO_TRY_AGAIN_LATER) {
                    outputBufIndex = codec.dequeueOutputBuffer(outBuffInfo, timeOutMicroSeconds);
                    if (outputBufIndex >= 0) {
                        ByteBuffer encodedData = codecOutputBuffers[outputBufIndex];
                        encodedData.position(outBuffInfo.offset);
                        encodedData.limit(outBuffInfo.offset + outBuffInfo.size);

                        // Bug fix: codec-config buffers must never be written as
                        // samples (the CSD reaches the muxer via addTrack()); the
                        // original's "config && size != 0" test let zero-size
                        // config buffers fall through to writeSampleData().
                        if ((outBuffInfo.flags & MediaCodec.BUFFER_FLAG_CODEC_CONFIG) == 0
                                && outBuffInfo.size != 0 && muxerStarted) {
                            Log.i("AUDIO", "Writing encoded data to mux");
                            mux.writeSampleData(audioTrackIdx, encodedData, outBuffInfo);
                        }
                        codec.releaseOutputBuffer(outputBufIndex, false);
                    } else if (outputBufIndex == MediaCodec.INFO_OUTPUT_FORMAT_CHANGED) {
                        MediaFormat newFormat = codec.getOutputFormat();
                        Log.v("AUDIO", "Output format changed - " + newFormat);
                        audioTrackIdx = mux.addTrack(newFormat);
                        Log.i("AUDIO", "Start mux");
                        mux.start();
                        muxerStarted = true;
                    } else if (outputBufIndex == MediaCodec.INFO_OUTPUT_BUFFERS_CHANGED) {
                        // Bug fix: the original only logged an error here and
                        // kept indexing the stale buffer array afterwards.
                        codecOutputBuffers = codec.getOutputBuffers();
                    } else if (outputBufIndex != MediaCodec.INFO_TRY_AGAIN_LATER) {
                        Log.e("AUDIO", "Unknown return code from dequeueOutputBuffer - " + outputBufIndex);
                    }
                }

                // Bug fix: flags is a bit mask — the original's plain "!="
                // comparison misses EOS when any other flag bit is also set.
            } while ((outBuffInfo.flags & MediaCodec.BUFFER_FLAG_END_OF_STREAM) == 0);

            codec.stop();
        } catch (IOException e) {
            // FileNotFoundException is an IOException, so one catch suffices.
            e.printStackTrace();
        } finally {
            // Bug fix: the original never released the codec and leaked the
            // muxer whenever an exception skipped the stop()/release() calls.
            if (codec != null) {
                codec.release();
            }
            if (mux != null) {
                if (muxerStarted) {
                    mux.stop();
                }
                mux.release();
                mux = null;
            }
        }

        Log.i("AUDIO", "Finished audio encoder test. Blocks: " + numBlocks);
    }
}

Solution

  • Byte order (endianness) matters. The byte arrays queued into MediaCodec's input buffers must store all the PCM sample values in native byte order, as follows.

    //private byte[] GetFloatArrayAsByteArray(float[] samples) throws IOException {
    //    ByteArrayOutputStream bas = new ByteArrayOutputStream();
    //    DataOutputStream ds = new DataOutputStream(bas);
    //    for (float f : samples)
    //        ds.writeFloat(f);
    //    return bas.toByteArray();
    //}
    
    private byte[] GetFloatArrayAsByteArray(float[] sampleBlock)
    {
        ByteBuffer buffer = ByteBuffer.allocate(sampleBlock.length * SIZE_OF_FLOAT);
        buffer.order(ByteOrder.nativeOrder());
        buffer.asFloatBuffer().put(sampleBlock);
        return buffer.array();
    }
    

    And since the valid range of a PCM_FLOAT sample value is -1.0F to 1.0F, you would need to normalize the values. (The division by 32767.0F below applies when the sample generator produces full-scale 16-bit values; a raw Math.sin() result is already within range.)

     float[] sampleBlock = new float[NUM_SAMPLES];
     :
    
        for (int i = 0; i < NUM_SAMPLES; i++) {
             sampleBlock[i] = NextSineSample() / 32767.0F;
        }
    

    It's not clear how the length of the mp4 file could come out wrong. If it plays the tone properly for 2 seconds, it should be fine.