Tags: java, android, kotlin, android-camerax

Android Camera X ImageAnalysis image plane buffers size (limit) does not match image size


Issue

This is a general question about the ImageAnalysis use case of CameraX, but I will use a slightly modified version of this codelab as an example to illustrate the issue I'm seeing. I'm seeing a mismatch between the image dimensions (image.height * image.width) and the size of the associated ByteBuffer as measured by its limit and/or capacity. I would expect them to match, with one pixel of the image mapping to a single value in the ByteBuffer. This does not appear to be the case. I'm hoping someone can clarify whether this is a bug, and if not, how to interpret the mismatch.
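
To make the expectation concrete, this is roughly the check I have in mind (a sketch only, meant for the body of an ImageAnalysis.Analyzer; the log tag is arbitrary):

// Expectation: an 8-bit plane holds one byte per pixel,
// so each plane's buffer should contain width * height bytes.
val expectedBytes = image.width * image.height
val actualBytes = image.planes[0].buffer.limit()
Log.d("SizeCheck", "expected=$expectedBytes actual=$actualBytes")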

Details

On step 6 (Image Analysis) of the codelab they provide an analyzer class for computing average luminosity; here is my full MainActivity including it:

package jp.oist.cameraxcodelab

import androidx.appcompat.app.AppCompatActivity
import android.os.Bundle
import android.Manifest
import android.content.pm.PackageManager
import android.net.Uri
import android.util.Log
import android.util.Size
import android.widget.Toast
import androidx.core.app.ActivityCompat
import androidx.core.content.ContextCompat
import java.util.concurrent.Executors
import androidx.camera.core.*
import androidx.camera.lifecycle.ProcessCameraProvider
import kotlinx.android.synthetic.main.activity_main.*
import java.io.File
import java.nio.ByteBuffer
import java.text.SimpleDateFormat
import java.util.*
import java.util.concurrent.ExecutorService
typealias LumaListener = (luma: Double) -> Unit

class MainActivity : AppCompatActivity() {
    private var imageCapture: ImageCapture? = null

    private lateinit var outputDirectory: File
    private lateinit var cameraExecutor: ExecutorService

    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)
        setContentView(R.layout.activity_main)

        // Request camera permissions
        if (allPermissionsGranted()) {
            startCamera()
        } else {
            ActivityCompat.requestPermissions(
                    this, REQUIRED_PERMISSIONS, REQUEST_CODE_PERMISSIONS)
        }

        // Set up the listener for take photo button
        camera_capture_button.setOnClickListener { takePhoto() }

        outputDirectory = getOutputDirectory()

        cameraExecutor = Executors.newSingleThreadExecutor()
    }

    private fun takePhoto() {
        // Get a stable reference of the modifiable image capture use case
        val imageCapture = imageCapture ?: return

        // Create time-stamped output file to hold the image
        val photoFile = File(
                outputDirectory,
                SimpleDateFormat(FILENAME_FORMAT, Locale.US
                ).format(System.currentTimeMillis()) + ".jpg")

        // Create output options object which contains file + metadata
        val outputOptions = ImageCapture.OutputFileOptions.Builder(photoFile).build()

        // Set up image capture listener, which is triggered after photo has
        // been taken
        imageCapture.takePicture(
                outputOptions, ContextCompat.getMainExecutor(this), object : ImageCapture.OnImageSavedCallback {
            override fun onError(exc: ImageCaptureException) {
                Log.e(TAG, "Photo capture failed: ${exc.message}", exc)
            }

            override fun onImageSaved(output: ImageCapture.OutputFileResults) {
                val savedUri = Uri.fromFile(photoFile)
                val msg = "Photo capture succeeded: $savedUri"
                Toast.makeText(baseContext, msg, Toast.LENGTH_SHORT).show()
                Log.d(TAG, msg)
            }
        })
    }

    private fun startCamera() {
        val cameraProviderFuture = ProcessCameraProvider.getInstance(this)

        cameraProviderFuture.addListener(Runnable {
            // Used to bind the lifecycle of cameras to the lifecycle owner
            val cameraProvider: ProcessCameraProvider = cameraProviderFuture.get()

            // Preview
            val preview = Preview.Builder()
                    .build()
                    .also {
                        it.setSurfaceProvider(viewFinder.createSurfaceProvider())
                    }

            imageCapture = ImageCapture.Builder()
                    .build()

            val imageAnalyzer = ImageAnalysis.Builder()
                    .setTargetResolution(Size(480, 640)) // I added this line
                    .build()
                    .also {
                        it.setAnalyzer(cameraExecutor, LuminosityAnalyzer { luma ->
//                            Log.d(TAG, "Average luminosity: $luma")
                        })
                    }

            // Select back camera as a default
            val cameraSelector = CameraSelector.DEFAULT_BACK_CAMERA

            try {
                // Unbind use cases before rebinding
                cameraProvider.unbindAll()

                // Bind use cases to camera
                cameraProvider.bindToLifecycle(
                        this, cameraSelector, preview, imageCapture, imageAnalyzer)

            } catch(exc: Exception) {
                Log.e(TAG, "Use case binding failed", exc)
            }

        }, ContextCompat.getMainExecutor(this))
    }
    
    private fun allPermissionsGranted() = REQUIRED_PERMISSIONS.all {
        ContextCompat.checkSelfPermission(
                baseContext, it) == PackageManager.PERMISSION_GRANTED
    }

    private fun getOutputDirectory(): File {
        val mediaDir = externalMediaDirs.firstOrNull()?.let {
            File(it, resources.getString(R.string.app_name)).apply { mkdirs() } }
        return if (mediaDir != null && mediaDir.exists())
            mediaDir else filesDir
    }

    override fun onDestroy() {
        super.onDestroy()
        cameraExecutor.shutdown()
    }

    companion object {
        private const val TAG = "CameraXBasic"
        private const val FILENAME_FORMAT = "yyyy-MM-dd-HH-mm-ss-SSS"
        private const val REQUEST_CODE_PERMISSIONS = 10
        private val REQUIRED_PERMISSIONS = arrayOf(Manifest.permission.CAMERA)
    }

    override fun onRequestPermissionsResult(
            requestCode: Int, permissions: Array<String>, grantResults:
            IntArray) {
        if (requestCode == REQUEST_CODE_PERMISSIONS) {
            if (allPermissionsGranted()) {
                startCamera()
            } else {
                Toast.makeText(this,
                        "Permissions not granted by the user.",
                        Toast.LENGTH_SHORT).show()
                finish()
            }
        }
    }

    private class LuminosityAnalyzer(private val listener: LumaListener) : ImageAnalysis.Analyzer {

        private fun ByteBuffer.toByteArray(): ByteArray {
            rewind()    // Rewind the buffer to zero
            val data = ByteArray(remaining())
            get(data)   // Copy the buffer into a byte array
            return data // Return the byte array
        }

        override fun analyze(image: ImageProxy) {

            val buffer = image.planes[0].buffer
            // Note: this loop logs plane 0's buffer limit/capacity on every iteration
            // (the loop variable `plane` is never read), and buffer.get() appends one
            // raw byte value to the end of the capacity figure in the log line.
            for ((index, plane) in image.planes.withIndex()) {
                Log.i("analyzer", "Plane: $index" + " H: " + image.height + " W: " +
                        image.width + " HxW: " + image.height * image.width + " buffer.limit: " +
                        buffer.limit() + " buffer.cap: " + buffer.capacity() + buffer.get())
            }
            val data = buffer.toByteArray()
            val pixels = data.map { it.toInt() and 0xFF }
            val luma = pixels.average()

            listener(luma)

            image.close()
        }
    }

}

I am trying to pass the array data from the planes externally, so I'm interested in the val data defined in LuminosityAnalyzer.analyze().

Expectation

I wanted to check the dimensions of the data array, so I added the log statement you see after the initialization of val buffer. For the default resolution I get this in the log:

Plane: 0 H: 480 W: 640 HxW: 307200 buffer.limit: 307200 buffer.cap: 3072005

Plane: 1 H: 480 W: 640 HxW: 307200 buffer.limit: 307200 buffer.cap: 3072003

Plane: 2 H: 480 W: 640 HxW: 307200 buffer.limit: 307200 buffer.cap: 3072003

This is what I would expect: H*W equals buffer.limit, and the buffer represents the pixels of the image in that particular plane.

Unexpected Result

If I change the resolution by setting up the imageAnalyzer with the setTargetResolution() method, I get strange results. For example, if I set it to setTargetResolution(Size(144, 176)) I get the following log:

Plane: 0 H: 144 W: 176 HxW: 25344 buffer.limit: 27632 buffer.cap: 276324

Plane: 1 H: 144 W: 176 HxW: 25344 buffer.limit: 27632 buffer.cap: 276324

Plane: 2 H: 144 W: 176 HxW: 25344 buffer.limit: 27632 buffer.cap: 276322

Note the mismatch between the image size (H×W) and the buffer limit and capacity.

A few other examples for Plane 0 (for brevity):

Plane: 0 H: 288 W: 352 HxW: 101376 buffer.limit: 110560 buffer.cap: 1105604

Plane: 0 H: 600 W: 800 HxW: 480000 buffer.limit: 499168 buffer.cap: 4991685

Plane: 0 H: 960 W: 1280 HxW: 1228800 buffer.limit: 1228800 buffer.cap: 12288004
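
Doing the arithmetic on these logged numbers, the extra bytes always work out to a whole number per row: for 176×144, 27632 − 25344 = 2288 = 16 × 143, i.e. every row but the last seems to carry 16 extra bytes, as if each row were 192 bytes wide rather than 176; for 352×288, 110560 − 101376 = 9184 = 32 × 287; for 800×600, 499168 − 480000 = 19168 = 32 × 599; and for 1280×960 there are no extra bytes at all. I'm just reading this off the numbers, so I may be interpreting it wrong.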

Does this have something to do with sensor size vs standard image sizes not matching up? Should I expect the remaining entries in the buffer to be zeros or otherwise meaningless?
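
In case it helps to reproduce this, here is a sketch of extra per-plane logging that would expose the buffer layout (rowStride and pixelStride come from ImageProxy.PlaneProxy; I have not captured that output here):

for ((index, plane) in image.planes.withIndex()) {
    // rowStride: bytes from the start of one row to the start of the next
    // pixelStride: bytes between adjacent samples within a row
    Log.i("analyzer", "Plane $index: rowStride=${plane.rowStride}" +
            " pixelStride=${plane.pixelStride} limit=${plane.buffer.limit()}")
}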

I was originally running this not in Kotlin but in Java, and was getting even stranger results there. If you log the image size and the buffer limit for each of the three planes, you get limits both larger and smaller than the image size, depending on the plane:

Plane: 0 width: 176 height: 144 WxH: 25344 buffer.limit: 27632

Plane: 1 width: 176 height: 144 WxH: 25344 buffer.limit: 13807

Plane: 2 width: 176 height: 144 WxH: 25344 buffer.limit: 13807

For whatever reason in Kotlin the limit remains the same between planes.

How should I interpret this? Is the image being padded in Plane 0 and cropped in planes 1 and 2? Or is this a bug?

For reference, the manifest, layout file, and build.gradle files are copied below (they should be the same as in the codelab). At the end I've also included the Java version of the MainActivity that produces the mismatch in buffer limits between planes. Bonus points if you can tell me why ByteBuffer.array() hangs and why I have to use ByteBuffer.get() instead:

AndroidManifest:

<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
    package="jp.oist.cameraxcodelab">

    <uses-feature android:name="android.hardware.camera.any" />
    <uses-permission android:name="android.permission.CAMERA" />

    <application
        android:allowBackup="true"
        android:icon="@mipmap/ic_launcher"
        android:label="@string/app_name"
        android:roundIcon="@mipmap/ic_launcher_round"
        android:supportsRtl="true"
        android:theme="@style/Theme.CameraXCodeLab">
        <activity android:name=".MainActivity">
            <intent-filter>
                <action android:name="android.intent.action.MAIN" />

                <category android:name="android.intent.category.LAUNCHER" />
            </intent-filter>
        </activity>
    </application>

</manifest>

activity_main.xml

<?xml version="1.0" encoding="utf-8"?>
<androidx.constraintlayout.widget.ConstraintLayout
    xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:tools="http://schemas.android.com/tools"
    xmlns:app="http://schemas.android.com/apk/res-auto"
    android:layout_width="match_parent"
    android:layout_height="match_parent"
    tools:context=".MainActivity">

    <Button
        android:id="@+id/camera_capture_button"
        android:layout_width="100dp"
        android:layout_height="100dp"
        android:layout_marginBottom="50dp"
        android:scaleType="fitCenter"
        android:text="Take Photo"
        app:layout_constraintLeft_toLeftOf="parent"
        app:layout_constraintRight_toRightOf="parent"
        app:layout_constraintBottom_toBottomOf="parent"
        android:elevation="2dp" />

    <androidx.camera.view.PreviewView
        android:id="@+id/viewFinder"
        android:layout_width="match_parent"
        android:layout_height="match_parent" />

</androidx.constraintlayout.widget.ConstraintLayout>

build.gradle(:app)

plugins {
    id 'com.android.application'
    id 'kotlin-android'
    id 'kotlin-android-extensions'
}

android {
    compileSdkVersion 30
    buildToolsVersion "30.0.2"

    defaultConfig {
        applicationId "jp.oist.cameraxcodelab"
        minSdkVersion 21
        targetSdkVersion 30
        versionCode 1
        versionName "1.0"

        testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner"
    }

    buildTypes {
        release {
            minifyEnabled false
            proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro'
        }
    }
    compileOptions {
        sourceCompatibility JavaVersion.VERSION_1_8
        targetCompatibility JavaVersion.VERSION_1_8
    }
    kotlinOptions {
        jvmTarget = '1.8'
    }
}

dependencies {

    implementation "org.jetbrains.kotlin:kotlin-stdlib:$kotlin_version"
    implementation 'androidx.core:core-ktx:1.2.0'
    implementation 'androidx.appcompat:appcompat:1.2.0'
    implementation 'com.google.android.material:material:1.2.1'
    implementation 'androidx.constraintlayout:constraintlayout:2.0.4'
    testImplementation 'junit:junit:4.+'
    androidTestImplementation 'androidx.test.ext:junit:1.1.2'
    androidTestImplementation 'androidx.test.espresso:espresso-core:3.3.0'
    def camerax_version = "1.0.0-beta07"
// CameraX core library using camera2 implementation
    implementation "androidx.camera:camera-camera2:$camerax_version"
// CameraX Lifecycle Library
    implementation "androidx.camera:camera-lifecycle:$camerax_version"
// CameraX View class
    implementation "androidx.camera:camera-view:1.0.0-alpha14"

}

Java version of MainActivity

package jp.oist.abcvlib.camera;

import android.Manifest;
import android.content.pm.PackageManager;
import android.media.Image;
import android.os.Bundle;
import android.util.Log;
import android.util.Size;
import android.widget.Toast;

import androidx.annotation.NonNull;
import androidx.appcompat.app.AppCompatActivity;
import androidx.camera.core.Camera;
import androidx.camera.core.CameraSelector;
import androidx.camera.core.ImageAnalysis;
import androidx.camera.core.ImageProxy;
import androidx.camera.core.Preview;
import androidx.camera.lifecycle.ProcessCameraProvider;
import androidx.camera.view.PreviewView;
import androidx.core.app.ActivityCompat;
import androidx.core.content.ContextCompat;
import androidx.lifecycle.LifecycleOwner;

import com.google.common.util.concurrent.ListenableFuture;

import java.nio.ByteBuffer;
import java.nio.DoubleBuffer;
import java.util.Arrays;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.ScheduledThreadPoolExecutor;

public class MainActivity extends AppCompatActivity implements LifecycleOwner {

    private static final int REQUEST_CODE_PERMISSIONS = 10;
    private static final String[] REQUIRED_PERMISSIONS = { Manifest.permission.CAMERA };

    private ListenableFuture<ProcessCameraProvider> mCameraProviderFuture;
    private PreviewView mPreviewView;

    private ExecutorService analysisExecutor;


    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);
        mPreviewView = findViewById(R.id.preview_view);

        // Request camera permissions
        if (allPermissionsGranted()) {
            startCamera();
        } else {
            ActivityCompat.requestPermissions(
                    this, REQUIRED_PERMISSIONS, REQUEST_CODE_PERMISSIONS);
        }

        int threadPoolSize = 8;
        analysisExecutor = new ScheduledThreadPoolExecutor(threadPoolSize);
    }

    private void bindAll(@NonNull ProcessCameraProvider cameraProvider) {
        Preview preview = new Preview.Builder().build();
        CameraSelector cameraSelector = new CameraSelector.Builder()
                .requireLensFacing(CameraSelector.LENS_FACING_FRONT)
                .build();

        ImageAnalysis imageAnalysis =
                new ImageAnalysis.Builder()
                        .setTargetResolution(new Size(10, 10))
                        .setBackpressureStrategy(ImageAnalysis.STRATEGY_KEEP_ONLY_LATEST)
                        .build();

        imageAnalysis.setAnalyzer(analysisExecutor, new ImageAnalysis.Analyzer() {
            @Override
            @androidx.camera.core.ExperimentalGetImage
            public void analyze(@NonNull ImageProxy imageProxy) {
                Image image = imageProxy.getImage();
                if (image != null) {
                    int width = image.getWidth();
                    int height = image.getHeight();
                    byte[] frame = new byte[width * height];
                    Image.Plane[] planes = image.getPlanes();
                    int idx = 0;
                    for (Image.Plane plane : planes){
                        ByteBuffer frameBuffer = plane.getBuffer();
                        int n = frameBuffer.limit();  // use limit() so the value matches the "buffer.limit" label logged below
                        Log.i("analyzer", "Plane: " + idx + " width: " + width + " height: " + height + " WxH: " + width*height + " buffer.limit: " + n);
                        frameBuffer.rewind();
                        frame = new byte[n];
                        frameBuffer.get(frame);
                        idx++;
                    }
                }
                imageProxy.close();
            }
        });

        Camera camera = cameraProvider.bindToLifecycle(this, cameraSelector, preview, imageAnalysis);
        preview.setSurfaceProvider(mPreviewView.getSurfaceProvider());
    }

    private void startCamera() {
        mPreviewView.post(() -> {
            mCameraProviderFuture = ProcessCameraProvider.getInstance(this);
            mCameraProviderFuture.addListener(() -> {
                try {
                    ProcessCameraProvider cameraProvider = mCameraProviderFuture.get();
                    bindAll(cameraProvider);
                } catch (ExecutionException | InterruptedException e) {
                    // No errors need to be handled for this Future.
                    // This should never be reached.
                }
            }, ContextCompat.getMainExecutor(this));
        });
    }

    /**
     * Process result from permission request dialog box, has the request
     * been granted? If yes, start Camera. Otherwise display a toast
     */
    @Override
    public void onRequestPermissionsResult(int requestCode, @NonNull String[] permissions, @NonNull int[] grantResults) {
        // super.onRequestPermissionsResult(requestCode, permissions, grantResults);
        if (requestCode == REQUEST_CODE_PERMISSIONS) {
            if (allPermissionsGranted()) {
                startCamera();
            } else {
                Toast.makeText(this,
                        "Permissions not granted by the user.",
                        Toast.LENGTH_SHORT).show();
                finish();
            }
        }
    }

    /**
     * Check if all permission specified in the manifest have been granted
     */
    private boolean allPermissionsGranted() {
        for (String permission : REQUIRED_PERMISSIONS) {
            if (ContextCompat.checkSelfPermission(getBaseContext(), permission) != PackageManager.PERMISSION_GRANTED) {
                return false;
            }
        }
        return true;
    }
}

Solution

  • Please take a look at the details of the ImageProxy.PlaneProxy class; the planes are not just packed image data. They may have both a row stride and a pixel stride.

    Row stride is the distance in bytes from the start of one row of image data to the start of the next; anything beyond the width is padding. Pixel stride is the distance in bytes between two adjacent pixel samples within a row.

    Also, planes 1 and 2 in a YUV_420_888 image are subsampled by a factor of 2 in both width and height relative to plane 0, so they carry fewer samples; the reason you're getting buffers of comparable size is likely that their pixel stride is 2.

    For some resolutions the row stride may be equal to the width (the processing hardware usually has constraints such as the row stride having to be a multiple of 16 or 32 bytes), but not for all of them. The sketch below shows one way to take the strides into account when reading the luma plane.
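
Building on that answer, here is a sketch of how the codelab's analyze() could read the Y plane with the strides taken into account (an untested sketch: it assumes the YUV_420_888 output that ImageAnalysis produces and reuses the LumaListener from the question's LuminosityAnalyzer):

override fun analyze(image: ImageProxy) {
    val plane = image.planes[0]          // Y (luma) plane
    val buffer = plane.buffer
    buffer.rewind()
    val rowStride = plane.rowStride      // bytes from one row start to the next (>= width)
    val pixelStride = plane.pixelStride  // bytes between adjacent samples in a row
    val width = image.width
    val height = image.height

    val bytes = ByteArray(buffer.remaining())
    buffer.get(bytes)

    // Average only the width * height luma samples, skipping any per-row padding.
    var sum = 0L
    for (row in 0 until height) {
        var offset = row * rowStride
        for (col in 0 until width) {
            sum += bytes[offset].toInt() and 0xFF
            offset += pixelStride
        }
    }
    listener(sum.toDouble() / (width * height))
    image.close()
}

For the 176×144 case in the question, the numbers are consistent with a Y-plane row stride of 192: 192 × 143 + 176 = 27632, exactly the logged limit. The Java log for planes 1 and 2 also fits: the chroma planes hold 88×72 samples, and with a pixel stride of 2 and the same row stride, 192 × 71 + 2 × 87 + 1 = 13807. (Those stride values are inferred from the logged sizes, not read from a device.) In other words, the extra bytes look like row padding rather than image data, and should be skipped rather than averaged in.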