This is a general question regarding ImageAnalysis use case of camera-x but I will use a slightly modified version of this codelab as an example to illustrate the issue I'm seeing. I'm seeing a mismatch between the image dimensions (image.height * image.width) and the associated ByteBuffer size as measured by its limit and or capacity. I would expect them to be the same and mapping one pixel of the image to a single value in the ByteBuffer. This does not appear to be the case. Hoping someone can clarify if this is a bug, and if not, how to interpret this mismatch.
On step 6 (Image Analysis) of the codelab they provide a subclass for their luminosity analyzer:
package jp.oist.cameraxcodelab
import androidx.appcompat.app.AppCompatActivity
import android.os.Bundle
import android.Manifest
import android.content.pm.PackageManager
import android.net.Uri
import android.util.Log
import android.util.Size
import android.widget.Toast
import androidx.core.app.ActivityCompat
import androidx.core.content.ContextCompat
import java.util.concurrent.Executors
import androidx.camera.core.*
import androidx.camera.lifecycle.ProcessCameraProvider
import kotlinx.android.synthetic.main.activity_main.*
import java.io.File
import java.nio.ByteBuffer
import java.text.SimpleDateFormat
import java.util.*
import java.util.concurrent.ExecutorService
typealias LumaListener = (luma: Double) -> Unit
class MainActivity : AppCompatActivity() {
private var imageCapture: ImageCapture? = null
private lateinit var outputDirectory: File
private lateinit var cameraExecutor: ExecutorService
override fun onCreate(savedInstanceState: Bundle?) {
super.onCreate(savedInstanceState)
setContentView(R.layout.activity_main)
// Request camera permissions
if (allPermissionsGranted()) {
startCamera()
} else {
ActivityCompat.requestPermissions(
this, REQUIRED_PERMISSIONS, REQUEST_CODE_PERMISSIONS)
}
// Set up the listener for take photo button
camera_capture_button.setOnClickListener { takePhoto() }
outputDirectory = getOutputDirectory()
cameraExecutor = Executors.newSingleThreadExecutor()
}
private fun takePhoto() {
// Get a stable reference of the modifiable image capture use case
val imageCapture = imageCapture ?: return
// Create time-stamped output file to hold the image
val photoFile = File(
outputDirectory,
SimpleDateFormat(FILENAME_FORMAT, Locale.US
).format(System.currentTimeMillis()) + ".jpg")
// Create output options object which contains file + metadata
val outputOptions = ImageCapture.OutputFileOptions.Builder(photoFile).build()
// Set up image capture listener, which is triggered after photo has
// been taken
imageCapture.takePicture(
outputOptions, ContextCompat.getMainExecutor(this), object : ImageCapture.OnImageSavedCallback {
override fun onError(exc: ImageCaptureException) {
Log.e(TAG, "Photo capture failed: ${exc.message}", exc)
}
override fun onImageSaved(output: ImageCapture.OutputFileResults) {
val savedUri = Uri.fromFile(photoFile)
val msg = "Photo capture succeeded: $savedUri"
Toast.makeText(baseContext, msg, Toast.LENGTH_SHORT).show()
Log.d(TAG, msg)
}
})
}
private fun startCamera() {
val cameraProviderFuture = ProcessCameraProvider.getInstance(this)
cameraProviderFuture.addListener(Runnable {
// Used to bind the lifecycle of cameras to the lifecycle owner
val cameraProvider: ProcessCameraProvider = cameraProviderFuture.get()
// Preview
val preview = Preview.Builder()
.build()
.also {
it.setSurfaceProvider(viewFinder.createSurfaceProvider())
}
imageCapture = ImageCapture.Builder()
.build()
val imageAnalyzer = ImageAnalysis.Builder()
.setTargetResolution(Size(480, 640)) // I added this line
.build()
.also {
it.setAnalyzer(cameraExecutor, LuminosityAnalyzer { luma ->
// Log.d(TAG, "Average luminosity: $luma")
})
}
// Select back camera as a default
val cameraSelector = CameraSelector.DEFAULT_BACK_CAMERA
try {
// Unbind use cases before rebinding
cameraProvider.unbindAll()
// Bind use cases to camera
cameraProvider.bindToLifecycle(
this, cameraSelector, preview, imageCapture, imageAnalyzer)
} catch(exc: Exception) {
Log.e(TAG, "Use case binding failed", exc)
}
}, ContextCompat.getMainExecutor(this))
}
private fun allPermissionsGranted() = REQUIRED_PERMISSIONS.all {
ContextCompat.checkSelfPermission(
baseContext, it) == PackageManager.PERMISSION_GRANTED
}
private fun getOutputDirectory(): File {
val mediaDir = externalMediaDirs.firstOrNull()?.let {
File(it, resources.getString(R.string.app_name)).apply { mkdirs() } }
return if (mediaDir != null && mediaDir.exists())
mediaDir else filesDir
}
override fun onDestroy() {
super.onDestroy()
cameraExecutor.shutdown()
}
companion object {
private const val TAG = "CameraXBasic"
private const val FILENAME_FORMAT = "yyyy-MM-dd-HH-mm-ss-SSS"
private const val REQUEST_CODE_PERMISSIONS = 10
private val REQUIRED_PERMISSIONS = arrayOf(Manifest.permission.CAMERA)
}
override fun onRequestPermissionsResult(
requestCode: Int, permissions: Array<String>, grantResults:
IntArray) {
if (requestCode == REQUEST_CODE_PERMISSIONS) {
if (allPermissionsGranted()) {
startCamera()
} else {
Toast.makeText(this,
"Permissions not granted by the user.",
Toast.LENGTH_SHORT).show()
finish()
}
}
}
private class LuminosityAnalyzer(private val listener: LumaListener) : ImageAnalysis.Analyzer {
private fun ByteBuffer.toByteArray(): ByteArray {
rewind() // Rewind the buffer to zero
val data = ByteArray(remaining())
get(data) // Copy the buffer into a byte array
return data // Return the byte array
}
override fun analyze(image: ImageProxy) {
val buffer = image.planes[0].buffer
for ((index,plane) in image.planes.withIndex()){
Log.i("analyzer", "Plane: $index" + " H: " + image.height + " W: " +
image.width + " HxW: " + image.height * image.width + " buffer.limit: " +
buffer.limit() + " buffer.cap: " + buffer.capacity() + buffer.get())
}
val data = buffer.toByteArray()
val pixels = data.map { it.toInt() and 0xFF }
val luma = pixels.average()
listener(luma)
image.close()
}
}
}
I am trying to pass the array data from the planes externally, so I'm interested in the var data
defined in LuminosityAnalyzer.analyze()
.
I wanted to check the dimensions of the data array and so added the log statement you see after the init of val buffer. For the default resolution I'm getting this in the log:
Plane: 0 H: 480 W: 640 HxW: 307200 buffer.limit: 307200 buffer.cap: 3072005
Plane: 1 H: 480 W: 640 HxW: 307200 buffer.limit: 307200 buffer.cap: 3072003
Plane: 2 H: 480 W: 640 HxW: 307200 buffer.limit: 307200 buffer.cap: 3072003
This is what I would expect. The H*W = buffer.limit. The buffer represents the pixels of the image in the particular plane.
If I change the resolution by setting up the imageAnalyzer with the setTargetResolution() method, I get strange results. For example, if I set this to setTargetResolution(144, 176)
I get the following log:
Plane: 0 H: 144 W: 176 HxW: 25344 buffer.limit: 27632 buffer.cap: 276324
Plane: 1 H: 144 W: 176 HxW: 25344 buffer.limit: 27632 buffer.cap: 276324
Plane: 2 H: 144 W: 176 HxW: 25344 buffer.limit: 27632 buffer.cap: 276322
Note the different size of the image vs the buffer limit and capacity.
A few other examples for Plane 0 (for brevity):
Plane: 0 H: 288 W: 352 HxW: 101376 buffer.limit: 110560 buffer.cap: 1105604
Plane: 0 H: 600 W: 800 HxW: 480000 buffer.limit: 499168 buffer.cap: 4991685
Plane: 0 H: 960 W: 1280 HxW: 1228800 buffer.limit: 1228800 buffer.cap: 12288004
Does this have something to do with sensor size vs standard image sizes not matching up? Should I expect the remaining entries in the buffer to be zeros or otherwise meaningless?
I was originally not running this in Kotlin, but in Java, and was getting even stranger results there. If you log the image size and the buffer limit for each of the three planes, you get limits both larger and smaller than the image size for different layers:
Plane: 0 width: 176 height: 144 WxH: 25344 buffer.limit: 27632
Plane: 1 width: 176 height: 144 WxH: 25344 buffer.limit: 13807
Plane: 2 width: 176 height: 144 WxH: 25344 buffer.limit: 13807
For whatever reason in Kotlin the limit remains the same between planes.
How should I interpret this? Is the image being padded in Plane 0 and cropped in planes 1 and 2? Or is this a bug?
For reference the manifest, layout file, and build.gradle files are copied below (should be same as CodeLab) At the end I've also included the Java version of the MainActivity that produces the mismatch in buffer limits between planes. Bonus points if you can tell me why ByteBuffer.array() hangs and why I have to use ByteBuffer.get() instead:
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
package="jp.oist.cameraxcodelab">
<uses-feature android:name="android.hardware.camera.any" />
<uses-permission android:name="android.permission.CAMERA" />
<application
android:allowBackup="true"
android:icon="@mipmap/ic_launcher"
android:label="@string/app_name"
android:roundIcon="@mipmap/ic_launcher_round"
android:supportsRtl="true"
android:theme="@style/Theme.CameraXCodeLab">
<activity android:name=".MainActivity">
<intent-filter>
<action android:name="android.intent.action.MAIN" />
<category android:name="android.intent.category.LAUNCHER" />
</intent-filter>
</activity>
</application>
</manifest>
<?xml version="1.0" encoding="utf-8"?>
<androidx.constraintlayout.widget.ConstraintLayout
xmlns:android="http://schemas.android.com/apk/res/android"
xmlns:tools="http://schemas.android.com/tools"
xmlns:app="http://schemas.android.com/apk/res-auto"
android:layout_width="match_parent"
android:layout_height="match_parent"
tools:context=".MainActivity">
<Button
android:id="@+id/camera_capture_button"
android:layout_width="100dp"
android:layout_height="100dp"
android:layout_marginBottom="50dp"
android:scaleType="fitCenter"
android:text="Take Photo"
app:layout_constraintLeft_toLeftOf="parent"
app:layout_constraintRight_toRightOf="parent"
app:layout_constraintBottom_toBottomOf="parent"
android:elevation="2dp" />
<androidx.camera.view.PreviewView
android:id="@+id/viewFinder"
android:layout_width="match_parent"
android:layout_height="match_parent" />
</androidx.constraintlayout.widget.ConstraintLayout>
plugins {
id 'com.android.application'
id 'kotlin-android'
id 'kotlin-android-extensions'
}
android {
compileSdkVersion 30
buildToolsVersion "30.0.2"
defaultConfig {
applicationId "jp.oist.cameraxcodelab"
minSdkVersion 21
targetSdkVersion 30
versionCode 1
versionName "1.0"
testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner"
}
buildTypes {
release {
minifyEnabled false
proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro'
}
}
compileOptions {
sourceCompatibility JavaVersion.VERSION_1_8
targetCompatibility JavaVersion.VERSION_1_8
}
kotlinOptions {
jvmTarget = '1.8'
}
}
dependencies {
implementation "org.jetbrains.kotlin:kotlin-stdlib:$kotlin_version"
implementation 'androidx.core:core-ktx:1.2.0'
implementation 'androidx.appcompat:appcompat:1.2.0'
implementation 'com.google.android.material:material:1.2.1'
implementation 'androidx.constraintlayout:constraintlayout:2.0.4'
testImplementation 'junit:junit:4.+'
androidTestImplementation 'androidx.test.ext:junit:1.1.2'
androidTestImplementation 'androidx.test.espresso:espresso-core:3.3.0'
def camerax_version = "1.0.0-beta07"
// CameraX core library using camera2 implementation
implementation "androidx.camera:camera-camera2:$camerax_version"
// CameraX Lifecycle Library
implementation "androidx.camera:camera-lifecycle:$camerax_version"
// CameraX View class
implementation "androidx.camera:camera-view:1.0.0-alpha14"
}
package jp.oist.abcvlib.camera;
import android.Manifest;
import android.content.pm.PackageManager;
import android.media.Image;
import android.os.Bundle;
import android.util.Log;
import android.util.Size;
import android.widget.Toast;
import androidx.annotation.NonNull;
import androidx.appcompat.app.AppCompatActivity;
import androidx.camera.core.Camera;
import androidx.camera.core.CameraSelector;
import androidx.camera.core.ImageAnalysis;
import androidx.camera.core.ImageProxy;
import androidx.camera.core.Preview;
import androidx.camera.lifecycle.ProcessCameraProvider;
import androidx.camera.view.PreviewView;
import androidx.core.app.ActivityCompat;
import androidx.core.content.ContextCompat;
import androidx.lifecycle.LifecycleOwner;
import com.google.common.util.concurrent.ListenableFuture;
import java.nio.ByteBuffer;
import java.nio.DoubleBuffer;
import java.util.Arrays;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.ScheduledThreadPoolExecutor;
public class MainActivity extends AppCompatActivity implements LifecycleOwner {
private static final int REQUEST_CODE_PERMISSIONS = 10;
private static final String[] REQUIRED_PERMISSIONS = { Manifest.permission.CAMERA };
private ListenableFuture<ProcessCameraProvider> mCameraProviderFuture;
private PreviewView mPreviewView;
private ExecutorService analysisExecutor;
@Override
protected void onCreate(Bundle savedInstanceState) {
super.onCreate(savedInstanceState);
setContentView(R.layout.activity_main);
mPreviewView = findViewById(R.id.preview_view);
// Request camera permissions
if (allPermissionsGranted()) {
startCamera();
} else {
ActivityCompat.requestPermissions(
this, REQUIRED_PERMISSIONS, REQUEST_CODE_PERMISSIONS);
}
int threadPoolSize = 8;
analysisExecutor = new ScheduledThreadPoolExecutor(threadPoolSize);
}
private void bindAll(@NonNull ProcessCameraProvider cameraProvider) {
Preview preview = new Preview.Builder().build();
CameraSelector cameraSelector = new CameraSelector.Builder()
.requireLensFacing(CameraSelector.LENS_FACING_FRONT)
.build();
ImageAnalysis imageAnalysis =
new ImageAnalysis.Builder()
.setTargetResolution(new Size(10, 10))
.setBackpressureStrategy(ImageAnalysis.STRATEGY_KEEP_ONLY_LATEST)
.build();
imageAnalysis.setAnalyzer(analysisExecutor, new ImageAnalysis.Analyzer() {
@Override
@androidx.camera.core.ExperimentalGetImage
public void analyze(@NonNull ImageProxy imageProxy) {
Image image = imageProxy.getImage();
if (image != null) {
int width = image.getWidth();
int height = image.getHeight();
byte[] frame = new byte[width * height];
Image.Plane[] planes = image.getPlanes();
int idx = 0;
for (Image.Plane plane : planes){
ByteBuffer frameBuffer = plane.getBuffer();
int n = frameBuffer.capacity();
Log.i("analyzer", "Plane: " + idx + " width: " + width + " height: " + height + " WxH: " + width*height + " buffer.limit: " + n);
frameBuffer.rewind();
frame = new byte[n];
frameBuffer.get(frame);
idx++;
}
}
imageProxy.close();
}
});
Camera camera = cameraProvider.bindToLifecycle(this, cameraSelector, preview, imageAnalysis);
preview.setSurfaceProvider(mPreviewView.getSurfaceProvider());
}
private void startCamera() {
mPreviewView.post(() -> {
mCameraProviderFuture = ProcessCameraProvider.getInstance(this);
mCameraProviderFuture.addListener(() -> {
try {
ProcessCameraProvider cameraProvider = mCameraProviderFuture.get();
bindAll(cameraProvider);
} catch (ExecutionException | InterruptedException e) {
// No errors need to be handled for this Future.
// This should never be reached.
}
}, ContextCompat.getMainExecutor(this));
});
}
/**
* Process result from permission request dialog box, has the request
* been granted? If yes, start Camera. Otherwise display a toast
*/
@Override
public void onRequestPermissionsResult(int requestCode, @NonNull String[] permissions, @NonNull int[] grantResults) {
// super.onRequestPermissionsResult(requestCode, permissions, grantResults);
if (requestCode == REQUEST_CODE_PERMISSIONS) {
if (allPermissionsGranted()) {
startCamera();
} else {
Toast.makeText(this,
"Permissions not granted by the user.",
Toast.LENGTH_SHORT).show();
finish();
}
}
}
/**
* Check if all permission specified in the manifest have been granted
*/
private boolean allPermissionsGranted() {
for (String permission : REQUIRED_PERMISSIONS) {
if (ContextCompat.checkSelfPermission(getBaseContext(), permission) != PackageManager.PERMISSION_GRANTED) {
return false;
}
}
return true;
}
}
Please take a look at the details of the ImageProxy.PlaneProxy class; they planes are not just packed image data. They may have both row and pixel stride.
Row stride is padding between two adjacent rows of image data. Pixel stride is padding between two adjacent pixels.
Also, planes 1 and 2 in a YUV_420_888 image have half the pixel count of plane 0; the reason you're getting the same size is likely because of pixel stride being 2.
For some resolutions, the stride may be equal to width (usually the processing hardware has some constraint like the row stride has to be a multiple of 16 or 32 bytes), but it may not for all.