Tags: c#, unity-game-engine, azure-kinect

Fixing Color and Depth Misalignment in Offline 3D Replay


I'm developing an application in Unity using the Azure Kinect DK. The goal is to capture a short sequence of frames (color + depth), apply some offline noise removal on the depth data, then replay the frames as a 3D point cloud with color textures. However, the color and depth are not lining up correctly in the final replay: the texture appears shifted relative to the 3D geometry.

Current Issue / Question

When replaying the point cloud, the color texture is noticeably shifted from the actual geometry. I believe this may be because I need to align RGB to depth more accurately, either at capture time or by adjusting the code in the replay stage.

I’ve attempted to apply an extrinsic offset (e.g., extrinsicTranslation = new Vector3(0.025f, 0f, 0f)) and flipping the textures, but it still doesn’t match perfectly.

Important: I need to keep the rest of the logic as-is; I only want to fix the alignment issue.

Any suggestions on how to correctly align the Azure Kinect’s color and depth for an offline reconstruction?

For example:

  • Should I perform alignment during capture using the Azure Kinect SDK’s built-in transformation APIs (see the sketch after this list)?
  • Are there any flips or orientation adjustments that I’m missing?
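For reference, I think the capture-time option would look roughly like this. This is an untested sketch based on the SDK's Transformation API, reusing the kinectDevice, config, and cap variables from the capture script below; alignedColor is just a placeholder name.

// Once, after StartCameras():
Calibration calibration = kinectDevice.GetCalibration(config.DepthMode, config.ColorResolution);
Transformation transformation = calibration.CreateTransformation();

// Then, inside the capture loop, for each Capture cap:
using (Image alignedColor = transformation.ColorImageToDepthCamera(cap))
{
    // alignedColor has the depth image's resolution, with every pixel already
    // registered to the corresponding depth pixel. Saving this image instead of
    // cap.Color would make a manual extrinsic offset at replay time unnecessary.
}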

Below are the three main scripts involved:

  1. Capture (captures a sequence of frames and saves them in RAW files)
  2. Image Processing (applies a 3x3 median filter to the saved RAW depth files)
  3. Replay (loads the saved frames, reconstructs the point cloud, and applies the color as a texture)

I suspect that if I correctly align the RGB image to the depth coordinates at the capture stage (or apply a more appropriate alignment transform during the replay), this problem might go away. Any advice on the best way to fix the alignment would be greatly appreciated.
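On the replay side, I am also not sure whether my hard-coded intrinsics (depthFx = 365, colorFx = 1000, and so on) are close to the device's real calibration. As far as I understand the SDK, the actual values could be logged at capture time with something like the untested sketch below (again using kinectDevice and config from the capture script; the parameter indices 0 = cx, 1 = cy, 2 = fx, 3 = fy follow the SDK's intrinsic parameter ordering and are worth double-checking):

Calibration calib = kinectDevice.GetCalibration(config.DepthMode, config.ColorResolution);

// Intrinsic parameters of each camera
float[] depthParams = calib.DepthCameraCalibration.Intrinsics.Parameters;
float[] colorParams = calib.ColorCameraCalibration.Intrinsics.Parameters;

Debug.Log($"Depth intrinsics: cx={depthParams[0]} cy={depthParams[1]} fx={depthParams[2]} fy={depthParams[3]}");
Debug.Log($"Color intrinsics: cx={colorParams[0]} cy={colorParams[1]} fx={colorParams[2]} fy={colorParams[3]}");

Those logged values could then replace the guessed defaults in the replay script, while the Transformation sketch above would take care of the depth-to-color extrinsics.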

Result of my code

Azure Kinect Viewer results

Capture Script

using UnityEngine;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using Microsoft.Azure.Kinect.Sensor;

public class KinectCaptureController : MonoBehaviour
{
    // ======== Capture settings ========
    public int captureDuration = 10;
    // GameObject that holds the OfflineReconstructionController (assign in the Inspector)
    public GameObject offlineController;

    // List holding the captured frames
    private List<Capture> capturedFrames = new List<Capture>();
    private Device kinectDevice = null;
    private DeviceConfiguration config;

    // ======== Initialization & start of capture ========
    void Start()
    {
        Debug.Log("Azure Kinect初期化開始");

        // Open the Kinect device
        try
        {
            kinectDevice = Device.Open(0);
        }
        catch (System.Exception ex)
        {
            Debug.LogError("Azure Kinectのオープンに失敗: " + ex.Message);
            return;
        }

        // Configure the camera settings
        config = new DeviceConfiguration
        {
            ColorFormat = ImageFormat.ColorBGRA32,
            ColorResolution = ColorResolution.R720p,
            DepthMode = DepthMode.NFOV_Unbinned,
            SynchronizedImagesOnly = true,
            CameraFPS = FPS.FPS30
        };

        // Start the cameras
        try
        {
            kinectDevice.StartCameras(config);
        }
        catch (System.Exception ex)
        {
            Debug.LogError("カメラ開始に失敗: " + ex.Message);
            return;
        }

        StartCoroutine(CaptureCoroutine());
        Debug.Log("撮影開始");
    }

    // ======== Frame acquisition ========
    IEnumerator CaptureCoroutine()
    {
        float startTime = Time.time;
        while (Time.time - startTime < captureDuration)
        {
            Capture cap = null;
            try
            {
                cap = kinectDevice.GetCapture();
            }
            catch (System.Exception ex)
            {
                Debug.LogWarning("キャプチャ取得中例外: " + ex.Message);
            }
            if (cap != null)
            {
                capturedFrames.Add(cap);
            }
            yield return null;
        }

        kinectDevice.StopCameras();
        Debug.Log("撮影完了。取得フレーム数: " + capturedFrames.Count);
        SaveCapturedFrames();
    }

    // ======== Vertical texture flip ========
    Texture2D FlipTextureVertically(Texture2D original)
    {
        int width = original.width, height = original.height;
        Texture2D flipped = new Texture2D(width, height, original.format, false);
        Color[] origPixels = original.GetPixels();
        Color[] flippedPixels = new Color[origPixels.Length];

        for (int y = 0; y < height; y++)
        {
            for (int x = 0; x < width; x++)
            {
                flippedPixels[y * width + x] = origPixels[(height - 1 - y) * width + x];
            }
        }
        flipped.SetPixels(flippedPixels);
        flipped.Apply();
        return flipped;
    }

    // ======== Saving the captured data ========
    void SaveCapturedFrames()
    {
        string saveDir = Path.Combine(Application.persistentDataPath, "CapturedFrames");
        if (!Directory.Exists(saveDir))
        {
            Directory.CreateDirectory(saveDir);
        }

        for (int i = 0; i < capturedFrames.Count; i++)
        {
            Capture cap = capturedFrames[i];

            /* ----- Save the color image ----- */
            Image colorImage = cap.Color;
            int cw = colorImage.WidthPixels, ch = colorImage.HeightPixels;
            byte[] bgra = colorImage.Memory.ToArray();
            byte[] rgba = new byte[bgra.Length];
            // Convert BGRA → RGBA
            for (int j = 0; j < bgra.Length; j += 4)
            {
                rgba[j + 0] = bgra[j + 2];
                rgba[j + 1] = bgra[j + 1];
                rgba[j + 2] = bgra[j + 0];
                rgba[j + 3] = bgra[j + 3];
            }
            Texture2D colorTex = new Texture2D(cw, ch, TextureFormat.RGBA32, false);
            colorTex.LoadRawTextureData(rgba);
            colorTex.Apply();
            // Flip vertically before saving to correct the coordinate system
            colorTex = FlipTextureVertically(colorTex);

            byte[] colorRawData = colorTex.GetRawTextureData();
            string colorFilePath = Path.Combine(saveDir, $"frame_{i:D4}_color.raw");
            using (BinaryWriter writer = new BinaryWriter(File.Open(colorFilePath, FileMode.Create)))
            {
                writer.Write(cw);
                writer.Write(ch);
                writer.Write(colorRawData.Length);
                writer.Write(colorRawData);
            }

            /* ----- Save the depth image ----- */
            Image depthImage = cap.Depth;
            int dw = depthImage.WidthPixels, dh = depthImage.HeightPixels;
            byte[] depthRaw = depthImage.Memory.ToArray();
            ushort[] depthValues = new ushort[depthRaw.Length / 2];
            System.Buffer.BlockCopy(depthRaw, 0, depthValues, 0, depthRaw.Length);

            Texture2D depthTex = new Texture2D(dw, dh, TextureFormat.R8, false);
            Color32[] depthColors = new Color32[dw * dh];
            for (int y = 0; y < dh; y++)
            {
                for (int x = 0; x < dw; x++)
                {
                    int idx = y * dw + x;
                    byte intensity = (byte)(Mathf.Clamp(depthValues[idx] / 31, 0, 255));
                    depthColors[idx] = new Color32(intensity, intensity, intensity, 255);
                }
            }
            depthTex.SetPixels32(depthColors);
            depthTex.Apply();
            // Flip the depth image vertically as well to correct the coordinate system
            depthTex = FlipTextureVertically(depthTex);

            byte[] depthRawData = depthTex.GetRawTextureData();
            string depthFilePath = Path.Combine(saveDir, $"frame_{i:D4}_depth.raw");
            using (BinaryWriter writer = new BinaryWriter(File.Open(depthFilePath, FileMode.Create)))
            {
                writer.Write(dw);
                writer.Write(dh);
                writer.Write(depthRawData.Length);
                writer.Write(depthRawData);
            }
        }

        Debug.Log("撮影データの保存完了: " + saveDir);

        /* ----- Start offline processing ----- */
        if (offlineController != null)
        {
            offlineController.SetActive(true);
            offlineController.SendMessage("StartOfflineProcessing");
            Debug.Log("Offline processing triggered.");
        }
        else
        {
            Debug.LogWarning("offlineControllerがInspectorから参照されていません。");
        }

        gameObject.SetActive(false);
    }

    // ======== Cleanup ========
    void OnDestroy()
    {
        foreach (var cap in capturedFrames)
        {
            cap.Dispose();
        }
        capturedFrames.Clear();

        if (kinectDevice != null)
        {
            kinectDevice.Dispose();
            kinectDevice = null;
        }
    }
}

Image Processing Script (Median Filter)

using UnityEngine;
using System.Collections;
using System.IO;
using System.Threading.Tasks;

public class ImageProcessingController : MonoBehaviour
{
    // Assign the DynamicPointCloudController in the Inspector
    public GameObject dynamicPointCloudController;

    /// <summary>
    /// Called from KinectCaptureController. Starts the image processing.
    /// </summary>
    public void StartOfflineProcessing()
    {
        StartCoroutine(ProcessImages());
    }

    IEnumerator ProcessImages()
    {
        Debug.Log("【画像処理開始】");

        yield return StartCoroutine(PerformOfflineProcessing());

        Debug.Log("【画像処理完了】");

        if (dynamicPointCloudController != null)
        {
            dynamicPointCloudController.SetActive(true);
            dynamicPointCloudController.SendMessage("StartOfflineProcessing");
            Debug.Log("【ストリーミング起動】");
        }
        else
        {
            Debug.LogWarning("DynamicPointCloudController 未設定");
        }

        gameObject.SetActive(false);
    }

    IEnumerator PerformOfflineProcessing()
    {
        string dataDir = Path.Combine(Application.persistentDataPath, "CapturedFrames");
        if (!Directory.Exists(dataDir))
        {
            Debug.LogError("CapturedFrames フォルダが存在しません:" + dataDir);
            yield break;
        }

        string[] depthFiles = Directory.GetFiles(dataDir, "*_depth.raw");
        Debug.Log($"【対象ファイル数】 {depthFiles.Length} 個");

        for (int i = 0; i < depthFiles.Length; i++)
        {
            string filePath = depthFiles[i];
            Debug.Log($"【処理開始】({i + 1}/{depthFiles.Length}) {Path.GetFileName(filePath)}");
            ProcessAndOverwriteRawFile(filePath);
            Debug.Log($"【処理完了】({i + 1}/{depthFiles.Length})");
            yield return null; // 負荷分散
        }
        yield return null;
    }

    void ProcessAndOverwriteRawFile(string filePath)
    {
        int width, height, dataLength;
        byte[] rawData;

        // Read the header and data
        using (BinaryReader reader = new BinaryReader(File.Open(filePath, FileMode.Open)))
        {
            width = reader.ReadInt32();
            height = reader.ReadInt32();
            dataLength = reader.ReadInt32();
            rawData = reader.ReadBytes(dataLength);
        }

        if (rawData.Length != width * height)
            Debug.LogWarning($"【サイズ不一致】{filePath}: 期待 {width * height} バイト, 実際 {rawData.Length} バイト");

        // Apply the 3x3 median filter
        byte[] denoisedData = ApplyMedianFilter(rawData, width, height, 3);

        // Overwrite the original file
        using (BinaryWriter writer = new BinaryWriter(File.Open(filePath, FileMode.Create)))
        {
            writer.Write(width);
            writer.Write(height);
            writer.Write(denoisedData.Length);
            writer.Write(denoisedData);
        }
    }

    byte[] ApplyMedianFilter(byte[] input, int width, int height, int kernelSize)
    {
        int halfKernel = kernelSize / 2;
        byte[] output = new byte[input.Length];
        int kernelArea = kernelSize * kernelSize;

        // Process rows in parallel
        Parallel.For(0, height, y =>
        {
            byte[] neighbors = new byte[kernelArea];
            for (int x = 0; x < width; x++)
            {
                int count = 0;
                // Collect the values inside the kernel
                for (int ky = -halfKernel; ky <= halfKernel; ky++)
                {
                    int ny = y + ky;
                    if (ny < 0 || ny >= height) continue;
                    for (int kx = -halfKernel; kx <= halfKernel; kx++)
                    {
                        int nx = x + kx;
                        if (nx < 0 || nx >= width) continue;
                        neighbors[count++] = input[ny * width + nx];
                    }
                }
                // Insertion sort
                for (int i = 1; i < count; i++)
                {
                    byte key = neighbors[i];
                    int j = i - 1;
                    while (j >= 0 && neighbors[j] > key)
                    {
                        neighbors[j + 1] = neighbors[j];
                        j--;
                    }
                    neighbors[j + 1] = key;
                }
                output[y * width + x] = neighbors[count / 2];
            }
        });
        return output;
    }
}

Replay Script (Point Cloud Generation)

using UnityEngine;
using System.Collections;
using System.Collections.Generic;
using System.IO;

public class DynamicPointCloudController : MonoBehaviour
{
    // ======== Playback settings ========
    private string dataDir;
    // Playback interval (seconds)
    public float playbackInterval = 0.5f;

    // ======== Calibration parameters for offline point cloud generation ========
    [Header("Depth Camera Intrinsics")]
    public float depthFx = 365f;
    public float depthFy = 365f;
    public float depthCx = 320f;
    public float depthCy = 240f;
    public float depthScale = 31f; // scaling factor applied when saving

    [Header("Color Camera Intrinsics")]
    public float colorFx = 1000f;
    public float colorFy = 1000f;
    public float colorCx = 640f;
    public float colorCy = 360f;

    [Header("Extrinsics (Depth → Color)")]
    public Vector3 extrinsicTranslation = new Vector3(0.025f, 0f, 0f);
    public Quaternion extrinsicRotation = Quaternion.identity;

    [Header("Rendering Settings")]
    public Shader pointCloudShader;
    public Material pointCloudMaterial;

    void Awake()
    {
        dataDir = Path.Combine(Application.persistentDataPath, "CapturedFrames");
        Debug.Log("DynamicPointCloudController: Data directory = " + dataDir);
    }

    // Called via SendMessage("StartOfflineProcessing") from ImageProcessingController
    public void StartOfflineProcessing()
    {
        Debug.Log("Offline point cloud processing (playback) started.");
        StartCoroutine(ProcessCapturedFrames());
    }

    IEnumerator ProcessCapturedFrames()
    {
        int frameIndex = 0;
        while (true)
        {
            string colorPath = Path.Combine(dataDir, $"frame_{frameIndex:D4}_color.raw");
            string depthPath = Path.Combine(dataDir, $"frame_{frameIndex:D4}_depth.raw");

            if (!File.Exists(colorPath) || !File.Exists(depthPath))
            {
                Debug.Log("No more frames found. Playback finished.");
                break;
            }

            Debug.Log($"Frame {frameIndex:D4}: Loading raw files...");

            /* ----- Load the color image ----- */
            Texture2D colorTex;
            using (BinaryReader reader = new BinaryReader(File.Open(colorPath, FileMode.Open)))
            {
                int cw = reader.ReadInt32();
                int ch = reader.ReadInt32();
                int dataLength = reader.ReadInt32();
                byte[] colorRawData = reader.ReadBytes(dataLength);
                colorTex = new Texture2D(cw, ch, TextureFormat.RGBA32, false);
                colorTex.LoadRawTextureData(colorRawData);
                colorTex.Apply();
            }
            Debug.Log($"Frame {frameIndex:D4}: Color image loaded. Size: {colorTex.width}x{colorTex.height}");

            /* ----- Load the depth image ----- */
            Texture2D depthTex;
            using (BinaryReader reader = new BinaryReader(File.Open(depthPath, FileMode.Open)))
            {
                int dw = reader.ReadInt32();
                int dh = reader.ReadInt32();
                int depthDataLength = reader.ReadInt32();
                byte[] depthRawData = reader.ReadBytes(depthDataLength);
                depthTex = new Texture2D(dw, dh, TextureFormat.R8, false);
                depthTex.LoadRawTextureData(depthRawData);
                depthTex.Apply();
            }
            Debug.Log($"Frame {frameIndex:D4}: Depth image loaded. Size: {depthTex.width}x{depthTex.height}");

            // ----- Align the color image -----
            Color32[] alignedColors = AlignColorToDepth(colorTex, depthTex);

            /* ----- Point cloud generation ----- */
            int width = depthTex.width;
            int height = depthTex.height;
            float cx = depthCx;
            float cy = depthCy;

            byte[] depthData = depthTex.GetRawTextureData();
            int totalPixels = width * height;
            List<Vector3> points = new List<Vector3>(totalPixels);
            List<Color> colors = new List<Color>(totalPixels);

            for (int i = 0; i < totalPixels; i++)
            {
                byte d = depthData[i];
                if (d == 0)
                    continue;

                int x = i % width;
                int y = i / width;
                float z = (d * depthScale) / 1000f;
                float vx = (x - cx) * z / depthFx;
                float vy = (y - cy) * z / depthFy;

                points.Add(new Vector3(vx, vy, z));
                colors.Add(alignedColors[i]); // use the aligned colors
            }
            Debug.Log($"Frame {frameIndex:D4}: Point cloud generated. Total points: {points.Count}");

            int[] indices = new int[points.Count];
            for (int i = 0; i < points.Count; i++)
            {
                indices[i] = i;
            }

            Mesh pointMesh = new Mesh();
            pointMesh.indexFormat = UnityEngine.Rendering.IndexFormat.UInt32;
            pointMesh.vertices = points.ToArray();
            pointMesh.colors = colors.ToArray();
            pointMesh.SetIndices(indices, MeshTopology.Points, 0);
            pointMesh.RecalculateBounds();

            GameObject pointCloudObj = GameObject.Find("ReconstructedPointCloud");
            if (pointCloudObj == null)
            {
                pointCloudObj = new GameObject("ReconstructedPointCloud");
                MeshFilter mf = pointCloudObj.AddComponent<MeshFilter>();
                MeshRenderer mr = pointCloudObj.AddComponent<MeshRenderer>();

                if (pointCloudMaterial != null)
                    mr.material = pointCloudMaterial;
                else if (pointCloudShader != null)
                    mr.material = new Material(pointCloudShader);
                else
                    mr.material = new Material(Shader.Find("Particles/Standard Unlit"));

                mf.mesh = pointMesh;
            }
            else
            {
                MeshFilter mf = pointCloudObj.GetComponent<MeshFilter>();
                if (mf != null)
                    mf.mesh = pointMesh;
            }

            Destroy(colorTex);
            Destroy(depthTex);

            yield return new WaitForSeconds(playbackInterval);
            frameIndex++;
        }
        Debug.Log("Point cloud playback complete.");
    }

    // ======== Color alignment function ========
    private Color32[] AlignColorToDepth(Texture2D colorTex, Texture2D depthTex)
    {
        int depthWidth = depthTex.width;
        int depthHeight = depthTex.height;
        Color32[] alignedColors = new Color32[depthWidth * depthHeight];

        Color32[] colorPixels = colorTex.GetPixels32();
        int colorWidth = colorTex.width;
        int colorHeight = colorTex.height;

        byte[] depthData = depthTex.GetRawTextureData();
        int totalPixels = depthWidth * depthHeight;

        for (int i = 0; i < totalPixels; i++)
        {
            byte d = depthData[i];
            if (d == 0)
            {
                alignedColors[i] = new Color32(0, 0, 0, 255);
                continue;
            }

            int u_depth = i % depthWidth;
            int v_depth_loaded = i / depthWidth;
            int v_depth = (depthHeight - 1) - v_depth_loaded; // vertical flip correction

            float z = (d * depthScale) / 1000f;
            float x_depth = (u_depth - depthCx) * z / depthFx;
            float y_depth = (v_depth - depthCy) * z / depthFy;
            Vector3 pointDepth = new Vector3(x_depth, y_depth, z);

            Vector3 pointColor = extrinsicRotation * pointDepth + extrinsicTranslation;

            if (pointColor.z <= 0.001f)
            {
                alignedColors[i] = new Color32(0, 0, 0, 255);
                continue;
            }
            float u_color = (pointColor.x * colorFx) / pointColor.z + colorCx;
            float v_color = (pointColor.y * colorFy) / pointColor.z + colorCy;

            int u_color_int = Mathf.RoundToInt(u_color);
            int v_color_int_top = Mathf.RoundToInt(v_color);
            int v_color_int = (colorHeight - 1) - v_color_int_top; // vertical flip correction

            if (u_color_int < 0 || u_color_int >= colorWidth || v_color_int < 0 || v_color_int >= colorHeight)
            {
                alignedColors[i] = new Color32(0, 0, 0, 255);
            }
            else
            {
                int colorIndex = v_color_int * colorWidth + u_color_int;
                alignedColors[i] = colorPixels[colorIndex];
            }
        }
        return alignedColors;
    }
}

Solution

  • It's difficult to say exactly what is going wrong without being able to play with it, but the first step I would take for alignment is to render the depth texture as a regular grayscale texture. Then I'd render both the color and depth textures on planes in a way that makes them easy to compare, either to the screen or onto a plane with one texture overlaid at reduced alpha, or perhaps flipping between rendering one texture and the other each frame.

    The idea is to make it as easy as possible to see exactly how the textures line up, before you do any projection. I had similar problems a few years ago and doing this helped me enormously. A rough sketch of the overlay approach is below.
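
    A rough, untested sketch of that overlay idea, assuming the two frames are already loaded as Texture2D (for example from the saved RAW files); the class name, field names, and rectangle size are placeholders:

using UnityEngine;

public class AlignmentOverlayDebug : MonoBehaviour
{
    public Texture2D colorTex;   // loaded color frame
    public Texture2D depthTex;   // loaded depth frame (grayscale)
    [Range(0f, 1f)] public float depthAlpha = 0.5f;

    void OnGUI()
    {
        if (colorTex == null || depthTex == null) return;

        // Draw both textures into the same screen rectangle so any offset is obvious.
        Rect rect = new Rect(10, 10, 640, 360);
        GUI.DrawTexture(rect, colorTex, ScaleMode.StretchToFill);

        Color prev = GUI.color;
        GUI.color = new Color(1f, 1f, 1f, depthAlpha); // semi-transparent depth on top
        GUI.DrawTexture(rect, depthTex, ScaleMode.StretchToFill);
        GUI.color = prev;
    }
}

    If the textures already disagree in this view, the problem is in the saved data itself (flips, resolution, registration); if they agree, the problem is more likely in the projection math in AlignColorToDepth.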