Search code examples
c++copencvgstreameryuv

Converting gstreamer YUV 4:2:0 (I420) raw frame to OpenCV cv::Mat


I am new and still learning gstreamer and its concepts, So part of doing this was the followup practice mention in gstreamer tutorials Basic tutorial 3: Dynamic pipelines.

It was easy enough to finish the practice mentioned at the end of the tutorial, that is the dynamic pad linking of a video/x-raw src pad to videoconvert and autovideosink.

To escalate the learning curve a little bit, I decided to make a C++ gstreamer dummy plugin that links to OpenCV and convert the raw frame to a cv::Mat and shows those frames via cv::imshow. The C ++ gstreamer plugin (element) will be referred to from now on as myfilter.

But first I needed a capsFilter that links videoconvert and myfilter and forces videoconvert to produce frames in BGR format instead of the negotiated I420 by default which is done easily. So far so Good!

I also found that autovideosink could not negotiate properly with the BGR output of myfilter, so I decided to use a fakesink and sync it with the rest of the pipeline.

Problem is that converted frames are kind of shifted/skewed and became monochrome (gray).

Weirdly enough, we know that BGR is 24 bits per pixel. The resolution for given stream is 854x480 which should result in a total raw frame buffer size of 854x480x24/8 = 1229760 bytes, how come that buffer size shown is 1230720 ? this shows that there are about 960 bytes extra. I could only relate to the 960 as they are double the height of resolution (i.e. 2 extra bytes per row)

What's wrong here?

Kindly, Find the driver code (which constructs the pipeline) and the myfilter's chain function that simply loads the incoming buffers in cv::Mat and cv::imshow them below:

The Driver Code

#include <gst/gst.h>

/* Functions below print the Capabilities in a human-friendly format */
static gboolean print_field(GQuark field, const GValue *value, gpointer pfx) {
  gchar *str = gst_value_serialize(value);

  g_print("%s  %15s: %s\n", (gchar *)pfx, g_quark_to_string(field), str);
  g_free(str);
  return TRUE;
}

static void print_caps(const GstCaps *caps, const gchar *pfx) {
  guint i;

  g_return_if_fail(caps != NULL);

  if (gst_caps_is_any(caps)) {
    g_print("%sANY\n", pfx);
    return;
  }
  if (gst_caps_is_empty(caps)) {
    g_print("%sEMPTY\n", pfx);
    return;
  }

  for (i = 0; i < gst_caps_get_size(caps); i++) {
    GstStructure *structure = gst_caps_get_structure(caps, i);

    g_print("%s%s\n", pfx, gst_structure_get_name(structure));
    gst_structure_foreach(structure, print_field, (gpointer)pfx);
  }
}

static void print_pad_capabilities(GstElement *element, gchar *pad_name) {
  GstPad *pad = NULL;
  GstCaps *caps = NULL;

  /* Retrieve pad */
  pad = gst_element_get_static_pad(element, pad_name);
  if (!pad) {
    g_printerr("Could not retrieve pad '%s'\n", pad_name);
    return;
  }

  /* Retrieve negotiated caps (or acceptable caps if negotiation is not finished yet) */
  caps = gst_pad_get_current_caps(pad);
  if (!caps) caps = gst_pad_query_caps(pad, NULL);

  /* Print and free */
  g_print("Caps for the %s pad:\n", pad_name);
  print_caps(caps, "      ");
  gst_caps_unref(caps);
  gst_object_unref(pad);
}

/* Structure to contain all our information, so we can pass it to callbacks */
typedef struct _CustomData {
  GstElement *pipeline;
  GstElement *source;

  GstElement *cap_filter;
  GstElement *convert_video;
  GstElement *dummy_plugin;
  GstElement *sink_video;

  GstElement *convert_audio;
  GstElement *resample_audio;
  GstElement *sink_audio;
} CustomData;

/* Handler for the pad-added signal */
static void pad_added_handler(GstElement *src, GstPad *pad, CustomData *data);

int main(int argc, char *argv[]) {
  CustomData data;
  GstBus *bus;
  GstMessage *msg;
  GstStateChangeReturn ret;
  gboolean terminate = FALSE;

  /* Initialize GStreamer */
  gst_init(&argc, &argv);

  /* Create the elements */
  data.source = gst_element_factory_make("uridecodebin", "source");
  data.cap_filter = gst_element_factory_make("capsfilter", "capsfilter");
  data.convert_video = gst_element_factory_make("videoconvert", "videoconvert");
  data.dummy_plugin = gst_element_factory_make("myfilter", "myfilter");
  //  data.sink_video = gst_element_factory_make("autovideosink", "videosink");
  data.sink_video = gst_element_factory_make("fakesink", "videosink");
  data.convert_audio = gst_element_factory_make("audioconvert", "audioconvert");
  data.resample_audio = gst_element_factory_make("audioresample", "audioresample");
  data.sink_audio = gst_element_factory_make("autoaudiosink", "audiosink");

  /* Create the empty pipeline */
  data.pipeline = gst_pipeline_new("test-pipeline");

  if (!data.pipeline || !data.source || !data.convert_audio || !data.resample_audio || !data.sink_audio ||
      !data.convert_video || !data.sink_video || !data.dummy_plugin || !data.cap_filter) {
    g_printerr("Not all elements could be created.\n");
    return -1;
  }

  /* Build the pipeline. Note that we are NOT linking the source at this
   * point. We will do it later. */
  gst_bin_add_many(GST_BIN(data.pipeline), data.source, data.convert_audio, data.resample_audio, data.sink_audio,
                   data.convert_video, data.cap_filter, data.dummy_plugin, data.sink_video, NULL);
  if (!gst_element_link_many(data.convert_audio, data.resample_audio, data.sink_audio, NULL)) {
    g_printerr("Elements could not be linked.\n");
    gst_object_unref(data.pipeline);
    return -1;
  }

  /*prepare caps filter*/
  GstCaps *filtercaps = gst_caps_new_simple("video/x-raw", "format", G_TYPE_STRING, "BGR", NULL);
  g_object_set(G_OBJECT(data.cap_filter), "caps", filtercaps, NULL);
  gst_caps_unref(filtercaps);

  g_object_set(G_OBJECT(data.sink_video), "sync", 1, NULL);

  if (!gst_element_link_many(data.convert_video, data.cap_filter, data.dummy_plugin, data.sink_video, NULL)) {
    g_printerr("Elements could not be linked.\n");
    gst_object_unref(data.pipeline);
    return -1;
  }

  //  g_print("In NULL state:\n");
  //  print_pad_capabilities(data.convert_video, "sink");

  /* Set the URI to play */
  g_object_set(data.source, "uri",
               "https://www.freedesktop.org/software/gstreamer-sdk/data/media/sintel_trailer-480p.webm", NULL);

  /* Connect to the pad-added signal */
  g_signal_connect(data.source, "pad-added", G_CALLBACK(pad_added_handler), &data);

  /* Start playing */
  ret = gst_element_set_state(data.pipeline, GST_STATE_PLAYING);
  if (ret == GST_STATE_CHANGE_FAILURE) {
    g_printerr("Unable to set the pipeline to the playing state.\n");
    gst_object_unref(data.pipeline);
    return -1;
  }

  /* Listen to the bus */
  bus = gst_element_get_bus(data.pipeline);
  do {
    msg = gst_bus_timed_pop_filtered(bus, GST_CLOCK_TIME_NONE,
                                     GST_MESSAGE_STATE_CHANGED | GST_MESSAGE_ERROR | GST_MESSAGE_EOS);

    /* Parse message */
    if (msg != NULL) {
      GError *err;
      gchar *debug_info;

      switch (GST_MESSAGE_TYPE(msg)) {
        case GST_MESSAGE_ERROR:
          gst_message_parse_error(msg, &err, &debug_info);
          g_printerr("Error received from element %s: %s\n", GST_OBJECT_NAME(msg->src), err->message);
          g_printerr("Debugging information: %s\n", debug_info ? debug_info : "none");
          g_clear_error(&err);
          g_free(debug_info);
          terminate = TRUE;
          break;
        case GST_MESSAGE_EOS:
          g_print("End-Of-Stream reached.\n");
          terminate = TRUE;
          break;
        case GST_MESSAGE_STATE_CHANGED:
          /* We are only interested in state-changed messages from the pipeline */
          if (GST_MESSAGE_SRC(msg) == GST_OBJECT(data.pipeline)) {
            //            g_print("In Playing state:\n");
            //            print_pad_capabilities(data.convert_video, "sink");
          }
          break;
        default:
          /* We should not reach here */
          g_printerr("Unexpected message received.\n");
          break;
      }
      gst_message_unref(msg);
    }
  } while (!terminate);

  /* Free resources */
  gst_object_unref(bus);
  gst_element_set_state(data.pipeline, GST_STATE_NULL);
  gst_object_unref(data.pipeline);
  return 0;
}

/* This function will be called by the pad-added signal */
static void pad_added_handler(GstElement *src, GstPad *new_pad, CustomData *data) {
  g_print("Received new pad '%s' from '%s':\n", GST_PAD_NAME(new_pad), GST_ELEMENT_NAME(src));

  GstPad *audio_sink_pad = gst_element_get_static_pad(data->convert_audio, "sink");
  GstPad *video_sink_pad = gst_element_get_static_pad(data->convert_video, "sink");

  GstPadLinkReturn ret;
  GstStructure *new_pad_struct = NULL;
  const gchar *new_pad_type = NULL;

  if (!gst_pad_is_linked(video_sink_pad)) {
    GstCaps *new_pad_caps = NULL;
    new_pad_caps = gst_pad_get_current_caps(new_pad);
    new_pad_struct = gst_caps_get_structure(new_pad_caps, 0);
    new_pad_type = gst_structure_get_name(new_pad_struct);
    if (g_str_has_prefix(new_pad_type, "video/x-raw")) {
      // Check the frame format
      const gchar *format = gst_structure_get_string(new_pad_struct, "format");
      g_print("Frame Raw format is %s\n", format);

      /* Attempt the link */
      ret = gst_pad_link(new_pad, video_sink_pad);
      if (GST_PAD_LINK_FAILED(ret)) {
        g_print("Type is '%s' but link failed.\n", new_pad_type);
      } else {
        g_print("Link succeeded (type '%s').\n", new_pad_type);
      }
    } else
      g_print("It has type '%s' which is not raw audio. Ignoring.\n", new_pad_type);
    if (new_pad_caps != NULL) gst_caps_unref(new_pad_caps);
  } else
    g_print("video convert is already linked. Ignoring.\n");

  if (!gst_pad_is_linked(audio_sink_pad)) {
    GstCaps *new_pad_caps = NULL;
    new_pad_caps = gst_pad_get_current_caps(new_pad);
    new_pad_struct = gst_caps_get_structure(new_pad_caps, 0);
    new_pad_type = gst_structure_get_name(new_pad_struct);
    if (g_str_has_prefix(new_pad_type, "audio/x-raw")) {
      /* Attempt the link */
      ret = gst_pad_link(new_pad, audio_sink_pad);
      if (GST_PAD_LINK_FAILED(ret)) {
        g_print("Type is '%s' but link failed.\n", new_pad_type);
      } else {
        g_print("Link succeeded (type '%s').\n", new_pad_type);
      }
    } else
      g_print("It has type '%s' which is not raw audio. Ignoring.\n", new_pad_type);

    if (new_pad_caps != NULL) gst_caps_unref(new_pad_caps);
  } else
    g_print("audio convert is already linked. Ignoring.\n");

  /* Unreference the sink pad */
  gst_object_unref(audio_sink_pad);
  gst_object_unref(video_sink_pad);
}

The Chain Function

static GstFlowReturn gst_my_filter_chain(GstPad *pad, GstObject *parent, GstBuffer *buf) {
  GstMyFilter *filter;

  filter = GST_MYFILTER(parent);

  GstCaps *current_caps = gst_pad_get_current_caps(filter->sinkpad);
  GstStructure *new_pad_struct = gst_caps_get_structure(current_caps, 0);
  const gchar *new_pad_type = gst_structure_get_name(new_pad_struct);
  if (g_str_has_prefix(new_pad_type, "video/x-raw")) {
    const gchar *format = gst_structure_get_string(new_pad_struct, "format");
    gint width = 0, height = 0;
    if (!gst_structure_get_int(new_pad_struct, "width", &width) ||
        !gst_structure_get_int(new_pad_struct, "height", &height)) {
      g_print("No width/height available\n");
    }
    g_print("The video size of this set of capabilities is %s %dx%d\n", format, width, height);
    if (width && height) {
      GstMapInfo map;
      gst_buffer_map(buf, &map, GST_MAP_READ);
      g_print("frame size in bytes: %lu\n", map.size);
      // Convert gstreamer data to OpenCV Mat
      cv::Mat frame(cv::Size(width, height), CV_8UC3, (char *)map.data);
      cv::imshow("OpenCV Frame", frame);
      cv::waitKey(1);
      gst_buffer_unmap(buf, &map);
    }
  }

  if (!current_caps) gst_caps_unref(current_caps);

  /* just push out the incoming buffer without touching it */
  return gst_pad_push(filter->srcpad, buf);
}

Solution

  • Thanks to Sven Nilsson, The answer is simply

    cv::Mat frame(cv::Size(width, height), CV_8UC3, (char *)map.data, (width * 3) + 2);

    I had to change the opencv's cv::Mat::AUTO_STEP which is calculated as cols*elemSize() to (width * 3) + 2 since 854 is not divisible by 4.