Tags: c#, webrtc, video-streaming, video-capture, video-encoding

How to implement low-latency, 60 fps video encoding in WebRTC with C#?


Recently, I've been trying to implement a 60 fps, low-latency screenshare in C# with a library called SIPSorcery. I've tried my best so far to maximize the frame rate, but I only get around 20 fps.

I tried to work out whether the bottleneck was the encoding or the screenshotting. Using the Visual Studio CPU usage profiler, I saw that most of the time was spent in the encoder. Still, I rewrote the screenshotting function to use SharpDX and noticed a small increase in performance, but nothing near 60 fps. That leads me to believe the encoder is the problem; perhaps it could use FFmpeg or some GPU encoding. Apologies in advance if I've got any details wrong, as I'm relatively new to video encoding and decoding.
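
In case it helps frame the question, this is roughly how I imagine plugging in FFmpeg as the encoder (an untested sketch; I'm assuming the SIPSorceryMedia.FFmpeg package exposes an FFmpegVideoEncoder that implements IVideoEncoder, and that FFmpegInit.Initialise is the right bootstrap call):

//Program.cs (hypothetical excerpt)
using SIPSorceryMedia.Abstractions;
using SIPSorceryMedia.FFmpeg;

// Load the native FFmpeg libraries before creating any encoders.
FFmpegInit.Initialise(FfmpegLogLevelEnum.AV_LOG_WARNING);

// Swap the managed VP8VideoEncoder for the FFmpeg-backed encoder.
var source = new VideoSource(1920, 1080, new FFmpegVideoEncoder());

// Restrict negotiation to H264 so the FFmpeg H.264 path is used.
source.RestrictFormats(format => format.Codec == VideoCodecsEnum.H264);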

Here's what I've made so far:

//VideoTestPatternSource.cs
using System;
using System.Collections.Generic;
using System.Drawing.Imaging;
using System.Drawing;
using System.IO;
using System.Threading;
using System.Threading.Tasks;
using System.Windows.Forms;
using Microsoft.Extensions.Logging;
using SIPSorceryMedia.Abstractions;
using SharpDX;
using SharpDX.Direct3D11;
using SharpDX.DXGI;
using Device = SharpDX.Direct3D11.Device;
using MapFlags = SharpDX.Direct3D11.MapFlags;
using System.Runtime.InteropServices;

namespace SIPSorcery.Media
{
    public class VideoSource : IVideoSource, IDisposable
    {
        private const int VIDEO_SAMPLING_RATE = 90000;
        private const int MAXIMUM_FRAMES_PER_SECOND = 20;
        private const int DEFAULT_FRAMES_PER_SECOND = MAXIMUM_FRAMES_PER_SECOND;
        private const int MINIMUM_FRAMES_PER_SECOND = MAXIMUM_FRAMES_PER_SECOND - 5;
        private const int TIMER_DISPOSE_WAIT_MILLISECONDS = 1000;
        private const int VP8_SUGGESTED_FORMAT_ID = 96;
        private const int H264_SUGGESTED_FORMAT_ID = 100;

        public static readonly List<VideoFormat> SupportedFormats = new List<VideoFormat>
        {
            new VideoFormat(VideoCodecsEnum.VP8, VP8_SUGGESTED_FORMAT_ID, VIDEO_SAMPLING_RATE),
            new VideoFormat(VideoCodecsEnum.H264, H264_SUGGESTED_FORMAT_ID, VIDEO_SAMPLING_RATE, "packetization-mode=1")
        };

        private int _frameSpacing;
        private System.Threading.Timer _sendTestPatternTimer;
        private bool _isStarted;
        private bool _isPaused;
        private bool _isClosed;
        private bool _isMaxFrameRate;
        private int _frameCount;
        private SIPSorceryMedia.Abstractions.IVideoEncoder _videoEncoder;
        private MediaFormatManager<VideoFormat> _formatManager;

        public event RawVideoSampleDelegate OnVideoSourceRawSample;

#pragma warning disable CS0067
        public event RawVideoSampleFasterDelegate OnVideoSourceRawSampleFaster;
#pragma warning restore CS0067

        public event EncodedSampleDelegate OnVideoSourceEncodedSample;

        public event SourceErrorDelegate OnVideoSourceError;

        private readonly int _screenWidth;
        private readonly int _screenHeight;

        private Factory1 _factory;
        private Adapter1 _adapter;
        private Device _device;
        private Output1 _output1;
        private OutputDuplication _duplicatedOutput;
        private Texture2D _screenTexture;

        public VideoSource(int width, int height, SIPSorceryMedia.Abstractions.IVideoEncoder encoder = null)
        {
            _screenWidth = width;
            _screenHeight = height;
            if (encoder != null)
            {
                _videoEncoder = encoder;
                _formatManager = new MediaFormatManager<VideoFormat>(SupportedFormats);
            }

            try
            {
                InitializeDirectX();
                _sendTestPatternTimer = new System.Threading.Timer(GeneratePattern, null, Timeout.Infinite, Timeout.Infinite);
                _frameSpacing = 1000 / DEFAULT_FRAMES_PER_SECOND;
            }
            catch (Exception ex)
            {
                MessageBox.Show($"Failed to initialize: {ex.Message}");
            }
        }

        private void InitializeDirectX()
        {
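            // Set up DXGI desktop duplication: take the first adapter/output and create a
            // CPU-readable staging texture that each captured GPU frame is copied into.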
            _factory = new Factory1();
            _adapter = _factory.GetAdapter1(0);
            _device = new Device(_adapter);
            var output = _adapter.GetOutput(0);
            _output1 = output.QueryInterface<Output1>();

            var textureDesc = new Texture2DDescription
            {
                CpuAccessFlags = CpuAccessFlags.Read,
                BindFlags = BindFlags.None,
                Format = Format.B8G8R8A8_UNorm,
                Width = _screenWidth,
                Height = _screenHeight,
                OptionFlags = ResourceOptionFlags.None,
                MipLevels = 1,
                ArraySize = 1,
                SampleDescription = { Count = 1, Quality = 0 },
                Usage = ResourceUsage.Staging
            };

            _screenTexture = new Texture2D(_device, textureDesc);

            _duplicatedOutput = _output1.DuplicateOutput(_device);
        }

        public void RestrictFormats(Func<VideoFormat, bool> filter) => _formatManager.RestrictFormats(filter);
        public List<VideoFormat> GetVideoSourceFormats() => _formatManager.GetSourceFormats();
        public void SetVideoSourceFormat(VideoFormat videoFormat) => _formatManager.SetSelectedFormat(videoFormat);
        public List<VideoFormat> GetVideoSinkFormats() => _formatManager.GetSourceFormats();
        public void SetVideoSinkFormat(VideoFormat videoFormat) => _formatManager.SetSelectedFormat(videoFormat);

        public void ForceKeyFrame() => _videoEncoder?.ForceKeyFrame();
        public bool HasEncodedVideoSubscribers() => OnVideoSourceEncodedSample != null;

        public void ExternalVideoSourceRawSample(uint durationMilliseconds, int width, int height, byte[] sample, VideoPixelFormatsEnum pixelFormat) =>
            throw new NotImplementedException("The test pattern video source does not offer any encoding services for external sources.");

        public void ExternalVideoSourceRawSampleFaster(uint durationMilliseconds, RawImage rawImage) =>
            throw new NotImplementedException("The test pattern video source does not offer any encoding services for external sources.");

        public Task<bool> InitialiseVideoSourceDevice() =>
            throw new NotImplementedException("The test pattern video source does not use a device.");
        public bool IsVideoSourcePaused() => _isPaused;

        public void SetFrameRate(int framesPerSecond)
        {
            if (framesPerSecond < MINIMUM_FRAMES_PER_SECOND || framesPerSecond > MAXIMUM_FRAMES_PER_SECOND)
            {
                MessageBox.Show($"Frames per second not in the allowed range of {MINIMUM_FRAMES_PER_SECOND} to {MAXIMUM_FRAMES_PER_SECOND}, ignoring.");
            }
            else
            {
                _frameSpacing = 1000 / framesPerSecond;

                if (_isStarted)
                {
                    _sendTestPatternTimer?.Change(0, _frameSpacing);
                }
            }
        }

        public Task PauseVideo()
        {
            _isPaused = true;
            _sendTestPatternTimer?.Change(Timeout.Infinite, Timeout.Infinite);
            return Task.CompletedTask;
        }

        public Task ResumeVideo()
        {
            _isPaused = false;
            _sendTestPatternTimer?.Change(0, _frameSpacing);
            return Task.CompletedTask;
        }

        public Task StartVideo()
        {
            if (!_isStarted)
            {
                _isStarted = true;
                if (_isMaxFrameRate)
                {
                    GenerateMaxFrames();
                }
                else
                {
                    try
                    {
                        _sendTestPatternTimer?.Change(0, _frameSpacing);
                    }
                    catch (Exception ex)
                    {
                        MessageBox.Show("The following error occured: " + ex);
                    }
                }
            }
            return Task.CompletedTask;
        }

        public Task CloseVideo()
        {
            if (!_isClosed)
            {
                _isClosed = true;

                ManualResetEventSlim mre = new ManualResetEventSlim();
                _sendTestPatternTimer?.Dispose(mre.WaitHandle);
                return Task.Run(() => mre.Wait(TIMER_DISPOSE_WAIT_MILLISECONDS));
            }
            return Task.CompletedTask;
        }

        private void GenerateMaxFrames()
        {
            DateTime lastGenerateTime = DateTime.Now;

            while (!_isClosed && _isMaxFrameRate)
            {
                _frameSpacing = Convert.ToInt32(DateTime.Now.Subtract(lastGenerateTime).TotalMilliseconds);
                GeneratePattern(null);
                lastGenerateTime = DateTime.Now;
            }
        }

        private void GeneratePattern(object state)
        {
            lock (_sendTestPatternTimer)
            {
                if (!_isClosed && (OnVideoSourceRawSample != null || OnVideoSourceEncodedSample != null))
                {
                    _frameCount++;

                    var buffer = Snapshot(_screenWidth, _screenHeight);

                    if (OnVideoSourceRawSample != null)
                    {
                        OnVideoSourceRawSample?.Invoke((uint)_frameSpacing, _screenWidth, _screenHeight, buffer, VideoPixelFormatsEnum.Bgra);
                    }

                    if (_videoEncoder != null && OnVideoSourceEncodedSample != null && !_formatManager.SelectedFormat.IsEmpty())
                    {
                        var encodedBuffer = _videoEncoder.EncodeVideo(_screenWidth, _screenHeight, buffer, VideoPixelFormatsEnum.Bgra, _formatManager.SelectedFormat.Codec);

                        if (encodedBuffer != null)
                        {
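                            // Convert the frame interval into an RTP timestamp duration on the 90 kHz video clock.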
                            uint fps = (_frameSpacing > 0) ? 1000 / (uint)_frameSpacing : MAXIMUM_FRAMES_PER_SECOND;
                            uint durationRtpTS = VIDEO_SAMPLING_RATE / fps;
                            OnVideoSourceEncodedSample.Invoke(durationRtpTS, encodedBuffer);
                        }
                    }

                    if (_frameCount == int.MaxValue)
                    {
                        _frameCount = 0;
                    }
                }
            }
        }

        private byte[] Snapshot(int width, int height)
        {
            SharpDX.DXGI.Resource screenResource;
            OutputDuplicateFrameInformation duplicateFrameInformation;

            try
            {
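                // Block for up to 10 seconds waiting for the desktop duplication API to deliver the next frame.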
                _duplicatedOutput.AcquireNextFrame(10000, out duplicateFrameInformation, out screenResource);

                using (var screenTexture2D = screenResource.QueryInterface<Texture2D>())
                    _device.ImmediateContext.CopyResource(screenTexture2D, _screenTexture);

                var mapSource = _device.ImmediateContext.MapSubresource(_screenTexture, 0, MapMode.Read, MapFlags.None);

                int stride = width * 4; // 4 bytes per pixel (BGRA)
                byte[] buffer = new byte[stride * height];
                IntPtr sourcePtr = mapSource.DataPointer;
                for (int y = 0; y < height; y++)
                {
                    // Copy each row from source to buffer
                    Marshal.Copy(sourcePtr, buffer, y * stride, stride);
                    sourcePtr = IntPtr.Add(sourcePtr, mapSource.RowPitch);
                }

                _device.ImmediateContext.UnmapSubresource(_screenTexture, 0);

                screenResource.Dispose();
                _duplicatedOutput.ReleaseFrame();

                // Correct the inverted colours by swapping the B and R channels (BGRA -> RGBA)
                for (int i = 0; i < buffer.Length; i += 4)
                {
                    byte temp = buffer[i];     // Save B
                    buffer[i] = buffer[i + 2]; // Move R into the B position
                    buffer[i + 2] = temp;      // Move the saved B into the R position
                                               // buffer[i+1] (G) remains unchanged
                                               // buffer[i+3] (A) remains unchanged
                }

                return buffer;
            }
            catch (SharpDXException e)
            {
                if (e.ResultCode.Code != SharpDX.DXGI.ResultCode.WaitTimeout.Result.Code)
                {
                    throw;
                }
                return new byte[width * height * 4];
            }
        }

        public void Dispose()
        {
            _isClosed = true;
            _sendTestPatternTimer?.Dispose();
            _videoEncoder?.Dispose();

            _screenTexture?.Dispose();
            _duplicatedOutput?.Dispose();
            _output1?.Dispose();
            _device?.Dispose();
            _adapter?.Dispose();
            _factory?.Dispose();
        }
    }
}

Here's what loads it for reference:

//Program.cs
using System;
using System.Linq;
using System.Net;
using System.Threading.Tasks;
using SIPSorcery.Media;
using SIPSorcery.Net;
using SIPSorceryMedia.Encoders;
using SIPSorceryMedia.FFmpeg;
using WebSocketSharp.Server;

namespace TESTPATTERNSERVER
{
    class Program
    {
        private const int WEBSOCKET_PORT = 8081;

        static void Main()
        {
            Console.WriteLine("WebRTC Get Started");

            // Start web socket.
            Console.WriteLine("Starting web socket server...");
            var webSocketServer = new WebSocketServer(IPAddress.Any, WEBSOCKET_PORT);
            webSocketServer.AddWebSocketService<WebRTCWebSocketPeer>("/", (peer) => peer.CreatePeerConnection = () => CreatePeerConnection());
            webSocketServer.Start();

            Console.WriteLine($"Waiting for web socket connections on {webSocketServer.Address}:{webSocketServer.Port}...");

            Console.WriteLine("Press any key exit.");
            Console.ReadLine();
        }

        private static Task<RTCPeerConnection> CreatePeerConnection()
        {
            var pc = new RTCPeerConnection(null);
         
            var testPatternSource = new VideoSource(1920, 1080, new VP8VideoEncoder());
            var videoEndPoint = new SIPSorceryMedia.FFmpeg.FFmpegVideoEndPoint();

            MediaStreamTrack videoTrack = new MediaStreamTrack(videoEndPoint.GetVideoSourceFormats(), MediaStreamStatusEnum.SendOnly);
            pc.addTrack(videoTrack);

            testPatternSource.OnVideoSourceEncodedSample += pc.SendVideo;
            pc.OnVideoFormatsNegotiated += (formats) => testPatternSource.SetVideoSourceFormat(formats.First());

            pc.onconnectionstatechange += async (state) =>
            {
                Console.WriteLine($"Peer connection state change to {state}.");

                switch (state)
                {
                    case RTCPeerConnectionState.connected:
                        await testPatternSource.StartVideo();
                        break;
                    case RTCPeerConnectionState.failed:
                        pc.Close("ice disconnection");
                        break;
                    case RTCPeerConnectionState.closed:
                        await testPatternSource.CloseVideo();
                        testPatternSource.Dispose();
                        break;
                }
            };

            return Task.FromResult(pc);
        }
    }
}

Thanks in advance!


Solution

  • The FFmpeg implementation definitely worked. I got 50 to 60 fps with H264 encoding. FFmpeg is only being used as an encoder, though, so the snapshotting method could still be improved. Thanks for everyone's help!
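
For the snapshotting side, here's the kind of change I have in mind (a sketch only, not the accepted implementation: it omits the timeout handling from the Snapshot above for brevity, and assumes the encoder consumes each frame before the next capture, so a single reusable buffer is safe). The idea is to reuse one managed buffer across frames and do a single bulk copy whenever the mapped row pitch equals the packed stride, instead of allocating a fresh array and looping over every row each frame:

//VideoTestPatternSource.cs (hypothetical excerpt)
private byte[] _frameBuffer; // illustrative field, not in the original class

private byte[] SnapshotFast(int width, int height)
{
    int stride = width * 4; // 4 bytes per pixel (BGRA)
    _frameBuffer ??= new byte[stride * height]; // allocate once, reuse every frame

    _duplicatedOutput.AcquireNextFrame(10000, out _, out var screenResource);

    using (var screenTexture2D = screenResource.QueryInterface<Texture2D>())
        _device.ImmediateContext.CopyResource(screenTexture2D, _screenTexture);

    var mapSource = _device.ImmediateContext.MapSubresource(_screenTexture, 0, MapMode.Read, MapFlags.None);

    if (mapSource.RowPitch == stride)
    {
        // Fast path: rows are tightly packed, so one bulk copy covers the whole frame.
        Marshal.Copy(mapSource.DataPointer, _frameBuffer, 0, stride * height);
    }
    else
    {
        // Fallback: copy row by row, skipping the GPU padding at the end of each row.
        IntPtr sourcePtr = mapSource.DataPointer;
        for (int y = 0; y < height; y++)
        {
            Marshal.Copy(sourcePtr, _frameBuffer, y * stride, stride);
            sourcePtr = IntPtr.Add(sourcePtr, mapSource.RowPitch);
        }
    }

    _device.ImmediateContext.UnmapSubresource(_screenTexture, 0);
    screenResource.Dispose();
    _duplicatedOutput.ReleaseFrame();

    return _frameBuffer;
}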