How to send audio on RTSP backchannel?

My IP camera supports audio output (an audio backchannel). I want to live-stream PC microphone audio over RTSP so that it is audible on the camera's speaker. The ONVIF streaming specification says I have to send my audio data to the RTSP URL provided by the camera. My camera supports ONVIF Profile T.

I tried :

  // Shared RTSP client; the microphone callback uses it to push encoded audio to the camera.
  public static RtspClient rtspClient;
  // Microphone capture source; CallAudio assigns a WaveInEvent to it.
  public static IWaveIn sourceStream;

  // Queries the camera's ONVIF Device and Media2 services for a stream URI, creates the
  // RTSP client for that URI, and hooks the microphone's DataAvailable event so captured
  // audio is forwarded by SourceStream_DataAvailable.
  // NOTE(review): no call that starts the microphone capture is visible in this snippet.
  private static void CallAudio()
        // Camera address is left empty here as a placeholder.
        string CameraIp = "";
        string UserName = "admin";
        string Password = "admin123";
        // NOTE(review): the inspector is created but never attached to a client in the visible code.
        var ClientMessageInspector = new ClientMessageInspector(UserName, Password);

        //Call Device Url and get Services.
        string DeviceServiceUrl = "http://" + CameraIp + "/onvif/device_service";
        var deviceClient = new DeviceClient("DeviceBinding", new EndpointAddress(DeviceServiceUrl));
        var getServices = deviceClient.GetServices(false);                   
        //Call media2 getStreamingUri.
        string url = "http://" + CameraIp + "/onvif/media2_service";
        var Media2Client = new Media2Client("Media2Binding", new EndpointAddress(url));
        var media2GetProfiles = Media2Client.GetProfiles(null, null);            
        var resp = Media2Client.GetAudioDecoderConfigurationOptions(null, null);
        // NOTE(review): `profiles` is not declared in this snippet; presumably it is the
        // GetProfiles result stored in media2GetProfiles above - confirm.
        var responseGetAudioStreamUri = Media2Client.GetStreamUri("tcp", profiles[0].token);  //This gets rtsp url of media from camera.

        rtspClient = new RtspClient(responseGetAudioStreamUri, UserName, Password);            
        sourceStream = new WaveInEvent();
        // NOTE(review): the answer at the end of this post identifies this line as the reason
        // the audio was inaudible; it states the first two arguments should be 8000 and 16.
        sourceStream.WaveFormat = new WaveFormat(64, 8, 1);   //8000 16
        // Every captured buffer is handed to SourceStream_DataAvailable.
        sourceStream.DataAvailable += new EventHandler<WaveInEventArgs>(SourceStream_DataAvailable);


    // Microphone callback: encodes the captured buffer as G.711 mu-law and hands the
    // result to the shared RTSP client, addressed to interleaved channel 3.
    private static void SourceStream_DataAvailable(object sender, WaveInEventArgs e)
    {
        byte[] muLawPayload = TwoWayAudio_Encode_MuLaw(e.Buffer, 0, e.BytesRecorded);
        int payloadSize = muLawPayload.Length;
        rtspClient.SendData(muLawPayload, payloadSize, 3);
    }

    // Converts 16-bit PCM samples into mu-law bytes, one output byte per 2-byte input sample.
    //   data   - buffer holding the PCM samples (read with BitConverter.ToInt16).
    //   offset - index in data of the first sample byte.
    //   length - number of bytes to convert, starting at offset.
    // Returns a new array of length / 2 encoded bytes (empty when length is below 2).
    private static byte[] TwoWayAudio_Encode_MuLaw(byte[] data, int offset, int length)
    {
        // Only whole 2-byte samples can be encoded. Deriving the loop bound from the sample
        // count (not the byte count) means an odd length no longer writes past the end of
        // the output array or reads a partial sample from the input.
        int sampleCount = length > 0 ? length / 2 : 0;
        byte[] encoded = new byte[sampleCount];

        for (int sample = 0; sample < sampleCount; sample++)
        {
            short pcm = BitConverter.ToInt16(data, offset + (sample * 2));
            encoded[sample] = MuLawEncoder.LinearToMuLawSample(pcm);
        }

        return encoded;
    }

The RTSP client in my project is from here.


using Rtsp;
using Rtsp.Messages;
using Rtsp.Sdp;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Security.Cryptography;
using System.Text;
using System.Text.RegularExpressions;

namespace Rtsp
   public class RtspClient
    // Listener attached to the TCP transport; sends RTSP messages and raises events for replies and data.
    private RtspListener rtsp_client;
    // TCP connection to the camera, opened in the constructor.
    private RtspTcpTransport tcp_socket;
    // RTSP URL passed to the constructor; reused for later requests and for Digest authentication.
    public string url;
    // Set to true once the reply to PLAY has been received; SendData only transmits while it is true.
    public bool canPlay = false;
    // Credentials used to build the Digest Authorization header after a 401 reply.
    public string username;
    public string password;
    // Sequence number written into the RTP header of outgoing packets by SendData.
    public ushort seqNo = 0;
    // Raised with the text "Internal Server Error" when a reply carries return code 500.
    public event EventHandler<string> RtspError;
    // Raised with the RTP payload (header removed) for received packets of payload type 98 or 0.
    public event EventHandler<byte[]> RtpDataReceived;
    // Created in the constructor.
    public Stopwatch stopwatch { get; private set; }

    // Stores the URL and credentials, opens a TCP connection to the URL's host, attaches an
    // RtspListener to it, subscribes to its message/data events, starts it, and builds a
    // DESCRIBE request for the URL.
    //   _url      - RTSP URL of the stream.
    //   _username - user name for Digest authentication.
    //   _password - password for Digest authentication.
    public RtspClient(string _url, string _username, string _password)
        url = _url;
        username = _username;
        password = _password;

        var uri = new Uri(_url);

        // NOTE(review): the port is fixed at 554; a port given in the URL is not used.
        tcp_socket = new RtspTcpTransport(uri.Host, 554); // 554);

        // A failed connection is only logged; construction continues regardless.
        if (tcp_socket.Connected == false)
            Console.WriteLine("Error - did not connect");

        // Connect a RTSP Listener to the TCP Socket to send messages and listen for replies
        rtsp_client = new RtspListener(tcp_socket);

        rtsp_client.MessageReceived += Rtsp_client_MessageReceived;
        rtsp_client.DataReceived += DataReceived;
        rtsp_client.Start(); // start reading messages from the server
        rtsp_client.AutoReconnect = true;
        // NOTE(review): the DESCRIBE request is built here but no call that sends it is visible
        // in this snippet; presumably the send statement was lost - confirm.
        RtspRequest describe_message = new RtspRequestDescribe();
        describe_message.RtspUri = uri;            
        describe_message.AddHeader("Accept: application/sdp");

        // The stopwatch is created but not started here.
        stopwatch = new Stopwatch();

    // Handles one RTP packet delivered by the RTSP listener: decodes the fixed header,
    // logs its fields, skips CSRC entries and any header extension, strips trailing
    // padding, and raises RtpDataReceived with the bare payload for payload types 98 and 0.
    // Packets too short to contain what their header announces are ignored.
    //   sender - the listener that raised the event (unused).
    //   e      - event data whose Message.Data holds the raw RTP packet bytes.
    private void DataReceived(object sender, RtspChunkEventArgs e)
    {
        const int FixedHeaderSize = 12; // V/P/X/CC, M/PT, sequence, timestamp, SSRC

        byte[] packet = e.Message.Data;
        if (packet == null || packet.Length < FixedHeaderSize)
        {
            return;
        }

        int rtp_version = packet[0] >> 6;
        int rtp_padding = (packet[0] >> 5) & 0x01;
        int rtp_extension = (packet[0] >> 4) & 0x01;
        int rtp_csrc_count = packet[0] & 0x0F;
        int rtp_marker = (packet[1] >> 7) & 0x01;
        int rtp_payload_type = packet[1] & 0x7F;
        uint rtp_sequence_number = ((uint)packet[2] << 8) | (uint)packet[3];
        uint rtp_timestamp = ((uint)packet[4] << 24) | ((uint)packet[5] << 16) | ((uint)packet[6] << 8) | (uint)packet[7];
        uint rtp_ssrc = ((uint)packet[8] << 24) | ((uint)packet[9] << 16) | ((uint)packet[10] << 8) | (uint)packet[11];

        Console.WriteLine("RTP Data"
                           + " V=" + rtp_version
                           + " P=" + rtp_padding
                           + " X=" + rtp_extension
                           + " CC=" + rtp_csrc_count
                           + " M=" + rtp_marker
                           + " PT=" + rtp_payload_type
                           + " Seq=" + rtp_sequence_number
                           + " Time=" + rtp_timestamp
                           + " SSRC=" + rtp_ssrc
                           + " Size=" + packet.Length);

        // The payload follows the fixed header and zero or more 4-byte CSRC entries.
        int rtp_payload_start = FixedHeaderSize + (4 * rtp_csrc_count);

        if (rtp_extension == 1)
        {
            // The extension starts with a 4-byte header: 16-bit id, 16-bit length.
            if (packet.Length < rtp_payload_start + 4)
            {
                return;
            }

            // RFC 3550 section 5.3.1: the length field counts 32-bit words that follow the
            // 4-byte extension header, so it must be multiplied by 4 to get bytes.
            int rtp_extension_words = (packet[rtp_payload_start + 2] << 8) | packet[rtp_payload_start + 3];
            rtp_payload_start += 4 + (4 * rtp_extension_words);
        }

        int rtp_payload_end = packet.Length;
        if (rtp_padding == 1 && rtp_payload_end > rtp_payload_start)
        {
            // RFC 3550 section 5.1: with P set, the last byte holds the number of padding
            // bytes (itself included) to discard from the end of the packet.
            rtp_payload_end -= packet[packet.Length - 1];
        }

        if (rtp_payload_end < rtp_payload_start)
        {
            return; // header claims more bytes than the packet contains
        }

        // Only payload types 98 and 0 are forwarded to subscribers.
        if (rtp_payload_type == 98 || rtp_payload_type == 0)
        {
            byte[] rtp_payload = new byte[rtp_payload_end - rtp_payload_start]; // payload with RTP header removed
            System.Array.Copy(packet, rtp_payload_start, rtp_payload, 0, rtp_payload.Length);
            RtpDataReceived?.Invoke(null, rtp_payload);
        }
    }

    // RTP timestamp of the next outgoing packet, counted in media-clock ticks.
    private uint rtpTimestamp;

    // Wraps the first `count` bytes of `data` in an RTP packet (payload type 0) and sends
    // it on the given interleaved channel.
    //   data    - encoded audio bytes to send.
    //   count   - number of bytes of data to send; values outside 0..data.Length are clamped.
    //   channel - interleaved channel number passed to the RTSP listener.
    // Returns true when the packet was handed to the listener, false while canPlay is
    // false (nothing is sent and no counters advance in that case).
    // Throws ArgumentNullException when data is null.
    public bool SendData(byte[] data, int count, int channel)
    {
        if (data == null)
        {
            throw new ArgumentNullException(nameof(data));
        }

        if (!canPlay)
        {
            return false;
        }

        const int RtpHeaderSize = 12;
        int payloadLength = Math.Min(Math.Max(count, 0), data.Length);
        byte[] rtp_packet = new byte[RtpHeaderSize + payloadLength];

        int rtp_version = 2;
        int rtp_padding = 0;
        int rtp_extension = 0;
        int rtp_csrc_count = 0;
        // NOTE(review): the marker bit is set on every packet, as before; RFC 3551 suggests
        // setting it only on the first packet after silence for audio - confirm with the camera.
        int rtp_marker = 1;
        int rtp_payload_type = 0; // static payload type 0 = PCMU

        RTPPacketUtil.WriteHeader(rtp_packet, rtp_version, rtp_padding, rtp_extension, rtp_csrc_count, rtp_marker, rtp_payload_type);
        RTPPacketUtil.WriteSequenceNumber(rtp_packet, seqNo);
        RTPPacketUtil.WriteTS(rtp_packet, rtpTimestamp);

        UInt32 ssrc = 1293847657;
        RTPPacketUtil.WriteSSRC(rtp_packet, ssrc);

        // Payload goes directly after the 12-byte header.
        System.Array.Copy(data, 0, rtp_packet, RtpHeaderSize, payloadLength);

        rtsp_client.SendData(channel, rtp_packet);

        unchecked
        {
            // RFC 3550: the sequence number increases by one per packet sent (wrapping at
            // 65535) so the receiver can detect loss and reordering.
            seqNo++;
            // The RTP timestamp runs on the media clock, not wall-clock milliseconds.
            // PCMU is 8000 Hz with one byte per sample, so it advances by the byte count.
            rtpTimestamp += (uint)payloadLength;
        }

        return true;
    }

    // Handles every RTSP message raised by the listener and drives the session forward:
    //   - 500 reply  : raises RtspError.
    //   - 401 reply  : rebuilds the DESCRIBE or SETUP request with a Digest Authorization header.
    //   - DESCRIBE   : parses the SDP body and prepares a SETUP request per matching media section.
    //   - SETUP      : prepares a PLAY request carrying the session id from the reply.
    //   - PLAY       : sets canPlay so SendData starts transmitting.
    // NOTE(review): none of the prepared requests is sent in the visible code; presumably the
    // send statements were lost from this snippet - confirm.
    private void Rtsp_client_MessageReceived(object sender, RtspChunkEventArgs e)
        // NOTE(review): `as` yields null when the message is not an RtspResponse; it is dereferenced unchecked below.
        RtspResponse message = e.Message as RtspResponse;
        if (message.ReturnCode == 500)
            RtspError?.Invoke(this, "Internal Server Error");
        if (message.ReturnCode == 401)
            // Recreate a request of the same kind as the one that was rejected.
            Rtsp.Messages.RtspRequest msg = null;
            // NOTE(review): no break statements are visible between the cases; msg stays null
            // for any method other than DESCRIBE/SETUP and is dereferenced right after.
            switch (message.OriginalRequest.Method)
                case "DESCRIBE":
                    msg = new RtspRequestDescribe();
                case "SETUP":
                    msg = new RtspRequestSetup();
            msg.RtspUri = new Uri(url);
            // Digest authentication: response = MD5(HA1:nonce:HA2), where
            // HA1 = MD5(username:realm:password) and HA2 = MD5(method:uri).
            var header = message.Headers["WWW-Authenticate"];
            var _realm = GrabHeaderVar("realm", header);
            var _nonce = GrabHeaderVar("nonce", header);
            var ha1 = CalculateMd5Hash(string.Format("{0}:{1}:{2}", username, _realm, password));
            // NOTE(review): the base url is used as the digest uri even for SETUP, although
            // SETUP requests below are addressed to url + "/" + control - confirm.
            var ha2 = CalculateMd5Hash(string.Format("{0}:{1}", message.OriginalRequest.Method, url));
            var digestResponse = CalculateMd5Hash(string.Format("{0}:{1}:{2}", ha1, _nonce, ha2));

            var digest = string.Format("Digest username=\"{0}\", realm=\"{1}\", nonce=\"{2}\", uri=\"{3}\", response=\"{4}\" ",
                username, _realm, _nonce, url, digestResponse);
            msg.AddHeader("Authorization: " + digest);
            msg.AddHeader("Accept: application/sdp");
        // NOTE(review): OriginalRequest is dereferenced here before the null checks further down.
        Console.WriteLine("Received " + message.OriginalRequest.ToString());

        if (message.OriginalRequest != null && message.OriginalRequest is RtspRequestDescribe)
            // Got a reply for DESCRIBE
            // Examine the SDP

            SdpFile sdp_data;
            using (StreamReader sdp_stream = new StreamReader(new MemoryStream(message.Data)))
                sdp_data = SdpFile.Read(sdp_stream);

            // Process each 'Media' Attribute in the SDP.
            // If the attribute is for Video, then send a SETUP
            for (int x = 0; x < sdp_data.Medias.Count; x++)
                // NOTE(review): this condition is truncated in the snippet; the media types it
                // compares against are missing.
                if (sdp_data.Medias[x].MediaType == || sdp_data.Medias[x].MediaType ==
                    // seach the atributes for control, fmtp and rtpmap
                    String control = "";  // the "track" or "stream id"
                    String fmtp = ""; // holds SPS and PPS
                    String rtpmap = ""; // holds the Payload format, 96 is often used with H264
                    foreach (Rtsp.Sdp.Attribut attrib in sdp_data.Medias[x].Attributs)
                        if (attrib.Key.Equals("control")) control = attrib.Value;
                        if (attrib.Key.Equals("fmtp")) fmtp = attrib.Value;
                        if (attrib.Key.Equals("rtpmap")) rtpmap = attrib.Value;

                    // Get the Payload format number for the Video Stream
                    String[] split_rtpmap = rtpmap.Split(' ');
                    var video_payload = 0;
                    // The parsed payload number and the TryParse result are not used afterwards in the visible code.
                    bool result = Int32.TryParse(split_rtpmap[0], out video_payload);
                    // Send SETUP for the Video Stream
                    // using Interleaved mode (RTP frames over the RTSP socket)
                    Rtsp.Messages.RtspRequest setup_message = new Rtsp.Messages.RtspRequestSetup();
                    // The SETUP target is the base URL followed by the media section's control attribute.
                    setup_message.RtspUri = new Uri(url + "/" + control);                        
                    //setup_message.AddHeader("Transport: RTP/AVP/TCP;interleaved=0");                       

        if (message.OriginalRequest != null && message.OriginalRequest is RtspRequestSetup)
            // Got Reply to SETUP
            Console.WriteLine("Got reply from Setup. Session is " + message.Session);

            String session = message.Session; // Session value used with Play, Pause, Teardown

            // Send PLAY
            RtspRequest play_message = new RtspRequestPlay();
            play_message.RtspUri = new Uri(url);               
            play_message.Session = session;

        if (message.OriginalRequest != null && message.OriginalRequest is RtspRequestPlay)
            // Got Reply to PLAY
            Console.WriteLine("Got reply from Play  " + message.Command);
            // From here on SendData transmits packets instead of returning false.
            canPlay = true;


    // Extracts the quoted value of a name="value" parameter from an HTTP-style header,
    // e.g. the realm or nonce of a WWW-Authenticate challenge.
    //   varName - parameter name to look for (matched literally, as a whole name).
    //   header  - header text to search; null is treated as "parameter not present".
    // Returns the text between the quotes.
    // Throws ApplicationException when the parameter is not found.
    private static string GrabHeaderVar(string varName, string header)
    {
        if (header != null)
        {
            // Regex.Escape keeps characters such as '.' in varName from acting as regex
            // operators; the look-behind stops "nonce" from matching inside "cnonce".
            string pattern = @"(?<![\w-])" + Regex.Escape(varName) + @"=""([^""]*)""";
            Match matchHeader = Regex.Match(header, pattern);
            if (matchHeader.Success)
            {
                return matchHeader.Groups[1].Value;
            }
        }

        throw new ApplicationException(string.Format("Header {0} not found", varName));
    }

    // Returns the MD5 digest of the ASCII bytes of `input` as 32 lowercase hexadecimal
    // characters, the form HTTP Digest authentication uses for HA1, HA2 and the response.
    //   input - text to hash.
    private static string CalculateMd5Hash(string input)
    {
        byte[] inputBytes = Encoding.ASCII.GetBytes(input);

        byte[] hash;
        using (MD5 md5 = MD5.Create()) // the hash algorithm object is disposable
        {
            hash = md5.ComputeHash(inputBytes);
        }

        var sb = new StringBuilder(hash.Length * 2);
        foreach (byte b in hash)
        {
            // Two lowercase hex digits per byte; without this step no digest text is produced.
            sb.Append(b.ToString("x2"));
        }

        return sb.ToString();
    }

    public void Dispose()

Before calling any RTSP method, I added the RTSP backchannel requirement to check whether the camera supports the audio backchannel. The output I get after calling DESCRIBE, SETUP and PLAY is OK:

Received Rtsp.Messages.RtspRequestDescribe
o=- 0 0 IN IP4
c=IN IP4
t=0 0
m=video 0 RTP/AVP 35
a=rtpmap:35 H264/90000
a=fmtp:35 packetization-mode=1;profile-level-id=4d0029;sprop-parameter- 
m=audio 0 RTP/AVP 96
a=rtpmap:96 mpeg4-generic/16000/1
a=fmtp:96 streamtype=5; profile-level-id=5; mode=AAC-hbr; config=1408; SizeLength=13; IndexLength=3; 
m=audio 0 RTP/AVP 0
a=rtpmap:0 PCMU/8000/1
Received Rtsp.Messages.RtspRequestSetup
Got reply from Setup. Session is 12346e9856840dc
Received Rtsp.Messages.RtspRequestSetup
Got reply from Setup. Session is 12346e9856840dc
Received Rtsp.Messages.RtspRequestSetup
Got reply from Setup. Session is 12346e9856840dc
Received Rtsp.Messages.RtspRequestPlay
Got reply from Play  RTSP/1.0 200 OK
Received Rtsp.Messages.RtspRequestPlay
Got reply from Play  RTSP/1.0 200 OK
Received Rtsp.Messages.RtspRequestPlay
Got reply from Play  RTSP/1.0 200 OK

After receiving the response to the PLAY request, I start sending my encoded data using the SendData method of the RTSP client. But the audio is not audible at the camera end.

  1. Is it possible to send audio data over the RTSP URL?
  2. Is there any issue with the way I called my methods?
  3. Is there any easy way (or example/tutorial) to do audio backchannel over RTSP?


  • I figured out what I was doing wrong. The steps described above are actually fine and the camera returns OK; the mistake was in this line of the code above:

    sourceStream.WaveFormat = new WaveFormat(64, 8, 1);   //8000 16

    Instead of 64 & 8 parameter it should be :

    sourceStream.WaveFormat = new WaveFormat(8000, 16, 1);   //8000 16

    It was entirely down to the audio sampling rate: with the wrong rate, the voice that was sent was not audible. Thank you!