Search code examples
c#azure-functionsazure-data-lakexmlconvert

Convert XML File with nested hierarchy placed in Azure Data lake to CSV using C# Azure Function


I need to convert an XML file with the structure shown below to CSV using a C# Azure Function. The XML file is stored in Azure Data Lake. The structure of the file is as follows.

<root id="1" created_date="01/01/2023" asof_date="01/01/2023">
    <level1>
        <data1>sdfs</data1>
        <data2>true</data2>
        <level2 rec="4">
            <level_record>
                <groupid>1</groupid>
                <groupname>somegroup</groupname>
                <groupdate>01/01/2023</groupdate>
                <groupvalue>5</groupvalue>
                <groupkey>ag55</groupkey>
            </level_record>  
            <level_record>
                <groupid>2</groupid>
                <groupname>somegroup1</groupname>
                <groupdate>02/01/2023</groupdate>
                <groupvalue>6</groupvalue>
                <groupkey>ag56</groupkey>
            </level_record> 
       </level2> 
    </level1>
</root> 

How do I read the file from Azure Data Lake and convert it to a CSV file?


Solution

  • Here is an example of an Azure Function in C# that reads an XML file from Azure Data Lake Storage and converts it to a CSV file

    using Microsoft.Azure.Functions.Worker;
    using Microsoft.Extensions.Logging;
    using Microsoft.Azure.Storage;
    using Microsoft.Azure.Storage.Auth;
    using Microsoft.Azure.Storage.Blob;
    using System.IO;
    using System.Xml.Linq;
    
    namespace YourNamespace
    {
        /// <summary>
        /// Blob-triggered Azure Function that flattens the <c>level_record</c> entries found under
        /// <c>root/level1/level2</c> of an uploaded XML document into a CSV file in the local temp directory.
        /// </summary>
        public static class ConvertXmlToCsvFunction
        {
            // Column order of the generated CSV. Each name is also the child element read from a level_record.
            private static readonly string[] CsvColumns = { "groupid", "groupname", "groupdate", "groupvalue", "groupkey" };
    
            // Characters that force a CSV field to be quoted.
            private static readonly char[] CsvSpecialChars = { ',', '"', '\r', '\n' };
    
            /// <summary>
            /// Converts the triggering XML blob to CSV and writes it to the temp directory under the
            /// blob's name with a <c>.csv</c> extension.
            /// </summary>
            /// <param name="xmlStream">Content of the blob that triggered the function.</param>
            /// <param name="name">Blob name bound from the trigger path; used to derive the CSV file name.</param>
            /// <param name="context">Invocation context, used to obtain a logger.</param>
            /// <exception cref="InvalidDataException">The document has no <c>root/level1/level2</c> element.</exception>
            /// <exception cref="System.Xml.XmlException">The blob content is not well-formed XML.</exception>
            [Function("ConvertXmlToCsvFunction")]
            public static void Run([BlobTrigger("your-container/{name}", Connection = "AzureWebJobsStorage")] Stream xmlStream, string name, FunctionContext context)
            {
                var logger = context.GetLogger("ConvertXmlToCsvFunction");
                logger.LogInformation("Processing file: {Name}", name);
    
                try
                {
                    // Load straight from the stream so the encoding declared in the XML prolog is honoured
                    // and the document is not buffered a second time as a string.
                    XDocument xDoc = XDocument.Load(xmlStream);
                    string csv = BuildCsv(xDoc);
    
                    // Save the CSV content to a file
                    string csvFileName = Path.ChangeExtension(name, "csv");
                    string csvFilePath = Path.Combine(Path.GetTempPath(), csvFileName);
    
                    // Blob names may contain virtual folders ("folder/file.xml"); make sure the
                    // matching directory exists before writing.
                    string csvDirectory = Path.GetDirectoryName(csvFilePath);
                    if (!string.IsNullOrEmpty(csvDirectory))
                    {
                        Directory.CreateDirectory(csvDirectory);
                    }
    
                    File.WriteAllText(csvFilePath, csv);
    
                    logger.LogInformation("CSV file created: {Path}", csvFilePath);
                }
                catch (Exception ex)
                {
                    // Pass the exception object so the stack trace reaches the log sink.
                    logger.LogError(ex, "An error occurred: {Message}", ex.Message);
                    throw;
                }
            }
    
            /// <summary>Builds the CSV text (header plus one line per <c>level_record</c>) from the parsed document.</summary>
            private static string BuildCsv(XDocument document)
            {
                XElement level2Element = document.Element("root")?.Element("level1")?.Element("level2");
                if (level2Element == null)
                {
                    throw new InvalidDataException("The XML document does not contain the expected root/level1/level2 element.");
                }
    
                var csv = new System.Text.StringBuilder();
                csv.Append(string.Join(",", CsvColumns)).Append('\n');
    
                var fields = new string[CsvColumns.Length];
                foreach (XElement recordElement in level2Element.Elements("level_record"))
                {
                    for (int i = 0; i < CsvColumns.Length; i++)
                    {
                        // The explicit XElement-to-string conversion yields null for a missing element;
                        // a missing field is emitted as an empty column instead of aborting the file.
                        string value = (string)recordElement.Element(CsvColumns[i]) ?? string.Empty;
                        fields[i] = EscapeCsvField(value);
                    }
    
                    csv.Append(string.Join(",", fields)).Append('\n');
                }
    
                return csv.ToString();
            }
    
            /// <summary>Quotes a field (doubling embedded quotes) when it contains a comma, quote, or line break.</summary>
            private static string EscapeCsvField(string value)
            {
                if (value.IndexOfAny(CsvSpecialChars) < 0)
                {
                    return value;
                }
    
                return "\"" + value.Replace("\"", "\"\"") + "\"";
            }
        }
    }