Search code examples
c#databasesql-updatemysqlcommand

Update sql table column by applying a C# function to another column


I need to update a table column by applying a defined function to that column. Using MySqlCommand for example. Eg. I have a C# defined function that cleans text string GetCleanText(string text_to_clean) and in one of my tables, I have a column fullTxt that contains the text to be cleaned, and another column txt that is empty for now.

I should have as a result: txt = GetCleanText(fullTxt).

I tried using a foreach loop, but it is so expansive, since I have several rows in my table.

Here is my code:

// Get all entries in the log table that have the html log and do not have text answers
            MySqlCommand request1 = new MySqlCommand("SELECT * FROM my_table WHERE fullTxt IS NOT NULL AND txt IS NULL", conn);

            // loop over results and clean the text entries
            List<List<string>> tobefilled = new List<List<string>>(); 

            using (MySqlDataReader r = request1.ExecuteReader())
            {
                while (r.Read())
                {
                    string id = r.GetString("id");
                    string fullTxt = r.GetString("fullTxt");
                    string txt = this.GetCleanText(fullTxt);
                    tobefilled.Add(new List<string>() { id, txt });
                }
            }

            System.Console.WriteLine($"{tobefilled.Count} to be updated ...");
            // Update all entries in the log table that have the html log and do not have text answers
            MySqlCommand request2 = new MySqlCommand("UPDATE my_table SET txt = @txt WHERE id = @id", conn);
            request2.Parameters.AddWithValue("@txt", "");
            request2.Parameters.AddWithValue("@id", "");


            foreach (List<string> elem in tobefilled)
            {
                request2.Parameters["@txt"].Value = elem[1];
                request2.Parameters["@id"].Value = elem[0];

                request2.ExecuteNonQuery();
            }

Solution

  • I don't know if this is so clean as a method, but it works faster:

    using System.Collections.Generic;
    using System.Text;
    using HtmlAgilityPack;
    using MySql.Data.MySqlClient;
    
    namespace DatabaseMigrations.MigrationScripts
    {
        class MigrationScript
        {
            /// <summary>
            /// Take a string having html formatted content, and extract raw text
            /// </summary>
            /// <param name="html">input html string</param>
            /// <returns>cleaned text in a string</returns>
            public string GetCleanText(string html)
            {
                // parse the html in TxtMessage to extract text only
                var htmlDoc = new HtmlDocument();
                htmlDoc.LoadHtml(html);
                var htmlNodes = htmlDoc.DocumentNode.SelectNodes("/");  // all nodes in the html
                string txtMessage = (htmlNodes[0].InnerText).Trim();
                return txtMessage;
            }
    
            
            /// <summary>
            /// Fill the txt column of previous bot logs with the parsed html content from the fullTxt column
            /// </summary>
            /// <param name="conn"></param>
            public override void ApplyDataMigrationOperations(MySqlConnection conn)
            {
                // Get all entries in the log table that have the html log and do not have text answers
                MySqlCommand request1 = new MySqlCommand("SELECT * FROM logmessagefrombot WHERE fullTxt IS NOT NULL AND txt IS NULL", conn);
    
                // loop over results and clean the text entries
                List<List<string>> tobefilled = new List<List<string>>(); 
    
                using (MySqlDataReader r = request1.ExecuteReader())
                {
                    while (r.Read())
                    {
                        string id = r.GetString("id");
                        string fullTxt = r.GetString("fullTxt");
                        string txt = this.GetCleanText(fullTxt);
                        tobefilled.Add(new List<string>() { id, txt });
                    }
                }
    
                System.Console.WriteLine($"{tobefilled.Count} to be updated ...");
                // Update all entries in the log table that have the html log and do not have text answers
    
                StringBuilder sb = new StringBuilder();
                string cases = " WHEN @id THEN \"@txt\" ";
                List<string> ids = new List<string>();
                sb.Append("UPDATE logmessagefrombot SET txt = ( CASE id ");
    
                if (tobefilled.Count > 0)
                {
                    foreach (List<string> elem in tobefilled)
                    {
                        sb.Append(cases.Replace("@id", elem[0]).Replace("@txt", elem[1].Replace("\n", " ").Replace("\"", "''")));
                        ids.Add(elem[0]);
                    }
                    sb.Append("ELSE txt ");
                    sb.Append("END )");
                    sb.Append("WHERE id IN ( ");
                    sb.Append(string.Join(",", ids.ToArray()) + ")");
                    MySqlCommand request2 = new MySqlCommand(sb.ToString(), conn);
                    request2.ExecuteNonQuery();
                }
            }
    
            
        }
    }
    

    the resulting query, built with StringBuilder is too long, but it works.