I need to update a table column by applying a defined function to that column.
Using MySqlCommand for example.
Eg. I have a C# defined function that cleans text string GetCleanText(string text_to_clean)
and in one of my tables, I have a column fullTxt
that contains the text to be cleaned, and another column txt
that is empty for now.
I should have as a result: txt = GetCleanText(fullTxt)
.
I tried using a foreach
loop, but it is so expansive, since I have several rows in my table.
Here is my code:
// Get all entries in the log table that have the html log and do not have text answers
MySqlCommand request1 = new MySqlCommand("SELECT * FROM my_table WHERE fullTxt IS NOT NULL AND txt IS NULL", conn);
// loop over results and clean the text entries
List<List<string>> tobefilled = new List<List<string>>();
using (MySqlDataReader r = request1.ExecuteReader())
{
while (r.Read())
{
string id = r.GetString("id");
string fullTxt = r.GetString("fullTxt");
string txt = this.GetCleanText(fullTxt);
tobefilled.Add(new List<string>() { id, txt });
}
}
System.Console.WriteLine($"{tobefilled.Count} to be updated ...");
// Update all entries in the log table that have the html log and do not have text answers
MySqlCommand request2 = new MySqlCommand("UPDATE my_table SET txt = @txt WHERE id = @id", conn);
request2.Parameters.AddWithValue("@txt", "");
request2.Parameters.AddWithValue("@id", "");
foreach (List<string> elem in tobefilled)
{
request2.Parameters["@txt"].Value = elem[1];
request2.Parameters["@id"].Value = elem[0];
request2.ExecuteNonQuery();
}
I don't know if this is so clean as a method, but it works faster:
using System.Collections.Generic;
using System.Text;
using HtmlAgilityPack;
using MySql.Data.MySqlClient;
namespace DatabaseMigrations.MigrationScripts
{
class MigrationScript
{
/// <summary>
/// Take a string having html formatted content, and extract raw text
/// </summary>
/// <param name="html">input html string</param>
/// <returns>cleaned text in a string</returns>
public string GetCleanText(string html)
{
// parse the html in TxtMessage to extract text only
var htmlDoc = new HtmlDocument();
htmlDoc.LoadHtml(html);
var htmlNodes = htmlDoc.DocumentNode.SelectNodes("/"); // all nodes in the html
string txtMessage = (htmlNodes[0].InnerText).Trim();
return txtMessage;
}
/// <summary>
/// Fill the txt column of previous bot logs with the parsed html content from the fullTxt column
/// </summary>
/// <param name="conn"></param>
public override void ApplyDataMigrationOperations(MySqlConnection conn)
{
// Get all entries in the log table that have the html log and do not have text answers
MySqlCommand request1 = new MySqlCommand("SELECT * FROM logmessagefrombot WHERE fullTxt IS NOT NULL AND txt IS NULL", conn);
// loop over results and clean the text entries
List<List<string>> tobefilled = new List<List<string>>();
using (MySqlDataReader r = request1.ExecuteReader())
{
while (r.Read())
{
string id = r.GetString("id");
string fullTxt = r.GetString("fullTxt");
string txt = this.GetCleanText(fullTxt);
tobefilled.Add(new List<string>() { id, txt });
}
}
System.Console.WriteLine($"{tobefilled.Count} to be updated ...");
// Update all entries in the log table that have the html log and do not have text answers
StringBuilder sb = new StringBuilder();
string cases = " WHEN @id THEN \"@txt\" ";
List<string> ids = new List<string>();
sb.Append("UPDATE logmessagefrombot SET txt = ( CASE id ");
if (tobefilled.Count > 0)
{
foreach (List<string> elem in tobefilled)
{
sb.Append(cases.Replace("@id", elem[0]).Replace("@txt", elem[1].Replace("\n", " ").Replace("\"", "''")));
ids.Add(elem[0]);
}
sb.Append("ELSE txt ");
sb.Append("END )");
sb.Append("WHERE id IN ( ");
sb.Append(string.Join(",", ids.ToArray()) + ")");
MySqlCommand request2 = new MySqlCommand(sb.ToString(), conn);
request2.ExecuteNonQuery();
}
}
}
}
the resulting query, built with StringBuilder
is too long, but it works.