My DataTable looks something like this:
id1 | id2 | id3 |
---|---|---|
1 | ||
2 | ||
1 | 1 | |
2 | 3 | |
1 | 2 | 1 |
1 | 1 | 3 |
3 | 3 | |
2 | 3 | 2 |
I want my program to remove all "duplicate combinations", so the table would look like this:
id1 | id2 | id3 |
---|---|---|
1 | 2 | 1 |
1 | 1 | 3 |
3 | 3 | |
2 | 3 | 2 |
Basically I want it to check if other rows are already contained within other rows, while treating the empty cells as a cell that could have any value at all.
I have tried working with the DataTable.DefaultView.ToTable(true) distinct method and adding column names as parameters but couldn't get it to do exactly what i want.
Also the size of the table is dynamic - There can be an infinite amount of columns.
I finally found a good solution. The distinct doesn't work (using IComarable because it leaves the row with the null instead of the row with values. I had to add a sort so the row with null is before the one with values. Then I delete the first row leaving the second row with values. Also found I had to add dt.AcceptChanges(); because the row did get deleted.
static void Main(string[] args)
{
DataTable dt = new DataTable();
dt.Columns.Add("id1", typeof(int));
dt.Columns.Add("id2", typeof(int));
dt.Columns.Add("id3", typeof(int));
dt.Columns["id1"].AllowDBNull = true;
dt.Columns["id2"].AllowDBNull = true;
dt.Columns["id3"].AllowDBNull = true;
dt.Rows.Add(new object[] { 1});
dt.Rows.Add(new object[] { 2 });
dt.Rows.Add(new object[] { 1, 1 });
dt.Rows.Add(new object[] { 2, 3 });
dt.Rows.Add(new object[] { 1, 2, 1 });
dt.Rows.Add(new object[] { 1, 1, 3 });
dt.Rows.Add(new object[] { 3, 3 });
dt.Rows.Add(new object[] { 2, 3, 2 });
dt = RemoveDuplicates(dt);
}
static DataTable RemoveDuplicates(DataTable dt)
{
for (int col = dt.Columns.Count - 1; col >= 0; col--)
{
dt = dt.AsEnumerable().OrderBy(x => x.Field<int?>(col)).CopyToDataTable(); ;
}
for (int i = dt.Rows.Count - 2; i >= 0; i--)
{
bool delete = true;
for (int col = 0; col <= dt.Columns.Count - 1; col++ )
{
if (dt.Rows[i][col] == DBNull.Value)
continue;
if((int)dt.Rows[i][col] != (int)dt.Rows[i + 1][col])
{
delete = false;
break;
}
}
if (delete)
{
dt.Rows[i].Delete();
dt.AcceptChanges();
}
}
return dt;
}