Search code examples
c# sql-server parsing smo

C# parse SQL statement to find all INSERT/UPDATE/DELETE tables used in stored procedures


As the title says my intention is to find all tables participating in either INSERT/UPDATE/DELETE statements and produce a structured format. So far this is what I've come up with -

/// <summary>
/// Parses every Transact-SQL stored procedure in the dbo schema of the configured
/// database and collects, per procedure name, the targets of its INSERT, UPDATE and
/// DELETE statements into <c>list</c> (one <see cref="SPAudit"/> per procedure).
/// </summary>
/// <remarks>
/// The target is taken as the first child of each statement's specification node.
/// For statements of the form <c>DELETE w FROM SomeTable w ...</c> that child is the
/// alias (<c>w</c>), not the underlying table; resolving aliases requires inspecting
/// the FROM clause and is not done here.
/// </remarks>
void Main()
{
    string DBName = "Blah";
    string ServerName = @"(localdb)\MSSQLLocalDB";

    Server s = new Server(ServerName);
    Database db = s.Databases[DBName];

    ConcurrentDictionary<string, SPAudit> list = new ConcurrentDictionary<string, SPAudit>();

    // Materialize name + body up front so the parallel loop below does not touch SMO objects.
    var sps = db.StoredProcedures.Cast<StoredProcedure>()
    .Where(x => x.ImplementationType == ImplementationType.TransactSql  && x.Schema == "dbo")
    .Select(x => new
    {
        x.Name,
        Body = x.TextBody
    }).ToList();

    Parallel.ForEach(sps, item =>
    {
        try
        {
            ParseResult p = Parser.Parse(item.Body);

            // The three searches run sequentially on this worker. Parallel.ForEach already
            // spreads procedures across the thread pool; spawning and blocking on extra
            // tasks per procedure only ties up pool threads.
            foreach (var ins in FindBatchCollection<SqlInsertStatement>(p.Script.Batches))
            {
                RecordTable(list, item.Name, ins?.InsertSpecification?.Children?.FirstOrDefault(), audit => audit.InsertTable);
            }

            foreach (var upd in FindBatchCollection<SqlUpdateStatement>(p.Script.Batches))
            {
                RecordTable(list, item.Name, upd?.UpdateSpecification?.Children?.FirstOrDefault(), audit => audit.UpdateTable);
            }

            foreach (var del in FindBatchCollection<SqlDeleteStatement>(p.Script.Batches))
            {
                RecordTable(list, item.Name, del?.DeleteSpecification?.Children?.FirstOrDefault(), audit => audit.DeleteTable);
            }
        }
        catch (Exception ex)
        {
            // Best effort: report which procedure failed and continue with the others.
            Console.WriteLine($"{item.Name}: {ex.Message}");
        }
    });
}

/// <summary>
/// Normalizes the SQL text of <paramref name="target"/> into a table name and adds it to
/// the set chosen by <paramref name="selectSet"/> on the audit entry for
/// <paramref name="procedureName"/>, creating that entry on first use.
/// </summary>
/// <param name="list">Shared result map keyed by stored procedure name.</param>
/// <param name="procedureName">Name of the stored procedure being analysed.</param>
/// <param name="target">Parse-tree node holding the statement target; ignored when null.</param>
/// <param name="selectSet">Picks the Insert/Update/Delete set of the audit entry.</param>
void RecordTable(ConcurrentDictionary<string, SPAudit> list, string procedureName, SqlCodeObject target, Func<SPAudit, HashSet<string>> selectSet)
{
    if (target == null)
    {
        return;
    }

    var tableName = target.Sql.Replace("dbo.", "").Replace("[", "").Replace("]", "");

    // Names starting with '@' are table variables / parameters, not real tables.
    if (tableName.StartsWith("@", StringComparison.Ordinal))
    {
        return;
    }

    // The entry is created only when there is something to record, so procedures
    // without any DML target never appear in the map. The HashSet itself is not
    // thread-safe; this relies on each procedure name being handled by one worker.
    SPAudit audit = list.GetOrAdd(procedureName, _ => new SPAudit());
    selectSet(audit).Add(tableName);
}

/// <summary>
/// Collects every statement of type <typeparamref name="T"/> found under the batches
/// of <paramref name="coll"/>, by delegating each batch's children to FindStatement.
/// </summary>
/// <typeparam name="T">Statement type to search for.</typeparam>
/// <param name="coll">Batches of a parsed script.</param>
/// <returns>The matches from all batches, in batch order.</returns>
IEnumerable<T> FindBatchCollection<T>(SqlBatchCollection coll) where T : SqlStatement
{
    var found = new List<T>();

    foreach (var batch in coll)
    {
        IEnumerable<T> inBatch = FindStatement<T>(batch.Children);
        found.AddRange(inBatch);
    }

    return found;
}


/// <summary>
/// Recursively searches <paramref name="objs"/> and their descendants for nodes whose
/// runtime type is exactly <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">Statement type to search for (exact type match, not subclasses).</typeparam>
/// <param name="objs">Parse-tree nodes to inspect; a null sequence yields no matches.</param>
/// <returns>
/// Each matching node once, in depth-first order. A matching node's own children are
/// not searched.
/// </returns>
IEnumerable<T> FindStatement<T>(IEnumerable<SqlCodeObject> objs) where T : SqlStatement
{
    List<T> sts = new List<T>();
    if (objs == null)
    {
        return sts;
    }

    foreach (var item in objs)
    {
        if (item.GetType() == typeof(T))
        {
            sts.Add(item as T);
        }
        else
        {
            // Recurse exactly once per node. Recursing inside a loop over item.Children
            // would repeat the same subtree search once per child, returning duplicates
            // and multiplying the work at every level of the tree.
            sts.AddRange(FindStatement<T>(item.Children));
        }
    }
    return sts;
}

/// <summary>
/// Per-stored-procedure record of the distinct table names targeted by
/// INSERT, UPDATE and DELETE statements.
/// </summary>
public class SPAudit
{
    /// <summary>Names of tables targeted by INSERT statements; starts empty.</summary>
    public HashSet<string> InsertTable { get; set; } = new HashSet<string>();

    /// <summary>Names of tables targeted by UPDATE statements; starts empty.</summary>
    public HashSet<string> UpdateTable { get; set; } = new HashSet<string>();

    /// <summary>Names of tables targeted by DELETE statements; starts empty.</summary>
    public HashSet<string> DeleteTable { get; set; } = new HashSet<string>();
}

Now I'm facing two problems

  • First, it is taking a very long time to complete, given that there are around 841 stored procedures in the database.
  • Second, for statements like the following, the table name is not captured properly: the alias w is captured instead of SomeTable_1 or SomeTable_2.
-- Example procedure: removes from SomeTable_2 and SomeTable_1 the rows whose SomeID
-- appears in the rows passed in through @t.
CREATE PROCEDURE [dbo].[sp_blah]
    -- Read-only parameter of the user-defined type SomeTableType; queried like a table below.
    @t SomeTableType READONLY
AS  

    -- The DELETE target is the alias w; the underlying table (SomeTable_2) is only
    -- named in the FROM clause.
    DELETE w
    FROM SomeTable_2 w
    INNER JOIN (Select * from @t) t
    ON w.SomeID = t.SomeID

    -- Same pattern against SomeTable_1.
    DELETE w
    FROM SomeTable_1 w
    INNER JOIN (Select * from @t) t
    ON w.SomeID = t.SomeID


RETURN 0

Any help would be greatly appreciated.

Edit

Using the following dll from this location C:\Program Files (x86)\Microsoft SQL Server\140\DTS\Tasks-

  • Microsoft.SqlServer.ConnectionInfo.dll
  • Microsoft.SqlServer.Management.SqlParser.dll
  • Microsoft.SqlServer.Smo.dll
  • Microsoft.SqlServer.SqlEnum.dll

Solution

  • The SMO model exposes elements of the syntax tree. So instead of assuming a token by position, as in

    UpdateSpecification?.Children?.FirstOrDefault();
    

    look up the corresponding property in the documentation. For the update clause, the target table (or updatable view) can occur in different positions. Take this syntax:

    UPDATE tablename SET column=value WHERE conditions
    

    which is represented as

    var targettable = ins?.UpdateSpecification?.Target?.ScriptTokenStream?.FirstOrDefault()?.Text;
    

    in the SMO model. Whereas, a syntax unique to tsql,

    UPDATE t SET t.columnname=value FROM tablename t WHERE conditions 
    

    will have its list of tables in the FROM clause.

    Regarding the other two DML statements you mentioned: DELETE is the same because they share a common base class, DeleteInsertSpecification (Target).

    For INSERT, there is the Target as well, and if its InsertSource is of type SelectInsertSource, this may be based on any number of tables and views too.