Tags: sql, sql-server, visual-studio, biml, ssis-2017

How to fill an array in BIML using a SQL table as the source


I have a table that contains filenames and some other attributes. I want to load these records into an array in BIML so that I can loop through the files (and import them using the additional attributes).

The code to import such a file is mostly done. For now I just need the part that puts the info below into an array so that I can loop through the files.

 CREATE TABLE [config].[FilesToImport](
    [id] [int] IDENTITY(1,1) NOT NULL,
    [path] [nvarchar](1024) NULL,
    [importfilename] [nvarchar](1024) NULL,
    [dest_server] [nvarchar](256) NULL,
    [dest_db] [nvarchar](256) NULL,
    [dest_schema] [nvarchar](256) NULL,
    [refreshtype] [int] NULL,
    [separator] [nvarchar](5) NULL,
    [order] [int] NULL
) 
GO
SET IDENTITY_INSERT [config].[FilesToImport] ON 
GO
INSERT [config].[FilesToImport] ([id], [path], [importfilename], [dest_server], [dest_db], [dest_schema], [refreshtype], [separator],  [order]) VALUES (1, N'c:\temp', N'FileA.csv', N'SMVPDA001', N'TEST', N'landingzone', 0, N';',  1)
GO
INSERT [config].[FilesToImport] ([id], [path], [importfilename], [dest_server], [dest_db], [dest_schema], [refreshtype], [separator], [order]) VALUES (2, N'c:\temp', N'FileZ.csv', N'SMVPDA001', N'TEST', N'landingzone', 0, N';', 2)
GO
INSERT [config].[FilesToImport] ([id], [path], [importfilename], [dest_server], [dest_db], [dest_schema], [refreshtype], [separator],  [order]) VALUES (3, N'c:\temp', N'File4.cvs', N'SMVPDA001', N'TEST', N'landingzone', 0, N';', 3)
GO
SET IDENTITY_INSERT [config].[FilesToImport] OFF

So basically the code below should get its filenames from the table mentioned above instead of from the file system.

<Biml xmlns="http://schemas.varigence.com/biml.xsd">
    <#
    string Applicatie = "BIML";
    string Prefix = "import";
     
    string fileName;
    string path = @"c:\temp";
    string[] myFiles = Directory.GetFiles(path, "*.csv");
     
    string[] myColumns;
    #>
    <FileFormats>
    <#
    foreach (string filePath in myFiles)
    {
 
    #>
    <FlatFileFormat Name="FlatFileFormat<#=Path.GetFileNameWithoutExtension(filePath)#>" RowDelimiter="CRLF" ColumnNamesInFirstDataRow="true" IsUnicode="false">
        <Columns>
            <# 
                 
                StreamReader myFile = new StreamReader(filePath);
                myColumns = myFile.ReadLine().Replace("\"","").Split('|');
                 myFile.Close();
                 
                // to determine the column delimiter 
                int columnCount = 0;
                string columnDelimiter = "";
                //WriteLine($"<!-- ref count {myColumns.Count} -->");
                    foreach(string myColumn in myColumns)
                    {
                    
                        columnCount++;
                        bool finalColumn = columnCount == myColumns.Length;
                        WriteLine($"<!-- actual count {columnCount} -->");
                      //  WriteLine($"<!-- what {columnCount == myColumns.Count} {finalColumn} -->");
                        
                        if (finalColumn)
                        {
                            columnDelimiter = "CRLF";
                        }
                        else
                        {
                            columnDelimiter = "|";
                        }
                        WriteLine($"<!-- delimiter {columnDelimiter} -->");
                #>
                <Column Name="<#=myColumn#>" DataType = "String" Length="250" Delimiter="<#=columnDelimiter#>"></Column>
                <# } #>
            </Columns>
        </FlatFileFormat>
            <#}#>
    </FileFormats>
    <Connections>
            <#
            foreach (string filePath in myFiles)
            {
             
            #>
            <FlatFileConnection Name="FF_CSV-<#=Path.GetFileNameWithoutExtension(filePath)#>" 
                                FilePath="<#=filePath#>" 
                                FileFormat="FlatFileFormat<#=Path.GetFileNameWithoutExtension(filePath)#>" 
             >
                 <Expressions>
                    <Expression ExternalProperty="TextQualifier">"\""</Expression>
                    <Expression ExternalProperty="RowDelimiter">"CRLF"</Expression>
                </Expressions>

            </FlatFileConnection>
            <# } #>
            <OleDbConnection
                Name="STG_<#=Applicatie#>" 
                ConnectionString="Data Source=SQLSERVER;Initial Catalog=TEST;Provider=SQLNCLI11.1;Integrated Security=SSPI;Auto Translate=False;">
            </OleDbConnection>
    </Connections> 
        <Packages>
 
    <#       // Loop through the files
            int TableCount = 0;
            foreach (string filePath in myFiles)
            { 
                TableCount++;
                fileName = Path.GetFileNameWithoutExtension(filePath);
                #>
                <Package Name="stg_<#=Prefix#>_<#=TableCount.ToString()#>_<#=fileName#>" ConstraintMode="Linear" AutoCreateConfigurationsType="None" ProtectionLevel="EncryptSensitiveWithPassword" PackagePassword="secret">
                    <Variables>
                        <Variable Name="CountStage" DataType="Int32" Namespace="User">0</Variable>
                    </Variables>               
                    <Tasks>
                        <ExecuteSQL ConnectionName="STG_<#=Applicatie#>" Name="SQL-Truncate <#=fileName#>">
                            <DirectInput>TRUNCATE TABLE dbo.<#=Prefix#>_<#=fileName#></DirectInput>
                        </ExecuteSQL>
                         
                        <Dataflow Name="DFT-Transport CSV_<#=fileName#>">
                            <Transformations>
                                <FlatFileSource Name="SRC_FF-<#=fileName#> " ConnectionName="FF_CSV-<#=Path.GetFileNameWithoutExtension(filePath)#>">
                                </FlatFileSource>
                                 
                                <OleDbDestination ConnectionName="STG_<#=Applicatie#>" Name="OLE_DST-<#=fileName#>" >
                                <ExternalTableOutput Table="dbo.<#=Prefix#>_<#=fileName#>"/>
                                </OleDbDestination>
                            </Transformations>
                        </Dataflow>
                    </Tasks>
                </Package>
    <#    }    #>
 
                <!-- Create Master Package -->
                <Package Name="stg_<#=Prefix#>_0_Master" ConstraintMode="Parallel" AutoCreateConfigurationsType="None" ProtectionLevel="EncryptSensitiveWithPassword" PackagePassword="secret">
                    <Tasks>
                    <#  int TableCount2 = 0;
                        foreach (string filePath in myFiles)
                        { 
                                TableCount2++;
                                fileName = Path.GetFileNameWithoutExtension(filePath); #>
                 
                            <ExecutePackage Name="stg_<#=Prefix#>_<#=TableCount2.ToString()#>_<#=fileName#>">
                                <ExternalProjectPackage  Package="stg_<#=Prefix#>_<#=TableCount2.ToString()#>_<#=fileName#>.dtsx" />
                            </ExecutePackage>
                        <#
                        }
                        #>    
                    </Tasks>
                </Package>
 
    </Packages>
</Biml>
 
<!--Includes/Imports for C#-->
<#@ template language="C#" hostspecific="true"#>
<#@ import namespace="System.Data"#>
<#@ import namespace="System.IO"#>

Solution

  • I see you edited the question with actual code, but the crux of the matter is that you're looking for the ExternalDataAccess.GetDataTable method. It also comes up in Foreach datarow filter in BIML (oddly, I never answered that one)...

    That method populates a DataTable, which you can then work with like a two-dimensional array. The Rows property lets you enumerate the rows, and each DataRow is a nullable array of objects. Yes, even though a value has a type in the database, you have to explicitly cast it to something else before you can work with it.
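
    For quick reference, pulling typed values out of a row looks something like this (a minimal sketch assuming the dt DataTable populated in the template below and the column names from the question's config.FilesToImport table; the variable names are just illustrative and the nullable columns can come back as DBNull):

    foreach (DataRow row in dt.Rows)
    {
        // Everything is returned as object, so cast back to the underlying SQL type
        int id = (int)row["id"];
        string path = (string)row["path"];
        string importFileName = (string)row["importfilename"];
        string separator = (string)row["separator"];

        // e.g. rebuild the full file path (Path.Combine needs the System.IO import)
        string fullPath = Path.Combine(path, importFileName);
    }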

    <#@ import namespace="System.Data" #>
    <#@ import namespace="System.Data.SqlClient" #>
    <Biml xmlns="http://schemas.varigence.com/biml.xsd">
    <#
    // Could also pull this from this.RootNodes.Connections
    string connectionStringSource = @"Server=.\dev2017;Initial Catalog=tempdb;Integrated Security=SSPI;Provider=SQLNCLI11.1;";
    string query = @"SELECT
        FTI.id
    ,   FTI.path
    ,   FTI.importfilename
    ,   FTI.dest_server
    ,   FTI.dest_db
    ,   FTI.dest_schema
    ,   FTI.refreshtype
    ,   FTI.separator
    ,   FTI.[order]
    FROM
        config.FilesToImport AS FTI;";
    
    DataTable dt = null;
    dt = ExternalDataAccess.GetDataTable(connectionStringSource, query);
    
    foreach(DataRow row in dt.Rows)
    {
        // Downside to a data table is that everything is object, so explicit casts are required
        WriteLine($"<!-- {row[0]},{row[1]},{row[2]} etc -->");
        
    }
    #>
        <FileFormats>
    <#
    // Enumerate through the data table to define FFCM 
    foreach(DataRow row in dt.Rows)
    {
    #>
            <FlatFileFormat Name="SO_<#=row[0]#>" RowDelimiter="CRLF" ColumnNamesInFirstDataRow="true" IsUnicode="false"/>
    <#
    }
    #>    
        </FileFormats>
        <Packages>
    <#
    // Enumerate through the data table to define packages, etc
    foreach(DataRow row in dt.Rows)
    {
    #>
        <Package Name="Load_<#=row[5]#>_<#=row[0]#>" />
    <#
    }
    #>    
        </Packages>
    </Biml>
    

    Glancing through your code, you might be interested in CallBimlScript or CallBimlScriptWithOutput (samples on my blog). Instead of one monolithic BimlScript, you can break your code into functions (essentially). In the Replicate-o-matic post, I pass in a schema and a table, and all the logic for building a single instance of a package lives in that file, so the main builder is only concerned with building the top-level artifacts (connections, packages, etc.) and pushes the work off to functions.
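
    As a rough sketch of that pattern (the file name inc_Package.biml and its two properties are hypothetical, and I'm quoting the property/CallBimlScript syntax from memory, so treat this as a sketch rather than copy-paste code): the callee declares the values it expects, and the main builder just loops over the rows and delegates.

    <!-- inc_Package.biml (hypothetical callee): all the logic for one package lives here -->
    <#@ property name="prefix" type="string" #>
    <#@ property name="fileName" type="string" #>
    <Package Name="stg_<#=prefix#>_<#=fileName#>" ConstraintMode="Linear">
        <!-- truncate task, data flow, etc. -->
    </Package>

    <!-- main builder (caller): only loops over the rows and delegates -->
    <Packages>
    <#  foreach (DataRow row in dt.Rows) { #>
        <#=CallBimlScript("inc_Package.biml", "import", (string)row["importfilename"])#>
    <#  } #>
    </Packages>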

    What's clever about CallBimlScriptWithOutput is that you can return data to the caller. For example, you build your package name when you create the package, and then you repeat that logic in your master package in order to execute them. Don't repeat yourself. Instead, have the package builder return a property that is the new package's name (and add it to an array/list). The orchestrator package can then simply grab package names without caring how they were derived.
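
    Even without the WithOutput variant, the same don't-repeat-yourself idea can be sketched inside a single BimlScript: derive each package name once, store it in a list, and let both the worker loop and the master package read from that list. This is a minimal sketch assuming the dt DataTable from above, the question's naming convention, and imports of System.Collections.Generic and System.IO; with CallBimlScriptWithOutput the name would instead come back from the callee.

    <#  // Derive each package name once and remember it
        // (needs imports of System.Collections.Generic and System.IO)
        List<string> packageNames = new List<string>();
        int count = 0;
        foreach (DataRow row in dt.Rows)
        {
            count++;
            packageNames.Add(string.Format("stg_import_{0}_{1}", count, Path.GetFileNameWithoutExtension((string)row["importfilename"])));
        }
    #>
        <Packages>
    <#  foreach (string packageName in packageNames) { #>
            <Package Name="<#=packageName#>" ConstraintMode="Linear">
                <!-- worker tasks omitted -->
            </Package>
    <#  } #>
            <Package Name="stg_import_0_Master" ConstraintMode="Parallel">
                <Tasks>
    <#  foreach (string packageName in packageNames) { #>
                    <ExecutePackage Name="EPT <#=packageName#>">
                        <ExternalProjectPackage Package="<#=packageName#>.dtsx" />
                    </ExecutePackage>
    <#  } #>
                </Tasks>
            </Package>
        </Packages>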