Search code examples
sql-serverexternal-tablescharindexstring-function

Flat File to SQL server


I want to read data from a TXT/FLAT file and arrange the data using the first column contents as column names and the data after the semi colon as records .

SAMPLE DATA

{1:F01SBZAZAJJXXXX9999999999}{2:I940SBICMWMXXXXXN}{4:
:20:D424A100110011E4
:25:020083203
:28C:49/1
:60F:C140106ZAR1029873,62
:61:1401060106DR5000,NTRF99999999//NONREF20140106-13175-016050001844421
:86:/PREF/ZA000520CATS THIRD PARTY PAYMENT
:62F:C140106ZAR0,00
-}

{1:F01SBZAZAJJXXXX9999999999}{2:I940SBICMWMXXXXXN}{4:
:20:D3DE7040110011E4
:25:020083204
:28C:51/1
:60F:C140106NAD1030073,
:61:1401060106DR5000,NTRF20140106-13175-0//NONREF20140106-13175-016050001844421
:86:/PREF/NA000520TRANSFER
:62F:C140106NAD0,00
-}

The query below only worked for one chunk...I need a query that reads the whole data set and arranges it as shown above in the attached image.

SELECT [20], [25], [28C], [60F], [61], [86], [62F] 
FROM
(SELECT column2, column3 FROM [dbo].[Sample MT940]) AS Source_Table
PIVOT
(MAX(column3)
FOR 
column2 in ([20], [25], [28C], [60F], [61], [86], [62F])
) AS PIVOT_TABLE

Expected Results

Expected results


Solution

  • Please try the following solution.

    The assumption is that you always have full set of values for each row in the target table: ([20], [25], [28C], [60F], [61], [86], [62F])

    We are grouping all rows into buckets with 9 consecutive rows in each of them via NTILE() function.

    SQL

    -- DDL and sample data population, start
    DECLARE @tbl TABLE (Token VARCHAR(1024));
    INSERT @tbl (Token) VALUES
    ('{1:F01SBZAZAJJXXXX9999999999}{2:I940SBICMWMXXXXXN}{4:'),
    (':20:D424A100110011E4'),
    (':25:020083203'),
    (':28C:49/1'),
    (':60F:C140106ZAR1029873,62'),
    (':61:1401060106DR5000,NTRF99999999//NONREF20140106-13175-016050001844421'),
    (':86:/PREF/ZA000520CATS THIRD PARTY PAYMENT'),
    (':62F:C140106ZAR0,00'),
    ('-}'),
    ('{1:F01SBZAZAJJXXXX9999999999}{2:I940SBICMWMXXXXXN}{4:'),
    (':20:D3DE7040110011E4'),
    (':25:020083204'),
    (':28C:51/1'),
    (':60F:C140106NAD1030073,'),
    (':61:1401060106DR5000,NTRF20140106-13175-0//NONREF20140106-13175-016050001844421'),
    (':86:/PREF/NA000520TRANSFER'),
    (':62F:C140106NAD0,00'),
    ('-}');
    -- DDL and sample data population, end
    
    DECLARE @group INT = (SELECT COUNT(*) FROM @tbl) / 9
    
    ;WITH rs AS
    (
        SELECT * 
            , _token = PARSENAME(REPLACE(token,':','.'),1)
            , seq = (ROW_NUMBER() OVER (ORDER BY (SELECT NULL))) % 9
            , grp = NTILE(@group) OVER (ORDER BY (SELECT NULL))
        FROM @tbl
    )
    SELECT DISTINCT [20] = MAX(IIF(seq = 2, _token, '')) OVER (PARTITION BY grp)
        , [25] = MAX(IIF(seq = 3, _token, '')) OVER (PARTITION BY grp)
        , [28C] = MAX(IIF(seq = 4, _token, '')) OVER (PARTITION BY grp)
        , [60F] = MAX(IIF(seq = 5, _token, '')) OVER (PARTITION BY grp)
        , [61] = MAX(IIF(seq = 6, _token, '')) OVER (PARTITION BY grp)
        , [86] = MAX(IIF(seq = 7, _token, '')) OVER (PARTITION BY grp)
        , [62F] = MAX(IIF(seq = 8, _token, '')) OVER (PARTITION BY grp)
    FROM rs;
    

    Output

    20 25 28C 60F 61 86 62F
    D3DE7040110011E4 020083204 51/1 C140106NAD1030073, 1401060106DR5000,NTRF20140106-13175-0//NONREF20140106-13175-016050001844421 /PREF/NA000520TRANSFER C140106NAD0,00
    D424A100110011E4 020083203 49/1 C140106ZAR1029873,62 1401060106DR5000,NTRF99999999//NONREF20140106-13175-016050001844421 /PREF/ZA000520CATS THIRD PARTY PAYMENT C140106ZAR0,00