Search code examples
excelvbapowerqueryetl

Beginner is stuck with EXCEL ETL


I just don't know where to begin the ETL adventure. I have a multi-row header messy dataset provided by an archaic ERP software. I just need to clean it to become suitable for basic pivot/vlookups and maybe power BI.

Do you think I should go the VBA route or power query?

Any other suggestions are highly appreciated because my brain is boiling..

Thank you!

before garbage data

after good data

Here is the VBA I've used but it is buggy vba

Sub TransformDataToSingleRow()
    Dim ws As Worksheet
    Dim destRow As Long
    Dim currentRow As Long
    Dim lastCol As Long
    Dim headers As Collection
    Dim cell As Range
    Dim valueDict As Object
                     
    ' Set up the worksheet and output row
    Set ws = ActiveSheet
    destRow = ws.Cells(ws.Rows.Count, 1).End(xlUp).Row + 2 ' Output starts 2 rows below data
    
    ' Store all unique headers into a collection
    Set headers = New Collection
    Set valueDict = CreateObject("Scripting.Dictionary") ' For mapping headers to values
    
    ' Loop through each cell in the sheet
    For currentRow = 1 To ws.Cells(ws.Rows.Count, 1).End(xlUp).Row
        For Each cell In ws.Rows(currentRow).Cells
            If cell.Value <> "" Then
                ' Check if it's a header: text followed by a value in the next cell
                If cell.Value Like "*:*" And cell.Offset(0, 1).Value <> "" Then
                    On Error Resume Next ' Prevent duplicates in the collection
                    headers.Add cell.Value, CStr(cell.Value)
                    On Error GoTo 0
                    
                    ' Add value to dictionary
                    valueDict(cell.Value) = cell.Offset(0, 1).Value
                End If
                
                ' Capture tabular data with headers from the last row
                If ws.Cells(currentRow, 1).Value = "Due Date" Then
                    lastCol = ws.Cells(currentRow, ws.Columns.Count).End(xlToLeft).Column
                    For col = 1 To lastCol
                        On Error Resume Next
                        headers.Add ws.Cells(currentRow, col).Value, ws.Cells(currentRow, col).Value
                        On Error GoTo 0
                        valueDict(ws.Cells(currentRow, col).Value) = ws.Cells(currentRow + 1, col).Value
                    Next col
                End If
            End If
        Next cell
    Next currentRow
    
    ' Output headers and values to a single row
    Dim header As Variant
    Dim destCol As Long
    destCol = 1
    
    ' Output headers in the first row
    For Each header In headers
        ws.Cells(destRow, destCol).Value = header
        destCol = destCol + 1
    Next header
    
    ' Output values in the second row
    destCol = 1
    For Each header In headers
        ws.Cells(destRow + 1, destCol).Value = valueDict(header)
        destCol = destCol + 1
    Next header
    
    MsgBox "Transformation complete!", vbInformation
End Sub

Solution

  • Bit rough but works:

    Sub ExpandTable()
        Dim wsSrc As Worksheet, wsDest As Worksheet, cDest As Range, headers
        Dim c As Range, numRows As Long
        
        Set wsSrc = ThisWorkbook.Worksheets("Source")
        Set wsDest = ThisWorkbook.Worksheets("Dest")
        
        Set cDest = wsDest.Range("A1")
        
        headers = Array( _
           "ITEM", "Description", "Current Stock", "Unit", "Min.", "Max", _
           "Due Date", "Type", "Transaction", "With", "Required", "Received", _
           "Net", "MRP Order", "Prod. On Hand", "Locat.")
        
        cDest.Resize(1, UBound(headers) + 1).Value = headers 'add all headers
        Set cDest = cDest.Offset(1)
        
        Set c = wsSrc.Range("A1") 'start here on the source sheet
        Do While c.Row <= wsSrc.Cells(Rows.count, 1).End(xlUp).Row
            If c.Value Like "Item:*" Then  'data block header?
                numRows = 0 'reset, and then count how many rows in the table below
                Do While Len(c.Offset(5 + numRows)) > 0
                    numRows = numRows + 1
                Loop
                'populate the data, starting with the repeated values
                cDest.Resize(numRows, 1).Value = Trim(Replace(Split(c.Value, ",")(0), "Item:", ""))
                cDest.Offset(0, 1).Resize(numRows, 1).Value = Trim(Split(c.Value, ",")(1))
                cDest.Offset(0, 2).Resize(numRows, 1).Value = c.Offset(2, 2).Value
                cDest.Offset(0, 3).Resize(numRows, 1).Value = c.Offset(2, 4).Value
                cDest.Offset(0, 4).Resize(numRows, 1).Value = c.Offset(2, 6).Value
                cDest.Offset(0, 5).Resize(numRows, 1).Value = c.Offset(2, 8).Value
                'copy the table...
                cDest.Offset(0, 6).Resize(numRows, 10).Value = c.Offset(5).Resize(numRows, 10).Value
                Set cDest = cDest.Offset(numRows) 'next destination
                Set c = c.Offset(numRows + 4)     'skip copied data
            End If
            Set c = c.Offset(1)
        Loop
    End Sub