I just don't know where to begin the ETL adventure. I have a multi-row header messy dataset provided by an archaic ERP software. I just need to clean it to become suitable for basic pivot/vlookups and maybe power BI.
Do you think I should go the VBA route or power query?
Any other suggestions are highly appreciated because my brain is boiling..
Thank you!
Here is the VBA I've used but it is buggy vba
Sub TransformDataToSingleRow()
Dim ws As Worksheet
Dim destRow As Long
Dim currentRow As Long
Dim lastCol As Long
Dim headers As Collection
Dim cell As Range
Dim valueDict As Object
' Set up the worksheet and output row
Set ws = ActiveSheet
destRow = ws.Cells(ws.Rows.Count, 1).End(xlUp).Row + 2 ' Output starts 2 rows below data
' Store all unique headers into a collection
Set headers = New Collection
Set valueDict = CreateObject("Scripting.Dictionary") ' For mapping headers to values
' Loop through each cell in the sheet
For currentRow = 1 To ws.Cells(ws.Rows.Count, 1).End(xlUp).Row
For Each cell In ws.Rows(currentRow).Cells
If cell.Value <> "" Then
' Check if it's a header: text followed by a value in the next cell
If cell.Value Like "*:*" And cell.Offset(0, 1).Value <> "" Then
On Error Resume Next ' Prevent duplicates in the collection
headers.Add cell.Value, CStr(cell.Value)
On Error GoTo 0
' Add value to dictionary
valueDict(cell.Value) = cell.Offset(0, 1).Value
End If
' Capture tabular data with headers from the last row
If ws.Cells(currentRow, 1).Value = "Due Date" Then
lastCol = ws.Cells(currentRow, ws.Columns.Count).End(xlToLeft).Column
For col = 1 To lastCol
On Error Resume Next
headers.Add ws.Cells(currentRow, col).Value, ws.Cells(currentRow, col).Value
On Error GoTo 0
valueDict(ws.Cells(currentRow, col).Value) = ws.Cells(currentRow + 1, col).Value
Next col
End If
End If
Next cell
Next currentRow
' Output headers and values to a single row
Dim header As Variant
Dim destCol As Long
destCol = 1
' Output headers in the first row
For Each header In headers
ws.Cells(destRow, destCol).Value = header
destCol = destCol + 1
Next header
' Output values in the second row
destCol = 1
For Each header In headers
ws.Cells(destRow + 1, destCol).Value = valueDict(header)
destCol = destCol + 1
Next header
MsgBox "Transformation complete!", vbInformation
End Sub
Bit rough but works:
Sub ExpandTable()
Dim wsSrc As Worksheet, wsDest As Worksheet, cDest As Range, headers
Dim c As Range, numRows As Long
Set wsSrc = ThisWorkbook.Worksheets("Source")
Set wsDest = ThisWorkbook.Worksheets("Dest")
Set cDest = wsDest.Range("A1")
headers = Array( _
"ITEM", "Description", "Current Stock", "Unit", "Min.", "Max", _
"Due Date", "Type", "Transaction", "With", "Required", "Received", _
"Net", "MRP Order", "Prod. On Hand", "Locat.")
cDest.Resize(1, UBound(headers) + 1).Value = headers 'add all headers
Set cDest = cDest.Offset(1)
Set c = wsSrc.Range("A1") 'start here on the source sheet
Do While c.Row <= wsSrc.Cells(Rows.count, 1).End(xlUp).Row
If c.Value Like "Item:*" Then 'data block header?
numRows = 0 'reset, and then count how many rows in the table below
Do While Len(c.Offset(5 + numRows)) > 0
numRows = numRows + 1
Loop
'populate the data, starting with the repeated values
cDest.Resize(numRows, 1).Value = Trim(Replace(Split(c.Value, ",")(0), "Item:", ""))
cDest.Offset(0, 1).Resize(numRows, 1).Value = Trim(Split(c.Value, ",")(1))
cDest.Offset(0, 2).Resize(numRows, 1).Value = c.Offset(2, 2).Value
cDest.Offset(0, 3).Resize(numRows, 1).Value = c.Offset(2, 4).Value
cDest.Offset(0, 4).Resize(numRows, 1).Value = c.Offset(2, 6).Value
cDest.Offset(0, 5).Resize(numRows, 1).Value = c.Offset(2, 8).Value
'copy the table...
cDest.Offset(0, 6).Resize(numRows, 10).Value = c.Offset(5).Resize(numRows, 10).Value
Set cDest = cDest.Offset(numRows) 'next destination
Set c = c.Offset(numRows + 4) 'skip copied data
End If
Set c = c.Offset(1)
Loop
End Sub