Search code examples
macroslibreoffice-calcbasicopenoffice-calc

Libreoffice Calc Basic macro to combine sheets with different number of columns


I need help with this LibreOffice Basic code intended to merge & combine all sheets into the "Combined" sheet. Columns are supposed to be merged as union of columns from all sheets, i.e. same columns to be merged as one column. Rows are meant to be appended from all sheets. But the code is not working properly:

  1. Header Row with column names is missing
  2. Not all rows from all sheets are appended
  3. Values copied don't seem to be ok
Sub CombineSheetsWithDifferentHeaders()
    Dim oDoc As Object
    Dim consolidatedData() As Variant
    Dim firstIteration As Boolean
    firstIteration = True

    oDoc = ThisComponent ' Get the current document

    ' Check if the "Combined" sheet exists; if not, create it
    Dim combinedSheet As Object
    On Error Resume Next
    combinedSheet = oDoc.Sheets.getByName("Combined")
    On Error GoTo 0

    If combinedSheet Is Nothing Then
        combinedSheet = oDoc.createInstance("com.sun.star.sheet.Spreadsheet")
        combinedSheet.setName("Combined")
        oDoc.Sheets.insertByName("Combined", combinedSheet)
    End If

    ' Iterate through all sheets in the document
    For Each srcSheet In oDoc.Sheets
        If srcSheet.Name <> "Combined" Then ' Skip the Combined sheet
            ' Read the data from the source sheet into an array
            Dim srcData() As Variant
            srcData = ReadSheetData(srcSheet)

            ' Debug: Print the sheet name
            MsgBox "Sheet Name: " & srcSheet.Name

            ' Debug: Print the dimensions of srcData
            Dim numRowsSrc As Integer
            Dim numColsSrc As Integer
            numRowsSrc = UBound(srcData, 1) + 1
            numColsSrc = UBound(srcData, 2) + 1
            MsgBox "srcData Dimensions: " & numRowsSrc & " rows, " & numColsSrc & " columns"

            ' Consolidate the data
            If firstIteration Then
                ' Initialize consolidatedData with the first data
                consolidatedData = srcData
                firstIteration = False
            Else
                ' Merge the data from the current sheet with consolidatedData
                consolidatedData = MergeData(consolidatedData, srcData)
            End If
        End If
    Next srcSheet

    ' Debug: Check if consolidatedData is empty
    If IsEmpty(consolidatedData) Then
        MsgBox "consolidatedData is empty"
    Else
        ' Debug: Print the dimensions of consolidatedData
        Dim numRowsConsolidated As Integer
        Dim numColsConsolidated As Integer
        numRowsConsolidated = UBound(consolidatedData, 1) + 1
        numColsConsolidated = UBound(consolidatedData, 2) + 1
        MsgBox "consolidatedData Dimensions: " & numRowsConsolidated & " rows, " & numColsConsolidated & " columns"
    End If

    ' Write the consolidated data to the "Combined" sheet
    WriteConsolidatedData(consolidatedData, combinedSheet)
End Sub

' Helper function to write the consolidated data to the "Combined" sheet
Sub WriteConsolidatedData(consolidatedData() As Variant, combinedSheet As Object)
    ' Resize the "Combined" sheet to accommodate the consolidated data
    Dim numRows As Integer
    Dim numCols As Integer
    numRows = UBound(consolidatedData, 1) + 1
    numCols = UBound(consolidatedData, 2) + 1
    combinedSheet.getRows().insertByIndex(0, numRows)
    combinedSheet.getColumns().insertByIndex(0, numCols)

    ' Write the consolidated data to the "Combined" sheet, including the header row
    For i = 0 To numRows - 1
        For j = 0 To numCols - 1
            combinedSheet.getCellByPosition(j, i).setValue(consolidatedData(i, j))
        Next j
    Next i
End Sub

' Helper function to merge data from different sheets
Function MergeData(data1() As Variant, data2() As Variant) As Variant
    ' Determine the number of rows in each dataset
    Dim numRows1 As Integer
    Dim numRows2 As Integer
    numRows1 = UBound(data1, 1) + 1
    numRows2 = UBound(data2, 1) + 1

    ' Determine the number of columns in each dataset
    Dim numCols1 As Integer
    Dim numCols2 As Integer
    numCols1 = UBound(data1, 2) + 1
    numCols2 = UBound(data2, 2) + 1

    ' Create an array to store column names and their indices from the first dataset
    Dim columnArray1() As Variant
    ReDim columnArray1(0 To numCols1 - 1)
    For j = 0 To numCols1 - 1
        columnArray1(j) = data1(0, j)
    Next j

    ' Merge columns from the second dataset
    Dim numMergedCols As Integer
    numMergedCols = numCols1

    For j = 0 To numCols2 - 1
        Dim colName As String
        colName = data2(0, j)

        ' Check if the column name from the second dataset exists in the first dataset
        Dim colIndex2 As Integer
        colIndex2 = -1
        For k = 0 To UBound(columnArray1)
            If columnArray1(k) = colName Then
                colIndex2 = k
                Exit For
            End If
        Next k

        If colIndex2 = -1 Then
            ' Add the new column name to the array
            ReDim Preserve columnArray1(0 To numMergedCols)
            columnArray1(numMergedCols) = colName
            numMergedCols = numMergedCols + 1
            colIndex2 = numMergedCols - 1
        End If
    Next j

    ' Calculate the maximum number of rows
    Dim maxRows As Integer
    maxRows = IIf(numRows1 > numRows2, numRows1, numRows2)

    ' Create a result array with the maximum dimensions
    Dim result() As Variant
    ReDim result(0 To maxRows, 0 To numMergedCols - 1)

    ' Initialize the result array with headers
    For j = 0 To UBound(columnArray1)
        result(0, j) = columnArray1(j)
    Next j

    ' Copy data from the first dataset
    For i = 1 To numRows1 - 1
        For j = 0 To numCols1 - 1
            result(i, j) = data1(i, j)
        Next j
    Next i

    ' Copy data from the second dataset
    For i = 1 To numRows2 - 1
        For j = 0 To numCols2 - 1
            result(i, colIndex2) = data2(i, j)
        Next j
    Next i

    MergeData = result
End Function

Function ReadSheetData(sheet As Object) As Variant
    Dim numRows As Integer
    Dim numCols As Integer
    Dim cellValue As Variant
    Dim data() As Variant

    numRows = RowsCount(UsedRange(sheet))
    numCols = ColumnsCount(UsedRange(sheet))
    
    ReDim data(0 To numRows - 1, 0 To numCols - 1)

    For i = 0 To numRows - 1
        For j = 0 To numCols - 1
            cellValue = sheet.getCellByPosition(j, i).getValue()
            data(i, j) = cellValue
        Next j
    Next i

    ReadSheetData = data
End Function

Function UsedRange(oSheet As Variant) As Variant
    Dim oCursor As Variant
    oCursor = oSheet.createCursor()
    oCursor.gotoEndOfUsedArea(False)
    oCursor.gotoStartOfUsedArea(True)
    UsedRange = oCursor
End Function

Function RowsCount(oRange As Variant) As Long 
    RowsCount = oRange.getRows().getCount()
End Function

Function ColumnsCount(oRange As Variant) As Long 
    ColumnsCount = oRange.getColumns().getCount()
End Function

Function LastRow(oRange As Variant) As Long 
    LastRow = oRange.getRangeAddress().EndRow
End Function

Function IsInArray(arr() As Variant, value As Variant) As Boolean
    Dim element As Variant
    For Each element In arr
        If element = value Then
            IsInArray = True
            Exit Function
        End If
    Next element
    IsInArray = False
End Function

Function GetColumnIndex(headerRow() As Variant, columnName As String) As Integer
    Dim i As Integer
    For i = 0 To UBound(headerRow)
        If headerRow(i) = columnName Then
            GetColumnIndex = i
            Exit Function
        End If
    Next i
    GetColumnIndex = -1
End Function


Solution

  • If your spreadsheet has more than one sheet and each sheet contains only one table, or all tables in a sheet start on the same line and do not contain additional headings like "Table 6" or "Quarterly Report", then the macro code could be like this:

    Option Explicit 
    
    Sub CombineSheetsWithDifferentHeaders()
    Const NAME_COMBIBED_SHEET = "Combined"
    Dim oDoc As Variant, oSheets As Variant, oSheet As Variant
    Dim oCursor As Variant, oSourceCell As Variant
    Dim combinedSheet As Variant
    Dim consolidatedData() As Variant
    Dim aFullHeaders() As Variant
    Dim nSheet As Long, nCount As Long, nConsolidatedData As Long
    Dim aSourceAddress As New com.sun.star.table.CellRangeAddress
    Dim aSourceHeaders As Variant 
    Dim nTargetRow As Long, nSourceRow As Long, nSourceCol As Long
    
        oDoc = ThisComponent ' Get the current document
        oSheets = oDoc.getSheets() ' All sheets of current spreadsheet
        ' Check if the "Combined" sheet exists; if yes, delete it
        If oSheets.hasByName(NAME_COMBIBED_SHEET) And (oSheets.getCount() > 1) Then oSheets.removeByName(NAME_COMBIBED_SHEET)
        
        nCount = oSheets.getCount()
        ' If there is only one sheet in the spreadsheet, then there is nothing to merge
        If nCount < 2 Then ExitWithResult("Nothing to merge")
    
        ReDim consolidatedData(0 To nCount)
        nConsolidatedData = -1
        ' First Iteration - collect source ranges:
        For nSheet = 0 To nCount-1 ' So you no need to skip the Combined sheet
            ' Read the data (as range!) from the source sheet into an array
            oSheet = oSheets.getByIndex(nSheet)
            oCursor = oSheet.createCursor()
            oCursor.gotoEndOfUsedArea(False) :  oCursor.gotoStartOfUsedArea(True)
            ' If there is no data in this sheet, the cursor contains only cell A1.
            'To combine something, there must be at least two rows in the range - header row and data
            If oCursor.getRows().getCount() > 1 Then 
                nConsolidatedData = nConsolidatedData + 1
                consolidatedData(nConsolidatedData) = Array(oCursor.getRangeAddress(), getTableHeaders(aFullHeaders, oCursor))
            EndIf 
        Next nSheet
        If nConsolidatedData < 0 Then ExitWithResult("consolidatedData is empty")
    
        ReDim Preserve consolidatedData(0 To nConsolidatedData)
    
        ' ...and only now recreate the "Combined" sheet in the last position:
        oSheets.insertNewByName(NAME_COMBIBED_SHEET, nCount)
        combinedSheet = oSheets.getByName(NAME_COMBIBED_SHEET)
        ' Set full headers row
        combinedSheet.getCellRangeByPosition(0, 0, UBound(aFullHeaders),0).setDataArray(Array(aFullHeaders))
        nTargetRow = 0
        
        ' Second Iteration - copy data from source ranges:
        For nSheet = 0 To nConsolidatedData
            aSourceAddress = consolidatedData(nSheet)(0)
            aSourceHeaders = consolidatedData(nSheet)(1)
            oSheet = oSheets.getByIndex(aSourceAddress.Sheet)
            With aSourceAddress
                oCursor = oSheet.getCellRangeByPosition(.StartColumn, .StartRow, .EndColumn, .EndRow)
            End With 
            For nSourceRow = 1 To oCursor.getRows().getCount()-1
                nTargetRow = nTargetRow + 1
                For nSourceCol = 0 To oCursor.getColumns().getCount()-1
                    If aSourceHeaders(nSourceCol) >= 0 Then
                        oSourceCell = oCursor.getCellByPosition(nSourceCol, nSourceRow)
                        If oSourceCell.getType() <> com.sun.star.table.CellContentType.EMPTY Then
                            oSheet.copyRange(combinedSheet.getCellByPosition(aSourceHeaders(nSourceCol),nTargetRow).getCellAddress, oSourceCell.getRangeAddress())
                        EndIf 
                    EndIf 
                Next nSourceCol
            Next nSourceRow
        Next nSheet
        ExitWithResult("All data is copied to the " & NAME_COMBIBED_SHEET & " sheet")
    End Sub
    
    Function getTableHeaders(aHeaders As Variant, oCursor As Variant) As Variant
    Dim aResult As Variant 
    Dim i As Long
        i = oCursor.getColumns().getCount()-1
        ReDim aResult(0 To i)
        For i = LBound(aResult) To UBound(aResult)
            aResult(i) = getHeaderIndex(aHeaders, Trim(oCursor.getCellByPosition(i, 0).getString()))
        Next i
        getTableHeaders = aResult
    End Function
    
    Function getHeaderIndex(aHeaders As Variant, sHeader As String) As Long 
    Dim i As Long, uB As Long 
        If sHeader = "" Then
            getHeaderIndex = -1 ' Skip columns with empty header
            Exit Function
        EndIf 
        uB = UBound(aHeaders)
        For i = 0 To uB
            If aHeaders(i) = sHeader Then
                getHeaderIndex = i
                Exit Function 
            EndIf 
        Next i
        uB = uB + 1
        ReDim Preserve aHeaders(0 To uB)
        aHeaders(uB) = sHeader
        getHeaderIndex = uB
    End Function
    
    Sub ExitWithResult(sMessage As String)
        MsgBox (sMessage, MB_ICONSTOP, "Procedure CombineSheetsWithDifferentHeaders()")
        End 
    End Sub
    

    I hope that the comments in the code will help you understand what this macro does and how