Search code examples
htmlvbams-accessindentationpretty-print

How can I indent HTML with VBA?


I am generating some HTML in VBA (MSACCESS), which works fine but it is a bit of a mess from an indentation point of view. Is there an easy way to indent a stream of HTML text in VBA? I use Visual Studio Code format functionality to have a prettier HTML, but I have to do this by hand and it is very tedious!

Example:

<div class="anythinggoes">
<ul><li>A</li>
        <li>B</li><li>C</li>
    </ul></div> <!-- anythinggoes -->

Should become something like:

<div class="anythinggoes">
    <ul>
        <li>A</li>
        <li>B</li>
        <li>C</li>
    </ul>
</div> <!-- anythinggoes -->

Any help will be much appreciated!


Solution

  • CAUTION! Ugly code ahead!

    Welcome on board, RichD. I think this code might help you:

    First, define these variables in the Module scope:

    Private InlineTags As Variant
    Private InlineClosingTags As Variant
    Private LineBreakTags As Variant
    

    Then, we can use this function:

    Function ReadableHTML(HTML As String) As String
        Dim a$, i&, TabsNo&, tabs$, l&, tag$, MaxTabs&
    
        'add here tags that you want to keep on the same line of their parent
        InlineTags = Array("!--", "a", "i", "b", "sup", "sub", "strong") 'never followed by a line break
        InlineClosingTags = Array("li", "h1", "h2", "h3", "h4") 'always followed by a line break
        LineBreakTags = Array("br", "br/", "br /") 'always lead & followed by a line break
    
        a = CleanOf(HTML)
        TabsNo = -1
        i = 1
        l = Len(a)
        Do While i < l
            If Mid(a, i, 2) = "</" Then
                tag = Mid(a, i + 2, InStr(i + 2, a, ">") - i - 2)
                If Not IsInArray(tag, InlineClosingTags) Or Mid(a, i - 1, 1) = ">" Then
                    tabs = Chr(10) & Filler(TabsNo, Chr(9))
                    a = Left(a, i - 1) & tabs & Right(a, Len(a) - i + 1)
                    l = Len(a)
                    i = i + Len(tabs)
                End If
                TabsNo = TabsNo - 1
            Else
                Select Case Mid(a, i, 1)
                Case "<"
                    tag = Mid(a, i + 1, InStr(i + 1, a, ">") - i - 1)
                    If Not IsInArray(tag, InlineTags) Then
                        TabsNo = TabsNo + 1
                        If TabsNo > MaxTabs Then MaxTabs = TabsNo
                        If i > 1 Then tabs = Chr(10) & Filler(TabsNo, Chr(9)) Else tabs = Filler(TabsNo, Chr(9))
    '                    tabs = tabs & Filler(TabsNo, Chr(9))
                        a = Left(a, i - 1) & tabs & Right(a, Len(a) - i + 1)
                        l = Len(a)
                        i = i + Len(tabs)
                        If IsInArray(tag, LineBreakTags) Then TabsNo = TabsNo - 1
                    End If
                Case ">"
                    tag = Mid(a, InStrRev(a, "<", i) + 1, i - InStrRev(a, "<", i) - 1)
                    If Not IsInArray(tag, InlineClosingTags) Then
                        tabs = Chr(10) & Filler(TabsNo + 1, Chr(9))
                        a = Left(a, i) & tabs & Right(a, Len(a) - i)
                    End If
                Case Chr(10)
                    If Mid(a, i + 1, 1) <> Chr(9) And Mid(a, i + 1, 1) <> "<" Then
                        tabs = Chr(10) & Filler(TabsNo + 1, Chr(9))
                        a = Left(a, i) & tabs & Right(a, Len(a) - i)
                        l = Len(a)
                        i = i + Len(tabs)
                    End If
                End Select
            End If
            i = i + 1
        Loop
        For TabsNo = MaxTabs To 0 Step -1
            a = Replace(a, Chr(10) & Filler(TabsNo, Chr(9)) & Chr(10), Chr(10))
        Next
        ReadableHTML = treatInlineTags(a, False)
    End Function
    

    Which uses these helping functions:

    Function treatInlineTags(a As String, HideFlag As Boolean)
        'Hides/unhides inline tags from CleanOf
        If HideFlag Then
            For i = LBound(InlineTags) To UBound(InlineTags)
                a = Replace(a, "<" & InlineTags(i) & " ", "|" & InlineTags(i) & "¦")
                a = Replace(a, "<" & InlineTags(i) & ">", "|" & InlineTags(i) & "|")
                a = Replace(a, "</" & InlineTags(i) & ">", "|/" & InlineTags(i) & "|")
            Next i
        Else
            For i = LBound(InlineTags) To UBound(InlineTags)
                a = Replace(a, "|" & InlineTags(i) & "¦", "<" & InlineTags(i) & " ")
                a = Replace(a, "|" & InlineTags(i) & "|", "<" & InlineTags(i) & ">")
                a = Replace(a, "|/" & InlineTags(i) & "|", "</" & InlineTags(i) & ">")
            Next i
        End If
        treatInlineTags = a
    End Function
    
    Function IsInArray(a As String, Arr As Variant) As Boolean
        Dim i As Long
        For i = LBound(Arr) To UBound(Arr)
            IsInArray = a = Arr(i)
            If IsInArray Then Exit Function
        Next i
    End Function
    
    Function CleanOf(a As String) As String
        'Removes unwanted spaces between tags
        Dim i As Long, b As Boolean, l As Long
        a = Replace(a, Chr(13), "")
        a = Replace(a, Chr(10), "")
        a = treatInlineTags(a, True)
        For i = 1 To Len(a)
            Select Case Mid(a, i, 1)
            Case ">", "<"
                If i - l > 1 And l > 0 Then a = Left(a, l) & Right(a, Len(a) - i + 1)
                If i > 1 Then l = i
                If l > 0 Then b = True
            Case Is <> " "
                b = False
                l = 0
            End Select
        Next i
        CleanOf = a
    End Function
    
    Function Filler(n As Long, Optional Str As String = "0") As String
        If n > 0 Then Filler = Replace(Space$(n), " ", Str)
    End Function
    

    To test it:

    Sub test()
        Dim a As String, b As String
        a = "<div class=""myclass""> " & Chr(13) & _
        "<ul><li>A</li>                   " & Chr(13) & _
        "<li>B</li><li>C</li>             " & _
        "</ul></div> <!-- just a comment -->" & _
        "<h2 class=""mytitle"">a title: inline and " & _
        "followed by a line break</h2>" & _
        "<div><ul><li><i class=""myitalic"">italic " & _
        "content: inline and NOT followed by a line break</i>" & _
        "</li></ul></div>"
        
        b = "<li><i class=""mylist""></i>a list <ul>" & _
        "<li>element 1</li><li>element 2</li><li>element 3</li></ul> " & _
        "</li><li>This <b>is bold</b> in an element list " & _
        "<a href=""#mydestination"">""with an href"" " & _
        "</a></li>"
        
        Debug.Print Chr(10) & "Test1 - input:" & Chr(10) & a
        Debug.Print Chr(10) & "Test1 - output:" & Chr(10) & ReadableHTML(a)
        
        Debug.Print Chr(10) & "Test2 - input:" & Chr(10) & b
        Debug.Print Chr(10) & "Test2 - output:" & Chr(10) & ReadableHTML(b)
    End Sub