Search code examples
htmlexcelvbaweb-scrapinggetelementsbytagname

Parsing HTML getElementsByTagName not returning all cells


I have some code that use to work to scrape data from a webpage however the webpage has changed and can no longer get it to work. The code is supposed to do calculations on the insider transaction table however getelementsbytagname("td") no longer returns all cells.

I'm guessing it's because it is a page embedded in a page or something but I cannot for the life of me resolve it, I am not very familiar with html. A sample webpage is gurufocus.com/stock/lmb/insider.

My code is below:

Sub getStatements()
    Dim wb As Object
    Dim doc As Object
    Dim incomeStmtURLs As Variant
    Dim sURL As String
    Dim allCells As IHTMLElementCollection
    Dim aCell As HTMLTableCell
    Dim i As Integer
    Dim loginBoxData As String    

    Application.DisplayAlerts = False

    Call ToggleEvents(False)

    incomeStmtURLs = Range("Sheet1!h1:h2").Value

    For i = 1 To UBound(incomeStmtURLs)
        Set wb = CreateObject("internetExplorer.Application")
        sURL = incomeStmtURLs(i, 1)

        wb.navigate sURL
        wb.Visible = False

        While wb.Busy
            Application.Wait Now + #12:00:01 AM#
            DoEvents
        Wend

        'HTML document
        Set doc = wb.document

        On Error GoTo err_clear

        ' gets all cell and looks for date format,
        ' goes from new transaction to old so once gets to older than a year it exits for loop
        ' checks nextSibling from date is a buy and if so does calculations, by taking further value sin row
        ' for priceThisTime have to get rid of $ symbol for calculation
        Set allCells = doc.getElementsByTagName("td")
        For Each aCell In allCells
            MsgBox (aCell.innerText)
            If aCell.innerText Like "####-##-##" = True Then
                If CDate(aCell.innerText) >= Date - 365 Then
                    If aCell.NextSibling.innerText = "Buy" Then
                        buys = buys + 1
                        sharesThisTime = CDec(aCell.NextSibling.NextSibling.innerText)
                        priceThisTime = aCell.NextSibling.NextSibling.NextSibling.NextSibling.innerText
                        totalPrice = totalPrice + (sharesThisTime * CDec(Right(priceThisTime, Len(priceThisTime) - 1)))
                        shareCount = shareCount + sharesThisTime
                    End If
                Else
                    Exit For
                End If
            End If
        Next aCell

        Sheet6.Cells(i + 1, 2) = buys
        If (shareCount <> 0) Then
            Sheet6.Cells(i + 1, 3).Value = totalPrice / shareCount
        End If

        buys = 0
        totalPrice = 0
        shareCount = 0

err_clear:
        If Err <> 0 Then
            Err.Clear
            Resume Next
        End If
        wb.Quit
    Next i

    Call ToggleEvents(True)
End Sub

Solution

  • The following targets specifically that table and retrieves all td elements. I think your logic could probably be applied on a column number basis but anyway (just in case I set the table into a variable as well).

    I set the results per page to 100 but you can comment out that line

    Option Explicit
    'VBE > Tools > References:
    ' Microsoft Internet Controls
    
    Public Sub GetData()
        Dim ie As New InternetExplorer, lastDropDrownItemIndex As Long, dropDown As Object, t As Date
        Const MAX_WAIT_SEC As Long = 10
    
    
        With ie
            .Visible = True
            .Navigate2 "https://www.gurufocus.com/stock/lmb/insider"
    
            While .Busy Or .readyState < 4: DoEvents: Wend
            t = Timer
    
            Do
                Set dropDown = .document.querySelectorAll(".el-dropdown-menu__item")
                lastDropDrownItemIndex = dropDown.Length - 1
                If Timer - t > MAX_WAIT_SEC Then Exit Do
            Loop While lastDropDrownItemIndex < 1
    
            If dropDown.Length = 0 Then Exit Sub
    
            dropDown.item(lastDropDrownItemIndex).Click 'comment me out if don't want 100 results per page
    
            Dim tds As Object, table As Object
            Set tds = .document.getElementsByClassName("data-table")(0).getElementsByTagName("td")
            Set table = .document.getElementsByClassName("data-table")
            Stop
            .Quit
        End With
    End Sub