Search code examples
vbaexcelmshtml

Use VBA to click html button and then scrape refreshed data


I am trying to write a procedure which enters a date into an input box

<input name="Mdate" type="text" id="Mdate" size="30" value="" /></td>

clicks a submit button

<input type="submit" name="button" id="button" value="Submit" />

then scrapes the resulting data, which appears in the "a" tags.

<center>
<b>Tuesday, 6 January 2015</b><br />
<a href="/horse-racing-results/Ruakaka/2015-1-6" target="_blank">Ruakaka</a>

This data is not available until the submit button has been entered. My attempt is posted in full below. The problem I seem to be having is that i am not able to access the modified html code (modified by clicking submit). Can anyone provide any suggestions?

'dimension variables
Dim ie As InternetExplorer
Dim htmldoc As MSHTML.IHTMLDocument                                                         'Document object
Dim inputs As MSHTML.IHTMLElementCollection                                                 'Element collection for "input" tags
Dim eles1, eles2 As MSHTML.IHTMLElementCollection                                            'Element collection for th tags
Dim element As MSHTML.IHTMLElement                                                          'input elements
Dim ele1, ele2 As MSHTML.IHTMLElement                                                       'Header elements

'Open InternetExplorer
Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = False                                                                          'make IE invisible

'Navigate to webpage
Dim ieURL As String:    ieURL = "http://www.racenet.com.au/horse-racing-results/"           'set URL from which to retrieve racemeet and date data
ie.navigate ieURL                                                                           'navigate to URL
Do While ie.Busy Or ie.readyState <> 4                                                      'wait for page to load
    DoEvents
Loop

Set htmldoc = ie.document                                                                   'Document webpage
Set inputs = htmldoc.getElementsByTagName("input")                                          'Find all input tags

Dim dd, mm, yyyy As Integer
Dim startdate, enddate As Date
Dim i, j, k As Long
Dim raceMeet, raceURL As String
startdate = #1/1/2008#: enddate = Date - 1
Dim racemeetArr As Variant
ReDim racemeetArr(1 To 2, 1)

For i = startdate To enddate
    dd = Day(i): mm = Month(i): yyyy = Year(i)

    For Each element In inputs
        If element.Name = "Mdate" Then
            element.Value = yyyy & "-" & mm & "-" & dd
        Else
            If element.Name = "button" Then
                element.Click
                'insert scraper
                Set eles1 = htmldoc.getElementsByTagName("a")                                          'Find all centre tags
                    For Each ele1 In eles1
                        If InStr(ele1.href, "/horse-racing-results/") > 0 Then
                            raceMeet = ele1.innerText
                            raceURL = ele1.innerHTML
                            ReDim Preserve racemeetArr(1 To 2, UBound(racemeetArr, 2) + 1)
                            racemeetArr(1, UBound(racemeetArr, 2)) = raceMeet
                            racemeetArr(2, UBound(racemeetArr, 2)) = raceURL
                        End If
                    Next ele1
            Else
            End If
        End If

    Next element
Stop

Next i

ie.Quit

Solution

  • Insert a condition to wait while the page is loading.

    The following rewrite successfully fetches data from the target page on my pc:

    Private Sub CommandButton1_Click()
    
        'dimension variables
        Dim ie As InternetExplorer
        Dim htmldoc As MSHTML.IHTMLDocument                                                         'Document object
        Dim inputs As MSHTML.IHTMLElementCollection                                                 'Element collection for "input" tags
        Dim eles1, eles2 As MSHTML.IHTMLElementCollection                                            'Element collection for th tags
        Dim element As MSHTML.IHTMLElement                                                          'input elements
        Dim ele1, ele2 As MSHTML.IHTMLElement                                                       'Header elements
    
        'Open InternetExplorer
        Set ie = CreateObject("InternetExplorer.Application")
        ie.Visible = True                                                                          'make IE invisible
    
        'Navigate to webpage
        Dim ieURL As String:    ieURL = "http://www.racenet.com.au/horse-racing-results/"           'set URL from which to retrieve racemeet and date data
        ie.navigate ieURL                                                                           'navigate to URL
        Do While ie.Busy Or ie.readyState <> 4                                                      'wait for page to load
            DoEvents
        Loop
    
        Set htmldoc = ie.document                                                                   'Document webpage
        Set inputs = htmldoc.getElementsByTagName("input")                                          'Find all input tags
    
        Dim dd, mm, yyyy As Integer
        Dim startdate, enddate As Date
        Dim i, j, k As Long
        Dim raceMeet, raceURL As String
        startdate = #1/1/2008#: enddate = Date - 1
        Dim racemeetArr As Variant
        ReDim racemeetArr(1 To 2, 1)
    
        For i = startdate To enddate
            dd = Day(i): mm = Month(i): yyyy = Year(i)
    
    
            For Each element In inputs
                If element.Name = "Mdate" Then
                    element.Value = yyyy & "-" & mm & "-" & dd
                Else
                    If element.Name = "button" Then
                        element.Click
                        Exit For
                    End If
                End If
    
            Next element
    
    
            Do
            ' Wait until the Browser is loaded'
            Loop Until ie.readyState = READYSTATE_COMPLETE
    
            'insert scraper
            Set eles1 = htmldoc.getElementsByTagName("a")                                          'Find all centre tags
                For Each ele1 In eles1
                    If InStr(ele1.href, "/horse-racing-results/") > 0 Then
                        raceMeet = ele1.innerText
                        raceURL = ele1.innerHTML
                        ReDim Preserve racemeetArr(1 To 2, UBound(racemeetArr, 2) + 1)
                        racemeetArr(1, UBound(racemeetArr, 2)) = raceMeet
                        racemeetArr(2, UBound(racemeetArr, 2)) = raceURL
                    End If
                Next ele1
    
            Stop
    
        Next i
    
        ie.Quit
    
    End Sub
    

    Edit:

    After analyzing the HTTP requests I managed to slim down the code a little bit (results can be queried directly without filling the form and submitting the page)

    I am not a huge fan of expensive array ReDims, so I created a class instead, and save the results in a collection of that class (feel free to use it or not).

    Add a new class module, call it clRaceMeet and paste this code:

    Option Explicit
    
    Private pMeet As String
    Private pUrl As String
    
    
    Public Property Let Meet(ByVal Val As String)
        pMeet = Val
    End Property
    Public Property Get Meet() As String
        Meet = pMeet
    End Property
    
    Public Property Let URL(ByVal Val As String)
        pUrl = Val
    End Property
    Public Property Get URL() As String
        URL = pUrl
    End Property
    

    Then, use this modified code version to scrape the data and dump it to the debugging window:

    Option Explicit
    
    Private Sub CommandButton1_Click()
    
        'dimension variables
        Dim ie As InternetExplorer
        Dim ieURL As String
    
        Dim dd As Integer
        Dim mm As Integer
        Dim yyyy As Integer
        Dim startDate As Date
        Dim endDate As Date
        Dim i As Long
    
        Dim htmlDoc As MSHTML.IHTMLDocument
        Dim colLeftEleColl As MSHTML.IHTMLElementCollection
        Dim colLeftEle As MSHTML.IHTMLElement
        Dim centerEleColl As MSHTML.IHTMLElementCollection
        Dim centerEle As MSHTML.IHTMLElement
    
        Dim raceMeet As String
        Dim raceURL As String
        Dim objRaceMeet As clRaceMeet
        Dim raceMeetColl As New Collection
    
    
        'Open InternetExplorer
        Set ie = CreateObject("InternetExplorer.Application")
        ie.Visible = True
    
        startDate = #1/1/2009#
        endDate = Date - 1
    
        For i = startDate To endDate
            dd = Day(i)
            mm = Month(i)
            yyyy = Year(i)
    
            ieURL = "http://www.racenet.com.au/horse-racing-results-search.asp?Mdate=" & yyyy & "-" & mm & "-" & dd
            ie.navigate ieURL
    
            Do
            ' Wait until the Browser is loaded'
            Loop Until ie.readyState = READYSTATE_COMPLETE
    
            Set htmlDoc = ie.document
    
            'insert scraper
            Set colLeftEleColl = htmlDoc.getElementById("ColLeft").all
    
            'Loop through elements of ColLeft div
            For Each colLeftEle In colLeftEleColl
    
                If colLeftEle.tagName = "CENTER" Then
                    Set centerEleColl = colLeftEle.all
    
                    'Loop through elements of <center> tag
                    For Each centerEle In centerEleColl
    
                        If centerEle.tagName = "A" Then
                            If InStr(centerEle.href, "/horse-racing-results/") > 0 Then
                                raceMeet = centerEle.innerText
                                raceURL = centerEle.href
    
                                Set objRaceMeet = New clRaceMeet
    
                                objRaceMeet.Meet = raceMeet
                                objRaceMeet.URL = raceURL
                                raceMeetColl.Add objRaceMeet
    
                            End If
                        End If
                    Next centerEle
    
                    Exit For
                End If
            Next colLeftEle
    
            ' Dump results to immediate window:
            For Each objRaceMeet In raceMeetColl
                Debug.Print objRaceMeet.Meet & " - " & objRaceMeet.URL
            Next objRaceMeet
    
            'Stop
    
        Next i
    
        ie.Quit
    
    End Sub
    

    Happy betting! :)