Search code examples
excelvbaweb-scrapinginputbox

scrape with excel vba changing input data before scraping


In am trying to change the data on the website for an input field and have that information refreshed on the page. I have updated the input field, but I am not sure how to refresh the page so that the inner table uses the new data from the input field

Below is my code:

Dim IE As InternetExplorer
Dim htmldoc As HTMLDocument
Dim ieURL As String
Dim sPicker As String

ieURL = "https://www.investing.com/commodities/crude-oil-historical-data"

sPicker = "10/01/2017 - 12/31/2017"

'Open InternetExplorer
Set IE = New InternetExplorer
IE.Visible = True

IE.Navigate ieURL

Set htmldoc = IE.document 'Document webpage

' wait until the page loads before doing anything
Do Until (IE.readyState = 4 And Not IE.Busy)
    DoEvents ' DoEvents releases the macro and lets excel do other thing while it waits
Loop

Dim drp As HTMLFormElement
Set drp = htmldoc.getElementById("widgetFieldDateRange")
drp.innerText = sPicker     'Set the new timeframe for scraping

Dim inpt As HTMLInputElement
Set inpt = htmldoc.getElementById("picker")
inpt.Value = sPicker     'Set the new timeframe for scraping

' wait until the page loads before doing anything
Do Until (IE.readyState = 4 And Not IE.Busy)
    DoEvents ' DoEvents releases the macro and lets excel do other thing while it waits
Loop

Thanks for your help


Solution

  • Try the below script. It should solve the issue.

    Sub Web_Data()
    
        Dim IE As New InternetExplorer, html As New HTMLDocument
        Dim post As Object, elem As Object, t_data As Object
        Dim trow As Object, tcel As Object
    
        With IE
            .Visible = True
            .navigate "https://www.investing.com/commodities/crude-oil-historical-data"
            While .readyState < 4: DoEvents: Wend
            Set html = .document
        End With
    
        Application.Wait Now + TimeValue("00:00:05")
    
        html.getElementById("widgetFieldDateRange").Click
        Application.Wait Now + TimeValue("00:00:03")
    
        Set post = html.getElementById("startDate")
        post.innerText = ""
        post.Focus
        Application.SendKeys "10/01/2017"
        Application.Wait Now + TimeValue("00:00:03")
    
        Set elem = html.getElementById("endDate")
        elem.innerText = ""
        elem.Focus
        Application.SendKeys "12/31/2017"
        Application.Wait Now + TimeValue("00:00:03")
    
        html.getElementById("applyBtn").Click
        Application.Wait Now + TimeValue("00:00:03")
    
    
        Set t_data = html.getElementById("curr_table")
    
        For Each trow In t_data.Rows
            For Each tcel In trow.Cells
                y = y + 1: Cells(x + 1, y) = tcel.innerText
            Next tcel
            y = 0
            x = x + 1
        Next trow
    
    End Sub
    

    Reference to add to the library:

    1. Microsoft Internet Controls
    2. Microsoft HTML Object Library