Search code examples
excelvbaweb-scrapinginnertext

Scrape website data, insert into an Excel cell, then move on to next


My project is to insert a car reg into the tax and mot website click the buttons, load the page and then take the dates.

An issue I had it is to extract data within a strong li element which is the date/ dates for the tax and mot of which I need in two cells.

Sub searchbot()

'dimension (declare or set aside memory for) our variables
    Dim objIE As InternetExplorer 'special object variable representing the IE browser

    Dim liEle As HTMLLinkElement 'special object variable for an <li> (link) element
    Dim pEle As HTMLLinkElement 'special object variable for an <a> (link) element

    Dim y As Integer 'integer variable we'll use as a counter

'''''''''''''''''''''''''''''''''''''''''''
'open internet

    'initiating a new instance of Internet Explorer and asigning it to objIE
    Set objIE = New InternetExplorer

    'make IE browser visible (False would allow IE to run in the background)
    objIE.Visible = True

'''''''''''''''''''''''''''''''''''''''''''
'open tax/mot page

    'navigate IE to this web page (a pretty neat search engine really)
    objIE.Navigate "https://vehicleenquiry.service.gov.uk/"

    'make IE browser visible (False would allow IE to run in the background)
    objIE.Visible = True

    'wait here a few seconds while the browser is busy
    Do While objIE.Busy = True Or objIE.ReadyState <> 4: DoEvents: Loop

'''''''''''''''''''''''''''''''''''''''''''
'enter details in to page

    'in the search box put cell "b2" value, the word "in" and cell "C" value
    objIE.Document.getElementById("Vrm").Value = _
    Sheets("INPUT & DATA RESULTS").Range("F3").Value

    'wait here a few seconds while the browser is busy
    Do While objIE.Busy = True Or objIE.ReadyState <> 4: DoEvents: Loop

'''''''''''''''''''''''''''''''''''''''''''''''''''''

'click the 'Continue' button
objIE.Document.getElementsByClassName("button")(0).Click

'wait here a few seconds while the browser is busy
    Do While objIE.Busy = True Or objIE.ReadyState <> 4: DoEvents: Loop

'''''''''''''''''''''''''''''''''''''''''''''''''''''

'click the 'Yes' button
objIE.Document.getElementById("Correct_True").Click

'wait here a few seconds while the browser is busy
    Do While objIE.Busy = True Or objIE.ReadyState <> 4: DoEvents: Loop

'''''''''''''''''''''''''''''''''''''''''''''''''''''

'click the 'Continue' button
objIE.Document.getElementsByClassName("button")(0).Click

'wait here a few seconds while the browser is busy
    Do While objIE.Busy = True Or objIE.ReadyState <> 4: DoEvents: Loop

'above works
'''''''''''''''''''''''''''''''''''''''''''''''''''''
'''''''''''''''''''''''''''''''''''''''''''''''''''''
'''''''''''''''''''''''''''''''''''''''''''''''''''''
'''''''''''''''''''''''''''''''''''''''''''''''''''''
'''''''''''''''''''''''''''''''''''''''''''''''''''''
'''''''''''''''''''''''''''''''''''''''''''''''''''''
'''''''''''''''''''''''''''''''''''''''''''''''''''''
'''''''''''''''''''''''''''''''''''''''''''''''''''''
'''''''''''''''''''''''''''''''''''''''''''''''''''''

'HELP FROM HERE PLEASE

'take tax and mot dates and insert in to cells next to each other
'the first search result will go in row 2
y = 2

'TAKE TAX EXPIRY DATE AND PUT IN CELL
'I have tried reading up on extracting data from li elements, parent and child elements but struggling
For Each itemEle In objIE.Document.getElementsByClassName("top-section-list")
data = itemEle.getElementsByTagName("li")(0).innerText


'TAKE MOT EXPIRY DATE AND PUT IN CELL
'I have tried reading up on extracting data from li elements, parent and child elements but struggling
For Each itemEle In objIE.Document.getElementsByClassName("top-section-list")
data = itemEle.getElementsByTagName("li")(0).innerText


'increment our row counter, so the next result goes below
y = y + 1

'repeat times cells have car regs in
'Next
'take next car reg and do the same as above until there are no cells in rows with a car reg
Next
Range("A3").Value = data


'''''''''''''''''''''''''''''''''''''''''''
'close the browser
objIE.Quit

'''''''''''''''''''''''''''''''''''''''''''
'exit our SearchBot subroutine and start new row for new website data
End Sub

I am a fraud investigator trying to teach myself VBA.


Solution

  • The items you want are in strong (bold) tags and are the first two on the page so you can use faster css selector of strong and do

    Dim items As Object, i As Long, taxInfo As String, motInfo As String
    Set items = ie.document.querySelectorAll("strong")
    taxInfo = items.item(0).innerText
    motInfo = items.item(1).innerText
    

    for just the dates do:

    taxInfo = Replace$(items.item(0).innerText,"Tax due: ",vbNullString)
    motInfo = Replace$(items.item(1).innerText,"Expires: ",vbNullString)
    

    Here is something similar using css selectors, which modern webpages are optimized for, so faster. The # is an id selector. I used a timed wait to ensure search box is present for entering registration. There is a rudimentary check in case vehicle not found.

    Option Explicit   
    'VBE > Tools > References:
    ' Microsoft Internet Controls
    Public Sub CheckTax()
        Dim ie As InternetExplorer, searchBox As Object, t As Date, ws As Worksheet
        Const MAX_WAIT_SEC As Long = 20
        Dim inputValues(), i As Long
    
        Set ie = New InternetExplorer
        Set ws = ThisWorkbook.Worksheets("INPUT & DATA RESULTS")
        inputValues = Application.Transpose(ws.Range("F3:F5").Value) '<=change range here for range containing values to lookup
        With ie
            .Visible = True
    
            For i = LBound(inputValues) To UBound(inputValues)
                .Navigate2 "https://vehicleenquiry.service.gov.uk/"
    
                While .Busy Or .readyState < 4: DoEvents: Wend
                t = Timer
                Do
                    On Error Resume Next
                    Set searchBox = .document.querySelector("#Vrm")
                    On Error GoTo 0
                    If Timer - t > MAX_WAIT_SEC Then Exit Do
                Loop While searchBox Is Nothing
    
                If searchBox Is Nothing Then
                    Exit Sub
                Else
                    searchBox.Focus
                    searchBox.Value = inputValues(i)
                End If
    
                .document.querySelector(".button").Click
    
                While .Busy Or .readyState < 4: DoEvents: Wend
    
                If .document.querySelectorAll("h3").Length > 0 Then
                    ws.Cells(i + 2, "G") = "Vehicle details could not be found"
                    ws.Cells(i + 2, "H") = "Vehicle details could not be found"
                Else
                    t = Timer
                    Do
                        If Timer - t > MAX_WAIT_SEC Then Exit Do
                    Loop While ie.document.querySelectorAll("#Correct_True").Length = 0
    
                    ie.document.querySelector("#Correct_True").Click
                    While .Busy Or .readyState < 4: DoEvents: Wend
                    .document.querySelector(".button").Click
    
                    While .Busy Or .readyState < 4: DoEvents: Wend
    
                    Dim items As Object, taxInfo As String, motInfo As String
                    t = Timer
                    Do
                        On Error Resume Next
                        Set items = ie.document.querySelectorAll("strong")
                        On Error GoTo 0
                        If Timer - t > MAX_WAIT_SEC Then Exit Do
                    Loop While items.Length = 0
    
                    'taxInfo = items.item(0).innerText
                    'motInfo = items.item(1).innerText
    
                    'Debug.Print taxInfo, motInfo
    
                    taxInfo = Replace$(items.item(0).innerText, "Tax due: ", vbNullString)
                    motInfo = Replace$(items.item(1).innerText, "Expires: ", vbNullString)
    
                    ws.Cells(i + 2, "G") = taxInfo
                    ws.Cells(i + 2, "H") = motInfo
                End If
                Set searchBox = Nothing: Set items = Nothing
            Next
            .Quit
        End With
    End Sub