Search code examples
excelvbadata-extraction

Use Excel VBA to Extract Data From a Webpage


I'm trying to use Excel VBA to extract some data from a webpage (https://www.churchofjesuschrist.org/maps/meetinghouses/lang=eng&q=1148+W+100+N). The code I'm using will open Internet Explorer, navigate to the website, and it will extract the top most result. But I can't seem to figure out how to extract the rest of the results (i.e. ward, language, contact name, contact #). Thoughts?

Sub MeethinghouseLocator()

Dim IE As Object
Set IE = CreateObject("InternetExplorer.Application")
IE.navigate Sheets("Sheet1").Range("A1").Value
IE.Visible = True
While IE.Busy
DoEvents
Wend

Application.Wait (Now + TimeValue("0:00:01"))

IE.document.querySelector("button.search-input__execute.button--primary").Click

  Dim Doc As HTMLDocument
  Set Doc = IE.document
        
Application.Wait (Now + TimeValue("0:00:01"))
        
'WardName
    Dim aaaaFONT As String
    aaaaFONT = Trim(Doc.getElementsByClassName("location-header__name ng-binding")(0).innerText)
    Sheets("Sheet1").Range("D6").Value = aaaaFONT
    
Application.Wait (Now + TimeValue("0:00:01"))
    
'Language
    Dim aaabFONT As String
    aaabFONT = Trim(Doc.getElementsByClassName("location-header__language ng-binding ng-scope")(0).innerText)
    Sheets("Sheet1").Range("E6").Value = aaabFONT

'Click 1st Link
    IE.document.getElementsByClassName("location-header__name ng-binding")(0).Click

Application.Wait (Now + TimeValue("0:00:01"))

'Contact Name
    Dim aaacFONT As String
    aaacFONT = Trim(Doc.getElementsByClassName("maps-card__group maps-card__group--inline ng-scope")(2).innerText)
    Sheets("Sheet1").Range("H6").Value = aaacFONT

'Contact Name Function
    Range("F6").Select
    ActiveCell.FormulaR1C1 = _
        "=LEFT(RIGHT(RC[2],LEN(RC[2])-FIND(CHAR(10),RC[2])-3),FIND(RIGHT(RIGHT(RC[2],LEN(RC[2])-FIND(CHAR(10),RC[2])-3),LEN(RIGHT(RC[2],LEN(RC[2])-FIND(CHAR(10),RC[2])-3))-FIND(CHAR(10),RIGHT(RC[2],LEN(RC[2])-FIND(CHAR(10),RC[2])-3))),RIGHT(RC[2],LEN(RC[2])-FIND(CHAR(10),RC[2])-3))-1)"

'Contact Phone Number
    Dim aaadFONT As String
    aaadFONT = Trim(Doc.getElementsByClassName("phone ng-binding")(0).innerText)
    Sheets("Sheet1").Range("G6").Value = aaadFONT

    
  IE.Quit

End Sub

Solution

  • Most of your code works actually so I'm not sure what issue are you facing but you didn't account for the loading after clicking each link so I have added While loop to check for its Ready and ReadyState property before continuing.

    EDIT: The code now loops through all the wards listed in the result, the idea is to keep the first IE at the result page and pass the URL of the ward and the input row to sub ExtractWard where it will open another IE, navigate to the given URL and extract the ward details.

    Sub MeethinghouseLocator()
       
        Dim IE As Object
        Set IE = CreateObject("InternetExplorer.Application")
        IE.navigate Sheets("Sheet1").Range("A1").Value
        IE.Visible = True
        While IE.Busy Or IE.readyState <> 4
            DoEvents
        Wend
                
        IE.document.querySelector("button.search-input__execute.button--primary").Click
        
        While IE.Busy Or IE.readyState <> 4
            DoEvents
        Wend
        Dim Doc As HTMLDocument
        Set Doc = IE.document
            
        Application.Wait (Now + TimeValue("0:00:01"))
            
        Dim wardContent As Object
        Set wardContent = Doc.getElementsByClassName("maps-card__content")(2)
        
        Dim wardCollection As Object
        Set wardCollection = wardContent.getElementsByClassName("location-header")
        
        Dim rowNum As Long
        rowNum = 6
            
        Dim i As Long
        For i = 0 To wardCollection.Length - 1
            With wardCollection(i)
                'WardName
                Dim aaaaFONT As String
                aaaaFONT = Trim(.getElementsByClassName("location-header__name ng-binding")(0).innerText)
                Sheets("Sheet1").Cells(rowNum, "D").Value = aaaaFONT
                    
                'Language
                Dim aaabFONT As String
                aaabFONT = Trim(.getElementsByClassName("location-header__language ng-binding ng-scope")(0).innerText)
                Sheets("Sheet1").Cells(rowNum, "E").Value = aaabFONT
            
                Dim wardURL As String
                wardURL = .getElementsByClassName("location-header__name ng-binding")(0).href
                
                ExtractWard wardURL, rowNum
            End With
            
            rowNum = rowNum + 1
        Next i
                
        Set Doc = Nothing
        IE.Quit
        Set IE = Nothing
    End Sub
    
    Private Sub ExtractWard(argURL As String, argRow As Long)
        Dim IE As Object
        Set IE = CreateObject("InternetExplorer.Application")
        IE.navigate argURL
        IE.Visible = True
        While IE.Busy Or IE.readyState <> 4
            DoEvents
        Wend
                
        Dim Doc As HTMLDocument
        Set Doc = IE.document
                
        'Contact Name
        Dim aaacFONT As String
        aaacFONT = Trim(Doc.getElementsByClassName("maps-card__group maps-card__group--inline ng-scope")(2).innerText)
        Sheets("Sheet1").Cells(argRow, "H").Value = aaacFONT
        
        'Contact Name Function
        Sheets("Sheet1").Cells(argRow, "F").FormulaR1C1 = _
            "=LEFT(RIGHT(RC[2],LEN(RC[2])-FIND(CHAR(10),RC[2])-3),FIND(RIGHT(RIGHT(RC[2],LEN(RC[2])-FIND(CHAR(10),RC[2])-3),LEN(RIGHT(RC[2],LEN(RC[2])-FIND(CHAR(10),RC[2])-3))-FIND(CHAR(10),RIGHT(RC[2],LEN(RC[2])-FIND(CHAR(10),RC[2])-3))),RIGHT(RC[2],LEN(RC[2])-FIND(CHAR(10),RC[2])-3))-1)"
        
        'Contact Phone Number
        Dim aaadFONT As String
        aaadFONT = Trim(Doc.getElementsByClassName("phone ng-binding")(0).innerText)
        Sheets("Sheet1").Cells(argRow, "G").Value = aaadFONT
            
        Set Doc = Nothing
        IE.Quit
        Set IE = Nothing
    End Sub