Search code examples
arraysexcelvbaweb-scrapinggetelementsbyclassname

VBA - WebScraping Get elements for classname for equal classNames


I want to extract two values (numbers and positions) per player which have an equal class name "text". I am currently unable to select the two correct values per player.

My problem is I actually have only the first and the second value in "HTMLnumbers" and "HTMLposition". Otherwise if I select all items for the class "text", the first player gets the value for number and the second player gets the value for position. Thats also not correct.

enter image description here

Option Explicit

Sub erweiterteWerte()

Dim IE As New SHDocVw.InternetExplorer
Dim HTMLdoc As MSHTML.HTMLDocument
Dim HTMLplayerRow As MSHTML.IHTMLElementCollection

Dim i As Integer
Dim j As Integer

Dim HTMLnumbers As Object
Dim HTMLposition As Object

Dim numbers As String
Dim position As String

Dim letzteZeile As Long
Dim aktuelleZeile As Long

IE.Visible = False
IE.Navigate "https://examplexyz.de"

Do While IE.ReadyState <> READYSTATE_COMPLETE
Loop

Application.Wait (Now + TimeValue("0:00:7"))

Set HTMLdoc = IE.Document

Set HTMLplayerRow = HTMLdoc.getElementsByClassName("playerRow")
Set HTMLnumbers = HTMLplayerRow(0).getElementsByClassName("text")

   If Not HTMLnumbers Is Nothing Then
   numbers = HTMLnumbers.Item(0).innerText
   position = HTMLnumbers.Item(1).innerText
    
   Else
     numbers = "no_value"
   End If

Debug.Print numbers
Debug.Print position

IE.Quit

End Sub

Solution

  • Untested, but to illustrate the basic approach:

    Sub erweiterteWerte()
    
        Dim IE As SHDocVw.InternetExplorer
        Dim HTMLdoc As MSHTML.HTMLDocument
        Dim playerRows As MSHTML.IHTMLElementCollection
        Dim playerBadges As MSHTML.IHTMLElementCollection
        Dim player As Object, badge As Object
        
        Set IE = New SHDocVw.InternetExplorer
        IE.Visible = False
        IE.Navigate "https://play.kickbase.com/transfermarkt/kaufen"
        
        Do While IE.ReadyState <> READYSTATE_COMPLETE
        Loop
        Application.Wait (Now + TimeValue("0:00:07"))
        
        Set HTMLdoc = IE.Document
        Set playerRows = HTMLdoc.getElementsByClassName("playerRow")
        
        For Each player In playerRows
            
            Debug.Print "---------------"
            Debug.Print classText(player, "firstName") & " " & classText(player, "lastName")
            
            Set playerBadges = player.getElementsByClassName("badge")
            For Each badge In playerBadges
                Debug.Print badge.innerText
            Next badge
        
        Next player
        
        IE.Quit
    
    End Sub
    
    'Helper function to get a child (of `obj`) element's text using its className
    '  (only handles a single instance but could be extended)
    Function classText(obj As Object, classname As String) As String
        Dim els As Object
        Set els = obj.getElementsByClassName(classname)
        If els.Length > 0 Then
            classText = els(0).innerText
        Else
            classText = "[not found]"
        End If
    End Function