Search code examples
excelvbahttpxmlhttprequestmsxml

Sending HTTP Request using MS XML, 6.0 Library with a Dynamic/Partially Known URL -loop


I am trying to loop through pitchers' game logs on www.statmuse.com. The main issue is that, since I am trying to do this on a loop, part of the URL is currently unknown.

For example, looking at Martín Pérez's 2024 game logs, the URL is: https://www.statmuse.com/mlb/player/martin-perez-46483/game-log

now while trying to loop through different pitchers, this 5-digit number sequence (46483 in my example) is variable and changes between each pitcher's game log.

I have put together the below code. The issue, of course, is looping through 10000 and 99999 in attempt to find the correct 5-digit number sequence, which is causing my excel to crash and not respond. Can anyone please advise a more efficient way to achieve this? I apologize, this is my first project with HTTP Requests and anything of the like, so I am sure the code is a bloody mess.

code:

Dim ws As Worksheet, PLws As Worksheet
Set ws = ThisWorkbook.Sheets("Sheet1")
Set PLws = ThisWorkbook.Sheets("Pitcher List T")

Set rng = PLws.Range("B1:B1")
For Each cc In rng

    Dim httpRequest As MSXML2.XMLHTTP60:    Set httpRequest = New MSXML2.XMLHTTP60
    Dim htmldoc As HTMLDocument:            Set htmldoc = New HTMLDocument
    
    playerName = CStr(cc.Value)
    Dim baseURL As String
    baseURL = "https://www.statmuse.com/mlb/player/" & playerName & "-"

    Dim lastRow As Long
    Dim startNumber As Long
    Dim endNumber As Long
    startNumber = 10000 ' this loop is the issue (i think)
    endNumber = 99999   ' this loop is the issue (i think)

Dim i As Long, target As Long
For i = startNumber To endNumber ' this loop is the issue (i think)

    Dim url As String
    url = baseURL & CStr(i) & "/game-log"
    
    If CheckUrlExists(url) Then
        target = i
        Debug.Print "the target i is: " & target
    End If

Next i
    
    Dim Murl As String
    Murl = baseURL & target & "/game-log"
    
    httpRequest.Open "GET", Murl, False
    httpRequest.send
    htmldoc.body.innerHTML = httpRequest.responseText

this is the function:

Public Function CheckUrlExists(url) As Boolean
        
    On Error GoTo CheckUrlExists_Error
    
    Dim xmlhttp As MSXML2.XMLHTTP60:    Set xmlhttp = New MSXML2.XMLHTTP60
    Dim htmldoc As HTMLDocument:            Set htmldoc = New HTMLDocument
    Dim H2el As Object
    
    xmlhttp.Open "GET", url, False
    xmlhttp.send
    htmldoc.body.innerHTML = xmlhttp.responseText
    
    If xmlhttp.Status = 200 Then
        For Each H2el In htmldoc.getElementsByTagName("h2")
            If InStr(1, ChangeAccent(H2el.innerText), CStr(cc.Offset(0, -1).Value)) > 0 Then
                CheckUrlExists = True
            End If
        Next H2el
    Else
        CheckUrlExists = False
    End If
    
    Exit Function
    
CheckUrlExists_Error:
    CheckUrlExists = False
    
End Function

thank you


Solution

  • Just get the URL directly with:

    Function GetPlayerLogURLs(ByRef playerName As Variant) As Collection
        Dim httpRequest As MSXML2.XMLHTTP60: Set httpRequest = New MSXML2.XMLHTTP60
        Const baseURL As String = "https://www.statmuse.com"
        '
        httpRequest.Open "POST", baseURL & "/ask", False
        httpRequest.setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
        httpRequest.send "question%5Bquery%5D=" _
                       & LCase$(Replace(playerName, " ", "+")) _
                       & "&question%5Bpreferred_domain%5D=&question%5Bconversation_token%5D="
        '
        Dim response As String: response = httpRequest.responseText
        Dim i As Long
        Dim j As Long
        Dim res As New Collection
        Dim temp As String
        '
        i = InStr(1, response, "/game-log""")
        If i = 0 Then
            i = InStr(1, response, """/mlb/player/")
            On Error Resume Next
            Do While i > 0
                j = InStr(i + 1, response, """")
                If j > 0 Then
                    temp = baseURL & Mid$(response, i + 1, j - i - 1) & "/game-log"
                    res.Add temp, temp
                End If
                i = InStr(j + 1, response, """/mlb/player/")
            Loop
            On Error GoTo 0
        Else
            j = InStrRev(response, """", i)
            If j > 0 Then res.Add baseURL & Mid$(response, j + 1, i - j + 8)
        End If
        Set GetPlayerLogURLs = res
    End Function
    

    and then just use it like this:

    Sub TestPlayerURLs()
        Dim v As Variant
        Dim w As Variant
        Dim coll As Collection
        '
        For Each v In Array("Martin Perez", "Derrick White", "Logan Allen", "This name doesn't exist")
            Debug.Print "Player name: " & v
            Set coll = GetPlayerLogURLs(v)
            Debug.Print "Results found: " & coll.Count
            For Each w In coll
                Debug.Print w
            Next w
            Debug.Print
        Next v
    End Sub