Search code examples
excelvbaweb-scrapingxmlhttprequest

Select attribute value via VBA & XMLHttpRequest


My question is related to other question VBA - Select HTML item using VBA. How can select and click data-id attribute value by using an XMLHttpRequest without Internet Explorer? So far my code looks like this:

Sub Data_multi()
    Dim http As New XMLHTTP60, html As New HTMLDocument
    Dim topic As HTMLHtmlElement
    Dim i As Integer
    For i = 1 To 4 'last page
    Application.ScreenUpdating = False
    With http
        .Open "GET", "https://www.castorama.ru/building-materials/building-dry-materials-and-primers?limit=96&p=" & i, False
        .setRequestHeader "User-Agent", "Mozilla/5.0"
        .setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
        .send
        Do: DoEvents: Loop Until .readyState = 4
        html.body.innerHTML = .responseText
    End With

    For Each topic In html.getElementsByClassName("product-info")
        With topic.getElementsByClassName("product-name")
            If .Length Then x = x + 1: Cells(x, 1) = .item(0).innerText
        End With
        With topic.getElementsByClassName("price")
            If .Length Then Cells(x, 2) = .item(0).innerText
        End With
    Next topic
  Next i
 Application.ScreenUpdating = True
End Sub

Thanks in advance


Solution

  • You need to do a post request to the server with the data-id for the store then grab the appropriate cookie and pass that in a subsequent GET request to the original url. If the store is correctly updated the class name for the element with that data-id should be updated to include an active class. You could pull the data-id value out into a variable to make this more readily usable for other stores.

    Option Explicit
    
    Public Sub SetStore()
        Dim html As MSHTML.HTMLDocument, re As Object, xhr As Object
    
        Set html = New MSHTML.HTMLDocument
        Set re = CreateObject("VBScript.RegExp")
        Set xhr = CreateObject("MSXML2.ServerXMLHTTP")
    
        With re
            .Global = True
            .MultiLine = True
        End With
    
        Dim cookie As String
    
        With xhr
            .Open "POST", "https://www.castorama.ru/multishop/switch/ajax/shop_id/48/", False
            .setRequestHeader "X-Requested-With", "XMLHttpRequest"
            .send
            cookie = GetCookie(re, .getAllResponseHeaders, "Set-Cookie: (castorama_current_shop=48.*)") '"Set-Cookie", "castorama_current_shop=48; expires=Fri, 17-Jan-2020 22:14:51 GMT; Max-Age=2592000; path=/; HttpOnly"
            .Open "GET", "https://www.castorama.ru/building-materials/building-dry-materials-and-primers?limit=96", False
            .setRequestHeader "Set-Cookie", cookie
            .send
            html.body.innerHTML = .responseText
        End With
    
        MsgBox html.querySelector(".shop__name[data-id='48']").className
        Stop
    
    End Sub
    
    Public Function GetCookie(ByVal re As Object, ByVal s As String, ByVal p As String) As String
        With re
            .Pattern = p
            GetCookie = .Execute(s)(0).submatches(0)
        End With
    End Function