Search code examples
excel vba web-scraping href

VBA scrape src instead of href


I am using the code below, but it returns the value of 'src' instead of 'href' for some reason. Can anyone help, please?

' Downloads the product listing page and, for every <a> element inside the first
' element with class "products-grid products-grid--max-3-col", prints the "href"
' attribute of that anchor's FIRST CHILD element to the Immediate window.
'
' txt - not used anywhere in the visible body.
'
' NOTE(review): maintext2, gr, counter, cnt and i are not declared in this procedure;
' presumably they are module-level variables or implicit Variants - confirm.
Sub bringfox(txt As String)

Dim oHtml       As HTMLDocument
Dim oElement    As Object
Set oHtml = New HTMLDocument

maintext2 = "https://www.jjfox.co.uk/cigars/show/all.html"

' Synchronous GET (third argument False); the response markup is loaded into the in-memory document.
With CreateObject("WINHTTP.WinHTTPRequest.5.1")
    .Open "GET", maintext2 & gr, False
    .send
    oHtml.body.innerHTML = .responseText
End With



' counter is assigned here but never read in the visible body.
counter = cnt
'oElement(i).Children(0).getAttribute ("href")
' Collect every anchor inside the first matching products grid.
Set oElement = oHtml.getElementsByClassName("products-grid products-grid--max-3-col")(0).getElementsByTagName("a")
i = 0
While i < oElement.Length
    ' NOTE(review): Children(0) addresses the anchor's first child element, not the anchor
    ' itself, so the attribute is read from that child. The anchor's own link would be
    ' oElement(i).getAttribute("href") - confirm which one is intended.
    Debug.Print oElement(i).Children(0).getAttribute("href")

    i = i + 1

Wend


End Sub

Solution

  • You could try using a CSS selector.

    #wrapper div.category-products > ul a
    

    This is a reduced version of the full selector; it targets the `a` (anchor) tags within the products category. You then parse each anchor's outerHTML for its href, as that is where the information is located.


    Site image (Sample view)

    Sample view


    Output from code (Sample view)

    Sample immediate window output


    Code

    Option Explicit

    ' Fetches the listing page and prints to the Immediate window the href text
    ' extracted from the outerHTML of every anchor matched by the CSS selector below.
    ' HTMLDocument is early-bound, so the project needs a reference to the
    ' Microsoft HTML Object Library.
    Public Sub GetInfo()
        Const URL As String = "https://www.jjfox.co.uk/cigars/show/all.html"
        Dim oHtml As HTMLDocument
        Dim request As Object
        Dim nodeList As Object
        Dim currentItem As Long

        Set oHtml = New HTMLDocument

        ' Synchronous GET; the raw response markup becomes the document body.
        Set request = CreateObject("WINHTTP.WinHTTPRequest.5.1")
        request.Open "GET", URL, False
        request.send
        oHtml.body.innerHTML = request.responseText

        ' All anchors inside the product list of the category-products container.
        Set nodeList = oHtml.querySelectorAll("#wrapper div.category-products > ul a")

        For currentItem = 0 To nodeList.Length - 1
            ' If the anchor's outerHTML has no "<A href=" part, Split(...)(1) raises an error;
            ' the error is suppressed and nothing is printed for that item.
            On Error Resume Next
            Debug.Print Split(Split(nodeList.item(currentItem).outerHTML, "<A href=")(1), ">")(0)
            On Error GoTo 0
        Next currentItem
    End Sub
    

    Or, more simply, read the href property directly by replacing the loop with the following:

    ' Replacement for the loop in GetInfo: prints each anchor's href property directly
    ' instead of parsing its outerHTML. Uses nodeList and currentItem as declared in GetInfo.
    For currentItem = 0 To nodeList.Length - 1
        ' Any error raised while reading or printing the href is suppressed for this item.
        On Error Resume Next
        Debug.Print nodeList.item(currentItem).href
        On Error GoTo 0
    Next currentItem