Search code examples
excelvbaweb-scrapingxmlhttprequest

Change code to pull additional data from site


I am pulling data from a website but need assistance to pull the entire string.

Example:

Example

I have tried to look into the website source code to understand it but with different changes yield bad results

Option Explicit

Public Sub GetInfo()
    Dim html As HTMLDocument, csrft As String, lastRow As Long, sourceValues() '<  VBE > Tools > References > Microsoft HTML Object Library
    Set html = New HTMLDocument
    Dim ws As Worksheet, i As Long
    Set ws = ThisWorkbook.Worksheets("Sheet4")
    lastRow = ws.Cells(ws.rows.Count, "B").End(xlUp).Row
    sourceValues = ws.Range("B2:D" & lastRow).Value
    Dim results()
    ReDim results(1 To UBound(sourceValues, 1), 1 To 4)
    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "https://www.partstown.com", False
        .send
        html.body.innerHTML = .responseText

        csrft = html.querySelector("[name=CSRFToken]").Value
        Stop
        For i = LBound(sourceValues, 1) To UBound(sourceValues, 1)
            If sourceValues(i, 1) <> vbNullString And sourceValues(i, 3) <> vbNullString Then
                DoEvents
                .Open "POST", "https://www.partstown.com/track-my-order", False
                .setRequestHeader "Referer", "https://www.partstown.com/track-my-order"
                .setRequestHeader "User-Agent", "Mozilla/5.0"
                .setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
                .setRequestHeader "Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"
                .setRequestHeader "Accept-Encoding", "gzip, deflate"
                .setRequestHeader "Accept-Language", "en-US,en;q=0.9"
                .send "orderNo=" & sourceValues(i, 1) & "&postalCode=" & sourceValues(i, 3) & "&CSRFToken=" & csrft

                html.body.innerHTML = .responseText

                Dim shipping As String, order As String, items() As String

                shipping = html.querySelector("[data-label=Shipping]").innerText
                order = html.querySelector(".order-history__item-descript--min").innerText
                items = Split(order, vbNewLine)

                Dim qtyOrdered As Long, qtyShipped As String, product As String

                qtyOrdered = CLng(Replace$(items(0), "Qty Ordered: ", vbNullString))
                qtyShipped = CLng(Replace$(items(1), "Qty Shipped: ", vbNullString))
                product = html.querySelector(".details-table a").Title

                results(i, 1) = shipping
                results(i, 2) = qtyOrdered
                results(i, 3) = qtyShipped
                results(i, 4) = product
            End If
            'Application.Wait Now + TimeSerial(0, 0, 1)
        Next
    End With
    'results written out from row 2 column E
    ws.Cells(2, 5).Resize(UBound(results, 1), UBound(results, 2)) = results

End Sub

I input a PO and zip (4500987740 and 33314) that contains multiple parts, the data returned is only the first part, not all the parts.

Example 2:

example 2

I need to return all data: Tracking, qty ordered, qty shipped, product, qty ordered, qty shipped product, etc - basically as a string until all parts are displayed


Solution

  • The problem lies in the use of querySelector. querySelector returns the first match only. In this case it means you are only ever considering the first row. The required amendment is to use querySelectorAll to return all matches. Then loop those matches to extract each row's info.

    Additionally, this selector .details-table a must be altered to only return items of interest i.e. to .details-table a[title] - those that have a title attribute.

    To write out to each row appropriately a helper function is used to find the next free row. As the row count is not known in advance an array of the appropriate size cannot be set to house all results - though you could oversize the array from the start. That latter point is an amendment you could make. I instead loop writing out arrays in batches.

    Option Explicit
    Public Sub GetInfo()
        Dim html As HTMLDocument, csrft As String, ws As Worksheet
        Dim lastRow As Long, wsTarget As Worksheet, j As Long '<  VBE > Tools > References > Microsoft HTML Object Library
        Dim sourceValues()
    
        Set html = New HTMLDocument
        Set ws = ThisWorkbook.Worksheets("Sheet4")
        Set wsTarget = ThisWorkbook.Worksheets("Sheet1")
        lastRow = ws.Cells(ws.rows.Count, "B").End(xlUp).Row
        sourceValues = ws.Range("B2:D" & lastRow).Value
    
        With CreateObject("MSXML2.XMLHTTP")
            .Open "GET", "https://www.partstown.com", False
            .send
    
            html.body.innerHTML = .responseText
    
            csrft = html.querySelector("[name=CSRFToken]").Value
            For j = LBound(sourceValues, 1) To UBound(sourceValues, 1)
                If sourceValues(j, 1) <> vbNullString And sourceValues(j, 3) <> vbNullString Then
                    DoEvents
                    .Open "POST", "https://www.partstown.com/track-my-order", False
                    .setRequestHeader "Referer", "https://www.partstown.com/track-my-order"
                    .setRequestHeader "User-Agent", "Mozilla/5.0"
                    .setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
                    .setRequestHeader "Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"
                    .setRequestHeader "Accept-Encoding", "gzip, deflate"
                    .setRequestHeader "Accept-Language", "en-US,en;q=0.9"
                    .send "orderNo=" & sourceValues(j, 1) & "&postalCode=" & sourceValues(j, 3) & "&CSRFToken=" & csrft
                    html.body.innerHTML = .responseText
    
                    Dim shipping As String, orders As Object, items() As String
    
                    shipping = html.querySelector("[data-label=Shipping]").innerText
                    Set orders = html.querySelectorAll(".order-history__item-descript--min")
    
                    Dim i As Long, c As Long, results(), products As Object
                    ReDim results(1 To 1, 1 To 4 * orders.length)
                    Dim qtyOrdered As Long, qtyShipped As String, product As String
                    Set products = html.querySelectorAll(".details-table a[title]")
                    c = 1
                    For i = 0 To orders.length - 1
                        items = Split(orders.item(i).innerText, vbNewLine)
                        qtyOrdered = CLng(Replace$(items(0), "Qty Ordered: ", vbNullString))
                        qtyShipped = CLng(Replace$(items(1), "Qty Shipped: ", vbNullString))
                        results(1, c) = shipping
                        results(1, c + 1) = qtyOrdered
                        results(1, c + 2) = qtyShipped
                        results(1, c + 3) = products.item(i).Title
                        c = c + 4
                    Next
                    wsTarget.Cells(GetLastRow(wsTarget) + 1, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
                End If
            Next
        End With
    End Sub
    Public Function GetLastRow(ByVal ws As Worksheet, Optional ByVal columnNumber As Long = 1) As Long
        With ws
            GetLastRow = .Cells(.rows.Count, columnNumber).End(xlUp).Row
        End With
    End Function