I am having issues with my eBAY Scraper and can not work out why. Although it is pulling the data off fine, it misses SOME of the data OFF for the first row and then for each first row of every Loop and therefore the data is not in the correct row.
Q) Why is it missing the data at the start and then for each loop?
I think It may have something to do with the title extracting slower that the rest of the items, however I can not work it out as I am very limited with vba. I have attached a demo, for your viewing.
I am not looking for a full rewite of the code, just pointing in the right direction or a SLIGHT change to MY code. As I stated I and very limited in vba, I can understand my code, anything more advanced will be out of my depth.
Demo Download - Download Excel File
WebSite - Ebay.co.uk
Ebay Product Page - Prodcts Shown may vary browser to browser
I have colour coded it so you can see better
For some reason it misses out Price, Condition, Former Price & Discount for the first item on start and EVERY Loop. For every loop that it misses the items out the Price, Condition, Former Price & Discount become MORE out of line
1st Loop - Items are NOW 2 rows out of line
2nd Loop - Items are NOW 3 rows out of line
As I searched 3 pages (2 pages + 1 extra) and it looped 3 time it has missed the first row on each loop. I am 3 rows out. I think this may have too do with the Title of the item as it extracts a bit slower then the rest of the items
This is my code
Dim HTML As HTMLDocument
Dim objIE As Object
Dim result As String
Dim pageNumber As Long ' page no.
Dim nextPageElement As Object 'page element
Dim HtmlText As Variant
Dim wsSheet As Worksheet
Dim wb As Workbook
Dim sht As Worksheet
Set wb = ThisWorkbook
Set wsSheet = wb.Sheets("Sheet1")
Set sht = ThisWorkbook.Worksheets("Sheet1")
'+++++ Internet Explorer ++++++
Set objIE = New InternetExplorer
objIE.Visible = False
objIE.navigate Sheets("Sheet1").Range("A2").Value & Replace(Worksheets("Sheet1").Range("B2") & Range("C2").Value, " ", "+") 'navigate IE to this web page
Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop 'wait here a few seconds while the browser is busy
Set ie = CreateObject("InternetExplorer.Application")
'######################################## RESTART CODE FROM HERE ####################################
StartForLoop_Restart: 'Restart the code HERE, this is the key part
'######################################## RESTART CODE FROM HERE ####################################
' Application.ScreenUpdating = False
Set HTML = objIE.document
Set elements = HTML.getElementsByClassName("s-item__wrapper clearfix") ' parent CLASS
'FOR LOOP
For Each element In elements
''' Element 1
If element.getElementsByClassName("s-item__link")(0) Is Nothing Then
wsSheet.Cells(sht.Cells(sht.Rows.Count, "A").End(xlUp).Row + 1, "A").Value = "-"
Else
HtmlText = element.getElementsByClassName("s-item__link")(0).href
wsSheet.Cells(sht.Cells(sht.Rows.Count, "A").End(xlUp).Row + 1, "A").Value = HtmlText
End If
''' Element 2
If element.getElementsByClassName("s-item__link")(0) Is Nothing Then
wsSheet.Cells(sht.Cells(sht.Rows.Count, "B").End(xlUp).Row + 1, "B").Value = "-"
Else
HtmlText = element.getElementsByClassName("s-item__link")(0).innerText 'src
wsSheet.Cells(sht.Cells(sht.Rows.Count, "B").End(xlUp).Row + 1, "B").Value = HtmlText
End If
''' Element 3
If element.getElementsByClassName("s-item__price")(0) Is Nothing Then
wsSheet.Cells(sht.Cells(sht.Rows.Count, "C").End(xlUp).Row + 1, "C").Value = "-"
Else
HtmlText = element.getElementsByClassName("s-item__price")(0).innerText
wsSheet.Cells(sht.Cells(sht.Rows.Count, "C").End(xlUp).Row + 1, "C").Value = HtmlText
End If
''' Element 4
If element.getElementsByClassName("SECONDARY_INFO")(0) Is Nothing Then
wsSheet.Cells(sht.Cells(sht.Rows.Count, "D").End(xlUp).Row + 1, "D").Value = "-"
Else
HtmlText = element.getElementsByClassName("SECONDARY_INFO")(0).innerText
wsSheet.Cells(sht.Cells(sht.Rows.Count, "D").End(xlUp).Row + 1, "D").Value = HtmlText
End If
''' Element 5
If element.getElementsByClassName("STRIKETHROUGH")(0) Is Nothing Then
wsSheet.Cells(sht.Cells(sht.Rows.Count, "E").End(xlUp).Row + 1, "E").Value = "-"
Else
HtmlText = element.getElementsByClassName("STRIKETHROUGH")(0).innerText
wsSheet.Cells(sht.Cells(sht.Rows.Count, "E").End(xlUp).Row + 1, "E").Value = HtmlText
End If
''' Element 6
If element.getElementsByClassName("s-item__discount s-item__discount")(0) Is Nothing Then
wsSheet.Cells(sht.Cells(sht.Rows.Count, "F").End(xlUp).Row + 1, "F").Value = "-"
Else
HtmlText = element.getElementsByClassName("s-item__discount s-item__discount")(0).innerText
wsSheet.Cells(sht.Cells(sht.Rows.Count, "F").End(xlUp).Row + 1, "F").Value = HtmlText
End If
' Application.ScreenUpdating = True
Next element
Do
'Searches Number of Pages entered in
If pageNumber >= Replace(Worksheets("Sheet1").Range("d2").Value, "", "+") Then Exit Do
Set nextPageElement = HTML.getElementsByClassName("pagination__next")(0) ' CLICK TO NEXT PAGE
If nextPageElement Is Nothing Then Exit Do
objIE.document.parentWindow.Scroll 0&, 99999 ' Scrolls Down the Browser
nextPageElement.Click 'next web page
Do While objIE.Busy = True Or objIE.readyState <> 4
Loop
Set HTML = objIE.document
pageNumber = pageNumber + 1
'##################################### Restart Loop ##################################
GoTo StartForLoop_Restart ' use GOTo command and label to reinitiate the CODE AS WOULD NOT LOOP TO NEXT PAGE
'##################################### Restart Loop ##################################
Loop
objIE.Quit ' end and clear browser
Set objIE = Nothing
Set HTML = Nothing
Set nextPageElement = Nothing
Set HtmlText = Nothing
Set element = Nothing
End Sub
As always thanks in advance.
Make sure to skip the first element within your returned collection. Keeping to your code.
Private Sub CommandButton1_Click()
Dim HTML As HTMLDocument
Dim objIE As Object
Dim result As String
Dim pageNumber As Long ' page no.
Dim nextPageElement As Object 'page element
Dim HtmlText As Variant
Dim wsSheet As Worksheet
Dim wb As Workbook
Dim sht As Worksheet
Set wb = ThisWorkbook
Set wsSheet = wb.Sheets("Sheet1")
Set sht = ThisWorkbook.Worksheets("Sheet1")
'+++++ Internet Explorer ++++++
Set objIE = New InternetExplorer
objIE.Visible = True
objIE.navigate Sheets("Sheet1").Range("A2").Value & Replace(Worksheets("Sheet1").Range("B2") & Range("C2").Value, " ", "+") 'navigate IE to this web page
Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop 'wait here a few seconds while the browser is busy
'Set ie = CreateObject("InternetExplorer.Application")
'######################################## RESTART CODE FROM HERE ####################################
StartForLoop_Restart: 'Restart the code HERE, this is the key part
'######################################## RESTART CODE FROM HERE ####################################
' Application.ScreenUpdating = False
Set HTML = objIE.document
Set elements = HTML.getElementsByClassName("s-item__wrapper clearfix") ' parent CLASS
'FOR LOOP
Dim counter As Long
counter = 0
For Each element In elements
If counter > 0 Then
''' Element 1
If element.getElementsByClassName("s-item__link")(0) Is Nothing Then
wsSheet.Cells(sht.Cells(sht.Rows.Count, "A").End(xlUp).Row + 1, "A").Value = "-"
Else
HtmlText = element.getElementsByClassName("s-item__link")(0).href
wsSheet.Cells(sht.Cells(sht.Rows.Count, "A").End(xlUp).Row + 1, "A").Value = HtmlText
End If
''' Element 2
If element.getElementsByClassName("s-item__link")(0) Is Nothing Then
wsSheet.Cells(sht.Cells(sht.Rows.Count, "B").End(xlUp).Row + 1, "B").Value = "-"
Else
HtmlText = element.getElementsByClassName("s-item__link")(0).innerText 'src
wsSheet.Cells(sht.Cells(sht.Rows.Count, "B").End(xlUp).Row + 1, "B").Value = HtmlText
End If
''' Element 3
If element.getElementsByClassName("s-item__price")(0) Is Nothing Then
wsSheet.Cells(sht.Cells(sht.Rows.Count, "C").End(xlUp).Row + 1, "C").Value = "-"
Else
HtmlText = element.getElementsByClassName("s-item__price")(0).innerText
wsSheet.Cells(sht.Cells(sht.Rows.Count, "C").End(xlUp).Row + 1, "C").Value = HtmlText
End If
''' Element 4
If element.getElementsByClassName("SECONDARY_INFO")(0) Is Nothing Then
wsSheet.Cells(sht.Cells(sht.Rows.Count, "D").End(xlUp).Row + 1, "D").Value = "-"
Else
HtmlText = element.getElementsByClassName("SECONDARY_INFO")(0).innerText
wsSheet.Cells(sht.Cells(sht.Rows.Count, "D").End(xlUp).Row + 1, "D").Value = HtmlText
End If
''' Element 5
If element.getElementsByClassName("STRIKETHROUGH")(0) Is Nothing Then
wsSheet.Cells(sht.Cells(sht.Rows.Count, "E").End(xlUp).Row + 1, "E").Value = "-"
Else
HtmlText = element.getElementsByClassName("STRIKETHROUGH")(0).innerText
wsSheet.Cells(sht.Cells(sht.Rows.Count, "E").End(xlUp).Row + 1, "E").Value = HtmlText
End If
''' Element 6
If element.getElementsByClassName("s-item__discount s-item__discount")(0) Is Nothing Then
wsSheet.Cells(sht.Cells(sht.Rows.Count, "F").End(xlUp).Row + 1, "F").Value = "-"
Else
HtmlText = element.getElementsByClassName("s-item__discount s-item__discount")(0).innerText
wsSheet.Cells(sht.Cells(sht.Rows.Count, "F").End(xlUp).Row + 1, "F").Value = HtmlText
End If
' Application.ScreenUpdating = True
End If
counter = counter + 1
Next element
Do
'Searches Number of Pages entered in
If pageNumber >= Replace$(Worksheets("Sheet1").Range("d2").Value, "", "+") Then Exit Do
Set nextPageElement = HTML.getElementsByClassName("pagination__next")(0) ' CLICK TO NEXT PAGE
If nextPageElement Is Nothing Then Exit Do
objIE.document.parentWindow.Scroll 0&, 99999 ' Scrolls Down the Browser
nextPageElement.Click 'next web page
Do While objIE.Busy = True Or objIE.readyState <> 4
DoEvents
Loop
Set HTML = objIE.document
pageNumber = pageNumber + 1
counter = 0
'##################################### Restart Loop ##################################
GoTo StartForLoop_Restart ' use GOTo command and label to reinitiate the CODE AS WOULD NOT LOOP TO NEXT PAGE
'##################################### Restart Loop ##################################
Loop
objIE.Quit ' end and clear browser
Set objIE = Nothing
Set HTML = Nothing
Set nextPageElement = Nothing
Set HtmlText = Nothing
Set element = Nothing
End Sub