I wrote a macro to download data from a website, after the website is fully loaded, it will scrap the data by the html tag, however, sometimes the data is incorrectly scraped due to unknown error, I want to add a checking after each variant 'x' completed, e.g. If the activesheet contains the word "中报",then go back to the step "'Select the Report Type" to re-do the scraping. Also, I know some of the variables/data types are not set at the very beginning. Could anyone help to solve this? Thanks in advance!
Sub GetFinanceData()
Dim x As Variant
Dim IE As Object
For x = 1 To 1584
Dim URL As String, elemCollection As Object
Dim t As Integer, r As Integer, c As Integer
Worksheets("Stocks").Select
Worksheets("Stocks").Activate
'Open IE and Go to the Website
'URL = "http://stock.finance.sina.com.cn/hkstock/finance/00001.html"
URL = Cells(x, 1)
Set IE = CreateObject("InternetExplorer.Application")
With IE
.navigate URL
.Visible = False
Do While .Busy = True Or .readyState <> 4
Loop
DoEvents
Worksheets.Add(After:=Worksheets(Worksheets.Count)).Name = _
ThisWorkbook.Worksheets("Stocks").Range("B" & x).Value 'You could even simplify it and just state the name as Cells(x,2)
'Select the Report Type
Set selectItems = IE.Document.getElementsByTagName("select")
For Each i In selectItems
i.Value = "zero"
i.FireEvent ("onchange")
Application.Wait (Now + TimeValue("0:00:05"))
Next i
Do While .Busy: DoEvents: Loop
ActiveSheet.Range("A1:K2000").ClearContents
ActiveSheet.Range("A1").Value = .Document.getElementsByTagName("h1")(0).innerText
ActiveSheet.Range("B1").Value = .Document.getElementsByTagName("em")(0).innerText
ActiveSheet.Range("A4").Value = Worksheets("Stocks").Cells(1, 4)
'Find and Get Table Data
tblNameArr = Array(Worksheets("Stocks").Cells(2, 4), Worksheets("Stocks").Cells(3, 4), Worksheets("Stocks").Cells(4, 4), Worksheets("Stocks").Cells(5, 4))
tblStartRow = 6
Set elemCollection = .Document.getElementsByTagName("TABLE")
For t = 0 To elemCollection.Length - 1
For r = 0 To (elemCollection(t).Rows.Length - 1)
For c = 0 To (elemCollection(t).Rows(r).Cells.Length - 1)
ActiveSheet.Cells(r + tblStartRow, c + 1) = elemCollection(t).Rows(r).Cells(c).innerText
Next c
Next r
ActiveSheet.Cells(r + tblStartRow + 2, 1) = tblNameArr(t)
tblStartRow = tblStartRow + r + 4
Next t
End With
' cleaning up memory
IE.Quit
Next x
End Sub
This is cleaned up quite a bit.
I added a SelectReportType: line label. Whenever you want to go back to that condition, use insert the line
Goto SelectReportType
And it will take you to that spot. The better way to do it would be to place that code in a separate function so you can call it anytime your test for "中报" is true. But I'm not following your code well enough to understand what you are doing to assist you with that.
Sub GetFinanceData()
Dim x As Variant
Dim IE As Object
Dim URL As String, elemCollection As Object
Dim t As Integer, r As Integer, c As Integer
Dim selectItems As Variant, i As Variant
Dim tblNameArr() As String
Dim tblStartRow As Long
Worksheets("Stocks").Select
Worksheets("Stocks").Activate
For x = 1 To 1584
'Open IE and Go to the Website
'URL = "http://stock.finance.sina.com.cn/hkstock/finance/00001.html"
URL = Cells(x, 1)
Set IE = CreateObject("InternetExplorer.Application")
With IE
.Navigate URL
.Visible = False
Do While .Busy = True Or .ReadyState <> 4
Loop
DoEvents
Worksheets.Add(After:=Worksheets(Worksheets.count)).name = _
ThisWorkbook.Worksheets("Stocks").Range("B" & x).Value 'You could even simplify it and just state the name as Cells(x,2)
SelectReportType:
'Select the Report Type
Set selectItems = IE.Document.getElementsByTagName("select")
For Each i In selectItems
i.Value = "zero"
i.FireEvent ("onchange")
Application.Wait (Now + TimeValue("0:00:05"))
Next i
Do While .Busy: DoEvents: Loop
ActiveSheet.Range("A1:K2000").ClearContents
ActiveSheet.Range("A1").Value = .Document.getElementsByTagName("h1")(0).innerText
ActiveSheet.Range("B1").Value = .Document.getElementsByTagName("em")(0).innerText
ActiveSheet.Range("A4").Value = Worksheets("Stocks").Cells(1, 4)
'Find and Get Table Data
tblNameArr = Array(Worksheets("Stocks").Cells(2, 4), Worksheets("Stocks").Cells(3, 4), Worksheets("Stocks").Cells(4, 4), Worksheets("Stocks").Cells(5, 4))
tblStartRow = 6
Set elemCollection = .Document.getElementsByTagName("TABLE")
For t = 0 To elemCollection.Length - 1
For r = 0 To (elemCollection(t).Rows.Length - 1)
For c = 0 To (elemCollection(t).Rows(r).Cells.Length - 1)
ActiveSheet.Cells(r + tblStartRow, c + 1) = elemCollection(t).Rows(r).Cells(c).innerText
Next c
Next r
ActiveSheet.Cells(r + tblStartRow + 2, 1) = tblNameArr(t)
tblStartRow = tblStartRow + r + 4
Next t
End With
' cleaning up memory
IE.Quit
Next x
End Sub