Search code examples
vbaweb-scrapingclassname

Reset an html element after error with VBA


On a scraper macro I am trying to move over an error and return "input error" when there is no data to crawl.

For now I am using this:

Public Function translate()

    Set thisWbs = ActiveWorkbook.ActiveSheet
    Set ie = CreateObject("InternetExplorer.Application")
    link = "https://translate.google.com/#view=home&op=translate&sl=auto&tl=en"
    i = 2

    ie.Visible = True

    LastRow = thisWbs.Range("B" & ActiveSheet.Rows.Count).End(xlUp).Row

    Set Rng = thisWbs.Range("B2:B" & LastRow)

    For Each cell In Rng

        my_url = link
        ie.navigate my_url
        
        Wait 2
    
        While ie.readyState <> 4 Or ie.Busy: DoEvents: Wend
        
        ie.document.getElementById("source").innerText = ActiveSheet.Range("B" & i)
        
        Wait 2

        If ie.document.getElementsByClassName("empty-placeholder placeholder")(0).innerText = "Translation" Then
            ActiveSheet.Range("C" & i) = "input error"
        Else
            ActiveSheet.Range("C" & i) = ie.document.getElementsByClassName("tlid-translation translation")(0).innerText
        End If

        Wait 1
        
        ie.document.getElementsByClassName("empty-placeholder placeholder")(0).innerText = " "
        
        i = i + 1

    Next cell

    ie.Quit

    MsgBox "Done"
    
End Function

It work and It returns "input error" for the first error found, but when it finds another error the class remains " " as was set before, so It couldn't found The "Translate" again and stops working. Any ideas?


Solution

  • Try the next code, please:

    Private Sub translate()
      Dim thisWbs As Worksheet, IE As Object, link As String
      Dim i As Long, lastRow As Long, my_url As String
      
        Set thisWbs = ActiveSheet
        Set IE = CreateObject("InternetExplorer.Application")
        link = "https://translate.google.com/#view=home&op=translate&sl=auto&tl=en"
        
        'IE.Visible = True
        lastRow = thisWbs.Range("B" & Rows.count).End(xlUp).Row
        thisWbs.Range("C2:C" & lastRow).Clear
        
        For i = 2 To lastRow
            my_url = link & "&text=" & Replace(ActiveSheet.Range("B" & i).Value, " ", "%20")
            IE.navigate my_url
    
            While IE.readyState <> 4 Or IE.Busy: DoEvents: Wend
            
            Application.Wait (Now + TimeValue("0:00:1"))
    
            If IE.Document.getElementsByClassName("empty-placeholder placeholder")(0).innerText = "Translation" Then
                ActiveSheet.Range("C" & i) = "input error"
            Else
                ActiveSheet.Range("C" & i) = IE.Document.getElementsByClassName("tlid-translation translation")(0).innerText
            End If
        Next i
    
        IE.Quit
        MsgBox "Done"
    End Sub
    

    I tested it. I adapted yours in order to make it working.

    Now, try the next function (much faster and reliable, not needing Internet Explorer), please:

    Private Function GTranslate(strInput As String, strFromLang As String, strToLang As String) As String
        Dim strURL As String, objHTTP As Object, objHTML As Object, objDivs As Object, objDiv As Variant
        
        strURL = "https://translate.google.com/m?hl=" & strFromLang & _
            "&sl=" & strFromLang & _
            "&tl=" & strToLang & _
            "&ie=UTF-8&prev=_m&q=" & strInput
            
        Set objHTTP = CreateObject("MSXML2.ServerXMLHTTP")
        objHTTP.Open "GET", strURL, False
        objHTTP.setRequestHeader "User-Agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)"
        objHTTP.send ""
        
        Set objHTML = CreateObject("htmlfile")
        With objHTML
            .Open
            .Write objHTTP.responseText
            .Close
        End With
        
        Set objDivs = objHTML.getElementsByTagName("div")
        For Each objDiv In objDivs
            If objDiv.className = "t0" Then
                GTranslate = objDiv.innerText: Exit For
            End If
        Next objDiv
        
        Set objHTML = Nothing: Set objHTTP = Nothing
    End Function
    

    I found it on the internet (some years before), adapted it for my need and now for yours...

    Your code, using the above function, will become:

    Private Sub Google_translate()
      Dim thisWbs As Worksheet
      Dim i As Long, lastRow As Long
      
      Set thisWbs = ActiveSheet
      lastRow = thisWbs.Range("B" & Rows.count).End(xlUp).Row
      thisWbs.Range("C2:C" & lastRow).Clear
      
      For i = 2 To lastRow
        thisWbs.Range("C" & i).Value = GTranslate(thisWbs.Range("B" & i).Value, "auto", "en")
      Next i
      MsgBox "Ready..."
    End Sub