Search code examples
excelvbaxmlhttprequest

Unicode characters as a result of xmlhttp request


In this LINK, @QHarr introduced a solution for counting the results of a Google search. Here is the code:

Public Sub GetResultsCount()
    ' Requests a Google search page synchronously, loads the returned markup
    ' into an HTMLDocument and prints the text of the #resultStats element
    ' to the Immediate window.
    Dim request As Object
    Dim pageSource As String
    Dim doc As HTMLDocument

    Set request = CreateObject("MSXML2.XMLHTTP")
    request.Open "GET", "https://www.google.com/search?q=mitsubishi", False
    request.setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
    request.send

    ' StrConv with vbUnicode converts the raw response bytes to a string
    ' using the system's default code page.
    pageSource = StrConv(request.responseBody, vbUnicode)

    Set doc = New HTMLDocument
    doc.body.innerHTML = pageSource
    Debug.Print doc.querySelector("#resultStats").innerText
End Sub

The code runs without errors, but in the Immediate window I get these unreadable characters: enter image description here

How can this be fixed so that the result appears in Arabic characters?

Thanks in advance for your help.


Solution

  • I found this thread: VBA - Convert string to UNICODE, and managed to put together a solution (it works for me with Polish characters; I'm not sure about Arabic):

    ' Windows code page identifier for UTF-8.
    Private Const CP_UTF8 As Long = 65001
    
    ' Win32 API that converts a multi-byte (here: UTF-8) buffer to UTF-16.
    ' VBA7 hosts (Office 2010 and later, including 64-bit) require PtrSafe,
    ' and the pointer arguments must be LongPtr so they are not truncated
    ' on 64-bit Office. Older hosts keep the classic 32-bit declaration.
    #If VBA7 Then
        Private Declare PtrSafe Function MultiByteToWideChar Lib "kernel32" ( _
           ByVal CodePage As Long, ByVal dwFlags As Long, _
           ByVal lpMultiByteStr As LongPtr, ByVal cchMultiByte As Long, _
           ByVal lpWideCharStr As LongPtr, ByVal cchWideChar As Long) As Long
    #Else
        Private Declare Function MultiByteToWideChar Lib "kernel32" ( _
           ByVal CodePage As Long, ByVal dwFlags As Long, _
           ByVal lpMultiByteStr As Long, ByVal cchMultiByte As Long, _
           ByVal lpWideCharStr As Long, ByVal cchWideChar As Long) As Long
    #End If
    
    Public Sub GetResultsCount()
        ' Requests a Google search page, decodes the response bytes as UTF-8,
        ' parses the markup and prints the text of the #resultStats element
        ' to the Immediate window.
        Dim body      As Variant
        Dim rawBytes() As Byte
        Dim sResponse As String
        Dim html      As HTMLDocument
        Dim stats     As Object
    
        With CreateObject("MSXML2.XMLHTTP")
            .Open "GET", "https://www.google.com/search?q=mitsubishi", False
            .setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
            .send
            body = .responseBody
        End With
    
        ' Without a byte-array body there is nothing to decode or parse.
        If VarType(body) <> (vbArray Or vbByte) Then
            Debug.Print "No response body received"
            Exit Sub
        End If
        rawBytes = body
        If UBound(rawBytes) < LBound(rawBytes) Then
            Debug.Print "No response body received"
            Exit Sub
        End If
    
        ' Decode the raw bytes as UTF-8 once, before parsing. Converting them
        ' through the system ANSI code page and back (StrConv vbUnicode /
        ' vbFromUnicode) depends on the machine's locale and can lose bytes
        ' that have no mapping in that code page.
        sResponse = sUTF8ToUni(rawBytes)
    
        Set html = New HTMLDocument
        html.body.innerHTML = sResponse
    
        ' querySelector returns Nothing when the element is absent; check it
        ' instead of failing with run-time error 91.
        Set stats = html.querySelector("#resultStats")
        If stats Is Nothing Then
            Debug.Print "#resultStats element not found in the response"
        Else
            Debug.Print stats.innerText
        End If
    End Sub
    
    Public Function sUTF8ToUni(bySrc() As Byte) As String
       ' Converts a UTF-8 byte array to a VBA (UTF-16) string.
       '
       ' bySrc   - the UTF-8 encoded bytes; may be empty or unallocated.
       ' Returns - the decoded text, or "" when there is nothing to decode
       '           or the API call reports failure (returns 0).
       Dim lBytes As Long, lRet As Long
    
       ' UBound raises error 9 on an unallocated array; treat that case the
       ' same as a zero-length array instead of letting the error escape.
       On Error Resume Next
       lBytes = UBound(bySrc) - LBound(bySrc) + 1
       If Err.Number <> 0 Then lBytes = 0
       On Error GoTo 0
       If lBytes <= 0 Then Exit Function
    
       ' A UTF-8 sequence never decodes to more UTF-16 code units than it has
       ' bytes, so a buffer of lBytes characters is always large enough.
       sUTF8ToUni = String$(lBytes, vbNullChar)
       lRet = MultiByteToWideChar(CP_UTF8, 0, VarPtr(bySrc(LBound(bySrc))), lBytes, StrPtr(sUTF8ToUni), lBytes)
    
       ' lRet is the number of characters actually written; drop the padding.
       sUTF8ToUni = Left$(sUTF8ToUni, lRet)
    End Function