Search code examples
excelvbaweb-scrapingxmlhttprequest

Check valid URLs for Wistia


I have found a code that I converts to UDF to check if the url of wistia is valid or not ..

Sub Test()
MsgBox CheckValidURL("https://fast.wistia.net/embed/iframe/vud7ff4i6w")
End Sub

Function CheckValidURL(sURL As String) As Boolean
Dim oXMLHTTP        As Object
Dim sResponseText   As String
Dim aScriptParts    As Variant

Set oXMLHTTP = CreateObject("MSXML2.XMLHTTP")
oXMLHTTP.Open "GET", sURL, False
oXMLHTTP.Send

sResponseText = oXMLHTTP.responseText
aScriptParts = Split(sResponseText, "<script", , vbTextCompare)
If UBound(aScriptParts) > 0 Then CheckValidURL = True
End Function

I have tested the UDF with several links and I got correct results but I am not sure of the UDF is correct or not Can you advise me or improve that UDF? Thanks advanced for help


Solution

  • You could gain efficiency by creating the xhr object in the sub and pass to the function, and then only look at the response header link to differentiate

    Option Explicit
    Public Sub Test()
        Dim urls(), i As Long, xhr As Object
        Set xhr = CreateObject("MSXML2.XMLHTTP")
        urls = Array("https://fast.wistia.net/embed/iframe/vud7ff4i6wyh", "https://fast.wistia.net/embed/iframe/vud7ff4i6w")
        For i = LBound(urls) To UBound(urls)
            MsgBox CheckValidURL(urls(i), xhr)
        Next
    End Sub
    
    Public Function CheckValidURL(ByVal url As String, ByVal xhr As Object) As Boolean
        With xhr
            .Open "GET", url, False
            .send
            CheckValidURL = Not .getResponseHeader("link") = vbNullString
        End With
    End Function
    

    Alternatives:

    In the function test for the presence of an id which is only in the valid links, or a string (in the way you did)

    Public Sub Test()
        Dim urls(), i As Long, html As HTMLDocument, xhr As Object
        Set xhr = CreateObject("MSXML2.XMLHTTP"): Set html = New HTMLDocument
        urls = Array("https://fast.wistia.net/embed/iframe/vud7ff4i6wyh", "https://fast.wistia.net/embed/iframe/vud7ff4i6w")
        For i = LBound(urls) To UBound(urls)
            MsgBox CheckValidURL(urls(i), xhr, html)
        Next
    End Sub
    
    Public Function CheckValidURL(ByVal sURL As String, ByVal xhr As Object, ByVal html As HTMLDocument) As Boolean
        With xhr
            .Open "GET", sURL, False
            .send
            html.body.innerHTML = .responseText
        End With
        CheckValidURL = html.querySelectorAll("#wistia_video").Length > 0
    End Function
    

    Also using Instr works

    Option Explicit
    Public Sub Test()
        Dim urls(), i As Long, html As HTMLDocument, xhr As Object
        Set xhr = CreateObject("MSXML2.XMLHTTP")
        urls = Array("https://fast.wistia.net/embed/iframe/vud7ff4i6wyh", "https://fast.wistia.net/embed/iframe/vud7ff4i6w")
        For i = LBound(urls) To UBound(urls)
            MsgBox CheckValidURL(urls(i), xhr)
        Next
    End Sub
    
    Public Function CheckValidURL(ByVal sURL As String, ByVal xhr As Object) As Boolean
        With xhr
            .Open "GET", sURL, False
            .send
            CheckValidURL = InStr(.responseText, "html") > 0
        End With     
    End Function
    

    Re-write of yours:

    Option Explicit
    Public Sub Test()
        Dim urls(), i As Long, html As HTMLDocument, xhr As Object
        Set xhr = CreateObject("MSXML2.XMLHTTP")
        urls = Array("https://fast.wistia.net/embed/iframe/vud7ff4i6wyh", "https://fast.wistia.net/embed/iframe/vud7ff4i6w")
        For i = LBound(urls) To UBound(urls)
            MsgBox CheckValidURL(urls(i), xhr)
        Next
    End Sub
    
    Public Function CheckValidURL(ByVal sURL As String, ByVal xhr As Object) As Boolean
        With xhr
            .Open "GET", sURL, False
            .send
            CheckValidURL = UBound(Split(.responseText, "<script", , vbTextCompare)) > 0
        End With
    End Function