Search code examples
vbacloudflarems-access-2016

Parse data from online XML feed using VBA - stopped working


we have a little routine here that has run without a hitch in the background for many years which contacts MI5 each morning, reads the XML file on the MI5 website (https://www.mi5.gov.uk/UKThreatLevel/UKThreatLevel.xml), and updates our on-duty staff of the current threat level. This has worked consistently until late last year.

It appears that in late 2024 the UK Government have commissioned Cloudflare security on the MI5 website, and as such when our script attempts to read the online XML, it fails. The page is still accessible in Chrome.

Playing around with different variants of VBA code that I've found around Stack Overflow, the XML HTTP text node has

: responseText : "Just a moment...<meta http-equiv="X-UA-Compatible" con"

and the Status is showing as 403.

The question is, is there a way with VBA using MSXML6 to either get the program to wait out until Cloudflare refreshes with the correct data, or if the status 403 is as a result of the website detecting VBA as a non-standard browser, is there a way of spoofing the header so the website in question thinks it's Chrome/edge, etc.

Any pointers would be appreciated.

I tried the following code which is failing. When I save the XML and parse it locally, it works. When I access the BBC's news XML, it works.

Sub GetThreatLevel()
'On Error Resume Next
Dim strpath As String
Dim dblRate As Double
Dim i As Integer

Dim xmlOBject As MSXML2.DOMDocument60
Dim xmlNode As MSXML2.IXMLDOMNode

Dim intLength As Integer
Set xmlOBject = New MSXML2.DOMDocument60
'website path
strpath = "https://www.mi5.gov.uk/UKThreatLevel/UKThreatLevel.xml"
'strpath = "https://feeds.bbci.co.uk/news/rss.xml"
'strPath = "c:\FOO\Threat.xml"

With xmlOBject
    .async = False
    .resolveExternals = False
    .validateOnParse = False
    .Load (strpath)
End With

'get the query node
intLength = xmlOBject.childNodes.Length - 1
For i = 0 To intLength
If xmlOBject.childNodes.Item(i).BaseName = "rss" Then
Set xmlNode = xmlOBject.childNodes.Item(i)
i = intLength + 1
End If
Next i
'get the result node
intLength = xmlNode.childNodes.Length - 1
For i = 0 To intLength
If xmlNode.childNodes.Item(i).BaseName = "channel" Then
Set xmlNode = xmlNode.childNodes.Item(i)
i = intLength + 1
End If
Next i
intLength = xmlNode.childNodes.Length - 1
For i = 0 To intLength
If xmlNode.childNodes.Item(i).BaseName = "item" Then
Set xmlNode = xmlNode.childNodes.Item(i)
i = intLength + 1
End If
Next i
intLength = xmlNode.childNodes.Length - 1
For i = 0 To intLength
If xmlNode.childNodes.Item(i).BaseName = "description" Then
Set xmlNode = xmlNode.childNodes.Item(i)
i = intLength + 1
End If
Next i

intLength = xmlNode.childNodes.Length - 1

Debug.Print xmlNode.nodeTypedValue
DBSLastCheck = Date
dbslastresult = Nz(xmlNode.nodeTypedValue, "NO VALUE RETURNED")


End Sub


Solution

  • This works here with Access 365 64-bit:

    Option Compare Database
    Option Explicit
    
    ' Enums.
    '
    ' HTTP status codes, reduced.
    Private Enum HttpStatus
        OK = 200
        BadRequest = 400
        Unauthorized = 401
        Forbidden = 403
    End Enum
    
    Public Function ThreatLevel() As String
    
        ' Operational constants.
        '
        ' Base URL for MI5 UK threat level status.
        Const ServiceUrl        As String = "https://www.mi5.gov.uk/UKThreatLevel/"
        ' File to look up.
        Const Filename          As String = "UKThreatLevel.xml"
        
        ' Function constants.
        '
        ' Async setting.
        Const Async             As Variant = False
        ' XML node and attribute names.
        Const RootNodeName      As String = "rss"
        Const ChannelNodeName   As String = "channel"
        Const ItemNodeName      As String = "item"
        Const TextNodeName      As String = "description"
      
        ' Microsoft XML, v6.0.
        Dim Document            As MSXML2.DOMDocument60
        Dim XmlHttp             As MSXML2.ServerXMLHTTP60
        Dim RootNodeList        As MSXML2.IXMLDOMNodeList
        Dim ChannelNodeList     As MSXML2.IXMLDOMNodeList
        Dim TextNodeList        As MSXML2.IXMLDOMNodeList
        Dim RootNode            As MSXML2.IXMLDOMNode
        Dim ChannelNode         As MSXML2.IXMLDOMNode
        Dim ItemNode            As MSXML2.IXMLDOMNode
        Dim TextNode            As MSXML2.IXMLDOMNode
        Dim Description         As String
        Dim Url                 As String
    
        Set Document = New MSXML2.DOMDocument60
        Set XmlHttp = New MSXML2.ServerXMLHTTP60
        
        Url = ServiceUrl & Filename
        
        ' Retrieve data.
        XmlHttp.Open "GET", Url, Async
        XmlHttp.send
        
        If XmlHttp.Status = HttpStatus.OK Then
            ' File retrieved successfully.
            Document.loadXML XmlHttp.responseText
        
            Set RootNodeList = Document.getElementsByTagName(RootNodeName)
            ' Find root node.
            For Each RootNode In RootNodeList
                If RootNode.nodeName = RootNodeName Then
                    Exit For
                Else
                    Set RootNode = Nothing
                End If
            Next
    
            If Not RootNode Is Nothing Then
                If RootNode.hasChildNodes Then
                    ' Find first level Channel node.
                    Set ChannelNodeList = RootNode.childNodes
                    For Each ChannelNode In ChannelNodeList
                        If ChannelNode.nodeName = ChannelNodeName Then
                            Exit For
                        Else
                            Set ChannelNode = Nothing
                        End If
                    Next
                End If
            End If
            
            If Not ChannelNode Is Nothing Then
                If ChannelNode.hasChildNodes Then
                    ' Find Item node.
                    Set ChannelNodeList = ChannelNode.childNodes
                    For Each ItemNode In ChannelNodeList
                        If ItemNode.nodeName = ItemNodeName Then
                            Exit For
                        Else
                            Set ItemNode = Nothing
                        End If
                    Next
                End If
            End If
            
            If Not ItemNode Is Nothing Then
                If ItemNode.hasChildNodes Then
                    ' Find Text node.
                    Set TextNodeList = ItemNode.childNodes
                    For Each TextNode In TextNodeList
                        If TextNode.nodeName = TextNodeName Then
                            Description = TextNode.nodeTypedValue
                            Exit For
                        Else
                            Set TextNode = Nothing
                        End If
                    Next
                End If
            End If
    
        End If
        
        ThreatLevel = Description
    
    End Function
    

    Output:

    The current national threat level is SUBSTANTIAL. The threat to Northern Ireland from Northern Ireland-related terrorism is SUBSTANTIAL.