Search code examples
excelvbaweb-scraping

Require vba code to to webscrap the PRELIMINARY DATA section from the "https://www.cmegroup.com/markets/metals/precious/gold.volume.html"


enter image description here

[Web Link "https://www.cmegroup.com/markets/metals/precious/gold.volume.html"]

From the above website I am not able to web scrap the data table section named PRELIMINARY DATA.

I have tried a code given but no luck for this website same code is working for another website mentioned below.

["https://www.nseindia.com/market-data/live-equity-market"]

can anyone help please?

Sub GetTableUsingIE_From_NSE() 'Working sample code for NSE Website table
    'Refer below website for further nested code of row col
    'https://copyprogramming.com/howto/how-to-extract-data-from-website-with-vba-excel
    
    Dim ieApp As Object
    Dim url As String
    Dim htmlDoc As Object
    Dim tables As Object
    Dim table As Object
    
    ' Specify the URL of the website
    
    url = "https://www.nseindia.com/market-data/live-equity-market"
    'url = "https://www.cmegroup.com/markets/metals/precious/gold.volume.html#tradeDate=20231212"
    ' Create a new instance of Internet Explorer
    Set ieApp = CreateObject("InternetExplorer.Application")
    
    ' Make IE visible (you can set it to False if you don't want to see the browser)
    ieApp.Visible = True
    
    ' Navigate to the specified URL
    ieApp.navigate url
    
    ' Wait for the page to load (you may need to adjust the wait time based on the website)
    Do While ieApp.Busy Or ieApp.readyState <> 4
        Application.Wait Now + TimeValue("0:00:03")
    Loop
    
    ' Get the HTML document from the loaded page
    Set htmlDoc = ieApp.document
    
'    ' Get all tables from the HTML document
'    Set tables = htmlDoc.getElementsByTagName("table")
'
'    ' Loop through each table and do something (e.g., print table contents)
'    For Each table In tables
'        ' Do something with the table, e.g., print its innerHTML
'        Debug.Print table.innerHTML
'        MsgBox table.innerHTML
'    Next table
    Dim elt As Object
    Dim aRow, aCol, totCol As Integer
    Dim sHeaderStr  As String
    ThisWorkbook.Sheets("Data").UsedRange.Clear
    With htmlDoc.getElementsByTagName("table").Item(0)
        
        aRow = 1
        aCol = 1
        For Each elt In .getElementsByTagName("th")
            Debug.Print elt.innerText
            sHeaderStr = elt.innerText
            ThisWorkbook.Sheets("Data").Cells(aRow, aCol).Value = sHeaderStr
            aCol = aCol + 1
            totCol = aCol
        Next elt
        aRow = aRow + 1
        aCol = 1
        Debug.Print vbNewLine
        For Each elt In .getElementsByTagName("td") '
        
            Debug.Print elt.innerText
            ThisWorkbook.Sheets("Data").Cells(aRow, aCol).Value = elt.innerText
            If aCol < 18 Then
                aCol = aCol + 1
            Else
                aCol = 1
                aRow = aRow + 1
            End If
        
        Next elt
    End With
    ThisWorkbook.Sheets("Data").UsedRange.Rows.AutoFit
    ThisWorkbook.Sheets("Data").UsedRange.Rows.WrapText = True
    ThisWorkbook.Sheets("Data").UsedRange.Rows.WrapText = False
    
    ' Close Internet Explorer
    ieApp.Quit
    Set ieApp = Nothing
End Sub

From the above website I am not able to web scrap the data table section named PRELIMINARY DATA.

I have tried a code given but no luck for this website same code is working for another website mentioned below.

["https://www.nseindia.com/market-data/live-equity-market"]

can anyone help please?


Solution

  • Don't use the IE anymore. It's deprecated! You can get a JSON from the page with xhr (XML HTTP Request).

    Here is an example how it can work:

    Sub TotalsCMEGoldVolumeByDate()
        'This macro downloads a JSON
        'It works for the last 32 days
        'Oldest getable update for the data is yesterday - 31 days
        
        Const urlBase As String = "https://www.cmegroup.com/CmeWS/mvc/Volume/Details/F/437/"
        Const urlTail As String = "/P"
        Dim urlDate As String
        Dim quotationMarksAll() As String
        Dim quotationMarkOne As Long
        Dim ws As Worksheet
        Dim currRow As Long
        Dim pasteInCell As Boolean
        
        Set ws = ActiveWorkbook.ActiveSheet
        currRow = 1
        
        With CreateObject("MSXML2.XMLHTTP.6.0")
            urlDate = "20231228" 'yyyymmdd (Make this dynamic if needed)
            .Open "GET", urlBase & urlDate & urlTail, False
            .Send
            
            If .Status = 200 Then
                'Here we have a JSON like follows:
                '
                '{
                '   "tradeDate":"20231228",
                '   "totals":{
                '      "month":"-",
                '      "monthID":"-",
                '      "globex":"141,803",
                '      "openOutcry":"0",
                '      "totalVolume":"143,574",
                '      "blockVolume":"114",
                '      "efpVol":"1,657",
                '      "efrVol":"0",
                '      "eooVol":"0",
                '      "efsVol":"0",
                '      "subVol":"0",
                '      "pntVol":"1,771",
                '      "tasVol":"1,553",
                '      "deliveries":"40",
                '      "opnt":"-",
                '      "aon":"-",
                '      "atClose":"500,216",
                '      "change":"-897",
                '      "strike":"-",
                '      "exercises":"0"
                '   },
                '   "monthData":[
                '      {
                '         "month":"DEC 2023",
                '         "monthID":"DEC-2023-Calls",
                '         "globex":"0",
                '         "openOutcry":"0",
                '         "totalVolume":"0",
                '         "blockVolume":"0",
                '         "efpVol":"0",
                '         "efrVol":"0",
                '         "eooVol":"0",
                '         "efsVol":"0",
                '         "subVol":"0",
                '         "pntVol":"0",
                '         "tasVol":"0",
                '         "deliveries":"40",
                '         "opnt":"-",
                '         "aon":"-",
                '         "atClose":"0",
                '         "change":"-40",
                '         "strike":"0",
                '         "exercises":"0"
                '      },
                '      {
                '         "month":"JAN 2024",
                '         "monthID":"JAN-2024-Calls",
                '         "globex":"338",
                '         "openOutcry":"0",
                '         "totalVolume":"338",
                '         "blockVolume":"0",
                '         "efpVol":"0",
                '         ...
                '      ...
                '   ...
                '}
                '
                'We only need the tradeDate and the totals section:
                'Because we need only a few values from a flat herachie,
                'we can get them with some string operations
                '
                quotationMarksAll = Split(.responseText, """")
                
                'The following loop works column by column in the sheet because
                'I just want to show how to get the values you need.
                'If you need a different output, you can do whatever you want here.
                Do
                    Select Case quotationMarksAll(quotationMarkOne)
                        Case "tradeDate"
                            ws.Cells(currRow, 1) = "Trade Date"
                            pasteInCell = True
                        Case "globex"
                            ws.Cells(currRow, 1) = "Globex"
                            pasteInCell = True
                        Case "openOutcry"
                            ws.Cells(currRow, 1) = "Open Outcry"
                            pasteInCell = True
                        Case "totalVolume"
                            ws.Cells(currRow, 1) = "Total Volume"
                            pasteInCell = True
                        Case "blockVolume"
                            ws.Cells(currRow, 1) = "Block Volume"
                            pasteInCell = True
                        Case "efpVol"
                            ws.Cells(currRow, 1) = "efp Vol"
                            pasteInCell = True
                        Case "eooVol"
                            ws.Cells(currRow, 1) = "eoo Vol"
                            pasteInCell = True
                        Case "efsVol"
                            ws.Cells(currRow, 1) = "efs Vol"
                            pasteInCell = True
                        Case "subVol"
                            ws.Cells(currRow, 1) = "sub Vol"
                            pasteInCell = True
                        Case "pntVol"
                            ws.Cells(currRow, 1) = "pnt Vol"
                            pasteInCell = True
                        Case "tasVol"
                            ws.Cells(currRow, 1) = "tas Vol"
                            pasteInCell = True
                        Case "deliveries"
                            ws.Cells(currRow, 1) = "Deliveries"
                            pasteInCell = True
                        Case "atClose"
                            ws.Cells(currRow, 1) = "At Close"
                            pasteInCell = True
                        Case "change"
                            ws.Cells(currRow, 1) = "Change"
                            pasteInCell = True
                    End Select
                    
                    If pasteInCell Then
                        ws.Cells(currRow, 2) = quotationMarksAll(quotationMarkOne + 2)
                        currRow = currRow + 1
                        pasteInCell = False
                    End If
    
                    quotationMarkOne = quotationMarkOne + 1
                Loop While quotationMarksAll(quotationMarkOne) <> "monthData"
            Else
                MsgBox "JSON not loaded. HTTP status " & .Status
            End If
        End With
    End Sub