Search code examples
excelvbaweb-scrapingsession-cookiespowerquery

Fetch session details


I want to get the session id & csfrtoken from the url: https://www.screener.in/api/company/6596449/quick_ratios/ below is the screenshot of the values.

enter image description here

Also, I tried the below code in vba, but not getting response. please guide, Note the account is a dummy account only to fetch the details.

Sub test()
 Dim user As String
 Dim pwd As String
 Dim path As String
 user = "rajesh2.gade@gmail.com"
 pwd = "Rain@123"
 path = "https://www.screener.in/api/company/6596449/quick_ratios/"
 Debug.Print httpGET(path, user, pwd)
End Sub

Public Function httpGET(fn As String, _
        Optional authUser As String = vbNullString, _
        Optional authPass As String = vbNullString) As String
    pHtml = fn
    Dim oHttp As Object
    Set oHttp = CreateObject("Microsoft.XMLHTTP")
    Call oHttp.Open("GET", pHtml, False)
    If (authUser <> vbNullString) Then
    ' need to do basic authentication
    ' acknowledgement to http://pastie.org/1192157
        oHttp.setRequestHeader "Content-Type", "application/json"
        oHttp.setRequestHeader "Accept", "application/json"
        oHttp.setRequestHeader "Authorization", "Basic " + _
            EncodeBase64(authUser + ":" + authPass)
    End If
    Call oHttp.send("")
    Debug.Print oHttp.getAllResponseHeaders
    httpGET = oHttp.responseText
    Set oHttp = Nothing
End Function


Function EncodeBase64(text As String) As String


  Dim arrData() As Byte
  arrData = StrConv(text, vbFromUnicode)

  Dim objXML As MSXML2.DOMDocument60
  Dim objNode As MSXML2.IXMLDOMElement

  Set objXML = New MSXML2.DOMDocument60
  Set objNode = objXML.createElement("b64")

  objNode.DataType = "bin.base64"
  objNode.nodeTypedValue = arrData
  EncodeBase64 = Application.Clean(objNode.text)

  Set objNode = Nothing
  Set objXML = Nothing
End Function

Solution

  • You don't need the value of sessionid to login to that site. Csrftoken is necessary, though. However, the following is how you can log in to that site and access the content available in there.

    Sub ScrapeAfterLogin()
        Const Url$ = "https://www.screener.in/api/company/6596449/quick_ratios/"
        Const Link$ = "https://www.screener.in/login/"
        Dim Html As HTMLDocument, sParams As String
        Dim oHttp As Object, sToken$
    
        Set Html = New HTMLDocument
        Set oHttp = CreateObject("MSXML2.serverXMLHTTP")
        
        With oHttp
            .Open "GET", Url, True
            .setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36"
            .send
            While .readyState < 4: DoEvents: Wend
            Html.body.innerHTML = .responseText
            sToken = Html.querySelector("input[name='csrfmiddlewaretoken']").getAttribute("value")
            sParams = "csrfmiddlewaretoken=" & sToken & "&next=%2Fapi%2Fcompany%2F6596449%2Fquick_ratios%2F&username=rajesh2.gade%40gmail.com&password=Rain%40123"
            
            .Open "POST", Link, True
            .setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36"
            .setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
            .setRequestHeader "Referer", "https://www.screener.in/login/"
            .setRequestHeader "Origin", "https://www.screener.in"
            .send (sParams)
            While .readyState < 4: DoEvents: Wend
            MsgBox .responseText
        End With
    End Sub
    

    To get the session id, you have to create a session, reuse the same session throughout all the requests and then find that session id in the cookies. I don't know how to reuse the same session in vba. However, the following is a python version of achieving the same.

    import requests
    from bs4 import BeautifulSoup
    
    start_url = 'https://www.screener.in/api/company/6596449/quick_ratios/'
    login_url = 'https://www.screener.in/login/'
    
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36',
    }
    
    params = {
        'csrfmiddlewaretoken': '',
        'next': '/api/company/6596449/quick_ratios/',
        'username': 'rajesh2.gade@gmail.com',
        'password': 'Rain@123',
    }
    
    with requests.Session() as s:
        s.headers.update(headers)
        res = s.get(start_url)
        soup = BeautifulSoup(res.text,"lxml")
        params['csrfmiddlewaretoken'] = soup.select_one("input[name='csrfmiddlewaretoken']")['value']
        s.headers['Origin'] = 'https://www.screener.in'
        s.headers['Referer'] = 'https://www.screener.in/login/'
        resp = s.post(login_url,data=params)
        print(s.cookies.items())
    

    Output:

    [('csrftoken', 'y64L6KeQfJiDvTMcsRQNlLCGYtfEVj82'), ('sessionid', 'e06akkv5yodjag6nf9tqrz5hex9wx99w')]