Search code examples
excelvbaxmlhttprequest

Set authentication XMLHTTP export HTML


I am trying the following code so as to be able to export specific HTML page but the website requires credentials. Here's my try

Sub NewTest()
Const sURL As String = "https://courses.myexcelonline.com/courses/take/microsoft-teams/lessons/11482643-microsoft-teams-course-overview"
Dim http As MSXML2.XMLHTTP60, html As MSHTML.HTMLDocument

Set http = New MSXML2.XMLHTTP60
Set html = New MSHTML.HTMLDocument

Dim sUser As String, sPass As String
sUser = Application.WorksheetFunction.EncodeURL("myemail")
sPass = Application.WorksheetFunction.EncodeURL("mypass")

Dim postData As String
postData = "user%5Bemail%5D=" & sUser & "&user%5Bpassword%5D=" & sPass

With http
    .Open "POST", sURL, False
    .setRequestHeader "User-Agent", "Mozilla/5.0"
    .setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
    .send postData
    html.body.innerHTML = .responseText
    ExportHTML .responseText
End With

Stop
End Sub

Sub ExportHTML(sInput As String)
With CreateObject("ADODB.Stream")
    .Charset = "UTF-8"
    .Open
    .WriteText sInput
    .SaveToFile Environ("USERPROFILE") & "\Desktop\OutputHTML.html", 2
    .Close
End With
End Sub

I don't know what's wrong with my code .. I expect to have specific HTML page stored on my desktop but when I open the HTML page I didn't find the page but find the credentials request for email and password.

I have changed the url and this fixed the problem I think Const sURL As String = "https://courses.myexcelonline.com/users/sign_in". But I need to know how to get specific HTML page from the site?


Solution

  • It looks like you're sending a "GET" instead of "POST".

    Since you are sending POST data with your request, you need to change the "GET" to "POST'

    .Open "Get", sURL, False
    

    To

    .Open "POST", sURL, False
    

    EDIT : The URL you provided does not correlate to a POST, but rather a GET.

    You need to send a GET request to the login page first, followed by a POST to the same login URL page it is - https://courses.myexcelonline.com/users/sign_in, followed by a GET to your original URL provided since by that moment you're already logged in and can navigate to your URL correctly.

    Here is an example POST data - The authenticity token can be found when you send you send the first GET to the login page in the HTML. To do this, get the .responseText after the GET request. You can then use the Split function to get the auth token like this.

    <meta name="csrf-param" content="authenticity_token" />
    <meta name="csrf-token" content="ReGa49vTQ0KqSZtje3VerA8q3O9J1RLO7lEU/+q1hccys6S1uXn6UigKiKTjDp2yEFQRpYpziRQLnXifJLxb+Q==" />
    
    Dim authToken As String
    authToken = Application.WorksheetFunction.EncodeURL(Split(Split(http.responseText, "csrf-token"" content=""")(1), """")(0))
    
    Dim postData As String
    postdata = "utf8=%E2%9C%93&authenticity_token=" & authToken & "&user%5Bemail%5D=" & sUser & "&user%5Bpassword%5D=" & sPass & "&user%5Bremember_me%5D=0"
    

    Here is the final code

    Sub NewTest()
    Const sURL As String = "https://courses.myexcelonline.com/courses/take/microsoft-teams/lessons/11482643-microsoft-teams-course-overview"
    Const sLoginURL As String = "https://courses.myexcelonline.com/users/sign_in"
    Dim http As MSXML2.XMLHTTP60, html As MSHTML.HTMLDocument
    
    Set http = New MSXML2.XMLHTTP60
    Set html = New MSHTML.HTMLDocument
    
    Dim sUser As String, sPass As String, sAuthToken As String, postData As String
    sUser = Application.WorksheetFunction.EncodeURL("myemail")
    sPass = Application.WorksheetFunction.EncodeURL("mypass")
    
    'Load Login Page
    With http
        .Open "GET", sLoginURL, False
        .setRequestHeader "User-Agent", "Mozilla/5.0"
        .setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
        .send
        .WaitForResponse
        html.body.innerHTML = .responseText
    End With
    
    'Get Auth Token from HTML
    sAuthToken = Application.WorksheetFunction.EncodeURL((Split( (Split(http.body.innerHTML, "csrf-token"" content=""")(1)), """")(0)))
    
    'Construct the POST data that will be sent
    postData = "utf8=%E2%9C%93&authenticity_token=" & sAuthToken & "&user%5Bemail%5D=" & sUser & "&user%5Bpassword%5D=" & sPass & "&user%5Bremember_me%5D=0"
    
    'Attempt to login 
    With http
        .Open "POST", sLoginURL, False
        .setRequestHeader "User-Agent", "Mozilla/5.0"
        .setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
        .send (postData)
        .WaitForResponse
    End With
    
    'Add code here to verify if login was successful...
    
    'If successful... navigate to sURL 
    With http
        .Open "GET", sURL, False
        .setRequestHeader "User-Agent", "Mozilla/5.0"
        .setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
        .send 
        .WaitForResponse
        html.body.innerHTML = .responseText
        ExportHTML .responseText
    End With
    
    Stop
    End Sub
    
    Sub ExportHTML(sInput As String)
    With CreateObject("ADODB.Stream")
        .Charset = "UTF-8"
        .Open
        .WriteText sInput
        .SaveToFile Environ("USERPROFILE") & "\Desktop\OutputHTML.html", 2
        .Close
    End With
    End Sub
    enter code here