Search code examples
r csv authentication download html

How to log in to a website using R and check whether the login succeeded?


I just need to perform a simple login through the website's login page. How do I check whether the login was successful or not?

library(httr)
library(jsonlite)
library(tictoc)
library(data.table)
library(properties)
library(futile.logger)
library(crayon)
library(XML)
library(methods)
library(compare)
library(tictoc)
# Command-line arguments given after the script name (not used in the code shown here).
args <- commandArgs(trailingOnly = TRUE)

# Coordinates of the report server; every download link is built from these.
server.name <- "lgloz050.lss.emc.com"
port.no <- "58443"
default.path <- "/APG/lookup/"

# Turn off TLS peer verification for all subsequent httr requests.
# NOTE(review): presumably needed because the server certificate is not trusted - confirm,
# since this removes a security check.
set_config(config(ssl_verifypeer = 0L))

# One row per report to download.
config.s3 <- fread("Configuration_modify.csv")
# Keep the report path as written in the CSV before it is rewritten into URL form.
config.s3$bc <- config.s3$testReport
config.s3$testReport <- gsub(">>", "/", config.s3$testReport)
config.s3$testReport <- gsub(" ", "%20", config.s3$testReport)

config.s3$link <- paste0("https://", server.name, ":", port.no, default.path,
                         config.s3$testReport, "/report.csv")

# key=value pairs; keys that start with "$" name placeholders used inside the links.
properties <- read.csv2("Configuration.properties", sep = "=", blank.lines.skip = TRUE,
                        header = FALSE, stringsAsFactors = FALSE)
colnames(properties) <- c("key", "value")

# Mark every literal "$" placeholder prefix as "PH_".
# fixed = TRUE is required: as a regular expression "$" is the end-of-string anchor,
# which would append "PH_" to each link instead of replacing the "$" characters.
config.s3$link <- gsub("$", "PH_", config.s3$link, fixed = TRUE)
#config.s3$link

# Substitute each "$name" property into the links. gsub() is vectorised over the
# whole link column, so no per-row loop is needed; seq_len() keeps the loop from
# running when the properties file is empty.
for (i in seq_len(nrow(properties))) {
  raw.key <- properties[i, 1]
  if (!is.na(raw.key) && startsWith(raw.key, "$")) {
    print(raw.key)
    # The placeholder name is still interpreted as a (case-insensitive) regex pattern.
    placeholder <- paste0("PH_", substring(trimws(raw.key), 2))
    config.s3$link <- gsub(placeholder, trimws(properties[i, 2]), config.s3$link,
                           ignore.case = TRUE)
  }
}

# Table that later collects the per-report fetch status.
result <- config.s3[, list(bc, TestCaseID, link)]

# Log in to the APG web application with a form POST.
#
# link      - URL requested first; the cookies of that response are sent with the login POST.
# user.name - value submitted as the j_username form field.
# password  - value submitted as the j_password form field.
#
# Returns the httr response of the POST to j_security_check (callers read `$cookies` from it).
# A warning is raised when that response carries an HTTP error status.
# NOTE(review): a non-error status alone may not prove the login worked - the server may
# answer a failed login with a normal page; confirm against the server's behaviour.
auth <- function(link, user.name = "*****", password = "******") {
  res <- GET(link, add_headers("accept" = "text/json"))

  # `set_cookies = res$cookies` (a named argument) never called set_cookies(); the cookies
  # must be turned into a request config explicitly. The response stores them as a data
  # frame with `name` and `value` columns, while set_cookies() wants a named character vector.
  session.cookies <- res$cookies
  cookie.config <- if (is.data.frame(session.cookies) && nrow(session.cookies) > 0) {
    set_cookies(.cookies = stats::setNames(as.character(session.cookies$value),
                                           session.cookies$name))
  } else {
    config()
  }

  # encode = "form" URL-encodes the fields and sets the
  # application/x-www-form-urlencoded content type (the hand-written header was misspelled,
  # and the hand-written body ignored the user.name/password arguments).
  res <- POST("https://lgloz050.lss.emc.com:58443/APG/j_security_check",
              cookie.config,
              body = list(j_username = user.name, j_password = password),
              encode = "form")
  warn_for_status(res)
  res
}


# Download one CSV report and save it to disk.
#
# link          - URL of the report.
# save.location - path of the CSV file to write.
# cookies       - session cookies to send: either the `$cookies` data frame of an httr
#                 response (columns `name`, `value`) or a named character vector.
#
# Returns TRUE when the report was parsed and written, FALSE otherwise (the failure is logged).
fetch <- function(link, save.location, cookies) {
  # `set_cookies = cookies` (a named argument) never called set_cookies(); build the
  # cookie config explicitly.
  cookie.config <- if (is.data.frame(cookies) && nrow(cookies) > 0) {
    set_cookies(.cookies = stats::setNames(as.character(cookies$value), cookies$name))
  } else if (is.character(cookies) && length(cookies) > 0) {
    set_cookies(.cookies = cookies)
  } else {
    config()
  }

  res <- GET(link,
             add_headers("Authorization" = "Basic **************"),
             cookie.config)

  # The value of tryCatch() is the function result. Assigning a flag inside the error
  # handler only changes a variable local to the handler, so the old code always
  # returned TRUE.
  tryCatch({
    if (http_error(res)) {
      stop("HTTP status ", status_code(res))
    }
    # An HTML body here is not a report (e.g. a login page); do not save it as CSV.
    if (grepl("html", http_type(res), fixed = TRUE)) {
      stop("server returned an HTML page instead of CSV data")
    }
    report.data <- fread(content(res, "text"), header = TRUE)
    fwrite(data.frame(report.data), save.location, row.names = FALSE)
    flog.info(green("'\u2713' - Fetch Completed successfully ..."))
    flog.info(paste("link : ", link))
    TRUE
  },
  error = function(e) {
    flog.error(paste("\u2715 - Not able to fetch data,file not created "))
    flog.error(paste("reason :", conditionMessage(e)))
    FALSE
  })
}

# Each report is saved next to its truth file, with "TruthData" replaced by "testData" in the path.
config.s3$save.location <- sub("TruthData", "testData", config.s3$truthReport, ignore.case = TRUE)
# Log in once; the cookies of this response are reused for every download.
response <- auth(config.s3[1]$link)

# Function Call - fetch all the report data
result[, fetch := FALSE]
result[, fetch.time := 0]
n.reports <- nrow(config.s3)
# winProgressBar() is only available on Windows.
pb <- winProgressBar(title = "Fetching Reports... ", label = "0%", min = 0, max = 100,
                     initial = 0, width = 500)
# seq_len() skips the loop entirely when there are no reports (1:0 would iterate twice).
for (i in seq_len(n.reports)) {
  tic()
  progress <- i * (100 / n.reports)
  setWinProgressBar(pb, progress,
                    label = paste(round(progress), " % \n", config.s3[i]$testReport))
  flog.info(paste("report", i, "started", config.s3[i]$link))
  fetch.success <- fetch(config.s3[i]$link, config.s3[i]$save.location, response$cookies)
  timing <- toc()
  # Update the row in place; toc() returns the start and end timestamps, so the
  # elapsed time in seconds is their difference (not the end timestamp itself).
  set(result, i = i, j = "fetch", value = fetch.success)
  set(result, i = i, j = "fetch.time", value = as.numeric(timing$toc - timing$tic))
}
close(pb)

# Show the outcome without the long link column.
result[, -c("link"), with = FALSE]

This is the code I use to fetch the CSV file, but the downloaded file contains the HTML content of the login page instead. Please tell me where I am making a mistake and what I have to correct or modify to get the correct data.

Please suggest a procedure. Thanks in advance.


Solution

  • I found a solution, which is as follows:

    library(httr) 
    library(rvest)
    # Page that shows the login form, and the report to download after logging in.
    url <- "https://lgloz050.lss.emc.com:58443/APG/"
    dn_url <- "https://lgloz050.lss.emc.com:58443/APG/lookup/Report%20Library/Amazon%20S3/Inventory/Accounts/report.csv"
    # The session keeps the cookies across the requests below.
    session <- html_session(url)
    login_forms <- html_form(session)
    if (length(login_forms) == 0) {
      stop("No form found on ", url)
    }
    # NOTE(review): assumes the first form on the page is the login form - confirm.
    form <- login_forms[[1]]
    fl_fm <- set_values(form,
                        j_username = "*****",
                        j_password = "********")
    main_page <- submit_form(session, fl_fm)
    download <- jump_to(main_page, dn_url)
    # Check the login/download outcome before saving: fail on an HTTP error status,
    # and refuse to save an HTML page (e.g. the login page again) as the report.
    stop_for_status(download$response)
    if (grepl("html", http_type(download$response), fixed = TRUE)) {
      stop("Received an HTML page instead of the report; the login probably failed")
    }
    # Write the raw response bytes to a file named after the last URL segment.
    writeBin(download$response$content, basename(dn_url))
    

    When this code is executed, it logs in successfully and downloads the report, and the downloaded report has exactly the required content. I have done this for one file; next I am trying to download multiple files in one execution.

    Thanks to you all for your support. Let me know if any other solution is possible or if any modification is required in the above code.