python authentication web-scraping web-crawler urllib

Is there any way to log in to a website with Python?

I want to log in and then crawl this site.

I wrote a login script in Python because I need to be logged in to get the salaries for every class (employee level).

Login URL = https://www.jobplanet.co.kr/users/sign_in ; salary-by-class URL = https://www.jobplanet.co.kr/companies/20575/salaries/

import http.cookiejar
import urllib.parse
import urllib.request

from bs4 import BeautifulSoup

# One cookie jar is shared by every request made through `opener`, so any
# cookie set by the login response is sent again with the salaries request.
cj = http.cookiejar.LWPCookieJar()
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
urllib.request.install_opener(opener)

headers = {'User-Agent': 'Mozilla/5.0'}

# NOTE(review): the form field names below are kept as originally written;
# confirm them against the site's real login form (it may also expect extra
# fields such as a CSRF token).
params = urllib.parse.urlencode({"mode":"login", "user_email":"*******", "user_password":"******"})
params = params.encode('utf-8')

# Supplying data= turns the request into a POST that carries the credentials.
# It must actually be opened, otherwise no login happens and the cookie jar
# stays empty.
req = urllib.request.Request("https://www.jobplanet.co.kr/users/sign_in", data=params, headers=headers)
with opener.open(req):
    pass  # only the cookies stored in `cj` are needed from this response

# Fetch the protected page through the same opener so the login cookies are sent.
rej = urllib.request.Request("https://www.jobplanet.co.kr/companies/20575/salaries/", headers=headers)
with opener.open(rej) as res:
    html = res.read()

Solution

You have to find out urlLogin, urlAuth and urlBd (the URL of the page body) from which you want to pull the source code. You can use the Chrome or Firefox devtools (F12 key on Windows).

You already have urlLogin, but urlAuth could be different. Open the browser's devtools and do an actual login: when you hit the Login button you will see the request details in the Network tab. (Don't forget to click the Network checkbox.)

import requests

urlLogin = 'https://example.com/jsp/login.jsp'
urlAuth = 'https://example.com/CheckLoginServlet'
urlBd = 'https://example.com/jsp/batchdownload.jsp'
payload = {
    "username": "username",
    "password": "password"
}

# Session will be closed at the end of with block
with requests.Session() as s:
    # 1. Load the login page first so the session stores any initial cookies.
    s.get(urlLogin)
    print(f"Session cookies {s.cookies.get_dict()}")

    # 2. Submit the credentials. The session sends its stored cookies on its
    #    own, so they must not be passed again through headers= (that would
    #    send the cookie names as HTTP header names).
    r1 = s.post(urlAuth, data=payload)
    print(f'MainFrame text:::: {r1.status_code}')  #200

    # 3. Again cookies will be used through session to access batch download page
    r2 = s.post(urlBd, data=payload)
    print(f'Batch Download status:::: {r2.status_code}')  #200
    source_code = r2.text
    print(f'Batch Download source:::: {source_code}')  #page source