I am very new to coding. I need to solve one practical task: get information from a website and write it to Excel (I hope I can manage that part by following guides). The main issue is that I just can't get into the site (the website is free). Can you look over my code? When I run it, I get:
[] Process finished with exit code 0
import requests
from bs4 import BeautifulSoup
import pytest
import time
import json
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support import expected_conditions
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
# Address of the page that the requests-based scraper downloads.
URL = 'http://way2drug.com/passonline/'

# Request headers handed to requests.get() in get_html(); they carry a
# desktop Chrome user-agent string and a browser-style accept list.
HEADERS = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36'
    ),
    'accept': (
        'text/html,application/xhtml+xml,application/xml;q=0.9,'
        'image/avif,image/webp,image/apng,*/*;q=0.8,'
        'application/signed-exchange;v=b3;q=0.9'
    ),
}
class Test1:
    """Browser walkthrough of http://way2drug.com/passonline/ driven by Selenium.

    The single test opens the page, fills in the ``user_login`` and
    ``user_password`` fields, submits them, opens the input section and
    submits a structure string through the ``smi`` field.

    ``setup_method`` / ``teardown_method`` are named after pytest's
    xunit-style per-test hooks, so each test gets its own Chrome session.
    """

    # Seconds to wait for an element to become clickable before giving up.
    WAIT_TIMEOUT = 10

    def setup_method(self, method):
        """Start a new Chrome session and reset the per-test variable store."""
        self.driver = webdriver.Chrome()
        self.vars = {}

    def teardown_method(self, method):
        """Shut down the Chrome session started in ``setup_method``."""
        self.driver.quit()

    def _clickable(self, by, value):
        """Return the element located by ``(by, value)`` once it is clickable.

        Several targets only appear after a previous click, so looking them
        up immediately with ``find_element`` races the page. Waiting here
        removes that race.

        Raises:
            selenium.common.exceptions.TimeoutException: if the element does
                not become clickable within ``WAIT_TIMEOUT`` seconds.
        """
        wait = WebDriverWait(self.driver, self.WAIT_TIMEOUT)
        return wait.until(
            expected_conditions.element_to_be_clickable((by, value))
        )

    def test_1(self):
        """Replay the recorded steps: open, sign in, submit a structure."""
        # Test name: 1
        # Step # | name | target | value | comment
        # 1 | open | /passonline/ | |
        self.driver.get("http://way2drug.com/passonline/")
        # 2 | setWindowSize | 1920x1030 | |
        self.driver.set_window_size(1920, 1030)
        # 3 | click | css=#registration img | |
        self._clickable(By.CSS_SELECTOR, "#registration img").click()
        # 4 | click | name=user_login | |
        login_field = self._clickable(By.NAME, "user_login")
        login_field.click()
        # 5 | type | name=user_login | |
        # NOTE: placeholder value; never commit or post real credentials.
        login_field.send_keys("MY USER")
        # 6 | click | id=page1 | |
        self._clickable(By.ID, "page1").click()
        # 7 | type | name=user_password | |
        # NOTE: placeholder value; never commit or post real credentials.
        self._clickable(By.NAME, "user_password").send_keys("MY PASS")
        # 8 | click | id=register | |
        self._clickable(By.ID, "register").click()
        # 9 | click | id=myHeader1 | |
        self._clickable(By.ID, "myHeader1").click()
        # 10 | click | id=smiles | |
        self._clickable(By.ID, "smiles").click()
        # Focus the "smi" input, then type the structure string into it
        # (presumably SMILES notation, judging by the element ids).
        smi_field = self._clickable(By.ID, "smi")
        smi_field.click()
        smi_field.send_keys(
            "CC1(C)C(O)CC[C@@]2(C)C1CC[C@]3(C)C2CCC4[C@@]3(C)CC[C@]5(C(O)=O)C4[C@H](C)C(C)=CC5"
        )
        # Submit the form via the fourth child input of #myContent4.
        self._clickable(By.CSS_SELECTOR, "#myContent4 input:nth-child(4)").click()
def get_html(url, params=None, timeout=10):
    """Fetch ``url`` with the module's browser-like ``HEADERS``.

    Args:
        url: Address to request.
        params: Optional query-string parameters passed through to
            ``requests.get``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on an unresponsive
            host.

    Returns:
        The ``requests.Response`` object; callers inspect ``status_code``
        and ``text`` themselves.

    Raises:
        requests.exceptions.Timeout: if the server does not answer within
            ``timeout`` seconds.
    """
    return requests.get(url, headers=HEADERS, params=params, timeout=timeout)
def get_content(html):
    """Print every ``<a>`` tag with class ``Antineoplastic`` found in ``html``.

    Args:
        html: Markup text to parse with BeautifulSoup's ``html.parser``.

    The matches are printed as a list; nothing is returned.
    """
    matching_links = BeautifulSoup(html, 'html.parser').find_all(
        'a', class_='Antineoplastic'
    )
    print(matching_links)
def parse():
    """Download ``URL`` and print the matching links, or an error message.

    The page body is handed to ``get_content`` only when the server answers
    with HTTP 200; any other status prints the fallback message instead.
    """
    response = get_html(URL)
    if response.status_code != 200:
        print('ALL YOUR BASE ARE BELONG TO US')
        return
    get_content(response.text)
if __name__ == "__main__":
    # Run the scraper only when the file is executed as a script. An
    # unguarded call would also fire a network request whenever the module
    # is merely imported, e.g. when a test runner loads it.
    parse()
Please note: never, ever post real credentials.
You mentioned that you have to perform a login, and Selenium is a good choice for that. However, what your script actually does is call `parse()`, which only performs a plain HTTP call via `requests`; the Selenium steps are never executed. So if you take a look into your `soup`, you won't find what you are looking for.
Perform your Selenium actions and navigate to the page you want to scrape. In the next step, pass your `driver.page_source` to `BeautifulSoup` and find your elements:
# Parse the markup currently held by the Selenium-driven browser (not a
# separate requests download), then list the <a> tags with the target class.
rendered_html = driver.page_source
soup = BeautifulSoup(rendered_html, 'html.parser')
items = soup.find_all('a', class_='Antineoplastic')
print(items)
If your selection is right, you will get your result.
Following up on your comments, here is a sketch of where you could end up. For debugging the intermediate steps, you should ask a new question with focused examples:
import requests
from bs4 import BeautifulSoup
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support import expected_conditions
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
def _clickable(wait, by, value):
    """Block until the element located by (by, value) is clickable; return it."""
    return wait.until(expected_conditions.element_to_be_clickable((by, value)))


driver = webdriver.Chrome()
try:
    # Several targets only appear after a previous click, so every lookup
    # waits (up to 10 seconds) instead of racing the page with find_element.
    wait = WebDriverWait(driver, 10)

    driver.get("http://way2drug.com/passonline/")
    driver.set_window_size(1920, 1030)

    # Open the login form and fill it in.
    # NOTE: placeholder values; never commit or post real credentials.
    _clickable(wait, By.CSS_SELECTOR, "#registration img").click()
    login_field = _clickable(wait, By.NAME, "user_login")
    login_field.click()
    login_field.send_keys("MY USER")
    _clickable(wait, By.ID, "page1").click()
    _clickable(wait, By.NAME, "user_password").send_keys("MY PASS")
    _clickable(wait, By.ID, "register").click()

    # Open the input section and submit the structure string.
    _clickable(wait, By.ID, "myHeader1").click()
    _clickable(wait, By.ID, "smiles").click()
    smi_field = _clickable(wait, By.ID, "smi")
    smi_field.click()
    smi_field.send_keys(
        "CC1(C)C(O)CC[C@@]2(C)C1CC[C@]3(C)C2CCC4[C@@]3(C)CC[C@]5(C(O)=O)C4[C@H](C)C(C)=CC5"
    )
    _clickable(wait, By.CSS_SELECTOR, "#myContent4 input:nth-child(4)").click()

    # NOTE(review): if the result is rendered asynchronously after the click
    # above, page_source may be read too early; add a wait for a result
    # element here once its locator is confirmed.
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    items = soup.find_all('a', class_='Antineoplastic')
    print(items)
finally:
    # Always close the browser, even when a step above raises; otherwise the
    # Chrome process is left running.
    driver.quit()