enter image description hereHi I am trying to do a web scraping using python, I want to extract the job title, company name, salary and skills required. I managed to get all the information I wanted except one thing which is skills required. Nir Elbaz helped me to partially solve this problem but for some reason I only get the first skill required for each job description. Here is my code as well as a screenshot of the results I got.
import pandas as pd
import requests
from bs4 import BeautifulSoup
import csv
from itertools import zip_longest
job_title = []
company_name = []
location = []
salary = []
links = []
skills = []
page_number = 0
while True:
result = requests.get(f"https://www.reed.co.uk/jobs/data-analyst-jobs?pageno={page_number}")
src = result.content
soup = BeautifulSoup(src, 'lxml')
page_limit = soup.find('div', {"class":"page-counter"})
Job_titles = soup.find_all('h3', {"class":"title"})
Company_names = soup.find_all('a', {'class':"gtmJobListingPostedBy"})
Locations = soup.find_all('li', {'class':"location"})
Salary = soup.find_all('li',{"class": "salary"})
#Job_skills = soup.find('strong').text
#Job_skills = soup.find_all('div',{"class":"description"})
for i in range(len(Job_titles)):
job_title.append(Job_titles[i].text)
links.append(Job_titles[i].find('a').attrs['href'])
company_name.append(Company_names[i].text)
location.append(Locations[i].text)
salary.append(Salary[i].text)
page_number += 1
if(page_number >= 2):
print("pages ended here")
break
for link in links:
url = "https://www.reed.co.uk/" + link
result = requests.get(url)
src = result.content
soup = BeautifulSoup(src, 'lxml')
requirements = soup.find('span', {'itemprop':"description"}).find('ul')
#print(requirements)
skills_text = ""
if not(requirements==None):
for li in requirements.text.split(';'):
#print(str(li))
skills_text += str(li)
#skills_text = skills_text[0:3]
skills.append(skills_text.strip())
file_list = [job_title, company_name, location, salary, skills]
exported = zip_longest(*file_list)
Try to replace this section in your code with this:
if not(requirements==None):
for li in requirements.find('li'):
print(str(li))
skills_text += str(li) + ', '
skills_text = skills_text[:-2]
skills.append(skills_text)