I'm scraping a web page called CVE Trends:
import bs4, requests,webbrowser
# Page to scrape, and the prefix an href must contain to be kept.
LINK = "https://cvetrends.com/"
PRE_LINK = "https://nvd.nist.gov/"

# Download the page; raise_for_status() raises on an HTTP error status.
response = requests.get(LINK)
response.raise_for_status()
soup = bs4.BeautifulSoup(response.text, 'html.parser')

# Take the first <div class="tweet_text"> and every <a> tag inside it.
# NOTE: when the fetched HTML contains no such div, find() yields None and
# the find_all() call fails with
# AttributeError: 'NoneType' object has no attribute 'find_all'.
div_tweets = soup.find('div', class_='tweet_text')
a_tweets = div_tweets.find_all('a')

# Keep only the hrefs that contain PRE_LINK. str() turns a missing href
# (None) into the text "None", which never matches the prefix.
link_tweets = [
    href
    for href in (str(anchor.get('href')) for anchor in a_tweets)
    if PRE_LINK in href
]

from pprint import pprint
pprint(link_tweets)
This is the code that I've written so far. I've tried many approaches, but it always gives the same error:
'NoneType' object has no attribute 'find_all'
Can someone help me please? I really need this. Thanks in advance for any answer.
This happens because the response does not contain the content you expect.
The website loads its content with JavaScript, so the data is not present in the HTML returned by the request; that is why `soup.find(...)` returns `None`.
Instead of scraping the page, you can get the data from https://cvetrends.com/api/cves/24hrs
Here is one solution:
import requests
import json
from urlextract import URLExtract
# JSON endpoint queried instead of the JavaScript-rendered page, and the
# prefix a URL must contain to be collected.
LINK = "https://cvetrends.com/api/cves/24hrs"
PRE_LINK = "https://nvd.nist.gov/"

# Collects every URL containing PRE_LINK that is found in a tweet text.
link_tweets = []

# Library object used to pull URLs out of free text.
extractor = URLExtract()

# Fetch the JSON response. The timeout keeps the script from hanging on a
# stalled connection, and raise_for_status() stops on an HTTP error status
# instead of trying to parse an error page as JSON.
response = requests.get(LINK, timeout=30)
response.raise_for_status()

# Convert the response text to a Python object.
# NOTE(review): assumes the payload is a JSON object of the form
# {"data": [{"tweets": [{"tweet_text": ...}, ...]}, ...]} - confirm
# against the live API.
twitt_json = json.loads(response.text)

# `or []` / `or ''` make a missing or null field behave as empty rather
# than raising TypeError.
for twitt_data in twitt_json.get('data') or []:
    for twitt in twitt_data.get('tweets') or []:
        twitt_text = twitt.get('tweet_text') or ''
        # Cheap substring check first; only run URL extraction on tweets
        # that can contain a matching link.
        if PRE_LINK not in twitt_text:
            continue
        for url in extractor.find_urls(twitt_text):
            if PRE_LINK in url:
                # Store the link itself, not the whole tweet text.
                link_tweets.append(url)

print(link_tweets)