
Appending new column and rows from web scrape loop


What can I add to the end of my code so that my list of URLs goes into column 1 of a dataframe and the scraped text from the for loop goes into column 2, with each URL matched to its own scraped data? I'm trying to get -

col1    col2
url1    A Details
url2    B Details

The code I have is:

urls = ['url1','url2']

from bs4 import BeautifulSoup
import requests
from time import sleep
from random import randint

for link in urls:
  sleep(randint(11,23))
  headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36',}
  req = requests.get(link, headers = headers)
  soup = BeautifulSoup(req.content, features="lxml")
  for script in soup(["script", "style"]): 
    script.extract()
  # extract visible text once all script/style tags are removed
  text = soup.get_text()
  lines = (line.strip() for line in text.splitlines())
  chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
  text = '\n'.join(chunk for chunk in chunks if chunk)
  print(text)

Solution

  • The easiest way to do this is to append your data to lists inside your loop and then create a dataframe from the two lists.

    from bs4 import BeautifulSoup
    import requests
    import pandas as pd
    from time import sleep
    from random import randint
    
    # initialize empty lists to collect the data
    url_list = []
    details_list = []
    
    
    for link in urls:
      sleep(randint(11,23))
      headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36',}
      req = requests.get(link, headers = headers)
      soup = BeautifulSoup(req.content, features="lxml")
      for script in soup(["script", "style"]): 
        script.extract()
      # extract visible text once all script/style tags are removed;
      # doing this outside the loop above avoids appending the same
      # link and text once per removed tag
      text = soup.get_text()
      lines = (line.strip() for line in text.splitlines())
      chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
      text = '\n'.join(chunk for chunk in chunks if chunk)
    
      url_list.append(link)  # append the URL for this iteration
    
      if not text:
          # no visible text was scraped from this page
          details_list.append('none')
      else:
          details_list.append(text)  # append the scraped text
    
    
    results_df = pd.DataFrame() # creates empty dataframe
    results_df['col1'] = url_list # adds column to dataframe with URLs
    results_df['col2'] = details_list # adds column to dataframe with details
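As a variant, the two lists can also be passed to the `pd.DataFrame` constructor in a single step instead of adding the columns one at a time. A minimal sketch, with placeholder data standing in for the lists built in the scraping loop:

```python
import pandas as pd

# placeholder data standing in for the lists built in the scraping loop
url_list = ['url1', 'url2']
details_list = ['A Details', 'B Details']

# build both columns in one constructor call; each URL lines up with
# its details because the lists were appended to in the same iteration
results_df = pd.DataFrame({'col1': url_list, 'col2': details_list})
print(results_df)
```

This produces the same dataframe as the column-by-column version, and keeps the construction in one place.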