Search code examples
python · web-scraping · rich

rich.table Returning Incorrect Number of Results


I'm scraping movie information by year. When I use a plain print statement, it prints all 100 movies, but when I print with rich.table I get only the first movie.

import requests
from bs4 import BeautifulSoup
from rich.table import Table
from rich.console import Console

# Script from the question: fetch a page for the year the user enters, read
# rank / title / rating out of each table row, and render them with rich.
table = Table()

url = 'https://www.rottentomatoes.com/top/bestofrt/?year='

# The year is used as typed, both in the URL and in the title clean-up below.
year = input('Top 100 Movies for Which Year? ')
response = requests.get(url + year)
html = response.text

soup = BeautifulSoup(html, 'lxml')
containers = soup.find_all('table', class_='table')

for container in containers:
    # [1:] skips the first <tr> of each table (presumably the header row).
    for row in container.find_all('tr')[1:]:
        movie_rank = row.find('td', class_='bold')
        movie_rank = movie_rank.text

        movie_name = row.find('a', class_='unstyled articleLink')
        movie_name = movie_name.text.strip()
        # NOTE: str.strip(chars) removes any of the given characters from both
        # ends of the string; it does not remove the literal "(year)" suffix.
        movie_name = movie_name.strip('(' + year + ')')

        movie_rating = row.find('span', class_='tMeterScore')
        movie_rating = movie_rating.text

        # print(f'{movie_rank} {movie_name.strip()} - rating:{movie_rating}')
        # The three add_column calls sit inside the row loop, so they run on
        # every pass through it.
        table.add_column('Rank')
        table.add_column('Movie')
        table.add_column('Rating')
        # problem is here
        table.add_row(movie_rank, movie_name, movie_rating)

        # The table is printed inside the row loop, and the `break` that
        # follows leaves the row loop after the first row has been handled.
        console = Console()
        console.print(table)
        break


Solution

  • You terminate the inner loop immediately after its first iteration, and you print the table inside that loop. Instead, print the table once, after you have finished constructing it. You should also add the columns once (not on every iteration). Like this:

    import requests
    from bs4 import BeautifulSoup
    from rich.table import Table
    from rich.console import Console
    
    def clean_movie_name(raw_name, year):
        """Return a movie title without padding or a trailing "(year)" tag.

        ``str.strip('(' + year + ')')`` is not used here because ``strip``
        treats its argument as a set of characters and removes them from both
        ends, which mangles titles such as "1917 (2019)".

        Args:
            raw_name: Title text as scraped, possibly padded with whitespace
                and possibly ending in "(<year>)".
            year: The year string whose "(<year>)" tag should be removed.

        Returns:
            The title with surrounding whitespace removed and, when present,
            the exact "(<year>)" suffix dropped.
        """
        name = raw_name.strip()
        suffix = '(' + year + ')'
        if name.endswith(suffix):
            # Drop the tag itself, then the space that separated it from the title.
            name = name[:-len(suffix)].rstrip()
        return name


    def main():
        """Prompt for a year, scrape the ranking page, and print one rich table."""
        # Columns are declared once, before any rows are added.
        table = Table()
        table.add_column('Rank')
        table.add_column('Movie')
        table.add_column('Rating')

        url = 'https://www.rottentomatoes.com/top/bestofrt/?year='

        year = input('Top 100 Movies for Which Year? ').strip()
        # A timeout keeps the script from hanging forever on a stalled connection.
        response = requests.get(url + year, timeout=10)

        soup = BeautifulSoup(response.text, 'lxml')
        containers = soup.find_all('table', class_='table')

        for container in containers:
            # [1:] skips the first <tr> of each table.
            for row in container.find_all('tr')[1:]:
                rank_cell = row.find('td', class_='bold')
                name_link = row.find('a', class_='unstyled articleLink')
                rating_span = row.find('span', class_='tMeterScore')

                # find() returns None when an element is missing; skip such rows
                # instead of failing with AttributeError on `.text`.
                if rank_cell is None or name_link is None or rating_span is None:
                    continue

                table.add_row(
                    rank_cell.text,
                    clean_movie_name(name_link.text, year),
                    rating_span.text,
                )

        # Print once, after every row has been collected.
        console = Console()
        console.print(table)


    if __name__ == '__main__':
        main()