Search code examples
python, loops, scale

Python Scaling loops


For each letter in the alphabet, the code should go to website.com/a and grab a table. Then it should check for a "Next" button, grab its link, make soup from it, grab the next table, and repeat until there is no valid next link. Then it should move on to website.com/b (the next letter in the alphabet) and repeat. But I can only get as far as 2 pages for each letter: the first for loop grabs page 1 and the second grabs page 2 for each letter. I know I could write a loop for as many pages as needed, but that is not scalable. How can I fix this?

from nfl_fun import make_soup
import urllib.request
import os
from string import ascii_lowercase
import requests

# NOTE: `letter` is rebound by the `for letter in ...` loops below, so this
# initial assignment has no lasting effect.
letter = ascii_lowercase
# Site root; prefixed to the "next page" href in the second pass.
link = "https://www.nfl.com"

# First pass: for every letter, load that letter's first page and print the
# href of each <a> found inside any <tbody>.
# `make_soup` comes from the local `nfl_fun` module -- presumably it fetches
# the URL and returns a BeautifulSoup object; confirm against nfl_fun.
for letter in ascii_lowercase:
    soup = make_soup(f"https://www.nfl.com/players/active/{letter}")
    for tbody in soup.findAll("tbody"):
        for tr in tbody.findAll("a"):
            if tr.has_attr("href"):
                print(tr.attrs["href"])

# Second pass: for every letter, load the first page again and follow each
# "next" link found in its footer exactly once. Nothing here goes back and
# follows the "next" link of the page that was just loaded, so no page beyond
# the second is ever requested for a letter.
for letter in ascii_lowercase:
    soup = make_soup(f"https://www.nfl.com/players/active/{letter}")
    # NOTE(review): the second argument is a set literal, not a dict;
    # presumably it is treated as a CSS-class filter -- confirm.
    for page in soup.footer.findAll("a", {"nfl-o-table-pagination__next"}):
        # These two empty-string initialisations are overwritten immediately.
        pagelink = ""
        footer = ""
        footer = page.attrs["href"]
        # Build the absolute URL from the site root and the link's href.
        pagelink = f"{link}{footer}"        
        print(footer)
        # The URL is requested here only to inspect the status code; it is
        # then handed to make_soup again below (presumably a second download
        # of the same page -- confirm against nfl_fun).
        getpage = requests.get(pagelink)                            
        if getpage.status_code == 200:
            next_soup = make_soup(pagelink)
            # `next_page` is never used in the loop body: the second page's
            # table links are printed once per "next" link found on that
            # page, and not at all when it has none.
            for next_page in next_soup.footer.findAll("a", {"nfl-o-table-pagination__next"}):
                # Prints the Response object returned by requests.get above.
                print(getpage)
                for tbody in next_soup.findAll("tbody"):
                    for tr in tbody.findAll("a"):
                        if tr.has_attr("href"):
                            print(tr.attrs["href"])
                            # Rebinding `soup` does not extend the outer
                            # `for page in ...` loop: its iterable was already
                            # evaluated from the first page's soup.
                            soup = next_soup


Thank You again,


Solution

  • There is an element on the page that indicates when the "Next" button is inactive, which tells you that you are on the last page. So you can use a while loop and keep going to the next page until you reach the last page (i.e. "Next" is inactive), then stop the loop and move on to the next letter:

    from io import StringIO
    import re
    from string import ascii_lowercase

    import pandas as pd
    import requests
    from bs4 import BeautifulSoup
    
    # Scrape every page of the active-players table for each letter a-z and
    # combine all pages into a single DataFrame named `results`.
    letters = ascii_lowercase
    link = "https://www.nfl.com"

    # Matches the class of the "Next" control when it is rendered as inactive,
    # i.e. on the last page for a letter. Compiled once, outside the loops.
    inactive_next = re.compile('.*pagination__next.*is-inactive.*')

    # Collect one DataFrame per page and concatenate once at the end.
    # DataFrame.append was removed in pandas 2.0, and calling it in a loop
    # re-copied the whole accumulated table on every page.
    frames = []
    for letter in letters:
        after = ''  # pagination cursor; empty string requests the first page
        page = 1
        while True:
            # Get the table
            url = f"{link}/players/active/{letter}?query={letter}&after={after}"
            # The second positional argument of requests.get is `params`, not
            # a parser name, so nothing but the URL (and a timeout) is passed.
            response = requests.get(url, timeout=30)
            # Fail loudly on an HTTP error instead of parsing an error page.
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')

            # Wrap the markup in StringIO: passing a literal HTML string to
            # read_html is deprecated in recent pandas releases.
            frames.append(pd.read_html(StringIO(response.text))[0])

            print("{letter}: Page: {page}".format(letter=letter.upper(), page=page))

            # Stop when there are no pagination buttons at all (single page)
            # or when the "Next" button is marked inactive (last page).
            buttons = soup.find('div', {'class': 'nfl-o-table-pagination__buttons'})
            if buttons is None or buttons.find('span', {'class': inactive_next}):
                break

            # Otherwise pull the cursor for the next page out of the link.
            next_link = buttons.find('a', {'title': 'Next'})
            if next_link is None or not next_link.get('href'):
                break
            after = next_link['href'].split('after=')[-1]
            page += 1

    if frames:
        results = pd.concat(frames, ignore_index=True, sort=False)
    else:
        results = pd.DataFrame()
    

    Output:

    print (results)
                      Player          Current Team Position Status
    0         Chidobe Awuzie        Dallas Cowboys       CB    ACT
    1             Josh Avery      Seattle Seahawks       DT    ACT
    2           Genard Avery   Philadelphia Eagles       DE    ACT
    3        Anthony Averett      Baltimore Ravens       CB    ACT
    4              Lee Autry         Chicago Bears       DT    ACT
    5           Denico Autry    Indianapolis Colts       DT    ACT
    6           Tavon Austin        Dallas Cowboys       WR    UFA
    7        Blessuan Austin         New York Jets       CB    ACT
    8         Antony Auclair  Tampa Bay Buccaneers       TE    ACT
    9      Jeremiah Attaochu        Denver Broncos       LB    ACT
    10       Hunter Atkinson       Atlanta Falcons       OT    ACT
    11           John Atkins         Detroit Lions       DE    ACT
    12           Geno Atkins    Cincinnati Bengals       DT    ACT
    13        Marcell Ateman     Las Vegas Raiders       WR    ACT
    14          George Aston       New York Giants       RB    ACT
    15    Dravon Askew-Henry       New York Giants       DB    ACT
    16          Devin Asiasi  New England Patriots       TE    ACT
    17    George Asafo-Adjei       New York Giants       OT    ACT
    18             Ade Aruna     Las Vegas Raiders       DE    ACT
    19       Grayland Arnold   Philadelphia Eagles      SAF    ACT
    20            Dan Arnold     Arizona Cardinals       TE    ACT
    21         Damon Arnette     Las Vegas Raiders       CB    UDF
    22     Ray-Ray Armstrong        Dallas Cowboys       LB    UFA
    23     Ka'John Armstrong        Denver Broncos       OT    ACT
    24     Dorance Armstrong        Dallas Cowboys       DE    ACT
    25     Cornell Armstrong        Houston Texans       CB    ACT
    26       Terron Armstead    New Orleans Saints       OT    ACT
    27      Ryquell Armstead  Jacksonville Jaguars       RB    ACT
    28         Arik Armstead   San Francisco 49ers       DE    ACT
    29            Alex Armah     Carolina Panthers       FB    ACT
                     ...                   ...      ...    ...
    3180       Clive Walford        Miami Dolphins       TE    UFA
    3181        Cameron Wake      Tennessee Titans       DE    UFA
    3182     Corliss Waitman   Pittsburgh Steelers        P    ACT
    3183         Rick Wagner     Green Bay Packers       OT    ACT
    3184        Bobby Wagner      Seattle Seahawks      MLB    ACT
    3185        Ahmad Wagner         Chicago Bears       WR    ACT
    3186        Colby Wadman        Denver Broncos        P    ACT
    3187      Christian Wade         Buffalo Bills       RB    ACT
    3188     LaAdrian Waddle         Buffalo Bills       OT    UFA
    3189      Oshane Ximines       New York Giants       LB    ACT
    3190        Trevon Young      Cleveland Browns       DE    ACT
    3191           Sam Young     Las Vegas Raiders       OT    ACT
    3192         Kenny Young      Los Angeles Rams      ILB    ACT
    3193         Chase Young   Washington Redskins       DE    UDF
    3194        Bryson Young       Atlanta Falcons       DE    ACT
    3195        Isaac Yiadom        Denver Broncos       CB    ACT
    3196         T.J. Yeldon         Buffalo Bills       RB    ACT
    3197         Deon Yelder    Kansas City Chiefs       TE    ACT
    3198         Rock Ya-Sin    Indianapolis Colts       CB    ACT
    3199     Eddie Yarbrough     Minnesota Vikings       DE    ACT
    3200       Marshal Yanda      Baltimore Ravens       OG    ACT
    3201         Tavon Young      Baltimore Ravens       CB    ACT
    3202     Brandon Zylstra     Carolina Panthers       WR    ACT
    3203       Jabari Zuniga         New York Jets       DE    UDF
    3204       Greg Zuerlein        Dallas Cowboys        K    ACT
    3205        Isaiah Zuber  New England Patriots       WR    ACT
    3206       Justin Zimmer      Cleveland Browns       DT    ACT
    3207      Anthony Zettel     Minnesota Vikings       DE    ACT
    3208       Kevin Zeitler       New York Giants       OG    ACT
    3209   Olamide Zaccheaus       Atlanta Falcons       WR    ACT
    
    [3210 rows x 4 columns]