Search code examples
json · python-3.x · pandas · ipfs

How to fill in missing column value?


# Fetch item metadata from IPFS for pages 1 through 99 and collect the value
# of each item's ACCESSORIES trait into a list.

# Import libraries
from bs4 import BeautifulSoup
import requests
import pandas as pd
import time
import ast

start_time = time.time()
# One Session is shared by every request in the loop below.
s = requests.Session()

#Get URL and extract content
page=1
traits = []
# Only `accessories` is filled by this snippet; the other lists stay empty.
accessories, backgrounds, shoes = [], [], []

# Runs for page = 1 .. 99 (the loop stops once page reaches 100).
while page != 100:

    # Single 'arg' query parameter of the form "<hash>/<page>".
    params = {
        ('arg', f"Qmer3VzaeFhb7c5uiwuHJbRuVCaUu72DcnSoUKb1EvnB2x/{page}"),
    }

    content = s.get('https://ipfs.infura.io:5001/api/v0/cat', params=params, auth=('', ''))
    soup = BeautifulSoup(content.text, 'html.parser')
    page = page + 1
    
    # Parse the response text as a Python literal and keep its 'attributes' entry.
    traits = ast.literal_eval(soup.text)['attributes']

    # Keep only the rows whose trait_type is ACCESSORIES.
    df = pd.DataFrame(traits)
    df1 = df[df['trait_type']=='ACCESSORIES']

    # Raises IndexError when the filtered frame is empty, i.e. when the item
    # has no ACCESSORIES trait.
    accessories.append(df1['value'].values[0])

When I run the above code I get the following error:

IndexError: index 0 is out of bounds for axis 0 with size 0

This happens because not every item has an "ACCESSORIES" trait data point. So how would I go about adding/filling in an ACCESSORIES trait for those items that don't have one with an empty, nan, or 0 value?


Solution

  • The following code solves this issue:

    # Fetch item metadata from IPFS for pages 1 through 99 and collect the
    # value of each item's ACCESSORIES trait. Items without that trait get a
    # 'NONE' placeholder so `accessories` keeps exactly one entry per item.

    # Import libraries
    from bs4 import BeautifulSoup
    import requests
    import pandas as pd
    import time
    import ast

    start_time = time.time()
    # One Session is shared by every request in the loop below.
    s = requests.Session()

    #Get URL and extract content
    page=1
    traits = []
    # Only `accessories` is filled by this snippet; the other lists stay empty.
    accessories, backgrounds, shoes = [], [], []

    # Runs for page = 1 .. 99 (the loop stops once page reaches 100).
    while page != 100:

        # Single 'arg' query parameter of the form "<hash>/<page>".
        params = {
            ('arg', f"Qmer3VzaeFhb7c5uiwuHJbRuVCaUu72DcnSoUKb1EvnB2x/{page}"),
        }

        content = s.get('https://ipfs.infura.io:5001/api/v0/cat', params=params, auth=('', ''))
        soup = BeautifulSoup(content.text, 'html.parser')
        page = page + 1

        # Parse the response text as a Python literal and keep its 'attributes' entry.
        traits = ast.literal_eval(soup.text)['attributes']

        # Keep only the rows whose trait_type is ACCESSORIES.
        df = pd.DataFrame(traits)
        df1 = df[df['trait_type']=='ACCESSORIES']

        # Indexing [0] on an empty selection raises IndexError; that is the
        # "item has no ACCESSORIES trait" case. Catch only that error and
        # append a placeholder, so the list stays aligned with the items
        # instead of silently skipping them.
        try:
            accessories.append(df1['value'].values[0])
        except IndexError:
            accessories.append('NONE')