I need to apply consecutive filtering and logical indexing to a pandas DataFrame to display the number of plays on each day of the week for two cities, as you can see below.
# importing pandas
import pandas as pd
# reading the file and storing it to df
df = pd.read_csv('/datasets/music_project_en.csv')
# renaming columns to snake_case; the keys must match the raw CSV headers
# exactly (stray spaces included), because rename() with its default
# errors='ignore' silently skips any label it cannot find
df = df.rename(columns={
    ' userID': 'user_id',
    'Track': 'track',
    ' City ': 'city',
    'Day': 'day',
})
# looping over column names and replacing missing values with 'unknown'
columns_to_replace = ['track', 'artist', 'genre']
for column in columns_to_replace:
    df[column] = df[column].fillna('unknown')
# removing obvious duplicates; the index is rebuilt so it stays gap-free
# after rows are dropped
df = df.drop_duplicates().reset_index(drop=True)
# function for replacing implicit duplicates
def replace_wrong_genres(wrong_genres, correct_genre):
    """Replace every misspelled genre label with the canonical one.

    Works on the module-level ``df``: its 'genre' column is overwritten
    in place, and nothing is returned.

    Args:
        wrong_genres: iterable of genre values to be replaced.
        correct_genre: the value that replaces each of them.
    """
    for wrong_genre in wrong_genres:
        # replace() with a scalar matches whole cell values only, so
        # replacing 'hip' does not touch 'hip-hop'
        df['genre'] = df['genre'].replace(wrong_genre, correct_genre)
# removing implicit duplicates
replace_wrong_genres(['hip', 'hop', 'hip-hop'], 'hiphop')
# <creating the function number_tracks()>
# the day and the city are text values in df, so they are passed as quoted
# strings; bare Monday / Springfield would be undefined variable names
print(number_tracks('Monday', 'Springfield'))
# We'll declare a function with two parameters: day=, city=.
# Let the track_list variable store the df rows where
# the value in the 'day' column is equal to the day= parameter and, at the same time,
# the value in the 'city' column is equal to the city= parameter (apply consecutive filtering
# with logical indexing).
# Let the track_list_count variable store the number of 'user_id' column values in track_list
# (found with the count() method).
# Let the function return a number: the value of track_list_count.
# The function counts tracks played for a certain city and day.
# It first retrieves the rows with the intended day from the table,
# then filters out the rows with the intended city from the result,
# then finds the number of 'user_id' values in the filtered table,
# then returns that number.
# To test and see what it returns, wrap test function calls in print(),
# but don't leave any test behind, otherwise your result will not validate correctly.
# number_tracks() takes the day first and the city second
# 1. the number of songs played in Springfield on Monday
print("1: " + str(number_tracks('Monday', 'Springfield')))
# 2. the number of songs played in Shelbyville on Monday
print("2: " + str(number_tracks('Monday', 'Shelbyville')))
# 3. the number of songs played in Springfield on Wednesday
print("3: " + str(number_tracks('Wednesday', 'Springfield')))
# 4. the number of songs played in Shelbyville on Wednesday
print("4: " + str(number_tracks('Wednesday', 'Shelbyville')))
# 5. the number of songs played in Springfield on Friday
print("5: " + str(number_tracks('Friday', 'Springfield')))
# 6. the number of songs played in Shelbyville on Friday
print("6: " + str(number_tracks('Friday', 'Shelbyville')))
I am required to create a function that uses a for loop with if statements and then display the result. I tried the code below under the comment that says # <creating the function number_tracks()>:
def number_tracks(day, city):
    """Count the tracks played in a given city on a given day.

    Reads the module-level ``df`` and applies consecutive filtering with
    logical indexing: first by day, then by city.

    Args:
        day: value to match in the 'day' column (e.g. 'Monday').
        city: value to match in the 'city' column (e.g. 'Springfield').

    Returns:
        The number of non-missing 'user_id' values in the filtered rows.
    """
    # A plain `for ... in df` loop walks over the column labels, not the
    # rows, so the rows are selected with boolean masks instead.
    day_rows = df[df['day'] == day]
    track_list = day_rows[day_rows['city'] == city]
    # count() ignores missing values in the column
    track_list_count = track_list['user_id'].count()
    return track_list_count
# the day and the city are passed as quoted strings; bare Monday /
# Springfield would be looked up as (undefined) variable names
print(number_tracks('Monday', 'Springfield'))
This might help optimize your code.
# fill the gaps in several columns with a single fillna() call
text_columns = ['track', 'artist', 'genre']
df[text_columns] = df[text_columns].fillna('unknown')
# drop fully duplicated rows, then renumber the index from zero
deduplicated = df.drop_duplicates()
df = deduplicated.reset_index(drop=True)
# a list of values can be mapped onto one replacement in a single call
genre_variants = ['hip', 'hop', 'hip-hop']
df['genre'] = df['genre'].replace(genre_variants, 'hiphop')
#function
def number_tracks(day, city):
    """Return how many tracks were played in ``city`` on ``day``.

    The rows of the module-level ``df`` are narrowed in two consecutive
    steps (day, then city); the result is the number of non-missing
    'user_id' values among the remaining rows.

    Args:
        day: value to match in the 'day' column.
        city: value to match in the 'city' column.

    Returns:
        The count of 'user_id' values for that day and city.
    """
    # step 1: keep only the rows for the requested day
    track_list = df[df['day'] == day]
    # step 2: from those, keep only the rows for the requested city
    track_list = track_list[track_list['city'] == city]
    track_list_count = track_list['user_id'].count()
    return track_list_count
#first question
# number_tracks() expects the day first and the city second; with the
# arguments swapped no row matches and the count is always 0
print("1: " + str(number_tracks('Monday', 'Springfield')))