I started coding not too long ago and jumped into the Titanic exercise from Kaggle. I am trying to replace the NaN values in the Age column of some passengers with an age that I think fits their Prefix (Mr., Ms., Master...).
I tried to do this with a for loop, but it does not seem to work: it gives the same value to everyone with a NaN value in Age, regardless of their Prefix. What am I doing wrong, and how can I make it right?
import math
# Asker's original attempt, reproduced as posted with only the indentation
# restored so that it parses. The logic is intentionally left unchanged.
# NOTE: two things in this code contribute to the behaviour described above:
#   * `a and b or c` is evaluated as `(a and b) or c`, so the NaN check only
#     guards the first prefix comparison in each condition;
#   * `database['Age'] = value` assigns to the whole column, not to row i.
for i in range(len(database)):
    if math.isnan(database['Age'][i]) == True and database['Prefix'][i] == ' Capt.' or database['Prefix'][i] == ' Col.':
        database['Age'] = 65.0
    elif math.isnan(database['Age'][i]) == True and database['Prefix'][i] == ' Sir.' or database['Prefix'][i] == ' Major.' or database['Prefix'][i] == ' Rev.' or database['Prefix'][i] == ' Lady.' or database['Prefix'][i] == ' Dr.':
        database['Age'] = 47.5
    elif math.isnan(database['Age'][i]) == True and database['Prefix'][i] == ' Don.' or database['Prefix'][i] == ' Jonkheer.' or database['Prefix'][i] == ' Mrs.' or database['Prefix'][i] == ' the Countess.':
        database['Age'] = 36.5
    elif math.isnan(database['Age'][i]) == True and database['Prefix'][i] == ' Mr.' or database['Prefix'][i] == ' Ms.':
        database['Age'] = 29.0
    elif math.isnan(database['Age'][i]) == True and database['Prefix'][i] == ' Mme.' or database['Prefix'][i] == ' Mlle.':
        database['Age'] = 24.0
    elif math.isnan(database['Age'][i]) == True and database['Prefix'][i] == ' Miss.':
        database['Age'] = 21.0
    elif math.isnan(database['Age'][i]) == True and database['Prefix'][i] == ' Master.':
        database['Age'] = 3.5
This is BEFORE the for loop: Titanic1.py And this is AFTER the for loop: Titanic2.py
Thanks a lot!!
Several things can be fixed regarding your code.
First, we will put the common element of all the if/elifs in a single if:
import math
# Step 1: the NaN check shared by every branch is hoisted into one outer `if`,
# so each inner condition only has to compare the prefix.
# NOTE: the assignments still target the whole 'Age' column, exactly as in the
# code being refactored; this step only restructures the conditions.
for i in range(len(database)):
    if math.isnan(database['Age'][i]) == True:
        if database['Prefix'][i] == ' Capt.' or database['Prefix'][i] == ' Col.':
            database['Age'] = 65.0
        elif database['Prefix'][i] == ' Sir.' or database['Prefix'][i] == ' Major.' or database['Prefix'][i] == ' Rev.' or database['Prefix'][i] == ' Lady.' or database['Prefix'][i] == ' Dr.':
            database['Age'] = 47.5
        elif database['Prefix'][i] == ' Don.' or database['Prefix'][i] == ' Jonkheer.' or database['Prefix'][i] == ' Mrs.' or database['Prefix'][i] == ' the Countess.':
            database['Age'] = 36.5
        elif database['Prefix'][i] == ' Mr.' or database['Prefix'][i] == ' Ms.':
            database['Age'] = 29.0
        elif database['Prefix'][i] == ' Mme.' or database['Prefix'][i] == ' Mlle.':
            database['Age'] = 24.0
        elif database['Prefix'][i] == ' Miss.':
            database['Age'] = 21.0
        elif database['Prefix'][i] == ' Master.':
            database['Age'] = 3.5
Then we will get rid of all the repeated database["Prefix"][i] lookups by saving that value into a variable, and use the in operator to avoid the many prefix == "something" or prefix == "something else" comparisons.
# Step 2: read the prefix once per row and test membership with `in` instead
# of chaining `==` comparisons with `or`.
# NOTE: the assignments still target the whole 'Age' column, exactly as in the
# code being refactored; only the condition checks change in this step.
for i in range(len(database)):
    if math.isnan(database['Age'][i]) == True:
        prefix = database['Prefix'][i]
        if prefix in (' Capt.', ' Col.'):
            database['Age'] = 65.0
        elif prefix in (' Sir.', ' Major.', ' Rev.', ' Lady.', ' Dr.'):
            database['Age'] = 47.5
        elif prefix in (' Don.', ' Jonkheer.', ' Mrs.', ' the Countess.'):
            database['Age'] = 36.5
        elif prefix in (' Mr.', ' Ms.'):
            database['Age'] = 29.0
        elif prefix in (' Mme.', ' Mlle.'):
            database['Age'] = 24.0
        elif prefix == ' Miss.':
            database['Age'] = 21.0
        elif prefix == ' Master.':
            database['Age'] = 3.5
Then, notice that you were modifying the whole column database["Age"] instead of just the entry for row i (database["Age"][i]), so we'll fix that too.
# Step 3: pick the age for the row's prefix, then write it to that row only.
for i in range(len(database)):
    # Rows that already have an age are left untouched.
    if not math.isnan(database['Age'][i]):
        continue

    prefix = database['Prefix'][i]
    if prefix in (' Capt.', ' Col.'):
        age = 65.0
    elif prefix in (' Sir.', ' Major.', ' Rev.', ' Lady.', ' Dr.'):
        age = 47.5
    elif prefix in (' Don.', ' Jonkheer.', ' Mrs.', ' the Countess.'):
        age = 36.5
    elif prefix in (' Mr.', ' Ms.'):
        age = 29.0
    elif prefix in (' Mme.', ' Mlle.'):
        age = 24.0
    elif prefix == ' Miss.':
        age = 21.0
    elif prefix == ' Master.':
        age = 3.5
    else:
        # Unknown prefix: keep the age missing rather than reusing the value
        # chosen for a previous row (or failing because `age` is undefined).
        continue

    # A single .loc call writes into the DataFrame itself; the chained form
    # database['Age'][i] = ... may end up assigning to a temporary copy.
    database.loc[i, 'Age'] = age
Finally, if you wanted, you could write yourself a dictionary that matches prefixes with ages and use that to avoid the many if and elifs.
# Define how an age is matched with some prefixes.
# Each entry is (age, prefixes sharing that age). Prefixes are stored bare,
# i.e. without the leading space and trailing dot.
ages_and_prefixes = (
    (65.0, ("Capt", "Col")),
    (47.5, ("Sir", "Major", "Rev", "Lady", "Dr")),
    (36.5, ("Don", "Jonkheer", "Mrs", "the Countess")),
    (29.0, ("Mr", "Ms")),
    (24.0, ("Mme", "Mlle")),
    (21.0, ("Miss",)),
    (3.5, ("Master",)),
)

# Flatten the table into a direct prefix -> age lookup.
prefix_to_age_dict = {}
for age, prefixes in ages_and_prefixes:
    for prefix in prefixes:
        prefix_to_age_dict[prefix] = age
# The replacement step in the database is now much simpler.
for i in range(len(database)):
    if math.isnan(database['Age'][i]):
        # The lookup table is keyed by the bare prefix ("Capt"), so strip any
        # surrounding spaces and dots (e.g. ' Capt.') before looking it up.
        prefix = database['Prefix'][i].strip(" .")
        age = prefix_to_age_dict.get(prefix)
        if age is None:
            # Prefix not in the table: leave the age missing.
            continue
        # A single .loc call writes into the DataFrame itself; the chained
        # form database['Age'][i] = ... may assign to a temporary copy.
        database.loc[i, 'Age'] = age