Search code examples
rloopsnormalizationsubset

Normalizing Data by Subsets


I'm trying to normalize data (using a custom function from SO), in my case NFL passing stats by season. My original data frame has seasons 2004 - 2013 together, but since I need to normalize it by season the only way I can figure is to break it out by season, normalize it, then combine it back.

While my code works it seems very inefficient so any help (for this problem and anything else you see) would be appreciated.

Code:

# custom functions
normalize <- function(x) { 
  return((x - min(x)) / (max(x) - min(x)))
}

trim <- function (x) gsub("^\\s+|\\s+$", "", x)

first_season <- 2004
last_season <- 2013
num_seasons <- as.numeric(last_season - first_season + 1)

# seasons <- seq(2004, 2013, by=1)
seasons <- seq(first_season, last_season, by=1)
rm(first_season, last_season)

library(XML)
library(plyr)
library(stringr)

# Passing 

for (i in 1:num_seasons) {
  url <- paste("http://www.pro-football-reference.com/years/", seasons[i],"/passing.htm", sep = "")
  df <- readHTMLTable(url,which=1)
  df$season = seasons[i]
  df <- df[!names(df) %in% c("QBrec") ] 
  if(df$season >= 2008) df <- df[!names(df) %in% c("QBR") ] # Removes QBR 2008+
  assign (paste("passing_", seasons[i], sep = "") , df)
  rm(df)
  print(seasons[i])
}

names(passing_2005) <- names(passing_2004)
names(passing_2006) <- names(passing_2004)
names(passing_2007) <- names(passing_2004)
names(passing_2008) <- names(passing_2004)
names(passing_2009) <- names(passing_2004)
names(passing_2010) <- names(passing_2004)
names(passing_2011) <- names(passing_2004)
names(passing_2012) <- names(passing_2004)
names(passing_2013) <- names(passing_2004)

# Combine all Passing seasons
passing <- rbind(passing_2004, passing_2005, passing_2006, passing_2007, passing_2008, 
                 passing_2009, passing_2010, passing_2011, passing_2012, passing_2013)

# Remove all individual Passing seasons
rm(passing_2004, passing_2005, passing_2006, passing_2007, passing_2008, 
   passing_2009, passing_2010, passing_2011, passing_2012, passing_2013)

names(passing) <- c("rank_pfr", "nameinfo", "team", "age", "games", "games_started",
                    #"qb_record", 
                    "completions", "attempts", "comp_pct", "yards_passing",
                    "td_passing", "td_pct", "interceptions", "int_pct", "long_passing",
                    "yards_pass_att", "yards_pass_att_avg", "yards_pass_comp","yards_pass_game",
                    "pass_rate", "sacks", "sacks_pass", "yards_net_pass_att", "yards_net_pass_att_avg",
                    "sacks_pct", "comebacks", "game_win_drives", "season")

passing <- passing[which(passing$rank_pfr!='Rk'), ]

# Convert from Factor to Numeric
passing$rank_pfr <- as.numeric(as.character(passing$rank_pfr))
passing$age <- as.numeric(as.character(passing$age))
passing$games <- as.numeric(as.character(passing$games))
passing$games_started <- as.numeric(as.character(passing$games_started))
passing$completions <- as.numeric(as.character(passing$completions))
passing$attempts <- as.numeric(as.character(passing$attempts))
passing$comp_pct <- as.numeric(as.character(passing$comp_pct))
passing$yards_passing <- as.numeric(as.character(passing$yards_passing))
passing$td_passing <- as.numeric(as.character(passing$td_passing))
passing$td_pct <- as.numeric(as.character(passing$td_pct))
passing$interceptions <- as.numeric(as.character(passing$interceptions))
passing$int_pct <- as.numeric(as.character(passing$int_pct))
passing$long_passing <- as.numeric(as.character(passing$long_passing))
passing$yards_pass_att <- as.numeric(as.character(passing$yards_pass_att))
passing$yards_pass_att_avg <- as.numeric(as.character(passing$yards_pass_att_avg))
passing$yards_pass_comp <- as.numeric(as.character(passing$yards_pass_comp))
passing$yards_pass_game <- as.numeric(as.character(passing$yards_pass_game))
passing$pass_rate <- as.numeric(as.character(passing$pass_rate))
passing$sacks <- as.numeric(as.character(passing$sacks))
passing$sacks_pass <- as.numeric(as.character(passing$sacks_pass))
passing$yards_net_pass_att <- as.numeric(as.character(passing$yards_net_pass_att))
passing$yards_net_pass_att_avg <- as.numeric(as.character(passing$yards_net_pass_att_avg))
passing$sacks_pct <- as.numeric(as.character(passing$sacks_pct))
passing$comebacks <- as.numeric(as.character(passing$comebacks))
passing$game_win_drives <- as.numeric(as.character(passing$game_win_drives))
passing$season <- as.numeric(as.character(passing$season))

passing[is.na(passing)] <- 0

# This is where I break it out by season to normalize, but I'd rather 
# just keep it in "passing" as opposed to "passing_20XX"

for (i in 1:num_seasons) {
  assign (paste("passing_", seasons[i], sep = "") , passing[passing$season == seasons[i], ])
  print(seasons[i])
}

rm(passing)

# 
passing_2004$age <- normalize(passing_2004$age)
passing_2004$games <- normalize(passing_2004$games)
passing_2004$games_started <- normalize(passing_2004$games_started)
passing_2004$completions <- normalize(passing_2004$completions)
passing_2004$attempts <- normalize(passing_2004$attempts)
passing_2004$comp_pct <- normalize(passing_2004$comp_pct)
passing_2004$yards_passing <- normalize(passing_2004$yards_passing)
passing_2004$td_passing <- normalize(passing_2004$td_passing)
passing_2004$td_pct <- normalize(passing_2004$td_pct)
passing_2004$interceptions <- normalize(passing_2004$interceptions)
passing_2004$int_pct <- normalize(passing_2004$int_pct)
passing_2004$long_passing <- normalize(passing_2004$long_passing)
passing_2004$yards_pass_att <- normalize(passing_2004$yards_pass_att)
passing_2004$yards_pass_att_avg <- normalize(passing_2004$yards_pass_att_avg)
passing_2004$yards_pass_comp <- normalize(passing_2004$yards_pass_comp)
passing_2004$yards_pass_game <- normalize(passing_2004$yards_pass_game)
passing_2004$pass_rate <- normalize(passing_2004$pass_rate)
passing_2004$sacks <- normalize(passing_2004$sacks)
passing_2004$sacks_pass <- normalize(passing_2004$sacks_pass)
passing_2004$yards_net_pass_att <- normalize(passing_2004$yards_net_pass_att)
passing_2004$yards_net_pass_att_avg <- normalize(passing_2004$yards_net_pass_att_avg)
passing_2004$sacks_pct <- normalize(passing_2004$sacks_pct)
passing_2004$comebacks <- normalize(passing_2004$comebacks)
passing_2004$game_win_drives <- normalize(passing_2004$game_win_drives)

passing_2005$age <- normalize(passing_2005$age)
passing_2005$games <- normalize(passing_2005$games)
passing_2005$games_started <- normalize(passing_2005$games_started)
passing_2005$completions <- normalize(passing_2005$completions)
passing_2005$attempts <- normalize(passing_2005$attempts)
passing_2005$comp_pct <- normalize(passing_2005$comp_pct)
passing_2005$yards_passing <- normalize(passing_2005$yards_passing)
passing_2005$td_passing <- normalize(passing_2005$td_passing)
passing_2005$td_pct <- normalize(passing_2005$td_pct)
passing_2005$interceptions <- normalize(passing_2005$interceptions)
passing_2005$int_pct <- normalize(passing_2005$int_pct)
passing_2005$long_passing <- normalize(passing_2005$long_passing)
passing_2005$yards_pass_att <- normalize(passing_2005$yards_pass_att)
passing_2005$yards_pass_att_avg <- normalize(passing_2005$yards_pass_att_avg)
passing_2005$yards_pass_comp <- normalize(passing_2005$yards_pass_comp)
passing_2005$yards_pass_game <- normalize(passing_2005$yards_pass_game)
passing_2005$pass_rate <- normalize(passing_2005$pass_rate)
passing_2005$sacks <- normalize(passing_2005$sacks)
passing_2005$sacks_pass <- normalize(passing_2005$sacks_pass)
passing_2005$yards_net_pass_att <- normalize(passing_2005$yards_net_pass_att)
passing_2005$yards_net_pass_att_avg <- normalize(passing_2005$yards_net_pass_att_avg)
passing_2005$sacks_pct <- normalize(passing_2005$sacks_pct)
passing_2005$comebacks <- normalize(passing_2005$comebacks)
passing_2005$game_win_drives <- normalize(passing_2005$game_win_drives)

passing_2006$age <- normalize(passing_2006$age)
passing_2006$games <- normalize(passing_2006$games)
passing_2006$games_started <- normalize(passing_2006$games_started)
passing_2006$completions <- normalize(passing_2006$completions)
passing_2006$attempts <- normalize(passing_2006$attempts)
passing_2006$comp_pct <- normalize(passing_2006$comp_pct)
passing_2006$yards_passing <- normalize(passing_2006$yards_passing)
passing_2006$td_passing <- normalize(passing_2006$td_passing)
passing_2006$td_pct <- normalize(passing_2006$td_pct)
passing_2006$interceptions <- normalize(passing_2006$interceptions)
passing_2006$int_pct <- normalize(passing_2006$int_pct)
passing_2006$long_passing <- normalize(passing_2006$long_passing)
passing_2006$yards_pass_att <- normalize(passing_2006$yards_pass_att)
passing_2006$yards_pass_att_avg <- normalize(passing_2006$yards_pass_att_avg)
passing_2006$yards_pass_comp <- normalize(passing_2006$yards_pass_comp)
passing_2006$yards_pass_game <- normalize(passing_2006$yards_pass_game)
passing_2006$pass_rate <- normalize(passing_2006$pass_rate)
passing_2006$sacks <- normalize(passing_2006$sacks)
passing_2006$sacks_pass <- normalize(passing_2006$sacks_pass)
passing_2006$yards_net_pass_att <- normalize(passing_2006$yards_net_pass_att)
passing_2006$yards_net_pass_att_avg <- normalize(passing_2006$yards_net_pass_att_avg)
passing_2006$sacks_pct <- normalize(passing_2006$sacks_pct)
passing_2006$comebacks <- normalize(passing_2006$comebacks)
passing_2006$game_win_drives <- normalize(passing_2006$game_win_drives)

passing_2007$age <- normalize(passing_2007$age)
passing_2007$games <- normalize(passing_2007$games)
passing_2007$games_started <- normalize(passing_2007$games_started)
passing_2007$completions <- normalize(passing_2007$completions)
passing_2007$attempts <- normalize(passing_2007$attempts)
passing_2007$comp_pct <- normalize(passing_2007$comp_pct)
passing_2007$yards_passing <- normalize(passing_2007$yards_passing)
passing_2007$td_passing <- normalize(passing_2007$td_passing)
passing_2007$td_pct <- normalize(passing_2007$td_pct)
passing_2007$interceptions <- normalize(passing_2007$interceptions)
passing_2007$int_pct <- normalize(passing_2007$int_pct)
passing_2007$long_passing <- normalize(passing_2007$long_passing)
passing_2007$yards_pass_att <- normalize(passing_2007$yards_pass_att)
passing_2007$yards_pass_att_avg <- normalize(passing_2007$yards_pass_att_avg)
passing_2007$yards_pass_comp <- normalize(passing_2007$yards_pass_comp)
passing_2007$yards_pass_game <- normalize(passing_2007$yards_pass_game)
passing_2007$pass_rate <- normalize(passing_2007$pass_rate)
passing_2007$sacks <- normalize(passing_2007$sacks)
passing_2007$sacks_pass <- normalize(passing_2007$sacks_pass)
passing_2007$yards_net_pass_att <- normalize(passing_2007$yards_net_pass_att)
passing_2007$yards_net_pass_att_avg <- normalize(passing_2007$yards_net_pass_att_avg)
passing_2007$sacks_pct <- normalize(passing_2007$sacks_pct)
passing_2007$comebacks <- normalize(passing_2007$comebacks)
passing_2007$game_win_drives <- normalize(passing_2007$game_win_drives)

passing_2008$age <- normalize(passing_2008$age)
passing_2008$games <- normalize(passing_2008$games)
passing_2008$games_started <- normalize(passing_2008$games_started)
passing_2008$completions <- normalize(passing_2008$completions)
passing_2008$attempts <- normalize(passing_2008$attempts)
passing_2008$comp_pct <- normalize(passing_2008$comp_pct)
passing_2008$yards_passing <- normalize(passing_2008$yards_passing)
passing_2008$td_passing <- normalize(passing_2008$td_passing)
passing_2008$td_pct <- normalize(passing_2008$td_pct)
passing_2008$interceptions <- normalize(passing_2008$interceptions)
passing_2008$int_pct <- normalize(passing_2008$int_pct)
passing_2008$long_passing <- normalize(passing_2008$long_passing)
passing_2008$yards_pass_att <- normalize(passing_2008$yards_pass_att)
passing_2008$yards_pass_att_avg <- normalize(passing_2008$yards_pass_att_avg)
passing_2008$yards_pass_comp <- normalize(passing_2008$yards_pass_comp)
passing_2008$yards_pass_game <- normalize(passing_2008$yards_pass_game)
passing_2008$pass_rate <- normalize(passing_2008$pass_rate)
passing_2008$sacks <- normalize(passing_2008$sacks)
passing_2008$sacks_pass <- normalize(passing_2008$sacks_pass)
passing_2008$yards_net_pass_att <- normalize(passing_2008$yards_net_pass_att)
passing_2008$yards_net_pass_att_avg <- normalize(passing_2008$yards_net_pass_att_avg)
passing_2008$sacks_pct <- normalize(passing_2008$sacks_pct)
passing_2008$comebacks <- normalize(passing_2008$comebacks)
passing_2008$game_win_drives <- normalize(passing_2008$game_win_drives)

passing_2009$age <- normalize(passing_2009$age)
passing_2009$games <- normalize(passing_2009$games)
passing_2009$games_started <- normalize(passing_2009$games_started)
passing_2009$completions <- normalize(passing_2009$completions)
passing_2009$attempts <- normalize(passing_2009$attempts)
passing_2009$comp_pct <- normalize(passing_2009$comp_pct)
passing_2009$yards_passing <- normalize(passing_2009$yards_passing)
passing_2009$td_passing <- normalize(passing_2009$td_passing)
passing_2009$td_pct <- normalize(passing_2009$td_pct)
passing_2009$interceptions <- normalize(passing_2009$interceptions)
passing_2009$int_pct <- normalize(passing_2009$int_pct)
passing_2009$long_passing <- normalize(passing_2009$long_passing)
passing_2009$yards_pass_att <- normalize(passing_2009$yards_pass_att)
passing_2009$yards_pass_att_avg <- normalize(passing_2009$yards_pass_att_avg)
passing_2009$yards_pass_comp <- normalize(passing_2009$yards_pass_comp)
passing_2009$yards_pass_game <- normalize(passing_2009$yards_pass_game)
passing_2009$pass_rate <- normalize(passing_2009$pass_rate)
passing_2009$sacks <- normalize(passing_2009$sacks)
passing_2009$sacks_pass <- normalize(passing_2009$sacks_pass)
passing_2009$yards_net_pass_att <- normalize(passing_2009$yards_net_pass_att)
passing_2009$yards_net_pass_att_avg <- normalize(passing_2009$yards_net_pass_att_avg)
passing_2009$sacks_pct <- normalize(passing_2009$sacks_pct)
passing_2009$comebacks <- normalize(passing_2009$comebacks)
passing_2009$game_win_drives <- normalize(passing_2009$game_win_drives)

passing_2010$age <- normalize(passing_2010$age)
passing_2010$games <- normalize(passing_2010$games)
passing_2010$games_started <- normalize(passing_2010$games_started)
passing_2010$completions <- normalize(passing_2010$completions)
passing_2010$attempts <- normalize(passing_2010$attempts)
passing_2010$comp_pct <- normalize(passing_2010$comp_pct)
passing_2010$yards_passing <- normalize(passing_2010$yards_passing)
passing_2010$td_passing <- normalize(passing_2010$td_passing)
passing_2010$td_pct <- normalize(passing_2010$td_pct)
passing_2010$interceptions <- normalize(passing_2010$interceptions)
passing_2010$int_pct <- normalize(passing_2010$int_pct)
passing_2010$long_passing <- normalize(passing_2010$long_passing)
passing_2010$yards_pass_att <- normalize(passing_2010$yards_pass_att)
passing_2010$yards_pass_att_avg <- normalize(passing_2010$yards_pass_att_avg)
passing_2010$yards_pass_comp <- normalize(passing_2010$yards_pass_comp)
passing_2010$yards_pass_game <- normalize(passing_2010$yards_pass_game)
passing_2010$pass_rate <- normalize(passing_2010$pass_rate)
passing_2010$sacks <- normalize(passing_2010$sacks)
passing_2010$sacks_pass <- normalize(passing_2010$sacks_pass)
passing_2010$yards_net_pass_att <- normalize(passing_2010$yards_net_pass_att)
passing_2010$yards_net_pass_att_avg <- normalize(passing_2010$yards_net_pass_att_avg)
passing_2010$sacks_pct <- normalize(passing_2010$sacks_pct)
passing_2010$comebacks <- normalize(passing_2010$comebacks)
passing_2010$game_win_drives <- normalize(passing_2010$game_win_drives)

passing_2011$age <- normalize(passing_2011$age)
passing_2011$games <- normalize(passing_2011$games)
passing_2011$games_started <- normalize(passing_2011$games_started)
passing_2011$completions <- normalize(passing_2011$completions)
passing_2011$attempts <- normalize(passing_2011$attempts)
passing_2011$comp_pct <- normalize(passing_2011$comp_pct)
passing_2011$yards_passing <- normalize(passing_2011$yards_passing)
passing_2011$td_passing <- normalize(passing_2011$td_passing)
passing_2011$td_pct <- normalize(passing_2011$td_pct)
passing_2011$interceptions <- normalize(passing_2011$interceptions)
passing_2011$int_pct <- normalize(passing_2011$int_pct)
passing_2011$long_passing <- normalize(passing_2011$long_passing)
passing_2011$yards_pass_att <- normalize(passing_2011$yards_pass_att)
passing_2011$yards_pass_att_avg <- normalize(passing_2011$yards_pass_att_avg)
passing_2011$yards_pass_comp <- normalize(passing_2011$yards_pass_comp)
passing_2011$yards_pass_game <- normalize(passing_2011$yards_pass_game)
passing_2011$pass_rate <- normalize(passing_2011$pass_rate)
passing_2011$sacks <- normalize(passing_2011$sacks)
passing_2011$sacks_pass <- normalize(passing_2011$sacks_pass)
passing_2011$yards_net_pass_att <- normalize(passing_2011$yards_net_pass_att)
passing_2011$yards_net_pass_att_avg <- normalize(passing_2011$yards_net_pass_att_avg)
passing_2011$sacks_pct <- normalize(passing_2011$sacks_pct)
passing_2011$comebacks <- normalize(passing_2011$comebacks)
passing_2011$game_win_drives <- normalize(passing_2011$game_win_drives)

passing_2012$age <- normalize(passing_2012$age)
passing_2012$games <- normalize(passing_2012$games)
passing_2012$games_started <- normalize(passing_2012$games_started)
passing_2012$completions <- normalize(passing_2012$completions)
passing_2012$attempts <- normalize(passing_2012$attempts)
passing_2012$comp_pct <- normalize(passing_2012$comp_pct)
passing_2012$yards_passing <- normalize(passing_2012$yards_passing)
passing_2012$td_passing <- normalize(passing_2012$td_passing)
passing_2012$td_pct <- normalize(passing_2012$td_pct)
passing_2012$interceptions <- normalize(passing_2012$interceptions)
passing_2012$int_pct <- normalize(passing_2012$int_pct)
passing_2012$long_passing <- normalize(passing_2012$long_passing)
passing_2012$yards_pass_att <- normalize(passing_2012$yards_pass_att)
passing_2012$yards_pass_att_avg <- normalize(passing_2012$yards_pass_att_avg)
passing_2012$yards_pass_comp <- normalize(passing_2012$yards_pass_comp)
passing_2012$yards_pass_game <- normalize(passing_2012$yards_pass_game)
passing_2012$pass_rate <- normalize(passing_2012$pass_rate)
passing_2012$sacks <- normalize(passing_2012$sacks)
passing_2012$sacks_pass <- normalize(passing_2012$sacks_pass)
passing_2012$yards_net_pass_att <- normalize(passing_2012$yards_net_pass_att)
passing_2012$yards_net_pass_att_avg <- normalize(passing_2012$yards_net_pass_att_avg)
passing_2012$sacks_pct <- normalize(passing_2012$sacks_pct)
passing_2012$comebacks <- normalize(passing_2012$comebacks)
passing_2012$game_win_drives <- normalize(passing_2012$game_win_drives)

passing_2013$age <- normalize(passing_2013$age)
passing_2013$games <- normalize(passing_2013$games)
passing_2013$games_started <- normalize(passing_2013$games_started)
passing_2013$completions <- normalize(passing_2013$completions)
passing_2013$attempts <- normalize(passing_2013$attempts)
passing_2013$comp_pct <- normalize(passing_2013$comp_pct)
passing_2013$yards_passing <- normalize(passing_2013$yards_passing)
passing_2013$td_passing <- normalize(passing_2013$td_passing)
passing_2013$td_pct <- normalize(passing_2013$td_pct)
passing_2013$interceptions <- normalize(passing_2013$interceptions)
passing_2013$int_pct <- normalize(passing_2013$int_pct)
passing_2013$long_passing <- normalize(passing_2013$long_passing)
passing_2013$yards_pass_att <- normalize(passing_2013$yards_pass_att)
passing_2013$yards_pass_att_avg <- normalize(passing_2013$yards_pass_att_avg)
passing_2013$yards_pass_comp <- normalize(passing_2013$yards_pass_comp)
passing_2013$yards_pass_game <- normalize(passing_2013$yards_pass_game)
passing_2013$pass_rate <- normalize(passing_2013$pass_rate)
passing_2013$sacks <- normalize(passing_2013$sacks)
passing_2013$sacks_pass <- normalize(passing_2013$sacks_pass)
passing_2013$yards_net_pass_att <- normalize(passing_2013$yards_net_pass_att)
passing_2013$yards_net_pass_att_avg <- normalize(passing_2013$yards_net_pass_att_avg)
passing_2013$sacks_pct <- normalize(passing_2013$sacks_pct)
passing_2013$comebacks <- normalize(passing_2013$comebacks)
passing_2013$game_win_drives <- normalize(passing_2013$game_win_drives)

# Combine all Passing seasons
passing <- rbind(passing_2004, passing_2005, passing_2006, passing_2007, passing_2008, 
                 passing_2009, passing_2010, passing_2011, passing_2012, passing_2013)

# Remove all individual Passing seasons
rm(passing_2004, passing_2005, passing_2006, passing_2007, passing_2008, 
   passing_2009, passing_2010, passing_2011, passing_2012, passing_2013)

summary(passing)

Solution

  • There is alot you can do to speed this up...primarily using the plyr and dplyr packages. Obviously, there are further tweaks to be made, but this should significantly decrease the amount of code you put together.

    If this is for Fantasy Football, I'd love to see your results... :P

    library(plyr)
    library(dplyr)
    library(XML)
    library(plyr)
    library(stringr)
    
    # custom functions
    normalize <- function(x) { 
      return((x - min(x)) / (max(x) - min(x)))
    }
    
    trim <- function (x) gsub("^\\s+|\\s+$", "", x)
    
    "%w/o%" <- function(x, y) x[!x %in% y] #--  x without y
    
    # define seasons
    first_season <- 2004
    last_season <- 2013
    num_seasons <- as.numeric(last_season - first_season + 1)
    
    seasons <- seq(first_season, last_season, by=1)
    rm(first_season, last_season)
    
    
    # Passing 
    passing <- data.frame()
    for (i in 1:num_seasons) {
      url <- paste("http://www.pro-football-reference.com/years/", seasons[i],"/passing.htm", sep = "")
      df <- readHTMLTable(url,which=1)
      df$season = seasons[i]
      df <- df[!names(df) %in% c("QBrec") ] 
      if(df$season >= 2008) df <- df[!names(df) %in% c("QBR") ] # Removes QBR 2008+
      passing <- rbind(passing, df)
      print(seasons[i])
    }
    
    names(passing) <- c("rank_pfr", "nameinfo", "team", "age", "games", "games_started",
                        #"qb_record", 
                        "completions", "attempts", "comp_pct", "yards_passing",
                        "td_passing", "td_pct", "interceptions", "int_pct", "long_passing",
                        "yards_pass_att", "yards_pass_att_avg", "yards_pass_comp", "yards_pass_game", "pass_rate", "sacks", "sacks_pass", "yards_net_pass_att", "yards_net_pass_att_avg", "sacks_pct", "comebacks", "game_win_drives", "season")
    
    passing <- passing[which(passing$rank_pfr!='Rk'), ]
    
    # Convert numeric columns to numeric
    numeric_columns <- names(passing) %w/o% c('nameinfo', 'team')
    numeric_columns <- passing[, numeric_columns]
    numeric_columns <- data.frame(apply(numeric_columns, 2, function(x) as.numeric(as.character(x))))
    numeric_columns[is.na(numeric_columns)] <- 0
    numeric_columns <- plyr::ddply(numeric_columns, .(season), colwise(normalize))
    passing <- data.frame(nameinfo = passing$nameinfo,
                          team = passing$team) %>%
      cbind(numeric_columns)
    
    summary(passing)