Search code examples
rpanel-data

Calculate average and add as new row to dataframe


I have some panel data in long format from 2009 - 2019 for 100+ countries.

       df <- structure(list(area_name = c("Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Albania", "Albania", "Albania", "Albania", "Albania", 
"Albania", "Albania", "Albania", "Albania"), area_code = c("AFG", 
"AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "ALB", 
"ALB", "ALB", "ALB", "ALB", "ALB", "ALB", "ALB", "ALB"), area_group = c("Asia-Pacific", 
"Asia-Pacific", "Asia-Pacific", "Asia-Pacific", "Asia-Pacific", 
"Asia-Pacific", "Asia-Pacific", "Asia-Pacific", "Asia-Pacific", 
"Eastern Europe", "Eastern Europe", "Eastern Europe", "Eastern Europe", 
"Eastern Europe", "Eastern Europe", "Eastern Europe", "Eastern Europe", 
"Eastern Europe"), pillar_name = c("Governance", "Governance", 
"Governance", "Governance", "Governance", "Governance", "Investment Environment", 
"Investment Environment", "Investment Environment", "Governance", 
"Governance", "Governance", "Governance", "Governance", "Governance", 
"Investment Environment", "Investment Environment", "Investment Environment"
), element_name = c("Executive Constraints", "Government Effectiveness", 
"Government Integrity", "Political Accountability", "Regulatory Quality", 
"Rule of Law", "Contract Enforcement", "Investor Protection", 
"Property Rights", "Executive Constraints", "Government Effectiveness", 
"Government Integrity", "Political Accountability", "Regulatory Quality", 
"Rule of Law", "Contract Enforcement", "Investor Protection", 
"Property Rights"), year = c("score_2009", "score_2009", "score_2009", 
"score_2009", "score_2009", "score_2009", "score_2009", "score_2009", 
"score_2009", "score_2009", "score_2009", "score_2009", "score_2009", 
"score_2009", "score_2009", "score_2009", "score_2009", "score_2009"
), score = c(5.94, 5.6, 5.82, 3.85, 4.62, 5.68, 4.21, 4.51, 8.75, 
8.51, 9.87, 7.79, 11.5, 7.18, 6.37, 9.2, 11.3, 13.2)), row.names = c(NA, 
-18L), class = "data.frame")

My goal is to add another row for each country that averages the different scores for a given year.

In the end I would like it to look like this:

  area_name   area_code area_group     pillar_name            element_name             year       score
   <chr>       <chr>     <chr>          <chr>                  <chr>                    <chr>      <dbl>
 1 Afghanistan AFG       Asia-Pacific   Governance             Executive Constraints    score_2009  5.94
 2 Afghanistan AFG       Asia-Pacific   Governance             Government Effectiveness score_2009  5.60
 3 Afghanistan AFG       Asia-Pacific   Governance             Government Integrity     score_2009  5.82
 4 Afghanistan AFG       Asia-Pacific   Governance             Political Accountability score_2009  3.85
 5 Afghanistan AFG       Asia-Pacific   Governance             Regulatory Quality       score_2009  4.62
 6 Afghanistan AFG       Asia-Pacific   Governance             Rule of Law              score_2009  5.68
 7 Afghanistan AFG       Asia-Pacific   Investment Environment Contract Enforcement     score_2009  4.21
 8 Afghanistan AFG       Asia-Pacific   Investment Environment Investor Protection      score_2009  4.51
 9 Afghanistan AFG       Asia-Pacific   Investment Environment Property Rights          score_2009  8.75
10 Afghanistan AFG       Asia-Pacific   Avg                    Avg                      score_2009  5.44                                                                                                    
11 Albania     ALB       Eastern Europe Governance             Executive Constraints    score_2009  8.51
12 Albania     ALB       Eastern Europe Governance             Government Effectiveness score_2009  9.87
13 Albania     ALB       Eastern Europe Governance             Government Integrity     score_2009  7.79
14 Albania     ALB       Eastern Europe Governance             Political Accountability score_2009 11.5 
15 Albania     ALB       Eastern Europe Governance             Regulatory Quality       score_2009  7.18
16 Albania     ALB       Eastern Europe Governance             Rule of Law              score_2009  6.37
17 Albania     ALB       Eastern Europe Investment Environment Contract Enforcement     score_2009  9.20
18 Albania     ALB       Eastern Europe Investment Environment Investor Protection      score_2009 11.3 
19 Albania     ALB       Eastern Europe Investment Environment Property Rights          score_2009 13.2
20 Albania     ALB       Eastern Europe Avg                    Avg                      score_2009 9.44 

I am fairly new to R and am unsure how to approach this. Any help would be greatly appreciated!


Solution

  • You can try this:

    library(tidyverse)
    #Data
    df <- structure(list(area_name = c("Afghanistan", "Afghanistan", "Afghanistan", 
    "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
    "Afghanistan", "Albania", "Albania", "Albania", "Albania", "Albania", 
    "Albania", "Albania", "Albania", "Albania"), area_code = c("AFG", 
    "AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "ALB", 
    "ALB", "ALB", "ALB", "ALB", "ALB", "ALB", "ALB", "ALB"), area_group = c("Asia-Pacific", 
    "Asia-Pacific", "Asia-Pacific", "Asia-Pacific", "Asia-Pacific", 
    "Asia-Pacific", "Asia-Pacific", "Asia-Pacific", "Asia-Pacific", 
    "Eastern Europe", "Eastern Europe", "Eastern Europe", "Eastern Europe", 
    "Eastern Europe", "Eastern Europe", "Eastern Europe", "Eastern Europe", 
    "Eastern Europe"), pillar_name = c("Governance", "Governance", 
    "Governance", "Governance", "Governance", "Governance", "Investment Environment", 
    "Investment Environment", "Investment Environment", "Governance", 
    "Governance", "Governance", "Governance", "Governance", "Governance", 
    "Investment Environment", "Investment Environment", "Investment Environment"
    ), element_name = c("Executive Constraints", "Government Effectiveness", 
    "Government Integrity", "Political Accountability", "Regulatory Quality", 
    "Rule of Law", "Contract Enforcement", "Investor Protection", 
    "Property Rights", "Executive Constraints", "Government Effectiveness", 
    "Government Integrity", "Political Accountability", "Regulatory Quality", 
    "Rule of Law", "Contract Enforcement", "Investor Protection", 
    "Property Rights"), year = c("score_2009", "score_2009", "score_2009", 
    "score_2009", "score_2009", "score_2009", "score_2009", "score_2009", 
    "score_2009", "score_2009", "score_2009", "score_2009", "score_2009", 
    "score_2009", "score_2009", "score_2009", "score_2009", "score_2009"
    ), score = c(5.94, 5.6, 5.82, 3.85, 4.62, 5.68, 4.21, 4.51, 8.75, 
    8.51, 9.87, 7.79, 11.5, 7.18, 6.37, 9.2, 11.3, 13.2)), row.names = c(NA, 
    -18L), class = "data.frame")
    

    The code:

    df1 <- df %>% bind_rows(df %>% 
                       group_by(area_name,area_code,area_group,year) %>% 
                       summarise(pillar_name='Val.Avg',element_name='Val.Avg',score=mean(score))) %>%
      arrange(area_name,area_code,area_group)
    

    Output:

         area_name area_code     area_group            pillar_name             element_name       year     score
    1  Afghanistan       AFG   Asia-Pacific             Governance    Executive Constraints score_2009  5.940000
    2  Afghanistan       AFG   Asia-Pacific             Governance Government Effectiveness score_2009  5.600000
    3  Afghanistan       AFG   Asia-Pacific             Governance     Government Integrity score_2009  5.820000
    4  Afghanistan       AFG   Asia-Pacific             Governance Political Accountability score_2009  3.850000
    5  Afghanistan       AFG   Asia-Pacific             Governance       Regulatory Quality score_2009  4.620000
    6  Afghanistan       AFG   Asia-Pacific             Governance              Rule of Law score_2009  5.680000
    7  Afghanistan       AFG   Asia-Pacific Investment Environment     Contract Enforcement score_2009  4.210000
    8  Afghanistan       AFG   Asia-Pacific Investment Environment      Investor Protection score_2009  4.510000
    9  Afghanistan       AFG   Asia-Pacific Investment Environment          Property Rights score_2009  8.750000
    10 Afghanistan       AFG   Asia-Pacific                Val.Avg                  Val.Avg score_2009  5.442222
    11     Albania       ALB Eastern Europe             Governance    Executive Constraints score_2009  8.510000
    12     Albania       ALB Eastern Europe             Governance Government Effectiveness score_2009  9.870000
    13     Albania       ALB Eastern Europe             Governance     Government Integrity score_2009  7.790000
    14     Albania       ALB Eastern Europe             Governance Political Accountability score_2009 11.500000
    15     Albania       ALB Eastern Europe             Governance       Regulatory Quality score_2009  7.180000
    16     Albania       ALB Eastern Europe             Governance              Rule of Law score_2009  6.370000
    17     Albania       ALB Eastern Europe Investment Environment     Contract Enforcement score_2009  9.200000
    18     Albania       ALB Eastern Europe Investment Environment      Investor Protection score_2009 11.300000
    19     Albania       ALB Eastern Europe Investment Environment          Property Rights score_2009 13.200000
    20     Albania       ALB Eastern Europe                Val.Avg                  Val.Avg score_2009  9.435556