Search code examples
rtransformpanel

Transform panel data of static values to panel dataset of changes from previous period for several variables


I have a panel dataset that looks something like:

df <- data.frame(id = c(1:10), income_3 = c(2:11), income_4 = c(3:8, NA, 10:12), income_5 = c(4:13), health_3 = c(NA, 6:14), health_4 = c(7:10, NA, 11:15), health_5 = c(9:18))

    
id income_3 income_4 income_5 health_3 health_4 health_5
1  2        3        4        NA       7        9
2  3        4        5        6        8        10
3  4        5        6        7        9        11
4  5        6        7        8        10       12
5  6        7        8        9        NA       13
6  7        8        9        10       11       14
7  8        NA       10       11       12       15
8  9        10       11       12       13       16
9  10       11       12       13       14       17
10 11       12       13       14       15       18

although with many more variables and periods and many missing values and zeros.

I would like to change it to a dataset that shows the change relative to the previous period.

Ideally, the resulting dataset would look something like

df2 <- data.frame(id = 1:10, d_income_3_4 = df$income_4/df$income_3 - 1, d_income_4_5 = df$income_5/df$income_4 - 1, d_health_3_4 = df$health_4/df$health_3 - 1, d_health_4_5 = df$health_5/df$health_4 - 1)

id  d_income_3_4 d_income_4_5 d_health_3_4 d_health_4_5
1   0.50000000   0.33333333   NA            0.2857143
2   0.33333333   0.25000000   0.33333333    0.2500000
3   0.25000000   0.20000000   0.28571429    0.2222222
4   0.20000000   0.16666667   0.25000000    0.2000000
5   0.16666667   0.14285714   NA            NA
6   0.14285714   0.12500000   0.10000000    0.2727273
7   NA           NA           0.09090909    0.2500000
8   0.11111111   0.10000000   0.08333333    0.2307692
9   0.10000000   0.09090909   0.07692308    0.2142857
10  0.09090909   0.08333333   0.07142857    0.2000000

With NaN values having been replaced with NA.

Since there are many variables in my dataset, it would be ideal to work with a vector of the bases of the variable names, i.e.

vars <- c(income, health)

to write the code.

The first 50 rows and 17 columns of my actual dataset are (although they do not include values of zero, other columns do):

structure(list(NAME_0_bound = c("Benin", "Benin", "Benin", "Benin", 
"Benin", "Benin", "Benin", "Benin", "Benin", "Benin", "Benin", 
"Benin", "Botswana", "Botswana", "Botswana", "Botswana", "Botswana", 
"Botswana", "Botswana", "Botswana", "Botswana", "Botswana", "Botswana", 
"Botswana", "Botswana", "Botswana", "Cabo Verde", "Cabo Verde", 
"Cabo Verde", "Cabo Verde", "Cabo Verde", "Cabo Verde", "Cabo Verde", 
"Cabo Verde", "Cabo Verde", "Cabo Verde", "Cabo Verde", "Cabo Verde", 
"Ghana", "Ghana", "Ghana", "Ghana", "Ghana", "Ghana", "Ghana", 
"Ghana", "Ghana", "Ghana", "Kenya", "Kenya"), NAME_1.y = c("Alibori", 
"Atakora", "Atlantique", "Borgou", "Collines", "Donga", "Kouffo", 
"Littoral", "Mono", "Oueme", "Plateau", "Zou", "Central", "Chobe", 
"Francistown", "Gaborone", "Ghanzi", "Jwaneng", "Kgalagadi", 
"Kgatleng", "Kweneng", "Lobatse", "North-East", "Selibe Phikwe", 
"South-East", "Southern", "Mosteiros", "Paúl", "Porto Novo", 
"Praia", "Ribeira Grande", "Santa Catarina", "Santa Cruz", "Sao Domingos", 
"Sao Filipe", "Sao Miguel", "Sao Vicente", "Tarrafal", "Ashanti", 
"Brong Ahafo", "Central", "Eastern", "Greater Accra", "Northern", 
"Upper East", "Upper West", "Volta", "Western", "Bomet", "Bungoma"
), trust_locgov_3 = c(68.0683098591549, 68.4376, 53.4718705035971, 
84.3830952380952, 68.882, 71.4214285714286, 43.5853846153846, 
39.3123076923077, 41.14171875, 51.1576744186046, 50.2595238095238, 
40.8455882352941, 65.6967052023121, 54.16125, 63.5156603773585, 
54.6612, 56.055, 58.3275, 55.9083870967742, 75.6334615384615, 
63.1406832298137, 58.3275, 78.1531034482759, 56.661, 58.7660526315789, 
57.8113274336283, 55.752, 49.2014285714286, 66.66, 36.2349082568807, 
59.6184507042253, 43.23, 51.3106578947368, 50.5, 51.6077419354839, 
58.5527027027027, 65.49345, 57.2592307692308, 56.8395535714286, 
69.084, 44.9955, 58.8325, 42.4908771929825, 74.235, 62.7388235294118, 
62.0308333333333, 49.6548979591837, 52.247027027027, 35.413125, 
42.9781578947368), trust_locgov_4 = c(62.7388235294118, 75.250206185567, 
50.3825581395349, 53.9997674418605, 53.2513793103448, 74.3886956521739, 
51.9948, 38.5282568807339, 48.9198387096774, 45.2335714285714, 
47.7564179104478, 55.3136170212766, 63.4993965517241, 64.576875, 
70.82625, 51.5997777777778, 56.5160869565217, 53.328, 74.9925, 
52.7079069767442, 54.0106626506024, 59.71625, 67.7015625, 62.49375, 
64.4863043478261, NA, 56.8832, 49.16175, 63.4082926829268, 38.9768181818182, 
56.5302941176471, 38.05175, 39.618679245283, 44.44, 47.6560526315789, 
33.33, 57.8373529411765, 54.7564285714286, 51.5504, 68.175, 58.8593617021277, 
61.3973684210526, 42.0500581395349, 60.371320754717, 58.176, 
75.6681081081081, 50.3495744680851, 54.566814159292, 29.16375, 
36.2282608695652), trust_locgov_5 = c(51.51, 63.5353125, 54.368085106383, 
52.217, 50.7525, 59.88984375, 77.908875, 45.2169230769231, 61.4521875, 
61.9341044776119, 72.762676056338, 45.82875, 54.8795689655172, 
49.995, 50.9383018867925, 49.7386153846154, 62.3126086956522, 
47.6142857142857, 45.82875, 50.904, 48.0177966101695, 62.49375, 
54.6953846153846, 46.8703125, 47.911875, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, 46.2365106382979, NA, 36.2349082568807, 
47.2049321266968, 47.6619, 44.3122988505747, 43.8025409836066, 
58.4320920502092, 50.1451351351351, 64.576875, 47.5527155172414, 
41.5052830188679, NA, NA), trust_locgov_6 = c(56.5915625, 65.5106896551724, 
39.76875, 54.16125, 52.3154430379747, 59.2533333333333, 45.5650632911392, 
34.2914423076923, 51.7805357142857, 39.2117647058824, 49.995, 
49.3540384615385, 51.2769230769231, 52.078125, 47.874, 48.2275, 
72.72, 29.16375, 67.7015625, 59.1135849056604, 51.4568421052632, 
22.914375, 64.9935, 46.8703125, 52.7725, NA, 30.4317391304348, 
41.6625, 42.4688709677419, 45.1306216216216, 50.82825, 37.1892631578947, 
34.1429268292683, NA, 41.9312903225806, NA, 41.4288785046729, 
41.0811627906977, 28.1568292682927, 42.1432211538461, 38.8570854271357, 
34.1396356275304, 29.653309352518, 55.2370422535211, 52.49475, 
63.6841071428571, 44.8934693877551, 32.8971428571429, 65.965625, 
46.9251315789474), cont_locgov_3 = c(9.8475, 16.665, 12.7302083333333, 
14.321484375, 24.303125, 17.18578125, 16.665, 3.70333333333333, 
15.10265625, 12.49875, 9.3740625, 9.93490384615385, 14.6724456521739, 
0, 14.8794642857143, 13.9691911764706, 12.49875, 24.9975, 11.4571875, 
15.4746428571429, 13.8875, 6.94375, 14.581875, 15.6234375, 14.16525, 
18.887, 10.2140322580645, 8.3325, 4.70038461538461, 8.52933070866142, 
5.555, 16.5249579831933, 15.3241379310345, 12.49875, 9.8522641509434, 
13.332, 6.06, 17.359375, 9.16938864628821, 7.14214285714286, 
7.05057692307692, 12.3725, 4.0289010989011, 13.9866964285714, 
5.35660714285714, 4.9995, 14.7421153846154, 13.5043965517241, 
31.246875, 19.16475), cont_locgov_4 = c(18.9375, 15.532427184466, 
27.4482352941176, 13.9289552238806, 20.07375, 14.8133333333333, 
16.5032038834951, 8.61025, 14.581875, 18.60925, 9.72125, 21.525625, 
28.3305, 64.576875, 28.1221875, 18.9795833333333, 24.9975, 0, 
37.6773913043478, 30.5525, 16.2682142857143, 22.22, 36.4546875, 
19.7896875, 20.136875, NA, 14.7171428571429, 31.94125, 12.49875, 
10.7516129032258, 19.5175675675676, 10.7600787401575, 19.392, 
14.581875, 9.88601694915254, 19.4425, 13.0031838565022, 20.83125, 
18.6937826086957, 13.0939285714286, 24.3565384615385, 32.8325373134328, 
16.665, 29.16375, 30.9492857142857, 33.33, 34.6119230769231, 
35.3241025641026, 24.9975, 26.38625), cont_locgov_5 = c(14.01375, 
13.8875, 10.8785416666667, 9.16575, 11.74125, 13.01953125, 20.83125, 
8.10104166666667, 17.7065625, 21.0763235294118, 18.05375, 9.93490384615385, 
18.60925, 16.665, 24.4023214285714, 13.9691911764706, 12.49875, 
16.665, 11.4571875, 16.665, 23.0049456521739, 22.914375, 19.16475, 
17.7065625, 10.415625, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, 7.4211328125, NA, 22.0175398633257, 15.4746428571429, 23.3950961538462, 
30.5419391634981, 14.04304, 19.6216935483871, 22.0216071428571, 
27.080625, 14.2634934497817, 21.9276315789474, NA, NA), cont_locgov_6 = c(22.5671875, 
9.46875, 10.605, 24.0172058823529, 14.581875, 16.665, 23.331, 
7.37105769230769, 27.9733928571429, 13.2339705882353, 15.415125, 
17.6264423076923, 17.776, 24.9975, 26.7830357142857, 5.14654411764706, 
22.22, 49.995, 15.6234375, 11.9035714285714, 14.960625, 14.581875, 
24.9975, 17.7065625, 20.83125, NA, 11.11, 14.581875, 16.14421875, 
11.3130548302872, 13.8875, 11.9035714285714, 11.804375, NA, 10.5809523809524, 
NA, 9.05081896551724, 11.804375, 18.6446436285097, 17.443738317757, 
24.431213592233, 21.7426171875, 14.4813103448276, 26.5405555555556, 
28.1221875, 24.9975, 18.9083653846154, 17.3833189655172, 16.665, 
15.83175), corr_locgov_3 = c(58.6608, 65.9354347826087, 38.784, 
77.4851282051282, 63.8825, 68.1091304347826, 43.0189534883721, 
40.4721428571429, 35.4803225806452, 35.5104672897196, 31.0313793103448, 
37.8602912621359, 66.66, 69.0407142857143, 58.3275, 67.5219827586207, 
71.4214285714286, 72.215, 60.7082142857143, 63.9391836734694, 
65.5339864864865, 66.66, 53.328, 67.9419230769231, 61.61, 64.9594897959184, 
75.9183333333333, 80.295, 61.8985714285714, 48.9951, 79.15875, 
69.69, 59.3690625, 57.772, 65.145, 61.3973684210526, 70.5557142857143, 
61.4521875, 61.9211374407583, 77.0149514563107, 58.0466292134831, 
68.4714130434783, 62.7130263157895, 72.153956043956, 70.3633333333333, 
74.0666666666667, 63.0459036144578, 56.6153424657534, 42.218, 
53.6983333333333), corr_locgov_4 = c(60.969512195122, 61.1660439560439, 
48.9198387096774, 61.8985714285714, 65.3614285714286, 58.58, 
58.3275, 51.51, 52.2936206896552, 55.0259433962264, 57.2859375, 
45.7318604651163, 76.2269444444444, 66.66, 74.5957142857143, 
63.1084426229508, 76.659, 66.66, 77.77, 68.6205882352941, 71.2891666666667, 
70.1684210526316, 82.1346428571429, 71.7876923076923, 68.4616216216216, 
NA, 85.8888461538461, 75.548, 79.6216666666667, 63.7543076923077, 
79.836976744186, 58.2164, 59.2533333333333, 72.0858139534884, 
69.993, 51.8466666666667, 66.9804807692308, 59.2533333333333, 
59.3242105263158, 68.217476635514, 56.8125, 66.1090909090909, 
57.758198757764, 65.5612087912088, 63.5156603773585, 63.327, 
70.8701052631579, 72.1016326530612, 54.16125, 47.6142857142857
), corr_locgov_5 = c(46.3555172413793, 59.021875, 50.3886614173228, 
48.884, 46.1194186046512, 59.3690625, 64.1286075949367, 40.8942553191489, 
50.8145901639344, 55.4651908396947, 63.7617391304348, 47.0541176470588, 
61.2118269230769, 64.0961538461538, 59.1135849056604, 63.63, 
65.0728571428571, 66.66, 62.9566666666667, 59.71625, 61.243875, 
61.8985714285714, 63.4345161290323, 61.105, 63.63, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, 68.8455737704918, NA, 55.6022823529412, 
59.7799541284404, 51.8831527093596, 60.671015625, 58.0973204419889, 
69.0307531380753, 69.0407142857143, 64.4003389830508, 62.216, 
60.2810526315789, NA, NA), corr_locgov_6 = c(53.1196875, 47.4462352941176, 
44.0568965517241, 48.1433333333333, 46.9251315789474, 54.9945, 
50.2059493670886, 46.7273529411765, 43.632, 39.2553333333333, 
45.143164556962, 47.2444660194175, 64.7554285714286, 58.3275, 
52.3757142857143, 60.922868852459, 64.576875, 57.1371428571429, 
61.8985714285714, 55.3278, 64.7247096774194, 51.2769230769231, 
59.2533333333333, 60.2090322580645, 57.57, NA, 43.935, 66.66, 
57.4655172413793, 65.191220338983, 66.66, 61.7716, 59.71625, 
NA, 47.268, NA, 60.1472413793103, 61.3033928571429, 48.2550348027842, 
47.0639698492462, 49.2543333333333, 46.662, 42.7940740740741, 
57.4981990521327, 47.9952, 73.7631147540984, 42.5447647058824, 
43.0450224215247, 66.66, 51.3106578947368), corr_taxoff_3 = c(28.7327586206897, 
36.801875, 24.3218918918919, 16.665, 21.0123913043478, 33.33, 
14.1162352941176, 16.3825423728814, 7.2215, 22.1161682242991, 
11.8267741935484, 9.38417475728155, 65.0791304347826, 70.82625, 
60.1389130434783, 60.2278947368421, 68.2471428571428, 66.66, 
59.5178571428571, 63.63, 66.3975590551181, 68.175, 55.0669565217391, 
65.4255555555555, 66.66, 63.9111340206186, 85.7057142857143, 
84.84, 72.215, 55.1916129032258, 84.3052941176471, 64.6994117647059, 
59.59, 57.1371428571429, 72.5853333333333, 68.3265, 75.7125925925926, 
66.66, 48.039014084507, 65.7255140186916, 58.1423333333333, 52.247027027027, 
56.0271165644172, 51.2769230769231, 60.5202631578947, 55.1226923076923, 
53.0067469879518, 45.7219230769231, 48.48, 55.2028125), corr_taxoff_4 = c(26.1018072289157, 
40.27375, 23.253488372093, 38.4974418604651, 48.940253164557, 
36.2708823529412, 40.289010989011, 32.3777142857143, 31.0313793103448, 
33.0185046728972, 40.6209375, 24.3421348314607, 79.4246808510638, 
77.77, 75.7988709677419, 62.7388235294118, 74.5023529411765, 
71.4214285714286, 66.66, 62.62, 70.30546875, 63.1515789473684, 
84.5153571428571, 71.205, 64.64, NA, 83.325, 77.77, 74.9925, 
61.1337823834197, 75.431052631579, 60.5067692307692, 59.6839534883721, 
70.7246341463415, 76.588085106383, 59.2533333333333, 69.0407142857143, 
61.3973684210526, 53.7329439252336, 60.4300934579439, 47.49525, 
56.0053278688525, 48.388734939759, 46.8817582417582, 51.8466666666667, 
54.8332258064516, 58.4170967741935, 60.5381632653061, 46.1492307692308, 
36.36), corr_taxoff_5 = c(37.194347826087, 42.42, 40.7083969465649, 
36.1970967741935, 40.3881176470588, 43.2055555555556, 44.2878082191781, 
37.2923076923077, 43.9109523809524, 47.3070967741935, 57.4821739130435, 
38.5582352941176, 65.3087837837838, 66.66, 64.619387755102, 65.7077142857143, 
72.909375, 77.77, 64.9057894736842, 57.8889473684211, 65.5205128205128, 
61.105, 59.994, 59.5178571428571, 70.3633333333333, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, 66.66, NA, 50.0340281030445, 
50.9123394495413, 41.5783333333333, 47.5957587548638, 51.2769230769231, 
58.2524324324324, 56.5419642857143, 62.7715, 51.7452054794521, 
41.2731308411215, NA, NA)), class = c("grouped_df", "tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -50L), groups = structure(list(
    NAME_1.y = c("Alibori", "Ashanti", "Atakora", "Atlantique", 
    "Bomet", "Borgou", "Brong Ahafo", "Bungoma", "Central", "Chobe", 
    "Collines", "Donga", "Eastern", "Francistown", "Gaborone", 
    "Ghanzi", "Greater Accra", "Jwaneng", "Kgalagadi", "Kgatleng", 
    "Kouffo", "Kweneng", "Littoral", "Lobatse", "Mono", "Mosteiros", 
    "North-East", "Northern", "Oueme", "Paúl", "Plateau", "Porto Novo", 
    "Praia", "Ribeira Grande", "Santa Catarina", "Santa Cruz", 
    "Sao Domingos", "Sao Filipe", "Sao Miguel", "Sao Vicente", 
    "Selibe Phikwe", "South-East", "Southern", "Tarrafal", "Upper East", 
    "Upper West", "Volta", "Western", "Zou"), .rows = structure(list(
        1L, 39L, 2L, 3L, 49L, 4L, 40L, 50L, c(13L, 41L), 14L, 
        5L, 6L, 42L, 15L, 16L, 17L, 43L, 18L, 19L, 20L, 7L, 21L, 
        8L, 22L, 9L, 27L, 23L, 44L, 10L, 28L, 11L, 29L, 30L, 
        31L, 32L, 33L, 34L, 35L, 36L, 37L, 24L, 25L, 26L, 38L, 
        45L, 46L, 47L, 48L, 12L), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -49L), .drop = TRUE))

Solution

  • For my actual dataset I use:

    vars <- names(df) 
    
    num<- seq(3, length(vars)-1,1)
    

    I start the sequence at 3, because I have two ID variables (that are strings).

    for (i in num ) {
      
      df[paste0("d_", vars[i],"_",vars[i+1])]<-(df[,i+1]/df[,i]-1)
    }
    

    The issue now is that I also calculate the change between the last period of one variable and the first period of the next variable, so I have to delete those columns with the combination of the two periods (in my case 3 & 6):

    df <- df %>% select(-c(contains("3") & contains("6")))