I have a panel dataset that looks something like:
df <- data.frame(id = c(1:10), income_3 = c(2:11), income_4 = c(3:8, NA, 10:12), income_5 = c(4:13), health_3 = c(NA, 6:14), health_4 = c(7:10, NA, 11:15), health_5 = c(9:18))
id income_3 income_4 income_5 health_3 health_4 health_5
1 2 3 4 NA 7 9
2 3 4 5 6 8 10
3 4 5 6 7 9 11
4 5 6 7 8 10 12
5 6 7 8 9 NA 13
6 7 8 9 10 11 14
7 8 NA 10 11 12 15
8 9 10 11 12 13 16
9 10 11 12 13 14 17
10 11 12 13 14 15 18
although with many more variables and periods and many missing values and zeros.
I would like to change it to a dataset that shows the change relative to the previous period.
Ideally, the resulting dataset would look something like
df2 <- data.frame(id = 1:10, d_income_3_4 = df$income_4/df$income_3 - 1, d_income_4_5 = df$income_5/df$income_4 - 1, d_health_3_4 = df$health_4/df$health_3 - 1, d_health_4_5 = df$health_5/df$health_4 - 1)
id d_income_3_4 d_income_4_5 d_health_3_4 d_health_4_5
1 0.50000000 0.33333333 NA 0.2857143
2 0.33333333 0.25000000 0.33333333 0.2500000
3 0.25000000 0.20000000 0.28571429 0.2222222
4 0.20000000 0.16666667 0.25000000 0.2000000
5 0.16666667 0.14285714 NA NA
6 0.14285714 0.12500000 0.10000000 0.2727273
7 NA NA 0.09090909 0.2500000
8 0.11111111 0.10000000 0.08333333 0.2307692
9 0.10000000 0.09090909 0.07692308 0.2142857
10 0.09090909 0.08333333 0.07142857 0.2000000
With NaN values having been replaced with NA.
Since there are many variables in my dataset, it would be ideal to work with a vector of the bases of the variable names, i.e.
vars <- c(income, health)
to write the code.
The first 50 rows and 17 columns of my actual dataset are (although they do not include values of zero, other columns do):
structure(list(NAME_0_bound = c("Benin", "Benin", "Benin", "Benin",
"Benin", "Benin", "Benin", "Benin", "Benin", "Benin", "Benin",
"Benin", "Botswana", "Botswana", "Botswana", "Botswana", "Botswana",
"Botswana", "Botswana", "Botswana", "Botswana", "Botswana", "Botswana",
"Botswana", "Botswana", "Botswana", "Cabo Verde", "Cabo Verde",
"Cabo Verde", "Cabo Verde", "Cabo Verde", "Cabo Verde", "Cabo Verde",
"Cabo Verde", "Cabo Verde", "Cabo Verde", "Cabo Verde", "Cabo Verde",
"Ghana", "Ghana", "Ghana", "Ghana", "Ghana", "Ghana", "Ghana",
"Ghana", "Ghana", "Ghana", "Kenya", "Kenya"), NAME_1.y = c("Alibori",
"Atakora", "Atlantique", "Borgou", "Collines", "Donga", "Kouffo",
"Littoral", "Mono", "Oueme", "Plateau", "Zou", "Central", "Chobe",
"Francistown", "Gaborone", "Ghanzi", "Jwaneng", "Kgalagadi",
"Kgatleng", "Kweneng", "Lobatse", "North-East", "Selibe Phikwe",
"South-East", "Southern", "Mosteiros", "Paúl", "Porto Novo",
"Praia", "Ribeira Grande", "Santa Catarina", "Santa Cruz", "Sao Domingos",
"Sao Filipe", "Sao Miguel", "Sao Vicente", "Tarrafal", "Ashanti",
"Brong Ahafo", "Central", "Eastern", "Greater Accra", "Northern",
"Upper East", "Upper West", "Volta", "Western", "Bomet", "Bungoma"
), trust_locgov_3 = c(68.0683098591549, 68.4376, 53.4718705035971,
84.3830952380952, 68.882, 71.4214285714286, 43.5853846153846,
39.3123076923077, 41.14171875, 51.1576744186046, 50.2595238095238,
40.8455882352941, 65.6967052023121, 54.16125, 63.5156603773585,
54.6612, 56.055, 58.3275, 55.9083870967742, 75.6334615384615,
63.1406832298137, 58.3275, 78.1531034482759, 56.661, 58.7660526315789,
57.8113274336283, 55.752, 49.2014285714286, 66.66, 36.2349082568807,
59.6184507042253, 43.23, 51.3106578947368, 50.5, 51.6077419354839,
58.5527027027027, 65.49345, 57.2592307692308, 56.8395535714286,
69.084, 44.9955, 58.8325, 42.4908771929825, 74.235, 62.7388235294118,
62.0308333333333, 49.6548979591837, 52.247027027027, 35.413125,
42.9781578947368), trust_locgov_4 = c(62.7388235294118, 75.250206185567,
50.3825581395349, 53.9997674418605, 53.2513793103448, 74.3886956521739,
51.9948, 38.5282568807339, 48.9198387096774, 45.2335714285714,
47.7564179104478, 55.3136170212766, 63.4993965517241, 64.576875,
70.82625, 51.5997777777778, 56.5160869565217, 53.328, 74.9925,
52.7079069767442, 54.0106626506024, 59.71625, 67.7015625, 62.49375,
64.4863043478261, NA, 56.8832, 49.16175, 63.4082926829268, 38.9768181818182,
56.5302941176471, 38.05175, 39.618679245283, 44.44, 47.6560526315789,
33.33, 57.8373529411765, 54.7564285714286, 51.5504, 68.175, 58.8593617021277,
61.3973684210526, 42.0500581395349, 60.371320754717, 58.176,
75.6681081081081, 50.3495744680851, 54.566814159292, 29.16375,
36.2282608695652), trust_locgov_5 = c(51.51, 63.5353125, 54.368085106383,
52.217, 50.7525, 59.88984375, 77.908875, 45.2169230769231, 61.4521875,
61.9341044776119, 72.762676056338, 45.82875, 54.8795689655172,
49.995, 50.9383018867925, 49.7386153846154, 62.3126086956522,
47.6142857142857, 45.82875, 50.904, 48.0177966101695, 62.49375,
54.6953846153846, 46.8703125, 47.911875, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, 46.2365106382979, NA, 36.2349082568807,
47.2049321266968, 47.6619, 44.3122988505747, 43.8025409836066,
58.4320920502092, 50.1451351351351, 64.576875, 47.5527155172414,
41.5052830188679, NA, NA), trust_locgov_6 = c(56.5915625, 65.5106896551724,
39.76875, 54.16125, 52.3154430379747, 59.2533333333333, 45.5650632911392,
34.2914423076923, 51.7805357142857, 39.2117647058824, 49.995,
49.3540384615385, 51.2769230769231, 52.078125, 47.874, 48.2275,
72.72, 29.16375, 67.7015625, 59.1135849056604, 51.4568421052632,
22.914375, 64.9935, 46.8703125, 52.7725, NA, 30.4317391304348,
41.6625, 42.4688709677419, 45.1306216216216, 50.82825, 37.1892631578947,
34.1429268292683, NA, 41.9312903225806, NA, 41.4288785046729,
41.0811627906977, 28.1568292682927, 42.1432211538461, 38.8570854271357,
34.1396356275304, 29.653309352518, 55.2370422535211, 52.49475,
63.6841071428571, 44.8934693877551, 32.8971428571429, 65.965625,
46.9251315789474), cont_locgov_3 = c(9.8475, 16.665, 12.7302083333333,
14.321484375, 24.303125, 17.18578125, 16.665, 3.70333333333333,
15.10265625, 12.49875, 9.3740625, 9.93490384615385, 14.6724456521739,
0, 14.8794642857143, 13.9691911764706, 12.49875, 24.9975, 11.4571875,
15.4746428571429, 13.8875, 6.94375, 14.581875, 15.6234375, 14.16525,
18.887, 10.2140322580645, 8.3325, 4.70038461538461, 8.52933070866142,
5.555, 16.5249579831933, 15.3241379310345, 12.49875, 9.8522641509434,
13.332, 6.06, 17.359375, 9.16938864628821, 7.14214285714286,
7.05057692307692, 12.3725, 4.0289010989011, 13.9866964285714,
5.35660714285714, 4.9995, 14.7421153846154, 13.5043965517241,
31.246875, 19.16475), cont_locgov_4 = c(18.9375, 15.532427184466,
27.4482352941176, 13.9289552238806, 20.07375, 14.8133333333333,
16.5032038834951, 8.61025, 14.581875, 18.60925, 9.72125, 21.525625,
28.3305, 64.576875, 28.1221875, 18.9795833333333, 24.9975, 0,
37.6773913043478, 30.5525, 16.2682142857143, 22.22, 36.4546875,
19.7896875, 20.136875, NA, 14.7171428571429, 31.94125, 12.49875,
10.7516129032258, 19.5175675675676, 10.7600787401575, 19.392,
14.581875, 9.88601694915254, 19.4425, 13.0031838565022, 20.83125,
18.6937826086957, 13.0939285714286, 24.3565384615385, 32.8325373134328,
16.665, 29.16375, 30.9492857142857, 33.33, 34.6119230769231,
35.3241025641026, 24.9975, 26.38625), cont_locgov_5 = c(14.01375,
13.8875, 10.8785416666667, 9.16575, 11.74125, 13.01953125, 20.83125,
8.10104166666667, 17.7065625, 21.0763235294118, 18.05375, 9.93490384615385,
18.60925, 16.665, 24.4023214285714, 13.9691911764706, 12.49875,
16.665, 11.4571875, 16.665, 23.0049456521739, 22.914375, 19.16475,
17.7065625, 10.415625, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, 7.4211328125, NA, 22.0175398633257, 15.4746428571429, 23.3950961538462,
30.5419391634981, 14.04304, 19.6216935483871, 22.0216071428571,
27.080625, 14.2634934497817, 21.9276315789474, NA, NA), cont_locgov_6 = c(22.5671875,
9.46875, 10.605, 24.0172058823529, 14.581875, 16.665, 23.331,
7.37105769230769, 27.9733928571429, 13.2339705882353, 15.415125,
17.6264423076923, 17.776, 24.9975, 26.7830357142857, 5.14654411764706,
22.22, 49.995, 15.6234375, 11.9035714285714, 14.960625, 14.581875,
24.9975, 17.7065625, 20.83125, NA, 11.11, 14.581875, 16.14421875,
11.3130548302872, 13.8875, 11.9035714285714, 11.804375, NA, 10.5809523809524,
NA, 9.05081896551724, 11.804375, 18.6446436285097, 17.443738317757,
24.431213592233, 21.7426171875, 14.4813103448276, 26.5405555555556,
28.1221875, 24.9975, 18.9083653846154, 17.3833189655172, 16.665,
15.83175), corr_locgov_3 = c(58.6608, 65.9354347826087, 38.784,
77.4851282051282, 63.8825, 68.1091304347826, 43.0189534883721,
40.4721428571429, 35.4803225806452, 35.5104672897196, 31.0313793103448,
37.8602912621359, 66.66, 69.0407142857143, 58.3275, 67.5219827586207,
71.4214285714286, 72.215, 60.7082142857143, 63.9391836734694,
65.5339864864865, 66.66, 53.328, 67.9419230769231, 61.61, 64.9594897959184,
75.9183333333333, 80.295, 61.8985714285714, 48.9951, 79.15875,
69.69, 59.3690625, 57.772, 65.145, 61.3973684210526, 70.5557142857143,
61.4521875, 61.9211374407583, 77.0149514563107, 58.0466292134831,
68.4714130434783, 62.7130263157895, 72.153956043956, 70.3633333333333,
74.0666666666667, 63.0459036144578, 56.6153424657534, 42.218,
53.6983333333333), corr_locgov_4 = c(60.969512195122, 61.1660439560439,
48.9198387096774, 61.8985714285714, 65.3614285714286, 58.58,
58.3275, 51.51, 52.2936206896552, 55.0259433962264, 57.2859375,
45.7318604651163, 76.2269444444444, 66.66, 74.5957142857143,
63.1084426229508, 76.659, 66.66, 77.77, 68.6205882352941, 71.2891666666667,
70.1684210526316, 82.1346428571429, 71.7876923076923, 68.4616216216216,
NA, 85.8888461538461, 75.548, 79.6216666666667, 63.7543076923077,
79.836976744186, 58.2164, 59.2533333333333, 72.0858139534884,
69.993, 51.8466666666667, 66.9804807692308, 59.2533333333333,
59.3242105263158, 68.217476635514, 56.8125, 66.1090909090909,
57.758198757764, 65.5612087912088, 63.5156603773585, 63.327,
70.8701052631579, 72.1016326530612, 54.16125, 47.6142857142857
), corr_locgov_5 = c(46.3555172413793, 59.021875, 50.3886614173228,
48.884, 46.1194186046512, 59.3690625, 64.1286075949367, 40.8942553191489,
50.8145901639344, 55.4651908396947, 63.7617391304348, 47.0541176470588,
61.2118269230769, 64.0961538461538, 59.1135849056604, 63.63,
65.0728571428571, 66.66, 62.9566666666667, 59.71625, 61.243875,
61.8985714285714, 63.4345161290323, 61.105, 63.63, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, 68.8455737704918, NA, 55.6022823529412,
59.7799541284404, 51.8831527093596, 60.671015625, 58.0973204419889,
69.0307531380753, 69.0407142857143, 64.4003389830508, 62.216,
60.2810526315789, NA, NA), corr_locgov_6 = c(53.1196875, 47.4462352941176,
44.0568965517241, 48.1433333333333, 46.9251315789474, 54.9945,
50.2059493670886, 46.7273529411765, 43.632, 39.2553333333333,
45.143164556962, 47.2444660194175, 64.7554285714286, 58.3275,
52.3757142857143, 60.922868852459, 64.576875, 57.1371428571429,
61.8985714285714, 55.3278, 64.7247096774194, 51.2769230769231,
59.2533333333333, 60.2090322580645, 57.57, NA, 43.935, 66.66,
57.4655172413793, 65.191220338983, 66.66, 61.7716, 59.71625,
NA, 47.268, NA, 60.1472413793103, 61.3033928571429, 48.2550348027842,
47.0639698492462, 49.2543333333333, 46.662, 42.7940740740741,
57.4981990521327, 47.9952, 73.7631147540984, 42.5447647058824,
43.0450224215247, 66.66, 51.3106578947368), corr_taxoff_3 = c(28.7327586206897,
36.801875, 24.3218918918919, 16.665, 21.0123913043478, 33.33,
14.1162352941176, 16.3825423728814, 7.2215, 22.1161682242991,
11.8267741935484, 9.38417475728155, 65.0791304347826, 70.82625,
60.1389130434783, 60.2278947368421, 68.2471428571428, 66.66,
59.5178571428571, 63.63, 66.3975590551181, 68.175, 55.0669565217391,
65.4255555555555, 66.66, 63.9111340206186, 85.7057142857143,
84.84, 72.215, 55.1916129032258, 84.3052941176471, 64.6994117647059,
59.59, 57.1371428571429, 72.5853333333333, 68.3265, 75.7125925925926,
66.66, 48.039014084507, 65.7255140186916, 58.1423333333333, 52.247027027027,
56.0271165644172, 51.2769230769231, 60.5202631578947, 55.1226923076923,
53.0067469879518, 45.7219230769231, 48.48, 55.2028125), corr_taxoff_4 = c(26.1018072289157,
40.27375, 23.253488372093, 38.4974418604651, 48.940253164557,
36.2708823529412, 40.289010989011, 32.3777142857143, 31.0313793103448,
33.0185046728972, 40.6209375, 24.3421348314607, 79.4246808510638,
77.77, 75.7988709677419, 62.7388235294118, 74.5023529411765,
71.4214285714286, 66.66, 62.62, 70.30546875, 63.1515789473684,
84.5153571428571, 71.205, 64.64, NA, 83.325, 77.77, 74.9925,
61.1337823834197, 75.431052631579, 60.5067692307692, 59.6839534883721,
70.7246341463415, 76.588085106383, 59.2533333333333, 69.0407142857143,
61.3973684210526, 53.7329439252336, 60.4300934579439, 47.49525,
56.0053278688525, 48.388734939759, 46.8817582417582, 51.8466666666667,
54.8332258064516, 58.4170967741935, 60.5381632653061, 46.1492307692308,
36.36), corr_taxoff_5 = c(37.194347826087, 42.42, 40.7083969465649,
36.1970967741935, 40.3881176470588, 43.2055555555556, 44.2878082191781,
37.2923076923077, 43.9109523809524, 47.3070967741935, 57.4821739130435,
38.5582352941176, 65.3087837837838, 66.66, 64.619387755102, 65.7077142857143,
72.909375, 77.77, 64.9057894736842, 57.8889473684211, 65.5205128205128,
61.105, 59.994, 59.5178571428571, 70.3633333333333, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, 66.66, NA, 50.0340281030445,
50.9123394495413, 41.5783333333333, 47.5957587548638, 51.2769230769231,
58.2524324324324, 56.5419642857143, 62.7715, 51.7452054794521,
41.2731308411215, NA, NA)), class = c("grouped_df", "tbl_df",
"tbl", "data.frame"), row.names = c(NA, -50L), groups = structure(list(
NAME_1.y = c("Alibori", "Ashanti", "Atakora", "Atlantique",
"Bomet", "Borgou", "Brong Ahafo", "Bungoma", "Central", "Chobe",
"Collines", "Donga", "Eastern", "Francistown", "Gaborone",
"Ghanzi", "Greater Accra", "Jwaneng", "Kgalagadi", "Kgatleng",
"Kouffo", "Kweneng", "Littoral", "Lobatse", "Mono", "Mosteiros",
"North-East", "Northern", "Oueme", "Paúl", "Plateau", "Porto Novo",
"Praia", "Ribeira Grande", "Santa Catarina", "Santa Cruz",
"Sao Domingos", "Sao Filipe", "Sao Miguel", "Sao Vicente",
"Selibe Phikwe", "South-East", "Southern", "Tarrafal", "Upper East",
"Upper West", "Volta", "Western", "Zou"), .rows = structure(list(
1L, 39L, 2L, 3L, 49L, 4L, 40L, 50L, c(13L, 41L), 14L,
5L, 6L, 42L, 15L, 16L, 17L, 43L, 18L, 19L, 20L, 7L, 21L,
8L, 22L, 9L, 27L, 23L, 44L, 10L, 28L, 11L, 29L, 30L,
31L, 32L, 33L, 34L, 35L, 36L, 37L, 24L, 25L, 26L, 38L,
45L, 46L, 47L, 48L, 12L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -49L), .drop = TRUE))
For my actual dataset I use:
vars <- names(df)
num<- seq(3, length(vars)-1,1)
I start the sequence at 3, because I have two ID variables (that are strings).
for (i in num ) {
df[paste0("d_", vars[i],"_",vars[i+1])]<-(df[,i+1]/df[,i]-1)
}
The issue now is that I also calculate the change between the last period of one variable and the first period of the next variable, so I have to delete those columns with the combination of the two periods (in my case 3 & 6):
df <- df %>% select(-c(contains("3") & contains("6")))