Search code examples
rdplyrdata-manipulation

Manipulating Single Values in R to Column values


I have imported some data provided by someone else from Excel. It's pretty messy, so I'm trying to get it into shape for analysis, but the format of the code is making it difficult. Here is a minimal example of the data as it is:

Contraption 1
Attempt 1                           
#s      AX          AY      AZ      Distance
3->6    162.4       17.3    175.6   9.1
3->11   67.3        5.3     67.3    32.7
Contraption 1
Attempt 2                       
#s      AX          AY      AZ      Distance
3->6    162.4       17.3    175.6   9.1
3->11   67.3        5.3     67.3    32.7
Contraption 2
Attempt 1                           
#s      AX          AY      AZ      Distance
3->6    162.4       17.3    175.6   9.1
3->11   67.3        5.3     67.3    32.7

What I'd like to get is:

#s      AX          AY      AZ      Distance    Contraption    Attempt
3->6    162.4       17.3    175.6   9.1         1              1
3->11   67.3        5.3     67.3    32.7        1              1
3->6    162.4       17.3    175.6   9.1         1              2
3->11   67.3        5.3     67.3    32.7        1              2
3->6    162.4       17.3    175.6   9.1         2              1
3->11   67.3        5.3     67.3    32.7        2              1

I can obviously just go by row numbers and create a list and then reassign the values to a new column, but I'd like to try to find a way for it to be repeatable with new data, as I've got quite a lot of it and will probably get updates.

Also, ideally, a dplyr solution is preferred.

Thanks in advance!

EDIT: Here is a slightly modified dput:

structure(list(` #s` = c("GROUND TRUTH", " #s", "3->6",
                              "3->11", "3->14", "6->11", "6->14", "11->14", "Contraption 1",
                              "Attempt 1", " #s", "3->6", "3->11",
                              "3->14", "6->11", "6->14", "11->14", "Attempt 2",
                              " #s", "3->6"), AX = c(NA, "AX", "162.4435258", "67.325616600000004",
                                                          "97.847449400000002", "115.628574", "76.795228199999997", "164.19540980000002",
                                                          NA, NA, "AX", "17.729424000000002", "45.376750999999999", "20.891473000000001",
                                                          "50.795566999999998", "7.6219530000000004", "58.288466999999997",
                                                          NA, "AX", "160.67087599999999"), AY = c(NA, "AY", "17.371937600000003",
                                                                                                  "5.3626663999999993", "17.839726800000001", "12.1478062", "5.5127848000000004",
                                                                                                  "13.4053042", NA, NA, "AY", "17.269538000000001", "5.4522810000000002",
                                                                                                  "17.783394999999999", "12.026367", "6.0820509999999999", "13.563755",
                                                                                                  NA, "AY", "17.594677000000001"), AZ = c(NA, "AZ", "175.6576848",
                                                                                                                                          "67.382918000000004", "99.333905000000001", "116.01154019999998",
                                                                                                                                          "76.799018200000006", "162.97033279999999", NA, NA, "AZ", "6.0498320000000003",
                                                                                                                                          "45.120047999999997", "15.705375999999999", "50.471809999999998",
                                                                                                                                          "9.6571890000000007", "59.333387999999999", NA, "AZ", "167.606852"
), DX = c(NA, "DX", "9.1008000000000013", "32.729599999999998",
          "47.664360000000002", "23.628800000000002", "38.563539999999996",
          "14.93474", NA, NA, "DX", "-11.773400000000001", "-35.472099999999998",
          "-48.059600000000003", "-23.698699999999999", "-36.286200000000001",
          "-12.5875", NA, "DX", "-11.7559"), DY = c(NA, "DY", "-1.2008800000000002",
                                                    "-0.66572000000000009", "1.0446199999999999", "0.53513999999999995",
                                                    "2.2454800000000001", "1.71034", NA, NA, "DY", "-19.358799999999999",
                                                    "-18.635200000000001", "0.52759999999999996", "0.72360000000000002",
                                                    "19.886500000000002", "19.1629", NA, "DY", "-19.334599999999998"
), DZ = c(NA, "DZ", "-20.673919999999999", "-23.138440000000003",
          "-5.8264999999999993", "-2.4645199999999998", "14.847440000000001",
          "17.31194", NA, NA, "DZ", "1.09E-2", "-1.9400000000000001E-2",
          "-1.24E-2", "-3.04E-2", "-2.3400000000000001E-2", "7.0000000000000001E-3",
          NA, "DZ", "-9.7000000000000003E-3"), Distance = c(NA, "Distance",
                                                            "22.6203", "40.088119999999989", "48.030499999999996", "23.762999999999998",
                                                            "41.383979999999994", "22.927579999999999", NA, NA, "Distance",
                                                            "22.657900000000001", "40.069200000000002", "48.0625", "23.709700000000002",
                                                            "41.3782", "22.927299999999999", NA, "Distance", "22.628")), row.names = c(NA,
                                                                                                                                       -20L), class = c("tbl_df", "tbl", "data.frame"))

Solution

  • # I had to add NAs into it to make read.table() recognise it
    df <- read.table(text="
    Contraption 1 NA NA NA
    Attempt 1 NA NA NA                     
    #s      AX          AY      AZ      Distance
    3->6    162.4       17.3    175.6   9.1
    3->11   67.3        5.3     67.3    32.7
    Contraption 1 NA NA NA
    Attempt 2 NA NA NA                
    #s      AX          AY      AZ      Distance
    3->6    162.4       17.3    175.6   9.1
    3->11   67.3        5.3     67.3    32.7
    Contraption 2 NA NA NA
    Attempt 1 NA NA NA                          
    #s      AX          AY      AZ      Distance
    3->6    162.4       17.3    175.6   9.1
    3->11   67.3        5.3     67.3    32.7", header=F)
    
    df %>%
      mutate(Contraption = ifelse(V1 == "Contraption", V2, NA),
              Attempt = ifelse(V1 == "Attempt", V2, NA),
              Group = rep(1:3, each=4)) %>%
              group_by(Group) %>%
              mutate(Contraption = Contraption[1],
                     Attempt = Attempt[2]) %>%
              filter(!is.na(V3)) %>%
              ungroup() %>%
              select("#s" = V1, AX = V2, AY = V3, AZ = V4, Distance = V5, Contraption, Attempt)
    
    # A tibble: 6 × 7
      `#s`     AX    AY    AZ Distance Contraption Attempt
      <chr> <dbl> <dbl> <dbl>    <dbl>       <dbl>   <dbl>
    1 3->6  162.   17.3 176.       9.1           1       1
    2 3->11  67.3   5.3  67.3     32.7           1       1
    3 3->6  162.   17.3 176.       9.1           1       2
    4 3->11  67.3   5.3  67.3     32.7           1       2
    5 3->6  162.   17.3 176.       9.1           2       1
    6 3->11  67.3   5.3  67.3     32.7           2       1
    

    Let me know if you have any questions/thoughts/things that could be improved!