My data frame looks like this:
dput(head(catch,70))
structure(list(fldFinalLength = c(NA, 260L, NA, NA, 460L, NA,
630L, 1030L, 820L, NA, NA, NA, 710L, 850L, 250L, 380L, 290L,
NA, NA, 320L, 270L, 740L, NA, 370L, NA, 590L, NA, 510L, NA, 470L,
340L, NA, NA, NA, 450L, 670L, NA, NA, NA, NA, 680L, 690L, NA,
270L, 370L, 300L, NA, NA, NA, 450L, 280L, 460L, NA, NA, 370L,
410L, NA, 760L, 650L, 280L, 550L, NA, 550L, 320L, NA, NA, NA,
NA, 630L, 940L), fldInitialLength = c(NA, 220L, NA, NA, 460L,
NA, 630L, 220L, 670L, NA, NA, NA, 170L, 120L, 250L, 250L, 230L,
NA, NA, 260L, 190L, 470L, NA, 290L, NA, 590L, NA, 160L, NA, 270L,
310L, NA, NA, NA, 420L, 490L, NA, NA, NA, NA, 250L, 170L, NA,
110L, 260L, 190L, NA, NA, NA, 220L, 260L, 230L, NA, NA, 250L,
410L, NA, 760L, 650L, 280L, 550L, NA, 290L, 320L, NA, NA, NA,
NA, 630L, 940L), fldCatchWeight = c(0.73, 0.672, 61.3, 0.298,
1.024, 0.206, 1.47, 11.21, 8.06, 0.412, 2.894, 0.674, 67.32,
34.683, 0.252, 1.774, 0.626, 2.6, 0.34, 1.272, 0.332, 12.12,
0.014, 1.672, 0.358, 1.53, 0.256, 1.534, 0.162, 6.31, 0.708,
0.474, 266.23, 0.796, 1.642, 4.35, 1.298, 0.114, 13.86, 20.5,
63.546, 39.07, 0.686, 1.222, 2.338, 1.244, 9.18, 4.062, 0.428,
3.692, 0.28, 2.23, 0.182, 0.052, 1.252, 0.614, 1.88, 3.63, 0.934,
0.244, 2.55, 0.136, 13.784, 0.182, 0.27, 1.538, 0.116, 0.012,
1.712, 3.8), fldMeasuringInterval = c(NA, 10L, NA, NA, 10L, NA,
10L, 10L, 10L, NA, NA, NA, 10L, 10L, 10L, 10L, 10L, NA, NA, 10L,
10L, 10L, NA, 10L, NA, 10L, NA, 10L, NA, 10L, 10L, NA, NA, NA,
10L, 10L, NA, NA, NA, NA, 10L, 10L, NA, 10L, 10L, 10L, NA, NA,
NA, 10L, 10L, 10L, NA, NA, 10L, 10L, NA, 10L, 10L, 10L, 10L,
NA, 10L, 10L, NA, NA, NA, NA, 10L, 10L), fldMeasuringOperator = c(NA,
"DFE", NA, NA, "DFE", NA, "DFE", "DFE", "DFE", NA, NA, NA, "DFE",
"DFE", "DFE", "DFE", "DFE", NA, NA, "DFE", "DFE", "DFE", NA,
"DFE", NA, "DFE", NA, "DFE", NA, "DFE", "DFE", NA, NA, NA, "MSL",
"MSL", NA, NA, NA, NA, "MSL", "MSL", NA, "MSL", "MSL", "MSL",
NA, NA, NA, "MSL", "MSL", "MSL", NA, NA, "MSL", "MSL", NA, "MSL",
"MSL", "MSL", "MSL", NA, "MSL", "MSL", NA, NA, NA, NA, "MSL",
"MSL"), fldCruiseStationNumber = c(59L, 59L, 59L, 59L, 59L, 59L,
59L, 59L, 59L, 59L, 59L, 59L, 59L, 59L, 59L, 59L, 59L, 59L, 59L,
59L, 59L, 59L, 59L, 59L, 59L, 59L, 59L, 59L, 59L, 59L, 59L, 60L,
60L, 60L, 60L, 60L, 60L, 60L, 60L, 60L, 60L, 60L, 60L, 60L, 60L,
60L, 60L, 60L, 60L, 60L, 60L, 60L, 60L, 60L, 60L, 60L, 60L, 60L,
60L, 60L, 60L, 60L, 60L, 60L, 61L, 61L, 61L, 61L, 61L, 61L),
fldMainSpeciesCode = c("BEN", "BIB", "BOF", "CDT", "COD",
"CTC", "CUR", "DGS", "DGS", "EDC", "GUG", "GUR", "HAD", "HKE",
"JOD", "LEM", "LEM", "LSD", "LSS", "MEG", "MEG", "MON", "PLA",
"PLE", "POD", "SDR", "TUB", "WAF", "WHB", "WHG", "WIT", "BEN",
"BOF", "CDT", "COD", "CUR", "EDC", "GFB", "GUG", "GUR", "HAD",
"HKE", "HOM", "JOD", "LEM", "LEM", "LSD", "LSS", "MAC", "MEG",
"MEG", "MON", "MUR", "NSQ", "PLE", "PLE", "POD", "POL", "SMH",
"TBR", "WAF", "WHB", "WHG", "WIT", "BEN", "BOF", "CDT", "CTC",
"CUR", "DGS")), row.names = c(NA, 70L), class = "data.frame")
I want to subset my data based on two columns:
- `fldCruiseStationNumber`: keep the rows where the value is 59 or 61
- `fldMainSpeciesCode`: keep the rows where the value is MEG, MON, PLE or WHG
How can I do this subset in one go?
Any help is much appreciated.
R accepts a logical vector as a row index when subsetting. It keeps the rows where the vector is TRUE and discards the rows where it is FALSE.
So you can combine both of your requirements into a single logical expression:
# Values to keep in each of the two filter columns.
keep_stations <- c(59, 61)
keep_species <- c("MEG", "MON", "PLE", "WHG")

# Build one logical mask over the rows of `catch`: a row is flagged TRUE only
# when BOTH membership tests hold. `%in%` never returns NA, so the mask is
# strictly TRUE/FALSE even where a column contains missing values.
isGood <- catch[["fldCruiseStationNumber"]] %in% keep_stations &
  catch[["fldMainSpeciesCode"]] %in% keep_species

# Keep only the flagged rows (all columns), overwriting `catch`.
catch <- catch[isGood, ]
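Note the `&`, which is the element-wise logical "and" in R: it compares the two conditions row by row, so a row is kept only when both are TRUE.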