I have two vectors:
y
[1] 54.36 70.43 36.49 39.59 15.06 34.60 0.24 24.00 20.22 24.73 29.86 49.30 37.36
[14] 29.68 37.64 7.99 22.42 28.99 3.09 63.92 17.72 9.51 13.06 3.83 51.91 1.79
[27] 16.86 50.74 28.41 20.03 24.24 33.70 7.70 24.83 53.98 -8.74 30.14 20.80 12.10
[40] 38.31 35.35 33.96 25.07 44.86 18.73 36.66 19.57 30.62 -0.65 21.66 54.59 35.28
[53] 27.83 48.27 17.99
z
[1] 53.0 17.5 48.5 40.0 46.0 38.0 14.5 19.5 37.5 5.5 16.0 25.0 11.0
[14] 17.5 14.0 21.0 22.5 0.0 3.0 18.5 37.0 53.0 33.0 47.0 18.5 22.0
[27] 14.5 48.5 48.0 26.0 28.0 56.5 15.0 29.5 7.5 35.0 7.0 25.5 21.0
[40] 8.5 53.0 51.5 9.0 15.0 20.5 13.0 23.0 15.0 18.0 38.5 7.0 17.5
[53] 35.0 29.5 16.5 34.5 27.5 18.5 18.5 24.0 24.0 30.5 28.0 27.0 15.5
[66] 12.5 23.0 22.0 45.0 12.0 19.0 10.0 85.5 16.0 34.5 43.0 27.0 13.0
[79] 22.5 30.5 22.0 59.0 21.0 7.5 23.0 10.5 16.0 20.0 62.0 13.0 64.0
[92] 52.0 18.5 33.0 0.0 25.0 11.0 16.5 17.0 48.0 30.5 21.5 18.0 19.5
[105] 11.0 95.0 38.0 17.5 42.5 7.0 48.0 38.0 23.5 16.5 7.5 51.0 14.5
[118] 20.5 23.5 8.0 46.0 45.0 64.0 75.0 35.0 10.0 10.0 10.5 12.0 12.0
[131] 13.0 15.5 39.0 29.5 3.0 13.0 25.0 5.0 0.0 29.0 28.0 7.5 14.0
[144] 26.5 19.5 62.0 23.0 8.5 31.5 23.5 26.0 11.0 18.5 28.0 31.0 42.0
[157] 57.0 54.0 10.0 12.5 13.5 11.0 8.0 35.5 60.0 18.0 101.5 15.0 21.5
[170] 9.5 17.5 18.0 16.0 28.5 35.0 47.0 26.0 50.5 13.0 18.5 14.0 18.5
[183] 27.0 33.0 28.5 24.5 34.5 9.0 9.0 53.5 15.0 14.0 18.0 16.5 27.0
[196] 11.5 14.0 27.0 10.0 46.0 0.0 18.0 27.5 67.5 22.5 12.5 26.0 24.5
[209] 0.0 22.0 12.0 30.5 23.5 2.5 15.5 34.5 50.0 62.0 5.0 9.5 11.0
[222] 10.5 6.5 23.0 12.5 18.0 17.5 31.5 42.5 15.0 54.5 48.5 9.5 16.5
[235] 18.0 42.0 49.0 4.0 42.0 47.0 13.5 28.0 8.0 43.0 8.5 10.0 26.5
[248] 13.5 28.0 37.0 23.5 10.5 45.0 15.0 10.0 35.0 36.0 20.5 17.0 22.5
[261] 64.5 21.0 25.0 8.0 12.5 21.5 24.0 8.5 27.0 70.0 34.0 31.5 12.0
[274] 5.5 36.0 28.0 6.5 14.5 18.5 21.5 20.0 23.5 20.0 27.0 19.0 17.0
[287] 10.0 50.0 36.0 8.0 22.0 58.5 30.0 19.5 0.0 22.0 0.0 7.5 0.0
[300] 59.0 13.0 4.5 19.0 14.0 0.0 17.5 19.5
I would like to select from the second vector, z, the values that come as close as possible to the values in the vector y (close, but not necessarily exactly equal).
Which function would you suggest for this?
You could create a distance matrix using `outer`, then subset by a tolerance. To find the best value, we `optimize` the trade-off between low tolerance and length of result, in an interval between 0 and 1.
# Objective used to pick the relative tolerance `a`.
#
# A value z[i] is selected when its absolute distance to at least one y[j] is
# strictly below abs(y[j] * a). The function returns `a` plus the share of `z`
# that gets selected. `z` and `y` are read from the enclosing environment.
f <- \(a) {
  # Evaluate the test inside outer() so that column j is checked against the
  # tolerance of y[j]. Comparing the length(z) x length(y) distance matrix with
  # the bare vector abs(y * a) recycles that vector down the columns, which
  # pairs most cells with the tolerance of an unrelated y.
  near <- outer(z, y, \(zi, yj) abs(zi - yj) < abs(yj * a))
  r <- z[rowSums(near) > 0]
  a + length(r) / length(z)
}
# Minimise f over [0, 1] and use the minimised value as the relative tolerance.
# NOTE(review): `$objective` is the value of f at the optimum, not its location
# (`$minimum`); kept as in the original because the printed tolerance relies on
# it -- confirm this is intended.
(tol <- optimize(f, c(0, 1))$objective)
# [1] 0.00983826
# Keep z[i] when it lies within abs(y[j] * tol) of at least one y[j]. The test
# runs inside outer() so that every column uses the tolerance of its own y[j];
# comparing the distance matrix with the bare vector abs(y * tol) would recycle
# it down the columns and misalign the tolerances.
(z1 <- z[rowSums(outer(z, y, \(zi, yj) abs(zi - yj) < abs(yj * tol))) > 0])
# Expected result for the tolerance printed above (worked out by hand for the
# aligned comparison -- re-run to confirm):
# [1] 48.5 38.0 19.5 37.5 25.0 21.0 22.5 37.0 48.5 48.0 28.0 15.0 29.5 35.0
# [15] 21.0 51.5 15.0 13.0 15.0 18.0 38.5 35.0 29.5 34.5 24.0 24.0 30.5 28.0
# [29] 45.0 12.0 34.5 13.0 22.5 30.5 21.0 20.0 13.0 64.0 52.0 25.0 17.0 48.0
# [43] 30.5 21.5 18.0 19.5 38.0 48.0 38.0 51.0 8.0 45.0 64.0 35.0 12.0 12.0
# [57] 13.0 29.5 13.0 25.0 29.0 28.0 19.5 28.0 54.0 8.0 35.5 18.0 15.0 21.5
# [71] 9.5 18.0 28.5 35.0 50.5 13.0 28.5 24.5 34.5 53.5 15.0 18.0 18.0 22.5
# [85] 24.5 12.0 30.5 34.5 9.5 18.0 15.0 54.5 48.5 9.5 18.0 49.0 28.0 8.0
# [99] 28.0 37.0 45.0 15.0 35.0 17.0 22.5 64.5 21.0 25.0 8.0 21.5 24.0 70.0
# [113] 34.0 12.0 28.0 21.5 20.0 20.0 17.0 8.0 30.0 19.5 13.0 19.5
Visualization:
# Draw all of z as tick marks on one strip, with a horizontal guide line.
stripchart(
  z,
  pch = "|", col = 8, axes = FALSE,
  panel.first = abline(h = .5, col = 8)
)
# Bottom axis showing tick marks only (axis line width set to 0).
axis(1, axTicks(1), lwd = 0, lwd.ticks = 1)
# Vertical reference line at every y value.
abline(v = y, col = 2, lty = 3)
# Overlay the selected subset z1 on the same strip.
stripchart(z1, pch = "|", col = 2, add = TRUE)
legend(
  80, 2.5,
  legend = c("z", "y", bquote(z[1])),
  lty = c(1, 3, 1), col = c(8, 2, 2),
  horiz = TRUE, bty = "n", xpd = TRUE
)
Data:
# Reference vector (55 values): the values that elements of z are matched to.
y <- c(54.36, 70.43, 36.49, 39.59, 15.06, 34.6, 0.24, 24, 20.22, 24.73,
29.86, 49.3, 37.36, 29.68, 37.64, 7.99, 22.42, 28.99, 3.09, 63.92,
17.72, 9.51, 13.06, 3.83, 51.91, 1.79, 16.86, 50.74, 28.41, 20.03,
24.24, 33.7, 7.7, 24.83, 53.98, -8.74, 30.14, 20.8, 12.1, 38.31,
35.35, 33.96, 25.07, 44.86, 18.73, 36.66, 19.57, 30.62, -0.65,
21.66, 54.59, 35.28, 27.83, 48.27, 17.99)
# Candidate vector: the values from which those close to some y are selected.
z <- c(53, 17.5, 48.5, 40, 46, 38, 14.5, 19.5, 37.5, 5.5, 16, 25,
11, 17.5, 14, 21, 22.5, 0, 3, 18.5, 37, 53, 33, 47, 18.5, 22,
14.5, 48.5, 48, 26, 28, 56.5, 15, 29.5, 7.5, 35, 7, 25.5, 21,
8.5, 53, 51.5, 9, 15, 20.5, 13, 23, 15, 18, 38.5, 7, 17.5, 35,
29.5, 16.5, 34.5, 27.5, 18.5, 18.5, 24, 24, 30.5, 28, 27, 15.5,
12.5, 23, 22, 45, 12, 19, 10, 85.5, 16, 34.5, 43, 27, 13, 22.5,
30.5, 22, 59, 21, 7.5, 23, 10.5, 16, 20, 62, 13, 64, 52, 18.5,
33, 0, 25, 11, 16.5, 17, 48, 30.5, 21.5, 18, 19.5, 11, 95, 38,
17.5, 42.5, 7, 48, 38, 23.5, 16.5, 7.5, 51, 14.5, 20.5, 23.5,
8, 46, 45, 64, 75, 35, 10, 10, 10.5, 12, 12, 13, 15.5, 39, 29.5,
3, 13, 25, 5, 0, 29, 28, 7.5, 14, 26.5, 19.5, 62, 23, 8.5, 31.5,
23.5, 26, 11, 18.5, 28, 31, 42, 57, 54, 10, 12.5, 13.5, 11, 8,
35.5, 60, 18, 101.5, 15, 21.5, 9.5, 17.5, 18, 16, 28.5, 35, 47,
26, 50.5, 13, 18.5, 14, 18.5, 27, 33, 28.5, 24.5, 34.5, 9, 9,
53.5, 15, 14, 18, 16.5, 27, 11.5, 14, 27, 10, 46, 0, 18, 27.5,
67.5, 22.5, 12.5, 26, 24.5, 0, 22, 12, 30.5, 23.5, 2.5, 15.5,
34.5, 50, 62, 5, 9.5, 11, 10.5, 6.5, 23, 12.5, 18, 17.5, 31.5,
42.5, 15, 54.5, 48.5, 9.5, 16.5, 18, 42, 49, 4, 42, 47, 13.5,
28, 8, 43, 8.5, 10, 26.5, 13.5, 28, 37, 23.5, 10.5, 45, 15, 10,
35, 36, 20.5, 17, 22.5, 64.5, 21, 25, 8, 12.5, 21.5, 24, 8.5,
27, 70, 34, 31.5, 12, 5.5, 36, 28, 6.5, 14.5, 18.5, 21.5, 20,
23.5, 20, 27, 19, 17, 10, 50, 36, 8, 22, 58.5, 30, 19.5, 0, 22,
0, 7.5, 0, 59, 13, 4.5, 19, 14, 0, 17.5, 19.5)