I have a vector of 100 integers, x1, where each element of the vector is in [1, 7]. I want to find, using R, another vector of 100 elements, x2, also containing integers ranging from 1 to 7, that minimizes the following function:
abs(sum(x2 - x1 > 1) - 50)
In other words, I want as close to 50% of the x2 values to be larger than the x1 values by more than 1. There are no other constraints, except that x2 must be between 1 and 7.
Here is the data for x1:
> dput(x1)
c(3L, 4L, 2L, 5L, 1L, 3L, 2L, 5L, 2L, 2L, 1L, 2L, 2L, 2L, 3L,
5L, 3L, 3L, 3L, 1L, 5L, 2L, 2L, 7L, 2L, 4L, 2L, 2L, 3L, 3L, 1L,
5L, 2L, 4L, 3L, 2L, 4L, 3L, 3L, 3L, 3L, 2L, 2L, 3L, 3L, 3L, 3L,
2L, 3L, 4L, 3L, 2L, 4L, 3L, 2L, 3L, 3L, 4L, 5L, 3L, 2L, 3L, 3L,
3L, 4L, 2L, 4L, 4L, 4L, 2L, 4L, 2L, 4L, 3L, 2L, 3L, 3L, 2L, 3L,
3L, 3L, 2L, 2L, 4L, 3L, 2L, 6L, 5L, 5L, 3L, 3L, 2L, 3L, 2L, 3L,
3L, 4L, 3L, 2L, 2L)
Simulate samples x2
and find the function's minimum, which is zero.
x1 <- c(3L, 4L, 2L, 5L, 1L, 3L, 2L, 5L, 2L, 2L, 1L, 2L, 2L, 2L, 3L,
5L, 3L, 3L, 3L, 1L, 5L, 2L, 2L, 7L, 2L, 4L, 2L, 2L, 3L, 3L, 1L,
5L, 2L, 4L, 3L, 2L, 4L, 3L, 3L, 3L, 3L, 2L, 2L, 3L, 3L, 3L, 3L,
2L, 3L, 4L, 3L, 2L, 4L, 3L, 2L, 3L, 3L, 4L, 5L, 3L, 2L, 3L, 3L,
3L, 4L, 2L, 4L, 4L, 4L, 2L, 4L, 2L, 4L, 3L, 2L, 3L, 3L, 2L, 3L,
3L, 3L, 2L, 2L, 4L, 3L, 2L, 6L, 5L, 5L, 3L, 3L, 2L, 3L, 2L, 3L,
3L, 4L, 3L, 2L, 2L)
fobj <- function(x, y, const = 50L) abs(sum( (y - x) > 1L) - const)
R <- 1000L
mat <- replicate(R, sample(7, length(x1), TRUE))
vec <- apply(mat, 2, fobj, x = x1)
i_min <- which(vec == min(vec))
# any of these columns is a solution to the problem
# mat[, i_min]
# show a few solutions
mat[, head(i_min)]
#> [,1] [,2] [,3] [,4] [,5] [,6]
#> [1,] 6 7 5 4 4 4
#> [2,] 1 5 6 3 6 2
#> [3,] 7 1 2 4 3 2
#> [4,] 3 7 5 5 2 7
#> [5,] 4 4 7 6 4 7
#> [6,] 2 1 2 5 1 1
#> [7,] 5 5 6 6 2 2
#> [8,] 7 7 2 6 7 6
#> [9,] 6 6 7 6 4 2
#> [10,] 7 1 4 6 4 1
#> [11,] 2 3 5 6 7 2
#> [12,] 6 3 4 4 7 6
#> [13,] 6 1 6 5 2 3
#> [14,] 4 7 7 1 4 3
#> [15,] 5 6 7 2 1 2
#> [16,] 5 1 6 7 4 1
#> [17,] 3 4 2 5 4 3
#> [18,] 7 1 5 7 7 3
#> [19,] 3 1 4 5 7 7
#> [20,] 1 5 7 4 4 5
#> [21,] 4 4 2 1 6 5
#> [22,] 3 5 2 5 2 7
#> [23,] 2 5 6 6 1 7
#> [24,] 3 1 5 3 1 6
#> [25,] 7 1 7 2 2 2
#> [26,] 1 3 4 7 3 6
#> [27,] 4 5 1 6 7 1
#> [28,] 6 4 4 5 7 1
#> [29,] 1 5 2 1 1 7
#> [30,] 5 3 7 3 7 3
#> [31,] 6 7 1 4 4 2
#> [32,] 2 6 2 3 5 4
#> [33,] 5 6 5 7 5 6
#> [34,] 6 7 7 2 2 6
#> [35,] 1 4 7 7 6 1
#> [36,] 5 4 6 3 1 7
#> [37,] 7 4 1 1 4 6
#> [38,] 7 1 7 7 4 5
#> [39,] 4 1 4 7 5 7
#> [40,] 7 5 2 4 7 3
#> [41,] 2 3 7 3 4 6
#> [42,] 3 6 4 4 7 7
#> [43,] 2 2 1 7 4 6
#> [44,] 7 7 3 2 5 7
#> [45,] 3 1 5 3 5 4
#> [46,] 7 5 5 3 4 6
#> [47,] 4 4 2 5 5 7
#> [48,] 4 3 4 6 2 4
#> [49,] 4 4 6 7 6 5
#> [50,] 5 6 2 1 4 2
#> [51,] 6 5 1 4 3 1
#> [52,] 1 5 4 3 3 3
#> [53,] 6 6 7 3 7 2
#> [54,] 5 6 7 4 7 5
#> [55,] 2 6 6 7 4 5
#> [56,] 2 3 4 7 2 5
#> [57,] 4 3 7 3 5 5
#> [58,] 7 2 3 5 7 5
#> [59,] 1 4 1 2 5 6
#> [60,] 1 2 6 3 3 6
#> [61,] 3 2 4 2 2 5
#> [62,] 6 5 7 6 5 7
#> [63,] 5 3 1 1 7 7
#> [64,] 2 1 1 4 6 5
#> [65,] 6 7 2 2 7 5
#> [66,] 2 3 1 5 4 6
#> [67,] 5 2 3 1 4 3
#> [68,] 2 6 1 5 3 2
#> [69,] 2 4 4 3 6 1
#> [70,] 6 6 2 3 6 5
#> [71,] 3 4 5 3 5 5
#> [72,] 7 4 4 6 7 6
#> [73,] 6 6 5 3 6 1
#> [74,] 3 6 4 5 5 3
#> [75,] 4 3 3 3 7 4
#> [76,] 7 6 6 2 4 7
#> [77,] 5 7 2 5 1 5
#> [78,] 4 4 6 4 2 3
#> [79,] 7 7 1 6 6 5
#> [80,] 4 1 6 1 4 5
#> [81,] 1 6 5 3 5 4
#> [82,] 7 5 3 6 3 2
#> [83,] 5 5 1 5 5 5
#> [84,] 3 4 3 7 2 6
#> [85,] 1 4 6 3 6 3
#> [86,] 7 1 7 5 2 6
#> [87,] 2 4 6 6 2 7
#> [88,] 5 7 6 5 5 5
#> [89,] 1 2 7 7 3 2
#> [90,] 6 7 4 2 2 1
#> [91,] 1 5 2 3 5 6
#> [92,] 5 3 7 7 2 5
#> [93,] 3 5 7 5 2 7
#> [94,] 6 1 5 5 3 6
#> [95,] 6 3 6 7 3 6
#> [96,] 4 7 7 7 2 4
#> [97,] 2 7 6 3 7 7
#> [98,] 5 2 4 1 6 2
#> [99,] 5 2 1 7 6 3
#> [100,] 3 7 3 6 5 7
tbl <- table(vec)
# around 3% of the samples is a solution
proportions(table(vec))
#> vec
#> 0 1 2 3 4 5 6 7 8 9 10 11 12
#> 0.036 0.075 0.054 0.059 0.090 0.081 0.079 0.079 0.088 0.076 0.073 0.059 0.041
#> 13 14 15 16 17 18 19 20 21
#> 0.035 0.030 0.016 0.008 0.008 0.001 0.007 0.004 0.001
# plot the results, the bar we want is the first
barplot(tbl)
Created on 2023-07-21 with reprex v2.0.2