I got a list of nodes, and I need to randomly assign 'p' hubs to 'n' clients.
I got the following data, where the first row shows:
The following lines show:
Below I will show the raw data, adding colnames()
it would look something like this:
total_nodes hubs_required total_capacity
50 5 120
node number x_coordinate y_coordinate node_demand
1 2 62 3
2 80 25 14
3 36 88 1
4 57 23 14
. . . .
. . . .
. . . .
50 1 58 2
The x and y values are provided so we can calculate the Euclidean distance.
nodes:
50 5 120
1 2 62 3
2 80 25 14
3 36 88 1
4 57 23 14
5 33 17 19
6 76 43 2
7 77 85 14
8 94 6 6
9 89 11 7
10 59 72 6
11 39 82 10
12 87 24 18
13 44 76 3
14 2 83 6
15 19 43 20
16 5 27 4
17 58 72 14
18 14 50 11
19 43 18 19
20 87 7 15
21 11 56 15
22 31 16 4
23 51 94 13
24 55 13 13
25 84 57 5
26 12 2 16
27 53 33 3
28 53 10 7
29 33 32 14
30 69 67 17
31 43 5 3
32 10 75 3
33 8 26 12
34 3 1 14
35 96 22 20
36 6 48 13
37 59 22 10
38 66 69 9
39 22 50 6
40 75 21 18
41 4 81 7
42 41 97 20
43 92 34 9
44 12 64 1
45 60 84 8
46 35 100 5
47 38 2 1
48 9 9 7
49 54 59 9
50 1 58 2
I extracted the information from the first line.
nodes <- as.matrix(read.table(data))
header<-colnames(nodes)
clean_header <-gsub('X','',header)
requested_hubs <- as.numeric(clean_header[2])
max_supply_capacity <- as.numeric(clean_header[3])
I need to randomly select 5 nodes, that will act as hubs
set.seed(37)
node_to_hub <-nodes[sample(nrow(nodes),requested_hubs,replace = FALSE),]
Then randomly I need to assign nodes to each hub calculate the distances between the hub and each one of the nodes and when the max_supply_capacity
(120) is exceeded select the following hub and repeat the process.
After the final iteration I need to return the cumulative sum of distances for all the hubs.
I need to repeat this process 100 times and return the min()
value of the cumulative sum of distances.
This is where I'm completely stuck since I'm not sure how to loop through a matrix let alone when I have to select elements randomly.
I got the following elements:
capacity <- c(numeric()) # needs to be <= to 120
distance_sum <- c(numeric())
global_hub_distance <- c(numeric())
The formula for the euclidean distance (rounded) would be as below but I'm not sure how I can reflect the random selection when assigning nodes.
distance <-round(sqrt(((node_to_hub[i,2]-nodes[i,2]))^2+(node_to_hub[random,3]-nodes[random,3])^2))
The idea for the loop I think I need is below, but as I mentioned before I don't know how to deal with the sample client selection, and the distance calculation of the random clients.
for(i in 1:100){
node_to_hub
for(i in 1:nrow(node_to_hub){
#Should I randomly sample the clients here???
while(capacity < 120){
node_demand <- nodes[**random**,3]
distance <-round(sqrt(((node_to_hub[i,2]-nodes[i,2]))^2+(node_to_hub[**random**,3]-nodes[**random**,3])^2))
capacity <-c(capacity, node_demand)
distance_sum <- c(distance_sum,distance)
}
global_hub_distance <- c(global_hub_distance,distance_sum)
capacity <- 0
distance_sum <- 0
}
min(global_hub_distance)
}
Not EXACTLY sure what you are looking for but this code may be able to help you. It's not extremely fast, as instead of using a while to stop after hitting your total_capacity it just does a cumsum on the full node list and find the place where you exceed 120.
nodes <- structure(list(node_number = 1:50,
x = c(2L, 80L, 36L, 57L, 33L, 76L, 77L, 94L,
89L, 59L, 39L, 87L, 44L, 2L, 19L, 5L,
58L, 14L, 43L, 87L, 11L, 31L, 51L, 55L,
84L, 12L, 53L, 53L, 33L, 69L, 43L, 10L,
8L, 3L, 96L, 6L, 59L, 66L, 22L, 75L, 4L,
41L, 92L, 12L, 60L, 35L, 38L, 9L, 54L, 1L),
y = c(62L, 25L, 88L, 23L, 17L, 43L, 85L, 6L, 11L,
72L, 82L, 24L, 76L, 83L, 43L, 27L, 72L, 50L,
18L, 7L, 56L, 16L, 94L, 13L, 57L, 2L, 33L, 10L,
32L, 67L, 5L, 75L, 26L, 1L, 22L, 48L, 22L, 69L,
50L, 21L, 81L, 97L, 34L, 64L, 84L, 100L, 2L, 9L, 59L, 58L),
node_demand = c(3L, 14L, 1L, 14L, 19L, 2L, 14L, 6L,
7L, 6L, 10L, 18L, 3L, 6L, 20L, 4L,
14L, 11L, 19L, 15L, 15L, 4L, 13L,
13L, 5L, 16L, 3L, 7L, 14L, 17L,
3L, 3L, 12L, 14L, 20L, 13L, 10L,
9L, 6L, 18L, 7L, 20L, 9L, 1L, 8L,
5L, 1L, 7L, 9L, 2L)),
.Names = c("node_number", "x", "y", "node_demand"),
class = "data.frame", row.names = c(NA, -50L))
total_nodes = nrow(nodes)
hubs_required = 5
total_capacity = 120
iterations <- 100
track_sums <- matrix(NA, nrow = iterations, ncol = hubs_required)
colnames(track_sums) <- paste0("demand_at_hub",1:hubs_required)
And then I prefer using a function for distance, in this case A and B are 2 separate vectors with c(x,y) and c(x,y).
euc.dist <- function(A, B) round(sqrt(sum((A - B) ^ 2))) # distances
The Loop:
for(i in 1:iterations){
# random hub selection
hubs <- nodes[sample(1:total_nodes, hubs_required, replace = FALSE),]
for(h in 1:hubs_required){
# sample the nodes into a random order
random_nodes <- nodes[sample(1:nrow(nodes), size = nrow(nodes), replace = FALSE),]
# cumulative sum their demand, and get which number passes 120,
# and subtract 1 to get the node before that
last <- which(cumsum(random_nodes$node_demand) > total_capacity) [1] - 1
# get sum of all distances to those nodes (1 though the last)
all_distances <- apply(random_nodes[1:last,], 1, function(rn) {
euc.dist(A = hubs[h,c("x","y")],
B = rn[c("x","y")])
})
track_sums[i,h] <- sum(all_distances)
}
}
min(rowSums(track_sums))
EDIT
as a function:
hubnode <- function(nodes, hubs_required = 5, total_capacity = 120, iterations = 10){
# initialize results matrices
track_sums <- node_count <- matrix(NA, nrow = iterations, ncol = hubs_required)
colnames(track_sums) <- paste0("demand_at_hub",1:hubs_required)
colnames(node_count) <- paste0("nodes_at_hub",1:hubs_required)
# user defined distance function (only exists wihtin hubnode() function)
euc.dist <- function(A, B) round(sqrt(sum((A - B) ^ 2)))
for(i in 1:iterations){
# random hub selection
assigned_hubs <- sample(1:nrow(nodes), hubs_required, replace = FALSE)
hubs <- nodes[assigned_hubs,]
assigned_nodes <- NULL
for(h in 1:hubs_required){
# sample the nodes into a random order
assigned_nodes <- sample((1:nrow(nodes))[-assigned_hubs], replace = FALSE)
random_nodes <- nodes[assigned_nodes,]
# cumulative sum their demand, and get which number passes 120,
# and subtract 1 to get the node before that
last <- which(cumsum(random_nodes$node_demand) > total_capacity) [1] - 1
# if there are none
if(is.na(last)) last = nrow(random_nodes)
node_count[i,h] <- last
# get sum of all distances to those nodes (1 though the last)
all_distances <- apply(random_nodes[1:last,], 1, function(rn) {
euc.dist(A = hubs[h,c("x","y")],
B = rn[c("x","y")])
})
track_sums[i,h] <- sum(all_distances)
}
}
return(list(track_sums = track_sums, node_count = node_count))
}
output <- hubnode(nodes, iterations = 100)
node_count <- output$node_count
track_sums <- output$track_sums
plot(rowSums(node_count),
rowSums(track_sums), xlab = "Node Count", ylab = "Total Demand", main = paste("Result of", 100, "iterations"))
min(rowSums(track_sums))