I am a beginner in the use of genetic algorithms, especially the GA algorithm. I would like to know how to optimize a function with two independent variables and several parameters. My first variable is a continuous variable and my second variable is a binary variable. Here is the code I produced but it does not work.
Var1_obs <- c(-1.942000, -1.338000, -2.065000, -2.080125, -3.247944, -5.365086,
-1.608000, -3.970000, -1.423000, -8.180000, -4.620000, -1.657000,
-5.200000, -6.850000, -6.950000, -1.180000, -1.175000, -1.969000,
-1.115000, -2.620000, -1.870000, -0.433000, -1.102000, -2.093687,
-2.480000, -0.580000, -0.600000, -1.807383, -2.367000, -2.276017,
-2.125331)
Var2_obs <- c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1)
TE_obs <- c(92.73958, 93.39356, 84.39019, 93.40717, 97.53228, 92.04734, 82.06016,
78.50015, 83.43671, 55.24498, 67.92513, 77.53455, 65.33344, 47.05005,
46.52794, 96.79697, 84.79326, 83.64457, 82.17259, 88.96605, 84.93663,
83.16691, 95.22838, 96.46441, 79.61302, 80.39901, 88.38439, 72.23954,
85.64084, 69.33542, 82.30360)
data<-data.frame(Var1_obs,Var2_obs,TE_obs)
plot(data$Var1_obs[data$Var2_obs==0],data$TE_obs[data$Var2_obs==0],
xlim=range(data$Var1_obs), ylim=range(data$TE_obs), col=2, pch=19,
xlab='Var1_obs', ylab='TE_obs')
points(data$Var1_obs[data$Var2_obs==1],data$TE_obs[data$Var2_obs==1],
xlim=range(data$Var1_obs), ylim=range(data$TE_obs), col=3, pch=19)
library(GA)
library(hydroGOF)
My_function <- function(Var1, Var2, P1, P2, P3, P4, P5, P6, P7) {
A <- (-1 * (Var1 + P1 - P2) - sqrt((Var1 + P1 - P2)^2 + 4 * (Var1 * P2))) / (2 * P2)
B <- 1 - P1 / Var1
C <- c(A, B)
Sel <- c(A > B, B > A)
RS <- C[Sel]
PC <- 100 / (1 + exp(P3 / 25 * (Var1 - P4)))
RMC <- (1 - RS)
symp <- RMC * (1 / (P5 / 1000) - 1) * 100 * (1 - P6)
apo <- (1 - PC / 100) * (1 / (P5 / 1000) - 1) * 100 * (P6)
TE_Pred <- (apo + symp) * (1 + P7 * Var2)
NRMSE <- (nrmse(TE_Pred, TE_obs, na.rm = T, norm = "sd"))
# if(is.na(NRMSE)|is.nan(NRMSE)|is.infinite(NRMSE)) NRMSE <- -1e6
return(NRMSE)
}
# ----------------------- BOUNDARIES --------------------------- #
P1 <- c(-3.5, -2.3)
P2 <- c(5, 15)
P3 <- c(15, 60)
P4 <- c(-7.5, -6)
P5 <- c(500, 600)
P6 <- c(0.3, 0.6)
P7 <- c(-1, 0)
min_boundary <- c(P1[1], P2[1], P3[1], P4[1], P5[1], P6[1], P7[1])
max_boundary <- c(P1[2], P2[2], P3[2], P4[2], P5[2], P6[2], P7[2])
ga(
type = "real-valued",
fitness = function(x) -My_function(Var1 = Var1_obs, Var2 = Var2_obs, P1[1],
P2[2], P3[3], P4[4], P5[5], P6[6], P7[7]),
lower = min_boundary, upper = max_boundary,
popSize = 50, maxiter = 1000, run = 100
)
I got this when I run the code
#> GA | iter = 1 | Mean = NaN | Best = -Inf
#> GA | iter = 2 | Mean = NaN | Best = -Inf
#> Error in if (object@run >= run) break :
#> missing value where TRUE/FALSE needed
Thanks for your help
The way the ga
function is designed, it requires a fitness function which takes one input (the variable so to say).
If you want to minimize the function f
with two variables x1
and x2
(that is, you want to find the values for x1
and x2
which result in a minimum value of f(x1, x2)
), you have to call the function like so
# Taken from `?ga` 3) two-dimensional Rastrigin function
# slightly adopted to have three parameters (that are not optimized)
library(GA)
f <- function(x1, x2, p1 = 20, p2 = -10, p3 = 2) {
p1 + x1^2 + x2^2 + p2*(cos(p3*pi*x1) + cos(p3*pi*x2))
}
# evaluate the function at (2, 4)
f(2, 4, p1 = 20, p2 = -10, p3 = 2)
#> [1] 20
# minimize the Rastrigin function
GA <- ga(type = "real-valued", fitness = function(x) -f(x[1], x[2], 20, 10, 2),
lower = c(-5.12, -5.12), upper = c(5.12, 5.12),
popSize = 50, maxiter = 100, seed = 1521)
summary(GA)
#> -- Genetic Algorithm -------------------
#>
#> GA settings:
#> Type = real-valued
#> Population size = 50
#> Number of generations = 100
#> Elitism = 2
#> Crossover probability = 0.8
#> Mutation probability = 0.1
#> Search domain =
#> x1 x2
#> lower -5.12 -5.12
#> upper 5.12 5.12
#>
#> GA results:
#> Iterations = 100
#> Fitness function value = -0.4974871
#> Solution =
#> x1 x2
#> [1,] -0.4976645 0.4975342
If you want to find the value P1
- P7
for which your My_Function()
function is minimized (given your values of Val1
and Val2
, you can do it like this:
# ...
res <- ga(
type = "real-valued",
fitness = function(x) -My_function(Var1 = Var1_obs, Var2 = Var2_obs,
x[1], x[2], x[3], x[4], x[5], x[6], x[7]),
lower = min_boundary, upper = max_boundary,
popSize = 50, maxiter = 1000, run = 100
)
summary(res)
#> -- Genetic Algorithm -------------------
#>
#> GA settings:
#> Type = real-valued
#> Population size = 50
#> Number of generations = 1000
#> Elitism = 2
#> Crossover probability = 0.8
#> Mutation probability = 0.1
#> Search domain =
#> x1 x2 x3 x4 x5 x6 x7
#> lower -3.5 5 15 -7.5 500 0.3 -1
#> upper -2.3 15 60 -6.0 600 0.6 0
#>
#> GA results:
#> Iterations = 341
#> Fitness function value = -63.2
#> Solution =
#> x1 x2 x3 x4 x5 x6 x7
#> [1,] -3.353621 9.373742 27.51849 -6.217355 511.5233 0.5698783 -0.0307688
Notice how we substitute the values from x
as the parameters P1
- P7
from your question.
-my_function()
?Consulting ?ga
, we see that the ga-function maximizes the target function, as you want to minimize the function, taking the negative effectively minimizes the function.