I'm trying to use a non-linear regression (NLR) function to predict the change in a value (y) over time (x), and then calclulating the time in which the prediction is at max (Optimum). I get predictions around the actual measured values (y) which is great, but these predictions are anchored to the x values, meaning i only get predicted values at certain increments. This can be seen in the following picture.
Predicted values (Line) over actual values (Points).
This means that the calculated optimum will always be at one of the x values, but i'm using this NLR function to get a mathmatical sound estimation of what time y is at optimum.
I don't know if the problem is in the method that i'm getting these values, but here is a sample:
dat <- structure(list(measure = structure(c(1L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 2L, 3L, 1L, 12L, 13L, 14L, 15L, 16L, 17L,
18L, 19L, 1L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 2L, 3L,
4L, 5L, 1L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 2L, 3L, 4L,
5L), .Label = c("L1", "L10", "L11", "L12", "L13", "L14", "L15",
"L16", "L17", "L18", "L19", "L2", "L3", "L4", "L5", "L6", "L7",
"L8", "L9"), class = "factor"), sample = structure(c(64L, 64L,
64L, 64L, 64L, 64L, 64L, 64L, 64L, 64L, 64L, 65L, 65L, 65L, 65L,
65L, 65L, 65L, 65L, 65L, 66L, 66L, 66L, 66L, 66L, 66L, 66L, 66L,
66L, 66L, 66L, 66L, 66L, 67L, 67L, 67L, 67L, 67L, 67L, 67L, 67L,
67L, 67L, 67L, 67L, 67L), .Label = c("010719A", "010719B", "010719C",
"020419A", "020419B", "020419C", "040219A", "040219B", "040219C",
"040319A", "040319B", "040319C", "050219A", "050219B", "050219C",
"060519B", "070519A", "070519B", "070519C", "080419A", "080419B",
"080419C", "080719A", "080719B", "080719C", "090419A", "090419B",
"090419C", "100419A", "100419B", "100419C", "110219A", "110219B",
"110219C", "110319A", "110319B", "110319C", "110619A", "110619B",
"110619C", "120609A", "120609B", "120609C", "130519A", "130519B",
"130519C", "140519A", "140519B", "140519C", "150419A", "150419B",
"150419C", "170619A", "170619B", "170619C", "180219B", "180219C",
"180319A", "180319B", "180319C", "180619A", "180619B", "180619C",
"220119A", "220119C", "230119A", "230119B", "230119C", "250219A",
"250219B", "250219C", "250319A", "250319B", "250319C", "260319A",
"260319B", "260319C", "280119A", "280119B", "280119C", "290119A",
"290119B", "290119C", "300119A", "300119B", "300119C"), class = "factor"),
y = c(0, 10, 10, 13.33, 16.67, 16.67, 26.67, 13.33, 30, 36.67,
26.67, 0, 3.33, 3.33, 10, 16.67, 16.67, 3.33, 3.33, 0, 0,
0, 11.43, 20, 14.29, 14.29, 20, 14.29, 2.86, 17.14, 28.57,
34.29, 11.43, 0, 2.94, 2.94, 11.76, 20.59, 20.59, 23.53,
20.59, 14.71, 17.65, 32.35, 20.59, 8.82), x = c(0, 5.833,
8.667, 12, 14.667, 16.833, 23.667, 29.833, 32.833, 35.833,
38.583, 0, 5.833, 8.667, 12, 14.667, 16.833, 23.667, 29.833,
32.833, 0, 5.833, 8.833, 11.917, 14.667, 16.917, 23.667,
29.833, 32.833, 35.833, 38.833, 41.583, 47.833, 0, 5.833,
8.833, 11.917, 14.667, 16.917, 23.667, 29.833, 32.833, 35.833,
38.833, 41.583, 47.833)), row.names = c(NA, -46L), class = c("tbl_df",
"tbl", "data.frame"))
This is a snippit of what i'm using. Here is how i got the predictions to each x and y value.
library(tidyverse)
library(modelr)
samples <- dat$sample[dat$measure == "L1"]
output <- tibble(predictions = c(0))
for (i in seq_along(samples)) {
df <- tibble(ex = dat$x[dat$sample == samples[i]],
why = dat$y[dat$sample == samples[i]])
nlm <- nls(df$why ~ alpha * df$ex^beta * exp((-gamma) * df$ex),
data = df,
start = list(alpha = 1.5, beta = 1.85, gamma = 0.095),
control = list(maxiter = 10000))
output <- add_row(output, predictions = predict(nlm, newdata = df$ex))
output <- output %>%
mutate(predictions = round(predictions, digits = 2))
}
output <- output[-1,]
dat <- dat %>%
mutate(pred = output$predictions)
Making a ggplot out of this yields the same result as shown above. In short, i do not know how i can extrapolate (Interpolate?) smoothly between two or more points of a graph, and then calculate when this graph (line) is at optimum. Is there a way that i can predict between the points? And can it be done iteratively? I have close to a 100 samples in the full data that i need to do this to.
Shortly:
You can define a new dataframe when using predict
:
df <- dat[dat$sample == dat$sample[1],]
nlm <- nls(y ~ alpha * x^beta * exp((-gamma) * x),
data = df,
start = list(alpha = 1.5, beta = 1.85, gamma = 0.095),
control = list(maxiter = 10000))
predicted <- data.frame(x = seq(min(df$x),max(df$x),0.01),
y = predict(nlm,newdata = data.frame(x = seq(min(df$x),max(df$x),0.01))))
Here it gives you a lot of points, which should allow you to fin your maximum. But: