I have a dataframe df
df<- structure(list(ID = structure(c(1L, 3L, 5L, 11L, 12L, 13L, 14L,
15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L,
31L, 32L, 33L, 36L, 37L, 40L, 41L, 44L, 45L, 46L, 47L, 48L, 49L,
50L, 52L, 53L, 54L, 56L, 57L, 58L, 60L, 62L, 63L, 66L, 67L, 68L,
69L, 70L, 71L, 72L, 75L, 77L, 80L, 81L, 82L, 88L, 93L, 95L, 97L,
99L, 101L, 102L, 107L, 108L, 114L), .Label = c("AU-Tum", "AU-Wac",
"BE-Bra", "BE-Jal", "BE-Vie", "BR-Cax", "BR-Ma2", "BR-Sa1", "BR-Sa3",
"BW-Ma1", "CA-Ca1", "CA-Ca2", "CA-Ca3", "CA-Gro", "Ca-Man", "CA-NS1",
"CA-NS2", "CA-NS3", "CA-NS4", "CA-NS5", "CA-NS6", "CA-NS7", "CA-Oas",
"CA-Obs", "CA-Ojp", "CA-Qcu", "CA-Qfo", "CA-SF1", "CA-SF2", "CA-SF3",
"CA-SJ1", "CA-SJ2", "CA-SJ3", "CA-TP1", "CA-TP2", "CA-TP4", "CN-Cha",
"CN-Ku1", "CZ-Bk1", "De-Bay", "DE-Hai", "DE-Har", "DE-Meh", "DE-Tha",
"DE-Wet", "DK-Sor", "ES-Es1", "FI-Hyy", "FI-Sod", "FR-Fon", "FR-Hes",
"FR-Lbr", "FR-Pue", "GF-Guy", "ID-Pag", "IL-Yat", "IT-Col", "IT-Cpz",
"IT-Lav", "IT-Lma", "IT-Noe", "IT-Non", "IT-Pt1", "IT-Ro1", "IT-Ro2",
"IT-Sro", "JP-Tak", "JP-Tef", "JP-Tom", "NL-Loo", "PT-Esp", "RU-Fyo",
"RU-Zot", "SE-Abi", "SE-Fla", "SE-Nor", "SE-Sk1", "SE-Sk2", "SE-St1",
"UK-Gri", "UK-Ham", "US-Bar", "US-Blo", "US-Bn1", "US-Bn2", "Us-Bn3",
"US-Dk2", "US-Dk3", "US-Fmf", "US-Fwf", "US-Ha1", "US-Ha2", "US-Ho1",
"US-Ho2", "US-Lph", "US-Me1", "US-Me3", "US-Nc2", "US-NR1", "US-Oho",
"US-So2", "US-So3", "US-Sp1", "US-Sp2", "US-Sp3", "US-Syv", "US-Umb",
"US-Wbw", "US-Wcr", "US-Wi0", "US-Wi1", "US-Wi2", "US-Wi4", "US-Wi8",
"VU-Coc", "CA-Cbo", "CN-Lao", "ID-Buk", "IS-Gun", "JP-Fuj", "MY-Pso",
"RU-Ab", "RU-Be", "RU-Mix", "TH-Mae"), class = "factor"), Y = c(436.783385497984,
55.1825021383702, 526.4133417369, 391.49284084118, -519.814235572849,
11.5525291214872, 162.441016515717, 39.0395567645998, -70.4910326673707,
17.1155716306239, -106.326129257097, -94.9308303585276, -66.4285516217351,
-144.929052323413, -220.613145695315, 157.129576861289, 44.1257786633602,
46.8326830295943, -146.719591499443, 30.8043649939355, -4.10548956954153,
-108.258462657337, 90.3369144331664, 126.866108251153, 42.9489971246803,
-45.4886732113082, 483.932040393885, 590.754048774834, 82.1480000555981,
76.8863707484328, 404.007940533033, 202.629066249886, -46.9675149230141,
557.939170770813, 300.979565786038, 224.256197650044, 148.719307398695,
201.195892312115, 466.727302447427, 552.762670615377, 595.145436977735,
481.359543363331, 467.379381521489, 444.812610935212, 308.198167469197,
-638.973101716489, 321.395064735785, 181.896345832773, 629.214319321327,
-176.181996958815, 59.1716887350485, -186.42650026083, 515.533437888983,
595.091753601562, 367.15020653978, 934.415348643437, 255.499246957091,
368.347069109092, 141.97570288631, 39.5917358684237, 105.039591642989,
77.9087587283187, 153.700042322307, 157.436949779134, 358.242634316906
), Unc = c(-2.87896446519996, -0.30731156873436, -1.3811336535939,
-3.60168125065523, 1.35359565655672, -0.58525692609091, -0.463995294634932,
-0.112770209421705, -0.178508318809592, -0.44506337354913, 0.285085608169751,
0.241425707960461, -0.616179720920167, -0.00579570274186878,
0.385699289486463, -0.43071884834486, -1.32799753416588, -0.138248737701239,
0.026437443324628, -0.0981101016865843, -0.125326368431498, 0.289668409902704,
-0.224679559714174, -0.376257445433255, -0.0904535633017475,
-1.27942478849042, -2.78944896222686, -1.57015451106923, -3.02435991211342,
-0.188885650489005, -2.77697810019308, -0.683634153351544, 0.148164853468482,
-1.520102142822, -0.855422614115418, -0.580609573477037, -2.12306082165876,
-1.2334420909422, -2.00323122411995, -1.45967340674881, -1.60448511158608,
-2.52530298868671, -1.28908559855364, -1.16270411420386, -1.5186009244046,
0.24330408272554, -1.72852090322909, -0.497423296440042, -2.79905035399537,
0.453520174531953, -0.38557736709315, 0.513504024431323, -1.58608831551316,
-1.56046815861851, -3.32259575879769, -7.99135003959363, -0.913109035398266,
-3.48447862397436, -0.518022500487711, -0.352263975401941, -0.331662926968978,
-0.236234610041281, -2.31039763656225, -0.987148209221828, -3.37441047823435
), X = c(98.5, 77, 63.2222222222222, 52.5, 3.5, 15.5, 71, 161.833333333333,
153.5, 73, 39, 40, 23, 14, 5.5, 78, 129.5, 73.5, 4, 100, 10,
3, 30, 65.5, 198, 45.5, 20, 111.5, 44, 68.5, 102.5, 39.1111111111111,
83.8, 136, 31.5, 56.5, 101, 39.25, 108.5, 52.1666666666667, 54.5,
9.5, 13, 52.1428571428571, 66.5, 1, 44.25, 106, 19, 202.571428571429,
36.6, 2, 21.2, 69, 67.5, 21, 135, 46.5, 17.5, 96, 80.6666666666667,
10.6666666666667, 86.5, 66.2, 2.5)), .Names = c("ID", "Y", "Unc",
"X"), row.names = c(1L, 2L, 3L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L,
25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L,
38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L,
51L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L,
65L, 66L, 67L), class = "data.frame", na.action = structure(c(4L,
52L, 68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L, 76L, 77L), .Names = c("4",
"52", "68", "69", "70", "71", "72", "73", "74", "75", "76", "77"
), class = "omit"))
I am currently implementing a non-linear regression analysis with a leave one out ID as follows:
library (minpack.lm)
id<-unique(df$ID)
nlm1_pred<- c()
for (i in id){
nlm1<- try(nlsLM(Y~offset + A*(1-exp(k*X)), data = df[df$ID != i,],
start = list(A=192.93829, k=-0.08976, offset=-700), control = list(maxiter = 500)), silent=TRUE);
nlm1_pred[[i]]<- if (inherits(nlm1, "nls")) sim = predict(nlm1, newdata=df[df$ID == i,]) else NA;
}
Basically, for each iteration, I remove one ID and perform a non-linear regression model using the other IDs which is then used to predict the response of the left out ID.
However, each observation used to create the non-linear models have different uncertainty values (see Unc
column in df
). Therefore, I wanted to include a cost function in the nls based on the uncertainty of my observations. I made some research and the following cost function should do the work : cost= sum(((obs-pred)/ Unc)^2)
which is basically the sum of squared error (SEE) weighted by the uncertainty.
I have troubles finding a way to include this cost function in the nlsLM()
object. I have seen the weights
parameters which could be included in the nlsLM()
object but the possibilities of this parameter look quite limited.
This is what I have tried so far by including the weights parameters in the nlsLM() function, but it does not work so far:
id<-unique(df$ID)
nlm1_pred<- c()
for (i in id){
nlm1<- try(nlsLM(Y~offset + A*(1-exp(k*X)), data = df[df$ID != i,],
start = list(A=192.93829, k=-0.08976, offset=-700), control = list(maxiter = 500), weights = wfct(sum(((fitted-Y)/Unc)^2))), silent=TRUE);
nlm1_pred[[i]]<- if (inherits(nlm1, "nls")) sim = predict(nlm1, newdata=df[df$ID == i,]) else NA;
}
You can use 1/Unc^2
as the weights in nlsLM
to get the objective function referred to as cost
in the question and obj.w
below.
To illustrate, the two unweighted examples using optim
and nlsLM
below give the same result and the two weighted examples (also using optim
and nlsLM
) give the same result. Since the objective here is to show that the weights are the same we used starting values fairly close to the final values in each case so that differences in the algorithms do not come into play.
rhs <- function(p, df) with(df, p[[3]] + p[[1]]*(1-exp(p[[2]]*X)))
# unweighted
st.u <- list(A = 800, k = -0.2, offset = -700)
obj.u <- function(p, df) with(df, sum((Y - rhs(p, df))^2))
fit.optim.u <- optim(st.u, obj.u, df = df)
fit.optim.u$par; fit.optim.u$value # 1
fit.nlsLM.u <- nlsLM(Y ~ rhs(c(A, k, offset), df), data = df, start = st.u)
coef(fit.nlsLM.u); deviance(fit.nlsLM.u) # 2 - same as 1
# weighted
st.w <- list(A = 2704, k = -1.7, offset = -2845)
obj.w <- function(p, df) with(df, sum((Y - rhs(p, df))^2/Unc^2))
fit.optim.w <- optim(st.w, obj.w, df = df)
fit.optim.w$par; fit.optim.w$value # 3
fit.nlsLM.w <- nlsLM(Y ~ rhs(c(A, k, offset), df), data = df, weights = 1/Unc^2,
start = st.w)
coef(fit.nlsLM.w); deviance(fit.nlsLM.w) # 4 = same as 3
It seems that precisely what objective function/weights you want it is still not clear so you might want to play with the examples above using optim
until you find the objective function that you want (since with it the objective is explicit) and then implement it in nlsLM checking that they give the same answers. Note that optim
and nlsLM
can find different answers even if you are using identical objectives/weights due to the different algorithms and stopping criteria so if you run both and find that then try starting them both at the best coefficients found to eliminate differences.
The point of all this is that since with optim
we specify the objective explicitly and not with nlsLM
it gives a way of double checking what nlsLM
is doing.
Update: reworked focusing on comparing optim and nlsLM.