I am trying to make a piece-wise regression using non-linear data in the ggplot2 package (R). I have written code that creates one successfully but can't seem to get it where I want. The graph (below) is a CDF with two distinct increases, I'm attempting to objectively assign dates to where these increases begin and end. Unfortunately, the code I have recognises only one of the increases and ignores the send increase unless I plot it separately after creating a new dataframe. Here is the code and graph:
library(segmented)
attach(dai15E_flux_split)
y<- dai15E_flux_split$CDF15E
x<- dai15E_flux_split$Dates
pw_reg_15E<- data.frame(x = x, y = y)
out.lm <- lm(y ~ x, data = pw_reg_15E)
o <- segmented(out.lm, seg.Z = ~x, psi = list(x = c(as.POSIXct('2020-12-20 18:00:00'),as.POSIXct('2021-01-23 18:00:00'))),
control = seg.control(display = FALSE)
)
dat2 = data.frame(x = x, y = broken.line(o)$fit)
w<- ggplot(pw_reg_15E, aes(x = x, y = y)) +
geom_point() +
geom_line(data = dat2, color = 'blue', linewidth = 2) +
#geom_line(data = dat3, color = 'blue') +
scale_x_datetime(date_breaks = '5 days', date_labels = '%b-%d') +
scale_y_continuous(breaks = ~ seq(0, max(.x), .1)) +
annotate('segment', x= as.POSIXct("2021-01-29"), xend= as.POSIXct("2021-01-29"), yend=-Inf, y=Inf, colour = "black", alpha = 1) +#creates the continuous density function plot for Dai15
geom_vline(xintercept = (dai15E_flux_split$Dates[dai15E_flux_split$peak_status == "Peak"]), color = "black") +
geom_point(size = 4) + theme_bw() + #ylab(~paste('CDF', Phi)) +
labs(x = "Date")
plot(w)
If I cut the data off at a date post-second break-point I am able to detect the start of the second increase but not the end (granted this could be due to just the nature of the data):
I did try creating two separate segment lines for one graph but nothing came of it, anything I'm doing wrong? Ultimately what Im trying to do is this (please excuse the crudeness of that graph, I drew the line in mspaint):
reprex data:
structure(list(x = structure(c(1606820400, 1606906800, 1606993200,
1607079600, 1607166000, 1607252400, 1607338800, 1607425200, 1607511600,
1607598000, 1607684400, 1607770800, 1607857200, 1607943600, 1608030000,
1608116400, 1608202800, 1608289200, 1608375600, 1608462000, 1608548400,
1608634800, 1608721200, 1608807600, 1608894000, 1608980400, 1609066800,
1609153200, 1609239600, 1609326000, 1609412400, 1609498800, 1609585200,
1609671600, 1609758000, 1609844400, 1609930800, 1610017200, 1610103600,
1610190000, 1610276400, 1610362800, 1610449200, 1610535600, 1610622000,
1610708400, 1610794800, 1610881200, 1610967600, 1611054000, 1611140400,
1611226800, 1611313200, 1611399600, 1611486000, 1611572400, 1611658800,
1611745200, 1611831600), class = c("POSIXct", "POSIXt"), tzone = "Asia/Bangkok"),
y = c(0.0127654103696743, 0.0250904882020871, 0.0370714866508862,
0.0468027880724444, 0.0536991190985604, 0.0626795693063142,
0.0708962963854844, 0.0772446662188717, 0.0842076373362264,
0.090949381870277, 0.0970952248012049, 0.104371307628584,
0.110358027715881, 0.115450773819791, 0.122580977690793,
0.134406168897197, 0.146368284831593, 0.158716595955345,
0.170651213164138, 0.190479315670677, 0.224094027748295,
0.280359267223023, 0.337800104800282, 0.406506532914733,
0.491550665708546, 0.548897965427954, 0.577827044961975,
0.596475967835595, 0.605583211717681, 0.611728728447968,
0.616878285109383, 0.623334778422036, 0.631319388073111,
0.641552889847983, 0.650051719012484, 0.659696487319351,
0.666246012002399, 0.672683290607584, 0.677061608197219,
0.68231817736804, 0.687772229223841, 0.696619586579348, 0.710702270394893,
0.718733992834477, 0.728995458593268, 0.744779674223926,
0.763218116035974, 0.790875495532422, 0.825075587533513,
0.846944735886294, 0.869832760922987, 0.892032203445386,
0.919806428133041, 0.943454538344951, 0.963248306979268,
0.977788349576517, 0.986129869261138, 0.993916256029506,
1)), row.names = c(NA, 59L), class = "data.frame")
If you want to catch two increases, you need at least three breakpoints. Here's an attempt with 4:
library(segmented)
out.lm <- lm(y ~ x, data = pw_reg_15E)
br <- as.POSIXct(c('2020-12-20 18:00:00','2020-12-28 18:00:00', '2021-01-15 18:00:00', '2021-01-23 18:00:00'))
o <- segmented(out.lm, seg.Z = ~x, psi = list(x = br))
dat2 = data.frame(x = pw_reg_15E$x, y = predict(o))
ggplot(pw_reg_15E, aes(x = x, y = y)) +
geom_point() +
geom_line(data = dat2, col = 'firebrick') +
geom_vline(xintercept = o$psi[, 'Est.'])
> as.POSIXct(o$psi[, 'Est.'])
psi1.x psi2.x psi3.x psi4.x
"2020-12-20 13:14:11 PST" "2020-12-26 15:35:47 PST" "2021-01-14 12:22:15 PST" "2021-01-23 21:34:05 PST"