Search code examples
r ggplot2 plot replicate

Line plot using ggplot on replicated data


I am trying to draw a line plot with ggplot from my replicated data. There are five data sets (a, b, c, d and e), and each data set has five replicates: for data set a these are a1, a2, a3, a4 and a5. Similarly, for data set b I have b1, b2, b3, b4 and b5, and the same applies to data sets c, d and e. The header of the file is:

window,a1,b1,c1,d1,e1,a2,b2,c2,d2,e2,a3,b3,c3,d3,e3,a4,b4,c4,d4,e4,a5,b5,c5,d5,e5

input data file: sample.csv

window,a1,b1,c1,d1,e1,a2,b2,c2,d2,e2,a3,b3,c3,d3,e3,a4,b4,c4,d4,e4,a5,b5,c5,d5,e5 1,0.5119139270063112,0.47791314579835403,0.5113649113621064,0.4768479198351406,0.4781196611442378,0.4904659238214216,0.4759001987193641,0.5045061445003193,0.46595531689590725,0.46895114448250247,0.5157597562047104,0.4626524507763233,0.5119529472920509,0.46679831665520277,0.4753377144386213,0.5124522484203766,0.47866380278778836,0.5043826069075172,0.46449419260825464,0.4653139518854975,0.5154697074773474,0.48248274005573855,0.5050305296130029,0.49623759342174706,0.4722137779263525 2,0.5056365340283908,0.46758449834277266,0.49619468151650975,0.4665775998779715,0.4651027722705412,0.48324302406609954,0.46548949368747544,0.4881297422328141,0.45664340325880926,0.4569799185976083,0.5095802510067207,0.45217566931579356,0.4962174884734367,0.4541139099188144,0.4622091078391724,0.5054415475649608,0.4675162679265218,0.48893015580115085,0.4526988654743916,0.4543939472080714,0.5082810308114483,0.47333995451366645,0.4892583331670169,0.4867492749442232,0.4607009344412719 3,0.5029917780963806,0.4506082336787582,0.48882506001136095,0.4618629443073386,0.4617223672843401,0.48114715217930987,0.44907838904575925,0.4839223049902416,0.4511580881044095,0.45572089865752335,0.5080864892752102,0.4350796753702363,0.49098197740136734,0.4519287009416857,0.4615085603108922,0.5025292480080604,0.45217319967673864,0.4824742730108645,0.4495284602643657,0.4496526917343683,0.5050565730983835,0.4551333544455344,0.4831129861925819,0.48318507707190794,0.4591711467216344 
4,0.4860720719602721,0.43752609389709796,0.47703314450655865,0.44903659289442394,0.45218688630860454,0.4651313683779647,0.44235562588716304,0.4720672218558838,0.4396877162058581,0.4426667739398868,0.48880911805420235,0.4267229992446821,0.47998318074643315,0.44049405993827934,0.451073514863146,0.48367668084996757,0.44193057677912956,0.4711244386359243,0.439707452759909,0.43988457507554785,0.48873634973006397,0.4441146906280695,0.4715323550100301,0.4757715584753876,0.44871908674391586 5,0.4807988674178904,0.4281526905980928,0.47586024905478386,0.4427077017757281,0.44957536591323116,0.4601169309591469,0.4326716980374979,0.47274778256960215,0.43049870633801507,0.4421312487544498,0.4844120076485326,0.417411296248015,0.47940558916162573,0.4321717759744735,0.4506770232182257,0.4794851393888802,0.43127390836070945,0.47045878954792775,0.4321381904939295,0.43552106233750815,0.48281846812897367,0.4335871681888636,0.47182093520357893,0.469747294370466,0.44414938500789214 6,0.46697868449038515,0.4110182141540053,0.4665989235017278,0.42965099167109877,0.43365693016694545,0.44868681383467235,0.41432513148417094,0.4615368625997305,0.4191419469166949,0.4249355380683784,0.46957399329423505,0.4011386609854494,0.46996089286953063,0.4212430433482374,0.43391126996072205,0.4662145334023028,0.4116292029207663,0.46046649791928934,0.4179878776915891,0.42242058699267215,0.47148645813781453,0.4175979061579417,0.46091994598441166,0.4559704065436049,0.42670443571750466 7,0.4539585432996916,0.4027164860276206,0.4571606009190912,0.4213806501748478,0.42696015461576975,0.43440389748725805,0.40456857455462664,0.45190755377965985,0.4137397619863431,0.41555105482939025,0.45478232248259287,0.3923155126953304,0.4600184223020188,0.4154714754213875,0.426731702076013,0.45321786256641894,0.4028526788819317,0.4515137517994161,0.41197026032900613,0.4167327687345927,0.4602469173245063,0.4054732255216844,0.4526326217432182,0.45132121321665086,0.4201850569025235 
8,0.4500307501501692,0.40390978328355526,0.4431832309492467,0.40482053867483664,0.4202986891257512,0.4333109973987116,0.407973565414009,0.43940157378549466,0.39729271359467316,0.4124600814133578,0.45372705610740427,0.38786532198574536,0.44962703798764053,0.3976432921342444,0.42040488370861245,0.44826397790043493,0.41102914472852703,0.44033344026346616,0.39413846587704454,0.40858434464233323,0.4501347092728965,0.39886987931157086,0.44001010571256627,0.43494571104007607,0.41615579336324593 9,0.4330092047324554,0.3741173358623737,0.4335645495527913,0.39883730440357434,0.40392560876927674,0.41868374766231153,0.37326716723050685,0.4322724319330117,0.3902581394607779,0.3960105153714149,0.4346725123572353,0.3611578879266523,0.43916476550749584,0.39047856335166836,0.4039376739885303,0.43303799318380637,0.37606248214759563,0.4313202734544609,0.38419036223588765,0.3962166977665812,0.43695250227993837,0.3740277728117008,0.4312086862781132,0.42687187408061195,0.40068898761510013

I tried the code below to plot my data when it is not replicated, i.e. when the file has the header: window,a1,b1,c1,d1,e1

input: sample2.csv

window,a1,b1,c1,d1,e1
1,0.5119139270063112,0.47791314579835403,0.5113649113621064,0.4768479198351406,0.4781196611442378
2,0.5056365340283908,0.46758449834277266,0.49619468151650975,0.4665775998779715,0.4651027722705412
3,0.5029917780963806,0.4506082336787582,0.48882506001136095,0.4618629443073386,0.4617223672843401
4,0.4860720719602721,0.43752609389709796,0.47703314450655865,0.44903659289442394,0.45218688630860454
5,0.4807988674178904,0.4281526905980928,0.47586024905478386,0.4427077017757281,0.44957536591323116
6,0.46697868449038515,0.4110182141540053,0.4665989235017278,0.42965099167109877,0.43365693016694545
7,0.4539585432996916,0.4027164860276206,0.4571606009190912,0.4213806501748478,0.42696015461576975
8,0.4500307501501692,0.40390978328355526,0.4431832309492467,0.40482053867483664,0.4202986891257512
9,0.4330092047324554,0.3741173358623737,0.4335645495527913,0.39883730440357434,0.40392560876927674

# Line plot of the un-replicated sample: one coloured line per column
# (a1..e1) plotted against `window`.
library(ggplot2)

df <- read.csv("sample2.csv")
head(df)

# Copy every column into its own vector and echo it to the console.
window <- df[["window"]]
window
a1 <- df[["a1"]]
a1
b1 <- df[["b1"]]
b1
c1 <- df[["c1"]]
c1
d1 <- df[["d1"]]
d1
e1 <- df[["e1"]]
e1

# The data frame and the x mapping are declared once and inherited by every
# layer; each layer only adds its own y column and a constant colour key that
# scale_color_manual() translates into the actual colour.
ggplot(data = df, mapping = aes(x = window)) +
  geom_line(aes(y = a1, color = "a1")) +
  geom_line(aes(y = b1, color = "b1")) +
  geom_line(aes(y = c1, color = "c1")) +
  geom_line(aes(y = d1, color = "d1")) +
  geom_line(aes(y = e1, color = "e1")) +
  scale_color_manual(
    name = "color",
    values = c(
      "a1" = "red",
      "b1" = "blue",
      "c1" = "yellow",
      "d1" = "black",
      "e1" = "green"
    )
  ) +
  labs(x = "window", y = "values")

Solution

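  • You can reshape your data to long format beforehand with data.table::melt, then map the resulting variable column to the group aesthetic (one line per replicate) and its first letter to the colour (one colour per data set), like this: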

    # Line plot of replicated data: one line per replicate column (a1..e5),
    # coloured by the data set (a..e) the replicate belongs to.
    library(ggplot2)
    library(data.table)
    # Read the replicated file: sample2.csv only contains a1..e1, so the
    # replicate 2-5 columns requested below would not exist in it.
    df <- read.csv("sample.csv")
    # Column names in file order: a1, b1, ..., e1, a2, ..., e5.
    measure_vars <- paste0(
        rep(c("a", "b", "c", "d", "e"), times = 5),
        rep(1:5, each = 5)
    )
    # Long format: one row per (window, replicate column) pair. `variable` is
    # kept as character so substr() below works on the column name itself.
    df <- melt(
        data.table(df),
        id.vars = "window",
        measure.vars = measure_vars,
        variable.factor = FALSE
    )
    # First letter of the column name identifies the data set (a..e).
    df[, color_var := substr(variable, 1, 1)]
    # `group = variable` draws each replicate as its own line, while
    # `color = color_var` gives all replicates of a data set the same colour.
    ggplot() +
        geom_line(
            data = df,
            aes(x = window, y = value, color = color_var, group = variable)
        ) +
        scale_color_manual(
            name = "color",
            values = c(
                "a" = "red",
                "b" = "blue",
                "c" = "yellow",
                "d" = "black",
                "e" = "green"
            )
        ) +
        xlab("window") +
        ylab("values")