Current Output:
I've plotted two normal curves overlaid on a histogram; I fitted each curve by manually selecting a range of values, and drew the plot with ggplot.
However, I can't figure out how to smooth the curves, as I'm relatively new to RStudio. Can anyone help?
My current code:
# Histogram of masses_kDa with a scaled normal curve fitted to each of two
# manually chosen mass windows, plus a dashed line at each window's mean.
library(ggplot2)

# NOTE(review): `%>%` and `filter()` presumably come from dplyr, which must
# already be attached when this runs -- confirm.

# Restrict the data to the mass window of each peak
data_m1 <- data %>% filter(masses_kDa >= 0 & masses_kDa <= 150) # change range here
data_m2 <- data %>% filter(masses_kDa >= 300 & masses_kDa <= 900) # change range here

# Mean and standard deviation of each peak
mean_m1 <- mean(data_m1$masses_kDa)
sd_m1 <- sd(data_m1$masses_kDa)
mean_m2 <- mean(data_m2$masses_kDa)
sd_m2 <- sd(data_m2$masses_kDa)

# Bin width (in x-axis units) and number of observations in each peak.
# A density is turned into expected counts per bin with
# density * bin_width * n, so the histogram below must use this same width.
bin_width <- 50
total_points_m1 <- nrow(data_m1)
total_points_m2 <- nrow(data_m2)

# Number of x positions at which each curve is evaluated. stat_function()
# defaults to 101 points over the whole x range, which looks jagged when a
# peak is narrow relative to that range.
curve_points <- 1001

# NOTE(review): the peaks are fitted on `data` while the histogram draws
# `cleaned_data`; if the two differ, the curve heights will not match the
# bars -- confirm this is intended.
ggplot(cleaned_data, aes(x = masses_kDa)) +
  # binwidth (not bins) so the bars use the width the curves are scaled by
  geom_histogram(
    color = "transparent",
    fill = "red",
    alpha = 0.5,
    binwidth = bin_width # adjust bin_width above to change the binning
  ) +
  labs(x = " ", y = " ", title = " ") +
  # Normal curves, scaled from density to counts per bin
  stat_function(
    fun = function(x) {
      dnorm(x, mean = mean_m1, sd = sd_m1) * bin_width * total_points_m1
    },
    n = curve_points,
    col = "#8B0000"
  ) +
  stat_function(
    fun = function(x) {
      dnorm(x, mean = mean_m2, sd = sd_m2) * bin_width * total_points_m2
    },
    n = curve_points,
    col = "#8B0000"
  ) +
  # Mean lines. xintercept is passed as a fixed parameter rather than inside
  # aes(), so exactly one line is drawn instead of one per row of the data.
  geom_vline(xintercept = mean_m1, linetype = "dashed", color = "black") +
  geom_vline(xintercept = mean_m2, linetype = "dashed", color = "black") +
  # No gaps between the plotted data and the axes
  scale_x_continuous(expand = c(0, 0)) +
  scale_y_continuous(expand = c(0, 0)) +
  coord_cartesian(xlim = c(0, 2000)) + # adjust the cutoff point here
  theme(
    panel.background = element_blank(), # clear background
    axis.line = element_line(color = "black")
  )
A simple fix is to give each `stat_function()` call more points at which to evaluate the curve, e.g. `n = 1001` (the default is 101):
# Reproducible-style example: simulate two normal peaks, draw their histogram,
# and overlay each peak's normal curve evaluated at 1001 points.
library(ggplot2)

# Size, mean and standard deviation of each simulated peak
major_peak <- list(n = 1000, mean = 750, sd = 180)
minor_peak <- list(n = 30, mean = 75, sd = 25)

# Draw the major peak first, then the minor one, into a single column `x`
df <- tibble::tibble(
  x = c(
    rnorm(major_peak$n, mean = major_peak$mean, sd = major_peak$sd),
    rnorm(minor_peak$n, mean = minor_peak$mean, sd = minor_peak$sd)
  )
)

# Histogram bin width, also used to scale densities to counts per bin
bin_width <- 50

# Build the curve function for one peak: normal density scaled by the bin
# width and the peak's sample size.
scaled_normal <- function(peak) {
  force(peak)
  function(x) {
    dnorm(x, mean = peak$mean, sd = peak$sd) * bin_width * peak$n
  }
}

ggplot(df, aes(x)) +
  geom_histogram(
    binwidth = bin_width,
    fill = "red",
    colour = "transparent",
    alpha = 0.5
  ) +
  # n = 1001 evaluation points per curve
  stat_function(fun = scaled_normal(major_peak), n = 1001, colour = "#8B0000") +
  stat_function(fun = scaled_normal(minor_peak), n = 1001, colour = "#8B0000") +
  # No padding between the data and the axes
  scale_x_continuous(expand = c(0, 0)) +
  scale_y_continuous(expand = c(0, 0)) +
  coord_cartesian(xlim = c(0, 2000)) +
  theme(
    axis.line = element_line(color = "black"),
    panel.background = element_blank()
  )