I have a combined_seurat_object (called `CJ`) to which I would like to add additional columns (conditions, gene names, etc.) from a data frame called `sample_data`. I am using the `merge()` function. Both the metadata of my combined_seurat_object and `sample_data` share a column called `orig.ident`. After merging and inspecting the result using `head()` and `is.na()`, everything looks good. But when I go to plot using Seurat's `DimPlot`, all my cells are NA.
I'd prefer to use base-R or Seurat.
# Per-sample annotations to attach to the cells, keyed by `orig.ident`.
sample_data <- data.frame(
  gene_name = c(
    "EGFP", "KMT2B", "mEGFPg", "mT5g", "mNRAS_BCg", "mT5g_F20A1",
    "SKMT2B1", "SEGFP1", "F20A1"
  ),
  orig.ident = c(
    "cao_9_EGFP_clean", "cao_9_KMT2B_clean", "cao_17_mEGFPg_clean",
    "cao_17_mT5g_clean", "cao_17_mNRAS_BCg_clean", "cao_17_mT5g_F20Ag_clean",
    "cao_19_SKMT2B1_clean", "cao_19_SEGFP1_clean", "cao_19_F20A1_clean"
  ),
  condition = c(
    "EGFP", "Pure", "EGFP", "Stim", "Onc_Ctrl", "Rescue",
    "Stim", "Onc_Ctrl", "Res"
  )
)

# Current metadata table of the Seurat object.
existing_metadata <- CJ@meta.data

# I tried both base::merge() and just merge() in case of package masking.
merged_metadata <- base::merge(
  existing_metadata, sample_data,
  by = "orig.ident", all = TRUE
)

# NOTE(review): merge() does not carry over the row names of
# `existing_metadata`; the result has automatic row names and is sorted by
# `orig.ident`, so the table assigned below is no longer labelled by cell.
CJ@meta.data <- merged_metadata

# This results in a UMAP, but shows all cells as NA.
DimPlot(CJ, group.by = "condition")
# Some inspections
#sample_data
# gene_name orig.ident condition
#1 EGFP cao_9_EGFP_clean EGFP
#2 KMT2B cao_9_KMT2B_clean Pure
#3 mEGFPg cao_17_mEGFPg_clean EGFP
#4 mT5g cao_17_mT5g_clean Stim
#5 mNRAS_BCg cao_17_mNRAS_BCg_clean Onc_Ctrl
#6 mT5g_F20A1 cao_17_mT5g_F20Ag_clean Rescue
#7 SKMT2B1 cao_19_SKMT2B1_clean Stim
#8 SEGFP1 cao_19_SEGFP1_clean Onc_Ctrl
#9 F20A1 cao_19_F20A1_clean Res
unique(CJ$orig.ident)
[1] "cao_17_mEGFPg_clean" "cao_17_mNRAS_BCg_clean" "cao_17_mT5g_clean" "cao_17_mT5g_F20Ag_clean" "cao_19_F20A1_clean"
[6] "cao_19_SEGFP1_clean" "cao_19_SKMT2B1_clean" "cao_9_EGFP_clean" "cao_9_KMT2B_clean"
unique(sample_data$orig.ident)
[1] "cao_9_EGFP_clean" "cao_9_KMT2B_clean" "cao_17_mEGFPg_clean" "cao_17_mT5g_clean" "cao_17_mNRAS_BCg_clean"
[6] "cao_17_mT5g_F20Ag_clean" "cao_19_SKMT2B1_clean" "cao_19_SEGFP1_clean" "cao_19_F20A1_clean"
I've tried to create a reproducible example. I had to add rownames to `merged_metadata` to make your code work:
library(Seurat)
packageVersion("Seurat") # Just FYI
#[1] ‘5.0.1’
library(SeuratData)

# Reproducible stand-in for the asker's object: the pbmc3k demo dataset with
# the asker's sample names recycled over the cells as `orig.ident`.
InstallData("pbmc3k")
pbmc3k <- LoadData("pbmc3k", type = "pbmc3k.final")
pbmc3k$orig.ident <- rep_len(
  c(
    "cao_9_EGFP_clean", "cao_9_KMT2B_clean", "cao_17_mEGFPg_clean",
    "cao_17_mT5g_clean", "cao_17_mNRAS_BCg_clean", "cao_17_mT5g_F20Ag_clean",
    "cao_19_SKMT2B1_clean", "cao_19_SEGFP1_clean", "cao_19_F20A1_clean"
  ),
  length.out = length(pbmc3k$orig.ident)
)

# Per-sample annotations to attach to the cells, keyed by `orig.ident`.
sample_data <- data.frame(
  gene_name = c(
    "EGFP", "KMT2B", "mEGFPg", "mT5g", "mNRAS_BCg", "mT5g_F20A1",
    "SKMT2B1", "SEGFP1", "F20A1"
  ),
  orig.ident = c(
    "cao_9_EGFP_clean", "cao_9_KMT2B_clean", "cao_17_mEGFPg_clean",
    "cao_17_mT5g_clean", "cao_17_mNRAS_BCg_clean", "cao_17_mT5g_F20Ag_clean",
    "cao_19_SKMT2B1_clean", "cao_19_SEGFP1_clean", "cao_19_F20A1_clean"
  ),
  condition = c(
    "EGFP", "Pure", "EGFP", "Stim", "Onc_Ctrl", "Rescue",
    "Stim", "Onc_Ctrl", "Res"
  )
)

# A duplicated key would make merge() emit several rows per cell.
stopifnot(anyDuplicated(sample_data$orig.ident) == 0L)

existing_metadata <- pbmc3k@meta.data

# merge() drops row names and sorts the result by `orig.ident`, so the cell
# names must travel through the merge as an ordinary column. Copying
# rownames(existing_metadata) onto the merged table by position would attach
# each cell name to some other cell's metadata.
existing_metadata$cell_barcode <- rownames(existing_metadata)

# all.x = TRUE keeps every cell but never adds rows for samples that have no
# cells (such rows would have no cell name).
merged_metadata <- merge(
  existing_metadata, sample_data,
  by = "orig.ident", all.x = TRUE
)

# This is the only step I had to add to make your code work: give the merged
# table row names that correspond to the cells, in the original cell order.
rownames(merged_metadata) <- merged_metadata$cell_barcode
merged_metadata$cell_barcode <- NULL
merged_metadata <- merged_metadata[rownames(existing_metadata), , drop = FALSE]

pbmc3k@meta.data <- merged_metadata
DimPlot(pbmc3k, group.by = "condition")
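Did your `merged_metadata` have rownames corresponding to your cells? `merge()` returns a data frame with new automatic row names, so the cell names are lost unless you restore them.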
Hope that helps!