Search code examples
r dataframe merge seurat

When I merge 2 dataframes in R, my plots show NA cells exist but I can't find them in my Seurat object


I have a combined Seurat object (called CJ) to which I would like to add additional metadata columns (conditions, gene names, etc.) from a data frame called sample_data. I am using the merge() function. Both the Seurat object's metadata and sample_data share a column called orig.ident. After merging and inspecting the object using head() and is.na(), everything looks good. But when I go to plot using Seurat's DimPlot, all my cells are NA.

I'd prefer to use base-R or Seurat.

# Per-sample annotation table, keyed by `orig.ident`.
sample_data <- data.frame(
  gene_name = c("EGFP", "KMT2B", "mEGFPg", "mT5g", "mNRAS_BCg", "mT5g_F20A1", "SKMT2B1", "SEGFP1", "F20A1"),
  orig.ident = c("cao_9_EGFP_clean", "cao_9_KMT2B_clean", "cao_17_mEGFPg_clean", "cao_17_mT5g_clean", "cao_17_mNRAS_BCg_clean",
                 "cao_17_mT5g_F20Ag_clean", "cao_19_SKMT2B1_clean", "cao_19_SEGFP1_clean", "cao_19_F20A1_clean"),
  condition = c("EGFP", "Pure", "EGFP", "Stim", "Onc_Ctrl", "Rescue", "Stim", "Onc_Ctrl", "Res")
)

# Metadata data frame currently stored in the Seurat object's meta.data slot.
existing_metadata <- CJ@meta.data

# Outer join of the existing metadata with the annotations on `orig.ident`.
# I tried both base::merge() and just merge() in case of package masking
merged_metadata <- base::merge(existing_metadata, sample_data, by = "orig.ident", all = TRUE)

# Write the joined table back into the object's meta.data slot.
CJ@meta.data <- merged_metadata

DimPlot(CJ, group.by = "condition") # This results in a UMAP, but shows all cells as NA
# Some inspections

#sample_data
#   gene_name              orig.ident condition
#1       EGFP        cao_9_EGFP_clean      EGFP
#2      KMT2B       cao_9_KMT2B_clean      Pure
#3     mEGFPg     cao_17_mEGFPg_clean      EGFP
#4       mT5g       cao_17_mT5g_clean      Stim
#5  mNRAS_BCg  cao_17_mNRAS_BCg_clean  Onc_Ctrl
#6 mT5g_F20A1 cao_17_mT5g_F20Ag_clean    Rescue
#7    SKMT2B1    cao_19_SKMT2B1_clean      Stim
#8     SEGFP1     cao_19_SEGFP1_clean  Onc_Ctrl
#9      F20A1      cao_19_F20A1_clean       Res

unique(CJ$orig.ident)
[1] "cao_17_mEGFPg_clean"     "cao_17_mNRAS_BCg_clean"  "cao_17_mT5g_clean"       "cao_17_mT5g_F20Ag_clean" "cao_19_F20A1_clean"     
[6] "cao_19_SEGFP1_clean"     "cao_19_SKMT2B1_clean"    "cao_9_EGFP_clean"        "cao_9_KMT2B_clean"   

unique(sample_data$orig.ident)
[1] "cao_9_EGFP_clean"        "cao_9_KMT2B_clean"       "cao_17_mEGFPg_clean"     "cao_17_mT5g_clean"       "cao_17_mNRAS_BCg_clean" 
[6] "cao_17_mT5g_F20Ag_clean" "cao_19_SKMT2B1_clean"    "cao_19_SEGFP1_clean"     "cao_19_F20A1_clean" 

Solution

  • I've tried to create a reproducible example. I had to add row names to merged_metadata to make your code work:

    library(Seurat)
    packageVersion("Seurat") # Just FYI
    #[1] ‘5.0.1’
    library(SeuratData)
    InstallData("pbmc3k")
    pbmc3k <- LoadData("pbmc3k", type = "pbmc3k.final")

    # Mimic the question's data: cycle the nine sample IDs over all cells.
    pbmc3k$orig.ident <- rep_len(c("cao_9_EGFP_clean", "cao_9_KMT2B_clean", "cao_17_mEGFPg_clean", "cao_17_mT5g_clean", "cao_17_mNRAS_BCg_clean",
                                   "cao_17_mT5g_F20Ag_clean", "cao_19_SKMT2B1_clean", "cao_19_SEGFP1_clean", "cao_19_F20A1_clean"),
                                 length.out = length(pbmc3k$orig.ident))

    # Per-sample annotation table, keyed by `orig.ident`.
    sample_data <- data.frame(
      gene_name = c("EGFP", "KMT2B", "mEGFPg", "mT5g", "mNRAS_BCg", "mT5g_F20A1", "SKMT2B1", "SEGFP1", "F20A1"),
      orig.ident = c("cao_9_EGFP_clean", "cao_9_KMT2B_clean", "cao_17_mEGFPg_clean", "cao_17_mT5g_clean", "cao_17_mNRAS_BCg_clean",
                     "cao_17_mT5g_F20Ag_clean", "cao_19_SKMT2B1_clean", "cao_19_SEGFP1_clean", "cao_19_F20A1_clean"),
      condition = c("EGFP", "Pure", "EGFP", "Stim", "Onc_Ctrl", "Rescue", "Stim", "Onc_Ctrl", "Res")
    )
    # A duplicated key would fan out rows in the join and duplicate cells.
    stopifnot(anyDuplicated(sample_data$orig.ident) == 0L)

    existing_metadata <- pbmc3k@meta.data
    # merge() discards the row names (the cell barcodes) and sorts the result
    # by the key, so carry the barcodes through the join as a regular column.
    existing_metadata$cell_barcode <- rownames(existing_metadata)
    # all.x = TRUE keeps exactly one row per cell; all = TRUE could add rows
    # for samples that have no cells.
    merged_metadata <- merge(existing_metadata,
                             sample_data, by = "orig.ident",
                             all.x = TRUE)
    # Restore the barcodes as row names, put the rows back in the object's
    # original cell order, and drop the helper column. Copying
    # rownames(existing_metadata) over positionally would attach each barcode
    # to another cell's row, because merge() has re-sorted the rows.
    rownames(merged_metadata) <- merged_metadata$cell_barcode
    merged_metadata <- merged_metadata[rownames(existing_metadata),
                                       setdiff(names(merged_metadata), "cell_barcode"),
                                       drop = FALSE]

    pbmc3k@meta.data <- merged_metadata

    DimPlot(pbmc3k, group.by = "condition")
    

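    Did your merged_metadata have row names corresponding to your cells? merge() returns a data frame with new automatic row names, so the cell barcodes are lost unless you restore them.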

    Hope that helps!