Search code examples
r, dplyr, reshape2

Splitting a string on a keyword (" BAS ") in R gives duplicate results, especially for FRM_ID = 1014


# Install the 'librarian' helper only when it is missing. requireNamespace()
# checks availability without attaching the package; shelf() is called with an
# explicit namespace below, so attaching 'librarian' itself is not needed.
if (!requireNamespace("librarian", quietly = TRUE)) {
  install.packages("librarian")
}
# shelf() installs (if necessary) and attaches the packages used by this script.
librarian::shelf(stringr, stringi, dplyr, reshape2)


# Sample input: one row per firm record. Note that FRM_ID 1014 occurs twice
# (with different dates), so FRM_ID alone does not identify a row.
FRM_ID <- c(1006, 1007, 1011, 1012, 1014, 1014, 1015)

# Raw names; several firms may be packed into one string, separated by " BAS ".
FRM_Name <- c(
  "ABMPSNY NSMF BAS  SVFNUF SUPFRMSR BAS M&M BAS PBWFR GRJD ABRP BAS SUN PHSRMS",
  "BSJSJ FJNSNAF BAS  LSRSFN BAS NTPA BAS SDSNJ PBRTS  ",
  "KBTSK MSHJNDRS WJPRB JSW STEEL    ",
  "SSJSN PSJNTS TJTSN ABMPSNY      ",
  "EEE FNTFRPRJS BAS CTD NFSTLF      ",
  "EEE FNTFRPRJS BAS CTD. NFSTLF.     ",
  "MSRUTJ SUZUKJ BAS  XYX CORP BAS       "
)

# Dates are kept as character strings, exactly as supplied.
FRM_DATE <- c(
  "1990-02-02", "1990-02-05", "1990-02-06", "1990-02-06",
  "1990-02-06", "1990-02-07", "1990-02-08"
)

Samp_Data <- data.frame(
  FRM_ID = FRM_ID,
  FRM_Name = FRM_Name,
  FRM_DATE = FRM_DATE
)

# Split every FRM_Name on the " BAS " keyword and produce one row per piece,
# keeping the FRM_ID and FRM_DATE of the row the piece came from.
#
# Result: SD_Melt_clean with columns FRM_ID, FRM_DATE, FRM_Name.

bas_sep <- " BAS "

# Only names that actually contain the keyword are split.
SD_New <- Samp_Data %>%
  filter(grepl(bas_sep, FRM_Name))

# Derive the number of pieces from the data instead of hard-coding 5; with a
# fixed width, a name holding more pieces would keep " BAS " inside its last
# column. max(0L, ...) keeps this well-defined when SD_New has no rows.
n_parts <- max(0L, str_count(SD_New$FRM_Name, bas_sep)) + 1L

# One matrix row per SD_New row, one column per piece ("" where a name has
# fewer pieces than n_parts).
SD_New1 <- str_split_fixed(SD_New$FRM_Name, bas_sep, n_parts)

SD_New2 <- as.data.frame(SD_New1, stringsAsFactors = FALSE)
colnames(SD_New2) <- paste0("NAME", seq_len(n_parts))
SD_New2$FRM_ID <- SD_New$FRM_ID

# SD_New2 is row-aligned with SD_New, so bind the columns directly. Joining on
# FRM_ID is wrong here: FRM_ID is not unique (1014 occurs twice), so a join
# pairs every 1014 row with every other 1014 row and yields duplicated,
# cross-matched pieces.
RJ_NEW_NEW2 <- cbind(SD_New2, SD_New[c("FRM_Name", "FRM_DATE")])

# Wide (NAME1..NAMEn) to long: one row per (source row, piece).
SD_Melt <- reshape2::melt(
  RJ_NEW_NEW2,
  id.vars = c("FRM_ID", "FRM_Name", "FRM_DATE")
)

SD_Melt <- SD_Melt %>%
  select(FRM_ID, FRM_DATE, value) %>%
  rename(FRM_Name = value) %>%
  mutate(
    # Trim first so whitespace-only pieces are recognised as blank below.
    FRM_Name = str_trim(FRM_Name),
    # Blank -> NA on character columns only; applying na_if(., "") to the
    # numeric FRM_ID is a type mismatch.
    across(where(is.character), function(col) na_if(col, ""))
  )

# Drop blank pieces and stray single characters, then de-duplicate.
SD_Melt_clean <- SD_Melt %>%
  filter(!is.na(FRM_Name), nchar(FRM_Name) > 1) %>%
  distinct()

Solution

  • Are you looking for something like this?

    library(tidyverse)
    
    # Keep the names containing the " BAS " keyword, give every
    # " BAS "-separated piece its own row, then strip the padding.
    Samp_Data %>% 
      filter(grepl(pattern = " BAS ", x = FRM_Name)) %>% 
      separate_rows(FRM_Name, sep = " BAS ") %>% 
      mutate(across(FRM_Name, trimws))
    

    Output

       FRM_ID FRM_Name          FRM_DATE  
        <dbl> <chr>             <chr>     
     1   1006 "ABMPSNY NSMF"    1990-02-02
     2   1006 "SVFNUF SUPFRMSR" 1990-02-02
     3   1006 "M&M"             1990-02-02
     4   1006 "PBWFR GRJD ABRP" 1990-02-02
     5   1006 "SUN PHSRMS"      1990-02-02
     6   1007 "BSJSJ FJNSNAF"   1990-02-05
     7   1007 "LSRSFN"          1990-02-05
     8   1007 "NTPA"            1990-02-05
     9   1007 "SDSNJ PBRTS"     1990-02-05
    10   1014 "EEE FNTFRPRJS"   1990-02-06
    11   1014 "CTD NFSTLF"      1990-02-06
    12   1014 "EEE FNTFRPRJS"   1990-02-07
    13   1014 "CTD. NFSTLF."    1990-02-07
    14   1015 "MSRUTJ SUZUKJ"   1990-02-08
    15   1015 "XYX CORP"        1990-02-08
    16   1015 ""                1990-02-08