Search code examples
Tags: r, rename, file-rename

R: Renaming files using elements of a particular column in the dataframe


I have a folder of .txt files, each with a long name such as "ctrl_Jack_DrugA_XXuM.txt". However, the names are missing an important piece of information: the timestamp.

However, I have that information in the data frame stored inside each file. For example, each file contains multiple columns, one of which is called "PiD_treatmentsum"; its elements look like "Jack_R4_200514_DrugA_XXuM.txt".

So before I proceed to downstream analysis, I want to sort the files into subfolders based on the name (such as Jack) and the timestamp (such as "R4_200514"). To do that, I need to rename each file using the value of its "PiD_treatmentsum" column.

Now the code:

```
# Create a minimal reproducible example (MRE): two tab-separated files whose
# names lack the timestamp that is stored in their PiD_treatmentsum column.

# file 1
Row <- rep("16", 20)
column <- rep("3", 20)
Pid <- rep("Jack", 20)
Stimulation <- rep("3S", 20)
Drug <- rep("2DG", 20)
Dose <- rep("3uM", 20)
Treatmentsum <- rep(paste("Jack", "3S", "2DG", "3uM", sep = "_"), 20)
PiD_treatmentsum <- rep(
  paste("Jack", "T4_20200501", "3S", "2DG", "3uM", sep = "_"), 20
)
sampleset <- data.frame(
  Row, column, Pid, Stimulation, Drug, Dose, Treatmentsum, PiD_treatmentsum
)
# Spell out TRUE/FALSE: T and F are ordinary variables that can be reassigned.
write.table(
  sampleset,
  file = "ctrl_Jack_3S_2DG_3uM.txt",
  sep = "\t", row.names = FALSE, col.names = TRUE
)

# file 2
Row <- rep("16", 40)
column <- rep("3", 40)
Pid <- rep("Mark", 40)
Stimulation <- rep("3S", 40)
Drug <- rep("STS", 40)
Dose <- rep("1uM", 40)
Treatmentsum <- rep(paste("Mark", "3S", "STS", "1uM", sep = "_"), 40)
PiD_treatmentsum <- rep(
  paste("Mark", "T5_20200501", "3S", "STS", "1uM", sep = "_"), 40
)
sampleset <- data.frame(
  Row, column, Pid, Stimulation, Drug, Dose, Treatmentsum, PiD_treatmentsum
)
write.table(
  sampleset,
  file = "ctrl_Mark_3S_STS_1uM.txt",
  sep = "\t", row.names = FALSE, col.names = TRUE
)

# Rename all the files using their PiD_treatmentsum value, then write each
# table into outdirectory.
# NOTE(review): this block does not parse as written; see the note above the
# write.table() call.
# list.files() is called without full.names, so it returns bare file names;
# read.csv(x) below only finds them when the working directory is the listed
# folder. `pattern` is interpreted as a regular expression, not a glob.
filenames <- list.files("C:/UsersXXX", pattern="*.txt")
outdirectory <- "~/out"
lapply(filenames, function(x) {
df <- read.csv(x,sep="\t", header=TRUE, fill = T,stringsAsFactors = F)
# a: the distinct PiD_treatmentsum value(s) found in this file.
a <- as.character(unique(df[["PiD_treatmentsum"]]))
# b: the new file name, "ctrl_<a>.txt".
b<-paste0("ctrl_",a, '.txt', sep="")
# newname receives file.rename()'s return value, not the new file name.
newname <- file.rename(basename(x), b)
# NOTE(review): two "(" are opened on the next line (write.table and paste0)
# but only one ")" follows, so paste0( is never closed and the parser reports
# an unexpected '}' at the end of the function.
write.table(df, paste0(outdirectory,"/", newname, sep="\t", 
          quote=FALSE, row.names=F, col.names=TRUE)
})

```

Running this gives the error `unexpected '}'`, so I think I must have made a mistake inside the loop.

If I dissect the code and run the steps on one file as an example, they work:

```

  # Walk through the renaming steps by hand for a single file.
  old_name <- 'ctrl_Jack_3S_2DG_3uM.txt'
  df <- read.csv(
    old_name,
    sep = "\t", header = TRUE, fill = TRUE, stringsAsFactors = FALSE
  )

  # The new label is the distinct PiD_treatmentsum value stored in the file.
  a <- as.character(unique(df[["PiD_treatmentsum"]]))
  b <- paste0("ctrl_", a, '.txt')
  basename(old_name)  # auto-prints the bare file name at the console
  file.rename(basename(old_name), b)

```

A little help and explanation will be appreciated :)


Solution

  • This should work:

    # create MRE: two tab-separated files whose names lack the timestamp that
    # is stored in their PiD_treatmentsum column.

    # file 1
    Row <- rep("16", 20)
    column <- rep("3", 20)
    Pid <- rep("Jack", 20)
    Stimulation <- rep("3S", 20)
    Drug <- rep("2DG", 20)
    Dose <- rep("3uM", 20)
    Treatmentsum <- rep(paste("Jack", "3S", "2DG", "3uM", sep = "_"), 20)
    PiD_treatmentsum <- rep(
      paste("Jack", "T4_20200501", "3S", "2DG", "3uM", sep = "_"), 20
    )
    sampleset <- data.frame(
      Row, column, Pid, Stimulation, Drug, Dose, Treatmentsum, PiD_treatmentsum
    )
    # Spell out TRUE/FALSE: T and F are ordinary variables that can be reassigned.
    write.table(
      sampleset,
      file = "ctrl_Jack_3S_2DG_3uM.txt",
      sep = "\t", row.names = FALSE, col.names = TRUE
    )

    # file 2
    Row <- rep("16", 40)
    column <- rep("3", 40)
    Pid <- rep("Mark", 40)
    Stimulation <- rep("3S", 40)
    Drug <- rep("STS", 40)
    Dose <- rep("1uM", 40)
    Treatmentsum <- rep(paste("Mark", "3S", "STS", "1uM", sep = "_"), 40)
    PiD_treatmentsum <- rep(
      paste("Mark", "T5_20200501", "3S", "STS", "1uM", sep = "_"), 40
    )
    sampleset <- data.frame(
      Row, column, Pid, Stimulation, Drug, Dose, Treatmentsum, PiD_treatmentsum
    )
    write.table(
      sampleset,
      file = "ctrl_Mark_3S_STS_1uM.txt",
      sep = "\t", row.names = FALSE, col.names = TRUE
    )
    

    I only changed the last three lines. We rename the file using file.rename; newname now holds its return value, which is TRUE if the rename succeeded and FALSE if it failed.

    Then we create outdirectory. (Calling dir.create on a directory that already exists raises a warning, but nothing is overwritten. Testing first whether the directory exists, and skipping dir.create if it does, avoids the warning.)

    Finally we use file.copy to copy the renamed file into outdirectory. We can use file.path to concatenate the directory and filename.

    # Rename all the files using their PiD_treatmentsum value
    # and copy them to outdirectory.

    # Rename one file after its PiD_treatmentsum value and copy it.
    #
    # path:         path to a tab-separated file with a PiD_treatmentsum column.
    # outdirectory: destination directory; created on first use if missing.
    #
    # Returns the logical result of file.copy(), which is FALSE when the target
    # already exists because file.copy() does not overwrite by default.
    # Stops with an error if the column does not hold exactly one distinct,
    # non-empty value, or if the rename fails.
    rename_and_copy <- function(path, outdirectory) {
      df <- read.csv(
        path,
        sep = "\t", header = TRUE, fill = TRUE, stringsAsFactors = FALSE
      )
      label <- unique(as.character(df[["PiD_treatmentsum"]]))
      # Without this guard a missing column silently produces "ctrl_.txt", and
      # several distinct values give file.rename() vectors of unequal length.
      if (length(label) != 1L || is.na(label) || !nzchar(label)) {
        stop(
          "'", path, "' must contain exactly one distinct PiD_treatmentsum value",
          call. = FALSE
        )
      }
      new_file <- paste0("ctrl_", label, ".txt")
      # Rename inside the file's own directory so full paths work as well.
      new_path <- file.path(dirname(path), new_file)
      # newname is TRUE when the rename succeeded and FALSE when it failed.
      newname <- file.rename(path, new_path)
      if (!newname) {
        stop("could not rename '", path, "' to '", new_path, "'", call. = FALSE)
      }
      # Create the directory only when missing, so reruns do not warn.
      if (!dir.exists(outdirectory)) {
        dir.create(outdirectory, recursive = TRUE)
      }
      file.copy(new_path, file.path(outdirectory, new_file))
    }

    # `pattern` is a regular expression: match names that end in "M.txt".
    filenames <- list.files(".", pattern = "M\\.txt$")
    outdirectory <- "~/out"
    lapply(filenames, rename_and_copy, outdirectory = outdirectory)
    

    I'd suggest updating the variable names to something more meaningful to make future refactoring easier ;)