TARGET: Check whether all files in a list have the same encoding before importing and rbind-ing them; if they are not the same, STOP the run.
# files list & check encoding
# Collect the CSV files found under `path`.
# `pattern` is a regular expression (not a shell glob), so match a literal
# ".csv" suffix instead of "*.csv".
# NOTE(review): the original call ended with a bare `= T` (argument name
# missing, which does not parse); `full.names` is assumed here so that the
# returned paths can be opened from any working directory -- confirm.
FL_PATH <- list.files(path, pattern = "\\.csv$", full.names = TRUE)
# If any file is "UTF-8", STOP the run; if "Shift_JIS", RUN the scripts below:
# import
# Import every listed file as a comma-separated data.table, then stack the
# per-file tables into a single table.
DT <- rbindlist(
  lapply(
    FL_PATH,
    function(csv_file) import(csv_file, sep = ",", setclass = "data.table")
  )
)
# OVER 500 rows to run if the files are same encoding to rbind
# result of --lapply(FL_PATH,guess_encoding)
> lapply(FL_PATH,guess_encoding)
# A tibble: 3 x 2
encoding confidence
<chr> <dbl>
1 Shift_JIS 0.8
2 GB18030 0.76
3 Big5 0.46
# A tibble: 3 x 2
encoding confidence
<chr> <dbl>
1 GB18030 0.82
2 UTF-8 0.8
3 Big5 0.44
First, get the most probable encoding:
# For each file, keep the first encoding reported by guess_encoding()
# (the candidates in the output above are listed by decreasing confidence).
# vapply() guarantees a character vector, even when FL_PATH is empty, where
# sapply() would return an empty list instead.
enc <- vapply(
  FL_PATH,
  function(x) guess_encoding(x)$encoding[1],
  character(1)
)
Then, if any of the files are UTF-8, stop execution.
# Abort with an error only when at least one file's most probable encoding
# is UTF-8. `%in%` never yields NA, so a file with no guess cannot break the
# condition.
if ("UTF-8" %in% enc) {
  stop("UTF-8 present")
}
# Now, read files and rbind
# Read each listed file into its own table, then stack them row-wise.
# NOTE(review): read_csv() is called without an explicit locale/encoding
# argument -- confirm that this suits the encodings detected for the files.
dlist <- lapply(FL_PATH, function(csv_file) read_csv(csv_file))
DT <- rbindlist(dlist)