Search code examples
Tags: r, for-loop, rstudio, k-means, rtvs

Loop URL R too many open files


I have the following file with URLs in it. The idea is to download the image from each URL, extract a 6-color palette, get the color names and percentages, and bind them all together in a list alongside the product number. But I get a "Too many open files" error.

library(readxl)
library(jpeg)
library(scales)
library(plotrix)
library(gridExtra)
library(dplyr)
library(data.table)
# Read the product sheet; the loop below uses its URL and Varenr columns.
dataset = read_excel("C:/Temp/Product.xlsx", sheet = "All")
# Holds one palette data frame per processed row, indexed by row number.
datalist = list()
nRowsDf <- nrow(dataset)
avector <- as.vector(dataset$URL)
varenummer <- as.vector(dataset$Varenr)
# For every row: download the image, cluster its pixels into six colours,
# name the cluster centres and write the result to myDF.csv.
for (i in 1:nRowsDf) {  
  tryCatch({
# Product number of the current row; it is also used as the image file name.
Sku <- as.vector(varenummer[[i]])
# Save the image as "<Sku>.jpg" in binary mode, then read it back.
download.file(avector[[i]], paste(Sku,".jpg" ,sep = ""), mode = "wb")
painting <- readJPEG(paste(Sku,".jpg" ,sep = ""))

# Flatten the image array into one data-frame row per pixel.
dimension <- dim(painting)
painting_rgb <- data.frame(
  x = rep(1:dimension[2], each = dimension[1]),
  y = rep(dimension[1]:1, dimension[2]),
  R = as.vector(painting[,, 1]), #slicing array into RGB Channels
  G = as.vector(painting[,, 2]),
  B = as.vector(painting[,, 3])
)


# Cluster on the R, G and B columns only (x and y are not used here).
k_means = kmeans(painting_rgb[, c("R", "G", "B")], algorithm = "Lloyd", centers = 6, iter.max = 300)
# Convert each cluster centre to a hex colour and look up its colour name(s).
test = (sapply(rgb(k_means$centers), color.id))

# First name per centre, pixel count per cluster, and each cluster's share of
# all pixels. Color and Values are not read again: the Final line below
# recomputes both expressions inline.
Color = lapply(test, `[[`, 1)
Values = k_means$size
Percentage = k_means$size / sum(k_means$size)
# One data-frame row per cluster, tagged with the product number.
Final = do.call(rbind, Map(data.frame, Color = lapply(test, `[[`, 1), Values = k_means$size, ProductNumber = Sku, Percentage = Percentage))
Final$i <- i # record which iteration produced these rows
datalist[[i]] <- Final # store this iteration's result in the list
# NOTE(review): big_data contains every result collected so far, and the whole
# table (header included) is appended to myDF.csv on each iteration, so rows
# from earlier iterations are written to the file again every time.
big_data = rbindlist(datalist)
#grid.table(big_data)
write.table(big_data, file = "myDF.csv", sep = ",", col.names = TRUE, append = TRUE)


#R = Final[with(Final, order(-Percentage)),]
# Any error raised in the body is discarded: the handler only closes all open
# connections, so the failing row is skipped without any message.
}, error = function(e) { closeAllConnections() })
# Close all open connections after every iteration as well.
closeAllConnections() 

}

Code stops after downloading around 266 unique JPEG images.

This code downloads only JPG files; if another file type is returned, it is simply ignored.

Error :

Error in file(file, ifelse(append, "a", "w")) : 
cannot open the connection
In addition: Warning message:
In file(file, ifelse(append, "a", "w")) :
cannot open file 'myDF.csv': Too many open files

If I remove the tryCatch, I get this:

Error in download.file(avector[[i]], "image.jpg", mode = "wb") : 
cannot open destfile 'image.jpg', reason 'Too many open files'

Solution

  • The code had an error, or better said an unnecessary step, that kept connections open until it reached the limit imposed by "file".

    After simply removing the iteration-tracking steps and the rbind of datalist, it runs flawlessly.

    Below the modified version.

    # Build a six-colour palette for every product image and append it to
    # "myDF.csv".
    #
    # For each row i the loop downloads avector[[i]] to "<Sku>.jpg", clusters
    # the pixels' RGB values with k-means (6 centres), names every centre with
    # color.id() and appends one row per cluster (colour name, pixel count,
    # product number, share of pixels) to the CSV. Each product is written on
    # its own; the datalist / rbindlist accumulation from the question is
    # intentionally gone, so nothing is re-bound or re-written on later passes.
    output_file <- "myDF.csv"

    # seq_len() yields an empty sequence when there are no rows, whereas
    # 1:nRowsDf would iterate over c(1, 0).
    for (i in seq_len(nRowsDf)) {
      tryCatch({
        Sku <- as.vector(varenummer[[i]]) # for testing use 23406
        image_file <- paste0(Sku, ".jpg")
        download.file(avector[[i]], image_file, mode = "wb")

        painting <- load.image(image_file)
        dimension <- dim(painting)
        # One row per pixel; only the R, G and B columns feed the clustering.
        painting_rgb <- data.frame(
          x = rep(seq_len(dimension[2]), each = dimension[1]),
          y = rep(dimension[1]:1, dimension[2]),
          R = as.vector(painting[,, 1]), # slicing the array into three channels
          G = as.vector(painting[,, 2]),
          B = as.vector(painting[,, 3])
        )

        k_means <- kmeans(
          painting_rgb[, c("R", "G", "B")],
          algorithm = "Lloyd", centers = 6, iter.max = 300
        )

        # color.id() may return several names for one colour. simplify = FALSE
        # keeps the result a list (named by hex code) even when every centre
        # returns the same number of names; a simplified matrix would make the
        # "first name" extraction below walk over matrix cells instead of
        # centres and mis-pair names with cluster sizes.
        test <- sapply(rgb(k_means$centers), color.id, simplify = FALSE)
        Color <- vapply(test, function(colour_names) colour_names[[1]], character(1))
        Values <- k_means$size
        Percentage <- Values / sum(Values)

        Final <- do.call(
          rbind,
          Map(data.frame,
              Color = Color, Values = Values,
              ProductNumber = Sku, Percentage = Percentage)
        )

        # Write the header only when the file is created; repeating it on every
        # append scatters header lines through the CSV and raises a warning
        # for each product.
        write.table(
          Final,
          file = output_file, sep = ",",
          col.names = !file.exists(output_file), append = TRUE
        )
      }, error = function(e) {
        # Skip this product but say why, instead of discarding the error.
        message("Skipping row ", i, ": ", conditionMessage(e))
      }, finally = closeAllConnections()) # runs after success and failure alike
    }