Search code examples
pythonweb-crawlergoogle-crawlers

How to download images from Google and rename them with their keywords at the same time in google-images-download


How can I download images from Google and, at the same time, name each image after its search keyword when using google-images-download? With this package, the file name is generated from the image URL. [Image: enter image description here]

What I expect is that each image is named after the item (keyword) name. Could anyone help me?

here is code:

from google_images_download import google_images_download

# Create the downloader object exposed by the package.
response = google_images_download.googleimagesdownload()

# Options handed to download(); one key per line for readability.
arguments = {
    "keywords": "cat, dog, pig",
    "limit": 1,
    "print_urls": True,
    "image_directory": 'home1',
    "size": "large",
}

# Run the download and keep whatever download() returns.
absolute_image_paths = response.download(arguments)

Solution

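  • You need to override the following methods of google_images_download: download_image, _get_all_items, and download. download_image takes an extra search_term argument (see below), so _get_all_items and download must also be changed to pass the search term through to download_image.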

    # Download Images
        def download_image(self,image_url,image_format,main_directory,dir_name,count,print_urls,socket_timeout,prefix,print_size,no_numbering,search_term):
            """Download one image and save it under a name built from ``search_term``.

            The file is written to
            ``main_directory/dir_name/<prefix + " "><search_term>.jpeg``.

            NOTE: the saved file name does not contain ``count``, so calling this
            again with the same ``search_term``, ``prefix`` and directories
            overwrites the previously saved file.

            Args:
                image_url: URL of the image to fetch.
                image_format: Not used; the saved file always gets a ``.jpeg`` suffix.
                main_directory: Root output directory.
                dir_name: Sub-directory of ``main_directory`` that receives the file.
                count: Image number; used only in the returned name and message,
                    not in the path of the saved file.
                print_urls: When truthy, print the image URL before downloading.
                socket_timeout: Timeout in seconds (converted with ``float``);
                    a falsy value means 10 seconds.
                prefix: Optional text placed, followed by a space, before the name.
                print_size: When truthy, print ``self.file_size(path)``.
                no_numbering: Not used; kept so existing callers keep working.
                search_term: Keyword used as the base of the file name.

            Returns:
                A tuple ``(download_status, download_message, return_image_name,
                absolute_path)``. ``download_status`` is ``'success'`` or
                ``'fail'``; on failure the last two items are empty strings.
            """
            if print_urls:
                print("Image URL: " + image_url)

            try:
                req = Request(image_url, headers={
                    "User-Agent": "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"})

                # timeout time to download an image
                timeout = float(socket_timeout) if socket_timeout else 10

                response = urlopen(req, None, timeout)
                try:
                    data = response.read()
                finally:
                    # Close the connection even when read() raises.
                    response.close()

                # Name the file after the search keyword instead of the URL.
                image_name = search_term + "." + "jpeg"

                # prefix name in image
                if prefix:
                    prefix = prefix + " "
                else:
                    prefix = ''

                path = main_directory + "/" + dir_name + "/" + prefix + image_name
                print(path)

                try:
                    with open(path, 'wb') as output_file:
                        output_file.write(data)
                    absolute_path = os.path.abspath(path)
                except OSError as e:
                    # Return right away so a failed write is never reported
                    # as a successful download.
                    return ('fail',
                            "OSError on an image...trying next one..." + " Error: " + str(e),
                            '', '')

                # image size parameter
                if print_size:
                    print("Image Size: " + str(self.file_size(path)))

                # return image name back to calling method to use it for thumbnail downloads
                download_status = 'success'
                download_message = "Completed Image ====> " + prefix + str(count) + ". " + image_name
                return_image_name = prefix + str(count) + ". " + image_name

            except UnicodeEncodeError as e:
                return ('fail',
                        "UnicodeEncodeError on an image...trying next one..." + " Error: " + str(e),
                        '', '')

            # HTTPError is a subclass of URLError, so it must be handled first.
            except HTTPError as e:  # If there is any HTTPError
                return ('fail',
                        "HTTPError on an image...trying next one..." + " Error: " + str(e),
                        '', '')

            except URLError as e:
                return ('fail',
                        "URLError on an image...trying next one..." + " Error: " + str(e),
                        '', '')

            except ssl.CertificateError as e:
                return ('fail',
                        "CertificateError on an image...trying next one..." + " Error: " + str(e),
                        '', '')

            except IOError as e:  # If there is any IOError
                return ('fail',
                        "IOError on an image...trying next one..." + " Error: " + str(e),
                        '', '')

            except IncompleteRead as e:
                return ('fail',
                        "IncompleteReadError on an image...trying next one..." + " Error: " + str(e),
                        '', '')

            return download_status,download_message,return_image_name,absolute_path