python selenium-webdriver web-scraping alfresco-webscripts ironwebscraper

Web scraping problem: 'list' object has no attribute 'timeout'

I am trying to do web scraping with Selenium. I want to download images from Google, but I have more than one issue:

I get this error: AttributeError: 'list' object has no attribute 'timeout'.
I can't handle Base64-encoded images and can't download them.
I have an issue with the download_image function in my code.
Some of the src values are also ordinary image URLs; how do I download those? Can anyone help me?

Here is my code:

from urllib.parse import urlparse
from selenium import webdriver
import time as t
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import time as t
import urllib
import base64
# Try to create the root output folder on drive G:.
# NOTE(review): `os` is not among the imports shown above, so os.mkdir would
# raise NameError here and the bare `except` silently hides it — confirm and
# add `import os`.
try:
    os.mkdir("G:/Smokking_Project")    
except:
    pass

# Sub-folder name used for the downloaded images.
name="smoked"

# Start Chrome with the "enable-automation" switch excluded.
chrome_options = webdriver.ChromeOptions() 
chrome_options.add_experimental_option("excludeSwitches", ['enable-automation'])
#driver = webdriver.Chrome(executable_path='chromedriver.exe',options=chrome_options)  
driver = webdriver.Chrome(options=chrome_options)  
# Explicit wait with a 5-second limit; used below when locating <img> elements.
wait = WebDriverWait(driver, 5)

# Google Images results page that is scraped.
strr="https://www.google.com/search?q=smokinng&tbm=isch&ved=2ahUKEwi8k9zn9eOBAxVtlycCHTa_DnUQ2-cCegQIABAA&oq=smokinng&gs_lcp=CgNpbWcQAzIJCAAQGBCABBAKMgkIABAYEIAEEAoyCQgAEBgQgAQQCjoECCMQJzoFCAAQgAQ6BggAEAUQHjoECAAQHjoICAAQgAQQsQM6BAgAEAM6BwgAEBgQgARQjwdY8xJg-RloAHAAeACAAb0BiAHsCZIBAzAuOZgBAKABAaoBC2d3cy13aXotaW1nwAEB&sclient=img&ei=uUwhZfzSFO2unsEPtv66qAc&bih=723&biw=1517&hl=en"

driver.get(strr)
t.sleep(3)  # pause 3 s after requesting the page
links=[]  # src values that have already been handled
x=1  # running number used in the output file names
last_height=0  # document height recorded after the previous scroll

def download_image(url,filename):
        """Fetch ``url`` with ``urllib.request.urlopen`` and write the bytes to ``filename``.

        ``url`` is passed to ``urlopen`` unchanged; ``filename`` is opened in
        binary write mode and receives everything ``read()`` returns.
        """
        resource = urllib.request.urlopen(url)
        # NOTE(review): neither `resource` nor `output` is closed if read()/write() raises.
        output = open(filename,"wb")
        output.write(resource.read())
        output.close()
    
# Scroll to the bottom of the page repeatedly, handling the <img> elements
# found after each scroll, until the document height stops changing.
while True:
     driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
     t.sleep(4)    
     #try:
     # img_link is a LIST of elements matching the XPath, not a single link.
     img_link = wait.until(EC.presence_of_all_elements_located((By.XPATH,'//a[1]/div[1]/img')))
     t.sleep(1)

     for img in img_link:
        url = img.get_attribute('src')
        # Only handle src values that have not been seen in an earlier pass.
        if url not in links:
            links.append(url)
            print (url)
            
            # NOTE(review): the same mkdir is attempted twice, and both bare
            # `except` clauses hide any error (including the missing `os` import).
            try:
                os.mkdir('G://Smokking_Project//'+name)
            except:
                pass
            try:
                os.mkdir('G://Smokking_Project//'+name)
            except:
                pass
            # NOTE(review): this path is relative ('Smokking_Project//...'), while the
            # folders above are created under 'G://Smokking_Project//' — confirm intent.
            file_name='Smokking_Project//'+name+'//'+str(x)+'.jpg'
            # NOTE(review): passes the whole element list `img_link` instead of the
            # string `url`; this is the call shown in the traceback quoted below.
            download_image(img_link,file_name)
            
            x+=1
        #except:
            #print('-',end='')
     # Stop once scrolling no longer increases the document height.
     new_height = driver.execute_script("return document.body.scrollHeight")
     print(new_height)
     if new_height == last_height:
        break
     last_height = new_height
            
driver.close()

Below is the full error:

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
c:\Users\Geka\Desktop\openCV\vision_ahmed_ibrahim\webscrapping\webscrapping_Google.ipynb Cell 6 line 6
     62         pass
     63     file_name='Smokking_Project//'+name+'//'+str(x)+'.jpg'
---> 64     download_image(img_link,file_name)
     66     x+=1
     67 #except:
     68     #print('-',end='')

c:\Users\Geka\Desktop\openCV\vision_ahmed_ibrahim\webscrapping\webscrapping_Google.ipynb Cell 6 line 3
     33 def download_image(url,filename):
---> 34         resource = urllib.request.urlopen(url)
     35         output = open(filename,"wb")
     36         output.write(resource.read())

File c:\Users\Geka\anaconda3\Lib\urllib\request.py:216, in urlopen(url, data, timeout, cafile, capath, cadefault, context)
    214 else:
    215     opener = _opener
--> 216 return opener.open(url, data, timeout)

File c:\Users\Geka\anaconda3\Lib\urllib\request.py:509, in OpenerDirector.open(self, fullurl, data, timeout)
    506     if data is not None:
    507         req.data = data
--> 509 req.timeout = timeout
    510 protocol = req.type
    512 # pre-process request

AttributeError: 'list' object has no attribute 'timeout'

Solution

The errors we identified in your code are:

1) AttributeError: 'list' object has no attribute 'timeout': This error occurs because you're passing a list of elements (img_link) to the download_image function instead of a single URL string. You need to pass the url variable to the function instead.

2) Handling Base64 images: To handle Base64 images, you need to decode the Base64 string (this requires `import base64`) and save the decoded bytes as an image file.

3) Downloading images from regular URLs: When the src attribute is an ordinary image URL, you can download the image by sending an HTTP request to that URL and saving the response content.

Let's modify your code to address these issues:

import base64
import os
import time
import urllib.request

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Create the root output folder; an already existing folder is not an error.
try:
    os.mkdir("G:/Smokking_Project")
except FileExistsError:
    pass

# Sub-folder (under the root output folder) that receives the images.
name = "smoked"

# Start Chrome with the "enable-automation" switch excluded.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_experimental_option("excludeSwitches", ['enable-automation'])
driver = webdriver.Chrome(options=chrome_options)
# Explicit wait with a 5-second limit; used when locating the <img> elements.
wait = WebDriverWait(driver, 5)

# Google Images results page that is scraped.
strr = "https://www.google.com/search?q=smokinng&tbm=isch&ved=2ahUKEwi8k9zn9eOBAxVtlycCHTa_DnUQ2-cCegQIABAA&oq=smokinng&gs_lcp=CgNpbWcQAzIJCAAQGBCABBAKMgkIABAYEIAEEAoyCQgAEBgQgAQQCjoECCMQJzoFCAAQgAQ6BggAEAUQHjoECAAQHjoICAAQgAQQsQM6BAgAEAM6BwgAEBgQgARQjwdY8xJg-RloAHAAeACAAb0BiAHsCZIBAzAuOZgBAKABAaoBC2d3cy13aXotaW1nwAEB&sclient=img&ei=uUwhZfzSFO2unsEPtv66qAc&bih=723&biw=1517&hl=en"

driver.get(strr)
time.sleep(3)  # pause 3 s after requesting the page

x = 1  # running number used in the output file names
last_height = 0  # document height recorded after the previous scroll

def download_image(url, filename, timeout=30):
    """Download ``url`` and save the response body to ``filename``.

    Args:
        url: Address of the image, as a single string (not a list of elements).
        filename: Path of the file to create; it is overwritten if it exists.
        timeout: Seconds to wait on the connection before giving up, so a
            stalled server cannot block the scraper forever.

    Raises:
        OSError: If the request fails or times out (``urllib.error.URLError``
            is an ``OSError`` subclass), or if ``filename`` cannot be written.
    """
    # Both context managers guarantee the response and the file are closed,
    # even when reading or writing fails part-way through.
    with urllib.request.urlopen(url, timeout=timeout) as resource:
        with open(filename, "wb") as output:
            output.write(resource.read())

# Create the output folder once, up front, so that BOTH the Base64 branch and
# the URL branch can write into it (makedirs with exist_ok=True never raises
# FileExistsError for an existing directory, so no try/except is needed).
out_dir = f'G:/Smokking_Project/{name}'
os.makedirs(out_dir, exist_ok=True)

# src values that were already saved. Every scroll pass re-reads all <img>
# elements on the page, so without this each pass would save them all again.
seen_urls = set()

try:
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(4)

        img_links = wait.until(EC.presence_of_all_elements_located((By.XPATH, '//a[1]/div[1]/img')))
        time.sleep(1)

        for img in img_links:
            url = img.get_attribute('src')
            # Skip elements without a src and images handled in an earlier pass.
            if not url or url in seen_urls:
                continue
            seen_urls.add(url)

            filename = f'{out_dir}/{x}.jpg'
            try:
                if url.startswith('data:image'):
                    # Inline image: everything after the first comma is the
                    # Base64 payload ("data:image/<type>;base64,<payload>").
                    _, _, payload = url.partition(',')
                    with open(filename, 'wb') as f:
                        f.write(base64.b64decode(payload))
                else:
                    # Ordinary image URL: fetch it over the network.
                    download_image(url, filename)
            except (OSError, ValueError) as err:
                # One broken image (network error, bad Base64) should not
                # abort the whole run; report it and move on.
                print(f'skipping image {x}: {err}')
                continue
            x += 1

        # Stop once scrolling no longer increases the document height.
        new_height = driver.execute_script("return document.body.scrollHeight")
        print(new_height)
        if new_height == last_height:
            break
        last_height = new_height
finally:
    # Always shut the browser down, even if the loop raised.
    driver.quit()

This code should address the issues you mentioned. It handles Base64 images by decoding them and saving them as image files, and it downloads images from regular URLs by sending an HTTP request and saving the response content.