Search code examples
pythonhtmlweb-scrapingxpathinstagram

Trying to Scrape Instagram Post Data from .csv with links - For Masters Thesis


I am trying to scrape instagram post data (number of likes, Caption, Hashtags, Mentions and number of comments) from a collection of links in a .csv for data analysis to put towards my Masters Thesis. however i am coming across an error where the xpath or element cannot be found. Here is the error message:

selenium.common.exceptions.NoSuchElementException: Message: no such element: Unable to locate element: {"method":"xpath","selector":"//*[@id="react-root"]/section/main/div/div/article/div[2]/section[2]/div/div/button"}

Here is the code block i have written using selenium:

def scrape_post_data():
influencerpostsdata = []
# Specify the path to chromedriver.exe
chromedriver_path = r"C:\\Users\\stuar\\Instagram Scraper\\ChromeDrivers\chromedriver.exe"
driver = webdriver.Chrome(executable_path=chromedriver_path)
time.sleep(2)
    
# Open the webpage
url = "https://www.instagram.com"
driver.get(url)
time.sleep(3)
    
# Alert number 1
time.sleep(5)
alert = WebDriverWait(driver, 15).until(EC.element_to_be_clickable((By.XPATH, '//button[contains(text(), "Accept All")]'))).click()
    
# Target Username Entry
username = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "input[name='username']")))
password = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "input[name='password']")))
    
# Enter Username and Password
login_username = str(enter_loginusername_entry.get())
login_password = str(enter_password_entry.get())
username.clear()
username.send_keys(login_username)
password.clear()
password.send_keys(login_password)
button = WebDriverWait(driver, 2).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button[type='submit']"))).click()
    
# Alert number 2
time.sleep(5)
alert2 = WebDriverWait(driver, 15).until(EC.element_to_be_clickable((By.XPATH, '//button[contains(text(), "Not Now")]'))).click()
    
# Alert number 3
time.sleep(5)
alert3 = WebDriverWait(driver, 15).until(EC.element_to_be_clickable((By.XPATH, '//button[contains(text(), "Not Now")]'))).click()    
       
with open(r"C:\\Users\\stuar\\Instagram Scraper\\SourceCode/influencerpostlinks1.csv",'r') as csv_file:
csv_reading = csv.reader(csv_file)

for line in csv_reading:
links = line[1]
try:
Page = driver.get(links)
except Exception as e:
Page = None
time.sleep(20)

try:
# This captures the standard like count.
likes = driver.find_element_by_xpath("""//*[@id="react-root"]/section/main/div/div/article/div[2]/section[2]/div/div/button""").text.split()[0]
post_type = 'photo'
except:
# This captures the like count for videos which is stored
likes = driver.find_element_by_xpath("""//*[@id="react-root"]/section/main/div/div/article/div[2]/section[2]/div/span""").text.split()[0]
post_type = 'video'
age = driver.find_element_by_css_selector('a time').text
comment = driver.find_element_by_xpath("""//*[@id="react-root"]/section/main/div/div/article/div[2]/div[1]/ul/div/li/div/div/div[2]/span""").text
hashtags = find_hashtags(comment)
mentions = find_mentions(comment)
post_details = {'link': url, 'type': post_type, 'likes/views': likes,
'age': age, 'comment': comment, 'hashtags': hashtags,
'mentions': mentions}
time.sleep(10)
#turning data into a .csv file
influencerpostsdata.append(post_details)
df = pd.DataFrame(influencerposts)
print(df)
df.to_csv('influencerpostsdata.csv')
            
driver.close() 

Solution

  • Not To worry i have resolved the problem..

     with open(r"C:\\Users\\stuar\\Instagram Scraper\\SourceCode/influencerpostlinks1.csv",'r') as csv_file:
        csv_reading = csv.reader(csv_file)
    
        for line in csv_reading:
            links = line[1]
            try:
                Page = driver.get(links)
            except Exception as e:
                Page = None
                time.sleep(20)
    
            try:    
                likes = driver.find_element_by_xpath('/html/body/div[1]/section/main/div/div[1]/article/div[3]/section[2]/div/div/a/span')
            except Exception as e:
                likes = None
            try:
                likes2 = likes.text
            except Exception as e:
                likes2 = None
                time.sleep(20)    
            try:    
                age = driver.find_element_by_xpath('/html/body/div[1]/section/main/div/div[1]/article/div[3]/div[2]/a/time')
            except Exception as e:
                age = None
            try:
                age2 = age.text
            except Exception as e:
                age2 = None
                time.sleep(20)  
                
            try:    
                caption = driver.find_element_by_xpath('/html/body/div[1]/section/main/div/div[1]/article/div[3]/div[1]/ul/div/li/div/div/div[2]/span')    
            except Exception as e:
                caption = None
            try:
                caption2 = caption.text
            except Exception as e:
                caption2 = None
                time.sleep(20)   
                
            try:
                AccountName = driver.find_element_by_xpath('/html/body/div[1]/section/main/div/div[1]/article/header/div[2]/div[1]/div/span/a')
            except Exception as e:
                AccountName = None
            try:
                AccountName2 = AccountName.text
            except Exception as e:
                AccountName2 = None
                time.sleep(20)
                
            post_details = {'Username': AccountName2,'Caption': caption2, 'Likes/Views': likes2,
                            'Age': age2 }
            
    
            #turning data into a .csv file
            influencerpostsdata.append(post_details)
            df = pd.DataFrame(influencerpostsdata)
            print(df)
            df.to_csv('influencerpostsdata.csv')
            
    driver.close()