Search code examples
python · selenium-webdriver · web-scraping

How to obtain data from an IFRAME with Python and Selenium


I am trying to obtain a value from this page: https://www.bbva.com.co/personas/productos/inversion/fondos/pais.html

In the image, I show what I need to obtain.

Inspected page

The first thing I see is that this value is inside an iframe with class = "iframe_base ".

I tried the following code to extract the value, but it did not work: I did not obtain anything.

I am using selenium and the webdriver of Microsoft Edge.

What am I doing wrong, and how can I obtain what I need?

Thanks.

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.edge.service import Service
from selenium.webdriver.edge.options import Options
import time

# Configure the Edge driver
edge_options = Options()
# edge_options.add_argument("--headless")
service = Service("C:/Users/PERSONAL/Downloads/msedgedriver.exe")
driver = webdriver.Edge(service=service, options=edge_options)

# Everything after the browser is started runs inside try/finally so the
# Edge process is always shut down, even if a lookup below raises.
try:
    # Open the web page
    driver.get("https://www.bbva.com.co/personas/productos/inversion/fondos/pais.html")  # Replace with the real URL

    # Fixed pause to give the iframe time to appear
    time.sleep(5)  # Waits 5 seconds; adjust as needed

    print("Seleccionamos el IFRAME")
    iframe1 = driver.find_element(By.XPATH, "//*[@id = 'content-iframe_copy']")

    print("Cambiamos el foco el IFRAME")
    driver.switch_to.frame(iframe1)

    print("Obtener HTML del IFRAME")
    html = driver.page_source
    print(html)

    print("Obtener el dato")
    # find_elements returns a list of matching elements (empty when none match)
    dato = driver.find_elements(By.TAG_NAME, "g")
    print(dato)
finally:
    # Close the browser and end the WebDriver session
    driver.quit()

Solution

  • You can use a non-Selenium solution:

    import json
    from typing import Optional

    import requests
    
    # Base HTTP headers passed to the requests made by get_tsec() and get_value():
    # JSON is sent as the body type and preferred as the response type.
    headers = {
        "accept": "application/json, text/plain, */*; q=0.01",
        "content-type": "application/json",
    }
    
    
    def get_tsec() -> str:
        """Request a granting ticket and return the ``tsec`` token it carries.

        POSTs the hard-coded authentication payload below to the
        granting-tickets endpoint and reads the token from the ``tsec``
        response header.

        Returns:
            The value of the ``tsec`` response header.

        Raises:
            requests.HTTPError: If the endpoint answers with an error status.
            requests.Timeout: If the endpoint does not answer within the timeout.
            KeyError: If the response has no ``tsec`` header.
        """
        url = "https://www.bbva.es/ASO/TechArchitecture/grantingTickets/V02"
        payload = json.dumps({
            "authentication": {
                "consumerID": "30000032",
                "userID": "IMMH032",
                "authenticationType": "04",
                "authenticationData": [
                    {
                        "authenticationData": [
                            "WAMH032P"
                        ],
                        "idAuthenticationData": "password"
                    }
                ]
            }
        })
        # requests has no default timeout; without one a stalled server
        # would block this call indefinitely.
        response = requests.post(url, headers=headers, data=payload, timeout=30)
        # Fail with a clear HTTP error instead of an opaque KeyError on 'tsec'.
        response.raise_for_status()
        return response.headers['tsec']
    
    
    def get_value(pair_name: str, tsec: str) -> Optional[float]:
        """Return the ``LAST_DATE`` net asset value amount for a fund.

        Args:
            pair_name: Fund identifier appended to the funds endpoint URL.
            tsec: Session token sent in the ``tsec`` request header.

        Returns:
            The ``amount`` of the first entry in ``data.netAssetValues`` whose
            ``netAssetValueType.id`` is ``'LAST_DATE'``, or ``None`` when no
            entry matches.

        Raises:
            requests.HTTPError: If the endpoint answers with an error status.
            requests.Timeout: If the endpoint does not answer within the timeout.
            KeyError: If the JSON response lacks the expected keys.
        """
        url = f"https://www.bbva.es/ASO/management-entity-funds/v1/management-entities-funds/{pair_name}"
        # Build per-call headers rather than writing the token into the shared
        # module-level dict, so one call cannot leak its token into another.
        request_headers = {**headers, 'tsec': tsec}
        # requests has no default timeout; set one so a stalled server cannot
        # hang the caller forever.
        response = requests.get(url, headers=request_headers, timeout=30)
        response.raise_for_status()
        for nav in response.json()['data']['netAssetValues']:
            if nav['netAssetValueType']['id'] == 'LAST_DATE':
                return nav['netAssetValueAmount']['amount']
        # No 'LAST_DATE' entry in the response.
        return None
    
    
    # Obtain a session token, then print the 'LAST_DATE' amount for fund CCAPAISCB.
    print(get_value(pair_name='CCAPAISCB', tsec=get_tsec()))
    

    OUTPUT:

    21128.849089