Search code examples
pythonseleniumselenium-webdriverweb-scrapingiframe

selenium get data from script tag


I have a script that enters a chassis number into a form and displays the vehicle information. I want to extract the following information: Marque et type, Dénomination commerciale, Variante and Version (please see the attached screenshot).

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

# NOTE(review): ChromeDriverManager and URL_BASE are not imported or defined
# anywhere in this snippet; they must come from code that is not shown.
browser = webdriver.Chrome(executable_path=ChromeDriverManager().install())
browser.get(URL_BASE)  

# Shared explicit wait with a 20-second timeout.
wait = WebDriverWait(browser, 20)

# Switch into the frame named "AppWindow".
# NOTE(review): find_element runs once, before wait.until starts polling.
wait.until(EC.frame_to_be_available_and_switch_to_it(browser.find_element(By.CSS_SELECTOR, "[name='AppWindow']")))

# Type the chassis number into the "Writable3" input, then click "btnOphalen".
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "input[name='Writable3']"))).send_keys("U5YEH813ACL001327")
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button[name='btnOphalen']"))).click()
# Leave the frame and return to the top-level document.
browser.switch_to.default_content()

enter image description here

The problem is that the data I want is in:

<script charset="UTF-8">
        // Write a POST form (ID DATAFORM) whose action is parent.jspPath and whose
        // target is _parent, containing seven hidden inputs.
        document.write("<FORM ID=DATAFORM METHOD=POST ACTION=" + parent.jspPath + " TARGET=_parent>");
        document.write("<INPUT TYPE=HIDDEN NAME=PAGEDATA>");
        document.write("<INPUT TYPE=HIDDEN NAME=DYNAMIC>");
        document.write("<INPUT TYPE=HIDDEN NAME=SUBSESSIONID>");
        document.write("<INPUT TYPE=HIDDEN NAME=NAME>");
        document.write("<INPUT TYPE=HIDDEN NAME=PSTEPID>");
        document.write("<INPUT TYPE=HIDDEN NAME=WINDOWID>");
        document.write("<INPUT TYPE=HIDDEN NAME=ABRESULT>");
        document.write("</FORM>");
        // Local alias for the parent's "base" object.
        var base = parent.base;
        // Store the part of location.href that precedes location.pathname.
        base.responsePath=location.href.substring(0,location.href.indexOf(location.pathname));

        // Sets parent.requestType to "ABREQUEST" and reloads through doReplace,
        // appending "&ABRESULT=" plus parent.abResult to the URL.
        function doABCommand(srcPath,srcHref) {
          parent.requestType = "ABREQUEST";
          doReplace(srcPath,srcHref,"&ABRESULT="+parent.abResult);
        }

        // Sets parent.requestType to "MULTIURL" and reloads through doReplace,
        // appending the value popped from parent.dataContinue to the URL.
        function doMultiURL(srcPath,srcHref) {
          parent.requestType = "MULTIURL";
          doReplace(srcPath,srcHref,parent.dataContinue.pop());
        }

        // Sets parent.requestType to "GETUPDATE" and reloads through doReplace
        // with no extra URL suffix.
        function doDataUpdate(srcPath,srcHref) {
          parent.requestType = "GETUPDATE";
          doReplace(srcPath,srcHref);
        }

        // Sets parent.requestType to "GETBASE" and reloads through doReplace
        // with no extra URL suffix.
        function doBaseUpdate(srcPath,srcHref) {
          parent.requestType = "GETBASE";
          doReplace(srcPath,srcHref);
        }

        // Builds a request URL from srcPath/srcHref and the parent's session fields,
        // then navigates this frame to it with location.replace.
        //   srcPath - path whose directory part (up to the last '/') is reused
        //   srcHref - full href; the part before srcPath is kept as the prefix
        //   abCmd   - optional string appended to the URL; omitted when undefined
        function doReplace(srcPath,srcHref,abCmd) {
          // pageData is assigned without a declaration in this block.
          pageData = "";
          // MUST USE JSPPATH HERE as location wont have the ;jsessionid information
          // Directory part of srcPath, including the trailing '/'.
          var appPath=srcPath.substring(0,srcPath.lastIndexOf('/',srcPath.length)+1);
          var responsePath=srcHref.substring(0,srcHref.indexOf(srcPath))+appPath+parent.jspPath+"?";
          // Query string: SUBSESSIONID, DYNAMIC, NAME, PSTEPID, WINDOWID, then abCmd if given.
          var url=responsePath+"SUBSESSIONID="+parent.subSessionId+"&DYNAMIC="+parent.requestType+"&NAME="+parent.pageName+"&PSTEPID="+parent.pStepId+"&WINDOWID="+parent.windowId+(typeof abCmd=="undefined"?"":abCmd);
          // windowUpdateList is likewise assigned without a declaration here.
          windowUpdateList = new Array();
          location.replace(url);
        }

        // Submits a control event: records pageName on the parent, collects form data
        // through parent.getFormData, builds a "DOREQUEST" URL with parent.buildURL
        // and navigates this frame to it.
        function doEvent(rcid, v1, eventtype, pageName) {
          parent.pageName = pageName;
          parent.doPageLauncher = true;
          pageData = parent.getFormData(rcid, v1, eventtype);
          parent.requestType = "DOREQUEST";
          // The request URL starts from the current pathname.
          var urlStart=location.pathname;
          url = parent.buildURL(urlStart, pageData, "SUBSESSIONID="+parent.subSessionId+"&DYNAMIC="+parent.requestType+"&NAME="+parent.pageName+"&PSTEPID="+parent.pStepId+"&WINDOWID="+parent.windowId);
          location.replace(url);
        }
    
        // Calls updateAttributes(parent) on each control object stored on base for
        // window CMV_WD_PBL01_CONSULT_STATUS_1795241017, then runs the parent's
        // focus/layout routines and any optional hooks.
        function updatePage() {
            base.CMV_WD_PBL01_CONSULT_STATUS_1795241017_Writable1.updateAttributes(parent);
    base.CMV_WD_PBL01_CONSULT_STATUS_1795241017_Writable2.updateAttributes(parent);
    base.CMV_WD_PBL01_CONSULT_STATUS_1795241017_Writable3.updateAttributes(parent);
    base.CMV_WD_PBL01_CONSULT_STATUS_1795241017_Readonly1.updateAttributes(parent);
    base.CMV_WD_PBL01_CONSULT_STATUS_1795241017_Readonly2.updateAttributes(parent);
    base.CMV_WD_PBL01_CONSULT_STATUS_1795241017_Readonly3.updateAttributes(parent);
    base.CMV_WD_PBL01_CONSULT_STATUS_1795241017_Readonly4.updateAttributes(parent);
    base.CMV_WD_PBL01_CONSULT_STATUS_1795241017_Readonly5.updateAttributes(parent);
    base.CMV_WD_PBL01_CONSULT_STATUS_1795241017_Readonly6.updateAttributes(parent);
    base.CMV_WD_PBL01_CONSULT_STATUS_1795241017_Readonly7.updateAttributes(parent);
    base.CMV_WD_PBL01_CONSULT_STATUS_1795241017_Field3.updateAttributes(parent);
    base.CMV_WD_PBL01_CONSULT_STATUS_1795241017_Field4.updateAttributes(parent);
    base.CMV_WD_PBL01_CONSULT_STATUS_1795241017_Field2.updateAttributes(parent);
    base.CMV_WD_PBL01_CONSULT_STATUS_1795241017_Field5.updateAttributes(parent);
    base.CMV_WD_PBL01_CONSULT_STATUS_1795241017_Field1.updateAttributes(parent);
    base.CMV_WD_PBL01_CONSULT_STATUS_1795241017_Readonly9.updateAttributes(parent);
    base.CMV_WD_PBL01_CONSULT_STATUS_1795241017_Readonly8.updateAttributes(parent);
    base.CMV_WD_PBL01_CONSULT_STATUS_1795241017_Readonly11.updateAttributes(parent);
    base.CMV_WD_PBL01_CONSULT_STATUS_1795241017_Readonly10.updateAttributes(parent);
    base.CMV_WD_PBL01_CONSULT_STATUS_1795241017_Readonly12.updateAttributes(parent);

          parent.updateFocus(parent.lastFocus);
          parent.move_Controls();
          parent.disable_image();
          // Optional hooks: each is called only when it is defined.
          if (typeof parent.processDisabledby != 'undefined') {
            parent.processDisabledby();
          }
          if ( parent.AppWindow.userOnLoad ) {
            parent.AppWindow.userOnLoad();
          }
          // Clear the parent's eventinProcess flag.
          parent.eventinProcess = false;
        } 
   
        // Alerts
        
        // Data code
        // When the window object exists on base: add the window to the update list and
        // call parent.instanciate once per control, passing the control's type, window
        // id, window name, control name and (for entry fields) its current value.
        // The values asked about are the string arguments of Readonly1 ("Marque et type"),
        // Readonly2 ("Dénomination commerciale"), Readonly4 ("Variante") and
        // Readonly5 ("Version").
        // Otherwise, when parent.doPageLauncher is false: alert, close the child
        // windows and send base to ief_EndPage.html.
        if ( base.CMV_WD_PBL01_CONSULT_STATUS_1795241017 ) {
                base.addWindowToUpdateList("CMV_WD_PBL01_CONSULT_STATUS",1795241017);
        parent.instanciate(new Array('EntryField','1795241017','CMV_WD_PBL01_CONSULT_STATUS','Writable1',"")).setAttributes(false,'','','none','noop',null,null);
    parent.instanciate(new Array('EntryField','1795241017','CMV_WD_PBL01_CONSULT_STATUS','Writable2',"")).setAttributes(false,'','','none','noop',null,null);
    parent.instanciate(new Array('EntryField','1795241017','CMV_WD_PBL01_CONSULT_STATUS','Writable3',"U5YEH813ACL001327")).setAttributes(false,'','','none','noop',null,null);
    parent.instanciate(new Array('CommandButton','1795241017','CMV_WD_PBL01_CONSULT_STATUS','btnOphalen',"","visible",null));
    parent.instanciate(new Array('EntryField','1795241017','CMV_WD_PBL01_CONSULT_STATUS','Readonly1',"Marque et type : KIA VENGA")).setAttributes(false,'','','none','noop',null,null);
    parent.instanciate(new Array('EntryField','1795241017','CMV_WD_PBL01_CONSULT_STATUS','Readonly2',"Dénomination commerciale : KIA VENGA")).setAttributes(false,'','','none','noop',null,null);
    parent.instanciate(new Array('EntryField','1795241017','CMV_WD_PBL01_CONSULT_STATUS','Readonly3',"WVTA / PVA : e4*2007/46*0261*02")).setAttributes(false,'','','none','noop',null,null);
    parent.instanciate(new Array('EntryField','1795241017','CMV_WD_PBL01_CONSULT_STATUS','Readonly4',"Variante : F5D41")).setAttributes(false,'','','none','noop',null,null);
    parent.instanciate(new Array('EntryField','1795241017','CMV_WD_PBL01_CONSULT_STATUS','Readonly5',"Version : M61BZ1")).setAttributes(false,'','','none','noop',null,null);
    parent.instanciate(new Array('EntryField','1795241017','CMV_WD_PBL01_CONSULT_STATUS','Readonly6',"Catégorie : M1 - VEH.POUR LE TRANSPORT DE PASSAGERS,OUTRE LE SIEGE DU CONDUCTEUR,8 PLACES ASSISES AU MAXIMUM")).setAttributes(false,'','','none','noop',null,null);
    parent.instanciate(new Array('EntryField','1795241017','CMV_WD_PBL01_CONSULT_STATUS','Readonly7',"Code carrosserie : AF - VEHICULE A USAGES MULTIPLES")).setAttributes(false,'','','none','noop',null,null);
    parent.instanciate(new Array('EntryField','1795241017','CMV_WD_PBL01_CONSULT_STATUS','Field3',"CO2 NEDC combinées: 114")).setAttributes(false,'','','none','noop',null,null);
    parent.instanciate(new Array('EntryField','1795241017','CMV_WD_PBL01_CONSULT_STATUS','Field4',"CO2 NEDC combinées, pondérées:-")).setAttributes(false,'','','none','noop',null,null);
    parent.instanciate(new Array('EntryField','1795241017','CMV_WD_PBL01_CONSULT_STATUS','Field2',"CO2 WLTP combinées: -")).setAttributes(false,'','','none','noop',null,null);
    parent.instanciate(new Array('EntryField','1795241017','CMV_WD_PBL01_CONSULT_STATUS','Field5',"CO2 WLTP combinées, pondérées: -")).setAttributes(false,'','','none','noop',null,null);
    parent.instanciate(new Array('EntryField','1795241017','CMV_WD_PBL01_CONSULT_STATUS','Field1',"Euronorm: Euro 5")).setAttributes(false,'','','none','noop',null,null);
    parent.instanciate(new Array('EntryField','1795241017','CMV_WD_PBL01_CONSULT_STATUS','Readonly9',"Date de la dernière immatriculation : 27-06-2014")).setAttributes(false,'','','none','noop',null,null);
    parent.instanciate(new Array('EntryField','1795241017','CMV_WD_PBL01_CONSULT_STATUS','Readonly8',"Date de pre-enregistrement : 22-12-2011 (Fiscal OK)")).setAttributes(false,'','','none','noop',null,null);
    parent.instanciate(new Array('EntryField','1795241017','CMV_WD_PBL01_CONSULT_STATUS','Readonly11',"Date de fin de validité du contrôle technique : 17-10-2022")).setAttributes(false,'','','none','noop',null,null);
    parent.instanciate(new Array('EntryField','1795241017','CMV_WD_PBL01_CONSULT_STATUS','Readonly10',"Date de radiation :")).setAttributes(false,'','','none','noop',null,null);
    parent.instanciate(new Array('GroupBox','1795241017','CMV_WD_PBL01_CONSULT_STATUS','GroupBox1',null,null));
    parent.instanciate(new Array('EntryField','1795241017','CMV_WD_PBL01_CONSULT_STATUS','Readonly12',"Le véhicule est complet.")).setAttributes(false,'','','none','noop',null,null);
    parent.instanciate(new Array('GroupBox','1795241017','CMV_WD_PBL01_CONSULT_STATUS','GroupBoxSub1',null,null));
    parent.instanciate(new Array('GroupBox','1795241017','CMV_WD_PBL01_CONSULT_STATUS','GroupBox2',null,null));
    parent.instanciate(new Array('GroupBox','1795241017','CMV_WD_PBL01_CONSULT_STATUS','GroupBoxSub2',null,null));
    parent.instanciate(new Array('StaticText','1795241017','CMV_WD_PBL01_CONSULT_STATUS','Literal2',null));
    parent.instanciate(new Array('GroupBox','1795241017','CMV_WD_PBL01_CONSULT_STATUS','GroupBoxSub3',null,null));
    parent.instanciate(new Array('StaticText','1795241017','CMV_WD_PBL01_CONSULT_STATUS','Literal4',null));
    parent.instanciate(new Array('StaticText','1795241017','CMV_WD_PBL01_CONSULT_STATUS','Title1',null));
    parent.instanciate(new Array('StaticText','1795241017','CMV_WD_PBL01_CONSULT_STATUS','Literal5',null));
    parent.instanciate(new Array('StaticText','1795241017','CMV_WD_PBL01_CONSULT_STATUS','Literal12',null));
    parent.instanciate(new Array('StaticText','1795241017','CMV_WD_PBL01_CONSULT_STATUS','Literal11',null));
    parent.instanciate(new Array('StaticText','1795241017','CMV_WD_PBL01_CONSULT_STATUS','Literal13',null));
    parent.instanciate(new Array('GroupBox','1795241017','CMV_WD_PBL01_CONSULT_STATUS','GroupBox4',null,null));
}
else {  if( parent.doPageLauncher==false){
     alert ('Page has encountered a problem and must close') 
    parent.base.closeChildren(); 

    base.location.replace((typeof parent.context=='undefined'?'':parent.context)+"ief_EndPage.html");

}}
    // Refresh the window data only when parent.doPageLauncher is true.
    if (parent.doPageLauncher == true) {
        base.updateWindowData();
    }

        // Window closure code
        
        // Page Launch code
        
        parent.loadController=false;
        // Errors
        
      </script>

I have tried :

# Explicit wait with a 20-second timeout.
wait = WebDriverWait(browser, 20)
# Switch into the frame named "Controller".
wait.until(EC.frame_to_be_available_and_switch_to_it(browser.find_element(By.CSS_SELECTOR, "[name='Controller']")))
# Read the outerHTML of the first <script> element found in that frame.
a = browser.find_elements(By.TAG_NAME, "script")[0].get_attribute('outerHTML')

But it doesn't work.


Solution

  • The elements you are trying to access here are still inside that iframe, so you do not need to switch back to the default content after the click.
    Also, each of the elements containing the data you are trying to get is an input element enter image description here

    The data is stored in each element's value attribute, not in its text.
    In my sample code I'm extracting and printing the data of the first 2 fields.
    It works.

    import time
    
    from selenium import webdriver
    from selenium.webdriver.chrome.service import Service
    from selenium.webdriver.chrome.options import Options
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support import expected_conditions as EC
    
    # Open the lookup page, submit a chassis number inside the "AppWindow" frame
    # and print the values of the first two read-only result fields.
    options = Options()
    options.add_argument("start-maximized")

    # Raw string: the backslashes in the Windows path must stay literal
    # ('\w' and '\c' are invalid escape sequences in a normal string).
    webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
    driver = webdriver.Chrome(service=webdriver_service, options=options)
    url = 'https://www.mobilit.fgov.be/WebdivPub_FR/wmvpstv1_fr?SUBSESSIONID=16382865'
    driver.get(url)
    wait = WebDriverWait(driver, 20)

    # Pass a locator instead of an already-resolved element, so the wait itself
    # polls until the frame exists rather than failing before it is loaded.
    wait.until(EC.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR, "[name='AppWindow']")))

    wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "input[name='Writable3']"))).send_keys("U5YEH813ACL001327")
    wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button[name='btnOphalen']"))).click()

    # Stay inside the frame: the result fields live in the same iframe.
    # Wait for the first result field to get a value instead of sleeping a fixed time.
    # NOTE(review): assumes Readonly1 is empty until the lookup completes - confirm.
    readonly1 = (By.CSS_SELECTOR, "input#Readonly1")
    wait.until(lambda d: d.find_element(*readonly1).get_attribute("value"))

    # The data is held in each input's "value" attribute, not in its text.
    val1 = driver.find_element(*readonly1).get_attribute("value")
    print(val1)
    val2 = driver.find_element(By.CSS_SELECTOR, "input#Readonly2").get_attribute("value")
    print(val2)