Search code examples
pythonregexseleniumurl-rewritingurlparse

URL list from driver.current url


How do I edit driver.current_url so that a link goes from http://centrebet.com/Sports/12313443 to the following: http://centrebet.com/#Sports/12313443

http://centrebet.com/ and /Sports/ are constant.

I've found a lot of examples with static links but I'm at a loss as to how I would do that with a list of scraped current urls.

Code:

# Create the Chrome WebDriver instance used for all page loads below.
driver = webdriver.Chrome()

# Base URL of the site; it is also used as the prefix when building link URLs.
url = "http://centrebet.com/"
driver.get(url)

def page_counter(limit=1000):
    """Yield the consecutive integers 0, 1, ..., ``limit - 1``.

    Args:
        limit: How many values to produce. Defaults to 1000, the cap the
            counter was originally hard-coded to, so existing calls that
            pass no argument behave exactly as before.

    Yields:
        int: The next counter value, starting at 0.
    """
    yield from range(limit)

# Counter generator used below to number the scraped links.
count = page_counter()
driver.get(url)
# Container element of the sports menu, looked up by its id.
sports = driver.find_element_by_id("accordionMenu1_ulSports")
# For every menu anchor whose onclick starts with "menulink", strip the
# "menulink('" prefix and "')" suffix from the onclick text and prepend the
# base URL, giving one absolute URL string per anchor.
links = [url + link.get_attribute("onclick").replace("menulink('", "").replace("')", "") for link in sports.find_elements_by_xpath('//ul[@id="accordionMenu1_ulSports"]//li//ul//li//ul//li//a[starts-with(@onclick, "menulink")]')]



# Re-key the links as {1: first_link, 2: second_link, ...}; the counter starts
# at 0, hence the "+ 1".
# NOTE(review): page_counter() yields only 1000 values, so more than 1000
# links would exhaust it here.
links = dict((next(count) + 1, e) for e in links)

# Same mapping, ordered from the highest key down to the lowest.
desc_links = collections.OrderedDict(sorted(links.items(), reverse=True))
# Visit every collected link, starting with the highest-numbered one.
for key, value in desc_links.items():
    try:
        driver.get(value)
        ...



        # Elements matched by this deeply nested table selector; their visible
        # text is gathered into langs4_text, one entry per element.
        langs4 = driver.find_elements_by_css_selector("tbody > tr:nth-child(2) > td > table > tbody > tr > td > table > tbody > tr > td:nth-child(2) > table > tbody > tr:nth-child(3) > td > table > tbody > tr > td > table > tbody > tr > td:nth-child(1) > div > div")
        langs4_text = []
        for lang in langs4:
            # print(lang.text)
            langs4_text.append(lang.text)

        # URL the browser reports for the page that was just loaded.
        url1 = driver.current_url

# NOTE(review): as pasted, this top-level snippet sits in the middle of the
# for/try body of the scraping loop, which leaves the surrounding script
# syntactically invalid; it is meant to be read as a separate experiment.

# Python 2 provides `urlparse` and `urllib.urlencode`; Python 3 moved both
# into `urllib.parse`. Catch only ImportError so that unrelated failures
# (including KeyboardInterrupt) are not silently swallowed by the fallback.
try:
    import urlparse
    from urllib import urlencode
except ImportError:
    import urllib.parse as urlparse
    from urllib.parse import urlencode

url = "http://centrebet.com/"
params = {'#':'#','Sports':'Sports'}

# Index 4 of the 6-part urlparse result is the query string.
url_parts = list(urlparse.urlparse(url))
query = dict(urlparse.parse_qsl(url_parts[4]))
query.update(params)

# urlencode percent-escapes '#', so the params end up as query arguments
# rather than as a URL fragment.
url_parts[4] = urlencode(query)

# Prints: http://centrebet.com/?%23=%23&Sports=Sports
print(urlparse.urlunparse(url_parts))







        # Append to the CSV file; newline='' is what the csv module expects
        # for files it writes to.
        with open('C:\\O131.csv', 'a', newline='', encoding="utf-8") as outfile:
            writer = csv.writer(outfile)
            # zip() over a single list yields 1-tuples, so each written row is
            # (text, url1): one scraped text plus the page URL it came from.
            for row in zip(langs4_text):
                writer.writerow(row + (url1,))
    # A TimeoutException is ignored without any logging; `ex` is never used.
    except TimeoutException as ex:
        pass

Solution

  • I'm still not sure I understand exactly what you want to do. But if it's only about adding # to the URL, then you can simply apply this solution:

    url = "http://centrebet.com/"
    current_url = driver.current_url # http://centrebet.com/Sports/12313443
    # Split on the first occurrence of the base URL only (maxsplit=1), so a
    # later repeat of it, e.g. inside a query string, does not get a '#' too.
    new_url = url + "#".join(current_url.split(url, 1)) # http://centrebet.com/#Sports/12313443
    

    or

    url = "http://centrebet.com/"
    current_url = driver.current_url # http://centrebet.com/Sports/12313443
    # Replace only the first occurrence (count=1), so a later repeat of the
    # base URL, e.g. inside a query string, is left unchanged.
    new_url = current_url.replace(url, url + "#", 1) # http://centrebet.com/#Sports/12313443