How do I edit the driver's current URL so that a link goes from:
http://centrebet.com/Sports/12313443
to the following: http://centrebet.com/#Sports/12313443
The http://centrebet.com/ prefix and the /Sports/ segment are constant.
I've found a lot of examples with static links, but I'm at a loss as to how to do that with a list of scraped current URLs.
Code:
# Start a Chrome browser session (webdriver is presumably Selenium's; its import is not shown).
driver = webdriver.Chrome()
# Base URL of the site; it is also the prefix prepended to every scraped link below.
url = "http://centrebet.com/"
# Load the landing page.
driver.get(url)
def page_counter():
    """Yield the integers 0 through 999, in order.

    Used as a running counter: each ``next()`` call hands out the next number.
    """
    for x in range(1000):
        yield x
# Counter that numbers the scraped links; the "+ 1" below makes the keys start at 1.
count = page_counter()
driver.get(url)
sports = driver.find_element_by_id("accordionMenu1_ulSports")
# Each matching anchor has an onclick attribute starting with "menulink";
# strip the menulink('...') wrapper and prefix the base URL to build the link.
links = [
    url + anchor.get_attribute("onclick").replace("menulink('", "").replace("')", "")
    for anchor in sports.find_elements_by_xpath(
        '//ul[@id="accordionMenu1_ulSports"]//li//ul//li//ul//li//a[starts-with(@onclick, "menulink")]'
    )
]
# Map running number -> link.
links = {next(count) + 1: target for target in links}
# Order the mapping by key, highest number first.
desc_links = collections.OrderedDict(sorted(links.items(), reverse=True))
# urlparse/urlencode live in urllib.parse on Python 3. Resolve them once, before
# the loop, and fall back only when the Python 2 module names are missing.
try:
    import urlparse
    from urllib import urlencode
except ImportError:
    import urllib.parse as urlparse
    from urllib.parse import urlencode

# Visit every scraped link (highest key first) and append the texts found on
# each page, together with the page's current URL, to the CSV file.
for key, value in desc_links.items():
    try:
        driver.get(value)
        ...
        langs4 = driver.find_elements_by_css_selector("tbody > tr:nth-child(2) > td > table > tbody > tr > td > table > tbody > tr > td:nth-child(2) > table > tbody > tr:nth-child(3) > td > table > tbody > tr > td > table > tbody > tr > td:nth-child(1) > div > div")
        langs4_text = [lang.text for lang in langs4]
        # URL the browser ended up on after loading the link.
        url1 = driver.current_url

        # Attempt at building the "#Sports" URL. NOTE: this adds '#' and 'Sports'
        # as query-string parameters (the '#' gets percent-encoded), so the printed
        # URL is not the desired http://centrebet.com/#Sports/<id> form, and it
        # does not depend on url1 at all.
        url = "http://centrebet.com/"
        params = {'#': '#', 'Sports': 'Sports'}
        url_parts = list(urlparse.urlparse(url))
        query = dict(urlparse.parse_qsl(url_parts[4]))
        query.update(params)
        url_parts[4] = urlencode(query)
        print(urlparse.urlunparse(url_parts))

        # Append mode: rows from every visited page accumulate in the same file.
        with open('C:\\O131.csv', 'a', newline='', encoding="utf-8") as outfile:
            writer = csv.writer(outfile)
            # zip() over a single list yields 1-tuples, so each row is (text, url1).
            for row in zip(langs4_text):
                writer.writerow(row + (url1,))
    except TimeoutException:
        # Best effort: a page that times out is skipped and the loop moves on.
        pass
I'm still not sure I understand exactly what you want to do, but if it's only about adding "#" to the URL, then you can simply apply this solution:
url = "http://centrebet.com/"
current_url = driver.current_url  # http://centrebet.com/Sports/12313443
# Split on the first occurrence of the base URL only (maxsplit=1), so a repeat of
# it later in the path or query string is kept instead of being replaced by "#".
new_url = url + "#".join(current_url.split(url, 1))  # http://centrebet.com/#Sports/12313443
or
url = "http://centrebet.com/"
current_url = driver.current_url  # http://centrebet.com/Sports/12313443
# Limit the replacement to the first occurrence (count of 1), so "#" is inserted
# only after the leading base URL and not after any later repeat of it.
new_url = current_url.replace(url, url + "#", 1)  # http://centrebet.com/#Sports/12313443