I'm trying to get the names of universities from an OpenStreetMap map embedded in this website: https://collegecrisis.shinyapps.io/dashboard/.
I tried to automate this task with Python's Selenium library: I hover over the universities one by one and read their names. It seemed fine, but when I took a closer look I found some wrong data. I think this happened when the script tried to hover over a spot crowded with universities, which made it hover over a different university and take its name instead. I thought about zooming in, taking the name, and then zooming out, but that would take far too long and might cause runtime errors if a zoom in or a zoom out is missed.
I don't know much about maps, so I want to ask whether there is any way to get the names of all the marked universities on the map at once.
If anyone needs the code I tried, it was this one:
from selenium import webdriver
from bs4 import BeautifulSoup
import lxml
from selenium.webdriver.common.action_chains import ActionChains
from time import sleep
# Set up the Chrome driver.
PATH = "/Applications/chromedriver"
driver = webdriver.Chrome(PATH)
driver.implicitly_wait(10)  # seconds

# Lower-cased tooltip text collected for each hovered element.
nodelist = []
try:
    driver.get("https://collegecrisis.shinyapps.io/dashboard/")
    # Find all elements carrying the "leaflet-interactive" class.
    nodes = driver.find_elements_by_class_name("leaflet-interactive")
    for node in nodes:
        # Hover over the element (no click) and give the tooltip time to show.
        ActionChains(driver).move_to_element(node).perform()
        sleep(.5)
        tooltip = BeautifulSoup(driver.page_source, 'lxml').find(
            class_=lambda value: value and 'leaflet-tooltip leaflet-zoom-animated' in value)
        if tooltip is None:
            # No tooltip was rendered for this hover; skip it instead of
            # failing with AttributeError on `.text`.
            continue
        nodelist.append(tooltip.text.lower())
finally:
    # Always shut the browser and the chromedriver process down.
    driver.quit()
which was inspired by this one
This service uses HTTP streaming. It simply opens an HTTP connection to the following endpoint:
POST https://collegecrisis.shinyapps.io/dashboard/__sockjs__/n={random_token}/t={token}/w={workerID}/s=0/{random_num}/{random_token2}/xhr_streaming
And it will send commands using the following endpoint :
POST https://collegecrisis.shinyapps.io/dashboard/__sockjs__/n={random_token}/t={token}/w={workerID}/s=0/{random_num}/{random_token2}/xhr_send
You can check the result by looking for xhr_streaming in the Network tab of the Chrome developer console.
The token is retrieved from another HTTP call to:
GET https://collegecrisis.shinyapps.io/dashboard/{workerIDFull}__token__
while the workerID is present in the original page itself.
Some parameters named singletons are also required; they are located in the original page in a script tag like this:
<script type="application/shiny-singletons">fafb5589cb5a9f24485f3df0511b50d5cd0c7497,603e796bcfc2ab3685167d58c426f64c15a95192</script>
The following script opens the channel by posting this payload to the xhr_send endpoint and then sends the init message:
'["0#0|o|"]'
The complete code :
import requests
from bs4 import BeautifulSoup
import re
import time
from random import choice
from string import ascii_letters,digits
from threading import Thread
from time import sleep
import json
# A single session is used for every request made by this script.
session = requests.Session()

# Load the landing page; it carries the singletons and the worker id.
r = session.get("https://collegecrisis.shinyapps.io/dashboard/")
soup = BeautifulSoup(r.content, "lxml")
singletons = soup.find("script", {"type": "application/shiny-singletons"}).text
workerIDFull = soup.find("base")["href"]
# Raw string: "\w" is a regex class, not a (invalid) string escape.
workerID = re.search(r'_w_(\w+)', workerIDFull).group(1)

# Fetch the token; the current time in milliseconds is sent as the "_" param.
timestamp = int(round(time.time() * 1000))
r = session.get(
    f"https://collegecrisis.shinyapps.io/dashboard/{workerIDFull}__token__",
    params={"_": timestamp},
)
token = r.text

# Random path segments used in the __sockjs__ endpoint URLs.
random_token = ''.join(choice(ascii_letters) for _ in range(18))
random_token2 = ''.join(choice(ascii_letters) for _ in range(8))
random_num = ''.join(choice(digits) for _ in range(3))
def getData():
    """Read the xhr_streaming response and print the map's circle labels.

    Every non-empty line of the stream is printed as received. Lines that
    start with "a" are decoded as a JSON array of message strings; each
    message is split into at most three "|"-separated parts and the third
    part is parsed as JSON. When that payload has a "values" object with a
    "homeMap" entry, ``args[8]`` of the first "addCircles" call is printed.

    Frames or messages that do not have this shape are skipped instead of
    raising, so one unexpected message does not end the reader.
    """
    r = requests.Request("POST", f"https://collegecrisis.shinyapps.io/dashboard/__sockjs__/n={random_token}/t={token}/w={workerID}/s=0/{random_num}/{random_token2}/xhr_streaming").prepare()
    # The context manager releases the connection when the stream ends.
    with session.send(r, stream=True) as resp:
        for line in resp.iter_lines():
            if not line:
                continue
            print(line)
            frame = line.decode("utf-8", errors="replace")
            # Only lines of the form a[...] carry messages.
            if not frame.startswith("a"):
                continue
            try:
                messages = json.loads(frame[1:])
            except json.JSONDecodeError:
                continue
            if not isinstance(messages, list):
                continue
            for message in messages:
                if not isinstance(message, str):
                    continue
                # maxsplit=2 keeps any "|" inside the JSON payload intact.
                parts = message.split("|", 2)
                if len(parts) < 3:
                    continue
                try:
                    data = json.loads(parts[2])
                except json.JSONDecodeError:
                    # e.g. an empty payload
                    continue
                if not isinstance(data, dict):
                    continue
                values = data.get("values")
                if not isinstance(values, dict) or "homeMap" not in values:
                    continue
                labels = [
                    t["args"][8]
                    for t in values["homeMap"]["x"]["calls"]
                    if t["method"] == "addCircles"
                ]
                if labels:
                    print(labels[0])
def openChannel():
    """POST the '["0#0|o|"]' payload to the xhr_send endpoint."""
    endpoint = f"https://collegecrisis.shinyapps.io/dashboard/__sockjs__/n={random_token}/t={token}/w={workerID}/s=0/{random_num}/{random_token2}/xhr_send"
    request_headers = {"Content-Type":"text/plain;charset=UTF-8"}
    session.post(endpoint, data='["0#0|o|"]', headers=request_headers)
def sendInit():
    """POST the "init" message, with all client-side state, to xhr_send.

    The message data holds the sidebar/action inputs, one
    ``.clientdata_output_<name>_hidden`` flag per output (False for the
    home outputs, True for all others), and the client URL details together
    with the page's singletons. Key order matches the sequence in which the
    groups are added below.
    """
    # Outputs reported with hidden = False.
    shown_outputs = (
        "authModal", "homefullOnlineVB", "homepOnlineVB", "homeHybridVB",
        "homepPersonVB", "homePersonVB", "homeTBDVB", "homeOtherVB",
        "homeTotalShownVB", "homeMap",
    )
    # Outputs reported with hidden = True.
    concealed_outputs = (
        "graphStateFilter", "fallBarGraph", "covidAthleticGraph",
        "schoolCovidPlot", "intlFilter", "intlGraph", "facultyBarGraph",
        "stateTrendsGraph", "covidHeatmap", "announceHeatmap",
        "onlineHeatmap", "springBreak", "peerInstPicker", "statusFilter",
        "rankcatFilter", "hospitalFilter", "covidFilter",
        "campusTypeFilter", "sectorFilter", "ccbasicFilter",
        "divisionFilter", "conferenceFilter", "sizeSlider", "resHallSlider",
        "sportsRevenueSlider", "intlSlider", "onlineVB", "announcedVB",
        "noDecisionVB", "totalVB", "dateSlider", "springMap",
        "fallPeerInstPicker", "fallStatusFilter", "fallRankcatFilter",
        "fallFacultyFilter", "fallHospitalFilter", "fallCovidFilter",
        "fallCampusTypeFilter", "fallSectorFilter", "fallCcbasicFilter",
        "fallStaffFilter", "fallDivisionFilter", "fallConferenceFilter",
        "fallSizeSlider", "fallResHallSlider", "fallSportsRevenueSlider",
        "fallIntlSlider", "fallfullOnlineVB", "fallpOnlineVB",
        "fallHybridVB", "fallpPersonVB", "fallPersonVB", "fallTBDVB",
        "fallOtherVB", "fallTotalShownVB", "fallMap", "greFilter",
        "modelFilter", "planFilter", "videoPlatformFilter",
        "lawSectorFilter", "lawMinoritySlider", "lawLSATtwofiveSlider",
        "lawLSATmedianSlider", "lawLSATsevenfiveSlider",
        "lawAcceptanceSlider", "lawFYSlider", "lawFullOnlineVB",
        "lawPartialOnlineVB", "lawHybridVB", "lawPersonVB", "lawNDVB",
        "lawTotalVB", "lawMap", "intlOnlineVB", "intlHybridVB",
        "intlInPersonVB", "intlCovidVB", "intlTBDVB", "intlTotalVB",
        "intlMap",
    )

    init_inputs = {
        "sidebarItemExpanded": None,
        "sidebarCollapsed": True,
        "resetAll:shiny.action": 0,
        "fallResetAll:shiny.action": 0,
        "lawResetAll:shiny.action": 0,
    }
    for output_name in shown_outputs:
        init_inputs[f".clientdata_output_{output_name}_hidden"] = False
    for output_name in concealed_outputs:
        init_inputs[f".clientdata_output_{output_name}_hidden"] = True
    init_inputs.update({
        ".clientdata_pixelratio": 1,
        ".clientdata_url_protocol": "https:",
        ".clientdata_url_hostname": "collegecrisis.shinyapps.io",
        ".clientdata_url_port": "",
        ".clientdata_url_pathname": "/dashboard/",
        ".clientdata_url_search": "",
        ".clientdata_url_hash_initial": "",
        ".clientdata_url_hash": "",
        ".clientdata_singletons": singletons,
        ".clientdata_allowDataUriScheme": True,
    })

    init_message = json.dumps({"method": "init", "data": init_inputs})
    # Dumping a one-element list yields ["1#0|m|<escaped message>"], i.e. the
    # init JSON embedded as an escaped string inside the outer array.
    frame = json.dumps(["1#0|m|" + init_message])
    endpoint = f"https://collegecrisis.shinyapps.io/dashboard/__sockjs__/n={random_token}/t={token}/w={workerID}/s=0/{random_num}/{random_token2}/xhr_send"
    session.post(endpoint, data=frame, headers={"Content-Type":"text/plain;charset=UTF-8"})
# Run the streaming reader on its own thread so the commands below can be
# sent while it is listening.
thread = Thread(target=getData)
thread.start()
# Presumably gives the streaming request time to connect before any
# command is sent -- confirm.
sleep(1)
openChannel()
sendInit()
# Block until the reader thread finishes.
thread.join()
Check out the field data["values"]["homeMap"]["x"]["calls"] if you need more data from the map.