I am trying to download the following file, which I have obtained from a website I am scraping:
urllib.urlretrieve does not seem to work for .aspx. Any suggestions?
The site requires JavaScript to download the file, but you can try this script:
import os
import re
from math import pow, pi, cos

import requests
# algorithm used by site to compute challenge headers:
def test(Challenge):
var_arr = [int(c) for c in str(Challenge)][::-1]
LastDig = var_arr[0]
var_arr.sort()
minDig = min(var_arr)
subvar1 = (2 * (var_arr[2])) + (var_arr[1] * 1)
subvar2 = int(str((2 * var_arr[2])) + str(var_arr[1]))
my_pow = pow(((var_arr[0] * 1) + 2), var_arr[1])
x = (Challenge * 3 + subvar1) * 1
y = cos(pi * subvar2)
answer = x * y
answer -= my_pow * 1
answer += (minDig * 1) - (LastDig * 1)
answer = str(int(answer)) + str(subvar2)
return(answer)
url = 'http://mavat.moin.gov.il/mavatps/forms/Attachment.aspx?edid=6000405287445&edn=8F90EFA829F078A90C93EAE032F3A079636EBC6FCFC3BC74C87EAF3A9A0E9E4B&opener=AttachmentError.aspx'

# Seconds to wait for the server before giving up; without a timeout a
# stalled connection would block the script indefinitely.
REQUEST_TIMEOUT = 30

with requests.Session() as s:
    # The first GET returns a page embedding the challenge values instead of
    # the file itself. Its status code is not checked because it is unknown
    # whether the challenge page is served with a success status.
    text = s.get(url, timeout=REQUEST_TIMEOUT).text
    challenge_match = re.search(r'Challenge=(\d+);', text)
    challenge_id_match = re.search(r'ChallengeId=(\d+);', text)
    if challenge_match is None or challenge_id_match is None:
        raise RuntimeError(
            'Challenge values not found in the response from {}'.format(url))
    Challenge = int(challenge_match.group(1))
    ChallengeId = int(challenge_id_match.group(1))

    headers = {
        'X-AA-Challenge-ID': str(ChallengeId),
        'X-AA-Challenge-Result': str(test(Challenge)),
        'X-AA-Challenge': str(Challenge),
        'Content-Type': 'text/plain',
    }
    # Submit the solved challenge. The response body is not used; presumably
    # the server marks the session as verified (e.g. via cookies) -- confirm.
    s.post(url, headers=headers, timeout=REQUEST_TIMEOUT)

    # Same URL again, now within the verified session: this is the download.
    r = s.get(url, timeout=REQUEST_TIMEOUT)
    r.raise_for_status()

    disposition = r.headers.get('Content-Disposition', '')
    names = re.findall(r'filename=(.*)', disposition)
    if not names:
        raise RuntimeError(
            'No filename in Content-Disposition header: {!r}'.format(disposition))
    # The name is chosen by the server: drop surrounding quotes and any
    # directory part so the file can only be created in the current directory.
    filename = os.path.basename(
        names[0].strip().strip('"\'').replace('\\', '/'))
    if not filename:
        raise RuntimeError(
            'Empty filename in Content-Disposition header: {!r}'.format(disposition))

    print('Writing {}'.format(filename))
    with open(filename, 'wb') as f_out:
        f_out.write(r.content)
Prints:
Writing KML_2000972605.kml
And the content of KML_2000972605.kml is:
<?xml version="1.0" encoding="UTF-8"?>
<kml xmlns="http://www.opengis.net/kml/2.2"
xmlns:gx="http://www.google.com/kml/ext/2.2">
<Document>
<Style id="Plan">
<LabelStyle>
<color>00000000</color>
<scale>0</scale>
</LabelStyle>
<LineStyle>
<color>ffff0000</color>
<width>3</width>
</LineStyle>
<PolyStyle>
<fill>0</fill>
</PolyStyle>
<BalloonStyle>
<text>
<![CDATA[<?xml version="1.0"?>
<table border="0" width="560" cellpadding="2" cellspacing="2" dir="rtl" xmlns:fo="http://www.w3.org/1999/XSL/Format">
<tr>
<td align="center" colspan="2" height="20" style="border-bottom: Solid 1px #b4b5b4">
<font face="Arial" size="3" color="#b50027">
<b>פרטי גבול תכנית</b>
</font>
</td>
</tr>
<tr>
<td align="right"><font face="Arial" size="3">מספר תכנית</font>
</td>
<td align="right" bgcolor="#efefef" height="20">
<font face="Arial" size="3" color="#202520">
<b>$[PL_NUMBER]</b>
</font>
</td>
</tr>
<tr>
<td align="right"><font face="Arial" size="3">שם תכנית</font>
</td>
<td align="right" bgcolor="#efefef" height="20">
<font face="Arial" size="3" color="#202520">
<b>$[PL_NAME]</b>
</font>
</td>
</tr>
<tr>
<td align="right"><font face="Arial" size="3">שטח התכנית</font>
</td>
<td align="right" bgcolor="#efefef" height="20">
<font face="Arial" size="3" color="#202520">
<b>$[PL_AREA]</b>
</font>
</td>
</tr>
<tr>
<td align="right"><font face="Arial" size="3">קישור לתכנית</font>
</td>
<td align="right" bgcolor="#efefef" height="20">
<font face="Arial" size="3" color="#202520">
<b>$[PL_URL]</b>
</font>
</td>
</tr>
<tr>
<td align="right"><font face="Arial" size="3">תאריך יצור הקובץ</font>
</td>
<td align="right" bgcolor="#efefef" height="20">
<font face="Arial" size="3" color="#202520">
<b>$[CREATE_DATE]</b>
</font>
</td>
</tr>
</table>]]>
</text>
</BalloonStyle>
</Style>
<Placemark>
<name name="name"><value>גבול תכנית</value></name>
<description/>
<styleUrl>#Plan</styleUrl>
<MultiGeometry>
<Polygon><outerBoundaryIs><LinearRing><coordinates>35.4171078765367,32.6609184162888,0 35.4173052515744,32.6610948838976,0 35.4173202997416,32.6610835076465,0 35.4174529407652,32.6609714669149,0 35.4172353097953,32.6608066226567,0 35.4171078765367,32.6609184162888,0 </coordinates></LinearRing></outerBoundaryIs></Polygon>
</MultiGeometry>
<ExtendedData>
<Data name="PL_NUMBER"><value>254-0641407</value></Data>
<Data name="PL_NAME"><value>תוספת זכויות בניה למגרש 427 בעין דור.</value></Data>
<Data name="PL_AREA"><value>0.497</value></Data>
<Data name="PL_URL"><value>http://mavat.moin.gov.il/MavatPS/Forms/SV3.aspx?tid=3&tnumb=254-0641407</value></Data>
<Data name="CREATE_DATE"><value>12/11/2018 19:49:58</value></Data>
</ExtendedData>
</Placemark>
</Document>
</kml>