I'm trying to write a script, using the requests module and the BeautifulSoup library, that does the following on this website:
Select the Strata plan number button, input 11 in the input box, and then hit the search button. Finally, scrape the address from the result.
After running the script and checking the result, I don't see the address in it.
import re
import requests
from bs4 import BeautifulSoup
# Public page whose HTML contains the iframe pointing at the Strata Hub search app.
link = 'https://www.nsw.gov.au/housing-and-construction/strata/strata-search'
# Endpoint the search form is posted to.
# NOTE(review): pzTransactionId and AJAXTrackID in this URL (and pzHarnessID in
# the payload below) are hard-coded and look session-specific -- confirm whether
# they must be re-read from a fresh page load before each run.
url = 'https://www.stratahub.nsw.gov.au/prweb/PRAuth/app/ssr_4380/6nxCgYjOTS_fVOVfeekVPA*/!SchemeSearch?pzTransactionId=cc5ddc1ecec1c095231675db14450f87&pzFromFrame=&pzPrimaryPageName=pyDisplayHarness&AJAXTrackID=22'

# Seconds to wait on each HTTP call; without a timeout requests can block forever.
REQUEST_TIMEOUT = 30

# Headers applied to every request made through the session.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
    'accept': '*/*',
    'accept-encoding': 'gzip, deflate, br, zstd',
    'accept-language': 'en-US,en;q=0.9',
    'X-Requested-With': 'XMLHttpRequest',
    'origin': 'https://www.stratahub.nsw.gov.au',
}

# Form fields posted to the search endpoint.
# Boolean fields are written as the strings "true"/"false": requests form-encodes
# a Python bool via str(), which would put "True"/"False" on the wire.
# NOTE(review): assumes the server expects lowercase booleans, as a browser form
# would send -- confirm against the request captured in the browser dev tools.
payload = {
    "$PSchemeSearch$pSearchBy": "Strata Plan Number",
    "$PSchemeSearch$pSchemePlanNumber": 11,
    "pzuiactionzzz": "",  # filled in below from the iframe URL's query string
    "PreActivitiesList": "",
    "sectionParam": "",
    "ActivityParams": "=",
    "$ODesktopWrapperInclude": "",
    "$ODeterminePortalTop": "",
    "$ODynamicLayout": "",
    "$ODynamicLayoutCell": "",
    "$OEvalDOMScripts_Include": "",
    "$OForm": "",
    "$OHarness": "",
    "$OHarnessStaticJSEnd": "",
    "$OHarnessStaticJSStart": "",
    "$OHarnessStaticScriptsClientValidation": "",
    "$OPMCHarnessStaticScripts": "",
    "$OSessionUser": "",
    "$OSurveyStaticScripts": "",
    "$OWorkformStyles": "",
    "$OpxAutoComplete": "",
    "$OpxButton": "",
    "$OpxDisplayText": "",
    "$OpxHarnessContent": "",
    "$OpxLayoutContainer": "",
    "$OpxNonTemplate": "",
    "$OpxRadioButtons": "",
    "$OpxSection": "",
    "$OpxVisible": "",
    "$OpxWorkArea": "",
    "$OpxWorkAreaContent": "",
    "$OpyDirtyCheckConfirm": "",
    "$OpyWorkFormStandardEnd": "",
    "$OpyWorkFormStandardStart": "",
    "$OpzAutoCompleteAGIncludes": "",
    "$OpzHarnessInlineScriptsEnd": "",
    "$OpzHarnessInlineScriptsStart": "",
    "$OpzPortalFavIcon": "",
    "$OpzPortalIcon": "",
    "$Opzpega_ui_harnesscontext": "",
    "$Opzpega_web_mashup": "",
    "$OpxTextInput": "",
    "$OpzDecimalInclude": "",
    "pyEncodedParameters": "true",
    "pzKeepPageMessages": "false",
    "strPHarnessClass": "Data-Portal",
    "strPHarnessPurpose": "SearchStrataScheme",
    "UITemplatingStatus": "Y",
    "StreamName": "SchemeSearch",
    "BaseReference": "SchemeSearch",
    "bClientValidation": "true",
    "FormError": "NONE",
    "pyCustomError": "DisplayErrors",
    "UsingPage": "true",
    "HeaderButtonSectionName": "-1",
    "PagesToRemove": "",
    "pzHarnessID": "HID387D2E2FCEE4EC200B5BAEA8C6A5D859",
    "inStandardsMode": "true",
}

with requests.Session() as s:
    s.headers.update(headers)

    # Load the public page and locate the iframe that hosts the search app.
    res = s.get(link, timeout=REQUEST_TIMEOUT)
    res.raise_for_status()
    soup = BeautifulSoup(res.text, "lxml")
    iframe = soup.select_one("iframe[title='Strata Search Production']")
    if iframe is None or not iframe.get('data-src'):
        # Fail with a clear message instead of a TypeError/KeyError on the lookup.
        raise RuntimeError(
            "Could not find the 'Strata Search Production' iframe (or its "
            "data-src attribute) on " + link
        )
    code_url = iframe['data-src']

    # The iframe URL is sent as the referer, and its query string is sent as
    # the pzuiactionzzz form field.
    s.headers['referer'] = code_url
    payload['pzuiactionzzz'] = code_url.split("?")[-1]

    r = s.post(url, data=payload, timeout=REQUEST_TIMEOUT)
    # Print the status and raw body for inspection (no status check here, so
    # an error response is still shown).
    print(r.status_code)
    print(r.text)
How can I generate the result containing the address I'm after?
I think an easier method would be to change the strategy for obtaining the data:
When you view the result and click on "View Map", the page makes a request that is a lot simpler, and the address is in its response.
E.g.:
import requests
# MapServer layer query endpoint; this is the request the page makes when
# "View Map" is clicked on a search result.
api_url = (
    "https://portal.spatial.nsw.gov.au/server/rest/services/StrataHub/MapServer/0/query"
)

# Template for the "where" filter; the plan number is substituted for {}.
plan = "planlabel='SP{}'"

params = {
    "f": "json",
    "where": None,  # set below from the plan template
    "returnGeometry": "true",
    "spatialRel": "esriSpatialRelIntersects",
    "maxAllowableOffset": "0.00001",
    "outFields": "*",
    "outSR": "102100",
}

params["where"] = plan.format(11)  # <-- change to the number you want

# A timeout keeps the call from hanging indefinitely, and raise_for_status()
# surfaces HTTP errors before the body is parsed as JSON.
response = requests.get(api_url, params=params, timeout=30)
response.raise_for_status()
data = response.json()
print(data)
Prints:
{
"displayFieldName": "plannumber",
"fieldAliases": {
"objectid": "objectid",
"plannumber": "plannumber",
"registrationdate": "registrationdate",
"shape_length": "shape_length",
"shape_area": "shape_area",
"address": "address",
"suburb": "suburb",
"lga": "lga",
"lottotal": "lottotal",
"postcode": "postcode",
"planlabel": "planlabel",
"st_area(shape)": "st_area(shape)",
"st_perimeter(shape)": "st_perimeter(shape)",
},
"geometryType": "esriGeometryPolygon",
"spatialReference": {"wkid": 102100, "latestWkid": 3857},
"fields": [
{"name": "objectid", "type": "esriFieldTypeOID", "alias": "objectid"},
{"name": "plannumber", "type": "esriFieldTypeInteger", "alias": "plannumber"},
{
"name": "registrationdate",
"type": "esriFieldTypeDate",
"alias": "registrationdate",
"length": 8,
},
{
"name": "shape_length",
"type": "esriFieldTypeDouble",
"alias": "shape_length",
},
{"name": "shape_area", "type": "esriFieldTypeDouble", "alias": "shape_area"},
{
"name": "address",
"type": "esriFieldTypeString",
"alias": "address",
"length": 255,
},
{
"name": "suburb",
"type": "esriFieldTypeString",
"alias": "suburb",
"length": 255,
},
{"name": "lga", "type": "esriFieldTypeString", "alias": "lga", "length": 255},
{"name": "lottotal", "type": "esriFieldTypeSmallInteger", "alias": "lottotal"},
{"name": "postcode", "type": "esriFieldTypeInteger", "alias": "postcode"},
{
"name": "planlabel",
"type": "esriFieldTypeString",
"alias": "planlabel",
"length": 255,
},
{
"name": "st_area(shape)",
"type": "esriFieldTypeDouble",
"alias": "st_area(shape)",
},
{
"name": "st_perimeter(shape)",
"type": "esriFieldTypeDouble",
"alias": "st_perimeter(shape)",
},
],
"features": [
{
"attributes": {
"objectid": 9,
"plannumber": 11,
"registrationdate": -259545600000,
"shape_length": 0.00128814089728482,
"shape_area": 9.29112433528216e-08,
"address": "6 BURRANEER BAY ROAD CRONULLA",
"suburb": "CRONULLA",
"lga": "SUTHERLAND SHIRE",
"lottotal": 14,
"postcode": 2230,
"planlabel": "SP11",
"st_area(shape)": 9.291124335282166e-08,
"st_perimeter(shape)": 0.001288140897284823,
},
"geometry": {
"rings": [
[
[16825771.206571, -4035936.439062],
[16825749.918205, -4035952.800724],
[16825718.247838, -4035911.771859],
[16825739.304771, -4035895.376397],
[16825771.206571, -4035936.439062],
]
]
},
}
],
}
The address is under the "features" key, in each feature's "attributes" (e.g. data["features"][0]["attributes"]["address"]).