Search code examples
python search python-requests

Running through all pages


I'm making a peoplefinder app that will be free. It will look through all the people in Texas by name, and find their home address. I just got started, and can't seem to figure out how cycling through all the pages would work. Is there a good, reliable way to do this? Am I just dumb? This is my first big project, and I could really use some help.

import json
from time import sleep

import requests

# Prompt for the name to search for. This runs at module level, so it executes
# as soon as the script is run (or imported).
name = input("Enter full name: ")

def namesearchwilcounty(name):
    """Print each unique owner/situs-address pair found for *name*.

    Queries the search.wcad.org quick property search one page at a time,
    starting at page 1, and prints every "OwnerName SitusAddress" combination
    the first time it is seen, prefixed with the page it came from.

    Paging stops when a page comes back without results or when the server
    answers with a non-200 status (the status and body are printed).

    Args:
        name: Text sent as the ``f`` query parameter.

    Returns:
        None. All output goes to stdout.
    """
    url = "https://search.wcad.org/ProxyT/Search/Properties/quick/"
    params = {
        "f": name,
        "pn": 1,
        "st": 4,
        "so": "desc",
        "pt": "RP;PP;MH;NR",
        "ty": "2024",
    }

    seen = set()  # "owner address" strings already printed, to skip duplicates
    while True:
        page = params["pn"]
        response = requests.get(url, params=params)
        if response.status_code != 200:
            print(f"Error: {response.status_code} - {response.text}")
            return

        data = response.json()
        results = data.get("ResultList") if isinstance(data, dict) else None
        if not results:
            # An empty first page means the search matched nothing; an empty
            # later page is taken as "past the last page".
            # NOTE(review): assumes the endpoint returns an empty ResultList
            # for page numbers beyond the last one - confirm against the API.
            if page == 1:
                print("No data found in the response.")
            return

        if page == 1:
            print("Situs Addresses and Owner Names:")

        for item in results:
            result = f"{item['OwnerName']} {item['SitusAddress']}"
            if result not in seen:
                print(f"Page {page}: {result}")
                seen.add(result)

        params["pn"] = page + 1
        sleep(1)  # be polite: pause between consecutive page requests


# Run the search for the name entered at the prompt.
namesearchwilcounty(name)

Solution

  • It seems that the pn parameter is the page number (1-based) and that no more pages are available once RecordCount is zero.

    So, simplifying your code, you could do this:

    import requests
    from requests.adapters import HTTPAdapter, Retry
    
    # Endpoint of the quick property search on search.wcad.org.
    URL = "https://search.wcad.org/ProxyT/Search/Properties/quick/"
    
    # Retry policy handed to the HTTP adapter: at most MAX_RETRIES retries,
    # with HTTP 429 responses forced to be retried.
    MAX_RETRIES = 5
    RETRIES = Retry(total=MAX_RETRIES, status_forcelist=[429])
    
    def search(name):
        """Page through the quick search for *name*, printing each page's record count.

        Requests pages starting at 1 and increments the ``pn`` parameter until
        a response reports a ``RecordCount`` of zero (or omits it), at which
        point "No more records" is printed and the loop ends.

        Args:
            name: Text sent as the ``f`` query parameter.

        Returns:
            None. Progress is printed to stdout.

        Raises:
            requests.HTTPError: If a response has an error status
                (via ``raise_for_status``).
        """
        params = {
            "f": name,
            "pn": 1,
            "st": 4,
            "so": "desc",
            "pt": "RP;PP;MH;NR",
            "ty": "2024"
        }
        with requests.Session() as session:
            # Apply the module-level retry policy to every request under URL.
            session.mount(URL, HTTPAdapter(max_retries=RETRIES))
            while True:
                with session.get(URL, params=params) as response:
                    response.raise_for_status()
                    data = response.json()
                    if (record_count := data.get("RecordCount", 0)) == 0:
                        print("No more records")
                        break
                    # Single quotes inside the f-string: reusing the outer
                    # double quotes is only valid syntax on Python 3.12+.
                    print(f"current_page={params['pn']} {record_count=}")
                    params["pn"] += 1
    
    # Example run: page through all results for the name "Kennedy".
    search("Kennedy")
    

    Output:

    current_page=1 record_count=25
    current_page=2 record_count=25
    current_page=3 record_count=25
    current_page=4 record_count=25
    current_page=5 record_count=25
    current_page=6 record_count=25
    current_page=7 record_count=9
    No more records