Search code examples
pythonweb-scraping

webscrape data from adviserinfo.sec.gov using python


I am trying to pull financial advisor data from Adviserinfo.sec.gov using Python and have not had much luck. This is a public disclosure website where you can enter an advisor's name, click search, and get details such as firm name, address, experience, etc. I am trying to see if I can do that programmatically in Python by passing in the advisor's name and getting the desired info back. Any pointers are greatly appreciated. Thanks!

Here's the sample code that I have tried so far; it prints "Failed to retrieve adviser information."

import requests
from bs4 import BeautifulSoup


def search_adviser_by_name(first_name, last_name):
    """Search the IAPD site by name and return details for the first match.

    Requests the individual search page, collects every
    ``<a class="individual-summary">`` link in the returned HTML and follows
    the first one whose text contains both names (case-insensitively).

    Returns the result of ``get_adviser_info`` for that link, or ``None`` when
    the response status is not 200 or no link matches both names.
    """
    response = requests.get(
        "https://adviserinfo.sec.gov/IAPD/Individual/Search/Search",
        params={
            'ADVANCED': 'true',
            'FIND_BY_NAME': 'true',
            'INDIVIDUAL_NAME': f"{first_name} {last_name}",
            'resultsPerPage': '10',
        },
    )
    if response.status_code != 200:
        return None

    page = BeautifulSoup(response.content, 'html.parser')

    for link in page.find_all('a', {'class': 'individual-summary'}):
        link_text = link.text.lower()
        # Skip links that do not mention both the first and the last name.
        if first_name.lower() not in link_text:
            continue
        if last_name.lower() not in link_text:
            continue
        return get_adviser_info("https://adviserinfo.sec.gov" + link['href'])

    return None


def get_adviser_info(adviser_url):
    """Fetch an adviser page and extract the name, firm and CRD number.

    Args:
        adviser_url: Absolute URL of the adviser's detail page.

    Returns:
        A dict with the keys ``'Name'``, ``'Firm'`` and ``'CRD Number'``, or
        ``None`` when the response status is not 200 or any of the three
        expected ``<span>`` elements is missing from the page.
    """
    response = requests.get(adviser_url)
    if response.status_code != 200:
        return None

    soup = BeautifulSoup(response.content, 'html.parser')

    elements = {
        'Name': soup.find('span', class_='text-lg sm:text-sm font-semibold'),
        'Firm': soup.find('span', {'class': 'firmName'}),
        'CRD Number': soup.find('span', {'class': 'crdNumber'}),
    }

    # find() yields None for an absent element; report that as a failed lookup
    # instead of crashing with AttributeError on `.text`.
    if any(element is None for element in elements.values()):
        return None

    return {label: element.text.strip() for label, element in elements.items()}


# Example usage: look up one adviser and report the outcome.
first_name = 'Kelly'
last_name = 'Demers'
info = search_adviser_by_name(first_name, last_name)
# A falsy result (None or an empty dict) is reported as a failure.
print(info if info else "Failed to retrieve adviser information.")

Solution

  • I suggest using the underlying API directly instead.

    import requests
    import json
    
    def search_adviser_by_name(first, last):
        """Query the adviserinfo.sec.gov search API for individuals by name.

        Args:
            first: First name to search for.
            last: Last name to search for.

        Returns:
            A list containing the ``_source`` dictionary of every hit in the
            JSON response.

        Raises:
            requests.HTTPError: If the API responds with a 4xx/5xx status.
            requests.Timeout: If the API does not respond within the timeout.
        """
        params = {
            'query': f'{first} {last}',
            'includePrevious': 'true',
            'hl': 'true',
            'nrows': '12',
            'start': '0',
            'r': '25',
            'sort': 'score+desc',
            'wt': 'json',
        }

        # Without a timeout requests waits indefinitely for a stalled server.
        response = requests.get(
            'https://api.adviserinfo.sec.gov/search/individual',
            params=params,
            timeout=30,
        )
        # Surface HTTP errors directly rather than as a confusing JSON/KeyError
        # failure further down.
        response.raise_for_status()

        records = response.json()["hits"]["hits"]

        return [record["_source"] for record in records]
    
    # Look up a sample adviser and pretty-print every returned record.
    info = search_adviser_by_name("Kelly", "Demers")

    formatted = json.dumps(info, indent=2)
    print(formatted)
    

    The return value from search_adviser_by_name() is now a list of dictionaries with all of the information relating to each advisor. You can manipulate that as required, but most of the information that you are looking for should be included. The CRD number is in the ind_source_id field.

    🚨 The results on the site (and via the API) are paginated. So if there are many matches for your query then you'll need to iterate over the result pages.

    Results for the specified first and last names:

    [
      {
        "ind_source_id": "6842521",
        "ind_firstname": "KELLY",
        "ind_middlename": "MARIE",
        "ind_lastname": "DEMERS",
        "ind_other_names": [
          "KELLY  DEMERS",
          "KELLY DEMERS RATOWITZ"
        ],
        "ind_bc_scope": "Active",
        "ind_ia_scope": "Active",
        "ind_ia_disclosure_fl": "N",
        "ind_approved_finra_registration_count": 1,
        "ind_employments_count": 2,
        "ind_industry_cal_date_iapd": "2017-09-19",
        "ind_ia_current_employments": [
          {
            "firm_id": "7691",
            "firm_name": "MERRILL LYNCH, PIERCE, FENNER & SMITH INCORPORATED",
            "branch_city": "BIRMINGHAM",
            "branch_state": "AL",
            "branch_zip": "35209",
            "ia_only": "Y",
            "firm_bd_sec_number": "7221",
            "firm_bd_full_sec_number": "8-7221",
            "firm_ia_sec_number": "14235",
            "firm_ia_full_sec_number": "801-14235"
          },
          {
            "firm_id": "7691",
            "firm_name": "MERRILL LYNCH, PIERCE, FENNER & SMITH INCORPORATED",
            "branch_city": "BIRMINGHAM",
            "branch_state": "AL",
            "branch_zip": "35209",
            "ia_only": "N",
            "firm_bd_sec_number": "7221",
            "firm_bd_full_sec_number": "8-7221",
            "firm_ia_sec_number": "14235",
            "firm_ia_full_sec_number": "801-14235"
          }
        ]
      },
      {
        "ind_source_id": "4038008",
        "ind_firstname": "KELLIE",
        "ind_middlename": "G",
        "ind_lastname": "BEIERS",
        "ind_other_names": [
          "KELLIE J GIBBS",
          "KELLIE J GIBBS BEIERS",
          "KELLIE J GIBBS/BEIERS"
        ],
        "ind_bc_scope": "NotInScope",
        "ind_ia_scope": "Active",
        "ind_ia_disclosure_fl": "N",
        "ind_approved_finra_registration_count": 0,
        "ind_employments_count": 2,
        "ind_industry_cal_date_iapd": "2014-04-06",
        "ind_ia_current_employments": [
          {
            "firm_id": "307424",
            "firm_name": "ASPEN WEALTH STRATEGIES, LLC",
            "branch_city": "ARVADA",
            "branch_state": "CO",
            "branch_zip": "80002",
            "ia_only": "Y"
          },
          {
            "firm_id": "173912",
            "firm_name": "CHASEFIELD CAPITAL INC.",
            "branch_city": "LAKEWOOD",
            "branch_state": "CO",
            "branch_zip": "80235",
            "ia_only": "Y"
          }
        ]
      }
    ]