Search code examples
python, web-scraping, beautifulsoup, urllib

How to get the title and URL from an HTML page with Python


I want to go to the "department" section of the page and select/print only the name and URL of each entry. I have tried the following, but I cannot work out how to get into "department" and select those two specific things. How can I get the "name" and "url" for all the links?

import json
import urllib.request
from bs4 import BeautifulSoup


def getContent():
    """Fetch the target page and print a title/href pair for every link.

    Downloads the page at the hard-coded URL, parses it with BeautifulSoup's
    ``html.parser`` backend, and prints the ``name`` and ``href`` attributes
    of every ``<a>`` element found.

    Raises:
        urllib.error.URLError: if the page cannot be fetched.
    """
    # Target site URL. The scheme is required: without "http://" urllib
    # rejects the string with "ValueError: unknown url type".
    url = "http://www.xyz.com"
    request = urllib.request.Request(url)
    # Read the whole page; the context manager closes the HTTP response
    # (and its socket) even if reading fails.
    with urllib.request.urlopen(request) as response:
        htmlpage = response.read()
    bsoup = BeautifulSoup(htmlpage, "html.parser")
    # print(bsoup.prettify())

    # Earlier attempts, kept for reference:
    # main_table = bsoup.find("div",attrs)
    # print(main_table)
    # print(bsoup.find_all('name'))
    # nav = bsoup.nav
    # print(bsoup.title.department.url)
    # for url in find_all('a'):
    # print(url.get('href'))

    for link in bsoup.find_all("a"):
        # NOTE(review): Tag.get() returns None when the attribute is absent,
        # and <a> elements rarely carry a "name" attribute, so "Title" will
        # usually print None -- confirm whether the link text was intended.
        print("Title: {}".format(link.get("name")))
        print("href: {}".format(link.get("href")))

Solution

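  • The page embeds the department list as JSON-LD inside a <script type="application/ld+json"> tag, so you can get the name / url by parsing that tag's contents with the json module as follows: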

    import json
    import urllib.request
    from bs4 import BeautifulSoup
    
    
    def get_content(
        url="http://www.ucdenver.edu/pages/ucdwelcomepage.aspx",
    ):
        """Print the name and URL of every department listed on a page.

        The page is expected to embed its metadata as JSON-LD in a
        ``<script type="application/ld+json">`` tag whose top-level object
        has a ``"department"`` list of objects with ``"name"`` and ``"url"``
        keys. Each entry is printed as one line, with the name left-aligned
        in a 60-character column followed by the URL.

        Args:
            url: Address of the page to scrape. Defaults to the UC Denver
                welcome page.

        Raises:
            ValueError: if the page has no non-empty JSON-LD script tag, or
                its contents are not valid JSON.
            KeyError: if the JSON-LD data lacks ``"department"``, or an entry
                lacks ``"name"`` or ``"url"``.
            urllib.error.URLError: if the page cannot be fetched.
        """
        request = urllib.request.Request(url)
        # The context manager closes the HTTP response (and its socket) even
        # if reading fails.
        with urllib.request.urlopen(request) as response:
            html_page = response.read()
        soup = BeautifulSoup(html_page, 'html.parser')

        # find() returns None when no tag matches; fail with a clear message
        # instead of an AttributeError on None.
        ld_json_tag = soup.find("script", type="application/ld+json")
        if ld_json_tag is None or not ld_json_tag.string:
            raise ValueError(
                "no JSON-LD <script> tag with content found at {}".format(url)
            )

        json_data = json.loads(ld_json_tag.string)
        for data in json_data["department"]:
            print("{:<60} {}".format(data["name"], data["url"]))


    # Run the scraper only when executed as a script, not when imported.
    if __name__ == "__main__":
        get_content()
    

    Output:

    Center for Undergraduate Exploration and Advising            https://www.ucdenver.edu/center-for-undergraduate-exploration-and-advising
    Commencement                                                 https://www.ucdenver.edu/commencement
    Counseling Center                                            https://www.ucdenver.edu/counseling-center
    First Year Experiences                                       https://www.ucdenver.edu/first-year-experiences
    Health Programs                                              https://www.ucdenver.edu/programs/health-programs
    Housing and Dining                                           https://www.ucdenver.edu/housing-and-dining
    ...