Search code examples
python xml

How to get value from XML Tag using Python?


I want to extract every 'home id' and 'away id' from the XML document below using Python. Please let me know if I should provide any further information.

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" charset="UTF-8" href="/xslt/hockey/schedule-v6.0.xsl"?>
<!-- Generation started @ 2023-12-11 19:32:15 UTC -->
<league xmlns="http://feed.elasticstats.com/schema/hockey/schedule-v6.0.xsd" id="fd560107-a85b-4388-ab0d-655ad022aff7" name="NHL" alias="NHL">
  <daily-schedule date="2023-12-11">
    <games>
      <game id="23fa5cd7-d680-4dcd-83b5-cf010257ef1f" status="scheduled" coverage="full" scheduled="2023-12-12T00:00:00Z" sr_id="sr:match:41971769" reference="20428" home_team="4416d559-0f24-11e2-8525-18a905767e44" away_team="44153da1-0f24-11e2-8525-18a905767e44">
        <venue id="bd7b42fa-19bb-4b91-8615-214ccc3ff987" name="KeyBank Center" capacity="19070" address="1 Seymour H Knox III Plaza" city="Buffalo" state="NY" zip="14203" country="USA" time_zone="US/Eastern" sr_id="sr:venue:5950"/>
        <home id="4416d559-0f24-11e2-8525-18a905767e44" name="Buffalo Sabres" alias="BUF" sr_id="sr:team:3678" reference="7">
        </home>
        <away id="44153da1-0f24-11e2-8525-18a905767e44" name="Arizona Coyotes" alias="ARI" sr_id="sr:team:3698" reference="53">
        </away>
        <broadcasts>
          <broadcast network="MSG-B" type="TV" channel="635-1"/>
          <broadcast network="SCRIPPS" type="TV" locale="Away"/>
          <broadcast network="ESPN+" type="Internet" locale="National"/>
        </broadcasts>
      </game>
      <game id="7026bc87-b286-40ac-92af-1ce88b17dec2" status="scheduled" coverage="full" scheduled="2023-12-12T00:00:00Z" sr_id="sr:match:41971771" reference="20429" home_team="441766b9-0f24-11e2-8525-18a905767e44" away_team="441730a9-0f24-11e2-8525-18a905767e44">
        <venue id="e256f8f5-5469-43b0-a95e-8d3adb3cb7ee" name="UBS Arena" capacity="17113" address="2150 Hempstead Turnpike" city="Belmont Park" state="NY" zip="11003" country="USA" time_zone="US/Eastern" sr_id="sr:venue:59880"/>
        <home id="441766b9-0f24-11e2-8525-18a905767e44" name="New York Islanders" alias="NYI" sr_id="sr:team:3703" reference="2">
        </home>
        <away id="441730a9-0f24-11e2-8525-18a905767e44" name="Toronto Maple Leafs" alias="TOR" sr_id="sr:team:3693" reference="10">
        </away>
        <broadcasts>
          <broadcast network="MSGSN" type="TV" channel="635"/>
          <broadcast network="ESPN+" type="Internet" locale="National"/>
        </broadcasts>
      </game>
      <game id="47979944-74f1-43af-bebd-e82fab1432ee" status="scheduled" coverage="full" scheduled="2023-12-12T01:00:00Z" sr_id="sr:match:41971773" reference="20430" home_team="44157522-0f24-11e2-8525-18a905767e44" away_team="44169bb9-0f24-11e2-8525-18a905767e44">
        <venue id="adbd1acb-a053-4944-ba15-383eda91c12e" name="American Airlines Center" capacity="18532" address="2500 Victory Avenue" city="Dallas" state="TX" zip="75219" country="USA" time_zone="US/Central" sr_id="sr:venue:5988"/>
        <home id="44157522-0f24-11e2-8525-18a905767e44" name="Dallas Stars" alias="DAL" sr_id="sr:team:3684" reference="25">
        </home>
        <away id="44169bb9-0f24-11e2-8525-18a905767e44" name="Detroit Red Wings" alias="DET" sr_id="sr:team:3685" reference="17">
        </away>
        <broadcasts>
          <broadcast network="BSDET" type="TV" channel="663"/>
          <broadcast network="BSSWX" type="TV" channel="676-1"/>
          <broadcast network="ESPN+" type="Internet" locale="National"/>
        </broadcasts>
      </game>
      <game id="5e2b7cee-05da-429f-92cc-06678c4257bf" status="scheduled" coverage="full" scheduled="2023-12-12T02:30:00Z" sr_id="sr:match:41971775" reference="20431" home_team="4415ce44-0f24-11e2-8525-18a905767e44" away_team="44159241-0f24-11e2-8525-18a905767e44">
        <venue id="a75eea49-c384-4edb-99f1-d5252773ec83" name="Ball Arena" capacity="18007" address="1000 Chopper Circle" city="Denver" state="CO" zip="80204" country="USA" time_zone="US/Mountain" sr_id="sr:venue:5976"/>
        <home id="4415ce44-0f24-11e2-8525-18a905767e44" name="Colorado Avalanche" alias="COL" sr_id="sr:team:3682" reference="21">
        </home>
        <away id="44159241-0f24-11e2-8525-18a905767e44" name="Calgary Flames" alias="CGY" sr_id="sr:team:3679" reference="20">
        </away>
        <broadcasts>
          <broadcast network="ALT" type="TV" channel="681"/>
          <broadcast network="ESPN+" type="Internet" locale="National"/>
        </broadcasts>
      </game>
    </games>
  </daily-schedule>
</league>
<!-- Generation ended @ 2023-12-11 19:32:15 UTC -->

I tried using BeautifulSoup and xml.etree.ElementTree, but I am struggling to work out how to use these tools to extract the values.

import http.client
from datetime import datetime
import xml.etree.ElementTree as ET
from bs4 import BeautifulSoup


# Download today's NHL schedule feed, save it to disk, and parse the saved
# copy with both BeautifulSoup and xml.etree.ElementTree.

# Date segment of the endpoint path, formatted YYYY/MM/DD from the local clock.
today = datetime.now().strftime("%Y/%m/%d")

HOST = "api.sportradar.us"

# NOTE(review): the API key is embedded in the source; consider reading it
# from the environment or a config file instead of committing it.
REQUEST_PATH = "/nhl/trial/v7/en/games/" + today + "/schedule.xml?api_key=tx3ccn5bhk7nb5acb4cwd8np"

# Full address of the request. The scheme matches the HTTPSConnection used
# below, and the path is shared with the request so the two cannot drift apart.
URL = "https://" + HOST + REQUEST_PATH

# Local copy of the downloaded feed; every later step reads from this file.
SCHEDULE_FILE = 'pengins_daily_schedule.xml'

conn = http.client.HTTPSConnection(HOST)
try:
    conn.request("GET", REQUEST_PATH)
    res = conn.getresponse()
    data = res.read()
finally:
    # Release the socket even when the request or the read fails.
    conn.close()

# Fail loudly instead of saving and parsing an error page as if it were the feed.
if res.status != 200:
    raise RuntimeError(f"Schedule request failed: HTTP {res.status} {res.reason}")

# The payload is UTF-8, so write and read it back with an explicit encoding
# rather than relying on the platform default.
with open(SCHEDULE_FILE, 'w', encoding='utf-8') as f:
    f.write(data.decode("utf-8"))

with open(SCHEDULE_FILE, 'r', encoding='utf-8') as f:
    file = f.read()

# 'xml' is the parser used. For html files, which BeautifulSoup is typically used for, it would be 'html.parser'.
soup = BeautifulSoup(file, 'xml')

print(soup)

print('---------------------------------------------------')

# Parse the same saved file with the standard-library parser as well.
tree = ET.parse(SCHEDULE_FILE)
root = tree.getroot()

Solution

  • Here is an example of how you can get all the home/away IDs using bs4:

    from bs4 import BeautifulSoup
    
    # Sample schedule document copied from the question, inlined so the example
    # runs without a network call. The backslash right after the opening quotes
    # suppresses the newline that would otherwise begin the string.
    xml_text = """\
    <?xml version="1.0" encoding="UTF-8"?>
    <?xml-stylesheet type="text/xsl" charset="UTF-8" href="/xslt/hockey/schedule-v6.0.xsl"?>
    <!-- Generation started @ 2023-12-11 19:32:15 UTC -->
    <league xmlns="http://feed.elasticstats.com/schema/hockey/schedule-v6.0.xsd" id="fd560107-a85b-4388-ab0d-655ad022aff7" name="NHL" alias="NHL">
      <daily-schedule date="2023-12-11">
        <games>
          <game id="23fa5cd7-d680-4dcd-83b5-cf010257ef1f" status="scheduled" coverage="full" scheduled="2023-12-12T00:00:00Z" sr_id="sr:match:41971769" reference="20428" home_team="4416d559-0f24-11e2-8525-18a905767e44" away_team="44153da1-0f24-11e2-8525-18a905767e44">
            <venue id="bd7b42fa-19bb-4b91-8615-214ccc3ff987" name="KeyBank Center" capacity="19070" address="1 Seymour H Knox III Plaza" city="Buffalo" state="NY" zip="14203" country="USA" time_zone="US/Eastern" sr_id="sr:venue:5950"/>
            <home id="4416d559-0f24-11e2-8525-18a905767e44" name="Buffalo Sabres" alias="BUF" sr_id="sr:team:3678" reference="7">
            </home>
            <away id="44153da1-0f24-11e2-8525-18a905767e44" name="Arizona Coyotes" alias="ARI" sr_id="sr:team:3698" reference="53">
            </away>
            <broadcasts>
              <broadcast network="MSG-B" type="TV" channel="635-1"/>
              <broadcast network="SCRIPPS" type="TV" locale="Away"/>
              <broadcast network="ESPN+" type="Internet" locale="National"/>
            </broadcasts>
          </game>
          <game id="7026bc87-b286-40ac-92af-1ce88b17dec2" status="scheduled" coverage="full" scheduled="2023-12-12T00:00:00Z" sr_id="sr:match:41971771" reference="20429" home_team="441766b9-0f24-11e2-8525-18a905767e44" away_team="441730a9-0f24-11e2-8525-18a905767e44">
            <venue id="e256f8f5-5469-43b0-a95e-8d3adb3cb7ee" name="UBS Arena" capacity="17113" address="2150 Hempstead Turnpike" city="Belmont Park" state="NY" zip="11003" country="USA" time_zone="US/Eastern" sr_id="sr:venue:59880"/>
            <home id="441766b9-0f24-11e2-8525-18a905767e44" name="New York Islanders" alias="NYI" sr_id="sr:team:3703" reference="2">
            </home>
            <away id="441730a9-0f24-11e2-8525-18a905767e44" name="Toronto Maple Leafs" alias="TOR" sr_id="sr:team:3693" reference="10">
            </away>
            <broadcasts>
              <broadcast network="MSGSN" type="TV" channel="635"/>
              <broadcast network="ESPN+" type="Internet" locale="National"/>
            </broadcasts>
          </game>
          <game id="47979944-74f1-43af-bebd-e82fab1432ee" status="scheduled" coverage="full" scheduled="2023-12-12T01:00:00Z" sr_id="sr:match:41971773" reference="20430" home_team="44157522-0f24-11e2-8525-18a905767e44" away_team="44169bb9-0f24-11e2-8525-18a905767e44">
            <venue id="adbd1acb-a053-4944-ba15-383eda91c12e" name="American Airlines Center" capacity="18532" address="2500 Victory Avenue" city="Dallas" state="TX" zip="75219" country="USA" time_zone="US/Central" sr_id="sr:venue:5988"/>
            <home id="44157522-0f24-11e2-8525-18a905767e44" name="Dallas Stars" alias="DAL" sr_id="sr:team:3684" reference="25">
            </home>
            <away id="44169bb9-0f24-11e2-8525-18a905767e44" name="Detroit Red Wings" alias="DET" sr_id="sr:team:3685" reference="17">
            </away>
            <broadcasts>
              <broadcast network="BSDET" type="TV" channel="663"/>
              <broadcast network="BSSWX" type="TV" channel="676-1"/>
              <broadcast network="ESPN+" type="Internet" locale="National"/>
            </broadcasts>
          </game>
          <game id="5e2b7cee-05da-429f-92cc-06678c4257bf" status="scheduled" coverage="full" scheduled="2023-12-12T02:30:00Z" sr_id="sr:match:41971775" reference="20431" home_team="4415ce44-0f24-11e2-8525-18a905767e44" away_team="44159241-0f24-11e2-8525-18a905767e44">
            <venue id="a75eea49-c384-4edb-99f1-d5252773ec83" name="Ball Arena" capacity="18007" address="1000 Chopper Circle" city="Denver" state="CO" zip="80204" country="USA" time_zone="US/Mountain" sr_id="sr:venue:5976"/>
            <home id="4415ce44-0f24-11e2-8525-18a905767e44" name="Colorado Avalanche" alias="COL" sr_id="sr:team:3682" reference="21">
            </home>
            <away id="44159241-0f24-11e2-8525-18a905767e44" name="Calgary Flames" alias="CGY" sr_id="sr:team:3679" reference="20">
            </away>
            <broadcasts>
              <broadcast network="ALT" type="TV" channel="681"/>
              <broadcast network="ESPN+" type="Internet" locale="National"/>
            </broadcasts>
          </game>
        </games>
      </daily-schedule>
    </league>
    <!-- Generation ended @ 2023-12-11 19:32:15 UTC -->"""
    
    # Parse with the XML parser so tag names keep their exact spelling.
    soup = BeautifulSoup(xml_text, "xml")

    # Walk every <game> element and read the "id" attribute of its <home>
    # and <away> children, printing one line per game.
    for game in soup.find_all("game"):
        home_id, away_id = (game.find(side)["id"] for side in ("home", "away"))
        print(f"{home_id=} {away_id=}")
    

    Prints:

    home_id='4416d559-0f24-11e2-8525-18a905767e44' away_id='44153da1-0f24-11e2-8525-18a905767e44'
    home_id='441766b9-0f24-11e2-8525-18a905767e44' away_id='441730a9-0f24-11e2-8525-18a905767e44'
    home_id='44157522-0f24-11e2-8525-18a905767e44' away_id='44169bb9-0f24-11e2-8525-18a905767e44'
    home_id='4415ce44-0f24-11e2-8525-18a905767e44' away_id='44159241-0f24-11e2-8525-18a905767e44'