Search code examples
python, html-table, extract

Python find data in table row


Let's say I have an HTML table with 10 columns and 100 rows. All I want to do is use BeautifulSoup to look up a value and, if it exists, print the full row.

import urllib3
from bs4 import BeautifulSoup

# Download the Wikipedia page that holds the table.
http = urllib3.PoolManager()
url = "https://en.wikipedia.org/wiki/List_of_Asian_countries_by_area"
response = http.request('GET', url)
soup = BeautifulSoup(response.data, "html.parser")

# Skip the first two <tr> elements, then print the <td> cells of each
# remaining row.
for tr in soup.find_all('tr')[2:]:
    tds = tr.find_all('td')
    print(tds)

Solution

  • Edit:

    import urllib3
    import re
    from bs4 import BeautifulSoup
    
    def searchTableForCountry(country):
        """Return the text of every table row that links to ``country``.

        Downloads the Wikipedia "List of Asian countries by area" page and
        collects each ``<tr>`` containing an ``<a>`` whose ``href`` contains
        ``/wiki/<country>``. Each matching row is printed once, as it is found.

        Args:
            country: Country name as it appears in the Wikipedia link
                (e.g. ``'China'``). It is matched literally, so characters
                such as ``(`` or ``.`` are not treated as regex syntax.

        Returns:
            A list with one string per matching row; newlines and
            non-breaking spaces in the row text are replaced by spaces.
            The list is empty when no row matches.
        """
        outlist = []
        http = urllib3.PoolManager()
        url = "https://en.wikipedia.org/wiki/List_of_Asian_countries_by_area"
        response = http.request('GET', url)
        soup = BeautifulSoup(response.data, "html.parser")

        # Build the pattern once; escape the name so it is matched literally.
        link_pattern = re.compile(r'/wiki/' + re.escape(country))

        for tr in soup.find_all('tr'):
            # find() stops at the first matching link in the row.
            if tr.find('a', {'href': link_pattern}) is None:
                continue
            row_text = tr.text.replace('\n', ' ').replace('\xa0', ' ')
            outlist.append(row_text)
            # Print only the new row; joining the whole list here would
            # repeat every earlier match on each subsequent hit.
            print(row_text)
        return outlist
    
    # Example call: prints the matching table row(s) and returns them in a list.
    searchTableForCountry('China')
    # Printed output:
    #2   China  9,596,961  excludes Hong Kong, Macau, Taiwan and disputed areas/islands