I am working in Python 3. My objective is to extract different values from a table and put them in different lists.
The problem is that I can't get the value of the "alt" attribute of an img inside a td.
This is my code:
from bs4 import BeautifulSoup
import urllib.request
# Fetch the movers page and read the raw HTML before closing the connection.
redditFile = urllib.request.urlopen("http://www.mtggoldfish.com/movers/online/all")
redditHtml = redditFile.read()
redditFile.close()

soup = BeautifulSoup(redditHtml)
all_tables = soup.find_all('table')
# The table of interest is selected by its full class string.
right_table = soup.find('table', class_='table table-bordered table-striped table-condensed movers-table')

# One list per extracted column:
#   A - first text of the first <td>, B - first text of the first <span>,
#   C - the first <img> Tag itself,   D - first text of the first <a>.
A, B, C, D = [], [], [], []

for row in right_table.findAll("tr"):
    cells = row.findAll('td')
    increment = row.findAll('span')
    colection = row.findAll('img')
    link = row.findAll('a')
    # Only rows with exactly six cells are collected; everything else is skipped.
    if len(cells) != 6:
        continue
    A.append(cells[0].find(text=True))
    B.append(increment[0].find(text=True))
    # NOTE: this stores the whole <img> Tag object, not the text of its alt attribute.
    C.append(colection[0])
    D.append(link[0].find(text=True))

print(A)
print(B)
print(C)
print(D)
This code gives me this result:
['1', '2', '3', '4', '5', '6', '7', '8', '9', '10']
['+8.40', '+2.47', '+1.35', '+1.28', '+1.14', '+0.99', '+0.94', '+0.91', '+0.90', '+0.75']
[<img alt="ORI" class="sprite-set_symbols_ORI" src="//assets1.mtggoldfish.com/assets/s-407aaa9c9786d606684c6967c47739c5.gif"/>, <img alt="PRM" class="sprite-set_symbols_PRM" src="//assets1.mtggoldfish.com/assets/s-407aaa9c9786d606684c6967c47739c5.gif"/>, <img alt="8ED" class="sprite-set_symbols_8ED" src="//assets1.mtggoldfish.com/assets/s-407aaa9c9786d606684c6967c47739c5.gif"/>, <img alt="EX" class="sprite-set_symbols_EX" src="//assets1.mtggoldfish.com/assets/s-407aaa9c9786d606684c6967c47739c5.gif"/>, <img alt="TSB" class="sprite-set_symbols_TSB" src="//assets1.mtggoldfish.com/assets/s-407aaa9c9786d606684c6967c47739c5.gif"/>, <img alt="WL" class="sprite-set_symbols_WL"
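src="//assets1.mtggoldfish.com/assets/s-407aaa9c9786d606684c6967c47739c5.gif"/>, <img alt="ROE" class="sprite-set_symbols_ROE" src="//assets1.mtggoldfish.com/assets/s-407aaa9c9786d606684c6967c47739c5.gif"/>, <img alt="ZEN" class="sprite-set_symbols_ZEN" src="//assets1.mtggoldfish.com/assets/s-407aaa9c9786d606684c6967c47739c5.gif"/>, <img alt="FUT" class="sprite-set_symbols_FUT" src="//assets1.mtggoldfish.com/assets/s-407aaa9c9786d606684c6967c47739c5.gif"/>, <img alt="FUT" class="sprite-set_symbols_FUT" src="//assets1.mtggoldfish.com/assets/s-407aaa9c9786d606684c6967c47739c5.gif"/>]
["Jace, Vryn's Prodigy", "Gaea's Cradle", 'Ensnaring Bridge', 'City of Traitors', 'Pendelhaven', 'Firestorm', 'Kor Spiritdancer', 'Scalding Tarn', 'Daybreak Coronet', 'Grove of the Burnwillows']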
But I need the img alt value (for example, the first img alt value is "ORI") in the colection variable.
I have no idea how to do this. Could you help me with this, please?
Thanks so much in advance.
If you just want the alt values from the img tags, you just need to select the img tags from the table and extract their alt attributes:
# Locate the table by its full class string.
right_table = soup.find('table', class_='table table-bordered table-striped table-condensed movers-table')
# The CSS selector img[alt] matches only <img> tags that carry an alt attribute.
alt_values = [img["alt"] for img in right_table.select("img[alt]")]
print(alt_values)
['ORI', 'PRM', '8ED', 'EX', 'TSB', 'WL', 'ROE', 'ZEN', 'FUT', 'FUT']
In your own loop you are using findAll when you seem to want only one element; if you only want the first match, use find instead, e.g. row.find('span')
and so on, and row.find('img')["alt"]
will give you the alt value for each row. Looking at the page, there is only one img per tr, so you definitely don't need findAll.
If you want to recreate the table locally I would put the data in a dict:
right_table = soup.find('table', class_='table table-bordered table-striped table-condensed movers-table')
# Maps each row's place (int) to a list: the increment texts, then title, then img alt.
table_dict = {}
for row in right_table.select("tr"):
    # spans with the "increase" class hold the increment values
    increments = [s.text for s in row.select('span.increase')]
    # a row without such spans carries no data, so skip it
    if not increments:
        continue
    # rank/place is the text of the first td; find("td", {"class": "first-right"}) would also work
    place = int(row.td.text)
    # the title is the text of the row's first <a> tag
    title = row.find("a").text
    # the alt attribute of the row's first <img> tag goes last
    increments.extend((title, row.find("img")["alt"]))
    table_dict[place] = increments
from pprint import pprint as pp
pp(table_dict)
Output:
{1: [u'+8.78', u'68.03', u'+15.00%', u"Jace, Vryn's Prodigy", 'ORI'],
2: [u'+2.47', u'47.96', u'+5.00%', u"Gaea's Cradle", 'PRM'],
3: [u'+1.95', u'20.37', u'+11.00%', u'Firestorm', 'WL'],
4: [u'+1.73', u'23.91', u'+8.00%', u'Force of Will', 'VMA'],
5: [u'+1.35', u'40.88', u'+3.00%', u'Ensnaring Bridge', '8ED'],
6: [u'+1.28', u'44.02', u'+3.00%', u'City of Traitors', 'EX'],
7: [u'+1.15', u'41.98', u'+3.00%', u'Time Walk', 'VMA'],
8: [u'+1.01', u'28.68', u'+4.00%', u'Daze', 'NE'],
9: [u'+1.01', u'19.96', u'+5.00%', u"Goryo's Vengeance", 'BOK'],
10: [u'+1.00', u'3.99', u'+33.00%', u'Unearth', 'UL']}
Which you will see matches the current table data exactly. Also, if you want all the winners, just change the URL to http://www.mtggoldfish.com/movers-details/online/all/winners/dod
Or if you want to break the fields up and just pull the first increment:
# Build {place: {"title", "alt", "inc"}} from the rows of right_table.
# Start from an empty dict so this snippet runs on its own and never mixes
# in entries of a different shape left behind by earlier code.
table_dict = {}
for row in right_table.select("tr"):
    # find() returns None when the row has no span with the "increase" class
    increment = row.find('span', {"class": 'increase'})
    if increment is None:
        continue
    # rank/place is the text of the row's first td
    place = int(row.td.text)
    # the title is the text of the first <a> that has a data-full-image attribute
    title = row.select("a[data-full-image]")[0].text
    # alt attribute of the row's first <img> tag
    alt = row.find("img")["alt"]
    # only the first increment's text is kept, under "inc"
    table_dict[place] = {"title": title, "alt": alt, "inc": increment.text}
from pprint import pprint as pp
pp(table_dict)
Output:
{1: {'alt': 'ORI', 'inc': u'+8.78', 'title': u"Jace, Vryn's Prodigy"},
2: {'alt': 'PRM', 'inc': u'+2.47', 'title': u"Gaea's Cradle"},
3: {'alt': 'WL', 'inc': u'+1.95', 'title': u'Firestorm'},
4: {'alt': 'VMA', 'inc': u'+1.73', 'title': u'Force of Will'},
5: {'alt': '8ED', 'inc': u'+1.35', 'title': u'Ensnaring Bridge'},
6: {'alt': 'EX', 'inc': u'+1.28', 'title': u'City of Traitors'},
7: {'alt': 'VMA', 'inc': u'+1.15', 'title': u'Time Walk'},
8: {'alt': 'NE', 'inc': u'+1.01', 'title': u'Daze'},
9: {'alt': 'BOK', 'inc': u'+1.01', 'title': u"Goryo's Vengeance"},
10: {'alt': 'UL', 'inc': u'+1.00', 'title': u'Unearth'}}