I am trying to scrape data from a website and write it to a CSV file:
# Module-level accumulator: get_data() appends one list of cell texts per
# scraped table row here, and the script below writes it out as CSV rows.
table_array = []
def get_data(page):
    """Scrape one page of the stock-rating list into ``table_array``.

    Downloads the listing page with the given number, walks every ``<tr>``
    inside the ``<tbody>`` of the first ``<table>``, echoes each cell's text
    to stdout, and appends the row's cell texts (minus the last cell) to the
    module-level ``table_array``.

    Args:
        page: Page number to request; converted with ``str()`` for the URL.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within 30 seconds.
    """
    url = "https://www.sl886.com/stockrating/list?list=stockrating&page=" + str(page)
    # A timeout keeps the crawl from hanging forever on a stalled connection,
    # and raise_for_status() stops an HTTP error page from being parsed as data.
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    table_body = soup.find("table").find("tbody")
    for row in table_body.find_all("tr"):
        cells = []
        for cell in row.find_all("td"):
            cells.append(cell.text)
            print(cell.text, end=" ")
        # The final <td> of each row is deliberately discarded
        # (presumably a non-data column -- confirm against the page).
        table_array.append(cells[:-1])
        print("")
def get_page_number():
    """Return the number of listing pages in the stock-rating table.

    Fetches page 1, reads the total item count from the second ``<b>``
    element inside ``<div class="summary">`` (thousands separators removed),
    and divides it by 20, rounding up. The divisor is hard-coded; it is
    assumed to be the site's page size.

    Returns:
        int: Total item count divided by 20, rounded up.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within 30 seconds.
    """
    url = "https://www.sl886.com/stockrating/list?list=stockrating&page=1"
    # A timeout keeps the call from hanging forever on a stalled connection,
    # and raise_for_status() stops an HTTP error page from being parsed as data.
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    summary = soup.find("div", {"class": "summary"})
    total_items = int(summary.find_all("b")[1].text.replace(",", ""))
    return math.ceil(total_items / 20)
Run:
# Currently fetches a single page (page 9). The full crawl would iterate
# range(get_page_number()); note the page offset would then need to be i + 1.
for i in range(1):  # range(get_page_number()):
    get_data(i + 9)

# Without an explicit encoding, open() uses the locale's default codec
# (cp950 on a Traditional Chinese Windows install), which cannot represent
# every scraped character and raises UnicodeEncodeError. "utf_8_sig" writes
# UTF-8 preceded by a byte-order mark, so any character can be encoded and
# readers that rely on the BOM can detect the encoding.
with open("big_bank_rating.csv", "w", newline="", encoding="utf_8_sig") as csvfile:
    write = csv.writer(csvfile)
    write.writerow(["日期", "大行", "股票", "最新評級", "目標價", "變化", "潛在升幅"])
    for xi in table_array:
        print(xi)
    write.writerows(table_array)
print("!!!Done Append!!!")
But it fails with an encoding error:
Traceback (most recent call last):
File "data_crawl.py", line 55, in <module>
write.writerows(table_array)
UnicodeEncodeError: 'cp950' codec can't encode character '\u7dab' in position 19: illegal multibyte sequence
I tried using encoding="utf-8", but the result I got was garbled. How can I fix this so that the character '\u7dab' is encoded correctly?
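The fix is to pass the following argument to open():
encoding="utf_8_sig"
This writes UTF-8 with a leading byte-order mark (BOM), so every character can be encoded and programs that rely on the BOM to detect the encoding no longer show garbled text.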