Here is my code:
import requests
from bs4 import BeautifulSoup
def xeberi_oxu(url):
    """Download one article page and append its title and body to the output file.

    The text of the ``h1.full-post-title`` and ``div.full-post-article``
    elements is appended to the file named by the module-level
    ``output_file`` variable, which is opened with UTF-8 encoding.

    Args:
        url: Address of the article page to fetch.
    """
    response = requests.get(url)
    # The raw response bytes are handed to the parser as-is.
    soup = BeautifulSoup(response.content, "html.parser")

    title = soup.find("h1", class_='full-post-title').text
    content = soup.find("div", class_='full-post-article').text

    # Append mode: each call adds one more article to the same file.
    with open(output_file, 'a', encoding='utf8') as file:
        file.write(f"Başlıq:{title}\n{content}\n\n")
url = "https://xebertv.info/"
output_file = 'xeberler.txt'

# Fetch and parse the front page.
front_page = requests.get(url)
soup = BeautifulSoup(front_page.content, "html.parser")

# Follow every link found inside each "last-posts-list" container and
# save the article behind it.
for xeber in soup.find_all("div", class_="last-posts-list"):
    for link in xeber.find_all("a"):
        xeberi_oxu(link.get("href"))
When I run it, some letters in the output file are garbled.
I tried setting the file encoding to UTF-8, as you can see in the code, but it doesn't work.
Try explicitly decoding `request.content` as UTF-8 before passing it to BeautifulSoup:
import requests
from bs4 import BeautifulSoup
def xeberi_oxu(url):
    """Fetch one article page and return its title and body as one string.

    Args:
        url: Address of the article page to fetch.

    Returns:
        The text ``Başlıq:<title>``, a newline, then the article body,
        taken from the ``h1.full-post-title`` and
        ``div.full-post-article`` elements of the page.
    """
    response = requests.get(url)
    # Decode the bytes as UTF-8 here, so the parser receives text rather
    # than raw bytes.
    html = response.content.decode('utf-8')
    soup = BeautifulSoup(html, "html.parser")

    title = soup.find("h1", class_="full-post-title").text
    content = soup.find("div", class_="full-post-article").text
    return f"Başlıq:{title}\n{content}"
url = "https://xebertv.info/"
output_file = "xeberler.txt"

# Fetch the front page; only link targets are read from it.
request = requests.get(url)
bs4 = BeautifulSoup(request.content, "html.parser")
xeberler = bs4.find_all("div", class_="last-posts-list")

# Collect the formatted text of every article linked from the lists.
out = []
for xeber in xeberler:
    for link in xeber.find_all("a"):
        href = link.get("href")
        # An <a> tag without an href (or with an empty one) has nothing to
        # fetch; passing None or "" to requests.get() would raise and abort
        # the run before the output file is written.
        if not href:
            continue
        out.append(xeberi_oxu(href))

# Mode "w" replaces any previous contents of the file; articles are
# separated by a blank line.
with open(output_file, "w", encoding="utf8") as f_out:
    print('\n\n'.join(out), file=f_out)
With this change, the characters are written to the output file correctly:
....
Başlıq: Sabah 32 dərəcə isti olacaq
...