Today I tried to make simple requests to a Website and implement custom headers that I defined into a separate file called useragents.txt. I now wasted lot of time working on it to get it to work. The issue is, that python wont request the site during a valueerror: Invalid header value b'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36\n'
I'm not sure why it add a b' and a \n there. If I print the variable the output is without these symbols. Here a little code that might show better what I mean:
def get_soup(url, header):
time.sleep(random.choice([1, 2, 3]))
return BeautifulSoup(urlopen(Request(url, headers=header)), "html.parser")
with open("useragents.txt", "r") as user_agents_file:
user_agents_lines = user_agents_file.read().splitlines()
print(user_agents_lines)
# count
user_agent = random.choice(user_agents_lines)
print(f"USER-AGENT: {user_agent}")
# for user_agent in user_agents_lines:
# count += 1
# print(f"Line{count}: {user_agent.strip()}")
The Full error is:
Traceback (most recent call last):
File "D:\my_python_projects\testingcodes\firstcode\main.py", line 118, in <module>
scraper() # run the function
File "D:\my_python_projects\testingcodes\firstcode\main.py", line 69, in scraper
soup = get_soup(surveyingurl, testheader)
File "D:\my_python_projects\testingcodes\firstcode\main.py", line 43, in get_soup
return BeautifulSoup(urlopen(Request(url, headers=header)), "html.parser")
File "D:\Downloads\Python\python 3.9.6\lib\urllib\request.py", line 214, in urlopen
return opener.open(url, data, timeout)
File "D:\Downloads\Python\python 3.9.6\lib\urllib\request.py", line 517, in open
response = self._open(req, data)
File "D:\Downloads\Python\python 3.9.6\lib\urllib\request.py", line 534, in _open
result = self._call_chain(self.handle_open, protocol, protocol +
File "D:\Downloads\Python\python 3.9.6\lib\urllib\request.py", line 494, in _call_chain
result = func(*args)
File "D:\Downloads\Python\python 3.9.6\lib\urllib\request.py", line 1389, in https_open
return self.do_open(http.client.HTTPSConnection, req,
File "D:\Downloads\Python\python 3.9.6\lib\urllib\request.py", line 1346, in do_open
h.request(req.get_method(), req.selector, req.data, headers,
File "D:\Downloads\Python\python 3.9.6\lib\http\client.py", line 1257, in request
self._send_request(method, url, body, headers, encode_chunked)
File "D:\Downloads\Python\python 3.9.6\lib\http\client.py", line 1298, in _send_request
self.putheader(hdr, value)
File "D:\Downloads\Python\python 3.9.6\lib\http\client.py", line 1235, in putheader
raise ValueError('Invalid header value %r' % (values[i],))
ValueError: Invalid header value b'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36\n'
Process finished with exit code 1
Below a screenshot that shows how my useragents.txt file looks like:
with open("useragents.txt", "r") as user_agents_file:
user_agents_lines = user_agents_file.read().splitlines()
print(user_agents_lines)
user_agent = random.choice(user_agents_lines)
user_agent = user_agent.replace(b'\n', b'')
print(f"USER-AGENT: {user_agent}")