I'm using some older Python 3 code that works like this:
import os
import json
import re
import csv
import urllib.request
import requests
url = "ftp://username:password@server/path-to-file.txt"
try:
response = urllib.request.urlopen(url)
lines = [l.decode('latin-1') for l in response.readlines()]
rows = csv.reader(lines, delimiter=';')
return rows
except Exception as err:
current_app.log.error('Error when trying to read URL and parse CSV: %s' % (url))
raise
This has always worked fine, but recently the FTP server, which I don't have any control over, switched to explicit TLS. This results in an error trace like this:
File ".pyenv/versions/3.10.0/lib/python3.10/urllib/request.py", line 216, in urlopen
return opener.open(url, data, timeout)
File ".pyenv/versions/3.10.0/lib/python3.10/urllib/request.py", line 519, in open
response = self._open(req, data)
File ".pyenv/versions/3.10.0/lib/python3.10/urllib/request.py", line 536, in _open
result = self._call_chain(self.handle_open, protocol, protocol +
File ".pyenv/versions/3.10.0/lib/python3.10/urllib/request.py", line 496, in _call_chain
result = func(*args)
File ".pyenv/versions/3.10.0/lib/python3.10/urllib/request.py", line 1583, in ftp_open
raise exc.with_traceback(sys.exc_info()[2])
File ".pyenv/versions/3.10.0/lib/python3.10/urllib/request.py", line 1565, in ftp_open
fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout)
File ".pyenv/versions/3.10.0/lib/python3.10/urllib/request.py", line 1586, in connect_ftp
return ftpwrapper(user, passwd, host, port, dirs, timeout,
File ".pyenv/versions/3.10.0/lib/python3.10/urllib/request.py", line 2407, in __init__
self.init()
File ".pyenv/versions/3.10.0/lib/python3.10/urllib/request.py", line 2417, in init
self.ftp.login(self.user, self.passwd)
File ".pyenv/versions/3.10.0/lib/python3.10/ftplib.py", line 412, in login
resp = self.sendcmd('USER ' + user)
File ".pyenv/versions/3.10.0/lib/python3.10/ftplib.py", line 281, in sendcmd
return self.getresp()
File ".pyenv/versions/3.10.0/lib/python3.10/ftplib.py", line 254, in getresp
raise error_perm(resp)
urllib.error.URLError: <urlopen error ftp error: error_perm('530 Not logged in.')>
The relevant part, I think, is that urllib can no longer log in to the server through its internal use of ftplib.
For the sake of testing my ability to access the server at all, I tried using FTP_TLS
like this:
# Connectivity check: log in to the server with ftplib's FTP_TLS client directly.
from ftplib import FTP_TLS
ftp = FTP_TLS()
# Apply the OpenSSL cipher string 'DEFAULT@SECLEVEL=1' to this client's TLS context.
ftp.context.set_ciphers('DEFAULT@SECLEVEL=1')
ftp.connect('ftp.serverpath')
ftp.login('username','password')
# Nothing is downloaded here; the connection is simply closed after logging in.
ftp.close()
This works fine. The server reports that I'm logged in: '230 User logged in, proceed.'
is the message.
So, urllib.request.urlopen() is really convenient for accessing the data I need later in the application, but the way it uses ftplib now keeps me from logging in to the server. Using FTP_TLS directly works fine for accessing the server, but I'm not sure how to download the CSV once I'm logged in.
Is there a way to either tell urllib.request.urlopen() to use FTP_TLS, or to do something equivalent that quickly opens the file once I'm logged in with ftplib?
FTPS support can be added to urllib.request.urlopen
by installing a new OpenerDirector
with a modified FTPHandler
subclass:
import csv
import io
import urllib.request
class FTPSWrapper(urllib.request.ftpwrapper):
    """
    Variant of urllib.request.ftpwrapper that connects with ftplib.FTP_TLS.

    Only ``init`` is overridden; everything else is inherited unchanged.
    """

    def init(self):
        # Adapted from ftpwrapper.init in the standard library:
        # https://github.com/python/cpython/blob/f14ced6062ecdd3c654f3c558f79e1edf4f10cc8/Lib/urllib/request.py#L2412-L2419
        from ftplib import FTP_TLS

        self.busy = 0
        # The one substantive change from the stdlib: an FTPS client
        # instead of a plain FTP one.
        client = FTP_TLS()
        self.ftp = client
        client.connect(self.host, self.port, self.timeout)
        client.login(self.user, self.passwd)
        # Protect the data channel too, not just the control channel.
        client.prot_p()
        # Change into the directory portion of the requested path.
        client.cwd("/".join(self.dirs))
class FTPSHandler(urllib.request.FTPHandler):
    """
    Variant of urllib.request.FTPHandler that builds its connections
    with FTPSWrapper.
    """

    def connect_ftp(self, *args):
        # Forward the arguments untouched; only the wrapper class differs
        # from the base handler.
        wrapper = FTPSWrapper(*args, persistent=False)
        return wrapper
def download_ftp_file(url: str):
    """
    Open ``url`` with urlopen and return its contents as a text stream
    decoded as latin-1.

    An opener built with FTPSHandler is installed globally before the
    request is made, so later urllib.request.urlopen calls in the same
    process go through it as well.
    """
    ftps_opener = urllib.request.build_opener(FTPSHandler)
    urllib.request.install_opener(ftps_opener)
    raw = urllib.request.urlopen(url)
    return io.TextIOWrapper(raw, encoding="latin-1")
if __name__ == "__main__":
    url = "ftp://username:password@server/path-to-file.txt"
    # The file uses ';' as its field separator; with csv's default ','
    # every line would come back as a single field.
    # The with-block closes the text stream (and the response it wraps)
    # once all rows have been read.
    with download_ftp_file(url) as stream:
        rows = list(csv.reader(stream, delimiter=";"))
    print(rows)
The magic is the calls to build_opener
and install_opener
, which allow us to add new URL handling logic to urlopen
.