I am trying to replicate the experiment presented on this webpage: https://adversarial-ml-tutorial.org/adversarial_examples/
I downloaded the Jupyter notebook and opened it locally with Jupyter Notebook. When I run the following code to get the dataset:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
# Build the MNIST train/test datasets rooted at "../data"; download=True makes
# the constructor call the dataset's download() method (this is the call that
# raises the HTTP 403 shown in the traceback below).
mnist_train = datasets.MNIST("../data", train=True, download=True, transform=transforms.ToTensor())
mnist_test = datasets.MNIST("../data", train=False, download=True, transform=transforms.ToTensor())
# Wrap both datasets in loaders yielding batches of 100; only the training
# loader shuffles.
train_loader = DataLoader(mnist_train, batch_size = 100, shuffle=True)
test_loader = DataLoader(mnist_test, batch_size = 100, shuffle=False)
and I get the following error:
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ../data\MNIST\raw\train-images-idx3-ubyte.gz
0/? [00:00<?, ?it/s]
---------------------------------------------------------------------------
HTTPError Traceback (most recent call last)
<ipython-input-15-e6f62798f426> in <module>
2 from torch.utils.data import DataLoader
3
----> 4 mnist_train = datasets.MNIST("../data", train=True, download=True, transform=transforms.ToTensor())
5 mnist_test = datasets.MNIST("../data", train=False, download=True, transform=transforms.ToTensor())
6 train_loader = DataLoader(mnist_train, batch_size = 100, shuffle=True)
~\Anaconda3\lib\site-packages\torchvision\datasets\mnist.py in __init__(self, root, train, transform, target_transform, download)
77
78 if download:
---> 79 self.download()
80
81 if not self._check_exists():
~\Anaconda3\lib\site-packages\torchvision\datasets\mnist.py in download(self)
144 for url, md5 in self.resources:
145 filename = url.rpartition('/')[2]
--> 146 download_and_extract_archive(url, download_root=self.raw_folder, filename=filename, md5=md5)
147
148 # process and save as torch files
~\Anaconda3\lib\site-packages\torchvision\datasets\utils.py in download_and_extract_archive(url, download_root, extract_root, filename, md5, remove_finished)
254 filename = os.path.basename(url)
255
--> 256 download_url(url, download_root, filename, md5)
257
258 archive = os.path.join(download_root, filename)
~\Anaconda3\lib\site-packages\torchvision\datasets\utils.py in download_url(url, root, filename, md5)
82 )
83 else:
---> 84 raise e
85 # check integrity of downloaded file
86 if not check_integrity(fpath, md5):
~\Anaconda3\lib\site-packages\torchvision\datasets\utils.py in download_url(url, root, filename, md5)
70 urllib.request.urlretrieve(
71 url, fpath,
---> 72 reporthook=gen_bar_updater()
73 )
74 except (urllib.error.URLError, IOError) as e: # type: ignore[attr-defined]
~\Anaconda3\lib\urllib\request.py in urlretrieve(url, filename, reporthook, data)
245 url_type, path = splittype(url)
246
--> 247 with contextlib.closing(urlopen(url, data)) as fp:
248 headers = fp.info()
249
~\Anaconda3\lib\urllib\request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
220 else:
221 opener = _opener
--> 222 return opener.open(url, data, timeout)
223
224 def install_opener(opener):
~\Anaconda3\lib\urllib\request.py in open(self, fullurl, data, timeout)
529 for processor in self.process_response.get(protocol, []):
530 meth = getattr(processor, meth_name)
--> 531 response = meth(req, response)
532
533 return response
~\Anaconda3\lib\urllib\request.py in http_response(self, request, response)
639 if not (200 <= code < 300):
640 response = self.parent.error(
--> 641 'http', request, response, code, msg, hdrs)
642
643 return response
~\Anaconda3\lib\urllib\request.py in error(self, proto, *args)
567 if http_err:
568 args = (dict, 'default', 'http_error_default') + orig_args
--> 569 return self._call_chain(*args)
570
571 # XXX probably also want an abstract factory that knows when it makes
~\Anaconda3\lib\urllib\request.py in _call_chain(self, chain, kind, meth_name, *args)
501 for handler in handlers:
502 func = getattr(handler, meth_name)
--> 503 result = func(*args)
504 if result is not None:
505 return result
~\Anaconda3\lib\urllib\request.py in http_error_default(self, req, fp, code, msg, hdrs)
647 class HTTPDefaultErrorHandler(BaseHandler):
648 def http_error_default(self, req, fp, code, msg, hdrs):
--> 649 raise HTTPError(req.full_url, code, msg, hdrs, fp)
650
651 class HTTPRedirectHandler(BaseHandler):
HTTPError: HTTP Error 403: Forbidden
Any help solving this issue is much appreciated. I can also download the dataset directly from the link, but then I don't know how to use it!
Yes, it's a known bug: https://github.com/pytorch/vision/issues/3500
A possible solution is to patch the `MNIST.download` method so that it fetches the archives with `wget`.
This requires `wget` to be installed.
For Linux:
sudo apt install wget
For Windows:
choco install wget
import os
import subprocess as sp
from torchvision.datasets.mnist import MNIST, read_image_file, read_label_file
from torchvision.datasets.utils import extract_archive
def patched_download(self):
    """Download and process the MNIST archives using ``wget``.

    Intended as a replacement for ``MNIST.download``: it fetches every
    archive listed in ``self.resources`` with the external ``wget`` program,
    extracts it into ``self.raw_folder``, and then writes the processed
    training and test sets into ``self.processed_folder``.

    Does nothing if ``self._check_exists()`` reports the data is present.

    Raises:
        FileNotFoundError: if the ``wget`` executable cannot be found.
        subprocess.CalledProcessError: if ``wget`` exits with a non-zero
            status (e.g. the server refused the request).
    """
    if self._check_exists():
        return

    # Imported locally because the surrounding snippet never imports torch,
    # yet torch.save is required to write the processed files below.
    import torch

    os.makedirs(self.raw_folder, exist_ok=True)
    os.makedirs(self.processed_folder, exist_ok=True)

    # Archives are both downloaded to and extracted in the raw folder.
    download_root = os.path.expanduser(self.raw_folder)

    # download files
    # NOTE(review): the md5 from self.resources is not verified here.
    for url, _md5 in self.resources:
        filename = url.rpartition('/')[2]
        archive = os.path.join(download_root, filename)

        # Use wget to download archives.
        # -O writes to the exact path extracted below, so a re-run overwrites
        # a stale/partial file instead of saving a "<name>.1" copy next to it.
        # check=True surfaces a failed download immediately instead of
        # failing later during extraction.
        sp.run(["wget", url, "-O", archive], check=True)

        print("Extracting {} to {}".format(archive, download_root))
        # Third argument is remove_finished=False: keep the archive on disk.
        extract_archive(archive, download_root, False)

    # process and save as torch files
    print('Processing...')

    training_set = (
        read_image_file(os.path.join(self.raw_folder, 'train-images-idx3-ubyte')),
        read_label_file(os.path.join(self.raw_folder, 'train-labels-idx1-ubyte'))
    )
    test_set = (
        read_image_file(os.path.join(self.raw_folder, 't10k-images-idx3-ubyte')),
        read_label_file(os.path.join(self.raw_folder, 't10k-labels-idx1-ubyte'))
    )
    with open(os.path.join(self.processed_folder, self.training_file), 'wb') as f:
        torch.save(training_set, f)
    with open(os.path.join(self.processed_folder, self.test_file), 'wb') as f:
        torch.save(test_set, f)

    print('Done!')
# Replace MNIST's download method with the wget-based one. This must run
# before the datasets are constructed, because download=True makes the
# constructor call self.download().
MNIST.download = patched_download
# `transforms` and `DataLoader` are the names imported in the question's
# snippet (torchvision.transforms and torch.utils.data.DataLoader).
mnist_train = MNIST("../data", train=True, download=True, transform=transforms.ToTensor())
mnist_test = MNIST("../data", train=False, download=True, transform=transforms.ToTensor())
# NOTE(review): batch_size=1 here, whereas the question used batch_size=100 —
# confirm which is intended.
train_loader = DataLoader(mnist_train, batch_size=1, shuffle=True)
test_loader = DataLoader(mnist_test, batch_size=1, shuffle=False)