I have test code that downloads a set of images, does some processing on them, and asserts that the processing worked as expected:
@pytest.fixture
def image_dir(tmp_path):
    """Download the test images into this test's ``tmp_path`` and yield it.

    The fixture uses pytest's default (per-test) scope, so every test that
    requests it triggers a fresh download of all images.
    """
    image_urls = [
        # ... list of img URLs
    ]
    for idx, url in enumerate(image_urls):
        # Files are named by their position in the URL list: 0.png, 1.png, ...
        destination = tmp_path / f"{idx}.png"
        urllib.request.urlretrieve(url, destination)
    yield tmp_path
# Example test for operation A: walks every file in the directory supplied by
# the image_dir fixture and checks the operation's result on each one.
def test_op_A(image_dir: Path):
    for im_path in image_dir.iterdir():
        # load the image
        # modify the image
        # save the image back to disk
        # (the file is overwritten in place, so the directory must not be
        # shared with other tests)
        # assert that the modification worked as expected
# Example test for operation B: same shape as the other tests — iterate over
# the files in the directory supplied by the image_dir fixture and verify each.
def test_op_B(image_dir: Path):
    for im_path in image_dir.iterdir():
        # load the image
        # modify the image
        # save the image back to disk
        # (the file is overwritten in place, so the directory must not be
        # shared with other tests)
        # assert that the modification worked as expected
# ... more tests with a similar format
This works but is incredibly slow. I suspect that this is because the images are downloaded anew for each test.
Is there a clean way to create the temporary directory once, cache it, and use a copy of the directory for each test? This way each test can modify the images as desired, without influencing the other tests and while remaining performant.
One possible solution makes use of the `pytest_sessionstart` and `pytest_sessionfinish` hooks. We will also use a fixture to copy the files over for each test.
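A general breakdown of the flow we hope to achieve is the following:
1. Download the images once, before any tests run, into a cache directory.
2. Save the location of that cache on the session object.
3. For each test that needs the images, copy the cache into that test's own temporary directory.
4. Remove the cache after all tests have finished.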
Place the following two hooks in the `conftest.py` file in the root directory where your tests reside.
from pathlib import Path
from tempfile import TemporaryDirectory
def pytest_sessionstart(session):
    """Download the test images once into a session-wide cache directory.

    The cache directory's path is stored on ``session.__IMAGE_CACHE`` as a
    ``Path``. The ``TemporaryDirectory`` handle that owns the directory is
    stored on ``session.__IMAGE_CACHE_TMPDIR`` so that the directory stays
    alive until ``pytest_sessionfinish`` removes it.
    """
    # Imported locally: the import list shown with these hooks only brings in
    # Path and TemporaryDirectory.
    import urllib.request

    test_imgs = [
        # ... list of img URLs
    ]
    td = TemporaryDirectory()
    # Keep a reference to the handle. If it were only a local variable, the
    # TemporaryDirectory would be finalized when this hook returns and the
    # directory would be deleted before any test could use it.
    session.__IMAGE_CACHE_TMPDIR = td
    tmp_path = Path(td.name)
    for idx, im_url in enumerate(test_imgs):
        urllib.request.urlretrieve(im_url, tmp_path / f"{idx}.png")
    session.__IMAGE_CACHE = tmp_path


def pytest_sessionfinish(session, exitstatus):
    """Remove the cached images once the test session is over.

    ``exitstatus`` is part of the hook signature and is not used here.
    """
    # cleanup() lives on the TemporaryDirectory handle, not on the Path stored
    # in session.__IMAGE_CACHE. The handle may be absent if session start
    # failed before it was stored, in which case there is nothing to remove.
    td = getattr(session, "__IMAGE_CACHE_TMPDIR", None)
    if td is not None:
        td.cleanup()
Now that we have the images cached in a location, we can have our fixture copy them over for every test instead of having to download them.
from shutil import copytree
@pytest.fixture
def image_dir(tmp_path, request):
    """Yield a per-test copy of the session's cached images.

    The cache location is read from ``request.session.__IMAGE_CACHE`` and its
    contents are copied into this test's ``tmp_path``, so a test can modify
    the files without affecting the cache or other tests.
    """
    session = request.session
    # copy the data from our cache to the temp location for the test.
    # tmp_path already exists when the fixture runs, and copytree refuses to
    # copy into an existing directory unless dirs_exist_ok=True (Python 3.8+).
    copytree(session.__IMAGE_CACHE, tmp_path, dirs_exist_ok=True)
    yield tmp_path