I'm inexperienced with Python automated testing, and I'm having a really hard time trying to mock (or patch?) a class and some of its methods.
This is our folder structure:
src
|--- main
     |--- run_data_process.py
tests
|--- main_tests
     |--- test_run_data_process.py
Below is the code for these files so far:
`run_data_process.py`:
import logging
import pandas as pd
import sys
from company.connections import DBConnection # internal library from my company
from company.data_tools import DataStorageManager # another internal library
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class DataProcessException(Exception):
    """Raised when the data process task cannot proceed.

    In this module it is raised by ``DataProcess.validate_gathered_data``
    when the gathered dataset is empty.
    """
class DataProcess:
    """Gather a raw dataset, validate it, and hand it off for storage."""

    def run_task(self) -> None:
        """Run the whole task: gather, validate, then save.

        Raises:
            DataProcessException: Logged and re-raised when gathering or
                validation fails; nothing is saved in that case.
        """
        try:
            df_raw = self.gather_data()
            self.validate_gathered_data(df_raw)
        except DataProcessException as e:
            logger.error(f"Error on the Data Process task: {e}")
            raise
        else:
            # Only reached when gathering and validation both succeeded.
            self.save_output_data(df_raw)
            logger.info("Data Process task executed successfully.")

    def gather_data(self) -> pd.DataFrame:
        """Call the database and return the result as a DataFrame.

        Delegates to ``DBConnection().run()``; the implementation of
        ``DBConnection`` is an internal library and is not available here.
        """
        conn = DBConnection()
        return conn.run()

    def validate_gathered_data(self, df_raw: pd.DataFrame) -> None:
        """Reject an empty raw dataset.

        Raises:
            DataProcessException: If ``df_raw`` has no rows.
        """
        if len(df_raw) == 0:
            raise DataProcessException("Raw dataset is empty.")

    def save_output_data(self, df_raw: pd.DataFrame) -> None:
        """Send the DataFrame to ``DataStorageManager().save_output_object``.

        ``DataStorageManager`` is another internal library that serialises
        the DataFrame and sends it to another server; its implementation is
        not available here either.
        """
        DataStorageManager().save_output_object(df_raw)
`test_run_data_process.py`:
from unittest.mock import Mock, MagicMock, patch
import pandas as pd
import pytest
from src.main.run_data_process import DataProcess, RawDataException
class TestDataProcess:
    """Tests for ``DataProcess``.

    This is the failing version the question asks about; the logic is kept
    as posted so it still matches the error output that follows.
    """

    def __get_mocked_dataset():
        """Build a small 3x3 DataFrame to stand in for the gathered data.

        NOTE(review): declared without ``self``, so looking it up on an
        instance yields a bound method that cannot be called with no
        arguments. It would need ``self`` or ``@staticmethod`` to be usable
        as ``self.__get_mocked_dataset()``.
        """
        return pd.DataFrame(
            [[111, 222, 333], ['text_1', 'text_2', 'text_3'], [True, False, False]],
            columns=['col_1', 'col_2', 'col_3'])

    def test_gathered_data_is_validated_successfully(self):
        # arrange / act
        # NOTE(review): this patches the class under test itself, so
        # `p.return_value` is a MagicMock and `run_task()` below is a mock
        # call -- the real DataProcess.run_task never executes, which is why
        # validate_gathered_data is reported as "not called".
        with patch('src.main.run_data_process.DataProcess', return_value=MagicMock()) as p:
            # NOTE(review): the helper is passed uncalled here, so the mock
            # returns the bound method object rather than a DataFrame.
            p.return_value.gather_data = Mock(return_value=self.__get_mocked_dataset)
            p.return_value.save_output_data = Mock(return_value=None)
            p.return_value.run_task()
            # assert
            p.return_value.validate_gathered_data.assert_called_with(p.return_value.gather_data.return_value)
What I'm trying to do is test `validate_gathered_data` while mocking `gather_data` (to avoid the database query) and `save_output_data` (to avoid calling our server), but I can't make it work and I'm pulling my hair out over this.
The code above gives me this error:
./tests/main_tests/test_run_data_process.py::TestDataProcess::test_gathered_data_is_validated_successfully Failed: [undefined]AssertionError: expected call not found.
Expected: validate_gathered_data(<bound method TestDataProcess.__get_mocked_dataset of <tests.main_tests.test_run_data_process.TestDataProcess object at 0x123b9fe50>>)
Actual: not called.
__wrapped_mock_method__ = <function NonCallableMock.assert_called_with at 0x108964550>
args = (<MagicMock name='mock.validate_gathered_data' id='4894604448'>, <bound method TestDataProcess.__get_mocked_dataset of <tests.main_tests.test_run_data_process.TestDataProcess object at 0x123b9fe50>>)
kwargs = {}, __tracebackhide__ = True
msg = 'expected call not found.\nExpected: validate_gathered_data(<bound method TestDataProcess.__get_mocked_dataset of <tests.main_tests.test_run_data_process.TestDataProcess object at 0x123b9fe50>>)\nActual: not called.'
__mock_self = <MagicMock name='mock.validate_gathered_data' id='4894604448'>
def assert_wrapper(
__wrapped_mock_method__: Callable[..., Any], *args: Any, **kwargs: Any
) -> None:
__tracebackhide__ = True
try:
> __wrapped_mock_method__(*args, **kwargs)
../../../../Library/Python/3.8/lib/python/site-packages/pytest_mock/plugin.py:414:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <MagicMock name='mock.validate_gathered_data' id='4894604448'>
args = (<bound method TestDataProcess.__get_mocked_dataset of <tests.main_tests.test_run_data_process.TestDataProcess object at 0x123b9fe50>>,)
kwargs = {}
expected = 'validate_gathered_data(<bound method TestDataProcess.__get_mocked_dataset of <tests.main_tests.test_run_data_process.TestDataProcess object at 0x123b9fe50>>)'
actual = 'not called.'
error_message = 'expected call not found.\nExpected: validate_gathered_data(<bound method TestDataProcess.__get_mocked_dataset of <tests.main_tests.test_run_data_process.TestDataProcess object at 0x123b9fe50>>)\nActual: not called.'
def assert_called_with(self, /, *args, **kwargs):
"""assert that the last call was made with the specified arguments.
Raises an AssertionError if the args and keyword args passed in are
different to the last call to the mock."""
if self.call_args is None:
expected = self._format_mock_call_signature(args, kwargs)
actual = 'not called.'
error_message = ('expected call not found.\nExpected: %s\nActual: %s'
% (expected, actual))
> raise AssertionError(error_message)
E AssertionError: expected call not found.
E Expected: validate_gathered_data(<bound method TestDataProcess.__get_mocked_dataset of <tests.main_tests.test_run_data_process.TestDataProcess object at 0x123b9fe50>>)
E Actual: not called.
/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8/unittest/mock.py:904: AssertionError
During handling of the above exception, another exception occurred:
self = <tests.main_tests.test_run_data_process.TestDataProcess object at 0x123b9fe50>
def test_gathered_data_is_validated_successfully(self):
# arrange / act
with patch('src.main.run_data_process.DataProcess', return_value=MagicMock()) as p:
p.return_value.gather_data = Mock(return_value=self.__get_mocked_dataset)
p.return_value.save_output_artifact = Mock(return_value=None)
p.return_value.run_task()
# assert
> p.return_value.validate_gathered_data.assert_called_with(p.return_value.gather_data.return_value)
E AssertionError: expected call not found.
E Expected: validate_gathered_data(<bound method TestDataProcess.__get_mocked_dataset of <tests.main_tests.test_run_data_process.TestDataProcess object at 0x123b9fe50>>)
E Actual: not called.
tests/main_tests/test_run_data_process.py:23: AssertionError
Among my previous attempts, I've already tried `@mock.patch.multiple` while instantiating `DataProcess` (a solution I've seen in countless older questions here), but got stuck on the `assert_called_with` part.
Any help would be greatly appreciated.
It's generally better/easier to avoid patching the class under test, and instead patch its dependencies in the module under test (where needed).
In this case, since the specific things you want to avoid instantiating are `DBConnection` and `DataStorageManager`, the straightforward solution is to just patch `DBConnection` and `DataStorageManager`:
@patch('src.main.run_data_process.DataStorageManager')
@patch('src.main.run_data_process.DBConnection')
def test_gathered_data_is_validated_successfully(self, db, storage):
    """Run the real ``DataProcess`` with only its two dependencies mocked.

    The names are patched in ``src.main.run_data_process`` because that is
    the module where ``DataProcess`` looks them up.
    """
    # Decorators are applied bottom-up, so the DBConnection mock is passed
    # first (`db`) and the DataStorageManager mock second (`storage`).
    # NOTE: this call requires the helper to be callable on the instance,
    # i.e. declared with `self` or as a @staticmethod.
    db().run.return_value = self.__get_mocked_dataset()
    DataProcess().run_task()
    # Validation passed and the real run_task reached the save step once.
    assert storage().save_output_object.call_count == 1
Remember that `patch()` creates a `MagicMock` by default, and that every call or attribute lookup on a `MagicMock` also creates a `MagicMock` by default (which is reused on subsequent calls), so you can easily set up a bunch of attributes on a mock by simply accessing them the way your code does -- in this case the mock objects represent the classes, so calling them automatically creates a mock instance, and we can then access attributes of those instances to mock and test the methods of those instances that are called by the code under test.