123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103 |
- try:
- from unittest import mock
- except ImportError:
- import mock
- from unittest import TestCase
- from iepy.preprocess.pipeline import PreProcessPipeline, PreProcessSteps
- from iepy.data.db import DocumentManager
- class TestPreProcessPipeline(TestCase):
- def patch_object(self, *args, **kwargs):
- patcher = mock.patch.object(*args, **kwargs)
- patched = patcher.start()
- patched.patcher = patcher
- self.addCleanup(patcher.stop)
- return patched
- def test_walk_document_applies_all_step_runners_to_the_given_doc(self):
- step1_runner = mock.MagicMock()
- step1_runner.side_effect = lambda x: x.call_order.append(1)
- step2_runner = mock.MagicMock()
- step2_runner.side_effect = lambda x: x.call_order.append(2)
- doc = mock.MagicMock()
- doc.call_order = []
- p = PreProcessPipeline([step1_runner, step2_runner], [])
- p.walk_document(doc)
- step1_runner.assert_called_once_with(doc)
- step2_runner.assert_called_once_with(doc)
- self.assertEqual(doc.call_order, [1, 2])
- def test_walk_document_applies_all_step_runners_again_if_they_were_already_run(self):
- step_runner1 = mock.MagicMock()
- p = PreProcessPipeline([step_runner1], [])
- doc = object()
- p.walk_document(doc)
- p.walk_document(doc)
- self.assertEqual(step_runner1.call_count, 2)
- def test_walk_document_itself_does_not_save_the_document(self):
- step_runner1 = mock.MagicMock()
- p = PreProcessPipeline([step_runner1], [])
- doc = mock.MagicMock()
- p.walk_document(doc)
- self.assertEqual(doc.save.call_count, 0)
- def test_process_step_in_batch_applies_runner_to_all_documents(self):
- # We take care that doesn't have attr "step"
- _runner = lambda x: x
- runner = mock.Mock(wraps=_runner)
- docs = [object() for i in range(5)]
- p = PreProcessPipeline([runner], docs)
- p.process_step_in_batch(runner)
- self.assertEqual(runner.call_count, len(docs))
- self.assertEqual(runner.call_args_list, [mock.call(d) for d in docs])
- def test_process_step_in_batch_does_nothing_with_previous_steps_runner(self):
- runner1 = mock.Mock(wraps=lambda x: x)
- runner2 = mock.Mock(wraps=lambda x: x)
- docs = [object() for i in range(5)]
- p = PreProcessPipeline([runner1, runner2], docs)
- p.process_step_in_batch(runner2)
- self.assertFalse(runner1.called)
- def test_process_step_in_batch_filter_docs_to_apply_if_has_attr_step(self):
- step_runner = mock.MagicMock(step=PreProcessSteps.tokenization,
- override=False, increment=False)
- all_docs = [object() for i in range(5)]
- self.patch_object(DocumentManager, '__iter__', return_value=all_docs)
- dm_get_docs = self.patch_object(DocumentManager, 'get_documents_lacking_preprocess',
- return_value=all_docs[:2])
- # Ok, docs manager has 5 docs, but get_documents_lacking_preprocess will return
- # only 2 of them
- p = PreProcessPipeline([step_runner], DocumentManager())
- p.process_step_in_batch(step_runner)
- dm_get_docs.assert_called_once_with(step_runner.step)
- self.assertNotEqual(step_runner.call_count, 5)
- self.assertEqual(step_runner.call_count, 2)
- self.assertEqual(step_runner.call_args_list, [mock.call(d) for d in all_docs[:2]])
- def test_process_step_in_batch_does_not_call_docs_save(self):
- runner = mock.Mock(wraps=lambda x: x)
- docs = [mock.Mock() for i in range(5)]
- p = PreProcessPipeline([runner], docs)
- p.process_step_in_batch(runner)
- for d in docs:
- self.assertFalse(d.save.called)
- def test_process_everythin_calls_successively_process_step_in_batch(self):
- runner1 = mock.Mock(wraps=lambda x: x)
- runner2 = mock.Mock(wraps=lambda x: x)
- docs = [object() for i in range(5)]
- p = PreProcessPipeline([runner1, runner2], docs)
- with mock.patch.object(p, 'process_step_in_batch') as mock_batch:
- p.call_order = []
- mock_batch.side_effect = lambda r: p.call_order.append(r)
- p.process_everything()
- self.assertEqual(mock_batch.call_count, 2)
- self.assertEqual(mock_batch.call_args_list,
- [mock.call(runner1), mock.call(runner2)])
- self.assertEqual(p.call_order, [runner1, runner2])
|