# test_preprocess_pipeline.py

  1. try:
  2. from unittest import mock
  3. except ImportError:
  4. import mock
  5. from unittest import TestCase
  6. from iepy.preprocess.pipeline import PreProcessPipeline, PreProcessSteps
  7. from iepy.data.db import DocumentManager
  8. class TestPreProcessPipeline(TestCase):
  9. def patch_object(self, *args, **kwargs):
  10. patcher = mock.patch.object(*args, **kwargs)
  11. patched = patcher.start()
  12. patched.patcher = patcher
  13. self.addCleanup(patcher.stop)
  14. return patched
  15. def test_walk_document_applies_all_step_runners_to_the_given_doc(self):
  16. step1_runner = mock.MagicMock()
  17. step1_runner.side_effect = lambda x: x.call_order.append(1)
  18. step2_runner = mock.MagicMock()
  19. step2_runner.side_effect = lambda x: x.call_order.append(2)
  20. doc = mock.MagicMock()
  21. doc.call_order = []
  22. p = PreProcessPipeline([step1_runner, step2_runner], [])
  23. p.walk_document(doc)
  24. step1_runner.assert_called_once_with(doc)
  25. step2_runner.assert_called_once_with(doc)
  26. self.assertEqual(doc.call_order, [1, 2])
  27. def test_walk_document_applies_all_step_runners_again_if_they_were_already_run(self):
  28. step_runner1 = mock.MagicMock()
  29. p = PreProcessPipeline([step_runner1], [])
  30. doc = object()
  31. p.walk_document(doc)
  32. p.walk_document(doc)
  33. self.assertEqual(step_runner1.call_count, 2)
  34. def test_walk_document_itself_does_not_save_the_document(self):
  35. step_runner1 = mock.MagicMock()
  36. p = PreProcessPipeline([step_runner1], [])
  37. doc = mock.MagicMock()
  38. p.walk_document(doc)
  39. self.assertEqual(doc.save.call_count, 0)
  40. def test_process_step_in_batch_applies_runner_to_all_documents(self):
  41. # We take care that doesn't have attr "step"
  42. _runner = lambda x: x
  43. runner = mock.Mock(wraps=_runner)
  44. docs = [object() for i in range(5)]
  45. p = PreProcessPipeline([runner], docs)
  46. p.process_step_in_batch(runner)
  47. self.assertEqual(runner.call_count, len(docs))
  48. self.assertEqual(runner.call_args_list, [mock.call(d) for d in docs])
  49. def test_process_step_in_batch_does_nothing_with_previous_steps_runner(self):
  50. runner1 = mock.Mock(wraps=lambda x: x)
  51. runner2 = mock.Mock(wraps=lambda x: x)
  52. docs = [object() for i in range(5)]
  53. p = PreProcessPipeline([runner1, runner2], docs)
  54. p.process_step_in_batch(runner2)
  55. self.assertFalse(runner1.called)
  56. def test_process_step_in_batch_filter_docs_to_apply_if_has_attr_step(self):
  57. step_runner = mock.MagicMock(step=PreProcessSteps.tokenization,
  58. override=False, increment=False)
  59. all_docs = [object() for i in range(5)]
  60. self.patch_object(DocumentManager, '__iter__', return_value=all_docs)
  61. dm_get_docs = self.patch_object(DocumentManager, 'get_documents_lacking_preprocess',
  62. return_value=all_docs[:2])
  63. # Ok, docs manager has 5 docs, but get_documents_lacking_preprocess will return
  64. # only 2 of them
  65. p = PreProcessPipeline([step_runner], DocumentManager())
  66. p.process_step_in_batch(step_runner)
  67. dm_get_docs.assert_called_once_with(step_runner.step)
  68. self.assertNotEqual(step_runner.call_count, 5)
  69. self.assertEqual(step_runner.call_count, 2)
  70. self.assertEqual(step_runner.call_args_list, [mock.call(d) for d in all_docs[:2]])
  71. def test_process_step_in_batch_does_not_call_docs_save(self):
  72. runner = mock.Mock(wraps=lambda x: x)
  73. docs = [mock.Mock() for i in range(5)]
  74. p = PreProcessPipeline([runner], docs)
  75. p.process_step_in_batch(runner)
  76. for d in docs:
  77. self.assertFalse(d.save.called)
  78. def test_process_everythin_calls_successively_process_step_in_batch(self):
  79. runner1 = mock.Mock(wraps=lambda x: x)
  80. runner2 = mock.Mock(wraps=lambda x: x)
  81. docs = [object() for i in range(5)]
  82. p = PreProcessPipeline([runner1, runner2], docs)
  83. with mock.patch.object(p, 'process_step_in_batch') as mock_batch:
  84. p.call_order = []
  85. mock_batch.side_effect = lambda r: p.call_order.append(r)
  86. p.process_everything()
  87. self.assertEqual(mock_batch.call_count, 2)
  88. self.assertEqual(mock_batch.call_args_list,
  89. [mock.call(runner1), mock.call(runner2)])
  90. self.assertEqual(p.call_order, [runner1, runner2])