diff --git a/revscoring/datasources/tests/test_diff.py b/revscoring/datasources/tests/test_diff.py index 150c7d54..ace5a54f 100644 --- a/revscoring/datasources/tests/test_diff.py +++ b/revscoring/datasources/tests/test_diff.py @@ -42,6 +42,13 @@ def test_operations(): eq_(b, REVISION_TOKENS) + # Make sure we don't error when there is no parent revision + cache = { + parent_revision.text: None, + revision.text: REVISION_TEXT + } + + operations, a, b = solve(diff.operations, cache=cache) def test_added_tokens(): cache = { diff --git a/revscoring/datasources/tests/test_parent_revision.py b/revscoring/datasources/tests/test_parent_revision.py index 634e27d4..9b53ce6c 100644 --- a/revscoring/datasources/tests/test_parent_revision.py +++ b/revscoring/datasources/tests/test_parent_revision.py @@ -10,6 +10,7 @@ def test_words(): cache={parent_revision.text: "Some text words 55."}) eq_(words, ["Some", "text", "words"]) + # Make sure we don't error when there is no parent revision words = solve(parent_revision.words, cache={parent_revision.text: None}) eq_(words, []) diff --git a/revscoring/extractors/api.py b/revscoring/extractors/api.py index a767348a..8c194cf6 100644 --- a/revscoring/extractors/api.py +++ b/revscoring/extractors/api.py @@ -57,7 +57,7 @@ def __init__(self, session, language=None, context=None, cache=None): Datasource("parent_revision.doc", self.process_parent_revision_doc, depends_on=[revision.metadata]), Datasource("parent_revision.metadata", - self.process_revision_metadata, + self.process_revision_metadata_if_exists, depends_on=[parent_revision_doc]), Datasource("parent_revision.text", self.process_revision_text, @@ -66,7 +66,7 @@ def __init__(self, session, language=None, context=None, cache=None): self.process_previous_user_revision_doc, depends_on=[revision.metadata]), Datasource("previous_user_revision.metadata", - self.process_revision_metadata, + self.process_revision_metadata_if_exists, depends_on=[previous_user_revision_doc]), Datasource("page_creation.doc", self.process_page_creation_doc, @@ -298,6 +298,13 @@ def process_revision_metadata(cls, revision_doc): raise RevisionDocumentNotFound() return cls.revision_metadata_from_doc(revision_doc) + @classmethod + def process_revision_metadata_if_exists(cls, revision_doc): + if revision_doc is None: + return None + else: + return cls.revision_metadata_from_doc(revision_doc) + @classmethod def process_user_info(cls, user_doc): return cls.user_info_from_doc(user_doc) diff --git a/revscoring/features/tests/test_parent_revision.py b/revscoring/features/tests/test_parent_revision.py index bd9695b0..b8becf6e 100644 --- a/revscoring/features/tests/test_parent_revision.py +++ b/revscoring/features/tests/test_parent_revision.py @@ -34,6 +34,13 @@ def test_was_same_user(): } eq_(solve(was_same_user, cache=cache), True) + # Make sure we don't error when there is no parent revision + cache = { + revision.metadata: FakeRevisionMetadata(None, "127.4.5.6"), + parent_revision.metadata: None + } + eq_(solve(was_same_user, cache=cache), False) + def test_seconds_since(): FakeRevisionMetadata = namedtuple("FakeRevisionMetadata", ['timestamp']) @@ -44,6 +51,13 @@ def test_seconds_since(): } eq_(solve(seconds_since, cache=cache), 9) + # Make sure we don't error when there is no parent revision + cache = { + revision.metadata: FakeRevisionMetadata(Timestamp(10)), + parent_revision.metadata: None + } + eq_(solve(seconds_since, cache=cache), 0) + def test_bytes(): FakeRevisionMetadata = namedtuple("FakeRevisionMetadata", ['bytes']) @@ -53,18 +67,31 @@ def test_bytes(): } eq_(solve(bytes, cache=cache), 25) + # Make sure we don't error when there is no parent revision + cache = { + parent_revision.metadata: None + } + eq_(solve(bytes, cache=cache), 0) + def test_chars(): cache = { parent_revision.text: "Twelve chars" } eq_(solve(chars, cache=cache), 12) + # Make sure we don't error when there is no parent revision + cache = { + parent_revision.text: None + } + eq_(solve(chars, cache=cache), 0) + def test_markup_chars(): cache = { parent_revision.text: "Twelve {{chars}}" } eq_(solve(markup_chars, cache=cache), 4) + # Make sure we don't error when there is no parent revision cache = { parent_revision.text: None } @@ -76,6 +103,7 @@ def test_numeric_chars(): } eq_(solve(numeric_chars, cache=cache), 2) + # Make sure we don't error when there is no parent revision cache = { parent_revision.text: None } @@ -87,6 +115,7 @@ def test_symbolic_chars(): } eq_(solve(symbolic_chars, cache=cache), 4) + # Make sure we don't error when there is no parent revision cache = { parent_revision.text: None } @@ -98,6 +127,7 @@ def test_uppercase_chars(): } eq_(solve(uppercase_chars, cache=cache), 2) + # Make sure we don't error when there is no parent revision cache = { parent_revision.text: None } diff --git a/revscoring/features/tests/test_previous_user_revision.py b/revscoring/features/tests/test_previous_user_revision.py index ac13c7dc..7480b4c9 100644 --- a/revscoring/features/tests/test_previous_user_revision.py +++ b/revscoring/features/tests/test_previous_user_revision.py @@ -17,3 +17,10 @@ def test_seconds_since(): previous_user_revision.metadata: FakeRevisionMetadata(Timestamp(1)) } eq_(solve(seconds_since, cache=cache), 9) + + # Makes sure we don't crash when there was no previous user revision + cache = { + revision.metadata: FakeRevisionMetadata(Timestamp(10)), + previous_user_revision.metadata: None + } + eq_(solve(seconds_since, cache=cache), 0)