Ensure that invalid vts have results_dir cleaned before passing to ta…

…sks. Tasks that sometimes fail due to outside factors (download failures, resolve issues, etc) often would call safe_mkdir(vt.results_dir, clean=True) in order to wipe possibly truncated or crufty state. But since vt.results_dir is a symlink, that replaced it with a real dir. That ended up breaking caching, since the task output was therefore never making it into the artifact cache, and other weird bugs. This is a small change that deletes the existing directories if a target is invalid, removing the need for tasks to wipe the results_dir, and also now checks to make sure that the results_dir is legal before marking valid and passing to the artifact_caches. The majority of the change is added test coverage around the breaks. I have a couple immediate followups that cover an additional failure state, and reworks the cache_manager to remove some of the harder to reason about bits. Closes: pantsbuild#4137
mateor · Dec 14, 2016 · 5380eb1 · 5380eb1
1 parent 75ba41c
commit 5380eb1
Show file tree

Hide file tree

Showing 6 changed files with 170 additions and 12 deletions.
diff --git a/src/python/pants/cache/local_artifact_cache.py b/src/python/pants/cache/local_artifact_cache.py
@@ -67,6 +67,10 @@ def store_and_use_artifact(self, cache_key, src, results_dir=None):
       tarball = self._store_tarball(cache_key, tmp.name)
       artifact = self._artifact(tarball)
 
+      # NOTE(mateo): The two clean=True args passed in this method are likely safe, since the cache will by
+      # definition be dealing with unique results_dir, as opposed to the stable vt.results_dir (aka 'current').
+      # But if by chance it's passed the stable results_dir, safe_makedir(clean=True) will silently convert it
+      # from a symlink to a real dir and cause mysterious 'Operation not permitted' errors until the workdir is cleaned.
       if results_dir is not None:
         safe_mkdir(results_dir, clean=True)
 

diff --git a/src/python/pants/invalidation/cache_manager.py b/src/python/pants/invalidation/cache_manager.py
@@ -13,7 +13,7 @@
 from pants.build_graph.build_graph import sort_targets
 from pants.build_graph.target import Target
 from pants.invalidation.build_invalidator import BuildInvalidator, CacheKeyGenerator
-from pants.util.dirutil import relative_symlink, safe_mkdir
+from pants.util.dirutil import relative_symlink, safe_mkdir, safe_rmtree
 
 
 class VersionedTargetSet(object):
@@ -25,6 +25,11 @@ class VersionedTargetSet(object):
   built together into a single artifact.
   """
 
+
+
+  class InvalidResultsDir(Exception):
+    """Indicate a problem interacting with a versioned target results directory."""
+
   @staticmethod
   def from_versioned_targets(versioned_targets):
     """
@@ -117,6 +122,17 @@ def previous_results_dir(self):
       raise ValueError('There is no previous_results_dir for: {}'.format(self))
     return self._previous_results_dir
 
+  def _ensure_legal(self):
+    """Return True as long as a vt's results_dir state does not break any internal contracts."""
+    # Could also check that the current_results_dir exists and matches the os.realpath(results_dir).
+    # I am not sure it provides enough value to warrant burdening every VT with those checks, though.
+    if self._results_dir and not os.path.islink(self.results_dir):
+      raise self.InvalidResultsDir(
+        "The self.results_dir is no longer a symlink: {}\nThe results_dir should not be manually cleaned or recreated."
+        .format(self.results_dir)
+      )
+    return True
+
   def live_dirs(self):
     """Yields directories that must exist for this VersionedTarget to function."""
     if self.has_results_dir:
@@ -169,10 +185,12 @@ def _results_dir_path(self, root_dir, key, stable):
     )
 
   def create_results_dir(self, root_dir, allow_incremental):
-    """Ensures that a results_dir exists under the given root_dir for this versioned target.
+    """Ensures that a cleaned results_dir exists for invalid versioned targets.
 
     If incremental=True, attempts to clone the results_dir for the previous version of this target
-    to the new results dir. Otherwise, simply ensures that the results dir exists.
+    to the new results dir.
+
+    Only guarantees results_dirs for invalid VTs, pertinent result_dirs are assumed to exist for valid VTs.
     """
     # Generate unique and stable directory paths for this cache key.
     current_dir = self._results_dir_path(root_dir, self.cache_key, stable=False)
@@ -183,6 +201,11 @@ def create_results_dir(self, root_dir, allow_incremental):
       # If the target is valid, both directories can be assumed to exist.
       return
 
+    # If the vt is invalid, clean. Deletes both because if the stable_dir has somehow been replaced with a real dir,
+    # the relative_symlink call below raises an uncaught exception when it attempts to unlink the real directory.
+    safe_rmtree(current_dir)
+    safe_rmtree(stable_dir)
+
     # Clone from the previous results_dir if incremental, or initialize.
     previous_dir = self._use_previous_dir(allow_incremental, root_dir, current_dir)
     if previous_dir is not None:
@@ -200,7 +223,7 @@ def _use_previous_dir(self, allow_incremental, root_dir, current_dir):
       # Not incremental.
       return None
     previous_dir = self._results_dir_path(root_dir, self.previous_cache_key, stable=False)
-    if not os.path.isdir(previous_dir) or os.path.isdir(current_dir):
+    if not os.path.isdir(previous_dir):
       # Could be useful, but no previous results are present.
       return None
     return previous_dir
@@ -264,11 +287,13 @@ def __init__(self,
   def update(self, vts):
     """Mark a changed or invalidated VersionedTargetSet as successfully processed."""
     for vt in vts.versioned_targets:
+      vt._ensure_legal()
       if not vt.valid:
         self._invalidator.update(vt.cache_key)
         vt.valid = True
         self._artifact_write_callback(vt)
     if not vts.valid:
+      vts._ensure_legal()
       self._invalidator.update(vts.cache_key)
       vts.valid = True
       self._artifact_write_callback(vts)

diff --git a/tests/python/pants_test/cache/test_caching.py b/tests/python/pants_test/cache/test_caching.py
@@ -25,7 +25,6 @@ def __init__(self, address, source, *args, **kwargs):
 
 
 class DummyTask(Task):
-  """A task that appends the content of a DummyLibrary's source into its results_dir."""
   options_scope = 'dummy'
 
   @property

diff --git a/tests/python/pants_test/invalidation/BUILD b/tests/python/pants_test/invalidation/BUILD
@@ -7,6 +7,7 @@ python_tests(
   sources = ['test_cache_manager.py'],
   dependencies = [
     'src/python/pants/invalidation',
+    'src/python/pants/util:dirutil',
     'tests/python/pants_test/testutils:mock_logger',
     'tests/python/pants_test/tasks:task_test_base',
   ]

diff --git a/tests/python/pants_test/invalidation/test_cache_manager.py b/tests/python/pants_test/invalidation/test_cache_manager.py
@@ -5,11 +5,13 @@
 from __future__ import (absolute_import, division, generators, nested_scopes, print_function,
                         unicode_literals, with_statement)
 
+import os
 import shutil
 import tempfile
 
+from pants.util.dirutil import safe_mkdir, safe_rmtree
 from pants.invalidation.build_invalidator import CacheKey, CacheKeyGenerator
-from pants.invalidation.cache_manager import InvalidationCacheManager, VersionedTarget
+from pants.invalidation.cache_manager import InvalidationCacheManager, VersionedTargetSet
 from pants_test.base_test import BaseTest
 
 
@@ -56,8 +58,33 @@ def tearDown(self):
     shutil.rmtree(self._dir, ignore_errors=True)
     super(InvalidationCacheManagerTest, self).tearDown()
 
-  def make_vts(self, target):
-    return VersionedTarget(self.cache_manager, target, target.id)
+  def make_vt(self, invalid=False):
+    # Create an arbitrary VT. If invalid is False, it will mimic the state of the VT handed back by a task.
+    a_target = self.make_target(':a', dependencies=[])
+    ic = self.cache_manager.check([a_target])
+    vt = ic.all_vts[0]
+    if not invalid:
+      self.task_execute(vt)
+      vt.update()
+    return vt
+
+  def task_execute(self, vt):
+    vt.create_results_dir(self._dir, allow_incremental=False)
+    task_output = os.path.join(vt.results_dir, 'a_file')
+    self.create_file(task_output, 'foo')
+
+  def is_empty(self, dirname):
+    return not os.listdir(dirname)
+
+  def matching_result_dirs(self, vt):
+    # Ensure that the result_dirs contain the same files.
+    return self.is_empty(vt.results_dir) == self.is_empty(vt.current_results_dir)
+
+  def clobber_symlink(self, vt):
+    # Munge the state to mimic a common error found before we added the clean- it accidentally clobbers the symlink!
+    # Commonly caused by safe_mkdir(vt.results_dir, clean=True), broken up here to keep the test from being brittle.
+    safe_rmtree(vt.results_dir)
+    safe_mkdir(vt.results_dir)
 
   def test_check_marks_all_as_invalid_by_default(self):
     a = self.make_target(':a', dependencies=[])
@@ -77,3 +104,79 @@ def test_check_marks_all_as_invalid_by_default(self):
     self.assertEquals(5, len(all_vts))
     vts_targets = [vt.targets[0] for vt in all_vts]
     self.assertEquals(set(targets), set(vts_targets))
+
+  def test_force_invalidate(self):
+    vt = self.make_vt()
+    self.assertTrue(vt.valid)
+    vt.force_invalidate()
+    self.assertFalse(vt.valid)
+
+  def test_invalid_vts_are_cleaned(self):
+    # Ensure that calling create_results_dir on an invalid target will wipe any pre-existing output.
+    vt = self.make_vt()
+    self.assertFalse(self.is_empty(vt.results_dir))
+    self.assertTrue(self.matching_result_dirs(vt))
+
+    vt.force_invalidate()
+    vt.create_results_dir(self._dir, allow_incremental=False)
+    self.assertTrue(self.is_empty(vt.results_dir))
+    self.assertTrue(self.matching_result_dirs(vt))
+    vt._ensure_legal()
+
+  def test_valid_vts_are_not_cleaned(self):
+    # No cleaning of results_dir occurs, since create_results_dir short-circuits if the VT is valid.
+    vt = self.make_vt()
+    self.assertFalse(self.is_empty(vt.results_dir))
+    vt.create_results_dir(self._dir, allow_incremental=False)
+    self.assertFalse(self.is_empty(vt.results_dir))
+    self.assertTrue(self.matching_result_dirs(vt))
+
+  def test_illegal_results_dir_cannot_be_updated_to_valid(self):
+    # A regression test for a former bug. Calling safe_mkdir(vt.results_dir, clean=True) would silently
+    # delete the results_dir symlink and yet leave any existing crufty content behind in the vt.current_results_dir.
+    # https://github.com/pantsbuild/pants/issues/4137
+    # https://github.com/pantsbuild/pants/issues/4051
+
+    with self.assertRaises(VersionedTargetSet.InvalidResultsDir):
+      # All is right with the world, mock task is generally well-behaved and output is placed in both result_dirs.
+      vt = self.make_vt()
+      self.assertFalse(self.is_empty(vt.results_dir))
+      self.assertTrue(self.matching_result_dirs(vt))
+      self.assertTrue(os.path.islink(vt.results_dir))
+      vt.force_invalidate()
+      self.clobber_symlink(vt)
+
+      # Arg, and the resultingly unlinked current_results_dir is uncleaned. The two directories have diverging contents!
+      # The product pipeline and the artifact cache will get different task output!
+      self.assertFalse(os.path.islink(vt.results_dir))
+      self.assertFalse(self.matching_result_dirs(vt))
+
+      # The main protection for this is the exception raised when the cache_manager attempts to mark the VT valid.
+      self.assertFalse(vt.valid)
+      vt.update()
+
+  def test_recreation_of_invalid_vt_result_dirs(self):
+    # Show that the invalidation recreates legal result_dirs.
+    vt = self.make_vt()
+    self.clobber_symlink(vt)
+    self.assertFalse(os.path.islink(vt.results_dir))
+
+    # This only is caught here if the VT is still invalid for some reason, otherwise it's caught by the update() method.
+    vt.force_invalidate()
+    vt.create_results_dir(self._dir, allow_incremental=False)
+    self.assertTrue(os.path.islink(vt.results_dir))
+    self.assertTrue(os.path.isdir(vt.current_results_dir))
+
+  def test_for_illegal_vt(self):
+    with self.assertRaises(VersionedTargetSet.InvalidResultsDir):
+      vt = self.make_vt()
+      self.clobber_symlink(vt)
+      vt._ensure_legal()
+
+  def test_for_illegal_vts(self):
+    # The update() checks this through vts._ensure_legal, checked here since those checks are on different branches.
+    with self.assertRaises(VersionedTargetSet.InvalidResultsDir):
+      vt = self.make_vt()
+      self.clobber_symlink(vt)
+      vts = VersionedTargetSet.from_versioned_targets([vt])
+      vts.update()
diff --git a/tests/python/pants_test/task/test_task.py b/tests/python/pants_test/task/test_task.py
@@ -58,6 +58,7 @@ def execute(self):
 class TaskTest(TaskTestBase):
 
   _filename = 'f'
+  _file_contents = 'results_string\n'
 
   @classmethod
   def task_type(cls):
@@ -74,10 +75,19 @@ def _fixture(self, incremental):
     task._incremental = incremental
     return target, task
 
+  def _run_fixture(self, content=None, incremental=False):
+    content = content or self._file_contents
+    target, task = self._fixture(incremental=incremental)
+    self._create_clean_file(target, content)
+    vtA = task.execute()
+    return target, task, vtA
+
   def _create_clean_file(self, target, content):
     self.create_file(self._filename, content)
     target.mark_invalidation_hash_dirty()
 
+  # TODO(mateo): This test was relying on some implementation bugs and should probably be revisited, and either made
+  # more representative of the common case or provided with some additional coverage.
   def test_incremental(self):
     """Run three times with two unique fingerprints."""
 
@@ -95,15 +105,17 @@ def test_incremental(self):
     vtB = task.execute()
     self.assertContent(vtB, one + two)
 
-    # Incremental atop existing directory for vtA.
+    # vtC.previous_cache_key == vtB.cache_key so the task appends the file to vtB's results.
+    # This behavior used to be skipped because the current_dir existed when checked by cache_manager._use_previous_dir.
     self._create_clean_file(target, one)
     vtC = task.execute()
-    self.assertContent(vtC, one + one)
+    self.assertEqual(vtC.previous_cache_key, vtB.cache_key)
+    self.assertContent(vtC, one + two + one)
 
     # Confirm that there were two unique results dirs, and that the second was cloned.
-    self.assertContent(vtA, one + one)
+    self.assertContent(vtA, one + two + one)
     self.assertContent(vtB, one + two)
-    self.assertContent(vtC, one + one)
+    self.assertContent(vtC, one + two + one)
     self.assertNotEqual(vtA.current_results_dir, vtB.current_results_dir)
     self.assertEqual(vtA.current_results_dir, vtC.current_results_dir)
 
@@ -156,3 +168,17 @@ def test_implementation_version(self):
     self.assertContent(vtB, two)
     self.assertNotEqual(vtA.current_results_dir, vtB.current_results_dir)
     self.assertNotEqual(vtA.results_dir, vtB.results_dir)
+
+  def test_execute_cleans_invalid_result_dirs(self):
+    # Regression test to protect task.execute() from returning invalid dirs.
+    _, task, vt = self._run_fixture()
+    self.assertNotEqual(os.listdir(vt.results_dir), [])
+    self.assertTrue(os.path.islink(vt.results_dir))
+
+    # Mimic the failure case, where an invalid task is run twice, due to failed download or something.
+    vt.force_invalidate()
+
+    # But if this VT is invalid for a second run, the next invalidation deletes and recreates.
+    task.execute()
+    self.assertTrue(os.path.islink(vt.results_dir))
+    self.assertTrue(os.path.isdir(vt.current_results_dir))