Skip to content

Commit

Permalink
[dbt-labs#9570] Improve testability and coverage for partial parsing
Browse files Browse the repository at this point in the history
* Transform skip_parsing (private variable of ManifestLoader.load()) into instance-attribute of ManifestLoader(), with default value False
  (to enable splitting of ManifestLoader.load())

* Split ManifestLoader.load(), to extract operation of PartialParsing into new method called ManifestLoader.safe_update_project_parser_files_partially()
  (to simplify both cognitive complexity in the codebase and mocking in unittestest)

* Add "ignore" type-comments in new ManifestLoader.safe_update_project_parser_files_partially()
  (to silence mypy warnings regarding instance-attributes which can be initialized as None or as something else, e.g. self.saved_manifest)[1]

[1] Although I wanted avoid "ignore" type-comments, it seems like addressing these mypy warnings in a stricter sense requires technical alignment and broader code changes.
	For example, might need to initialize self.saved_manifest as Manifest, instead of Optional[Manifest], so that PartialParsing gets inputs with type it currently expects.
	... perhaps too far beyond the scope of this fix?
  • Loading branch information
slothkong committed Mar 6, 2024
1 parent eb0bcba commit 2c165fc
Show file tree
Hide file tree
Showing 2 changed files with 113 additions and 63 deletions.
133 changes: 70 additions & 63 deletions core/dbt/parser/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,7 @@ def __init__(
# have been enabled, but not happening because of some issue.
self.partially_parsing = False
self.partial_parser: Optional[PartialParsing] = None
self.skip_parsing = False

# This is a saved manifest from a previous run that's used for partial parsing
self.saved_manifest: Optional[Manifest] = self.read_manifest_for_partial_parse()
Expand Down Expand Up @@ -374,71 +375,15 @@ def load(self) -> Manifest:
self._perf_info.path_count = len(self.manifest.files)
self._perf_info.read_files_elapsed = time.perf_counter() - start_read_files

skip_parsing = False
if self.saved_manifest is not None:
self.partial_parser = PartialParsing(self.saved_manifest, self.manifest.files)
skip_parsing = self.partial_parser.skip_parsing()
if skip_parsing:
# nothing changed, so we don't need to generate project_parser_files
self.manifest = self.saved_manifest
else:
# create child_map and parent_map
self.saved_manifest.build_parent_and_child_maps()
# create group_map
self.saved_manifest.build_group_map()
# files are different, we need to create a new set of
# project_parser_files.
try:
project_parser_files = self.partial_parser.get_parsing_files()
self.partially_parsing = True
self.manifest = self.saved_manifest
except Exception as exc:
# pp_files should still be the full set and manifest is new manifest,
# since get_parsing_files failed
fire_event(
UnableToPartialParse(
reason="an error occurred. Switching to full reparse."
)
)

# Get traceback info
tb_info = traceback.format_exc()
# index last stack frame in traceback (i.e. lastest exception and its context)
tb_last_frame = traceback.extract_tb(exc.__traceback__)[-1]
exc_info = {
"traceback": tb_info,
"exception": tb_info.splitlines()[-1],
"code": tb_last_frame.line, # if the source is not available, it is None
"location": f"line {tb_last_frame.lineno} in {tb_last_frame.name}",
}

# get file info for local logs
parse_file_type: str = ""
file_id = self.partial_parser.processing_file
if file_id:
source_file = None
if file_id in self.saved_manifest.files:
source_file = self.saved_manifest.files[file_id]
elif file_id in self.manifest.files:
source_file = self.manifest.files[file_id]
if source_file:
parse_file_type = source_file.parse_file_type
fire_event(PartialParsingErrorProcessingFile(file=file_id))
exc_info["parse_file_type"] = parse_file_type
fire_event(PartialParsingError(exc_info=exc_info))

# Send event
if dbt.tracking.active_user is not None:
exc_info["full_reparse_reason"] = ReparseReason.exception
dbt.tracking.track_partial_parser(exc_info)

if os.environ.get("DBT_PP_TEST"):
raise exc
self.skip_parsing = False
project_parser_files = self.safe_update_project_parser_files_partially(
project_parser_files
)

if self.manifest._parsing_info is None:
self.manifest._parsing_info = ParsingInfo()

if skip_parsing:
if self.skip_parsing:
fire_event(PartialParsingSkipParsing())
else:
# Load Macros and tests
Expand Down Expand Up @@ -556,7 +501,7 @@ def load(self) -> Manifest:

# Inject any available external nodes, reprocess refs if changes to the manifest were made.
external_nodes_modified = False
if skip_parsing:
if self.skip_parsing:
# If we didn't skip parsing, this will have already run because it must run
# before process_refs. If we did skip parsing, then it's possible that only
# external nodes have changed and we need to run this to capture that.
Expand All @@ -570,14 +515,76 @@ def load(self) -> Manifest:
)
# parent and child maps will be rebuilt by write_manifest

if not skip_parsing or external_nodes_modified:
if not self.skip_parsing or external_nodes_modified:
# write out the fully parsed manifest
self.write_manifest_for_partial_parse()

self.check_for_model_deprecations()

return self.manifest

def safe_update_project_parser_files_partially(self, project_parser_files: Dict) -> Dict:
if self.saved_manifest is None:
return project_parser_files

self.partial_parser = PartialParsing(self.saved_manifest, self.manifest.files) # type: ignore[arg-type]
self.skip_parsing = self.partial_parser.skip_parsing()
if self.skip_parsing:
# nothing changed, so we don't need to generate project_parser_files
self.manifest = self.saved_manifest # type: ignore[assignment]
else:
# create child_map and parent_map
self.saved_manifest.build_parent_and_child_maps() # type: ignore[union-attr]
# create group_map
self.saved_manifest.build_group_map() # type: ignore[union-attr]
# files are different, we need to create a new set of
# project_parser_files.
try:
project_parser_files = self.partial_parser.get_parsing_files()
self.partially_parsing = True
self.manifest = self.saved_manifest # type: ignore[assignment]
except Exception as exc:
# pp_files should still be the full set and manifest is new manifest,
# since get_parsing_files failed
fire_event(
UnableToPartialParse(reason="an error occurred. Switching to full reparse.")
)

# Get traceback info
tb_info = traceback.format_exc()
# index last stack frame in traceback (i.e. lastest exception and its context)
tb_last_frame = traceback.extract_tb(exc.__traceback__)[-1]
exc_info = {
"traceback": tb_info,
"exception": tb_info.splitlines()[-1],
"code": tb_last_frame.line, # if the source is not available, it is None
"location": f"line {tb_last_frame.lineno} in {tb_last_frame.name}",
}

# get file info for local logs
parse_file_type: str = ""
file_id = self.partial_parser.processing_file
if file_id:
source_file = None
if file_id in self.saved_manifest.files:
source_file = self.saved_manifest.files[file_id]
elif file_id in self.manifest.files:
source_file = self.manifest.files[file_id]
if source_file:
parse_file_type = source_file.parse_file_type
fire_event(PartialParsingErrorProcessingFile(file=file_id))
exc_info["parse_file_type"] = parse_file_type
fire_event(PartialParsingError(exc_info=exc_info))
# Send event
if dbt.tracking.active_user is not None:
exc_info["full_reparse_reason"] = ReparseReason.exception
dbt.tracking.track_partial_parser(exc_info)

if os.environ.get("DBT_PP_TEST"):
raise exc

return project_parser_files

def check_for_model_deprecations(self):
for node in self.manifest.nodes.values():
if isinstance(node, ModelNode):
Expand Down
43 changes: 43 additions & 0 deletions tests/unit/test_parse_manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,3 +121,46 @@ def test_partial_parse_file_path(self, patched_open, patched_os_exist, patched_s
ManifestLoader(mock_project, {})
# if specified in flags, we use the specified path
patched_open.assert_called_with("specified_partial_parse_path", "rb")


class TestFailedPartialParse(unittest.TestCase):
@patch("dbt.tracking.track_partial_parser")
@patch("dbt.tracking.active_user")
@patch("dbt.parser.manifest.PartialParsing")
@patch("dbt.parser.manifest.ManifestLoader.read_manifest_for_partial_parse")
@patch("dbt.parser.manifest.ManifestLoader.build_manifest_state_check")
def test_partial_parse_safe_update_project_parser_files_partially(
self,
patched_state_check,
patched_read_manifest_for_partial_parse,
patched_partial_parsing,
patched_active_user,
patched_track_partial_parser,
):
mock_instance = MagicMock()
mock_instance.skip_parsing.return_value = False
mock_instance.get_parsing_files.side_effect = KeyError("Whoopsie!")
patched_partial_parsing.return_value = mock_instance

mock_project = MagicMock(RuntimeConfig)
mock_project.project_target_path = "mock_target_path"

mock_saved_manifest = MagicMock(Manifest)
mock_saved_manifest.files = {}
patched_read_manifest_for_partial_parse.return_value = mock_saved_manifest

set_from_args(Namespace(), {})
loader = ManifestLoader(mock_project, {})
loader.safe_update_project_parser_files_partially({})

patched_track_partial_parser.assert_called_once()
exc_info = patched_track_partial_parser.call_args[0][0]
self.assertIn("traceback", exc_info)
self.assertIn("exception", exc_info)
self.assertIn("code", exc_info)
self.assertIn("location", exc_info)
self.assertIn("full_reparse_reason", exc_info)
self.assertEqual("KeyError: 'Whoopsie!'", exc_info["exception"])
self.assertTrue(
isinstance(exc_info["code"], str) or isinstance(exc_info["code"], type(None))
)

0 comments on commit 2c165fc

Please sign in to comment.