Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ntfs path hint #2702

Closed
wants to merge 7 commits into from
Closed
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 87 additions & 0 deletions plaso/parsers/ntfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from plaso.parsers import manager


# pylint: disable=too-many-instance-attributes,too-few-public-methods
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Which version of pylint did you use?

class NTFSFileStatEventData(events.EventData):
"""NTFS file system stat event data.

Expand All @@ -35,6 +36,7 @@ class NTFSFileStatEventData(events.EventData):
name (str): name associated with the stat event, e.g. that of
a $FILE_NAME attribute or None if not available.
parent_file_reference (int): NTFS file reference of the parent.
path_hint (str): A path to the NTFS file constructed from the `parent_file_reference`
"""

DATA_TYPE = 'fs:stat:ntfs'
Expand All @@ -49,6 +51,7 @@ def __init__(self):
self.is_allocated = None
self.name = None
self.parent_file_reference = None
self.path_hint = None


class NTFSUSNChangeEventData(events.EventData):
Expand Down Expand Up @@ -90,6 +93,13 @@ class NTFSMFTParser(interface.FileObjectParser):
_MFT_ATTRIBUTE_STANDARD_INFORMATION = 0x00000010
_MFT_ATTRIBUTE_FILE_NAME = 0x00000030
_MFT_ATTRIBUTE_OBJECT_ID = 0x00000040
_PATH_SEPARATOR = '/'
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why use the forward slash?

_PATH_NO_NAME_REPLACEMENT = '???'

def __init__(self):
  """Initializes the NTFS $MFT parser."""
  super(NTFSMFTParser, self).__init__()
  # Lookup table keyed by file reference; _CollectMFTEntryPathInfo stores a
  # (name, parent file reference) tuple per entry and _GetPathHint reads it.
  self.path_info = {}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not `self.path_info = {}`?


@classmethod
def GetFormatSpecification(cls):
Expand Down Expand Up @@ -158,6 +168,8 @@ def _ParseMFTAttribute(self, parser_mediator, mft_entry, mft_attribute):
name = getattr(mft_attribute, 'name', None)
parent_file_reference = getattr(
mft_attribute, 'parent_file_reference', None)
path_hint_list = self._GetPathHint(mft_entry.file_reference)
path_hint_list.reverse()

event_data = NTFSFileStatEventData()
event_data.attribute_type = mft_attribute.attribute_type
Expand All @@ -166,6 +178,7 @@ def _ParseMFTAttribute(self, parser_mediator, mft_entry, mft_attribute):
event_data.is_allocated = mft_entry.is_allocated()
event_data.name = name
event_data.parent_file_reference = parent_file_reference
event_data.path_hint = self._PATH_SEPARATOR.join(path_hint_list)

try:
creation_time = mft_attribute.get_creation_time_as_integer()
Expand Down Expand Up @@ -275,6 +288,70 @@ def _ParseMFTEntry(self, parser_mediator, mft_entry):
'unable to parse MFT attribute: {0:d} with error: {1!s}').format(
attribute_index, exception))

def _CollectMFTEntryPathInfo(self, mft_entry):
"""Records the name and parent file reference of an MFT entry.

The result is stored in self.path_info as a (name, parent file reference)
tuple keyed by the file reference of the entry. Entries that are empty or
that have a non-zero base record file reference are skipped.

Args:
mft_entry (pyfsntfs.file_entry): MFT entry.

Raises:
IOError: if MFT is not readable. NOTE(review): nothing in this method
raises explicitly; presumably this propagates from the pyfsntfs
calls - confirm.
"""

# Nothing to record for empty entries. NOTE(review): a non-zero base record
# file reference presumably marks an extension record of another entry -
# confirm against the pyfsntfs documentation.
if mft_entry.is_empty() or mft_entry.base_record_file_reference != 0:
return

entry_reference = mft_entry.file_reference
entry_parent_reference = None
entry_name = None

for attribute_index in range(0, mft_entry.number_of_attributes):
mft_attribute = mft_entry.get_attribute(attribute_index)
# Only $STANDARD_INFORMATION and $FILE_NAME attributes are inspected.
if mft_attribute.attribute_type in [
self._MFT_ATTRIBUTE_STANDARD_INFORMATION,
self._MFT_ATTRIBUTE_FILE_NAME]:

# getattr with a None default is used because not every inspected
# attribute exposes 'name' or 'parent_file_reference'.
# NOTE(review): indentation is lost in this view; it looks like the name
# and the parent reference are only (re)assigned while no non-empty name
# has been found yet - confirm.
if not entry_name:
entry_name = getattr(mft_attribute, 'name', None)
entry_parent_reference = getattr(
mft_attribute, 'parent_file_reference', None)

# A falsy file reference (None or 0) is never used as a key.
if entry_reference:
self.path_info[entry_reference] = (entry_name, entry_parent_reference)

def _GetPathHint(self, file_reference, path_parts=None):
"""Constructs a path hint for a MFT entry for a given
`file_reference` by looking up the parents and appending them to
`path_parts`. For that reason the result is a list in reverse
order!

Args:
file_reference (int): The `file_reference` from a mft_entry
path_parts (list): A list that gets appended the path parts in recursive calls

Returns:
list: List of parent path objects in reverse order
"""

if path_parts is None:
path_parts = []

if not file_reference or file_reference not in self.path_info.keys():
return path_parts

(reference_name, parent_reference) = self.path_info[file_reference]

if reference_name is not None:
path_parts.append(reference_name)
elif parent_reference is not None:
path_parts.append(self._PATH_NO_NAME_REPLACEMENT)

if file_reference != parent_reference:
self._GetPathHint(parent_reference, path_parts)

return path_parts

def ParseFileObject(self, parser_mediator, file_object):
"""Parses a NTFS $MFT metadata file-like object.

Expand All @@ -291,6 +368,16 @@ def ParseFileObject(self, parser_mediator, file_object):
parser_mediator.ProduceExtractionWarning(
'unable to open file with error: {0!s}'.format(exception))

# Collect path information in a first round of parsing
for entry_index in range(0, mft_metadata_file.number_of_file_entries):
try:
mft_entry = mft_metadata_file.get_file_entry(entry_index)
self._CollectMFTEntryPathInfo(mft_entry)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doing this for every MFT entry can be memory intensive. Technically, we only need to cache the parent path hints in `self.path_info`.

except IOError as exception:
# We ignore the exception as it will be raised again in the
# MFT entry processing below
pass

for entry_index in range(0, mft_metadata_file.number_of_file_entries):
try:
mft_entry = mft_metadata_file.get_file_entry(entry_index)
Expand Down