Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

dotnet update: emit enclosing class information for nested classes #1913

Merged
merged 50 commits into from
Jan 5, 2024
Merged
Show file tree
Hide file tree
Changes from 28 commits
Commits
Show all changes
50 commits
Select commit Hold shift + click to select a range
3f488f3
Update helpers.py
bkojusner Dec 20, 2023
94346e4
Update helpers.py
bkojusner Dec 20, 2023
8049bdc
TypeRef correction in helpers.py
bkojusner Dec 20, 2023
46103b5
Fixed TypeRef to proper functionality
bkojusner Dec 20, 2023
b2621c7
Accounts for TypeRef updated tuple
bkojusner Dec 20, 2023
94a8b65
Corrected TypeDef tuple creation in helpers.py
bkojusner Dec 20, 2023
eefdfcf
Update types.py
bkojusner Dec 21, 2023
36110d5
Update types.py
bkojusner Dec 21, 2023
73c8db7
Create helpers_draft.py
bkojusner Dec 23, 2023
bfcbb0d
Update capa/features/extractors/dnfile/helpers.py
bkojusner Dec 23, 2023
30267c0
Update helper functions, variables, and draft further implementations
bkojusner Dec 23, 2023
4f05fc6
Update helpers.py
bkojusner Dec 26, 2023
43e972a
Update types.py
bkojusner Dec 26, 2023
7381a1c
Directly access TypeDef and TypeRef tables
bkojusner Dec 27, 2023
b943ebb
Update helpers.py
bkojusner Dec 27, 2023
931cd84
Update helpers.py
bkojusner Dec 27, 2023
a1ea3f8
Delete capa/features/extractors/dnfile/helpers_draft.py
bkojusner Dec 27, 2023
0150b17
Update types.py
bkojusner Dec 27, 2023
9966ca3
Update dotnetfile.py
bkojusner Dec 27, 2023
8f16a57
Update types.py comment
bkojusner Jan 2, 2024
6257203
Clean extract_file_class_features in dotnetfile.py
bkojusner Jan 2, 2024
74abe41
Cleaned up callers, var names, and other small items
bkojusner Jan 2, 2024
66f01c0
Update dotnetfile.py
bkojusner Jan 2, 2024
f8a97cf
Clean up caller logic in dotnetfile.py
bkojusner Jan 3, 2024
bb381e5
Clean up callers and update helper logic in helpers.py
bkojusner Jan 3, 2024
bda8727
Linter corrections for types.py
bkojusner Jan 3, 2024
8679964
Linter corrections for dotnetfile.py
bkojusner Jan 3, 2024
531a35e
Linter corrections and caller functions cleanup for helpers.py
bkojusner Jan 3, 2024
b0c90de
Update capa/features/extractors/dnfile/helpers.py
bkojusner Jan 4, 2024
ba0ecbd
Update capa/features/extractors/dnfile/helpers.py
bkojusner Jan 4, 2024
f97f7f5
Update capa/features/extractors/dnfile/helpers.py
bkojusner Jan 4, 2024
807fc1f
Update capa/features/extractors/dnfile/helpers.py
bkojusner Jan 4, 2024
abccf7d
Update capa/features/extractors/dnfile/helpers.py
bkojusner Jan 4, 2024
a1b9319
Update capa/features/extractors/dnfile/helpers.py
bkojusner Jan 4, 2024
d9800b7
Update capa/features/extractors/dnfile/helpers.py
bkojusner Jan 4, 2024
9ccdd01
Update capa/features/extractors/dnfile/helpers.py
bkojusner Jan 4, 2024
d84f2b1
Update capa/features/extractors/dnfile/helpers.py
bkojusner Jan 4, 2024
62533f7
Update helpers.py
bkojusner Jan 4, 2024
1dd923a
Update dotnetfile.py
bkojusner Jan 4, 2024
b71c8ea
Update tuple type in types.py
bkojusner Jan 4, 2024
500ded3
Update dotnetfile.py
bkojusner Jan 4, 2024
465cb35
Update return value annotations in helpers.py
bkojusner Jan 4, 2024
c3b8e26
Linting update types.py
bkojusner Jan 4, 2024
89e7878
Linting update dotnetfile.py
bkojusner Jan 4, 2024
ee6f745
Added unit tests to fixtures.py
bkojusner Jan 4, 2024
3194caa
Update types.py
bkojusner Jan 5, 2024
aff5a13
Linting fix for types.py
bkojusner Jan 5, 2024
38ee13d
Update CHANGELOG.md
bkojusner Jan 5, 2024
edbd336
Merge branch 'master' into master
bkojusner Jan 5, 2024
3c6c82d
Small changes to return types in helpers.py
bkojusner Jan 5, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 117 additions & 6 deletions capa/features/extractors/dnfile/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,10 +131,14 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[DnType]:
# remove get_/set_ from MemberRef name
member_ref_name = member_ref_name[4:]

typerefnamespace, typerefname = resolve_nested_typeref_helper(
member_ref.Class.row_index, member_ref.Class.row, pe
)

yield DnType(
token,
member_ref.Class.row.TypeName,
namespace=member_ref.Class.row.TypeNamespace,
typerefname,
namespace=typerefnamespace,
member=member_ref_name,
access=access,
)
Expand Down Expand Up @@ -188,6 +192,8 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]:
TypeNamespace (index into String heap)
MethodList (index into MethodDef table; it marks the first of a contiguous run of Methods owned by this Type)
"""
nested_class_table = enclosing_and_nested_classes_index_table(pe)

accessor_map: Dict[int, str] = {}
for methoddef, methoddef_access in get_dotnet_methoddef_property_accessors(pe):
accessor_map[methoddef] = methoddef_access
Expand All @@ -211,7 +217,9 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]:
# remove get_/set_
method_name = method_name[4:]

yield DnType(token, typedef.TypeName, namespace=typedef.TypeNamespace, member=method_name, access=access)
typedefnamespace, typedefname = resolve_nested_typedef_name(nested_class_table, rid, typedef, pe)

yield DnType(token, typedefname, namespace=typedefnamespace, member=method_name, access=access)


def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]:
Expand All @@ -225,6 +233,8 @@ def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]:
TypeNamespace (index into String heap)
FieldList (index into Field table; it marks the first of a contiguous run of Fields owned by this Type)
"""
nested_class_table = enclosing_and_nested_classes_index_table(pe)

for rid, typedef in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number):
assert isinstance(typedef, dnfile.mdtable.TypeDefRow)

Expand All @@ -235,8 +245,11 @@ def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]:
if field.row is None:
logger.debug("TypeDef[0x%X] FieldList[0x%X] row is None", rid, idx)
continue

typedefnamespace, typedefname = resolve_nested_typedef_name(nested_class_table, rid, typedef, pe)

token: int = calculate_dotnet_token_value(field.table.number, field.row_index)
yield DnType(token, typedef.TypeName, namespace=typedef.TypeNamespace, member=field.row.Name)
yield DnType(token, typedefname, namespace=typedefnamespace, member=field.row.Name)


def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[Tuple[int, CilMethodBody]]:
Expand Down Expand Up @@ -300,19 +313,117 @@ def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[DnUnmanagedMethod]
yield DnUnmanagedMethod(token, module, method)


def get_dotnet_table_row(pe: dnfile.dnPE, table_index: int, row_index: int):
    """Fetch a row from a .NET metadata table by its 1-based row index.

    Args:
      pe: parsed .NET module; `pe.net` and `pe.net.mdtables` must not be None.
      table_index: key of the wanted table in `pe.net.mdtables.tables`.
      row_index: 1-based index of the wanted row (row 1 is the first row).

    Returns:
      the row, or None when the table is absent or `row_index` does not name
      a row of that table.
    """
    assert pe.net is not None
    assert pe.net.mdtables is not None

    # Row indexes are 1-based (the lookup below subtracts one), so anything
    # below 1 cannot name a row. Rejecting it here also keeps Python's negative
    # indexing from silently returning a row from the end of the table.
    # Row 1 itself is valid and must be returned.
    if row_index < 1:
        return None

    # a missing table behaves like an empty one: every lookup raises IndexError
    table = pe.net.mdtables.tables.get(table_index, [])
    try:
        return table[row_index - 1]
    except IndexError:
        return None


def resolve_nested_typedef_name(
    nested_class_table: dict, index: int, typedef: dnfile.mdtable.TypeDefRow, pe: dnfile.dnPE
):
    """Resolve the namespace and the (possibly nested) class name of a TypeDef row.

    Args:
      nested_class_table: maps the TypeDef row index of a nested class to the
        TypeDef row index of the class that encloses it.
      index: row index of `typedef` in the TypeDef table.
      typedef: the TypeDef row whose name is being resolved.
      pe: parsed .NET module whose TypeDef table is used to look up enclosing classes.

    Returns:
      a `(namespace, names)` pair where `names` is a tuple of class names ordered
      from the outermost enclosing class to `typedef` itself. For a class that is
      not nested this is `(typedef.TypeNamespace, (typedef.TypeName,))`. When the
      whole chain resolves, `namespace` is the namespace of the outermost class.
      When an enclosing row cannot be fetched, or the chain loops back on itself,
      the names collected so far are returned with `typedef.TypeNamespace`.
    """
    if index not in nested_class_table:
        return typedef.TypeNamespace, (typedef.TypeName,)

    # names are collected innermost-first and reversed on return
    names = [typedef.TypeName]

    # Row indexes already walked. The table comes from the analysed binary, so
    # a self-referencing or cyclic NestedClass chain must not hang the walk.
    visited = {index}

    while True:
        enclosing_index = nested_class_table[index]
        if enclosing_index in visited:
            return typedef.TypeNamespace, tuple(reversed(names))

        enclosing = get_dotnet_table_row(pe, dnfile.mdtable.TypeDef.number, enclosing_index)
        if enclosing is None:
            return typedef.TypeNamespace, tuple(reversed(names))

        names.append(enclosing.TypeName)

        if enclosing_index not in nested_class_table:
            # reached the root enclosing class; its namespace applies to the whole chain
            return enclosing.TypeNamespace, tuple(reversed(names))

        visited.add(enclosing_index)
        index = enclosing_index


def resolve_nested_typeref_helper(index: int, typeref: dnfile.mdtable.TypeRefRow, pe: dnfile.dnPE):
    """Resolve the namespace and the (possibly nested) class name of a TypeRef row.

    A TypeRef is treated as nested when its ResolutionScope points into the
    TypeRef table; the row it points at is the enclosing class.

    Args:
      index: unused, kept so existing callers keep working. Callers in this
        module pass either the row index of `typeref` or the row index of its
        ResolutionScope, so the value cannot be interpreted reliably; the chain
        is walked from `typeref.ResolutionScope` instead.
      typeref: the TypeRef row whose name is being resolved.
      pe: parsed .NET module whose TypeRef table is used to look up enclosing classes.

    Returns:
      a `(namespace, names)` pair where `names` is a tuple of class names ordered
      from the outermost enclosing class to `typeref` itself. For a class that is
      not nested this is `(typeref.TypeNamespace, (typeref.TypeName,))`. When the
      whole chain resolves, `namespace` is the namespace of the outermost class.
      When an enclosing row cannot be fetched, or the chain loops back on itself,
      the names collected so far are returned with `typeref.TypeNamespace`.
    """
    if not isinstance(typeref.ResolutionScope.table, dnfile.mdtable.TypeRef):
        return typeref.TypeNamespace, (typeref.TypeName,)

    # names are collected innermost-first and reversed on return
    names = [typeref.TypeName]

    # Enclosing row indexes already walked. The table comes from the analysed
    # binary, so a cyclic ResolutionScope chain must not hang the walk.
    visited = set()

    current = typeref
    while isinstance(current.ResolutionScope.table, dnfile.mdtable.TypeRef):
        enclosing_index = current.ResolutionScope.row_index
        if enclosing_index in visited:
            return typeref.TypeNamespace, tuple(reversed(names))
        visited.add(enclosing_index)

        enclosing = get_dotnet_table_row(pe, dnfile.mdtable.TypeRef.number, enclosing_index)
        if enclosing is None:
            return typeref.TypeNamespace, tuple(reversed(names))

        names.append(enclosing.TypeName)
        current = enclosing

    # `current` is now the root enclosing class; its namespace applies to the whole chain
    return current.TypeNamespace, tuple(reversed(names))


def enclosing_and_nested_classes_index_table(pe: dnfile.dnPE):
    """Map every nested class to its enclosing class using the NestedClass table.

    Returns a dict whose keys are the row indexes referenced by each row's
    NestedClass column and whose values are the row indexes referenced by the
    same row's EnclosingClass column.
    """
    enclosing_by_nested = {}

    # the row id of the NestedClass row itself is not needed, only its two columns
    for _, row in iter_dotnet_table(pe, dnfile.mdtable.NestedClass.number):
        assert isinstance(row, dnfile.mdtable.NestedClassRow)
        nested_index = row.NestedClass.row_index
        enclosing_by_nested[nested_index] = row.EnclosingClass.row_index

    return enclosing_by_nested


def get_dotnet_types(pe: dnfile.dnPE) -> Iterator[DnType]:
    """yield a DnType for each row of the TypeDef table, then each row of the TypeRef table

    class names are resolved through the nested-class helpers, so a nested class
    is emitted together with the names of the classes that enclose it
    """
    enclosing_by_nested = enclosing_and_nested_classes_index_table(pe)

    # classes defined by this module
    for rid, typedef in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number):
        assert isinstance(typedef, dnfile.mdtable.TypeDefRow)

        namespace, class_names = resolve_nested_typedef_name(enclosing_by_nested, rid, typedef, pe)
        yield DnType(
            calculate_dotnet_token_value(dnfile.mdtable.TypeDef.number, rid),
            class_names,
            namespace=namespace,
        )

    # classes referenced by this module
    for rid, typeref in iter_dotnet_table(pe, dnfile.mdtable.TypeRef.number):
        assert isinstance(typeref, dnfile.mdtable.TypeRefRow)

        scope_index = typeref.ResolutionScope.row_index
        namespace, class_names = resolve_nested_typeref_helper(scope_index, typeref, pe)
        yield DnType(
            calculate_dotnet_token_value(dnfile.mdtable.TypeRef.number, rid),
            class_names,
            namespace=namespace,
        )


def calculate_dotnet_token_value(table: int, rid: int) -> int:
Expand Down
12 changes: 8 additions & 4 deletions capa/features/extractors/dnfile/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@


class DnType:
def __init__(self, token: int, class_: str, namespace: str = "", member: str = "", access: Optional[str] = None):
def __init__(self, token: int, class_: tuple, namespace: str = "", member: str = "", access: Optional[str] = None):
bkojusner marked this conversation as resolved.
Show resolved Hide resolved
self.token: int = token
self.access: Optional[str] = access
self.namespace: str = namespace
self.class_: str = class_
self.class_: tuple = class_
bkojusner marked this conversation as resolved.
Show resolved Hide resolved

if member == ".ctor":
member = "ctor"
Expand Down Expand Up @@ -42,9 +42,13 @@ def __repr__(self):
return str(self)

@staticmethod
def format_name(class_: str, namespace: str = "", member: str = ""):
def format_name(class_: tuple, namespace: str = "", member: str = ""):
bkojusner marked this conversation as resolved.
Show resolved Hide resolved
if len(class_) > 1:
class_str = "/".join(class_) # Concat items in tuple, separated by a "/"
else:
class_str = "".join(class_) # Convert tuple to str
# like File::OpenRead
name: str = f"{class_}::{member}" if member else class_
name: str = f"{class_str}::{member}" if member else class_str
if namespace:
# like System.IO.File::OpenRead
name = f"{namespace}.{name}"
Expand Down
13 changes: 11 additions & 2 deletions capa/features/extractors/dotnetfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,11 @@
is_dotnet_mixed_mode,
get_dotnet_managed_imports,
get_dotnet_managed_methods,
resolve_nested_typedef_name,
calculate_dotnet_token_value,
get_dotnet_unmanaged_imports,
resolve_nested_typeref_helper,
enclosing_and_nested_classes_index_table,
)

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -92,19 +95,25 @@ def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple

def extract_file_class_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Class, Address]]:
    """yield a Class feature, addressed by metadata token, for each TypeDef and TypeRef row

    names are formatted with DnType.format_name after nested classes have been
    resolved to include their enclosing classes
    """
    enclosing_by_nested = enclosing_and_nested_classes_index_table(pe)

    # internal .NET classes
    for rid, typedef in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number):
        assert isinstance(typedef, dnfile.mdtable.TypeDefRow)

        namespace, class_names = resolve_nested_typedef_name(enclosing_by_nested, rid, typedef, pe)
        address = DNTokenAddress(calculate_dotnet_token_value(dnfile.mdtable.TypeDef.number, rid))
        yield Class(DnType.format_name(class_names, namespace=namespace)), address

    # external .NET classes
    for rid, typeref in iter_dotnet_table(pe, dnfile.mdtable.TypeRef.number):
        assert isinstance(typeref, dnfile.mdtable.TypeRefRow)

        scope_index = typeref.ResolutionScope.row_index
        namespace, class_names = resolve_nested_typeref_helper(scope_index, typeref, pe)
        address = DNTokenAddress(calculate_dotnet_token_value(dnfile.mdtable.TypeRef.number, rid))
        yield Class(DnType.format_name(class_names, namespace=namespace)), address


def extract_file_os(**kwargs) -> Iterator[Tuple[OS, Address]]:
Expand Down
Loading