Skip to content

Commit

Permalink
detect and use third-party analysis backends when possible (#2380)
Browse files Browse the repository at this point in the history
* introduce script to detect 3P backends

ref #2376

* add idalib backend

* binary ninja: search for API using XDG desktop entry

ref #2376

* binja: search more XDG locations for desktop entry

* binary ninja: optimize embedded PE scanning

closes #2397

* add script for comparing the performance of analysis backends
  • Loading branch information
williballenthin authored Sep 26, 2024
1 parent 12337be commit bcd57a9
Show file tree
Hide file tree
Showing 14 changed files with 820 additions and 74 deletions.
3 changes: 3 additions & 0 deletions .github/pyinstaller/pyinstaller.spec
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,10 @@ a = Analysis(
"qt5",
"pyqtwebengine",
"pyasn1",
# don't pull in Binary Ninja/IDA bindings that should
# only be installed locally.
"binaryninja",
"ida",
],
)

Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -127,3 +127,4 @@ Pipfile.lock
.github/binja/download_headless.py
.github/binja/BinaryNinja-headless.zip
justfile
data/
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@

### New Features

- add IDA v9.0 backend via idalib #2376 @williballenthin
- locate Binary Ninja API using XDG Desktop Entries #2376 @williballenthin

### Breaking Changes

### New Rules (7)
Expand Down
54 changes: 10 additions & 44 deletions capa/features/extractors/binja/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.

import struct
from typing import Tuple, Iterator

from binaryninja import Segment, BinaryView, SymbolType, SymbolBinding
Expand All @@ -20,56 +18,24 @@
from capa.features.extractors.binja.helpers import read_c_string, unmangle_c_name


def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[Tuple[int, int]]:
"""check segment for embedded PE
adapted for binja from:
https://github.com/vivisect/vivisect/blob/7be4037b1cecc4551b397f840405a1fc606f9b53/PE/carve.py#L19
"""
mz_xor = [
(
capa.features.extractors.helpers.xor_static(b"MZ", i),
capa.features.extractors.helpers.xor_static(b"PE", i),
i,
)
for i in range(256)
]

todo = []
# If this is the first segment of the binary, skip the first bytes. Otherwise, there will always be a matched
# PE at the start of the binaryview.
start = seg.start
if bv.view_type == "PE" and start == bv.start:
def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[Tuple[Feature, Address]]:
"""check segment for embedded PE"""
start = 0
if bv.view_type == "PE" and seg.start == bv.start:
# If this is the first segment of the binary, skip the first bytes.
# Otherwise, there will always be a matched PE at the start of the binaryview.
start += 1

for mzx, pex, i in mz_xor:
for off, _ in bv.find_all_data(start, seg.end, mzx):
todo.append((off, mzx, pex, i))

while len(todo):
off, mzx, pex, i = todo.pop()

# The MZ header has one field we will check e_lfanew is at 0x3c
e_lfanew = off + 0x3C

if seg.end < (e_lfanew + 4):
continue

newoff = struct.unpack("<I", capa.features.extractors.helpers.xor_static(bv.read(e_lfanew, 4), i))[0]

peoff = off + newoff
if seg.end < (peoff + 2):
continue
buf = bv.read(seg.start, seg.length)

if bv.read(peoff, 2) == pex:
yield off, i
for offset, _ in capa.features.extractors.helpers.carve_pe(buf, start):
yield Characteristic("embedded pe"), FileOffsetAddress(seg.start + offset)


def extract_file_embedded_pe(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
"""extract embedded PE features"""
for seg in bv.segments:
for ea, _ in check_segment_for_pe(bv, seg):
yield Characteristic("embedded pe"), FileOffsetAddress(ea)
yield from check_segment_for_pe(bv, seg)


def extract_file_export_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
Expand Down
160 changes: 152 additions & 8 deletions capa/features/extractors/binja/find_binja_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,31 +5,175 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import os
import sys
import logging
import subprocess
import importlib.util
from typing import Optional
from pathlib import Path

logger = logging.getLogger(__name__)


# When the script gets executed as a standalone executable (via PyInstaller), `import binaryninja` does not work because
# we have excluded the binaryninja module in `pyinstaller.spec`. The trick here is to call the system Python and try
# to find out the path of the binaryninja module that has been installed.
# Note, including the binaryninja module in the `pyinstaller.spec` would not work, since the binaryninja module tries to
# find the binaryninja core e.g., `libbinaryninjacore.dylib`, using a relative path. And this does not work when the
# binaryninja module is extracted by the PyInstaller.
code = r"""
CODE = r"""
from pathlib import Path
from importlib import util
spec = util.find_spec('binaryninja')
if spec is not None:
if len(spec.submodule_search_locations) > 0:
path = Path(spec.submodule_search_locations[0])
# encode the path with utf8 then convert to hex, make sure it can be read and restored properly
print(str(path.parent).encode('utf8').hex())
path = Path(spec.submodule_search_locations[0])
# encode the path with utf8 then convert to hex, make sure it can be read and restored properly
print(str(path.parent).encode('utf8').hex())
"""


def find_binja_path() -> Path:
raw_output = subprocess.check_output(["python", "-c", code]).decode("ascii").strip()
return Path(bytes.fromhex(raw_output).decode("utf8"))
def find_binaryninja_path_via_subprocess() -> Optional[Path]:
raw_output = subprocess.check_output(["python", "-c", CODE]).decode("ascii").strip()
output = bytes.fromhex(raw_output).decode("utf8")
if not output.strip():
return None
return Path(output)


def get_desktop_entry(name: str) -> Optional[Path]:
"""
Find the path for the given XDG Desktop Entry name.
Like:
>> get_desktop_entry("com.vector35.binaryninja.desktop")
Path("~/.local/share/applications/com.vector35.binaryninja.desktop")
"""
assert sys.platform in ("linux", "linux2")
assert name.endswith(".desktop")

data_dirs = os.environ.get("XDG_DATA_DIRS", "/usr/share") + f":{Path.home()}/.local/share"
for data_dir in data_dirs.split(":"):
applications = Path(data_dir) / "applications"
for application in applications.glob("*.desktop"):
if application.name == name:
return application

return None


def get_binaryninja_path(desktop_entry: Path) -> Optional[Path]:
# from: Exec=/home/wballenthin/software/binaryninja/binaryninja %u
# to: /home/wballenthin/software/binaryninja/
for line in desktop_entry.read_text(encoding="utf-8").splitlines():
if not line.startswith("Exec="):
continue

if not line.endswith("binaryninja %u"):
continue

binaryninja_path = Path(line[len("Exec=") : -len("binaryninja %u")])
if not binaryninja_path.exists():
return None

return binaryninja_path

return None


def validate_binaryninja_path(binaryninja_path: Path) -> bool:
if not binaryninja_path:
return False

module_path = binaryninja_path / "python"
if not module_path.is_dir():
return False

if not (module_path / "binaryninja" / "__init__.py").is_file():
return False

return True


def find_binaryninja() -> Optional[Path]:
binaryninja_path = find_binaryninja_path_via_subprocess()
if not binaryninja_path or not validate_binaryninja_path(binaryninja_path):
if sys.platform == "linux" or sys.platform == "linux2":
# ok
logger.debug("detected OS: linux")
elif sys.platform == "darwin":
logger.warning("unsupported platform to find Binary Ninja: %s", sys.platform)
return False
elif sys.platform == "win32":
logger.warning("unsupported platform to find Binary Ninja: %s", sys.platform)
return False
else:
logger.warning("unsupported platform to find Binary Ninja: %s", sys.platform)
return False

desktop_entry = get_desktop_entry("com.vector35.binaryninja.desktop")
if not desktop_entry:
logger.debug("failed to find Binary Ninja application")
return None
logger.debug("found Binary Ninja application: %s", desktop_entry)

binaryninja_path = get_binaryninja_path(desktop_entry)
if not binaryninja_path:
logger.debug("failed to determine Binary Ninja installation path")
return None

if not validate_binaryninja_path(binaryninja_path):
logger.debug("failed to validate Binary Ninja installation")
return None

logger.debug("found Binary Ninja installation: %s", binaryninja_path)

return binaryninja_path / "python"


def is_binaryninja_installed() -> bool:
"""Is the binaryninja module ready to import?"""
try:
return importlib.util.find_spec("binaryninja") is not None
except ModuleNotFoundError:
return False


def has_binaryninja() -> bool:
if is_binaryninja_installed():
logger.debug("found installed Binary Ninja API")
return True

logger.debug("Binary Ninja API not installed, searching...")

binaryninja_path = find_binaryninja()
if not binaryninja_path:
logger.debug("failed to find Binary Ninja installation")

logger.debug("found Binary Ninja API: %s", binaryninja_path)
return binaryninja_path is not None


def load_binaryninja() -> bool:
try:
import binaryninja

return True
except ImportError:
binaryninja_path = find_binaryninja()
if not binaryninja_path:
return False

sys.path.append(binaryninja_path.absolute().as_posix())
try:
import binaryninja # noqa: F401 unused import

return True
except ImportError:
return False


if __name__ == "__main__":
print(find_binja_path())
print(find_binaryninja_path_via_subprocess())
5 changes: 3 additions & 2 deletions capa/features/extractors/ida/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from typing import List, Tuple, Iterator

import idaapi
import ida_nalt

import capa.ida.helpers
import capa.features.extractors.elf
Expand All @@ -32,7 +31,9 @@ class IdaFeatureExtractor(StaticFeatureExtractor):
def __init__(self):
super().__init__(
hashes=SampleHashes(
md5=ida_nalt.retrieve_input_file_md5(), sha1="(unknown)", sha256=ida_nalt.retrieve_input_file_sha256()
md5=capa.ida.helpers.retrieve_input_file_md5(),
sha1="(unknown)",
sha256=capa.ida.helpers.retrieve_input_file_sha256(),
)
)
self.global_features: List[Tuple[Feature, Address]] = []
Expand Down
Loading

0 comments on commit bcd57a9

Please sign in to comment.