diff --git a/CHANGELOG.adoc b/CHANGELOG.adoc index f46fcef40..b86a6e6fe 100644 --- a/CHANGELOG.adoc +++ b/CHANGELOG.adoc @@ -11,6 +11,11 @@ For a detailed view of what has changed, refer to the {uri-repo}/commits/master[ == v1.2.0 - 2021-09-?? +=== Backwards Compatibility Changes + +* Collected files are now stored under the SHA-256 hash of their content as the file name, instead of the SHA-1 hash ({uri-issue}389[#389]) +* The log field `shasum` now holds the SHA-256 hash value of files instead of SHA-1 ({uri-issue}389[#389]) + === Security * Backported security fixes from rdesktop to our Python C extension doing RLE processing. diff --git a/README.md b/README.md index c945208a5..75828cc0c 100644 --- a/README.md +++ b/README.md @@ -242,7 +242,7 @@ pyrdp_output/ │   ├── WinDev2108Eval.crt │   └── WinDev2108Eval.pem ├── files -│   ├── 3dc9575a72ea896a3a910af8f4e43c92939a4421 +│   ├── e91c6a5eb3ca15df5a5cb4cf4ebb6f33b2d379a3a12d7d6de8c412d4323feb4c ├── filesystems │   ├── Kimberly835337 │   │   └── device1 @@ -265,7 +265,7 @@ pyrdp_output/ ``` * `certs/` contains the certificates generated stored using the `CN` of the certificate as the file name -* `files/` contains all files captured and are deduplicated by saving them using the SHA1 hash of the content as the filename +* `files/` contains all captured files, deduplicated by saving each one under the SHA-256 hash of its content as the filename * `filesystems/` contains a recreation of the filesystem of the targets classified by session IDs. To save space on similar sessions, files are symbolic links to the actual files under `files/`. 
* `logs/` contains all the various logs with most in both JSON and plaintext formats: diff --git a/pyrdp/mitm/FileMapping.py b/pyrdp/mitm/FileMapping.py index 40515890b..896e20459 100644 --- a/pyrdp/mitm/FileMapping.py +++ b/pyrdp/mitm/FileMapping.py @@ -40,9 +40,13 @@ def write(self, data: bytes): self.file.write(data) self.written = True - def getSha1Hash(self): + def getShaHash(self): with open(self.dataPath, "rb") as f: - sha1 = hashlib.sha1() + # Note: In early 2022 we switched to sha256 for file hashes. If you + # want to use sha1, uncomment the next line and comment the + # other one below. + #hash = hashlib.sha1() + hash = hashlib.sha256() while True: buffer = f.read(65536) @@ -50,9 +54,9 @@ def getSha1Hash(self): if len(buffer) == 0: break - sha1.update(buffer) + hash.update(buffer) - return sha1.hexdigest() + return hash.hexdigest() def finalize(self): if self.file.closed: @@ -61,7 +65,7 @@ def finalize(self): self.log.debug("Closing file %(path)s", {"path": self.dataPath}) self.file.close() - fileHash = self.getSha1Hash() + fileHash = self.getShaHash() # Go up one directory because files are saved to outDir / tmp while we're downloading them hashPath = (self.dataPath.parents[1] / fileHash) @@ -82,7 +86,7 @@ def finalize(self): # Make the symlink relative so you can move the output folder around and it will still work. 
self.filesystemPath.symlink_to(Path(os.path.relpath(hashPath, self.filesystemPath.parent))) - self.log.info("SHA1 '%(path)s' = '%(shasum)s'", { + self.log.info("SHA-256 '%(path)s' = '%(shasum)s'", { "path": str(self.filesystemPath.relative_to(self.filesystemDir)), "shasum": fileHash }) diff --git a/test/test_FileMapping.py b/test/test_FileMapping.py index 58c00f77d..b2db606d1 100644 --- a/test/test_FileMapping.py +++ b/test/test_FileMapping.py @@ -22,7 +22,7 @@ def setUp(self): def createMapping(self, mkdir: MagicMock, mkstemp: MagicMock, mock_open_object): mkstemp.return_value = (1, str(self.outDir / "tmp" / "tmp_test")) mapping = FileMapping.generate("/test", self.outDir, Path("filesystems"), self.log) - mapping.getSha1Hash = Mock(return_value = self.hash) + mapping.getShaHash = Mock(return_value = self.hash) mapping.file.closed = False return mapping, mkdir, mkstemp, mock_open_object