py-pdf · exiledkingcc · Jul 22, 2023 · Sep 11, 2023 · Dec 23, 2023 · Dec 23, 2023
diff --git a/pypdf/_writer.py b/pypdf/_writer.py
@@ -33,6 +33,7 @@
 import enum
 import hashlib
 import re
+import time
 import uuid
 import warnings
 from io import BytesIO, FileIO, IOBase
@@ -145,13 +146,6 @@ class ObjectDeletionFlag(enum.IntFlag):
     IMAGES = XOBJECT_IMAGES | INLINE_IMAGES | DRAWING_IMAGES
 
 
-def _rolling_checksum(stream: BytesIO, blocksize: int = 65536) -> str:
-    hash = hashlib.md5()
-    for block in iter(lambda: stream.read(blocksize), b""):
-        hash.update(block)
-    return hash.hexdigest()
-
-
 class PdfWriter:
     """
     Write a PDF file out, given pages produced by another class.
@@ -1223,24 +1217,34 @@ def cloneDocumentFromReader(
         )
         self.clone_document_from_reader(reader, after_page_append)
 
-    def _compute_document_identifier_from_content(self) -> ByteStringObject:
-        stream = BytesIO()
-        self._write_pdf_structure(stream)
-        stream.seek(0)
-        return ByteStringObject(_rolling_checksum(stream).encode("utf8"))
+    def _compute_document_identifier(self) -> ByteStringObject:
+        md5 = hashlib.md5()
+        md5.update(str(time.time()).encode("utf-8"))
+        md5.update(str(self.fileobj).encode("utf-8"))
+        md5.update(str(len(self._objects)).encode("utf-8"))
+        if hasattr(self, "_info"):
+            for k, v in cast(DictionaryObject, self._info.get_object()).items():
+                md5.update(f"{k}={v}".encode())
+        return ByteStringObject(md5.hexdigest().encode("utf-8"))
 
     def generate_file_identifiers(self) -> None:
         """
         Generate an identifier for the PDF that will be written.
 
         The only point of this is ensuring uniqueness. Reproducibility is not
-        required; see 14.4 "File Identifiers".
+        required.
+        When a file is first written, both identifiers shall be set to the same value.
+        If both identifiers match when a file reference is resolved, it is very
+        likely that the correct and unchanged file has been found. If only the first
+        identifier matches, a different version of the correct file has been found.
+        See 14.4 "File Identifiers" in the PDF specification.
         """
         if self._ID:
             id1 = self._ID[0]
+            id2 = self._compute_document_identifier()
         else:
-            id1 = self._compute_document_identifier_from_content()
-        id2 = self._compute_document_identifier_from_content()
+            id1 = self._compute_document_identifier()
+            id2 = ByteStringObject(id1.original_bytes)
         self._ID = ArrayObject((id1, id2))
 
     def encrypt(
@@ -1325,8 +1329,9 @@ def encrypt(
             if not use_128bit:
                 alg = EncryptAlgorithm.RC4_40
         self.generate_file_identifiers()
-        assert self._ID
-        self._encryption = Encryption.make(alg, permissions_flag, self._ID[0])
+        self._encryption = Encryption.make(
+            alg, permissions_flag, cast(ArrayObject, self._ID)[0]
+        )
         # in case call `encrypt` again
         entry = self._encryption.write_entry(user_password, owner_password)
         if self._encrypt_entry: