From 277643f0bf9892790e61f5369108a78dbef7e37f Mon Sep 17 00:00:00 2001 From: exiledkingcc Date: Fri, 28 Jul 2023 04:12:07 +0800 Subject: [PATCH] ENH: Use `cryptography` for encryption/decryption as a fallback for PyCryptodome (#2000) Use the [`cryptography` package](https://pypi.org/project/cryptography/) (maintained by the Python Cryptographic Authority, pyca) as a fallback if [PyCryptodome](https://pypi.org/project/pycryptodome/) (maintained by Legrandin / Helder Eijs) is not installed. Closes #1998 --- .github/workflows/github-ci.yaml | 14 ++- pypdf/_crypt_providers/__init__.py | 76 +++++++++++++ pypdf/_crypt_providers/_base.py | 38 +++++++ pypdf/_crypt_providers/_cryptography.py | 109 +++++++++++++++++++ pypdf/_crypt_providers/_fallback.py | 90 ++++++++++++++++ pypdf/_crypt_providers/_pycryptodome.py | 94 ++++++++++++++++ pypdf/_encryption.py | 137 +++--------------------- pyproject.toml | 1 + tests/test_encryption.py | 36 +++++-- 9 files changed, 459 insertions(+), 136 deletions(-) create mode 100644 pypdf/_crypt_providers/__init__.py create mode 100644 pypdf/_crypt_providers/_base.py create mode 100644 pypdf/_crypt_providers/_cryptography.py create mode 100644 pypdf/_crypt_providers/_fallback.py create mode 100644 pypdf/_crypt_providers/_pycryptodome.py diff --git a/.github/workflows/github-ci.yaml b/.github/workflows/github-ci.yaml index 852d5ecb3..bf26836e5 100644 --- a/.github/workflows/github-ci.yaml +++ b/.github/workflows/github-ci.yaml @@ -22,10 +22,12 @@ jobs: strategy: matrix: python-version: ["3.6", "3.7", "3.8", "3.9", "3.10", "3.11"] - use-cryptodome: [""] + use-crypto-lib: ["pycryptodome"] include: + - python-version: "3.9" + use-crypto-lib: "cryptography" - python-version: "3.10" - use-cryptodome: "false" + use-crypto-lib: "" steps: - name: Checkout Code uses: actions/checkout@v3 @@ -62,10 +64,14 @@ jobs: run: | pip install -r requirements/ci-3.11.txt if: matrix.python-version == '3.11' - - name: Remove cryptodome + - name: Remove pycryptodome run: | pip uninstall pycryptodome -y - if: matrix.use-cryptodome == 'false' + if: matrix.use-crypto-lib != 'pycryptodome' + - name: Install cryptography + run: | + pip install cryptography + if: matrix.use-crypto-lib == 'cryptography' - name: Install pypdf run: | pip install . diff --git a/pypdf/_crypt_providers/__init__.py b/pypdf/_crypt_providers/__init__.py new file mode 100644 index 000000000..d1d5cb07e --- /dev/null +++ b/pypdf/_crypt_providers/__init__.py @@ -0,0 +1,76 @@ +# Copyright (c) 2023, exiledkingcc +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# * The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +from pypdf._crypt_providers._base import CryptBase, CryptIdentity + +try: + from pypdf._crypt_providers._pycryptodome import ( + CryptAES, + CryptRC4, + aes_cbc_decrypt, + aes_cbc_encrypt, + aes_ecb_decrypt, + aes_ecb_encrypt, + rc4_decrypt, + rc4_encrypt, + ) +except ImportError: + try: + from pypdf._crypt_providers._cryptography import ( # type: ignore + CryptAES, + CryptRC4, + aes_cbc_decrypt, + aes_cbc_encrypt, + aes_ecb_decrypt, + aes_ecb_encrypt, + rc4_decrypt, + rc4_encrypt, + ) + except ImportError: + from pypdf._crypt_providers._fallback import ( # type: ignore + CryptAES, + CryptRC4, + aes_cbc_decrypt, + aes_cbc_encrypt, + aes_ecb_decrypt, + aes_ecb_encrypt, + rc4_decrypt, + rc4_encrypt, + ) + +__all__ = [ + "CryptBase", + "CryptIdentity", + "CryptRC4", + "CryptAES", + "rc4_encrypt", + "rc4_decrypt", + "aes_ecb_encrypt", + "aes_ecb_decrypt", + "aes_cbc_encrypt", + "aes_cbc_decrypt", +] diff --git a/pypdf/_crypt_providers/_base.py b/pypdf/_crypt_providers/_base.py new file mode 100644 index 000000000..894025f3b --- /dev/null +++ b/pypdf/_crypt_providers/_base.py @@ -0,0 +1,38 @@ +# Copyright (c) 2023, exiledkingcc +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# * The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + + +class CryptBase: + def encrypt(self, data: bytes) -> bytes: # pragma: no cover + return data + + def decrypt(self, data: bytes) -> bytes: # pragma: no cover + return data + + +class CryptIdentity(CryptBase): + pass diff --git a/pypdf/_crypt_providers/_cryptography.py b/pypdf/_crypt_providers/_cryptography.py new file mode 100644 index 000000000..b7e126da0 --- /dev/null +++ b/pypdf/_crypt_providers/_cryptography.py @@ -0,0 +1,109 @@ +# Copyright (c) 2023, exiledkingcc +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# * The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +import secrets + +from cryptography.hazmat.primitives import padding # type: ignore[import] +from cryptography.hazmat.primitives.ciphers.algorithms import AES, ARC4 # type: ignore[import] +from cryptography.hazmat.primitives.ciphers.base import Cipher # type: ignore[import] +from cryptography.hazmat.primitives.ciphers.modes import CBC, ECB # type: ignore[import] + +from pypdf._crypt_providers._base import CryptBase + + +class CryptRC4(CryptBase): + def __init__(self, key: bytes) -> None: + self.cipher = Cipher(ARC4(key), mode=None) + + def encrypt(self, data: bytes) -> bytes: + encryptor = self.cipher.encryptor() + return encryptor.update(data) + encryptor.finalize() + + def decrypt(self, data: bytes) -> bytes: + decryptor = self.cipher.decryptor() + return decryptor.update(data) + decryptor.finalize() + + +class CryptAES(CryptBase): + def __init__(self, key: bytes) -> None: + self.alg = AES(key) + + def encrypt(self, data: bytes) -> bytes: + iv = secrets.token_bytes(16) + pad = padding.PKCS7(128).padder() + data = pad.update(data) + pad.finalize() + + cipher = Cipher(self.alg, CBC(iv)) + encryptor = cipher.encryptor() + return iv + encryptor.update(data) + encryptor.finalize() + + def decrypt(self, data: bytes) -> bytes: + iv = data[:16] + data = data[16:] + # for empty encrypted data + if not data: + return data + + # just for robustness, it does not happen under normal circumstances + if len(data) % 16 != 0: + pad = padding.PKCS7(128).padder() + data = pad.update(data) + pad.finalize() + + cipher = Cipher(self.alg, CBC(iv)) + decryptor = cipher.decryptor() + d = decryptor.update(data) + decryptor.finalize() + return d[: -d[-1]] + + +def rc4_encrypt(key: bytes, data: bytes) -> bytes: + encryptor = Cipher(ARC4(key), mode=None).encryptor() + return encryptor.update(data) + encryptor.finalize() + + +def rc4_decrypt(key: bytes, data: bytes) -> bytes: + decryptor = Cipher(ARC4(key), mode=None).decryptor() + return decryptor.update(data) + decryptor.finalize() + + +def aes_ecb_encrypt(key: bytes, data: bytes) -> bytes: + encryptor = Cipher(AES(key), mode=ECB()).encryptor() + return encryptor.update(data) + encryptor.finalize() + + +def aes_ecb_decrypt(key: bytes, data: bytes) -> bytes: + decryptor = Cipher(AES(key), mode=ECB()).decryptor() + return decryptor.update(data) + decryptor.finalize() + + +def aes_cbc_encrypt(key: bytes, iv: bytes, data: bytes) -> bytes: + encryptor = Cipher(AES(key), mode=CBC(iv)).encryptor() + return encryptor.update(data) + encryptor.finalize() + + +def aes_cbc_decrypt(key: bytes, iv: bytes, data: bytes) -> bytes: + decryptor = Cipher(AES(key), mode=CBC(iv)).decryptor() + return decryptor.update(data) + decryptor.finalize() diff --git a/pypdf/_crypt_providers/_fallback.py b/pypdf/_crypt_providers/_fallback.py new file mode 100644 index 000000000..b10319cf9 --- /dev/null +++ b/pypdf/_crypt_providers/_fallback.py @@ -0,0 +1,90 @@ +# Copyright (c) 2023, exiledkingcc +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# * The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +from pypdf._crypt_providers._base import CryptBase +from pypdf.errors import DependencyError + +_DEPENDENCY_ERROR_STR = "PyCryptodome is required for AES algorithm" + + +class CryptRC4(CryptBase): # type: ignore + def __init__(self, key: bytes) -> None: + self.S = bytearray(range(256)) + j = 0 + for i in range(256): + j = (j + self.S[i] + key[i % len(key)]) % 256 + self.S[i], self.S[j] = self.S[j], self.S[i] + + def encrypt(self, data: bytes) -> bytes: + S = bytearray(self.S) + out = [0 for _ in range(len(data))] + i, j = 0, 0 + for k in range(len(data)): + i = (i + 1) % 256 + j = (j + S[i]) % 256 + S[i], S[j] = S[j], S[i] + x = S[(S[i] + S[j]) % 256] + out[k] = data[k] ^ x + return bytes(bytearray(out)) + + def decrypt(self, data: bytes) -> bytes: + return self.encrypt(data) + + +class CryptAES(CryptBase): # type: ignore + def __init__(self, key: bytes) -> None: + pass + + def encrypt(self, data: bytes) -> bytes: + raise DependencyError(_DEPENDENCY_ERROR_STR) + + def decrypt(self, data: bytes) -> bytes: + raise DependencyError(_DEPENDENCY_ERROR_STR) + + +def rc4_encrypt(key: bytes, data: bytes) -> bytes: + return CryptRC4(key).encrypt(data) + + +def rc4_decrypt(key: bytes, data: bytes) -> bytes: + return CryptRC4(key).decrypt(data) + + +def aes_ecb_encrypt(key: bytes, data: bytes) -> bytes: + raise DependencyError(_DEPENDENCY_ERROR_STR) + + +def aes_ecb_decrypt(key: bytes, data: bytes) -> bytes: + raise DependencyError(_DEPENDENCY_ERROR_STR) + + +def aes_cbc_encrypt(key: bytes, iv: bytes, data: bytes) -> bytes: + raise DependencyError(_DEPENDENCY_ERROR_STR) + + +def aes_cbc_decrypt(key: bytes, iv: bytes, data: bytes) -> bytes: + raise DependencyError(_DEPENDENCY_ERROR_STR) diff --git a/pypdf/_crypt_providers/_pycryptodome.py b/pypdf/_crypt_providers/_pycryptodome.py new file mode 100644 index 000000000..33310670f --- /dev/null +++ b/pypdf/_crypt_providers/_pycryptodome.py @@ -0,0 +1,94 @@ +# Copyright (c) 2023, exiledkingcc +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# * The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +import secrets + +from Crypto.Cipher import AES, ARC4 # type: ignore[import] +from Crypto.Util.Padding import pad # type: ignore[import] + +from pypdf._crypt_providers._base import CryptBase + + +class CryptRC4(CryptBase): + def __init__(self, key: bytes) -> None: + self.key = key + + def encrypt(self, data: bytes) -> bytes: + return ARC4.ARC4Cipher(self.key).encrypt(data) + + def decrypt(self, data: bytes) -> bytes: + return ARC4.ARC4Cipher(self.key).decrypt(data) + + +class CryptAES(CryptBase): + def __init__(self, key: bytes) -> None: + self.key = key + + def encrypt(self, data: bytes) -> bytes: + iv = secrets.token_bytes(16) + data = pad(data, 16) + aes = AES.new(self.key, AES.MODE_CBC, iv) + return iv + aes.encrypt(data) + + def decrypt(self, data: bytes) -> bytes: + iv = data[:16] + data = data[16:] + # for empty encrypted data + if not data: + return data + + # just for robustness, it does not happen under normal circumstances + if len(data) % 16 != 0: + data = pad(data, 16) + + aes = AES.new(self.key, AES.MODE_CBC, iv) + d = aes.decrypt(data) + return d[: -d[-1]] + + +def rc4_encrypt(key: bytes, data: bytes) -> bytes: + return ARC4.ARC4Cipher(key).encrypt(data) + + +def rc4_decrypt(key: bytes, data: bytes) -> bytes: + return ARC4.ARC4Cipher(key).decrypt(data) + + +def aes_ecb_encrypt(key: bytes, data: bytes) -> bytes: + return AES.new(key, AES.MODE_ECB).encrypt(data) + + +def aes_ecb_decrypt(key: bytes, data: bytes) -> bytes: + return AES.new(key, AES.MODE_ECB).decrypt(data) + + +def aes_cbc_encrypt(key: bytes, iv: bytes, data: bytes) -> bytes: + return AES.new(key, AES.MODE_CBC, iv).encrypt(data) + + +def aes_cbc_decrypt(key: bytes, iv: bytes, data: bytes) -> bytes: + return AES.new(key, AES.MODE_CBC, iv).decrypt(data) diff --git a/pypdf/_encryption.py b/pypdf/_encryption.py index b7a65a9ee..ef7affc74 100644 --- a/pypdf/_encryption.py +++ b/pypdf/_encryption.py @@ -30,8 +30,20 @@ from enum import Enum, IntEnum from typing import Any, Dict, Optional, Tuple, Union, cast +from pypdf._crypt_providers import ( + CryptAES, + CryptBase, + CryptIdentity, + CryptRC4, +) +from pypdf._crypt_providers import aes_cbc_decrypt as AES_CBC_decrypt # noqa: N812 +from pypdf._crypt_providers import aes_cbc_encrypt as AES_CBC_encrypt # noqa: N812 +from pypdf._crypt_providers import aes_ecb_decrypt as AES_ECB_decrypt # noqa: N812 +from pypdf._crypt_providers import aes_ecb_encrypt as AES_ECB_encrypt # noqa: N812 +from pypdf._crypt_providers import rc4_decrypt as RC4_decrypt # noqa: N812 +from pypdf._crypt_providers import rc4_encrypt as RC4_encrypt # noqa: N812 + from ._utils import logger_warning -from .errors import DependencyError from .generic import ( ArrayObject, ByteStringObject, @@ -45,129 +57,6 @@ ) -class CryptBase: - def encrypt(self, data: bytes) -> bytes: # pragma: no cover - return data - - def decrypt(self, data: bytes) -> bytes: # pragma: no cover - return data - - -class CryptIdentity(CryptBase): - pass - - -try: - from Crypto.Cipher import AES, ARC4 # type: ignore[import] - from Crypto.Util.Padding import pad # type: ignore[import] - - class CryptRC4(CryptBase): - def __init__(self, key: bytes) -> None: - self.key = key - - def encrypt(self, data: bytes) -> bytes: - return ARC4.ARC4Cipher(self.key).encrypt(data) - - def decrypt(self, data: bytes) -> bytes: - return ARC4.ARC4Cipher(self.key).decrypt(data) - - class CryptAES(CryptBase): - def __init__(self, key: bytes) -> None: - self.key = key - - def encrypt(self, data: bytes) -> bytes: - iv = secrets.token_bytes(16) - p = 16 - len(data) % 16 - data += bytes(bytearray(p for _ in range(p))) - aes = AES.new(self.key, AES.MODE_CBC, iv) - return iv + aes.encrypt(data) - - def decrypt(self, data: bytes) -> bytes: - if len(data) == 0: - return data - iv = data[:16] - data = data[16:] - aes = AES.new(self.key, AES.MODE_CBC, iv) - if len(data) % 16: - data = pad(data, 16) - d = aes.decrypt(data) - if len(d) == 0: - return d - else: - return d[: -d[-1]] - - def RC4_encrypt(key: bytes, data: bytes) -> bytes: - return ARC4.ARC4Cipher(key).encrypt(data) - - def RC4_decrypt(key: bytes, data: bytes) -> bytes: - return ARC4.ARC4Cipher(key).decrypt(data) - - def AES_ECB_encrypt(key: bytes, data: bytes) -> bytes: - return AES.new(key, AES.MODE_ECB).encrypt(data) - - def AES_ECB_decrypt(key: bytes, data: bytes) -> bytes: - return AES.new(key, AES.MODE_ECB).decrypt(data) - - def AES_CBC_encrypt(key: bytes, iv: bytes, data: bytes) -> bytes: - return AES.new(key, AES.MODE_CBC, iv).encrypt(data) - - def AES_CBC_decrypt(key: bytes, iv: bytes, data: bytes) -> bytes: - return AES.new(key, AES.MODE_CBC, iv).decrypt(data) - -except ImportError: - - class CryptRC4(CryptBase): # type: ignore - def __init__(self, key: bytes) -> None: - self.S = list(range(256)) - j = 0 - for i in range(256): - j = (j + self.S[i] + key[i % len(key)]) % 256 - self.S[i], self.S[j] = self.S[j], self.S[i] - - def encrypt(self, data: bytes) -> bytes: - S = list(self.S) - out = [0 for _ in range(len(data))] - i, j = 0, 0 - for k in range(len(data)): - i = (i + 1) % 256 - j = (j + S[i]) % 256 - S[i], S[j] = S[j], S[i] - x = S[(S[i] + S[j]) % 256] - out[k] = data[k] ^ x - return bytes(bytearray(out)) - - def decrypt(self, data: bytes) -> bytes: - return self.encrypt(data) - - class CryptAES(CryptBase): # type: ignore - def __init__(self, key: bytes) -> None: - pass - - def encrypt(self, data: bytes) -> bytes: - raise DependencyError("PyCryptodome is required for AES algorithm") - - def decrypt(self, data: bytes) -> bytes: - raise DependencyError("PyCryptodome is required for AES algorithm") - - def RC4_encrypt(key: bytes, data: bytes) -> bytes: - return CryptRC4(key).encrypt(data) - - def RC4_decrypt(key: bytes, data: bytes) -> bytes: - return CryptRC4(key).decrypt(data) - - def AES_ECB_encrypt(key: bytes, data: bytes) -> bytes: - raise DependencyError("PyCryptodome is required for AES algorithm") - - def AES_ECB_decrypt(key: bytes, data: bytes) -> bytes: - raise DependencyError("PyCryptodome is required for AES algorithm") - - def AES_CBC_encrypt(key: bytes, iv: bytes, data: bytes) -> bytes: - raise DependencyError("PyCryptodome is required for AES algorithm") - - def AES_CBC_decrypt(key: bytes, iv: bytes, data: bytes) -> bytes: - raise DependencyError("PyCryptodome is required for AES algorithm") - - class CryptFilter: def __init__( self, stmCrypt: CryptBase, strCrypt: CryptBase, efCrypt: CryptBase diff --git a/pyproject.toml b/pyproject.toml index e99a9945e..425f6ecbd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -192,6 +192,7 @@ ignore = [ "tests/*" = ["S101", "ANN001", "ANN201","D104", "S105", "S106", "D103", "B018", "B017"] "sample-files/*" = ["D100", "INP001"] "_encryption.py" = ["S324", "S311"] +"_cryptography.py" = ["S304", "S305"] # Use of insecure cipher / modes, aka RC4 and AES-ECB "_writer.py" = ["S324"] "make_changelog.py" = ["T201", "S603", "S607"] "json_consistency.py" = ["T201"] diff --git a/tests/test_encryption.py b/tests/test_encryption.py index f9218831a..ff33d2121 100644 --- a/tests/test_encryption.py +++ b/tests/test_encryption.py @@ -1,4 +1,4 @@ -"""Test the pypdf.encryption module.""" +"""Test the pypdf._encryption module.""" import secrets from pathlib import Path @@ -6,16 +6,24 @@ import pypdf from pypdf import PasswordType, PdfReader, PdfWriter -from pypdf._encryption import AlgV5, CryptRC4 +from pypdf._encryption import AlgV5, CryptAES, CryptRC4 from pypdf.errors import DependencyError, PdfReadError try: from Crypto.Cipher import AES # noqa: F401 HAS_PYCRYPTODOME = True + HAS_CRYPTOGRAPHY = False except ImportError: HAS_PYCRYPTODOME = False + try: + from cryptography.hazmat.primitives import padding # noqa: F401 + + HAS_CRYPTOGRAPHY = True + except ImportError: + HAS_CRYPTOGRAPHY = False + TESTS_ROOT = Path(__file__).parent.resolve() PROJECT_ROOT = TESTS_ROOT.parent RESOURCE_ROOT = PROJECT_ROOT / "resources" @@ -75,13 +83,13 @@ def test_encryption(name, requires_pycryptodome): Encrypted PDFs are handled correctly. This test function ensures that: - - If PyCryptodome is not available and required, a DependencyError is raised + - If PyCryptodome or cryptography is not available and required, a DependencyError is raised - Encrypted PDFs are identified correctly - Decryption works for encrypted PDFs - Metadata is properly extracted from the decrypted PDF """ inputfile = RESOURCE_ROOT / "encryption" / name - if requires_pycryptodome and not HAS_PYCRYPTODOME: + if requires_pycryptodome and not HAS_PYCRYPTODOME and not HAS_CRYPTOGRAPHY: with pytest.raises(DependencyError) as exc: ipdf = pypdf.PdfReader(inputfile) ipdf.decrypt("asdfzxcv") @@ -117,7 +125,7 @@ def test_encryption(name, requires_pycryptodome): ("r6-both-passwords.pdf", "foo", "bar"), ], ) -@pytest.mark.skipif(not HAS_PYCRYPTODOME, reason="No pycryptodome") +@pytest.mark.skipif(not HAS_PYCRYPTODOME and not HAS_CRYPTOGRAPHY, reason="No pycryptodome / cryptography") def test_pdf_with_both_passwords(name, user_passwd, owner_passwd): """ PDFs with both user and owner passwords are handled correctly. @@ -143,7 +151,7 @@ def test_pdf_with_both_passwords(name, user_passwd, owner_passwd): ("crazyones-encrypted-256.pdf", b"password"), ], ) -@pytest.mark.skipif(not HAS_PYCRYPTODOME, reason="No pycryptodome") +@pytest.mark.skipif(not HAS_PYCRYPTODOME and not HAS_CRYPTOGRAPHY, reason="No pycryptodome / cryptography") def test_read_page_from_encrypted_file_aes_256(pdffile, password): """ A page can be read from an encrypted. @@ -168,7 +176,7 @@ def test_read_page_from_encrypted_file_aes_256(pdffile, password): ), ], ) -@pytest.mark.skipif(not HAS_PYCRYPTODOME, reason="No pycryptodome") +@pytest.mark.skipif(not HAS_PYCRYPTODOME and not HAS_CRYPTOGRAPHY, reason="No pycryptodome / cryptography") @pytest.mark.filterwarnings("ignore::DeprecationWarning") def test_merge_encrypted_pdfs(names): """Encrypted PDFs can be merged after decryption.""" @@ -183,6 +191,10 @@ def test_merge_encrypted_pdfs(names): merger.close() +@pytest.mark.skipif( + HAS_CRYPTOGRAPHY, + reason="Limitations of cryptography. see https://github.com/pyca/cryptography/issues/2494" +) @pytest.mark.parametrize( "cryptcls", [ @@ -265,7 +277,7 @@ def test_pdf_encrypt(pdf_file_path, alg, requires_pycryptodome): assert exc.value.args[0] == "algorithm 'ABCD' NOT supported" return - if requires_pycryptodome and not HAS_PYCRYPTODOME: + if requires_pycryptodome and not HAS_PYCRYPTODOME and not HAS_CRYPTOGRAPHY: with pytest.raises(DependencyError) as exc: writer.encrypt( user_password=user_password, @@ -332,3 +344,11 @@ def test_pdf_encrypt_multiple(pdf_file_path, count): page = reader.pages[0] text1 = page.extract_text() assert text0 == text1 + + +@pytest.mark.skipif(not HAS_PYCRYPTODOME and not HAS_CRYPTOGRAPHY, reason="No pycryptodome / cryptography") +def test_aes_decrypt_corrupted_data(): + """Just for robustness""" + aes = CryptAES(secrets.token_bytes(16)) + for num in [0, 17, 32]: + aes.decrypt(secrets.token_bytes(num))