Skip to content

Commit

Permalink
SEC: Use secrets instead of random (#1748)
Browse files Browse the repository at this point in the history
The implication of this is that PDFs which were encrypted with earlier versions of pypdf might be less secure than they should be.

It's unclear to me whether "less secure" means "insecure" in practice or whether the weakness is only theoretical.
  • Loading branch information
MartinThoma authored Mar 26, 2023
1 parent b0d92b3 commit c75bb16
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 10 deletions.
18 changes: 13 additions & 5 deletions pypdf/_encryption.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
# POSSIBILITY OF SUCH DAMAGE.

import hashlib
import random
import secrets
import struct
from enum import IntEnum
from typing import Any, Dict, Optional, Tuple, Union, cast
Expand Down Expand Up @@ -56,6 +56,14 @@ class CryptIdentity(CryptBase):
pass


def _randrange(lower_inclusive: int, upper_exclusive: int) -> int:
return secrets.choice(range(lower_inclusive, upper_exclusive))


def _randint(lower_inclusive: int, upper_inclusive: int) -> int:
return secrets.choice(range(lower_inclusive, upper_inclusive + 1))


try:
from Crypto.Cipher import AES, ARC4 # type: ignore[import]
from Crypto.Util.Padding import pad # type: ignore[import]
Expand All @@ -75,7 +83,7 @@ def __init__(self, key: bytes) -> None:
self.key = key

def encrypt(self, data: bytes) -> bytes:
iv = bytes(bytearray(random.randint(0, 255) for _ in range(16)))
iv = bytes(bytearray(_randint(0, 255) for _ in range(16)))
p = 16 - len(data) % 16
data += bytes(bytearray(p for _ in range(p)))
aes = AES.new(self.key, AES.MODE_CBC, iv)
Expand Down Expand Up @@ -773,7 +781,7 @@ def compute_U_value(password: bytes, key: bytes) -> Tuple[bytes, bytes]:
Returns:
A tuple (u-value, ue value)
"""
random_bytes = bytes(random.randrange(0, 256) for _ in range(16))
random_bytes = bytes(_randrange(0, 256) for _ in range(16))
val_salt = random_bytes[:8]
key_salt = random_bytes[8:]
u_value = hashlib.sha256(password + val_salt).digest() + val_salt + key_salt
Expand Down Expand Up @@ -816,7 +824,7 @@ def compute_O_value(
Returns:
A tuple (O value, OE value)
"""
random_bytes = bytes(random.randrange(0, 256) for _ in range(16))
random_bytes = bytes(_randrange(0, 256) for _ in range(16))
val_salt = random_bytes[:8]
key_salt = random_bytes[8:]
o_value = (
Expand Down Expand Up @@ -861,7 +869,7 @@ def compute_Perms_value(key: bytes, p: int, metadata_encrypted: bool) -> bytes:
The perms value
"""
b8 = b"T" if metadata_encrypted else b"F"
rr = bytes(random.randrange(0, 256) for _ in range(4))
rr = bytes(_randrange(0, 256) for _ in range(4))
data = struct.pack("<I", p) + b"\xff\xff\xff\xff" + b8 + b"adb" + rr
perms = AES_ECB_encrypt(key, data)
return perms
Expand Down
7 changes: 5 additions & 2 deletions pypdf/_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@
import decimal
import enum
import logging
import random
import re
import secrets
import struct
import time
import uuid
Expand Down Expand Up @@ -1072,10 +1072,13 @@ def encrypt(
V = 1
rev = 2
keylen = int(40 / 8)
secrets_generator = secrets.SystemRandom()
P = permissions_flag
O = ByteStringObject(_alg33(owner_password, user_password, rev, keylen)) # type: ignore[arg-type] # noqa
ID_1 = ByteStringObject(md5((repr(time.time())).encode("utf8")).digest())
ID_2 = ByteStringObject(md5((repr(random.random())).encode("utf8")).digest())
ID_2 = ByteStringObject(
md5((repr(secrets_generator.uniform(0, 1))).encode("utf8")).digest()
)
self._ID = ArrayObject((ID_1, ID_2))
if rev == 2:
U, key = _alg34(user_password, O, P, ID_1)
Expand Down
14 changes: 13 additions & 1 deletion tests/test_encryption.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import pypdf
from pypdf import PasswordType, PdfReader
from pypdf._encryption import AlgV5, CryptRC4
from pypdf._encryption import AlgV5, CryptRC4, _randint, _randrange
from pypdf.errors import DependencyError, PdfReadError

try:
Expand Down Expand Up @@ -205,3 +205,15 @@ def test_generate_values():
"/OE": values["/OE"],
"/Perms": values["/Perms"],
}


def test_randrange():
    """Check that _randrange(0, 10) yields every value from 0 to 9 and nothing else."""
    # Probabilistic check: with 1000 draws over 10 values, a missing value is
    # possible but extremely unlikely.
    observed = set()
    for _ in range(1000):
        observed.add(_randrange(0, 10))
    assert observed == set(range(10))


def test_randint():
    """Check that _randint(0, 10) yields every value from 0 to 10 and nothing else."""
    # Probabilistic check: with 1000 draws over 11 values, a missing value is
    # possible but extremely unlikely.
    observed = set()
    for _ in range(1000):
        observed.add(_randint(0, 10))
    assert observed == set(range(11))
4 changes: 2 additions & 2 deletions tests/test_page.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
"""Test the pypdf._page module."""
import json
import random
from copy import deepcopy
from io import BytesIO
from pathlib import Path
from random import shuffle
from typing import List, Tuple

import pytest
Expand Down Expand Up @@ -915,7 +915,7 @@ def test_merge_page_reproducible_with_proc_set():
ordered = sorted(NameObject(f"/{x}") for x in range(20))

shuffled = list(ordered)
random.shuffle(shuffled)
shuffle(shuffled)

# each page has some overlap in their /ProcSet, and they're in a weird order
page1[NameObject("/Resources")][NameObject("/ProcSet")] = ArrayObject(shuffled[:15])
Expand Down

0 comments on commit c75bb16

Please sign in to comment.