STY: Improve language, add docstrings, fix TODOs (#1772)

py-pdf · Apr 9, 2023 · 10cc057 · 10cc057
1 parent 117ce45
commit 10cc057
Show file tree

Hide file tree

Showing 9 changed files with 146 additions and 26 deletions.
diff --git a/README.md b/README.md
@@ -21,23 +21,22 @@ from PDFs as well.
 
 ## Installation
 
-You can install pypdf via pip:
+Install pypdf using pip:
 
 ```
 pip install pypdf
 ```
 
-If you plan to use pypdf for encrypting or decrypting PDFs that use AES, you
-will need to install some extra dependencies. Encryption using RC4 is supported
-using the regular installation.
+To use pypdf with AES encryption or decryption, install the extra dependencies:
 
 ```
 pip install pypdf[crypto]
 ```
 
-> **NOTE**: `pypdf>=3.1.0` improved a lot compared to `pyPdf<2.0.0` and compared to
-> `PyPDF2 < 2.0.0`. Please
-> read [the migration guide](https://pypdf.readthedocs.io/en/latest/user/migration-1-to-2.html).
+> **NOTE**: `pypdf` 3.1.0 and above include significant improvements compared to
+> previous versions. Please refer to [the migration
+> guide](https://pypdf.readthedocs.io/en/latest/user/migration-1-to-2.html) for
+> more information.
 
 ## Usage
 
@@ -51,19 +50,18 @@ text = page.extract_text()
 ```
 
 pypdf can do a lot more, e.g. splitting, merging, reading and creating
-annotations, decrypting and encrypting, and more.
+annotations, decrypting and encrypting, and more. Check out [the
+documentation](https://pypdf.readthedocs.io/en/stable/) for additional usage
+examples!
 
-Please see [the documentation](https://pypdf.readthedocs.io/en/stable/)
-for more usage examples!
-
-A lot of questions are asked and answered
-[on StackOverflow](https://stackoverflow.com/questions/tagged/pypdf)
-(formerly tagged with [PyPDF2](https://stackoverflow.com/questions/tagged/pypdf2)).
+For questions and answers, visit
+[StackOverflow](https://stackoverflow.com/questions/tagged/pypdf)
+(formerly tagged with [PyPDF2](https://stackoverflow.com/questions/tagged/pypdf2)).
 
 ## Contributions
 
-Maintaining pypdf is a collaborative effort. You can support pypdf by writing
-documentation, helping to narrow down issues, and adding code.
+Maintaining pypdf is a collaborative effort. You can support the project by
+writing documentation, helping to narrow down issues, and submitting code.
 
 ### Q&A
 

diff --git a/docs/user/cropping-and-transforming.md b/docs/user/cropping-and-transforming.md
@@ -1,5 +1,9 @@
 # Cropping and Transforming PDFs
 
+> **Notice**: Content that is no longer visible is not necessarily gone.
+> Cropping works by adjusting the viewbox, so content that was cropped
+> away can still be restored.
+
 ```python
 from pypdf import PdfWriter, PdfReader
 

diff --git a/pypdf/_reader.py b/pypdf/_reader.py
@@ -1273,8 +1273,8 @@ def _get_indirect_object(self, num: int, gen: int) -> Optional[PdfObject]:
         This is equivalent to generic.IndirectObject(num,gen,self).get_object()
 
         Args:
-            num:
-            gen:
+            num: The object number of the indirect object.
+            gen: The generation number of the indirect object.
 
         Returns:
             A PdfObject

diff --git a/pypdf/constants.py b/pypdf/constants.py
@@ -306,6 +306,17 @@ class FieldDictionaryAttributes:
 
     @classmethod
     def attributes(cls) -> Tuple[str, ...]:
+        """
+        Get a tuple of all the attributes present in a Field Dictionary.
+
+        This method returns a tuple of all the attribute constants defined in
+        the FieldDictionaryAttributes class. These attributes correspond to the
+        entries that are common to all field dictionaries as specified in the
+        PDF 1.7 reference.
+
+        Returns:
+            A tuple containing all the attribute constants.
+        """
         return (
             cls.TM,
             cls.T,
@@ -321,6 +332,18 @@ def attributes(cls) -> Tuple[str, ...]:
 
     @classmethod
     def attributes_dict(cls) -> Dict[str, str]:
+        """
+        Get a dictionary of attribute keys and their human-readable names.
+
+        This method returns a dictionary where the keys are the attribute
+        constants defined in the FieldDictionaryAttributes class and the values
+        are their corresponding human-readable names. These attributes
+        correspond to the entries that are common to all field dictionaries as
+        specified in the PDF 1.7 reference.
+
+        Returns:
+            A dictionary containing attribute keys and their names.
+        """
         return {
             cls.FT: "Field Type",
             cls.Parent: "Parent",
@@ -340,10 +363,33 @@ class CheckboxRadioButtonAttributes:
 
     @classmethod
     def attributes(cls) -> Tuple[str, ...]:
+        """
+        Get a tuple of the checkbox and radio button field attributes.
+
+        This method returns a tuple of all the attribute constants defined in
+        the CheckboxRadioButtonAttributes class. These attributes correspond to
+        the entries that are specific to checkbox and radio button fields as
+        specified in the PDF 1.7 reference.
+
+        Returns:
+            A tuple containing all the attribute constants.
+        """
         return (cls.Opt,)
 
     @classmethod
     def attributes_dict(cls) -> Dict[str, str]:
+        """
+        Get a dictionary of attribute keys and their human-readable names.
+
+        This method returns a dictionary where the keys are the attribute
+        constants defined in the CheckboxRadioButtonAttributes class and the
+        values are their corresponding human-readable names. These attributes
+        correspond to the entries that are specific to checkbox and radio
+        button fields as specified in the PDF 1.7 reference.
+
+        Returns:
+            A dictionary containing attribute keys and their names.
+        """
         return {
             cls.Opt: "Options",
         }
@@ -381,13 +427,35 @@ class PageLayouts:
 
 
 class GraphicsStateParameters:
-    """Table 4.8 of the 1.7 reference."""
+    """Table 58 of ISO 32000-1 – Entries in a Graphics State Parameter Dictionary."""
 
     TYPE = "/Type"  # name, optional
     LW = "/LW"  # number, optional
-    # TODO: Many more!
+    LC = "/LC"  # integer, optional
+    LJ = "/LJ"  # integer, optional
+    ML = "/ML"  # number, optional
+    D = "/D"  # array, optional
+    RI = "/RI"  # name, optional
+    OP = "/OP"
+    op = "/op"
+    OPM = "/OPM"
     FONT = "/Font"  # array, optional
+    BG = "/BG"
+    BG2 = "/BG2"
+    UCR = "/UCR"
+    UCR2 = "/UCR2"
+    TR = "/TR"
+    TR2 = "/TR2"
+    HT = "/HT"
+    FL = "/FL"
+    SM = "/SM"
+    SA = "/SA"
+    BM = "/BM"
     S_MASK = "/SMask"  # dictionary or name, optional
+    CA = "/CA"
+    ca = "/ca"
+    AIS = "/AIS"
+    TK = "/TK"
 
 
 class CatalogDictionary:

diff --git a/pypdf/filters.py b/pypdf/filters.py
@@ -67,6 +67,19 @@
 
 
 def decompress(data: bytes) -> bytes:
+    """
+    Decompress the given data using zlib.
+
+    This function attempts to decompress the input data using zlib. If the
+    decompression fails due to a zlib error, it falls back to using a
+    decompression object with a larger window size.
+
+    Args:
+        data: The input data to be decompressed.
+
+    Returns:
+        The decompressed data.
+    """
     try:
         return zlib.decompress(data)
     except zlib.error:
@@ -195,6 +208,15 @@ def _decode_png_prediction(data: str, columns: int, rowlength: int) -> bytes:
 
     @staticmethod
     def encode(data: bytes) -> bytes:
+        """
+        Compress the input data using zlib.
+
+        Args:
+            data: The data to be compressed.
+
+        Returns:
+            The compressed data.
+        """
         return zlib.compress(data)
 
 
@@ -376,7 +398,7 @@ def decode(
         group_index = b = 0
         out = bytearray()
         for char in data:
-            if ord("!") <= char and char <= ord("u"):
+            if ord("!") <= char <= ord("u"):
                 group_index += 1
                 b = b * 85 + (char - 33)
                 if group_index == 5:
@@ -536,6 +558,23 @@ def decode(
 
 
 def decode_stream_data(stream: Any) -> Union[str, bytes]:  # utils.StreamObject
+    """
+    Decode the stream data based on the specified filters.
+
+    This function decodes the stream data using the filters provided in the
+    stream. It supports various filter types, including FlateDecode,
+    ASCIIHexDecode, LZWDecode, ASCII85Decode, DCTDecode, JPXDecode, and
+    CCITTFaxDecode.
+
+    Args:
+        stream: The input stream object containing the data and filters.
+
+    Returns:
+        The decoded stream data.
+
+    Raises:
+        NotImplementedError: If an unsupported filter type is encountered.
+    """
     filters = stream.get(SA.FILTER, ())
     if isinstance(filters, IndirectObject):
         filters = cast(ArrayObject, filters.get_object())
@@ -580,6 +619,7 @@ def decode_stream_data(stream: Any) -> Union[str, bytes]:  # utils.StreamObject
 
 
 def decodeStreamData(stream: Any) -> Union[str, bytes]:  # deprecated
+    """Deprecated. Use decode_stream_data."""
     deprecate_with_replacement("decodeStreamData", "decode_stream_data", "4.0.0")
     return decode_stream_data(stream)
 

diff --git a/pypdf/generic/__init__.py b/pypdf/generic/__init__.py
@@ -73,6 +73,7 @@
 def readHexStringFromStream(
     stream: StreamType,
 ) -> Union["TextStringObject", "ByteStringObject"]:  # deprecated
+    """Deprecated. Use read_hex_string_from_stream."""
     deprecate_with_replacement(
         "readHexStringFromStream", "read_hex_string_from_stream", "4.0.0"
     )
@@ -83,6 +84,7 @@ def readStringFromStream(
     stream: StreamType,
     forced_encoding: Union[None, str, List[str], Dict[int, str]] = None,
 ) -> Union["TextStringObject", "ByteStringObject"]:  # deprecated
+    """Deprecated. Use read_string_from_stream."""
     deprecate_with_replacement(
         "readStringFromStream", "read_string_from_stream", "4.0.0"
     )
@@ -93,6 +95,7 @@ def createStringObject(
     string: Union[str, bytes],
     forced_encoding: Union[None, str, List[str], Dict[int, str]] = None,
 ) -> Union[TextStringObject, ByteStringObject]:  # deprecated
+    """Deprecated. Use create_string_object."""
     deprecate_with_replacement("createStringObject", "create_string_object", "4.0.0")
     return create_string_object(string, forced_encoding)
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -156,7 +156,6 @@ ignore = [
     "C901",
     "D101",  # Missing docstring in public class
     "D102", # Missing docstring in public method
-    "D103",  # Missing docstring in public function
     "D417",  # Missing argument descriptions in the docstring
     "FBT001", # Boolean positional arg in function definition
     "FBT002", # Boolean default value in function definition
@@ -177,7 +176,7 @@ ignore = [
 ]
 
 [tool.ruff.per-file-ignores]
-"tests/*" = ["S101", "ANN001", "ANN201","D104", "S105", "S106"]
+"tests/*" = ["S101", "ANN001", "ANN201","D104", "S105", "S106", "D103"]
 "sample-files/*" = ["D100", "INP001"]
 "_encryption.py" = ["S324", "S311"]
 "_security.py" = ["S324"]

diff --git a/sample-files b/sample-files
diff --git a/tests/test_constants.py b/tests/test_constants.py
@@ -2,7 +2,7 @@
 import re
 from typing import Callable
 
-from pypdf.constants import PDF_KEYS
+from pypdf.constants import PDF_KEYS, GraphicsStateParameters
 
 
 def test_slash_prefix():
@@ -18,11 +18,19 @@ def test_slash_prefix():
     pattern = re.compile(r"^\/[A-Z]+[a-zA-Z0-9]*$")
     for cls in PDF_KEYS:
         for attr in dir(cls):
+            # Skip magic methods
             if attr.startswith("__") and attr.endswith("__"):
                 continue
+
+            # Skip methods
             constant_value = getattr(cls, attr)
             if isinstance(constant_value, Callable):
                 continue
+
             assert constant_value.startswith("/")
-            assert pattern.match(constant_value)
             assert attr.replace("_", "").lower() == constant_value[1:].lower()
+
+            # There are a few exceptions that may be lowercase
+            if cls == GraphicsStateParameters and attr in ["ca", "op"]:
+                continue
+            assert pattern.match(constant_value)