diff --git a/corpusama/source/pdf.py b/corpusama/source/pdf.py index cac77d0..f29c81e 100644 --- a/corpusama/source/pdf.py +++ b/corpusama/source/pdf.py @@ -100,7 +100,7 @@ def _try_extract(file: str, clean: bool, n: int = 0) -> None: text = extract_text(file, clean) with open(file.with_suffix(".txt"), "w") as f: f.write(text) - except (fitz.fitz.FileDataError, RuntimeError) as e: + except (fitz.fitz.FileDataError, RuntimeError, fitz.mupdf.FzErrorFormat) as e: logger.warning(f"{n} - {file} - {e}")