Skip to content

Commit

Permalink
core: improve typing, fix some type assignments, docstrings use pathlib in some places
Browse files Browse the repository at this point in the history
  • Loading branch information
ACA committed Mar 3, 2024
1 parent 522978b commit 02f44f4
Show file tree
Hide file tree
Showing 7 changed files with 17 additions and 12 deletions.
5 changes: 3 additions & 2 deletions lncrawl/core/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import logging
import os
import shutil
from pathlib import Path
from threading import Thread
from typing import Dict, List, Optional, Tuple
from urllib.parse import urlparse
Expand Down Expand Up @@ -167,7 +168,7 @@ def start_download(self):
fetch_chapter_images(self)
save_metadata(self, True)

if not self.output_formats.get("json", False):
if not self.output_formats.get(OutputFormat.json.value, False):
shutil.rmtree(os.path.join(self.output_path, "json"), ignore_errors=True)

if self.can_do("logout"):
Expand Down Expand Up @@ -233,7 +234,7 @@ def compress_books(self, archive_singles=False):
logger.info("Not archiving single file inside %s" % root_dir)
archived_file = os.path.join(root_dir, file_list[0])
else:
base_path = os.path.join(self.output_path, output_name)
base_path = Path(self.output_path) / output_name
logger.info("Compressing %s to %s" % (root_dir, base_path))
archived_file = shutil.make_archive(
base_path,
Expand Down
6 changes: 4 additions & 2 deletions lncrawl/core/crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,15 +155,17 @@ def download_chapters(
unit="item",
fail_fast=fail_fast,
)
chapter = None
for (index, future) in futures.items():
try:
chapter = chapters[index]
chapter.body = future.result()
self.extract_chapter_images(chapter)
chapter.success = True
except Exception as e:
chapter.body = ""
chapter.success = False
if isinstance(chapter, Chapter):
chapter.body = ""
chapter.success = False
if isinstance(e, KeyboardInterrupt):
break
finally:
Expand Down
4 changes: 2 additions & 2 deletions lncrawl/core/downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,9 @@ def fetch_chapter_body(app):
old_chapter = json.load(file)
chapter.update(**old_chapter)
except FileNotFoundError:
logger.info("Missing File: %s Retrieved!" % (file_name))
logger.info("Missing File: %s Retrieved!" % file_name)
except json.JSONDecodeError:
logger.info("Unable to decode JSON from the file: %s" % (file_name))
logger.info("Unable to decode JSON from the file: %s" % file_name)
except Exception as e:
logger.exception("An error occurred while reading the file:", e)

Expand Down
4 changes: 2 additions & 2 deletions lncrawl/core/logconfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,8 @@ def configure_logging():
}
if not log_file:
del config["handlers"]["file"]
config["root"]["level"] = level
config["root"]["level"] = logging.getLevelName(level)
config["root"]["handlers"] = ["console"]
config["handlers"]["console"]["level"] = level
config["handlers"]["console"]["level"] = logging.getLevelName(level)

logging.config.dictConfig(config)
3 changes: 2 additions & 1 deletion lncrawl/core/novel_info.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import math
import os
import re
from pathlib import Path
from typing import Dict

from .. import constants as C
Expand Down Expand Up @@ -109,5 +110,5 @@ def save_metadata(app, completed=False):
)

os.makedirs(app.output_path, exist_ok=True)
file_name = os.path.join(app.output_path, C.META_FILE_NAME)
file_name = Path(app.output_path) / C.META_FILE_NAME
novel.to_json(file_name, encoding="utf-8", indent=2)
4 changes: 2 additions & 2 deletions lncrawl/core/proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ def __find_proxies():
__proxy_list.setdefault(scheme, [])
if __proxy_visited_at.get(url, 0) + __proxy_ttl < time.time():
__validate_and_add(scheme, ip, url)
__proxy_visited_at[url] = time.time()
__proxy_visited_at[url] = int(time.time())

wait_times = 3 * 60
while wait_times and not __has_exit:
Expand All @@ -178,6 +178,6 @@ def start_proxy_fetcher():
Thread(target=__find_proxies, daemon=False).start()


def stop_proxy_fetcher():
def stop_proxy_fetcher(*args, **kwargs):
global __has_exit
__has_exit = True
3 changes: 2 additions & 1 deletion lncrawl/core/taskman.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ def domain_gate(self, hostname: str = ""):
"""Limit number of entry per hostname.
Args:
url: A fully qualified url.
hostname: A fully qualified url.
Returns:
A semaphore object to wait.
Expand Down Expand Up @@ -179,6 +179,7 @@ def resolve_futures(
disable_bar: Hides the progress bar if True.
desc: The progress bar description
unit: The progress unit name
fail_fast: Fail on first error
"""
if not futures:
return
Expand Down

0 comments on commit 02f44f4

Please sign in to comment.