Skip to content

Commit

Permalink
chore(refactor): Move get_spider_list function into caller
Browse files Browse the repository at this point in the history
  • Loading branch information
jpmckinney committed Jul 19, 2024
1 parent 7387214 commit 8407b0f
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 81 deletions.
77 changes: 0 additions & 77 deletions scrapyd/utils.py
Original file line number Diff line number Diff line change
@@ -1,41 +1,8 @@
import os
import sys
from subprocess import PIPE, Popen
from typing import ClassVar
from urllib.parse import urlsplit

from scrapy.utils.misc import load_object

from scrapyd.config import Config
from scrapyd.exceptions import RunnerError
from scrapyd.sqlite import JsonSqliteDict


class UtilsCache:
    """Persistent cache of per-project data, backed by ``JsonSqliteDict``.

    Invalidation is lazy: project names appended to
    ``invalid_cached_projects`` are deleted from the backing store on the
    next ``__getitem__`` call, whatever key is being read.
    """

    # Names of projects whose cached entries must be dropped on next access.
    # NOTE: class-level mutable list — shared by ALL instances by design.
    invalid_cached_projects: ClassVar = []

    def __init__(self):
        # Backing store; JsonSqliteDict encodes values as JSON (so dict keys
        # become strings — callers must not rely on non-string keys).
        self.cache_manager = JsonSqliteDict(table="utils_cache_manager")

    # Mark the cached data of a given project (by name) as stale.
    @staticmethod
    def invalid_cache(project):
        UtilsCache.invalid_cached_projects.append(project)

    def __getitem__(self, key):
        # Purge every project queued for invalidation before serving the
        # read, then reset the queue in place (shared class-level list).
        for p in UtilsCache.invalid_cached_projects:
            if p in self.cache_manager:
                del self.cache_manager[p]
        UtilsCache.invalid_cached_projects[:] = []
        return self.cache_manager[key]

    def __setitem__(self, key, value):
        self.cache_manager[key] = value

    def __repr__(self):
        return f"UtilsCache(cache_manager={self.cache_manager!r})"


def get_spider_queues(config):
"""Return a dict of Spider Queues keyed by project name"""
Expand Down Expand Up @@ -89,50 +56,6 @@ def native_stringify_dict(dct_or_tuples, encoding="utf-8", *, keys_only=True):
return d


def get_spider_list(project, runner=None, pythonpath=None, version=None):
    """Return the spider list from the given project, using the given runner.

    Runs ``python -m <runner> list`` in a subprocess and caches the result
    per ``(project, version)`` in a shared :class:`UtilsCache`.

    :param project: project name, exported as ``SCRAPY_PROJECT``
    :param runner: dotted path of the runner module; defaults to the
        ``runner`` option of the Scrapyd configuration
    :param pythonpath: if set, exported as ``PYTHONPATH`` for the subprocess
    :param version: egg version, exported as ``SCRAPYD_EGG_VERSION``
    :raises RunnerError: if the subprocess exits with a non-zero status
    """
    # UtilsCache uses JsonSqliteDict, which encodes the project's value as JSON, but JSON allows only string keys,
    # so the stored dict will have a "null" key, instead of a None key.
    if version is None:
        version = ""

    # Lazily create one cache shared by all calls (function attribute).
    if "cache" not in get_spider_list.__dict__:
        get_spider_list.cache = UtilsCache()
    try:
        return get_spider_list.cache[project][version]
    except KeyError:
        pass

    if runner is None:
        runner = Config().get("runner")

    env = os.environ.copy()
    env["PYTHONIOENCODING"] = "UTF-8"
    env["SCRAPY_PROJECT"] = project
    if pythonpath:
        env["PYTHONPATH"] = pythonpath
    if version:
        env["SCRAPYD_EGG_VERSION"] = version
    pargs = [sys.executable, "-m", runner, "list", "-s", "LOG_STDOUT=0"]
    proc = Popen(pargs, stdout=PIPE, stderr=PIPE, env=env)
    out, err = proc.communicate()
    if proc.returncode:
        # BUG FIX: the original `err or out or ""` could fall back to a str,
        # and str has no .decode() — use a bytes default and decode once,
        # consistently as UTF-8 (was "utf8" here but "utf-8" below).
        msg = (err or out or b"").decode("utf-8")
        raise RunnerError(msg)

    spiders = out.decode("utf-8").splitlines()
    # EAFP: extend the project's cached version map if present, else create it.
    try:
        project_cache = get_spider_list.cache[project]
        project_cache[version] = spiders
    except KeyError:
        project_cache = {version: spiders}
    get_spider_list.cache[project] = project_cache

    return spiders


def to_native_str(text, encoding="utf-8", errors="strict"):
if isinstance(text, str):
return text
Expand Down
79 changes: 77 additions & 2 deletions scrapyd/webservice.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,24 @@

import functools
import json
import os
import sys
import traceback
import uuid
import zipfile
from copy import copy
from io import BytesIO
from subprocess import PIPE, Popen
from typing import ClassVar

from twisted.python import log
from twisted.web import error, http, resource

from scrapyd.exceptions import EggNotFoundError, ProjectNotFoundError
from scrapyd.config import Config
from scrapyd.exceptions import EggNotFoundError, ProjectNotFoundError, RunnerError
from scrapyd.jobstorage import job_items_url, job_log_url
from scrapyd.utils import UtilsCache, get_spider_list, native_stringify_dict
from scrapyd.sqlite import JsonSqliteDict
from scrapyd.utils import native_stringify_dict


def param(
Expand Down Expand Up @@ -53,6 +58,76 @@ def wrapper(self, txrequest, *args, **kwargs):
return decorator


def get_spider_list(project, runner=None, pythonpath=None, version=None):
    """Return the spider list from the given project, using the given runner.

    Runs ``python -m <runner> list`` in a subprocess and caches the result
    per ``(project, version)`` in a shared :class:`UtilsCache`.

    :param project: project name, exported as ``SCRAPY_PROJECT``
    :param runner: dotted path of the runner module; defaults to the
        ``runner`` option of the Scrapyd configuration
    :param pythonpath: if set, exported as ``PYTHONPATH`` for the subprocess
    :param version: egg version, exported as ``SCRAPYD_EGG_VERSION``
    :raises RunnerError: if the subprocess exits with a non-zero status
    """
    # UtilsCache uses JsonSqliteDict, which encodes the project's value as JSON, but JSON allows only string keys,
    # so the stored dict will have a "null" key, instead of a None key.
    if version is None:
        version = ""

    # Lazily create one cache shared by all calls (function attribute).
    if "cache" not in get_spider_list.__dict__:
        get_spider_list.cache = UtilsCache()
    try:
        return get_spider_list.cache[project][version]
    except KeyError:
        pass

    if runner is None:
        runner = Config().get("runner")

    env = os.environ.copy()
    env["PYTHONIOENCODING"] = "UTF-8"
    env["SCRAPY_PROJECT"] = project
    if pythonpath:
        env["PYTHONPATH"] = pythonpath
    if version:
        env["SCRAPYD_EGG_VERSION"] = version
    pargs = [sys.executable, "-m", runner, "list", "-s", "LOG_STDOUT=0"]
    proc = Popen(pargs, stdout=PIPE, stderr=PIPE, env=env)
    out, err = proc.communicate()
    if proc.returncode:
        # BUG FIX: the original `err or out or ""` could fall back to a str,
        # and str has no .decode() — use a bytes default and decode once,
        # consistently as UTF-8 (was "utf8" here but "utf-8" below).
        msg = (err or out or b"").decode("utf-8")
        raise RunnerError(msg)

    spiders = out.decode("utf-8").splitlines()
    # EAFP: extend the project's cached version map if present, else create it.
    try:
        project_cache = get_spider_list.cache[project]
        project_cache[version] = spiders
    except KeyError:
        project_cache = {version: spiders}
    get_spider_list.cache[project] = project_cache

    return spiders


class UtilsCache:
    """Persistent cache of per-project data, backed by ``JsonSqliteDict``.

    Invalidation is lazy: project names queued via :meth:`invalid_cache`
    are removed from the backing store on the next read.
    """

    # Projects whose cached entries must be dropped on next access
    # (class-level list, shared by all instances).
    invalid_cached_projects: ClassVar = []

    def __init__(self):
        # Backing store; values are serialized to JSON.
        self.cache_manager = JsonSqliteDict(table="utils_cache_manager")

    @staticmethod
    def invalid_cache(project):
        """Mark the cached data of *project* (by name) as stale."""
        UtilsCache.invalid_cached_projects.append(project)

    def __getitem__(self, key):
        # Purge every project queued for invalidation, then clear the
        # queue in place so all instances see the reset.
        for stale in UtilsCache.invalid_cached_projects:
            if stale in self.cache_manager:
                del self.cache_manager[stale]
        del UtilsCache.invalid_cached_projects[:]
        return self.cache_manager[key]

    def __setitem__(self, key, value):
        self.cache_manager[key] = value

    def __repr__(self):
        return f"UtilsCache(cache_manager={self.cache_manager!r})"


class JsonResource(resource.Resource):
json_encoder = json.JSONEncoder()

Expand Down
4 changes: 2 additions & 2 deletions tests/test_webservice.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from scrapyd.exceptions import DirectoryTraversalError, RunnerError
from scrapyd.interfaces import IEggStorage
from scrapyd.jobstorage import Job
from scrapyd.utils import UtilsCache, get_spider_list
from scrapyd.webservice import UtilsCache, get_spider_list


def fake_list_jobs(*args, **kwargs):
Expand Down Expand Up @@ -106,7 +106,7 @@ def test_failed_spider_list(app):
with pytest.raises(RunnerError) as exc:
get_spider_list("mybot3", pythonpath=get_pythonpath_scrapyd())

assert re.search(r"Exception: This should break the `scrapy list` command$", str(exc.value))
assert re.search(f"Exception: This should break the `scrapy list` command{os.linesep}$", str(exc.value))


def test_list_spiders(txrequest, site_no_egg):
Expand Down

0 comments on commit 8407b0f

Please sign in to comment.