Skip to content

Commit

Permalink
Added maximum idle waiting time MAX_IDLE_TIME_BEFORE_CLOSE.
Browse files Browse the repository at this point in the history
  • Loading branch information
nieweiming committed Apr 26, 2021
1 parent 1d0fab0 commit a18ad4f
Showing 1 changed file with 29 additions and 3 deletions.
32 changes: 29 additions & 3 deletions src/scrapy_redis/spiders.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from scrapy.exceptions import DontCloseSpider
from scrapy.spiders import Spider, CrawlSpider
from collections import Iterable
import time


from . import connection, defaults
Expand All @@ -16,7 +17,10 @@ class RedisMixin(object):

# Redis client placeholder.
server = None


# Timestamp (from time.time_ns()) marking when the current idle period began.
spider_idle_start_time = time.time_ns()

def start_requests(self):
    """Return the initial batch of requests by delegating to ``next_requests``."""
    initial_batch = self.next_requests()
    return initial_batch
Expand Down Expand Up @@ -140,10 +144,32 @@ def schedule_next_requests(self):
self.crawler.engine.crawl(req, spider=self)

def spider_idle(self):
    """Schedule more requests if available; otherwise wait or allow closing.

    If a Redis connection exists and start URLs are still queued, the idle
    timer (``spider_idle_start_time``) is reset to the current time. Any
    available requests are then scheduled.

    When the ``MAX_IDLE_TIME_BEFORE_CLOSE`` setting is a positive number of
    seconds and more than that many seconds have passed since the idle
    timer was last reset, the method returns normally, which lets the
    spider close. A value of zero or less disables this timeout.
    ``MAX_IDLE_TIME_BEFORE_CLOSE`` does not affect
    ``SCHEDULER_IDLE_BEFORE_CLOSE``.

    Raises:
        DontCloseSpider: whenever the idle timeout is disabled or has not
            yet been exceeded, to keep the spider alive.
    """
    # Queued start URLs mean there is still work: restart the idle clock.
    # NOTE(review): the clock is only reset here, so time spent crawling
    # between two idle callbacks also counts toward the limit -- confirm
    # that this is the intended behaviour.
    if self.server is not None and self.count_start_urls() > 0:
        self.spider_idle_start_time = time.time_ns()

    self.schedule_next_requests()

    max_idle_seconds = self.settings.getint("MAX_IDLE_TIME_BEFORE_CLOSE")
    # Only a positive limit enables the timeout. A negative value used to
    # produce a negative threshold and allowed the spider to close at once.
    if max_idle_seconds > 0:
        idle_ns = time.time_ns() - self.spider_idle_start_time
        if idle_ns > max_idle_seconds * 10**9:
            return
    raise DontCloseSpider

def count_start_urls(self):
    """Return how many start URLs are currently stored under ``redis_key``.

    Uses ``scard`` when the ``REDIS_START_URLS_AS_SET`` setting is true
    (falling back to ``defaults.START_URLS_AS_SET``), and ``llen`` otherwise.
    """
    if self.settings.getbool('REDIS_START_URLS_AS_SET', defaults.START_URLS_AS_SET):
        size_of = self.server.scard
    else:
        size_of = self.server.llen
    return size_of(self.redis_key)


class RedisSpider(RedisMixin, Spider):
Expand Down

0 comments on commit a18ad4f

Please sign in to comment.