Skip to content
This repository has been archived by the owner on Jan 12, 2023. It is now read-only.

Commit

Permalink
Ensure mirror existence check is cached correctly
Browse files (browse the repository at this point in the history)
  • Loading branch information
c-w committed Jun 26, 2018
1 parent 69a61ed commit 5a1c078
Show file tree
Hide file tree
Showing 7 changed files with 55 additions and 72 deletions.
10 changes: 2 additions & 8 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,11 @@ python:
- '3.5'
- '3.6'

env:
- BERKELEYDB_DIR=/usr

before_install:
- sudo apt-get update -qq
- sudo apt-get install -qq -y libdb5.1-dev
- if [ "$TRAVIS_PYTHON_VERSION" != "2.7" ]; then sudo apt-get update -qq && sudo apt-get install -qq -y libdb5.1-dev && export BERKELEYDB_DIR=/usr; fi

install:
- pip install -U setuptools
- pip install -r requirements.pip
- pip install -r requirements-py3.pip
- python setup.py install
- pip install -r requirements-dev.pip

script:
Expand Down
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
include requirements.pip
include requirements-py2.pip
include requirements-py3.pip
16 changes: 0 additions & 16 deletions gutenberg/_util/decorators.py

This file was deleted.

51 changes: 32 additions & 19 deletions gutenberg/acquire/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,17 @@
import os
from contextlib import closing

try:
from functools import lru_cache
except ImportError:
from functools32 import lru_cache

from requests.exceptions import RequestException
import requests

from gutenberg._domain_model.exceptions import UnknownDownloadUriException
from gutenberg._domain_model.persistence import local_path
from gutenberg._domain_model.types import validate_etextno
from gutenberg._util.decorators import execute_only_once
from gutenberg._util.os import makedirs
from gutenberg._util.os import remove

Expand Down Expand Up @@ -42,15 +47,17 @@ def _etextno_to_uri_subdirectory(etextno):
return subdir


@execute_only_once
def _check_mirror_exists(mirror):
response = requests.head(mirror)
if not response.ok:
raise UnknownDownloadUriException(
'Could not reach Gutenberg mirror "{0:s}". Try setting a '
'different mirror (https://www.gutenberg.org/MIRRORS.ALL) for '
'--mirror flag or GUTENBERG_MIRROR environment variable.'
.format(mirror))
def _does_uri_exist(uri):
try:
response = requests.head(uri)
except RequestException:
return False
return response.ok


@lru_cache(maxsize=32)
def _does_mirror_exist(mirror):
return _does_uri_exist(mirror)


def _format_download_uri_for_extension(etextno, extension, mirror=None):
Expand All @@ -59,13 +66,12 @@ def _format_download_uri_for_extension(etextno, extension, mirror=None):
text can be found via the formaturi metadata extractor.
"""
uri_root = mirror or _GUTENBERG_MIRROR
uri_root = uri_root.strip().rstrip('/')
_check_mirror_exists(uri_root)

mirror = mirror or _GUTENBERG_MIRROR
root = mirror.strip().rstrip('/')
path = _etextno_to_uri_subdirectory(etextno)

uri = '{root}/{path}/{etextno}{extension}'.format(
root=uri_root,
root=root,
path=path,
etextno=etextno,
extension=extension)
Expand All @@ -84,6 +90,14 @@ def _format_download_uri(etextno, mirror=None, prefer_ascii=False):
Raises:
UnknownDownloadUriException: If no download location can be found for the text.
"""
mirror = mirror or _GUTENBERG_MIRROR
if not _does_mirror_exist(mirror):
raise UnknownDownloadUriException(
'Could not reach Gutenberg mirror "{0:s}". Try setting a '
'different mirror (https://www.gutenberg.org/MIRRORS.ALL) for '
'--mirror flag or GUTENBERG_MIRROR environment variable.'
.format(mirror))

# Check https://www.gutenberg.org/files/ for details about available
# extensions:
# - .txt is plaintext us-ascii
Expand All @@ -93,16 +107,15 @@ def _format_download_uri(etextno, mirror=None, prefer_ascii=False):
utf8_first = ('-0.txt', '-8.txt', '.txt')
extensions = ascii_first if prefer_ascii else utf8_first
for extension in extensions:
uri = _format_download_uri_for_extension(etextno, extension)
response = requests.head(uri)
if response.ok:
uri = _format_download_uri_for_extension(etextno, extension, mirror)
if _does_uri_exist(uri):
return uri

raise UnknownDownloadUriException(
'Failed to find a textual download candidate for {0} on {1}. '
'Either the book does not exist or it is only available in '
'non-textual formats.'
.format(etextno, mirror or _GUTENBERG_MIRROR))
.format(etextno, mirror))


def load_etext(etextno, refresh_cache=False, mirror=None, prefer_ascii=False):
Expand Down
1 change: 1 addition & 0 deletions requirements-dev.pip
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
coverage
flake8
responses
1 change: 1 addition & 0 deletions requirements-py2.pip
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
functools32>=3.2.3-2
47 changes: 18 additions & 29 deletions tests/test_acquire.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@

from __future__ import absolute_import, unicode_literals
from builtins import str
from collections import namedtuple
import itertools
import unittest
import re

import responses

from gutenberg._domain_model.exceptions import UnknownDownloadUriException
from gutenberg._domain_model.vocabulary import DCTERMS
Expand Down Expand Up @@ -48,6 +50,7 @@ def test_load_etext(self):
self.assertIsInstance(etext, str)
self.assertNotIn(u'\ufffd', etext)

@responses.activate
def test_invalid_etext(self):
with self.assertRaises(UnknownDownloadUriException):
text.load_etext(1, mirror='http://example.com')
Expand All @@ -62,45 +65,27 @@ def test_load_etext(self):


class TestFailLoadEtext(unittest.TestCase):
def setUp(self):
self._original_head = text.requests.head

def tearDown(self):
text.requests.head = self._original_head

def request_head_response(self, ok=False):
response = namedtuple('Response', 'ok')

def head(*args, **kwargs):
return response(ok)
text.requests.head = head
status = 200 if ok else 404
responses.add(responses.HEAD, text._GUTENBERG_MIRROR, status=status)

@responses.activate
def test_unreachable_mirror(self):
self.request_head_response(ok=False)

with self.assertRaises(UnknownDownloadUriException):
text.load_etext(1)

class TestExtensionsLoadEtext(unittest.TestCase):
def setUp(self):
self._original_head = text.requests.head
self._original_check = text._check_mirror_exists

def tearDown(self):
text.requests.head = self._original_head
text._check_mirror_exists = self._original_check

class TestExtensionsLoadEtext(unittest.TestCase):
def request_head_response(self, valid_files):
response = namedtuple('Response', 'ok')

def head(*args, **kwargs):
req_file = args[0].split('/')[-1]
return response(req_file in valid_files)
text.requests.head = head
responses.add(responses.HEAD, text._GUTENBERG_MIRROR, status=200)

def mirror_exist(*args, **kwargs):
return response(True)
text._check_mirror_exists = mirror_exist
for valid_file in valid_files:
url = re.compile('^.*{}$'.format(valid_file))
responses.add(responses.HEAD, url, status=200)

@responses.activate
def test_extensions_order_utf8_only(self):
utf8_filename = '12345-0.txt'
self.request_head_response(valid_files=[utf8_filename])
Expand All @@ -111,6 +96,7 @@ def test_extensions_order_utf8_only(self):
extensions = text._format_download_uri(12345, prefer_ascii=False)
self.assertEqual(extensions.split('/')[-1], utf8_filename)

@responses.activate
def test_extensions_order_ascii_only(self):
ascii_filename = '12345.txt'
self.request_head_response(valid_files=[ascii_filename])
Expand All @@ -121,6 +107,7 @@ def test_extensions_order_ascii_only(self):
extensions = text._format_download_uri(12345, prefer_ascii=True)
self.assertEqual(extensions.split('/')[-1], ascii_filename)

@responses.activate
def test_extensions_order_utf8_first(self):
utf8_filename = '12345-0.txt'
all_files = ['12345.txt', '12345-8.txt', '12345-0.txt']
Expand All @@ -132,6 +119,7 @@ def test_extensions_order_utf8_first(self):
extensions = text._format_download_uri(12345, prefer_ascii=False)
self.assertEqual(extensions.split('/')[-1], utf8_filename)

@responses.activate
def test_extensions_order_ascii_first(self):
ascii_filename = '12345.txt'
all_files = ['12345-8.txt', '12345-0.txt', '12345.txt']
Expand All @@ -143,6 +131,7 @@ def test_extensions_order_ascii_first(self):
extensions = text._format_download_uri(12345, prefer_ascii=True)
self.assertEqual(extensions.split('/')[-1], ascii_filename)

@responses.activate
def test_extensions_order_eightbit_first(self):
eightbit_filename = '12345-8.txt'
ascii_filename = '12345.txt'
Expand Down

0 comments on commit 5a1c078

Please sign in to comment.