Skip to content

Commit

Permalink
Memory cache off-by-one error (#396)
Browse files Browse the repository at this point in the history
* Fixes an off by one in the handling of the in memory cache, which also caused it to blow up if cache size == 0

* Fixed InfoCache getter bypassing size restrictions of the RAM cache and actually remedied the issues with the RAM cache size == 0 case, as I thought I had in the previous commit. Added tests for these cases.

* Minor effeciency improvement - only read from disk once in the case where none of the info is cached in RAM. Still does two disk reads in all cases where RAM cache size > 0

* per conversation with @bcail in #396 - Avoiding writing to disk on every get()
  • Loading branch information
Brian Balsamo authored and bcail committed Feb 13, 2018
1 parent dff373b commit 80b707a
Show file tree
Hide file tree
Showing 2 changed files with 73 additions and 21 deletions.
47 changes: 26 additions & 21 deletions loris/img_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -372,8 +372,7 @@ def get(self, request):
info_and_lastmod = (info, lastmod)
logger.debug('Info for %s read from file system', request)
# into mem:
self._dict[request.url] = info_and_lastmod

self.__setitem__(request, info, _to_fs=False)
return info_and_lastmod

def has_key(self, request):
Expand All @@ -389,31 +388,37 @@ def __getitem__(self, request):
else:
return info_lastmod

def __setitem__(self, request, info):
# to fs
logger.debug('request passed to __setitem__: %s', request)
def __setitem__(self, request, info, _to_fs=True):
info_fp = self._get_info_fp(request)
dp = os.path.dirname(info_fp)
mkdir_p(dp)
logger.debug('Created %s', dp)
if _to_fs:
# to fs
logger.debug('request passed to __setitem__: %s', request)
dp = os.path.dirname(info_fp)
mkdir_p(dp)
logger.debug('Created %s', dp)

with open(info_fp, 'w') as f:
f.write(info.to_full_info_json())
logger.debug('Created %s', info_fp)
with open(info_fp, 'w') as f:
f.write(info.to_full_info_json())
logger.debug('Created %s', info_fp)


if info.color_profile_bytes:
icc_fp = self._get_color_profile_fp(request)
with open(icc_fp, 'wb') as f:
f.write(info.color_profile_bytes)
logger.debug('Created %s', icc_fp)
if info.color_profile_bytes:
icc_fp = self._get_color_profile_fp(request)
with open(icc_fp, 'wb') as f:
f.write(info.color_profile_bytes)
logger.debug('Created %s', icc_fp)

# into mem
lastmod = datetime.utcfromtimestamp(os.path.getmtime(info_fp))
with self._lock:
while len(self._dict) >= self.size:
self._dict.popitem(last=False)
self._dict[request.url] = (info,lastmod)
# The info file cache on disk must already exist before
# this is called - it's where the mtime gets drawn from.
# aka, nothing outside of this class should be using
# to_fs=False
if self.size > 0:
lastmod = datetime.utcfromtimestamp(os.path.getmtime(info_fp))
with self._lock:
self._dict[request.url] = (info,lastmod)
while len(self._dict) > self.size:
self._dict.popitem(last=False)

def __delitem__(self, request):
with self._lock:
Expand Down
47 changes: 47 additions & 0 deletions tests/img_info_t.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from os import path
import json
import tempfile
from datetime import datetime

try:
from urllib.parse import unquote
Expand Down Expand Up @@ -505,6 +506,29 @@ def test_info_goes_to_http_fs_cache(self):
)
self.assertTrue(path.exists(expected_path))

def test_just_ram_cache_update(self):
# Cache size of one, so it's easy to manipulate
cache = img_info.InfoCache(root=self.SRC_IMAGE_CACHE, size=1)
self.app.info_cache = cache
# First request
request_uri = '/%s/%s' % (self.test_jp2_color_id,'info.json')
resp = self.client.get(request_uri)
expected_path = path.join(
self.app.info_cache.http_root,
unquote(self.test_jp2_color_id),
'info.json'
)
fs_first_time = datetime.utcfromtimestamp(os.path.getmtime(expected_path))
# Push this entry out of the RAM cache with another
push_request_uri = '/%s/%s' % (self.test_jp2_gray_id,'info.json')
resp = self.client.get(push_request_uri)
# Request the first file again
# It should now exist on disk, but not in RAM, so it shouldn't
# have been rewritten by the second get.
resp = self.client.get(request_uri)
fs_second_time = datetime.utcfromtimestamp(os.path.getmtime(expected_path))
self.assertTrue(fs_first_time == fs_second_time)

def test_can_delete_items_from_infocache(self):
cache, req = self._cache_with_request()
del cache[req]
Expand All @@ -513,6 +537,29 @@ def test_empty_cache_has_zero_size(self):
cache = img_info.InfoCache(root=self.SRC_IMAGE_CACHE)
assert len(cache) == 0

def test_cache_limit(self):
cache = img_info.InfoCache(root=self.SRC_IMAGE_CACHE, size=2)
self.app.info_cache = cache
request_uris = [
'/%s/%s' % (self.test_jp2_color_id,'info.json'),
'/%s/%s' % (self.test_jpeg_id,'info.json'),
'/%s/%s' % (self.test_png_id,'info.json'),
'/%s/%s' % (self.test_jp2_gray_id,'info.json')
]
for x in request_uris:
resp = self.client.get(x)

# Check we only cache two
assert len(self.app.info_cache) == 2

def test_no_cache(self):
cache = img_info.InfoCache(root=self.SRC_IMAGE_CACHE, size=0)
self.app.info_cache = cache
request_uri = '/%s/%s' % (self.test_jp2_color_id,'info.json')
resp = self.client.get(request_uri)

assert len(self.app.info_cache) == 0

def test_deleting_cache_item_removes_color_profile_fp(self):
# First assemble the cache
cache, req = self._cache_with_request()
Expand Down

0 comments on commit 80b707a

Please sign in to comment.