Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cache remotely hosted images #409

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 71 additions & 1 deletion ReText/tab.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@
from markups import get_markup_for_file_name, find_markup_class_by_name
from markups.common import MODULE_HOME_PAGE

import os
import re
import requests
import tempfile

from ReText import app_version, globalSettings, converterprocess
from ReText.editor import ReTextEdit
from ReText.highlighter import ReTextHighlighter
Expand All @@ -36,6 +41,7 @@

try:
from ReText.webkitpreview import ReTextWebKitPreview

except ImportError:
ReTextWebKitPreview = None

Expand All @@ -45,12 +51,14 @@
ReTextWebEnginePreview = None

# Preview modes of a tab. The numeric order is significant: the visibility
# logic compares previewState against these values with < and >.
PreviewDisabled, PreviewLive, PreviewNormal = range(3)
# Matches an HTML <img ...> tag whose src attribute is quoted (single or
# double quotes); capture group 1 is the raw src value between the quotes.
HTML_IMG_TAG_SRC_RE = re.compile(r'<img\s[^>]*?src\s*=\s*[\'\"]([^\'\"]*?)[\'\"][^>]*?>')

class ReTextTab(QSplitter):

fileNameChanged = pyqtSignal()
modificationStateChanged = pyqtSignal()
activeMarkupChanged = pyqtSignal()
seenImages = {}

# Make _fileName a read-only property to make sure that any
# modification happens through the proper functions. These functions
Expand Down Expand Up @@ -237,6 +245,7 @@ def updatePreviewBox(self):
distToBottom = scrollbar.maximum() - scrollbarValue
try:
html = self.getHtmlFromConverted(self.converted)
html = self.cacheRemotelyHostedImages(html)
except Exception:
return self.p.printError()
if isinstance(self.previewBox, QTextEdit):
Expand Down Expand Up @@ -275,13 +284,19 @@ def startPendingConversion(self):
self.converterProcess.start_conversion(self.getActiveMarkupClass().name,
self.fileName,
requested_extensions,
self.editBox.toPlainText(),
self.editBox.toPlainText(),
QDir.currentPath())

def updateBoxesVisibility(self):
    """
    Show or hide the editor and preview panes to match self.previewState,
    and drop the cached remote images whenever the state is not PreviewLive.
    """
    state = self.previewState
    showEditor = state < PreviewNormal
    showPreview = state > PreviewDisabled
    self.editBox.setVisible(showEditor)
    self.previewBox.setVisible(showPreview)

    # Live preview keeps its image cache; every other state throws it away
    # so that a hosted image that changed remotely is fetched again later.
    # NOTE(review): this also runs for PreviewNormal, where the preview pane
    # is still visible -- confirm that is intended and not only meant for
    # PreviewDisabled.
    if state == PreviewLive:
        return
    self.cleanImageCache()

def rebuildPreviewBox(self):
self.previewBox.disconnectExternalSignals()
self.previewBox.setParent(None)
Expand Down Expand Up @@ -456,3 +471,58 @@ def openSourceFile(self, fileToOpen):
if exists(fileToOpen) and get_markup_for_file_name(fileToOpen, return_class=True):
self.p.openFileWrapper(fileToOpen)
return fileToOpen

def cacheRemotelyHostedImages(self, htmlText, timeout=0.001):
    """
    Rewrite remotely hosted <img> sources in htmlText to local cached copies.

    Every <img> tag whose src starts with http:// or https:// is downloaded
    once into a temporary file; the src value in the returned HTML is replaced
    by the path of that file. Downloads are remembered in self.seenImages
    (keyed by the original URL) so the same URL is never fetched twice and so
    cleanImageCache() can delete the files later.

    Parameters:
        htmlText: HTML text to scan.
        timeout: timeout in seconds handed to requests.get().
            NOTE(review): the default keeps the historical value of 1 ms,
            which most real hosts cannot meet -- confirm the intended value.

    Returns:
        The HTML text with the src of every successfully cached remote image
        replaced. Images that cannot be downloaded keep their original src.
    """
    pieces = []
    cursor = 0
    for match in HTML_IMG_TAG_SRC_RE.finditer(htmlText):
        imgSrc = match.group(1)
        startPosition, endPosition = match.span(1)

        # Only real remote URLs are cached; local paths (even ones that merely
        # begin with the letters "http") are left untouched.
        if not imgSrc.startswith(('http://', 'https://')):
            continue

        cached = self.seenImages.get(imgSrc)
        if cached is None:
            # Download first and create the temp file only on success, so a
            # failed request never leaves an orphaned file behind.
            try:
                response = requests.get(imgSrc, timeout=timeout)
                # Do not cache the body of a 404/500 page as if it were an image.
                response.raise_for_status()
            except requests.exceptions.RequestException:
                # If we can't acquire the image, don't change the text.
                # Silently move on.
                continue

            # Write through the handle we already hold: re-opening the file by
            # name while it is still open does not work on Windows.
            with tempfile.NamedTemporaryFile(delete=False) as temp:
                temp.write(response.content)

            cached = {
                'localFile': temp.name,
                'startPosition': startPosition,
                'endPosition': endPosition}
            self.seenImages[imgSrc] = cached

        # Replace the path of the tag with our locally saved file. Match
        # positions refer to the original text, so the result is assembled
        # from untouched slices plus replacements in a single pass.
        pieces.append(htmlText[cursor:startPosition])
        pieces.append(cached['localFile'])
        cursor = endPosition

    pieces.append(htmlText[cursor:])
    return ''.join(pieces)

def cleanImageCache(self):
    """
    Delete every cached image file and forget all cache entries.

    This runs when a tab is closed OR when the application is closed.
    """
    for image in self.seenImages.values():
        try:
            os.remove(image['localFile'])
        except FileNotFoundError:
            # The file is already gone (e.g. removed by an earlier cleanup or
            # by the OS temp cleaner); nothing left to do for this entry.
            pass

    # seenImages is declared on the class, so the dict may be shared between
    # tabs. Empty it in place instead of rebinding the attribute: rebinding
    # would only shadow it on this instance and leave stale entries (pointing
    # at deleted files) visible to every other tab.
    self.seenImages.clear()
2 changes: 2 additions & 0 deletions ReText/window.py
Original file line number Diff line number Diff line change
Expand Up @@ -518,6 +518,7 @@ def closeTab(self, ind):
if closedTab.fileName:
self.fileSystemWatcher.removePath(closedTab.fileName)
self.tabWidget.removeTab(ind)
closedTab.cleanImageCache()
closedTab.deleteLater()

def changeIndex(self, ind):
Expand Down Expand Up @@ -1194,6 +1195,7 @@ def closeEvent(self, closeevent):
files = [tab.fileName for tab in self.iterateTabs()]
writeListToSettings("lastFileList", files)
globalSettings.lastTabIndex = self.tabWidget.currentIndex()
[tab.cleanImageCache() for tab in self.iterateTabs()]
closeevent.accept()

def viewHtml(self):
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ def run(self):
# On Linux distro-packaged Qt/PyQt is preferred
'PyQt5;platform_system=="Windows"',
'PyQt5;platform_system=="Darwin"',
'requests'
],
extras_require={
'spellcheck': ['pyenchant'],
Expand Down
82 changes: 82 additions & 0 deletions tests/test_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from contextlib import suppress
import hashlib
import markups
import os
import requests
import sys
import tempfile
import time
Expand All @@ -32,6 +34,7 @@
from PyQt5.QtWidgets import QApplication
import ReText
from ReText.window import ReTextWindow
from ReText.tab import ReTextTab

defaultEventTimeout = 0.0
path_to_testdata = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'testdata')
Expand Down Expand Up @@ -397,6 +400,85 @@ def test_doesNotTweakSpecialCharacters(self):
os.remove(fileName)


class TestCachingOfRemotelyHostedImages(unittest.TestCase):
    """
    Tests for ReTextTab.cacheRemotelyHostedImages and cleanImageCache.

    requests.get is mocked in every test, so no network access takes place.
    """

    def setUp(self):
        settingsPatcher = patch('ReText.window.globalSettings', MagicMock(**ReText.configOptions))
        self.globalSettingsMock = settingsPatcher.start()
        # Undo the patch after each test so it cannot leak into other tests.
        self.addCleanup(settingsPatcher.stop)
        self.globalSettingsMock.useWebKit = True
        self.window = ReTextWindow()
        self.window.createNew('')
        self.tab = self.window.currentTab
        self.fakeRemoteUrl = 'https://tupac.com/i-am-alive-and-i-code-now'
        self.testLocalImg = os.path.join(path_to_testdata, 'tupac.jpg')

    @patch.object(requests, 'get')
    def test_cacheHelper(self, requestsGet):
        """Exercise the cache helper directly on HTML snippets."""
        dummyImgContent = b'dummy'
        requestsGet.return_value = MagicMock(content=dummyImgContent)

        # Test that our cache helper does not impact locally hosted images.
        localImg = "<img alt='I am alive' src='{img}'/>".format(img=self.testLocalImg)
        self.assertEqual(
            self.tab.cacheRemotelyHostedImages(localImg),
            localImg)

        # Test that our cache helper does modify remotely hosted images.
        remoteImg = "<img alt='Come with me' src='{url}'/>".format(url=self.fakeRemoteUrl)
        transformedText = self.tab.cacheRemotelyHostedImages(remoteImg)
        self.assertNotEqual(
            transformedText,
            remoteImg)

        # Check that the new path points to one in 'tmp' (non-windows) or 'temp' (windows)
        self.assertTrue('tmp' in transformedText or 'temp' in transformedText)

        # Check hash of temp file matches hash of original content
        matches = ReText.tab.HTML_IMG_TAG_SRC_RE.findall(transformedText)
        with open(matches[0], 'rb') as cachedFile:
            cachedContent = cachedFile.read()
        self.assertEqual(
            hashlib.md5(cachedContent).hexdigest(),
            hashlib.md5(dummyImgContent).hexdigest())

        # If we add the same image again, ensure it points to the same temporary file
        transformedText = self.tab.cacheRemotelyHostedImages(transformedText + remoteImg)
        matches = ReText.tab.HTML_IMG_TAG_SRC_RE.findall(transformedText)
        self.assertEqual(
            matches[0],
            matches[1])

        # Run the cache cleaner and make sure our temporary files are gone
        self.tab.cleanImageCache()
        self.assertFalse(os.path.isfile(matches[0]))

    @patch('ReText.window.QMessageBox.warning', return_value='QMessageBox.Discard')
    @patch.object(requests, 'get')
    def test_cacheHelperWithGui(self, requestsGet, qMsg):
        """Exercise the cache through the live preview of a real window."""
        # Read the fixture through a context manager so the handle is closed.
        with open(self.testLocalImg, 'rb') as fh:
            requestsGet.return_value = MagicMock(content=fh.read())

        self.globalSettingsMock.useWebKit = True
        self.window = ReTextWindow()
        self.window.createNew('')
        self.window.preview(ReText.tab.PreviewLive)

        # Insert an image into the edit box, wait for the preview to be generated
        processEventsUntilIdle()
        time.sleep(0.5)
        self.window.currentTab.editBox.textCursor().insertText(
            '![]({url})'.format(url=self.fakeRemoteUrl))
        processEventsUntilIdle()
        time.sleep(0.5)
        processEventsUntilIdle()

        # Check that the cache is non-empty
        self.assertTrue(self.window.currentTab.seenImages)

        # Check that we see a temporary file generated in the preview box text
        text = self.window.currentTab.previewBox.document().toHtml()
        self.assertTrue('tmp' in text or 'temp' in text)

        # Close the tab. Make sure our cache is empty
        self.window.closeTab(self.window.ind)
        self.assertFalse(self.window.currentTab.seenImages)

if __name__ == '__main__':
unittest.main()

Binary file added tests/testdata/tupac.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.