Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle FileNotFoundError on download of messages #5573

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions securedrop/journalist_app/col.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# -*- coding: utf-8 -*-

from pathlib import Path

from flask import (
Blueprint,
abort,
Expand Down Expand Up @@ -93,6 +95,18 @@ def download_single_file(filesystem_id: str, fn: str) -> werkzeug.Response:
if '..' in fn or fn.startswith('/'):
abort(404)

file = current_app.storage.path(filesystem_id, fn)
if not Path(file).is_file():
flash(
gettext(
"Your download failed because a file could not be found. An admin can find "
+ "more information in the system and monitoring logs."
),
"error"
)
current_app.logger.error("File {} not found".format(file))
return redirect(url_for("col.col", filesystem_id=filesystem_id))

# mark as seen by the current user
try:
journalist = g.get("user")
Expand Down
17 changes: 15 additions & 2 deletions securedrop/journalist_app/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import datetime
import os
from typing import Optional, List, Union, Any
from urllib.parse import urlparse

import flask
import werkzeug
Expand Down Expand Up @@ -190,7 +191,7 @@ def mark_seen(targets: List[Union[Submission, Reply]], user: Journalist) -> None
raise


def download(zip_basename: str, submissions: List[Union[Submission, Reply]]) -> flask.Response:
def download(zip_basename: str, submissions: List[Union[Submission, Reply]]) -> werkzeug.Response:
"""Send client contents of ZIP-file *zip_basename*-<timestamp>.zip
containing *submissions*. The ZIP-file, being a
:class:`tempfile.NamedTemporaryFile`, is stored on disk only
Expand All @@ -201,7 +202,19 @@ def download(zip_basename: str, submissions: List[Union[Submission, Reply]]) ->
:param list submissions: A list of :class:`models.Submission`s to
include in the ZIP-file.
"""
zf = current_app.storage.get_bulk_archive(submissions, zip_directory=zip_basename)
try:
zf = current_app.storage.get_bulk_archive(submissions, zip_directory=zip_basename)
DrGFreeman marked this conversation as resolved.
Show resolved Hide resolved
except FileNotFoundError:
flash(
gettext(
"Your download failed because a file could not be found. An admin can find "
+ "more information in the system and monitoring logs."
),
"error"
)
referrer = urlparse(str(request.referrer)).path
return redirect(referrer)

attachment_filename = "{}--{}.zip".format(
zip_basename, datetime.datetime.utcnow().strftime("%Y-%m-%d--%H-%M-%S")
)
Expand Down
33 changes: 22 additions & 11 deletions securedrop/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,25 +201,36 @@ def get_bulk_archive(self,
for i in selected_submissions])
# The below nested for-loops are there to create a more usable
# folder structure per #383
missing_files = False

with zipfile.ZipFile(zip_file, 'w') as zip:
for source in sources:
fname = ""
submissions = [s for s in selected_submissions
if s.source.journalist_designation == source]
for submission in submissions:
filename = self.path(submission.source.filesystem_id, submission.filename)
document_number = submission.filename.split('-')[0]
if zip_directory == submission.source.journalist_filename:
fname = zip_directory

if os.path.exists(filename):
sssoleileraaa marked this conversation as resolved.
Show resolved Hide resolved
document_number = submission.filename.split('-')[0]
if zip_directory == submission.source.journalist_filename:
fname = zip_directory
else:
fname = os.path.join(zip_directory, source)
zip.write(filename, arcname=os.path.join(
fname,
"%s_%s" % (document_number,
submission.source.last_updated.date()),
os.path.basename(filename)
))
else:
fname = os.path.join(zip_directory, source)
zip.write(filename, arcname=os.path.join(
fname,
"%s_%s" % (document_number,
submission.source.last_updated.date()),
os.path.basename(filename)
))
return zip_file
missing_files = True
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I might be missing something but why not raise FileNotFoundError here directly?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To allow logging all the missing files if more than one are missing (see commit message ee18a23)

current_app.logger.error("File {} not found".format(filename))

if missing_files:
raise FileNotFoundError
else:
return zip_file

def move_to_shredder(self, path: str) -> None:
"""
Expand Down
50 changes: 50 additions & 0 deletions securedrop/tests/functional/journalist_navigation_steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -753,6 +753,17 @@ def cookie_string_from_selenium_cookies(cookies):

assert self.secret_message == submission

def _journalist_downloads_message_missing_file(self):
self._journalist_selects_the_first_source()

self.wait_for(lambda: self.driver.find_element_by_css_selector("ul#submissions"))

submissions = self.driver.find_elements_by_css_selector("#submissions a")
assert 1 == len(submissions)

file_link = submissions[0]
file_link.click()

def _journalist_composes_reply(self):
reply_text = (
"Thanks for the documents. Can you submit more " "information about the main program?"
Expand Down Expand Up @@ -1097,3 +1108,42 @@ def _journalist_uses_js_buttons_to_download_unread(self):
for checkbox in checkboxes:
classes = checkbox.get_attribute("class")
assert "unread-cb" in classes

def _journalist_sees_missing_file_error_message(self):
notification = self.driver.find_element_by_css_selector(".error")

if self.accept_languages is None:
expected_text = (
"Your download failed because a file could not be found. An admin can find "
+ "more information in the system and monitoring logs."
)
assert expected_text == notification.text

def _journalist_is_on_collection_page(self):
return self.wait_for(
lambda: self.driver.find_element_by_css_selector("div.journalist-view-single")
)

def _journalist_clicks_source_unread(self):
self.driver.find_element_by_css_selector("span.unread a").click()

def _journalist_selects_first_source_then_download_all(self):
checkboxes = self.driver.find_elements_by_name("cols_selected")
assert len(checkboxes) == 1
checkboxes[0].click()

self.driver.find_element_by_xpath("//button[@value='download-all']").click()

def _journalist_selects_first_source_then_download_unread(self):
checkboxes = self.driver.find_elements_by_name("cols_selected")
assert len(checkboxes) == 1
checkboxes[0].click()

self.driver.find_element_by_xpath("//button[@value='download-unread']").click()

def _journalist_selects_message_then_download_selected(self):
checkboxes = self.driver.find_elements_by_name("doc_names_selected")
assert len(checkboxes) == 1
checkboxes[0].click()

self.driver.find_element_by_xpath("//button[@value='download']").click()
76 changes: 76 additions & 0 deletions securedrop/tests/functional/test_journalist.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from pathlib import Path

import pytest

from . import functional_test as ft
from . import journalist_navigation_steps
Expand Down Expand Up @@ -76,3 +79,76 @@ def test_journalist_interface_ui_with_modal(self):
self._journalist_selects_the_first_source()
self._journalist_uses_js_buttons_to_download_unread()
self._journalist_delete_all_confirmation()


class TestJournalistMissingFile(
ft.FunctionalTest,
source_navigation_steps.SourceNavigationStepsMixin,
journalist_navigation_steps.JournalistNavigationStepsMixin
):
"""Test error handling when a message file has been deleted from disk but remains in the
database. Ref #4787."""

@pytest.fixture(scope="function")
def missing_msg_file(self):
"""Fixture to setup the message with missing file used in the following tests."""
# Submit a message
self._source_visits_source_homepage()
self._source_chooses_to_submit_documents()
self._source_continues_to_submit_page()
self._source_submits_a_message()
self._source_logs_out()

# Remove the message file from the store
storage_path = Path(self.journalist_app.storage.storage_path)
msg_files = [p for p in storage_path.rglob("*") if p.is_file()]
assert len(msg_files) == 1
msg_files[0].unlink()

self.switch_to_firefox_driver()

yield

self.switch_to_torbrowser_driver()

def test_download_source_unread(self, missing_msg_file):
"""Test behavior when the journalist clicks on the source's "n unread" button from the
journalist home page."""
self._journalist_logs_in()
self._journalist_clicks_source_unread()
self._journalist_sees_missing_file_error_message()
self._is_on_journalist_homepage()

def test_select_source_and_download_all(self, missing_msg_file):
"""Test behavior when the journalist selects the source then clicks the "Download" button
from the journalist home page."""
self._journalist_logs_in()
self._journalist_selects_first_source_then_download_all()
self._journalist_sees_missing_file_error_message()
self._is_on_journalist_homepage()

def test_select_source_and_download_unread(self, missing_msg_file):
"""Test behavior when the journalist selects the source then clicks the "Download Unread"
button from the journalist home page."""
self._journalist_logs_in()
self._journalist_selects_first_source_then_download_unread()
self._journalist_sees_missing_file_error_message()
self._is_on_journalist_homepage()

def test_download_message(self, missing_msg_file):
"""Test behavior when the journalist clicks on the individual message from the source page.
"""
self._journalist_logs_in()
self._journalist_checks_messages()
self._journalist_downloads_message_missing_file()
self._journalist_sees_missing_file_error_message()
self._journalist_is_on_collection_page()

def test_select_message_and_download_selected(self, missing_msg_file):
"""Test behavior when the journalist selects the individual message from the source page
then clicks "Download Selected"."""
self._journalist_logs_in()
self._journalist_selects_the_first_source()
self._journalist_selects_message_then_download_selected()
self._journalist_sees_missing_file_error_message()
self._journalist_is_on_collection_page()
93 changes: 93 additions & 0 deletions securedrop/tests/test_journalist.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
import base64
import binascii
import io
from mock import call
import os
from pathlib import Path
import pytest
import random
import zipfile
Expand Down Expand Up @@ -2286,6 +2288,97 @@ def test_download_selected_submissions_previously_downloaded(
)


@pytest.fixture(scope="function")
def selected_missing_files(journalist_app, test_source):
"""Fixture for the download tests with missing files in storage."""
source = Source.query.get(test_source["id"])
submissions = utils.db_helper.submit(source, 2)
selected = sorted([s.filename for s in submissions])

storage_path = Path(journalist_app.storage.storage_path)
msg_files = sorted([p for p in storage_path.rglob("*") if p.is_file()])
assert len(msg_files) == 2
for file in msg_files:
file.unlink()

yield selected


def test_download_selected_submissions_missing_files(
journalist_app, test_journo, test_source, mocker, selected_missing_files
):
"""Tests download of selected submissions with missing files in storage."""
mocked_error_logger = mocker.patch('journalist.app.logger.error')
journo = Journalist.query.get(test_journo["id"])

with journalist_app.test_client() as app:
_login_user(
app,
journo.username,
test_journo["password"],
test_journo["otp_secret"]
)
resp = app.post(
url_for("main.bulk"),
data=dict(
action="download",
filesystem_id=test_source["filesystem_id"],
doc_names_selected=selected_missing_files,
)
)

assert resp.status_code == 302

expected_calls = []
for file in selected_missing_files:
missing_file = (
Path(journalist_app.storage.storage_path)
.joinpath(test_source["filesystem_id"])
.joinpath(file)
.as_posix()
)
expected_calls.append(call("File {} not found".format(missing_file)))

mocked_error_logger.assert_has_calls(expected_calls)


def test_download_single_submission_missing_file(
journalist_app, test_journo, test_source, mocker, selected_missing_files
):
"""Tests download of single submissions with missing files in storage."""
mocked_error_logger = mocker.patch('journalist.app.logger.error')
journo = Journalist.query.get(test_journo["id"])
missing_file = selected_missing_files[0]

with journalist_app.test_client() as app:
_login_user(
app,
journo.username,
test_journo["password"],
test_journo["otp_secret"]
)
resp = app.get(
url_for(
"col.download_single_file",
filesystem_id=test_source["filesystem_id"],
fn=missing_file,
)
)

assert resp.status_code == 302

missing_file = (
Path(journalist_app.storage.storage_path)
.joinpath(test_source["filesystem_id"])
.joinpath(missing_file)
.as_posix()
)

mocked_error_logger.assert_called_once_with(
"File {} not found".format(missing_file)
)


def test_download_unread_all_sources(journalist_app, test_journo):
"""
Test that downloading all unread creates a zip that contains all unread submissions from the
Expand Down