Skip to content

Commit

Permalink
Create AsyncLargeFileManager
Browse files Browse the repository at this point in the history
  • Loading branch information
Mariko Wakabayashi committed Nov 20, 2020
1 parent 9aa57a7 commit 01a8664
Show file tree
Hide file tree
Showing 3 changed files with 93 additions and 16 deletions.
2 changes: 1 addition & 1 deletion jupyter_server/_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@

# Next beta/alpha/rc release: The version number for beta is X.Y.ZbN **without dots**.

version_info = (1, 0, 6, '')
version_info = (2, 0, 'twttr')
__version__ = '.'.join(map(str, version_info[:3])) + ''.join(version_info[3:])
70 changes: 69 additions & 1 deletion jupyter_server/services/contents/largefilemanager.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
from jupyter_server.services.contents.filemanager import FileContentsManager
from anyio import run_sync_in_worker_thread
from contextlib import contextmanager
from tornado import web
import nbformat
import base64
import os, io

from jupyter_server.services.contents.filemanager import AsyncFileContentsManager, FileContentsManager


class LargeFileManager(FileContentsManager):
"""Handle large file upload."""
Expand Down Expand Up @@ -71,3 +73,69 @@ def _save_large_file(self, os_path, content, format):
with io.open(os_path, 'ab') as f:
f.write(bcontent)


class AsyncLargeFileManager(AsyncFileContentsManager):
    """Asynchronous contents manager that accepts files uploaded in chunks.

    A model carrying a ``chunk`` key is treated as one piece of a chunked
    upload: chunk ``1`` is written through the parent's ``_save_file``, every
    other chunk value is appended to the existing file, and chunk ``-1``
    additionally triggers the post-save hook.  Models without a ``chunk`` key
    are delegated to the parent class unchanged.
    """

    async def save(self, model, path=''):
        """Save the file model and return the model with no content.

        Parameters
        ----------
        model : dict
            Contents model.  For a chunked upload it must contain ``chunk``,
            ``type`` (which must be ``'file'``) and ``content``; ``format``
            is read with ``model.get('format')``.
        path : str
            API path of the target file; surrounding slashes are stripped
            for chunked uploads.

        Returns
        -------
        dict
            The result of ``self.get(path, content=False)`` for chunked
            uploads, otherwise whatever the parent ``save`` returns.

        Raises
        ------
        tornado.web.HTTPError
            400 when the chunked model is missing ``type``/``content`` or is
            not of type ``'file'``; 500 when an unexpected error occurs while
            writing.
        """
        chunk = model.get('chunk', None)
        if chunk is None:
            # Not a chunked upload: the regular save path handles it.
            return await super().save(model, path)

        path = path.strip('/')

        if 'type' not in model:
            raise web.HTTPError(400, u'No file type provided')
        if model['type'] != 'file':
            raise web.HTTPError(400, u'File type "{}" is not supported for large file transfer'.format(model['type']))
        # The type is known to be 'file' here, so only the presence of the
        # content needs checking.
        if 'content' not in model:
            raise web.HTTPError(400, u'No file content provided')

        os_path = self._get_os_path(path)

        try:
            if chunk == 1:
                # First chunk: run the pre-save hook and write via the parent.
                self.log.debug("Saving %s", os_path)
                self.run_pre_save_hook(model=model, path=path)
                await super()._save_file(os_path, model['content'], model.get('format'))
            else:
                # Any later chunk (including the last, -1) is appended.
                await self._save_large_file(os_path, model['content'], model.get('format'))
        except web.HTTPError:
            raise
        except Exception as e:
            self.log.error(u'Error while saving file: %s %s', path, e, exc_info=True)
            raise web.HTTPError(500, u'Unexpected error while saving file: %s %s' %
                                (path, e)) from e

        model = await self.get(path, content=False)

        # Last chunk
        if chunk == -1:
            self.run_post_save_hook(model=model, os_path=os_path)
        return model

    async def _save_large_file(self, os_path, content, format):
        """Append one chunk of content to the file at ``os_path``.

        Parameters
        ----------
        os_path : str
            Filesystem path of the target file.  If it is a symlink, the
            link target (resolved relative to the link's directory) is
            written instead.
        content : str
            Chunk payload: plain text when ``format`` is ``'text'``,
            base64-encoded data when ``format`` is ``'base64'``.
        format : str
            Either ``'text'`` or ``'base64'``.

        Raises
        ------
        tornado.web.HTTPError
            400 when ``format`` is not supported or the content cannot be
            encoded/decoded.
        """
        if format not in {'text', 'base64'}:
            raise web.HTTPError(
                400,
                "Must specify format of file contents as 'text' or 'base64'",
            )
        try:
            if format == 'text':
                bcontent = content.encode('utf8')
            else:
                b64_bytes = content.encode('ascii')
                bcontent = base64.b64decode(b64_bytes)
        except Exception as e:
            raise web.HTTPError(
                400, u'Encoding error saving %s: %s' % (os_path, e)
            ) from e

        with self.perm_to_403(os_path):
            if os.path.islink(os_path):
                os_path = os.path.join(os.path.dirname(os_path), os.readlink(os_path))

            def _append_chunk():
                # Open, append and close entirely inside the worker thread so
                # that no blocking file I/O runs on the event loop.
                with io.open(os_path, 'ab') as f:
                    f.write(bcontent)

            # Errors raised in the worker thread surface at this await, which
            # is still inside the perm_to_403 context.
            await run_sync_in_worker_thread(_append_chunk)

37 changes: 23 additions & 14 deletions tests/services/contents/test_largefilemanager.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,28 @@
import pytest
import tornado

from jupyter_server.services.contents.largefilemanager import AsyncLargeFileManager, LargeFileManager
from jupyter_server.utils import ensure_async
from ...utils import expected_http_error


def test_save(jp_large_contents_manager):
@pytest.fixture(params=[LargeFileManager, AsyncLargeFileManager])
def jp_large_contents_manager(request, tmp_path):
    """Return a large-file contents manager rooted at ``tmp_path``.

    The fixture is parametrized over ``LargeFileManager`` and
    ``AsyncLargeFileManager``, so each test using it runs once per class.
    """
    manager_cls = request.param
    root = str(tmp_path)
    return manager_cls(root_dir=root)


async def test_save(jp_large_contents_manager):
cm = jp_large_contents_manager
model = cm.new_untitled(type='notebook')
model = await ensure_async(cm.new_untitled(type='notebook'))
name = model['name']
path = model['path']

# Get the model with 'content'
full_model = cm.get(path)
full_model = await ensure_async(cm.get(path))
# Save the notebook
model = cm.save(full_model, path)
model = await ensure_async(cm.save(full_model, path))
assert isinstance(model, dict)
assert 'name' in model
assert 'path' in model
Expand Down Expand Up @@ -43,26 +52,26 @@ def test_save(jp_large_contents_manager):
)
]
)
def test_bad_save(jp_large_contents_manager, model, err_message):
async def test_bad_save(jp_large_contents_manager, model, err_message):
with pytest.raises(tornado.web.HTTPError) as e:
jp_large_contents_manager.save(model, model['path'])
await ensure_async(jp_large_contents_manager.save(model, model['path']))
assert expected_http_error(e, 400, expected_message=err_message)


def test_saving_different_chunks(jp_large_contents_manager):
async def test_saving_different_chunks(jp_large_contents_manager):
cm = jp_large_contents_manager
model = {'name': 'test', 'path': 'test', 'type': 'file',
'content': u'test==', 'format': 'text'}
name = model['name']
path = model['path']
cm.save(model, path)
await ensure_async(cm.save(model, path))

for chunk in (1, 2, -1):
for fm in ('text', 'base64'):
full_model = cm.get(path)
full_model = await ensure_async(cm.get(path))
full_model['chunk'] = chunk
full_model['format'] = fm
model_res = cm.save(full_model, path)
model_res = await ensure_async(cm.save(full_model, path))
assert isinstance(model_res, dict)
assert 'name' in model_res
assert 'path' in model_res
Expand All @@ -71,16 +80,16 @@ def test_saving_different_chunks(jp_large_contents_manager):
assert model_res['path'] == path


def test_save_in_subdirectory(jp_large_contents_manager, tmp_path):
async def test_save_in_subdirectory(jp_large_contents_manager, tmp_path):
cm = jp_large_contents_manager
sub_dir = tmp_path / 'foo'
sub_dir.mkdir()
model = cm.new_untitled(path='/foo/', type='notebook')
model = await ensure_async(cm.new_untitled(path='/foo/', type='notebook'))
path = model['path']
model = cm.get(path)
model = await ensure_async(cm.get(path))

# Change the name in the model for rename
model = cm.save(model, path)
model = await ensure_async(cm.save(model, path))
assert isinstance(model, dict)
assert 'name' in model
assert 'path' in model
Expand Down

0 comments on commit 01a8664

Please sign in to comment.