Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SE-3520] Fixes Transcripts Incompletely Uploaded to S3 Bucket #266

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion edxval/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -1194,7 +1194,7 @@ def import_transcript_from_fs(edx_video_id, language_code, file_name, provider,
video_id=edx_video_id,
language_code=language_code,
file_format=file_format,
content=ContentFile(file_content),
content=ContentFile(file_content.encode('utf-8')),
provider=provider
)

Expand Down
42 changes: 41 additions & 1 deletion edxval/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from io import open
from tempfile import mkdtemp

import chardet
import mock
from ddt import data, ddt, unpack
from django.conf import settings
Expand All @@ -24,7 +25,7 @@
from fs.osfs import OSFS
from fs.path import combine
from lxml import etree
from mock import patch
from mock import Mock, patch
from rest_framework import status

from edxval import api, utils
Expand Down Expand Up @@ -1895,6 +1896,45 @@ def test_import_transcript_from_fs_resource_not_found(self, mock_logger):
edx_video_id
)

@patch('edxval.api.create_video_transcript')
@patch('edxval.api.get_transcript_format', Mock())
def test_import_transcript_from_fs_created_transcript_content_encoding(self, mock_create_video_transcript):
    """
    Verify that `import_transcript_from_fs` passes `utf-8` encoded file content to `create_video_transcript`.

    `create_video_transcript` and `get_transcript_format` are both patched, so only
    the `content` keyword argument handed to the former is inspected here.
    """
    edx_video_id = constants.VIDEO_DICT_FISH['edx_video_id']
    language_code = 'en'
    transcript_file_name = 'transcript.txt'

    # Build the transcript fixture. The text deliberately contains non-ascii
    # characters so that chardet reports the encoding as utf-8 rather than ascii.
    video_transcript = dict(constants.VIDEO_TRANSCRIPT_CUSTOM_SJSON)
    video_transcript.update(
        video_id=edx_video_id,
        file_data='Hello, edX greets you. random utf-8 characters: éâô',
    )

    # Write the transcript file into the file system that the import reads from.
    utils.create_file_in_fs(
        video_transcript['file_data'],
        transcript_file_name,
        self.file_system,
        constants.EXPORT_IMPORT_STATIC_DIR
    )

    import_kwargs = {
        'edx_video_id': edx_video_id,
        'language_code': language_code,
        'file_name': transcript_file_name,
        'provider': TranscriptProviderType.CUSTOM,
        'resource_fs': self.file_system,
        'static_dir': constants.EXPORT_IMPORT_STATIC_DIR,
    }
    api.import_transcript_from_fs(**import_kwargs)

    # Inspect the content object that the mocked `create_video_transcript` received.
    passed_content = mock_create_video_transcript.call_args.kwargs['content']
    detection = chardet.detect(passed_content.read())
    content_encoding = detection['encoding']

    self.assertEqual(content_encoding, 'utf-8')

@patch('edxval.api.logger')
def test_import_transcript_from_fs_invalid_format(self, mock_logger):
"""
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def load_requirements(*requirements_paths):
return list(requirements)


VERSION = '1.4.2'
VERSION = '1.4.3'

if sys.argv[-1] == 'tag':
print("Tagging the version on github:")
Expand Down