Skip to content

Commit

Permalink
Make transcripts indexable for LMS search (#294)
Browse files Browse the repository at this point in the history
* Make transcripts indexable for LMS search

* Fix javascript unit tests
  • Loading branch information
wowkalucky committed Jan 24, 2018
1 parent c4543f3 commit fb2cdc4
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 3 deletions.
12 changes: 12 additions & 0 deletions video_xblock/mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,18 @@ def convert_caps_to_vtt(caps):
return WebVTTWriter().write(reader().read(caps))
return u''

@staticmethod
def vtt_to_text(vtt_content):
    """
    Utility method to extract plain text from WebVTT format transcript.

    Keeps only the cue text. The following lines are dropped:
      * the leading ``WEBVTT`` file header (with or without trailing header text);
      * cue timing lines (anything containing ``-->``);
      * blank and whitespace-only lines.

    Arguments:
        vtt_content (str): Raw WebVTT transcript content. May be empty or None.
    Returns:
        str: Cue text lines, stripped of surrounding whitespace and joined
            with single spaces. Empty string if there is nothing to extract.
    """
    if not vtt_content:
        return ''

    text_lines = []
    # Only the first non-blank line may be the file header.
    header_pending = True
    for raw_line in vtt_content.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        if header_pending:
            header_pending = False
            # The header is "WEBVTT" alone or followed by a space/tab and free text;
            # it is file metadata and must not pollute the search index.
            if line == 'WEBVTT' or line.startswith(('WEBVTT ', 'WEBVTT\t')):
                continue
        if '-->' in line:
            continue
        text_lines.append(line)
    return ' '.join(text_lines)

def route_transcripts(self):
"""
Re-route transcripts to appropriate handler.
Expand Down
4 changes: 4 additions & 0 deletions video_xblock/static/vendor/js/jquery.min.js

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions video_xblock/static/video-xblock-karma.conf.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,15 @@ module.exports = function (config) {

// list of files / patterns to load in the browser
files: [
'https://ajax.googleapis.com/ajax/libs/jquery/3.1.0/jquery.min.js',
'vendor/js/jquery.min.js',
'vendor/js/video.min.js',
'js/spec/test-context.js',
'js/base.js',
'js/runtime-handlers.js',
'js/studio-edit/utils.js',
'js/studio-edit/transcripts-manual-upload.js',
'js/spec/*spec.js',
'js/spec/studio-edit/*spec.js'
'js/spec/studio-edit/*spec.js',
'js/studio-edit/transcripts-manual-upload.js'
],
plugins: [
'karma-chrome-launcher',
Expand Down
45 changes: 45 additions & 0 deletions video_xblock/video_xblock.py
Original file line number Diff line number Diff line change
Expand Up @@ -834,3 +834,48 @@ def get_enabled_managed_transcripts(self):
except ValueError:
log.exception("JSON parser can't handle 'self.transcripts' field value: {}".format(self.transcripts))
return []

def index_dictionary(self):
    """
    Part of edx-platform search index API.

    Is invoked during course [re]index operation.
    Takes enabled transcripts' content and puts it to search index.

    For every transcript returned by `self.route_transcripts()`:
      * manual/default transcripts are read from the contentstore asset
        referenced by the part of the transcript URL after the last '@';
      * 3PlayMedia transcripts are fetched via `fetch_single_3pm_translation`.
    Fetched content is converted to plain text and stored under the transcript's
    language key. A transcript which fails to be fetched or parsed is logged
    and skipped, so it does not prevent the remaining ones from being indexed.

    Returns:
        dict: Parent's index dictionary with video data merged into its
            "content" entry and "content_type" set to "Video".
    """
    xblock_body = super(VideoXBlock, self).index_dictionary()
    video_body = {"display_name": self.display_name}

    for transcript in self.route_transcripts():
        # Reset per transcript so content of a previous one is never reused.
        content = None
        try:
            source = transcript['source']
            if source in [TranscriptSource.MANUAL, TranscriptSource.DEFAULT]:
                # The asset name is resolved inside the `try` block: a transcript with
                # a missing or malformed 'url' is logged and skipped, not raised.
                asset_file_name = transcript[u'url'].split('@')[-1]
                asset_location = self.static_content.compute_location(self.course_key, asset_file_name)
                asset = self.contentstore().find(asset_location)  # pylint: disable=not-callable
                content = asset.data
            elif source == TranscriptSource.THREE_PLAY_MEDIA:
                external_transcript = self.fetch_single_3pm_translation({
                    'id': transcript['id'], 'language_id': transcript['lang_id']
                })
                content = external_transcript and external_transcript.content
        except IOError:
            log.exception("Transcript indexing failure: can't fetch external transcript[{}]".format(transcript))
        except (ValueError, KeyError, TypeError, AttributeError):
            log.exception(
                "Transcript indexing failure: can't parse transcript for indexing: [{}]".format(transcript)
            )
        else:
            if content:
                video_body[transcript[u'lang']] = self.vtt_to_text(content)

    if "content" in xblock_body:
        xblock_body["content"].update(video_body)
    else:
        xblock_body["content"] = video_body
    xblock_body["content_type"] = "Video"

    return xblock_body

0 comments on commit fb2cdc4

Please sign in to comment.