-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathvcat.go
102 lines (88 loc) · 3.36 KB
/
vcat.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
package vcat
import "fmt"
// GetVideoWithLanguage fetches the video at url and returns its data together
// with the transcript in the language identified by languageCode.
//
// languageCode must be a language code such as "en", not a language name such
// as "English".
//
// The code is not validated here. Check beforehand that it is offered for the
// content, for example with `vcat.GetAvailableLanguages`.
func GetVideoWithLanguage(url, languageCode string) (*Video, error) {
	video, err := getVideo(url, languageCode)
	return video, err
}
// GetVideo fetches the video at url and returns its data together with the
// English transcript.
//
// It is shorthand for `vcat.GetVideoWithLanguage(url, "en")`.
func GetVideo(url string) (*Video, error) {
	const defaultLanguageCode = "en"
	return GetVideoWithLanguage(url, defaultLanguageCode)
}
// GetAvailableLanguages lists the transcript languages that can be requested
// for the video at url.
//
// The entries are built from the translation languages listed in the video's
// caption tracklist; each entry carries the language name and its code.
//
// The returned language names (not codes) could be translated to the language
// of your location. I.e. if the process calling this function has a Spanish IP,
// the names of the available languages are going to be in Spanish.
func GetAvailableLanguages(url string) ([]AvailableLanguage, error) {
	detail, err := getVideoDetail(url)
	if err != nil {
		return nil, err
	}

	translations := detail.captions.PlayerCaptionsTracklistRenderer.TranslationLanguages
	available := make([]AvailableLanguage, len(translations))
	for i, lang := range translations {
		available[i] = AvailableLanguage{
			Name: lang.LanguageName.SimpleText,
			Code: lang.LanguageCode,
		}
	}
	return available, nil
}
// getVideo takes in a base URL for the video, and the language code, to return
// the video metadata along with the transcript.
//
// The transcript URL is the base URL of the video's first caption track. When
// languageCode is anything other than "en", it is appended to that URL as the
// "tlang" query parameter.
//
// An error is returned when the video detail cannot be fetched, when the video
// has no caption tracks, or when the transcript cannot be fetched or parsed.
//
// It is up to the caller to validate the passed in languageCode is supported
// for this video URL.
func getVideo(url, languageCode string) (*Video, error) {
	detail, err := getVideoDetail(url)
	if err != nil {
		return nil, err
	}

	// Guard the index below: without any caption track there is no transcript
	// URL to start from, and indexing an empty slice would panic.
	tracks := detail.captions.PlayerCaptionsTracklistRenderer.CaptionTracks
	if len(tracks) == 0 {
		return nil, fmt.Errorf("no caption tracks available for video %q", url)
	}

	transcriptUrl := tracks[0].BaseUrl
	// Only include the language if it isn't English
	if languageCode != "en" {
		// @TODO: Should we validate the passed in languageCode is supported for this video? Otherwise we are wasting http requests.
		transcriptUrl += "&tlang=" + languageCode
	}

	chunks, err := getTranscriptFromUrl(transcriptUrl)
	if err != nil {
		return nil, err
	}

	return &Video{
		Metadata:   detail.metadata,
		Transcript: chunks,
	}, nil
}
// getVideoDetail requests the base video URL and returns the raw detail parsed
// from the response body by getRawVideoDetailFromInitialHttpResponse.
//
// A request failure is wrapped with %w, so callers can still inspect the
// underlying error with errors.Is / errors.As. Parsing errors are returned
// unchanged.
func getVideoDetail(url string) (*rawVideoDetail, error) {
	body, err := do(httpclient, url)
	if err != nil {
		return nil, fmt.Errorf("could not request video detail, got err=%w", err)
	}
	return getRawVideoDetailFromInitialHttpResponse(body)
}
// getTranscriptFromUrl requests the actual transcript URL, parses the response
// with getTranscriptFromXMLResponse, and converts every parsed text entry into
// a TranscriptTextChunk.
//
// Entries are copied one-to-one, in order, without any special chunking. It is
// the raw YouTube transcript.
func getTranscriptFromUrl(transcriptUrl string) ([]TranscriptTextChunk, error) {
	raw, err := do(httpclient, transcriptUrl)
	if err != nil {
		return nil, err
	}

	parsed, err := getTranscriptFromXMLResponse(raw)
	if err != nil {
		return nil, err
	}

	result := make([]TranscriptTextChunk, len(parsed.Text))
	for i, entry := range parsed.Text {
		result[i] = TranscriptTextChunk{
			Start:    entry.Start,
			End:      entry.End,
			Duration: entry.Duration,
			Text:     entry.Text,
		}
	}
	return result, nil
}