-
Notifications
You must be signed in to change notification settings - Fork 60
/
ssml-builder.go
184 lines (137 loc) · 6.73 KB
/
ssml-builder.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
package skillserver
import (
"bytes"
"fmt"
)
/**
* Details about the Speech Synthesis Markup Language (SSML) can be found on this page:
* https://developer.amazon.com/public/solutions/alexa/alexa-skills-kit/docs/speech-synthesis-markup-language-ssml-reference
*/
// Helper Types
// SSMLTextBuilder implements the builder pattern for constructing a speech string
// which may or may not contain SSML tags. Each Append* method writes one fragment
// and returns the same builder so that calls can be chained; Build wraps the
// accumulated fragments in a <speak> element.
type SSMLTextBuilder struct {
// buffer accumulates the fragments in the order they were appended. The methods
// dereference it without a nil check, so create instances with NewSSMLTextBuilder.
buffer *bytes.Buffer
}
// WordRole is used as the role argument in the AppendPartOfSpeech method. This should
// be one of the constants defined in the Amazon SSML Reference docs.
type WordRole string

const (
	// PresentSimple is used to pronounce the word as a verb (present simple).
	PresentSimple WordRole = "amazon:VB"
	// PastParticiple is used to pronounce the word as a past participle.
	PastParticiple WordRole = "amazon:VBD"
	// PastParticle is the original, misspelled name of PastParticiple. It is kept
	// with the same value so that existing callers continue to compile.
	//
	// Deprecated: use PastParticiple instead.
	PastParticle = PastParticiple
	// Noun is used to pronounce the word as a noun.
	Noun WordRole = "amazon:NN"
	// AlternateSense is used to select the alternate sense for a specific word. According
	// to the Amazon SSML Reference:
	// "Use the non-default sense of the word. For example, the noun "bass" is pronounced
	// differently depending on meaning. The "default" meaning is the lowest part of the
	// musical range. The alternate sense (which is still a noun) is a freshwater fish.
	// Specifying <speak><w role="amazon:SENSE_1">bass</w></speak> renders the non-default
	// pronunciation (freshwater fish)."
	AlternateSense WordRole = "amazon:SENSE_1"
)
// PhoneticAlphabet names the phonetic alphabet that a phoneme string is written in.
// It is the type of the alphabet argument of AppendPhoneme.
type PhoneticAlphabet string
const (
// Ipa is the International Phonetic Alphabet
Ipa PhoneticAlphabet = "ipa"
// XSampa is the Extended Speech Assessment Methods Phonetic Alphabet
XSampa PhoneticAlphabet = "x-sampa"
)
// NewSSMLTextBuilder is a convenience constructor that returns a builder backed
// by a fresh, empty buffer, i.e. one with no speech text added yet.
func NewSSMLTextBuilder() *SSMLTextBuilder {
	empty := bytes.NewBufferString("")
	return &SSMLTextBuilder{buffer: empty}
}
// AppendPlainSpeech adds text to the output as regular speech: it is written
// exactly as given, with no surrounding SSML tag and no escaping. The builder
// is returned so that calls can be chained.
func (builder *SSMLTextBuilder) AppendPlainSpeech(text string) *SSMLTextBuilder {
	speech := builder.buffer
	speech.WriteString(text)
	return builder
}
// AppendAmazonEffect wraps text in an <amazon:effect> element whose name
// attribute is the supplied effect name. Both values are inserted verbatim.
// Check the SSML reference page for the list of available effects.
func (builder *SSMLTextBuilder) AppendAmazonEffect(text, name string) *SSMLTextBuilder {
	fmt.Fprintf(builder.buffer, "<amazon:effect name=\"%s\">%s</amazon:effect>", name, text)
	return builder
}
// AppendAudio inserts an <audio> element that points at src, so that the audio
// file is played at this position within the text to speech response. src is
// inserted verbatim as the value of the src attribute.
func (builder *SSMLTextBuilder) AppendAudio(src string) *SSMLTextBuilder {
	fmt.Fprintf(builder.buffer, "<audio src=\"%s\"/>", src)
	return builder
}
// AppendBreak adds a pause to the text to speech output.
//
// strength selects a named pause length; when it is empty, "medium" is used.
// time is an explicit duration such as "500ms" or "3s"; when it is empty the
// time attribute is left out of the tag altogether, because an empty time=""
// attribute is not a valid SSML duration. Both values are inserted verbatim.
// Refer to the SSML reference for the available strength values.
func (builder *SSMLTextBuilder) AppendBreak(strength, time string) *SSMLTextBuilder {
	if strength == "" {
		// The default strength is medium
		strength = "medium"
	}
	if time == "" {
		// No duration requested: emit a strength-only break.
		fmt.Fprintf(builder.buffer, "<break strength=\"%s\"/>", strength)
		return builder
	}
	fmt.Fprintf(builder.buffer, "<break strength=\"%s\" time=\"%s\"/>", strength, time)
	return builder
}
// AppendEmphasis wraps text in an <emphasis> element with the given level, so
// that it is spoken with that degree of emphasis. Both values are inserted
// verbatim. Refer to the SSML reference for the available level values.
func (builder *SSMLTextBuilder) AppendEmphasis(text, level string) *SSMLTextBuilder {
	fmt.Fprintf(builder.buffer, "<emphasis level=\"%s\">%s</emphasis>", level, text)
	return builder
}
// AppendParagraph wraps text in a <p> element so that it is spoken as a
// paragraph of its own, with extra strong breaks before and after it. The text
// is inserted verbatim.
func (builder *SSMLTextBuilder) AppendParagraph(text string) *SSMLTextBuilder {
	fmt.Fprintf(builder.buffer, "<p>%s</p>", text)
	return builder
}
// AppendProsody provides a way to modify the rate, pitch, and volume of a piece
// of spoken text by wrapping it in a <prosody> element.
//
// rate, pitch and volume are each optional: an argument that is the empty
// string is left out of the tag instead of being written as an empty attribute
// (for example rate=""), which is not a valid value. When all three are given
// the output is <prosody rate="..." pitch="..." volume="...">text</prosody>.
// All values are inserted verbatim.
func (builder *SSMLTextBuilder) AppendProsody(text, rate, pitch, volume string) *SSMLTextBuilder {
	// Attribute order is fixed so the output is deterministic.
	attrs := [...]struct{ name, value string }{
		{"rate", rate},
		{"pitch", pitch},
		{"volume", volume},
	}

	builder.buffer.WriteString("<prosody")
	for _, attr := range attrs {
		if attr.value == "" {
			continue // unset: omit the attribute entirely
		}
		fmt.Fprintf(builder.buffer, " %s=\"%s\"", attr.name, attr.value)
	}
	fmt.Fprintf(builder.buffer, ">%s</prosody>", text)
	return builder
}
// AppendSentence wraps text in an <s> element, marking it as a sentence of its
// own that is spoken with strong breaks before and after. The text is inserted
// verbatim.
func (builder *SSMLTextBuilder) AppendSentence(text string) *SSMLTextBuilder {
	fmt.Fprintf(builder.buffer, "<s>%s</s>", text)
	return builder
}
// AppendPartOfSpeech explicitly defines the part of speech for a word by
// wrapping text in a <w> element with the given role. When role is empty
// nothing is appended at all, i.e. the text is dropped rather than written as
// plain speech. Both values are inserted verbatim.
func (builder *SSMLTextBuilder) AppendPartOfSpeech(role WordRole, text string) *SSMLTextBuilder {
	if role == "" {
		return builder
	}
	fmt.Fprintf(builder.buffer, "<w role=\"%s\">%s</w>", role, text)
	return builder
}
// AppendSubstitution wraps text in a <sub> element whose alias attribute holds
// the alternate pronunciation to use for that text. Both values are inserted
// verbatim.
func (builder *SSMLTextBuilder) AppendSubstitution(text, alias string) *SSMLTextBuilder {
	fmt.Fprintf(builder.buffer, "<sub alias=\"%s\">%s</sub>", alias, text)
	return builder
}
// AppendSayAs wraps text in a <say-as> element that tells the speech engine how
// the text should be interpreted, for example as a list of individual
// characters or as digits read out one at a time.
//
// The format argument is only written when interpretAs is exactly "date"; for
// every other value it is ignored. When interpretAs is empty nothing is
// appended. Refer to the SSML reference for the valid interpretAs values.
func (builder *SSMLTextBuilder) AppendSayAs(interpretAs, format, text string) *SSMLTextBuilder {
	switch interpretAs {
	case "":
		// No interpretation requested: append nothing.
	case "date":
		fmt.Fprintf(builder.buffer, "<say-as interpret-as=\"%s\" format=\"%s\">%s</say-as>",
			interpretAs, format, text)
	default:
		fmt.Fprintf(builder.buffer, "<say-as interpret-as=\"%s\">%s</say-as>", interpretAs, text)
	}
	return builder
}
// AppendPhoneme wraps text in a <phoneme> element carrying its phonetic
// pronunciation (ph) written in the given alphabet. If any of alphabet, phoneme
// or text is empty, nothing is appended. All values are inserted verbatim.
func (builder *SSMLTextBuilder) AppendPhoneme(alphabet PhoneticAlphabet, phoneme, text string) *SSMLTextBuilder {
	if phoneme == "" || text == "" || alphabet == PhoneticAlphabet("") {
		return builder
	}
	fmt.Fprintf(builder.buffer, "<phoneme alphabet=\"%s\" ph=\"%s\">%s</phoneme>", alphabet, phoneme, text)
	return builder
}
// Build returns the finished speech string: everything appended to the builder
// so far, in order, wrapped in a single <speak> element. It does not modify the
// builder's contents.
func (builder *SSMLTextBuilder) Build() string {
	speech := builder.buffer.String()
	return fmt.Sprintf("<speak>%s</speak>", speech)
}