-
Notifications
You must be signed in to change notification settings - Fork 629
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Create Audio Feature in SDK #344
Changes from 2 commits
81d54e6
0be2512
d843de7
5d1b040
3de9514
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -79,9 +79,14 @@ PYBIND11_MODULE(core, m) { | |
auto tablet = self.tablet(tag); | ||
return vs::components::ImageReader(self.mode(), tablet); | ||
}) | ||
.def("get_text", [](vs::LogReader& self, const std::string& tag) { | ||
.def("get_text", | ||
[](vs::LogReader& self, const std::string& tag) { | ||
auto tablet = self.tablet(tag); | ||
return vs::components::TextReader(tablet); | ||
}) | ||
.def("get_audio", [](vs::LogReader& self, const std::string& tag) { | ||
auto tablet = self.tablet(tag); | ||
return vs::components::TextReader(tablet); | ||
return vs::components::AudioReader(self.mode(), tablet); | ||
}); | ||
|
||
// clang-format on | ||
|
@@ -119,10 +124,19 @@ PYBIND11_MODULE(core, m) { | |
auto tablet = self.AddTablet(tag); | ||
return vs::components::Image(tablet, num_samples, step_cycle); | ||
}) | ||
.def("new_text", [](vs::LogWriter& self, const std::string& tag) { | ||
auto tablet = self.AddTablet(tag); | ||
return vs::components::Text(tablet); | ||
}); | ||
.def("new_text", | ||
[](vs::LogWriter& self, const std::string& tag) { | ||
auto tablet = self.AddTablet(tag); | ||
return vs::components::Text(tablet); | ||
}) | ||
.def("new_audio", | ||
[](vs::LogWriter& self, | ||
const std::string& tag, | ||
int num_samples, | ||
int step_cycle) { | ||
auto tablet = self.AddTablet(tag); | ||
return vs::components::Audio(tablet, num_samples, step_cycle); | ||
}); | ||
|
||
//------------------- components -------------------- | ||
#define ADD_SCALAR_READER(T) \ | ||
|
@@ -219,6 +233,61 @@ PYBIND11_MODULE(core, m) { | |
.def("total_records", &cp::TextReader::total_records) | ||
.def("size", &cp::TextReader::size); | ||
|
||
// Python binding for the writer side of the Audio component. Instances are
// handed out by LogWriter.new_audio rather than constructed directly.
py::class_<cp::Audio>(m, "AudioWriter", R"pbdoc(
        PyBind class. Must instantiate through the LogWriter.
      )pbdoc")
    // The docstring here used to be a copy of the class docstring.
    .def("set_caption", &cp::Audio::SetCaption, R"pbdoc(
        Set the caption of this audio component.
      )pbdoc")
    .def("start_sampling", &cp::Audio::StartSampling, R"pbdoc(
        Start a sampling period, this interface will start a new reservoir sampling phase.
      )pbdoc")
    .def("is_sample_taken", &cp::Audio::IsSampleTaken, R"pbdoc(
        Will this sample be taken, this interface is introduced to reduce the cost
        of copy audio data, by testing whether this audio will be sampled, and only
        copy data when it should be sampled. In that way, most of un-sampled audio
        data need not be copied or processed at all.

        :return: Index of the slot to store this sample in, or -1 if the sample is not taken
        :rtype: integer
      )pbdoc")
    .def("finish_sampling", &cp::Audio::FinishSampling, R"pbdoc(
        End a sampling period, it will clear all states for reservoir sampling.
      )pbdoc")
    .def("set_sample", &cp::Audio::SetSample, R"pbdoc(
        Store the flattened audio data with sample rate specified.

        :param index: Slot index, as returned by is_sample_taken
        :type index: integer
        :param sample_rate: Sample rate of audio
        :type sample_rate: integer
        :param audio_data: Flattened audio data
        :type audio_data: list
      )pbdoc")
    .def("add_sample", &cp::Audio::AddSample, R"pbdoc(
        A combined interface for is_sample_taken and set_sample, simpler but less efficient.

        :param sample_rate: Sample rate of audio
        :type sample_rate: integer
        :param audio_data: Flattened audio data
        :type audio_data: list
      )pbdoc");
|
||
py::class_<cp::AudioReader::AudioRecord>(m, "AudioRecord") | ||
// TODO(ChunweiYan) make these copyless. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Either remove the TODO or update it to yours There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ya |
||
.def("data", [](cp::AudioReader::AudioRecord& self) { return self.data; }) | ||
.def("sample_rate", | ||
[](cp::AudioReader::AudioRecord& self) { return self.sample_rate; }) | ||
.def("step_id", | ||
[](cp::AudioReader::AudioRecord& self) { return self.step_id; }); | ||
|
||
py::class_<cp::AudioReader>(m, "AudioReader") | ||
.def("caption", &cp::AudioReader::caption) | ||
.def("num_records", &cp::AudioReader::num_records) | ||
.def("num_samples", &cp::AudioReader::num_samples) | ||
.def("record", &cp::AudioReader::record) | ||
.def("timestamp", &cp::AudioReader::timestamp); | ||
|
||
#define ADD_HISTOGRAM_WRITER(T) \ | ||
py::class_<cp::Histogram<T>>(m, "HistogramWriter__" #T, \ | ||
R"pbdoc(PyBind class. Must instantiate through the LogWriter.)pbdoc") \ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -222,11 +222,6 @@ void Image::SetSample(int index, | |
CHECK_LT(index, num_samples_); | ||
CHECK_LE(index, num_records_); | ||
|
||
// trick to store int8 to protobuf | ||
std::vector<byte_t> data_str(data.size()); | ||
for (int i = 0; i < data.size(); i++) { | ||
data_str[i] = data[i]; | ||
} | ||
Uint8Image image(new_shape[2], new_shape[0] * new_shape[1]); | ||
NormalizeImage(&image, &data[0], new_shape[0] * new_shape[1], new_shape[2]); | ||
|
||
|
@@ -352,6 +347,111 @@ std::string TextReader::caption() const { | |
|
||
size_t TextReader::size() const { return reader_.total_records(); } | ||
|
||
void Audio::StartSampling() { | ||
if (!ToSampleThisStep()) return; | ||
|
||
step_ = writer_.AddRecord(); | ||
step_.SetId(step_id_); | ||
|
||
time_t time = std::time(nullptr); | ||
step_.SetTimeStamp(time); | ||
|
||
// resize record | ||
for (int i = 0; i < num_samples_; i++) { | ||
step_.AddData(); | ||
} | ||
num_records_ = 0; | ||
} | ||
|
||
int Audio::IsSampleTaken() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Minor stuff, the function name is implying that the function will return a BOOL, but the function returns an index. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. true |
||
if (!ToSampleThisStep()) return -1; | ||
num_records_++; | ||
if (num_records_ <= num_samples_) { | ||
return num_records_ - 1; | ||
} | ||
float prob = float(num_samples_) / num_records_; | ||
float randv = (float)rand() / RAND_MAX; | ||
if (randv < prob) { | ||
// take this sample | ||
int index = rand() % num_samples_; | ||
return index; | ||
} | ||
return -1; | ||
} | ||
|
||
void Audio::FinishSampling() { | ||
step_id_++; | ||
if (ToSampleThisStep()) { | ||
writer_.parent()->PersistToDisk(); | ||
} | ||
} | ||
|
||
void Audio::AddSample(int sample_rate, const std::vector<value_t>& data) { | ||
auto idx = IsSampleTaken(); | ||
if (idx >= 0) { | ||
SetSample(idx, sample_rate, data); | ||
} | ||
} | ||
|
||
void Audio::SetSample(int index, | ||
int sample_rate, | ||
const std::vector<value_t>& data) { | ||
CHECK_GT(sample_rate, 0) | ||
<< "sample rate should be something like 6000, 8000 or 44100"; | ||
CHECK_LT(index, num_samples_) | ||
<< "index should be less than number of samples"; | ||
CHECK_LE(index, num_records_) | ||
<< "index should be less than or equal to number of records"; | ||
|
||
// convert float vector to char vector | ||
std::vector<char> data_str(data.size()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it seems that data_str can directly be a std::string data_str(data.size());
...
BinaryRecord brcd(xxdir, std::move(data_str)); There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok, I end up just use |
||
for (int i = 0; i < data.size(); i++) { | ||
data_str[i] = data[i]; | ||
} | ||
|
||
BinaryRecord brcd(GenBinaryRecordDir(step_.parent()->dir()), | ||
std::string(data_str.data())); | ||
brcd.tofile(); | ||
|
||
auto entry = step_.MutableData<std::vector<byte_t>>(index); | ||
// update record | ||
auto old_hash = entry.reader().GetRaw(); | ||
if (!old_hash.empty()) { | ||
std::string old_path = | ||
GenBinaryRecordDir(step_.parent()->dir()) + "/" + old_hash; | ||
CHECK_EQ(std::remove(old_path.c_str()), 0) << "delete old binary record " | ||
<< old_path << " failed"; | ||
} | ||
entry.SetRaw(brcd.filename()); | ||
} | ||
|
||
std::string AudioReader::caption() { | ||
CHECK_EQ(reader_.captions().size(), 1); | ||
auto caption = reader_.captions().front(); | ||
if (LogReader::TagMatchMode(caption, mode_)) { | ||
return LogReader::GenReadableTag(mode_, caption); | ||
} | ||
string::TagDecode(caption); | ||
return caption; | ||
} | ||
|
||
// Loads the audio sample stored in slot `index` of the record at `offset`.
//
// The slot holds the file name of a binary record; the file is read from the
// binary-record directory under g_log_dir. brcd.data is the payload as it was
// saved to the file, and each byte is widened to an int to form res.data.
//
// NOTE(review): nothing here assigns res.sample_rate, although the Python
// binding exposes it. The record is value-initialised so the field is at
// least not indeterminate — confirm where the sample rate should come from.
AudioReader::AudioRecord AudioReader::record(int offset, int index) {
  AudioRecord res{};
  auto record = reader_.record(offset);
  auto entry = record.data(index);
  auto filename = entry.GetRaw();
  CHECK(!g_log_dir.empty())
      << "g_log_dir should be set in LogReader construction";
  BinaryRecordReader brcd(GenBinaryRecordDir(g_log_dir), filename);

  std::transform(brcd.data.begin(),
                 brcd.data.end(),
                 std::back_inserter(res.data),
                 [](byte_t raw) { return static_cast<int>(raw); });
  res.step_id = record.id();
  return res;
}
|
||
} // namespace components | ||
|
||
} // namespace visualdl |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Will it be weird to have the documentation published on the website while the code is not yet in the released pip package? I am not sure what the best approach is here.