Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create Audio Feature in SDK #344

Merged
merged 5 commits into from
Mar 29, 2018
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 75 additions & 6 deletions visualdl/logic/pybind.cc
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,14 @@ PYBIND11_MODULE(core, m) {
auto tablet = self.tablet(tag);
return vs::components::ImageReader(self.mode(), tablet);
})
.def("get_text", [](vs::LogReader& self, const std::string& tag) {
.def("get_text",
[](vs::LogReader& self, const std::string& tag) {
auto tablet = self.tablet(tag);
return vs::components::TextReader(tablet);
})
.def("get_audio", [](vs::LogReader& self, const std::string& tag) {
auto tablet = self.tablet(tag);
return vs::components::TextReader(tablet);
return vs::components::AudioReader(self.mode(), tablet);
});

// clang-format on
Expand Down Expand Up @@ -119,10 +124,19 @@ PYBIND11_MODULE(core, m) {
auto tablet = self.AddTablet(tag);
return vs::components::Image(tablet, num_samples, step_cycle);
})
.def("new_text", [](vs::LogWriter& self, const std::string& tag) {
auto tablet = self.AddTablet(tag);
return vs::components::Text(tablet);
});
.def("new_text",
[](vs::LogWriter& self, const std::string& tag) {
auto tablet = self.AddTablet(tag);
return vs::components::Text(tablet);
})
.def("new_audio",
[](vs::LogWriter& self,
const std::string& tag,
int num_samples,
int step_cycle) {
auto tablet = self.AddTablet(tag);
return vs::components::Audio(tablet, num_samples, step_cycle);
});

//------------------- components --------------------
#define ADD_SCALAR_READER(T) \
Expand Down Expand Up @@ -219,6 +233,61 @@ PYBIND11_MODULE(core, m) {
.def("total_records", &cp::TextReader::total_records)
.def("size", &cp::TextReader::size);

// Python-facing writer for audio samples, exposed to Python as "AudioWriter".
// Every method forwards to the cp::Audio member named in the same .def().
py::class_<cp::Audio>(m, "AudioWriter", R"pbdoc(
        PyBind class. Must instantiate through the LogWriter.
    )pbdoc")
    // The docstring previously repeated the class-level text; describe the
    // method itself instead.
    .def("set_caption", &cp::Audio::SetCaption, R"pbdoc(
        Set the caption of this audio component. The tag is used as the
        caption until this is called.

        :param caption: Caption text
        :type caption: basestring
    )pbdoc")
    .def("start_sampling", &cp::Audio::StartSampling, R"pbdoc(
        Start a sampling period, this interface will start a new reservoir sampling phase.
    )pbdoc")
    .def("is_sample_taken", &cp::Audio::IsSampleTaken, R"pbdoc(
        Will this sample be taken, this interface is introduced to reduce the cost
        of copy audio data, by testing whether this audio will be sampled, and only
        copy data when it should be sampled. In that way, most of un-sampled audio
        data need not be copied or processed at all.

        :return: Index of the slot to pass to set_sample, or -1 if the sample is not taken
        :rtype: integer
    )pbdoc")
    .def("finish_sampling", &cp::Audio::FinishSampling, R"pbdoc(
        End a sampling period, it will clear all states for reservoir sampling.
    )pbdoc")
    .def("set_sample", &cp::Audio::SetSample, R"pbdoc(
        Store the flatten audio data with sample rate specified.

        :param index:
        :type index: integer
        :param sample_rate: Sample rate of audio
        :type sample_rate: integer
        :param audio_data: Flatten audio data
        :type audio_data: list
    )pbdoc")
    .def("add_sample", &cp::Audio::AddSample, R"pbdoc(
        A combined interface for is_sample_taken and set_sample, simpler but is less efficient.

        :param sample_rate: Sample rate of audio
        :type sample_rate: integer
        :param audio_data: Flatten audio data
        :type audio_data: list
    )pbdoc");

// Read-only view of one stored audio sample, exposed to Python as
// "AudioRecord". Each accessor returns its field by value.
py::class_<cp::AudioReader::AudioRecord>(m, "AudioRecord")
    // TODO: make these accessors copyless.
    .def("data",
         [](cp::AudioReader::AudioRecord& rec) -> std::vector<int> {
           return rec.data;
         })
    .def("sample_rate",
         [](cp::AudioReader::AudioRecord& rec) -> int {
           return rec.sample_rate;
         })
    .def("step_id",
         [](cp::AudioReader::AudioRecord& rec) -> int {
           return rec.step_id;
         });

// Read-side audio bindings, exposed to Python as "AudioReader". Each Python
// method forwards to the cp::AudioReader member named in the same .def().
py::class_<cp::AudioReader>(m, "AudioReader")
.def("caption", &cp::AudioReader::caption)
.def("num_records", &cp::AudioReader::num_records)
.def("num_samples", &cp::AudioReader::num_samples)
.def("record", &cp::AudioReader::record)
.def("timestamp", &cp::AudioReader::timestamp);

#define ADD_HISTOGRAM_WRITER(T) \
py::class_<cp::Histogram<T>>(m, "HistogramWriter__" #T, \
R"pbdoc(PyBind class. Must instantiate through the LogWriter.)pbdoc") \
Expand Down
110 changes: 105 additions & 5 deletions visualdl/logic/sdk.cc
Original file line number Diff line number Diff line change
Expand Up @@ -222,11 +222,6 @@ void Image::SetSample(int index,
CHECK_LT(index, num_samples_);
CHECK_LE(index, num_records_);

// trick to store int8 to protobuf
std::vector<byte_t> data_str(data.size());
for (int i = 0; i < data.size(); i++) {
data_str[i] = data[i];
}
Uint8Image image(new_shape[2], new_shape[0] * new_shape[1]);
NormalizeImage(&image, &data[0], new_shape[0] * new_shape[1], new_shape[2]);

Expand Down Expand Up @@ -352,6 +347,111 @@ std::string TextReader::caption() const {

// Returns the total record count reported by the underlying tablet reader.
size_t TextReader::size() const { return reader_.total_records(); }

// Opens a new sampling period for the current step.
//
// Does nothing when the current step is not on the sampling cycle. Otherwise
// a fresh record is appended to the tablet, stamped with the step id and the
// current wall-clock time, given one data slot per sample, and the counter of
// offered samples is reset.
void Audio::StartSampling() {
  if (!ToSampleThisStep()) {
    return;
  }

  step_ = writer_.AddRecord();
  step_.SetId(step_id_);

  const time_t now = std::time(nullptr);
  step_.SetTimeStamp(now);

  // Pre-create num_samples_ data slots so they can be addressed by index.
  int slots_left = num_samples_;
  while (slots_left > 0) {
    step_.AddData();
    --slots_left;
  }

  num_records_ = 0;
}

int Audio::IsSampleTaken() {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor stuff, the function name is implying that the function will return a BOOL, but the function returns an index.
Maybe rename the function to NextRandSampleIndex or provide a comment here to explain the logic.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

true

if (!ToSampleThisStep()) return -1;
num_records_++;
if (num_records_ <= num_samples_) {
return num_records_ - 1;
}
float prob = float(num_samples_) / num_records_;
float randv = (float)rand() / RAND_MAX;
if (randv < prob) {
// take this sample
int index = rand() % num_samples_;
return index;
}
return -1;
}

// Closes the current sampling period: advances the step counter, then asks
// the tablet's parent to persist to disk when the new step id is on the
// sampling cycle.
void Audio::FinishSampling() {
  ++step_id_;
  if (!ToSampleThisStep()) {
    return;
  }
  writer_.parent()->PersistToDisk();
}

// Convenience wrapper: asks IsSampleTaken() for a slot and, when one is
// granted (non-negative index), stores the sample there via SetSample().
void Audio::AddSample(int sample_rate, const std::vector<value_t>& data) {
  const int slot = IsSampleTaken();
  if (slot < 0) {
    return;
  }
  SetSample(slot, sample_rate, data);
}

// Stores one audio sample in slot `index` of the current step.
//
// The sample values are converted element-wise to chars and written to a
// binary record file; the slot itself only keeps that file's name. If the
// slot already referenced a file, that file is removed.
//
// index:       slot to write; must be < num_samples_ and <= num_records_.
// sample_rate: must be positive. It is validated here but not written to the
//              record by this function.
// data:        flattened audio values.
void Audio::SetSample(int index,
                      int sample_rate,
                      const std::vector<value_t>& data) {
  CHECK_GT(sample_rate, 0)
      << "sample rate should be something like 6000, 8000 or 44100";
  CHECK_LT(index, num_samples_)
      << "index should be less than number of samples";
  CHECK_LE(index, num_records_)
      << "index should be less than or equal to number of records";

  // Build the payload from the iterator range so its length is explicit.
  // Constructing std::string from a bare char* needs a NUL terminator the
  // buffer does not have, and would also cut the payload at the first zero
  // byte (and is undefined for empty input).
  BinaryRecord brcd(GenBinaryRecordDir(step_.parent()->dir()),
                    std::string(data.begin(), data.end()));
  brcd.tofile();

  auto entry = step_.MutableData<std::vector<byte_t>>(index);
  // Drop the file previously referenced by this slot, if any.
  // NOTE(review): if the record filename is derived from the payload, storing
  // identical data twice would delete the file just written — confirm.
  auto old_hash = entry.reader().GetRaw();
  if (!old_hash.empty()) {
    std::string old_path =
        GenBinaryRecordDir(step_.parent()->dir()) + "/" + old_hash;
    CHECK_EQ(std::remove(old_path.c_str()), 0) << "delete old binary record "
                                               << old_path << " failed";
  }
  entry.SetRaw(brcd.filename());
}

// Returns the single caption of this tablet.
//
// When the caption matches the reader's mode it is returned in its readable
// form; otherwise it is passed through string::TagDecode and returned.
// Exactly one caption is expected (CHECK-enforced).
std::string AudioReader::caption() {
  CHECK_EQ(reader_.captions().size(), 1);
  auto text = reader_.captions().front();
  if (!LogReader::TagMatchMode(text, mode_)) {
    string::TagDecode(text);
    return text;
  }
  return LogReader::GenReadableTag(mode_, text);
}

// Loads the sample stored at slot `index` of the step at `offset`.
//
// The slot holds the name of a binary record file under the log directory;
// the file's bytes are read back and widened to int to form the result's
// `data`. `step_id` is taken from the step record.
//
// Requires g_log_dir to be set (CHECK-enforced).
AudioReader::AudioRecord AudioReader::record(int offset, int index) {
  // Value-initialize so that fields this function does not assign
  // (sample_rate) read as 0 instead of an indeterminate value.
  AudioRecord res{};
  auto record = reader_.record(offset);
  auto entry = record.data(index);
  auto filename = entry.GetRaw();
  CHECK(!g_log_dir.empty())
      << "g_log_dir should be set in LogReader construction";
  BinaryRecordReader brcd(GenBinaryRecordDir(g_log_dir), filename);

  // brcd.data is the payload exactly as it was written to the file; each
  // byte becomes one int element of res.data.
  std::transform(brcd.data.begin(),
                 brcd.data.end(),
                 std::back_inserter(res.data),
                 [](byte_t i) { return static_cast<int>(i); });
  res.step_id = record.id();
  return res;
}

} // namespace components

} // namespace visualdl
110 changes: 109 additions & 1 deletion visualdl/logic/sdk.h
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ struct Image {

/*
* A combined interface for IsSampleTaken and SetSample, simpler but might be
* low effience.
* low efficiency.
*/
void AddSample(const std::vector<shape_t>& shape,
const std::vector<value_t>& data);
Expand Down Expand Up @@ -326,6 +326,114 @@ struct TextReader {
TabletReader reader_;
};

/*
 * Audio component writer.
 *
 * Wraps a tablet of type Tablet::Type::kAudio and stores up to `num_samples`
 * audio samples per recorded step.
 */
struct Audio {
using value_t = float;

/*
 * step_cycle: store every `step_cycle` as a record.
 * num_samples: how many samples to take in a step.
 *
 * Both values must be greater than zero (CHECK-enforced).
 */
Audio(Tablet tablet, int num_samples, int step_cycle)
: writer_(tablet), num_samples_(num_samples), step_cycle_(step_cycle) {
CHECK_GT(step_cycle, 0);
CHECK_GT(num_samples, 0);

writer_.SetType(Tablet::Type::kAudio);
writer_.SetNumSamples(num_samples);
// use the audio's tag as the default caption.
SetCaption(tablet.reader().tag());
}

/*
 * Replace the tablet's captions with the single caption `c`.
 */
void SetCaption(const std::string& c) {
writer_.SetCaptions(std::vector<std::string>({c}));
}

/*
 * Start a sampling period, this interface will start a new reservoir sampling
 * phase.
 */
void StartSampling();
/*
 * End a sampling period: advances the step counter and, when the new step id
 * is on the step cycle, calls PersistToDisk() on the tablet's parent.
 */
void FinishSampling();

/*
 * A combined interface for IsSampleTaken and SetSample, simpler but might be
 * less efficient.
 */
void AddSample(int sample_rate, const std::vector<value_t>& data);

/*
 * Will this sample be taken, this interface is introduced to reduce the cost
 * of copy audio data, by testing whether this audio will be sampled, and only
 * copy data when it should be sampled. In that way, most of unsampled audio
 * data need not be copied or processed at all.
 *
 * Returns the slot index to pass to SetSample, or -1 when the sample is not
 * taken.
 */
int IsSampleTaken();
/*
 * Store audio data with sample rate in slot `index`.
 */
void SetSample(int index, int sample_rate, const std::vector<value_t>& data);

protected:
// True when the current step id is a multiple of step_cycle_.
bool ToSampleThisStep() { return step_id_ % step_cycle_ == 0; }

private:
Tablet writer_;
// Record of the step currently being sampled.
Record step_;
// Samples offered so far in the current sampling period.
int num_records_{0};
// Number of sample slots per step.
int num_samples_{0};
// Index of the current step.
int step_id_{0};
// Only every step_cycle_-th step is recorded.
int step_cycle_;
};

/*
 * Audio reader.
 *
 * Read-side counterpart of Audio: wraps a TabletReader plus the mode string
 * used when resolving the caption.
 */
struct AudioReader {
using value_t = typename Audio::value_t;

// One stored audio sample as returned by record().
struct AudioRecord {
// id of the step the sample belongs to.
int step_id;
// NOTE(review): record() in the visible part of sdk.cc never assigns this
// field — confirm where the sample rate is meant to come from.
int sample_rate;
// payload bytes of the sample, one int per byte.
std::vector<int> data;
};

AudioReader(const std::string& mode, TabletReader tablet)
: reader_(tablet), mode_{mode} {}

// caption of the tablet; exactly one caption is expected.
std::string caption();

// number of steps.
int num_records() { return reader_.total_records(); }

// number of samples per step, as reported by the tablet reader.
int num_samples() { return reader_.num_samples(); }

// timestamp of the record at position `step`.
int64_t timestamp(int step) { return reader_.record(step).timestamp(); }

/*
 * offset: offset of a step.
 * index: index of a sample.
 */
AudioRecord record(int offset, int index);

/*
 * offset: offset of a step.
 * index: index of a sample.
 *
 * NOTE(review): no definition of this member is visible in sdk.cc — confirm
 * it is implemented before relying on it.
 */
std::vector<value_t> data(int offset, int index);

// NOTE(review): declared only; no definition is visible in sdk.cc.
int stepid(int offset, int index);

private:
TabletReader reader_;
std::string mode_;
};

} // namespace components
} // namespace visualdl

Expand Down
26 changes: 26 additions & 0 deletions visualdl/python/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,16 @@ def text(self, tag):
check_tag_name_valid(tag)
return self.reader.get_text(tag)

def audio(self, tag):
    """
    Look up the audio reader registered under ``tag``.

    The tag is validated with ``check_tag_name_valid`` before the lookup.

    :param tag: The reader will read the audio data marked with tag
    :type tag: basestring
    :return: the object returned by the underlying reader's ``get_audio``
    """
    check_tag_name_valid(tag)
    audio_reader = self.reader.get_audio(tag)
    return audio_reader

def __enter__(self):
return self

Expand Down Expand Up @@ -226,6 +236,22 @@ def histogram(self, tag, num_buckets, type='float'):
}
return types[type](tag, num_buckets)

def audio(self, tag, num_samples, step_cycle=1):
    """
    Create an audio writer used to write audio data.

    The tag is validated with ``check_tag_name_valid`` before the writer is
    created.

    :param tag: The audio writer will label the audio with tag
    :type tag: basestring
    :param num_samples: how many samples to take in a step.
    :type num_samples: integer
    :param step_cycle: store every `step_cycle` as a record.
    :type step_cycle: integer
    :return: An audio writer to sample audio
    :rtype: AudioWriter
    """
    check_tag_name_valid(tag)
    audio_writer = self.writer.new_audio(tag, num_samples, step_cycle)
    return audio_writer

def text(self, tag):
    """
    Create a text writer labelled with ``tag``.

    The tag is validated with ``check_tag_name_valid`` before the writer is
    created.

    :param tag: label for the text data
    :return: the object returned by the underlying writer's ``new_text``
    """
    check_tag_name_valid(tag)
    text_writer = self.writer.new_text(tag)
    return text_writer
Expand Down
1 change: 1 addition & 0 deletions visualdl/storage/storage.proto
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ message Tablet {
kHistogram = 1;
kImage = 2;
kText = 3;
kAudio = 4;
}
// The unique identification for this `Tablet`. VisualDL will have no the
// concept of FileWriter like TB. It will store all the tablets in a single
Expand Down
Loading