Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix the CTC edit distance evaluator being unable to print information in the v2 API. #3844

Merged
merged 3 commits into from
Sep 4, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 52 additions & 11 deletions paddle/gserver/evaluators/CTCErrorEvaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,20 @@ limitations under the License. */

#include "Evaluator.h"
#include "paddle/gserver/gradientmachines/NeuralNetwork.h"
#include "paddle/utils/StringUtil.h"

namespace paddle {

/**
* calculate sequence-to-sequence edit distance
*/
class CTCErrorEvaluator : public NotGetableEvaluator {
class CTCErrorEvaluator : public Evaluator {
private:
MatrixPtr outActivations_;
int numTimes_, numClasses_, numSequences_, blank_;
real deletions_, insertions_, substitutions_;
int seqClassficationError_;
mutable std::unordered_map<std::string, real> evalResults_;

std::vector<int> path2String(const std::vector<int>& path) {
std::vector<int> str;
Expand Down Expand Up @@ -183,6 +185,18 @@ class CTCErrorEvaluator : public NotGetableEvaluator {
return stringAlignment(gtStr, recogStr);
}

void storeLocalValues() const {
evalResults_["error"] = numSequences_ ? totalScore_ / numSequences_ : 0;
evalResults_["deletion_error"] =
numSequences_ ? deletions_ / numSequences_ : 0;
evalResults_["insertion_error"] =
numSequences_ ? insertions_ / numSequences_ : 0;
evalResults_["substitution_error"] =
numSequences_ ? substitutions_ / numSequences_ : 0;
evalResults_["sequence_error"] =
(real)seqClassficationError_ / numSequences_;
}

public:
CTCErrorEvaluator()
: numTimes_(0),
Expand Down Expand Up @@ -245,16 +259,12 @@ class CTCErrorEvaluator : public NotGetableEvaluator {
}

/**
 * Write the accumulated edit-distance statistics to the given stream.
 *
 * @param os output stream that receives the "name=value" style report.
 *
 * NOTE(review): this span contains two back-to-back generations of the
 * body: one that formats the ratios inline and one that reads them from
 * evalResults_ after calling storeLocalValues(). This looks like the removed
 * and added sides of a diff shown together - confirm which side is meant
 * to remain before compiling.
 */
virtual void printStats(std::ostream& os) const {
// Inline variant: every ratio except the last is guarded against
// numSequences_ == 0; the "sequences error" term divides unconditionally.
os << config_.name() << "="
<< (numSequences_ ? totalScore_ / numSequences_ : 0);
os << " deletions error"
<< "=" << (numSequences_ ? deletions_ / numSequences_ : 0);
os << " insertions error"
<< "=" << (numSequences_ ? insertions_ / numSequences_ : 0);
os << " substitutions error"
<< "=" << (numSequences_ ? substitutions_ / numSequences_ : 0);
os << " sequences error"
<< "=" << (real)seqClassficationError_ / numSequences_;
// Cached variant: refresh evalResults_ first, then print from the cache.
// operator[] is usable in this const method because evalResults_ is mutable.
storeLocalValues();
os << config_.name() << " error = " << evalResults_["error"];
os << " deletions error = " << evalResults_["deletion_error"];
os << " insertions error = " << evalResults_["insertion_error"];
os << " substitution error = " << evalResults_["substitution_error"];
os << " sequence error = " << evalResults_["sequence_error"];
}

virtual void distributeEval(ParameterClient2* client) {
Expand All @@ -272,6 +282,37 @@ class CTCErrorEvaluator : public NotGetableEvaluator {
seqClassficationError_ = (int)buf[4];
numSequences_ = (int)buf[5];
}

/**
 * Append the qualified name of every cached metric to names.
 *
 * A qualified name is config_.name(), a '.', and the metric key stored in
 * evalResults_. The cache is refreshed first so that all keys exist.
 *
 * @param names output vector; existing entries are kept and new ones are
 *              appended.
 */
void getNames(std::vector<std::string>* names) {
  storeLocalValues();

  const std::string prefix = config_.name() + ".";
  names->reserve(names->size() + evalResults_.size());
  for (const auto& metric : evalResults_) {
    names->push_back(prefix + metric.first);
  }
}

/**
 * Look up a single metric by its (possibly qualified) name.
 *
 * The name is split on '.', and only the last component is used as the key
 * into evalResults_, so both "error" and "<evaluator>.error" resolve to the
 * same entry.
 *
 * @param name metric name, optionally prefixed with "<evaluator name>.".
 * @param err  set to an error when the key cannot be resolved; left
 *             untouched on success.
 * @return the cached metric value, or 0 when the key is unknown.
 */
real getValue(const std::string& name, Error* err) const {
  storeLocalValues();

  std::vector<std::string> pieces;
  paddle::str::split(name, '.', &pieces);

  // Guard against split() producing no pieces (e.g. for an empty name):
  // indexing the last element of an empty vector is undefined behaviour.
  if (pieces.empty()) {
    *err = Error("Evaluator does not have the key %s", name.c_str());
    return 0.0f;
  }

  const auto it = evalResults_.find(pieces.back());
  if (it == evalResults_.end()) {
    *err = Error("Evaluator does not have the key %s", name.c_str());
    return 0.0f;
  }

  return it->second;
}

std::string getType(const std::string& name, Error* err) const {
this->getValue(name, err);
if (!err->isOK()) {
return "";
}
return "ctc_edit_distance";
}
};

REGISTER_EVALUATOR(ctc_edit_distance, CTCErrorEvaluator);
Expand Down
8 changes: 7 additions & 1 deletion paddle/gserver/evaluators/ChunkEvaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,13 @@ class ChunkEvaluator : public Evaluator {
}

// get type of evaluator
std::string getTypeImpl() const { return "chunk"; }
std::string getType(const std::string& name, Error* err) const {
this->getValue(name, err);
if (!err->isOK()) {
return "";
}
return "chunk";
}

private:
void storeLocalValues() const {
Expand Down
13 changes: 8 additions & 5 deletions paddle/gserver/evaluators/Evaluator.h
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@ class NotGetableEvaluator : public Evaluator {
*err = Error("Not implemented");
return .0f;
}

std::string getType(const std::string& name, Error* err) const {
*err = Error("Not implemented");
return "";
Expand Down Expand Up @@ -331,6 +332,7 @@ class RankAucEvaluator : public Evaluator {
protected:
std::string getTypeImpl() const;
};

/**
* @brief precision, recall and f1 score Evaluator
* \f[
Expand Down Expand Up @@ -358,6 +360,12 @@ class PrecisionRecallEvaluator : public Evaluator {

virtual void distributeEval(ParameterClient2* client);

void getNames(std::vector<std::string>* names);

real getValue(const std::string& name, Error* err) const;

std::string getType(const std::string& name, Error* err) const;

struct StatsInfo {
/// numbers of true positives
double TP;
Expand Down Expand Up @@ -428,11 +436,6 @@ class PrecisionRecallEvaluator : public Evaluator {
mutable std::unordered_map<std::string, real> values_;

void storeLocalValues() const;
// Evaluator interface
public:
void getNames(std::vector<std::string>* names);
real getValue(const std::string& name, Error* err) const;
std::string getType(const std::string& name, Error* err) const;
};

/*
Expand Down