Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

implement IsDeterministic and IsEpsilonFree, write tests #4

Merged
merged 1 commit into from
Apr 24, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,12 @@

# emacs save files
*.~

# Files generated by ctags
CTAGS
GTAGS
GRTAGS
GSYMS
GPATH
tags
TAGS
14 changes: 9 additions & 5 deletions k2/csrc/fsa.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@

namespace k2 {

using Label = int32_t;
using StateId = int32_t;
using Weight = float;
qindazhu marked this conversation as resolved.
Show resolved Hide resolved

enum {
kFinalSymbol = -1, // final-costs are represented as arcs with
// kFinalSymbol as their label, to the final
Expand All @@ -31,9 +35,9 @@ struct Range {
};

struct Arc {
int32_t src_state;
int32_t dest_state;
int32_t label; // 'label' as in a finite state acceptor.
StateId src_state;
StateId dest_state;
Label label; // 'label' as in a finite state acceptor.
// For FSTs, the other label will be present in the
// aux_label array. Which of the two represents the input
// vs. the output can be decided by the user; in general,
Expand Down Expand Up @@ -91,7 +95,7 @@ struct Fsa {

*/
struct DenseFsa {
float* weights; // Would typically be a log-prob or unnormalized log-prob
Weight* weights; // Would typically be a log-prob or unnormalized log-prob
int32_t T; // The number of time steps == rows in the matrix `weights`;
// this FSA has T + 2 states, see explanation above.
int32_t num_symbols; // The number of symbols == columns in the matrix
Expand All @@ -105,7 +109,7 @@ struct DenseFsa {
CAUTION: we may later enforce that stride == num_symbols, in order to
be able to know the layout of a phantom matrix of arcs. (?)
*/
DenseFsa(float* data, int32_t T, int32_t num_symbols, int32_t stride);
DenseFsa(Weight* data, int32_t T, int32_t num_symbols, int32_t stride);
};

/*
Expand Down
53 changes: 37 additions & 16 deletions k2/csrc/properties.cc
Original file line number Diff line number Diff line change
@@ -1,36 +1,57 @@
// k2/csrc/properties.cc

// Copyright (c) 2020 Daniel Povey
// Copyright (c) 2020 Haowen Qiu
// Daniel Povey

// See ../../LICENSE for clarification regarding multiple authors

#include "k2/csrc/properties.h"
#include <unordered_set>

#include "k2/csrc/properties.h"
qindazhu marked this conversation as resolved.
Show resolved Hide resolved
#include "k2/csrc/fsa.h"

namespace k2 {

bool IsTopSorted(const Fsa& fsa) {
for (const auto& range : fsa.leaving_arcs) {
for (auto arc_idx = range.begin; arc_idx < range.end; ++arc_idx) {
const Arc& arc = fsa.arcs[arc_idx];
if (arc.dest_state < arc.src_state) {
return false;
}
// Returns true when no arc in `fsa` leads to a state whose id is smaller
// than the id of the state it leaves. Arcs with dest_state == src_state do
// not fail this check.
bool IsTopSorted(const Fsa &fsa) {
  for (const auto &arc : fsa.arcs) {
    const bool goes_backward = arc.dest_state < arc.src_state;
    if (goes_backward) return false;
  }
  return true;
}

bool HasSelfLoops(const Fsa& fsa) {
// TODO(haowen): refactor code below as we have
// so many for-for-loop structures
for (const auto& range : fsa.leaving_arcs) {
for (auto arc_idx = range.begin; arc_idx < range.end; ++arc_idx) {
const Arc& arc = fsa.arcs[arc_idx];
if (arc.dest_state == arc.src_state) {
// Returns true if at least one arc in `fsa` starts and ends at the same
// state; returns false otherwise (including when there are no arcs).
bool HasSelfLoops(const Fsa &fsa) {
  for (const auto &arc : fsa.arcs) {
    if (arc.src_state != arc.dest_state) continue;
    return true;
  }
  return false;
}

bool IsDeterministic(const Fsa &fsa) {
std::unordered_set<Label> labels;
StateId state = 0;
for (auto &arc : fsa.arcs) {
qindazhu marked this conversation as resolved.
Show resolved Hide resolved
if (arc.src_state == state) {
if (labels.find(arc.label) != labels.end()) {
qindazhu marked this conversation as resolved.
Show resolved Hide resolved
return false;
}
labels.insert(arc.label);
} else {
state = arc.src_state;
labels.clear();
labels.insert(arc.label);
}
}
return true;
}

bool IsEpsilonFree(const Fsa &fsa) {
for (auto &arc : fsa.arcs) {
qindazhu marked this conversation as resolved.
Show resolved Hide resolved
if (arc.label == kEpsilon) {
return false;
}
}
return true;
Expand Down
95 changes: 72 additions & 23 deletions k2/csrc/properties_test.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// k2/csrc/properties_test.cc

// Copyright (c) 2020 Haowen Qiue
// Copyright (c) 2020 Haowen Qiu
// Fangjun Kuang ([email protected])

// See ../../LICENSE for clarification regarding multiple authors
Expand All @@ -17,35 +17,84 @@ using k2::Fsa;
using k2::Range;

// TODO(haowen): create Fsa examples in a more elegant way (add methods
// addState, addArc, etc.)
static Fsa CreateNonTopSortedFsaExample() {
std::vector<Arc> arcs = {
{0, 2, 0},
{2, 1, 0},
{0, 1, 0},
};
std::vector<Range> leaving_arcs = {
{0, 1},
{1, 2},
{2, 3},
};
// addState, addArc, etc.) and use Test Fixtures by constructing
// reusable FSA examples.
TEST(Properties, IsNotTopSorted) {
std::vector<Arc> arcs = {{0, 1, 0}, {0, 2, 0}, {2, 1, 0}, };
std::vector<Range> leaving_arcs = {{0, 2}, {2, 3}, };
Fsa fsa;
fsa.leaving_arcs = leaving_arcs;
fsa.arcs = arcs;
return fsa;
bool sorted = IsTopSorted(fsa);
EXPECT_FALSE(sorted);
}

TEST(Properties, IsTopSorted) {
Fsa fsa = CreateNonTopSortedFsaExample();
std::vector<Arc> arcs = {{0, 1, 0}, {0, 2, 0}, {1, 2, 0}, };
std::vector<Range> leaving_arcs = {{0, 2}, {2, 3}, };
Fsa fsa;
fsa.leaving_arcs = leaving_arcs;
fsa.arcs = arcs;
bool sorted = IsTopSorted(fsa);
EXPECT_FALSE(sorted);
EXPECT_TRUE(sorted);
}

// Every arc below connects two distinct states, so HasSelfLoops must
// report false.
TEST(Properties, HasNotSelfLoops) {
  const std::vector<Arc> arc_list = {{0, 1, 0}, {0, 2, 0}, {1, 2, 0}};
  const std::vector<Range> arc_ranges = {{0, 2}, {2, 3}};
  Fsa fsa;
  fsa.arcs = arc_list;
  fsa.leaving_arcs = arc_ranges;
  EXPECT_FALSE(HasSelfLoops(fsa));
}

std::vector<Arc> arcs = {
{0, 1, 0},
{1, 2, 0},
{0, 2, 0},
};
TEST(Properties, HasSelfLoops) {
std::vector<Arc> arcs = {{0, 1, 0}, {1, 2, 0}, {1, 1, 0}, };
std::vector<Range> leaving_arcs = {{0, 1}, {1, 3}, };
Fsa fsa;
fsa.leaving_arcs = leaving_arcs;
fsa.arcs = arcs;
sorted = IsTopSorted(fsa);
EXPECT_TRUE(sorted);
bool hasSelfLoops = HasSelfLoops(fsa);
qindazhu marked this conversation as resolved.
Show resolved Hide resolved
EXPECT_TRUE(hasSelfLoops);
}

// State 1 has two leaving arcs that both carry label 0, so the FSA must be
// reported as non-deterministic.
TEST(Properties, IsNotDeterministic) {
  const std::vector<Arc> arc_list = {{0, 1, 2}, {1, 2, 0}, {1, 3, 0}};
  const std::vector<Range> arc_ranges = {{0, 1}, {1, 3}};
  Fsa fsa;
  fsa.arcs = arc_list;
  fsa.leaving_arcs = arc_ranges;
  EXPECT_FALSE(IsDeterministic(fsa));
}

// The two arcs leaving state 1 carry different labels (0 and 2), so the FSA
// must be reported as deterministic.
TEST(Properties, IsDeterministic) {
  const std::vector<Arc> arc_list = {{0, 1, 2}, {1, 2, 0}, {1, 3, 2}};
  const std::vector<Range> arc_ranges = {{0, 1}, {1, 3}};
  Fsa fsa;
  fsa.arcs = arc_list;
  fsa.leaving_arcs = arc_ranges;
  EXPECT_TRUE(IsDeterministic(fsa));
}

// The arc {0, 2, 0} carries label 0, which this test expects IsEpsilonFree
// to treat as an epsilon label (presumably kEpsilon == 0 -- confirm in
// fsa.h).
TEST(Properties, IsNotEpsilonFree) {
  const std::vector<Arc> arc_list = {{0, 1, 2}, {0, 2, 0}, {1, 2, 1}};
  const std::vector<Range> arc_ranges = {{0, 2}, {2, 3}};
  Fsa fsa;
  fsa.arcs = arc_list;
  fsa.leaving_arcs = arc_ranges;
  EXPECT_FALSE(IsEpsilonFree(fsa));
}

// Only labels 1 and 2 appear on the arcs below, and the test expects the
// FSA to be reported as epsilon-free.
TEST(Properties, IsEpsilonFree) {
  const std::vector<Arc> arc_list = {{0, 1, 2}, {0, 2, 1}, {1, 2, 1}};
  const std::vector<Range> arc_ranges = {{0, 2}, {2, 3}};
  Fsa fsa;
  fsa.arcs = arc_list;
  fsa.leaving_arcs = arc_ranges;
  EXPECT_TRUE(IsEpsilonFree(fsa));
}