Skip to content

Commit

Permalink
remove arcs and arc_indexes in fsa and add some functions in Array3
Browse files Browse the repository at this point in the history
  • Loading branch information
qindazhu committed Jul 3, 2020
1 parent 3e725be commit 7bc217e
Show file tree
Hide file tree
Showing 13 changed files with 273 additions and 782 deletions.
89 changes: 75 additions & 14 deletions k2/csrc/array.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,11 +130,11 @@ struct Array2 {

// as we require `indexes[0] == 0` if Array2 is empty,
// the implementation of `begin` and `end` would be fine for empty object.
PtrT begin() { return data + indexes[0]; }
const PtrT begin() const { return data + indexes[0]; }
PtrT begin() { return data + indexes[0]; } // NOLINT
const PtrT begin() const { return data + indexes[0]; } // NOLINT

PtrT end() { return data + indexes[size1]; }
const PtrT end() const { return data + indexes[size1]; }
PtrT end() { return data + indexes[size1]; } // NOLINT
const PtrT end() const { return data + indexes[size1]; } // NOLINT

// just to replace `Swap` functions for Fsa and AuxLabels for now,
// may delete it if we finally find that we don't need to call it.
Expand All @@ -161,24 +161,33 @@ struct Array3 {
using IndexT = I;
using PtrT = Ptr;

IndexT size;
IndexT *indexes1; // indexes1[0,1,...size] should be defined; note,
// this means the array must be of at least size+1.
IndexT size1; // equal to the number of Array2 object in this Array3 object;
// `size1 + 1` will be the number of elements in indexes1.

IndexT size2; // equal to indexes1[size1] - indexes1[0];
// `size2 + 1` will be the number of elements in indexes2;

IndexT size3; // the number of elements in `data`, equal to
// indexes2[indexes1[size1]] - indexes2[indexes1[0]].

IndexT *indexes1; // indexes1[0,1,...size1] should be defined; note,
// this means the array must be of at least size1+1.
// We require that indexes[i] <= indexes[i+1], but it
// is not required that indexes[0] == 0, it may be
// greater than 0.
// is not required that indexes[0] == 0, it may be greater
// than 0.

IndexT *indexes2; // indexes2[indexes1[0]]
// .. indexes2[indexes1[size]-1] should be defined.
// .. indexes2[indexes1[size1]] should be defined;
// note, this means the array must be of at least size2+1.

Ptr data; // `data` might be an actual pointer, or might be some object
// supporting operator []. data[indexes2[indexes1[0]]] through
// data[indexes2[indexes1[size] - 1]] must be accessible through
// this object.
// data[indexes2[indexes1[size1]] - 1] must be accessible
// through this object.

Array2<Ptr, I> operator[](I i) const {
DCHECK_GE(i, 0);
DCHECK_LT(i, size);
DCHECK_LT(i, size1);

Array2<Ptr, I> array;
array.size1 = indexes1[i + 1] - indexes1[i];
Expand All @@ -187,6 +196,59 @@ struct Array3 {
array.data = data;
return array;
}

/*
Set `size1`, `size2` and `size3` so that we know how much memory we need
to allocate for `indexes1`, `indexes2` and `data` to represent the vector
of Array2 as an Array3.
@param [in] arrays A vector of Array2;
@param [in] array_size The number of elements in vector `arrays`
*/
void GetSizes(const Array2<Ptr, I> *arrays, I array_size) {
size1 = array_size;
size2 = size3 = 0;
for (I i = 0; i != array_size; ++i) {
size2 += arrays[i].size1;
size3 += arrays[i].size2;
}
}

/*
Create Array3 from the vector of Array2. `size1`, `size2` and `size3` must
have been set by calling `GetSizes` above, and the memory of `indexes1`,
`indexes2` and `data` must have been allocated according to those sizes.
@param [in] arrays A vector of Array2;
@param [in] array_size The number of elements in vector `arrays`
*/
void Create(const Array2<Ptr, I> *arrays, I array_size) {
CHECK_EQ(size1, array_size);
I size2_tmp = 0, size3_tmp = 0;
for (I i = 0; i != array_size; ++i) {
const auto &curr_array = arrays[i];

indexes1[i] = size2_tmp;

// copy indexes
CHECK_LE(size2_tmp + curr_array.size1, size2);
I begin_index = curr_array.indexes[0]; // indexes[0] is always valid and
// may be greater than 0
for (I j = 0; j != curr_array.size1; ++j) {
indexes2[size2_tmp++] = size3_tmp + curr_array.indexes[j] - begin_index;
}

// copy data
CHECK_LE(size3_tmp + curr_array.size2, size3);
for (I n = 0; n != curr_array.size2; ++n) {
data[size3_tmp + n] = curr_array.data[n + begin_index];
}
size3_tmp += curr_array.size2;
}
CHECK_EQ(size2_tmp, size2);
CHECK_EQ(size3_tmp, size3);

indexes1[size1] = size2_tmp;
indexes2[indexes1[size1]] = size3_tmp;
}
};

// Note: we can create Array4 later if we need it.
Expand Down Expand Up @@ -261,7 +323,6 @@ struct Array2Storage {

namespace std {
template <typename T, typename I>

struct iterator_traits<k2::StridedPtr<T, I>> {
typedef T value_type;
};
Expand Down
92 changes: 90 additions & 2 deletions k2/csrc/array_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "k2/csrc/array.h"

#include <iterator>
#include <memory>
#include <numeric>
#include <type_traits>
#include <utility>
Expand Down Expand Up @@ -132,10 +133,97 @@ void TestArray2(int32_t stride) {
}
}

TEST(Array2Test, RawPointer) { TestArray2<int32_t *, int32_t>(1); }
// Exercises Array3::GetSizes, Array3::Create and Array3::operator[] by
// packing two Array2 objects (one whose indexes start at 0, one whose indexes
// start at 3) into a single Array3 and reading them back.
//
// @param [in] stride  Forwarded to Array2Storage and DataPtrCreator, and used
//                     to scale the size of the buffer backing `array3.data`.
template <typename Ptr, typename IndexType>
void TestArray3(int32_t stride) {
  using ValueType = typename std::iterator_traits<Ptr>::value_type;

  // First input: 4 rows, 10 elements, indexes start at 0.
  Array2Size<IndexType> size1 = {4, 10};
  std::vector<IndexType> indexes1 = {0, 3, 5, 9, 10};
  std::vector<ValueType> data1(size1.size2);
  std::iota(data1.begin(), data1.end(), 0);
  Array2Storage<Ptr, IndexType> storage1(size1, stride);
  storage1.FillIndexes(indexes1);
  storage1.FillData(data1);
  Array2<Ptr, IndexType> &array1 = storage1.GetArray2();
  EXPECT_EQ(array1.data[array1.indexes[0]], 0);

  // Second input: 3 rows, 7 elements in use.
  Array2Size<IndexType> size2 = {3, 10};
  // note indexes2[0] starts from 3 instead of 0
  std::vector<IndexType> indexes2 = {3, 5, 8, 10};
  std::vector<ValueType> data2(10);  // 10 instead of 7 here on purpose
  std::iota(data2.begin(), data2.end(), 0);
  Array2Storage<Ptr, IndexType> storage2(size2, stride);
  storage2.FillIndexes(indexes2);
  storage2.FillData(data2);
  Array2<Ptr, IndexType> &array2 = storage2.GetArray2();
  array2.size2 = 7;  // change the size to the correct value
  EXPECT_EQ(array2.data[array2.indexes[0]], 3);

  std::vector<Array2<Ptr, IndexType>> arrays;
  arrays.emplace_back(array1);
  arrays.emplace_back(array2);

  Array3<Ptr, IndexType> array3;
  array3.GetSizes(arrays.data(), 2);
  EXPECT_EQ(array3.size1, 2);
  EXPECT_EQ(array3.size2, 7);
  EXPECT_EQ(array3.size3, 17);

  // Test Array3 Creation
  std::vector<IndexType> array3_indexes1(array3.size1 + 1);
  std::vector<IndexType> array3_indexes2(array3.size2 + 1);
  std::unique_ptr<ValueType[]> array3_data(
      new ValueType[array3.size3 * stride]);
  array3.indexes1 = array3_indexes1.data();
  array3.indexes2 = array3_indexes2.data();
  array3.data = DataPtrCreator<Ptr, IndexType>::Create(array3_data, stride);

  array3.Create(arrays.data(), 2);
  EXPECT_THAT(array3_indexes1, ::testing::ElementsAre(0, 4, 7));
  EXPECT_THAT(array3_indexes2,
              ::testing::ElementsAre(0, 3, 5, 9, 10, 12, 15, 17));
  for (auto i = array1.indexes[0]; i != array1.indexes[array1.size1]; ++i) {
    EXPECT_EQ(array3.data[i], array1.data[i]);
  }
  EXPECT_EQ(array2.indexes[0], 3);
  for (auto i = array2.indexes[0]; i != array2.indexes[array2.size1]; ++i) {
    EXPECT_EQ(array3.data[array1.size2 + i - array2.indexes[0]],
              array2.data[i]);
  }

  // Test Array3's operator[]
  Array2<Ptr, IndexType> array1_copy = array3[0];
  EXPECT_EQ(array1_copy.size1, array1.size1);
  EXPECT_EQ(array1_copy.size2, array1.size2);
  for (auto i = 0; i != array1.size1 + 1; ++i) {
    EXPECT_EQ(array1_copy.indexes[i], array1.indexes[i]);
  }
  for (auto i = array1.indexes[0]; i != array1.indexes[array1.size1]; ++i) {
    EXPECT_EQ(array1_copy.data[i], array1.data[i]);
  }

  Array2<Ptr, IndexType> array2_copy = array3[1];
  EXPECT_EQ(array2_copy.size1, array2.size1);
  EXPECT_EQ(array2_copy.size2, array2.size2);
  // array2's elements are stored right after array1's `size2` elements, so an
  // index of array2 is rebased by `array1.size2 - array2.indexes[0]`.
  // (`array1.size1 + array2.indexes[0]` happens to give the same number for
  // this fixture, but it is not the right quantity.)
  const IndexType array2_shift = array1.size2 - array2.indexes[0];
  for (auto i = 0; i != array2.size1 + 1; ++i) {
    // output indexes may start from n > 0
    EXPECT_EQ(array2_copy.indexes[i], array2.indexes[i] + array2_shift);
  }
  // Compare the second sub-array against its own source; comparing against
  // array1 would only pass because both fixtures are filled with iota.
  for (auto i = array2.indexes[0]; i != array2.indexes[array2.size1]; ++i) {
    EXPECT_EQ(array2_copy.data[i + array2_shift], array2.data[i]);
  }
}

// Runs the Array2 and Array3 tests with a raw `int32_t *` as the data pointer
// type and a stride argument of 1.
TEST(ArrayTest, RawPointer) {
TestArray2<int32_t *, int32_t>(1);
TestArray3<int32_t *, int32_t>(1);
}

TEST(Array2Test, StridedPtr) {
// Runs the Array2 and Array3 tests with `StridedPtr<int32_t, int32_t>` as the
// data pointer type and a stride argument of 2.
TEST(ArrayTest, StridedPtr) {
TestArray2<StridedPtr<int32_t, int32_t>, int32_t>(2);
TestArray3<StridedPtr<int32_t, int32_t>, int32_t>(2);
}

} // namespace k2
Loading

0 comments on commit 7bc217e

Please sign in to comment.