Skip to content
This repository has been archived by the owner on Apr 10, 2024. It is now read-only.

Commit

Permalink
Adding operator+[=] and operator/[=] for FloatingArray and IntegerArray.
Browse files Browse the repository at this point in the history
This includes a design change that obviates the need for an ArrayView.
Instead, every array has an internal offset. Shallow copy is achieved by the copy
constructor, though the current set of copy constructors doesn't yet
support a slice. Deep copy is still achieved through the Copy virtual
function.

More detailed explanation of the changes:

* Adding copy/move constructors for {Floating,Integer,Numeric}Array
* Adding various methods for marking/getting nulls (valid bits) in
  integer arrays
* Changing data() and mutable_data() in NumericArray so that they
  return a pointer that starts at the array's offset
* Addition of Addable/Divisable classes (similar to Boost operators)
  for easy support of operator[+/]
* Unit test scaffolding for testing permutations of left/right hand
  side types on arithmetic operators
* Implementing IntegerArray::operator/, IntegerArray::operator+=,
  FloatingArray::operator/=, FloatingArray::operator+=
  • Loading branch information
joshuastorck committed Oct 27, 2016
1 parent 29df124 commit bad99a1
Show file tree
Hide file tree
Showing 12 changed files with 607 additions and 56 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ endif()
# GCC cannot always verify whether strict aliasing rules are indeed followed due to
# fundamental limitations in escape analysis, which can result in subtle bad code generation.
# This has a small perf hit, but it is worth it to avoid hard-to-debug crashes.
set(CXX_COMMON_FLAGS "-std=c++11 -fno-strict-aliasing -msse4.2 -Wall -Wno-sign-compare -Wno-deprecated -pthread -D__STDC_FORMAT_MACROS")
set(CXX_COMMON_FLAGS "-std=c++1y -fno-strict-aliasing -msse4.2 -Wall -Wno-sign-compare -Wno-deprecated -pthread -D__STDC_FORMAT_MACROS")

# compiler flags for different build types (run 'cmake -DCMAKE_BUILD_TYPE=<type> .')
# For all builds:
Expand Down
2 changes: 1 addition & 1 deletion pandas/native.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ cdef extern from "pandas/api.h" namespace "pandas":
c_bool Equals(const DataType& other)
string ToString()

ctypedef shared_ptr[DataType] TypePtr
ctypedef shared_ptr[const DataType] TypePtr

cdef cppclass Int8Type(DataType):
pass
Expand Down
2 changes: 1 addition & 1 deletion pandas/native.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ cdef Array wrap_array(const lp.ArrayPtr& arr):

cdef PandasType wrap_type(const lp.TypePtr& sp_type):
cdef:
lp.DataType* type = sp_type.get()
const lp.DataType* type = sp_type.get()
PandasType result

if type.type() == lp.TypeId_CATEGORY:
Expand Down
171 changes: 171 additions & 0 deletions src/pandas/array-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,177 @@ TEST_F(TestArray, Attrs) {
ASSERT_EQ(values_.size(), array_->length());
}

// Fixture for exercising element-wise arithmetic operators between two array
// types.
//
// Template parameters:
//   LEFT_ARRAY_TYPE / RIGHT_ARRAY_TYPE  array class templates for each operand
//   LEFT_DATA_TYPE / RIGHT_DATA_TYPE    data types; each must expose ::c_type
//   LENGTH                              number of elements in each operand
//
// Both operands are filled with the values 1..LENGTH (see Initialize), wrapped
// in a Buffer and handed to the corresponding array constructor.
//
// The ASSERT_EQ checks live inside helper methods; per GoogleTest semantics a
// fatal failure returns from the helper only, not from the calling TEST body.
//
// NOTE(review): because both operands hold identical values, every element-wise
// quotient checked by a division test is x / x. Distinct right-hand values
// would make those checks more discriminating -- confirm whether the identical
// data is intentional before changing it.
template <template <typename> class LEFT_ARRAY_TYPE, typename LEFT_DATA_TYPE,
    template <typename> class RIGHT_ARRAY_TYPE, typename RIGHT_DATA_TYPE,
    std::size_t LENGTH = 10>
class OperatorTest {
 public:
  OperatorTest()
      : left_buffer_(std::make_shared<Buffer>(
            reinterpret_cast<const std::uint8_t*>(Initialize(left_data_)),
            LENGTH * sizeof(LEFT_C_TYPE))),
        right_buffer_(std::make_shared<Buffer>(
            reinterpret_cast<const std::uint8_t*>(Initialize(right_data_)),
            LENGTH * sizeof(RIGHT_C_TYPE))),
        left_array_(LENGTH, left_buffer_),
        right_array_(LENGTH, right_buffer_) {}

  // Applies `operation` to the two arrays and checks every element of the
  // result against `operation` applied to the matching scalar inputs.
  template <typename OPERATOR>
  void TestOperator(OPERATOR& operation) {
    auto result = operation(left_array_, right_array_);
    // Index with the same type length() returns instead of a deduced int.
    const std::int64_t length = left_array_.length();
    for (std::int64_t ii = 0; ii < length; ++ii) {
      ASSERT_EQ(result.data()[ii], operation(left_data_[ii], right_data_[ii]));
    }
  }

  // Runs the same check as TestOperator, then applies `inplace_operation` to
  // the left array and verifies that the left array now matches both the
  // scalar expectation and the out-of-place result.
  template <typename OPERATOR, typename INPLACE_OPERATOR>
  void TestInplaceOperator(OPERATOR& operation, INPLACE_OPERATOR& inplace_operation) {
    auto result = operation(left_array_, right_array_);
    const std::int64_t length = left_array_.length();
    for (std::int64_t ii = 0; ii < length; ++ii) {
      ASSERT_EQ(result.data()[ii], operation(left_data_[ii], right_data_[ii]));
    }

    inplace_operation(left_array_, right_array_);

    // NOTE(review): the expectation below is recomputed from left_data_ after
    // the in-place call. That is only valid if the in-place operator does not
    // write through to left_data_ -- confirm Buffer's ownership semantics.
    for (std::int64_t ii = 0; ii < length; ++ii) {
      ASSERT_EQ(left_array_.data()[ii], operation(left_data_[ii], right_data_[ii]));
    }
    for (std::int64_t ii = 0; ii < length; ++ii) {
      ASSERT_EQ(left_array_.data()[ii], result.data()[ii]);
    }
  }

 private:
  // Fills `value` with 1, 2, ..., LENGTH and returns a pointer to its first
  // element so the call can be used directly inside the constructor's
  // initializer list.
  template <typename C_TYPE>
  static C_TYPE* Initialize(C_TYPE (&value)[LENGTH]) {
    // The index is std::size_t to match LENGTH (no signed/unsigned comparison).
    for (std::size_t ii = 0; ii < LENGTH; ++ii) {
      // Start at 1 so that we don't get FPE with operator/
      value[ii] = static_cast<C_TYPE>(ii + 1);
    }
    return value;
  }

  using LEFT_C_TYPE = typename LEFT_DATA_TYPE::c_type;

  using RIGHT_C_TYPE = typename RIGHT_DATA_TYPE::c_type;

  // Members are initialized in declaration order: the raw storage comes first,
  // then the buffers built from it, then the arrays built from the buffers.
  // Keep this order in sync with the constructor's initializer list.
  LEFT_C_TYPE left_data_[LENGTH];

  RIGHT_C_TYPE right_data_[LENGTH];

  std::shared_ptr<Buffer> left_buffer_;

  std::shared_ptr<Buffer> right_buffer_;

  LEFT_ARRAY_TYPE<LEFT_DATA_TYPE> left_array_;

  RIGHT_ARRAY_TYPE<RIGHT_DATA_TYPE> right_array_;
};

// Checks operator+ and operator+= for a range of left/right operand type
// combinations.
TEST(TestArrayOperators, Addition) {
  auto add = [](auto const& lhs, auto const& rhs) { return lhs + rhs; };
  auto add_assign = [](auto& lhs, auto const& rhs) { lhs += rhs; };

  // Runs the out-of-place and in-place checks on one fixture. The fixture is
  // bound by forwarding reference, so the temporary is neither copied nor moved.
  auto verify = [&add, &add_assign](auto&& fixture) {
    fixture.TestInplaceOperator(add, add_assign);
  };

  // Integer arrays whose element widths differ between the two operands.
  verify(OperatorTest<IntegerArray, UInt8Type, IntegerArray, UInt16Type>());
  verify(OperatorTest<IntegerArray, UInt16Type, IntegerArray, UInt8Type>());
  verify(OperatorTest<IntegerArray, Int8Type, IntegerArray, Int16Type>());
  verify(OperatorTest<IntegerArray, Int16Type, IntegerArray, Int8Type>());
  verify(OperatorTest<IntegerArray, UInt32Type, IntegerArray, UInt16Type>());
  verify(OperatorTest<IntegerArray, UInt64Type, IntegerArray, UInt32Type>());
  verify(OperatorTest<IntegerArray, Int32Type, IntegerArray, Int64Type>());
  verify(OperatorTest<IntegerArray, Int64Type, IntegerArray, Int32Type>());

  // Integer arrays with the same element type on both sides.
  verify(OperatorTest<IntegerArray, Int8Type, IntegerArray, Int8Type>());
  verify(OperatorTest<IntegerArray, Int16Type, IntegerArray, Int16Type>());
  verify(OperatorTest<IntegerArray, Int32Type, IntegerArray, Int32Type>());
  verify(OperatorTest<IntegerArray, Int64Type, IntegerArray, Int64Type>());
  verify(OperatorTest<IntegerArray, UInt8Type, IntegerArray, UInt8Type>());
  verify(OperatorTest<IntegerArray, UInt16Type, IntegerArray, UInt16Type>());
  verify(OperatorTest<IntegerArray, UInt32Type, IntegerArray, UInt32Type>());
  verify(OperatorTest<IntegerArray, UInt64Type, IntegerArray, UInt64Type>());

  // Float array on the left, integer array on the right.
  verify(OperatorTest<FloatingArray, FloatType, IntegerArray, UInt8Type>());
  verify(OperatorTest<FloatingArray, FloatType, IntegerArray, UInt64Type>());
  verify(OperatorTest<FloatingArray, FloatType, IntegerArray, Int8Type>());
  verify(OperatorTest<FloatingArray, FloatType, IntegerArray, Int64Type>());

  // Double array on the left, integer array on the right.
  verify(OperatorTest<FloatingArray, DoubleType, IntegerArray, UInt16Type>());
  verify(OperatorTest<FloatingArray, DoubleType, IntegerArray, UInt64Type>());
  verify(OperatorTest<FloatingArray, DoubleType, IntegerArray, Int16Type>());
  verify(OperatorTest<FloatingArray, DoubleType, IntegerArray, Int64Type>());
}

// Checks operator/ for integer operand pairs, and both operator/ and
// operator/= when the left operand is a floating-point array.
TEST(TestArrayOperators, Division) {
  auto quotient = [](auto const& lhs, auto const& rhs) { return lhs / rhs; };
  auto quotient_assign = [](auto& lhs, auto const& rhs) { lhs /= rhs; };

  // Out-of-place check only. The fixture is bound by forwarding reference, so
  // the temporary is neither copied nor moved.
  auto verify = [&quotient](auto&& fixture) { fixture.TestOperator(quotient); };

  // Out-of-place check followed by the in-place check.
  auto verify_inplace = [&quotient, &quotient_assign](auto&& fixture) {
    fixture.TestInplaceOperator(quotient, quotient_assign);
  };

  // Integer arrays whose element widths differ between the two operands.
  // No in-place check is made for any integer/integer pair.
  verify(OperatorTest<IntegerArray, UInt8Type, IntegerArray, UInt16Type>());
  verify(OperatorTest<IntegerArray, UInt16Type, IntegerArray, UInt8Type>());
  verify(OperatorTest<IntegerArray, Int8Type, IntegerArray, Int16Type>());
  verify(OperatorTest<IntegerArray, Int16Type, IntegerArray, Int8Type>());
  verify(OperatorTest<IntegerArray, UInt32Type, IntegerArray, UInt16Type>());
  verify(OperatorTest<IntegerArray, UInt64Type, IntegerArray, UInt32Type>());
  verify(OperatorTest<IntegerArray, Int32Type, IntegerArray, Int64Type>());
  verify(OperatorTest<IntegerArray, Int64Type, IntegerArray, Int32Type>());

  // Integer arrays with the same element type on both sides.
  verify(OperatorTest<IntegerArray, Int8Type, IntegerArray, Int8Type>());
  verify(OperatorTest<IntegerArray, Int16Type, IntegerArray, Int16Type>());
  verify(OperatorTest<IntegerArray, Int32Type, IntegerArray, Int32Type>());
  verify(OperatorTest<IntegerArray, Int64Type, IntegerArray, Int64Type>());
  verify(OperatorTest<IntegerArray, UInt8Type, IntegerArray, UInt8Type>());
  verify(OperatorTest<IntegerArray, UInt16Type, IntegerArray, UInt16Type>());
  verify(OperatorTest<IntegerArray, UInt32Type, IntegerArray, UInt32Type>());
  verify(OperatorTest<IntegerArray, UInt64Type, IntegerArray, UInt64Type>());

  // Float array on the left; integer and floating-point arrays on the right.
  verify_inplace(OperatorTest<FloatingArray, FloatType, IntegerArray, UInt8Type>());
  verify_inplace(OperatorTest<FloatingArray, FloatType, IntegerArray, UInt64Type>());
  verify_inplace(OperatorTest<FloatingArray, FloatType, IntegerArray, Int8Type>());
  verify_inplace(OperatorTest<FloatingArray, FloatType, IntegerArray, Int64Type>());
  verify_inplace(OperatorTest<FloatingArray, FloatType, FloatingArray, FloatType>());
  verify_inplace(OperatorTest<FloatingArray, FloatType, FloatingArray, DoubleType>());

  // Double array on the left; integer and floating-point arrays on the right.
  verify_inplace(OperatorTest<FloatingArray, DoubleType, IntegerArray, UInt16Type>());
  verify_inplace(OperatorTest<FloatingArray, DoubleType, IntegerArray, UInt64Type>());
  verify_inplace(OperatorTest<FloatingArray, DoubleType, IntegerArray, Int16Type>());
  verify_inplace(OperatorTest<FloatingArray, DoubleType, IntegerArray, Int64Type>());
  verify_inplace(OperatorTest<FloatingArray, DoubleType, FloatingArray, FloatType>());
  verify_inplace(OperatorTest<FloatingArray, DoubleType, FloatingArray, DoubleType>());
}

// ----------------------------------------------------------------------
// Array view object

Expand Down
5 changes: 1 addition & 4 deletions src/pandas/array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,8 @@ namespace pandas {
// ----------------------------------------------------------------------
// Array

Array::Array(const std::shared_ptr<DataType>& type, int64_t length)
: type_(type), length_(length) {}

Status Array::Copy(std::shared_ptr<Array>* out) const {
return Copy(0, length_, out);
return Copy(0, length(), out);
}

// ----------------------------------------------------------------------
Expand Down
25 changes: 16 additions & 9 deletions src/pandas/array.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,19 @@ class Array {
public:
virtual ~Array() {}

int64_t length() const { return length_; }
std::shared_ptr<DataType> type() const { return type_; }
DataType::TypeId type_id() const { return type_->type(); }
virtual int64_t length() const = 0;
// There are two methods to obtain the data type.
// The signature without a shared_ptr allows sub-classes
// to have a covariant return type, which eliminates the
// need/danger of doing a static_cast when dealing with
// a concrete sub-class. Ideally, the shared_ptr signature
// would suffice, but the compiler cannot treat a shared_ptr
// to a base class and a shared_ptr to a subclass as a
// covariant return type.
virtual TypePtr type() const = 0;
virtual const DataType& type_reference() const = 0;

DataType::TypeId type_id() const { return type()->type(); }

// Copy a section of the array into a new output array
virtual Status Copy(
Expand All @@ -42,13 +52,10 @@ class Array {
virtual bool owns_data() const = 0;

protected:
std::shared_ptr<DataType> type_;
int64_t length_;
Array() {}

Array(const std::shared_ptr<DataType>& type, int64_t length);

private:
DISALLOW_COPY_AND_ASSIGN(Array);
Array(const Array& other) = default;
Array(Array&& other) = default;
};

// An object that is a view on a section of another array (possibly the whole
Expand Down
4 changes: 2 additions & 2 deletions src/pandas/type.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,15 +57,15 @@ class DataType {

virtual std::string ToString() const = 0;

virtual bool Equals(const DataType& other) { return type_ == other.type_; }
virtual bool Equals(const DataType& other) const { return type_ == other.type_; }

TypeId type() const { return type_; }

private:
TypeId type_;
};

typedef std::shared_ptr<DataType> TypePtr;
using TypePtr = std::shared_ptr<const DataType>;

class PANDAS_EXPORT TimestampType : public DataType {
public:
Expand Down
3 changes: 3 additions & 0 deletions src/pandas/types/category.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@

namespace pandas {

// Constructs a category array that stores the given codes view together with
// a shared handle to its category type.
CategoryArray::CategoryArray(
    ArrayView codes, const std::shared_ptr<CategoryType>& type)
    : codes_(codes),
      type_(type) {}

std::string CategoryType::ToString() const {
std::stringstream s;
s << "category<" << category_type()->ToString() << ">";
Expand Down
11 changes: 7 additions & 4 deletions src/pandas/types/category.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@ struct CategoryType : public DataType {

std::string ToString() const override;

std::shared_ptr<DataType> category_type() const { return categories_.data()->type(); }
std::shared_ptr<const DataType> category_type() const {
return categories_.data()->type();
}

const ArrayView& categories() const { return categories_; }

Expand All @@ -30,14 +32,15 @@ struct CategoryType : public DataType {

class CategoryArray : public Array {
public:
CategoryArray(ArrayView codes, const std::shared_ptr<CategoryType>& type);

const ArrayView& codes() const { return codes_; }

const ArrayView& categories() const {
return static_cast<CategoryType*>(type_.get())->categories();
}
const ArrayView& categories() const { return type_->categories(); }

private:
ArrayView codes_;
std::shared_ptr<CategoryType> type_;
};

} // namespace pandas
Loading

0 comments on commit bad99a1

Please sign in to comment.