Skip to content
This repository has been archived by the owner on Apr 10, 2024. It is now read-only.

Commit

Permalink
[pandas 2.0] Re-factor of the DataType classes
Browse files Browse the repository at this point in the history
* Changing PrimitiveType to NumericType  * Removing the macro for
declaring sub-types of NumericType and    instead using template
arguments  * Adding a static SINGLETON member to the NumericType base
class    instead of the macros for methods for creating singletons for
each numeric type  * Removing NullType's inheritance of PrimitiveType
* Removing intermediate base class between {Integer,Floating}ArrayImpl
and PrimitiveType and just making a concrete template based    class
{Integer,Floating}Array  * Changing FLOAT and DOUBLE to FLOAT32 and
FLOAT64

Author: Joshua Storck <[email protected]>

Closes #55 from joshuastorck/pandas-2.0 and squashes the following commits:

1614d44 [Joshua Storck] * Fixing native.p{xd,yx} so that it matches the latest C++ code * Changing PrimitiveType to NumericType * Removing the macro for declaring sub-types of NumericType and   instead using template arguments * Adding a static SINGLETON member to the NumericType base class   instead of the macros for methods for creating singletons for   each numeric type * Removing NullType's inheritance of PrimitiveType * Removing intermediate base class between {Integer,Floating}ArrayImpl   and PrimitiveType and just making a concrete template based   class {Integer,Floating}Array * Changing FLOAT and DOUBLE to FLOAT32 and FLOAT64
  • Loading branch information
Joshua Storck authored and wesm committed Oct 20, 2016
1 parent 2d4c8f9 commit 29df124
Show file tree
Hide file tree
Showing 10 changed files with 209 additions and 203 deletions.
44 changes: 23 additions & 21 deletions pandas/native.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ cdef extern from "<iostream>":
pass


cdef extern from "pandas/status.h" namespace "pandas" nogil:
cdef extern from "pandas/common.h" namespace "pandas" nogil:

# We can later add more of the common status factory methods as needed
cdef Status Status_OK "Status::OK"()
Expand All @@ -44,28 +44,30 @@ cdef extern from "pandas/status.h" namespace "pandas" nogil:
c_bool IsUnknownError()
c_bool IsNotImplemented()

cdef extern from "pandas/api.h" namespace "pandas":
cdef extern from "pandas/api.h" namespace "pandas::DataType":

enum TypeId:
TypeId_NA " pandas::DataType::NA"
TypeId_UINT8 " pandas::DataType::UINT8"
TypeId_UINT16 " pandas::DataType::UINT16"
TypeId_UINT32 " pandas::DataType::UINT32"
TypeId_UINT64 " pandas::DataType::UINT64"
TypeId_INT8 " pandas::DataType::INT8"
TypeId_INT16 " pandas::DataType::INT16"
TypeId_INT32 " pandas::DataType::INT32"
TypeId_INT64 " pandas::DataType::INT64"
TypeId_BOOL " pandas::DataType::BOOL"
TypeId_FLOAT " pandas::DataType::FLOAT"
TypeId_DOUBLE " pandas::DataType::DOUBLE"
TypeId_PYOBJECT " pandas::DataType::PYOBJECT"
TypeId_CATEGORY " pandas::DataType::CATEGORY"
TypeId_TIMESTAMP " pandas::DataType::TIMESTAMP"
TypeId_TIMESTAMP_TZ " pandas::DataType::TIMESTAMP_TZ"
TypeId_NA " pandas::DataType::TypeId::NA"
TypeId_UINT8 " pandas::DataType::TypeId::UINT8"
TypeId_UINT16 " pandas::DataType::TypeId::UINT16"
TypeId_UINT32 " pandas::DataType::TypeId::UINT32"
TypeId_UINT64 " pandas::DataType::TypeId::UINT64"
TypeId_INT8 " pandas::DataType::TypeId::INT8"
TypeId_INT16 " pandas::DataType::TypeId::INT16"
TypeId_INT32 " pandas::DataType::TypeId::INT32"
TypeId_INT64 " pandas::DataType::TypeId::INT64"
TypeId_BOOL " pandas::DataType::TypeId::BOOL"
TypeId_FLOAT32 " pandas::DataType::TypeId::FLOAT32"
TypeId_FLOAT64 " pandas::DataType::TypeId::FLOAT64"
TypeId_PYOBJECT " pandas::DataType::TypeId::PYOBJECT"
TypeId_CATEGORY " pandas::DataType::TypeId::CATEGORY"
TypeId_TIMESTAMP " pandas::DataType::TypeId::TIMESTAMP"
TypeId_TIMESTAMP_TZ " pandas::DataType::TypeId::TIMESTAMP_TZ"

cdef extern from "pandas/api.h" namespace "pandas":

cdef cppclass DataType:
TypeId type
TypeId type()

DataType()

Expand Down Expand Up @@ -116,8 +118,8 @@ cdef extern from "pandas/api.h" namespace "pandas":
TypeId type_id()
size_t length()

object GetValue(size_t i)
void SetValue(size_t i, object val)
object GetItem(size_t i)
void SetItem(size_t i, object val)

cdef cppclass CCategoryArray" pandas::CategoryArray"(CArray):
pass
Expand Down
10 changes: 5 additions & 5 deletions pandas/native.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ cdef class Array:
cdef inline _getitem(self, size_t i):
if i >= self.ap.length():
raise IndexError('Out of bounds: %d' % i)
return self.ap.GetValue(i)
return self.ap.GetItem(i)

def __setitem__(self, i, val):
cdef:
Expand All @@ -226,7 +226,7 @@ cdef class Array:
cdef inline _setitem(self, size_t i, object val):
if i >= self.ap.length():
raise IndexError('Out of bounds: %d' % i)
self.ap.SetValue(i, val)
self.ap.SetItem(i, val)

def slice(self, start, end):
pass
Expand All @@ -251,7 +251,7 @@ cdef class Float32Array(FloatingArray):

cdef class BooleanArray(Array):
cdef:
lp.cBooleanArray* inst
lp.CBooleanArray* inst

cdef init(self, const ArrayPtr& arr):
Array.init(self, arr)
Expand All @@ -265,7 +265,7 @@ cdef Array wrap_array(const lp.ArrayPtr& arr):
cdef:
Array result

if arr.get().type_enum() == lp.TypeId_CATEGORY:
if arr.get().type_id() == lp.TypeId_CATEGORY:
result = CategoryArray()
else:
result = Array()
Expand All @@ -280,7 +280,7 @@ cdef PandasType wrap_type(const lp.TypePtr& sp_type):
lp.DataType* type = sp_type.get()
PandasType result

if type.type == lp.TypeId_CATEGORY:
if type.type() == lp.TypeId_CATEGORY:
result = Category()
else:
result = PandasType()
Expand Down
2 changes: 1 addition & 1 deletion src/pandas/array-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class TestArray : public ::testing::Test {
TEST_F(TestArray, Attrs) {
DoubleType ex_type;
ASSERT_TRUE(array_->type()->Equals(ex_type));
ASSERT_EQ(DataType::DOUBLE, array_->type_id());
ASSERT_EQ(DataType::FLOAT64, array_->type_id());

ASSERT_EQ(values_.size(), array_->length());
}
Expand Down
2 changes: 2 additions & 0 deletions src/pandas/array.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,4 +92,6 @@ class ArrayView {
int64_t length_;
};

using ArrayPtr = std::shared_ptr<Array>;

} // namespace pandas
4 changes: 2 additions & 2 deletions src/pandas/dispatch.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ Status primitive_type_from_enum(DataType::TypeId tp_enum, DataType** out) {
MAKE_TYPE_CASE(DataType::UINT16, UInt16);
MAKE_TYPE_CASE(DataType::UINT32, UInt32);
MAKE_TYPE_CASE(DataType::UINT64, UInt64);
MAKE_TYPE_CASE(DataType::FLOAT, Float);
MAKE_TYPE_CASE(DataType::DOUBLE, Double);
MAKE_TYPE_CASE(DataType::FLOAT32, Float);
MAKE_TYPE_CASE(DataType::FLOAT64, Double);
MAKE_TYPE_CASE(DataType::BOOL, Boolean);
MAKE_TYPE_CASE(DataType::PYOBJECT, PyObject);
default:
Expand Down
4 changes: 2 additions & 2 deletions src/pandas/numpy_interop.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,8 @@ Status numpy_type_num_to_pandas(int type_num, DataType::TypeId* pandas_type) {
TYPE_MAP_CASE(UINT16, UINT16);
TYPE_MAP_CASE(UINT32, UINT32);
TYPE_MAP_CASE(UINT64, UINT64);
TYPE_MAP_CASE(FLOAT32, FLOAT);
TYPE_MAP_CASE(FLOAT64, DOUBLE);
TYPE_MAP_CASE(FLOAT32, FLOAT32);
TYPE_MAP_CASE(FLOAT64, FLOAT64);
TYPE_MAP_CASE(BOOL, BOOL);
TYPE_MAP_CASE(OBJECT, PYOBJECT);
default:
Expand Down
13 changes: 13 additions & 0 deletions src/pandas/type.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,17 @@ std::string TimestampType::ToString() const {
return ss.str();
}

// Namespace-scope definitions for the `static constexpr` NAME members declared
// (with their initializers) in each numeric type class. Before C++17, a static
// constexpr data member that is odr-used needs a definition like these, written
// without an initializer; from C++17 on such members are implicitly inline, so
// these redeclarations are redundant but still accepted.
constexpr const char* UInt8Type::NAME;
constexpr const char* Int8Type::NAME;
constexpr const char* UInt16Type::NAME;
constexpr const char* Int16Type::NAME;
constexpr const char* UInt32Type::NAME;
constexpr const char* Int32Type::NAME;
constexpr const char* UInt64Type::NAME;
constexpr const char* Int64Type::NAME;
constexpr const char* FloatType::NAME;
constexpr const char* DoubleType::NAME;
constexpr const char* BooleanType::NAME;

} // namespace pandas
106 changes: 65 additions & 41 deletions src/pandas/type.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,10 @@ class DataType {
BOOL = 9,

// 4-byte floating point value
FLOAT = 10,
FLOAT32 = 10,

// 8-byte floating point value
DOUBLE = 11,
FLOAT64 = 11,

// PyObject*
PYOBJECT = 12,
Expand Down Expand Up @@ -94,72 +94,96 @@ class PANDAS_EXPORT PyObjectType : public DataType {
std::string ToString() const override;
};

template <typename Derived>
class PANDAS_EXPORT PrimitiveType : public DataType {
template <typename DERIVED, typename C_TYPE, DataType::TypeId TYPE_ID,
std::size_t SIZE = sizeof(C_TYPE)>
class PANDAS_EXPORT NumericType : public DataType {
public:
PrimitiveType() : DataType(Derived::type_enum) {}
using c_type = C_TYPE;
static constexpr DataType::TypeId type_id = TYPE_ID;
static constexpr size_t size = SIZE;

std::string ToString() const override {
return std::string(static_cast<const Derived*>(this)->name());
}
NumericType() : DataType(type_id) {}

std::string ToString() const override { return std::string(DERIVED::NAME); }

static std::shared_ptr<DERIVED> SINGLETON;
};

#define PRIMITIVE_DECL(TYPENAME, C_TYPE, ENUM, SIZE, NAME) \
public: \
typedef C_TYPE c_type; \
static constexpr DataType::TypeId type_enum = DataType::ENUM; \
static constexpr size_t size = SIZE; \
\
explicit TYPENAME() : PrimitiveType<TYPENAME>() {} \
\
static const char* name() { return NAME; }
// Definition of the SINGLETON static member declared in NumericType. Each
// instantiation of the template gets its own shared_ptr, initialized with a
// default-constructed DERIVED.
// std::make_shared already returns a prvalue, so it initializes the member
// directly; wrapping it in std::move adds nothing and only forces an extra
// move construction.
template <typename DERIVED, typename C_TYPE, DataType::TypeId TYPE_ID, std::size_t SIZE>
std::shared_ptr<DERIVED> NumericType<DERIVED, C_TYPE, TYPE_ID, SIZE>::SINGLETON =
    std::make_shared<DERIVED>();

class PANDAS_EXPORT NullType : public DataType {
public:
NullType() : DataType(DataType::TypeId::NA) {}

class PANDAS_EXPORT NullType : public PrimitiveType<NullType> {
PRIMITIVE_DECL(NullType, void, NA, 0, "null");
std::string ToString() const override { return std::string("null"); }
};

class PANDAS_EXPORT UInt8Type : public PrimitiveType<UInt8Type> {
PRIMITIVE_DECL(UInt8Type, uint8_t, UINT8, 1, "uint8");
class PANDAS_EXPORT UInt8Type
: public NumericType<UInt8Type, std::uint8_t, DataType::TypeId::UINT8> {
public:
constexpr static const char* NAME = "uint8";
};

class PANDAS_EXPORT Int8Type : public PrimitiveType<Int8Type> {
PRIMITIVE_DECL(Int8Type, int8_t, INT8, 1, "int8");
class PANDAS_EXPORT Int8Type
: public NumericType<Int8Type, std::int8_t, DataType::TypeId::INT8> {
public:
constexpr static const char* NAME = "int8";
};

class PANDAS_EXPORT UInt16Type : public PrimitiveType<UInt16Type> {
PRIMITIVE_DECL(UInt16Type, uint16_t, UINT16, 2, "uint16");
class PANDAS_EXPORT UInt16Type
: public NumericType<UInt16Type, std::uint16_t, DataType::TypeId::UINT16> {
public:
constexpr static const char* NAME = "uint16";
};

class PANDAS_EXPORT Int16Type : public PrimitiveType<Int16Type> {
PRIMITIVE_DECL(Int16Type, int16_t, INT16, 2, "int16");
class PANDAS_EXPORT Int16Type
: public NumericType<Int16Type, std::int16_t, DataType::TypeId::INT16> {
public:
constexpr static const char* NAME = "int16";
};

class PANDAS_EXPORT UInt32Type : public PrimitiveType<UInt32Type> {
PRIMITIVE_DECL(UInt32Type, uint32_t, UINT32, 4, "uint32");
class PANDAS_EXPORT UInt32Type
: public NumericType<UInt32Type, std::uint32_t, DataType::TypeId::UINT32> {
public:
constexpr static const char* NAME = "uint32";
};

class PANDAS_EXPORT Int32Type : public PrimitiveType<Int32Type> {
PRIMITIVE_DECL(Int32Type, int32_t, INT32, 4, "int32");
class PANDAS_EXPORT Int32Type
: public NumericType<Int32Type, std::int32_t, DataType::TypeId::INT32> {
public:
constexpr static const char* NAME = "int32";
};

class PANDAS_EXPORT UInt64Type : public PrimitiveType<UInt64Type> {
PRIMITIVE_DECL(UInt64Type, uint64_t, UINT64, 8, "uint64");
class PANDAS_EXPORT UInt64Type
: public NumericType<UInt64Type, std::uint64_t, DataType::TypeId::UINT64> {
public:
constexpr static const char* NAME = "uint64";
};

class PANDAS_EXPORT Int64Type : public PrimitiveType<Int64Type> {
PRIMITIVE_DECL(Int64Type, int64_t, INT64, 8, "int64");
class PANDAS_EXPORT Int64Type
: public NumericType<Int64Type, std::int64_t, DataType::TypeId::INT64> {
public:
constexpr static const char* NAME = "int64";
};

class PANDAS_EXPORT FloatType : public PrimitiveType<FloatType> {
PRIMITIVE_DECL(FloatType, float, FLOAT, 4, "float");
class PANDAS_EXPORT FloatType
: public NumericType<FloatType, float, DataType::TypeId::FLOAT32> {
public:
constexpr static const char* NAME = "float32";
};

class PANDAS_EXPORT DoubleType : public PrimitiveType<DoubleType> {
PRIMITIVE_DECL(DoubleType, double, DOUBLE, 8, "double");
class PANDAS_EXPORT DoubleType
: public NumericType<DoubleType, double, DataType::TypeId::FLOAT64> {
public:
constexpr static const char* NAME = "float64";
};

class PANDAS_EXPORT BooleanType : public PrimitiveType<BooleanType> {
PRIMITIVE_DECL(BooleanType, uint8_t, BOOL, 1, "bool");
class PANDAS_EXPORT BooleanType
: public NumericType<BooleanType, std::uint8_t, DataType::TypeId::BOOL> {
public:
constexpr static const char* NAME = "bool";
};

} // namespace pandas
Loading

0 comments on commit 29df124

Please sign in to comment.