forked from apache/arrow
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
apacheGH-43454: [C++][Python] Add Opaque canonical extension type (apache#43458)

### Rationale for this change

Add the newly ratified extension type.

### What changes are included in this PR?

The C++/Python implementation only.

### Are these changes tested?

Yes

### Are there any user-facing changes?

No.

* GitHub Issue: apache#43454

Lead-authored-by: David Li <[email protected]>
Co-authored-by: Weston Pace <[email protected]>
Signed-off-by: David Li <[email protected]>
- Loading branch information
1 parent
4d200dc
commit 6e7125b
Showing
17 changed files
with
627 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
// Licensed to the Apache Software Foundation (ASF) under one | ||
// or more contributor license agreements. See the NOTICE file | ||
// distributed with this work for additional information | ||
// regarding copyright ownership. The ASF licenses this file | ||
// to you under the Apache License, Version 2.0 (the | ||
// "License"); you may not use this file except in compliance | ||
// with the License. You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, | ||
// software distributed under the License is distributed on an | ||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
// KIND, either express or implied. See the License for the | ||
// specific language governing permissions and limitations | ||
// under the License. | ||
|
||
#include "arrow/extension/opaque.h" | ||
|
||
#include <sstream> | ||
|
||
#include "arrow/json/rapidjson_defs.h" // IWYU pragma: keep | ||
#include "arrow/util/logging.h" | ||
|
||
#include <rapidjson/document.h> | ||
#include <rapidjson/error/en.h> | ||
#include <rapidjson/writer.h> | ||
|
||
namespace arrow::extension { | ||
|
||
/// \brief Render a human-readable description of this type.
///
/// The result has the form
/// `extension<NAME[storage_type=S, type_name=T, vendor_name=V]>`, where NAME is
/// extension_name() and S is the storage type's ToString(show_metadata) output.
std::string OpaqueType::ToString(bool show_metadata) const {
  std::string repr = "extension<";
  repr += this->extension_name();
  repr += "[storage_type=";
  repr += storage_type_->ToString(show_metadata);
  repr += ", type_name=";
  repr += type_name_;
  repr += ", vendor_name=";
  repr += vendor_name_;
  repr += "]>";
  return repr;
}
|
||
bool OpaqueType::ExtensionEquals(const ExtensionType& other) const { | ||
if (extension_name() != other.extension_name()) { | ||
return false; | ||
} | ||
const auto& opaque = internal::checked_cast<const OpaqueType&>(other); | ||
return storage_type()->Equals(*opaque.storage_type()) && | ||
type_name() == opaque.type_name() && vendor_name() == opaque.vendor_name(); | ||
} | ||
|
||
std::string OpaqueType::Serialize() const { | ||
rapidjson::Document document; | ||
document.SetObject(); | ||
rapidjson::Document::AllocatorType& allocator = document.GetAllocator(); | ||
|
||
rapidjson::Value type_name(rapidjson::StringRef(type_name_)); | ||
document.AddMember(rapidjson::Value("type_name", allocator), type_name, allocator); | ||
rapidjson::Value vendor_name(rapidjson::StringRef(vendor_name_)); | ||
document.AddMember(rapidjson::Value("vendor_name", allocator), vendor_name, allocator); | ||
|
||
rapidjson::StringBuffer buffer; | ||
rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | ||
document.Accept(writer); | ||
return buffer.GetString(); | ||
} | ||
|
||
/// \brief Reconstruct an OpaqueType from its serialized JSON parameters.
///
/// \param[in] storage_type The storage type to attach to the resulting type.
/// \param[in] serialized_data JSON text expected to be an object with string
/// members "type_name" and "vendor_name" (the format written by Serialize()).
/// \return The reconstructed type, or Status::Invalid if the text is not valid
/// JSON, is not an object, lacks one of the two members, or one of them is not a
/// string.
Result<std::shared_ptr<DataType>> OpaqueType::Deserialize(
    std::shared_ptr<DataType> storage_type, const std::string& serialized_data) const {
  rapidjson::Document document;
  document.Parse(serialized_data.data(), serialized_data.length());
  if (document.HasParseError()) {
    return Status::Invalid("Invalid serialized JSON data for OpaqueType: ",
                           rapidjson::GetParseError_En(document.GetParseError()), ": ",
                           serialized_data);
  }
  if (!document.IsObject()) {
    return Status::Invalid("Invalid serialized JSON data for OpaqueType: not an object");
  }

  // Look each member up once; presence of both is checked before either type so
  // that the reported error is the same as before for any given input.
  const auto type_name = document.FindMember("type_name");
  if (type_name == document.MemberEnd()) {
    return Status::Invalid(
        "Invalid serialized JSON data for OpaqueType: missing type_name");
  }
  const auto vendor_name = document.FindMember("vendor_name");
  if (vendor_name == document.MemberEnd()) {
    return Status::Invalid(
        "Invalid serialized JSON data for OpaqueType: missing vendor_name");
  }

  if (!type_name->value.IsString()) {
    return Status::Invalid(
        "Invalid serialized JSON data for OpaqueType: type_name is not a string");
  }
  if (!vendor_name->value.IsString()) {
    return Status::Invalid(
        "Invalid serialized JSON data for OpaqueType: vendor_name is not a string");
  }

  // Build the strings from pointer + length: a JSON string may contain an escaped
  // NUL ("\u0000"), and constructing from the bare const char* would truncate it.
  return opaque(
      std::move(storage_type),
      std::string(type_name->value.GetString(), type_name->value.GetStringLength()),
      std::string(vendor_name->value.GetString(),
                  vendor_name->value.GetStringLength()));
}
|
||
std::shared_ptr<Array> OpaqueType::MakeArray(std::shared_ptr<ArrayData> data) const { | ||
DCHECK_EQ(data->type->id(), Type::EXTENSION); | ||
DCHECK_EQ("arrow.opaque", | ||
internal::checked_cast<const ExtensionType&>(*data->type).extension_name()); | ||
return std::make_shared<OpaqueArray>(data); | ||
} | ||
|
||
std::shared_ptr<DataType> opaque(std::shared_ptr<DataType> storage_type, | ||
std::string type_name, std::string vendor_name) { | ||
return std::make_shared<OpaqueType>(std::move(storage_type), std::move(type_name), | ||
std::move(vendor_name)); | ||
} | ||
|
||
} // namespace arrow::extension |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
// Licensed to the Apache Software Foundation (ASF) under one | ||
// or more contributor license agreements. See the NOTICE file | ||
// distributed with this work for additional information | ||
// regarding copyright ownership. The ASF licenses this file | ||
// to you under the Apache License, Version 2.0 (the | ||
// "License"); you may not use this file except in compliance | ||
// with the License. You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, | ||
// software distributed under the License is distributed on an | ||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
// KIND, either express or implied. See the License for the | ||
// specific language governing permissions and limitations | ||
// under the License. | ||
|
||
#pragma once

#include <memory>
#include <string>
#include <string_view>
#include <utility>

#include "arrow/extension_type.h"
#include "arrow/type.h"
|
||
namespace arrow::extension { | ||
|
||
/// \brief Extension type standing in for a type from an external (usually
/// non-Arrow) system that could not be interpreted.
///
/// An instance is described by its storage type plus two strings: the name of
/// the type in the external system and the name of that system.
class ARROW_EXPORT OpaqueType : public ExtensionType {
 public:
  /// \brief Build an OpaqueType from its storage type and identifying names.
  ///
  /// \param[in] storage_type The underlying storage type. Should be
  /// arrow::null if there is no data.
  /// \param[in] type_name Name of the type in the external system.
  /// \param[in] vendor_name Name of the external system.
  explicit OpaqueType(std::shared_ptr<DataType> storage_type, std::string type_name,
                      std::string vendor_name)
      : ExtensionType(std::move(storage_type)),
        type_name_(std::move(type_name)),
        vendor_name_(std::move(vendor_name)) {}

  /// \brief Name of the type in the external system.
  std::string_view type_name() const { return type_name_; }
  /// \brief Name of the external system.
  std::string_view vendor_name() const { return vendor_name_; }

  /// \brief The extension name: "arrow.opaque".
  std::string extension_name() const override { return "arrow.opaque"; }
  /// \brief Human-readable description including storage type and both names.
  std::string ToString(bool show_metadata) const override;
  /// \brief Equality on extension name, storage type, type name and vendor name.
  bool ExtensionEquals(const ExtensionType& other) const override;
  /// \brief Serialize type_name and vendor_name as a JSON object.
  std::string Serialize() const override;
  /// \brief Rebuild an OpaqueType from a storage type and serialized JSON.
  Result<std::shared_ptr<DataType>> Deserialize(
      std::shared_ptr<DataType> storage_type,
      const std::string& serialized_data) const override;
  /// \brief Create an OpaqueArray from ArrayData.
  std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const override;

 private:
  std::string type_name_;
  std::string vendor_name_;
};
|
||
/// \brief Array of OpaqueType values: a wrapper for (usually binary) data from
/// an external (often non-Arrow) system that could not be interpreted.
///
/// Adds no members of its own; it inherits ExtensionArray's constructors.
class ARROW_EXPORT OpaqueArray : public ExtensionArray {
 public:
  using ExtensionArray::ExtensionArray;
};
|
||
/// \brief Return an OpaqueType instance.
///
/// \param[in] storage_type The underlying storage type.
/// \param[in] type_name The name of the type in the external system.
/// \param[in] vendor_name The name of the external system.
ARROW_EXPORT std::shared_ptr<DataType> opaque(
    std::shared_ptr<DataType> storage_type, std::string type_name,
    std::string vendor_name);
|
||
} // namespace arrow::extension |
Oops, something went wrong.