Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ARROW-3144: [C++/Python] Move "dictionary" member from DictionaryType to ArrayData to allow for variable dictionaries #4316

Closed
wants to merge 22 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ before_install:


matrix:
allow_failures:
- language: r
fast_finish: true
include:
- name: "Lint C++, Python, R, Docker"
Expand Down
30 changes: 21 additions & 9 deletions c_glib/arrow-glib/composite-array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -534,25 +534,37 @@ garrow_dictionary_array_class_init(GArrowDictionaryArrayClass *klass)

/**
* garrow_dictionary_array_new:
- * @data_type: The data type of dictionary.
+ * @data_type: The data type of the dictionary array.
* @indices: The indices of values in dictionary.
+ * @dictionary: The dictionary of the dictionary array.
+ * @error: (nullable): Return location for a #GError or %NULL.
*
- * Returns: A newly created #GArrowDictionaryArray.
+ * Returns: (nullable): A newly created #GArrowDictionaryArray
+ * or %NULL on error.
*
* Since: 0.8.0
*/
GArrowDictionaryArray *
garrow_dictionary_array_new(GArrowDataType *data_type,
- GArrowArray *indices)
+ GArrowArray *indices,
+ GArrowArray *dictionary,
+ GError **error)
{
const auto arrow_data_type = garrow_data_type_get_raw(data_type);
const auto arrow_indices = garrow_array_get_raw(indices);
- auto arrow_dictionary_array =
- std::make_shared<arrow::DictionaryArray>(arrow_data_type,
- arrow_indices);
- auto arrow_array =
- std::static_pointer_cast<arrow::Array>(arrow_dictionary_array);
- return GARROW_DICTIONARY_ARRAY(garrow_array_new_raw(&arrow_array));
+ const auto arrow_dictionary = garrow_array_get_raw(dictionary);
+ std::shared_ptr<arrow::Array> arrow_dictionary_array;
+ auto status = arrow::DictionaryArray::FromArrays(arrow_data_type,
+ arrow_indices,
+ arrow_dictionary,
+ &arrow_dictionary_array);
+ if (garrow_error_check(error, status, "[dictionary-array][new]")) {
+ auto arrow_array =
+ std::static_pointer_cast<arrow::Array>(arrow_dictionary_array);
+ return GARROW_DICTIONARY_ARRAY(garrow_array_new_raw(&arrow_array));
+ } else {
+ return NULL;
+ }
}

/**
Expand Down
5 changes: 4 additions & 1 deletion c_glib/arrow-glib/composite-array.h
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,10 @@ struct _GArrowDictionaryArrayClass
};

GArrowDictionaryArray *
- garrow_dictionary_array_new(GArrowDataType *data_type, GArrowArray *indices);
+ garrow_dictionary_array_new(GArrowDataType *data_type,
+ GArrowArray *indices,
+ GArrowArray *dictionary,
+ GError **error);
GArrowArray *
garrow_dictionary_array_get_indices(GArrowDictionaryArray *array);
GArrowArray *
Expand Down
22 changes: 11 additions & 11 deletions c_glib/arrow-glib/composite-data-type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -513,7 +513,7 @@ garrow_dictionary_data_type_class_init(GArrowDictionaryDataTypeClass *klass)
/**
* garrow_dictionary_data_type_new:
* @index_data_type: The data type of index.
- * @dictionary: The dictionary.
+ * @value_data_type: The data type of dictionary values.
* @ordered: Whether dictionary contents are ordered or not.
*
* Returns: The newly created dictionary data type.
Expand All @@ -522,13 +522,13 @@ garrow_dictionary_data_type_class_init(GArrowDictionaryDataTypeClass *klass)
*/
GArrowDictionaryDataType *
garrow_dictionary_data_type_new(GArrowDataType *index_data_type,
- GArrowArray *dictionary,
+ GArrowDataType *value_data_type,
gboolean ordered)
{
auto arrow_index_data_type = garrow_data_type_get_raw(index_data_type);
- auto arrow_dictionary = garrow_array_get_raw(dictionary);
+ auto arrow_value_data_type = garrow_data_type_get_raw(value_data_type);
auto arrow_data_type = arrow::dictionary(arrow_index_data_type,
- arrow_dictionary,
+ arrow_value_data_type,
ordered);
return GARROW_DICTIONARY_DATA_TYPE(garrow_data_type_new_raw(&arrow_data_type));
}
Expand All @@ -552,21 +552,21 @@ garrow_dictionary_data_type_get_index_data_type(GArrowDictionaryDataType *dictio
}

/**
- * garrow_dictionary_data_type_get_dictionary:
+ * garrow_dictionary_data_type_get_value_data_type:
* @dictionary_data_type: The #GArrowDictionaryDataType.
*
- * Returns: (transfer full): The dictionary as #GArrowArray.
+ * Returns: (transfer full): The #GArrowDataType of dictionary values.
*
- * Since: 0.8.0
+ * Since: 0.14.0
*/
- GArrowArray *
- garrow_dictionary_data_type_get_dictionary(GArrowDictionaryDataType *dictionary_data_type)
+ GArrowDataType *
+ garrow_dictionary_data_type_get_value_data_type(GArrowDictionaryDataType *dictionary_data_type)
{
auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(dictionary_data_type));
auto arrow_dictionary_data_type =
std::static_pointer_cast<arrow::DictionaryType>(arrow_data_type);
- auto arrow_dictionary = arrow_dictionary_data_type->dictionary();
- return garrow_array_new_raw(&arrow_dictionary);
+ auto arrow_value_data_type = arrow_dictionary_data_type->value_type();
+ return garrow_data_type_new_raw(&arrow_value_data_type);
}

/**
Expand Down
7 changes: 4 additions & 3 deletions c_glib/arrow-glib/composite-data-type.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,12 +145,13 @@ struct _GArrowDictionaryDataTypeClass

GArrowDictionaryDataType *
garrow_dictionary_data_type_new(GArrowDataType *index_data_type,
- GArrowArray *dictionary,
+ GArrowDataType *value_data_type,
gboolean ordered);
GArrowDataType *
garrow_dictionary_data_type_get_index_data_type(GArrowDictionaryDataType *dictionary_data_type);
- GArrowArray *
- garrow_dictionary_data_type_get_dictionary(GArrowDictionaryDataType *dictionary_data_type);
+ GARROW_AVAILABLE_IN_0_14
+ GArrowDataType *
+ garrow_dictionary_data_type_get_value_data_type(GArrowDictionaryDataType *dictionary_data_type);
gboolean
garrow_dictionary_data_type_is_ordered(GArrowDictionaryDataType *dictionary_data_type);

Expand Down
10 changes: 7 additions & 3 deletions c_glib/test/test-dictionary-array.rb
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,16 @@ def setup
@dictionary = build_string_array(["C", "C++", "Ruby"])
@ordered = false
@data_type = Arrow::DictionaryDataType.new(@index_data_type,
- @dictionary,
+ @dictionary.value_data_type,
@ordered)
end

sub_test_case(".new") do
def test_new
indices = build_int32_array([0, 2, 2, 1, 0])
- dictionary_array = Arrow::DictionaryArray.new(@data_type, indices)
+ dictionary_array = Arrow::DictionaryArray.new(@data_type,
+ indices,
+ @dictionary)
assert_equal(<<-STRING.chomp, dictionary_array.to_s)

-- dictionary:
Expand All @@ -55,7 +57,9 @@ def test_new
def setup
super
@indices = build_int32_array([0, 2, 2, 1, 0])
- @dictionary_array = Arrow::DictionaryArray.new(@data_type, @indices)
+ @dictionary_array = Arrow::DictionaryArray.new(@data_type,
+ @indices,
+ @dictionary)
end

def test_indices
Expand Down
8 changes: 4 additions & 4 deletions c_glib/test/test-dictionary-data-type.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@ class TestDictionaryDataType < Test::Unit::TestCase

def setup
@index_data_type = Arrow::Int32DataType.new
- @dictionary = build_string_array(["C", "C++", "Ruby"])
+ @value_data_type = Arrow::StringDataType.new
@ordered = true
@data_type = Arrow::DictionaryDataType.new(@index_data_type,
- @dictionary,
+ @value_data_type,
@ordered)
end

Expand All @@ -44,8 +44,8 @@ def test_index_data_type
assert_equal(@index_data_type, @data_type.index_data_type)
end

- def test_dictionary
- assert_equal(@dictionary, @data_type.dictionary)
+ def test_value_data_type
+ assert_equal(@value_data_type, @data_type.value_data_type)
end

def test_ordered?
Expand Down
Loading