Skip to content

Commit

Permalink
Exclude dict_id and dict_is_ordered from equality comparison of `…
Browse files Browse the repository at this point in the history
…Field` (#1647)

* Impl Eq for Field

* Impl Hash

* Impl PartialOrd for Field

* Impl Ord instead

* Add comment and test.
  • Loading branch information
viirya authored May 8, 2022
1 parent 6ad893c commit f74ad34
Showing 1 changed file with 85 additions and 1 deletion.
86 changes: 85 additions & 1 deletion arrow/src/datatypes/field.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@
// specific language governing permissions and limitations
// under the License.

use std::cmp::Ordering;
use std::collections::BTreeMap;
use std::hash::{Hash, Hasher};

use serde_derive::{Deserialize, Serialize};
use serde_json::{json, Value};
Expand All @@ -27,7 +29,7 @@ use super::DataType;
/// Contains the meta-data for a single relative type.
///
/// The `Schema` object is an ordered collection of `Field` objects.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Field {
name: String,
data_type: DataType,
Expand All @@ -39,6 +41,47 @@ pub struct Field {
metadata: Option<BTreeMap<String, String>>,
}

// Auto-derive `PartialEq` traits will pull `dict_id` and `dict_is_ordered`
// into comparison. However, these properties are only used in IPC context
// for matching dictionary encoded data. They are not necessary to be same
// to consider schema equality. For example, in C++ `Field` implementation,
// it doesn't contain these dictionary properties too.
impl PartialEq for Field {
fn eq(&self, other: &Self) -> bool {
self.name == other.name
&& self.data_type == other.data_type
&& self.nullable == other.nullable
&& self.metadata == other.metadata
}
}

impl Eq for Field {}

impl PartialOrd for Field {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}

impl Ord for Field {
fn cmp(&self, other: &Self) -> Ordering {
self.name
.cmp(other.name())
.then(self.data_type.cmp(other.data_type()))
.then(self.nullable.cmp(&other.nullable))
.then(self.metadata.cmp(&other.metadata))
}
}

impl Hash for Field {
fn hash<H: Hasher>(&self, state: &mut H) {
self.name.hash(state);
self.data_type.hash(state);
self.nullable.hash(state);
self.metadata.hash(state);
}
}

impl Field {
/// Creates a new field
pub fn new(name: &str, data_type: DataType, nullable: bool) -> Self {
Expand Down Expand Up @@ -623,6 +666,8 @@ impl std::fmt::Display for Field {
#[cfg(test)]
mod test {
use super::{DataType, Field};
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

#[test]
fn test_fields_with_dict_id() {
Expand Down Expand Up @@ -676,4 +721,43 @@ mod test {
assert_eq!(dict2, *field);
}
}

fn get_field_hash(field: &Field) -> u64 {
let mut s = DefaultHasher::new();
field.hash(&mut s);
s.finish()
}

#[test]
fn test_field_comparison_case() {
// dictionary-encoding properties not used for field comparison
let dict1 = Field::new_dict(
"dict1",
DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()),
false,
10,
false,
);
let dict2 = Field::new_dict(
"dict1",
DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()),
false,
20,
false,
);

assert_eq!(dict1, dict2);
assert_eq!(get_field_hash(&dict1), get_field_hash(&dict2));

let dict1 = Field::new_dict(
"dict0",
DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()),
false,
10,
false,
);

assert_ne!(dict1, dict2);
assert_ne!(get_field_hash(&dict1), get_field_hash(&dict2));
}
}

0 comments on commit f74ad34

Please sign in to comment.