Skip to content

Commit

Permalink
ARROW-16754: [Java] StructVector's child vectors get unexpectedly reo…
Browse files Browse the repository at this point in the history
…rdered after adding duplicated fields (apache#13321)

Authored-by: Hongze Zhang <[email protected]>
Signed-off-by: David Li <[email protected]>
  • Loading branch information
zhztheplayer authored and zagto committed Oct 7, 2022
1 parent eaca726 commit b7da7b2
Show file tree
Hide file tree
Showing 2 changed files with 111 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
Expand Down Expand Up @@ -54,7 +54,7 @@
public class MapWithOrdinalImpl<K, V> implements MapWithOrdinal<K, V> {
private static final Logger logger = LoggerFactory.getLogger(MapWithOrdinalImpl.class);

private final Map<K, Map.Entry<Integer, V>> primary = new HashMap<>();
private final Map<K, Map.Entry<Integer, V>> primary = new LinkedHashMap<>();
private final IntObjectHashMap<V> secondary = new IntObjectHashMap<>();

private final Map<K, V> delegate = new Map<K, V>() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,20 @@

import static org.junit.Assert.*;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.vector.complex.AbstractStructVector;
import org.apache.arrow.vector.complex.ListVector;
import org.apache.arrow.vector.complex.StructVector;
import org.apache.arrow.vector.complex.UnionVector;
import org.apache.arrow.vector.holders.ComplexHolder;
import org.apache.arrow.vector.types.Types.MinorType;
import org.apache.arrow.vector.types.pojo.ArrowType.Struct;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.FieldType;
import org.junit.After;
import org.junit.Assert;
Expand Down Expand Up @@ -171,13 +174,112 @@ public void testAddOrGetComplexChildVectors() {
vector.addOrGetStruct("struct");
vector.addOrGetMap("map", true);

List<FieldVector> childrens = vector.getChildrenFromFields();
assertEquals(5, childrens.size());
assertEquals(MinorType.LIST, childrens.get(0).getMinorType());
assertEquals(MinorType.FIXED_SIZE_LIST, childrens.get(1).getMinorType());
assertEquals(MinorType.UNION, childrens.get(2).getMinorType());
assertEquals(MinorType.STRUCT, childrens.get(3).getMinorType());
assertEquals(MinorType.MAP, childrens.get(4).getMinorType());
List<FieldVector> children = vector.getChildrenFromFields();
assertEquals(5, children.size());
assertEquals(MinorType.LIST, children.get(0).getMinorType());
assertEquals(MinorType.FIXED_SIZE_LIST, children.get(1).getMinorType());
assertEquals(MinorType.UNION, children.get(2).getMinorType());
assertEquals(MinorType.STRUCT, children.get(3).getMinorType());
assertEquals(MinorType.MAP, children.get(4).getMinorType());
}
}

/**
 * Initializes a struct vector created with {@code CONFLICT_APPEND} from a field list that
 * repeats several names, then checks that the children come back one per declared field,
 * in declaration order, each with the name and minor type it was declared with.
 */
@Test
public void testAddChildVectorsWithDuplicatedFieldNamesForConflictPolicyAppend() {
  // Declaration table. The first seven entries are distinct names; the last four repeat
  // earlier names ("uncertain-type" is repeated with a different type) so that the test
  // can detect any reordering caused by duplicated names.
  // This test expects the children to mirror this table exactly, so the same table is
  // used both to build the input and to verify the result.
  final String[] names = {
    "varchar1", "int1", "varchar2", "int2", "varchar3", "int3", "uncertain-type",
    "varchar1", "varchar2", "varchar3", "uncertain-type"
  };
  final MinorType[] types = {
    MinorType.VARCHAR, MinorType.INT, MinorType.VARCHAR, MinorType.INT,
    MinorType.VARCHAR, MinorType.INT, MinorType.INT,
    MinorType.VARCHAR, MinorType.VARCHAR, MinorType.VARCHAR, MinorType.VARCHAR
  };

  final FieldType structType = new FieldType(true, Struct.INSTANCE, null, null);
  try (StructVector vector = new StructVector("struct", allocator, structType, null,
      AbstractStructVector.ConflictPolicy.CONFLICT_APPEND, true)) {
    final List<Field> initFields = new ArrayList<>();
    for (int i = 0; i < names.length; i++) {
      initFields.add(Field.nullable(names[i], types[i].getType()));
    }

    vector.initializeChildrenFromFields(initFields);

    final List<FieldVector> children = vector.getChildrenFromFields();
    assertEquals(11, children.size());

    // Names are verified for every position before any type is checked.
    for (int i = 0; i < names.length; i++) {
      assertEquals(names[i], children.get(i).getName());
    }
    for (int i = 0; i < types.length; i++) {
      assertEquals(types[i], children.get(i).getMinorType());
    }
  }
}

/**
 * Initializes a struct vector created with {@code CONFLICT_REPLACE} from a field list that
 * repeats several names, then checks that exactly one child per distinct name is returned,
 * positioned where that name was first declared.
 */
@Test
public void testAddChildVectorsWithDuplicatedFieldNamesForConflictPolicyReplace() {
  // Input declarations: seven distinct names followed by four repeats of earlier names.
  // "uncertain-type" is declared first as INT and then again as VARCHAR.
  final String[] declaredNames = {
    "varchar1", "int1", "varchar2", "int2", "varchar3", "int3", "uncertain-type",
    "varchar1", "varchar2", "varchar3", "uncertain-type"
  };
  final MinorType[] declaredTypes = {
    MinorType.VARCHAR, MinorType.INT, MinorType.VARCHAR, MinorType.INT,
    MinorType.VARCHAR, MinorType.INT, MinorType.INT,
    MinorType.VARCHAR, MinorType.VARCHAR, MinorType.VARCHAR, MinorType.VARCHAR
  };

  // Expected children: the original seven positions are kept, and "uncertain-type"
  // is expected to report VARCHAR, i.e. the type of its later declaration.
  final String[] expectedNames = {
    "varchar1", "int1", "varchar2", "int2", "varchar3", "int3", "uncertain-type"
  };
  final MinorType[] expectedTypes = {
    MinorType.VARCHAR, MinorType.INT, MinorType.VARCHAR, MinorType.INT,
    MinorType.VARCHAR, MinorType.INT, MinorType.VARCHAR
  };

  final FieldType structType = new FieldType(true, Struct.INSTANCE, null, null);
  try (StructVector vector = new StructVector("struct", allocator, structType, null,
      AbstractStructVector.ConflictPolicy.CONFLICT_REPLACE, true)) {
    final List<Field> initFields = new ArrayList<>();
    for (int i = 0; i < declaredNames.length; i++) {
      initFields.add(Field.nullable(declaredNames[i], declaredTypes[i].getType()));
    }

    vector.initializeChildrenFromFields(initFields);

    final List<FieldVector> children = vector.getChildrenFromFields();
    assertEquals(7, children.size());

    // Names are verified for every position before any type is checked.
    for (int i = 0; i < expectedNames.length; i++) {
      assertEquals(expectedNames[i], children.get(i).getName());
    }
    for (int i = 0; i < expectedTypes.length; i++) {
      assertEquals(expectedTypes[i], children.get(i).getMinorType());
    }
  }
}

}

0 comments on commit b7da7b2

Please sign in to comment.