Skip to content

Commit

Permalink
[#2266] fix(partition): enable preassign partition when creating rang…
Browse files Browse the repository at this point in the history
…e and list partitioning table (#3363)

### What changes were proposed in this pull request?

enable pre-assign partition when creating range and list partitioning
table

### Why are the changes needed?

Fix: #2266 

### Does this PR introduce _any_ user-facing change?

yes, enable pre-assign partition when creating range and list
partitioning table

### How was this patch tested?

tests modified

Co-authored-by: mchades <[email protected]>
  • Loading branch information
jerryshao and mchades authored May 13, 2024
1 parent 3621bec commit bb0a318
Show file tree
Hide file tree
Showing 11 changed files with 553 additions and 214 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,12 @@

package com.datastrato.gravitino.rel.expressions.transforms;

import static com.datastrato.gravitino.rel.partitions.Partitions.EMPTY_PARTITIONS;

import com.datastrato.gravitino.annotation.Evolving;
import com.datastrato.gravitino.rel.expressions.Expression;
import com.datastrato.gravitino.rel.expressions.NamedReference;
import com.datastrato.gravitino.rel.partitions.Partition;
import java.util.Objects;

/**
Expand All @@ -40,11 +43,12 @@ public interface Transform extends Expression {
Expression[] arguments();

/**
* @return The preassigned partitions in the partitioning. Currently, only ListTransform and
* RangeTransform need to deal with assignments
* @return The preassigned partitions in the partitioning. Currently, only {@link
* Transforms.ListTransform} and {@link Transforms.RangeTransform} need to deal with
* assignments
*/
default Expression[] assignments() {
return Expression.EMPTY_EXPRESSION;
default Partition[] assignments() {
return EMPTY_PARTITIONS;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import com.datastrato.gravitino.rel.expressions.NamedReference;
import com.datastrato.gravitino.rel.expressions.literals.Literal;
import com.datastrato.gravitino.rel.expressions.literals.Literals;
import com.datastrato.gravitino.rel.partitions.ListPartition;
import com.datastrato.gravitino.rel.partitions.RangePartition;
import com.google.common.collect.ObjectArrays;
import java.util.Arrays;
import java.util.Objects;
Expand Down Expand Up @@ -166,8 +168,20 @@ public static BucketTransform bucket(int numBuckets, String[]... fieldNames) {
* @return The created transform
*/
public static ListTransform list(String[]... fieldNames) {
return list(fieldNames, new ListPartition[0]);
}

/**
* Create a transform that includes multiple fields in a list with preassigned list partitions.
*
* @param fieldNames The field names to include in the list
* @param assignments The preassigned list partitions
* @return The created transform
*/
public static ListTransform list(String[][] fieldNames, ListPartition[] assignments) {
return new ListTransform(
Arrays.stream(fieldNames).map(NamedReference::field).toArray(NamedReference[]::new));
Arrays.stream(fieldNames).map(NamedReference::field).toArray(NamedReference[]::new),
assignments);
}

/**
Expand All @@ -177,7 +191,18 @@ public static ListTransform list(String[]... fieldNames) {
* @return The created transform
*/
public static RangeTransform range(String[] fieldName) {
return new RangeTransform(NamedReference.field(fieldName));
return range(fieldName, new RangePartition[0]);
}

/**
* Create a transform that returns the range of the input value with preassigned range partitions.
*
* @param fieldName The field name to transform
* @param assignments The preassigned range partitions
* @return The created transform
*/
public static RangeTransform range(String[] fieldName, RangePartition[] assignments) {
return new RangeTransform(NamedReference.field(fieldName), assignments);
}

/**
Expand Down Expand Up @@ -486,9 +511,16 @@ public int hashCode() {
public static final class ListTransform implements Transform {

private final NamedReference[] fields;
private final ListPartition[] assignments;

private ListTransform(NamedReference[] fields) {
this.fields = fields;
this.assignments = new ListPartition[0];
}

private ListTransform(NamedReference[] fields, ListPartition[] assignments) {
this.fields = fields;
this.assignments = assignments;
}

/** @return The field names to include in the list. */
Expand All @@ -510,9 +542,8 @@ public Expression[] arguments() {

/** @return The assignments to the transform. */
@Override
public Expression[] assignments() {
// todo: resolve this
return Transform.super.assignments();
public ListPartition[] assignments() {
return assignments;
}

@Override
Expand All @@ -537,9 +568,16 @@ public int hashCode() {
public static final class RangeTransform implements Transform {

private final NamedReference field;
private final RangePartition[] assignments;

private RangeTransform(NamedReference field) {
this.field = field;
this.assignments = new RangePartition[0];
}

private RangeTransform(NamedReference field, RangePartition[] assignments) {
this.field = field;
this.assignments = assignments;
}

/** @return The field name to transform. */
Expand All @@ -561,9 +599,8 @@ public Expression[] arguments() {

/** @return The assignments to the transform. */
@Override
public Expression[] assignments() {
// todo: resolve this
return Transform.super.assignments();
public RangePartition[] assignments() {
return assignments;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
/** The helper class for partition expressions. */
public class Partitions {

/** An empty array of partitions. */
public static Partition[] EMPTY_PARTITIONS = new Partition[0];

/**
* Creates a range partition.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
import com.datastrato.gravitino.dto.rel.partitioning.DayPartitioningDTO;
import com.datastrato.gravitino.dto.rel.partitioning.IdentityPartitioningDTO;
import com.datastrato.gravitino.dto.rel.partitioning.Partitioning;
import com.datastrato.gravitino.dto.rel.partitioning.RangePartitioningDTO;
import com.datastrato.gravitino.dto.rel.partitions.RangePartitionDTO;
import com.datastrato.gravitino.dto.requests.CatalogCreateRequest;
import com.datastrato.gravitino.dto.requests.SchemaCreateRequest;
import com.datastrato.gravitino.dto.requests.SchemaUpdateRequest;
Expand Down Expand Up @@ -556,6 +558,60 @@ public void testCreatePartitionedTable() throws JsonProcessingException {
tableId, new Column[0], "comment", emptyMap, errorPartitioning));
Assertions.assertTrue(
ex3.getMessage().contains("\"columns\" field is required and cannot be empty"));

// Test partitioning with assignments
Partitioning[] partitioningWithAssignments = {
RangePartitioningDTO.of(
new String[] {columns[0].name()},
new RangePartitionDTO[] {
RangePartitionDTO.builder()
.withName("p1")
.withLower(
LiteralDTO.builder()
.withDataType(Types.IntegerType.get())
.withValue("1")
.build())
.withUpper(
LiteralDTO.builder()
.withDataType(Types.IntegerType.get())
.withValue("10")
.build())
.build()
})
};
expectedTable =
createMockTable(
"table1",
columns,
"comment",
Collections.emptyMap(),
partitioningWithAssignments,
DistributionDTO.NONE,
SortOrderDTO.EMPTY_SORT);

req =
new TableCreateRequest(
tableId.name(),
"comment",
columns,
Collections.emptyMap(),
SortOrderDTO.EMPTY_SORT,
DistributionDTO.NONE,
partitioningWithAssignments,
IndexDTO.EMPTY_INDEXES);
resp = new TableResponse(expectedTable);
buildMockResource(Method.POST, tablePath, req, resp, SC_OK);

table =
catalog
.asTableCatalog()
.createTable(
tableId,
fromDTOs(columns),
"comment",
Collections.emptyMap(),
partitioningWithAssignments);
assertTableEquals(fromDTO(expectedTable), table);
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import static com.datastrato.gravitino.dto.rel.PartitionUtils.validateFieldExistence;

import com.datastrato.gravitino.dto.rel.ColumnDTO;
import com.datastrato.gravitino.dto.rel.partitions.ListPartitionDTO;
import com.datastrato.gravitino.rel.expressions.Expression;
import com.datastrato.gravitino.rel.expressions.NamedReference;
import java.util.Arrays;
Expand All @@ -17,26 +18,44 @@
public final class ListPartitioningDTO implements Partitioning {

/**
* Creates a new ListPartitioningDTO.
* Creates a new ListPartitioningDTO with no pre-assigned partitions.
*
* @param fieldNames The names of the fields to partition.
* @return The new ListPartitioningDTO.
*/
public static ListPartitioningDTO of(String[][] fieldNames) {
return new ListPartitioningDTO(fieldNames);
return of(fieldNames, new ListPartitionDTO[0]);
}

/**
* Creates a new ListPartitioningDTO.
*
* @param fieldNames The names of the fields to partition.
* @param assignments The pre-assigned list partitions.
* @return The new ListPartitioningDTO.
*/
public static ListPartitioningDTO of(String[][] fieldNames, ListPartitionDTO[] assignments) {
return new ListPartitioningDTO(fieldNames, assignments);
}

private final String[][] fieldNames;
private final ListPartitionDTO[] assignments;

private ListPartitioningDTO(String[][] fieldNames) {
private ListPartitioningDTO(String[][] fieldNames, ListPartitionDTO[] assignments) {
this.fieldNames = fieldNames;
this.assignments = assignments;
}

/** @return The names of the fields to partition. */
public String[][] fieldNames() {
return fieldNames;
}

@Override
public ListPartitionDTO[] assignments() {
return assignments;
}

/** @return The strategy of the partitioning. */
@Override
public Strategy strategy() {
Expand Down Expand Up @@ -65,34 +84,4 @@ public String name() {
public Expression[] arguments() {
return Arrays.stream(fieldNames).map(NamedReference::field).toArray(Expression[]::new);
}

/** The builder for ListPartitioningDTO. */
public static class Builder {
private String[][] fieldNames;

/**
* Set the field names for the builder.
*
* @param fieldNames The names of the fields to partition.
* @return The builder.
*/
public Builder withFieldNames(String[][] fieldNames) {
this.fieldNames = fieldNames;
return this;
}

/**
* Builds the ListPartitioningDTO.
*
* @return The ListPartitioningDTO.
*/
public ListPartitioningDTO build() {
return new ListPartitioningDTO(fieldNames);
}
}

/** @return the builder for creating a new instance of ListPartitioningDTO. */
public static Builder builder() {
return new Builder();
}
}
Loading

0 comments on commit bb0a318

Please sign in to comment.