Skip to content

Commit

Permalink
Remove countMissing from Storage and replace with a new `CountNothi…
Browse files Browse the repository at this point in the history
…ng` operation. (#9137)

Removing another small piece of logic from the storages to it's own operation.
  • Loading branch information
jdunkerley authored Feb 22, 2024
1 parent 5e4a7cf commit 0e2a91c
Show file tree
Hide file tree
Showing 15 changed files with 101 additions and 91 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ from project.Internal.Column_Format import all
from project.Internal.Java_Exports import make_date_builder_adapter, make_string_builder

polyglot java import org.enso.base.Time_Utils
polyglot java import org.enso.table.data.column.operation.CountNothing
polyglot java import org.enso.table.data.column.operation.UnaryOperation
polyglot java import org.enso.table.data.column.operation.cast.CastProblemAggregator
polyglot java import org.enso.table.data.column.operation.unary.DatePartOperation
Expand Down Expand Up @@ -2085,7 +2086,7 @@ type Column

example_count_nothing = Examples.text_column_2.count_nothing
count_nothing : Integer
count_nothing self = self.java_column.getStorage.countMissing
count_nothing self = CountNothing.apply self.java_column

## GROUP Standard.Base.Metadata
Returns the number of non-null items in this column.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
package org.enso.table.data.column.operation;

import org.enso.table.data.column.storage.ColumnStorage;
import org.enso.table.data.column.storage.ColumnStorageWithNothingMap;
import org.enso.table.data.table.Column;
import org.graalvm.polyglot.Context;

/** An operation for counting the number of Nothing values in a Column. */
public class CountNothing {
/** Counts the number of Nothing values in the given column. */
public static long apply(Column column) {
ColumnStorage storage = column.getStorage();
return applyToStorage(storage);
}

/** Counts the number of Nothing values in the given storage. */
public static long applyToStorage(ColumnStorage storage) {
if (storage instanceof ColumnStorageWithNothingMap withNothingMap) {
return withNothingMap.getIsNothingMap().cardinality();
}

Context context = Context.getCurrent();
long count = 0;
for (long i = 0; i < storage.getSize(); i++) {
if (storage.isNothing(i)) {
count += 1;
}
context.safepoint();
}
return count;
}

/** Returns true if any value in the storage is Nothing. */
public static boolean anyNothing(ColumnStorage storage) {
if (storage instanceof ColumnStorageWithNothingMap withNothingMap) {
return !withNothingMap.getIsNothingMap().isEmpty();
}

Context context = Context.getCurrent();
for (long i = 0; i < storage.getSize(); i++) {
if (storage.isNothing(i)) {
return true;
}
context.safepoint();
}
return false;
}

/** Returns true if all values in the storage are Nothing. */
public static boolean allNothing(ColumnStorage storage) {
if (storage instanceof ColumnStorageWithNothingMap withNothingMap) {
return withNothingMap.getIsNothingMap().nextClearBit(0) >= storage.getSize();
}

Context context = Context.getCurrent();
for (long i = 0; i < storage.getSize(); i++) {
if (!storage.isNothing(i)) {
return false;
}
context.safepoint();
}
return true;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -51,14 +51,6 @@ public int size() {
return size;
}

/**
* @inheritDoc
*/
@Override
public int countMissing() {
return isMissing.cardinality();
}

@Override
public StorageType getType() {
return BooleanType.INSTANCE;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,6 @@ public int size() {
return underlyingStorage.size();
}

@Override
public int countMissing() {
return underlyingStorage.countMissing();
}

@Override
public StorageType getType() {
return AnyObjectType.INSTANCE;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import java.util.AbstractList;
import java.util.BitSet;
import java.util.List;
import org.enso.table.data.column.operation.CountNothing;
import org.enso.table.data.column.operation.map.MapOperationProblemAggregator;
import org.enso.table.data.column.operation.map.MapOperationStorage;
import org.enso.table.data.column.storage.type.StorageType;
Expand Down Expand Up @@ -42,23 +43,6 @@ public int size() {
return size;
}

/**
* @inheritDoc
*/
@Override
public int countMissing() {
Context context = Context.getCurrent();
int count = 0;
for (int i = 0; i < size; i++) {
if (data[i] == null) {
count += 1;
}

context.safepoint();
}
return count;
}

/**
* @param idx an index
* @return the data item contained at the given index.
Expand Down Expand Up @@ -161,7 +145,7 @@ public Storage<?> appendNulls(int count) {

@Override
public Storage<T> fillMissingFromPrevious(BoolStorage missingIndicator) {
if (missingIndicator != null && missingIndicator.countMissing() > 0) {
if (missingIndicator != null && CountNothing.anyNothing(missingIndicator)) {
throw new IllegalArgumentException(
"Missing indicator must not contain missing values itself.");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,6 @@ public abstract class Storage<T> implements ColumnStorage {
*/
public abstract int size();

/**
* @return the number of NA elements in this column
*/
public abstract int countMissing();

/**
* @return the type tag of this column's storage.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,13 @@
import org.enso.table.data.column.operation.map.numeric.comparisons.LessComparison;
import org.enso.table.data.column.operation.map.numeric.comparisons.LessOrEqualComparison;
import org.enso.table.data.column.operation.map.numeric.isin.LongIsInOp;
import org.enso.table.data.column.storage.BoolStorage;
import org.enso.table.data.column.storage.ColumnLongStorage;
import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.column.storage.ValueIsNothingException;
import org.enso.table.data.column.storage.*;
import org.enso.table.data.column.storage.type.IntegerType;
import org.enso.table.data.column.storage.type.StorageType;
import org.graalvm.polyglot.Context;

public abstract class AbstractLongStorage extends NumericStorage<Long>
implements ColumnLongStorage {
implements ColumnLongStorage, ColumnStorageWithNothingMap {
public abstract long getItem(int idx);

public abstract BitSet getIsMissing();
Expand Down Expand Up @@ -173,4 +170,9 @@ public long get(long index) throws ValueIsNothingException {
}
return getItem((int) index);
}

@Override
public BitSet getIsNothingMap() {
return getIsMissing();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,6 @@ public int size() {
return size;
}

@Override
public int countMissing() {
return 0;
}

@Override
public IntegerType getType() {
return IntegerType.INT_64;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
public abstract class ComputedNullableLongStorage extends AbstractLongStorage {
protected final int size;

protected BitSet isMissing = null;

protected abstract Long computeItem(int idx);

protected ComputedNullableLongStorage(int size) {
Expand All @@ -28,11 +30,6 @@ public int size() {
return size;
}

@Override
public int countMissing() {
return 0;
}

@Override
public IntegerType getType() {
return IntegerType.INT_64;
Expand Down Expand Up @@ -64,16 +61,20 @@ public long getItem(int idx) {

@Override
public BitSet getIsMissing() {
BitSet missing = new BitSet();
Context context = Context.getCurrent();
for (int i = 0; i < size; i++) {
if (computeItem(i) == null) {
missing.set(i);
}
if (isMissing == null) {
// Only compute once as needed.
BitSet missing = new BitSet();
Context context = Context.getCurrent();
for (int i = 0; i < size; i++) {
if (computeItem(i) == null) {
missing.set(i);
}

context.safepoint();
context.safepoint();
}
isMissing = missing;
}
return missing;
return isMissing;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,14 +67,6 @@ public int size() {
return size;
}

/**
* @inheritDoc
*/
@Override
public int countMissing() {
return isMissing.cardinality();
}

/**
* @param idx an index
* @return the data item contained at the given index.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import org.enso.base.polyglot.NumericConverter;
import org.enso.table.data.column.builder.BigIntegerBuilder;
import org.enso.table.data.column.builder.NumericBuilder;
import org.enso.table.data.column.storage.ColumnStorageWithNothingMap;
import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.column.storage.type.IntegerType;
import org.enso.table.data.column.storage.type.StorageType;
Expand All @@ -18,7 +17,7 @@
import org.graalvm.polyglot.Value;

/** A column storing 64-bit integers. */
public final class LongStorage extends AbstractLongStorage implements ColumnStorageWithNothingMap {
public final class LongStorage extends AbstractLongStorage {
// TODO [RW] at some point we will want to add separate storage classes for byte, short and int,
// for more compact storage and more efficient handling of smaller integers; for now we will be
// handling this just by checking the bounds
Expand Down Expand Up @@ -64,14 +63,6 @@ public int size() {
return size;
}

/**
* @inheritDoc
*/
@Override
public int countMissing() {
return isMissing.cardinality();
}

/**
* @param idx an index
* @return the data item contained at the given index.
Expand Down Expand Up @@ -260,9 +251,4 @@ public LongStorage widen(IntegerType widerType) {
assert widerType.fits(type);
return new LongStorage(data, size, isMissing, widerType);
}

@Override
public BitSet getIsNothingMap() {
return isMissing;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import org.enso.base.text.TextFoldingStrategy;
import org.enso.table.aggregations.Aggregator;
import org.enso.table.data.column.builder.Builder;
import org.enso.table.data.column.operation.CountNothing;
import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.table.Column;
import org.enso.table.data.table.Table;
Expand Down Expand Up @@ -166,7 +167,7 @@ public int size() {
*/
public KeyType findAnyNullKey() {
for (Column c : keyColumns) {
boolean containsNulls = c.getStorage().countMissing() > 0;
boolean containsNulls = CountNothing.anyNothing(c.getStorage());
if (containsNulls) {
for (KeyType key : locs.keySet()) {
if (key.hasAnyNulls()) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.enso.table.parsing;

import org.enso.table.data.column.builder.Builder;
import org.enso.table.data.column.operation.CountNothing;
import org.enso.table.data.column.storage.Storage;
import org.enso.table.parsing.problems.CommonParseProblemAggregator;
import org.enso.table.parsing.problems.ParseProblemAggregator;
Expand Down Expand Up @@ -43,8 +44,7 @@ public Storage<?> parseColumn(
Storage<String> sourceStorage, CommonParseProblemAggregator problemAggregator) {
// If there are no values, the Auto parser would guess some random type (the first one that is
// checked). Instead, we just return the empty column unchanged.
boolean hasNoValues =
(sourceStorage.size() == 0) || (sourceStorage.countMissing() == sourceStorage.size());
boolean hasNoValues = (sourceStorage.size() == 0) || CountNothing.allNothing(sourceStorage);
if (hasNoValues) {
return fallbackParser.parseColumn(sourceStorage, problemAggregator);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,6 @@ public int size() {
return array.length;
}

@Override
public int countMissing() {
return 0;
}

@Override
public StorageType getType() {
return IntegerType.INT_64;
Expand Down
7 changes: 7 additions & 0 deletions test/Table_Tests/src/Formatting/Parse_Values_Spec.enso
Original file line number Diff line number Diff line change
Expand Up @@ -630,6 +630,13 @@ add_specs suite_builder =
c1 = Column.from_vector "A" ["1", "2", "3"]
c1.parse type=Nothing . should_fail_with Illegal_Argument

group_builder.specify "should return unchanged if all are Nothing or no rows" <|
c1 = Column.from_vector "A" [Nothing, Nothing, Nothing] Value_Type.Char
c1.parse.value_type . should_equal Value_Type.Char

c2 = Column.from_vector "A" [] Value_Type.Char
c2.parse.value_type . should_equal Value_Type.Char

group_builder.specify "should error if the input column is not text" <|
c1 = Column.from_vector "A" [1, 2, 3]
r1 = c1.parse
Expand Down

0 comments on commit 0e2a91c

Please sign in to comment.