Skip to content

Commit

Permalink
BloomFilter equals tests and refactoring (#448)
Browse files Browse the repository at this point in the history
* Add equals tests to BloomFilter classes, rename ModeFromBloomContext, refactor ToBloomFilter

* Fix equals tests in BloomFilter classes

* Rename ToBloomFilter to BloomFilterBlob
  • Loading branch information
51-code authored Dec 9, 2024
1 parent a3097d9 commit 08872b8
Show file tree
Hide file tree
Showing 15 changed files with 343 additions and 206 deletions.
6 changes: 6 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,12 @@
<version>2.0.0</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>nl.jqno.equalsverifier</groupId>
<artifactId>equalsverifier</artifactId>
<version>3.17.4</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.teragrep</groupId>
<artifactId>pth_06</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@
import com.teragrep.pth10.ast.bo.Token;
import com.teragrep.pth10.ast.commands.logicalstatement.LogicalStatementCatalyst;
import com.teragrep.pth10.ast.commands.logicalstatement.LogicalStatementXML;
import com.teragrep.pth10.ast.commands.transformstatement.teragrep.ModeFromBloomContext;
import com.teragrep.pth10.ast.commands.transformstatement.teragrep.ContextBloomMode;
import com.teragrep.pth10.ast.commands.transformstatement.teragrep.EstimateColumnFromBloomContext;
import com.teragrep.pth10.ast.commands.transformstatement.teragrep.InputColumnFromBloomContext;
import com.teragrep.pth10.ast.commands.transformstatement.teragrep.OutputColumnFromBloomContext;
Expand Down Expand Up @@ -480,7 +480,7 @@ public Node visitT_bloomModeParameter(final DPLParser.T_bloomModeParameterContex
@Override
public Node visitT_bloomOptionParameter(final DPLParser.T_bloomOptionParameterContext ctx) {
// values from context
final ContextValue<TeragrepBloomStep.BloomMode> mode = new ModeFromBloomContext(ctx);
final ContextValue<TeragrepBloomStep.BloomMode> mode = new ContextBloomMode(ctx);
final ContextValue<String> inputCol = new InputColumnFromBloomContext(ctx);
final ContextValue<String> outputCol = new OutputColumnFromBloomContext(ctx, inputCol.value());
final ContextValue<String> estimateCol = new EstimateColumnFromBloomContext(ctx, inputCol.value());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,11 @@
import com.teragrep.pth10.steps.teragrep.TeragrepBloomStep;
import com.teragrep.pth_03.antlr.DPLParser;

public final class ModeFromBloomContext implements ContextValue<TeragrepBloomStep.BloomMode> {
public final class ContextBloomMode implements ContextValue<TeragrepBloomStep.BloomMode> {

private final DPLParser.T_bloomOptionParameterContext ctx;

public ModeFromBloomContext(final DPLParser.T_bloomOptionParameterContext ctx) {
public ContextBloomMode(final DPLParser.T_bloomOptionParameterContext ctx) {
this.ctx = ctx;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
/*
* Teragrep Data Processing Language (DPL) translator for Apache Spark (pth_10)
* Copyright (C) 2019-2024 Suomen Kanuuna Oy
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*
* Additional permission under GNU Affero General Public License version 3
* section 7
*
* If you modify this Program, or any covered work, by linking or combining it
* with other code, such other code is not for that reason alone subject to any
* of the requirements of the GNU Affero GPL version 3 as long as this Program
* is the same Program as licensed from Suomen Kanuuna Oy without any additional
* modifications.
*
* Supplemented terms under GNU Affero General Public License version 3
* section 7
*
* Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified
* versions must be marked as "Modified version of" The Program.
*
* Names of the licensors and authors may not be used for publicity purposes.
*
* No rights are granted for use of trade names, trademarks, or service marks
* which are in The Program if any.
*
* Licensee must indemnify licensors and authors for any liability that these
* contractual assumptions impose on licensors and authors.
*
* To the extent this program is licensed as part of the Commercial versions of
* Teragrep, the applicable Commercial License may apply to this file if you as
* a licensee so wish it.
*/
package com.teragrep.pth10.steps.teragrep.bloomfilter;

import org.apache.spark.util.sketch.BloomFilter;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.Arrays;

/**
 * Serialized BloomFilter held as a byte[], used in a constructor of TeragrepBloomFilter.
 * <p>
 * The array passed to the constructor is stored by reference (it is not copied). Equality and hash code are based on
 * the array contents.
 *
 * @see TeragrepBloomFilter
 */
public final class BloomFilterBlob {

    private final byte[] bytes;

    /**
     * @param bytes serialized form of a {@link BloomFilter}
     */
    public BloomFilterBlob(final byte[] bytes) {
        this.bytes = bytes;
    }

    /**
     * Deserializes the wrapped bytes using {@link BloomFilter#readFrom(java.io.InputStream)}.
     *
     * @return the filter read from the wrapped bytes
     * @throws RuntimeException if reading fails with an {@link IOException}; the IOException is attached as the cause
     */
    public BloomFilter toBloomFilter() {
        try (ByteArrayInputStream bais = new ByteArrayInputStream(bytes)) {
            return BloomFilter.readFrom(bais);
        }
        catch (IOException e) {
            // keep the original exception as the cause so the underlying stack trace is not lost
            throw new RuntimeException("Error reading bytes to filter: " + e.getMessage(), e);
        }
    }

    /**
     * Two blobs are equal when they are of exactly the same class and their byte arrays have equal contents.
     */
    @Override
    public boolean equals(final Object object) {
        if (this == object) {
            return true;
        }
        if (object == null) {
            return false;
        }
        if (object.getClass() != this.getClass()) {
            return false;
        }
        final BloomFilterBlob cast = (BloomFilterBlob) object;
        return Arrays.equals(this.bytes, cast.bytes);
    }

    /**
     * Content-based hash of the wrapped bytes, consistent with {@link #equals(Object)}.
     */
    @Override
    public int hashCode() {
        return Arrays.hashCode(bytes);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@

import java.sql.Connection;
import java.util.Iterator;
import java.util.Objects;

public final class BloomFilterForeachPartitionFunction implements ForeachPartitionFunction<Row> {

Expand Down Expand Up @@ -101,4 +102,18 @@ public void call(final Iterator<Row> iter) throws Exception {
conn.commit();
}
}

/**
 * Compares this function with another object. Returns false for null or for an object of a different class;
 * otherwise compares filterTypes, lazyConnection, overwrite, tableName and regex, in that order, stopping at the
 * first mismatch.
 */
@Override
public boolean equals(final Object o) {
    if (o == null) {
        return false;
    }
    if (getClass() != o.getClass()) {
        return false;
    }
    final BloomFilterForeachPartitionFunction other = (BloomFilterForeachPartitionFunction) o;
    // field checks run in the same order as a short-circuiting && chain would
    if (!filterTypes.equals(other.filterTypes)) {
        return false;
    }
    if (!lazyConnection.equals(other.lazyConnection)) {
        return false;
    }
    if (overwrite != other.overwrite) {
        return false;
    }
    if (!tableName.equals(other.tableName)) {
        return false;
    }
    return regex.equals(other.regex);
}

/**
 * Hash code combined from the same fields that equals compares: filterTypes, lazyConnection, overwrite, tableName
 * and regex.
 */
@Override
public int hashCode() {
    final int combined = Objects.hash(filterTypes, lazyConnection, overwrite, tableName, regex);
    return combined;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ public TeragrepBloomFilter(
String tableName,
String regex
) {
this(partition, new ToBloomFilter(bytes), connection, filterTypes, tableName, regex);
this(partition, new BloomFilterBlob(bytes).toBloomFilter(), connection, filterTypes, tableName, regex);
}

public TeragrepBloomFilter(
Expand Down

This file was deleted.

Loading

0 comments on commit 08872b8

Please sign in to comment.