Skip to content

Commit

Permalink
Merge pull request #7 from kortemik/refactor-1
Browse files Browse the repository at this point in the history
Refactor to byte[] version
  • Loading branch information
kortemik authored Oct 11, 2023
2 parents c086810 + 4a2db47 commit 524d517
Show file tree
Hide file tree
Showing 23 changed files with 166,993 additions and 409 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,5 @@
.flattened-pom.xml
.idea/**
rlp_01.iml

src/test/**/internal/
3 changes: 3 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,9 @@
<exclude>README.md</exclude>
<!-- astyle -->
<exclude>astyle-options.ini</exclude>
<!-- perf-test-asset -->
<exclude>src/test/resources/base64.txt</exclude>
<exclude>src/test/resources/base64-8m.txt</exclude>
</excludes>
</configuration>
</plugin>
Expand Down
85 changes: 85 additions & 0 deletions src/main/java/com/teragrep/blf_01/ConcatenatedToken.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
/*
* Teragrep Bloom Filter Library BLF-01
* Copyright (C) 2019, 2020, 2021, 2022, 2023 Suomen Kanuuna Oy
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://github.com/teragrep/teragrep/blob/main/LICENSE>.
*
*
* Additional permission under GNU Affero General Public License version 3
* section 7
*
* If you modify this Program, or any covered work, by linking or combining it
* with other code, such other code is not for that reason alone subject to any
* of the requirements of the GNU Affero GPL version 3 as long as this Program
* is the same Program as licensed from Suomen Kanuuna Oy without any additional
* modifications.
*
* Supplemented terms under GNU Affero General Public License version 3
* section 7
*
* Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified
* versions must be marked as "Modified version of" The Program.
*
* Names of the licensors and authors may not be used for publicity purposes.
*
* No rights are granted for use of trade names, trademarks, or service marks
* which are in The Program if any.
*
* Licensee must indemnify licensors and authors for any liability that these
* contractual assumptions impose on licensors and authors.
*
* To the extent this program is licensed as part of the Commercial versions of
* Teragrep, the applicable Commercial License may apply to this file if you as
* a licensee so wish it.
*/

package com.teragrep.blf_01;

import java.nio.ByteBuffer;
import java.util.ArrayList;

/**
 * Joins the byte contents of several {@link Token}s into one byte array.
 *
 * <p>A single direct {@link ByteBuffer} is kept as scratch space and reused
 * by every call to {@link #concatenate(ArrayList)}; it is replaced by a
 * larger one when the tokens do not fit. The buffer is a plain mutable field
 * with no synchronization around it.
 */
public class ConcatenatedToken {

    /** Scratch buffer reused between calls; swapped for a larger one on demand. */
    private ByteBuffer concatenatedBuffer;

    /** Creates an instance whose scratch buffer starts at 256 KiB. */
    public ConcatenatedToken() {
        this.concatenatedBuffer = ByteBuffer.allocateDirect(256 * 1024);
    }

    /**
     * Copies the bytes of every token, in list order, into a new array.
     *
     * @param tokens tokens whose {@code bytes} are appended one after another
     * @return a freshly allocated array holding all token bytes back to back
     */
    byte[] concatenate(ArrayList<Token> tokens) {
        concatenatedBuffer.clear();

        for (Token current : tokens) {
            final int positionAfterPut = concatenatedBuffer.position() + current.bytes.length;
            if (positionAfterPut >= concatenatedBuffer.capacity()) {
                // Grow by the combined length of all tokens so the whole
                // list is guaranteed to fit after a single reallocation.
                growBy(totalLength(tokens));
            }
            concatenatedBuffer.put(current.bytes);
        }

        // Switch from writing to reading and drain what was written.
        concatenatedBuffer.flip();
        final byte[] joined = new byte[concatenatedBuffer.remaining()];
        concatenatedBuffer.get(joined);
        return joined;
    }

    /** Sums the byte lengths of all tokens in the list. */
    private int totalLength(ArrayList<Token> tokens) {
        int total = 0;
        for (Token each : tokens) {
            total += each.bytes.length;
        }
        return total;
    }

    /**
     * Replaces the scratch buffer with a direct buffer that is
     * {@code additionalCapacity} bytes larger, carrying over the bytes
     * written so far.
     */
    private void growBy(int additionalCapacity) {
        final ByteBuffer larger =
                ByteBuffer.allocateDirect(concatenatedBuffer.capacity() + additionalCapacity);
        concatenatedBuffer.flip();
        larger.put(concatenatedBuffer);
        concatenatedBuffer = larger;
    }
}
90 changes: 90 additions & 0 deletions src/main/java/com/teragrep/blf_01/Delimiter.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
/*
* Teragrep Bloom Filter Library BLF-01
* Copyright (C) 2019, 2020, 2021, 2022, 2023 Suomen Kanuuna Oy
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://github.com/teragrep/teragrep/blob/main/LICENSE>.
*
*
* Additional permission under GNU Affero General Public License version 3
* section 7
*
* If you modify this Program, or any covered work, by linking or combining it
* with other code, such other code is not for that reason alone subject to any
* of the requirements of the GNU Affero GPL version 3 as long as this Program
* is the same Program as licensed from Suomen Kanuuna Oy without any additional
* modifications.
*
* Supplemented terms under GNU Affero General Public License version 3
* section 7
*
* Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified
* versions must be marked as "Modified version of" The Program.
*
* Names of the licensors and authors may not be used for publicity purposes.
*
* No rights are granted for use of trade names, trademarks, or service marks
* which are in The Program if any.
*
* Licensee must indemnify licensors and authors for any liability that these
* contractual assumptions impose on licensors and authors.
*
* To the extent this program is licensed as part of the Commercial versions of
* Teragrep, the applicable Commercial License may apply to this file if you as
* a licensee so wish it.
*/

package com.teragrep.blf_01;

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Objects;

public class Delimiter {
public final boolean isStub;

public final ByteBuffer delimiterBuffer;

Delimiter() {
this.isStub = true;
this.delimiterBuffer = ByteBuffer.allocate(0);
}

Delimiter(String delimiter) {
this.isStub = false;
byte[] bytes = delimiter.getBytes(StandardCharsets.UTF_8);
this.delimiterBuffer = ByteBuffer.allocateDirect(bytes.length);
this.delimiterBuffer.put(bytes);
this.delimiterBuffer.flip();
}

@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
Delimiter delimiter = (Delimiter) o;
return Objects.equals(delimiterBuffer, delimiter.delimiterBuffer);
}

@Override
public int hashCode() {
return Objects.hash(delimiterBuffer);
}

public HashMap<ByteBuffer, Delimiter> asMap() {
HashMap<ByteBuffer, Delimiter> map = new HashMap<>();
map.put(delimiterBuffer, this);
return map;
}
}
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
package com.teragrep.blf_01;

/*
* Teragrep Bloom Filter Library BLF-01
* Copyright (C) 2019, 2020, 2021, 2022 Suomen Kanuuna Oy
* Copyright (C) 2019, 2020, 2021, 2022, 2023 Suomen Kanuuna Oy
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
Expand Down Expand Up @@ -46,15 +44,11 @@
* a licensee so wish it.
*/

import org.junit.jupiter.api.Test;

import java.io.IOException;
package com.teragrep.blf_01;

public class FilterTest {
import java.nio.ByteBuffer;
import java.util.HashMap;

@Test
public void testFilter() throws IOException {
FilterManager filterManager = new FilterManager();
filterManager.select();
}
/**
 * A provider of a set of {@link Delimiter}s.
 */
public interface Delimiters {
/**
 * Returns the delimiters as a map from {@link ByteBuffer} to {@link Delimiter}.
 *
 * NOTE(review): keys are presumably each delimiter's own byte buffer, as in
 * Delimiter.asMap() -- confirm against the implementations.
 *
 * @return map of delimiters keyed by ByteBuffer
 */
HashMap<ByteBuffer, Delimiter> getDelimiters();
}
119 changes: 119 additions & 0 deletions src/main/java/com/teragrep/blf_01/Entanglement.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
/*
* Teragrep Bloom Filter Library BLF-01
* Copyright (C) 2019, 2020, 2021, 2022, 2023 Suomen Kanuuna Oy
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://github.com/teragrep/teragrep/blob/main/LICENSE>.
*
*
* Additional permission under GNU Affero General Public License version 3
* section 7
*
* If you modify this Program, or any covered work, by linking or combining it
* with other code, such other code is not for that reason alone subject to any
* of the requirements of the GNU Affero GPL version 3 as long as this Program
* is the same Program as licensed from Suomen Kanuuna Oy without any additional
* modifications.
*
* Supplemented terms under GNU Affero General Public License version 3
* section 7
*
* Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified
* versions must be marked as "Modified version of" The Program.
*
* Names of the licensors and authors may not be used for publicity purposes.
*
* No rights are granted for use of trade names, trademarks, or service marks
* which are in The Program if any.
*
* Licensee must indemnify licensors and authors for any liability that these
* contractual assumptions impose on licensors and authors.
*
* To the extent this program is licensed as part of the Commercial versions of
* Teragrep, the applicable Commercial License may apply to this file if you as
* a licensee so wish it.
*/

package com.teragrep.blf_01;

import java.util.ArrayList;
import java.util.ListIterator;

/**
 * Produces the concatenation of every contiguous run of tokens in a list.
 *
 * <p>All working lists and the {@link ConcatenatedToken} scratch buffer are
 * allocated once and reused by every call, so an instance carries mutable
 * state between calls and has no synchronization of its own.
 */
public class Entanglement {

    /** Result holder of {@link #endWindowScan(ArrayList)}; cleared on each scan. */
    private final ArrayList<Token> endWindowScanTokens;
    /** Result holder of {@link #entangle(ArrayList)}; cleared on each call. */
    private final ArrayList<Token> allTokens;
    /** Current suffix of the input being processed. */
    private final ArrayList<Token> forwardScanTokens;
    /** Current prefix of the suffix being processed. */
    private final ArrayList<Token> backwardsScanTokens;
    /** Reusable concatenator shared by both scans. */
    private final ConcatenatedToken concatenatedToken;

    /** Creates an instance with working lists pre-sized for 512 tokens. */
    public Entanglement() {
        this.endWindowScanTokens = new ArrayList<>(512);
        this.allTokens = new ArrayList<>(512);
        this.forwardScanTokens = new ArrayList<>(512);
        this.backwardsScanTokens = new ArrayList<>(512);
        this.concatenatedToken = new ConcatenatedToken();
    }

    /**
     * Builds one concatenated token for every contiguous sub-sequence of
     * {@code tokens}.
     *
     * @param tokens the tokens to combine
     * @return the concatenated tokens; this is the instance's internal list,
     *         which is cleared and refilled by the next call
     */
    public ArrayList<Token> entangle(ArrayList<Token> tokens) {
        return startWindowScan(tokens);
    }

    /**
     * Walks the start of the window forward over the list. For each start
     * index the suffix beginning there is taken, its shorter prefixes are
     * emitted via {@link #endWindowScan(ArrayList)} (longest first), and then
     * the whole suffix itself is emitted.
     */
    private ArrayList<Token> startWindowScan(ArrayList<Token> tokenList) {
        allTokens.clear();
        for (int start = 0; start < tokenList.size(); start++) {
            // Bulk-copy the suffix [start, size) in order.
            forwardScanTokens.clear();
            forwardScanTokens.addAll(tokenList.subList(start, tokenList.size()));

            // Proper prefixes of the suffix first, then the full suffix.
            allTokens.addAll(endWindowScan(forwardScanTokens));
            allTokens.add(new Token(concatenatedToken.concatenate(forwardScanTokens)));
        }
        return allTokens;
    }

    /**
     * Walks the end of the window backwards over the list, emitting the
     * concatenation of each proper, non-empty prefix from the longest
     * ({@code size - 1} tokens) down to the shortest (one token).
     *
     * @return the instance's internal result list, cleared on the next scan
     */
    private ArrayList<Token> endWindowScan(ArrayList<Token> tokenList) {
        endWindowScanTokens.clear();
        for (int end = tokenList.size() - 1; end > 0; end--) {
            // Bulk-copy the prefix [0, end) in order; this avoids shifting
            // the whole list on every element as add(0, ...) would.
            backwardsScanTokens.clear();
            backwardsScanTokens.addAll(tokenList.subList(0, end));
            endWindowScanTokens.add(new Token(concatenatedToken.concatenate(backwardsScanTokens)));
        }
        return endWindowScanTokens;
    }
}
Loading

0 comments on commit 524d517

Please sign in to comment.