Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add metadatalog interface and default in memory implementation #974

Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.sql.executor.streaming;

import com.google.common.base.Preconditions;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.SortedMap;
import java.util.TreeMap;
import org.apache.commons.lang3.tuple.Pair;

/**
* In memory implementation of {@link MetadataLog}. Todo. Current implementation does not guarantee
* thread safe. We will re-evaluate it when adding pipeline execution.
*
* @param <T> type of metadata type.
*/
public class DefaultMetadataLog<T> implements MetadataLog<T> {

private static final long MIN_ACCEPTABLE_ID = 0L;

private SortedMap<Long, T> metadataMap = new TreeMap<>();

@Override
public boolean add(Long batchId, T metadata) {
Preconditions.checkArgument(batchId >= MIN_ACCEPTABLE_ID, "batch id must large or equal 0");

if (metadataMap.containsKey(batchId)) {
return false;
}
metadataMap.put(batchId, metadata);
return true;
}

@Override
public Optional<T> get(Long batchId) {
if (!metadataMap.containsKey(batchId)) {
return Optional.empty();
} else {
return Optional.of(metadataMap.get(batchId));
}
}

@Override
public List<T> get(Optional<Long> startBatchId, Optional<Long> endBatchId) {
if (startBatchId.isEmpty() && endBatchId.isEmpty()) {
return new ArrayList<>(metadataMap.values());
} else {
Long s = startBatchId.orElse(MIN_ACCEPTABLE_ID);
Long e = endBatchId.map(i -> i + 1).orElse(Long.MAX_VALUE);
return new ArrayList<>(metadataMap.subMap(s, e).values());
}
}

@Override
public Optional<Pair<Long, T>> getLatest() {
if (metadataMap.isEmpty()) {
return Optional.empty();
} else {
Long latestId = metadataMap.lastKey();
return Optional.of(Pair.of(latestId, metadataMap.get(latestId)));
}
}

@Override
public void purge(Long batchId) {
for (long i = MIN_ACCEPTABLE_ID; i < batchId; i++) {
metadataMap.remove(i);
}
penghuo marked this conversation as resolved.
Show resolved Hide resolved
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.sql.executor.streaming;

import java.util.List;
import java.util.Optional;
import org.apache.commons.lang3.tuple.Pair;

/**
* Write-ahead Log (WAL). Which allow client write metadata associate with id.
*
* @param <T> type of metadata type.
*/
public interface MetadataLog<T> {

/**
* add metadata to WAL.
*
* @param id metadata index in WAL.
* @param metadata metadata.
* @return true if add success, otherwise return false.
*/
boolean add(Long id, T metadata);

/**
* get metadata from WAL.
*
* @param id metadata index in WAL.
* @return metadata.
*/
Optional<T> get(Long id);

/**
* Return metadata for id between [startId, endId].
*
* @param startId If startId is empty, return all metadata before endId (inclusive).
* @param endId If end is empty, return all batches after endId (inclusive).
* @return a list of metadata sorted by id (nature order).
*/
List<T> get(Optional<Long> startId, Optional<Long> endId);

/**
* Get latest batchId and metadata.
*
* @return pair of id and metadata if not empty.
*/
Optional<Pair<Long, T>> getLatest();

/**
* Remove all the metadata less then id (exclusive).
*
* @param id smallest batchId should keep.
*/
void purge(Long id);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.sql.executor.streaming;


import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.util.Arrays;
import java.util.Optional;
import org.apache.commons.lang3.tuple.Pair;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.junit.jupiter.MockitoExtension;

@ExtendWith(MockitoExtension.class)
class DefaultMetadataLogTest {

private DefaultMetadataLog<Long> metadataLog;

@BeforeEach
void setup() {
metadataLog = new DefaultMetadataLog<>();
}

@Test
void addMetadataShouldSuccess() {
assertTrue(metadataLog.add(0L, 0L));
assertTrue(metadataLog.add(1L, 1L));
}

@Test
void addMetadataWithSameBatchIdShouldFail() {
assertTrue(metadataLog.add(0L, 0L));
assertFalse(metadataLog.add(0L, 1L));
}

@Test
void addMetadataWithInvalidIdShouldThrowException() {
IllegalArgumentException exception =
assertThrows(IllegalArgumentException.class, () -> metadataLog.add(-1L, 0L));
assertEquals("batch id must large or equal 0", exception.getMessage());
}

@Test
void getWithIdReturnMetadata() {
metadataLog.add(0L, 0L);

assertTrue(metadataLog.get(0L).isPresent());
assertEquals(0L, metadataLog.get(0L).get());
}

@Test
void getWithNotExistShouldReturnEmtpy() {
metadataLog.add(0L, 0L);

assertTrue(metadataLog.get(1L).isEmpty());
assertTrue(metadataLog.get(-1L).isEmpty());
}

@Test
void getWithIdInRangeShouldReturnMetadataList() {
metadataLog.add(0L, 0L);
metadataLog.add(1L, 1L);
metadataLog.add(2L, 2L);

assertEquals(Arrays.asList(0L, 1L, 2L), metadataLog.get(Optional.of(0L), Optional.of(2L)));
assertEquals(Arrays.asList(0L, 1L, 2L), metadataLog.get(Optional.of(0L), Optional.of(4L)));
assertEquals(Arrays.asList(0L, 1L, 2L), metadataLog.get(Optional.of(-1L), Optional.of(4L)));
assertEquals(Arrays.asList(0L, 1L), metadataLog.get(Optional.of(0L), Optional.of(1L)));
assertEquals(Arrays.asList(1L, 2L), metadataLog.get(Optional.of(1L), Optional.empty()));
assertEquals(Arrays.asList(0L, 1L), metadataLog.get(Optional.empty(), Optional.of(1L)));
assertEquals(Arrays.asList(0L, 1L, 2L), metadataLog.get(Optional.empty(), Optional.empty()));
}

@Test
void getWithIdOutOfRangeShouldReturnEmpty() {
metadataLog.add(0L, 0L);
metadataLog.add(1L, 1L);
metadataLog.add(2L, 2L);

assertTrue(metadataLog.get(Optional.of(3L), Optional.of(5L)).isEmpty());
}

@Test
void getLatestShouldReturnMetadata() {
metadataLog.add(0L, 10L);
metadataLog.add(1L, 11L);

Optional<Pair<Long, Long>> latest = metadataLog.getLatest();
assertTrue(latest.isPresent());
assertEquals(1L, latest.get().getLeft());
assertEquals(11L, latest.get().getRight());
}

@Test
void getLatestFromEmptyWALShouldReturnEmpty() {
Optional<Pair<Long, Long>> latest = metadataLog.getLatest();
assertTrue(latest.isEmpty());
}

@Test
void purgeLatestShouldOnlyKeepLatest() {
metadataLog.add(0L, 10L);
metadataLog.add(1L, 11L);
metadataLog.add(2L, 12L);

Optional<Pair<Long, Long>> latest = metadataLog.getLatest();
assertTrue(latest.isPresent());
metadataLog.purge(latest.get().getLeft());

latest = metadataLog.getLatest();
assertTrue(latest.isPresent());
assertEquals(2L, latest.get().getLeft());
assertEquals(12L, latest.get().getRight());
}
}