Skip to content

Commit

Permalink
Do not record sortedness in the StreamDescriptor when sorting on a no…
Browse files Browse the repository at this point in the history
…n-ts index
  • Loading branch information
poodlewars committed Jan 20, 2025
1 parent cfdc854 commit 268a601
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 1 deletion.
4 changes: 3 additions & 1 deletion cpp/arcticdb/stream/incompletes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,9 @@ void do_sort(SegmentInMemory& mutable_seg, const std::vector<std::string> sort_c
auto tsd = pack_timeseries_descriptor(frame->desc, frame->num_rows, std::nullopt, std::move(norm_meta));
segment.set_timeseries_descriptor(tsd);

bool is_sorted = options.sort_on_index || (options.sort_columns && options.sort_columns->at(0) == segment.descriptor().field(0).name());
bool is_timestamp_index = std::holds_alternative<stream::TimeseriesIndex>(frame->index);
bool is_sorted = is_timestamp_index && (options.sort_on_index || (
is_timestamp_index && options.sort_columns && options.sort_columns->at(0) == segment.descriptor().field(0).name()));

// Have to give each its own string pool for thread safety
bool filter_down_stringpool{true};
Expand Down
1 change: 1 addition & 0 deletions cpp/arcticdb/stream/python_bindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ void register_types(py::module &m) {
.def("fields", [](const StreamDescriptor& desc){
return field_collection_to_ref_vector(desc.fields());
})
.def("sorted", &StreamDescriptor::sorted)
);

py::class_<TimeseriesDescriptor>(m, "TimeseriesDescriptor")
Expand Down
27 changes: 27 additions & 0 deletions python/tests/unit/arcticdb/version_store/test_sort.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import string

from arcticdb_ext.storage import KeyType
from arcticdb_ext.version_store import SortedValue


def test_stage_finalize(arctic_library):
Expand Down Expand Up @@ -194,6 +195,7 @@ def test_stage_with_sort_index_chunking(lmdb_version_store_tiny_segment):
df = lib_tool.read_to_dataframe(k)
assert df.shape == (2, 3) # we should apply row slicing but not column slicing
assert df.index.is_monotonic_increasing
assert lib_tool.read_descriptor(k).sorted() == SortedValue.ASCENDING

ref_keys = lib_tool.find_keys_for_symbol(KeyType.APPEND_REF, symbol)
assert not ref_keys
Expand All @@ -204,6 +206,31 @@ def test_stage_with_sort_index_chunking(lmdb_version_store_tiny_segment):
assert_frame_equal(df1, actual)


def test_stage_with_sort_columns_not_ts(lmdb_version_store_v1):
symbol = "AAPL"
lib = lmdb_version_store_v1

df1 = pd.DataFrame({
"idx": np.arange(1, 51),
"col1": np.arange(1, 51),
"col2": [f"a{i:02d}" for i in range(1, 51)],
"col3": np.arange(1, 51)
}).set_index("idx")
df1_shuffled = df1.sample(frac=1)

lib.stage(symbol, df1_shuffled, validate_index=False, sort_on_index=False, sort_columns=["idx"])

lib_tool = lib.library_tool()
data_keys = lib_tool.find_keys_for_symbol(KeyType.APPEND_DATA, symbol)
assert len(data_keys) == 1
data_key = data_keys[0]
assert lib_tool.read_descriptor(data_key).sorted() == SortedValue.UNKNOWN

lib.compact_incomplete(symbol, append=False, convert_int_to_float=False)
actual = lib.read(symbol).data
assert_frame_equal(df1, actual)


def test_stage_finalize_dynamic_with_chunking(arctic_client, lib_name):
lib_opts = adb.LibraryOptions(dynamic_schema=True, rows_per_segment=2, columns_per_segment=2)
lib = arctic_client.get_library(lib_name, create_if_missing=True, library_options=lib_opts)
Expand Down

0 comments on commit 268a601

Please sign in to comment.