Skip to content

Commit

Permalink
DynamoDB: deterministic order on query and filter on gsi (#8376)
Browse files Browse the repository at this point in the history
  • Loading branch information
Polandia94 authored Feb 9, 2025
1 parent e5e392c commit 4e99750
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 28 deletions.
65 changes: 37 additions & 28 deletions moto/dynamodb/models/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -746,18 +746,9 @@ def query(

# SORT
if index_name:
if len(range_attrs) == 2:
# Convert to float if necessary to ensure proper ordering
def conv(x: DynamoType) -> Any:
return float(x.value) if x.type == "N" else x.value

possible_results.sort(
key=lambda item: ( # type: ignore
conv(item.attrs[range_attrs[0]]) # type: ignore
if item.attrs.get(range_attrs[0]) # type: ignore
else None
)
)
possible_results = self.sorted_items(
hash_attrs, range_attrs, possible_results
)
else:
possible_results.sort(key=lambda item: item.range_key) # type: ignore

Expand Down Expand Up @@ -883,10 +874,7 @@ def has_idx_items(self, index_name: str) -> List[Item]:
else:
if idx_col_set.issubset(set(hash_set.attrs)): # type: ignore
items.append(hash_set) # type: ignore
return sorted(
items,
key=lambda x: (x.hash_key, x.range_key) if x.range_key else x.hash_key,
)
return items

def scan(
self,
Expand Down Expand Up @@ -934,6 +922,7 @@ def scan(
range_attrs = [self.range_key_attr]

items = self.has_idx_items(index_name)
items = self.sorted_items(hash_attrs, range_attrs, items)
else:
hash_attrs = [self.hash_key_attr]
range_attrs = [self.range_key_attr]
Expand Down Expand Up @@ -1030,6 +1019,35 @@ def _item_comes_before_dct(
If that is the case, we compare by the RK of the main table instead
Related: https://github.com/getmoto/moto/issues/7761
"""
attrs_to_sort_by = self._generate_attr_to_sort_by(
hash_key_attrs, range_key_attrs
)
for attr in attrs_to_sort_by:
if attr in item.attrs and item.attrs[attr] != DynamoType(dct.get(attr)): # type: ignore
return (
(item.attrs[attr] < DynamoType(dct.get(attr))) # type: ignore
== scan_index_forward
)
# Keys were equal, items are identical
return True

def sorted_items(
    self,
    hash_key_attrs: List[str],
    range_key_attrs: List[Optional[str]],
    items: List[Item],
) -> List[Item]:
    """Sort ``items`` in place by their key attributes and return the list.

    The attribute names to order by (and their precedence) come from
    ``self._generate_attr_to_sort_by(hash_key_attrs, range_key_attrs)``.
    Each item is keyed by the tuple of its ``attrs`` values for those
    names, so earlier names take priority and later ones break ties.

    :param hash_key_attrs: hash key attribute names, forwarded unchanged.
    :param range_key_attrs: range key attribute names (entries may be
        ``None``), forwarded unchanged.
    :param items: the items to order; this list is mutated.
    :return: the very same ``items`` list object, now sorted.

    Every item is looked up with ``item.attrs[name]``; an item lacking
    one of the sort attributes makes that subscript fail.
    """
    ordering = self._generate_attr_to_sort_by(hash_key_attrs, range_key_attrs)
    # Tuple keys compare element by element, giving a multi-level sort.
    items.sort(key=lambda entry: tuple(entry.attrs[name] for name in ordering))
    return items

def _generate_attr_to_sort_by(
self, hash_key_attrs: List[str], range_key_attrs: List[Optional[str]]
) -> List[str]:
gsi_hash_key = hash_key_attrs[0] if len(hash_key_attrs) == 2 else None
table_hash_key = str(
hash_key_attrs[0] if gsi_hash_key is None else hash_key_attrs[1]
Expand All @@ -1045,18 +1063,9 @@ def _item_comes_before_dct(
table_hash_key,
table_range_key,
]
for attr in attrs_to_sort_by:
if (
attr is not None
and attr in item.attrs
and item.attrs[attr] != DynamoType(dct.get(attr)) # type: ignore
):
return (
(item.attrs[attr] < DynamoType(dct.get(attr))) # type: ignore
== scan_index_forward
)
# Keys were equal, items are identical
return True
return [
attr for attr in attrs_to_sort_by if attr is not None and attr != "None"
]

def _get_last_evaluated_key(
self, last_result: Item, index_name: Optional[str]
Expand Down
45 changes: 45 additions & 0 deletions tests/test_dynamodb/test_dynamodb_scan.py
Original file line number Diff line number Diff line change
Expand Up @@ -500,6 +500,51 @@ def test_scan_gsi_pagination_with_string_gsi_range_no_sk(table_name=None):
assert subjects == set(range(10))


@pytest.mark.aws_verified
@dynamodb_aws_verified(add_range=False, add_gsi_range=True)
def test_scan_gsi_order_range_key(table_name=None):
    """Scanning a GSI yields items with the same GSI hash key in GSI range-key order."""
    table = boto3.resource("dynamodb", region_name="us-east-1").Table(table_name)

    # First round of writes: (pk, gsi_sk) pairs whose range keys are
    # deliberately not in ascending order.
    first_round = (("1", "4"), ("2", "1"), ("3", "2"), ("4", "3"))
    # Second round re-puts the same four pks with different GSI range keys.
    second_round = (("1", "1"), ("2", "2"), ("3", "4"), ("4", "3"))
    for pk, gsi_sk in first_round + second_round:
        table.put_item(Item={"pk": pk, "gsi_pk": "john", "gsi_sk": gsi_sk})

    items = table.scan(IndexName="test_gsi")["Items"]

    # With the same PK on the GSI, the items are ordered by the GSI range key.
    for position, expected_sk in enumerate(("1", "2", "3", "4")):
        assert items[position]["gsi_sk"] == expected_sk


@pytest.mark.aws_verified
@dynamodb_aws_verified(add_range=False, add_gsi_range=True)
def test_scan_gsi_exlusive_start_key(table_name=None):
    """Paginating a GSI scan with ExclusiveStartKey resumes after the last returned item."""
    table = boto3.resource("dynamodb", region_name="us-east-1").Table(table_name)

    # Four items sharing one GSI hash key; GSI range keys run 4, 3, 2, 1.
    for number in range(1, 5):
        item = {"pk": f"{number}", "gsi_pk": "john", "gsi_sk": f"{5-number}"}
        table.put_item(Item=item)

    first_page = table.scan(IndexName="test_gsi", Limit=3)
    assert len(first_page["Items"]) == 3

    second_page = table.scan(
        IndexName="test_gsi",
        Limit=3,
        ExclusiveStartKey=first_page["LastEvaluatedKey"],
    )
    # Four items in total: resuming after the third leaves exactly one.
    assert len(second_page["Items"]) == 1


@mock_aws
class TestFilterExpression:
def test_scan_filter(self):
Expand Down

0 comments on commit 4e99750

Please sign in to comment.