
test: Add insert and upsert related cases for null and default value support (#37088)

issue: #36129

---------

Signed-off-by: qixuan <[email protected]>
qixuan0212 authored Oct 25, 2024
1 parent c285853 commit 80aa9ab
Showing 1 changed file with 154 additions and 17 deletions.

tests/python_client/testcases/test_insert.py
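For context, the new cases exercise schemas where scalar fields are declared nullable and/or carry a default value, and rows pass None to trigger the fallback. Below is a minimal, illustrative sketch of that schema shape — assuming pymilvus 2.5+, where FieldSchema accepts nullable= and default_value=; the field names, max_length, and dim here are made up for illustration:

from pymilvus import CollectionSchema, DataType, FieldSchema

fields = [
    FieldSchema("int64", DataType.INT64, is_primary=True),
    # nullable without a default: inserting None stores an actual null
    FieldSchema("float", DataType.FLOAT, nullable=True),
    # nullable with a default: inserting None falls back to "abc"
    FieldSchema("varchar", DataType.VARCHAR, max_length=100,
                nullable=True, default_value="abc"),
    FieldSchema("float_vector", DataType.FLOAT_VECTOR, dim=128),
]
schema = CollectionSchema(fields)

# Rows may pass None for nullable or defaulted fields.
rows = [
    {"int64": 0, "float": None, "varchar": None,
     "float_vector": [0.0] * 128},
    {"int64": 1, "float": 1.0, "varchar": "xyz",
     "float_vector": [0.1] * 128},
]

This is the behavior the assertions in the diff rely on: querying varchar == 'abc' counts rows that were written as None (the default was substituted), while a nullable field without a default reads back as None.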
@@ -931,6 +931,67 @@ def test_insert_multi_fields_using_none_data(self, enable_partition_key, default
collection_w.insert(data=data)
assert collection_w.num_entities == ct.default_nb

@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("enable_partition_key", [True, False])
@pytest.mark.parametrize("nullable", [True, False])
def test_insert_multi_fields_by_rows_using_default(self, enable_partition_key, nullable):
"""
target: test inserting rows into a collection whose fields declare default values
method: 1. create a collection where some fields declare a default value
2. insert rows with None so the default value replaces the field value
expected: insert successfully
"""
# 1. initialize with data
if enable_partition_key is True and nullable is True:
pytest.skip("partition key field not support nullable")
fields = [cf.gen_int64_field(is_primary=True), cf.gen_float_field(default_value=np.float32(3.14), nullable=nullable),
cf.gen_string_field(default_value="abc", is_partition_key=enable_partition_key, nullable=nullable),
cf.gen_json_field(), cf.gen_float_vec_field()]
schema = cf.gen_collection_schema(fields)
collection_w = self.init_collection_wrap(schema=schema)

collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
collection_w.load()

# 2. insert data
array = cf.gen_default_rows_data()
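# set the string field to None in every other row; the declared default "abc" should be substituted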
for i in range(0, ct.default_nb, 2):
array[i][ct.default_string_field_name] = None
collection_w.insert(array)

exp = f"{ct.default_string_field_name} == 'abc'"
res = collection_w.query(exp, output_fields=[ct.default_float_field_name, ct.default_string_field_name])[0]
assert len(res) == ct.default_nb/2

@pytest.mark.tags(CaseLabel.L1)
def test_insert_multi_fields_by_rows_using_none(self):
"""
target: test inserting rows with None values into nullable fields
method: 1. create a collection with nullable fields
2. insert rows with None in place of the field values
expected: insert successfully
"""
# 1. initialize with data
fields = [cf.gen_int64_field(is_primary=True), cf.gen_float_field(nullable=True),
cf.gen_string_field(default_value="abc", nullable=True), cf.gen_json_field(), cf.gen_float_vec_field()]
schema = cf.gen_collection_schema(fields)
collection_w = self.init_collection_wrap(schema=schema)

collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
collection_w.load()

# 2. insert data
array = cf.gen_default_rows_data()
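# null out both fields in every other row: "float" has no default and stays null, while "varchar" falls back to its default "abc"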
for i in range(0, ct.default_nb, 2):
array[i][ct.default_float_field_name] = None
array[i][ct.default_string_field_name] = None
collection_w.insert(array)

exp = f"{ct.default_string_field_name} == 'abc'"
res = collection_w.query(exp, output_fields=[ct.default_float_field_name, ct.default_string_field_name])[0]
assert len(res) == ct.default_nb/2
assert res[0][ct.default_float_field_name] is None

@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("enable_partition_key", [True, False])
@pytest.mark.parametrize("nullable", [True, False])
@@ -972,15 +1033,16 @@ def test_insert_dataframe_using_none_data(self):
vectors = cf.gen_vectors(ct.default_nb, ct.default_dim)

df = pd.DataFrame({
"int64": pd.Series(data=[i for i in range(0, ct.default_nb)]),
"float_vector": vectors,
"int64": pd.Series(data=[i for i in range(ct.default_nb)]),
"float": pd.Series(data=[float(i) for i in range(ct.default_nb)], dtype="float32"),
"varchar": pd.Series(data=[None for _ in range(ct.default_nb)])
"varchar": pd.Series(data=[None for _ in range(ct.default_nb)]),
"float_vector": vectors
})
collection_w.insert(df)
assert collection_w.num_entities == ct.default_nb



class TestInsertAsync(TestcaseBase):
"""
******************************************************************
@@ -1308,10 +1370,10 @@ def test_insert_over_resource_limit(self):

@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("default_value", [[], 123])
def test_insert_array_using_default_value(self, default_value):
def test_insert_rows_using_default_value(self, default_value):
"""
target: test insert with array
method: insert with invalid array
target: test insert with rows
method: insert with invalid rows
expected: raise exception
"""
fields = [cf.gen_int64_field(is_primary=True), cf.gen_float_field(),
@@ -1321,7 +1383,7 @@ def test_insert_array_using_default_value(self, default_value):
vectors = cf.gen_vectors(ct.default_nb, ct.default_dim)
data = [{"int64": 1, "float_vector": vectors[1],
"varchar": default_value, "float": np.float32(1.0)}]
error = {ct.err_code: 1, ct.err_msg: "The Input data type is inconsistent with defined schema"}
error = {ct.err_code: 999, ct.err_msg: "The Input data type is inconsistent with defined schema"}
collection_w.upsert(data, check_task=CheckTasks.err_res, check_items=error)

@pytest.mark.tags(CaseLabel.L2)
@@ -1340,7 +1402,7 @@ def test_insert_tuple_using_default_value(self, default_value):
int_values = [i for i in range(0, ct.default_nb)]
string_values = ["abc" for i in range(ct.default_nb)]
data = (int_values, vectors, string_values, default_value)
error = {ct.err_code: 1, ct.err_msg: "The Input data type is inconsistent with defined schema"}
error = {ct.err_code: 999, ct.err_msg: "The type of data should be List, pd.DataFrame or Dict"}
collection_w.upsert(data, check_task=CheckTasks.err_res, check_items=error)

@pytest.mark.tags(CaseLabel.L2)
@@ -1949,15 +2011,86 @@ def test_upsert_multi_fields_using_none_data(self, enable_partition_key, default
default_value,
[[np.int32(j) for j in range(10)] for _ in range(ct.default_nb)],
[[np.float32(j) for j in range(10)] for _ in range(ct.default_nb)],
[[str(j) for j in range(10)] for _ in range(ct.default_nb)],
default_value,
default_value,
cf.gen_vectors(ct.default_nb, ct.default_dim)
]
collection_w.upsert(data=data)
exp = f"{ct.default_float_field_name} == {np.float32(1.0)} and {ct.default_string_field_name} == 'abc'"
res = collection_w.query(exp, output_fields=[default_float_name, json_embedded_object])[0]
exp = f"{ct.default_float_field_name} == {np.float32(1.0)}"
res = collection_w.query(exp, output_fields=[default_float_name, json_embedded_object,
ct.default_string_array_field_name])[0]
assert len(res) == ct.default_nb
assert res[0][json_embedded_object] is None
assert res[0][ct.default_string_array_field_name] is None

@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("enable_partition_key", [True, False])
@pytest.mark.parametrize("nullable", [True, False])
def test_upsert_multi_fields_by_rows_using_default(self, enable_partition_key, nullable):
"""
target: test upserting rows into a collection whose fields declare default values
method: 1. create a collection where some fields declare a default value
2. upsert rows with None so the default value replaces the field value
expected: upsert successfully
"""
# 1. initialize with data
if enable_partition_key is True and nullable is True:
pytest.skip("partition key field not support nullable")
fields = [cf.gen_int64_field(is_primary=True), cf.gen_float_field(default_value=np.float32(3.14), nullable=nullable),
cf.gen_string_field(default_value="abc", is_partition_key=enable_partition_key, nullable=nullable),
cf.gen_json_field(), cf.gen_float_vec_field()]
schema = cf.gen_collection_schema(fields)
collection_w = self.init_collection_wrap(schema=schema)

collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
collection_w.load()

# 2. upsert data
array = cf.gen_default_rows_data()
for i in range(0, ct.default_nb, 2):
array[i][ct.default_float_field_name] = None
array[i][ct.default_string_field_name] = None
collection_w.upsert(array)

exp = f"{ct.default_float_field_name} == {np.float32(3.14)} and {ct.default_string_field_name} == 'abc'"
res = collection_w.query(exp, output_fields=[ct.default_float_field_name, ct.default_string_field_name])[0]
assert len(res) == ct.default_nb/2

@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("enable_partition_key", [True, False])
def test_upsert_multi_fields_by_rows_using_none(self, enable_partition_key):
"""
target: test insert then upsert of rows with None values in nullable fields
method: 1. create a collection with nullable fields
2. insert rows with None values, then upsert with None in the remaining rows
expected: insert and upsert successfully
"""
# 1. initialize with data
fields = [cf.gen_int64_field(is_primary=True), cf.gen_float_field(nullable=True),
cf.gen_string_field(default_value="abc", nullable=True, enable_partition_key=enable_partition_key),
cf.gen_json_field(), cf.gen_float_vec_field()]
schema = cf.gen_collection_schema(fields)
collection_w = self.init_collection_wrap(schema=schema)

collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
collection_w.load()

# 2. insert data
array = cf.gen_default_rows_data()
for i in range(1, ct.default_nb, 2):
array[i][ct.default_float_field_name] = None
array[i][ct.default_string_field_name] = None
collection_w.insert(array)

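# 3. upsert the full batch; both halves of the array now carry None, so every row is affected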
for i in range(0, ct.default_nb, 2):
array[i][ct.default_float_field_name] = None
array[i][ct.default_string_field_name] = None
collection_w.upsert(array)

exp = f"{ct.default_int64_field_name} >= 0"
res = collection_w.query(exp, output_fields=[ct.default_float_field_name, ct.default_string_field_name])[0]
assert len(res) == ct.default_nb
assert res[0][ct.default_float_field_name] is None

@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("enable_partition_key", [True, False])
@@ -2013,9 +2146,13 @@ def test_upsert_dataframe_using_none_data(self):
"float_vector": vectors
})
collection_w.upsert(df)
exp = f"{ct.default_string_field_name} == ''"
exp = f"{ct.default_int64_field_name} >= 0"
res = collection_w.query(exp, output_fields=[ct.default_string_field_name])[0]
assert len(res) == ct.default_nb
assert res[0][ct.default_string_field_name] is None
exp = f"{ct.default_string_field_name} == ''"
res = collection_w.query(exp, output_fields=[ct.default_string_field_name])[0]
assert len(res) == 0

@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("index ", ct.all_index_types[9:11])
@@ -2210,10 +2347,10 @@ def test_upsert_with_auto_id_pk_type_dismacth(self):

@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("default_value", [[], 123])
def test_upsert_array_using_default_value(self, default_value):
def test_upsert_rows_using_default_value(self, default_value):
"""
target: test upsert with array
method: upsert with invalid array
target: test upsert with rows
method: upsert with invalid rows
expected: raise exception
"""
fields = [cf.gen_int64_field(is_primary=True), cf.gen_float_field(),
Expand All @@ -2223,7 +2360,7 @@ def test_upsert_array_using_default_value(self, default_value):
vectors = cf.gen_vectors(ct.default_nb, ct.default_dim)
data = [{"int64": 1, "float_vector": vectors[1],
"varchar": default_value, "float": np.float32(1.0)}]
error = {ct.err_code: 1, ct.err_msg: "The Input data type is inconsistent with defined schema"}
error = {ct.err_code: 999, ct.err_msg: "The Input data type is inconsistent with defined schema"}
collection_w.upsert(data, check_task=CheckTasks.err_res, check_items=error)

@pytest.mark.tags(CaseLabel.L2)
Expand All @@ -2242,7 +2379,7 @@ def test_upsert_tuple_using_default_value(self, default_value):
int_values = [i for i in range(0, ct.default_nb)]
string_values = ["abc" for i in range(ct.default_nb)]
data = (int_values, default_value, string_values, vectors)
error = {ct.err_code: 1, ct.err_msg: "The Input data type is inconsistent with defined schema"}
error = {ct.err_code: 999, ct.err_msg: "The type of data should be List, pd.DataFrame or Dict"}
collection_w.upsert(data, check_task=CheckTasks.err_res, check_items=error)


