diff --git a/tests/python_client/testcases/test_insert.py b/tests/python_client/testcases/test_insert.py index 9756195a611d1..1e5120c0f0494 100644 --- a/tests/python_client/testcases/test_insert.py +++ b/tests/python_client/testcases/test_insert.py @@ -865,19 +865,20 @@ def test_insert_equal_to_resource_limit(self): assert collection_w.num_entities == nb @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.skip("not support default_value now") - @pytest.mark.parametrize("default_value", [[], None]) - def test_insert_one_field_using_default_value(self, default_value, auto_id): + @pytest.mark.parametrize("nullable", [True, False]) + @pytest.mark.parametrize("default_value", [[], [None for i in range(ct.default_nb)]]) + def test_insert_one_field_using_default_value(self, default_value, nullable, auto_id): """ target: test insert with one field using default value method: 1. create a collection with one field using default value - 2. insert using []/None to replace the field value + 2. insert using default value to replace the field value []/[None] expected: insert successfully """ fields = [cf.gen_int64_field(is_primary=True), cf.gen_float_field(), - cf.gen_string_field(default_value="abc"), cf.gen_float_vec_field()] + cf.gen_string_field(default_value="abc", nullable=nullable), cf.gen_float_vec_field()] schema = cf.gen_collection_schema(fields, auto_id=auto_id) collection_w = self.init_collection_wrap(schema=schema) + # default value fields, [] or [None] data = [ [i for i in range(ct.default_nb)], [np.float32(i) for i in range(ct.default_nb)], @@ -890,63 +891,91 @@ def test_insert_one_field_using_default_value(self, default_value, auto_id): assert collection_w.num_entities == ct.default_nb @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.skip("not support default_value now") - @pytest.mark.parametrize("default_value", [[], None]) - def test_insert_multi_fields_using_default_value(self, default_value, auto_id): + @pytest.mark.parametrize("enable_partition_key", [True, False]) 
+ @pytest.mark.parametrize("default_value", [[], [None for _ in range(ct.default_nb)]]) + def test_insert_multi_fields_using_none_data(self, enable_partition_key, default_value, auto_id): """ - target: test insert with multi fields using default value - method: 1. default value fields before vector, insert [], None, fail - 2. default value fields all after vector field, insert empty, succeed - expected: report error and insert successfully + target: test insert with multi fields include array using none value + method: 1. create a collection with multi fields using default value + 2. insert using none value to replace the field value + expected: insert successfully """ - # 1. default value fields before vector, insert [], None, fail - fields = [cf.gen_int64_field(is_primary=True), cf.gen_float_field(default_value=np.float32(1.0)), - cf.gen_string_field(default_value="abc"), cf.gen_float_vec_field()] + json_embedded_object = "json_embedded_object" + fields = [ + cf.gen_int64_field(is_primary=True), + cf.gen_int32_field(default_value=np.int32(1), nullable=True), + cf.gen_float_field(default_value=np.float32(1.0), nullable=True), + cf.gen_string_field(default_value="abc", enable_partition_key=enable_partition_key, nullable=True), + cf.gen_array_field(name=ct.default_int32_array_field_name, element_type=DataType.INT32, nullable=True), + cf.gen_array_field(name=ct.default_float_array_field_name, element_type=DataType.FLOAT, nullable=True), + cf.gen_array_field(name=ct.default_string_array_field_name, element_type=DataType.VARCHAR, max_length=100, nullable=True), + cf.gen_json_field(name=json_embedded_object, nullable=True), + cf.gen_float_vec_field() + ] schema = cf.gen_collection_schema(fields, auto_id=auto_id) collection_w = self.init_collection_wrap(schema=schema) - data = [[i for i in range(ct.default_nb)], default_value, - # if multi default_value fields before vector field, every field must use []/None - cf.gen_vectors(ct.default_nb, ct.default_dim)] + # default 
value fields, [] or [None] + data = [ + [i for i in range(ct.default_nb)], + default_value, + default_value, + default_value, + [[np.int32(j) for j in range(10)] for _ in range(ct.default_nb)], + [[np.float32(j) for j in range(10)] for _ in range(ct.default_nb)], + default_value, + default_value, + cf.gen_vectors(ct.default_nb, ct.default_dim) + ] if auto_id: del data[0] - collection_w.insert(data, check_task=CheckTasks.err_res, - check_items={ct.err_code: 999, - ct.err_msg: "The data type of field varchar doesn't match"}) - # 2. default value fields all after vector field, insert empty, succeed - fields = [cf.gen_int64_field(is_primary=True), cf.gen_float_vec_field(), - cf.gen_float_field(default_value=np.float32(1.0)), - cf.gen_string_field(default_value="abc")] - schema = cf.gen_collection_schema(fields, auto_id=auto_id) + collection_w.insert(data=data) + assert collection_w.num_entities == ct.default_nb + + @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.parametrize("enable_partition_key", [True, False]) + @pytest.mark.parametrize("nullable", [True, False]) + def test_insert_dataframe_using_default_data(self, enable_partition_key, nullable): + """ + target: test insert with dataframe + method: insert with valid dataframe using default data + expected: insert successfully + """ + if enable_partition_key is True and nullable is True: + pytest.skip("partition key field not support nullable") + fields = [cf.gen_int64_field(is_primary=True), cf.gen_float_field(), + cf.gen_string_field(default_value="abc", is_partition_key=enable_partition_key, nullable=nullable), + cf.gen_float_vec_field()] + schema = cf.gen_collection_schema(fields) collection_w = self.init_collection_wrap(schema=schema) - data = [[i for i in range(ct.default_nb)], cf.gen_vectors( - ct.default_nb, ct.default_dim)] - data1 = [[i for i in range(ct.default_nb)], cf.gen_vectors(ct.default_nb, ct.default_dim), - [np.float32(i) for i in range(ct.default_nb)]] - if auto_id: - del data[0], data1[0] - 
collection_w.insert(data) + vectors = cf.gen_vectors(ct.default_nb, ct.default_dim) + + df = pd.DataFrame({ + "int64": pd.Series(data=[i for i in range(ct.default_nb)]), + "float": pd.Series(data=[float(i) for i in range(ct.default_nb)], dtype="float32"), + "varchar": pd.Series(data=[None for _ in range(ct.default_nb)]), + "float_vector": vectors + }) + collection_w.insert(df) assert collection_w.num_entities == ct.default_nb - collection_w.insert(data1) @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.skip("not support default_value now") - def test_insert_dataframe_using_default_value(self): + def test_insert_dataframe_using_none_data(self): """ target: test insert with dataframe - method: insert with invalid dataframe + method: insert with valid dataframe using none data expected: insert successfully """ fields = [cf.gen_int64_field(is_primary=True), cf.gen_float_field(), - cf.gen_string_field(default_value="abc"), cf.gen_float_vec_field()] + cf.gen_string_field(default_value=None, nullable=True), cf.gen_float_vec_field()] schema = cf.gen_collection_schema(fields) collection_w = self.init_collection_wrap(schema=schema) vectors = cf.gen_vectors(ct.default_nb, ct.default_dim) - # None/[] is not allowed when using dataframe - # To use default value, delete the whole item + df = pd.DataFrame({ "int64": pd.Series(data=[i for i in range(0, ct.default_nb)]), "float_vector": vectors, - "float": pd.Series(data=[float(i) for i in range(ct.default_nb)], dtype="float32") + "float": pd.Series(data=[float(i) for i in range(ct.default_nb)], dtype="float32"), + "varchar": pd.Series(data=[None for _ in range(ct.default_nb)]) }) collection_w.insert(df) assert collection_w.num_entities == ct.default_nb @@ -1278,8 +1307,7 @@ def test_insert_over_resource_limit(self): data=data, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.skip("not support default_value now") - @pytest.mark.parametrize("default_value", [[], None]) + 
@pytest.mark.parametrize("default_value", [[], 123]) def test_insert_array_using_default_value(self, default_value): """ target: test insert with array @@ -1293,17 +1321,16 @@ def test_insert_array_using_default_value(self, default_value): vectors = cf.gen_vectors(ct.default_nb, ct.default_dim) data = [{"int64": 1, "float_vector": vectors[1], "varchar": default_value, "float": np.float32(1.0)}] - collection_w.insert(data, check_task=CheckTasks.err_res, - check_items={ct.err_code: 999, ct.err_msg: "Field varchar don't match in entities[0]"}) + error = {ct.err_code: 1, ct.err_msg: "The Input data type is inconsistent with defined schema"} + collection_w.insert(data, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.skip("not support default_value now") @pytest.mark.parametrize("default_value", [[], None]) def test_insert_tuple_using_default_value(self, default_value): """ target: test insert with tuple method: insert with invalid tuple - expected: insert successfully + expected: raise exception """ fields = [cf.gen_int64_field(is_primary=True), cf.gen_float_vec_field(), cf.gen_string_field(), cf.gen_float_field(default_value=np.float32(3.14))] @@ -1313,8 +1340,8 @@ def test_insert_tuple_using_default_value(self, default_value): int_values = [i for i in range(0, ct.default_nb)] string_values = ["abc" for i in range(ct.default_nb)] data = (int_values, vectors, string_values, default_value) - collection_w.insert(data, check_task=CheckTasks.err_res, - check_items={ct.err_code: 999, ct.err_msg: "Field varchar don't match in entities[0]"}) + error = {ct.err_code: 1, ct.err_msg: "The Input data type is inconsistent with defined schema"} + collection_w.insert(data, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L2) def test_insert_with_nan_value(self): @@ -1846,20 +1873,23 @@ def test_upsert_in_row_with_enable_dynamic_field(self, auto_id): assert res[0].get('count(*)') == start + upsert_nb + 
extra_num @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.skip("not support default_value now") - @pytest.mark.parametrize("default_value", [[], None]) - def test_upsert_one_field_using_default_value(self, default_value): + @pytest.mark.parametrize("nullable", [True, False]) + @pytest.mark.parametrize("default_value", [[], [None for i in range(ct.default_nb)]]) + def test_upsert_one_field_using_default_value(self, default_value, nullable): """ - target: test insert with one field using default value + target: test insert/upsert with one field using default value method: 1. create a collection with one field using default value - 2. insert using []/None to replace the field value - expected: insert successfully + 2. insert using default value to replace the field value []/[None] + expected: insert/upsert successfully """ fields = [cf.gen_int64_field(is_primary=True), cf.gen_float_field(), - cf.gen_string_field(default_value="abc"), cf.gen_float_vec_field()] + cf.gen_string_field(default_value="abc", nullable=nullable), cf.gen_float_vec_field()] schema = cf.gen_collection_schema(fields) collection_w = self.init_collection_wrap(schema=schema) cf.insert_data(collection_w, with_json=False) + collection_w.create_index(ct.default_float_vec_field_name, default_index_params) + collection_w.load() + # default value fields, [] or [None] data = [ [i for i in range(ct.default_nb)], [np.float32(i) for i in range(ct.default_nb)], @@ -1867,82 +1897,125 @@ def test_upsert_one_field_using_default_value(self, default_value): cf.gen_vectors(ct.default_nb, ct.default_dim) ] collection_w.upsert(data) + exp = f"{ct.default_string_field_name} == 'abc'" + res = collection_w.query(exp, output_fields=[default_float_name])[0] + assert len(res) == ct.default_nb @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.skip("not support default_value now") - @pytest.mark.parametrize("default_value", [[], None]) - def test_upsert_multi_fields_using_default_value(self, default_value): + 
@pytest.mark.parametrize("enable_partition_key", [True, False]) + @pytest.mark.parametrize("default_value", [[], [None for _ in range(ct.default_nb)]]) + def test_upsert_multi_fields_using_none_data(self, enable_partition_key, default_value): """ - target: test insert with multi fields using default value - method: 1. default value fields before vector, insert [], None, fail - 2. default value fields all after vector field, insert empty, succeed - expected: report error and insert successfully + target: test insert/upsert with multi fields include array using none value + method: 1. create a collection with multi fields include array using default value + 2. insert using none value to replace the field value + expected: insert/upsert successfully """ - # 1. default value fields before vector, insert [], None, fail + json_embedded_object = "json_embedded_object" fields = [ cf.gen_int64_field(is_primary=True), - cf.gen_float_field(default_value=np.float32(1.0)), - cf.gen_string_field(default_value="abc"), + cf.gen_int32_field(default_value=np.int32(1), nullable=True), + cf.gen_float_field(default_value=np.float32(1.0), nullable=True), + cf.gen_string_field(default_value="abc", enable_partition_key=enable_partition_key, nullable=True), + cf.gen_array_field(name=ct.default_int32_array_field_name, element_type=DataType.INT32, nullable=True), + cf.gen_array_field(name=ct.default_float_array_field_name, element_type=DataType.FLOAT, nullable=True), + cf.gen_array_field(name=ct.default_string_array_field_name, element_type=DataType.VARCHAR, + max_length=100, nullable=True), + cf.gen_json_field(name=json_embedded_object, nullable=True), cf.gen_float_vec_field() ] schema = cf.gen_collection_schema(fields) collection_w = self.init_collection_wrap(schema=schema) - cf.insert_data(collection_w, with_json=False) + # insert data and load collection data = [ [i for i in range(ct.default_nb)], default_value, - # if multi default_value fields before vector field, every field must use 
[]/None + [np.float32(2.0) for _ in range(ct.default_nb)], + [str(i) for i in range(ct.default_nb)], + [[np.int32(j) for j in range(10)] for _ in range(ct.default_nb)], + [[np.float32(j) for j in range(10)] for _ in range(ct.default_nb)], + [[str(j) for j in range(10)] for _ in range(ct.default_nb)], + cf.gen_json_data_for_diff_json_types(nb=ct.default_nb, start=0, json_type=json_embedded_object), cf.gen_vectors(ct.default_nb, ct.default_dim) ] - collection_w.upsert(data, - check_task=CheckTasks.err_res, - check_items={ct.err_code: 999, - ct.err_msg: "The data type of field varchar doesn't match"}) - - # 2. default value fields all after vector field, insert empty, succeed - fields = [ - cf.gen_int64_field(is_primary=True), - cf.gen_float_vec_field(), - cf.gen_float_field(default_value=np.float32(1.0)), - cf.gen_string_field(default_value="abc") - ] - schema = cf.gen_collection_schema(fields) - collection_w = self.init_collection_wrap(schema=schema) + collection_w.insert(data=data) + collection_w.create_index(ct.default_float_vec_field_name, default_index_params) + collection_w.load() + # default value fields, [] or [None] data = [ [i for i in range(ct.default_nb)], + default_value, + default_value, + default_value, + [[np.int32(j) for j in range(10)] for _ in range(ct.default_nb)], + [[np.float32(j) for j in range(10)] for _ in range(ct.default_nb)], + [[str(j) for j in range(10)] for _ in range(ct.default_nb)], + default_value, cf.gen_vectors(ct.default_nb, ct.default_dim) ] - data1 = [ - [i for i in range(ct.default_nb)], - cf.gen_vectors(ct.default_nb, ct.default_dim), - [np.float32(i) for i in range(ct.default_nb)] - ] - collection_w.upsert(data) - assert collection_w.num_entities == ct.default_nb - collection_w.upsert(data1) + collection_w.upsert(data=data) + exp = f"{ct.default_float_field_name} == {np.float32(1.0)} and {ct.default_string_field_name} == 'abc'" + res = collection_w.query(exp, output_fields=[default_float_name, json_embedded_object])[0] + 
assert len(res) == ct.default_nb + assert res[0][json_embedded_object] is None @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.skip("not support default_value now") - def test_upsert_dataframe_using_default_value(self): + @pytest.mark.parametrize("enable_partition_key", [True, False]) + @pytest.mark.parametrize("nullable", [True, False]) + def test_upsert_dataframe_using_default_data(self, enable_partition_key, nullable): """ target: test upsert with dataframe - method: upsert with invalid dataframe + method: upsert with valid dataframe using default data expected: upsert successfully """ + if enable_partition_key is True and nullable is True: + pytest.skip("partition key field not support nullable") fields = [cf.gen_int64_field(is_primary=True), cf.gen_float_field(), - cf.gen_string_field(default_value="abc"), cf.gen_float_vec_field()] + cf.gen_string_field(default_value="abc", is_partition_key=enable_partition_key, nullable=nullable), + cf.gen_float_vec_field()] schema = cf.gen_collection_schema(fields) collection_w = self.init_collection_wrap(schema=schema) + collection_w.create_index(ct.default_float_vec_field_name, default_index_params) + collection_w.load() vectors = cf.gen_vectors(ct.default_nb, ct.default_dim) - # None/[] is not allowed when using dataframe - # To use default value, delete the whole item + df = pd.DataFrame({ - "int64": pd.Series(data=[i for i in range(0, ct.default_nb)]), - "float_vector": vectors, - "float": pd.Series(data=[float(i) for i in range(ct.default_nb)], dtype="float32") + "int64": pd.Series(data=[i for i in range(ct.default_nb)]), + "float": pd.Series(data=[float(i) for i in range(ct.default_nb)], dtype="float32"), + "varchar": pd.Series(data=[None for _ in range(ct.default_nb)]), + "float_vector": vectors }) collection_w.upsert(df) - assert collection_w.num_entities == ct.default_nb + exp = f"{ct.default_string_field_name} == 'abc'" + res = collection_w.query(exp, output_fields=[ct.default_string_field_name])[0] + assert 
len(res) == ct.default_nb + + @pytest.mark.tags(CaseLabel.L2) + def test_upsert_dataframe_using_none_data(self): + """ + target: test upsert with dataframe + method: upsert with valid dataframe using none data + expected: upsert successfully + """ + fields = [cf.gen_int64_field(is_primary=True), cf.gen_float_field(), + cf.gen_string_field(default_value=None, nullable=True), + cf.gen_float_vec_field()] + schema = cf.gen_collection_schema(fields) + collection_w = self.init_collection_wrap(schema=schema) + collection_w.create_index(ct.default_float_vec_field_name, default_index_params) + collection_w.load() + vectors = cf.gen_vectors(ct.default_nb, ct.default_dim) + + df = pd.DataFrame({ + "int64": pd.Series(data=[i for i in range(ct.default_nb)]), + "float": pd.Series(data=[float(i) for i in range(ct.default_nb)], dtype="float32"), + "varchar": pd.Series(data=[None for _ in range(ct.default_nb)]), + "float_vector": vectors + }) + collection_w.upsert(df) + exp = f"{ct.default_string_field_name} == ''" + res = collection_w.query(exp, output_fields=[ct.default_string_field_name])[0] + assert len(res) == ct.default_nb @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("index ", ct.all_index_types[9:11]) @@ -2136,8 +2209,7 @@ def test_upsert_with_auto_id_pk_type_dismacth(self): collection_w.upsert(data=data, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.skip("not support default_value now") - @pytest.mark.parametrize("default_value", [[], None]) + @pytest.mark.parametrize("default_value", [[], 123]) def test_upsert_array_using_default_value(self, default_value): """ target: test upsert with array @@ -2151,17 +2223,16 @@ def test_upsert_array_using_default_value(self, default_value): vectors = cf.gen_vectors(ct.default_nb, ct.default_dim) data = [{"int64": 1, "float_vector": vectors[1], "varchar": default_value, "float": np.float32(1.0)}] - collection_w.upsert(data, check_task=CheckTasks.err_res, - 
check_items={ct.err_code: 999, ct.err_msg: "Field varchar don't match in entities[0]"}) + error = {ct.err_code: 1, ct.err_msg: "The Input data type is inconsistent with defined schema"} + collection_w.upsert(data, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.skip("not support default_value now") @pytest.mark.parametrize("default_value", [[], None]) def test_upsert_tuple_using_default_value(self, default_value): """ target: test upsert with tuple method: upsert with invalid tuple - expected: upsert successfully + expected: raise exception """ fields = [cf.gen_int64_field(is_primary=True), cf.gen_float_field(default_value=np.float32(3.14)), cf.gen_string_field(), cf.gen_float_vec_field()] @@ -2171,8 +2242,8 @@ def test_upsert_tuple_using_default_value(self, default_value): int_values = [i for i in range(0, ct.default_nb)] string_values = ["abc" for i in range(ct.default_nb)] data = (int_values, default_value, string_values, vectors) - collection_w.upsert(data, check_task=CheckTasks.err_res, - check_items={ct.err_code: 999, ct.err_msg: "Field varchar don't match in entities[0]"}) + error = {ct.err_code: 1, ct.err_msg: "The Input data type is inconsistent with defined schema"} + collection_w.upsert(data, check_task=CheckTasks.err_res, check_items=error) class TestInsertArray(TestcaseBase):