diff --git a/tests/restful_client_v2/testcases/test_collection_operations.py b/tests/restful_client_v2/testcases/test_collection_operations.py index 4535d60736863..c054d2b2ed47f 100644 --- a/tests/restful_client_v2/testcases/test_collection_operations.py +++ b/tests/restful_client_v2/testcases/test_collection_operations.py @@ -670,6 +670,40 @@ def create_collection(c_name, vector_dim, c_metric_type): assert rsp['code'] == 0 assert rsp['data']['collectionName'] == name + def test_create_collections_with_nullable_default(self): + """ + target: test create collection + method: create a collection with default none + expected: create collection success + """ + name = gen_collection_name() + dim = 128 + client = self.collection_client + payload = { + "collectionName": name, + "schema": { + "fields": [ + {"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}}, + {"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}, "defaultValue": 100}, + {"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}, + "nullable": True, "defaultValue": "123"}, + {"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}} + ] + } + } + logging.info(f"create collection {name} with payload: {payload}") + rsp = client.collection_create(payload) + assert rsp['code'] == 0 + rsp = client.collection_list() + + all_collections = rsp['data'] + assert name in all_collections + # describe collection + rsp = client.collection_describe(name) + assert rsp['code'] == 0 + assert rsp['data']['fields'][2]['defaultValue'] == {'Data': {'StringData': '123'}} + assert rsp['data']['fields'][2]['nullable'] is True + @pytest.mark.L1 class TestCreateCollectionNegative(TestBase): @@ -736,6 +770,125 @@ def test_create_collections_with_invalid_collection_name(self, name): assert rsp['code'] == 1100 assert "Invalid collection name" in rsp['message'] or "invalid parameter" in rsp['message'] + def test_create_collections_with_partition_key_nullable(self): + """ + partition key field not support nullable + """ + name = gen_collection_name() + dim = 128 + client = self.collection_client + payload = { + "collectionName": name, + "schema": { + "fields": [ + {"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}}, + {"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}, "isPartitionKey": True, + "nullable": True}, + {"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}}, + {"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}} + ] + } + } + logging.info(f"create collection {name} with payload: {payload}") + rsp = client.collection_create(payload) + assert rsp['code'] == 1100 + assert "partition key field not support nullable" in rsp['message'] + + def test_create_collections_with_vector_nullable(self): + """ + vector field not support nullable + """ + name = gen_collection_name() + dim = 128 + client = self.collection_client + payload = { + "collectionName": name, + "schema": { + "fields": [ + {"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}}, + {"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}}, + {"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}}, + {"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}, + "nullable": True} + ] + } + } + logging.info(f"create collection {name} with payload: {payload}") + rsp = client.collection_create(payload) + assert rsp['code'] == 1100 + assert "vector type not support null" in rsp['message'] + + def test_create_collections_with_primary_default(self): + """ + primary key field not support defaultValue + """ + name = gen_collection_name() + dim = 128 + client = self.collection_client + payload = { + "collectionName": name, + "schema": { + "fields": [ + {"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}, + "defaultValue": 123}, + {"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}}, + {"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}} + ] + } + } + logging.info(f"create collection {name} with payload: {payload}") + rsp = client.collection_create(payload) + assert rsp['code'] == 1100 + assert "primary field not support default_value" in rsp['message'] + + def test_create_collections_with_json_field_default(self): + """ + json field not support default value + """ + name = gen_collection_name() + dim = 128 + client = self.collection_client + payload = { + "collectionName": name, + "schema": { + "fields": [ + {"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}}, + {"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}}, + {"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}}, + {"fieldName": "json", "dataType": "JSON", "elementTypeParams": {}, "defaultValue": {"key": 1}}, + {"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}} + ] + } + } + logging.info(f"create collection {name} with payload: {payload}") + rsp = client.collection_create(payload) + assert rsp['code'] == 1100 + assert "convert defaultValue fail" in rsp['message'] + + def test_create_collections_with_array_field_default(self): + """ + array field not support default value + """ + name = gen_collection_name() + dim = 128 + client = self.collection_client + payload = { + "collectionName": name, + "schema": { + "fields": [ + {"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}}, + {"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}}, + {"fieldName": "int_array", "dataType": "Array", "elementDataType": "Int64", "defaultValue": [1, 2], + "elementTypeParams": {"max_capacity": "1024"}}, + {"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}} + ] + } + } + logging.info(f"create collection {name} with payload: {payload}") + rsp = client.collection_create(payload) + assert rsp['code'] == 1100 + assert "convert defaultValue fail" in rsp['message'] + @pytest.mark.L0 class TestHasCollections(TestBase): diff --git a/tests/restful_client_v2/testcases/test_vector_operations.py b/tests/restful_client_v2/testcases/test_vector_operations.py index 3f158d5e37506..ea2121eadf854 100644 --- a/tests/restful_client_v2/testcases/test_vector_operations.py +++ b/tests/restful_client_v2/testcases/test_vector_operations.py @@ -687,6 +687,86 @@ def test_insert_entities_with_all_json_datatype(self, nb, dim, insert_round, aut assert rsp['code'] == 0 assert len(rsp['data']) == 50 + @pytest.mark.parametrize("insert_round", [1]) + @pytest.mark.parametrize("auto_id", [True, False]) + @pytest.mark.parametrize("is_partition_key", [True, False]) + @pytest.mark.parametrize("enable_dynamic_schema", [True, False]) + @pytest.mark.parametrize("nb", [3000]) + @pytest.mark.parametrize("dim", [128]) + def test_insert_entities_with_default_none(self, nb, dim, insert_round, auto_id, is_partition_key, + enable_dynamic_schema): + """ + Insert a vector with defaultValue and none + """ + # create a collection + name = gen_collection_name() + payload = { + "collectionName": name, + "schema": { + "autoId": auto_id, + "enableDynamicField": enable_dynamic_schema, + "fields": [ + {"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}}, + {"fieldName": "user_id", "dataType": "Int64", "isPartitionKey": is_partition_key, + "elementTypeParams": {}, "defaultValue": 10}, + {"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}, "nullable": True}, + {"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}, + "defaultValue": "default", "nullable": True}, + {"fieldName": "json", "dataType": "JSON", "elementTypeParams": {}, "nullable": True}, + {"fieldName": "varchar_array", "dataType": "Array", "elementDataType": "VarChar", + "elementTypeParams": {"max_capacity": "1024", "max_length": "256"}, "nullable": True}, + {"fieldName": "text_emb", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}}, + ] + }, + "indexParams": [ + {"fieldName": "text_emb", "indexName": "text_emb", "metricType": "L2"}, + ] + } + rsp = self.collection_client.collection_create(payload) + assert rsp['code'] == 0 + rsp = self.collection_client.collection_describe(name) + logger.info(f"rsp: {rsp}") + assert rsp['code'] == 0 + # insert data + for k in range(insert_round): + data = [] + for i in range(nb): + if auto_id: + tmp = { + "user_id": None, + "word_count": None, + "book_describe": None, + "json": None, + "varchar_array": None, + "text_emb": preprocessing.normalize([np.array([random.random() for _ in range(dim)])])[0].tolist(), + } + else: + tmp = { + "book_id": i, + "user_id": None, + "word_count": None, + "book_describe": None, + "json": None, + "varchar_array": None, + "text_emb": preprocessing.normalize([np.array([random.random() for _ in range(dim)])])[0].tolist(), + } + if enable_dynamic_schema: + tmp.update({f"dynamic_field_{i}": i}) + data.append(tmp) + payload = { + "collectionName": name, + "data": data, + } + rsp = self.vector_client.vector_insert(payload) + assert rsp['code'] == 0 + assert rsp['data']['insertCount'] == nb + # query data to make sure the data is inserted + rsp = self.vector_client.vector_query({"collectionName": name, "filter": "user_id > 0", "limit": 5}) + assert rsp['code'] == 0 + assert len(rsp['data']) == 5 + assert rsp['data'][0]['book_describe'] == 'default' + assert rsp['data'][0]['word_count'] is None + assert rsp['data'][0]['json'] is None @pytest.mark.L0 class TestInsertVectorNegative(TestBase): @@ -780,6 +860,44 @@ def test_insert_vector_with_mismatch_dim(self): assert rsp['code'] == 1804 assert "fail to deal the insert data" in rsp['message'] + def test_insert_entities_with_none_no_nullable_field(self): + """ + Insert a vector with none no nullable field + """ + # create a collection + name = gen_collection_name() + payload = { + "collectionName": name, + "schema": { + "autoId": True, + "fields": [ + {"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}}, + {"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}}, + {"fieldName": "text_emb", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{128}"}}, + ] + } + } + rsp = self.collection_client.collection_create(payload) + assert rsp['code'] == 0 + rsp = self.collection_client.collection_describe(name) + logger.info(f"rsp: {rsp}") + assert rsp['code'] == 0 + # insert data + data = [] + for i in range(10): + tmp = { + "word_count": i if i % 2 else None, + "text_emb": preprocessing.normalize([np.array([random.random() for _ in range(128)])])[0].tolist(), + } + data.append(tmp) + payload = { + "collectionName": name, + "data": data, + } + rsp = self.vector_client.vector_insert(payload) + assert rsp['code'] == 1804 + assert "fail to deal the insert data" in rsp['message'] + @pytest.mark.L0 class TestUpsertVector(TestBase): @@ -948,6 +1066,85 @@ def test_upsert_vector_pk_auto_id(self, nb, dim, insert_round, id_type): res = utility.get_query_segment_info(name) logger.info(f"res: {res}") + @pytest.mark.parametrize("insert_round", [2]) + @pytest.mark.parametrize("nb", [3000]) + @pytest.mark.parametrize("dim", [128]) + @pytest.mark.parametrize("id_type", ["Int64", "VarChar"]) + def test_upsert_vector_with_default_none(self, nb, dim, insert_round, id_type): + # create a collection + name = gen_collection_name() + payload = { + "collectionName": name, + "schema": { + "fields": [ + {"fieldName": "book_id", "dataType": f"{id_type}", "isPrimary": True, "elementTypeParams": {"max_length": "256"}}, + {"fieldName": "user_id", "dataType": "Int64", "isPartitionKey": True, "elementTypeParams": {}}, + {"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}, "defaultValue": 123}, + {"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}, + "nullable": True}, + {"fieldName": "text_emb", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}} + ] + }, + "indexParams": [{"fieldName": "text_emb", "indexName": "text_emb_index", "metricType": "L2"}] + } + rsp = self.collection_client.collection_create(payload) + assert rsp['code'] == 0 + rsp = self.collection_client.collection_describe(name) + logger.info(f"rsp: {rsp}") + assert rsp['code'] == 0 + # insert data + for i in range(insert_round): + data = [] + for j in range(nb): + tmp = { + "book_id": i * nb + j if id_type == "Int64" else f"{i * nb + j}", + "user_id": i * nb + j, + "word_count": i * nb + j, + "book_describe": f"book_{i * nb + j}", + "text_emb": preprocessing.normalize([np.array([random.random() for i in range(dim)])])[0].tolist() + } + data.append(tmp) + payload = { + "collectionName": name, + "data": data, + } + body_size = sys.getsizeof(json.dumps(payload)) + logger.info(f"body size: {body_size / 1024 / 1024} MB") + rsp = self.vector_client.vector_insert(payload) + assert rsp['code'] == 0 + assert rsp['data']['insertCount'] == nb + c = Collection(name) + c.flush() + + # upsert data + for i in range(insert_round): + data = [] + for j in range(nb): + tmp = { + "book_id": i * nb + j if id_type == "Int64" else f"{i * nb + j}", + "user_id": i * nb + j + 1, + "word_count": None, + "book_describe": None, + "text_emb": preprocessing.normalize([np.array([random.random() for i in range(dim)])])[0].tolist() + } + data.append(tmp) + payload = { + "collectionName": name, + "data": data, + } + body_size = sys.getsizeof(json.dumps(payload)) + logger.info(f"body size: {body_size / 1024 / 1024} MB") + rsp = self.vector_client.vector_upsert(payload) + # query data to make sure the data is updated + if id_type == "Int64": + rsp = self.vector_client.vector_query({"collectionName": name, "filter": "book_id > 0"}) + if id_type == "VarChar": + rsp = self.vector_client.vector_query({"collectionName": name, "filter": "book_id > '0'"}) + for data in rsp['data']: + assert data['user_id'] == int(data['book_id']) + 1 + assert data['word_count'] == 123 + assert data['book_describe'] is None + @pytest.mark.L0 class TestUpsertVectorNegative(TestBase): @@ -981,6 +1178,45 @@ def test_upsert_vector_with_invalid_collection_name(self): assert rsp['code'] == 100 assert "can't find collection" in rsp['message'] + def test_upsert_entities_with_none_no_nullable_field(self): + """ + Insert a vector with none no nullable field + """ + # create a collection + name = gen_collection_name() + payload = { + "collectionName": name, + "schema": { + "autoId": True, + "fields": [ + {"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}}, + {"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}}, + {"fieldName": "text_emb", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{128}"}}, + ] + } + } + rsp = self.collection_client.collection_create(payload) + assert rsp['code'] == 0 + rsp = self.collection_client.collection_describe(name) + logger.info(f"rsp: {rsp}") + assert rsp['code'] == 0 + # insert data + data = [] + for i in range(10): + tmp = { + "word_count": i if i % 2 else None, + "text_emb": preprocessing.normalize([np.array([random.random() for _ in range(128)])])[0].tolist(), + } + data.append(tmp) + payload = { + "collectionName": name, + "data": data, + } + rsp = self.vector_client.vector_upsert(payload) + assert rsp['code'] == 1804 + assert "fail to deal the insert data" in rsp['message'] + + @pytest.mark.L0 class TestSearchVector(TestBase): @@ -2055,6 +2291,88 @@ def test_search_vector_with_text_match_filter(self, tokenizer): for d in rsp['data']: assert token in d[field] + @pytest.mark.parametrize("insert_round", [1]) + @pytest.mark.parametrize("auto_id", [True]) + @pytest.mark.parametrize("is_partition_key", [True]) + @pytest.mark.parametrize("enable_dynamic_schema", [True]) + @pytest.mark.parametrize("nb", [3000]) + @pytest.mark.parametrize("dim", [128]) + @pytest.mark.parametrize("nq", [1, 2]) + @pytest.mark.parametrize("metric_type", ['COSINE', "L2", "IP"]) + def test_search_vector_with_default_none(self, nb, dim, insert_round, auto_id, is_partition_key, + enable_dynamic_schema, nq, metric_type): + """ + Insert a vector with default and none + """ + # create a collection + name = gen_collection_name() + payload = { + "collectionName": name, + "schema": { + "autoId": auto_id, + "enableDynamicField": enable_dynamic_schema, + "fields": [ + {"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}}, + {"fieldName": "user_id", "dataType": "Int64", "isPartitionKey": is_partition_key, + "elementTypeParams": {}, "defaultValue": 8888}, + {"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}, "nullable": True}, + {"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}, + "nullable": True, "defaultValue": "8888"}, + {"fieldName": "float_vector", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}}, + ] + }, + "indexParams": [ + {"fieldName": "float_vector", "indexName": "float_vector", "metricType": metric_type}, + ] + } + rsp = self.collection_client.collection_create(payload) + assert rsp['code'] == 0 + rsp = self.collection_client.collection_describe(name) + logger.info(f"rsp: {rsp}") + assert rsp['code'] == 0 + # insert data + for i in range(insert_round): + data = [] + for i in range(nb): + if auto_id: + tmp = { + "user_id": i % 100 if i % 2 else None, + "word_count": None, + "book_describe": None, + "float_vector": gen_vector(datatype="FloatVector", dim=dim), + } + else: + tmp = { + "book_id": i, + "user_id": i % 100 if i % 2 else None, + "word_count": None, + "book_describe": None, + "float_vector": gen_vector(datatype="FloatVector", dim=dim), + } + if enable_dynamic_schema: + tmp.update({f"dynamic_field_{i}": i}) + data.append(tmp) + payload = { + "collectionName": name, + "data": data, + } + rsp = self.vector_client.vector_insert(payload) + assert rsp['code'] == 0 + assert rsp['data']['insertCount'] == nb + # search data + payload = { + "collectionName": name, + "data": [gen_vector(datatype="FloatVector", dim=dim) for _ in range(nq)], + "filter": "book_id >= 0", + # "groupingField": "user_id", + "outputFields": ["*"], + "limit": 100, + } + rsp = self.vector_client.vector_search(payload) + assert rsp['code'] == 0 + assert rsp['data'][0]['book_describe'] == "8888" + assert rsp['data'][0]['word_count'] is None + assert len(rsp['data']) == 100 * nq @pytest.mark.L0 class TestSearchVectorNegative(TestBase): @@ -2905,6 +3223,105 @@ def test_query_vector_with_text_match_filter(self, tokenizer): for d in rsp['data']: assert token in d[field] + @pytest.mark.parametrize("insert_round", [1]) + @pytest.mark.parametrize("auto_id", [True]) + @pytest.mark.parametrize("is_partition_key", [True]) + @pytest.mark.parametrize("enable_dynamic_schema", [True]) + @pytest.mark.parametrize("nb", [3000]) + @pytest.mark.parametrize("dim", [128]) + def test_query_entities_with_default_none(self, nb, dim, insert_round, auto_id, is_partition_key, + enable_dynamic_schema): + """ + Insert a vector with default and none + """ + # create a collection + name = gen_collection_name() + payload = { + "collectionName": name, + "schema": { + "autoId": auto_id, + "enableDynamicField": enable_dynamic_schema, + "fields": [ + {"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}}, + {"fieldName": "user_id", "dataType": "Int64", "isPartitionKey": is_partition_key, + "elementTypeParams": {}}, + {"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}, "defaultValue": 8888}, + {"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "25536"}, + "nullable": True}, + {"fieldName": "bool", "dataType": "Bool", "elementTypeParams": {}, "nullable": True}, + {"fieldName": "json", "dataType": "JSON", "elementTypeParams": {}, "nullable": True}, + {"fieldName": "int_array", "dataType": "Array", "elementDataType": "Int64", + "elementTypeParams": {"max_capacity": "1024"}, "nullable": True}, + {"fieldName": "varchar_array", "dataType": "Array", "elementDataType": "VarChar", + "elementTypeParams": {"max_capacity": "1024", "max_length": "256"}, "nullable": True}, + {"fieldName": "bool_array", "dataType": "Array", "elementDataType": "Bool", + "elementTypeParams": {"max_capacity": "1024"}, "nullable": True}, + {"fieldName": "text_emb", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}}, + ] + }, + "indexParams": [ + {"fieldName": "text_emb", "indexName": "text_emb", "metricType": "L2"}, + ] + } + rsp = self.collection_client.collection_create(payload) + assert rsp['code'] == 0 + rsp = self.collection_client.collection_describe(name) + logger.info(f"rsp: {rsp}") + assert rsp['code'] == 0 + # insert data + for i in range(insert_round): + data = [] + for i in range(nb): + if auto_id: + tmp = { + "user_id": i, + "word_count": None, + "book_describe": None, + "bool": random.choice([True, False]), + "json": None, + "int_array": None, + "varchar_array": None, + "bool_array": None, + "text_emb": preprocessing.normalize([np.array([random.random() for _ in range(dim)])])[0].tolist(), + } + else: + tmp = { + "book_id": i, + "user_id": i, + "word_count": None, + "book_describe": None, + "bool": random.choice([True, False]), + "json": None, + "int_array": None, + "varchar_array": None, + "bool_array": None, + "text_emb": preprocessing.normalize([np.array([random.random() for _ in range(dim)])])[0].tolist(), + } + if enable_dynamic_schema: + tmp.update({f"dynamic_field_{i}": i}) + data.append(tmp) + payload = { + "collectionName": name, + "data": data, + } + rsp = self.vector_client.vector_insert(payload) + assert rsp['code'] == 0 + assert rsp['data']['insertCount'] == nb + # query data to make sure the data is inserted + payload = { + "collectionName": name, + "filter": "user_id > 0", + "limit": 50, + "outputFields": ["*"] + } + rsp = self.vector_client.vector_query(payload) + assert rsp['code'] == 0 + assert rsp['data'][0]['book_describe'] is None + assert rsp['data'][0]['word_count'] == 8888 + assert rsp['data'][0]['json'] is None + assert rsp['data'][0]['varchar_array'] is None + assert len(rsp['data']) == 50 + @pytest.mark.L0 class TestQueryVectorNegative(TestBase):