Skip to content

Commit

Permalink
feat:support function for milvus2.5
Browse files Browse the repository at this point in the history
Signed-off-by: nameczz <[email protected]>
nameczz committed Dec 31, 2024
1 parent c031a9d commit e243574
Showing 7 changed files with 230 additions and 174 deletions.
252 changes: 126 additions & 126 deletions .github/workflows/update_assets.yml
Original file line number Diff line number Diff line change
@@ -1,132 +1,132 @@
# This is a basic workflow to help you get started with Actions
# # This is a basic workflow to help you get started with Actions

name: Update the release's assets after it published
# name: Update the release's assets after it published

# Controls when the workflow will run
on:
release:
types: [published]
# # Controls when the workflow will run
# on:
# release:
# types: [published]

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
# This workflow contains a single job called "build"
build:
# The type of runner that the job will run on
runs-on: ubuntu-latest
# # A workflow run is made up of one or more jobs that can run sequentially or in parallel
# jobs:
# # This workflow contains a single job called "build"
# build:
# # The type of runner that the job will run on
# runs-on: ubuntu-latest

# Steps represent a sequence of tasks that will be executed as part of the job
steps:
# Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
python-version: "3.8"
architecture: "x64"
- name: Install pypa/build
run: >-
python -m
pip install
build
--user
- name: Clean dist/
run: |
sudo rm -fr dist/*
- name: Build a binary wheel and a source tarball
run: >-
python -m
build
--sdist
--wheel
--outdir dist/
.
- name: Update assets
uses: softprops/action-gh-release@v1
if: startsWith(github.ref, 'refs/tags/')
with:
files: ./dist/*
- name: Publish distribution 📦 to Test PyPI
if: contains(github.ref, 'beta') && startsWith(github.ref, 'refs/tags')
uses: pypa/gh-action-pypi-publish@release/v1
with:
user: __token__
password: ${{ secrets.TEST_PYPI_API_TOKEN }}
repository_url: https://test.pypi.org/legacy/
packages_dir: dist/
verify_metadata: false
build_windows_exe:
runs-on: windows-latest
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
python-version: "3.8"
architecture: "x64"
- name: Install requirements
run: >-
python -m
pip install
-e
.
- name: Install protobuf only for pymilvus2.1
run:
python -m pip install protobuf==3.20.0
# # Steps represent a sequence of tasks that will be executed as part of the job
# steps:
# # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
# - uses: actions/checkout@v2
# - uses: actions/setup-python@v2
# with:
# python-version: "3.8"
# architecture: "x64"
# - name: Install pypa/build
# run: >-
# python -m
# pip install
# build
# --user
# - name: Clean dist/
# run: |
# sudo rm -fr dist/*
# - name: Build a binary wheel and a source tarball
# run: >-
# python -m
# build
# --sdist
# --wheel
# --outdir dist/
# .
# - name: Update assets
# uses: softprops/action-gh-release@v1
# if: startsWith(github.ref, 'refs/tags/')
# with:
# files: ./dist/*
# - name: Publish distribution 📦 to Test PyPI
# if: contains(github.ref, 'beta') && startsWith(github.ref, 'refs/tags')
# uses: pypa/gh-action-pypi-publish@release/v1
# with:
# user: __token__
# password: ${{ secrets.TEST_PYPI_API_TOKEN }}
# repository_url: https://test.pypi.org/legacy/
# packages_dir: dist/
# verify_metadata: false
# build_windows_exe:
# runs-on: windows-latest
# steps:
# - uses: actions/checkout@v2
# - uses: actions/setup-python@v2
# with:
# python-version: "3.8"
# architecture: "x64"
# - name: Install requirements
# run: >-
# python -m
# pip install
# -e
# .
# - name: Install protobuf only for pymilvus2.1
# run:
# python -m pip install protobuf==3.20.0

- name: Install pyreadline
run: >-
python -m
pip install
pyreadline
- name: Install pyinstaller.
run: >-
python -m
pip install
pyinstaller
- name: build to dist/
run: |
pyinstaller -F ./milvus_cli/scripts/milvus_cli.py -p ./milvus_cli --clean --hidden-import pyreadline -n milvus_cli.exe
- name: Update assets
uses: softprops/action-gh-release@v1
if: startsWith(github.ref, 'refs/tags/')
with:
files: ./dist/*
build_linux_exec:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [macos-latest, ubuntu-latest]
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
python-version: "3.8"
architecture: "x64"
- name: Install requirements
run: >-
python -m
pip install
-e
.
- name: Install protobuf only for pymilvus2.1
run:
python -m pip install protobuf==3.20.0
# - name: Install pyreadline
# run: >-
# python -m
# pip install
# pyreadline
# - name: Install pyinstaller.
# run: >-
# python -m
# pip install
# pyinstaller
# - name: build to dist/
# run: |
# pyinstaller -F ./milvus_cli/scripts/milvus_cli.py -p ./milvus_cli --clean --hidden-import pyreadline -n milvus_cli.exe
# - name: Update assets
# uses: softprops/action-gh-release@v1
# if: startsWith(github.ref, 'refs/tags/')
# with:
# files: ./dist/*
# build_linux_exec:
# runs-on: ${{ matrix.os }}
# strategy:
# matrix:
# os: [macos-latest, ubuntu-latest]
# steps:
# - uses: actions/checkout@v2
# - uses: actions/setup-python@v2
# with:
# python-version: "3.8"
# architecture: "x64"
# - name: Install requirements
# run: >-
# python -m
# pip install
# -e
# .
# - name: Install protobuf only for pymilvus2.1
# run:
# python -m pip install protobuf==3.20.0

- name: Install pyinstaller.
run: >-
python -m
pip install
pyinstaller
- name: Clean dist/
run: |
sudo rm -fr dist/*
- name: Tag name
id: tag_name
run: |
echo ::set-output name=SOURCE_TAG::${GITHUB_REF#refs/tags/}
- name: build to dist/
run: |
pyinstaller -F ./milvus_cli/scripts/milvus_cli.py -p ./milvus_cli --clean --hidden-import pyreadline -n milvus_cli-${{ steps.tag_name.outputs.SOURCE_TAG }}-$RUNNER_OS
- name: Update assets
uses: softprops/action-gh-release@v1
if: startsWith(github.ref, 'refs/tags/')
with:
files: ./dist/*
# - name: Install pyinstaller.
# run: >-
# python -m
# pip install
# pyinstaller
# - name: Clean dist/
# run: |
# sudo rm -fr dist/*
# - name: Tag name
# id: tag_name
# run: |
# echo ::set-output name=SOURCE_TAG::${GITHUB_REF#refs/tags/}
# - name: build to dist/
# run: |
# pyinstaller -F ./milvus_cli/scripts/milvus_cli.py -p ./milvus_cli --clean --hidden-import pyreadline -n milvus_cli-${{ steps.tag_name.outputs.SOURCE_TAG }}-$RUNNER_OS
# - name: Update assets
# uses: softprops/action-gh-release@v1
# if: startsWith(github.ref, 'refs/tags/')
# with:
# files: ./dist/*
19 changes: 16 additions & 3 deletions milvus_cli/Collection.py
Original file line number Diff line number Diff line change
@@ -33,13 +33,15 @@ def create_collection(
isDynamic=None,
consistencyLevel="Bounded",
shardsNum=1,
functions=None,
):
schema = CollectionSchema(
fields=fields,
primary_field=primaryField,
auto_id=autoId,
description=description,
_enable_dynamic_field=isDynamic,
functions=functions,
)

collection = Collection(
@@ -118,22 +120,28 @@ def get_collection_details(self, collectionName="", collection=None):
partitions = target.partitions
indexes = target.indexes
fieldSchemaDetails = ""

for fieldSchema in schema.fields:
_name = f"{'*' if fieldSchema.is_primary else ''}{fieldSchema.name}"

_type = DataTypeByNum[fieldSchema.dtype]
_desc = fieldSchema.description
_params = fieldSchema.params
_is_function_output = fieldSchema.is_function_output
_dim = _params.get("dim")
_params_desc = f"dim: {_dim}" if _dim else ""
_params_desc += (
f", Is function output: {_is_function_output}"
if _is_function_output
else ""
)

_element_type = fieldSchema.element_type
_max_length = _params.get("max_length")
_max_capacity = _params.get("max_capacity")
_enable_match = _params.get("enable_match")
_enable_analyzer = _params.get("enable_analyzer")
_analyzer_params = _params.get("analyzer_params")
_params_desc = ""

_params_desc += f", max_capacity: {_max_capacity}" if _max_capacity else ""
_params_desc += f", element_type: {_element_type}" if _element_type else ""
@@ -186,10 +194,15 @@ def list_field_names(self, collectionName):
result = target.schema.fields
return [i.name for i in result]

def list_field_names_and_types(self, collectionName):
def list_fields_info(self, collectionName):
target = getTargetCollection(collectionName)
result = target.schema.fields
return [
{"name": i.name, "type": DataTypeByNum[i.dtype], "autoId": i.auto_id}
{
"name": i.name,
"type": DataTypeByNum[i.dtype],
"autoId": i.auto_id,
"isFunctionOut": i.is_function_output,
}
for i in result
]
1 change: 1 addition & 0 deletions milvus_cli/Types.py
Original file line number Diff line number Diff line change
@@ -146,6 +146,7 @@ def __str__(self):
"HAMMING",
"TANIMOTO",
"COSINE",
"BM25",
"",
]

25 changes: 0 additions & 25 deletions milvus_cli/Validation.py
Original file line number Diff line number Diff line change
@@ -225,31 +225,6 @@ def validateQueryParams(
return result


def validateCalcParams(
    leftVectorMeta, rightVectorMeta, metric_type, sqrt, dim, timeout
):
    """Build the validated argument dict for a "calc" request.

    Presumably this feeds a vector distance calculation; confirm against
    the caller. Both vector metas are passed through ``validateVectorMeta``
    (left first, then right) before ``metric_type`` is checked, so a bad
    vector meta is reported ahead of a bad metric.

    Args:
        leftVectorMeta: Description of the left-hand vectors, handed
            unchanged to ``validateVectorMeta``.
        rightVectorMeta: Description of the right-hand vectors, handed
            unchanged to ``validateVectorMeta``.
        metric_type: Metric name; must be a member of ``MetricTypes``.
        sqrt: Copied into ``params["sqrt"]`` only when the metric is "L2".
        dim: Copied into ``params["dim"]`` only when the metric is
            "HAMMING" or "TANIMOTO".
        timeout: Converted with ``float`` when truthy; any falsy value
            (``None``, ``""``, ``0``) becomes ``None``.

    Returns:
        dict with the keys "params", "vectors_left", "vectors_right" and
        "timeout", inserted in that order.

    Raises:
        ParameterException: If ``metric_type`` is not in ``MetricTypes``.
    """
    calc_params = {}
    checked = {"params": calc_params}
    checked["vectors_left"] = validateVectorMeta(leftVectorMeta)
    checked["vectors_right"] = validateVectorMeta(rightVectorMeta)

    # The metric is recorded before it is validated; on failure the dict is
    # discarded anyway because the exception propagates.
    calc_params["metric_type"] = metric_type
    if metric_type not in MetricTypes:
        raise ParameterException(
            "metric_type should be one of {}".format(str(MetricTypes))
        )

    # Only some metrics take an extra option.
    if metric_type == "L2":
        calc_params["sqrt"] = sqrt
    elif metric_type in ("HAMMING", "TANIMOTO"):
        calc_params["dim"] = dim

    checked["timeout"] = float(timeout) if timeout else None
    return checked


def validateVectorMeta(vectorMeta):
import json

69 changes: 60 additions & 9 deletions milvus_cli/scripts/collection_cli.py
Original file line number Diff line number Diff line change
@@ -9,7 +9,7 @@

from .helper_cli import create, getList, delete, rename, show, load, release
from Types import FieldDataTypes, BUILT_IN_ANALYZERS
from pymilvus import FieldSchema, DataType
from pymilvus import FieldSchema, DataType, FunctionType, Function

NOT_SET = "Not set"

@@ -53,6 +53,54 @@ def handleNullableAndDefaultValue(isPrimary, fieldType):

return nullable, defaultValue

needFunction = click.confirm(
"Do you want to add embedding function?", default=False
)
functions = None
if needFunction:
funcName = click.prompt("Function name", type=str)
funcType = click.prompt(
"Please input function type(BM25(1))",
default=1,
type=int,
)
inputFieldName = click.prompt(
"Name of the VARCHAR field containing raw text data", type=str
)
outputFieldName = click.prompt(
"Name of the SPARSE_FLOAT_VECTOR field reserved to store generated embeddings",
type=str,
)
needCreateFields = click.confirm(
"Do you want to create two fields for the function? Otherwise, you should create them yourself later. Default is False",
default=False,
)
functions = [
Function(
name=funcName,
function_type=funcType,
input_field_names=[inputFieldName],
output_field_names=[outputFieldName],
)
]
if needCreateFields:
fields.append(
FieldSchema(
name=inputFieldName,
dtype=DataType.VARCHAR,
max_length=65535,
enable_analyzer=True,
description="Raw text data",
)
)
fields.append(
FieldSchema(
name=outputFieldName,
dtype=DataType.SPARSE_FLOAT_VECTOR,
description="Generated embeddings",
)
)

while True:
fieldName = click.prompt(
"Field name",
@@ -68,14 +116,16 @@ def handleNullableAndDefaultValue(isPrimary, fieldType):
"FLOAT16_VECTOR",
"SPARSE_FLOAT_VECTOR",
]:
dim = click.prompt("Dimension", type=int)
fields.append(
FieldSchema(
name=fieldName,
dtype=DataType[upperFieldType],
dim=int(dim),
)
)
field_schema_params = {
"name": fieldName,
"dtype": DataType[upperFieldType],
"description": fieldDesc,
}
if upperFieldType != "SPARSE_FLOAT_VECTOR":
dim = click.prompt("Dimension", type=int)
field_schema_params["params"] = {"dim": dim}
fields.append(FieldSchema(**field_schema_params))

elif upperFieldType == "VARCHAR":
maxLength = click.prompt("Max length", default=65535, type=int)
isPrimary = handle_primary_field(fieldName, primaryField)
@@ -172,6 +222,7 @@ def handleNullableAndDefaultValue(isPrimary, fieldType):
isDynamic,
consistencyLevel,
shardsNum,
functions,
)
)
click.echo("Create collection successfully!")
36 changes: 26 additions & 10 deletions milvus_cli/scripts/data_cli.py
Original file line number Diff line number Diff line change
@@ -220,7 +220,7 @@ def insert_row(obj):
"Partition name",
default="_default",
)
fields = obj.collection.list_field_names_and_types(collectionName)
fields = obj.collection.list_fields_info(collectionName)
data = {}
for field in fields:
fieldType = field["type"]
@@ -249,8 +249,8 @@ def insert_row(obj):
value = [float(x) for x in value.strip("[]").split(",")]
elif fieldType in ["JSON"]:
value = json.loads(value)

data[field["name"]] = value
if value != None:
data[field["name"]] = value
result = obj.data.insert(collectionName, data, partitionName)
except Exception as e:
click.echo("Error!\n{}".format(str(e)))
@@ -272,17 +272,33 @@ def search(obj):
"Collection name", type=click.Choice(obj.collection.list_collections())
)

vector = click.prompt(
"The vectors of search data (input as a list, e.g., [1,2,3]). The length should match your dimension.",
)
# format vector from string to list
vector = [float(x) for x in vector.strip("[]").split(",")]
data = [vector]

annsField = click.prompt(
"The vector field used to search of collection",
type=click.Choice(obj.collection.list_field_names(collectionName)),
)

fields_info = obj.collection.list_fields_info(collectionName)
annsField_info = next(
(field for field in fields_info if field["name"] == annsField), None
)
if not annsField_info:
click.echo(f"Field {annsField} not found in collection {collectionName}.")
return
isFunctionOut = annsField_info["isFunctionOut"]
data = None
if isFunctionOut:
text = click.prompt(
"Enter the text to search",
)
data = [text]
else:
vector = click.prompt(
"The vectors of search data (input as a list, e.g., [1,2,3]). The length should match your dimension.",
)
# format vector from string to list
vector = [float(x) for x in vector.strip("[]").split(",")]
data = [vector]

indexes = obj.index.list_indexes(collectionName, onlyData=True)
indexDetails = None
for index in indexes:
2 changes: 1 addition & 1 deletion milvus_cli/scripts/index_cli.py
Original file line number Diff line number Diff line change
@@ -27,7 +27,7 @@ def createIndex(obj):
Index type (FLAT, IVF_FLAT, IVF_SQ8, IVF_PQ, RNSG, HNSW, ANNOY, AUTOINDEX): IVF_FLAT
Index metric type (L2, IP, HAMMING, TANIMOTO, COSINE): L2
Index metric type (L2, IP, HAMMING, TANIMOTO, COSINE, BM25, ''): L2
Index params nlist: 2

0 comments on commit e243574

Please sign in to comment.