Skip to content

Commit

Permalink
Add upload file by knowledge base name API. (#539)
Browse files Browse the repository at this point in the history
### What problem does this PR solve?
Add upload file by knowledge base name API.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
- [x] Documentation Update

---------

Co-authored-by: chrysanthemum-boy <[email protected]>
  • Loading branch information
chrysanthemum-boy and chrysanthemum-boy authored Apr 25, 2024
1 parent 4130e5c commit 26003b5
Show file tree
Hide file tree
Showing 3 changed files with 155 additions and 4 deletions.
82 changes: 81 additions & 1 deletion api/apps/api_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,28 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import os
import re
from datetime import datetime, timedelta
from flask import request
from flask_login import login_required, current_user

from api.db import FileType, ParserType
from api.db.db_models import APIToken, API4Conversation
from api.db.services import duplicate_name
from api.db.services.api_service import APITokenService, API4ConversationService
from api.db.services.dialog_service import DialogService, chat
from api.db.services.document_service import DocumentService
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.user_service import UserTenantService
from api.settings import RetCode
from api.utils import get_uuid, current_timestamp, datetime_format
from api.utils.api_utils import server_error_response, get_data_error_result, get_json_result, validate_request
from itsdangerous import URLSafeTimedSerializer

from api.utils.file_utils import filename_type, thumbnail
from rag.utils import MINIO


def generate_confirmation_token(tenent_id):
serializer = URLSafeTimedSerializer(tenent_id)
Expand Down Expand Up @@ -191,4 +201,74 @@ def get(conversation_id):

return get_json_result(data=conv.to_dict())
except Exception as e:
return server_error_response(e)
return server_error_response(e)


@manager.route('/document/upload', methods=['POST'])
@validate_request("kb_name")
def upload():
token = request.headers.get('Authorization').split()[1]
objs = APIToken.query(token=token)
if not objs:
return get_json_result(
data=False, retmsg='Token is not valid!"', retcode=RetCode.AUTHENTICATION_ERROR)

kb_name = request.form.get("kb_name").strip()
tenant_id = objs[0].tenant_id

try:
e, kb = KnowledgebaseService.get_by_name(kb_name, tenant_id)
if not e:
return get_data_error_result(
retmsg="Can't find this knowledgebase!")
kb_id = kb.id
except Exception as e:
return server_error_response(e)

if 'file' not in request.files:
return get_json_result(
data=False, retmsg='No file part!', retcode=RetCode.ARGUMENT_ERROR)

file = request.files['file']
if file.filename == '':
return get_json_result(
data=False, retmsg='No file selected!', retcode=RetCode.ARGUMENT_ERROR)
try:
if DocumentService.get_doc_count(kb.tenant_id) >= int(os.environ.get('MAX_FILE_NUM_PER_USER', 8192)):
return get_data_error_result(
retmsg="Exceed the maximum file number of a free user!")

filename = duplicate_name(
DocumentService.query,
name=file.filename,
kb_id=kb_id)
filetype = filename_type(filename)
if not filetype:
return get_data_error_result(
retmsg="This type of file has not been supported yet!")

location = filename
while MINIO.obj_exist(kb_id, location):
location += "_"
blob = request.files['file'].read()
MINIO.put(kb_id, location, blob)
doc = {
"id": get_uuid(),
"kb_id": kb.id,
"parser_id": kb.parser_id,
"parser_config": kb.parser_config,
"created_by": kb.tenant_id,
"type": filetype,
"name": filename,
"location": location,
"size": len(blob),
"thumbnail": thumbnail(filename, blob)
}
if doc["type"] == FileType.VISUAL:
doc["parser_id"] = ParserType.PICTURE.value
if re.search(r"\.(ppt|pptx|pages)$", filename):
doc["parser_id"] = ParserType.PRESENTATION.value
doc = DocumentService.insert(doc)
return get_json_result(data=doc.to_json())
except Exception as e:
return server_error_response(e)
19 changes: 17 additions & 2 deletions api/db/services/knowledgebase_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ def get_by_tenant_ids(cls, joined_tenant_ids, user_id,
page_number, items_per_page, orderby, desc):
kbs = cls.model.select().where(
((cls.model.tenant_id.in_(joined_tenant_ids) & (cls.model.permission ==
TenantPermission.TEAM.value)) | (cls.model.tenant_id == user_id))
TenantPermission.TEAM.value)) | (
cls.model.tenant_id == user_id))
& (cls.model.status == StatusEnum.VALID.value)
)
if desc:
Expand Down Expand Up @@ -56,7 +57,8 @@ def get_detail(cls, kb_id):
cls.model.chunk_num,
cls.model.parser_id,
cls.model.parser_config]
kbs = cls.model.select(*fields).join(Tenant, on=((Tenant.id == cls.model.tenant_id) & (Tenant.status == StatusEnum.VALID.value))).where(
kbs = cls.model.select(*fields).join(Tenant, on=(
(Tenant.id == cls.model.tenant_id) & (Tenant.status == StatusEnum.VALID.value))).where(
(cls.model.id == kb_id),
(cls.model.status == StatusEnum.VALID.value)
)
Expand Down Expand Up @@ -86,6 +88,7 @@ def dfs_update(old, new):
old[k] = list(set(old[k] + v))
else:
old[k] = v

dfs_update(m.parser_config, config)
cls.update_by_id(id, {"parser_config": m.parser_config})

Expand All @@ -97,3 +100,15 @@ def get_field_map(cls, ids):
if k.parser_config and "field_map" in k.parser_config:
conf.update(k.parser_config["field_map"])
return conf

@classmethod
@DB.connection_context()
def get_by_name(cls, kb_name, tenant_id):
kb = cls.model.select().where(
(cls.model.name == kb_name)
& (cls.model.tenant_id == tenant_id)
& (cls.model.status == StatusEnum.VALID.value)
)
if kb:
return True, kb[0]
return False, None
58 changes: 57 additions & 1 deletion docs/conversation_api.md
Original file line number Diff line number Diff line change
Expand Up @@ -303,5 +303,61 @@ This will be called to get the answer to users' questions.
## Get document content or image

This is usually used when display content of citation.
### Path: /document/get/\<id\>
### Path: /api/document/get/\<id\>
### Method: GET

## Upload file

This is usually used when upload a file to.
### Path: /api/document/upload/
### Method: POST

### Parameter:

| name | type | optional | description |
|---------|--------|----------|----------------------------------------|
| file | file | No | Upload file. |
| kb_name | string | No | Choose the upload knowledge base name. |

### Response
```json
{
"data": {
"chunk_num": 0,
"create_date": "Thu, 25 Apr 2024 14:30:06 GMT",
"create_time": 1714026606921,
"created_by": "553ec818fd5711ee8ea63043d7ed348e",
"id": "41e9324602cd11ef9f5f3043d7ed348e",
"kb_id": "06802686c0a311ee85d6246e9694c130",
"location": "readme.txt",
"name": "readme.txt",
"parser_config": {
"field_map": {
},
"pages": [
[
0,
1000000
]
]
},
"parser_id": "general",
"process_begin_at": null,
"process_duation": 0.0,
"progress": 0.0,
"progress_msg": "",
"run": "0",
"size": 929,
"source_type": "local",
"status": "1",
"thumbnail": null,
"token_num": 0,
"type": "doc",
"update_date": "Thu, 25 Apr 2024 14:30:06 GMT",
"update_time": 1714026606921
},
"retcode": 0,
"retmsg": "success"
}

```

0 comments on commit 26003b5

Please sign in to comment.