Skip to content

Commit

Permalink
perf: full text collection and search code
Browse files Browse the repository at this point in the history
  • Loading branch information
c121914yu committed Jan 3, 2025
1 parent f163362 commit 31e1853
Show file tree
Hide file tree
Showing 15 changed files with 290 additions and 161 deletions.
4 changes: 3 additions & 1 deletion docSite/content/zh-cn/docs/development/upgrading/4818.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,6 @@ weight: 806

1.
2. 新增 - 支持部门架构权限模式
3. 优化 - 图片上传安全校验。并增加头像图片唯一存储,确保不会累计存储。
3. 优化 - 图片上传安全校验。并增加头像图片唯一存储,确保不会累计存储。
4. 优化 - Mongo 全文索引表分离。
5. 优化 - 知识库检索查询语句合并,同时减少查库数量。
9 changes: 9 additions & 0 deletions packages/global/core/dataset/type.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,15 @@ export type DatasetDataSchemaType = {
rebuilding?: boolean;
};

/**
 * Shape of one record of the dataset data text collection: the full-text
 * token string of a dataset data item together with the ids it belongs to.
 *
 * NOTE(review): the ids are typed as `string` here; the matching Mongoose
 * schema declares teamId/datasetId/collectionId as ObjectId paths - confirm
 * how callers are expected to convert between the two.
 */
export type DatasetDataTextSchemaType = {
/** Id of the text record itself. */
_id: string;
/** Id of the owning team. */
teamId: string;
/** Id of the dataset the text belongs to. */
datasetId: string;
/** Id of the dataset collection the text belongs to. */
collectionId: string;
/** Id of the dataset data item this text was produced for. */
dataId: string;
/** Token string that the full-text index is built on. */
fullTextToken: string;
};

export type DatasetTrainingSchemaType = {
_id: string;
userId: string;
Expand Down
13 changes: 5 additions & 8 deletions packages/service/core/chat/chatItemSchema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -86,24 +86,21 @@ const ChatItemSchema = new Schema({
});

try {
ChatItemSchema.index({ dataId: 1 }, { background: true });
ChatItemSchema.index({ dataId: 1 });
/* delete by app;
delete by chat id;
get chat list;
get chat logs;
close custom feedback;
*/
ChatItemSchema.index({ appId: 1, chatId: 1, dataId: 1 }, { background: true });
ChatItemSchema.index({ appId: 1, chatId: 1, dataId: 1 });
// admin charts
ChatItemSchema.index({ time: -1, obj: 1 }, { background: true });
ChatItemSchema.index({ time: -1, obj: 1 });
// timer, clear history
ChatItemSchema.index({ teamId: 1, time: -1 }, { background: true });
ChatItemSchema.index({ teamId: 1, time: -1 });

// Admin charts
ChatItemSchema.index(
{ obj: 1, time: -1 },
{ background: true, partialFilterExpression: { obj: 'Human' } }
);
ChatItemSchema.index({ obj: 1, time: -1 }, { partialFilterExpression: { obj: 'Human' } });
} catch (error) {
console.log(error);
}
Expand Down
12 changes: 6 additions & 6 deletions packages/service/core/chat/chatSchema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -81,19 +81,19 @@ const ChatSchema = new Schema({
});

try {
ChatSchema.index({ chatId: 1 }, { background: true });
ChatSchema.index({ chatId: 1 });
// get user history
ChatSchema.index({ tmbId: 1, appId: 1, top: -1, updateTime: -1 }, { background: true });
ChatSchema.index({ tmbId: 1, appId: 1, top: -1, updateTime: -1 });
// delete by appid; clear history; init chat; update chat; auth chat; get chat;
ChatSchema.index({ appId: 1, chatId: 1 }, { background: true });
ChatSchema.index({ appId: 1, chatId: 1 });

// get chat logs;
ChatSchema.index({ teamId: 1, appId: 1, updateTime: -1 }, { background: true });
ChatSchema.index({ teamId: 1, appId: 1, updateTime: -1 });
// get share chat history
ChatSchema.index({ shareId: 1, outLinkUid: 1, updateTime: -1 }, { background: true });
ChatSchema.index({ shareId: 1, outLinkUid: 1, updateTime: -1 });

// timer, clear history
ChatSchema.index({ teamId: 1, updateTime: -1 }, { background: true });
ChatSchema.index({ teamId: 1, updateTime: -1 });
} catch (error) {
console.log(error);
}
Expand Down
14 changes: 10 additions & 4 deletions packages/service/core/dataset/collection/controller.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import { pushDataListToTrainingQueue } from '../training/controller';
import { MongoImage } from '../../../common/file/image/schema';
import { hashStr } from '@fastgpt/global/common/string/tools';
import { addDays } from 'date-fns';
import { MongoDatasetDataText } from '../data/dataTextSchema';

export const createCollectionAndInsertData = async ({
dataset,
Expand Down Expand Up @@ -240,12 +241,12 @@ export const delCollectionRelatedSource = async ({
.map((item) => item?.metadata?.relatedImgId || '')
.filter(Boolean);

// delete files
// Delete files
await delFileByFileIdList({
bucketName: BucketNameEnum.dataset,
fileIdList
});
// delete images
// Delete images
await delImgByRelatedId({
teamId,
relateIds: relatedImageIds,
Expand Down Expand Up @@ -273,7 +274,7 @@ export async function delCollection({
const datasetIds = Array.from(new Set(collections.map((item) => String(item.datasetId))));
const collectionIds = collections.map((item) => String(item._id));

// delete training data
// Delete training data
await MongoDatasetTraining.deleteMany({
teamId,
datasetIds: { $in: datasetIds },
Expand All @@ -285,11 +286,16 @@ export async function delCollection({
await delCollectionRelatedSource({ collections, session });
}

// delete dataset.datas
// Delete dataset_datas.
// The filter key must be the schema path `datasetId`; the former `datasetIds`
// key is not a path of the schema, so depending on Mongoose's `strictQuery`
// setting it was silently dropped, matched no document, or threw.
await MongoDatasetData.deleteMany(
  { teamId, datasetId: { $in: datasetIds }, collectionId: { $in: collectionIds } },
  { session }
);
// Delete dataset_data_texts (same filter shape as above, same session).
// NOTE(review): the MongoDatasetTraining.deleteMany call earlier in this
// function also filters on `datasetIds` - verify that key against its schema.
await MongoDatasetDataText.deleteMany(
  { teamId, datasetId: { $in: datasetIds }, collectionId: { $in: collectionIds } },
  { session }
);

// delete collections
await MongoDatasetCollection.deleteMany(
Expand Down
13 changes: 10 additions & 3 deletions packages/service/core/dataset/controller.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import { ClientSession } from '../../common/mongo';
import { MongoDatasetTraining } from './training/schema';
import { MongoDatasetData } from './data/schema';
import { deleteDatasetDataVector } from '../../common/vectorStore/controller';
import { MongoDatasetDataText } from './data/dataTextSchema';

/* ============= dataset ========== */
/* find all datasetId by top datasetId */
Expand Down Expand Up @@ -92,7 +93,7 @@ export async function delDatasetRelevantData({
{ session }
).lean();

// image and file
// Delete Image and file
await delCollectionRelatedSource({ collections, session });

// delete collections
Expand All @@ -101,9 +102,15 @@ export async function delDatasetRelevantData({
datasetId: { $in: datasetIds }
}).session(session);

// delete dataset.datas(Not need session)
// No session delete:
// Delete dataset_data_texts
await MongoDatasetDataText.deleteMany({
teamId,
datasetId: { $in: datasetIds }
});
// delete dataset_datas
await MongoDatasetData.deleteMany({ teamId, datasetId: { $in: datasetIds } });

// no session delete: delete files, vector data
// Delete vector data
await deleteDatasetDataVector({ teamId, datasetIds });
}
48 changes: 48 additions & 0 deletions packages/service/core/dataset/data/dataTextSchema.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import { connectionMongo, getMongoModel } from '../../../common/mongo';
const { Schema } = connectionMongo;
import {
  DatasetDataSchemaType,
  DatasetDataTextSchemaType
} from '@fastgpt/global/core/dataset/type.d';
import { TeamCollectionName } from '@fastgpt/global/support/user/team/constant';
import { DatasetCollectionName } from '../schema';
import { DatasetColCollectionName } from '../collection/schema';
import { DatasetDataCollectionName } from './schema';

/** Name under which the dataset data text model is registered. */
export const DatasetDataTextCollectionName = 'dataset_data_texts';

/** Builds the definition of a mandatory ObjectId path that references `ref`. */
const requiredObjectIdRef = (ref: string) => ({
  type: Schema.Types.ObjectId,
  ref,
  required: true as const
});

/**
 * Schema of a dataset data text record: the owning team / dataset / collection,
 * the id of the dataset data item, and that item's full-text token string.
 */
const DatasetDataTextSchema = new Schema({
  teamId: requiredObjectIdRef(TeamCollectionName),
  datasetId: requiredObjectIdRef(DatasetCollectionName),
  collectionId: requiredObjectIdRef(DatasetColCollectionName),
  // Declared as a String path (not ObjectId) while still referencing the
  // dataset data collection.
  dataId: { type: String, ref: DatasetDataCollectionName, required: true },
  fullTextToken: { type: String, required: true }
});

// Register the indexes of the dataset data text schema.
// A failure while declaring them is only logged, never rethrown.
try {
// Compound index: ascending teamId and datasetId followed by a text index on
// fullTextToken (presumably backs team/dataset-scoped full-text search - confirm
// against the query code).
DatasetDataTextSchema.index({ teamId: 1, datasetId: 1, fullTextToken: 'text' });
// Hashed index on dataId; hashed indexes serve equality matches only.
DatasetDataTextSchema.index({ dataId: 'hashed' });
} catch (error) {
console.log(error);
}

/**
 * Mongoose model for dataset data text records.
 *
 * Typed with `DatasetDataTextSchemaType` (the record shape that carries
 * `dataId` and `fullTextToken`) rather than `DatasetDataSchemaType`, which
 * describes a different collection and has no `dataId` field.
 */
export const MongoDatasetDataText = getMongoModel<DatasetDataTextSchemaType>(
  DatasetDataTextCollectionName,
  DatasetDataTextSchema
);
14 changes: 3 additions & 11 deletions packages/service/core/dataset/data/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -71,17 +71,8 @@ const DatasetDataSchema = new Schema({
type: Number,
default: 0
},
inited: {
type: Boolean
},
rebuilding: Boolean
});

DatasetDataSchema.virtual('collection', {
ref: DatasetColCollectionName,
localField: 'collectionId',
foreignField: '_id',
justOne: true
rebuilding: Boolean,
inited: Boolean
});

try {
Expand All @@ -100,6 +91,7 @@ try {
DatasetDataSchema.index({ updateTime: 1 });
// rebuild data
DatasetDataSchema.index({ rebuilding: 1, teamId: 1, datasetId: 1 });
DatasetDataSchema.index({ inited: 'hashed' });
} catch (error) {
console.log(error);
}
Expand Down
Loading

0 comments on commit 31e1853

Please sign in to comment.