Skip to content

Commit

Permalink
feat: Add delimiter field to naive parsing method infiniflow#1909
Browse files Browse the repository at this point in the history
  • Loading branch information
cike8899 committed Aug 12, 2024
1 parent ad48e8d commit c8763ad
Show file tree
Hide file tree
Showing 6 changed files with 46 additions and 1 deletion.
8 changes: 7 additions & 1 deletion web/src/components/chunk-method-modal/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import React, { useEffect, useMemo } from 'react';
import { useFetchParserListOnMount } from './hooks';

import { useTranslate } from '@/hooks/common-hooks';
import Delimiter from '../delimiter';
import EntityTypesItem from '../entity-types-item';
import LayoutRecognize from '../layout-recognize';
import ParseConfiguration, {
Expand Down Expand Up @@ -268,7 +269,12 @@ const ChunkMethodModal: React.FC<IProps> = ({
}
</Form.Item>
)}
{showMaxTokenNumber && <MaxTokenNumber></MaxTokenNumber>}
{showMaxTokenNumber && (
<>
<MaxTokenNumber></MaxTokenNumber>
<Delimiter></Delimiter>
</>
)}
{showRaptorParseConfiguration(selectedTag) && (
<ParseConfiguration></ParseConfiguration>
)}
Expand Down
34 changes: 34 additions & 0 deletions web/src/components/delimiter.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import { Form, Input } from 'antd';
import { useTranslation } from 'react-i18next';

/**
 * Props accepted by `DelimiterInput`.
 *
 * Both members are optional. `DelimiterInput` is rendered without explicit
 * props inside a `Form.Item`, so they are presumably supplied by the
 * surrounding form item — confirm against the antd version in use.
 */
interface IProps {
/** Current delimiter string; may be absent. */
value?: string | undefined;
/** Invoked with the updated delimiter string whenever the input text changes. */
onChange?: (val: string | undefined) => void;
}

/**
 * Text input for editing a delimiter string.
 *
 * Line-feed characters in `value` are rendered as the visible two-character
 * sequence `\n`. When the text changes, every `\n` sequence in the input is
 * converted back to a line feed before `onChange` is invoked.
 */
const DelimiterInput = ({ value, onChange }: IProps) => {
  // Line feed -> visible "\n" (stays undefined when no value is provided).
  const displayText = value?.replace(/\n/g, '\\n');

  return (
    <Input
      value={displayText}
      onChange={(event: React.ChangeEvent<HTMLInputElement>) => {
        // Visible "\n" -> line feed before reporting the new value upward.
        onChange?.(event.target.value.replace(/\\n/g, '\n'));
      }}
    ></Input>
  );
};

/**
 * Required form field for the delimiter setting, bound to the
 * `parser_config.delimiter` path of the enclosing form.
 *
 * The label is looked up under the `knowledgeDetails.delimiter` translation
 * key and the value is edited through `DelimiterInput`.
 */
const Delimiter = () => {
  const { t } = useTranslation();

  // `DelimiterInput` treats the form value as containing a real line feed:
  // it escapes line feeds to "\n" for display and converts typed "\n" back to
  // a line feed on change. The default must therefore start with an actual
  // line feed; a literal backslash + "n" would be submitted as-is whenever
  // the user leaves the field untouched, disagreeing with edited values.
  const defaultDelimiter = `\n!?;。;!?`;

  return (
    <Form.Item
      name={['parser_config', 'delimiter']}
      label={t('knowledgeDetails.delimiter')}
      initialValue={defaultDelimiter}
      rules={[{ required: true }]}
    >
      <DelimiterInput />
    </Form.Item>
  );
};

export default Delimiter;
1 change: 1 addition & 0 deletions web/src/locales/en.ts
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ export default {
rerankTip: `If it's empty. It uses embeddings of query and chunks to compuste vector cosine similarity. Otherwise, it uses rerank score in place of vector cosine similarity.`,
topK: 'Top-K',
topKTip: `K chunks will be fed into rerank models.`,
delimiter: `Delimiter`,
},
knowledgeConfiguration: {
titleDescription:
Expand Down
1 change: 1 addition & 0 deletions web/src/locales/zh-traditional.ts
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ export default {
rerankTip: `如果是空的。它使用查詢和塊的嵌入來構成矢量餘弦相似性。否則,它使用rerank評分代替矢量餘弦相似性。`,
topK: 'Top-K',
topKTip: `K塊將被送入Rerank型號。`,
delimiter: `分段標識符`,
},
knowledgeConfiguration: {
titleDescription: '在這裡更新您的知識庫詳細信息,尤其是解析方法。',
Expand Down
1 change: 1 addition & 0 deletions web/src/locales/zh.ts
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ export default {
rerankTip: `如果是空的。它使用查询和块的嵌入来构成矢量余弦相似性。否则,它使用rerank评分代替矢量余弦相似性。`,
topK: 'Top-K',
topKTip: `K块将被送入Rerank型号。`,
delimiter: `分段标识符`,
},
knowledgeConfiguration: {
titleDescription: '在这里更新您的知识库详细信息,尤其是解析方法。',
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import Delimiter from '@/components/delimiter';
import EntityTypesItem from '@/components/entity-types-item';
import LayoutRecognize from '@/components/layout-recognize';
import MaxTokenNumber from '@/components/max-token-number';
Expand Down Expand Up @@ -111,6 +112,7 @@ const ConfigurationForm = ({ form }: { form: FormInstance }) => {
{parserId === 'naive' && (
<>
<MaxTokenNumber></MaxTokenNumber>
<Delimiter></Delimiter>
<LayoutRecognize></LayoutRecognize>
</>
)}
Expand Down

0 comments on commit c8763ad

Please sign in to comment.