diff --git a/web/src/assets/svg/chunk-method/knowledge-graph-01.svg b/web/src/assets/svg/chunk-method/knowledge-graph-01.svg new file mode 100644 index 0000000000..f5e77a0947 --- /dev/null +++ b/web/src/assets/svg/chunk-method/knowledge-graph-01.svg @@ -0,0 +1,98 @@ + \ No newline at end of file diff --git a/web/src/assets/svg/chunk-method/knowledge-graph-02.svg b/web/src/assets/svg/chunk-method/knowledge-graph-02.svg new file mode 100644 index 0000000000..d2d7507edd --- /dev/null +++ b/web/src/assets/svg/chunk-method/knowledge-graph-02.svg @@ -0,0 +1,93 @@ + \ No newline at end of file diff --git a/web/src/components/chunk-method-modal/hooks.ts b/web/src/components/chunk-method-modal/hooks.ts index 9b76d1bcc5..ae4a28c3f6 100644 --- a/web/src/components/chunk-method-modal/hooks.ts +++ b/web/src/components/chunk-method-modal/hooks.ts @@ -27,7 +27,7 @@ const ParserListMap = new Map([ 'one', 'qa', 'manual', - 'knowledge_graph' + 'knowledge_graph', ], ], [ @@ -67,7 +67,7 @@ const ParserListMap = new Map([ ], [['md'], ['naive', 'qa', 'knowledge_graph']], [['json'], ['naive', 'knowledge_graph']], - [['eml'], ['email']] + [['eml'], ['email']], ]); const getParserList = ( diff --git a/web/src/locales/en.ts b/web/src/locales/en.ts index c5c4cf57d1..e2527c3d1e 100644 --- a/web/src/locales/en.ts +++ b/web/src/locales/en.ts @@ -199,7 +199,7 @@ export default { We assume manual has hierarchical section structure. We use the lowest section titles as pivots to slice documents. So, the figures and tables in the same section will not be sliced apart, and chunk size might be large.
`, - naive: `Supported file formats are DOCX, EXCEL, PPT, IMAGE, PDF, TXT.
+ naive: `Supported file formats are DOCX, EXCEL, PPT, IMAGE, PDF, TXT, MD, JSON, EML.
This method apply the naive ways to chunk files:
If you want to summarize something that needs all the context of an article and the selected LLM's context length covers the document length, you can try this method.
`, + knowledgeGraph: `Supported file formats are DOCX, EXCEL, PPT, IMAGE, PDF, TXT, MD, JSON, EML + +
After files are chunked, it uses the chunks to extract a knowledge graph and mind map of the entire document. This method applies the naive way to chunk files: +Successive text will be sliced into pieces, each of which is around 512 tokens.
+Next, chunks will be transmitted to the LLM to extract nodes and relationships of a knowledge graph, and a mind map.
+ +Mind the entity type you need to specify.`, useRaptor: 'Use RAPTOR to enhance retrieval', useRaptorTip: 'Recursive Abstractive Processing for Tree-Organized Retrieval, please refer to https://huggingface.co/papers/2401.18059', diff --git a/web/src/locales/zh-traditional.ts b/web/src/locales/zh-traditional.ts index 200b0b231d..27887d82a3 100644 --- a/web/src/locales/zh-traditional.ts +++ b/web/src/locales/zh-traditional.ts @@ -190,7 +190,7 @@ export default { 我們假設手冊具有分層部分結構。我們使用最低的部分標題作為對文檔進行切片的樞軸。 因此,同一部分中的圖和表不會被分割,並且塊大小可能會很大。 `, - naive: `支持的文件格式為DOCX、EXCEL、PPT、IMAGE、PDF、TXT。
+ naive: `支持的文件格式為DOCX、EXCEL、PPT、IMAGE、PDF、TXT、MD、JSON、EML。
此方法將簡單的方法應用於塊文件:
如果你要總結的東西需要一篇文章的全部上下文,並且所選LLM的上下文長度覆蓋了文檔長度,你可以嘗試這種方法。
`, + knowledgeGraph: `支援的檔案格式為DOCX、EXCEL、PPT、IMAGE、PDF、TXT、MD、JSON、EML + +
文件分塊後,使用分塊擷取整個文件的知識圖譜和心智圖。此方法將簡單的方法應用於區塊檔案: +連續的文字將被分割成多個片段,每個片段大約有 512 個令牌數。 +
接下來,區塊將傳送到LLM以提取知識圖譜和思維導圖的節點和關係。 + +
請注意您需要指定的條目類型。
`, useRaptor: '使用RAPTOR文件增強策略', useRaptorTip: '請參考 https://huggingface.co/papers/2401.18059', prompt: '提示詞', diff --git a/web/src/locales/zh.ts b/web/src/locales/zh.ts index 730e9038e0..e0ee6d157c 100644 --- a/web/src/locales/zh.ts +++ b/web/src/locales/zh.ts @@ -191,7 +191,7 @@ export default { 我们假设手册具有分层部分结构。 我们使用最低的部分标题作为对文档进行切片的枢轴。 因此,同一部分中的图和表不会被分割,并且块大小可能会很大。 `, - naive: `支持的文件格式为DOCX、EXCEL、PPT、IMAGE、PDF、TXT。
+ naive: `支持的文件格式为DOCX、EXCEL、PPT、IMAGE、PDF、TXT、MD、JSON、EML。
此方法将简单的方法应用于块文件:
如果你要总结的东西需要一篇文章的全部上下文,并且所选LLM的上下文长度覆盖了文档长度,你可以尝试这种方法。
`, + knowledgeGraph: `支持的文件格式为DOCX、EXCEL、PPT、IMAGE、PDF、TXT、MD、JSON、EML + +
文件分块后,使用分块提取整个文档的知识图谱和思维导图。此方法将简单的方法应用于分块文件: +连续的文本将被切成大约 512 个 token 数的块。
+接下来,将分块传输到 LLM 以提取知识图谱和思维导图的节点和关系。
+ +注意您需要指定的条目类型。`, useRaptor: '使用召回增强RAPTOR策略', useRaptorTip: '请参考 https://huggingface.co/papers/2401.18059', prompt: '提示词', diff --git a/web/src/pages/add-knowledge/components/knowledge-setting/category-panel.tsx b/web/src/pages/add-knowledge/components/knowledge-setting/category-panel.tsx index bf5552909f..c4eb8ab650 100644 --- a/web/src/pages/add-knowledge/components/knowledge-setting/category-panel.tsx +++ b/web/src/pages/add-knowledge/components/knowledge-setting/category-panel.tsx @@ -3,6 +3,7 @@ import { useTranslate } from '@/hooks/common-hooks'; import { useSelectParserList } from '@/hooks/user-setting-hooks'; import { Col, Divider, Empty, Row, Typography } from 'antd'; import DOMPurify from 'dompurify'; +import camelCase from 'lodash/camelCase'; import { useMemo } from 'react'; import styles from './index.less'; import { ImageMap } from './utils'; @@ -18,7 +19,7 @@ const CategoryPanel = ({ chunkMethod }: { chunkMethod: string }) => { if (item) { return { title: item.label, - description: t(item.value), + description: t(camelCase(item.value)), }; } return { title: '', description: '' }; diff --git a/web/src/pages/add-knowledge/components/knowledge-setting/hooks.ts b/web/src/pages/add-knowledge/components/knowledge-setting/hooks.ts index 303f804a2b..725c780eb0 100644 --- a/web/src/pages/add-knowledge/components/knowledge-setting/hooks.ts +++ b/web/src/pages/add-knowledge/components/knowledge-setting/hooks.ts @@ -37,6 +37,9 @@ export const useSubmitKnowledgeConfiguration = (form: FormInstance) => { }; }; +// The value that does not need to be displayed in the analysis method Select +const HiddenFields = ['email', 'picture', 'audio']; + export const useFetchKnowledgeConfigurationOnMount = (form: FormInstance) => { const parserList = useSelectParserList(); const allOptions = useSelectLlmOptionsByModelType(); @@ -62,7 +65,9 @@ export const useFetchKnowledgeConfigurationOnMount = (form: FormInstance) => { }, [form, knowledgeDetails]); return { - parserList, + 
parserList: parserList.filter( + (x) => !HiddenFields.some((y) => y === x.value), + ), embeddingModelOptions: allOptions[LlmModelType.Embedding], disabled: knowledgeDetails.chunk_num > 0, }; diff --git a/web/src/pages/add-knowledge/components/knowledge-setting/utils.ts b/web/src/pages/add-knowledge/components/knowledge-setting/utils.ts index 57d74c548b..3c4f94f43b 100644 --- a/web/src/pages/add-knowledge/components/knowledge-setting/utils.ts +++ b/web/src/pages/add-knowledge/components/knowledge-setting/utils.ts @@ -15,6 +15,7 @@ export const ImageMap = { resume: getImageName('resume', 2), table: getImageName('table', 2), one: getImageName('one', 2), + knowledge_graph: getImageName('knowledge-graph', 2), }; export const TextMap = {