diff --git a/graph/component/__init__.py b/graph/component/__init__.py
index dfb288dc265..7bfd3f48856 100644
--- a/graph/component/__init__.py
+++ b/graph/component/__init__.py
@@ -10,6 +10,7 @@
from .rewrite import RewriteQuestion, RewriteQuestionParam
from .keyword import KeywordExtract, KeywordExtractParam
from .baidu import Baidu, BaiduParam
+from .duckduckgosearch import DuckDuckGoSearch, DuckDuckGoSearchParam
def component_class(class_name):
diff --git a/graph/component/baidu.py b/graph/component/baidu.py
index 394963e969c..ac196185461 100644
--- a/graph/component/baidu.py
+++ b/graph/component/baidu.py
@@ -50,12 +50,12 @@ def _run(self, history, **kwargs):
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.104 Safari/537.36'}
response = requests.get(url=url, headers=headers)
- baidu_res = re.findall(r'"contentText":"(.*?)"', response.text)
url_res = re.findall(r"'url': \\\"(.*?)\\\"}", response.text)
- for i in range(min(len(baidu_res), len(url_res))):
- baidu_res[i] += '' + url_res[i] + ''
-
- del url_res
+ title_res = re.findall(r"'title': \\\"(.*?)\\\",\\n", response.text)
+ body_res = re.findall(r"\"contentText\":\"(.*?)\"", response.text)
+ baidu_res = [re.sub('|', '', '' + title + ' ' + body) for url, title, body
+ in zip(url_res, title_res, body_res)]
+ del body_res, url_res, title_res
br = pd.DataFrame(baidu_res, columns=['content'])
print(">>>>>>>>>>>>>>>>>>>>>>>>>>\n", br)
diff --git a/graph/component/duckduckgosearch.py b/graph/component/duckduckgosearch.py
new file mode 100644
index 00000000000..4f70b89c825
--- /dev/null
+++ b/graph/component/duckduckgosearch.py
@@ -0,0 +1,62 @@
+#
+# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import random
+from abc import ABC
+from functools import partial
+from duckduckgosearch import DDGS
+import pandas as pd
+
+from graph.component.base import ComponentBase, ComponentParamBase
+
+
+class DuckDuckGoSearchParam(ComponentParamBase):
+    """Parameters of the DuckDuckGoSearch component: result cap and search channel."""
+
+    def __init__(self):
+        super().__init__()
+        self.top_n = 10  # forwarded as max_results to the DDGS query
+        self.channel = "text"  # "text" for web search, "news" for news search
+
+    def check(self):
+        """Validate top_n and channel with the ComponentParamBase check helpers."""
+        channels = ["text", "news"]
+        self.check_positive_integer(self.top_n, "Top N")
+        self.check_valid_value(self.channel, "Web Search or News", channels)
+
+
+class DuckDuckGoSearch(ComponentBase, ABC):
+    """Component that sends its input content to DuckDuckGo text or news search."""
+
+    component_name = "DuckDuckGoSearch"
+
+    def _run(self, history, **kwargs):
+        """Return a one-column 'content' DataFrame of hits for the joined input content."""
+        ans = self.get_input()
+        ans = " - ".join(ans["content"]) if "content" in ans else ""
+        if not ans:
+            # Baidu is not imported in this module, so the old Baidu.be_output raised NameError.
+            # `no` is not set by DuckDuckGoSearchParam; default to "" if the base lacks it.
+            return DuckDuckGoSearch.be_output(getattr(self._param, "no", ""))
+
+        with DDGS() as ddgs:
+            # channel lives on self._param; text and news hits both provide 'title' and 'body'.
+            search = ddgs.news if self._param.channel == "news" else ddgs.text
+            duck_res = ['' + i["title"] + ' ' + i["body"]
+                        for i in search(ans, max_results=self._param.top_n)]
+
+        dr = pd.DataFrame(duck_res, columns=['content'])
+        print(">>>>>>>>>>>>>>>>>>>>>>>>>>\n", dr)
+        return dr