Skip to content

Commit

Permalink
chore: update
Browse files Browse the repository at this point in the history
  • Loading branch information
Soontao committed Aug 11, 2024
1 parent 87272bf commit 008339c
Show file tree
Hide file tree
Showing 6 changed files with 63 additions and 6 deletions.
4 changes: 3 additions & 1 deletion lib/config.js
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@ const value = {
},
longWarningThreshold: parseFloat(envs.LONG_WARNING_THRESHOLD, 10) || 5, // in seconds
prefetch: Boolean(envs.PRE_FETCH),
ua: envs.UA || "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
ua:
envs.UA ||
"Mozilla/5.0 (Linux; Android 10; Redmi Note 8 Pro) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Mobile Safari/537.36",
listenInaddrAny: envs.LISTEN_INADDR_ANY || 1, // 是否允许公网连接,取值 0 1
requestRetry: parseInt(envs.REQUEST_RETRY) || 2, // 请求失败重试次数
// 是否显示 Debug 信息,取值 boolean 'false' 'key' ,取值为 'false' false 时永远不显示,取值为 'key' 时带上 ?debug=key 显示
Expand Down
15 changes: 15 additions & 0 deletions lib/customize/bbc/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
const { createGenericEndpoint, linkExtractors, contentExtractors } = require("@/utils/common-utils");
const { fetchTextWithCrossWallProxy } = require("@/utils/http");

// Options for the BBC News endpoint, served at "/bbc-news".
// The entry URL is an RSS document: it is fetched with fetchTextWithCrossWallProxy
// and its article links are pulled out by the feed-XML link extractor.
const bbcNewsOptions = {
  feedTitle: "BBC News",
  endpointPath: "/bbc-news",
  translateTitle: true,
  language: "en",
  entryUrl: "https://feeds.bbci.co.uk/news/rss.xml",
  fetchText: fetchTextWithCrossWallProxy,
  linkExtractor: linkExtractors.feedXmlLinkExtractor(),
  // NOTE(review): relies on common-utils exporting a `js` key under contentExtractors — confirm.
  contentExtractor: contentExtractors.js,
};

const endpoint = createGenericEndpoint(bbcNewsOptions);

module.exports = endpoint;
30 changes: 27 additions & 3 deletions lib/utils/common-utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,34 @@ const toTitleCase = (str) =>
.map((word) => word.replace(word[0], word[0].toUpperCase()))
.join(" ");

function domAElementLinkExtractor(selector, prefix = "") {
/**
 * Build a link extractor that reads the `href` attribute of every element
 * matched by `selector`.
 *
 * @param {string} [selector="a"] selector passed to `$`
 * @param {string} [prefix=""] value placed in front of every extracted `href`
 * @returns {function} function taking `$` and returning an array with one
 *   `prefix + href` entry per matched element
 */
function domAElementLinkExtractor(selector = "a", prefix = "") {
  return ($) => {
    const matchedElements = $(selector).get();
    const links = [];
    for (const element of matchedElements) {
      const href = $(element).attr("href");
      links.push(prefix + href);
    }
    return links;
  };
}

/**
 * Build a link extractor that keeps only the `<a>` hrefs matching a pattern.
 *
 * @param {RegExp|string} matchReg pattern handed to `String.prototype.match`
 * @param {string} [prefix=""] value placed in front of every kept `href`
 * @returns {function} function taking `$` and returning an array of
 *   `prefix + href` for each anchor whose `href` matches `matchReg`
 */
function fuzzyALinkSelector(matchReg, prefix = "") {
  return ($) =>
    $("a")
      .get()
      .map((listItem) => $(listItem).attr("href"))
      // Anchors without an href attribute yield undefined; skip them so that
      // the .match() call below cannot throw a TypeError.
      .filter((href) => typeof href === "string")
      .filter((href) => {
        const match = href.match(matchReg);
        logger.debug(`fuzzyALinkSelector: ${href} ${match}`);
        return Boolean(match);
      })
      .map((href) => prefix + href);
}

/**
 * Build a link extractor for RSS feed content.
 *
 * `$` is expected to wrap a valid RSS feed. For every node matched by
 * `item link`, the URL is read from the text data of the node that FOLLOWS it
 * (`link.next.data`), not from the element's own children.
 * NOTE(review): presumably the feed is parsed in HTML mode, where `<link>` has
 * no children and its text becomes the next sibling — confirm against the parser setup.
 *
 * @returns {function} function taking `$` and returning an array of trimmed,
 *   non-empty link strings
 */
function feedXmlLinkExtractor() {
  return ($) =>
    $("item link")
      .get()
      .map((link) => link?.next?.data?.trim())
      // Drop undefined / empty results so callers never receive a non-URL entry.
      .filter(Boolean);
}

/**
*
* @param {cheerio.Root} page
Expand Down Expand Up @@ -99,7 +120,7 @@ function createGenericEndpoint(options) {
ctx.state.skip_pure = options.skipPure;

if (options.linkExtractor && (options.contentExtractor || options.jsonExtractor)) {
const links = uniq(options.linkExtractor($).slice(0, options.maxItemsInList));
const links = uniq(options.linkExtractor($)).slice(0, options.maxItemsInList);

if (links.length === 0) {
logger.warn("no links found", { baseUrl: entryUrlValue });
Expand All @@ -120,7 +141,8 @@ function createGenericEndpoint(options) {
: await options.jsonExtractor(JSON.parse(content));

if (article === undefined) {
logger.warn("no content for link", link);
logger.warn("no content for link", link, "extractor", options.contentExtractor?.name);
logger.debug("content is", content);
return undefined;
}

Expand Down Expand Up @@ -202,6 +224,8 @@ module.exports = {
linkExtractors: {
domAElementLinkExtractor,
aLinkExtractor: domAElementLinkExtractor,
fuzzyALinkSelector,
feedXmlLinkExtractor,
},
contentExtractors: {
jsContentExtractor,
Expand Down
18 changes: 16 additions & 2 deletions lib/utils/http.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
const config = require("@/config");
const logger = require("./logger");

function fetchText(url, encoding = "utf-8") {
return fetch(url, {
Expand All @@ -8,7 +9,12 @@ function fetchText(url, encoding = "utf-8") {
referer: url,
},
})
.then((res) => res.arrayBuffer())
.then((res) => {
if (res.status !== 200) {
logger.debug("fetch url", url, res.status, "headers", res.headers);
}
return res.arrayBuffer();
})
.then((buff) => new TextDecoder(encoding).decode(buff));
}

Expand All @@ -34,8 +40,16 @@ function fetchTextWithCrossWallProxy(url, encoding = "utf-8") {
bodyTimeout: 10_000,
maxRedirections: 2,
}),
headers: {
"User-Agent": config.value.ua,
},
})
.then((res) => res.arrayBuffer())
.then((res) => {
if (res.status !== 200) {
logger.debug("fetch url", url, res.status, "headers", res.headers);
}
return res.arrayBuffer();
})
.then((buff) => new TextDecoder(encoding).decode(buff));
}

Expand Down
1 change: 1 addition & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
"@koa/router": "^12.0.1",
"@newdash/newdash": "^5.23.1",
"@node-rs/jieba": "^1.10.3",
"@xmldom/xmldom": "^0.8.10",
"art-template": "4.13.2",
"cheerio": "1.0.0-rc.12",
"chrono-node": "^2.7.6",
Expand Down

0 comments on commit 008339c

Please sign in to comment.