Skip to content

Commit

Permalink
Use range など (#727)
Browse files Browse the repository at this point in the history
  • Loading branch information
mei23 authored Mar 17, 2024
1 parent 7817432 commit ea65977
Show file tree
Hide file tree
Showing 14 changed files with 77 additions and 39 deletions.
2 changes: 1 addition & 1 deletion built/general.d.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
import { SummalyEx } from './summaly';
declare const _default: (url: URL, lang?: string | null) => Promise<SummalyEx>;
declare const _default: (url: URL, lang?: string | null, useRange?: boolean) => Promise<SummalyEx>;
export default _default;
4 changes: 2 additions & 2 deletions built/general.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@ const cleanup_title_1 = require("./utils/cleanup-title");
const decode_entities_1 = require("./utils/decode-entities");
const got_1 = require("./utils/got");
const cleanup_url_1 = require("./utils/cleanup-url");
exports.default = (url, lang = null) => __awaiter(void 0, void 0, void 0, function* () {
exports.default = (url, lang = null, useRange = false) => __awaiter(void 0, void 0, void 0, function* () {
var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m, _o, _p, _q, _r, _s, _t, _u, _v, _w, _x, _y, _z, _0, _1, _2, _3, _4, _5, _6;
if (lang && !lang.match(/^[\w-]+(\s*,\s*[\w-]+)*$/))
lang = null;
const res = yield (0, got_1.scpaping)(url.href, { lang: lang || undefined });
const res = yield (0, got_1.scpaping)(url.href, { lang: lang || undefined, useRange });
const $ = res.$;
const landingUrl = new URL(res.response.url);
const twitterCard = (_a = $('meta[name="twitter:card"]').attr('content')) !== null && _a !== void 0 ? _a : $('meta[property="twitter:card"]').attr('content');
Expand Down
4 changes: 2 additions & 2 deletions built/index.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ type RequestOptions = {
*/
lang?: string | null;
/**
* Whether follow redirects
* Use range for the request
*/
followRedirects?: boolean;
useRange?: boolean;
};
export declare class Summary {
private plugins;
Expand Down
3 changes: 1 addition & 2 deletions built/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ class Summary {
return __awaiter(this, void 0, void 0, function* () {
const opts = Object.assign({
lang: null,
followRedirects: true,
}, requestOptions);
const _url = new URL(url);
// pre
Expand All @@ -50,7 +49,7 @@ class Summary {
return summary;
}
else {
let summary = yield (0, general_1.default)(_url, opts.lang);
let summary = yield (0, general_1.default)(_url, opts.lang, opts.useRange);
if (summary == null)
throw 'failed summarize';
const landingUrl = summary.url;
Expand Down
10 changes: 2 additions & 8 deletions built/server/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ const load_config_1 = require("./load-config");
const h3 = require("h3");
const http_1 = require("http");
const h3_typebox_1 = require("h3-typebox");
const status_error_1 = require("../utils/status-error");
const config = (0, load_config_1.default)();
const summaryInstance = new __1.Summary({
allowedPlugins: config.allowedPlugins
Expand All @@ -30,19 +29,14 @@ router.get('/url', h3.eventHandler((event) => __awaiter(void 0, void 0, void 0,
try {
const summary = yield summaryInstance.summary(query.url, {
lang: query.lang,
followRedirects: false,
useRange: config.useRange,
});
h3.setResponseHeader(event, 'Cache-Control', 'public, max-age=604800');
return summary;
}
catch (e) {
console.log(`summaly error: ${e} ${query.url}`);
if (e instanceof status_error_1.StatusError && e.isPermanentError) {
h3.setResponseStatus(event, 400);
}
else {
h3.setResponseStatus(event, 500);
}
h3.setResponseStatus(event, 422);
h3.setResponseHeader(event, 'Content-Type', 'text/plain');
h3.setResponseHeader(event, 'Cache-Control', 'public, max-age=3600');
return 'error';
Expand Down
1 change: 1 addition & 0 deletions built/server/load-config.d.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/**
 * Shape of the server configuration object returned by this module's
 * default export. Both settings are optional.
 */
type Config = {
/**
 * Passed by the server as `allowedPlugins` to the `Summary` constructor.
 * NOTE(review): presumably the names of the plugins to enable — confirm
 * against the `Summary` implementation.
 */
allowedPlugins?: string[];
/**
 * Passed by the server as the `useRange` request option. When true, the
 * scraper adds a `range: bytes=0-(MAX_RESPONSE_SIZE - 1)` request header.
 */
useRange?: boolean;
};
export default function (): Config;
export {};
1 change: 1 addition & 0 deletions built/utils/got.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import * as Got from 'got';
import * as cheerio from 'cheerio';
export declare function scpaping(url: string, opts?: {
lang?: string;
useRange?: boolean;
}): Promise<{
body: string;
$: cheerio.CheerioAPI;
Expand Down
33 changes: 27 additions & 6 deletions built/utils/got.js
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ function scpaping(url, opts) {
};
if (opts === null || opts === void 0 ? void 0 : opts.lang)
headers['accept-language'] = opts.lang;
if (opts === null || opts === void 0 ? void 0 : opts.useRange)
headers['range'] = `bytes=0-${MAX_RESPONSE_SIZE - 1}`;
const response = yield getResponse({
url,
method: 'GET',
Expand Down Expand Up @@ -100,7 +102,8 @@ function getResponse(args) {
});
req.on('redirect', (res, opts) => {
if (!(0, check_allowed_url_1.checkAllowedUrl)(opts.url)) {
req.cancel(`Invalid url: ${opts.url}`);
console.warn(`Invalid url: ${opts.url}`);
req.cancel();
}
});
return yield receiveResponce({ req, typeFilter: args.typeFilter });
Expand All @@ -111,25 +114,43 @@ function receiveResponce(args) {
const req = args.req;
const maxSize = MAX_RESPONSE_SIZE;
req.on('response', (res) => {
var _a;
var _a, _b;
if (res.statusCode === 206) {
const m = ((_a = res.headers['content-range']) !== null && _a !== void 0 ? _a : '').match(new RegExp(/^bytes\s+0-(\d+)\/(\d+)$/, 'i')); // bytes 0-47254/47255
if (m == null) {
console.warn(`Invalid content-range '${res.headers['content-range']}'`);
req.cancel();
return;
}
if (Number(m[1]) + 1 !== Number(m[2])) {
console.warn(`maxSize exceeded by content-range (${m[2]} > ${maxSize}) on response`);
req.cancel();
return;
}
}
// Check html
if (args.typeFilter && !((_a = res.headers['content-type']) === null || _a === void 0 ? void 0 : _a.match(args.typeFilter))) {
req.cancel(`Rejected by type filter ${res.headers['content-type']}`);
if (args.typeFilter && !((_b = res.headers['content-type']) === null || _b === void 0 ? void 0 : _b.match(args.typeFilter))) {
console.warn(`Rejected by type filter ${res.headers['content-type']}`);
req.cancel();
return;
}
// 応答ヘッダでサイズチェック
const contentLength = res.headers['content-length'];
if (contentLength != null) {
const size = Number(contentLength);
if (size > maxSize) {
req.cancel(`maxSize exceeded (${size} > ${maxSize}) on response`);
console.warn(`maxSize exceeded by content-length (${size} > ${maxSize}) on response`);
req.cancel();
return;
}
}
});
// 受信中のデータでサイズチェック
req.on('downloadProgress', (progress) => {
if (progress.transferred > maxSize && progress.percent !== 1) {
req.cancel(`maxSize exceeded (${progress.transferred} > ${maxSize}) on response`);
console.warn(`maxSize exceeded in transfer (${progress.transferred} > ${maxSize}) on response`);
req.cancel();
return;
}
});
// 応答取得 with ステータスコードエラーの整形
Expand Down
3 changes: 3 additions & 0 deletions server_config.example.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,6 @@ allowedPlugins:
# - iwara
# - komiflo
# - dlsite

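# Range付きリクエストを出すか
# (Whether to send requests with an HTTP Range header, asking the server for at most the maximum response size)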
useRange: false
4 changes: 2 additions & 2 deletions src/general.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@ import { SummalyEx } from './summaly';
import { scpaping } from './utils/got';
import { cleanupUrl } from './utils/cleanup-url';

export default async (url: URL, lang: string | null = null): Promise<SummalyEx> => {
export default async (url: URL, lang: string | null = null, useRange = false): Promise<SummalyEx> => {
if (lang && !lang.match(/^[\w-]+(\s*,\s*[\w-]+)*$/)) lang = null;

const res = await scpaping(url.href, { lang: lang || undefined });
const res = await scpaping(url.href, { lang: lang || undefined, useRange });
const $ = res.$;
const landingUrl = new URL(res.response.url);

Expand Down
7 changes: 3 additions & 4 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ type RequestOptions = {
lang?: string | null;

/**
* Whether follow redirects
* Use range for the request
*/
followRedirects?: boolean;
useRange?: boolean;
};

export class Summary {
Expand All @@ -37,7 +37,6 @@ export class Summary {
public async summary(url: string, requestOptions?: RequestOptions): Promise<Summaly> {
const opts = Object.assign({
lang: null,
followRedirects: true,
}, requestOptions);

const _url = new URL(url);
Expand All @@ -56,7 +55,7 @@ export class Summary {

return summary;
} else {
let summary = await general(_url, opts.lang);
let summary = await general(_url, opts.lang, opts.useRange);
if (summary == null) throw 'failed summarize';
const landingUrl = summary.url;

Expand Down
8 changes: 2 additions & 6 deletions src/server/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,18 +25,14 @@ router.get('/url', h3.eventHandler(async event => {
try {
const summary = await summaryInstance.summary(query.url, {
lang: query.lang,
followRedirects: false,
useRange: config.useRange,
});

h3.setResponseHeader(event, 'Cache-Control', 'public, max-age=604800');
return summary;
} catch (e) {
console.log(`summaly error: ${e} ${query.url}`);
if (e instanceof StatusError && e.isPermanentError) {
h3.setResponseStatus(event, 400);
} else {
h3.setResponseStatus(event, 500);
}
h3.setResponseStatus(event, 422);
h3.setResponseHeader(event, 'Content-Type', 'text/plain');
h3.setResponseHeader(event, 'Cache-Control', 'public, max-age=3600');
return 'error';
Expand Down
1 change: 1 addition & 0 deletions src/server/load-config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import * as yaml from 'js-yaml';

/**
 * Shape of the server configuration object.
 * NOTE(review): presumably parsed from the YAML server config file by the
 * default export below — confirm, since the loader body is not shown here.
 * Both settings are optional.
 */
type Config = {
/**
 * Passed by the server as `allowedPlugins` to the `Summary` constructor.
 * NOTE(review): presumably the names of the plugins to enable — confirm
 * against the `Summary` implementation.
 */
allowedPlugins?: string[];
/**
 * Passed by the server as the `useRange` request option. When true, the
 * scraper adds a `range: bytes=0-(MAX_RESPONSE_SIZE - 1)` request header.
 */
useRange?: boolean;
};

export default function () {
Expand Down
35 changes: 29 additions & 6 deletions src/utils/got.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,16 @@ const NOT_BOT_UA = [
'www.sankei.com',
];

export async function scpaping(url: string, opts?: { lang?: string; }) {
export async function scpaping(url: string, opts?: { lang?: string; useRange?: boolean }) {
const u = new URL(url);

const headers = {
'accept': 'text/html, application/xhtml+xml',
'user-agent': NOT_BOT_UA.includes(u.hostname) ? browserUA : BOT_UA,
};
} as Record<string, string>;

if (opts?.lang) headers['accept-language'] = opts.lang;
if (opts?.useRange) headers['range'] = `bytes=0-${MAX_RESPONSE_SIZE - 1}`;

const response = await getResponse({
url,
Expand Down Expand Up @@ -96,7 +97,8 @@ async function getResponse(args: { url: string, method: 'GET' | 'POST', body?: s

req.on('redirect', (res, opts) => {
if (!checkAllowedUrl(opts.url)) {
req.cancel(`Invalid url: ${opts.url}`);
console.warn(`Invalid url: ${opts.url}`);
req.cancel();
}
});

Expand All @@ -108,9 +110,26 @@ async function receiveResponce<T>(args: { req: Got.CancelableRequest<Got.Respons
const maxSize = MAX_RESPONSE_SIZE;

req.on('response', (res: Got.Response) => {
if (res.statusCode === 206) {
const m = (res.headers['content-range'] ?? '').match(new RegExp(/^bytes\s+0-(\d+)\/(\d+)$/, 'i')); // bytes 0-47254/47255

if (m == null) {
console.warn(`Invalid content-range '${res.headers['content-range']}'`);
req.cancel();
return;
}

if (Number(m[1]) + 1 !== Number(m[2])) {
console.warn(`maxSize exceeded by content-range (${m[2]} > ${maxSize}) on response`);
req.cancel();
return;
}
}

// Check html
if (args.typeFilter && !res.headers['content-type']?.match(args.typeFilter)) {
req.cancel(`Rejected by type filter ${res.headers['content-type']}`);
console.warn(`Rejected by type filter ${res.headers['content-type']}`);
req.cancel();
return;
}

Expand All @@ -119,15 +138,19 @@ async function receiveResponce<T>(args: { req: Got.CancelableRequest<Got.Respons
if (contentLength != null) {
const size = Number(contentLength);
if (size > maxSize) {
req.cancel(`maxSize exceeded (${size} > ${maxSize}) on response`);
console.warn(`maxSize exceeded by content-length (${size} > ${maxSize}) on response`);
req.cancel();
return;
}
}
});

// 受信中のデータでサイズチェック
req.on('downloadProgress', (progress: Got.Progress) => {
if (progress.transferred > maxSize && progress.percent !== 1) {
req.cancel(`maxSize exceeded (${progress.transferred} > ${maxSize}) on response`);
console.warn(`maxSize exceeded in transfer (${progress.transferred} > ${maxSize}) on response`);
req.cancel();
return;
}
});

Expand Down

0 comments on commit ea65977

Please sign in to comment.