feat: add '--filter-url-tracking' option
refactor: stop using 'url.parse'
lightpohl committed Nov 17, 2021
1 parent 4827a9c commit 9e7365f
Showing 4 changed files with 77 additions and 7 deletions.
1 change: 1 addition & 0 deletions README.md
@@ -37,6 +37,7 @@ Type values surrounded in square brackets (`[]`) can be used as boolean
| --list | [String] | false | Print episode list instead of downloading. Defaults to "table" when used as a boolean option. "json" is also supported. |
| --exec | String | false | Execute a command after each episode is downloaded. |
| --threads | Number | false | Determines the number of downloads that will happen concurrently. Default is 1. |
| --filter-url-tracking | | false | Attempts to extract the direct download link of an episode if detected (**experimental**). |
| --version | | false | Output the version number. |
| --help | | false | Output usage information. |

19 changes: 17 additions & 2 deletions bin/async.js
@@ -20,6 +20,7 @@ import {
runExec,
writeItemMeta,
writeToArchive,
getUrlEmbed,
} from "./util.js";

const pipeline = promisify(stream.pipeline);
@@ -34,14 +35,26 @@ const download = async ({
archive,
override,
onAfterDownload,
filterUrlTracking,
}) => {
const logMessage = getLogMessageWithMarker(marker);
if (!override && fs.existsSync(outputPath)) {
logMessage("Download exists locally. Skipping...");
return;
}

const headResponse = await got(url, {
let embeddedUrl = null;
if (filterUrlTracking) {
logMessage("Attempting to find embedded URL...");
embeddedUrl = await getUrlEmbed(url);

if (!embeddedUrl) {
logMessage("Unable to find embedded URL. Defaulting to full address");
}
}

const finalUrl = embeddedUrl || url;
const headResponse = await got(finalUrl, {
timeout: 5000,
method: "HEAD",
responseType: "json",
@@ -87,7 +100,7 @@
});

await pipeline(
got.stream(url).on("downloadProgress", onDownloadProgress),
got.stream(finalUrl).on("downloadProgress", onDownloadProgress),
fs.createWriteStream(outputPath)
);
} catch (error) {
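
Taken together, the hunks above add one decision point to `download()`: when `filterUrlTracking` is set, probe for an embedded URL first, then use whichever address survives for both the `HEAD` metadata request and the streamed download. A condensed sketch of that flow (the `resolveFinalUrl` wrapper is hypothetical; in the commit this logic lives inline in `download()`):

```js
import { getUrlEmbed } from "./util.js";

// Sketch: pick the URL to download from. Probing only happens when the
// flag is on, and a failed probe falls back to the original address.
const resolveFinalUrl = async (url, filterUrlTracking) => {
  let embeddedUrl = null;
  if (filterUrlTracking) {
    embeddedUrl = await getUrlEmbed(url); // URL object, or null if nothing responds
  }
  return embeddedUrl || url; // got() accepts both URL objects and strings
};
```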
@@ -140,6 +153,7 @@ let downloadItemsAsync = async ({
episodeTemplate,
exec,
feed,
filterUrlTracking,
includeEpisodeMeta,
mono,
override,
@@ -177,6 +191,7 @@
archive,
override,
marker,
filterUrlTracking,
key: getArchiveKey({
prefix: archiveUrl,
name: getArchiveFilename({
9 changes: 7 additions & 2 deletions bin/bin.js
@@ -2,7 +2,6 @@

import fs from "fs";
import _path from "path";
import _url from "url";
import commander from "commander";
import { createRequire } from "module";
import pluralize from "pluralize";
@@ -119,6 +118,10 @@ commander
createParseNumber({ min: 1, max: 32, name: "threads" }),
1
)
.option(
"--filter-url-tracking",
"attempts to extract the direct download link of an episode if detected (experimental)"
)
.parse(process.argv);
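
Commander maps the kebab-case flag to a camelCase property, which is how `filterUrlTracking` appears in the destructuring below. A minimal sketch of that mapping, assuming the same pre-v7 commander style this file uses (options land directly on the default export):

```js
import commander from "commander";

commander
  .option("--filter-url-tracking", "attempt to extract direct download links")
  .parse(["node", "bin.js", "--filter-url-tracking"]);

console.log(commander.filterUrlTracking); // true (undefined when the flag is omitted)
```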

const {
@@ -140,6 +143,7 @@
exec,
mono,
threads,
filterUrlTracking,
addMp3Metadata: addMp3MetadataFlag,
adjustBitrate: bitrate,
} = commander;
@@ -151,7 +155,7 @@ const main = async () => {
logErrorAndExit("No URL provided");
}

const { hostname, pathname } = _url.parse(url);
const { hostname, pathname } = new URL(url);
const archiveUrl = `${hostname}${pathname}`;
const feed = await getFeed(url);
const basePath = _path.resolve(
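
The swap from `url.parse` to `new URL` here (and in `bin/util.js` below) preserves behavior for valid feed URLs, with one difference worth noting: the WHATWG `URL` constructor throws on malformed input instead of returning a best-effort object, so a bad `--url` value now fails fast. A quick illustration with a hypothetical feed URL:

```js
const { hostname, pathname } = new URL("https://example.com/feed/podcast.xml");
// hostname === "example.com", pathname === "/feed/podcast.xml"
// so archiveUrl === "example.com/feed/podcast.xml"

// Unlike the deprecated url.parse, new URL() rejects malformed input:
try {
  new URL("not-a-url");
} catch (error) {
  // TypeError [ERR_INVALID_URL]: Invalid URL
}
```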
@@ -286,6 +290,7 @@ const main = async () => {
override,
targetItems,
threads,
filterUrlTracking,
});

if (numEpisodesDownloaded === 0) {
55 changes: 52 additions & 3 deletions bin/util.js
@@ -1,8 +1,8 @@
import _url from "url";
import rssParser from "rss-parser";
import path from "path";
import fs from "fs";
import dayjs from "dayjs";
import got from "got";
import util from "util";
import { exec } from "child_process";

@@ -45,6 +45,54 @@ const getIsInArchive = ({ key, archive }) => {
return archiveResult.includes(key);
};

const getPossibleUrlEmbeds = (url, maxAmount = 5) => {
const fullUrl = new URL(url);
const possibleStartIndexes = [];

for (let i = 0; i < fullUrl.pathname.length; i++) {
if (fullUrl.pathname[i] === "/") {
possibleStartIndexes.push(i);
}
}

const possibleEmbedChoices = possibleStartIndexes.map((startIndex) => {
let possibleEmbed = fullUrl.pathname.slice(startIndex + 1);

if (!possibleEmbed.startsWith("http")) {
possibleEmbed = `https://${possibleEmbed}`;
}

return decodeURIComponent(possibleEmbed);
});

return possibleEmbedChoices
.slice(Math.max(possibleEmbedChoices.length - maxAmount, 0))
.reverse();
};
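
To make the candidate generation concrete, here is a worked example with a hypothetical tracking-style URL (the hosts are placeholders). `getPossibleUrlEmbeds` is module-private, so the sketch inlines its logic: slice the pathname at every `/`, prefix `https://` where the remainder is not already absolute, decode percent-encoding, and keep at most the last five candidates, shortest remainder first:

```js
// Inlined copy of getPossibleUrlEmbeds' logic, for illustration only.
const tracked =
  "https://tracking.example.com/rd/123/media.example.org/show/episode.mp3";
const { pathname } = new URL(tracked);

const candidates = [...pathname]
  .flatMap((char, i) => (char === "/" ? [i] : []))
  .map((i) => {
    let embed = pathname.slice(i + 1);
    if (!embed.startsWith("http")) embed = `https://${embed}`;
    return decodeURIComponent(embed); // also unwraps %2F-encoded targets
  })
  .slice(-5)
  .reverse();

console.log(candidates);
// [
//   "https://episode.mp3",
//   "https://show/episode.mp3",
//   "https://media.example.org/show/episode.mp3",  <- the real file
//   "https://123/media.example.org/show/episode.mp3",
//   "https://rd/123/media.example.org/show/episode.mp3"
// ]
```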

const getUrlEmbed = async (url) => {
const possibleUrlEmbeds = getPossibleUrlEmbeds(url);
for (const possibleUrl of possibleUrlEmbeds) {
try {
const embeddedUrl = new URL(possibleUrl);
await got(embeddedUrl.href, {
timeout: 3000,
method: "HEAD",
responseType: "json",
headers: {
accept: "*/*",
},
});

return embeddedUrl;
} catch (error) {
// do nothing
}
}

return null;
};
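
`getUrlEmbed` then probes each candidate with a `HEAD` request (3-second timeout) and resolves to the first one that responds, as a WHATWG `URL` object; candidates that fail DNS resolution or return an error status are skipped silently. A short usage sketch, reusing the hypothetical URL from above:

```js
import { getUrlEmbed } from "./util.js";

const embedded = await getUrlEmbed(
  "https://tracking.example.com/rd/123/media.example.org/show/episode.mp3"
);
// Bare-host guesses like "https://episode.mp3" fail and are skipped, so
// plausibly: embedded.href === "https://media.example.org/show/episode.mp3"
// When no candidate responds, getUrlEmbed resolves to null and the caller
// (download() in bin/async.js) falls back to the original address.
```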

const getLoopControls = ({ limit, offset, length, reverse }) => {
if (reverse) {
const startIndex = length - 1 - offset;
@@ -341,7 +389,7 @@ const writeItemMeta = ({
};

const getUrlExt = (url) => {
const { pathname } = _url.parse(url);
const { pathname } = new URL(url);

if (!pathname) {
return "";
@@ -408,7 +456,7 @@ const getImageUrl = ({ image, itunes }) => {
};

const getFeed = async (url) => {
const { href } = _url.parse(url);
const { href } = new URL(url);

let feed;
try {
@@ -522,6 +570,7 @@ export {
getImageUrl,
getItemsToDownload,
getUrlExt,
getUrlEmbed,
logFeedInfo,
ITEM_LIST_FORMATS,
logItemsList,
