diff --git a/README.md b/README.md
index 2142b4e..c8ae027 100644
--- a/README.md
+++ b/README.md
@@ -37,6 +37,7 @@ Type values surrounded in square brackets (`[]`) can be used as used as boolean
 | --list | [String] | false | Print episode list instead of downloading. Defaults to "table" when used as a boolean option. "json" is also supported. |
 | --exec | String | false | Execute a command after each episode is downloaded. |
 | --threads | Number | false | Determines the number of downloads that will happen concurrently. Default is 1. |
+| --filter-url-tracking | | false | Attempts to extract the direct download link of an episode if detected (**experimental**). |
 | --version | | false | Output the version number. |
 | --help | | false | Output usage information. |
 
diff --git a/bin/async.js b/bin/async.js
index ac98782..78e7ee7 100644
--- a/bin/async.js
+++ b/bin/async.js
@@ -20,6 +20,7 @@ import {
   runExec,
   writeItemMeta,
   writeToArchive,
+  getUrlEmbed,
 } from "./util.js";
 
 const pipeline = promisify(stream.pipeline);
@@ -34,6 +35,7 @@ const download = async ({
   archive,
   override,
   onAfterDownload,
+  filterUrlTracking,
 }) => {
   const logMessage = getLogMessageWithMarker(marker);
   if (!override && fs.existsSync(outputPath)) {
@@ -41,7 +43,18 @@ const download = async ({
     return;
   }
 
-  const headResponse = await got(url, {
+  let embeddedUrl = null;
+  if (filterUrlTracking) {
+    logMessage("Attempting to find embedded URL...");
+    embeddedUrl = await getUrlEmbed(url);
+
+    if (!embeddedUrl) {
+      logMessage("Unable to find embedded URL. Defaulting to full address");
+    }
+  }
+
+  const finalUrl = embeddedUrl || url;
+  const headResponse = await got(finalUrl, {
     timeout: 5000,
     method: "HEAD",
     responseType: "json",
@@ -87,7 +100,7 @@ const download = async ({
     });
 
     await pipeline(
-      got.stream(url).on("downloadProgress", onDownloadProgress),
+      got.stream(finalUrl).on("downloadProgress", onDownloadProgress),
      fs.createWriteStream(outputPath)
     );
   } catch (error) {
@@ -140,6 +153,7 @@ let downloadItemsAsync = async ({
   episodeTemplate,
   exec,
   feed,
+  filterUrlTracking,
   includeEpisodeMeta,
   mono,
   override,
@@ -177,6 +191,7 @@ let downloadItemsAsync = async ({
       archive,
       override,
       marker,
+      filterUrlTracking,
       key: getArchiveKey({
         prefix: archiveUrl,
         name: getArchiveFilename({
diff --git a/bin/bin.js b/bin/bin.js
index 07fadbc..141846a 100644
--- a/bin/bin.js
+++ b/bin/bin.js
@@ -2,7 +2,6 @@
 
 import fs from "fs";
 import _path from "path";
-import _url from "url";
 import commander from "commander";
 import { createRequire } from "module";
 import pluralize from "pluralize";
@@ -119,6 +118,10 @@ commander
     createParseNumber({ min: 1, max: 32, name: "threads" }),
     1
   )
+  .option(
+    "--filter-url-tracking",
+    "attempts to extract the direct download link of an episode if detected (experimental)"
+  )
   .parse(process.argv);
 
 const {
@@ -140,6 +143,7 @@ const {
   exec,
   mono,
   threads,
+  filterUrlTracking,
   addMp3Metadata: addMp3MetadataFlag,
   adjustBitrate: bitrate,
 } = commander;
@@ -151,7 +155,7 @@ const main = async () => {
     logErrorAndExit("No URL provided");
   }
 
-  const { hostname, pathname } = _url.parse(url);
+  const { hostname, pathname } = new URL(url);
   const archiveUrl = `${hostname}${pathname}`;
   const feed = await getFeed(url);
   const basePath = _path.resolve(
@@ -286,6 +290,7 @@ const main = async () => {
     override,
     targetItems,
     threads,
+    filterUrlTracking,
   });
 
   if (numEpisodesDownloaded === 0) {
diff --git a/bin/util.js b/bin/util.js
index 782798e..ad0ce95 100644
--- a/bin/util.js
+++ b/bin/util.js
@@ -1,8 +1,8 @@
-import _url from "url";
 import rssParser from "rss-parser";
 import path from "path";
 import fs from "fs";
 import dayjs from "dayjs";
+import got from "got";
 import util from "util";
 
 import { exec } from "child_process";
@@ -45,6 +45,54 @@ const getIsInArchive = ({ key, archive }) => {
   return archiveResult.includes(key);
 };
 
+const getPossibleUrlEmbeds = (url, maxAmount = 5) => {
+  const fullUrl = new URL(url);
+  const possibleStartIndexes = [];
+
+  for (let i = 0; i < fullUrl.pathname.length; i++) {
+    if (fullUrl.pathname[i] === "/") {
+      possibleStartIndexes.push(i);
+    }
+  }
+
+  const possibleEmbedChoices = possibleStartIndexes.map((startIndex) => {
+    let possibleEmbed = fullUrl.pathname.slice(startIndex + 1);
+
+    if (!possibleEmbed.startsWith("http")) {
+      possibleEmbed = `https://${possibleEmbed}`;
+    }
+
+    return decodeURIComponent(possibleEmbed);
+  });
+
+  return possibleEmbedChoices
+    .slice(Math.max(possibleEmbedChoices.length - maxAmount, 0))
+    .reverse();
+};
+
+const getUrlEmbed = async (url) => {
+  const possibleUrlEmbeds = getPossibleUrlEmbeds(url);
+  for (const possibleUrl of possibleUrlEmbeds) {
+    try {
+      const embeddedUrl = new URL(possibleUrl);
+      await got(embeddedUrl.href, {
+        timeout: 3000,
+        method: "HEAD",
+        responseType: "json",
+        headers: {
+          accept: "*/*",
+        },
+      });
+
+      return embeddedUrl;
+    } catch (error) {
+      // do nothing
+    }
+  }
+
+  return null;
+};
+
 const getLoopControls = ({ limit, offset, length, reverse }) => {
   if (reverse) {
     const startIndex = length - 1 - offset;
@@ -341,7 +389,7 @@ const writeItemMeta = ({
 };
 
 const getUrlExt = (url) => {
-  const { pathname } = _url.parse(url);
+  const { pathname } = new URL(url);
   if (!pathname) {
     return "";
   }
@@ -408,7 +456,7 @@ const getImageUrl = ({ image, itunes }) => {
 };
 
 const getFeed = async (url) => {
-  const { href } = _url.parse(url);
+  const { href } = new URL(url);
 
   let feed;
   try {
@@ -522,6 +570,7 @@ export {
   getImageUrl,
   getItemsToDownload,
   getUrlExt,
+  getUrlEmbed,
   logFeedInfo,
   ITEM_LIST_FORMATS,
   logItemsList,