From 711a53f9ac26b94cccd365dfaa5112c4d96dd203 Mon Sep 17 00:00:00 2001 From: Liran Piade Date: Wed, 26 May 2021 14:51:02 +1000 Subject: [PATCH] Made parser more flexible and added folder trimming --- config.ts | 2 +- main.ts | 5 +++++ responses/reddit.ts | 43 ++++++++++--------------------------------- retriever.ts | 39 ++++++++++++++++++++++++++++++++++----- 4 files changed, 50 insertions(+), 39 deletions(-) diff --git a/config.ts b/config.ts index b95b14e..d741555 100644 --- a/config.ts +++ b/config.ts @@ -32,7 +32,7 @@ if (!fileExists(path.join(appFolder, "config.json"))) { interval: 20, filterNsfw: true, maxFolderSize: 500, - minimumSize: 1, + minimumSize: 0.1, }; // Write defaults as config.json await Deno.writeFile(path.join(appFolder, "config.json"), new TextEncoder().encode( diff --git a/main.ts b/main.ts index be478d3..59a8cfe 100644 --- a/main.ts +++ b/main.ts @@ -19,4 +19,9 @@ async function getDownloaded(): Promise<(Deno.FileInfo & { name: string })[]> { export { AsyncMode, getDownloaded }; +console.log(`\ +Downloader starting. +Config found at ${config.configFilePath} +Files will be downloaded to ${config.targetFolder}` +); retriever(); \ No newline at end of file diff --git a/responses/reddit.ts b/responses/reddit.ts index 5a125c9..f3fedc4 100644 --- a/responses/reddit.ts +++ b/responses/reddit.ts @@ -81,7 +81,7 @@ export interface RedditPost { removed_by_category: null; banned_by: null; author_flair_type: FlairType; - domain: Domain; + domain: string; allow_live_comments: boolean; selftext_html: null | string; likes: null; @@ -98,7 +98,7 @@ export interface RedditPost { can_gild: boolean; spoiler: boolean; locked: boolean; - author_flair_text: AuthorFlairText | null; + author_flair_text: string | null; treatment_tags: any[]; visited: boolean; removed_by: null; @@ -173,11 +173,6 @@ export interface ResizedIcon { height: number; } -export enum AuthorFlairText { - CityMod = "City Mod", - Empty = "", -} - export enum FlairTextColor { Dark = "dark", } @@ -186,17 +181,6 @@ export enum FlairType { Text = "text", } -export enum ContentCategory { - Photography = "photography", -} - -export enum Domain { - IImgurCOM = "i.imgur.com", - IReddIt = "i.redd.it", - PBSTwimgCOM = "pbs.twimg.com", - SelfCityPorn = "self.CityPorn", -} - export interface Gildings { gid_1?: number; } @@ -463,7 +447,7 @@ const typeMap: any = { { json: "can_gild", js: "can_gild", typ: true }, { json: "spoiler", js: "spoiler", typ: true }, { json: "locked", js: "locked", typ: true }, - { json: "author_flair_text", js: "author_flair_text", typ: u(r("AuthorFlairText"), null) }, + { json: "author_flair_text", js: "author_flair_text", typ: u("", null) }, { json: "treatment_tags", js: "treatment_tags", typ: "any" }, { json: "visited", js: "visited", typ: true }, { json: "removed_by", js: "removed_by", typ: "any" }, @@ -497,7 +481,7 @@ const typeMap: any = { { json: "post_hint", js: "post_hint", typ: u(undefined, "") }, { json: "url_overridden_by_dest", js: "url_overridden_by_dest", typ: u(undefined, "") }, { json: "preview", js: "preview", typ: u(undefined, r("Preview")) }, - ], false), + ], "any"), "AllAwarding": o([ { json: "giver_coin_reward", js: "giver_coin_reward", typ: u(0, null) }, { json: "subreddit_id", js: "subreddit_id", typ: "any" }, @@ -529,40 +513,33 @@ const typeMap: any = { { json: "penny_price", js: "penny_price", typ: u(0, null) }, { json: "award_type", js: "award_type", typ: r("AwardType") }, { json: "static_icon_url", js: "static_icon_url", typ: "" }, - ], false), + ], "any"), "ResizedIcon": o([ { json: "url", js: "url", typ: "" }, { json: "width", js: "width", typ: 0 }, { json: "height", js: "height", typ: 0 }, - ], false), + ], "any"), "Gildings": o([ { json: "gid_1", js: "gid_1", typ: u(undefined, 0) }, - ], false), + ], "any"), "MediaEmbed": o([ - ], false), + ], "any"), "Preview": o([ { json: "images", js: "images", typ: "any" }, { json: "enabled", js: "enabled", typ: true }, - ], false), + ], "any"), "Image": o([ { json: "source", js: "source", typ: r("ResizedIcon") }, { json: "resolutions", js: "resolutions", typ: a(r("ResizedIcon")) }, { json: "variants", js: "variants", typ: r("MediaEmbed") }, { json: "id", js: "id", typ: "" }, - ], false), - "AuthorFlairText": [ - "City Mod", - "", - ], + ], "any"), "FlairTextColor": [ "dark", ], "FlairType": [ "text", ], - "ContentCategory": [ - "photography", - ], "WhitelistStatus": [ "all_ads", ], diff --git a/retriever.ts b/retriever.ts index f52f600..9e587d4 100644 --- a/retriever.ts +++ b/retriever.ts @@ -3,6 +3,29 @@ import { getDownloaded, AsyncMode } from "./main.ts"; import { Convert, RedditPost } from "./responses/reddit.ts"; import * as path from 'https://deno.land/std/path/mod.ts'; +// Delete old items from a folder to clear space before adding a new item +const trimFolder = async (folder: { + files: (Deno.FileInfo & { name: string })[] + path: string +}, newItemSize: number, maxSize: number) => { + if (newItemSize > maxSize) + throw new Error(`Maximum size is too small. Please edit the config and set a size larger than ${config.maxFolderSize}MB.`); + let folderSize = folder.files.reduce((acc, curr) => { + return acc + curr.size + }, 0) / 1000000; + const filesByDate = [...folder.files].sort((a, b) => { + if (a.birthtime! > b.birthtime!) return -1; + if (a.birthtime == b.birthtime) return 0; + return 0; + }) + while (folderSize > maxSize && filesByDate.length > 0) { + // Delete the oldest item + const fileToRemove = filesByDate.pop()!; + await Deno.remove(path.join(folder.path, fileToRemove.name)); + folderSize -= fileToRemove.size / 1000000; + } +} + // Given a reddit post, return the size of its link const measurePostSize = async (post: RedditPost) => { if (post.is_self) return null; @@ -12,10 +35,12 @@ const measurePostSize = async (post: RedditPost) => { return parseInt(possibleSizeStr) / 1000000; } -const postFilter = async (post: RedditPost) => { +const postFilter = async (existingDownloads: string[], post: RedditPost) => { const conditions: (() => boolean | Promise)[] = [ // Don't try to download self posts - () => !post.is_self + () => !post.is_self, + // Don't try to re-download posts + () => !existingDownloads.includes(post.id) ]; // Don't try to download NSFW content if configured not to if (config.filterNsfw) conditions.push(() => !post.over_18); @@ -38,13 +63,12 @@ const postFilter = async (post: RedditPost) => { async function retriever() { if (config.sources.length == 0) { - console.log("No sources coonfigured.") + console.log("No sources coonfigured."); return; } const source = config.sources[Math.floor(Math.random() * config.sources.length)]; if (source.startsWith("r/")) { const downloads = await getDownloaded(); - const existingPosts = downloads.map(file => file.name.split('.')[0].split('_').reverse()[0]); let after: string | undefined; let imageBytes: ArrayBuffer = new ArrayBuffer(0); let post: RedditPost | undefined; @@ -62,18 +86,23 @@ async function retriever() { .then(items => items.map(({ data }) => data)) ; after = posts[posts.length - 1].id; + const existingPosts = downloads.map(file => file.name.split('.')[0].split('_').reverse()[0]); post = await posts.asyncFind( - async post => !existingPosts.includes(post.id) && await postFilter(post), + async post => await postFilter(existingPosts, post), AsyncMode.Parellel ); } const fileName = `${source.replace("/", "_")}_${post.id}.${post.url.split(".").reverse()[0]}`; console.log(`Downloading ${post.id} from ${source}`); imageBytes = await (await fetch(post.url)).arrayBuffer(); + // If the folder's size will exceed the maximum, delete old items until it won't + await trimFolder({ files: downloads, path: config.targetFolder }, + imageBytes.byteLength / 1000000, config.maxFolderSize); // Download the post await Deno.writeFile(path.join(config.targetFolder, fileName), new Uint8Array(imageBytes), { create: true }); } + // Rerun the retriever based on the interval set in the config setTimeout(() => { retriever() }, config.interval * (1000 * 60)); }