Skip to content

Commit

Permalink
Made parser more flexible and added folder trimming
Browse files Browse the repository at this point in the history
  • Loading branch information
lirannl committed May 26, 2021
1 parent d345e8c commit 711a53f
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 39 deletions.
2 changes: 1 addition & 1 deletion config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ if (!fileExists(path.join(appFolder, "config.json"))) {
interval: 20,
filterNsfw: true,
maxFolderSize: 500,
minimumSize: 1,
minimumSize: 0.1,
};
// Write defaults as config.json
await Deno.writeFile(path.join(appFolder, "config.json"), new TextEncoder().encode(
Expand Down
5 changes: 5 additions & 0 deletions main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,9 @@ async function getDownloaded(): Promise<(Deno.FileInfo & { name: string })[]> {

export { AsyncMode, getDownloaded };

// Print a startup banner showing where the config file was found and where downloads go.
// The backslash right after the opening backtick is a line continuation, so the message
// begins at "Downloader starting." without a leading blank line.
console.log(`\
Downloader starting.
Config found at ${config.configFilePath}
Files will be downloaded to ${config.targetFolder}`
);
// Start the retriever.
retriever();
43 changes: 10 additions & 33 deletions responses/reddit.ts
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ export interface RedditPost {
removed_by_category: null;
banned_by: null;
author_flair_type: FlairType;
domain: Domain;
domain: string;
allow_live_comments: boolean;
selftext_html: null | string;
likes: null;
Expand All @@ -98,7 +98,7 @@ export interface RedditPost {
can_gild: boolean;
spoiler: boolean;
locked: boolean;
author_flair_text: AuthorFlairText | null;
author_flair_text: string | null;
treatment_tags: any[];
visited: boolean;
removed_by: null;
Expand Down Expand Up @@ -173,11 +173,6 @@ export interface ResizedIcon {
height: number;
}

export enum AuthorFlairText {
CityMod = "City Mod",
Empty = "",
}

/** Flair text colour values declared for this schema; `"dark"` is the only member. */
export enum FlairTextColor {
Dark = "dark",
}
Expand All @@ -186,17 +181,6 @@ export enum FlairType {
Text = "text",
}

export enum ContentCategory {
Photography = "photography",
}

export enum Domain {
IImgurCOM = "i.imgur.com",
IReddIt = "i.redd.it",
PBSTwimgCOM = "pbs.twimg.com",
SelfCityPorn = "self.CityPorn",
}

/**
 * Shape of a `Gildings` object in the Reddit response.
 * `gid_1` is an optional number; its meaning is not shown here
 * (presumably a count for one gilding tier — TODO confirm).
 */
export interface Gildings {
gid_1?: number;
}
Expand Down Expand Up @@ -463,7 +447,7 @@ const typeMap: any = {
{ json: "can_gild", js: "can_gild", typ: true },
{ json: "spoiler", js: "spoiler", typ: true },
{ json: "locked", js: "locked", typ: true },
{ json: "author_flair_text", js: "author_flair_text", typ: u(r("AuthorFlairText"), null) },
{ json: "author_flair_text", js: "author_flair_text", typ: u("", null) },
{ json: "treatment_tags", js: "treatment_tags", typ: "any" },
{ json: "visited", js: "visited", typ: true },
{ json: "removed_by", js: "removed_by", typ: "any" },
Expand Down Expand Up @@ -497,7 +481,7 @@ const typeMap: any = {
{ json: "post_hint", js: "post_hint", typ: u(undefined, "") },
{ json: "url_overridden_by_dest", js: "url_overridden_by_dest", typ: u(undefined, "") },
{ json: "preview", js: "preview", typ: u(undefined, r("Preview")) },
], false),
], "any"),
"AllAwarding": o([
{ json: "giver_coin_reward", js: "giver_coin_reward", typ: u(0, null) },
{ json: "subreddit_id", js: "subreddit_id", typ: "any" },
Expand Down Expand Up @@ -529,40 +513,33 @@ const typeMap: any = {
{ json: "penny_price", js: "penny_price", typ: u(0, null) },
{ json: "award_type", js: "award_type", typ: r("AwardType") },
{ json: "static_icon_url", js: "static_icon_url", typ: "" },
], false),
], "any"),
"ResizedIcon": o([
{ json: "url", js: "url", typ: "" },
{ json: "width", js: "width", typ: 0 },
{ json: "height", js: "height", typ: 0 },
], false),
], "any"),
"Gildings": o([
{ json: "gid_1", js: "gid_1", typ: u(undefined, 0) },
], false),
], "any"),
"MediaEmbed": o([
], false),
], "any"),
"Preview": o([
{ json: "images", js: "images", typ: "any" },
{ json: "enabled", js: "enabled", typ: true },
], false),
], "any"),
"Image": o([
{ json: "source", js: "source", typ: r("ResizedIcon") },
{ json: "resolutions", js: "resolutions", typ: a(r("ResizedIcon")) },
{ json: "variants", js: "variants", typ: r("MediaEmbed") },
{ json: "id", js: "id", typ: "" },
], false),
"AuthorFlairText": [
"City Mod",
"",
],
], "any"),
"FlairTextColor": [
"dark",
],
"FlairType": [
"text",
],
"ContentCategory": [
"photography",
],
"WhitelistStatus": [
"all_ads",
],
Expand Down
39 changes: 34 additions & 5 deletions retriever.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,29 @@ import { getDownloaded, AsyncMode } from "./main.ts";
import { Convert, RedditPost } from "./responses/reddit.ts";
import * as path from 'https://deno.land/std/path/mod.ts';

/**
 * Delete the oldest files from a folder until a new item fits under the size cap.
 *
 * All sizes are in megabytes; file sizes in bytes are converted by dividing by 1,000,000.
 *
 * @param folder - The folder to trim: its on-disk `path` and the `files` currently in it.
 * @param newItemSize - Size, in MB, of the item about to be added.
 * @param maxSize - Maximum total folder size, in MB, including the new item.
 * @throws Error if the new item alone is larger than `maxSize`.
 */
const trimFolder = async (folder: {
  files: (Deno.FileInfo & { name: string })[]
  path: string
}, newItemSize: number, maxSize: number) => {
  // No amount of trimming can make room for an item bigger than the cap itself
  if (newItemSize > maxSize)
    throw new Error(`Maximum size is too small. Please edit the config and set a size larger than ${maxSize}MB.`);

  const bytesPerMb = 1000000;
  // birthtime is not available on every platform; fall back to mtime, and treat
  // files with no timestamp at all as the oldest so they are removed first.
  const timestamp = (file: Deno.FileInfo): number =>
    (file.birthtime ?? file.mtime)?.getTime() ?? 0;

  let folderSize = folder.files.reduce((total, file) => total + file.size, 0) / bytesPerMb;
  // Newest first, so pop() always yields the oldest remaining file
  const newestFirst = [...folder.files].sort((a, b) => timestamp(b) - timestamp(a));

  // Account for the incoming item: keep deleting until it fits under the cap
  while (folderSize + newItemSize > maxSize) {
    const fileToRemove = newestFirst.pop();
    if (fileToRemove === undefined) break;
    await Deno.remove(path.join(folder.path, fileToRemove.name));
    folderSize -= fileToRemove.size / bytesPerMb;
  }
};

// Given a reddit post, return the size of its link
const measurePostSize = async (post: RedditPost) => {
if (post.is_self) return null;
Expand All @@ -12,10 +35,12 @@ const measurePostSize = async (post: RedditPost) => {
return parseInt(possibleSizeStr) / 1000000;
}

const postFilter = async (post: RedditPost) => {
const postFilter = async (existingDownloads: string[], post: RedditPost) => {
const conditions: (() => boolean | Promise<boolean>)[] = [
// Don't try to download self posts
() => !post.is_self
() => !post.is_self,
// Don't try to re-download posts
() => !existingDownloads.includes(post.id)
];
// Don't try to download NSFW content if configured not to
if (config.filterNsfw) conditions.push(() => !post.over_18);
Expand All @@ -38,13 +63,12 @@ const postFilter = async (post: RedditPost) => {

async function retriever() {
if (config.sources.length == 0) {
console.log("No sources coonfigured.")
console.log("No sources coonfigured.");
return;
}
const source = config.sources[Math.floor(Math.random() * config.sources.length)];
if (source.startsWith("r/")) {
const downloads = await getDownloaded();
const existingPosts = downloads.map(file => file.name.split('.')[0].split('_').reverse()[0]);
let after: string | undefined;
let imageBytes: ArrayBuffer = new ArrayBuffer(0);
let post: RedditPost | undefined;
Expand All @@ -62,18 +86,23 @@ async function retriever() {
.then(items => items.map(({ data }) => data))
;
after = posts[posts.length - 1].id;
const existingPosts = downloads.map(file => file.name.split('.')[0].split('_').reverse()[0]);
post = await posts.asyncFind(
async post => !existingPosts.includes(post.id) && await postFilter(post),
async post => await postFilter(existingPosts, post),
AsyncMode.Parellel
);
}
const fileName = `${source.replace("/", "_")}_${post.id}.${post.url.split(".").reverse()[0]}`;
console.log(`Downloading ${post.id} from ${source}`);
imageBytes = await (await fetch(post.url)).arrayBuffer();
// If the folder's size will exceed the maximum, delete old items until it won't
await trimFolder({ files: downloads, path: config.targetFolder },
imageBytes.byteLength / 1000000, config.maxFolderSize);
// Download the post
await Deno.writeFile(path.join(config.targetFolder, fileName),
new Uint8Array(imageBytes), { create: true });
}
// Rerun the retriever based on the interval set in the config
setTimeout(() => { retriever() }, config.interval * (1000 * 60));
}

Expand Down

0 comments on commit 711a53f

Please sign in to comment.