From 80fcf5a833a0cb02481987b0fccb5df08ad32395 Mon Sep 17 00:00:00 2001 From: kamtschatka Date: Mon, 28 Oct 2024 02:51:00 +0100 Subject: [PATCH] feature: Archive videos using yt-dlp. Fixes #215 (#525) * Allow downloading more content from a webpage and index it #215 Added a worker that allows downloading videos depending on the environment variables refactored the code a bit added new video asset updated documentation * Some tweaks * Drop the dependency on the yt-dlp wrapper * Update openapi specs * Don't log an error when the URL is not supported * Better handle supported websites that don't download anything --------- Co-authored-by: Mohamed Bassem --- apps/web/app/api/assets/[assetId]/route.ts | 32 ++- .../dashboard/preview/AttachmentBox.tsx | 2 + .../dashboard/preview/LinkContentSection.tsx | 19 ++ apps/workers/crawlerWorker.ts | 59 +---- apps/workers/index.ts | 15 +- apps/workers/videoWorker.ts | 202 ++++++++++++++++++ apps/workers/workerUtils.ts | 48 +++++ docker/Dockerfile | 2 +- docs/docs/03-configuration.md | 27 +-- packages/db/schema.ts | 2 + packages/open-api/hoarder-openapi-spec.json | 9 + packages/shared/assetdb.ts | 16 ++ packages/shared/config.ts | 6 + packages/shared/queues.ts | 23 ++ packages/shared/types/bookmarks.ts | 2 + packages/trpc/lib/attachments.ts | 5 + packages/trpc/routers/bookmarks.ts | 5 + 17 files changed, 403 insertions(+), 71 deletions(-) create mode 100644 apps/workers/videoWorker.ts create mode 100644 apps/workers/workerUtils.ts diff --git a/apps/web/app/api/assets/[assetId]/route.ts b/apps/web/app/api/assets/[assetId]/route.ts index 73237d8d..3bff79ba 100644 --- a/apps/web/app/api/assets/[assetId]/route.ts +++ b/apps/web/app/api/assets/[assetId]/route.ts @@ -27,10 +27,30 @@ export async function GET( assetId: params.assetId, }); - return new Response(asset, { - status: 200, - headers: { - "Content-type": metadata.contentType, - }, - }); + const range = request.headers.get("Range"); + if (range) { + const parts = 
range.replace(/bytes=/, "").split("-"); + const start = parseInt(parts[0], 10); + const end = parts[1] ? parseInt(parts[1], 10) : asset.length - 1; + + // TODO: Don't read the whole asset into memory in the first place + const chunk = asset.subarray(start, end + 1); + return new Response(chunk, { + status: 206, // Partial Content + headers: { + "Content-Range": `bytes ${start}-${end}/${asset.length}`, + "Accept-Ranges": "bytes", + "Content-Length": chunk.length.toString(), + "Content-type": metadata.contentType, + }, + }); + } else { + return new Response(asset, { + status: 200, + headers: { + "Content-Length": asset.length.toString(), + "Content-type": metadata.contentType, + }, + }); + } } diff --git a/apps/web/components/dashboard/preview/AttachmentBox.tsx b/apps/web/components/dashboard/preview/AttachmentBox.tsx index 436f1026..d631f4d9 100644 --- a/apps/web/components/dashboard/preview/AttachmentBox.tsx +++ b/apps/web/components/dashboard/preview/AttachmentBox.tsx @@ -20,6 +20,7 @@ import { Pencil, Plus, Trash2, + Video, } from "lucide-react"; import { @@ -44,6 +45,7 @@ export default function AttachmentBox({ bookmark }: { bookmark: ZBookmark }) { screenshot: , fullPageArchive: , bannerImage: , + video: