Skip to content

Commit

Permalink
refactor: remove redundant code from crawler worker and refactor hand…
Browse files Browse the repository at this point in the history
…ling of asset types (#253)

* refactoring asset types
Extracted out functions to silently delete assets and to update them after crawling
Generalized the mapping of assets to bookmark fields to make extending them easier

* revert silentDeleteAsset and hide better-sqlite3

---------

Co-authored-by: MohamedBassem <[email protected]>
  • Loading branch information
kamtschatka and MohamedBassem authored Jun 29, 2024
1 parent e107f8b commit ccbff18
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 65 deletions.
81 changes: 49 additions & 32 deletions apps/workers/crawlerWorker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ import StealthPlugin from "puppeteer-extra-plugin-stealth";
import { withTimeout } from "utils";

import type { ZCrawlLinkRequest } from "@hoarder/shared/queues";
import { db } from "@hoarder/db";
import { db, HoarderDBTransaction } from "@hoarder/db";
import {
assets,
AssetTypes,
Expand Down Expand Up @@ -544,27 +544,20 @@ async function crawlAndParseUrl(
})
.where(eq(bookmarkLinks.id, bookmarkId));

if (screenshotAssetId) {
if (oldScreenshotAssetId) {
await txn.delete(assets).where(eq(assets.id, oldScreenshotAssetId));
}
await txn.insert(assets).values({
id: screenshotAssetId,
assetType: AssetTypes.LINK_SCREENSHOT,
bookmarkId,
});
}

if (imageAssetId) {
if (oldImageAssetId) {
await txn.delete(assets).where(eq(assets.id, oldImageAssetId));
}
await txn.insert(assets).values({
id: imageAssetId,
assetType: AssetTypes.LINK_BANNER_IMAGE,
bookmarkId,
});
}
await updateAsset(
screenshotAssetId,
oldScreenshotAssetId,
bookmarkId,
AssetTypes.LINK_SCREENSHOT,
txn,
);
await updateAsset(
imageAssetId,
oldImageAssetId,
bookmarkId,
AssetTypes.LINK_BANNER_IMAGE,
txn,
);
});

// Delete the old assets if any
Expand All @@ -587,19 +580,16 @@ async function crawlAndParseUrl(
);

await db.transaction(async (txn) => {
if (oldFullPageArchiveAssetId) {
await txn
.delete(assets)
.where(eq(assets.id, oldFullPageArchiveAssetId));
}
await txn.insert(assets).values({
id: fullPageArchiveAssetId,
assetType: AssetTypes.LINK_FULL_PAGE_ARCHIVE,
await updateAsset(
fullPageArchiveAssetId,
oldFullPageArchiveAssetId,
bookmarkId,
});
AssetTypes.LINK_FULL_PAGE_ARCHIVE,
txn,
);
});
if (oldFullPageArchiveAssetId) {
deleteAsset({ userId, assetId: oldFullPageArchiveAssetId }).catch(
await deleteAsset({ userId, assetId: oldFullPageArchiveAssetId }).catch(
() => ({}),
);
}
Expand Down Expand Up @@ -673,3 +663,30 @@ async function runCrawler(job: Job<ZCrawlLinkRequest, void>) {
// Do the archival as a separate last step as it has the potential for failure
await archivalLogic();
}

/**
 * Swaps the asset row attached to a bookmark for a freshly created one.
 *
 * The call is a no-op when `newAssetId` is falsy: in that case the old row is
 * left untouched and nothing is inserted.
 *
 * @param newAssetId id of the asset row to insert; a falsy value makes the call a no-op
 * @param oldAssetId id of the asset row to delete before inserting, if there is one
 * @param bookmarkId id of the bookmark the new asset row is attached to
 * @param assetType value stored in the new row's `assetType` column
 * @param txn transaction in which the delete and the insert are issued
 */
async function updateAsset(
  newAssetId: string | null,
  oldAssetId: string | undefined,
  bookmarkId: string,
  assetType: AssetTypes,
  txn: HoarderDBTransaction,
) {
  // Without a replacement there is nothing to swap in, so keep the old row.
  if (!newAssetId) {
    return;
  }

  if (oldAssetId) {
    const matchesOldAsset = eq(assets.id, oldAssetId);
    await txn.delete(assets).where(matchesOldAsset);
  }

  const replacement = { id: newAssetId, assetType, bookmarkId };
  await txn.insert(assets).values(replacement);
}
14 changes: 14 additions & 0 deletions packages/db/index.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
import Database from "better-sqlite3";
import { ExtractTablesWithRelations } from "drizzle-orm";
import { SQLiteTransaction } from "drizzle-orm/sqlite-core";

import * as schema from "./schema";

export { db } from "./drizzle";
export * as schema from "./schema";
export { SqliteError } from "better-sqlite3";

// Transaction handle type for code that receives a transaction as a parameter:
// drizzle's SQLiteTransaction specialised to the "sync" mode, better-sqlite3's
// RunResult and the tables/relations declared in ./schema.
// This is exported here to avoid leaking better-sqlite types outside of this package.
export type HoarderDBTransaction = SQLiteTransaction<
"sync",
Database.RunResult,
typeof schema,
ExtractTablesWithRelations<typeof schema>
>;
50 changes: 17 additions & 33 deletions packages/trpc/routers/bookmarks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -73,39 +73,23 @@ export const ensureBookmarkOwnership = experimental_trpcMiddleware<{
return opts.next();
});

function assetTypeToBookmarkField(
asset:
| {
id: string;
assetType: AssetTypes;
}
| undefined,
) {
if (!asset) {
return undefined;
}
switch (asset.assetType) {
case AssetTypes.LINK_SCREENSHOT:
return { screenshotAssetId: asset.id };
case AssetTypes.LINK_FULL_PAGE_ARCHIVE:
return { fullPageArchiveAssetId: asset.id };
case AssetTypes.LINK_BANNER_IMAGE:
return { imageAssetId: asset.id };
}
/** The subset of an asset's fields needed to map it onto a bookmark field. */
interface Asset {
// Id of the asset; this is the value written into the bookmark field.
id: string;
// Type of the asset; selects which bookmark field receives the id.
assetType: AssetTypes;
}

function getBookmarkAssets(assets: { id: string; assetType: AssetTypes }[]) {
return {
...assetTypeToBookmarkField(
assets.find((a) => a.assetType == AssetTypes.LINK_SCREENSHOT),
),
...assetTypeToBookmarkField(
assets.find((a) => a.assetType == AssetTypes.LINK_FULL_PAGE_ARCHIVE),
),
...assetTypeToBookmarkField(
assets.find((a) => a.assetType == AssetTypes.LINK_BANNER_IMAGE),
),
};
/** Names of the bookmark content fields that can hold an asset id. */
type BookmarkAssetField =
  | "screenshotAssetId"
  | "fullPageArchiveAssetId"
  | "imageAssetId";

/**
 * Maps every asset type to the bookmark field that stores the id of an asset
 * of that type. Keying the `Record` by `AssetTypes` makes the compiler demand
 * an entry for each asset type, so adding a new type without a mapping fails
 * to compile instead of silently producing an `undefined` field name.
 */
const ASSET_TYPE_MAPPING: Record<AssetTypes, BookmarkAssetField> = {
  [AssetTypes.LINK_SCREENSHOT]: "screenshotAssetId",
  [AssetTypes.LINK_FULL_PAGE_ARCHIVE]: "fullPageArchiveAssetId",
  [AssetTypes.LINK_BANNER_IMAGE]: "imageAssetId",
};

/**
 * Converts assets into the bookmark fields that carry their ids.
 *
 * @param assets a single asset or a list of assets; defaults to no assets
 * @returns an object holding, for each asset, the asset's id under the field
 *   name mapped to its type. Fields for asset types that are not present are
 *   omitted. If several assets share a type, the last one wins.
 */
function mapAssetsToBookmarkFields(
  assets: Asset | Asset[] = [],
): Partial<Record<BookmarkAssetField, string>> {
  // Accept both a lone asset (e.g. one joined row) and a list of assets.
  const assetsArray = Array.isArray(assets) ? assets : [assets];
  const fields: Partial<Record<BookmarkAssetField, string>> = {};
  for (const asset of assetsArray) {
    fields[ASSET_TYPE_MAPPING[asset.assetType]] = asset.id;
  }
  return fields;
}

async function getBookmark(ctx: AuthedContext, bookmarkId: string) {
Expand Down Expand Up @@ -196,7 +180,7 @@ function toZodSchema(bookmark: BookmarkQueryReturnType): ZBookmark {
if (link) {
content = {
type: "link",
...getBookmarkAssets(assets),
...mapAssetsToBookmarkFields(assets),
...link,
};
} else if (text) {
Expand Down Expand Up @@ -616,7 +600,7 @@ export const bookmarksAppRouter = router({
if (row.assets) {
acc[bookmarkId].content = {
...acc[bookmarkId].content,
...assetTypeToBookmarkField(row.assets),
...mapAssetsToBookmarkFields(row.assets),
};
}

Expand Down

0 comments on commit ccbff18

Please sign in to comment.