From 0646bf44fc446c1c851af9cde6f3940128a873dc Mon Sep 17 00:00:00 2001 From: Harald Schilly Date: Wed, 10 Jul 2024 16:54:49 +0200 Subject: [PATCH] database/delete-projects: expand scope --- .../database/postgres/bulk-delete.test.ts | 3 + src/packages/database/postgres/bulk-delete.ts | 11 ++- .../database/postgres/delete-projects.ts | 91 ++++++++++++++++--- 3 files changed, 88 insertions(+), 17 deletions(-) diff --git a/src/packages/database/postgres/bulk-delete.test.ts b/src/packages/database/postgres/bulk-delete.test.ts index 423c10b0091..acbbfff0bbc 100644 --- a/src/packages/database/postgres/bulk-delete.test.ts +++ b/src/packages/database/postgres/bulk-delete.test.ts @@ -3,6 +3,9 @@ * License: AGPLv3 s.t. "Commons Clause" – see LICENSE.md for details */ +// see packages/database/pool/pool.ts for where this name is also hard coded: +process.env.PGDATABASE = "smc_ephemeral_testing_database"; + import getPool, { initEphemeralDatabase } from "@cocalc/database/pool"; import { uuid } from "@cocalc/util/misc"; import { bulk_delete } from "./bulk-delete"; diff --git a/src/packages/database/postgres/bulk-delete.ts b/src/packages/database/postgres/bulk-delete.ts index b6b38490a40..098f73cafd1 100644 --- a/src/packages/database/postgres/bulk-delete.ts +++ b/src/packages/database/postgres/bulk-delete.ts @@ -1,14 +1,17 @@ -// see packages/database/pool/pool.ts for where this name is also hard coded: -process.env.PGDATABASE = "smc_ephemeral_testing_database"; - import { escapeIdentifier } from "pg"; import getPool from "@cocalc/database/pool"; import { SCHEMA } from "@cocalc/util/schema"; +type Field = + | "project_id" + | "account_id" + | "target_project_id" + | "source_project_id"; + interface Opts { table: string; // e.g. project_log, etc. - field: "project_id" | "account_id"; // for now, we only support a few + field: Field; // for now, we only support a few id?: string; // default "id", the ID field in the table, which identifies each row uniquely value: string; // a UUID limit?: number; // default 1024 diff --git a/src/packages/database/postgres/delete-projects.ts b/src/packages/database/postgres/delete-projects.ts index 8e88c8bf66b..a6ff0944043 100644 --- a/src/packages/database/postgres/delete-projects.ts +++ b/src/packages/database/postgres/delete-projects.ts @@ -9,11 +9,12 @@ Code related to permanently deleting projects. import getLogger from "@cocalc/backend/logger"; import getPool from "@cocalc/database/pool"; -import { callback2 } from "@cocalc/util/async-utils"; -import { PostgreSQL } from "./types"; -import { minutes_ago } from "@cocalc/util/misc"; import { getServerSettings } from "@cocalc/database/settings"; +import { callback2 } from "@cocalc/util/async-utils"; import { KUCALC_ON_PREMISES } from "@cocalc/util/db-schema/site-defaults"; +import { minutes_ago } from "@cocalc/util/misc"; +import { bulk_delete } from "./bulk-delete"; +import { PostgreSQL } from "./types"; const log = getLogger("db:delete-projects"); @@ -84,6 +85,8 @@ FROM projects as p ON p.project_id = s.project_id WHERE p.deleted = true AND p.state ->> 'state' != 'deleted' +ORDER BY + p.project_id, s.string_id `; /* @@ -102,6 +105,8 @@ export async function cleanup_old_projects_data( ) { const settings = await getServerSettings(); const on_prem = settings.kucalc === KUCALC_ON_PREMISES; + const L0 = log.extend("cleanup_old_projects_data"); + const L = L0.debug; log.debug("cleanup_old_projects_data", { delay_ms, max_run_m, on_prem }); const start_ts = new Date(); @@ -115,31 +120,91 @@ export async function cleanup_old_projects_data( for (const row of rows) { const { project_id, string_id } = row; if (start_ts < minutes_ago(max_run_m)) { - log.debug( - `cleanup_old_projects_data: too much time elapsed, breaking after ${num} syncstrings`, - ); + L(`too much time elapsed, breaking after ${num} syncstrings`); break; } - log.debug( - `cleanup_old_projects_data: deleting syncstring ${project_id}/${string_id}`, - ); + L(`deleting syncstring ${project_id}/${string_id}`); num += 1; await callback2(db.delete_syncstring, { string_id }); // wait for the given amount of delay_ms millio seconds await new Promise((done) => setTimeout(done, delay_ms)); + // Q_CLEANUP_SYNCSTRINGS orders by project_id, hence we trigger project specific actions when the id changes if (pid != project_id) { pid = project_id; + const L2 = L0.extend(project_id).debug; + if (on_prem) { - log.debug( - `cleanup_old_projects_data: deleting project data in ${project_id}`, - ); + L2(`cleanup_old_projects_data for project_id=${project_id}`); // TODO: this only works on-prem, and requires the project files to be mounted - log.debug(`deleting all shared files in project ${project_id}`); + L2(`deleting all shared files in project ${project_id}`); // TODO: do it directly like above, and also get rid of all those shares in the database + + const delPublicPaths = await bulk_delete({ + table: "public_paths", + field: "project_id", + value: project_id, + }); + L2(`deleted public_paths ${delPublicPaths.rowsDeleted} entries`); + + const delProjectLog = await bulk_delete({ + table: "project_log", + field: "project_id", + value: project_id, + }); + L2(`deleted project_log ${delProjectLog.rowsDeleted} entries`); + + const delFileUse = await bulk_delete({ + table: "file_use", + field: "project_id", + value: project_id, + }); + L2(`deleted file_use ${delFileUse.rowsDeleted} entries`); + + const delAccessLog = await bulk_delete({ + table: "file_access_log", + field: "project_id", + value: project_id, + }); + L2(`deleted file_access_log ${delAccessLog.rowsDeleted} entries`); + + const delJupyterApiLog = await bulk_delete({ + table: "jupyter_api_log", + field: "project_id", + value: project_id, + }); + L2(`deleted jupyter_api_log ${delJupyterApiLog.rowsDeleted} entries`); + + for (const field of [ + "target_project_id", + "source_project_id", + ] as const) { + const delCopyPaths = await bulk_delete({ + table: "copy_paths", + field, + value: project_id, + }); + L2(`deleted copy_paths/${field} ${delCopyPaths.rowsDeleted} entries`); + } + + const delListings = await bulk_delete({ + table: "listings", + field: "project_id", + id: "project_id", // TODO listings has a more complex ID, is this a problem? + value: project_id, + }); + L2(`deleted ${delListings.rowsDeleted} listings`); + + const delInviteTokens = await bulk_delete({ + table: "project_invite_tokens", + field: "project_id", + value: project_id, + id: "token", + }); + L2(`deleted ${delInviteTokens.rowsDeleted} entries`); } // now, that we're done with that project, mark it as state.state ->> 'deleted'