Skip to content

Commit

Permalink
database/cleanup: WIP bulk delete
Browse files Browse the repository at this point in the history
  • Loading branch information
haraldschilly committed Jul 10, 2024
1 parent b546a4d commit 3bcabeb
Show file tree
Hide file tree
Showing 2 changed files with 122 additions and 0 deletions.
49 changes: 49 additions & 0 deletions src/packages/database/postgres/bulk-delete.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/*
* This file is part of CoCalc: Copyright © 2024 Sagemath, Inc.
* License: AGPLv3 s.t. "Commons Clause" – see LICENSE.md for details
*/

import getPool, { initEphemeralDatabase } from "@cocalc/database/pool";
import { uuid } from "@cocalc/util/misc";
import { bulk_delete } from "./bulk-delete";

beforeAll(async () => {
await initEphemeralDatabase();
}, 15000);

afterAll(async () => {
await getPool().end();
});

describe("bulk delete", () => {
test("deleting projects", async () => {
const p = getPool();
const project_id = uuid();
const N = 2000;
for (let i = 0; i < N; i++) {
await p.query(
"INSERT INTO project_log (id, project_id, time) VALUES($1::UUID, $2::UUID, $3::TIMESTAMP)",
[uuid(), project_id, new Date()],
);
}

const num1 = await p.query(
"SELECT COUNT(*)::INT as num FROM project_log WHERE project_id = $1",
[project_id],
);
expect(num1.rows[0].num).toEqual(N);

await bulk_delete({
table: "project_log",
field: "project_id",
value: project_id,
limit: 100,
});

const num2 = await p.query(
"SELECT COUNT(*)::INT as num FROM project_log WHERE project_id = $1",
[project_id],
);
expect(num2.rows[0].num).toEqual(0);
});
});
73 changes: 73 additions & 0 deletions src/packages/database/postgres/bulk-delete.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import { escapeIdentifier } from "pg";

import getPool from "@cocalc/database/pool";
import { SCHEMA } from "@cocalc/util/schema";

interface Opts {
table: string;
field: "project_id" | "account_id"; // for now, we only support a few
value: string; // a UUID
limit?: number;
}

type Ret = Promise<{
rowsDeleted: number;
durationS: number;
}>;

function deleteQuery(table: string, field: string) {
const T = escapeIdentifier(table);
const F = escapeIdentifier(field);

return `
DELETE FROM ${T}
WHERE ${F} IN (
SELECT ${F} FROM ${T} WHERE ${F} = $1 LIMIT $2
)
RETURNING 1
`;
}

export async function bulk_delete(opts: Opts): Ret {
const { table, field, value } = opts;
let { limit = 1000 } = opts;
// assert table name is a key in SCHEMA
if (!(table in SCHEMA)) {
throw new Error(`table ${table} does not exist`);
}

const q = deleteQuery(table, field);
console.log(q);
console.log(opts);

const pool = getPool();

const start_ts = Date.now();
let rowsDeleted = 0;

while (true) {
const t0 = Date.now();
const ret = await pool.query(q, [value, limit]);
const td = Date.now() - t0;
rowsDeleted += ret.rowCount ?? 0;

// adjust the limit
const next = Math.round(
td > 0.1 ? limit / 2 : td < 0.05 ? limit * 2 : limit,
);
limit = Math.max(1, Math.min(10000, next));

// wait for a bit, but not more than 1 second ~ this aims for a max utilization of 10%
const wait_ms = Math.min(1000, td * 10);
await new Promise((done) => setTimeout(done, wait_ms));

console.log(
`loop: deleted ${ret.rowCount} | wait=${wait_ms} | limit=${limit}`,
);

if (ret.rowCount === 0) break;
}

const durationS = (Date.now() - start_ts) / 1000;
return { durationS, rowsDeleted };
}

0 comments on commit 3bcabeb

Please sign in to comment.