-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmain.ts
57 lines (50 loc) · 1.66 KB
/
main.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import {
DOMParser,
Element,
} from "https://deno.land/x/deno_dom/deno-dom-wasm.ts";
import { download } from "https://deno.land/x/download/mod.ts";
/**
 * Scrapes the GOV.UK "ministers transparency publications" collection and
 * downloads every linked CSV file into `./output`.
 *
 * Steps:
 * 1. Fetch the collection page and collect all
 *    `a.gem-c-document-list__item-title` links.
 * 2. For each link, fetch the linked page, log the text of its
 *    `.gem-c-title__text` element (presumably the publication title), and
 *    find every anchor whose `href` ends in `.csv`.
 * 3. Download each CSV, one at a time, into the output directory.
 *
 * @throws {Error} If the collection page itself cannot be fetched with a
 *   successful HTTP status. Failures of individual publication pages are
 *   logged and skipped so one bad page does not abort the whole run.
 */
async function main(): Promise<void> {
  const collectionPageUrl =
    "https://www.gov.uk/government/collections/ministers-transparency-publications";
  const outputDir = "./output";

  const collectionPageResponse = await fetch(collectionPageUrl);
  if (!collectionPageResponse.ok) {
    // Without this check an error page would be parsed as if it were the
    // collection, yielding zero links and a silent "successful" run.
    throw new Error(
      `Failed to fetch ${collectionPageUrl}: HTTP ${collectionPageResponse.status}`,
    );
  }
  const collectionPageHtml = await collectionPageResponse.text();

  const parser = new DOMParser();
  const collectionPageDocument = parser.parseFromString(
    collectionPageHtml,
    "text/html",
  );
  const transparencyDataLinkElements = collectionPageDocument?.querySelectorAll(
    "a.gem-c-document-list__item-title",
  ) as Element[] | undefined;

  // `recursive: true` makes this a no-op when the directory already exists,
  // so the script can be re-run without failing with AlreadyExists.
  await Deno.mkdir(outputDir, { recursive: true });

  for (const linkElement of transparencyDataLinkElements ?? []) {
    const href = linkElement.getAttribute("href");
    if (href === null) {
      // An anchor without an href would otherwise become ".../null".
      continue;
    }

    // Resolve against the collection page so both site-relative and
    // absolute hrefs produce a valid URL.
    const transparencyDataPageUrl = new URL(href, collectionPageUrl).href;
    const transparencyDataPageResponse = await fetch(transparencyDataPageUrl);
    if (!transparencyDataPageResponse.ok) {
      console.error(
        `Skipping ${transparencyDataPageUrl}: HTTP ${transparencyDataPageResponse.status}`,
      );
      // Discard the unread body so the underlying resource is released.
      await transparencyDataPageResponse.body?.cancel();
      continue;
    }
    const transparencyDataPageHtml = await transparencyDataPageResponse.text();
    const transparencyDataPageDocument = parser.parseFromString(
      transparencyDataPageHtml,
      "text/html",
    );
    const csvDownloadUrlElements = transparencyDataPageDocument
      ?.querySelectorAll('a[href$=".csv"]') as Element[] | undefined;

    console.log(
      transparencyDataPageDocument?.querySelector(".gem-c-title__text")
        ?.textContent,
    );

    for (const csvDownloadUrlElement of csvDownloadUrlElements ?? []) {
      const csvHref = csvDownloadUrlElement.getAttribute("href");
      if (csvHref === null) {
        continue;
      }
      // Resolve relative CSV links against the page they were found on;
      // already-absolute links pass through unchanged.
      const csvDownloadUrl = new URL(csvHref, transparencyDataPageUrl).href;
      await download(csvDownloadUrl, {
        dir: outputDir,
      });
    }
  }
}
// Script entry point: start the scrape. The promise is deliberately not
// awaited; `void` marks that as intentional.
void main();