forked from simonw/til
-
Notifications
You must be signed in to change notification settings - Fork 0
/
build_database.py
104 lines (96 loc) · 3.49 KB
/
build_database.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
from datetime import timezone
import httpx
import git
import os
import pathlib
from urllib.parse import urlencode
import sqlite_utils
from sqlite_utils.db import NotFoundError
import time
# Directory containing this script, resolved to an absolute path. It is the
# base for the "*/*.md" glob in build_database() and is the path passed to
# build_database() when the file is run as a script.
root = pathlib.Path(__file__).parent.resolve()
def created_changed_times(repo_path, ref="main"):
    """Collect first-seen and last-seen commit timestamps for every file.

    Opens the Git repository at ``repo_path`` and walks the commits reachable
    from ``ref`` in the reverse of the order ``iter_commits`` yields them
    (presumably oldest first, assuming the usual newest-first log order).

    Returns a dict keyed by the file paths reported in each commit's stats.
    Each value is a dict with four ISO 8601 strings:

    * ``created`` / ``created_utc`` - from the first processed commit that
      touched the file.
    * ``updated`` / ``updated_utc`` - from the last processed commit that
      touched the file.

    The ``*_utc`` variants are the same instant converted to UTC; the plain
    variants keep the offset carried by the commit's datetime.
    """
    times_by_path = {}
    repo = git.Repo(repo_path, odbt=git.GitDB)

    history = list(repo.iter_commits(ref))
    history.reverse()

    for commit in history:
        when = commit.committed_datetime
        local_stamp = when.isoformat()
        utc_stamp = when.astimezone(timezone.utc).isoformat()

        for touched in commit.stats.files:
            entry = times_by_path.get(touched)
            if entry is None:
                # First time this path shows up: remember when it was created.
                entry = {"created": local_stamp, "created_utc": utc_stamp}
                times_by_path[touched] = entry
            # Every commit touching the path overwrites the "updated" stamps,
            # so the last processed commit wins.
            entry["updated"] = local_stamp
            entry["updated_utc"] = utc_stamp

    return times_by_path
def _render_markdown(path, body):
    """Render ``body`` to HTML through GitHub's Markdown API.

    Makes up to three POST requests to ``https://api.github.com/markdown``,
    sleeping 60 seconds between failed attempts. If the ``GITHUB_TOKEN``
    environment variable is set it is sent as a bearer token.

    ``path`` is only used in progress and error messages.

    Returns the response text of the first request that answers with
    status 200. Raises ``AssertionError`` if every attempt fails.
    """
    # The headers do not change between attempts, so build them once.
    headers = {}
    token = os.environ.get("GITHUB_TOKEN")
    if token:
        headers = {"authorization": "Bearer {}".format(token)}

    max_attempts = 3
    response = None
    for attempt in range(1, max_attempts + 1):
        response = httpx.post(
            "https://api.github.com/markdown",
            json={
                # mode=gfm would expand #13 issue links and suchlike
                "mode": "markdown",
                "text": body,
            },
            headers=headers,
        )
        if response.status_code == 200:
            print("Rendered HTML for {}".format(path))
            return response.text
        # Only wait when another attempt will follow; sleeping right before
        # giving up would just delay the failure.
        if attempt < max_attempts:
            print(" sleeping 60s")
            time.sleep(60)

    # Raised explicitly rather than via ``assert False`` so the failure is not
    # stripped (and a record without HTML silently stored) under ``python -O``.
    raise AssertionError(
        "Could not render {} - last response was {}".format(path, response.headers)
    )


def build_database(repo_path):
    """Create or update ``tils.db`` from the Markdown files in the repository.

    For every file matching ``*/*.md`` under the module-level ``root``:

    * the first line (with leading ``#`` characters stripped) becomes the
      title and the remainder of the file becomes the body;
    * the body is rendered to HTML via ``_render_markdown`` only when it
      differs from the stored body or no HTML is stored yet;
    * the created/updated timestamps from ``created_changed_times`` are
      merged in and the record is upserted into the ``til`` table, whose
      primary key is ``path``.

    After all files are processed, full-text search is (re)enabled on the
    ``title`` and ``body`` columns.

    ``repo_path`` is the Git repository to read history from and the
    directory in which ``tils.db`` is written.

    Raises ``AssertionError`` if a file cannot be rendered, and ``KeyError``
    if a Markdown file has no entry in the commit history.
    """
    all_times = created_changed_times(repo_path)
    db = sqlite_utils.Database(repo_path / "tils.db")
    table = db.table("til", pk="path")

    # NOTE(review): files are discovered under the module-level ``root`` while
    # history and the database use ``repo_path``; the two are the same when
    # run as a script - confirm before calling with a different path.
    for filepath in root.glob("*/*.md"):
        # Close the file deterministically and read it as UTF-8 regardless of
        # the platform's default encoding.
        with filepath.open(encoding="utf-8") as fp:
            title = fp.readline().lstrip("#").strip()
            body = fp.read().strip()

        # Always use forward slashes so the slug, the topic split and the
        # lookup in all_times work on Windows as well.
        path = filepath.relative_to(root).as_posix()
        slug = filepath.stem
        url = "https://github.com/simonw/til/blob/main/{}".format(path)

        # Do we need to render the markdown?
        path_slug = path.replace("/", "_")
        try:
            row = table.get(path_slug)
            previous_body = row["body"]
            previous_html = row["html"]
        except (NotFoundError, KeyError):
            # No stored row yet, or the row lacks one of the columns.
            previous_body = None
            previous_html = None

        record = {
            "path": path_slug,
            "slug": slug,
            "topic": path.split("/")[0],
            "title": title,
            "url": url,
            "body": body,
        }
        if (body != previous_body) or not previous_html:
            record["html"] = _render_markdown(path, body)

        record.update(all_times[path])
        with db.conn:
            table.upsert(record, alter=True)

    table.enable_fts(
        ["title", "body"], tokenize="porter", create_triggers=True, replace=True
    )
# When executed directly (rather than imported), build the database for the
# directory that contains this script.
if __name__ == "__main__":
    build_database(root)