Add scripts to misc/ for computing and applying diffs of mypy caches (#8906)

This can (with some infrastructure) allow for much faster distribution of cache artifacts.
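The intended flow, sketched here from the two scripts' argparse interfaces (the cache paths and the diff file name are placeholders, and the misc/ script names are assumed): compute a diff between an old cache directory and a new one, ship the small diff file, then patch the old cache in place.

    python3 misc/diff_cache.py old/.mypy_cache/3.8 new/.mypy_cache/3.8 cache.diff
    python3 misc/apply_cache_diff.py old/.mypy_cache/3.8 cache.diff

Both scripts take --sqlite when the caches use mypy's sqlite metadata store.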
Showing 2 changed files with 207 additions and 0 deletions.
misc/apply_cache_diff.py
@@ -0,0 +1,60 @@
#!/usr/bin/env python3
"""Script for applying a cache diff.

With some infrastructure, this can allow for distributing small cache diffs to users in
many cases instead of full cache artifacts.
"""

import argparse
import json
import os
import sys

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from mypy.metastore import MetadataStore, FilesystemMetadataStore, SqliteMetadataStore


def make_cache(input_dir: str, sqlite: bool) -> MetadataStore:
    if sqlite:
        return SqliteMetadataStore(input_dir)
    else:
        return FilesystemMetadataStore(input_dir)


def apply_diff(cache_dir: str, diff_file: str, sqlite: bool = False) -> None:
    cache = make_cache(cache_dir, sqlite)
    with open(diff_file, "r") as f:
        diff = json.load(f)

    old_deps = json.loads(cache.read("@deps.meta.json"))

    for file, data in diff.items():
        if data is None:
            cache.remove(file)
        else:
            cache.write(file, data)
            if file.endswith('.meta.json') and "@deps" not in file:
                meta = json.loads(data)
                old_deps["snapshot"][meta["id"]] = meta["hash"]

    cache.write("@deps.meta.json", json.dumps(old_deps))

    cache.commit()


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument('--sqlite', action='store_true', default=False,
                        help='Use a sqlite cache')
    parser.add_argument('cache_dir',
                        help="Directory for the cache")
    parser.add_argument('diff',
                        help="Cache diff file")
    args = parser.parse_args()

    apply_diff(args.cache_dir, args.diff, args.sqlite)


if __name__ == '__main__':
    main()
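A minimal sketch of the diff format apply_diff consumes, inferred from the loop above; the module names ("foo", "bar") and field values are invented. Each key is a cache-relative file name; each value is either the new file contents as a string, or None to delete that file:

import json

diff = {
    # New or changed files map to their full new contents.
    "foo.meta.json": json.dumps({"id": "foo", "hash": "abc123"}),
    "foo.data.json": json.dumps({"names": "..."}),
    # Deleted files map to None; apply_diff calls cache.remove() on them.
    "bar.data.json": None,
}
with open("cache.diff", "w") as f:
    json.dump(diff, f)

Note that apply_diff also rewrites @deps.meta.json itself: for every updated *.meta.json entry it patches the "snapshot" table with that module's "id" and "hash", which is why the diff can omit @deps.meta.json entirely.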
misc/diff_cache.py
@@ -0,0 +1,147 @@
#!/usr/bin/env python3
"""Produce a diff between mypy caches.

With some infrastructure, this can allow for distributing small cache diffs to users in
many cases instead of full cache artifacts.
"""

import argparse
import json
import os
import sys

from collections import defaultdict
from typing import Any, Dict, Optional, Set

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from mypy.metastore import FilesystemMetadataStore, MetadataStore, SqliteMetadataStore


def make_cache(input_dir: str, sqlite: bool) -> MetadataStore:
    if sqlite:
        return SqliteMetadataStore(input_dir)
    else:
        return FilesystemMetadataStore(input_dir)


def merge_deps(all: Dict[str, Set[str]], new: Dict[str, Set[str]]) -> None:
    for k, v in new.items():
        all.setdefault(k, set()).update(v)


def load(cache: MetadataStore, s: str) -> Any:
    data = cache.read(s)
    obj = json.loads(data)
    if s.endswith(".meta.json"):
        # For meta files, zero out the mtimes and sort the
        # dependencies to avoid spurious conflicts
        obj["mtime"] = 0
        obj["data_mtime"] = 0
        if "dependencies" in obj:
            all_deps = obj["dependencies"] + obj["suppressed"]
            num_deps = len(obj["dependencies"])
            thing = list(zip(all_deps, obj["dep_prios"], obj["dep_lines"]))

            def unzip(x: Any) -> Any:
                return zip(*x) if x else ((), (), ())

            obj["dependencies"], prios1, lines1 = unzip(sorted(thing[:num_deps]))
            obj["suppressed"], prios2, lines2 = unzip(sorted(thing[num_deps:]))
            obj["dep_prios"] = prios1 + prios2
            obj["dep_lines"] = lines1 + lines2
    if s.endswith(".deps.json"):
        # For deps files, sort the deps to avoid spurious mismatches
        for v in obj.values():
            v.sort()
    return obj


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--verbose", action="store_true", default=False, help="Increase verbosity"
    )
    parser.add_argument(
        "--sqlite", action="store_true", default=False, help="Use a sqlite cache"
    )
    parser.add_argument("input_dir1", help="Input directory for the cache")
    parser.add_argument("input_dir2", help="Input directory for the cache")
    parser.add_argument("output", help="Output file")
    args = parser.parse_args()

    cache1 = make_cache(args.input_dir1, args.sqlite)
    cache2 = make_cache(args.input_dir2, args.sqlite)

    type_misses: Dict[str, int] = defaultdict(int)
    type_hits: Dict[str, int] = defaultdict(int)

    updates: Dict[str, Optional[str]] = {}

    deps1: Dict[str, Set[str]] = {}
    deps2: Dict[str, Set[str]] = {}

    misses = hits = 0
    cache1_all = list(cache1.list_all())
    for s in cache1_all:
        obj1 = load(cache1, s)
        try:
            obj2 = load(cache2, s)
        except FileNotFoundError:
            obj2 = None

        typ = s.split(".")[-2]
        if obj1 != obj2:
            misses += 1
            type_misses[typ] += 1

            # Collect the dependencies instead of including them directly in the diff
            # so we can produce a much smaller direct diff of them.
            if ".deps." not in s:
                if obj2 is not None:
                    updates[s] = json.dumps(obj2)
                else:
                    updates[s] = None
            elif obj2:
                merge_deps(deps1, obj1)
                merge_deps(deps2, obj2)
        else:
            hits += 1
            type_hits[typ] += 1

    cache1_all_set = set(cache1_all)
    for s in cache2.list_all():
        if s not in cache1_all_set:
            updates[s] = cache2.read(s)

    # Compute what deps have been added and merge them all into the
    # @root deps file.
    new_deps = {k: deps2.get(k, set()) - deps1.get(k, set()) for k in deps2}
    new_deps = {k: v for k, v in new_deps.items() if v}
    try:
        root_deps = load(cache1, "@root.deps.json")
    except FileNotFoundError:
        root_deps = {}
    merge_deps(new_deps, root_deps)

    new_deps_json = {k: list(v) for k, v in new_deps.items() if v}
    updates["@root.deps.json"] = json.dumps(new_deps_json)

    # Drop updates to deps.meta.json for size reasons. The diff
    # applier will manually fix it up.
    updates.pop("./@deps.meta.json", None)
    updates.pop("@deps.meta.json", None)

    print("Generated incremental cache:", hits, "hits,", misses, "misses")
    if args.verbose:
        print("hits", type_hits)
        print("misses", type_misses)

    with open(args.output, "w") as f:
        json.dump(updates, f)


if __name__ == "__main__":
    main()
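A worked illustration of the normalization load() applies to *.meta.json entries, using an invented meta object: mtimes are zeroed and the (dependency, priority, line) triples are sorted as units, so timestamp and ordering differences between otherwise identical caches do not register as misses.

# Invented meta object; only the fields load() touches are shown, and
# "suppressed" is left empty to keep the sketch short.
meta = {
    "mtime": 1590000000,
    "data_mtime": 1590000001,
    "dependencies": ["os", "abc"],
    "suppressed": [],
    "dep_prios": [10, 5],
    "dep_lines": [3, 1],
}
meta["mtime"] = meta["data_mtime"] = 0
# Sort the triples together, mirroring load()'s unzip(sorted(...)) step.
triples = sorted(zip(meta["dependencies"], meta["dep_prios"], meta["dep_lines"]))
meta["dependencies"], meta["dep_prios"], meta["dep_lines"] = map(list, zip(*triples))
print(meta["dependencies"], meta["dep_prios"], meta["dep_lines"])
# -> ['abc', 'os'] [5, 10] [1, 3]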