Skip to content

Commit

Permalink
refactor: add pre-commit hooks to verify date consistency (#25)
Browse files Browse the repository at this point in the history
  • Loading branch information
j0rd1smit authored Jul 8, 2024
1 parent e0732e5 commit 02ac038
Show file tree
Hide file tree
Showing 12 changed files with 123 additions and 5 deletions.
12 changes: 12 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,15 @@ repos:
- id: markdownlint-fix
name: Format Markdown files
exclude: ^utils

- repo: local
hooks:
- id: verify_data_matches_folder
name: verify_data_matches_folder
files: ^content/(blog|til)/.*/index\.md$
stages: [commit]
language: python
additional_dependencies:
- python-frontmatter==1.1.0
args: []
entry: python scripts/verify_data_matches_folder.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
---
title: "DIY auto-grad Engine: A Step-by-Step Guide to Calculating Derivatives Automatically"
description: "Want to understand the magic of Jax, PyTorch, and TensorFlow auto-grad engines? The best to learn is to build your DIY version from scratch in Python."
date: 2022-12-22T16:37:27+01:00
date: 2022-11-06T16:37:27+01:00
publishdate: 2022-12-22T16:37:27+01:00
tags:
- python
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
---
title: "How to avoid orientation bugs in Computer Vision labeling?"
description: "Image orientation metadata can cause strange bugs. This post teaches you how to identify these bugs and how to fix them."
date: 2024-04-13T19:54:44+02:00
date: 2024-03-13T19:54:44+02:00
publishdate: 2024-03-13T19:54:44+02:00
tags:
- computer vision
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
---
title: "How to Upload (Pre)-Annotations to Roboflow"
description: "Learn how to upload annotations and predictions to Roboflow using the Python SDK, allowing you to import pre-existing datasets or create active learning loops programmatically."
date: 2024-06-13T21:44:10+02:00
publishdate: 2024-06-13T21:44:10+02:00
date: 2024-04-03T21:44:10+02:00
publishdate: 2024-04-03T21:44:10+02:00
tags:
- computer vision
- labeling
Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
invoke
invoke
python-frontmatter
105 changes: 105 additions & 0 deletions scripts/verify_data_matches_folder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import datetime
from argparse import Namespace, ArgumentParser
from pathlib import Path
import re
import sys

import frontmatter


def main() -> None:
    """Verify the date consistency of every markdown file passed on the CLI.

    Paths that do not exist, are directories, or do not end in ``.md`` are
    skipped. For each remaining file the date in the parent folder name, the
    ``date`` front-matter field and the ``publishdate`` front-matter field are
    compared. All problems are collected first and then printed grouped by
    file; the process exits with status 1 if at least one problem was found.
    """
    failures = {}

    for path in parse_args().paths:
        checkable = path.exists() and not path.is_dir() and path.suffix == ".md"
        if not checkable:
            continue

        folder_date = _get_folder_data(path)
        publish_date = _get_publish_date(path)
        creation_data = _get_data(path)

        problems = []

        if folder_date is None:
            problems.append(f"Folder name does not contain a date: {path.parent.name}")

        if publish_date is None:
            problems.append("Publish date not found in metadata")

        if creation_data is None:
            problems.append("Creation date not found in metadata")

        if folder_date != creation_data:
            problems.append(f"Folder date ({folder_date}) does not match creation date ({creation_data})")

        # Ordering can only be judged when both dates are actually present.
        both_known = publish_date is not None and creation_data is not None
        if both_known and creation_data > publish_date:
            problems.append(f"Publish date ({publish_date}) is before creation date ({creation_data})")

        if problems:
            failures[path] = problems

    if not failures:
        return

    for failed_path, problems in failures.items():
        print(f"Errors in {failed_path}")
        for problem in problems:
            print(f"- {problem}")

    # A non-zero exit status is what makes the pre-commit hook fail.
    sys.exit(1)


def parse_args() -> Namespace:
    """Parse the command line and return the resulting namespace.

    The only argument is the positional ``paths``: one or more file paths,
    each converted to a ``pathlib.Path``.
    """
    cli = ArgumentParser()
    cli.add_argument(
        "paths",
        type=Path,
        nargs="+",
        help="Paths to the files to verify",
    )
    parsed = cli.parse_args()
    return parsed

def _get_folder_data(path: Path) -> datetime.date:
folder_name = path.parent.name

match = re.match(r"(\d{4}-\d{2}-\d{2})", folder_name)
if match is None:
raise None

return datetime.datetime.strptime(match.group(1), "%Y-%m-%d").date()

def _get_publish_date(path: Path) -> datetime.date | None:
    """Return the ``publishdate`` front-matter value of a markdown file.

    Args:
        path: Path to a ``.md`` file with front matter.

    Returns:
        The publish date, or ``None`` when the ``publishdate`` key is absent
        or its value cannot be parsed as a date.
    """
    assert path.suffix == ".md", "File must be a markdown file"

    # Read as UTF-8 explicitly so the result does not depend on the platform
    # default encoding.
    with path.open(encoding="utf-8") as file:
        meta_data = frontmatter.load(file)

    if "publishdate" not in meta_data:
        # Signal "missing" with None so the caller can report it; raising None
        # is not valid Python and would crash with a TypeError instead.
        return None

    try:
        return _parse_metadata_date(meta_data["publishdate"]).date()
    except (TypeError, ValueError):
        # Wrong value type or a string in an unexpected format: treat as missing.
        return None

def _get_data(path: Path) -> datetime.date | None:
    """Return the ``date`` (creation date) front-matter value of a file.

    Args:
        path: Path to a file with front matter.

    Returns:
        The creation date, or ``None`` when the ``date`` key is absent or its
        value cannot be parsed as a date.
    """
    # Read as UTF-8 explicitly so the result does not depend on the platform
    # default encoding.
    with path.open(encoding="utf-8") as file:
        meta_data = frontmatter.load(file)

    if "date" not in meta_data:
        # Signal "missing" with None so the caller can report it; raising None
        # is not valid Python and would crash with a TypeError instead.
        return None

    try:
        return _parse_metadata_date(meta_data["date"]).date()
    except (TypeError, ValueError):
        # Wrong value type or a string in an unexpected format: treat as missing.
        return None

def _parse_metadata_date(date: str|datetime.datetime) -> datetime.datetime:
if isinstance(date, datetime.datetime):
return date
return datetime.datetime.strptime(date, "%Y-%m-%dT%H:%M:%S%z")

# Script entry point: run the checks only when executed directly, not on import.
if __name__ == "__main__":
    main()

0 comments on commit 02ac038

Please sign in to comment.