Skip to content

Commit

Permalink
Add pytest, static test file
Browse files Browse the repository at this point in the history
  • Loading branch information
mbrandt00 committed Nov 16, 2024
1 parent 642758d commit 27a2d50
Show file tree
Hide file tree
Showing 10 changed files with 2,375 additions and 0 deletions.
1 change: 1 addition & 0 deletions data_ingest/.gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
tests/scrape_responses/*.html linguist-generated
1 change: 1 addition & 0 deletions data_ingest/.gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
venv/*
__pycache__/
1 change: 1 addition & 0 deletions data_ingest/.python-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3.12
Empty file added data_ingest/README.md
Empty file.
2 changes: 2 additions & 0 deletions data_ingest/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
def main():
    """Announce on stdout that the import run is starting."""
    print("starting import")


# Only run when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
13 changes: 13 additions & 0 deletions data_ingest/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Project metadata and tool configuration for the data-ingest scripts.
[project]
name = "data-ingest"
version = "0.1.0"
description = "Scripts to scrape data and insert into database"
readme = "README.md"
requires-python = ">=3.12"
# NOTE(review): pytest and ruff are development tools but are listed as runtime
# dependencies here — consider moving them to a dev dependency group.
# NOTE(review): utils.py and the tests import `bs4`, which is not declared in this
# list — presumably it is installed transitively; confirm and declare it if not.
dependencies = [
"imslp>=0.2.3",
"pytest>=8.3.3",
"ruff>=0.7.4",
]
[tool.pytest.ini_options]
# Adds the project root to sys.path during test runs so that tests can import
# top-level modules directly (e.g. `from utils import parse_movements`).
pythonpath = ["."]
2,014 changes: 2,014 additions & 0 deletions data_ingest/tests/scrape_responses/pieces.html

Large diffs are not rendered by default.

15 changes: 15 additions & 0 deletions data_ingest/tests/test_parse_movements.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from bs4 import BeautifulSoup

from utils import parse_movements


def test_parse_movements():
    """Check that parse_movements returns well-formed entries for the saved page.

    Loads the static HTML fixture, locates the ``div.wi_body`` container and
    verifies that the parser returns a non-empty list whose entries expose at
    least the ``name`` and ``number`` keys.

    Raises:
        ValueError: If the fixture does not contain a ``div`` with class
            ``wi_body`` (the fixture itself is broken, not the parser).
    """
    # Explicit encoding keeps the test independent of the platform default.
    with open(
        "tests/scrape_responses/pieces.html", "r", encoding="utf-8"
    ) as file:
        html_content = file.read()

    soup = BeautifulSoup(html_content, "html.parser")
    general_info_div = soup.find("div", class_="wi_body")
    if general_info_div is None:
        raise ValueError("Could not find 'div' with class 'wi_body'")

    result = parse_movements(general_info_div)

    assert isinstance(result, list)
    # Fail with a clear message instead of an IndexError on result[0] below.
    assert result, "parse_movements returned no entries for the fixture"
    # A set never compares equal to a list, so compare sets; use a subset check
    # because entries also carry other keys (e.g. "type", "key_signature").
    assert {"name", "number"} <= set(result[0])
64 changes: 64 additions & 0 deletions data_ingest/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import re

from bs4 import NavigableString, Tag


def _split_key_signature(line):
    """Split ``"Name (Key)"`` into ``(name, key)``.

    The key is ``None`` when the line has no trailing parenthetical.
    """
    if "(" in line and line.endswith(")"):
        # Split on the LAST "(" so parentheses inside the name are preserved.
        name, key_signature = line.rsplit("(", 1)
        return name.strip(), key_signature.rstrip(")").strip()
    return line.strip(), None


def _collect_entries(container, item_tag, entry_type):
    """Build one entry dict per ``item_tag`` element found under ``container``."""
    entries = []
    # Number entries by their 1-based position within this list.
    for number, item in enumerate(container.find_all(item_tag), start=1):
        # Normalise non-breaking spaces so names and keys use plain spaces.
        line = item.get_text(strip=True).replace("\xa0", " ")
        name, key_signature = _split_key_signature(line)
        entries.append(
            {
                "type": entry_type,
                "number": number,
                "name": name,
                "key_signature": key_signature,
            }
        )
    return entries


def parse_movements(data: Tag | NavigableString):
    """Extract movement and piece entries from a parsed HTML node.

    Looks at the first ``<ol>`` under ``data`` (each ``<li>`` becomes a
    ``"movement"`` entry) and then at the first ``<dl>`` (each ``<dd>``
    becomes a ``"piece"`` entry).

    Args:
        data: Node to search; it must support ``find`` and the found lists
            must support ``find_all`` / ``get_text``.

    Returns:
        A list of dicts with the keys ``"type"`` (``"movement"`` or
        ``"piece"``), ``"number"`` (1-based position within its own list),
        ``"name"`` and ``"key_signature"`` (text of a trailing parenthetical,
        or ``None`` when there is none). Movements come before pieces. The
        list is empty when neither an ``<ol>`` nor a ``<dl>`` is found.
    """
    movements = []
    sources = (("ol", "li", "movement"), ("dl", "dd", "piece"))
    for list_tag, item_tag, entry_type in sources:
        container = data.find(list_tag)
        if container:
            movements.extend(_collect_entries(container, item_tag, entry_type))
    return movements
264 changes: 264 additions & 0 deletions data_ingest/uv.lock

Large diffs are not rendered by default.

0 comments on commit 27a2d50

Please sign in to comment.