add clustering to CI #232
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: e2etest | |
on: [push] | |
jobs: | |
geocoder: | |
runs-on: ubuntu-latest | |
steps: | |
- uses: actions/checkout@v4 | |
- uses: ./.github/actions/setup | |
- name: Expand geocache | |
run: | | |
tar -xzf geocache.tgz | |
- name: Run geocoder | |
run: | | |
PYTHONPATH=. poetry run oldnyc/geocode/geocode.py --ids_filter test/random200-ids.txt --output_format id-location.txt --geocode --no-progress-bar > >(tee test/random200-geocoded.txt) 2> >(tee test/random200.logs.txt >&2) | |
# See https://stackoverflow.com/a/692407/388951 for the stdout/stderr redirection | |
- name: Generate intersections | |
run: | | |
export PYTHONPATH=. | |
poetry run python oldnyc/geocode/historic_grid.py | |
poetry run python oldnyc/geocode/osm/generate_intersections.py | |
- name: Generate truth data | |
run: | | |
export PYTHONPATH=. | |
poetry run oldnyc/geocode/geocode.py --output_format geojson --ids_filter data/geocode/random500-ids.txt --geocode > /tmp/images.geojson | |
poetry run oldnyc/geocode/truth/make_localturk_csv.py data/geocode/random500-ids.txt /tmp/images.geojson data/geocode/random500.csv | |
# We don't actually care about the diff on this file, just that make_localturk_csv.py doesn't error out. | |
git checkout data/geocode/random500.csv | |
poetry run oldnyc/geocode/truth/generate_truth_gtjson.py > data/geocode/truth.geojson | |
jq -r '.features[].id' data/geocode/truth.geojson > data/geocode/truth-ids.txt | |
poetry run python oldnyc/geocode/subjects/csv_to_geojson.py data/subjects/out.csv > data/subjects.geojson | |
- name: Check performance on truth data | |
run: | | |
export PYTHONPATH=. | |
poetry run oldnyc/geocode/geocode.py --ids_filter data/geocode/truth-ids.txt --output_format geojson --geocode --no-progress-bar > /tmp/actual.geojson | |
poetry run oldnyc/geocode/calculate_metrics.py --stats_only --truth_data data/geocode/truth.geojson --computed_data /tmp/actual.geojson > test/geocode-performance.txt | |
- name: Clustering | |
run: | | |
export PYTHONPATH=. | |
poetry run oldnyc/geocode/geocode.py --images_ndjson data/images.ndjson --output_format geojson --geocode > /tmp/images.geojson | |
poetry run python oldnyc/geocode/cluster.py /tmp/images.geojson > data/lat-lon-map.txt | |
- name: Check for diffs | |
run: | | |
git diff --exit-code test/ data/ | |
cropper: | |
runs-on: ubuntu-latest | |
steps: | |
- uses: actions/checkout@v4 | |
- uses: ./.github/actions/setup | |
- name: Run cropper | |
run: | | |
poetry run oldnyc/crop/crop_to_text.py --beta 2 --overwrite test/721675b.jpg | |
poetry run oldnyc/crop/crop_to_text.py --beta 2 --overwrite --border_only --output_pattern '%s.border.jpg' test/721675b.jpg | |
- name: Run photo detector | |
run: | | |
poetry run oldnyc/crop/find_pictures.py test/*f.jpg > test/detected-photos.ndjson | |
- name: Check for diffs | |
run: | | |
git diff --exit-code test/ | |
generation: | |
runs-on: ubuntu-latest | |
steps: | |
- uses: actions/checkout@v4 | |
- uses: ./.github/actions/setup | |
- name: Clone web site | |
run: | | |
cd .. | |
git clone https://github.com/oldnyc/oldnyc.github.io.git | |
- name: Run ingestion | |
run: | | |
PYTHONPATH=. poetry run oldnyc/ingest/ingest.py | |
- name: Check for diffs | |
run: | | |
git diff --exit-code data/ | |
- name: Split records to photos | |
run: | | |
export PYTHONPATH=. | |
poetry run oldnyc/crop/records_to_photos.py data/images.ndjson data/crops.ndjson data/photos.ndjson | |
# TODO: remove the OCR shootout after the dust settles, it only really needs to be work once. | |
- name: Run OCR shootout | |
run: | | |
PYTHONPATH=. poetry run oldnyc/ocr/ocr_shootout.py | |
- name: Check for diffs | |
run: | | |
git diff --exit-code data/ | |
- name: Generate geocodes | |
run: | | |
export PYTHONPATH=. | |
tar -xzf geocache.tgz | |
poetry run oldnyc/geocode/geocode.py --lat_lon_map data/lat-lon-map.txt --output_format lat-lon-to-ids.json --geocode --no-progress-bar > data/lat-lon-to-ids.json 2> >(tee >( sed -n '/Finalizing/,$p' > test/geocoding-stats.txt) >&2) | |
- name: Generate static site | |
run: | | |
export PYTHONPATH=. | |
echo '{"fixes": {}}' > data/feedback/fixes.json | |
poetry run oldnyc/site/generate_static_site.py --leave-timestamps-unchanged 2> >(tee test/site-stats.txt >&2) | |
- name: Check for diffs | |
run: | | |
git diff --exit-code test/ | |
cd ../oldnyc.github.io | |
git diff --exit-code |