Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New export management commands #804

Merged
merged 24 commits into from
Feb 20, 2024
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
ca5ba7c
minimalist export_creators
quadrismegistus Jan 30, 2024
ed2385c
minor
quadrismegistus Jan 30, 2024
901a9b9
more flexibly subclassing
quadrismegistus Feb 1, 2024
5bf80b4
more flexibly subclassing (m)
quadrismegistus Feb 1, 2024
bb362ff
more flexibly subclassing (m)
quadrismegistus Feb 1, 2024
046db90
more flexibly subclassing (m)
quadrismegistus Feb 1, 2024
cbc3578
forgot distinct in queryset
quadrismegistus Feb 1, 2024
7fc8279
books
quadrismegistus Feb 7, 2024
0db71e6
these 3 working
quadrismegistus Feb 7, 2024
7f207a5
locations working
quadrismegistus Feb 7, 2024
46da0d8
Update mep/people/management/commands/export_locations.py
quadrismegistus Feb 8, 2024
3eb1121
Update mep/people/management/commands/export_locations.py
quadrismegistus Feb 8, 2024
bf3cdec
Update mep/people/management/commands/export_locations.py
quadrismegistus Feb 8, 2024
5c3ccff
cleanup
quadrismegistus Feb 9, 2024
3cb1c64
cleanup 2
quadrismegistus Feb 9, 2024
88c2d0e
book tests
quadrismegistus Feb 9, 2024
f11f0c2
other tests
quadrismegistus Feb 9, 2024
7fd0853
other tests (cleanup)
quadrismegistus Feb 9, 2024
5c7600c
version with default get_object_data logic
quadrismegistus Feb 12, 2024
c1cbbbe
fixes to a few errors and loose ends
quadrismegistus Feb 20, 2024
cfa6f33
forgot .distinct()
quadrismegistus Feb 20, 2024
1381093
updated tests to match new export_locations logic
quadrismegistus Feb 20, 2024
5e3eb4c
Merge branch 'develop' into new_exports
quadrismegistus Feb 20, 2024
f70e772
adapting fixtures and tests to plural categories
quadrismegistus Feb 20, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 87 additions & 0 deletions mep/accounts/management/commands/export_locations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
"""
Manage command to export location data for use by others.

Generates a CSV and JSON file including details on which member
of the library lived where (if known) during what time period
(if known). The table includes summary details and coordinates
for associated addresses.
"""

from django.db.models import Prefetch
from mep.common.management.export import BaseExport
from mep.common.utils import absolutize_url
from mep.accounts.models import Address


class Command(BaseExport):
    """Export address and location data for library members."""

    # NOTE: inside a class body ``__doc__`` resolves to the class docstring
    # above (not the module docstring), so that sentence is what is used as
    # the command's help text; keep it accurate for this command.
    help = __doc__

    # one exported row per Address
    model = Address

    # column order for the CSV export; get_object_data returns a value
    # for each of these keys
    csv_fields = [
        "member_id",  # slug(s) of the member(s) associated with the address
        "member_uri",
        "care_of_person_id",  # person slug
        "street_address",
        "postal_code",
        "city",
        "arrondissement",
        "country",
        "start_date",
        "end_date",
        "longitude",
        "latitude",
    ]

# def get_queryset(self):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so with the revised logic, no queryset customization is needed? or would prefetching on persons still be useful?

# """
# custom filter needed to return person-address combos,
# so we can pass a one object per row to `get_object_data`
# """
# addresses = Address.objects.prefetch_related(
# Prefetch("account"),
# Prefetch("person"),
# Prefetch("location"),
# )
# res = []
# for addr in addresses.all():
# persons = [addr.person] if addr.person else addr.account.persons.all()
# for person in persons:
# res.append((person, addr))
# return res

def get_base_filename(self):
    """Return the base name (no extension) used for the location export files."""
    return "locations"

def get_object_data(self, obj):
    """
    Generate dictionary of data to export for a single
    :class:`~mep.accounts.models.Address`

    :param obj: the address to export; its related location supplies
        the street, city, country and coordinate values
    :returns: dict with one entry per name in ``csv_fields``;
        ``member_id`` and ``member_uri`` are lists with one item for
        each person associated with the address
    """
    addr = obj
    loc = addr.location

    # An address normally hangs off an account (which may have several
    # people). Fall back to the directly-linked person when there is no
    # account, and to an empty list when neither is set, instead of
    # failing with AttributeError on ``None.persons``.
    if addr.account:
        persons = addr.account.persons.all()
    elif addr.person:
        persons = [addr.person]
    else:
        persons = []

    return dict(
        # Member
        member_id=[person.slug for person in persons],
        member_uri=[
            absolutize_url(person.get_absolute_url()) for person in persons
        ],
        # Address data
        start_date=addr.partial_start_date,
        end_date=addr.partial_end_date,
        care_of_person_id=addr.care_of_person.slug if addr.care_of_person else None,
        # Location data
        street_address=loc.street_address,
        city=loc.city,
        postal_code=loc.postal_code,
        # coordinates are converted to plain floats; missing values stay None
        latitude=float(loc.latitude) if loc.latitude is not None else None,
        longitude=float(loc.longitude) if loc.longitude is not None else None,
        country=loc.country.name if loc.country else None,
        arrondissement=loc.arrondissement(),
    )
48 changes: 47 additions & 1 deletion mep/accounts/tests/test_accounts_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,9 @@
export_events,
import_figgy_cards,
report_timegaps,
export_locations,
)
from mep.accounts.models import Account, Borrow, Event
from mep.accounts.models import Account, Borrow, Event, Address, Location
from mep.books.models import Creator, CreatorType
from mep.common.management.export import StreamArray
from mep.common.utils import absolutize_url
Expand Down Expand Up @@ -618,3 +619,48 @@ def test_command_line(self):
call_command("export_events", "-d", tempdir.name, "-m", 2, stdout=stdout)
# 2 objects (once each)
assert mock_get_obj_data.call_count == 2


class TestExportLocations(TestCase):
    """Tests for the ``export_locations`` manage command."""

    fixtures = ["sample_people"]

    def setUp(self):
        self.cmd = export_locations.Command()
        self.cmd.stdout = StringIO()

    def test_get_queryset(self):
        # the command exports one row per address, so the queryset is
        # expected to yield Address objects rather than (person, address)
        # pairs -- assumes the inherited get_queryset returns the
        # queryset for the command's ``model``
        member = Person.objects.get(pk=189)  # francisque gay, member
        location = Location.objects.get(pk=213)
        address = Address.objects.get(pk=236)
        qs = self.cmd.get_queryset()
        assert address in set(qs)
        assert address.location == location
        assert member in set(address.account.persons.all())

    def test_get_object_data(self):
        # fetch an example address from the fixture & call get_object_data,
        # which takes the address itself (not a person/address tuple)
        address = Address.objects.get(pk=236)
        gay_data = self.cmd.get_object_data(address)

        # member fields are lists with one entry per person on the account
        # slug is 'gay' in sample_people, 'gay-francisque' in db
        assert "gay" in gay_data["member_id"]
        assert "https://example.com/members/gay/" in gay_data["member_uri"]

        # check addresses & coordinates
        assert "3 Rue Garancière" == gay_data["street_address"]
        assert "Paris" == gay_data["city"]
        assert "France" == gay_data["country"]
        assert 48.85101 == gay_data["latitude"]
        assert 2.33590 == gay_data["longitude"]
        assert "75006" == gay_data["postal_code"]
        assert 6 == gay_data["arrondissement"]
        assert gay_data["start_date"] == "1919-01-01"
        assert gay_data["end_date"] == "1930-01-01"
        assert gay_data["care_of_person_id"] == "hemingway"
20 changes: 20 additions & 0 deletions mep/books/fixtures/sample_works.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"sort_title": "Exit Eliza",
"year": 1912,
"slug": "exit-eliza",
"category_id":1,
"work_format": 1,
"uri": "http://www.worldcat.org/oclc/2777112",
"updated_at": "2020-03-27T13:26:15Z"
Expand All @@ -24,6 +25,7 @@
"sort_title": "Grimm Fairy Tales",
"year": null,
"uri": "",
"category_id":1,
"slug": "grimm-fairy-tales",
"work_format": 1,
"updated_at": "2020-03-27T13:26:15Z"
Expand All @@ -38,6 +40,7 @@
"title": "The Kreutzer sonata",
"sort_title": "Kreutzer sonata",
"year": null,
"category_id":1,
"work_format": 1,
"uri": "http://worldcat.org/entity/work/id/4918580916",
"slug": "kreutzer-sonata",
Expand All @@ -52,6 +55,7 @@
"mep_id": null,
"title": "Chronicle of my Life",
"sort_title": "Chronicle of my Life",
"category_id":2,
"year": null,
"uri": "",
"slug": "chronicle-life",
Expand All @@ -68,6 +72,7 @@
"mep_id": null,
"title": "Murder on the Blue Train",
"sort_title": "Murder on the Blue Train",
"category_id":1,
"year": null,
"uri": "",
"slug": "murder-blue-train",
Expand All @@ -88,6 +93,7 @@
"edition_uri": "http://www.worldcat.org/oclc/243873605",
"ebook_url": "http://example.com",
"work_format": 2,
"category_id":2,
"slug": "dial",
"updated_at": "2020-03-27T13:26:15Z"
}
Expand Down Expand Up @@ -269,5 +275,19 @@
"notes": "",
"uri": "http://schema.org/Periodical"
}
},
{
"model": "books.genre",
"pk": 1,
"fields": {
"name": "Fiction"
}
},
{
"model": "books.genre",
"pk": 2,
"fields": {
"name": "Nonfiction"
}
}
]
14 changes: 10 additions & 4 deletions mep/books/management/commands/export_books.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,7 @@
"""

from collections import OrderedDict

from django.db.models import F

from mep.books.models import CreatorType, Work
from mep.common.management.export import BaseExport
from mep.common.utils import absolutize_url
Expand All @@ -36,11 +34,14 @@ class Command(BaseExport):
# query the database at load time (but maybe only a problem for tests)

csv_fields = (
["uri", "title"]
# including "id" to store slug for exports,
# given not all exported entities have a URI
["id", "uri", "title"]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pretty sure all books should have uris; did you encounter any that did not?

+ [creator.lower() for creator in creator_types]
+ [
"year",
"format",
"genre_category",
"uncertain",
"ebook_url",
"volumes_issues",
Expand Down Expand Up @@ -77,17 +78,23 @@ def get_object_data(self, work):
# required properties
data = OrderedDict(
[
("id", work.slug),
("uri", absolutize_url(work.get_absolute_url())),
("title", work.title),
]
)
data.update(self.creator_info(work))
if work.year:
data["year"] = work.year

# format is not currently set for all items
if work.work_format:
data["format"] = work.work_format.name

# genre category
if work.category:
data["genre_category"] = work.category.name

data["uncertain"] = work.is_uncertain

if work.ebook_url:
Expand All @@ -110,7 +117,6 @@ def get_object_data(self, work):

# date last modified
data["updated"] = work.updated_at.isoformat()

return data

def creator_info(self, work):
Expand Down
1 change: 1 addition & 0 deletions mep/books/tests/test_books_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,7 @@ def test_get_object_data(self):
assert data["uri"] == absolutize_url(exit_e.get_absolute_url())
assert data["title"] == exit_e.title
assert data["year"] == exit_e.year
assert data["genre_category"] == exit_e.category.name
assert data["format"] == exit_e.work_format.name
assert not data["uncertain"] # not marked uncertain
assert "work_uri" not in data
Expand Down
9 changes: 7 additions & 2 deletions mep/common/management/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@ def add_arguments(self, parser):
parser.add_argument(
"-d",
"--directory",
help="Specify the directory where files should be generated",
help="Specify the directory where files should be generated. "
"The directory will be created if it does not already exist.",
)
parser.add_argument(
"-m",
Expand All @@ -94,6 +95,8 @@ def handle(self, *args, **kwargs):
# get stream array / generator of data for export
data = self.get_data(kwargs.get("max"))
self.stdout.write("Exporting JSON and CSV")
# ensure directory exists (useful to allow command line user to specify dated dir)
os.makedirs(os.path.dirname(base_filename), exist_ok=True)
rlskoeser marked this conversation as resolved.
Show resolved Hide resolved
# open and initialize CSV file
with open("{}.csv".format(base_filename), "w") as csvfile:
# write utf-8 byte order mark at the beginning of the file
Expand Down Expand Up @@ -146,7 +149,9 @@ def get_data(self, maximum=None):
# grab the first N if maximum is specified
if maximum:
objects = objects[:maximum]
total = objects.count()
total = len(
objects
) # fewer assumptions, allows other (multi model/class) objects
Comment on lines +155 to +157
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

was this needed? are we exporting anything other than database content in the new export scripts?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah, I see it's due to the address person/account issue; I'd like to resolve it there instead

return StreamArray((self.get_object_data(obj) for obj in objects), total)

def get_object_data(self, obj):
Expand Down
19 changes: 15 additions & 4 deletions mep/people/fixtures/sample_people.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,15 @@
"code": "FR"
}
},
{
"model": "people.country",
"pk": 3,
"fields": {
"name": "Ancient Greece",
"geonames_id": "http://sws.geonames.org/8354443/",
"code": "GR"
}
},
{
"model": "people.person",
"pk": 189,
Expand Down Expand Up @@ -78,7 +87,9 @@
"title": "",
"profession": null,
"slug": "aeschylus",
"nationalities": []
"nationalities": [
3
]
}
},
{
Expand Down Expand Up @@ -143,9 +154,9 @@
"location": 213,
"account": 4852,
"person": null,
"start_date": null,
"end_date": null,
"care_of_person": null
"start_date": "1919-01-01",
"end_date": "1930-01-01",
"care_of_person": 224
}
},
{
Expand Down
40 changes: 40 additions & 0 deletions mep/people/management/commands/export_creators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
"""
Manage command to export creator data for use by others.

Generates a CSV and JSON file including details on every creator
(author, translator, editor, etc.) in the database, with details
on creator nationality, gender, and other information.
"""

from mep.people.models import Person
from mep.people.management.commands.export_members import Command as ExportMemberCommand


class Command(ExportMemberCommand):
"""Export creator data."""

csv_fields = [
"id", # no URI for authors so using slug as ID
"name",
"sort_name",
"title",
"gender",
"is_organization",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we have any creator orgs?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like one:

In [2]: Person.objects.filter(creator__isnull=False, is_organization=True)
Out[2]: <PersonQuerySet [<Person pk:10524 Fabian Society>]>

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

wow, fascinating; project full of edge cases

"birth_year",
"death_year",
"viaf_url",
"wikipedia_url",
# related country
"nationalities",
# generic
"notes",
"updated",
]

Comment on lines +24 to +34
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In #684 Josh requested/suggested creator types and associated items - creator types seems like it would be useful. I don't know if you already have a solution for associating books and creators.

def get_queryset(self):
    """Limit the export to people with a creator record, each listed once."""
    creators = Person.objects.filter(creator__isnull=False)
    # the filter spans the creator relation, so drop duplicate rows
    return creators.distinct()

def get_base_filename(self):
    """Return "creators" as the base name of the export files (a subset of people)."""
    return "creators"
Loading
Loading