-
Notifications
You must be signed in to change notification settings - Fork 1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
New export management commands #804
Changes from 19 commits
ca5ba7c
ed2385c
901a9b9
5bf80b4
bb362ff
046db90
cbc3578
7fc8279
0db71e6
7f207a5
46da0d8
3eb1121
bf3cdec
5c3ccff
3cb1c64
88c2d0e
f11f0c2
7fd0853
5c7600c
c1cbbbe
cfa6f33
1381093
5e3eb4c
f70e772
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
""" | ||
Manage command to export location data for use by others. | ||
|
||
Generates a CSV and JSON file including details on which member | ||
of the library lived where (if known) during what time period | ||
(if known). The table includes summary details and coordinates | ||
for associated addresses. | ||
""" | ||
|
||
from django.db.models import Prefetch | ||
from mep.common.management.export import BaseExport | ||
from mep.common.utils import absolutize_url | ||
from mep.accounts.models import Address | ||
|
||
|
||
class Command(BaseExport):
    """Export member address and location data."""

    # Inside the class body ``__doc__`` resolves to the class docstring
    # above, so that summary is what is used as the command help text.
    help = __doc__

    model = Address

    csv_fields = [
        "member_id",  # member slug
        "member_uri",
        "care_of_person_id",  # person slug
        "street_address",
        "postal_code",
        "city",
        "arrondissement",
        "country",
        "start_date",
        "end_date",
        "longitude",
        "latitude",
    ]

    def get_queryset(self):
        """
        Extend the base queryset to prefetch the relations read by
        :meth:`get_object_data` (account and its persons, person, and
        location), so they are loaded in bulk rather than once per address.
        """
        return (
            super()
            .get_queryset()
            .prefetch_related(
                "account__persons",
                "person",
                "location",
            )
        )

    def get_base_filename(self):
        """Return the base filename for the export files ("locations")."""
        return "locations"

    def get_object_data(self, obj):
        """
        Generate dictionary of data to export for a single
        :class:`~mep.accounts.models.Address`.

        :param obj: the address to export
        :returns: dict with member slugs and URIs (lists, since an address
            may be associated with more than one person), address dates,
            the care-of person slug (or None), and location details
        """
        loc = obj.location

        # An address may be attached directly to a person instead of an
        # account; only go through the account when there is one, so a
        # missing account does not raise AttributeError.
        if obj.person:
            persons = [obj.person]
        elif obj.account:
            persons = list(obj.account.persons.all())
        else:
            persons = []

        care_of = obj.care_of_person

        # required properties
        return dict(
            # Member
            member_id=[person.slug for person in persons],
            member_uri=[
                absolutize_url(person.get_absolute_url()) for person in persons
            ],
            # Address data
            start_date=obj.partial_start_date,
            end_date=obj.partial_end_date,
            care_of_person_id=care_of.slug if care_of else None,
            # Location data
            street_address=loc.street_address,
            city=loc.city,
            postal_code=loc.postal_code,
            # coordinates are converted to float when present
            latitude=float(loc.latitude) if loc.latitude is not None else None,
            longitude=float(loc.longitude) if loc.longitude is not None else None,
            country=loc.country.name if loc.country else None,
            arrondissement=loc.arrondissement(),
        )
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,9 +8,7 @@ | |
""" | ||
|
||
from collections import OrderedDict | ||
|
||
from django.db.models import F | ||
|
||
from mep.books.models import CreatorType, Work | ||
from mep.common.management.export import BaseExport | ||
from mep.common.utils import absolutize_url | ||
|
@@ -36,11 +34,14 @@ class Command(BaseExport): | |
# query the database at load time (but maybe only a problem for tests) | ||
|
||
csv_fields = ( | ||
["uri", "title"] | ||
# including "id" to store slug for exports, | ||
# given not all exported entities have a URI | ||
["id", "uri", "title"] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Pretty sure all books should have URIs; did you encounter any that did not? |
||
+ [creator.lower() for creator in creator_types] | ||
+ [ | ||
"year", | ||
"format", | ||
"genre_category", | ||
"uncertain", | ||
"ebook_url", | ||
"volumes_issues", | ||
|
@@ -77,17 +78,23 @@ def get_object_data(self, work): | |
# required properties | ||
data = OrderedDict( | ||
[ | ||
("id", work.slug), | ||
("uri", absolutize_url(work.get_absolute_url())), | ||
("title", work.title), | ||
] | ||
) | ||
data.update(self.creator_info(work)) | ||
if work.year: | ||
data["year"] = work.year | ||
|
||
# format is not currently set for all items | ||
if work.work_format: | ||
data["format"] = work.work_format.name | ||
|
||
# genre category | ||
if work.category: | ||
data["genre_category"] = work.category.name | ||
|
||
data["uncertain"] = work.is_uncertain | ||
|
||
if work.ebook_url: | ||
|
@@ -110,7 +117,6 @@ def get_object_data(self, work): | |
|
||
# date last modified | ||
data["updated"] = work.updated_at.isoformat() | ||
|
||
return data | ||
|
||
def creator_info(self, work): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -68,7 +68,8 @@ def add_arguments(self, parser): | |
parser.add_argument( | ||
"-d", | ||
"--directory", | ||
help="Specify the directory where files should be generated", | ||
help="Specify the directory where files should be generated. " | ||
"The directory will be created if it does not already exist.", | ||
) | ||
parser.add_argument( | ||
"-m", | ||
|
@@ -94,6 +95,8 @@ def handle(self, *args, **kwargs): | |
# get stream array / generator of data for export | ||
data = self.get_data(kwargs.get("max")) | ||
self.stdout.write("Exporting JSON and CSV") | ||
# ensure directory exists (useful to allow command line user to specify dated dir) | ||
os.makedirs(os.path.dirname(base_filename), exist_ok=True) | ||
rlskoeser marked this conversation as resolved.
Show resolved
Hide resolved
|
||
# open and initialize CSV file | ||
with open("{}.csv".format(base_filename), "w") as csvfile: | ||
# write utf-8 byte order mark at the beginning of the file | ||
|
@@ -146,7 +149,9 @@ def get_data(self, maximum=None): | |
# grab the first N if maximum is specified | ||
if maximum: | ||
objects = objects[:maximum] | ||
total = objects.count() | ||
total = len( | ||
objects | ||
) # fewer assumptions, allows other (multi model/class) objects | ||
Comment on lines
+155
to
+157
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Was this needed? Are we exporting anything other than database content in the new export scripts? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ah, I see it's due to the address person/account issue; I'd like to resolve it there instead. |
||
return StreamArray((self.get_object_data(obj) for obj in objects), total) | ||
|
||
def get_object_data(self, obj): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
""" | ||
Manage command to export creator data for use by others. | ||
|
||
Generates a CSV and JSON file including details on every creator | ||
(author, translator, editor, etc.) in the database, with details | ||
on creator nationality, gender, and other information. | ||
""" | ||
|
||
from mep.people.models import Person | ||
from mep.people.management.commands.export_members import Command as ExportMemberCommand | ||
|
||
|
||
class Command(ExportMemberCommand): | ||
"""Export creator data.""" | ||
|
||
csv_fields = [ | ||
"id", # no URI for authors so using slug as ID | ||
"name", | ||
"sort_name", | ||
"title", | ||
"gender", | ||
"is_organization", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. do we have any creator orgs? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looks like one:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. wow, fascinating; project full of edge cases |
||
"birth_year", | ||
"death_year", | ||
"viaf_url", | ||
"wikipedia_url", | ||
# related country | ||
"nationalities", | ||
# generic | ||
"notes", | ||
"updated", | ||
] | ||
|
||
Comment on lines
+24
to
+34
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In #684 Josh requested/suggested creator types and associated items - creator types seems like it would be useful. I don't know if you already have a solution for associating books and creators. |
||
def get_queryset(self):
    """Restrict the export to people that have a related creator record."""
    with_creator = Person.objects.filter(creator__isnull=False)
    # de-duplicate the filtered results
    return with_creator.distinct()
|
||
def get_base_filename(self):
    """Return "creators" as the base name for the generated export files."""
    base_name = "creators"
    return base_name
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
so with the revised logic, no queryset customization is needed? or would prefetching on persons still be useful?