Skip to content

Commit

Permalink
Remove the directory importer script 🔥 (PP-213) (#1345)
Browse files Browse the repository at this point in the history
  • Loading branch information
jonathangreen authored Aug 31, 2023
1 parent 77188cc commit 13513bc
Show file tree
Hide file tree
Showing 14 changed files with 7 additions and 1,917 deletions.
11 changes: 0 additions & 11 deletions api/admin/controller/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -465,17 +465,6 @@ def _get_collection_protocols(self, provider_apis):
protocols = self._get_integration_protocols(
provider_apis, protocol_name_attr="NAME"
)
protocols.append(
{
"name": ExternalIntegration.MANUAL,
"label": _("Manual import"),
"description": _(
"Books will be manually added to the circulation manager, "
"not imported automatically through a protocol."
),
"settings": [],
}
)

return protocols

Expand Down
420 changes: 0 additions & 420 deletions api/onix.py

This file was deleted.

14 changes: 0 additions & 14 deletions bin/directory_import

This file was deleted.

113 changes: 3 additions & 110 deletions core/metadata_layer.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,15 @@
import csv
import datetime
import logging
import re
from collections import defaultdict
from typing import List, Optional

from dateutil.parser import parse
from pymarc import MARCReader
from sqlalchemy.orm.session import Session
from sqlalchemy.sql.expression import and_, or_

from .analytics import Analytics
from .classifier import NO_NUMBER, NO_VALUE, Classifier
from .classifier import NO_NUMBER, NO_VALUE
from .model import (
Classification,
Collection,
Expand All @@ -46,9 +44,9 @@
from .model.configuration import ExternalIntegrationLink
from .model.licensing import LicenseFunctions, LicenseStatus
from .util import LanguageCodes
from .util.datetime_helpers import strptime_utc, to_utc, utc_now
from .util.datetime_helpers import to_utc, utc_now
from .util.median import median
from .util.personal_names import display_name_to_sort_name, name_tidy
from .util.personal_names import display_name_to_sort_name


class ReplacementPolicy:
Expand Down Expand Up @@ -2380,108 +2378,3 @@ def _date_field(self, row, field_name):
self.log.warning('Could not parse date "%s"' % value)
value = None
return value


class MARCExtractor:

"""Transform a MARC file into a list of Metadata objects.
This is not totally general, but it's a good start.
"""

# Common things found in a MARC record after the name of the author
# which we sould like to remove.
END_OF_AUTHOR_NAME_RES = [
re.compile(r",\s+[0-9]+-"), # Birth year
re.compile(r",\s+active "),
re.compile(r",\s+graf,"),
re.compile(r",\s+author."),
]

@classmethod
def name_cleanup(cls, name):
# Turn 'Dante Alighieri, 1265-1321, author.'
# into 'Dante Alighieri'.
for regex in cls.END_OF_AUTHOR_NAME_RES:
match = regex.search(name)
if match:
name = name[: match.start()]
break
name = name_tidy(name)
return name

@classmethod
def parse_year(cls, value):
"""Handle a publication year that may not be in the right format."""
for format in ("%Y", "%Y."):
try:
return strptime_utc(value, format)
except ValueError:
continue
return None

@classmethod
def parse(cls, file, data_source_name, default_medium_type=None):
reader = MARCReader(file)
metadata_records = []

for record in reader:
title = record.title
if title.endswith(" /"):
title = title[: -len(" /")]
issued_year = cls.parse_year(record.pubyear)
publisher = record.publisher
if publisher.endswith(","):
publisher = publisher[:-1]

links = []
summary = record.notes[0]["a"]

if summary:
summary_link = LinkData(
rel=Hyperlink.DESCRIPTION,
media_type=Representation.TEXT_PLAIN,
content=summary,
)
links.append(summary_link)

isbn = record["020"]["a"].split(" ")[0]
primary_identifier = IdentifierData(Identifier.ISBN, isbn)

subjects = [
SubjectData(
Classifier.FAST,
subject["a"],
)
for subject in record.subjects
]

author = record.author
if author:
author = cls.name_cleanup(author)
author_names = [author]
else:
author_names = ["Anonymous"]
contributors = [
ContributorData(
sort_name=author,
roles=[Contributor.AUTHOR_ROLE],
)
for author in author_names
]

metadata_records.append(
Metadata(
data_source=data_source_name,
title=title,
language="eng",
medium=Edition.BOOK_MEDIUM,
publisher=publisher,
issued=issued_year,
primary_identifier=primary_identifier,
subjects=subjects,
contributors=contributors,
links=links,
)
)
return metadata_records
2 changes: 0 additions & 2 deletions core/model/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,6 @@ class ExternalIntegration(Base):
ODL = "ODL"
ODL2 = "ODL 2.0"
LCP = DataSourceConstants.LCP
MANUAL = DataSourceConstants.MANUAL
PROQUEST = DataSourceConstants.PROQUEST

# These protocols were used on the Content Server when mirroring
Expand All @@ -201,7 +200,6 @@ class ExternalIntegration(Base):
AXIS_360,
GUTENBERG,
ENKI,
MANUAL,
]

# Some integrations with LICENSE_GOAL imply that the data and
Expand Down
Loading

0 comments on commit 13513bc

Please sign in to comment.