Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move ISBN lookup to identifier module. (PP-1948) #2196

Merged
merged 1 commit into from
Nov 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,8 @@
from sqlalchemy.engine import Connection
from sqlalchemy.orm.session import Session

from palace.manager.sqlalchemy.model.identifier import Identifier
from palace.manager.sqlalchemy.model.time_tracking import (
_isbn_for_identifier,
_title_for_identifier,
)
from palace.manager.sqlalchemy.model.identifier import Identifier, isbn_for_identifier
from palace.manager.sqlalchemy.model.time_tracking import _title_for_identifier
from palace.manager.sqlalchemy.util import get_one

# revision identifiers, used by Alembic.
Expand Down Expand Up @@ -231,7 +228,7 @@ def update_summary_isbn_and_title(session: Session) -> None:
@cache
def cached_isbn_lookup(identifier: Identifier) -> str | None:
"""Given an identifier, return its ISBN."""
return _isbn_for_identifier(identifier)
return isbn_for_identifier(identifier)


@cache
Expand Down
30 changes: 30 additions & 0 deletions src/palace/manager/sqlalchemy/model/identifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -1271,3 +1271,33 @@ def equivalent_identifiers(
results[parent_identifier] = equivalent.identifier

return results


def isbn_for_identifier(identifier: Identifier | None) -> str | None:
"""Find the strongest ISBN match for the given identifier.

:param identifier: The identifier to match.
:return: The ISBN string associated with the identifier or None, if no match is found.
"""
if identifier is None:
return None

if identifier.type == Identifier.ISBN:
return identifier.identifier

# If our identifier is not an ISBN itself, we'll use our Recursive Equivalency
# mechanism to find the next best one that is, if available.
db = Session.object_session(identifier)
eq_subquery = db.query(RecursiveEquivalencyCache.identifier_id).filter(
RecursiveEquivalencyCache.parent_identifier_id == identifier.id
)
equivalent_identifiers = (
db.query(Identifier)
.filter(Identifier.id.in_(eq_subquery))
.filter(Identifier.type == Identifier.ISBN)
)

return next(
map(lambda id_: id_.identifier, equivalent_identifiers),
None,
)
39 changes: 3 additions & 36 deletions src/palace/manager/sqlalchemy/model/time_tracking.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations

import datetime
from typing import TYPE_CHECKING, Any, cast
from typing import TYPE_CHECKING, Any

from sqlalchemy import (
Boolean,
Expand All @@ -17,10 +17,7 @@

from palace.manager.sqlalchemy.model.base import Base
from palace.manager.sqlalchemy.model.edition import Edition
from palace.manager.sqlalchemy.model.identifier import (
Identifier,
RecursiveEquivalencyCache,
)
from palace.manager.sqlalchemy.model.identifier import Identifier, isbn_for_identifier
from palace.manager.sqlalchemy.util import get_one_or_create
from palace.manager.util.datetime_helpers import minute_timestamp

Expand Down Expand Up @@ -215,7 +212,7 @@ def add(
if (not playtime.isbn or not playtime.title) and not identifier:
identifier, _ = Identifier.parse_urn(_db, identifier_str, autocreate=False)
if not playtime.isbn and identifier:
playtime.isbn = _isbn_for_identifier(identifier)
playtime.isbn = isbn_for_identifier(identifier)
if not playtime.title and identifier:
playtime.title = _title_for_identifier(identifier)

Expand Down Expand Up @@ -243,33 +240,3 @@ def _title_for_identifier(identifier: Identifier | None) -> str | None:
):
return edition.title
return None


def _isbn_for_identifier(identifier: Identifier | None) -> str | None:
"""Find the strongest ISBN match for the given identifier.

:param identifier: The identifier to match.
:return: The ISBN string associated with the identifier or None, if no match is found.
"""
if identifier is None:
return None

if identifier.type == Identifier.ISBN:
return cast(str, identifier.identifier)

# If our identifier is not an ISBN itself, we'll use our Recursive Equivalency
# mechanism to find the next best one that is, if available.
db = Session.object_session(identifier)
eq_subquery = db.query(RecursiveEquivalencyCache.identifier_id).filter(
RecursiveEquivalencyCache.parent_identifier_id == identifier.id
)
equivalent_identifiers = (
db.query(Identifier)
.filter(Identifier.id.in_(eq_subquery))
.filter(Identifier.type == Identifier.ISBN)
)

return next(
map(lambda id_: id_.identifier, equivalent_identifiers),
None,
)
72 changes: 72 additions & 0 deletions tests/manager/sqlalchemy/model/test_identifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
Identifier,
ProQuestIdentifierParser,
RecursiveEquivalencyCache,
isbn_for_identifier,
)
from palace.manager.sqlalchemy.model.resource import Hyperlink
from palace.manager.sqlalchemy.presentation import PresentationCalculationPolicy
Expand Down Expand Up @@ -860,6 +861,77 @@ def test_equivalent_identifiers(self, db: DatabaseTransactionFixture) -> None:
proquest_identfier: proquest_gutenberg,
}

@pytest.mark.parametrize(
"id_key, equivalents, expected_isbn",
[
# If the identifier is an ISBN, we will not use an equivalency.
[
"i1",
(("g1", "g2", 1), ("g2", "i1", 1), ("g1", "i2", 0.5)),
"080442957X",
],
[
"i2",
(("g1", "g2", 1), ("g2", "i1", 0.5), ("g1", "i2", 1)),
"9788175257665",
],
["i1", (("i1", "i2", 200),), "080442957X"],
["i2", (("i2", "i1", 200),), "9788175257665"],
# If identifier is not an ISBN, but has an equivalency that is, use the strongest match.
[
"g2",
(("g1", "g2", 1), ("g2", "i1", 1), ("g1", "i2", 0.5)),
"080442957X",
],
[
"g2",
(("g1", "g2", 1), ("g2", "i1", 0.5), ("g1", "i2", 1)),
"9788175257665",
],
# If we don't find an equivalent ISBN identifier, then we should get None.
["g2", (), None],
["g1", (("g1", "g2", 1),), None],
# If identifier is None, expect default value.
[None, (), None],
],
)
def test_isbn_for_identifier(
self,
db: DatabaseTransactionFixture,
id_key: str | None,
equivalents: tuple[tuple[str, str, int | float]],
expected_isbn: str,
):
ids: dict[str, Identifier] = {
"i1": db.identifier(
identifier_type=Identifier.ISBN, foreign_id="080442957X"
),
"i2": db.identifier(
identifier_type=Identifier.ISBN, foreign_id="9788175257665"
),
"g1": db.identifier(identifier_type=Identifier.GUTENBERG_ID),
"g2": db.identifier(identifier_type=Identifier.GUTENBERG_ID),
}
equivalencies = [
Equivalency(
input_id=ids[equivalent[0]].id,
output_id=ids[equivalent[1]].id,
strength=equivalent[2],
)
for equivalent in equivalents
]
test_identifier: Identifier | None = ids[id_key] if id_key is not None else None
if test_identifier is not None:
test_identifier.equivalencies = equivalencies

# We're using the RecursiveEquivalencyCache, so must refresh it.
EquivalentIdentifiersCoverageProvider(db.session).run()

# Act
result = isbn_for_identifier(test_identifier)
# Assert
assert result == expected_isbn


class TestProQuestIdentifierParser:
@pytest.mark.parametrize(
Expand Down
76 changes: 0 additions & 76 deletions tests/manager/sqlalchemy/model/test_time_tracking.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,9 @@
import pytest
from sqlalchemy.exc import IntegrityError

from palace.manager.core.equivalents_coverage import (
EquivalentIdentifiersCoverageProvider,
)
from palace.manager.sqlalchemy.model.identifier import Equivalency, Identifier
from palace.manager.sqlalchemy.model.time_tracking import (
PlaytimeEntry,
PlaytimeSummary,
_isbn_for_identifier,
_title_for_identifier,
)
from palace.manager.sqlalchemy.util import create
Expand Down Expand Up @@ -239,77 +234,6 @@ def test_cascades(self, db: DatabaseTransactionFixture):


class TestHelpers:
@pytest.mark.parametrize(
"id_key, equivalents, expected_isbn",
[
# If the identifier is an ISBN, we will not use an equivalency.
[
"i1",
(("g1", "g2", 1), ("g2", "i1", 1), ("g1", "i2", 0.5)),
"080442957X",
],
[
"i2",
(("g1", "g2", 1), ("g2", "i1", 0.5), ("g1", "i2", 1)),
"9788175257665",
],
["i1", (("i1", "i2", 200),), "080442957X"],
["i2", (("i2", "i1", 200),), "9788175257665"],
# If identifier is not an ISBN, but has an equivalency that is, use the strongest match.
[
"g2",
(("g1", "g2", 1), ("g2", "i1", 1), ("g1", "i2", 0.5)),
"080442957X",
],
[
"g2",
(("g1", "g2", 1), ("g2", "i1", 0.5), ("g1", "i2", 1)),
"9788175257665",
],
# If we don't find an equivalent ISBN identifier, then we should get None.
["g2", (), None],
["g1", (("g1", "g2", 1),), None],
# If identifier is None, expect default value.
[None, (), None],
],
)
def test__isbn_for_identifier(
self,
db: DatabaseTransactionFixture,
id_key: str | None,
equivalents: tuple[tuple[str, str, int | float]],
expected_isbn: str,
):
ids: dict[str, Identifier] = {
"i1": db.identifier(
identifier_type=Identifier.ISBN, foreign_id="080442957X"
),
"i2": db.identifier(
identifier_type=Identifier.ISBN, foreign_id="9788175257665"
),
"g1": db.identifier(identifier_type=Identifier.GUTENBERG_ID),
"g2": db.identifier(identifier_type=Identifier.GUTENBERG_ID),
}
equivalencies = [
Equivalency(
input_id=ids[equivalent[0]].id,
output_id=ids[equivalent[1]].id,
strength=equivalent[2],
)
for equivalent in equivalents
]
test_identifier: Identifier | None = ids[id_key] if id_key is not None else None
if test_identifier is not None:
test_identifier.equivalencies = equivalencies

# We're using the RecursiveEquivalencyCache, so must refresh it.
EquivalentIdentifiersCoverageProvider(db.session).run()

# Act
result = _isbn_for_identifier(test_identifier)
# Assert
assert result == expected_isbn

def test__title_for_identifier_multiple_editions(
self,
db: DatabaseTransactionFixture,
Expand Down