Skip to content

Commit

Permalink
fix: clarify internal vs external IDs in documents/topics
Browse files Browse the repository at this point in the history
  • Loading branch information
bpiwowar committed Jan 19, 2024
1 parent 5be1c23 commit 635c9e8
Showing 1 changed file with 39 additions and 29 deletions.
68 changes: 39 additions & 29 deletions src/datamaestro_text/data/ir/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ class BaseHolder:
"""Base class for topics and documents"""

has_id: ClassVar[bool] = False
has_internal_id: ClassVar[bool] = False
has_text: ClassVar[bool] = False

def get_text(self):
Expand All @@ -16,9 +17,14 @@ def get_text(self):
def get_id(self) -> str:
    """Fallback ID accessor for holders that do not define an ID.

    Raises:
        RuntimeError: always; the message names the concrete type of
            the object and suggests using an adapter.
    """
    message = f"{type(self)} has no ID: " "you should use an adapter"
    raise RuntimeError(message)

def get_internal_id(self) -> int:
    """Fallback internal-ID accessor for holders without an internal ID.

    Raises:
        RuntimeError: always; the message names the concrete type of
            the object and suggests using an adapter.
    """
    message = f"{type(self)} has no internal ID: " "you should use an adapter"
    raise RuntimeError(message)


@define()
class IDHolder:
@define(slots=False)
class IDHolder(BaseHolder):
"""Base data class for ID only data structures"""

id: str
Expand All @@ -28,30 +34,24 @@ def get_id(self):
return self.id


@define()
class TextHolder:
"""Base data class for text only data structures"""
@define(slots=False)
class InternalIDHolder(BaseHolder):
"""Base data class for internal ID only data structures"""

text: str
has_text: ClassVar[bool] = True
internal_id: int
has_internal_id: ClassVar[bool] = True

def get_text(self):
return self.text
def get_internal_id(self) -> int:
    """Return the internal (integer) ID stored on this holder.

    Returns:
        The value of the ``internal_id`` attribute.
    """
    # The field declared on this holder is ``internal_id``. ``id`` is the
    # external string ID of IDHolder: it is absent on a pure internal-ID
    # holder and has the wrong type when both holders are combined.
    return self.internal_id


@define()
class TextAndIDHolder:
"""Base data class for ID and text data structures"""
@define(slots=False)
class TextHolder(BaseHolder):
"""Base data class for text only data structures"""

id: str
text: str

has_id: ClassVar[bool] = True
has_text: ClassVar[bool] = True

def get_id(self):
return self.id

def get_text(self):
    """Return the text held by this object (its ``text`` attribute)."""
    held_text = self.text
    return held_text

Expand All @@ -62,52 +62,62 @@ class Document(BaseHolder):
pass


@define()
@define(slots=False)
class TextDocument(TextHolder, Document):
"""Documents with text only (combines TextHolder and Document)"""


@define()
@define(slots=False)
class IDDocument(IDHolder, Document):
"""Documents with an ID only (combines IDHolder and Document)"""


@define()
class GenericDocument(TextAndIDHolder, Document):
@define(slots=False)
class FullIDDocument(InternalIDHolder, IDHolder, Document):
"""Documents with both an internal ID and an external ID (no text)"""


@define(slots=False)
class GenericDocument(TextHolder, IDHolder, Document):
"""Documents with an (external) ID and text"""


@define(slots=False)
class FullGenericDocument(TextHolder, IDHolder, InternalIDHolder, Document):
"""Documents with internal ID, external ID and text"""


class Topic(BaseHolder):
"""Common base class for topics"""
pass


@define()
class GenericTopic(TextAndIDHolder, Topic):
@define(slots=False)
class GenericTopic(TextHolder, IDHolder, Topic):
"""Topics with an (external) ID and text"""
pass


@define()
@define(slots=False)
class TextTopic(TextHolder, Topic):
"""Topics with text only"""
pass


@define()
@define(slots=False)
class IDTopic(IDHolder, Topic):
"""Topics with an ID only"""
pass


@define()
@define(slots=False)
class AdhocAssessment:
"""Base assessment record holding a document ID (``doc_id``)"""
doc_id: str


@define()
@define(slots=False)
class SimpleAdhocAssessment(AdhocAssessment):
"""Assessment that adds a float relevance value (``rel``) to the document ID"""
rel: float
"""Relevance (> 0 if relevant)"""


@define()
@define(slots=False)
class AdhocAssessedTopic:
topic_id: str
"""The topic ID"""
Expand Down

0 comments on commit 635c9e8

Please sign in to comment.