Skip to content

Commit

Permalink
fix for records
Browse files: browse the repository at this point in the history
  • Loading branch information
bpiwowar committed Feb 27, 2024
1 parent 6d73bcf commit a7ae472
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 5 deletions.
2 changes: 1 addition & 1 deletion src/datamaestro_text/data/ir/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def iter_ids(self) -> Iterator[str]:
By default, use iter_documents, which is not really efficient.
"""
for doc in self.iter():
- yield doc.get_id()
+ yield doc[IDItem].id

@property
def documentcount(self):
Expand Down
2 changes: 1 addition & 1 deletion src/datamaestro_text/datasets/irds/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ def records(self):

@cached_property
def ext2records(self):
- return {record.topic.get_id(): record for record in self.records}
+ return {record[IDItem].id: record for record in self.records}

def topic_int(self, internal_topic_id: int) -> TopicRecord:
"""Returns a document given its internal ID"""
Expand Down
8 changes: 5 additions & 3 deletions src/datamaestro_text/transforms/ir/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,14 +69,16 @@ def __validate__(self):

def iter(self):
for topic, doc1, doc2 in self.data.iter():
- doc1, doc2 = self.store.documents_ext([doc1.get_id(), doc2.get_id()])
+ doc1, doc2 = self.store.documents_ext(
+ [doc1[ir.IDItem].id, doc2[ir.IDItem].id]
+ )
yield topic, doc1, doc2

def batch_iter(self, size: int):
for triplets in self.data.batch_iter(size):
docids = []
for topic, doc1, doc2 in triplets:
- docids.extend(doc1.get_id(), doc2.get_id())
+ docids.extend(doc1[ir.IDItem].id, doc2[ir.IDItem].id)
docs_iter = iter(self.store.documents_ext(docids))
for triplet in triplets:
triplet[1] = next(docs_iter)
Expand Down Expand Up @@ -165,7 +167,7 @@ def execute(self):
if self.topic_ids:

def get_query(query):
- return query.get_id()
+ return query[ir.IDItem].id

else:

Expand Down

0 comments on commit a7ae472

Please sign in to comment.