Skip to content

Commit

Permalink
fix bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
bpiwowar committed Oct 4, 2023
1 parent b28ae2c commit 0a45c6f
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 3 deletions.
8 changes: 6 additions & 2 deletions src/datamaestro_text/datasets/irds/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,8 +118,12 @@ def converter(self):
def store(self):
return self.dataset.docs_store()

+ @cached_property
+ def _docs(self):
+ return self.dataset.docs_iter()
+
def docid_internal2external(self, ix: int):
- return self.dataset.docs_iter()[ix].doc_id
+ return self._docs[ix].doc_id

def document_ext(self, docid: str) -> Document:
return self.converter(self.store.get(docid))
Expand All @@ -130,7 +134,7 @@ def documents_ext(self, docids: List[str]) -> Document:
return [self.converter(retrieved[docid]) for docid in docids]

def document_int(self, ix):
- return self.converter(self.dataset.docs_iter()[ix])
+ return self.converter(self._docs[ix])

@cached_property
def document_cls(self):
Expand Down
2 changes: 1 addition & 1 deletion src/datamaestro_text/transforms/ir/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def __validate__(self):

def iter(self):
for topic, doc1, doc2 in self.data.iter():
- doc1, doc2 = self.store.documents_ext(doc1.get_id(), doc2.get_id())
+ doc1, doc2 = self.store.documents_ext([doc1.get_id(), doc2.get_id()])
yield topic, doc1, doc2

def batch_iter(self, size: int):
Expand Down

0 comments on commit 0a45c6f

Please sign in to comment.