Skip to content

Commit

Permalink
fix: padding with new numpy versions
Browse files: browse the repository at this point in the history
  • Loading branch information
bpiwowar committed Mar 10, 2024
1 parent e6723b5 commit c30df48
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 2 deletions.
1 change: 1 addition & 0 deletions docs/source/text/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,4 @@ Adapters
********

.. autoxpmconfig:: xpmir.text.adapters.MeanTextEncoder
.. autoxpmconfig:: xpmir.text.adapters.TopicTextConverter
8 changes: 7 additions & 1 deletion src/xpmir/neural/interaction/drmm.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,11 +122,17 @@ def _encode(
options: TokenizerOptions,
) -> SimilarityInputWithTokens:
encoded = encoder(texts, options=options)

max_len = max(encoded.tokenized.lens)
padded_tokens = [
(t + [""] * (max_len - len(t))) for t in encoded.tokenized.tokens
]

return self.similarity.preprocess(
SimilarityInputWithTokens(
encoded.value,
encoded.tokenized.mask,
np.array(encoded.tokenized.tokens),
np.array(padded_tokens, dtype=str),
)
)

Expand Down
3 changes: 2 additions & 1 deletion src/xpmir/utils/iter.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,13 +406,14 @@ def start(self):

def close(self):
if self.mp_iterator:
atexit.unregister(self.close)
self.stop_process.set()
try:
# Try to remove an item from the queue just in case
next(self.mp_iterator)
finally:
self.mp_iterator = None
logging.info("Signaled the mp_iterator to quit")
atexit.unregister(self.close)

def detach(self):
"""Produces an iterator only based on the multiprocess queue (useful
Expand Down

0 comments on commit c30df48

Please sign in to comment.