Skip to content

Commit

Permalink
Fix some of the warnings/deprecated functions (#3080)
Browse files Browse the repository at this point in the history
* np + collections

* address review notes

* trim whitespace
  • Loading branch information
FredHappyface authored Mar 22, 2021
1 parent 83b8821 commit 04f3414
Show file tree
Hide file tree
Showing 6 changed files with 17 additions and 16 deletions.
4 changes: 2 additions & 2 deletions gensim/matutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -724,7 +724,7 @@ def unitvec(vec, norm='l2', return_norm=False):
veclen = vec.nnz
if veclen > 0.0:
if np.issubdtype(vec.dtype, np.integer):
vec = vec.astype(np.float)
vec = vec.astype(float)
vec /= veclen
if return_norm:
return vec, veclen
Expand All @@ -748,7 +748,7 @@ def unitvec(vec, norm='l2', return_norm=False):
veclen = np.count_nonzero(vec)
if veclen > 0.0:
if np.issubdtype(vec.dtype, np.integer):
vec = vec.astype(np.float)
vec = vec.astype(float)
if return_norm:
return blas_scal(1.0 / veclen, vec).astype(vec.dtype), veclen
else:
Expand Down
12 changes: 6 additions & 6 deletions gensim/models/atmodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -463,11 +463,11 @@ def inference(self, chunk, author2doc, doc2author, rhot, collect_sstats=False, c
ids = [int(idx) for idx, _ in doc]
else:
ids = [idx for idx, _ in doc]
ids = np.array(ids, dtype=np.int)
cts = np.fromiter((cnt for _, cnt in doc), dtype=np.int, count=len(doc))
ids = np.array(ids, dtype=int)
cts = np.fromiter((cnt for _, cnt in doc), dtype=int, count=len(doc))

# Get all authors in current document, and convert the author names to integer IDs.
authors_d = np.fromiter((self.author2id[a] for a in self.doc2author[doc_no]), dtype=np.int)
authors_d = np.fromiter((self.author2id[a] for a in self.doc2author[doc_no]), dtype=int)

gammad = self.state.gamma[authors_d, :] # gamma of document d before update.
tilde_gamma = gammad.copy() # gamma that will be updated.
Expand Down Expand Up @@ -975,9 +975,9 @@ def bound(self, chunk, chunk_doc_idx=None, subsample_ratio=1.0, author2doc=None,
else:
doc_no = d
# Get all authors in current document, and convert the author names to integer IDs.
authors_d = np.fromiter((self.author2id[a] for a in self.doc2author[doc_no]), dtype=np.int)
ids = np.fromiter((id for id, _ in doc), dtype=np.int, count=len(doc)) # Word IDs in doc.
cts = np.fromiter((cnt for _, cnt in doc), dtype=np.int, count=len(doc)) # Word counts.
authors_d = np.fromiter((self.author2id[a] for a in self.doc2author[doc_no]), dtype=int)
ids = np.fromiter((id for id, _ in doc), dtype=int, count=len(doc)) # Word IDs in doc.
cts = np.fromiter((cnt for _, cnt in doc), dtype=int, count=len(doc)) # Word counts.

if d % self.chunksize == 0:
logger.debug("bound: at document #%i in chunk", d)
Expand Down
2 changes: 1 addition & 1 deletion gensim/models/keyedvectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -501,7 +501,7 @@ def add_vectors(self, keys, weights, extras=None, replace=False):
# initially allocate extras, check type compatibility
self.allocate_vecattrs(extras.keys(), [extras[k].dtype for k in extras.keys()])

in_vocab_mask = np.zeros(len(keys), dtype=np.bool)
in_vocab_mask = np.zeros(len(keys), dtype=bool)
for idx, key in enumerate(keys):
if key in self:
in_vocab_mask[idx] = True
Expand Down
5 changes: 3 additions & 2 deletions gensim/models/nmf.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,8 @@
"""

import collections

import collections.abc
import logging

import numpy as np
Expand Down Expand Up @@ -590,7 +591,7 @@ def update(self, corpus, chunksize=None, passes=None, eval_every=None):
logger.warning("Nmf.update() called with an empty corpus")
return

if isinstance(corpus, collections.Iterator) and self.passes > 1:
if isinstance(corpus, collections.abc.Iterator) and self.passes > 1:
raise ValueError("Corpus is an iterator, only `passes=1` is valid.")

logger.info(
Expand Down
2 changes: 1 addition & 1 deletion gensim/test/test_matutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ def test_dirichlet_expectation(self):

def manual_unitvec(vec):
# manual unit vector calculation for UnitvecTestCase
vec = vec.astype(np.float)
vec = vec.astype(float)
if sparse.issparse(vec):
vec_sum_of_squares = vec.multiply(vec)
unit = 1. / np.sqrt(vec_sum_of_squares.sum())
Expand Down
8 changes: 4 additions & 4 deletions gensim/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from __future__ import with_statement
from contextlib import contextmanager
import collections
import collections.abc
import logging
import warnings
import numbers
Expand Down Expand Up @@ -1598,7 +1598,7 @@ def upload_chunked(server, docs, chunksize=1000, preprocess=None):
Notes
-----
Use this function to train or index large collections -- avoid sending the
Use this function to train or index large collections -- avoid sending the
entire corpus over the wire as a single Pyro in-memory object. The documents
will be sent in smaller chunks, of `chunksize` documents each.
Expand Down Expand Up @@ -2014,7 +2014,7 @@ def flatten(nested_list):
Returns
-------
list
Flattened version of `nested_list` where any elements that are an iterable (`collections.Iterable`)
Flattened version of `nested_list` where any elements that are an iterable (`collections.abc.Iterable`)
have been unpacked into the top-level list, in a recursive fashion.
"""
Expand All @@ -2036,7 +2036,7 @@ def lazy_flatten(nested_list):
"""
for el in nested_list:
if isinstance(el, collections.Iterable) and not isinstance(el, str):
if isinstance(el, collections.abc.Iterable) and not isinstance(el, str):
for sub in flatten(el):
yield sub
else:
Expand Down

0 comments on commit 04f3414

Please sign in to comment.