Skip to content

Commit

Permalink
Fix some of the warnings/deprecated functions (#3080)
Browse files Browse the repository at this point in the history
* np + collections

* address review notes

* trim whitespace
  • Loading branch information
FredHappyface authored Mar 22, 2021
1 parent 83b8821 commit 04f3414
Show file tree
Hide file tree
Showing 6 changed files with 17 additions and 16 deletions.
4 changes: 2 additions & 2 deletions gensim/matutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -724,7 +724,7 @@ def unitvec(vec, norm='l2', return_norm=False):
veclen = vec.nnz
if veclen > 0.0:
if np.issubdtype(vec.dtype, np.integer):
vec = vec.astype(np.float)
vec = vec.astype(float)
vec /= veclen
if return_norm:
return vec, veclen
Expand All @@ -748,7 +748,7 @@ def unitvec(vec, norm='l2', return_norm=False):
veclen = np.count_nonzero(vec)
if veclen > 0.0:
if np.issubdtype(vec.dtype, np.integer):
vec = vec.astype(np.float)
vec = vec.astype(float)
if return_norm:
return blas_scal(1.0 / veclen, vec).astype(vec.dtype), veclen
else:
Expand Down
12 changes: 6 additions & 6 deletions gensim/models/atmodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -463,11 +463,11 @@ def inference(self, chunk, author2doc, doc2author, rhot, collect_sstats=False, c
ids = [int(idx) for idx, _ in doc]
else:
ids = [idx for idx, _ in doc]
ids = np.array(ids, dtype=np.int)
cts = np.fromiter((cnt for _, cnt in doc), dtype=np.int, count=len(doc))
ids = np.array(ids, dtype=int)
cts = np.fromiter((cnt for _, cnt in doc), dtype=int, count=len(doc))

# Get all authors in current document, and convert the author names to integer IDs.
authors_d = np.fromiter((self.author2id[a] for a in self.doc2author[doc_no]), dtype=np.int)
authors_d = np.fromiter((self.author2id[a] for a in self.doc2author[doc_no]), dtype=int)

gammad = self.state.gamma[authors_d, :] # gamma of document d before update.
tilde_gamma = gammad.copy() # gamma that will be updated.
Expand Down Expand Up @@ -975,9 +975,9 @@ def bound(self, chunk, chunk_doc_idx=None, subsample_ratio=1.0, author2doc=None,
else:
doc_no = d
# Get all authors in current document, and convert the author names to integer IDs.
authors_d = np.fromiter((self.author2id[a] for a in self.doc2author[doc_no]), dtype=np.int)
ids = np.fromiter((id for id, _ in doc), dtype=np.int, count=len(doc)) # Word IDs in doc.
cts = np.fromiter((cnt for _, cnt in doc), dtype=np.int, count=len(doc)) # Word counts.
authors_d = np.fromiter((self.author2id[a] for a in self.doc2author[doc_no]), dtype=int)
ids = np.fromiter((id for id, _ in doc), dtype=int, count=len(doc)) # Word IDs in doc.
cts = np.fromiter((cnt for _, cnt in doc), dtype=int, count=len(doc)) # Word counts.

if d % self.chunksize == 0:
logger.debug("bound: at document #%i in chunk", d)
Expand Down
2 changes: 1 addition & 1 deletion gensim/models/keyedvectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -501,7 +501,7 @@ def add_vectors(self, keys, weights, extras=None, replace=False):
# initially allocate extras, check type compatibility
self.allocate_vecattrs(extras.keys(), [extras[k].dtype for k in extras.keys()])

in_vocab_mask = np.zeros(len(keys), dtype=np.bool)
in_vocab_mask = np.zeros(len(keys), dtype=bool)
for idx, key in enumerate(keys):
if key in self:
in_vocab_mask[idx] = True
Expand Down
5 changes: 3 additions & 2 deletions gensim/models/nmf.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,8 @@
"""

import collections

import collections.abc
import logging

import numpy as np
Expand Down Expand Up @@ -590,7 +591,7 @@ def update(self, corpus, chunksize=None, passes=None, eval_every=None):
logger.warning("Nmf.update() called with an empty corpus")
return

if isinstance(corpus, collections.Iterator) and self.passes > 1:
if isinstance(corpus, collections.abc.Iterator) and self.passes > 1:
raise ValueError("Corpus is an iterator, only `passes=1` is valid.")

logger.info(
Expand Down
2 changes: 1 addition & 1 deletion gensim/test/test_matutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ def test_dirichlet_expectation(self):

def manual_unitvec(vec):
# manual unit vector calculation for UnitvecTestCase
vec = vec.astype(np.float)
vec = vec.astype(float)
if sparse.issparse(vec):
vec_sum_of_squares = vec.multiply(vec)
unit = 1. / np.sqrt(vec_sum_of_squares.sum())
Expand Down
8 changes: 4 additions & 4 deletions gensim/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from __future__ import with_statement
from contextlib import contextmanager
import collections
import collections.abc
import logging
import warnings
import numbers
Expand Down Expand Up @@ -1598,7 +1598,7 @@ def upload_chunked(server, docs, chunksize=1000, preprocess=None):
Notes
-----
Use this function to train or index large collections -- avoid sending the
Use this function to train or index large collections -- avoid sending the
entire corpus over the wire as a single Pyro in-memory object. The documents
will be sent in smaller chunks, of `chunksize` documents each.
Expand Down Expand Up @@ -2014,7 +2014,7 @@ def flatten(nested_list):
Returns
-------
list
Flattened version of `nested_list` where any elements that are an iterable (`collections.Iterable`)
Flattened version of `nested_list` where any elements that are an iterable (`collections.abc.Iterable`)
have been unpacked into the top-level list, in a recursive fashion.
"""
Expand All @@ -2036,7 +2036,7 @@ def lazy_flatten(nested_list):
"""
for el in nested_list:
if isinstance(el, collections.Iterable) and not isinstance(el, str):
if isinstance(el, collections.abc.Iterable) and not isinstance(el, str):
for sub in flatten(el):
yield sub
else:
Expand Down

0 comments on commit 04f3414

Please sign in to comment.