Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove mutable default arguments. Fix #1561. #1562

Merged
merged 3 commits into from
Sep 4, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion gensim/models/doc2vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,7 +422,7 @@ def init_sims(self, replace=False):
self.doctag_syn0norm = empty(self.doctag_syn0.shape, dtype=REAL)
np_divide(self.doctag_syn0, sqrt((self.doctag_syn0 ** 2).sum(-1))[..., newaxis], self.doctag_syn0norm)

def most_similar(self, positive=[], negative=[], topn=10, clip_start=0, clip_end=None, indexer=None):
def most_similar(self, positive=None, negative=None, topn=10, clip_start=0, clip_end=None, indexer=None):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Making these defaults empty tuples – () – could make them immutable and eliminate the need for the later None checks and reassignments.

"""
Find the top-N most similar docvecs known from training. Positive docs contribute
positively towards the similarity, negative docs negatively.
Expand All @@ -436,6 +436,11 @@ def most_similar(self, positive=[], negative=[], topn=10, clip_start=0, clip_end
range of the underlying doctag_syn0norm vectors. (This may be useful if the ordering
there was chosen to be significant, such as more popular tag IDs in lower indexes.)
"""
if positive is None:
positive = []
if negative is None:
negative = []

self.init_sims()
clip_end = clip_end or len(self.doctag_syn0norm)

Expand Down
14 changes: 12 additions & 2 deletions gensim/models/keyedvectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ def word_vec(self, word, use_norm=False):
else:
raise KeyError("word '%s' not in vocabulary" % word)

def most_similar(self, positive=[], negative=[], topn=10, restrict_vocab=None, indexer=None):
def most_similar(self, positive=None, negative=None, topn=10, restrict_vocab=None, indexer=None):
"""
Find the top-N most similar words. Positive words contribute positively towards the
similarity, negative words negatively.
Expand All @@ -310,6 +310,11 @@ def most_similar(self, positive=[], negative=[], topn=10, restrict_vocab=None, i
[('queen', 0.50882536), ...]

"""
if positive is None:
positive = []
if negative is None:
negative = []

self.init_sims()

if isinstance(positive, string_types) and not negative:
Expand Down Expand Up @@ -442,7 +447,7 @@ def nbow(document):
# Compute WMD.
return emd(d1, d2, distance_matrix)

def most_similar_cosmul(self, positive=[], negative=[], topn=10):
def most_similar_cosmul(self, positive=None, negative=None, topn=10):
"""
Find the top-N most similar words, using the multiplicative combination objective
proposed by Omer Levy and Yoav Goldberg in [4]_. Positive words still contribute
Expand All @@ -464,6 +469,11 @@ def most_similar_cosmul(self, positive=[], negative=[], topn=10):
.. [4] Omer Levy and Yoav Goldberg. Linguistic Regularities in Sparse and Explicit Word Representations, 2014.

"""
if positive is None:
positive = []
if negative is None:
negative = []

self.init_sims()

if isinstance(positive, string_types) and not negative:
Expand Down
6 changes: 3 additions & 3 deletions gensim/models/lda_dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,14 +56,14 @@ class Dispatcher(object):
There should never be more than one dispatcher running at any one time.
"""

def __init__(self, maxsize=MAX_JOBS_QUEUE, ns_conf={}):
def __init__(self, maxsize=MAX_JOBS_QUEUE, ns_conf=None):
"""
Note that the constructor does not fully initialize the dispatcher;
use the `initialize()` function to populate it with workers etc.
"""
self.maxsize = maxsize
self.callback = None # a pyro proxy to this object (unknown at init time, but will be set later)
self.ns_conf = ns_conf
self.callback = None # a pyro proxy to this object (unknown at init time, but will be set later)
self.ns_conf = ns_conf if ns_conf is not None else {}

@Pyro4.expose
def initialize(self, **model_params):
Expand Down
5 changes: 4 additions & 1 deletion gensim/models/ldamodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ def __init__(self, corpus=None, num_topics=100, id2word=None,
distributed=False, chunksize=2000, passes=1, update_every=1,
alpha='symmetric', eta=None, decay=0.5, offset=1.0, eval_every=10,
iterations=50, gamma_threshold=0.001, minimum_probability=0.01,
random_state=None, ns_conf={}, minimum_phi_value=0.01,
random_state=None, ns_conf=None, minimum_phi_value=0.01,
per_word_topics=False, callbacks=None):
"""
If given, start training from the iterable `corpus` straight away. If not given,
Expand Down Expand Up @@ -316,6 +316,9 @@ def __init__(self, corpus=None, num_topics=100, id2word=None,
# set up distributed version
try:
import Pyro4
if ns_conf is None:
ns_conf = {}

with utils.getNS(**ns_conf) as ns:
from gensim.models.lda_dispatcher import LDA_DISPATCHER_PREFIX
self.dispatcher = Pyro4.Proxy(ns.list(prefix=LDA_DISPATCHER_PREFIX)[LDA_DISPATCHER_PREFIX])
Expand Down
4 changes: 2 additions & 2 deletions gensim/models/word2vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -1231,7 +1231,7 @@ def intersect_word2vec_format(self, fname, lockf=0.0, binary=False, encoding='ut
self.syn0_lockf[self.wv.vocab[word].index] = lockf # lock-factor: 0.0 stops further changes
logger.info("merged %d vectors into %s matrix from %s" % (overlap_count, self.wv.syn0.shape, fname))

def most_similar(self, positive=[], negative=[], topn=10, restrict_vocab=None, indexer=None):
def most_similar(self, positive=None, negative=None, topn=10, restrict_vocab=None, indexer=None):
"""
Deprecated. Use self.wv.most_similar() instead.
Refer to the documentation for `gensim.models.KeyedVectors.most_similar`
Expand All @@ -1245,7 +1245,7 @@ def wmdistance(self, document1, document2):
"""
return self.wv.wmdistance(document1, document2)

def most_similar_cosmul(self, positive=[], negative=[], topn=10):
def most_similar_cosmul(self, positive=None, negative=None, topn=10):
"""
Deprecated. Use self.wv.most_similar_cosmul() instead.
Refer to the documentation for `gensim.models.KeyedVectors.most_similar_cosmul`
Expand Down
6 changes: 4 additions & 2 deletions gensim/summarization/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def add_node(self, node, attrs=None):
pass

@abstractmethod
def add_edge(self, edge, wt=1, label='', attrs=[]):
def add_edge(self, edge, wt=1, label='', attrs=None):
"""
Add an edge to the graph connecting two nodes.

Expand Down Expand Up @@ -172,7 +172,9 @@ def neighbors(self, node):
def has_node(self, node):
return node in self.node_neighbors

def add_edge(self, edge, wt=1, label='', attrs=[]):
def add_edge(self, edge, wt=1, label='', attrs=None):
if attrs is None:
attrs = []
u, v = edge
if v not in self.node_neighbors[u] and u not in self.node_neighbors[v]:
self.node_neighbors[u].append(v)
Expand Down
5 changes: 4 additions & 1 deletion gensim/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1011,15 +1011,18 @@ def getNS(host=None, port=None, broadcast=True, hmac_key=None):
raise RuntimeError("Pyro name server not found")


def pyro_daemon(name, obj, random_suffix=False, ip=None, port=None, ns_conf={}):
def pyro_daemon(name, obj, random_suffix=False, ip=None, port=None, ns_conf=None):
"""
Register object with name server (starting the name server if not running
yet) and block until the daemon is terminated. The object is registered under
`name`, or `name`+ some random suffix if `random_suffix` is set.

"""
if ns_conf is None:
ns_conf = {}
if random_suffix:
name += '.' + hex(random.randint(0, 0xffffff))[2:]

import Pyro4
with getNS(**ns_conf) as ns:
with Pyro4.Daemon(ip or get_my_ip(), port or 0) as daemon:
Expand Down