Fix deprecated parameters in D2VTransformer and W2VTransformer. Fix #1937 (#1945)
@@ -411,25 +411,21 @@ def score_cbow_pair(model, word, l1):

 class Word2Vec(BaseWordEmbeddingsModel):
     """Class for training, using and evaluating neural networks described in https://code.google.com/p/word2vec/

     If you're finished training a model (=no more updates, only querying)
     then switch to the :mod:`gensim.models.KeyedVectors` instance in wv

     The model can be stored/loaded via its :meth:`~gensim.models.word2vec.Word2Vec.save()` and
     :meth:`~gensim.models.word2vec.Word2Vec.load()` methods, or stored/loaded in a format
     compatible with the original word2vec implementation via `wv.save_word2vec_format()`
     and `Word2VecKeyedVectors.load_word2vec_format()`.

     """

-    def __init__(self, sentences=None, size=100, alpha=0.025, window=5, min_count=5,
+    def __init__(self, sentences=None, size=None, vector_size=100, alpha=0.025, window=5, min_count=5,
Review comment: This also changes unrelated code; please remove it.
                  max_vocab_size=None, sample=1e-3, seed=1, workers=3, min_alpha=0.0001,
-                 sg=0, hs=0, negative=5, cbow_mean=1, hashfxn=hash, iter=5, null_word=0,
-                 trim_rule=None, sorted_vocab=1, batch_words=MAX_WORDS_IN_BATCH, compute_loss=False, callbacks=()):
+                 sg=0, hs=0, negative=5, cbow_mean=1, hashfxn=hash, iter=None, epochs=5, null_word=0,
+                 trim_rule=None, sorted_vocab=1, compute_loss=False, callbacks=()):
""" | ||
Initialize the model from an iterable of `sentences`. Each sentence is a | ||
list of words (unicode strings) that will be used for training. | ||
|
||
Parameters | ||
---------- | ||
sentences : iterable of iterables | ||
|
@@ -439,7 +435,6 @@ def __init__(self, sentences=None, size=100, alpha=0.025, window=5, min_count=5, | |
or :class:`~gensim.models.word2vec.LineSentence` in :mod:`~gensim.models.word2vec` module for such examples. | ||
If you don't supply `sentences`, the model is left uninitialized -- use if you plan to initialize it | ||
in some other way. | ||
|
||
sg : int {1, 0} | ||
Defines the training algorithm. If 1, skip-gram is employed; otherwise, CBOW is used. | ||
size : int | ||
@@ -498,30 +493,38 @@ def __init__(self, sentences=None, size=100, alpha=0.025, window=5, min_count=5,
             If True, computes and stores loss value which can be retrieved using `model.get_latest_training_loss()`.
         callbacks : :obj: `list` of :obj: `~gensim.models.callbacks.CallbackAny2Vec`
             List of callbacks that need to be executed/run at specific stages during training.

         Examples
         --------
         Initialize and train a `Word2Vec` model

         >>> from gensim.models import Word2Vec
         >>> sentences = [["cat", "say", "meow"], ["dog", "say", "woof"]]
         >>>
         >>> model = Word2Vec(sentences, min_count=1)
         >>> say_vector = model['say']  # get vector for word

         """

+        if iter is not None:
+            warnings.warn("The parameter `iter` is deprecated, will be removed in 4.0.0, use `epochs` instead.")
+            epochs = iter
+
+        if size is not None:
+            warnings.warn("The parameter `size` is deprecated, will be removed in 4.0.0, use `vector_size` instead.")
+            vector_size = size
+
+        self.vector_size = vector_size
+        self.epochs = epochs
         self.callbacks = callbacks
         self.load = call_on_class_only

-        self.wv = Word2VecKeyedVectors(size)
+        self.wv = Word2VecKeyedVectors(vector_size)
         self.vocabulary = Word2VecVocab(
             max_vocab_size=max_vocab_size, min_count=min_count, sample=sample,
             sorted_vocab=bool(sorted_vocab), null_word=null_word)
-        self.trainables = Word2VecTrainables(seed=seed, vector_size=size, hashfxn=hashfxn)
+        self.trainables = Word2VecTrainables(seed=seed, vector_size=vector_size, hashfxn=hashfxn)

         super(Word2Vec, self).__init__(
-            sentences=sentences, workers=workers, vector_size=size, epochs=iter, callbacks=callbacks,
+            sentences=sentences, workers=workers, vector_size=vector_size, epochs=epochs, callbacks=callbacks,
             batch_words=batch_words, trim_rule=trim_rule, sg=sg, alpha=alpha, window=window, seed=seed,
             hs=hs, negative=negative, cbow_mean=cbow_mean, min_alpha=min_alpha, compute_loss=compute_loss,
             fast_version=FAST_VERSION)
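
For reference, here is a minimal standalone sketch of the deprecation shim this hunk introduces. The `Toy` class and the test driver below are illustrative, not code from the PR:

import warnings

class Toy(object):
    # Hypothetical stand-in for Word2Vec: accept the old keyword, warn, and map it
    # onto its replacement, mirroring the pattern in the diff above.
    def __init__(self, size=None, vector_size=100, iter=None, epochs=5):
        if iter is not None:
            warnings.warn("The parameter `iter` is deprecated, will be removed in 4.0.0, use `epochs` instead.")
            epochs = iter
        if size is not None:
            warnings.warn("The parameter `size` is deprecated, will be removed in 4.0.0, use `vector_size` instead.")
            vector_size = size
        self.vector_size = vector_size
        self.epochs = epochs

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    toy = Toy(size=50, iter=3)      # old keyword names are still accepted
assert toy.vector_size == 50 and toy.epochs == 3
assert len(caught) == 2             # one warning per deprecated keyword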
@@ -10,6 +10,7 @@
 Follows scikit-learn API conventions
 """

+import warnings
 import numpy as np
 import six
 from sklearn.base import TransformerMixin, BaseEstimator
@@ -23,14 +24,22 @@ class W2VTransformer(TransformerMixin, BaseEstimator):
     Base Word2Vec module
     """

-    def __init__(self, size=100, alpha=0.025, window=5, min_count=5, max_vocab_size=None, sample=1e-3, seed=1,
-                 workers=3, min_alpha=0.0001, sg=0, hs=0, negative=5, cbow_mean=1, hashfxn=hash, iter=5, null_word=0,
+    def __init__(self, size=None, vector_size=100, alpha=0.025, window=5, min_count=5, max_vocab_size=None, sample=1e-3, seed=1,
+                 workers=3, min_alpha=0.0001, sg=0, hs=0, negative=5, cbow_mean=1, hashfxn=hash, iter=None, epochs=5, null_word=0,
                  trim_rule=None, sorted_vocab=1, batch_words=10000):
         """
         Sklearn wrapper for Word2Vec model. See gensim.models.Word2Vec for parameter details.
         """
+        if iter is not None:
+            warnings.warn("The parameter `iter` is deprecated, will be removed in 4.0.0, use `epochs` instead.")
+            epochs = iter
+
+        if size is not None:
+            warnings.warn("The parameter `size` is deprecated, will be removed in 4.0.0, use `vector_size` instead.")
+            vector_size = size
+

Review comment: Does it make sense to rename the arguments here? You could simply pass the correct variants to the "original" class; that alone is enough to avoid the deprecation warnings. CC: @manneshiva
         self.gensim_model = None
-        self.size = size
+        self.vector_size = vector_size
         self.alpha = alpha
         self.window = window
         self.min_count = min_count
@@ -44,23 +53,22 @@ def __init__(self, size=100, alpha=0.025, window=5, min_count=5, max_vocab_size=
         self.negative = negative
         self.cbow_mean = int(cbow_mean)
         self.hashfxn = hashfxn
-        self.iter = iter
+        self.epochs = epochs
         self.null_word = null_word
         self.trim_rule = trim_rule
         self.sorted_vocab = sorted_vocab
         self.batch_words = batch_words

     def fit(self, X, y=None):
         """
         Fit the model according to the given training data.
         Calls gensim.models.Word2Vec
         """
         self.gensim_model = models.Word2Vec(
-            sentences=X, size=self.size, alpha=self.alpha,
+            sentences=X, vector_size=self.vector_size, alpha=self.alpha,
             window=self.window, min_count=self.min_count, max_vocab_size=self.max_vocab_size,
             sample=self.sample, seed=self.seed, workers=self.workers, min_alpha=self.min_alpha,
             sg=self.sg, hs=self.hs, negative=self.negative, cbow_mean=self.cbow_mean,
-            hashfxn=self.hashfxn, iter=self.iter, null_word=self.null_word, trim_rule=self.trim_rule,
+            hashfxn=self.hashfxn, epochs=self.epochs, null_word=self.null_word, trim_rule=self.trim_rule,
             sorted_vocab=self.sorted_vocab, batch_words=self.batch_words
         )
         return self
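
Assuming this branch (where `size` becomes `vector_size` and `iter` becomes `epochs`), a quick usage sketch of the updated wrapper might look like the following; the exact dimensions and hyperparameters are illustrative:

from gensim.sklearn_api import W2VTransformer

sentences = [["cat", "say", "meow"], ["dog", "say", "woof"]]

# New parameter names; passing size= or iter= instead should still work but warn.
model = W2VTransformer(vector_size=10, min_count=1, seed=1)
wordvecs = model.fit(sentences).transform(["cat", "dog"])  # one 10-dim vector per word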
Review comment: This file is not relevant to the PR; please remove it from the PR.
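
As a footnote to the earlier review comment about renaming: a rough sketch of the suggested alternative, which keeps the wrapper's public parameter names stable and translates them only when delegating to the underlying model (illustrative code, not part of this PR):

from gensim import models
from sklearn.base import TransformerMixin, BaseEstimator

class W2VTransformerAlt(TransformerMixin, BaseEstimator):
    # Keep the sklearn-facing names unchanged so get_params()/set_params() and
    # existing user code are unaffected; only the delegation call changes.
    def __init__(self, size=100, iter=5, min_count=5):
        self.size = size
        self.iter = iter
        self.min_count = min_count

    def fit(self, X, y=None):
        # Map the old wrapper names onto the renamed Word2Vec parameters here.
        self.gensim_model = models.Word2Vec(
            sentences=X, vector_size=self.size, epochs=self.iter, min_count=self.min_count)
        return self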