Skip to content

Commit

Permalink
Fix docstrings for gensim.models.normmodel (#1805)
Browse files Browse the repository at this point in the history
* First edits

* changed bow

* Added examples

* Final commit of the night

* Still struggling with docs

* Removed examples but still struggling with documentation

* fix docstring

* fix docstring[2]
  • Loading branch information
AustenLamacraft authored and menshikh-iv committed Jan 11, 2018
1 parent 0a4419f commit fad00c6
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 23 deletions.
1 change: 1 addition & 0 deletions docs/src/models/normmodel.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@
:inherited-members:
:undoc-members:
:show-inheritance:
:special-members: __getitem__
70 changes: 47 additions & 23 deletions gensim/models/normmodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,35 +12,27 @@


class NormModel(interfaces.TransformationABC):
"""
Objects of this class realize the explicit normalization of
vectors. Supported norms are l1' and 'l2' with 'l2' being
default.
"""Objects of this class realize the explicit normalization of vectors (l1 and l2)."""

The main methods are:
def __init__(self, corpus=None, norm='l2'):
"""Compute the l1 or l2 normalization by normalizing separately for each document in a corpus.
1. Constructor which normalizes the terms in the given corpus document-wise.
2. The normalize() method which normalizes a simple count representation.
3. The [] transformation which internally calls the self.normalize() method.
If :math:`v_{i,j}` is the 'i'th component of the vector representing document 'j', the l1 normalization is
>>> norm_l2 = NormModel(corpus)
>>> print(norm_l2[some_doc])
>>> norm_l2.save('/tmp/foo.tfidf_model')
.. math:: l1_{i, j} = \\frac{v_{i,j}}{\sum_k |v_{k,j}|}
Model persistency is achieved via its load/save methods
"""
the l2 normalization is
def __init__(self, corpus=None, norm='l2'):
"""
Compute the 'l1' or 'l2' normalization by normalizing separately
for each doc in a corpus.
Formula for 'l1' norm for term 'i' in document 'j' in a corpus of 'D' documents is::
.. math:: l2_{i, j} = \\frac{v_{i,j}}{\sqrt{\sum_k v_{k,j}^2}}
norml1_{i, j} = (i / sum(absolute(values in j)))
Formula for 'l2' norm for term 'i' in document 'j' in a corpus of 'D' documents is::
Parameters
----------
corpus : iterable of iterable of (int, number), optional
Input corpus.
norm : {'l1', 'l2'}, optional
Norm used to normalize.
norml2_{i, j} = (i / sqrt(sum(square(values in j))))
"""
self.norm = norm
if corpus is not None:
Expand All @@ -52,8 +44,13 @@ def __str__(self):
return "NormModel(num_docs=%s, num_nnz=%s, norm=%s)" % (self.num_docs, self.num_nnz, self.norm)

def calc_norm(self, corpus):
"""
Calculates the norm by calling matutils.unitvec with the norm parameter.
"""Calculate the norm by calling :func:`~gensim.matutils.unitvec` with the norm parameter.
Parameters
----------
corpus : iterable of iterable of (int, number)
Input corpus.
"""
logger.info("Performing %s normalization...", self.norm)
norms = []
Expand All @@ -68,8 +65,35 @@ def calc_norm(self, corpus):
self.norms = norms

def normalize(self, bow):
    """Scale a single bag-of-words document to unit length.

    The work is delegated to :func:`~gensim.matutils.unitvec`, which is
    called with the document and the norm stored on this model
    (``self.norm``).

    Parameters
    ----------
    bow : list of (int, number)
        Document in BoW format.

    Returns
    -------
    list of (int, number)
        Normalized document.

    """
    return matutils.unitvec(bow, self.norm)

def __getitem__(self, bow):
    """Normalize a document through the ``model[bow]`` syntax.

    This is a thin wrapper: the document is forwarded unchanged to
    :meth:`~gensim.models.normmodel.NormModel.normalize` and its result
    is returned as-is.

    Parameters
    ----------
    bow : list of (int, number)
        Document in BoW format.

    Returns
    -------
    list of (int, number)
        Normalized document.

    """
    normalized = self.normalize(bow)
    return normalized

0 comments on commit fad00c6

Please sign in to comment.