diff --git a/gensim/similarities/termsim.py b/gensim/similarities/termsim.py index 167b73b241..bc7bd836f0 100644 --- a/gensim/similarities/termsim.py +++ b/gensim/similarities/termsim.py @@ -168,7 +168,7 @@ class SparseTermSimilarityMatrix(SaveLoad): strict column diagonal dominance. Positive definiteness is a necessary precondition if you later wish to derive a change-of-basis matrix from the term similarity matrix using Cholesky factorization. - nonzero_limit : {int, None}, optional + nonzero_limit : int or None, optional The maximum number of non-zero elements outside the diagonal in a single column of the sparse term similarity matrix. If None, then no limit will be imposed. dtype : numpy.dtype, optional @@ -242,11 +242,11 @@ def __init__(self, source, dictionary=None, tfidf=None, symmetric=True, positive key=lambda x: (lambda term_index, _: (tfidf.idfs[term_index], -term_index))(*x), reverse=True) for row_number, (t2_index, similarity) in zip(range(num_rows), rows): - if positive_definite and column_sum[t1_index] + similarity >= 1.0: + if positive_definite and column_sum[t1_index] + abs(similarity) >= 1.0: break if symmetric: if column_nonzero[t2_index] <= nonzero_limit \ - and (not positive_definite or column_sum[t2_index] + similarity < 1.0) \ + and (not positive_definite or column_sum[t2_index] + abs(similarity) < 1.0) \ and not (t1_index, t2_index) in matrix: matrix[t1_index, t2_index] = similarity column_nonzero[t1_index] += 1 diff --git a/gensim/test/test_similarities.py b/gensim/test/test_similarities.py index 00f916f869..428431ea15 100644 --- a/gensim/test/test_similarities.py +++ b/gensim/test/test_similarities.py @@ -800,21 +800,22 @@ def test_symmetric(self): def test_positive_definite(self): """Test the positive_definite parameter of the matrix constructor.""" + negative_index = UniformTermSimilarityIndex(self.dictionary, term_similarity=-0.5) matrix = SparseTermSimilarityMatrix( - self.index, self.dictionary, nonzero_limit=2).matrix.todense() + negative_index, self.dictionary, nonzero_limit=2).matrix.todense() expected_matrix = numpy.array([ - [1.0, 0.5, 0.5, 0.0, 0.0], - [0.5, 1.0, 0.0, 0.5, 0.0], - [0.5, 0.0, 1.0, 0.0, 0.0], - [0.0, 0.5, 0.0, 1.0, 0.0], + [1.0, -.5, -.5, 0.0, 0.0], + [-.5, 1.0, 0.0, -.5, 0.0], + [-.5, 0.0, 1.0, 0.0, 0.0], + [0.0, -.5, 0.0, 1.0, 0.0], [0.0, 0.0, 0.0, 0.0, 1.0]]) self.assertTrue(numpy.all(expected_matrix == matrix)) matrix = SparseTermSimilarityMatrix( - self.index, self.dictionary, nonzero_limit=2, positive_definite=True).matrix.todense() + negative_index, self.dictionary, nonzero_limit=2, positive_definite=True).matrix.todense() expected_matrix = numpy.array([ - [1.0, 0.5, 0.0, 0.0, 0.0], - [0.5, 1.0, 0.0, 0.0, 0.0], + [1.0, -.5, 0.0, 0.0, 0.0], + [-.5, 1.0, 0.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0, 0.0], [0.0, 0.0, 0.0, 1.0, 0.0], [0.0, 0.0, 0.0, 0.0, 1.0]])