Skip to content
This repository has been archived by the owner on Aug 2, 2022. It is now read-only.

Commit

Permalink
Fixed usage of centroid, median, and ward linkage in 'biosppy.clustering.hierarchical' (closes #9).
Browse files Browse the repository at this point in the history
  • Loading branch information
capcarr committed Sep 16, 2016
1 parent 8e962d5 commit 8c991b2
Showing 1 changed file with 34 additions and 14 deletions.
48 changes: 34 additions & 14 deletions biosppy/clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ def hierarchical(data=None,
Linkage criterion; one of 'average', 'centroid', 'complete', 'median',
'single', 'ward', or 'weighted'.
metric : str, optional
Distance metric (see scipy.spatial.distance).
Distance metric (see 'biosppy.metrics').
metric_args : dict, optional
Additional keyword arguments to pass to the distance function.
Expand All @@ -112,6 +112,18 @@ def hierarchical(data=None,
cluster; outliers have key -1; clusters are assigned integer keys
starting at 0.
Raises
------
TypeError
If 'metric' is not a string.
ValueError
When the 'linkage' is unknown.
TypeError
If 'metric' is not 'euclidean' when using 'centroid', 'median',
or 'ward' linkage.
ValueError
When 'k' is larger than the number of data samples.
"""

# check inputs
Expand All @@ -122,22 +134,31 @@ def hierarchical(data=None,
'ward', 'weighted']:
raise ValueError("Unknown linkage criterion '%r'." % linkage)

if metric_args is None:
metric_args = {}
if not isinstance(metric, basestring):
raise TypeError("Please specify the distance metric as a string.")

N = len(data)
if k > N:
raise ValueError("Number of clusters 'k' is higher than the number \
of input samples.")
raise ValueError("Number of clusters 'k' is higher than the number" \
" of input samples.")

if k < 0:
k = 0
if metric_args is None:
metric_args = {}

# compute distances
D = metrics.pdist(data, metric=metric, **metric_args)
if linkage in ['centroid', 'median', 'ward']:
if metric != 'euclidean':
raise TypeError("Linkage '{}' requires the distance metric to be" \
" 'euclidean'.".format(linkage))
Z = sch.linkage(data, method=linkage)
else:
# compute distances
D = metrics.pdist(data, metric=metric, **metric_args)

# build linkage
Z = sch.linkage(D, method=linkage)
# build linkage
Z = sch.linkage(D, method=linkage)

if k < 0:
k = 0

# extract clusters
if k == 0:
Expand Down Expand Up @@ -423,7 +444,7 @@ def coassoc_partition(coassoc=None, k=0, linkage='average'):
Number of clusters to extract; if 0 uses the life-time criterion.
linkage : str, optional
Linkage criterion for final partition extraction; one of 'average',
'centroid', 'complete', 'median', 'single', 'ward', or 'weighted'.
'complete', 'single', or 'weighted'.
Returns
-------
Expand All @@ -438,8 +459,7 @@ def coassoc_partition(coassoc=None, k=0, linkage='average'):
if coassoc is None:
raise TypeError("Please specify the input co-association matrix.")

if linkage not in ['average', 'centroid', 'complete', 'median', 'single',
'ward', 'weighted']:
if linkage not in ['average', 'complete', 'single', 'weighted']:
raise ValueError("Unknown linkage criterion '%r'." % linkage)

N = len(coassoc)
Expand Down

0 comments on commit 8c991b2

Please sign in to comment.