Skip to content

Commit

Permalink
Merge branch 'main' into tabu-search
Browse files Browse the repository at this point in the history
  • Loading branch information
matt035343 authored Jun 13, 2024
2 parents 85e769a + 8c770a9 commit 6faa7d1
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 43 deletions.
83 changes: 44 additions & 39 deletions anti_clustering/exchange_heuristic.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
Psychological Methods, 26(2), 161–174. https://doi.org/10.1037/met0000301
"""

import numpy as np
import numpy.typing as npt
from anti_clustering._cluster_swap_heuristic import ClusterSwapHeuristic

Expand All @@ -29,8 +28,9 @@ class ExchangeHeuristicAntiClustering(ClusterSwapHeuristic):
The exchange heuristic for solving the anti-clustering problem.
"""

def __init__(self, verbose: bool = False, random_seed: int = None):
def __init__(self, verbose: bool = False, random_seed: int = None, restarts: int = 9):
    """
    Configure the exchange heuristic.
    :param verbose: Passed through to the base class.
    :param random_seed: Passed through to the base class.
    :param restarts: Number of exchange passes to run. The first pass starts from the
        initial assignment and every later pass from a fresh random assignment; the
        pass with the highest objective value is the one returned by ``_solve``.
    :raises ValueError: If ``restarts`` is less than 1.
    """
    # With no passes there is no candidate solution to choose from, and the
    # final max() over the candidates would fail with an unhelpful message.
    if restarts < 1:
        raise ValueError(f"restarts must be at least 1, got {restarts}")

    super().__init__(verbose=verbose, random_seed=random_seed)
    self.restarts = restarts

def _solve(self, distance_matrix: npt.NDArray[float], num_groups: int) -> npt.NDArray[bool]:
# Starts with random cluster assignment
Expand All @@ -39,40 +39,45 @@ def _solve(self, distance_matrix: npt.NDArray[float], num_groups: int) -> npt.ND
if self.verbose:
print("Solving")

# Initial objective value
current_objective = self._calculate_objective(cluster_assignment, distance_matrix)
for i in range(len(distance_matrix)):
if self.verbose and i % 5 == 0:
print(f"Iteration {i + 1} of {len(distance_matrix)}")

# Get list of possible swaps
exchange_indices = self._get_exchanges(cluster_assignment, i)

if len(exchange_indices) == 0:
continue

# Calculate objective value for all possible swaps.
# List contains tuples of obj. val. and swapped element index.
exchanges = [
(self._calculate_objective(self._swap(cluster_assignment, i, j), distance_matrix), j)
for j in exchange_indices
]

# Find best swap
best_exchange = max(exchanges)

# If best swap is better than current objective value then complete swap
if best_exchange[0] > current_objective:
cluster_assignment = self._swap(cluster_assignment, i, best_exchange[1])
current_objective = best_exchange[0]

return cluster_assignment

def _calculate_objective(self, cluster_assignment: npt.NDArray[bool], distance_matrix: npt.NDArray[float]) -> float:
"""
Calculate objective value
:param cluster_assignment: Cluster assignment matrix
:param distance_matrix: Cost matrix
:return: Objective value
"""
return np.multiply(cluster_assignment, distance_matrix).sum()
candidate_solutions = []

for restart in range(self.restarts):
# Initial objective value
current_objective = self._calculate_objective(cluster_assignment, distance_matrix)
for i in range(len(distance_matrix)):
if self.verbose and i % 5 == 0:
print(f"Iteration {i + 1} of {len(distance_matrix)}")

# Get list of possible swaps
exchange_indices = self._get_exchanges(cluster_assignment, i)

if len(exchange_indices) == 0:
continue

# Calculate objective value for all possible swaps.
# List contains tuples of obj. val. and swapped element index.
exchanges = [
(self._calculate_objective(self._swap(cluster_assignment, i, j), distance_matrix), j)
for j in exchange_indices
]

# Find best swap
best_exchange = max(exchanges)

# If best swap is better than current objective value then complete swap
if best_exchange[0] > current_objective:
cluster_assignment = self._swap(cluster_assignment, i, best_exchange[1])
current_objective = best_exchange[0]

candidate_solutions.append((current_objective, cluster_assignment))

if self.verbose:
print(f"Restart {restart + 1} of {self.restarts}")

# Cold restart, select random cluster assignment
cluster_assignment = self._get_random_clusters(num_groups=num_groups, num_elements=len(distance_matrix))

# Select best solution, maximizing objective
_, best_cluster_assignment = max(candidate_solutions, key=lambda x: x[0])

return best_cluster_assignment
16 changes: 14 additions & 2 deletions anti_clustering/naive_random_heuristic.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,20 @@ class NaiveRandomHeuristicAntiClustering(ClusterSwapHeuristic):
The naive randomized way of solving the anti-clustering problem.
"""

def __init__(self, verbose: bool = False, random_seed: int = None):
def __init__(self, verbose: bool = False, random_seed: int = None, iterations: int = 1000):
    """
    Configure the naive random heuristic.
    :param verbose: Passed through to the base class.
    :param random_seed: Passed through to the base class.
    :param iterations: Number of additional random assignments ``_solve`` draws
        after its initial one.
    """
    super().__init__(random_seed=random_seed, verbose=verbose)
    self.iterations = iterations

def _solve(self, distance_matrix: npt.NDArray[float], num_groups: int) -> npt.NDArray[bool]:
    """
    Draw random cluster assignments and keep the one with the highest objective value.
    :param distance_matrix: Cost matrix; its length is used as the number of elements.
    :param num_groups: Number of groups requested for each random assignment.
    :return: Best assignment among ``self.iterations + 1`` random draws.
    """
    num_elements = len(distance_matrix)

    # The first draw seeds the incumbent, so an assignment is returned even
    # when self.iterations is zero.
    best_candidate = self._get_random_clusters(num_groups=num_groups, num_elements=num_elements)
    best_objective = self._calculate_objective(best_candidate, distance_matrix)

    for _ in range(self.iterations):
        candidate = self._get_random_clusters(num_groups=num_groups, num_elements=num_elements)
        objective = self._calculate_objective(candidate, distance_matrix)

        # Strict comparison: on ties the earlier draw is kept.
        if objective > best_objective:
            best_candidate, best_objective = candidate, objective

    return best_candidate
4 changes: 2 additions & 2 deletions examples/evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@

methods: List[AntiClustering] = [
TabuSearchHeuristicAntiClustering(iterations=5000, restarts=10, tabu_tenure=50),
ExchangeHeuristicAntiClustering(),
SimulatedAnnealingHeuristicAntiClustering(alpha=0.95, iterations=5000, starting_temperature=1000, restarts=15),
ExchangeHeuristicAntiClustering(restarts=20),
SimulatedAnnealingHeuristicAntiClustering(alpha=0.95, iterations=5000, starting_temperature=1000, restarts=20),
NaiveRandomHeuristicAntiClustering(),
# ExactClusterEditingAntiClustering(), # This method is extremely slow for large datasets
]
Expand Down

0 comments on commit 6faa7d1

Please sign in to comment.