Merge branch 'main' into tabu-search

SneaksAndData · Jun 13, 2024 · commit 6faa7d1
2 parents 85e769a + 8c770a9

Showing 3 changed files with 60 additions and 43 deletions.
diff --git a/anti_clustering/exchange_heuristic.py b/anti_clustering/exchange_heuristic.py
@@ -19,7 +19,6 @@
 Psychological Methods, 26(2), 161–174. https://doi.org/10.1037/met0000301
 """
 
-import numpy as np
 import numpy.typing as npt
 from anti_clustering._cluster_swap_heuristic import ClusterSwapHeuristic
 
@@ -29,8 +28,9 @@ class ExchangeHeuristicAntiClustering(ClusterSwapHeuristic):
     The exchange heuristic to solving the anti-clustering problem.
     """
 
-    def __init__(self, verbose: bool = False, random_seed: int = None):
+    def __init__(self, verbose: bool = False, random_seed: int = None, restarts: int = 9):
         super().__init__(verbose=verbose, random_seed=random_seed)
+        self.restarts = restarts
 
     def _solve(self, distance_matrix: npt.NDArray[float], num_groups: int) -> npt.NDArray[bool]:
         # Starts with random cluster assignment
@@ -39,40 +39,45 @@ def _solve(self, distance_matrix: npt.NDArray[float], num_groups: int) -> npt.ND
         if self.verbose:
             print("Solving")
 
-        # Initial objective value
-        current_objective = self._calculate_objective(cluster_assignment, distance_matrix)
-        for i in range(len(distance_matrix)):
-            if self.verbose and i % 5 == 0:
-                print(f"Iteration {i + 1} of {len(distance_matrix)}")
-
-            # Get list of possible swaps
-            exchange_indices = self._get_exchanges(cluster_assignment, i)
-
-            if len(exchange_indices) == 0:
-                continue
-
-            # Calculate objective value for all possible swaps.
-            # List contains tuples of obj. val. and swapped element index.
-            exchanges = [
-                (self._calculate_objective(self._swap(cluster_assignment, i, j), distance_matrix), j)
-                for j in exchange_indices
-            ]
-
-            # Find best swap
-            best_exchange = max(exchanges)
-
-            # If best swap is better than current objective value then complete swap
-            if best_exchange[0] > current_objective:
-                cluster_assignment = self._swap(cluster_assignment, i, best_exchange[1])
-                current_objective = best_exchange[0]
-
-        return cluster_assignment
-
-    def _calculate_objective(self, cluster_assignment: npt.NDArray[bool], distance_matrix: npt.NDArray[float]) -> float:
-        """
-        Calculate objective value
-        :param cluster_assignment: Cluster assignment matrix
-        :param distance_matrix: Cost matrix
-        :return: Objective value
-        """
-        return np.multiply(cluster_assignment, distance_matrix).sum()
+        candidate_solutions = []
+
+        for restart in range(self.restarts):
+            # Initial objective value
+            current_objective = self._calculate_objective(cluster_assignment, distance_matrix)
+            for i in range(len(distance_matrix)):
+                if self.verbose and i % 5 == 0:
+                    print(f"Iteration {i + 1} of {len(distance_matrix)}")
+
+                # Get list of possible swaps
+                exchange_indices = self._get_exchanges(cluster_assignment, i)
+
+                if len(exchange_indices) == 0:
+                    continue
+
+                # Calculate objective value for all possible swaps.
+                # List contains tuples of obj. val. and swapped element index.
+                exchanges = [
+                    (self._calculate_objective(self._swap(cluster_assignment, i, j), distance_matrix), j)
+                    for j in exchange_indices
+                ]
+
+                # Find best swap
+                best_exchange = max(exchanges)
+
+                # If best swap is better than current objective value then complete swap
+                if best_exchange[0] > current_objective:
+                    cluster_assignment = self._swap(cluster_assignment, i, best_exchange[1])
+                    current_objective = best_exchange[0]
+
+            candidate_solutions.append((current_objective, cluster_assignment))
+
+            if self.verbose:
+                print(f"Restart {restart + 1} of {self.restarts}")
+
+            # Cold restart: draw a fresh random cluster assignment as the starting point for the next pass
+            cluster_assignment = self._get_random_clusters(num_groups=num_groups, num_elements=len(distance_matrix))
+
+        # Select best solution, maximizing objective
+        _, best_cluster_assignment = max(candidate_solutions, key=lambda x: x[0])
+
+        return best_cluster_assignment
diff --git a/anti_clustering/naive_random_heuristic.py b/anti_clustering/naive_random_heuristic.py
@@ -24,8 +24,20 @@ class NaiveRandomHeuristicAntiClustering(ClusterSwapHeuristic):
     The naive randomized way of solving the anti-clustering problem.
     """
 
-    def __init__(self, verbose: bool = False, random_seed: int = None):
+    def __init__(self, verbose: bool = False, random_seed: int = None, iterations: int = 1000):
         super().__init__(verbose=verbose, random_seed=random_seed)
+        self.iterations = iterations
 
     def _solve(self, distance_matrix: npt.NDArray[float], num_groups: int) -> npt.NDArray[bool]:
-        return self._get_random_clusters(num_groups=num_groups, num_elements=len(distance_matrix))
+        best_candidate = self._get_random_clusters(num_groups=num_groups, num_elements=len(distance_matrix))
+        best_objective = self._calculate_objective(best_candidate, distance_matrix)
+
+        for _ in range(self.iterations):
+            candidate = self._get_random_clusters(num_groups=num_groups, num_elements=len(distance_matrix))
+            objective = self._calculate_objective(candidate, distance_matrix)
+
+            if objective > best_objective:
+                best_candidate = candidate
+                best_objective = objective
+
+        return best_candidate
diff --git a/examples/evaluation.py b/examples/evaluation.py
@@ -38,8 +38,8 @@
 
 methods: List[AntiClustering] = [
     TabuSearchHeuristicAntiClustering(iterations=5000, restarts=10, tabu_tenure=50),
-    ExchangeHeuristicAntiClustering(),
-    SimulatedAnnealingHeuristicAntiClustering(alpha=0.95, iterations=5000, starting_temperature=1000, restarts=15),
+    ExchangeHeuristicAntiClustering(restarts=20),
+    SimulatedAnnealingHeuristicAntiClustering(alpha=0.95, iterations=5000, starting_temperature=1000, restarts=20),
     NaiveRandomHeuristicAntiClustering(),
     # ExactClusterEditingAntiClustering(), # This method is extremely slow for large datasets
 ]