Skip to content

Commit

Permalink
Improve code quality, add 3d plot.
Browse files Browse the repository at this point in the history
  • Loading branch information
Jun Hu@lab committed Apr 18, 2020
1 parent ebd3603 commit ca2341b
Show file tree
Hide file tree
Showing 7 changed files with 136 additions and 91 deletions.
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# NPTU AI Course
## K nearest neighbor
![alt text](https://github.com/adam-p/markdown-here/raw/master/src/common/images/icon48.png "Logo Title Text 1")
## K means
## Mean shift
## Random search
## Genetic Algorithm
19 changes: 13 additions & 6 deletions genetic_algorithm.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,19 @@
import numpy as np
import matplotlib.pyplot as plt
from util_3d import add_plot

is_3d = True
ax, gene_num = add_plot(is_3d)

chromosome_num = 10
gene_num = 2
population_shape = (chromosome_num, gene_num)

test_goal = np.random.rand(gene_num)

iteration = 0
iteration_num = 100

# Parameters
mutation_rate = 0.3
crossover_rate = 0.3
selection_ratio = 0.3
Expand Down Expand Up @@ -42,15 +47,17 @@
# We assume convergence once best_fitness exceeds early_stop_fitness.
if best_fitness > early_stop_fitness:
break
plt.clf()
ax.clear()

# Plot
plt.scatter(population[:, 0], population[:, 1], s=50, alpha=0.5)
plt.scatter(*test_goal, s=200, marker="*", alpha=1.0)
plt.scatter(*best_goal, s=200, marker="+", alpha=1.0)
ax.scatter(*population.T, s=50, alpha=0.5)
ax.scatter(*test_goal, s=200, marker="*", alpha=1.0)
ax.scatter(*best_goal, s=200, marker="+", alpha=1.0)

plt.ylim(0, 1)
plt.xlim(0, 1)
if is_3d:
ax.set_zlim3d(0, 1)

plt.title("iteration %s, best_fitness: %.4f" % (iteration, best_fitness))
plt.pause(0.5)
Expand All @@ -67,7 +74,7 @@

# Crossover
for i, chromosome in enumerate(population):
if np.random.rand(1) <= mutation_rate:
if np.random.rand(1) <= crossover_rate:
# Prevent a chromosome from crossing over with itself
parent_idx = np.random.randint(population.shape[0] - 1)
if parent_idx >= i:
Expand Down
34 changes: 20 additions & 14 deletions k_means.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,34 +2,40 @@

import numpy as np
import matplotlib.pyplot as plt
from util_3d import add_plot

is_3d = True
ax, point_dim = add_plot(is_3d)

n_cluster_points = 100
point_dim = 2
cluster_shape = (n_cluster_points, point_dim)
means_K = 4
early_stop_distant = 0.01

# Randomly generate clusters using Normal Distribution (randn)
rand_points1 = 0 + 2 * np.random.randn(*cluster_shape)
rand_points2 = 10 + 3 * np.random.randn(*cluster_shape)
rand_points3 = [20, 0] + 2 * np.random.randn(*cluster_shape)
rand_points4 = [30, 20] + 1.5 * np.random.randn(*cluster_shape)
if is_3d:
rand_points3 = [20, 0, 5] + 2 * np.random.randn(*cluster_shape)
rand_points4 = [30, 20, 10] + 1.5 * np.random.randn(*cluster_shape)
else:
rand_points3 = [20, 0] + 2 * np.random.randn(*cluster_shape)
rand_points4 = [30, 20] + 1.5 * np.random.randn(*cluster_shape)
points_num = n_cluster_points * means_K
all_points = np.concatenate((rand_points1, rand_points2, rand_points3, rand_points4))

# Randomly choose the initial centroids from the points.
rand_indexes = np.random.choice(all_points.shape[0], means_K, replace=False)
centroids = all_points[rand_indexes]

cluster_colors = ["red", "blue", "green", "purple"]

distant_arr = np.zeros((points_num, means_K))
iteration = 0
# Loop until converged.
while True:
points_per_cluster = [[] for _ in cluster_colors]
points_per_cluster = [[] for _ in range(means_K)]
# Calculate distance per point with each centroid.
for i, (cp, color) in enumerate(zip(centroids, cluster_colors)):
plt.scatter(*cp, color=color, marker='+', s=200)
for i, cp in enumerate(centroids):
ax.scatter(*cp, color="C%d" % i, marker='+', s=200)

for j, point in enumerate(all_points):
distant_arr[j, i] = np.linalg.norm(cp - point)
Expand All @@ -38,16 +44,16 @@
for point, clusters_distant in zip(all_points, distant_arr):
color_idx = np.argmin(clusters_distant)
points_per_cluster[color_idx].append(point)
plt.scatter(*point, color=cluster_colors[color_idx], s=50, alpha=0.1)
ax.scatter(*point, color="C%d" % color_idx, s=50, alpha=0.1)

centroids_distant = 0
new_centroids = []
# Calculate the mean of each cluster to get the new centroid of that cluster
for cluster, color, old_centroid in zip(points_per_cluster, cluster_colors, centroids):
new_centroid = np.average(cluster, axis=0)
for i, (cluster, old_centroid) in enumerate(zip(points_per_cluster, centroids)):
new_centroid = np.mean(cluster, axis=0)
# Record the distance between the new and old centroid in order to determine convergence.
centroids_distant += np.linalg.norm(new_centroid - old_centroid)
plt.scatter(*new_centroid, color=color, s=200, marker="*")
ax.scatter(*new_centroid, color="C%d" % i, s=200, marker="*")
new_centroids.append(new_centroid)
centroids = new_centroids

Expand All @@ -57,8 +63,8 @@
iteration += 1

# We assume convergence when the centroids are no longer updated
if centroids_distant < 0.01:
if centroids_distant < early_stop_distant:
break
# Only clear figure on non-last figure
plt.clf()
ax.clear()
plt.pause(9999)
57 changes: 35 additions & 22 deletions k_nearest_neighbor.py
Original file line number Diff line number Diff line change
@@ -1,51 +1,64 @@
import matplotlib.pyplot as plt
import numpy as np

from util_3d import add_plot

is_3d = False
ax, point_dim = add_plot(is_3d)

# Number of points of cluster
n_cluster_points = 20
point_dim = 2
cluster_shape = (n_cluster_points, point_dim)
cluster_colors = ["red", "blue", "green"]
color_per_point = []
points_color_idx = []

# Set number of neighbors and (x, y) of test point.
neighbors_K = 5
test_point = np.array([2, 5])
neighbors_K = 3

# Spawn points
rand_points1 = 0 + 2 * np.random.randn(*cluster_shape)
rand_points2 = 7 + 3 * np.random.randn(*cluster_shape)
rand_points3 = [3, 0] + 2 * np.random.randn(*cluster_shape)
points_num = n_cluster_points * 3
if is_3d:
test_point = np.array([2, 5, 2])
rand_points3 = [3, 0, 5] + 2 * np.random.randn(*cluster_shape)
else:
test_point = np.array([2, 5])
rand_points3 = [3, 0] + 2 * np.random.randn(*cluster_shape)

total_points_num = n_cluster_points * 3
points_list = [rand_points1, rand_points2, rand_points3]
distant_arr = np.zeros(points_num)
distant_arr = np.zeros(total_points_num)

for i, (cluster, color) in enumerate(zip(points_list, cluster_colors)):
for i, cluster in enumerate(points_list):
# Plot all points
plt.scatter(cluster[:, 0], cluster[:, 1], color=color, s=50, alpha=0.1)
ax.scatter(*cluster.T, color="C%d" % i, s=50, alpha=0.1)
# Create color for each point
color_per_point.append(np.full(n_cluster_points, i, dtype=int))
color_per_point = np.concatenate(color_per_point)
points_color_idx.append(np.full(n_cluster_points, i, dtype=int))

# Concatenate the lists into NumPy arrays along axis 0
points_color_idx = np.concatenate(points_color_idx)
all_points = np.concatenate(points_list)

# Calculate distance between test point and each point
for i, ap in enumerate(all_points):
distant_arr[i] = np.linalg.norm(test_point - ap)

# Get neighbor points from sorted distance
min_idx = np.argsort(distant_arr)
neighbor_points = all_points[min_idx][:neighbors_K]
neighbor_colors = color_per_point[min_idx][:neighbors_K]
min_idx = np.argsort(distant_arr)[:neighbors_K]
neighbor_points = all_points[min_idx]
neighbor_colors_idx = points_color_idx[min_idx]

# Emphasize neighbor points
for p, color in zip(neighbor_points, neighbor_colors):
plt.scatter(*p, color=cluster_colors[color], s=50, alpha=0.5)
for p, color_idx in zip(neighbor_points, neighbor_colors_idx):
ax.scatter(*p, color="C%d" % color_idx, s=50, alpha=0.5)

# Get value of maximum count
u, c = np.unique(neighbor_colors, return_counts=True)
# Get value of unique item of maximum count
u, c = np.unique(neighbor_colors_idx, return_counts=True)
y = u[c == c.max()]
results = [cluster_colors[c] for c in y]
if len(y) == 1:
plt.scatter(*test_point, color=results[0], marker="*", s=200)
results = ["C%d" % c for c in y]

# Require exactly one predicted result for the test point
if len(results) == 1:
ax.scatter(*test_point, color=results[0], marker="*", s=200)
else:
raise AssertionError("You got multiple predicted result: %s" % results)

Expand Down
77 changes: 37 additions & 40 deletions mean_shift.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,27 @@

import numpy as np
import matplotlib.pyplot as plt
from util_3d import add_plot

is_3d = True
ax, point_dim = add_plot(is_3d)

n_cluster_points = 100
point_dim = 2
cluster_shape = (n_cluster_points, point_dim)

# Randomly generate clusters using Normal Distribution (randn)
rand_points = 0 + 2 * np.random.randn(*cluster_shape)
rand_points1 = 0 + 2 * np.random.randn(*cluster_shape)
rand_points2 = 10 + 3 * np.random.randn(*cluster_shape)
rand_points3 = [20, 0] + 2 * np.random.randn(*cluster_shape)
rand_points4 = [20, 20] + 1.5 * np.random.randn(*cluster_shape)

all_points = np.concatenate((rand_points, rand_points2, rand_points3, rand_points4), axis=0)
if is_3d:
rand_points3 = [20, 0, 5] + 2 * np.random.randn(*cluster_shape)
rand_points4 = [20, 20, 10] + 1.5 * np.random.randn(*cluster_shape)
else:
rand_points3 = [20, 0] + 2 * np.random.randn(*cluster_shape)
rand_points4 = [20, 20] + 1.5 * np.random.randn(*cluster_shape)

# We only need random point rather than cluster so using uniform distribution.
# centroid = 10 * np.random.rand(point_dim) - 5
cluster_colors = ["red", "blue", "green", "purple"]
all_points = (rand_points1, rand_points2, rand_points3, rand_points4)
points_num = n_cluster_points * len(all_points)
all_points = np.concatenate(all_points, axis=0)


def neighborhood_points(xs, x_centroid, dist=3):
Expand All @@ -36,9 +41,10 @@ def neighborhood_points(xs, x_centroid, dist=3):
# Make all points as centroids
centroid_arr = np.copy(all_points)

# Iterate until converged
while True:
plt.scatter(all_points[:, 0], all_points[:, 1], color="blue", s=50, alpha=0.1)
plt.scatter(centroid_arr[:, 0], centroid_arr[:, 1], color="red", s=50, alpha=0.1)
ax.scatter(*all_points.T, color="blue", s=50, alpha=0.1)
ax.scatter(*centroid_arr.T, color="red", s=50, alpha=0.1)

distant_list = []
for i, rp in enumerate(all_points):
Expand All @@ -57,43 +63,34 @@ def neighborhood_points(xs, x_centroid, dist=3):
plt.title("iteration %s, mean_distance=%.4f" % (iteration, mean_distance))
plt.pause(0.5)
plt.draw()
plt.clf()
ax.clear()
# We assume convergence when the centroids are no longer updated, the same as in k-means.
if mean_distance < 0.0001:
break
iteration += 1


def direct_calculate():
sorted_all = all_points[ind]
splited_cluster = np.split(sorted_all, cluster_idx.ravel())

for i, (cluster, centroids) in enumerate(zip(splited_cluster, splited_centroid)):
new_centroid = np.mean(centroids, axis=0)
plt.scatter(*new_centroid, color="C%d" % i, marker="*", s=200, alpha=1.0)
plt.scatter(cluster[:, 0], cluster[:, 1], color="C%d" % i, s=50, alpha=0.1)


def k_means(centroids, points_num):
distant_arr = np.zeros(points_num, len(centroids))
# Calculate distance per point with each centroid.
for i, (cp, color) in enumerate(centroids):
plt.scatter(*cp, color="C%d" % i, marker='+', s=200)

for j, point in enumerate(all_points):
distant_arr[j, i] = np.linalg.norm(cp - point)

# Get minimal distance between each centroid and each point.
for point, clusters_distant in zip(all_points, distant_arr):
color_idx = np.argmin(clusters_distant)
plt.scatter(*point, color="C%d" % color_idx, s=50, alpha=0.1)


# Sort all centroids (points) by x value, then by y value
ind = np.lexsort((centroid_arr[:, 1], centroid_arr[:, 0]))
sorted_centroids = centroid_arr[ind]

# Where the distance between consecutive sorted centroids exceeds the threshold, split the sorted centroids at those positions to form clusters
centroid_diff = np.linalg.norm(np.diff(sorted_centroids, axis=0), axis=1)
cluster_idx = np.argwhere(centroid_diff > 1)
splited_centroid = np.split(sorted_centroids, cluster_idx.ravel())
split_idx = np.argwhere(centroid_diff > 1).ravel()
clustered_centroid = np.split(sorted_centroids, split_idx)

# Combine with k-means algorithm
distant_arr = np.zeros((points_num, len(split_idx) + 1))

for i, centroids in enumerate(clustered_centroid):
new_centroid = np.mean(centroids, axis=0)
ax.scatter(*new_centroid, color="C%d" % i, marker="*", s=200, alpha=1.0)
for j, point in enumerate(all_points):
distant_arr[j, i] = np.linalg.norm(new_centroid - point)

# Get minimal distance between each centroid and each point, and choose the centroid point.
for point, clusters_distant in zip(all_points, distant_arr):
color_idx = np.argmin(clusters_distant)
ax.scatter(*point, color="C%d" % color_idx, s=50, alpha=0.1)


plt.title("Clustering result: %s cluster" % (i+1))
Expand Down
20 changes: 11 additions & 9 deletions random_search.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
import numpy as np
import matplotlib.pyplot as plt
from util_3d import add_plot

is_3d = True
ax, particle_dim = add_plot(is_3d)

n_particles = 10
particle_dim = 2
particles_shape = (n_particles, particle_dim)

test_goal = np.random.rand(particle_dim)
Expand All @@ -22,30 +25,29 @@
sigma = 1/(4*iteration)
rand_particles = best_goal + sigma * np.random.randn(*particles_shape)

plt.scatter(rand_particles[:, 0], rand_particles[:, 1], s=50, alpha=0.5)
plt.scatter(*test_goal, s=200, marker="*", alpha=1.0)
ax.scatter(*rand_particles.T, s=50, alpha=0.5)
ax.scatter(*test_goal, s=200, marker="*", alpha=1.0)

for p in rand_particles:
distance = np.linalg.norm(p - test_goal)
distant_list.append(distance)
min_idx = np.argmin(distant_list)
min_particle = rand_particles[min_idx]
if best_distant is None:
best_distant = distant_list[min_idx]
best_goal = min_particle

if distant_list[min_idx] < best_distant:
if best_distant is None or distant_list[min_idx] < best_distant:
best_distant = distant_list[min_idx]
best_goal = min_particle
plt.scatter(*best_goal, s=200, marker="+", alpha=1.0)
ax.scatter(*best_goal, s=200, marker="+", alpha=1.0)

plt.ylim(-1, 1)
plt.xlim(-1, 1)
if is_3d:
ax.set_zlim3d(-1, 1)

plt.title("iteration %s, Error: %.4f" % (iteration, best_distant))
plt.pause(0.5)
plt.draw()
plt.clf()
ax.clear()

# We assume converged when centroid no more updated that same as k-means.
# if mean_distance < 0.0001:
Expand Down
Loading

0 comments on commit ca2341b

Please sign in to comment.