This example demonstrates the K-means clustering algorithm through real-time animation, visualizing how the algorithm iteratively converges to optimal cluster assignments by alternating between assignment and centroid update steps.
#include <chrono>
#include <cmath>
#include <iostream>
#include <limits>
#include <random>
#include <string>
#include <thread>
#include <utility>
#include <vector>
};
};
return std::sqrt((p.x - c.x) * (p.x - c.x) + (p.y - c.y) * (p.y - c.y));
}
std::cout << "Starting K-means clustering animation..." << '\n';
const int numPoints = 200;
const int k = 4;
const int maxIterations = 20;
std::vector<std::string> clusterColors = {"red", "blue", "green", "orange"};
std::random_device rd;
std::mt19937 gen(rd());
std::vector<Point> points;
points.reserve(numPoints);
std::vector<std::pair<double, double>> clusterCenters = {
{2.0, 2.0}, {-2.0, 2.0}, {-2.0, -2.0}, {2.0, -2.0}};
for (int cluster = 0; cluster < 4; cluster++) {
std::normal_distribution<double> xDist(clusterCenters[cluster].first, 0.8);
std::normal_distribution<double> yDist(clusterCenters[cluster].second, 0.8);
for (int i = 0; i < numPoints / 4; i++) {
points.push_back({xDist(gen), yDist(gen),
-1,
"gray"});
}
}
std::uniform_real_distribution<double> coordDist(-4.0, 4.0);
std::vector<Centroid> centroids(k);
for (int i = 0; i < k; i++) {
centroids[i] = {
.x = coordDist(gen), .y = coordDist(gen), .color = clusterColors[i]};
}
std::vector<double> xCoords, yCoords;
std::vector<std::string> pointColors;
for (const auto &p : points) {
xCoords.push_back(p.x);
yCoords.push_back(p.y);
pointColors.emplace_back("gray");
}
std::vector<double> centroidX, centroidY;
std::vector<std::string> centroidColors;
for (const auto &c : centroids) {
centroidX.push_back(c.x);
centroidY.push_back(c.y);
centroidColors.push_back(c.color);
}
{"type", "scatter"},
{"mode", "markers"},
{"x", xCoords},
{"y", yCoords},
{"marker", {{"color", pointColors}, {"size", 8}, {"opacity", 0.7}}},
{"name", "Data Points"},
{"hovertemplate", "Point (%{x:.2f}, %{y:.2f})<extra></extra>"}};
{"type", "scatter"},
{"mode", "markers"},
{"x", centroidX},
{"y", centroidY},
{"marker",
{{"color", centroidColors},
{"size", 20},
{"symbol", "x"},
{"line", {{"width", 3}, {"color", "black"}}}},
{"name", "Centroids"},
{"hovertemplate", "Centroid (%{x:.2f}, %{y:.2f})<extra></extra>"}}};
{"title",
{{"text",
"K-means Clustering Animation<br>" +
std::string(
"<sub>Watch algorithm converge to optimal clusters</sub>")},
{"font", {{"size", 16}}}}},
{"xaxis",
{{"title", "X Coordinate"}, {"range", {-5, 5}}, {"showgrid", true}}},
{"yaxis",
{{"title", "Y Coordinate"},
{"range", {-5, 5}},
{"showgrid", true},
{"scaleanchor", "x"}}},
{"width", 800},
{"height", 700},
{"showlegend", true}};
std::vector<plotly::Object> data = {pointTrace, centroidTrace};
std::cout << "Starting K-means algorithm with " << k << " clusters..."
<< '\n';
for (int iteration = 0; iteration < maxIterations; iteration++) {
std::cout << "Iteration " << (iteration + 1) << "/" << maxIterations
<< '\n';
bool converged = true;
for (auto &point : points) {
double minDist = std::numeric_limits<double>::max();
int bestCluster = 0;
for (int j = 0; j < k; j++) {
double dist =
distance(point, centroids[j]);
if (dist < minDist) {
minDist = dist;
bestCluster = j;
}
}
if (point.cluster != bestCluster) {
converged = false;
point.cluster = bestCluster;
point.color = clusterColors[bestCluster];
}
}
pointColors.clear();
for (const auto &p : points) {
pointColors.push_back(p.color);
}
fig.
restyle({{
"marker.color", {pointColors}}}, {0});
std::this_thread::sleep_for(std::chrono::milliseconds(800));
std::vector<double> newCentroidX(k, 0.0), newCentroidY(k, 0.0);
std::vector<int> clusterCounts(k, 0);
for (const auto &point : points) {
if (point.cluster >= 0) {
newCentroidX[point.cluster] += point.x;
newCentroidY[point.cluster] += point.y;
clusterCounts[point.cluster]++;
}
}
for (int j = 0; j < k; j++) {
if (clusterCounts[j] > 0) {
double newX = newCentroidX[j] / clusterCounts[j];
double newY = newCentroidY[j] / clusterCounts[j];
if (std::abs(centroids[j].x - newX) > 0.01 ||
std::abs(centroids[j].y - newY) > 0.01) {
converged = false;
}
centroids[j].x = newX;
centroids[j].y = newY;
}
}
centroidX.clear();
centroidY.clear();
for (const auto &c : centroids) {
centroidX.push_back(c.x);
centroidY.push_back(c.y);
}
fig.
restyle({{
"x", {centroidX}}, {
"y", {centroidY}}}, {1});
std::this_thread::sleep_for(std::chrono::milliseconds(800));
if (converged) {
std::cout << "Algorithm converged after " << (iteration + 1)
<< " iterations!" << '\n';
break;
}
}
{{"title",
{{"text",
"K-means Clustering - CONVERGED!<br>" +
std::string(
"<sub>Algorithm found optimal cluster assignments</sub>")},
{"font", {{"size", 16}, {"color", "green"}}}}}});
std::cout << "Clustering animation completed. Close browser to exit." << '\n';
return 0;
}
auto main() -> int
Definition gallery_animate_sin_wave.cpp:48
auto distance(const Point &p, const Centroid &c) -> double
Definition gallery_clustering_animation.cpp:72
nlohmann::json Object
Definition plotly.hpp:26
Public Plotly C++ API header.
Definition gallery_clustering_animation.cpp:66
std::string color
Definition gallery_clustering_animation.cpp:68
double x
Definition gallery_clustering_animation.cpp:67
double y
Definition gallery_clustering_animation.cpp:67
Definition gallery_clustering_animation.cpp:60
std::string color
Definition gallery_clustering_animation.cpp:63
int cluster
Definition gallery_clustering_animation.cpp:62
double x
Definition gallery_clustering_animation.cpp:61
double y
Definition gallery_clustering_animation.cpp:61