此算法的主要作用:屏幕上有很多点时,把每个点归入离它最近的聚类中心,从而把相邻的点聚成一组。
k-means 算法是一种聚类算法,它根据对象的属性把 n 个对象划分为 k 个簇(k < n)。它与处理混合正态分布的最大期望(EM)算法很相似,因为它们都试图找到数据中自然聚类的中心。
php实现算法代码如下:
/**
 * One k-means cluster: the points currently assigned to it plus its centroid.
 */
class Cluster
{
    /**
     * Points currently assigned to this cluster (objects exposing numeric
     * public x and y properties). Starts as an empty array so that count()
     * is always safe, even before the first assignment pass.
     *
     * @var array
     */
    public $points = array();

    /**
     * Current centroid; an object with public x and y properties.
     * Updated in place by calculateAverage().
     *
     * @var object|null
     */
    public $avgPoint;

    /**
     * Moves the centroid to the arithmetic mean of the assigned points.
     *
     * When no points are assigned there is nothing to average, so the
     * centroid is re-seeded at a random integer position inside
     * [0, $maxX] x [0, $maxY] in the hope that a later pass attracts points.
     *
     * @param int|float $maxX Upper bound for a re-seeded x coordinate.
     * @param int|float $maxY Upper bound for a re-seeded y coordinate.
     *
     * @return void
     */
    public function calculateAverage($maxX, $maxY)
    {
        // Writing ->x on a null centroid is a fatal Error on PHP 8, so make
        // sure there is an object to write to.
        if ($this->avgPoint === null) {
            $this->avgPoint = new Point();
        }

        $count = count($this->points);
        if ($count === 0) {
            // rand() wants integer bounds with max >= min; clamp defensively.
            $this->avgPoint->x = rand(0, max(0, (int) $maxX));
            $this->avgPoint->y = rand(0, max(0, (int) $maxY));
            return;
        }

        // Accumulators must be initialised: "+=" on an undefined variable
        // raises a warning on PHP 8.
        $xsum = 0;
        $ysum = 0;
        foreach ($this->points as $p) {
            $xsum += $p->x;
            $ysum += $p->y;
        }

        $this->avgPoint->x = $xsum / $count;
        $this->avgPoint->y = $ysum / $count;
    }
}
-
/**
 * A location in the 2-D plane, described by two public coordinates.
 */
class Point
{
    /** @var mixed Horizontal coordinate (expected to be numeric). */
    public $x;

    /** @var mixed Vertical coordinate (expected to be numeric). */
    public $y;

    /**
     * Straight-line (Euclidean) distance from this point to another one.
     *
     * @param object $p Any object exposing numeric public x and y properties.
     *
     * @return float Square root of the summed squared coordinate differences.
     */
    public function getDistance($p)
    {
        $dx = $this->x - $p->x;
        $dy = $this->y - $p->y;
        $squaredLength = $dx * $dx + $dy * $dy;

        return sqrt($squaredLength);
    }
}
-
/**
 * Groups points into $k clusters with the k-means algorithm.
 *
 * Centroids are seeded at random integer positions inside
 * [0, maxX] x [0, maxY], where maxX / maxY are the largest coordinates found
 * in $arr (never below 0). Each pass assigns every point to its nearest
 * centroid and then moves each centroid to the mean of its points; a cluster
 * that received no points gets a fresh random centroid from
 * Cluster::calculateAverage().
 *
 * Iteration stops after $maxIterations passes, or earlier once a pass leaves
 * every assignment unchanged and no cluster empty. From that state further
 * passes would recompute exactly the same centroids, so stopping early does
 * not change the result.
 *
 * Note: seeding always starts at 0, so data lying entirely in negative
 * coordinates is seeded at the origin only.
 *
 * @param int   $k             Number of clusters to build.
 * @param array $arr           Points to cluster (objects with numeric x and y).
 * @param int   $maxIterations Upper bound on refinement passes (default 200).
 *
 * @return array List of $k Cluster objects; empty when $k is less than 1.
 */
function distributeOverClusters($k, $arr, $maxIterations = 200)
{
    $k = (int) $k;
    if ($k < 1) {
        // No clusters requested: nothing to assign points to.
        return array();
    }
    $maxIterations = (int) $maxIterations;

    // Start the bounds at 0 instead of reading undefined variables: compared
    // against null, a negative coordinate would win and become the "max".
    $maxX = 0;
    $maxY = 0;
    foreach ($arr as $p) {
        if ($p->x > $maxX) {
            $maxX = $p->x;
        }
        if ($p->y > $maxY) {
            $maxY = $p->y;
        }
    }
    // rand() takes integer bounds; round up so the seed area still covers
    // the largest coordinate when the input uses floats.
    $maxX = (int) ceil($maxX);
    $maxY = (int) ceil($maxY);

    $clusters = array();
    for ($i = 0; $i < $k; $i++) {
        $seed = new Point();
        $seed->x = rand(0, $maxX);
        $seed->y = rand(0, $maxY);

        $cluster = new Cluster();
        $cluster->points = array();
        $cluster->avgPoint = $seed;
        $clusters[] = $cluster;
    }

    $previousAssignment = null;
    for ($pass = 0; $pass < $maxIterations; $pass++) {
        foreach ($clusters as $cluster) {
            $cluster->points = array(); // start every pass from scratch
        }

        // Assign each point to the cluster with the nearest centroid;
        // ties go to the lowest cluster index.
        $assignment = array();
        foreach ($arr as $key => $pnt) {
            $bestIndex = 0;
            $bestDist = $clusters[0]->avgPoint->getDistance($pnt);
            for ($i = 1; $i < $k; $i++) {
                $dist = $clusters[$i]->avgPoint->getDistance($pnt);
                if ($dist < $bestDist) {
                    $bestIndex = $i;
                    $bestDist = $dist;
                }
            }
            $clusters[$bestIndex]->points[] = $pnt;
            $assignment[$key] = $bestIndex;
        }

        // Move the centroids. An empty cluster is re-seeded randomly, which
        // means the state has not settled yet.
        $hasEmptyCluster = false;
        foreach ($clusters as $cluster) {
            if (count($cluster->points) === 0) {
                $hasEmptyCluster = true;
            }
            $cluster->calculateAverage($maxX, $maxY);
        }

        if (!$hasEmptyCluster && $assignment === $previousAssignment) {
            break; // converged: further passes would repeat this result
        }
        $previousAssignment = $assignment;
    }

    return $clusters;
}
-
// Demo: three sample points on the line y = 2, split into two clusters.
$arr = array();
foreach (array(array(2, 2), array(3, 2), array(8, 2)) as $coords) {
    $point = new Point();
    $point->x = $coords[0];
    $point->y = $coords[1];
    $arr[] = $point;
}

// Dump the resulting clusters (centroid plus member points) for inspection.
var_dump(distributeOverClusters(2, $arr));
参考文献:
阅读(627) | 评论(0) | 转发(0) |