Skip to content

Commit d8b7fbf

Browse files
committed
v3 algorithm implementation (#27)
1 parent 43faa7f commit d8b7fbf

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

48 files changed

+1758
-1531
lines changed

.gitignore

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,5 @@ build
22
composer.lock
33
docs
44
vendor
5-
coverage
65
.phpunit.result.cache
76
.phpunit.cache

.scrutinizer.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,3 @@ checks:
1616
fix_line_ending: true
1717
fix_identation_4spaces: true
1818
fix_doc_comments: true
19-

.styleci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
preset: psr2
1+
preset: psr12

.travis.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ language: php
33
php:
44
- 7.3
55
- 7.4
6+
- 8.0
67

78
env:
89
matrix:

README.md

Lines changed: 15 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,6 @@
77

88
[K-means](http://en.wikipedia.org/wiki/K-means_clustering) clustering algorithm implementation in PHP.
99

10-
Please also see the [FAQ](#faq)
11-
1210
## Installation
1311

1412
You can install the package via composer:
@@ -22,8 +20,7 @@ composer require bdelespierre/php-kmeans
2220
```PHP
2321
require "vendor/autoload.php";
2422

25-
// prepare 50 points of 2D space to be clustered
26-
$points = [
23+
$data = [
2724
[80,55],[86,59],[19,85],[41,47],[57,58],
2825
[76,22],[94,60],[13,93],[90,48],[52,54],
2926
[62,46],[88,44],[85,24],[63,14],[51,40],
@@ -37,30 +34,35 @@ $points = [
3734
];
3835

3936
// create a 2-dimensional space
40-
$space = new KMeans\Space(2);
37+
$space = new Kmeans\Space(2);
38+
39+
// prepare the points
40+
$points = new Kmeans\PointCollection($space);
4141

42-
// add points to space
43-
foreach ($points as $i => $coordinates) {
44-
$space->addPoint($coordinates);
42+
foreach ($data as $coordinates) {
43+
$points->attach(new Kmeans\Point($space, $coordinates));
4544
}
4645

46+
// prepare the algorithm
47+
$algorithm = new Kmeans\Algorithm(new Kmeans\RandomInitialization());
48+
4749
// cluster these 50 points in 3 clusters
48-
$clusters = $space->solve(3);
50+
$clusters = $algorithm->clusterize($points, 3);
4951

5052
// display the cluster centers and attached points
5153
foreach ($clusters as $num => $cluster) {
52-
$coordinates = $cluster->getCoordinates();
54+
$coordinates = $cluster->getCentroid()->getCoordinates();
5355
printf(
54-
"Cluster %s [%d,%d]: %d points\n",
56+
"Cluster #%s [%d,%d] has %d points\n",
5557
$num,
5658
$coordinates[0],
5759
$coordinates[1],
58-
count($cluster)
60+
count($cluster->getPoints())
5961
);
6062
}
6163
```
6264

63-
**Note:** the example is given with points of a 2D space but it will work with any dimention >1.
65+
**Note:** the example is given with points of a 2D space but it will work with any dimension greater than or equal to 1.
6466

6567
### Testing
6668

@@ -89,51 +91,3 @@ If you discover any security related issues, please email benjamin.delespierre@g
8991
## License
9092

9193
Lesser General Public License (LGPL). Please see [License File](LICENSE.md) for more information.
92-
93-
## FAQ
94-
95-
### How to get coordinates of a point/cluster:
96-
```PHP
97-
$x = $point[0];
98-
$y = $point[1];
99-
100-
// or
101-
102-
list($x,$y) = $point->getCoordinates();
103-
```
104-
105-
### List all points of a space/cluster:
106-
107-
```PHP
108-
foreach ($cluster as $point) {
109-
printf('[%d,%d]', $point[0], $point[1]);
110-
}
111-
```
112-
113-
### Attach data to a point:
114-
115-
```PHP
116-
$point = $space->addPoint([$x, $y, $z], "user #123");
117-
```
118-
119-
### Retrieve point data:
120-
121-
```PHP
122-
$data = $space[$point]; // e.g. "user #123"
123-
```
124-
125-
### Watch the algorithm run
126-
127-
Each iteration step can be monitored using a callback function passed to `Kmeans\Space::solve`:
128-
129-
```PHP
130-
$clusters = $space->solve(3, function($space, $clusters) {
131-
static $iterations = 0;
132-
133-
printf("Iteration: %d\n", ++$iterations);
134-
135-
foreach ($clusters as $i => $cluster) {
136-
printf("Cluster %d [%d,%d]: %d points\n", $i, $cluster[0], $cluster[1], count($cluster));
137-
}
138-
});
139-
```

composer.json

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,16 @@
2121
"require-dev": {
2222
"phpunit/phpunit": "^9.3",
2323
"squizlabs/php_codesniffer": "^3.6",
24-
"phpstan/phpstan": "^0.12.97"
24+
"phpstan/phpstan": "^0.12.97",
25+
"mockery/mockery": "^1.4"
2526
},
2627
"autoload": {
2728
"psr-4": {
28-
"Bdelespierre\\Kmeans\\": "src/"
29-
}
29+
"Kmeans\\": "src/"
30+
},
31+
"files": [
32+
"src/math.php"
33+
]
3034
},
3135
"autoload-dev": {
3236
"psr-4": {

demo.php

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,7 @@
22

33
require "vendor/autoload.php";
44

5-
// prepare 50 points of 2D space to be clustered
6-
$points = [
5+
$data = [
76
[80,55],[86,59],[19,85],[41,47],[57,58],
87
[76,22],[94,60],[13,93],[90,48],[52,54],
98
[62,46],[88,44],[85,24],[63,14],[51,40],
@@ -17,24 +16,29 @@
1716
];
1817

1918
// create a 2-dimensional space
20-
$space = new KMeans\Space(2);
19+
$space = new Kmeans\Space(2);
2120

22-
// add points to space
23-
foreach ($points as $i => $coordinates) {
24-
$space->addPoint($coordinates);
21+
// prepare the points
22+
$points = new Kmeans\PointCollection($space);
23+
24+
foreach ($data as $coordinates) {
25+
$points->attach(new Kmeans\Point($space, $coordinates));
2526
}
2627

28+
// prepare the algorithm
29+
$algorithm = new Kmeans\Algorithm(new Kmeans\RandomInitialization());
30+
2731
// cluster these 50 points in 3 clusters
28-
$clusters = $space->solve(3);
32+
$clusters = $algorithm->clusterize($points, 3);
2933

3034
// display the cluster centers and attached points
3135
foreach ($clusters as $num => $cluster) {
32-
$coordinates = $cluster->getCoordinates();
36+
$coordinates = $cluster->getCentroid()->getCoordinates();
3337
printf(
34-
"Cluster %s [%d,%d]: %d points\n",
38+
"Cluster #%s [%d,%d] has %d points\n",
3539
$num,
3640
$coordinates[0],
3741
$coordinates[1],
38-
count($cluster)
42+
count($cluster->getPoints())
3943
);
4044
}

demo_deterministic.php

Lines changed: 0 additions & 43 deletions
This file was deleted.

makefile

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
qa: phplint phpcs phpstan
77

8-
QA_PATHS = src/
8+
QA_PATHS = src/ tests/
99
QA_STANDARD = psr12
1010

1111
phplint:
@@ -28,8 +28,4 @@ todolist:
2828
# -----------------------------------------------------------------------------
2929

3030
test:
31-
vendor/bin/phpunit
32-
33-
.PHONY: coverage
34-
coverage:
35-
vendor/bin/phpunit --coverage-html coverage
31+
vendor/bin/phpunit --colors

phpstan.neon

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
parameters:
2+
paths:
3+
- src
4+
# Level 8 is the highest level
5+
level: 8

0 commit comments

Comments
 (0)