help(kmeans)
set.seed(1)
x=matrix(rnorm(50*2), ncol=2)
plot(x)
x[1:25,1]=x[1:25,1]+3
x[1:25,2]=x[1:25,2]-4
plot(x)
km.out=kmeans(x,2,nstart=20)
km.out
K-means clustering with 2 clusters of sizes 25, 25
Cluster means:
[,1] [,2]
1 0.03223135 0.06924384
2 3.16866521 -3.83459093
Clustering vector:
[1] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Within cluster sum of squares by cluster:
[1] 28.53417 50.97988
(between_SS / total_SS = 79.8 %)
Available components:
[1] "cluster" "centers" "totss" "withinss" "tot.withinss" "betweenss"
[7] "size" "iter" "ifault"
km.out$cluster
[1] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
plot(x, col=(km.out$cluster +1), main="K-Means Clustering Results with K=2", xlab="", ylab="", pch=20, cex=2)
points(km.out$centers,pch=8,cex=2)
set.seed (2)
km.out=kmeans(x,3,nstart=20)
km.out
K-means clustering with 3 clusters of sizes 25, 20, 5
Cluster means:
[,1] [,2]
1 0.03223135 0.06924384
2 3.08290361 -4.26589906
3 3.51171162 -2.10935842
Clustering vector:
[1] 2 2 2 2 3 3 2 2 2 2 3 2 2 2 2 2 2 3 2 3 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Within cluster sum of squares by cluster:
[1] 28.534174 27.901401 3.740301
(between_SS / total_SS = 84.7 %)
Available components:
[1] "cluster" "centers" "totss" "withinss" "tot.withinss" "betweenss"
[7] "size" "iter" "ifault"
set.seed (3)
km.out=kmeans(x,3,nstart=1)
km.out$tot.withinss
[1] 60.56297
km.out=kmeans(x,3,nstart=20)
km.out$tot.withinss
[1] 60.37249
iris2 = iris
iris2$Species = NULL
kmeans.result = kmeans(iris2,3,nstart=20)
kmeans.result
K-means clustering with 3 clusters of sizes 38, 50, 62
Cluster means:
Sepal.Length Sepal.Width Petal.Length Petal.Width
1 6.850000 3.073684 5.742105 2.071053
2 5.006000 3.428000 1.462000 0.246000
3 5.901613 2.748387 4.393548 1.433871
Clustering vector:
[1] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
[51] 3 3 1 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
[101] 1 3 1 1 1 1 3 1 1 1 1 1 1 3 3 1 1 1 1 3 1 3 1 3 1 1 3 3 1 1 1 1 1 3 1 1 1 1 3 1 1 1 3 1 1 1 3 1 1 3
Within cluster sum of squares by cluster:
[1] 23.87947 15.15100 39.82097
(between_SS / total_SS = 88.4 %)
Available components:
[1] "cluster" "centers" "totss" "withinss" "tot.withinss" "betweenss"
[7] "size" "iter" "ifault"
table(iris$Species, kmeans.result$cluster)
1 2 3
setosa 0 50 0
versicolor 2 0 48
virginica 36 0 14
plot(iris2[c("Sepal.Length", "Sepal.Width")], col = kmeans.result$cluster)
points(kmeans.result$centers[,c("Sepal.Length", "Sepal.Width")], col = 1:3,pch = 8, cex=2)
library(cluster)
help(pam)
pam.result = pam(iris2,3)
pam.result
Medoids:
ID Sepal.Length Sepal.Width Petal.Length Petal.Width
[1,] 8 5.0 3.4 1.5 0.2
[2,] 79 6.0 2.9 4.5 1.5
[3,] 113 6.8 3.0 5.5 2.1
Clustering vector:
[1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
[51] 2 2 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
[101] 3 2 3 3 3 3 2 3 3 3 3 3 3 2 2 3 3 3 3 2 3 2 3 2 3 3 2 2 3 3 3 3 3 2 3 3 3 3 2 3 3 3 2 3 3 3 2 3 3 2
Objective function:
build swap
0.6709391 0.6542077
Available components:
[1] "medoids" "id.med" "clustering" "objective" "isolation" "clusinfo" "silinfo"
[8] "diss" "call" "data"
table(pam.result$clustering,iris$Species)
setosa versicolor virginica
1 50 0 0
2 0 48 14
3 0 2 36
plot(pam.result)
library(RWeka)
help(SimpleKMeans)