# Clustering in R

# Open the documentation for the k-means implementation used below.
help("kmeans")

# Simulate 50 two-dimensional standard-normal observations.
set.seed(1)
x <- matrix(rnorm(50 * 2), ncol = 2)
plot(x)

# Shift the first 25 rows by (+3, -4) so the data form two separated groups.
# The matrix is stored column-major, so the 25 copies of 3 land on column 1
# and the 25 copies of -4 land on column 2.
x[1:25, ] <- x[1:25, ] + rep(c(3, -4), each = 25)
plot(x)

# Fit K = 2 using 20 random starts, then print the fitted object.
km.out <- kmeans(x, 2, nstart = 20)
km.out

#> K-means clustering with 2 clusters of sizes 25, 25
#>
#> Cluster means:
#>         [,1]        [,2]
#> 1 0.03223135  0.06924384
#> 2 3.16866521 -3.83459093
#>
#> Clustering vector:
#> [1] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
#>
#> Within cluster sum of squares by cluster:
#> [1] 28.53417 50.97988
#>  (between_SS / total_SS =  79.8 %)
#>
#> Available components:
#>
#> [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss" "betweenss"
#> [7] "size"         "iter"         "ifault"

# Cluster label (1 or 2) assigned to each observation by the K = 2 fit.
km.out$cluster
#> [1] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1

# Plot the observations coloured by cluster. The +1 offsets the labels so the
# colour indices used are 2 and 3 rather than 1 and 2.
plot(x, col = km.out$cluster + 1,
     main = "K-Means Clustering Results with K=2",
     xlab = "", ylab = "", pch = 20, cex = 2)
# Overlay the fitted cluster centres as asterisks (pch = 8).
points(km.out$centers, pch = 8, cex = 2)

# Refit with K = 3, again keeping the best of 20 random starts.
set.seed(2)
km.out <- kmeans(x, 3, nstart = 20)
km.out
#> K-means clustering with 3 clusters of sizes 25, 20, 5
#>
#> Cluster means:
#>         [,1]        [,2]
#> 1 0.03223135  0.06924384
#> 2 3.08290361 -4.26589906
#> 3 3.51171162 -2.10935842
#>
#> Clustering vector:
#> [1] 2 2 2 2 3 3 2 2 2 2 3 2 2 2 2 2 2 3 2 3 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
#>
#> Within cluster sum of squares by cluster:
#> [1] 28.534174 27.901401  3.740301
#>  (between_SS / total_SS =  84.7 %)
#>
#> Available components:
#>
#> [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss" "betweenss"
#> [7] "size"         "iter"         "ifault"

# Same K, but a single random start: report the total within-cluster sum of
# squares so it can be compared against a multi-start fit.
set.seed(3)
km.out <- kmeans(x, 3, nstart = 1)
km.out$tot.withinss
#> [1] 60.56297

# Twenty random starts: kmeans() keeps the best of them. The total
# within-cluster sum of squares printed here is the figure to compare with
# the single-start value.
km.out <- kmeans(x, 3, nstart = 20)
km.out$tot.withinss
#> [1] 60.37249

# K-means on the iris data: work on a copy with the Species label removed so
# that only the four numeric measurements are clustered.
iris2 <- iris
iris2$Species <- NULL
kmeans.result <- kmeans(iris2, 3, nstart = 20)
kmeans.result

#> K-means clustering with 3 clusters of sizes 38, 50, 62
#>
#> Cluster means:
#>   Sepal.Length Sepal.Width Petal.Length Petal.Width
#> 1     6.850000    3.073684     5.742105    2.071053
#> 2     5.006000    3.428000     1.462000    0.246000
#> 3     5.901613    2.748387     4.393548    1.433871
#>
#> Clustering vector:
#>   [1] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#>  [51] 3 3 1 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
#> [101] 1 3 1 1 1 1 3 1 1 1 1 1 1 3 3 1 1 1 1 3 1 3 1 3 1 1 3 3 1 1 1 1 1 3 1 1 1 1 3 1 1 1 3 1 1 1 3 1 1 3
#>
#> Within cluster sum of squares by cluster:
#> [1] 23.87947 15.15100 39.82097
#>  (between_SS / total_SS =  88.4 %)
#>
#> Available components:
#>
#> [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss" "betweenss"
#> [7] "size"         "iter"         "ifault"

# Cross-tabulate the true species (rows) against the k-means cluster
# assignments (columns) to see how well the clusters recover the species.
table(iris$Species, kmeans.result$cluster)


#>             1  2  3
#> setosa      0 50  0
#> versicolor  2  0 48
#> virginica  36  0 14

# Scatter plot of the two sepal measurements, coloured by k-means cluster.
plot(iris2[c("Sepal.Length", "Sepal.Width")], col = kmeans.result$cluster)
# Overlay the three cluster centres (same two dimensions) as asterisks. The
# colours 1:3 follow the cluster numbering, so each centre matches its points.
points(kmeans.result$centers[, c("Sepal.Length", "Sepal.Width")],
       col = 1:3, pch = 8, cex = 2)

# Partitioning Around Medoids (PAM) from the cluster package, applied to the
# same label-free iris data with three clusters.
library(cluster)
help("pam")
pam.result <- pam(iris2, k = 3)
pam.result

#> Medoids:
#>       ID Sepal.Length Sepal.Width Petal.Length Petal.Width
#> [1,]   8          5.0         3.4          1.5         0.2
#> [2,]  79          6.0         2.9          4.5         1.5
#> [3,] 113          6.8         3.0          5.5         2.1
#> Clustering vector:
#>   [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
#>  [51] 2 2 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#> [101] 3 2 3 3 3 3 2 3 3 3 3 3 3 2 2 3 3 3 3 2 3 2 3 2 3 3 2 2 3 3 3 3 3 2 3 3 3 3 2 3 3 3 2 3 3 3 2 3 3 2
#> Objective function:
#>     build      swap
#> 0.6709391 0.6542077
#>
#> Available components:
#> [1] "medoids"    "id.med"     "clustering" "objective"  "isolation"  "clusinfo"   "silinfo"
#> [8] "diss"       "call"       "data"

 # Cross-tabulate the PAM cluster assignments (rows) against the true
 # species (columns).
 table(pam.result$clustering,iris$Species)


#>   setosa versicolor virginica
#> 1     50          0         0
#> 2      0         48        14
#> 3      0          2        36

 # Draw the default plot(s) for the fitted PAM object.
 plot(pam.result)

 # RWeka is not part of base R; it is assumed to be installed already.
 # Load it and open the help page for its SimpleKMeans clusterer.
 library(RWeka)
help(SimpleKMeans)

# Categories: R
#
# This site uses Akismet to reduce spam. Learn how your comment data is processed.