Travis Heeter Travis Heeter - 17 days ago 8
R Question

How to get the KMeans Between/Within accuracy percentage in R?

Data (already scaled):

structure(c(0.160485413118994, -0.325277232672307, 1.25345136614942, -0.0823959097766563, -1.05392120135926, 1.6177733504929, 0.403366736014645, -0.446717894120132, -0.932480539911433, -0.0823959097766563, 0.508649308224364, -0.625897284357438, 0.83280547753345, -0.625897284357438, -0.139663030393809, 1.96735207011525, -0.301741115048352, -0.301741115048352, -1.76044387693924, -0.139663030393809, 0.0577843342673918, 0.121189712724219, 1.51610803877442, -0.0056210441894354, -0.766485585671361, 1.51610803877442, -0.57626945030088, -1.02010709949867, -1.27372861332598, 1.0088650111198, 0.270645677706219, -0.834749808144522, 1.19678784152711, -0.296989842054972, 1.07728562684054, 0.0913923556763693, 0.121267909348011, -0.356740949398255, -0.864625361816163, 0.83828119746741, 1.30651856124931, -0.595043606688955, 0.545893694074002, 0.16558126048635, 5.23641370832172, -1.48243928506014, 0.419122882878118, 0.419122882878118, -0.0879603619054184, 0.0388104492904658, 0.311357673194754, 0.0940238518063364, 0.311357673194754, 0.0940238518063364, 9.43937817150828, -0.557977612358916, -0.557977612358916, 0.0940238518063364, -0.340643790970498, 0.528691494583171, -0.205722605421789, -0.596759109816679, 1.14786529440668, -0.747157765353175, -0.596759109816679, 2.59169238755704, -0.476440185387482, -0.95771588310427, -1.40891184971376, 0.425951747831495, 0.136489240515638, 0.520535462720394, -1.14366483350021, 0.648550870121979, 0.520535462720394, -1.91175727790973, 0.136489240515638, 0.90458168492515, 1.67267412933466, -0.631603203893873, -0.224811427137598, -0.610593308189137, 0.932534216017016, -0.610593308189137, -0.610593308189137, 2.86144362127471, -0.610593308189137, -0.996375189240675, -1.38215707029221, 0.546752334965478, 0.757884134731298, -0.344374342514091, 0.688992979903461, -0.344374342514091, -0.275483187686254, 1.44679568300967, -0.344374342514091, -0.137700878030581, -1.44663281975948, -0.137700878030581, -0.40214560409626, -0.593259833699538, 1.09491586112942, 
-0.911783549705002, 1.66825854993926, 2.91050104236056, -0.40214560409626, -0.84807880650391, -1.51697861011538, 0.426016057517946, -0.34473058195366, -0.622048342996663, 1.10411323084244, -0.740898812015093, -0.650346073715337, 2.92648708912503, -0.446602412540886, -0.899366104039667, -1.22761978037628, 0.36837223215692, 0.285643408957403, -0.513213860391233, 1.39175347420936, -1.46569752769153, 0.408544527318731, 2.74366577618398, -0.0830599461265831, -0.32886218284924, -1.92657672154651, -0.697565537933226, -0.328861155501701, -0.061730538550728, 0.0718347699247587, -1.26381831483011, 7.28436142760104, 1.6746184716306, -0.863122389403648, -0.729557080928162, -1.13025300635462, -1.13025300635462, -0.0766656163662548, 0.533294679362756, 1.54989517224444, -0.0766656163662548, 0.533294679362756, -0.279985714942592, 1.3465750736681, -0.686625912095266, -0.889946010671603, -0.483305813518929, 0.380765617904849, 0.156832553777284, -0.403000106541628, -0.291033574477846, -0.179067042414063, -0.403000106541628, -1.29873236305189, -1.07479929892432, 0.156832553777284, 0.156832553777284, -0.313536663149827, 0.0109306359630201, -0.151303013593403, 1.63326713152726, -1.44917221004479, -1.28693856048837, 1.79550078108368, 0.659865234188715, 1.79550078108368, 0.984332533301562, 0.183848573129592, 0.452709234957825, 0.0494182422154748, 1.52815188227076, -1.69817605966804, -1.69817605966804, 1.12486088952841, 0.856000227700176, 1.66258221318488, 1.12486088952841), .Dim = c(10L, 18L), .Dimnames = list( NULL, c("COMPACTNESS", "CIRCULARITY", "DISTANCE.CIRCULARITY", "RADIUS.RATIO", "PR.AXIS.ASPECT.RATIO", "MAX.LENGTH.ASPECT.RATIO", "SCATTER.RATIO", "ELONGATEDNESS", "PR.AXIS.RECTANGULARITY", "MAX.LENGTH.RECTANGULARITY", "SCALED.VARIANCE_MAJOR", "SCALED.VARIANCE_MINOR", "SCALED.RADIUS.OF.GYRATION", "SKEWNESS.ABOUT_MAJOR", "SKEWNESS.ABOUT_MINOR", "KURTOSIS.ABOUT_MAJOR", "KURTOSIS.ABOUT_MINOR", "HOLLOWS.RATIO" )))


I ran a kmeans on this data like so:

kc<-kmeans(d,4)


And printed the results:

print(kc)


The output contains a lot of information, including the between_SS / total_SS ratio:

Within cluster sum of squares by cluster:
[1] 1245.577 1787.394 1089.873 2126.642
(between_SS / total_SS = 58.9 %)


How do I get just the 58.9% value?


I tried `kcc$betweenss/kcc$tot.withinss` but got 1.24..., nothing near 58.9%.

Answer

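The total sum of squares is stored in kc$totss, not in kc$tot.withinss. The latter is only the total within-cluster sum of squares, and totss = betweenss + tot.withinss. Dividing betweenss by totss therefore gives the reported percentage: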

d <- structure(c(0.160485413118994, -0.325277232672307, 1.25345136614942, -0.0823959097766563, -1.05392120135926, 1.6177733504929, 0.403366736014645, -0.446717894120132, -0.932480539911433, -0.0823959097766563, 0.508649308224364, -0.625897284357438, 0.83280547753345, -0.625897284357438, -0.139663030393809, 1.96735207011525, -0.301741115048352, -0.301741115048352, -1.76044387693924, -0.139663030393809, 0.0577843342673918, 0.121189712724219, 1.51610803877442, -0.0056210441894354, -0.766485585671361, 1.51610803877442, -0.57626945030088, -1.02010709949867, -1.27372861332598, 1.0088650111198, 0.270645677706219, -0.834749808144522, 1.19678784152711, -0.296989842054972, 1.07728562684054, 0.0913923556763693, 0.121267909348011, -0.356740949398255, -0.864625361816163, 0.83828119746741, 1.30651856124931, -0.595043606688955, 0.545893694074002, 0.16558126048635, 5.23641370832172, -1.48243928506014, 0.419122882878118, 0.419122882878118, -0.0879603619054184, 0.0388104492904658, 0.311357673194754, 0.0940238518063364, 0.311357673194754, 0.0940238518063364, 9.43937817150828, -0.557977612358916, -0.557977612358916, 0.0940238518063364, -0.340643790970498, 0.528691494583171, -0.205722605421789, -0.596759109816679, 1.14786529440668, -0.747157765353175, -0.596759109816679, 2.59169238755704, -0.476440185387482, -0.95771588310427, -1.40891184971376, 0.425951747831495, 0.136489240515638, 0.520535462720394, -1.14366483350021, 0.648550870121979, 0.520535462720394, -1.91175727790973, 0.136489240515638, 0.90458168492515, 1.67267412933466, -0.631603203893873, -0.224811427137598, -0.610593308189137, 0.932534216017016, -0.610593308189137, -0.610593308189137, 2.86144362127471, -0.610593308189137, -0.996375189240675, -1.38215707029221, 0.546752334965478, 0.757884134731298, -0.344374342514091, 0.688992979903461, -0.344374342514091, -0.275483187686254, 1.44679568300967, -0.344374342514091, -0.137700878030581, -1.44663281975948, -0.137700878030581, -0.40214560409626, -0.593259833699538, 
1.09491586112942, -0.911783549705002, 1.66825854993926, 2.91050104236056, -0.40214560409626, -0.84807880650391, -1.51697861011538, 0.426016057517946, -0.34473058195366, -0.622048342996663, 1.10411323084244, -0.740898812015093, -0.650346073715337, 2.92648708912503, -0.446602412540886, -0.899366104039667, -1.22761978037628, 0.36837223215692, 0.285643408957403, -0.513213860391233, 1.39175347420936, -1.46569752769153, 0.408544527318731, 2.74366577618398, -0.0830599461265831, -0.32886218284924, -1.92657672154651, -0.697565537933226, -0.328861155501701, -0.061730538550728, 0.0718347699247587, -1.26381831483011, 7.28436142760104, 1.6746184716306, -0.863122389403648, -0.729557080928162, -1.13025300635462, -1.13025300635462, -0.0766656163662548, 0.533294679362756, 1.54989517224444, -0.0766656163662548, 0.533294679362756, -0.279985714942592, 1.3465750736681, -0.686625912095266, -0.889946010671603, -0.483305813518929, 0.380765617904849, 0.156832553777284, -0.403000106541628, -0.291033574477846, -0.179067042414063, -0.403000106541628, -1.29873236305189, -1.07479929892432, 0.156832553777284, 0.156832553777284, -0.313536663149827, 0.0109306359630201, -0.151303013593403, 1.63326713152726, -1.44917221004479, -1.28693856048837, 1.79550078108368, 0.659865234188715, 1.79550078108368, 0.984332533301562, 0.183848573129592, 0.452709234957825, 0.0494182422154748, 1.52815188227076, -1.69817605966804, -1.69817605966804, 1.12486088952841, 0.856000227700176, 1.66258221318488, 1.12486088952841), .Dim = c(10L, 18L), .Dimnames = list(    NULL, c("COMPACTNESS", "CIRCULARITY", "DISTANCE.CIRCULARITY",     "RADIUS.RATIO", "PR.AXIS.ASPECT.RATIO", "MAX.LENGTH.ASPECT.RATIO",     "SCATTER.RATIO", "ELONGATEDNESS", "PR.AXIS.RECTANGULARITY",     "MAX.LENGTH.RECTANGULARITY", "SCALED.VARIANCE_MAJOR", "SCALED.VARIANCE_MINOR",     "SCALED.RADIUS.OF.GYRATION", "SKEWNESS.ABOUT_MAJOR", "SKEWNESS.ABOUT_MINOR",     "KURTOSIS.ABOUT_MAJOR", "KURTOSIS.ABOUT_MINOR", "HOLLOWS.RATIO"    )))
# Cluster the rows of `d` into 4 groups. kmeans() picks its starting centers
# at random, so the labels and sums of squares shown below come from one
# particular run and may differ on another.
kc <- kmeans(d, centers = 4)
print(kc)
# Excerpt of the printed summary:
#   Clustering vector:
#    [1] 4 2 4 2 1 3 2 2 2 4
#
#   Within cluster sum of squares by cluster:
#   [1]  0.00000 22.36071  0.00000 16.25689
#    (between_SS / total_SS =  88.1 %)

# Percentage shown in the summary: between-cluster sum of squares divided by
# the TOTAL sum of squares (totss), not by tot.withinss.
kc[["betweenss"]] / kc[["totss"]] * 100
# [1] 88.09212