cluster/0000755000176000001440000000000012124341736011766 5ustar ripleyuserscluster/MD50000644000176000001440000001501612124341736012301 0ustar ripleyusersc7a50a95c6cd9c6ebf814ce6ac7b4523 *ChangeLog 24a9cf41592c395d9a384ba49a5775d8 *DESCRIPTION 824c6b9820b7fc963b52b185b8284481 *INDEX df97ca34d4dc7b97ae3e200ec4978769 *NAMESPACE 8de82c7d42bd4a27c27c25462ae2a307 *PORTING 15be042001f89dcd2a6c478787e41c89 *R/0aaa.R abb6dcf5f87ac205d3c4e9d0e62e5d8a *R/agnes.q 14a3b4a844384c861a96bfa96c1468f8 *R/clara.q 15df5f32ef8e56daf9ad3bbde0817c10 *R/clusGap.R e93355a6f7ad8d8486e6db4db52200a9 *R/coef.R 9deec9c4bc14219130c26e2205e809d3 *R/daisy.q 9600484036f3229ac54f7c5596d5cf73 *R/diana.q 315c3dd944b4f414095b78ecc7164e16 *R/ellipsoidhull.R 90a3a2f89e1dbb581f4c19bc19cb92a5 *R/fanny.q a96e11e7323c9f7e06a597f5753a0b35 *R/internal.R 0486e196d9a7b9a8fd9cc467d7d998c6 *R/mona.q 6b0b3574778930624b6512e0d3f3c0d6 *R/pam.q 27e799846893d5ad40b15de6aab39a56 *R/plothier.q ede702ba62f772a3a9c3fee5bc5af00b *R/plotpart.q 39605fa5e570bf8aca365053e802eedb *R/silhouette.R f1d53c5f24897b5ab0e6014be9be7844 *R/zzz.R 1f5615783dc47c80ed105754db24330a *README ac189f8e6e1314c01a700f6a31ee4506 *data/agriculture.tab 1046f7a750dbbac2724fd910aff561e7 *data/animals.tab c8f60e65b0356698f6b08c23c07e2a94 *data/chorSub.rda 0f34ac1e10d178fa04a4babb705d5b27 *data/flower.R 0772c2f9932873c7dd6551208cd0725b *data/plantTraits.rda 1022570c32295066506ad7b26439f7bf *data/pluton.tab eda904e011e79ea5a25602125d558d04 *data/ruspini.tab e816e6c67444107ab47ff3bf2f6023f2 *data/votes.repub.tab be46f45035b177b940393db459789282 *data/xclara.rda 9737dd4fd749f0140a0cc25ca1561c23 *inst/CITATION 3742e68750c8fd362633e7546acfb037 *inst/po/de/LC_MESSAGES/R-cluster.mo e7180b24d1ef841e3cf811821522b34f *inst/po/en@quot/LC_MESSAGES/R-cluster.mo a851d9595f49b9f1b99435a7478552a7 *inst/po/pl/LC_MESSAGES/R-cluster.mo 48ebe7fae62f5fc4f392777fa58e8d7b *man/agnes.Rd 6ac0a9386a8f3ce08ac7afe942e0d195 *man/agnes.object.Rd 7db03338761019b70d064ffe1eddcc5d 
*man/agriculture.Rd f72a36d67c7c7b58399dbcaf473acdd9 *man/animals.Rd 80586a34dc1e14f840ebae455aeb9736 *man/bannerplot.Rd 0b1033484c0b66ff9451427930e92987 *man/chorSub.Rd f4e46ab4ab26d4683f8a171e77789f55 *man/clara.Rd b86f299b6994b98e2112843516f3108a *man/clara.object.Rd 08faf93b0cf40bf2a6f5d84d0d260525 *man/clusGap.Rd 044ea0b7c74e9f42456f1fa374f83894 *man/clusplot.default.Rd ea3ea6469c8f57eafa1229f82b78c30c *man/clusplot.partition.Rd c7341c96f49e5b288448c4cb9436c2fa *man/cluster-internal.Rd 20b35f88ced8e679778892a732a43369 *man/coef.hclust.Rd 61c630d722662ffd3c25f134a42e8dd9 *man/daisy.Rd ee5e0c36deba584f30ab869222a229df *man/diana.Rd aa9c2fe350e02eb23f211a44e40c8a90 *man/dissimilarity.object.Rd a10676b5759ff18923c6a31ec1b04e1a *man/ellipsoidhull.Rd 05b0d63a22cf98893b37f9e7c99b64a4 *man/fanny.Rd 7d549aed091402cecc8a398085e4bb86 *man/fanny.object.Rd 94bfe5845b4efa6bffec6c455081a237 *man/flower.Rd 9fa8b2603353a1db6cc145e0636fcab7 *man/lower.to.upper.tri.inds.Rd f0c4aadcecdf255e2bd89388d7114578 *man/mona.Rd 546379a2e048bf7ef7a69aff87f4ca50 *man/mona.object.Rd 327a9d84519a4e40965740abbf0e4cf0 *man/pam.Rd f8b72253624c24b78b4980688aeb0bf5 *man/pam.object.Rd cafab0f87fd2443f14e672d325243990 *man/partition.object.Rd 40fe00964d4215ce1b2390e8f18e9dc0 *man/plantTraits.Rd 60b5c55053eca83e5d86a3add0bdf460 *man/plot.agnes.Rd e7b16368335969b777ccebde36b578a8 *man/plot.diana.Rd 936341a14d58811b41e45b09fd8b37bb *man/plot.mona.Rd ee6a690d0f2c7a25f3b8f77881778137 *man/plot.partition.Rd a902dfc84d340596d551a6edb69b7856 *man/pltree.Rd 433e5eb26354a93ff347fb69086f9c60 *man/pltree.twins.Rd c71d01936f0121aecc2863982456c82f *man/pluton.Rd 8b34c88f90c91ce710b2ccc65e91a486 *man/predict.ellipsoid.Rd d74abf0fc1c013f696f0b8ddd724b094 *man/print.agnes.Rd b6384eb685030609ae9edd06434949b0 *man/print.clara.Rd e0c63f114cc0bf71fe53964b5f883255 *man/print.diana.Rd b32046c766441c7fc75f644734c690b1 *man/print.dissimilarity.Rd 1ce3568140412485f727b2d9193ba09c *man/print.fanny.Rd 
0dcf3dd3c5afbb216939ca9158c32192 *man/print.mona.Rd b1c1625935a5c22d81aa2e73d3b80457 *man/print.pam.Rd 7cd999938af26fb5c97e4d45ab9ea982 *man/ruspini.Rd 474c002257c8b72a2db58856c81fcba5 *man/silhouette.Rd 8beea8b2090f7e91d0a0b69ec2013718 *man/sizeDiss.Rd 806f75ba78b8e959f53fefad76a0dca6 *man/summary.agnes.Rd 1024351710e393761532b043ff37df7f *man/summary.clara.Rd 1536a3f848f81661af3d45c06697e072 *man/summary.diana.Rd 423bde4e918538a7f6e0b1d4aa0e7d6f *man/summary.mona.Rd 5cc8d9a8fa53b437121d841475d46b46 *man/summary.pam.Rd 1f622b89b4b8b0e93e3f0abd65122ee4 *man/twins.object.Rd 69c2598048e0488ca72d0809d1d3214a *man/volume.ellipsoid.Rd 0510d0816550f6d3c258652912053a1d *man/votes.repub.Rd 438589447329748ecc79553dc159e458 *man/xclara.Rd b89c5d900c3319e8d7275e606788d93f *po/R-cluster.pot c0ca73459893e2c1ce3dd62022f96a38 *po/R-de.po e9b5293e63746638be1f3570dbeb4fe3 *po/R-en@quot.po eb6c6c1f7adb22cd3009cd5013c3ce72 *po/R-pl.po 2510e0b50d86d1fea5be2409781564b8 *po/update-me.sh f4a527cae1a1abd5ea739875d6ffccae *src/clara.c 245ac1e056686a94b1838758944d3503 *src/cluster.h 4ae4b8fc0d4ba8a1a44bd9fc43e08626 *src/daisy.f f479676d1d6850e21e7e10b3ac529c19 *src/dysta.f af6f8cc6efc167ca55a2b2f3647be9aa *src/fanny.c 23cea00d2feab57a92e8c2393c7f4b8a *src/ind_2.h 4138c531330925b61566dc7d07a3d23b *src/init.c 77001343e1648a21ebe395a6d8561648 *src/mona.f 58e4fef8d3b0e297542cb6dbbde81f21 *src/pam.c 6f11dc74b1636e3ac848563d570f0030 *src/sildist.c f42f05132aaf001ddd333e3e109692e0 *src/spannel.c ccd77c81287eaa4c777eab312a357faa *src/twins.c 6f9c4aa64eb33381559cc9a337b4253a *tests/agnes-ex.R 71bb23c915eb909afe8b39650f4198cb *tests/agnes-ex.Rout.save 8b4f445da3ac72cff39d4a36b6250316 *tests/clara-NAs.R e1f71c79ff859a49fd830f333563172c *tests/clara-NAs.Rout.save b89d8900c62c02ab2e8bf0a24c01e7ea *tests/clara-ex.R 2f15ef0bcce52548c1d134a59395dbaf *tests/clara.R a1fe464ee2ff4467152313d1a67b6afb *tests/clara.Rout.save 79c68eadc1e1f72d909b7c015a278cc7 *tests/clusplot-out.R b583d20874ec3960ec1cc06d8ae1d447 
*tests/clusplot-out.Rout.save 35fd3185d685723e528a435514b38604 *tests/daisy-ex.R 61459193985591197aadfff191f49940 *tests/daisy-ex.Rout.save 15d472538e911a5a8da7970ae6d889c3 *tests/diana-boots.R 4fc11382af41801e16128f96e17a70e7 *tests/diana-ex.R c10f01e8f1c0a008a92d674670db931a *tests/diana-ex.Rout.save d59a754abe85489fc3b09d6093f72777 *tests/ellipsoid-ex.R 26727e98f08ac84c139af4422b75f0f2 *tests/ellipsoid-ex.Rout.save 52b341bc06eb5692a73dec2be2cd7e5a *tests/fanny-ex.R 7ea9c35599c857c5393421da3570ea0a *tests/mona.R 794166e31834ecfb147e62843ad7931a *tests/mona.Rout.save a393cf1d3d4fa219774a48050f6f0fe5 *tests/pam.R dfa9ed8323a746f229a3fb852c0bf334 *tests/pam.Rout.save 477cd7fd12117a6cbcdfc9d5944fbd39 *tests/silhouette-default.R 88f4c305ccf9133ec34c2fda0935942e *tests/silhouette-default.Rout.save d9cdce1776e344a6f4f2574cee6ef487 *tests/sweep-ex.R cluster/tests/0000755000176000001440000000000012124335266013131 5ustar ripleyuserscluster/tests/sweep-ex.R0000644000176000001440000000411011646600064015004 0ustar ripleyusers#### NOT part of the cluster package! #### Find out what exactly sweep() in ../src/spannel.f is doing #### in order to eventually replace it with BLAS calls ! 
### subroutine sweep (cov,nord,ixlo,nel,deter) ### =============================== ### is called only once as ### call sweep(cov,ndep,0,i,deter) ### where i in 0:ndep sweep1 <- function(cov, i, det = 1) { ## Purpose: ## ------------------------------------------------------------------------- ## Arguments: ## ------------------------------------------------------------------------- ## Author: Martin Maechler, Date: 22 Jan 2002, 08:58 if(!is.matrix(cov) || 0 != diff(D <- dim(cov))) stop("'cov' must be a square matrix") if((nord <- as.integer(D[1] - 1)) < 1)## cov[0:nord, 0:nord] stop("'cov' must be at least 2 x 2") if(0 > (i <- as.integer(i)) || i > nord) stop("'i' must be in 0:nord, where nord = nrow(cov)-1") storage.mode(cov) <- "double" .C(cluster:::cl_sweep, cov, nord, ixlo = 0:0, i = i, deter=det) } sweepAll <- function(cov, det = 1) { ## Purpose: ## ------------------------------------------------------------------------- ## Arguments: ## ------------------------------------------------------------------------- ## Author: Martin Maechler, Date: 22 Jan 2002, 08:58 if(!is.matrix(cov) || 0 != diff(D <- dim(cov))) stop("'cov' must be a square matrix") if((nord <- as.integer(D[1] - 1)) < 1)## cov[0:nord, 0:nord] stop("'cov' must be at least 2 x 2") storage.mode(cov) <- "double" for(i in 0:nord) { .C(cluster:::cl_sweep, cov, nord, ixlo = 0:0, i = i, deter = det, DUP = FALSE) # i.e. 
work on 'cov' and 'det' directly if(det <= 0) cat("** i = ", i, "; deter = ", format(det)," <= 0\n",sep="") } list(cov = cov, deter = det) } require(cluster) ## Examples with errors m1 <- cov(cbind(1, 1:5)) try(sweepAll(m1))# deter = 0; cov[2,2] = Inf ## ok (m2 <- cov(cbind(1:5, c(2:5,1), c(4:2,2,6)))) qr(m2, tol = .001)$rank sweepAll(m2) ## deter = 0 cluster/tests/silhouette-default.Rout.save0000644000176000001440000004453111626750550020562 0ustar ripleyusers R Under development (unstable) (2011-08-28 r56813) Copyright (C) 2011 The R Foundation for Statistical Computing ISBN 3-900051-07-0 Platform: i686-pc-linux-gnu (32-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. 
> ## This came from a bug report on R-help by ge yreyt > ## Date: Mon, 9 Jun 2003 16:06:53 -0400 (EDT) > library(cluster) > if(FALSE) # manual testing + library(cluster, lib="~/R/Pkgs/cluster.Rcheck") > > data(iris) > > .proctime00 <- proc.time() > > mdist <- as.dist(1 - cor(t(iris[,1:4])))#dissimlarity > ## this is always the same: > hc <- diana(mdist, diss = TRUE, stand = FALSE) > > maxk <- 15 # at most 15 clusters > silh.wid <- numeric(maxk) # myind[k] := the silh.value for k clusters > silh.wid[1] <- NA # 1-cluster: silhouette not defined > > op <- par(mfrow = c(4,4), mar = .1+ c(2,1,2,1), mgp=c(1.5, .6,0)) > for(k in 2:maxk) { + cat("\n", k,":\n==\n") + k.gr <- cutree(as.hclust(hc), k = k) + cat("grouping table: "); print(table(k.gr)) + si <- silhouette(k.gr, mdist) + cat("silhouette:\n"); print(summary(si)) + plot(si, main = paste("k =",k), + col = 2:(k+1), do.n.k=FALSE, do.clus.stat=FALSE) + silh.wid[k] <- summary(si)$avg.width + ## === + } 2 : == grouping table: k.gr 1 2 50 100 silhouette: Silhouette of 150 units in 2 clusters from silhouette.default(x = k.gr, dist = mdist) : Cluster sizes and average silhouette widths: 50 100 0.9829965 0.9362626 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. 0.5884 0.9437 0.9611 0.9518 0.9815 0.9918 3 : == grouping table: k.gr 1 2 3 50 50 50 silhouette: Silhouette of 150 units in 3 clusters from silhouette.default(x = k.gr, dist = mdist) : Cluster sizes and average silhouette widths: 50 50 50 0.9773277 0.6926798 0.7467236 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. 0.03353 0.76940 0.86120 0.80560 0.97560 0.98920 4 : == grouping table: k.gr 1 2 3 4 35 15 50 50 silhouette: Silhouette of 150 units in 4 clusters from silhouette.default(x = k.gr, dist = mdist) : Cluster sizes and average silhouette widths: 35 15 50 50 0.5653722 0.5226372 0.6926798 0.7467236 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. 
0.03353 0.56620 0.75100 0.66400 0.84240 0.89390 5 : == grouping table: k.gr 1 2 3 4 5 35 15 29 21 50 silhouette: Silhouette of 150 units in 5 clusters from silhouette.default(x = k.gr, dist = mdist) : Cluster sizes and average silhouette widths: 35 15 29 21 50 0.5653722 0.5226372 0.5776362 0.4625437 0.5296735 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. -0.5404 0.3937 0.6252 0.5372 0.7392 0.8136 6 : == grouping table: k.gr 1 2 3 4 5 6 35 15 29 21 29 21 silhouette: Silhouette of 150 units in 6 clusters from silhouette.default(x = k.gr, dist = mdist) : Cluster sizes and average silhouette widths: 35 15 29 21 29 21 0.5653722 0.5226372 0.5776362 0.3732981 0.3383135 0.5945444 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. -0.1094 0.3351 0.5257 0.4968 0.6938 0.8136 7 : == grouping table: k.gr 1 2 3 4 5 6 7 35 14 1 29 21 29 21 silhouette: Silhouette of 150 units in 7 clusters from silhouette.default(x = k.gr, dist = mdist) : Cluster sizes and average silhouette widths: 35 14 1 29 21 29 21 0.4165289 0.6671435 0.0000000 0.5776362 0.3732981 0.3383135 0.5945444 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. -0.3264 0.3001 0.5234 0.4720 0.6970 0.8301 8 : == grouping table: k.gr 1 2 3 4 5 6 7 8 35 14 1 29 10 11 29 21 silhouette: Silhouette of 150 units in 8 clusters from silhouette.default(x = k.gr, dist = mdist) : Cluster sizes and average silhouette widths: 35 14 1 29 10 11 29 21 0.4165289 0.6671435 0.0000000 0.4209012 0.6943265 0.7262601 0.2053018 0.5945444 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. 
-0.6258 0.2576 0.5842 0.4633 0.7132 0.8887 9 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 35 14 1 26 10 11 3 29 21 silhouette: Silhouette of 150 units in 9 clusters from silhouette.default(x = k.gr, dist = mdist) : Cluster sizes and average silhouette widths: 35 14 1 26 10 11 3 29 0.4165289 0.6671435 0.0000000 0.5318152 0.6673269 0.6944652 0.7957279 0.2053018 21 0.5945444 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. -0.6258 0.3150 0.5896 0.4859 0.7263 0.8870 10 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 35 14 1 26 10 11 3 16 13 21 silhouette: Silhouette of 150 units in 10 clusters from silhouette.default(x = k.gr, dist = mdist) : Cluster sizes and average silhouette widths: 35 14 1 26 10 11 3 16 0.4165289 0.6671435 0.0000000 0.5318152 0.6319149 0.6145837 0.7957279 0.4640123 13 21 0.6615431 0.4228530 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. -0.5870 0.3535 0.6068 0.5208 0.7349 0.8803 11 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 35 14 1 26 10 11 3 16 13 11 10 silhouette: Silhouette of 150 units in 11 clusters from silhouette.default(x = k.gr, dist = mdist) : Cluster sizes and average silhouette widths: 35 14 1 26 10 11 3 16 0.4165289 0.6671435 0.0000000 0.5318152 0.6319149 0.6145837 0.7957279 0.4064279 13 11 10 0.5866228 0.4297258 0.6590274 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. -0.3264 0.3730 0.5984 0.5244 0.7302 0.8505 12 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 35 11 3 1 26 10 11 3 16 13 11 10 silhouette: Silhouette of 150 units in 12 clusters from silhouette.default(x = k.gr, dist = mdist) : Cluster sizes and average silhouette widths: 35 11 3 1 26 10 11 3 0.2883758 0.7044155 0.4092330 0.0000000 0.5318152 0.6319149 0.6145837 0.7957279 16 13 11 10 0.4064279 0.5866228 0.4297258 0.6590274 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. 
-0.6007 0.3395 0.5817 0.4921 0.7216 0.8700 13 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 28 11 3 7 1 26 10 11 3 16 13 11 10 silhouette: Silhouette of 150 units in 13 clusters from silhouette.default(x = k.gr, dist = mdist) : Cluster sizes and average silhouette widths: 28 11 3 7 1 26 10 11 0.3783869 0.6827810 0.4092330 0.4285753 0.0000000 0.5318152 0.6319149 0.6145837 3 16 13 11 10 0.7957279 0.4064279 0.5866228 0.4297258 0.6590274 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. -0.4013 0.3314 0.5704 0.5138 0.7274 0.8531 14 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 19 11 3 9 7 1 26 10 11 3 16 13 11 10 silhouette: Silhouette of 150 units in 14 clusters from silhouette.default(x = k.gr, dist = mdist) : Cluster sizes and average silhouette widths: 19 11 3 9 7 1 26 10 0.5419530 0.6171802 0.3959926 0.4525348 0.1669077 0.0000000 0.5318152 0.6319149 11 3 16 13 11 10 0.6145837 0.7957279 0.4064279 0.5866228 0.4297258 0.6590274 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. -0.5929 0.3795 0.5875 0.5217 0.7263 0.8505 15 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 19 11 3 9 7 1 18 10 11 8 3 16 13 11 10 silhouette: Silhouette of 150 units in 15 clusters from silhouette.default(x = k.gr, dist = mdist) : Cluster sizes and average silhouette widths: 19 11 3 9 7 1 18 10 0.5419530 0.6171802 0.3959926 0.4525348 0.1669077 0.0000000 0.6616381 0.5871805 11 8 3 16 13 11 10 0.5171407 0.6705138 0.7444822 0.4064279 0.5866228 0.4297258 0.6590274 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. 
-0.5929 0.3859 0.6211 0.5335 0.7478 0.8551 > par(op) > > summary(si.p <- silhouette(50 - k.gr, mdist)) Silhouette of 150 units in 15 clusters from silhouette.default(x = 50 - k.gr, dist = mdist) : Cluster sizes, ids = (35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49), and average silhouette widths: 10 11 13 16 3 8 11 10 0.6590274 0.4297258 0.5866228 0.4064279 0.7444822 0.6705138 0.5171407 0.5871805 18 1 7 9 3 11 19 0.6616381 0.0000000 0.1669077 0.4525348 0.3959926 0.6171802 0.5419530 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. -0.5929 0.3859 0.6211 0.5335 0.7478 0.8551 > stopifnot(identical(si.p[,3], si[,3]), + identical(si.p[, 1:2], 50 - si[, 1:2])) > > # the widths: > silh.wid [1] NA 0.9518406 0.8055770 0.6639850 0.5371742 0.4967654 0.4720384 [8] 0.4633064 0.4858965 0.5207776 0.5243911 0.4920638 0.5138220 0.5217026 [15] 0.5335255 > #select the number of k clusters with the largest si value : > (myk <- which.min(silh.wid)) # -> 8 (here) [1] 8 > > postscript(file="silhouette-ex.ps") > ## MM: plot to see how the decision is made > plot(silh.wid, type = 'b', col= "blue", xlab = "k") > axis(1, at=myk, col.axis= "red", font.axis= 2) > > ##--- PAM()'s silhouette should give same as silh*.default()! 
> Eq <- function(x,y, tol = 1e-12) x == y | abs(x - y) < tol * abs((x+y)/2) > > for(k in 2:40) { + cat("\n", k,":\n==\n") + p.k <- pam(mdist, k = k) + k.gr <- p.k$clustering + si.p <- silhouette(p.k) + si.g <- silhouette(k.gr, mdist) + ## since the obs.order may differ (within cluster): + si.g <- si.g[ as.integer(rownames(si.p)), ] + cat("grouping table: "); print(table(k.gr)) + if(!isTRUE(all.equal(c(si.g), c(si.p)))) { + cat("silhouettes differ:") + if(any(neq <- !Eq(si.g[,3], si.p[,3]))) { + cat("\n") + print( cbind(si.p[], si.g[,2:3])[ neq, ] ) + } else cat(" -- but not in col.3 !\n") + } + } 2 : == grouping table: k.gr 1 2 50 100 3 : == grouping table: k.gr 1 2 3 50 50 50 4 : == grouping table: k.gr 1 2 3 4 50 43 37 20 5 : == grouping table: k.gr 1 2 3 4 5 50 25 35 20 20 6 : == grouping table: k.gr 1 2 3 4 5 6 33 17 25 35 20 20 7 : == grouping table: k.gr 1 2 3 4 5 6 7 33 17 17 14 18 31 20 8 : == grouping table: k.gr 1 2 3 4 5 6 7 8 21 13 16 17 14 18 31 20 9 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 21 13 16 12 20 11 19 17 21 10 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 21 13 16 18 10 15 14 7 16 20 11 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 21 13 16 19 10 14 7 6 15 13 16 12 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 21 13 16 17 10 12 9 3 5 15 13 16 13 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 21 12 16 1 18 11 12 9 3 15 13 4 15 14 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 20 10 7 13 18 10 12 9 3 7 10 13 4 14 15 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 20 11 5 13 1 18 10 12 9 3 7 10 13 4 14 16 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 20 11 5 13 1 12 8 9 11 9 3 7 10 13 4 14 17 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 20 11 5 13 1 12 8 7 9 10 3 3 9 13 4 13 9 18 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 20 11 5 9 4 1 12 8 7 9 10 3 3 9 13 4 13 9 19 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 
16 17 18 19 20 11 5 9 4 1 10 8 8 9 8 3 3 9 13 3 4 13 9 20 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 20 11 5 9 4 1 10 8 8 9 8 3 3 9 12 3 4 6 9 8 21 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 20 11 5 9 4 1 10 8 8 7 8 3 3 7 11 3 4 6 9 8 5 22 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 9 11 5 9 11 4 1 10 8 8 7 8 3 3 7 11 3 4 6 9 8 5 23 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 9 11 5 9 11 4 1 10 8 8 7 8 3 3 7 11 3 4 6 8 8 5 1 24 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 15 10 5 10 3 3 3 1 10 8 8 7 8 3 3 7 11 3 4 6 8 8 5 1 25 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 8 4 5 9 11 7 2 3 1 10 8 8 7 8 3 3 7 11 3 4 6 8 8 5 1 26 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 8 4 5 9 11 7 2 3 1 10 8 8 7 8 3 3 7 7 3 4 6 8 8 4 5 1 27 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 8 4 5 9 11 7 2 3 1 10 8 7 7 8 3 2 7 7 3 2 4 6 8 8 4 5 27 1 28 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 8 4 4 9 11 7 2 3 1 1 10 8 7 7 8 3 2 7 7 3 2 4 6 8 8 4 27 28 5 1 29 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 8 4 4 9 11 7 2 3 1 1 10 8 7 7 8 2 2 7 7 3 2 1 4 6 8 8 27 28 29 4 5 1 30 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 8 4 11 10 6 3 2 3 1 1 1 10 8 7 7 8 2 2 7 7 3 2 1 4 6 8 27 28 29 30 8 4 5 1 31 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 8 4 11 10 6 3 2 3 1 1 1 10 8 7 7 8 2 2 7 7 3 2 1 4 6 7 27 28 29 30 31 7 4 5 1 2 32 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 7 4 3 10 10 6 2 2 3 1 1 1 10 8 7 7 8 2 
2 7 7 3 2 1 4 6 27 28 29 30 31 32 7 7 4 5 1 2 33 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 7 4 3 10 10 6 2 2 3 1 1 1 10 8 7 7 8 2 2 7 7 3 2 1 1 6 27 28 29 30 31 32 33 7 3 7 4 5 1 2 34 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 7 4 3 8 9 6 2 3 3 1 1 2 1 10 8 7 7 8 2 2 7 7 3 2 1 1 27 28 29 30 31 32 33 34 6 7 3 7 4 5 1 2 35 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 7 4 3 8 9 6 2 3 3 1 1 2 1 10 8 7 7 8 2 2 5 7 3 2 1 1 27 28 29 30 31 32 33 34 35 6 5 3 7 4 4 5 1 2 36 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 7 4 3 8 9 6 2 3 3 1 1 2 1 10 8 7 6 8 2 2 5 7 1 3 2 1 27 28 29 30 31 32 33 34 35 36 1 6 5 3 7 4 4 5 1 2 37 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 7 4 3 8 9 6 2 3 3 1 1 2 1 10 8 3 5 6 8 2 2 5 7 1 3 2 27 28 29 30 31 32 33 34 35 36 37 1 1 6 5 3 7 4 5 1 3 2 38 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 7 4 3 8 9 6 2 3 3 1 1 2 1 10 8 3 5 6 5 2 2 5 3 7 1 3 27 28 29 30 31 32 33 34 35 36 37 38 2 1 1 6 5 3 7 4 5 1 3 2 39 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 7 4 3 8 9 6 2 3 3 1 1 2 1 7 3 8 3 5 6 5 2 2 5 3 7 1 27 28 29 30 31 32 33 34 35 36 37 38 39 3 2 1 1 6 5 3 7 4 5 1 3 2 40 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 5 4 3 7 10 6 2 2 3 3 1 1 2 1 7 3 8 3 5 6 5 2 2 5 3 7 27 28 29 30 31 32 33 34 35 36 37 38 39 40 1 3 2 1 1 6 5 3 7 4 5 1 3 2 > > > ## "pathological" case where a_i == b_i == 0 : > D6 <- structure(c(0, 0, 0, 0.4, 1, 0.05, 1, 1, 0, 1, 1, 0, 0.25, 1, 1), + Labels = LETTERS[1:6], Size = 6, call = as.name("manually"), + class = "dist", Diag = FALSE, Upper = FALSE) > D6 A B C D E B 0.00 C 0.00 0.05 D 0.00 1.00 1.00 E 0.40 1.00 1.00 0.25 F 1.00 0.00 
0.00 1.00 1.00 > kl6 <- c(1,1, 2,2, 3,3) > silhouette(kl6, D6)# had one NaN cluster neighbor sil_width [1,] 1 2 0.000 [2,] 1 3 1.000 [3,] 2 1 -0.975 [4,] 2 1 -0.500 [5,] 3 2 -0.375 [6,] 3 1 -0.500 attr(,"Ordered") [1] FALSE attr(,"call") silhouette.default(x = kl6, dist = D6) attr(,"class") [1] "silhouette" > summary(silhouette(kl6, D6)) Silhouette of 6 units in 3 clusters from silhouette.default(x = kl6, dist = D6) : Cluster sizes and average silhouette widths: 2 2 2 0.5000 -0.7375 -0.4375 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. -0.97500 -0.50000 -0.43750 -0.22500 -0.09375 1.00000 > plot(silhouette(kl6, D6))# gives error in earlier cluster versions > > dev.off() pdf 2 > > ## Last Line: > cat('Time elapsed: ', proc.time() - .proctime00,'\n') Time elapsed: 3.453 0.03 3.503 0 0 > > cluster/tests/silhouette-default.R0000644000176000001440000000510011626747420017064 0ustar ripleyusers## This came from a bug report on R-help by ge yreyt ## Date: Mon, 9 Jun 2003 16:06:53 -0400 (EDT) library(cluster) if(FALSE) # manual testing library(cluster, lib="~/R/Pkgs/cluster.Rcheck") data(iris) .proctime00 <- proc.time() mdist <- as.dist(1 - cor(t(iris[,1:4])))#dissimlarity ## this is always the same: hc <- diana(mdist, diss = TRUE, stand = FALSE) maxk <- 15 # at most 15 clusters silh.wid <- numeric(maxk) # myind[k] := the silh.value for k clusters silh.wid[1] <- NA # 1-cluster: silhouette not defined op <- par(mfrow = c(4,4), mar = .1+ c(2,1,2,1), mgp=c(1.5, .6,0)) for(k in 2:maxk) { cat("\n", k,":\n==\n") k.gr <- cutree(as.hclust(hc), k = k) cat("grouping table: "); print(table(k.gr)) si <- silhouette(k.gr, mdist) cat("silhouette:\n"); print(summary(si)) plot(si, main = paste("k =",k), col = 2:(k+1), do.n.k=FALSE, do.clus.stat=FALSE) silh.wid[k] <- summary(si)$avg.width ## === } par(op) summary(si.p <- silhouette(50 - k.gr, mdist)) stopifnot(identical(si.p[,3], si[,3]), identical(si.p[, 1:2], 50 - si[, 1:2])) # the widths: silh.wid #select the number 
of k clusters with the largest si value : (myk <- which.min(silh.wid)) # -> 8 (here) postscript(file="silhouette-ex.ps") ## MM: plot to see how the decision is made plot(silh.wid, type = 'b', col= "blue", xlab = "k") axis(1, at=myk, col.axis= "red", font.axis= 2) ##--- PAM()'s silhouette should give same as silh*.default()! Eq <- function(x,y, tol = 1e-12) x == y | abs(x - y) < tol * abs((x+y)/2) for(k in 2:40) { cat("\n", k,":\n==\n") p.k <- pam(mdist, k = k) k.gr <- p.k$clustering si.p <- silhouette(p.k) si.g <- silhouette(k.gr, mdist) ## since the obs.order may differ (within cluster): si.g <- si.g[ as.integer(rownames(si.p)), ] cat("grouping table: "); print(table(k.gr)) if(!isTRUE(all.equal(c(si.g), c(si.p)))) { cat("silhouettes differ:") if(any(neq <- !Eq(si.g[,3], si.p[,3]))) { cat("\n") print( cbind(si.p[], si.g[,2:3])[ neq, ] ) } else cat(" -- but not in col.3 !\n") } } ## "pathological" case where a_i == b_i == 0 : D6 <- structure(c(0, 0, 0, 0.4, 1, 0.05, 1, 1, 0, 1, 1, 0, 0.25, 1, 1), Labels = LETTERS[1:6], Size = 6, call = as.name("manually"), class = "dist", Diag = FALSE, Upper = FALSE) D6 kl6 <- c(1,1, 2,2, 3,3) silhouette(kl6, D6)# had one NaN summary(silhouette(kl6, D6)) plot(silhouette(kl6, D6))# gives error in earlier cluster versions dev.off() ## Last Line: cat('Time elapsed: ', proc.time() - .proctime00,'\n') cluster/tests/pam.Rout.save0000644000176000001440000014463511712175353015534 0ustar ripleyusers R version 2.14.1 Patched (2012-01-24 r58194) Copyright (C) 2012 The R Foundation for Statistical Computing ISBN 3-900051-07-0 Platform: x86_64-unknown-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. 
Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(cluster) > ## Compare on these: > nms <- c("clustering", "objective", "isolation", "clusinfo", "silinfo") > nm2 <- c("medoids", "id.med", nms) > nm3 <- nm2[- pmatch("obj", nm2)] > > (x <- x0 <- cbind(V1 = (-3:4)^2, V2 = c(0:6,NA), V3 = c(1,2,NA,7,NA,8:9,8))) V1 V2 V3 [1,] 9 0 1 [2,] 4 1 2 [3,] 1 2 NA [4,] 0 3 7 [5,] 1 4 NA [6,] 4 5 8 [7,] 9 6 9 [8,] 16 NA 8 > (px <- pam(x,2, metric="manhattan")) Medoids: ID V1 V2 V3 [1,] 2 4 1 2 [2,] 6 4 5 8 Clustering vector: [1] 1 1 1 2 2 2 2 2 Objective function: build swap 6.375 6.375 Available components: [1] "medoids" "id.med" "clustering" "objective" "isolation" [6] "clusinfo" "silinfo" "diss" "call" "data" > stopifnot(identical(x,x0))# DUP=FALSE .. > pd <- pam(dist(x,"manhattan"), 2) > px2 <- pam(x,2, metric="manhattan", keep.diss=FALSE, keep.data=FALSE) > pdC <- pam(x,2, metric="manhattan", cluster.only = TRUE) > > stopifnot(identical(px[nms], pd[nms]), + identical(px[nms], px2[nms]), + identical(pdC, px2$clustering), + ## and for default dist "euclidean": + identical(pam(x, 2)[nms], + pam(dist(x),2)[nms]) + ) > > set.seed(253) > ## generate 250 objects, divided into 2 clusters. 
> x <- rbind(cbind(rnorm(120, 0,8), rnorm(120, 0,8)), + cbind(rnorm(130,50,8), rnorm(130,10,8))) > > .proctime00 <- proc.time() > > summary(px2 <- pam(x, 2)) Medoids: ID [1,] 61 -0.7697828 -0.2330187 [2,] 163 49.1392167 9.4097259 Clustering vector: [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [38] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [75] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [112] 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [149] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [186] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [223] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 Objective function: build swap 13.25843 10.20817 Numerical information per cluster: size max_diss av_diss diameter separation [1,] 120 31.04843 10.18584 53.22082 9.419035 [2,] 130 26.94337 10.22878 47.86442 9.419035 Isolated clusters: L-clusters: character(0) L*-clusters: character(0) Silhouette plot information: cluster neighbor sil_width 117 1 2 0.80638966 75 1 2 0.80600824 81 1 2 0.80556624 107 1 2 0.80535252 6 1 2 0.80526675 100 1 2 0.80385505 68 1 2 0.80369702 113 1 2 0.80331774 61 1 2 0.80315322 57 1 2 0.80313945 12 1 2 0.80161573 59 1 2 0.80047745 82 1 2 0.79630964 67 1 2 0.79559589 63 1 2 0.79488886 47 1 2 0.79458809 21 1 2 0.79379540 9 1 2 0.79343081 95 1 2 0.79332153 4 1 2 0.79136081 3 1 2 0.79130879 39 1 2 0.79052367 120 1 2 0.78877423 90 1 2 0.78767224 85 1 2 0.78588359 106 1 2 0.78504452 92 1 2 0.78303000 83 1 2 0.78245915 19 1 2 0.78228359 14 1 2 0.78139236 10 1 2 0.77825678 49 1 2 0.77597087 64 1 2 0.77482761 44 1 2 0.77397394 89 1 2 0.77297318 119 1 2 0.77238705 108 1 2 0.77137189 104 1 2 0.76871378 32 1 2 0.76856251 115 1 2 0.76843312 27 1 2 0.76811698 88 1 2 0.76810713 109 1 2 0.76681303 62 1 2 0.76655954 36 1 2 0.76547988 66 1 2 0.76535606 74 1 2 0.76491406 26 1 2 0.76441455 
24 1 2 0.76436188 65 1 2 0.76381352 40 1 2 0.76061109 52 1 2 0.75748679 54 1 2 0.75746436 13 1 2 0.75594073 56 1 2 0.75353784 96 1 2 0.75268786 116 1 2 0.75267215 110 1 2 0.75266614 112 1 2 0.75150872 78 1 2 0.75083708 7 1 2 0.74905187 86 1 2 0.74190424 18 1 2 0.74162144 111 1 2 0.74085474 69 1 2 0.74044653 76 1 2 0.73911707 50 1 2 0.73847075 93 1 2 0.73616384 31 1 2 0.73462007 33 1 2 0.73455252 43 1 2 0.73396232 102 1 2 0.72930751 118 1 2 0.72778023 15 1 2 0.72588122 53 1 2 0.72542363 8 1 2 0.72535191 77 1 2 0.72467809 16 1 2 0.72446952 48 1 2 0.72331213 105 1 2 0.72325095 37 1 2 0.72055248 101 1 2 0.71783562 22 1 2 0.71217552 23 1 2 0.71078375 84 1 2 0.70573352 17 1 2 0.70221946 38 1 2 0.69947240 2 1 2 0.69718780 98 1 2 0.69601237 1 1 2 0.69373841 35 1 2 0.69179546 70 1 2 0.69074915 28 1 2 0.68434091 97 1 2 0.68351978 5 1 2 0.67662675 72 1 2 0.67420722 34 1 2 0.67315267 11 1 2 0.67226046 103 1 2 0.67188668 87 1 2 0.67172802 58 1 2 0.67090513 46 1 2 0.66835116 60 1 2 0.66565445 80 1 2 0.65983842 73 1 2 0.65093947 55 1 2 0.64709226 20 1 2 0.64439401 45 1 2 0.63403361 51 1 2 0.63303101 42 1 2 0.62906268 94 1 2 0.60916406 91 1 2 0.59905996 41 1 2 0.57245485 29 1 2 0.55594781 99 1 2 0.55035955 79 1 2 0.50808544 71 1 2 0.46663954 25 1 2 0.43797346 114 1 2 0.16645003 30 1 2 0.08928664 121 2 1 0.80353953 137 2 1 0.80253721 146 2 1 0.80106653 173 2 1 0.80039417 216 2 1 0.79969919 124 2 1 0.79964913 163 2 1 0.79901674 157 2 1 0.79779188 242 2 1 0.79744315 227 2 1 0.79708130 207 2 1 0.79653829 130 2 1 0.79574204 188 2 1 0.79496670 250 2 1 0.79302877 145 2 1 0.79190501 126 2 1 0.79156003 166 2 1 0.79068795 222 2 1 0.78986170 232 2 1 0.78839216 176 2 1 0.78819086 198 2 1 0.78782877 225 2 1 0.78747329 230 2 1 0.78689375 205 2 1 0.78683641 160 2 1 0.78643596 150 2 1 0.78484046 136 2 1 0.78455577 228 2 1 0.78198238 206 2 1 0.78137390 152 2 1 0.78044944 200 2 1 0.77843458 149 2 1 0.77822272 221 2 1 0.77758324 226 2 1 0.77611981 129 2 1 0.77531368 199 2 1 0.77491451 154 2 1 
0.77136276 241 2 1 0.77076783 179 2 1 0.77010597 174 2 1 0.76893758 214 2 1 0.76776510 181 2 1 0.76763087 213 2 1 0.76683151 215 2 1 0.76639087 236 2 1 0.76637552 218 2 1 0.76563050 182 2 1 0.76450873 219 2 1 0.76370712 208 2 1 0.76090426 151 2 1 0.75957536 164 2 1 0.75914844 248 2 1 0.75849775 224 2 1 0.75826151 168 2 1 0.75782023 189 2 1 0.75555083 128 2 1 0.75550519 125 2 1 0.75510766 177 2 1 0.75128941 147 2 1 0.75086382 158 2 1 0.75029192 245 2 1 0.74993652 186 2 1 0.74741247 165 2 1 0.74681005 156 2 1 0.74478894 122 2 1 0.74315425 247 2 1 0.74107328 220 2 1 0.74054057 183 2 1 0.73818743 184 2 1 0.73743259 169 2 1 0.73712431 180 2 1 0.73419669 240 2 1 0.73390938 134 2 1 0.73382823 190 2 1 0.73379720 217 2 1 0.73311931 171 2 1 0.73110365 143 2 1 0.72986022 153 2 1 0.72891371 223 2 1 0.72887340 238 2 1 0.72789416 175 2 1 0.72311665 138 2 1 0.72290131 235 2 1 0.72157157 237 2 1 0.71591233 132 2 1 0.71549875 204 2 1 0.71381083 201 2 1 0.71263881 170 2 1 0.70812568 191 2 1 0.70747428 243 2 1 0.70588929 193 2 1 0.70499170 141 2 1 0.70489885 161 2 1 0.70303117 249 2 1 0.69300988 229 2 1 0.69231982 196 2 1 0.69162302 211 2 1 0.69128644 246 2 1 0.68757678 159 2 1 0.68619850 133 2 1 0.68605444 194 2 1 0.68538064 155 2 1 0.68278455 234 2 1 0.68202095 127 2 1 0.68111027 144 2 1 0.67559517 131 2 1 0.65959281 139 2 1 0.65895024 209 2 1 0.65844942 148 2 1 0.65180336 185 2 1 0.64989675 212 2 1 0.63954685 192 2 1 0.63470144 123 2 1 0.63005333 202 2 1 0.61735843 135 2 1 0.61493992 210 2 1 0.60680456 140 2 1 0.58410004 187 2 1 0.58193543 239 2 1 0.57088679 203 2 1 0.56761998 244 2 1 0.55321123 231 2 1 0.55043439 197 2 1 0.52364031 195 2 1 0.51955678 142 2 1 0.47466260 162 2 1 0.46155841 172 2 1 0.45167576 178 2 1 0.42686872 233 2 1 0.37013099 167 2 1 0.32442373 Average silhouette width per cluster: [1] 0.7196104 0.7148520 Average silhouette width of total data set: [1] 0.717136 Available components: [1] "medoids" "id.med" "clustering" "objective" "isolation" [6] "clusinfo" 
"silinfo" "diss" "call" "data" > pdx <- pam(dist(x), 2) > all.equal(px2[nms], pdx[nms], tol = 1e-12) ## TRUE [1] TRUE > pdxK <- pam(dist(x), 2, keep.diss = TRUE) > stopifnot(identical(pdx[nm2], pdxK[nm2])) > > spdx <- silhouette(pdx) > summary(spdx) Silhouette of 250 units in 2 clusters from pam(x = dist(x), k = 2) : Cluster sizes and average silhouette widths: 120 130 0.7196104 0.7148520 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. 0.08929 0.69140 0.74400 0.71710 0.77810 0.80640 > spdx cluster neighbor sil_width 117 1 2 0.80638966 75 1 2 0.80600824 81 1 2 0.80556624 107 1 2 0.80535252 6 1 2 0.80526675 100 1 2 0.80385505 68 1 2 0.80369702 113 1 2 0.80331774 61 1 2 0.80315322 57 1 2 0.80313945 12 1 2 0.80161573 59 1 2 0.80047745 82 1 2 0.79630964 67 1 2 0.79559589 63 1 2 0.79488886 47 1 2 0.79458809 21 1 2 0.79379540 9 1 2 0.79343081 95 1 2 0.79332153 4 1 2 0.79136081 3 1 2 0.79130879 39 1 2 0.79052367 120 1 2 0.78877423 90 1 2 0.78767224 85 1 2 0.78588359 106 1 2 0.78504452 92 1 2 0.78303000 83 1 2 0.78245915 19 1 2 0.78228359 14 1 2 0.78139236 10 1 2 0.77825678 49 1 2 0.77597087 64 1 2 0.77482761 44 1 2 0.77397394 89 1 2 0.77297318 119 1 2 0.77238705 108 1 2 0.77137189 104 1 2 0.76871378 32 1 2 0.76856251 115 1 2 0.76843312 27 1 2 0.76811698 88 1 2 0.76810713 109 1 2 0.76681303 62 1 2 0.76655954 36 1 2 0.76547988 66 1 2 0.76535606 74 1 2 0.76491406 26 1 2 0.76441455 24 1 2 0.76436188 65 1 2 0.76381352 40 1 2 0.76061109 52 1 2 0.75748679 54 1 2 0.75746436 13 1 2 0.75594073 56 1 2 0.75353784 96 1 2 0.75268786 116 1 2 0.75267215 110 1 2 0.75266614 112 1 2 0.75150872 78 1 2 0.75083708 7 1 2 0.74905187 86 1 2 0.74190424 18 1 2 0.74162144 111 1 2 0.74085474 69 1 2 0.74044653 76 1 2 0.73911707 50 1 2 0.73847075 93 1 2 0.73616384 31 1 2 0.73462007 33 1 2 0.73455252 43 1 2 0.73396232 102 1 2 0.72930751 118 1 2 0.72778023 15 1 2 0.72588122 53 1 2 0.72542363 8 1 2 0.72535191 77 1 2 0.72467809 16 1 2 0.72446952 48 1 2 0.72331213 105 1 2 0.72325095 37 1 
2 0.72055248 101 1 2 0.71783562 22 1 2 0.71217552 23 1 2 0.71078375 84 1 2 0.70573352 17 1 2 0.70221946 38 1 2 0.69947240 2 1 2 0.69718780 98 1 2 0.69601237 1 1 2 0.69373841 35 1 2 0.69179546 70 1 2 0.69074915 28 1 2 0.68434091 97 1 2 0.68351978 5 1 2 0.67662675 72 1 2 0.67420722 34 1 2 0.67315267 11 1 2 0.67226046 103 1 2 0.67188668 87 1 2 0.67172802 58 1 2 0.67090513 46 1 2 0.66835116 60 1 2 0.66565445 80 1 2 0.65983842 73 1 2 0.65093947 55 1 2 0.64709226 20 1 2 0.64439401 45 1 2 0.63403361 51 1 2 0.63303101 42 1 2 0.62906268 94 1 2 0.60916406 91 1 2 0.59905996 41 1 2 0.57245485 29 1 2 0.55594781 99 1 2 0.55035955 79 1 2 0.50808544 71 1 2 0.46663954 25 1 2 0.43797346 114 1 2 0.16645003 30 1 2 0.08928664 121 2 1 0.80353953 137 2 1 0.80253721 146 2 1 0.80106653 173 2 1 0.80039417 216 2 1 0.79969919 124 2 1 0.79964913 163 2 1 0.79901674 157 2 1 0.79779188 242 2 1 0.79744315 227 2 1 0.79708130 207 2 1 0.79653829 130 2 1 0.79574204 188 2 1 0.79496670 250 2 1 0.79302877 145 2 1 0.79190501 126 2 1 0.79156003 166 2 1 0.79068795 222 2 1 0.78986170 232 2 1 0.78839216 176 2 1 0.78819086 198 2 1 0.78782877 225 2 1 0.78747329 230 2 1 0.78689375 205 2 1 0.78683641 160 2 1 0.78643596 150 2 1 0.78484046 136 2 1 0.78455577 228 2 1 0.78198238 206 2 1 0.78137390 152 2 1 0.78044944 200 2 1 0.77843458 149 2 1 0.77822272 221 2 1 0.77758324 226 2 1 0.77611981 129 2 1 0.77531368 199 2 1 0.77491451 154 2 1 0.77136276 241 2 1 0.77076783 179 2 1 0.77010597 174 2 1 0.76893758 214 2 1 0.76776510 181 2 1 0.76763087 213 2 1 0.76683151 215 2 1 0.76639087 236 2 1 0.76637552 218 2 1 0.76563050 182 2 1 0.76450873 219 2 1 0.76370712 208 2 1 0.76090426 151 2 1 0.75957536 164 2 1 0.75914844 248 2 1 0.75849775 224 2 1 0.75826151 168 2 1 0.75782023 189 2 1 0.75555083 128 2 1 0.75550519 125 2 1 0.75510766 177 2 1 0.75128941 147 2 1 0.75086382 158 2 1 0.75029192 245 2 1 0.74993652 186 2 1 0.74741247 165 2 1 0.74681005 156 2 1 0.74478894 122 2 1 0.74315425 247 2 1 0.74107328 220 2 1 0.74054057 183 2 1 
0.73818743 184 2 1 0.73743259 169 2 1 0.73712431 180 2 1 0.73419669 240 2 1 0.73390938 134 2 1 0.73382823 190 2 1 0.73379720 217 2 1 0.73311931 171 2 1 0.73110365 143 2 1 0.72986022 153 2 1 0.72891371 223 2 1 0.72887340 238 2 1 0.72789416 175 2 1 0.72311665 138 2 1 0.72290131 235 2 1 0.72157157 237 2 1 0.71591233 132 2 1 0.71549875 204 2 1 0.71381083 201 2 1 0.71263881 170 2 1 0.70812568 191 2 1 0.70747428 243 2 1 0.70588929 193 2 1 0.70499170 141 2 1 0.70489885 161 2 1 0.70303117 249 2 1 0.69300988 229 2 1 0.69231982 196 2 1 0.69162302 211 2 1 0.69128644 246 2 1 0.68757678 159 2 1 0.68619850 133 2 1 0.68605444 194 2 1 0.68538064 155 2 1 0.68278455 234 2 1 0.68202095 127 2 1 0.68111027 144 2 1 0.67559517 131 2 1 0.65959281 139 2 1 0.65895024 209 2 1 0.65844942 148 2 1 0.65180336 185 2 1 0.64989675 212 2 1 0.63954685 192 2 1 0.63470144 123 2 1 0.63005333 202 2 1 0.61735843 135 2 1 0.61493992 210 2 1 0.60680456 140 2 1 0.58410004 187 2 1 0.58193543 239 2 1 0.57088679 203 2 1 0.56761998 244 2 1 0.55321123 231 2 1 0.55043439 197 2 1 0.52364031 195 2 1 0.51955678 142 2 1 0.47466260 162 2 1 0.46155841 172 2 1 0.45167576 178 2 1 0.42686872 233 2 1 0.37013099 167 2 1 0.32442373 attr(,"Ordered") [1] TRUE attr(,"call") pam(x = dist(x), k = 2) attr(,"class") [1] "silhouette" > postscript("pam-tst.ps") > if(FALSE) + plot(spdx)# the silhouette > ## is now identical : > plot(pdx)# failed in 1.7.0 -- now only does silhouette > > par(mfrow = 2:1) > ## new 'dist' argument for clusplot(): > plot(pdx, dist=dist(x)) > ## but this should work automagically (via eval()) as well: > plot(pdx) > ## or this > clusplot(pdx) > > data(ruspini) > summary(pr4 <- pam(ruspini, 4)) Medoids: ID x y 10 10 19 65 32 32 44 149 52 52 99 119 70 70 69 21 Clustering vector: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 
3 3 3 3 3 3 3 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 Objective function: build swap 17.22898 11.48637 Numerical information per cluster: size max_diss av_diss diameter separation [1,] 20 24.04163 12.55362 40.24922 40.49691 [2,] 23 26.92582 10.44238 36.61967 24.04163 [3,] 17 33.97058 13.84800 47.63402 24.04163 [4,] 15 17.02939 8.98767 27.07397 40.49691 Isolated clusters: L-clusters: character(0) L*-clusters: [1] 1 4 Silhouette plot information: cluster neighbor sil_width 10 1 4 0.8056096 6 1 4 0.7954977 9 1 4 0.7923048 11 1 4 0.7831672 8 1 2 0.7811793 12 1 4 0.7658171 3 1 4 0.7587961 14 1 4 0.7569107 2 1 4 0.7456150 16 1 4 0.7436018 13 1 4 0.7398841 4 1 2 0.7361533 18 1 4 0.7080079 15 1 4 0.7006854 19 1 4 0.7000938 1 1 4 0.6798381 5 1 4 0.6646571 20 1 4 0.6619626 17 1 4 0.6148541 7 1 2 0.5900575 26 2 3 0.8357433 32 2 3 0.8332753 27 2 3 0.8290271 25 2 3 0.8285547 28 2 3 0.8192636 35 2 3 0.8186309 33 2 3 0.8175087 23 2 3 0.8089969 22 2 3 0.8025389 34 2 3 0.8013310 31 2 3 0.7949677 36 2 3 0.7943536 24 2 3 0.7930770 29 2 3 0.7897346 30 2 3 0.7892027 21 2 3 0.7698024 37 2 3 0.7684502 39 2 3 0.7631648 38 2 3 0.7438848 40 2 3 0.7083130 42 2 3 0.5291270 43 2 3 0.4931623 41 2 3 0.4290814 54 3 2 0.7741745 57 3 2 0.7703455 55 3 2 0.7641810 50 3 2 0.7619943 52 3 2 0.7616220 56 3 2 0.7575313 59 3 2 0.7327828 49 3 2 0.7317002 51 3 2 0.7209864 60 3 2 0.7206840 58 3 2 0.7019611 53 3 2 0.6775322 45 3 2 0.5974787 46 3 2 0.5740823 47 3 2 0.4835635 48 3 2 0.4247331 44 3 2 0.4196093 70 4 1 0.8548947 67 4 1 0.8527439 65 4 1 0.8503105 69 4 1 0.8391810 71 4 1 0.8381065 66 4 1 0.8229841 62 4 1 0.8153092 64 4 1 0.8061254 73 4 1 0.7950213 63 4 1 0.7795369 72 4 1 0.7748121 61 4 1 0.7701103 68 4 1 0.7620559 74 4 1 0.7596815 75 4 1 0.7425538 Average silhouette width per cluster: [1] 0.7262347 0.7548344 0.6691154 0.8042285 Average silhouette width of total data set: [1] 0.737657 2775 dissimilarities, summarized : Min. 
1st Qu. Median Mean 3rd Qu. Max. 1.4142 40.1060 75.5910 71.5380 99.1690 154.5000 Metric : euclidean Number of objects : 75 Available components: [1] "medoids" "id.med" "clustering" "objective" "isolation" [6] "clusinfo" "silinfo" "diss" "call" "data" > (pr3 <- pam(ruspini, 3)) Medoids: ID x y 17 17 30 52 32 32 44 149 52 52 99 119 Clustering vector: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 3 3 3 3 3 3 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 Objective function: build swap 25.68229 21.59293 Available components: [1] "medoids" "id.med" "clustering" "objective" "isolation" [6] "clusinfo" "silinfo" "diss" "call" "data" > (pr5 <- pam(ruspini, 5)) Medoids: ID x y 10 10 19 65 32 32 44 149 52 52 99 119 47 47 78 94 70 70 69 21 Clustering vector: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 4 4 4 3 3 3 3 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 3 3 3 3 3 3 3 3 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 Objective function: build swap 12.09864 10.39579 Available components: [1] "medoids" "id.med" "clustering" "objective" "isolation" [6] "clusinfo" "silinfo" "diss" "call" "data" > > data(votes.repub) > summary(pv3 <- pam(votes.repub, 3)) Medoids: ID X1856 X1860 X1864 X1868 X1872 X1876 X1880 X1884 X1888 X1892 X1896 Alabama 1 NA NA NA 51.44 53.19 40.02 36.98 38.44 32.28 3.95 28.13 Alaska 2 NA NA NA NA NA NA NA NA NA NA NA New Mexico 31 NA NA NA NA NA NA NA NA NA NA NA X1900 X1904 X1908 X1912 X1916 X1920 X1924 X1928 X1932 X1936 X1940 Alabama 34.67 20.65 24.38 8.26 21.97 30.98 27.01 48.49 14.15 12.82 14.34 Alaska 
NA NA NA NA NA NA NA NA NA NA NA New Mexico NA NA NA 35.91 46.53 54.68 48.52 59.01 35.76 36.50 43.28 X1944 X1948 X1952 X1956 X1960 X1964 X1968 X1972 X1976 Alabama 18.20 19.04 35.02 39.39 41.75 69.5 14.0 72.4 43.48 Alaska NA NA NA NA 50.94 34.1 45.3 58.1 62.91 New Mexico 46.44 42.93 55.39 57.81 49.41 41.0 51.8 61.0 51.04 Clustering vector: Alabama Alaska Arizona Arkansas California 1 2 3 1 2 Colorado Connecticut Delaware Florida Georgia 2 2 3 1 1 Hawaii Idaho Illinois Indiana Iowa 2 3 2 3 3 Kansas Kentucky Louisiana Maine Maryland 2 3 1 2 3 Massachusetts Michigan Minnesota Mississippi Missouri 3 2 3 1 3 Montana Nebraska Nevada New Hampshire New Jersey 3 3 2 2 2 New Mexico New York North Carolina North Dakota Ohio 3 3 3 2 3 Oklahoma Oregon Pennsylvania Rhode Island South Carolina 3 3 2 3 2 South Dakota Tennessee Texas Utah Vermont 3 3 2 3 2 Virginia Washington West Virginia Wisconsin Wyoming 2 3 3 3 3 Objective function: build swap 38.32548 38.32548 Numerical information per cluster: size max_diss av_diss diameter separation [1,] 6 78.92731 51.59134 116.7048 50.14189 [2,] 18 86.54675 38.47068 271.2455 19.42184 [3,] 26 60.03879 35.16361 124.8324 19.42184 Isolated clusters: L-clusters: character(0) L*-clusters: character(0) Silhouette plot information: cluster neighbor sil_width Louisiana 1 3 0.54689535 Alabama 1 3 0.52839272 Georgia 1 3 0.52730253 Mississippi 1 2 0.52454810 Florida 1 3 0.25211631 Arkansas 1 3 0.24131701 Alaska 2 3 0.15699268 Hawaii 2 3 0.08479842 Vermont 2 3 -0.02620975 Maine 2 3 -0.03284950 Michigan 2 3 -0.11524982 Pennsylvania 2 3 -0.15341477 New Hampshire 2 3 -0.17099889 Connecticut 2 3 -0.19095000 New Jersey 2 3 -0.19281567 Kansas 2 3 -0.19719316 California 2 3 -0.24006293 Illinois 2 3 -0.25236336 North Dakota 2 3 -0.25464430 Virginia 2 3 -0.26262534 Nevada 2 3 -0.27016336 Colorado 2 3 -0.27885043 Texas 2 1 -0.47297583 South Carolina 2 1 -0.50899710 New Mexico 3 2 0.39555584 Washington 3 2 0.32989454 Oklahoma 3 2 0.30953823 Wyoming 3 2 0.30163169 
Idaho 3 2 0.29915132 Montana 3 2 0.29105494 Missouri 3 2 0.29038462 Oregon 3 2 0.27710695 Maryland 3 2 0.27437520 West Virginia 3 2 0.27089938 Utah 3 2 0.26964380 Tennessee 3 2 0.26846440 Arizona 3 2 0.25968564 Delaware 3 2 0.25920434 Kentucky 3 2 0.25868341 South Dakota 3 2 0.25615670 Indiana 3 2 0.25031548 Wisconsin 3 2 0.21808013 Ohio 3 2 0.21477474 Nebraska 3 2 0.20965953 North Carolina 3 2 0.19201537 Minnesota 3 2 0.18955165 New York 3 2 0.18820394 Iowa 3 2 0.17296046 Rhode Island 3 2 0.12599915 Massachusetts 3 2 0.12106770 Average silhouette width per cluster: [1] 0.4367620 -0.1876985 0.2497715 Average silhouette width of total data set: [1] 0.1147212 1225 dissimilarities, summarized : Min. 1st Qu. Median Mean 3rd Qu. Max. 17.199 48.343 64.681 82.227 105.490 281.950 Metric : euclidean Number of objects : 50 Available components: [1] "medoids" "id.med" "clustering" "objective" "isolation" [6] "clusinfo" "silinfo" "diss" "call" "data" > (pv4 <- pam(votes.repub, 4)) Medoids: ID X1856 X1860 X1864 X1868 X1872 X1876 X1880 X1884 X1888 X1892 X1896 Alabama 1 NA NA NA 51.44 53.19 40.02 36.98 38.44 32.28 3.95 28.13 Alaska 2 NA NA NA NA NA NA NA NA NA NA NA New Mexico 31 NA NA NA NA NA NA NA NA NA NA NA Iowa 15 49.13 54.87 64.23 61.92 64.18 58.58 56.85 52.42 52.36 49.60 55.46 X1900 X1904 X1908 X1912 X1916 X1920 X1924 X1928 X1932 X1936 X1940 Alabama 34.67 20.65 24.38 8.26 21.97 30.98 27.01 48.49 14.15 12.82 14.34 Alaska NA NA NA NA NA NA NA NA NA NA NA New Mexico NA NA NA 35.91 46.53 54.68 48.52 59.01 35.76 36.50 43.28 Iowa 57.99 63.37 55.62 24.30 54.06 70.91 55.06 61.80 39.98 42.70 52.03 X1944 X1948 X1952 X1956 X1960 X1964 X1968 X1972 X1976 Alabama 18.20 19.04 35.02 39.39 41.75 69.5 14.0 72.4 43.48 Alaska NA NA NA NA 50.94 34.1 45.3 58.1 62.91 New Mexico 46.44 42.93 55.39 57.81 49.41 41.0 51.8 61.0 51.04 Iowa 51.99 47.58 63.76 59.06 56.71 38.1 53.0 57.6 50.51 Clustering vector: Alabama Alaska Arizona Arkansas California 1 2 3 1 2 Colorado Connecticut Delaware Florida 
Georgia 2 2 3 1 1 Hawaii Idaho Illinois Indiana Iowa 2 3 4 3 4 Kansas Kentucky Louisiana Maine Maryland 4 3 1 2 3 Massachusetts Michigan Minnesota Mississippi Missouri 4 2 4 1 3 Montana Nebraska Nevada New Hampshire New Jersey 3 4 2 2 2 New Mexico New York North Carolina North Dakota Ohio 3 3 3 4 4 Oklahoma Oregon Pennsylvania Rhode Island South Carolina 3 3 4 4 2 South Dakota Tennessee Texas Utah Vermont 4 3 2 3 2 Virginia Washington West Virginia Wisconsin Wyoming 2 3 3 4 3 Objective function: build swap 35.84182 35.84182 Available components: [1] "medoids" "id.med" "clustering" "objective" "isolation" [6] "clusinfo" "silinfo" "diss" "call" "data" > (pv6 <- pam(votes.repub, 6, trace = 3)) C pam(): computing 1226 dissimilarities: [Ok] pam()'s bswap(*, s=281.951, pamonce=0): build 6 medoids: new repr. 2 new repr. 1 new repr. 31 new repr. 15 new repr. 46 new repr. 40 after build: medoids are 1 2 15 31 40 46 and min.dist dysma[1:n] are 0 0 37.7 56 35.8 28.5 28.6 31.7 54.1 48.2 51.7 33.2 27.3 30.5 0 35.1 25.4 60.9 36.9 26.7 48.4 28.1 33.2 63.1 21.1 28.6 37.5 35.8 29.8 31.3 0 32 29.9 35.7 30.9 35.1 27.8 35.7 50.2 0 26.2 30.2 45.2 34.1 33.8 0 28.5 35.1 34.2 28.8 swp new 10 <-> 1 old; decreasing diss. 
1579.03 by -2.57067 end{bswap()}, end{cstat()} Medoids: ID X1856 X1860 X1864 X1868 X1872 X1876 X1880 X1884 X1888 X1892 Georgia 10 NA NA NA 35.72 43.77 27.94 34.33 33.84 28.33 21.80 Alaska 2 NA NA NA NA NA NA NA NA NA NA Virginia 46 0.19 1.15 NA NA 50.48 40.62 39.52 48.90 49.47 38.75 New Mexico 31 NA NA NA NA NA NA NA NA NA NA Iowa 15 49.13 54.87 64.23 61.92 64.18 58.58 56.85 52.42 52.36 49.60 South Carolina 40 NA NA NA 57.93 75.95 50.26 33.97 23.72 17.27 18.99 X1896 X1900 X1904 X1908 X1912 X1916 X1920 X1924 X1928 X1932 Georgia 36.82 28.56 18.32 31.40 4.27 7.07 28.57 18.19 43.37 7.77 Alaska NA NA NA NA NA NA NA NA NA NA Virginia 45.90 43.81 36.67 38.36 17.00 32.05 37.85 32.79 53.91 30.09 New Mexico NA NA NA NA 35.91 46.53 54.68 48.52 59.01 35.76 Iowa 55.46 57.99 63.37 55.62 24.30 54.06 70.91 55.06 61.80 39.98 South Carolina 13.51 7.04 4.63 5.97 1.06 2.43 3.90 2.21 8.54 1.89 X1936 X1940 X1944 X1948 X1952 X1956 X1960 X1964 X1968 X1972 Georgia 12.60 14.84 18.25 18.31 30.34 33.22 37.44 54.1 30.4 75.0 Alaska NA NA NA NA NA NA 50.94 34.1 45.3 58.1 Virginia 29.39 31.55 37.39 41.04 56.32 55.37 52.44 46.5 41.4 67.8 New Mexico 36.50 43.28 46.44 42.93 55.39 57.81 49.41 41.0 51.8 61.0 Iowa 42.70 52.03 51.99 47.58 63.76 59.06 56.71 38.1 53.0 57.6 South Carolina 1.43 4.37 4.46 3.78 49.28 25.18 48.76 58.9 38.1 70.8 X1976 Georgia 33.02 Alaska 62.91 Virginia 50.73 New Mexico 51.04 Iowa 50.51 South Carolina 43.54 Clustering vector: Alabama Alaska Arizona Arkansas California 1 2 3 3 2 Colorado Connecticut Delaware Florida Georgia 2 2 4 3 1 Hawaii Idaho Illinois Indiana Iowa 2 4 5 4 5 Kansas Kentucky Louisiana Maine Maryland 5 4 1 2 4 Massachusetts Michigan Minnesota Mississippi Missouri 5 2 5 6 4 Montana Nebraska Nevada New Hampshire New Jersey 4 5 2 2 2 New Mexico New York North Carolina North Dakota Ohio 4 4 3 5 5 Oklahoma Oregon Pennsylvania Rhode Island South Carolina 4 4 5 5 6 South Dakota Tennessee Texas Utah Vermont 5 3 2 4 2 Virginia Washington West Virginia Wisconsin Wyoming 
3 4 4 5 4 Objective function: build swap 31.58067 31.52926 Available components: [1] "medoids" "id.med" "clustering" "objective" "isolation" [6] "clusinfo" "silinfo" "diss" "call" "data" > > cat('Time elapsed: ', proc.time() - .proctime00,'\n') Time elapsed: 1.24 0.012 1.258 0 0 > > ## re-starting with medoids from pv6 shouldn't change: > pv6. <- pam(votes.repub, 6, medoids = pv6$id.med, trace = 3) C pam(): computing 1226 dissimilarities: [Ok] pam()'s bswap(*, s=281.951, pamonce=0): medoids given after build: medoids are 2 10 15 31 40 46 and min.dist dysma[1:n] are 48.2 0 37.7 56 35.8 28.5 28.6 31.7 54.1 0 51.7 33.2 27.3 30.5 0 35.1 25.4 58.3 36.9 26.7 48.4 28.1 33.2 63.1 21.1 28.6 37.5 35.8 29.8 31.3 0 32 29.9 35.7 30.9 35.1 27.8 35.7 50.2 0 26.2 30.2 45.2 34.1 33.8 0 28.5 35.1 34.2 28.8 end{bswap()}, end{cstat()} > identical(pv6[nm3], pv6.[nm3]) [1] TRUE > > ## This example seg.faulted at some point: > d.st <- data.frame(V1= c(9, 12, 12, 15, 9, 9, 13, 11, 15, 10, 13, 13, + 13, 15, 8, 13, 13, 10, 7, 9, 6, 11, 3), + V2= c(5, 9, 3, 5, 1, 1, 2, NA, 10, 1, 4, 7, + 4, NA, NA, 5, 2, 4, 3, 3, 6, 1, 1), + V3 = c(63, 41, 59, 50, 290, 226, 60, 36, 32, 121, 70, 51, + 79, 32, 42, 39, 76, 60, 56, 88, 57, 309, 254), + V4 = c(146, 43, 78, 88, 314, 149, 78, NA, 238, 153, 159, 222, + 203, NA, NA, 74, 100, 111, 9, 180, 50, 256, 107)) > dd <- daisy(d.st, stand = TRUE) > (r0 <- pam(dd, 5))# cluster 5 = { 23 } -- on single observation Medoids: ID [1,] 15 15 [2,] 8 8 [3,] 14 14 [4,] 22 22 [5,] 23 23 Clustering vector: [1] 1 2 2 3 4 4 2 2 3 2 2 2 2 3 1 2 2 2 1 1 1 4 5 Objective function: build swap 0.9368049 0.8621860 Available components: [1] "medoids" "id.med" "clustering" "objective" "isolation" [6] "clusinfo" "silinfo" "diss" "call" > ## This gave only 3 different medoids -> and seg.fault: > (r5 <- pam(dd, 5, medoids = c(1,3,20,2,5), trace = 2)) # now "fine" pam()'s bswap(*, s=8.51931, pamonce=0): medoids given after build: medoids are 1 2 3 5 20 swp new 14 <-> 2 old; decreasing 
diss. 29.8745 by -5.50096 swp new 15 <-> 1 old; decreasing diss. 24.3735 by -2.20162 swp new 6 <-> 20 old; decreasing diss. 22.1719 by -2.12745 swp new 8 <-> 3 old; decreasing diss. 20.0444 by -0.201608 end{bswap()}, end{cstat()} Medoids: ID [1,] 15 15 [2,] 8 8 [3,] 14 14 [4,] 5 5 [5,] 6 6 Clustering vector: [1] 1 2 2 3 4 5 2 2 3 5 2 2 2 3 1 2 2 2 1 1 1 4 5 Objective function: build swap 1.2988899 0.8627319 Available components: [1] "medoids" "id.med" "clustering" "objective" "isolation" [6] "clusinfo" "silinfo" "diss" "call" > > dev.off() null device 1 > > ##------------------------ Testing pam() with new "pamonce" argument: > > ## This is from "next version of Matrix" test-tools-1.R: > showSys.time <- function(expr) { + ## prepend 'Time' for R CMD Rdiff + st <- system.time(expr) + writeLines(paste("Time", capture.output(print(st)))) + invisible(st) + } > show2Ratios <- function(...) { + stopifnot(length(rgs <- list(...)) == 2, + nchar(ns <- names(rgs)) > 0) + r <- round(cbind(..1, ..2)[c(1,3),], 3) + dimnames(r) <- list(paste("Time ", rownames(r)), ns) + r + } > > > n <- 1000 > ## If not enough cases, all CPU times equals 0. > n <- 500 # for now, and automatic testing > > sd <- 0.5 > set.seed(13) > n2 <- as.integer(round(n * 1.5)) > x <- rbind(cbind(rnorm( n,0,sd), rnorm( n,0,sd)), + cbind(rnorm(n2,5,sd), rnorm(n2,5,sd)), + cbind(rnorm(n2,7,sd), rnorm(n2,7,sd)), + cbind(rnorm(n2,9,sd), rnorm(n2,9,sd))) > > > ## original algorithm > st0 <- showSys.time(pamx <- pam(x, 4, trace.lev=2))# 8.157 0.024 8.233 C pam(): computing 3779876 dissimilarities: [Ok] pam()'s bswap(*, s=15.7788, pamonce=0): build 4 medoids: new repr. 1268 new repr. 414 new repr. 2153 new repr. 915 after build: medoids are 414 915 1268 2153 swp new 1793 <-> 1268 old; decreasing diss. 
1862.37 by -129.13 end{bswap()}, end{cstat()} Time user system elapsed Time 1.516 0.012 1.536 > ## bswapPamOnce algorithm > st1 <- showSys.time(pamxonce <- pam(x, 4, pamonce=TRUE, trace.lev=2))# 6.122 0.024 6.181 C pam(): computing 3779876 dissimilarities: [Ok] pam()'s bswap(*, s=15.7788, pamonce=1): build 4 medoids: new repr. 1268 new repr. 414 new repr. 2153 new repr. 915 after build: medoids are 414 915 1268 2153 swp new 1793 <-> 1268 old; decreasing diss. 1862.37 by -129.13 end{bswap()}, end{cstat()} Time user system elapsed Time 1.400 0.008 1.413 > ## bswapPamOnceDistIndice > st2 <- showSys.time(pamxonce2 <- pam(x, 4, pamonce = 2, trace.lev=2))# 4.101 0.024 4.151 C pam(): computing 3779876 dissimilarities: [Ok] pam()'s bswap(*, s=15.7788, pamonce=2): build 4 medoids: new repr. 1268 new repr. 414 new repr. 2153 new repr. 915 after build: medoids are 414 915 1268 2153 swp new 1793 <-> 1268 old; decreasing diss. 1862.37 by -129.13 end{bswap()}, end{cstat()} Time user system elapsed Time 1.064 0.000 1.069 > show2Ratios('2:orig' = st2/st0, '1:orig' = st1/st0) 2:orig 1:orig Time user.self 0.702 0.923 Time elapsed 0.696 0.920 > > ## only call element is not equal > (icall <- which(names(pamx) == "call")) [1] 9 > pamx[[icall]] pam(x = x, k = 4, trace.lev = 2) > stopifnot(all.equal(pamx [-icall], pamxonce [-icall]), + all.equal(pamxonce[-icall], pamxonce2[-icall])) > > ## Same using specified medoids > (med0 <- 1 + round(n* c(0,1, 2.5, 4)))# lynne (~ 2010, AMD Phenom II X4 925) [1] 1 501 1251 2001 > st0 <- showSys.time(pamxst <- pam(x, 4, medoids = med0, trace.lev=2))# 13.071 0.024 13.177 C pam(): computing 3779876 dissimilarities: [Ok] pam()'s bswap(*, s=15.7788, pamonce=0): medoids given after build: medoids are 1 501 1251 2001 swp new 915 <-> 501 old; decreasing diss. 2126.83 by -197.507 swp new 1793 <-> 1251 old; decreasing diss. 1929.32 by -101.336 swp new 414 <-> 1 old; decreasing diss. 1827.98 by -86.3404 swp new 2153 <-> 2001 old; decreasing diss. 
1741.64 by -8.40201 end{bswap()}, end{cstat()} Time user system elapsed Time 2.432 0.000 2.439 > st1 <- showSys.time(pamxoncest <- pam(x, 4, medoids = med0, pamonce=TRUE, trace.lev=2))# 8.503 0.024 8.578 C pam(): computing 3779876 dissimilarities: [Ok] pam()'s bswap(*, s=15.7788, pamonce=1): medoids given after build: medoids are 1 501 1251 2001 swp new 915 <-> 501 old; decreasing diss. 2126.83 by -197.507 swp new 1793 <-> 1251 old; decreasing diss. 1929.32 by -101.336 swp new 414 <-> 1 old; decreasing diss. 1827.98 by -86.3404 swp new 2153 <-> 2001 old; decreasing diss. 1741.64 by -8.40201 end{bswap()}, end{cstat()} Time user system elapsed Time 2.112 0.000 2.118 > st2 <- showSys.time(pamxonce2st <- pam(x, 4, medoids = med0, pamonce=2, trace.lev=2))# 5.587 0.025 5.647 C pam(): computing 3779876 dissimilarities: [Ok] pam()'s bswap(*, s=15.7788, pamonce=2): medoids given after build: medoids are 1 501 1251 2001 swp new 915 <-> 501 old; decreasing diss. 2126.83 by -197.507 swp new 1793 <-> 1251 old; decreasing diss. 1929.32 by -101.336 swp new 414 <-> 1 old; decreasing diss. 1827.98 by -86.3404 swp new 2153 <-> 2001 old; decreasing diss. 1741.64 by -8.40201 end{bswap()}, end{cstat()} Time user system elapsed Time 1.288 0.000 1.289 > show2Ratios('2:orig' = st2/st0, '1:orig' = st1/st0) 2:orig 1:orig Time user.self 0.530 0.868 Time elapsed 0.528 0.868 > > ## only call element is not equal > stopifnot(all.equal(pamxst [-icall], pamxoncest [-icall]), + all.equal(pamxoncest[-icall], pamxonce2st[-icall])) > > ## Different starting values > med0 <- 1:4 # lynne (~ 2010, AMD Phenom II X4 925) > st0 <- showSys.time(pamxst <- pam(x, 4, medoids = med0, trace.lev=2))# 13.416 0.023 13.529 C pam(): computing 3779876 dissimilarities: [Ok] pam()'s bswap(*, s=15.7788, pamonce=0): medoids given after build: medoids are 1 2 3 4 swp new 1421 <-> 4 old; decreasing diss. 21009.4 by -15939.9 swp new 2153 <-> 3 old; decreasing diss. 5069.52 by -1657.88 swp new 915 <-> 2 old; decreasing diss. 
3411.65 by -1592.06 swp new 414 <-> 1 old; decreasing diss. 1819.59 by -86.3404 swp new 1793 <-> 1421 old; decreasing diss. 1733.25 by -0.00639415 end{bswap()}, end{cstat()} Time user system elapsed Time 3.008 0.000 3.016 > st1 <- showSys.time(pamxoncest <- pam(x, 4, medoids = med0, pamonce=TRUE, trace.lev=2))# 8.384 0.024 8.459 C pam(): computing 3779876 dissimilarities: [Ok] pam()'s bswap(*, s=15.7788, pamonce=1): medoids given after build: medoids are 1 2 3 4 swp new 1421 <-> 4 old; decreasing diss. 21009.4 by -15939.9 swp new 2153 <-> 3 old; decreasing diss. 5069.52 by -1657.88 swp new 915 <-> 2 old; decreasing diss. 3411.65 by -1592.06 swp new 414 <-> 1 old; decreasing diss. 1819.59 by -86.3404 swp new 1793 <-> 1421 old; decreasing diss. 1733.25 by -0.00639415 end{bswap()}, end{cstat()} Time user system elapsed Time 2.441 0.000 2.448 > st2 <- showSys.time(pamxonce2st <- pam(x, 4, medoids = med0, pamonce=2, trace.lev=2))# 5.455 0.030 5.520 C pam(): computing 3779876 dissimilarities: [Ok] pam()'s bswap(*, s=15.7788, pamonce=2): medoids given after build: medoids are 1 2 3 4 swp new 1421 <-> 4 old; decreasing diss. 21009.4 by -15939.9 swp new 2153 <-> 3 old; decreasing diss. 5069.52 by -1657.88 swp new 915 <-> 2 old; decreasing diss. 3411.65 by -1592.06 swp new 414 <-> 1 old; decreasing diss. 1819.59 by -86.3404 swp new 1793 <-> 1421 old; decreasing diss. 
1733.25 by -0.00639415 end{bswap()}, end{cstat()} Time user system elapsed Time 1.468 0.000 1.470 > show2Ratios('2:orig' = st2/st0, '1:orig' = st1/st0) 2:orig 1:orig Time user.self 0.488 0.812 Time elapsed 0.487 0.812 > > ## only call element is not equal > stopifnot(all.equal(pamxst [-icall], pamxoncest [-icall]), + all.equal(pamxoncest[-icall], pamxonce2st[-icall])) > > > ## Medoid bug --- MM: Fixed, well "0L+ hack", in my pam.q, on 2012-01-31 > ## ---------- > med0 <- (1:6) > st0 <- showSys.time(pamxst <- pam(x, 6, medoids = med0 , trace.lev=2)) C pam(): computing 3779876 dissimilarities: [Ok] pam()'s bswap(*, s=15.7788, pamonce=0): medoids given after build: medoids are 1 2 3 4 5 6 swp new 1421 <-> 6 old; decreasing diss. 20991.1 by -15949.5 swp new 2153 <-> 4 old; decreasing diss. 5041.66 by -1676.25 swp new 915 <-> 3 old; decreasing diss. 3365.41 by -1671.37 swp new 325 <-> 2 old; decreasing diss. 1694.04 by -53.8582 swp new 2720 <-> 5 old; decreasing diss. 1640.18 by -26.6572 swp new 2696 <-> 2153 old; decreasing diss. 1613.53 by -19.0531 swp new 52 <-> 1 old; decreasing diss. 1594.47 by -13.965 swp new 2709 <-> 2720 old; decreasing diss. 1580.51 by -5.81848 swp new 199 <-> 325 old; decreasing diss. 1574.69 by -2.65496 swp new 438 <-> 52 old; decreasing diss. 1572.03 by -1.77054 swp new 2082 <-> 2696 old; decreasing diss. 1570.26 by -0.187256 end{bswap()}, end{cstat()} Time user system elapsed Time 8.140 0.000 8.161 > stopifnot(identical(med0, 1:6)) > med0 <- (1:6) > st1 <- showSys.time(pamxst.1 <- pam(x, 6, medoids = med0 , pamonce=1, trace.lev=2)) C pam(): computing 3779876 dissimilarities: [Ok] pam()'s bswap(*, s=15.7788, pamonce=1): medoids given after build: medoids are 1 2 3 4 5 6 swp new 1421 <-> 6 old; decreasing diss. 20991.1 by -15949.5 swp new 2153 <-> 4 old; decreasing diss. 5041.66 by -1676.25 swp new 915 <-> 3 old; decreasing diss. 3365.41 by -1671.37 swp new 325 <-> 2 old; decreasing diss. 
1694.04 by -53.8582 swp new 2720 <-> 5 old; decreasing diss. 1640.18 by -26.6572 swp new 2696 <-> 2153 old; decreasing diss. 1613.53 by -19.0531 swp new 52 <-> 1 old; decreasing diss. 1594.47 by -13.965 swp new 2709 <-> 2720 old; decreasing diss. 1580.51 by -5.81848 swp new 199 <-> 325 old; decreasing diss. 1574.69 by -2.65496 swp new 438 <-> 52 old; decreasing diss. 1572.03 by -1.77054 swp new 2082 <-> 2696 old; decreasing diss. 1570.26 by -0.187256 end{bswap()}, end{cstat()} Time user system elapsed Time 6.817 0.000 6.833 > stopifnot(identical(med0, 1:6)) > stopifnot(all.equal(pamxst[-icall], pamxst.1 [-icall])) > > > ## Last Line: > cat('Time elapsed: ', proc.time() - .proctime00,'\n') Time elapsed: 33.318 0.032 33.446 0 0 > > cluster/tests/pam.R0000644000176000001440000001366711712175353014047 0ustar ripleyuserslibrary(cluster) ## Compare on these: nms <- c("clustering", "objective", "isolation", "clusinfo", "silinfo") nm2 <- c("medoids", "id.med", nms) nm3 <- nm2[- pmatch("obj", nm2)] (x <- x0 <- cbind(V1 = (-3:4)^2, V2 = c(0:6,NA), V3 = c(1,2,NA,7,NA,8:9,8))) (px <- pam(x,2, metric="manhattan")) stopifnot(identical(x,x0))# DUP=FALSE .. pd <- pam(dist(x,"manhattan"), 2) px2 <- pam(x,2, metric="manhattan", keep.diss=FALSE, keep.data=FALSE) pdC <- pam(x,2, metric="manhattan", cluster.only = TRUE) stopifnot(identical(px[nms], pd[nms]), identical(px[nms], px2[nms]), identical(pdC, px2$clustering), ## and for default dist "euclidean": identical(pam(x, 2)[nms], pam(dist(x),2)[nms]) ) set.seed(253) ## generate 250 objects, divided into 2 clusters. 
x <- rbind(cbind(rnorm(120, 0,8), rnorm(120, 0,8)), cbind(rnorm(130,50,8), rnorm(130,10,8))) .proctime00 <- proc.time() summary(px2 <- pam(x, 2)) pdx <- pam(dist(x), 2) all.equal(px2[nms], pdx[nms], tol = 1e-12) ## TRUE pdxK <- pam(dist(x), 2, keep.diss = TRUE) stopifnot(identical(pdx[nm2], pdxK[nm2])) spdx <- silhouette(pdx) summary(spdx) spdx postscript("pam-tst.ps") if(FALSE) plot(spdx)# the silhouette ## is now identical : plot(pdx)# failed in 1.7.0 -- now only does silhouette par(mfrow = 2:1) ## new 'dist' argument for clusplot(): plot(pdx, dist=dist(x)) ## but this should work automagically (via eval()) as well: plot(pdx) ## or this clusplot(pdx) data(ruspini) summary(pr4 <- pam(ruspini, 4)) (pr3 <- pam(ruspini, 3)) (pr5 <- pam(ruspini, 5)) data(votes.repub) summary(pv3 <- pam(votes.repub, 3)) (pv4 <- pam(votes.repub, 4)) (pv6 <- pam(votes.repub, 6, trace = 3)) cat('Time elapsed: ', proc.time() - .proctime00,'\n') ## re-starting with medoids from pv6 shouldn't change: pv6. <- pam(votes.repub, 6, medoids = pv6$id.med, trace = 3) identical(pv6[nm3], pv6.[nm3]) ## This example seg.faulted at some point: d.st <- data.frame(V1= c(9, 12, 12, 15, 9, 9, 13, 11, 15, 10, 13, 13, 13, 15, 8, 13, 13, 10, 7, 9, 6, 11, 3), V2= c(5, 9, 3, 5, 1, 1, 2, NA, 10, 1, 4, 7, 4, NA, NA, 5, 2, 4, 3, 3, 6, 1, 1), V3 = c(63, 41, 59, 50, 290, 226, 60, 36, 32, 121, 70, 51, 79, 32, 42, 39, 76, 60, 56, 88, 57, 309, 254), V4 = c(146, 43, 78, 88, 314, 149, 78, NA, 238, 153, 159, 222, 203, NA, NA, 74, 100, 111, 9, 180, 50, 256, 107)) dd <- daisy(d.st, stand = TRUE) (r0 <- pam(dd, 5))# cluster 5 = { 23 } -- on single observation ## This gave only 3 different medoids -> and seg.fault: (r5 <- pam(dd, 5, medoids = c(1,3,20,2,5), trace = 2)) # now "fine" dev.off() ##------------------------ Testing pam() with new "pamonce" argument: ## This is from "next version of Matrix" test-tools-1.R: showSys.time <- function(expr) { ## prepend 'Time' for R CMD Rdiff st <- system.time(expr) 
writeLines(paste("Time", capture.output(print(st)))) invisible(st) } show2Ratios <- function(...) { stopifnot(length(rgs <- list(...)) == 2, nchar(ns <- names(rgs)) > 0) r <- round(cbind(..1, ..2)[c(1,3),], 3) dimnames(r) <- list(paste("Time ", rownames(r)), ns) r } n <- 1000 ## If not enough cases, all CPU times equals 0. n <- 500 # for now, and automatic testing sd <- 0.5 set.seed(13) n2 <- as.integer(round(n * 1.5)) x <- rbind(cbind(rnorm( n,0,sd), rnorm( n,0,sd)), cbind(rnorm(n2,5,sd), rnorm(n2,5,sd)), cbind(rnorm(n2,7,sd), rnorm(n2,7,sd)), cbind(rnorm(n2,9,sd), rnorm(n2,9,sd))) ## original algorithm st0 <- showSys.time(pamx <- pam(x, 4, trace.lev=2))# 8.157 0.024 8.233 ## bswapPamOnce algorithm st1 <- showSys.time(pamxonce <- pam(x, 4, pamonce=TRUE, trace.lev=2))# 6.122 0.024 6.181 ## bswapPamOnceDistIndice st2 <- showSys.time(pamxonce2 <- pam(x, 4, pamonce = 2, trace.lev=2))# 4.101 0.024 4.151 show2Ratios('2:orig' = st2/st0, '1:orig' = st1/st0) ## only call element is not equal (icall <- which(names(pamx) == "call")) pamx[[icall]] stopifnot(all.equal(pamx [-icall], pamxonce [-icall]), all.equal(pamxonce[-icall], pamxonce2[-icall])) ## Same using specified medoids (med0 <- 1 + round(n* c(0,1, 2.5, 4)))# lynne (~ 2010, AMD Phenom II X4 925) st0 <- showSys.time(pamxst <- pam(x, 4, medoids = med0, trace.lev=2))# 13.071 0.024 13.177 st1 <- showSys.time(pamxoncest <- pam(x, 4, medoids = med0, pamonce=TRUE, trace.lev=2))# 8.503 0.024 8.578 st2 <- showSys.time(pamxonce2st <- pam(x, 4, medoids = med0, pamonce=2, trace.lev=2))# 5.587 0.025 5.647 show2Ratios('2:orig' = st2/st0, '1:orig' = st1/st0) ## only call element is not equal stopifnot(all.equal(pamxst [-icall], pamxoncest [-icall]), all.equal(pamxoncest[-icall], pamxonce2st[-icall])) ## Different starting values med0 <- 1:4 # lynne (~ 2010, AMD Phenom II X4 925) st0 <- showSys.time(pamxst <- pam(x, 4, medoids = med0, trace.lev=2))# 13.416 0.023 13.529 st1 <- showSys.time(pamxoncest <- pam(x, 4, medoids = med0, 
pamonce=TRUE, trace.lev=2))# 8.384 0.024 8.459 st2 <- showSys.time(pamxonce2st <- pam(x, 4, medoids = med0, pamonce=2, trace.lev=2))# 5.455 0.030 5.520 show2Ratios('2:orig' = st2/st0, '1:orig' = st1/st0) ## only call element is not equal stopifnot(all.equal(pamxst [-icall], pamxoncest [-icall]), all.equal(pamxoncest[-icall], pamxonce2st[-icall])) ## Medoid bug --- MM: Fixed, well "0L+ hack", in my pam.q, on 2012-01-31 ## ---------- med0 <- (1:6) st0 <- showSys.time(pamxst <- pam(x, 6, medoids = med0 , trace.lev=2)) stopifnot(identical(med0, 1:6)) med0 <- (1:6) st1 <- showSys.time(pamxst.1 <- pam(x, 6, medoids = med0 , pamonce=1, trace.lev=2)) stopifnot(identical(med0, 1:6)) stopifnot(all.equal(pamxst[-icall], pamxst.1 [-icall])) ## Last Line: cat('Time elapsed: ', proc.time() - .proctime00,'\n') cluster/tests/mona.Rout.save0000644000176000001440000000646312124335144015677 0ustar ripleyusers R version 3.0.0 beta (2013-03-25 r62402) -- "Masked Marvel" Copyright (C) 2013 The R Foundation for Statistical Computing ISBN 3-900051-07-0 Platform: x86_64-unknown-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. 
> library(cluster) > > data(animals) > (mani <- mona(animals)) Revised data: war fly ver end gro hai ant 0 0 0 0 1 0 bee 0 1 0 0 1 1 cat 1 0 1 0 0 1 cpl 0 0 0 0 0 1 chi 1 0 1 1 1 1 cow 1 0 1 0 1 1 duc 1 1 1 0 1 0 eag 1 1 1 1 0 0 ele 1 0 1 1 1 0 fly 0 1 0 0 0 0 fro 0 0 1 1 0 0 her 0 0 1 0 1 0 lio 1 0 1 1 1 1 liz 0 0 1 0 0 0 lob 0 0 0 0 0 0 man 1 0 1 1 1 1 rab 1 0 1 0 1 1 sal 0 0 1 0 0 0 spi 0 0 0 0 0 1 wha 1 0 1 1 1 0 Order of objects: [1] ant cpl spi lob bee fly fro her liz sal cat cow rab chi lio man ele wha duc [20] eag Variable used: [1] gro NULL hai fly gro ver end gro NULL war gro NULL end NULL NULL [16] hai NULL fly end Separation step: [1] 4 0 5 3 4 2 3 4 0 1 4 0 3 0 0 4 0 2 3 Available components: [1] "data" "order" "variable" "step" "call" "order.lab" > > str(mani) List of 6 $ data : int [1:20, 1:6] 0 0 1 0 1 1 1 1 1 0 ... ..- attr(*, "dimnames")=List of 2 .. ..$ : chr [1:20] "ant" "bee" "cat" "cpl" ... .. ..$ : chr [1:6] "war" "fly" "ver" "end" ... $ order : int [1:20] 1 4 19 15 2 10 11 12 14 18 ... $ variable : chr [1:19] "gro" "NULL" "hai" "fly" ... $ step : int [1:19] 4 0 5 3 4 2 3 4 0 1 ... $ call : language mona(x = animals) $ order.lab: chr [1:20] "ant" "cpl" "spi" "lob" ... - attr(*, "class")= chr "mona" > > if(require(MASS)) { + + if(R.version$major != "1" || as.numeric(R.version$minor) >= 7) + RNGversion("1.6") + set.seed(253) + n <- 512; p <- 3 + Sig <- diag(p); Sig[] <- 0.8 ^ abs(col(Sig) - row(Sig)) + x3 <- mvrnorm(n, rep(0,p), Sig) >= 0 + x <- cbind(x3, rbinom(n, size=1, prob = 1/2)) + + print(sapply(as.data.frame(x), table)) + + mx <- mona(x) + str(mx) + print(lapply(mx[c(1,3,4)], table)) + } Loading required package: MASS V1 V2 V3 V4 0 244 245 261 238 1 268 267 251 274 List of 5 $ data : int [1:512, 1:4] 0 0 0 0 1 1 1 0 1 0 ... $ order : int [1:512] 1 137 154 204 353 398 30 52 69 85 ... $ variable: int [1:511] 0 0 0 0 0 3 0 0 0 0 ... $ step : int [1:511] 0 0 0 0 0 4 0 0 0 0 ... 
$ call : language mona(x = x) - attr(*, "class")= chr "mona" $data 0 1 988 1060 $variable 0 1 2 3 4 496 4 1 5 5 $step 0 1 2 3 4 496 1 2 4 8 Warning message: In RNGkind("Marsaglia-Multicarry", "Buggy Kinderman-Ramage") : buggy version of Kinderman-Ramage generator used > > proc.time() user system elapsed 0.350 0.054 0.504 cluster/tests/mona.R0000644000176000001440000000075010473136753014215 0ustar ripleyuserslibrary(cluster) data(animals) (mani <- mona(animals)) str(mani) if(require(MASS)) { if(R.version$major != "1" || as.numeric(R.version$minor) >= 7) RNGversion("1.6") set.seed(253) n <- 512; p <- 3 Sig <- diag(p); Sig[] <- 0.8 ^ abs(col(Sig) - row(Sig)) x3 <- mvrnorm(n, rep(0,p), Sig) >= 0 x <- cbind(x3, rbinom(n, size=1, prob = 1/2)) print(sapply(as.data.frame(x), table)) mx <- mona(x) str(mx) print(lapply(mx[c(1,3,4)], table)) } cluster/tests/fanny-ex.R0000644000176000001440000000557311646600064015012 0ustar ripleyusers.libPaths() # show full library tree {also as check of R CMD check!} library(cluster) ####---------- Tests for FANNY i.e., fanny() -------------------------- #### ### -- thanks to ../.Rbuildignore , the output of this is ### -- only compared to saved values for the maintainer ###--- An extension of example(fanny) : ------------------- set.seed(21) ## generate 10+15 objects in two clusters, plus 3 objects lying ## between those clusters. x <- rbind(cbind(rnorm(10, 0, 0.5), rnorm(10, 0, 0.5)), cbind(rnorm(15, 5, 0.5), rnorm(15, 5, 0.5)), cbind(rnorm( 3,3.2,0.5), rnorm( 3,3.2,0.5))) .proctime00 <- proc.time() (fannyx <- fanny(x, 2)) summary(fannyx) str(fannyx) ## Different platforms differ (even gcc 3.0.1 vs 3.2 on same platform)! ## {70 or 71 iterations} ## ==> No "fanny-ex.Rout.save" is distributed ! 
## -------------------------------------------- summary(fanny(x,3))# one extra cluster (fanny(x,2, memb.exp = 1.5)) (fanny(x,2, memb.exp = 1.2)) (fanny(x,2, memb.exp = 1.1)) (fanny(x,2, memb.exp = 3)) data(ruspini) # < to run under R 1.9.1 summary(fanny(ruspini, 3), digits = 9) summary(fanny(ruspini, 4), digits = 9)# 'correct' #{clusters} summary(fanny(ruspini, 5), digits = 9) cat('Time elapsed: ', proc.time() - .proctime00,'\n') data(chorSub) p4cl <- pam(chorSub, k = 4, cluster.only = TRUE) ## The first two are "completely fuzzy" -- and now give a warnings f4.20 <- fanny(chorSub, k = 4, trace.lev = 1) ; f4.20$coef f4.18 <- fanny(chorSub, k = 4, memb.exp = 1.8) # same problem f4.18. <- fanny(chorSub, k = 4, memb.exp = 1.8, iniMem.p = f4.20$membership) # very quick convergence stopifnot(all.equal(f4.18[-c(7,9)], f4.18.[-c(7,9)], tol = 5e-7)) f4.16 <- fanny(chorSub, k = 4, memb.exp = 1.6) # now gives 4 crisp clusters f4.16. <- fanny(chorSub, k = 4, memb.exp = 1.6, iniMem.p = f4.18$membership, trace.lev = 2)# "converges" immediately - WRONGLY! 
f4.16.2 <- fanny(chorSub, k = 4, memb.exp = 1.6, iniMem.p = cluster:::as.membership(p4cl), tol = 1e-10, trace.lev = 2)## looks much better: stopifnot(f4.16$clustering == f4.16.2$clustering, all.equal(f4.16[-c(1,7,9)], f4.16.2[-c(1,7,9)], tol = 1e-7), all.equal(f4.16$membership, f4.16.2$membership, tol = 0.001)) ## the memberships are quite close but have only converged to precision 0.000228 f4.14 <- fanny(chorSub, k = 4, memb.exp = 1.4) f4.12 <- fanny(chorSub, k = 4, memb.exp = 1.2) table(f4.12$clustering, f4.14$clustering)# close but different table(f4.16$clustering, f4.14$clustering)# dito table(f4.12$clustering, f4.16$clustering)# hence differ even more symnum(cbind(f4.16$membership, 1, f4.12$membership), cutpoints= c(0., 0.2, 0.6, 0.8, 0.9, 0.95, 1 -1e-7, 1 +1e-7), symbols = c(" ", ".", ",", "+", "*", "B","1")) ## Last Line: cat('Time elapsed: ', proc.time() - .proctime00,'\n') cluster/tests/ellipsoid-ex.Rout.save0000644000176000001440000001440511552355523017345 0ustar ripleyusers R version 2.13.0 Patched (2011-04-16 r55459) Copyright (C) 2011 The R Foundation for Statistical Computing ISBN 3-900051-07-0 Platform: x86_64-unknown-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. 
> library(cluster) > > eh <- ellipsoidhull(cbind(x=1:4, y = 1:4)) #singular Error in Fortran routine computing the spanning ellipsoid, probably collinear data Warning message: In ellipsoidhull(cbind(x = 1:4, y = 1:4)) : possibly not converged in 5000 iterations > eh 'ellipsoid' in 2 dimensions: center = ( 2.5 2.5 ); squared ave.radius d^2 = 0 and shape matrix = x y x 1.25 1.25 y 1.25 1.25 hence, area = 0 ** Warning: ** the algorithm did not terminate reliably! most probably because of collinear data > > set.seed(157) > for(n in 4:10) { ## n=2 and 3 still differ -- platform dependently! + cat("n = ",n,"\n") + x2 <- rnorm(n) + print(ellipsoidhull(cbind(1:n, x2))) + print(ellipsoidhull(cbind(1:n, x2, 4*x2 + rnorm(n)))) + } n = 4 'ellipsoid' in 2 dimensions: center = ( 2.66215 0.82086 ); squared ave.radius d^2 = 2 and shape matrix = x2 1.55901 0.91804 x2 0.91804 0.67732 hence, area = 2.9008 'ellipsoid' in 3 dimensions: center = ( 2.50000 0.74629 2.95583 ); squared ave.radius d^2 = 3 and shape matrix = x2 1.25000 0.72591 1.8427 x2 0.72591 0.52562 1.5159 1.84268 1.51588 4.7918 hence, volume = 1.3843 n = 5 'ellipsoid' in 2 dimensions: center = ( 3.0726 1.2307 ); squared ave.radius d^2 = 2 and shape matrix = x2 2.21414 0.45527 x2 0.45527 2.39853 hence, area = 14.194 'ellipsoid' in 3 dimensions: center = ( 2.7989 1.1654 4.6782 ); squared ave.radius d^2 = 3 and shape matrix = x2 1.92664 0.40109 1.4317 x2 0.40109 1.76625 6.9793 1.43170 6.97928 28.0530 hence, volume = 11.532 n = 6 'ellipsoid' in 2 dimensions: center = ( 3.04367 0.97016 ); squared ave.radius d^2 = 2 and shape matrix = x2 4.39182 0.30833 x2 0.30833 0.59967 hence, area = 10.011 'ellipsoid' in 3 dimensions: center = ( 3.3190 0.7678 3.2037 ); squared ave.radius d^2 = 3 and shape matrix = x2 2.786928 -0.044373 -1.1467 x2 -0.044373 0.559495 1.5496 -1.146728 1.549620 5.5025 hence, volume = 10.741 n = 7 'ellipsoid' in 2 dimensions: center = ( 3.98294 -0.16567 ); squared ave.radius d^2 = 2 and shape matrix = x2 4.62064 
-0.83135 x2 -0.83135 0.37030 hence, area = 6.3453 'ellipsoid' in 3 dimensions: center = ( 4.24890 -0.25918 -0.76499 ); squared ave.radius d^2 = 3 and shape matrix = x2 4.6494 -0.93240 -4.0758 x2 -0.9324 0.39866 1.9725 -4.0758 1.97253 10.4366 hence, volume = 6.9939 n = 8 'ellipsoid' in 2 dimensions: center = ( 3.6699 -0.4532 ); squared ave.radius d^2 = 2 and shape matrix = x2 9.4327 -2.5269 x2 -2.5269 3.7270 hence, area = 33.702 'ellipsoid' in 3 dimensions: center = ( 4.22030 -0.37953 -1.53922 ); squared ave.radius d^2 = 3 and shape matrix = x2 7.5211 -1.4804 -6.6587 x2 -1.4804 2.6972 11.8198 -6.6587 11.8198 52.6243 hence, volume = 36.383 n = 9 'ellipsoid' in 2 dimensions: center = ( 5.324396 -0.037779 ); squared ave.radius d^2 = 2 and shape matrix = x2 10.1098 -1.3708 x2 -1.3708 2.1341 hence, area = 27.885 'ellipsoid' in 3 dimensions: center = ( 5.44700 -0.12504 -1.13538 ); squared ave.radius d^2 = 3 and shape matrix = x2 7.0364 -1.2424 -5.5741 x2 -1.2424 1.7652 7.3654 -5.5741 7.3654 31.5558 hence, volume = 27.782 n = 10 'ellipsoid' in 2 dimensions: center = ( 4.85439 0.28401 ); squared ave.radius d^2 = 2 and shape matrix = x2 13.932 0.64900 x2 0.649 0.95132 hence, area = 22.508 'ellipsoid' in 3 dimensions: center = ( 5.12537 0.25024 0.86441 ); squared ave.radius d^2 = 3 and shape matrix = x2 9.29343 0.56973 1.4143 x2 0.56973 0.76519 1.8941 1.41427 1.89409 6.3803 hence, volume = 31.936 > > set.seed(1) > x <- rt(100, df = 4) > y <- 100 + 5 * x + rnorm(100) > ellipsoidhull(cbind(x,y)) 'ellipsoid' in 2 dimensions: center = ( -1.3874 93.0589 ); squared ave.radius d^2 = 2 and shape matrix = x y x 32.924 160.54 y 160.543 785.88 hence, area = 62.993 > z <- 10 - 8 * x + y + rnorm(100) > (e3 <- ellipsoidhull(cbind(x,y,z))) 'ellipsoid' in 3 dimensions: center = ( -0.71678 96.09950 111.61029 ); squared ave.radius d^2 = 3 and shape matrix = x y z x 26.005 126.41 -80.284 y 126.410 616.94 -387.459 z -80.284 -387.46 254.006 hence, volume = 130.45 > d3o <- cbind(x,y + rt(100,3), 2 
* x^2 + rt(100, 2)) > (e. <- ellipsoidhull(d3o, ret.sq = TRUE)) 'ellipsoid' in 3 dimensions: center = ( 0.32491 101.68998 39.48045 ); squared ave.radius d^2 = 3 and shape matrix = x x 19.655 94.364 48.739 94.364 490.860 181.022 48.739 181.022 1551.980 hence, volume = 9463.8 > stopifnot(all.equal(e.$sqdist, + with(e., mahalanobis(d3o, center=loc, cov=cov)), + tol = 1e-13)) > d5 <- cbind(d3o, 2*abs(y)^1.5 + rt(100,3), 3*x - sqrt(abs(y))) > (e5 <- ellipsoidhull(d5, ret.sq = TRUE)) 'ellipsoid' in 5 dimensions: center = ( -0.32451 98.54780 37.33619 1973.88383 -10.81891 ); squared ave.radius d^2 = 5 and shape matrix = x x 17.8372 87.0277 8.3389 2607.9 49.117 87.0277 446.9453 -2.0502 12745.4 239.470 8.3389 -2.0502 1192.8439 2447.8 24.458 2607.9264 12745.3826 2447.8006 384472.1 7179.239 49.1172 239.4703 24.4582 7179.2 135.260 hence, volume = 10218 > tail(sort(e5$sqdist)) ## 4 values 5.00039 ... 5.0099 [1] 4.999915 5.000005 5.000010 5.000088 5.001444 5.009849 > cluster/tests/ellipsoid-ex.R0000644000176000001440000000146111131111634015640 0ustar ripleyuserslibrary(cluster) eh <- ellipsoidhull(cbind(x=1:4, y = 1:4)) #singular eh set.seed(157) for(n in 4:10) { ## n=2 and 3 still differ -- platform dependently! cat("n = ",n,"\n") x2 <- rnorm(n) print(ellipsoidhull(cbind(1:n, x2))) print(ellipsoidhull(cbind(1:n, x2, 4*x2 + rnorm(n)))) } set.seed(1) x <- rt(100, df = 4) y <- 100 + 5 * x + rnorm(100) ellipsoidhull(cbind(x,y)) z <- 10 - 8 * x + y + rnorm(100) (e3 <- ellipsoidhull(cbind(x,y,z))) d3o <- cbind(x,y + rt(100,3), 2 * x^2 + rt(100, 2)) (e. <- ellipsoidhull(d3o, ret.sq = TRUE)) stopifnot(all.equal(e.$sqdist, with(e., mahalanobis(d3o, center=loc, cov=cov)), tol = 1e-13)) d5 <- cbind(d3o, 2*abs(y)^1.5 + rt(100,3), 3*x - sqrt(abs(y))) (e5 <- ellipsoidhull(d5, ret.sq = TRUE)) tail(sort(e5$sqdist)) ## 4 values 5.00039 ... 
5.0099 cluster/tests/diana-ex.Rout.save0000644000176000001440000004203611674325037016440 0ustar ripleyusers R version 2.14.1 RC (2011-12-20 r57943) Copyright (C) 2011 The R Foundation for Statistical Computing ISBN 3-900051-07-0 Platform: x86_64-unknown-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(cluster) > options(digits = 6) > data(votes.repub) > di.votes <- daisy(votes.repub) > > .p00 <- proc.time() > summary(diana(votes.repub, metric = "manhattan", stand = TRUE)) Merge: [,1] [,2] [1,] -7 -32 [2,] -13 -35 [3,] -12 -50 [4,] 1 -30 [5,] -26 -28 [6,] -5 -37 [7,] -22 -38 [8,] -21 -39 [9,] -16 -27 [10,] 4 2 [11,] -25 -48 [12,] -42 -46 [13,] -6 -14 [14,] -34 -41 [15,] -8 -20 [16,] 5 -31 [17,] 10 7 [18,] -17 -47 [19,] -3 -44 [20,] -33 12 [21,] 15 18 [22,] 17 -29 [23,] 22 -49 [24,] 21 11 [25,] 23 -15 [26,] -11 -19 [27,] 3 9 [28,] 8 -23 [29,] 19 16 [30,] 27 14 [31,] 6 25 [32,] -1 -10 [33,] 31 13 [34,] 29 -36 [35,] -2 -45 [36,] -9 -43 [37,] 24 20 [38,] 32 -4 [39,] -24 -40 [40,] 38 -18 [41,] 33 30 [42,] 34 37 [43,] 35 26 [44,] 41 28 [45,] 40 36 [46,] 42 44 [47,] 45 39 [48,] 43 46 [49,] 47 48 Order of objects: [1] Alabama Georgia Arkansas Louisiana Florida [6] Texas Mississippi South Carolina Alaska Vermont [11] Hawaii Maine Arizona Utah Montana [16] Nevada New Mexico Oklahoma Delaware Maryland [21] Kentucky Washington Missouri West Virginia North Carolina [26] Tennessee Virginia California Oregon Connecticut [31] New York New Jersey Illinois Ohio Michigan [36] Pennsylvania New Hampshire Wisconsin Iowa Colorado [41] Indiana 
Idaho Wyoming Kansas Nebraska [46] North Dakota South Dakota Massachusetts Rhode Island Minnesota Height: [1] 27.36345 33.96925 39.65826 48.53428 31.89965 72.59850 35.69152 [8] 167.58020 31.58222 43.84601 24.48796 85.55248 18.39339 25.67631 [15] 11.49397 17.45552 28.62550 42.54480 16.48510 20.04450 17.87516 [22] 21.98373 14.21808 33.61071 18.39733 14.75762 56.55675 11.70132 [29] 27.05887 8.38200 11.36820 13.25237 9.23004 17.83484 12.70819 [36] 20.66714 21.03997 23.66586 28.60541 15.31703 40.33905 10.46294 [43] 24.83525 12.80419 26.36292 16.25192 47.25773 12.79160 24.87206 Divisive coefficient: [1] 0.886918 1225 dissimilarities, summarized : Min. 1st Qu. Median Mean 3rd Qu. Max. 8.382 25.540 34.510 45.060 56.020 167.600 Metric : manhattan Number of objects : 50 Available components: [1] "order" "height" "dc" "merge" "diss" "call" [7] "order.lab" "data" > summary(diana(di.votes, keep.diss = FALSE)) Merge: [,1] [,2] [1,] -12 -50 [2,] -13 -32 [3,] -14 -35 [4,] -7 -29 [5,] -21 -39 [6,] -3 -28 [7,] -25 -48 [8,] -16 -27 [9,] -15 -41 [10,] 2 -30 [11,] 6 -26 [12,] -33 -42 [13,] 12 -46 [14,] 1 -44 [15,] 10 3 [16,] -22 -38 [17,] -11 -19 [18,] -5 -47 [19,] -17 -20 [20,] -2 -45 [21,] 14 -31 [22,] -37 -49 [23,] 9 8 [24,] 4 15 [25,] -8 19 [26,] -6 21 [27,] 5 -23 [28,] 24 16 [29,] 26 -36 [30,] 11 29 [31,] -1 -10 [32,] 28 22 [33,] 23 -34 [34,] 7 13 [35,] -4 -9 [36,] 20 17 [37,] -24 -40 [38,] 31 -43 [39,] 32 33 [40,] 25 34 [41,] 30 18 [42,] 35 -18 [43,] 38 42 [44,] 39 27 [45,] 41 40 [46,] 36 44 [47,] 43 37 [48,] 46 45 [49,] 47 48 Order of objects: [1] Alabama Georgia Texas Arkansas Florida [6] Louisiana Mississippi South Carolina Alaska Vermont [11] Hawaii Maine Connecticut New Hampshire Illinois [16] New York New Jersey Indiana Ohio Michigan [21] Pennsylvania Oregon Wisconsin Iowa South Dakota [26] Kansas Nebraska North Dakota Massachusetts Rhode Island [31] Minnesota Arizona Nevada Montana Colorado [36] Idaho Wyoming Utah New Mexico Oklahoma [41] California Washington Delaware 
Kentucky Maryland [46] Missouri West Virginia North Carolina Tennessee Virginia Height: [1] 48.2397 63.1862 72.9221 56.1363 72.9221 116.7048 63.0951 281.9508 [9] 33.8330 58.0384 32.7611 106.7448 20.5216 39.1728 19.8436 27.0243 [17] 31.4966 20.2258 47.1690 31.6595 49.2428 36.7667 64.4821 26.1547 [25] 37.4564 25.9221 50.7201 77.1184 22.6334 44.4594 178.4119 23.4206 [33] 27.8273 48.0483 43.7055 17.1992 31.1988 34.0510 48.0483 70.4868 [41] 33.2328 81.0764 43.3829 33.4744 66.7591 25.3953 54.7306 29.5099 [49] 30.1541 Divisive coefficient: [1] 0.878225 Available components: [1] "order" "height" "dc" "merge" "diss" "call" [7] "order.lab" > cat('Time elapsed: ', proc.time() - .p00,'\n') Time elapsed: 0.014 0.001 0.015 0 0 > > data(agriculture) > data(ruspini) > > .p0 <- proc.time() > dia.agr <- diana(agriculture) > drusp0 <- diana(ruspini, keep.diss=FALSE, keep.data=FALSE) > drusp1 <- diana(ruspini, metric = "manhattan") > cat('Time elapsed: ', proc.time() - .p0,'\n') Time elapsed: 0.006 0.001 0.006 0 0 > > summary(dia.agr) Merge: [,1] [,2] [1,] -1 -10 [2,] -2 -9 [3,] 1 -3 [4,] -6 -8 [5,] -5 -7 [6,] 3 -12 [7,] -4 -11 [8,] 6 4 [9,] 8 2 [10,] 7 5 [11,] 9 10 Order of objects: [1] B NL D UK F I DK L GR P E IRL Height: [1] 1.64924 2.43516 4.85077 6.72309 2.77308 8.05295 2.22036 24.03539 [9] 5.16236 12.56742 3.14006 Divisive coefficient: [1] 0.871106 66 dissimilarities, summarized : Min. 1st Qu. Median Mean 3rd Qu. Max. 
1.649 4.357 7.987 9.594 13.250 24.040 Metric : euclidean Number of objects : 12 Available components: [1] "order" "height" "dc" "merge" "diss" "call" [7] "order.lab" "data" > summary(drusp0) Merge: [,1] [,2] [1,] -18 -19 [2,] -55 -56 [3,] -27 -28 [4,] -49 -51 [5,] -33 -34 [6,] -23 -24 [7,] -59 -60 [8,] -29 -30 [9,] -67 -69 [10,] -36 -39 [11,] -32 -35 [12,] -50 -54 [13,] -37 -38 [14,] -70 -71 [15,] -64 -68 [16,] -62 -66 [17,] -12 -13 [18,] -16 1 [19,] -9 -10 [20,] -42 -43 [21,] -15 -17 [22,] -47 -48 [23,] 12 -52 [24,] -21 -22 [25,] 9 14 [26,] 2 -57 [27,] -73 -74 [28,] 6 -25 [29,] -26 11 [30,] 4 -53 [31,] 3 8 [32,] -11 17 [33,] -6 -8 [34,] -2 -3 [35,] 10 -40 [36,] -14 21 [37,] -65 25 [38,] -4 33 [39,] 36 18 [40,] 29 5 [41,] 26 7 [42,] -1 34 [43,] 24 28 [44,] 16 -63 [45,] -46 22 [46,] -31 35 [47,] 27 -75 [48,] 37 -72 [49,] 43 31 [50,] 30 23 [51,] 13 20 [52,] 15 48 [53,] 32 -20 [54,] 41 -58 [55,] 42 -5 [56,] 40 46 [57,] -44 -45 [58,] 19 39 [59,] 38 -7 [60,] 51 -41 [61,] -61 44 [62,] 50 54 [63,] 52 47 [64,] 61 63 [65,] 49 56 [66,] 59 53 [67,] 55 58 [68,] 57 62 [69,] 65 60 [70,] 67 66 [71,] 68 45 [72,] 69 71 [73,] 70 64 [74,] 73 72 Order of objects: [1] 1 2 3 5 9 10 14 15 17 16 18 19 4 6 8 7 11 12 13 20 61 62 66 63 64 [26] 68 65 67 69 70 71 72 73 74 75 21 22 23 24 25 27 28 29 30 26 32 35 33 34 31 [51] 36 39 40 37 38 42 43 41 44 45 49 51 53 50 54 52 55 56 57 59 60 58 46 47 48 Height: [1] 10.04988 6.40312 16.12452 29.12044 4.12311 17.02939 8.48528 [8] 4.24264 9.21954 4.12311 1.41421 40.24922 8.94427 6.32456 [15] 19.02630 28.84441 6.32456 4.12311 14.14214 102.07840 21.40093 [22] 4.12311 10.81665 27.07397 3.60555 13.60147 8.54400 2.82843 [29] 5.09902 3.60555 12.80625 22.80351 5.65685 12.36932 154.49595 [36] 4.47214 10.77033 2.23607 5.83095 13.03840 2.00000 6.32456 [43] 2.82843 28.16026 6.08276 3.00000 9.43398 2.23607 16.27882 [50] 11.04536 3.00000 6.70820 36.61967 3.60555 13.34166 4.24264 [57] 20.02498 94.57801 17.02939 35.35534 2.23607 6.32456 13.15295 [64] 3.16228 4.47214 
22.20360 2.00000 5.38516 9.84886 2.82843 [71] 15.29706 47.63402 11.00000 4.47214 Divisive coefficient: [1] 0.960566 Available components: [1] "order" "height" "dc" "merge" "diss" "call" [7] "order.lab" > summary(drusp1) Merge: [,1] [,2] [1,] -55 -56 [2,] -27 -28 [3,] -18 -19 [4,] -49 -51 [5,] -33 -34 [6,] -32 -35 [7,] -23 -24 [8,] -59 -60 [9,] -50 -54 [10,] -29 -30 [11,] -67 -69 [12,] -37 -38 [13,] 11 -71 [14,] -64 -68 [15,] -62 -66 [16,] -12 -13 [17,] -16 3 [18,] -9 -10 [19,] -47 -48 [20,] 9 -52 [21,] -42 -43 [22,] -39 -40 [23,] -21 -22 [24,] 13 -70 [25,] -15 -17 [26,] 1 -57 [27,] -26 6 [28,] 4 -53 [29,] 7 -25 [30,] 2 10 [31,] -11 16 [32,] -6 -8 [33,] -31 -36 [34,] -74 -75 [35,] -2 -3 [36,] -46 19 [37,] -65 24 [38,] -14 17 [39,] -4 32 [40,] 38 25 [41,] -1 35 [42,] 26 8 [43,] 27 5 [44,] 23 30 [45,] 28 20 [46,] 15 -63 [47,] 43 33 [48,] 44 29 [49,] 46 -73 [50,] 31 -20 [51,] 42 -58 [52,] -44 -45 [53,] 12 22 [54,] 37 -72 [55,] 14 54 [56,] 39 -7 [57,] 41 -5 [58,] 53 21 [59,] 18 40 [60,] 48 47 [61,] -61 49 [62,] 45 51 [63,] 58 -41 [64,] 55 34 [65,] 61 64 [66,] 57 59 [67,] 56 50 [68,] 52 62 [69,] 60 63 [70,] 66 67 [71,] 68 36 [72,] 69 71 [73,] 70 65 [74,] 73 72 Order of objects: [1] 1 2 3 5 9 10 14 16 18 19 15 17 4 6 8 7 11 12 13 20 61 62 66 63 73 [26] 64 68 65 67 69 71 70 72 74 75 21 22 27 28 29 30 23 24 25 26 32 35 33 34 31 [51] 36 37 38 39 40 42 43 41 44 45 49 51 53 50 54 52 55 56 57 59 60 58 46 47 48 Height: [1] 12 9 22 36 5 24 11 5 2 12 6 54 12 8 20 40 8 5 16 [20] 142 30 5 15 16 33 5 19 11 4 5 6 18 32 9 187 6 14 2 [39] 8 4 16 3 8 26 7 3 13 3 16 9 51 5 18 6 24 6 32 [58] 123 18 48 3 8 15 4 6 31 2 7 13 4 18 67 11 6 Divisive coefficient: [1] 0.958075 2775 dissimilarities, summarized : Min. 1st Qu. Median Mean 3rd Qu. Max. 2.00 52.50 97.00 90.95 128.00 187.00 Metric : manhattan Number of objects : 75 Available components: [1] "order" "height" "dc" "merge" "diss" "call" [7] "order.lab" "data" > str (drusp1) List of 8 $ order : int [1:75] 1 2 3 5 9 10 14 16 18 19 ... 
$ height : num [1:74] 12 9 22 36 5 24 11 5 2 12 ... $ dc : num 0.958 $ merge : int [1:74, 1:2] -55 -27 -18 -49 -33 -32 -23 -59 -50 -29 ... $ diss :Classes 'dissimilarity', 'dist' atomic [1:2775] 11 12 29 13 25 43 33 22 27 39 ... .. ..- attr(*, "Size")= int 75 .. ..- attr(*, "Metric")= chr "manhattan" .. ..- attr(*, "Labels")= chr [1:75] "1" "2" "3" "4" ... $ call : language diana(x = ruspini, metric = "manhattan") $ order.lab: chr [1:75] "1" "2" "3" "5" ... $ data : int [1:75, 1:2] 4 5 10 9 13 13 12 15 18 19 ... ..- attr(*, "dimnames")=List of 2 .. ..$ : chr [1:75] "1" "2" "3" "4" ... .. ..$ : chr [1:2] "x" "y" - attr(*, "class")= chr [1:2] "diana" "twins" > > ## From system.file("scripts/ch11.R", package = "MASS") > data(swiss) > swiss.x <- as.matrix(swiss[,-1]) > .p1 <- proc.time() > dCH <- diana(swiss.x) > cat('Time elapsed: ', proc.time() - .p1,'\n') Time elapsed: 0.001 0 0.001 0 0 > str(as.dendrogram(as.hclust(dCH)))# keep back-compatible --[dendrogram w/ 2 branches and 47 members at h = 127] |--[dendrogram w/ 2 branches and 31 members at h = 99.2] | |--[dendrogram w/ 2 branches and 11 members at h = 60.9] | | |--[dendrogram w/ 2 branches and 8 members at h = 29.1] | | | |--[dendrogram w/ 2 branches and 5 members at h = 22.7] | | | | |--[dendrogram w/ 2 branches and 4 members at h = 17.4] | | | | | |--[dendrogram w/ 2 branches and 3 members at h = 11.7] | | | | | | |--leaf "Courtelary" | | | | | | `--[dendrogram w/ 2 branches and 2 members at h = 7.48] | | | | | | |--leaf "Le Locle" | | | | | | `--leaf "ValdeTravers" | | | | | `--leaf "La Chauxdfnd" | | | | `--leaf "La Vallee" | | | `--[dendrogram w/ 2 branches and 3 members at h = 19] | | | |--[dendrogram w/ 2 branches and 2 members at h = 11.5] | | | | |--leaf "Lausanne" | | | | `--leaf "Neuchatel" | | | `--leaf "Vevey" | | `--[dendrogram w/ 2 branches and 3 members at h = 56.1] | | |--leaf "V. 
De Geneve" | | `--[dendrogram w/ 2 branches and 2 members at h = 21.4] | | |--leaf "Rive Droite" | | `--leaf "Rive Gauche" | `--[dendrogram w/ 2 branches and 20 members at h = 48.4] | |--leaf "Moutier" | `--[dendrogram w/ 2 branches and 19 members at h = 44.3] | |--[dendrogram w/ 2 branches and 18 members at h = 39.1] | | |--[dendrogram w/ 2 branches and 6 members at h = 21.9] | | | |--[dendrogram w/ 2 branches and 4 members at h = 12.1] | | | | |--[dendrogram w/ 2 branches and 2 members at h = 10.8] | | | | | |--leaf "Neuveville" | | | | | `--leaf "Boudry" | | | | `--[dendrogram w/ 2 branches and 2 members at h = 4.56] | | | | |--leaf "Grandson" | | | | `--leaf "Val de Ruz" | | | `--[dendrogram w/ 2 branches and 2 members at h = 13.5] | | | |--leaf "Nyone" | | | `--leaf "Yverdon" | | `--[dendrogram w/ 2 branches and 12 members at h = 20.4] | | |--[dendrogram w/ 2 branches and 7 members at h = 15.1] | | | |--[dendrogram w/ 2 branches and 5 members at h = 11.6] | | | | |--[dendrogram w/ 2 branches and 4 members at h = 8.05] | | | | | |--[dendrogram w/ 2 branches and 3 members at h = 6.79] | | | | | | |--[dendrogram w/ 2 branches and 2 members at h = 4.79] | | | | | | | |--leaf "Aigle" | | | | | | | `--leaf "Morges" | | | | | | `--leaf "Rolle" | | | | | `--leaf "Avenches" | | | | `--leaf "Orbe" | | | `--[dendrogram w/ 2 branches and 2 members at h = 6.04] | | | |--leaf "Moudon" | | | `--leaf "Payerne" | | `--[dendrogram w/ 2 branches and 5 members at h = 17.3] | | |--[dendrogram w/ 2 branches and 4 members at h = 11.2] | | | |--[dendrogram w/ 2 branches and 2 members at h = 7.57] | | | | |--leaf "Aubonne" | | | | `--leaf "Oron" | | | `--[dendrogram w/ 2 branches and 2 members at h = 6.35] | | | |--leaf "Cossonay" | | | `--leaf "Lavaux" | | `--leaf "Paysd'enhaut" | `--leaf "Echallens" `--[dendrogram w/ 2 branches and 16 members at h = 56.2] |--[dendrogram w/ 2 branches and 5 members at h = 20.4] | |--[dendrogram w/ 2 branches and 3 members at h = 12.7] | | |--leaf 
"Delemont" | | `--[dendrogram w/ 2 branches and 2 members at h = 9.4] | | |--leaf "Franches-Mnt" | | `--leaf "Porrentruy" | `--[dendrogram w/ 2 branches and 2 members at h = 13] | |--leaf "Gruyere" | `--leaf "Sarine" `--[dendrogram w/ 2 branches and 11 members at h = 30] |--[dendrogram w/ 2 branches and 5 members at h = 12.9] | |--[dendrogram w/ 2 branches and 4 members at h = 11.9] | | |--[dendrogram w/ 2 branches and 3 members at h = 8.45] | | | |--leaf "Broye" | | | `--[dendrogram w/ 2 branches and 2 members at h = 4.14] | | | |--leaf "Glane" | | | `--leaf "Veveyse" | | `--leaf "Sion" | `--leaf "Monthey" `--[dendrogram w/ 2 branches and 6 members at h = 16] |--[dendrogram w/ 2 branches and 4 members at h = 7.42] | |--[dendrogram w/ 2 branches and 3 members at h = 5.94] | | |--[dendrogram w/ 2 branches and 2 members at h = 2.05] | | | |--leaf "Conthey" | | | `--leaf "Sierre" | | `--leaf "Herens" | `--leaf "Entremont" `--[dendrogram w/ 2 branches and 2 members at h = 5.09] |--leaf "Martigwy" `--leaf "St Maurice" > cluster/tests/diana-ex.R0000644000176000001440000000145311674325037014751 0ustar ripleyuserslibrary(cluster) options(digits = 6) data(votes.repub) di.votes <- daisy(votes.repub) .p00 <- proc.time() summary(diana(votes.repub, metric = "manhattan", stand = TRUE)) summary(diana(di.votes, keep.diss = FALSE)) cat('Time elapsed: ', proc.time() - .p00,'\n') data(agriculture) data(ruspini) .p0 <- proc.time() dia.agr <- diana(agriculture) drusp0 <- diana(ruspini, keep.diss=FALSE, keep.data=FALSE) drusp1 <- diana(ruspini, metric = "manhattan") cat('Time elapsed: ', proc.time() - .p0,'\n') summary(dia.agr) summary(drusp0) summary(drusp1) str (drusp1) ## From system.file("scripts/ch11.R", package = "MASS") data(swiss) swiss.x <- as.matrix(swiss[,-1]) .p1 <- proc.time() dCH <- diana(swiss.x) cat('Time elapsed: ', proc.time() - .p1,'\n') str(as.dendrogram(as.hclust(dCH)))# keep back-compatible 
cluster/tests/diana-boots.R0000644000176000001440000000161011674325037015456 0ustar ripleyuserslibrary(cluster) ## Kind of a bootstrap -- calling many diana()s dianaBoot <- function(data, B = 500, frac.sub = c(0.7, min = 0.2), digits = 4) { stopifnot((n <- nrow(data)) >= 10, B >= 10, frac.sub > 0, (m <- round(frac.sub[["min"]]*n)) >= 2, (mm <- round(frac.sub[1]*n)) > m) for(b in 1:B) { d.r <- data[sample(n, max(m, min(n, rpois(1, lambda = mm)))) ,] dia. <- diana(d.r, keep.diss=FALSE, keep.data=FALSE) print(dia.[1:3], digits = digits) } } .p0 <- proc.time() data(ruspini) set.seed(134) dianaBoot(ruspini) cat('Time elapsed: ', (.p1 <- proc.time()) - .p0,'\n') data(agriculture) set.seed(707) dianaBoot(agriculture) cat('Time elapsed: ', (.p2 <- proc.time()) - .p1,'\n') data(swiss); swiss.x <- as.matrix(swiss[,-1]) set.seed(312) dianaBoot(swiss.x) cat('Time elapsed: ', (.p3 <- proc.time()) - .p2,'\n') cluster/tests/daisy-ex.Rout.save0000644000176000001440000012367111646600064016475 0ustar ripleyusers R version 2.14.0 alpha (2011-10-06 r57181) Copyright (C) 2011 The R Foundation for Statistical Computing ISBN 3-900051-07-0 Platform: x86_64-unknown-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. 
> ## For different cluster versions > > require(cluster) Loading required package: cluster > > if(interactive()) { + (pkgPath <- .find.package("cluster", verbose = TRUE)) + (verC <- readLines(Dfile <- file.path(pkgPath, "DESCRIPTION"), n = 2)[2]) + } > > ## trivial cases should 'work': > daisy(cbind(1)) Dissimilarities : dissimilarity(0) Metric : euclidean Number of objects : 1 > (d10 <- daisy(matrix(0., 1,0))); str(d10) Dissimilarities : dissimilarity(0) Metric : euclidean Number of objects : 1 Classes 'dissimilarity', 'dist' atomic (0) ..- attr(*, "Size")= int 1 ..- attr(*, "Metric")= chr "euclidean" > d01 <- daisy(matrix(0., 0,1)) > if(paste(R.version$major, R.version$minor, sep=".") >= "2.1.0") + print(d01) Dissimilarities : dissimilarity(0) Metric : euclidean Number of objects : 0 > str(d01) Classes 'dissimilarity', 'dist' atomic (0) ..- attr(*, "Size")= int 0 ..- attr(*, "Metric")= chr "euclidean" > d32 <- data.frame(eins=c("A"=1,"B"=1,"C"=1), zwei=c(2,2,2)) > daisy(d32) Dissimilarities : A B B 0 C 0 0 Metric : euclidean Number of objects : 3 > daisy(d32, stand = TRUE) Dissimilarities : A B B 0 C 0 0 Metric : euclidean Number of objects : 3 Warning message: In daisy(d32, stand = TRUE) : 'x' has constant columns 1, 2; these are standardized to 0 > daisy(d32, type = list(ordratio="zwei")) Dissimilarities : A B B 0 C 0 0 Metric : mixed ; Types = I, T Number of objects : 3 > > > str(d5 <- data.frame(a= c(0, 0, 0,1,0,0, 0,0,1, 0,NA), + b= c(NA,0, 1,1,0,1, 0,1,0, 1,0), + c= c(0, 1, 1,0,1,NA,1,0,1, 0,NA), + d= c(1, 1, 0,1,0,0, 0,0,0, 1,0), + e= c(1, NA,0,1,0,0, 0,0,NA,1,1))) 'data.frame': 11 obs. of 5 variables: $ a: num 0 0 0 1 0 0 0 0 1 0 ... $ b: num NA 0 1 1 0 1 0 1 0 1 ... $ c: num 0 1 1 0 1 NA 1 0 1 0 ... $ d: num 1 1 0 1 0 0 0 0 0 1 ... $ e: num 1 NA 0 1 0 0 0 0 NA 1 ... 
> (d0 <- daisy(d5)) Dissimilarities : 1 2 3 4 5 6 7 8 2 1.290994 3 1.936492 1.581139 4 1.118034 1.936492 2.000000 5 1.936492 1.118034 1.000000 2.236068 6 1.825742 1.825742 0.000000 1.936492 1.118034 7 1.936492 1.118034 1.000000 2.236068 0.000000 1.118034 8 1.581139 1.936492 1.000000 1.732051 1.414214 0.000000 1.414214 9 2.236068 1.581139 1.581139 1.936492 1.118034 1.825742 1.118034 1.936492 10 0.000000 1.581139 1.732051 1.000000 2.000000 1.581139 2.000000 1.414214 11 1.581139 1.581139 1.825742 1.825742 1.290994 1.825742 1.290994 1.825742 9 10 2 3 4 5 6 7 8 9 10 2.236068 11 0.000000 1.825742 Metric : euclidean Number of objects : 11 Warning message: In daisy(d5) : binary variable(s) 1, 2, 3, 4, 5 treated as interval scaled > (d1 <- daisy(d5, type = list(asymm = 1:5))) Dissimilarities : 1 2 3 4 5 6 7 2 0.5000000 3 1.0000000 0.6666667 4 0.3333333 0.7500000 0.8000000 5 1.0000000 0.5000000 0.5000000 1.0000000 6 1.0000000 1.0000000 0.0000000 0.7500000 1.0000000 7 1.0000000 0.5000000 0.5000000 1.0000000 0.0000000 1.0000000 8 1.0000000 1.0000000 0.5000000 0.7500000 1.0000000 0.0000000 1.0000000 9 1.0000000 0.6666667 0.6666667 0.7500000 0.5000000 1.0000000 0.5000000 10 0.0000000 0.6666667 0.7500000 0.2500000 1.0000000 0.6666667 1.0000000 11 0.5000000 1.0000000 1.0000000 0.6666667 1.0000000 1.0000000 1.0000000 8 9 10 2 3 4 5 6 7 8 9 1.0000000 10 0.6666667 1.0000000 11 1.0000000 NA 0.6666667 Metric : mixed ; Types = A, A, A, A, A Number of objects : 11 > (d2 <- daisy(d5, type = list(symm = 1:2, asymm= 3:5))) Dissimilarities : 1 2 3 4 5 6 7 2 0.3333333 3 0.7500000 0.5000000 4 0.3333333 0.7500000 0.8000000 5 0.7500000 0.2500000 0.3333333 1.0000000 6 0.6666667 0.6666667 0.0000000 0.7500000 0.5000000 7 0.7500000 0.2500000 0.3333333 1.0000000 0.0000000 0.5000000 8 0.6666667 0.7500000 0.3333333 0.7500000 0.6666667 0.0000000 0.6666667 9 1.0000000 0.5000000 0.6666667 0.7500000 0.3333333 1.0000000 0.3333333 10 0.0000000 0.5000000 0.6000000 0.2500000 0.8000000 0.5000000 0.8000000 11 
0.5000000 0.5000000 1.0000000 0.6666667 0.5000000 1.0000000 0.5000000 8 9 10 2 3 4 5 6 7 8 9 1.0000000 10 0.5000000 1.0000000 11 1.0000000 0.0000000 0.6666667 Metric : mixed ; Types = S, S, A, A, A Number of objects : 11 > (d2.<- daisy(d5, type = list( asymm= 3:5))) Dissimilarities : 1 2 3 4 5 6 7 2 0.3333333 3 0.7500000 0.5000000 4 0.3333333 0.7500000 0.8000000 5 0.7500000 0.2500000 0.3333333 1.0000000 6 0.6666667 0.6666667 0.0000000 0.7500000 0.5000000 7 0.7500000 0.2500000 0.3333333 1.0000000 0.0000000 0.5000000 8 0.6666667 0.7500000 0.3333333 0.7500000 0.6666667 0.0000000 0.6666667 9 1.0000000 0.5000000 0.6666667 0.7500000 0.3333333 1.0000000 0.3333333 10 0.0000000 0.5000000 0.6000000 0.2500000 0.8000000 0.5000000 0.8000000 11 0.5000000 0.5000000 1.0000000 0.6666667 0.5000000 1.0000000 0.5000000 8 9 10 2 3 4 5 6 7 8 9 1.0000000 10 0.5000000 1.0000000 11 1.0000000 0.0000000 0.6666667 Metric : mixed ; Types = I, I, A, A, A Number of objects : 11 Warning message: In daisy(d5, type = list(asymm = 3:5)) : binary variable(s) 1, 2 treated as interval scaled > stopifnot(identical(c(d2), c(d2.))) > (dS <- daisy(d5, stand = TRUE))# gave error in some versions Dissimilarities : 1 2 3 4 5 6 7 8 2 2.614264 3 4.010913 3.291786 4 3.493856 4.725761 4.757684 5 4.010913 2.415752 2.000000 5.160965 6 3.823025 3.801028 0.000000 4.813384 2.236068 7 4.010913 2.415752 2.000000 5.160965 0.000000 2.236068 8 3.310837 3.995202 2.025000 4.305222 2.846160 0.000000 2.846160 9 5.558018 4.247692 4.148136 3.995202 3.493856 4.789855 3.493856 4.725761 10 0.000000 3.182103 3.587469 3.125000 4.107303 3.310837 4.107303 2.961302 11 3.416389 3.416389 3.674376 3.801028 2.614264 3.674376 2.614264 3.674376 9 10 2 3 4 5 6 7 8 9 10 5.307417 11 0.000000 3.801028 Metric : euclidean Number of objects : 11 Warning message: In daisy(d5, stand = TRUE) : binary variable(s) 1, 2, 3, 4, 5 treated as interval scaled > stopifnot(all.equal(as.vector(summary(c(dS), digits=9)), + c(0, 2.6142638, 3.4938562, 3.2933687, 
4.0591077, 5.5580177), + tol = 1e-7))# 7.88e-9 > > d5[,4] <- 1 # binary with only one instead of two values > (d0 <- daisy(d5)) Dissimilarities : 1 2 3 4 5 6 7 8 2 1.290994 3 1.581139 1.118034 4 1.118034 1.936492 1.732051 5 1.581139 0.000000 1.000000 2.000000 6 1.290994 1.290994 0.000000 1.581139 1.118034 7 1.581139 0.000000 1.000000 2.000000 0.000000 1.118034 8 1.118034 1.581139 1.000000 1.414214 1.414214 0.000000 1.414214 9 1.825742 1.118034 1.581139 1.581139 1.118034 1.825742 1.118034 1.936492 10 0.000000 1.581139 1.414214 1.000000 1.732051 1.118034 1.732051 1.000000 11 0.000000 0.000000 1.825742 1.290994 1.290994 1.825742 1.290994 1.825742 9 10 2 3 4 5 6 7 8 9 10 1.936492 11 0.000000 1.290994 Metric : euclidean Number of objects : 11 Warning message: In daisy(d5) : binary variable(s) 1, 2, 3, 5 treated as interval scaled > (d1 <- daisy(d5, type = list(asymm = 1:5)))# 2 NAs Dissimilarities : 1 2 3 4 5 6 7 2 1.0000000 3 1.0000000 0.5000000 4 0.5000000 1.0000000 0.7500000 5 1.0000000 0.0000000 0.5000000 1.0000000 6 1.0000000 1.0000000 0.0000000 0.6666667 1.0000000 7 1.0000000 0.0000000 0.5000000 1.0000000 0.0000000 1.0000000 8 1.0000000 1.0000000 0.5000000 0.6666667 1.0000000 0.0000000 1.0000000 9 1.0000000 0.5000000 0.6666667 0.6666667 0.5000000 1.0000000 0.5000000 10 0.0000000 1.0000000 0.6666667 0.3333333 1.0000000 0.5000000 1.0000000 11 0.0000000 NA 1.0000000 0.5000000 1.0000000 1.0000000 1.0000000 8 9 10 2 3 4 5 6 7 8 9 1.0000000 10 0.5000000 1.0000000 11 1.0000000 NA 0.5000000 Metric : mixed ; Types = A, A, A, A, A Number of objects : 11 Warning message: In daisy(d5, type = list(asymm = 1:5)) : at least one binary variable has not 2 different levels. 
> (d2 <- daisy(d5, type = list(symm = 1:2, asymm= 3:5))) Dissimilarities : 1 2 3 4 5 6 7 2 0.5000000 3 0.6666667 0.3333333 4 0.5000000 1.0000000 0.7500000 5 0.6666667 0.0000000 0.3333333 1.0000000 6 0.5000000 0.5000000 0.0000000 0.6666667 0.5000000 7 0.6666667 0.0000000 0.3333333 1.0000000 0.0000000 0.5000000 8 0.5000000 0.6666667 0.3333333 0.6666667 0.6666667 0.0000000 0.6666667 9 1.0000000 0.3333333 0.6666667 0.6666667 0.3333333 1.0000000 0.3333333 10 0.0000000 0.6666667 0.5000000 0.3333333 0.7500000 0.3333333 0.7500000 11 0.0000000 0.0000000 1.0000000 0.5000000 0.5000000 1.0000000 0.5000000 8 9 10 2 3 4 5 6 7 8 9 1.0000000 10 0.3333333 1.0000000 11 1.0000000 0.0000000 0.5000000 Metric : mixed ; Types = S, S, A, A, A Number of objects : 11 Warning message: In daisy(d5, type = list(symm = 1:2, asymm = 3:5)) : at least one binary variable has not 2 different levels. > (d2.<- daisy(d5, type = list( asymm= 3:5))) Dissimilarities : 1 2 3 4 5 6 7 2 0.5000000 3 0.6666667 0.3333333 4 0.5000000 1.0000000 0.7500000 5 0.6666667 0.0000000 0.3333333 1.0000000 6 0.5000000 0.5000000 0.0000000 0.6666667 0.5000000 7 0.6666667 0.0000000 0.3333333 1.0000000 0.0000000 0.5000000 8 0.5000000 0.6666667 0.3333333 0.6666667 0.6666667 0.0000000 0.6666667 9 1.0000000 0.3333333 0.6666667 0.6666667 0.3333333 1.0000000 0.3333333 10 0.0000000 0.6666667 0.5000000 0.3333333 0.7500000 0.3333333 0.7500000 11 0.0000000 0.0000000 1.0000000 0.5000000 0.5000000 1.0000000 0.5000000 8 9 10 2 3 4 5 6 7 8 9 1.0000000 10 0.3333333 1.0000000 11 1.0000000 0.0000000 0.5000000 Metric : mixed ; Types = I, I, A, A, A Number of objects : 11 Warning messages: 1: In daisy(d5, type = list(asymm = 3:5)) : at least one binary variable has not 2 different levels. 
2: In daisy(d5, type = list(asymm = 3:5)) : binary variable(s) 1, 2 treated as interval scaled > ## better leave away the constant variable: it has no effect: > stopifnot(identical(c(d1), c(daisy(d5[,-4], type = list(asymm = 1:4))))) > > ###---- Trivial "binary only" matrices (not data frames) did fail: > > x <- matrix(0, 2, 2) > dimnames(x)[[2]] <- c("A", "B")## colnames<- is missing in S+ > daisy(x, type = list(symm= "B", asymm="A")) Dissimilarities : 1 2 0 Metric : mixed ; Types = A, S Number of objects : 2 Warning message: In daisy(x, type = list(symm = "B", asymm = "A")) : at least one binary variable has not 2 different levels. > daisy(x, type = list(symm= "B"))# 0 too Dissimilarities : 1 2 0 Metric : mixed ; Types = I, S Number of objects : 2 Warning message: In daisy(x, type = list(symm = "B")) : at least one binary variable has not 2 different levels. > > x2 <- x; x2[2,2] <- 1 > daisy(x2, type= list(symm = "B"))# |-> 0.5 (gives 1 in S+) Dissimilarities : 1 2 0.5 Metric : mixed ; Types = I, S Number of objects : 2 > daisy(x2, type= list(symm = "B", asymm="A"))# 1 Dissimilarities : 1 2 1 Metric : mixed ; Types = A, S Number of objects : 2 Warning message: In daisy(x2, type = list(symm = "B", asymm = "A")) : at least one binary variable has not 2 different levels. 
> > x3 <- x; x3[] <- diag(2) > daisy(x3) # warning: both as interval scaled -> sqrt(2) Dissimilarities : 1 2 1.414214 Metric : euclidean Number of objects : 2 > daisy(x3, type= list(symm="B", asymm="A"))# 1 Dissimilarities : 1 2 1 Metric : mixed ; Types = A, S Number of objects : 2 > daisy(x3, type= list(symm =c("B","A"))) # 1, S+: sqrt(2) Dissimilarities : 1 2 1 Metric : mixed ; Types = S, S Number of objects : 2 > daisy(x3, type= list(asymm=c("B","A"))) # 1, S+ : sqrt(2) Dissimilarities : 1 2 1 Metric : mixed ; Types = A, A Number of objects : 2 > > x4 <- rbind(x3, 1) > daisy(x4, type= list(symm="B", asymm="A"))# 1 0.5 0.5 Dissimilarities : 1 2 2 1.0 3 0.5 0.5 Metric : mixed ; Types = A, S Number of objects : 3 > daisy(x4, type= list(symm=c("B","A"))) # dito; S+ : 1.41 1 1 Dissimilarities : 1 2 2 1.0 3 0.5 0.5 Metric : mixed ; Types = S, S Number of objects : 3 > daisy(x4, type= list(asymm=c("A","B"))) # dito, dito Dissimilarities : 1 2 2 1.0 3 0.5 0.5 Metric : mixed ; Types = A, A Number of objects : 3 > > > > ## ----------- example(daisy) ----------------------- > > data(flower) > data(agriculture) > > ## Example 1 in ref: > ## Dissimilarities using Euclidean metric and without standardization > (d.agr <- daisy(agriculture, metric = "euclidean", stand = FALSE)) Dissimilarities : B DK D GR E F IRL DK 5.408327 D 2.061553 3.405877 GR 22.339651 22.570113 22.661200 E 9.818350 11.182576 10.394710 12.567418 F 3.448188 3.512834 2.657066 20.100995 8.060397 IRL 12.747549 13.306014 13.080138 9.604166 3.140064 10.564563 I 5.803447 5.470832 5.423099 17.383325 5.727128 2.773085 7.920859 L 4.275512 2.220360 2.300000 24.035391 12.121056 4.060788 14.569145 NL 1.649242 5.096077 2.435159 20.752349 8.280097 2.202272 11.150785 P 17.236299 17.864490 17.664088 5.162364 7.430343 15.164432 4.601087 UK 2.828427 8.052950 4.850773 21.485344 8.984431 5.303772 12.103718 I L NL P DK D GR E F IRL I L 6.660330 NL 4.204759 4.669047 P 12.515990 19.168985 15.670673 UK 6.723095 7.102112 3.124100 
16.323296 Metric : euclidean Number of objects : 12 > (d.agr2 <- daisy(agriculture, metric = "manhattan")) Dissimilarities : B DK D GR E F IRL I L NL P DK 7.5 D 2.7 4.8 GR 30.4 31.9 31.5 E 13.6 15.1 14.7 16.8 F 4.3 3.8 3.4 28.1 11.3 IRL 17.2 18.7 18.3 13.2 3.6 14.9 I 6.0 7.5 7.1 24.4 7.6 3.7 11.2 L 5.0 2.5 2.3 33.8 17.0 5.7 20.6 9.4 NL 2.0 6.3 3.1 28.4 11.6 3.1 15.2 4.4 5.4 P 23.7 25.2 24.8 6.7 10.1 21.4 6.5 17.7 27.1 21.7 UK 3.2 10.7 5.9 28.0 11.2 7.5 14.8 8.8 8.2 4.4 21.3 Metric : manhattan Number of objects : 12 > > > ## Example 2 in ref > (dfl0 <- daisy(flower)) Dissimilarities : 1 2 3 4 5 6 7 2 0.8875408 3 0.5272467 0.5147059 4 0.3517974 0.5504493 0.5651552 5 0.4115605 0.6226307 0.3726307 0.6383578 6 0.2269199 0.6606209 0.3003268 0.4189951 0.3443627 7 0.2876225 0.5999183 0.4896242 0.3435866 0.4197712 0.1892974 8 0.4234069 0.4641340 0.6038399 0.2960376 0.4673203 0.5714869 0.4107843 9 0.5808824 0.4316585 0.4463644 0.8076797 0.3306781 0.5136846 0.5890931 10 0.6094363 0.4531046 0.4678105 0.5570670 0.3812908 0.4119281 0.5865196 11 0.3278595 0.7096814 0.5993873 0.6518791 0.3864788 0.4828840 0.5652369 12 0.4267565 0.5857843 0.6004902 0.5132761 0.5000817 0.5248366 0.6391340 13 0.5196487 0.5248366 0.5395425 0.7464461 0.2919118 0.4524510 0.5278595 14 0.2926062 0.5949346 0.6096405 0.3680147 0.5203431 0.3656863 0.5049837 15 0.6221814 0.3903595 0.5300654 0.5531454 0.4602124 0.5091503 0.3345588 16 0.6935866 0.3575163 0.6222222 0.3417892 0.7301471 0.5107843 0.4353758 17 0.7765114 0.1904412 0.5801471 0.4247141 0.6880719 0.5937092 0.5183007 18 0.4610294 0.4515114 0.7162173 0.4378268 0.4755310 0.6438317 0.4692402 8 9 10 11 12 13 14 2 3 4 5 6 7 8 9 0.6366422 10 0.6639706 0.4256127 11 0.4955474 0.4308007 0.3948121 12 0.4216503 0.4194036 0.3812092 0.2636029 13 0.5754085 0.2181781 0.3643791 0.3445670 0.2331699 14 0.4558007 0.4396650 0.3609477 0.2838644 0.1591503 0.3784314 15 0.4512255 0.2545343 0.4210784 0.4806781 0.4295752 0.3183007 0.4351307 16 0.6378268 0.6494690 0.3488562 
0.7436683 0.6050654 0.5882353 0.4598039 17 0.4707516 0.6073938 0.3067810 0.7015931 0.5629902 0.5461601 0.5427288 18 0.1417892 0.5198529 0.8057598 0.5359477 0.5495507 0.5733252 0.5698121 15 16 17 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 0.3949346 17 0.3528595 0.1670752 18 0.5096814 0.7796160 0.6125408 Metric : mixed ; Types = N, N, N, N, O, O, I, I Number of objects : 18 > stopifnot(identical(c(dfl0), + c(daisy(flower, type = list(symm = 1)))) && + identical(c(dfl0), + c(daisy(flower, type = list(symm = 2)))) && + identical(c(dfl0), + c(daisy(flower, type = list(symm = 3)))) && + identical(c(dfl0), + c(daisy(flower, type = list(symm = c(1,3))))) + ) > > (dfl1 <- daisy(flower, type = list(asymm = 3))) Dissimilarities : 1 2 3 4 5 6 7 2 0.8875408 3 0.5272467 0.5882353 4 0.3517974 0.5504493 0.5651552 5 0.4115605 0.7115780 0.4258637 0.6383578 6 0.2269199 0.7549953 0.3432306 0.4189951 0.3935574 7 0.2876225 0.6856209 0.5595705 0.3435866 0.4797386 0.2163399 8 0.4234069 0.4641340 0.6038399 0.2960376 0.4673203 0.5714869 0.4107843 9 0.5808824 0.4933240 0.5101307 0.8076797 0.3779178 0.5870682 0.6732493 10 0.6094363 0.5178338 0.5346405 0.5570670 0.4357610 0.4707750 0.6703081 11 0.3278595 0.7096814 0.5993873 0.6518791 0.3864788 0.4828840 0.5652369 12 0.4267565 0.5857843 0.6004902 0.5132761 0.5000817 0.5248366 0.6391340 13 0.5196487 0.5998133 0.6166200 0.7464461 0.3336134 0.5170868 0.6032680 14 0.2926062 0.5949346 0.6096405 0.3680147 0.5203431 0.3656863 0.5049837 15 0.6221814 0.4461251 0.6057890 0.5531454 0.5259570 0.5818861 0.3823529 16 0.6935866 0.4085901 0.7111111 0.3417892 0.8344538 0.5837535 0.4975724 17 0.7765114 0.2176471 0.6630252 0.4247141 0.7863679 0.6785247 0.5923436 18 0.4610294 0.4515114 0.7162173 0.4378268 0.4755310 0.6438317 0.4692402 8 9 10 11 12 13 14 2 3 4 5 6 7 8 9 0.6366422 10 0.6639706 0.4864146 11 0.4955474 0.4308007 0.3948121 12 0.4216503 0.4194036 0.3812092 0.2636029 13 0.5754085 0.2493464 0.4164332 0.3445670 0.2331699 14 0.4558007 0.4396650 0.3609477 0.2838644 
0.1591503 0.3784314 15 0.4512255 0.2908964 0.4812325 0.4806781 0.4295752 0.3637722 0.4351307 16 0.6378268 0.7422502 0.3986928 0.7436683 0.6050654 0.6722689 0.4598039 17 0.4707516 0.6941643 0.3506069 0.7015931 0.5629902 0.6241830 0.5427288 18 0.1417892 0.5198529 0.8057598 0.5359477 0.5495507 0.5733252 0.5698121 15 16 17 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 0.4513539 17 0.4032680 0.1909430 18 0.5096814 0.7796160 0.6125408 Metric : mixed ; Types = N, N, A, N, O, O, I, I Number of objects : 18 > (dfl2 <- daisy(flower, type = list(asymm = c(1, 3), ordratio = 7))) Dissimilarities : 1 2 3 4 5 6 7 2 0.9007353 3 0.6176471 0.5882353 4 0.4226891 0.5455882 0.6403361 5 0.4806723 0.7369748 0.5264706 0.7605042 6 0.2823529 0.7470588 0.3911765 0.4764706 0.4980392 7 0.3310924 0.6983193 0.6676471 0.4109244 0.5745098 0.2764706 8 0.5100840 0.4544118 0.6789916 0.3327731 0.5705882 0.6563025 0.4932773 9 0.5808824 0.5084034 0.5252101 0.8257353 0.3882353 0.6100840 0.6756303 10 0.6323529 0.5067227 0.5235294 0.5522059 0.4722689 0.4739496 0.6941176 11 0.3389706 0.7117647 0.6014706 0.6588235 0.4066176 0.4919118 0.5742647 12 0.4441176 0.5816176 0.5963235 0.5139706 0.5264706 0.5220588 0.6544118 13 0.5286765 0.6252101 0.6420168 0.7735294 0.3336134 0.5504202 0.6159664 14 0.3044118 0.5963235 0.6110294 0.3742647 0.5411765 0.3573529 0.5147059 15 0.6242647 0.4588235 0.6184874 0.5691176 0.5386555 0.6025210 0.3823529 16 0.6845588 0.3831933 0.6857143 0.3147059 0.8344538 0.5504202 0.4848739 17 0.7897059 0.2176471 0.6630252 0.4198529 0.8117647 0.6705882 0.6050420 18 0.5268908 0.4647059 0.8336134 0.5210084 0.5537815 0.7588235 0.5386555 8 9 10 11 12 13 14 2 3 4 5 6 7 8 9 0.6595588 10 0.6639706 0.5126050 11 0.5073529 0.4419118 0.4066176 12 0.4272059 0.4367647 0.3867647 0.2698529 13 0.6073529 0.2596639 0.4529412 0.3647059 0.2595588 14 0.4669118 0.4514706 0.3720588 0.2845588 0.1647059 0.3992647 15 0.4720588 0.2932773 0.5050420 0.4897059 0.4448529 0.3764706 0.4448529 16 0.6058824 0.7319328 0.3621849 0.7235294 
0.5786765 0.6722689 0.4389706 17 0.4610294 0.7092437 0.3394958 0.7036765 0.5588235 0.6495798 0.5441176 18 0.1882353 0.5198529 0.8286765 0.5470588 0.5669118 0.5823529 0.5816176 15 16 17 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 0.4386555 17 0.4159664 0.1655462 18 0.5117647 0.7705882 0.6257353 Metric : mixed ; Types = A, N, A, N, O, O, T, I Number of objects : 18 > (dfl3 <- daisy(flower, type = list(asymm = 1:3))) Dissimilarities : 1 2 3 4 5 6 7 2 0.8875408 3 0.6025677 0.5882353 4 0.4020542 0.6290850 0.6458917 5 0.4703548 0.7115780 0.4968410 0.7295518 6 0.2593371 0.7549953 0.4004357 0.4788515 0.4591503 7 0.3287115 0.7998911 0.6528322 0.4581155 0.5596950 0.2523965 8 0.4838936 0.5304388 0.6901027 0.3947168 0.5340803 0.6531279 0.5477124 9 0.5808824 0.4933240 0.5101307 0.8076797 0.3779178 0.5870682 0.6732493 10 0.6094363 0.5178338 0.5346405 0.5570670 0.4357610 0.4707750 0.6703081 11 0.3278595 0.7096814 0.5993873 0.6518791 0.3864788 0.4828840 0.5652369 12 0.4267565 0.5857843 0.6004902 0.5132761 0.5000817 0.5248366 0.6391340 13 0.5196487 0.5998133 0.6166200 0.7464461 0.3336134 0.5170868 0.6032680 14 0.2926062 0.5949346 0.6096405 0.3680147 0.5203431 0.3656863 0.5049837 15 0.6221814 0.5204793 0.6057890 0.6321662 0.5259570 0.5818861 0.4460784 16 0.6935866 0.4766885 0.7111111 0.3906162 0.8344538 0.5837535 0.5805011 17 0.7765114 0.2539216 0.6630252 0.4853875 0.7863679 0.6785247 0.6910675 18 0.5268908 0.5160131 0.8185341 0.5837691 0.5434641 0.7358077 0.6256536 8 9 10 11 12 13 14 2 3 4 5 6 7 8 9 0.6366422 10 0.6639706 0.4864146 11 0.4955474 0.4308007 0.3948121 12 0.4216503 0.4194036 0.3812092 0.2636029 13 0.5754085 0.2493464 0.4164332 0.3445670 0.2331699 14 0.4558007 0.4396650 0.3609477 0.2838644 0.1591503 0.3784314 15 0.5156863 0.2908964 0.4812325 0.4806781 0.4295752 0.3637722 0.4351307 16 0.7289449 0.7422502 0.3986928 0.7436683 0.6050654 0.6722689 0.4598039 17 0.5380019 0.6941643 0.3506069 0.7015931 0.5629902 0.6241830 0.5427288 18 0.1890523 0.5198529 0.8057598 0.5359477 0.5495507 
0.5733252 0.5698121 15 16 17 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 0.5265795 17 0.4704793 0.2227669 18 0.5824930 0.8909897 0.7000467 Metric : mixed ; Types = A, A, A, N, O, O, I, I Number of objects : 18 > > ## --- animals > data(animals) > d0 <- daisy(animals) Warning message: In daisy(animals) : binary variable(s) 1, 2, 3, 4, 5, 6 treated as interval scaled > > d1 <- daisy(animals - 1, type=list(asymm=c(2,4))) Warning message: In daisy(animals - 1, type = list(asymm = c(2, 4))) : binary variable(s) 1, 3, 5, 6 treated as interval scaled > (d2 <- daisy(animals - 1, type=list(symm = c(1,3,5,6), asymm=c(2,4)))) Dissimilarities : ant bee cat cpl chi cow duc bee 0.4000000 cat 1.0000000 0.8000000 cpl 0.5000000 0.4000000 0.5000000 chi 0.8000000 0.6666667 0.4000000 0.8000000 cow 0.7500000 0.6000000 0.2500000 0.7500000 0.2000000 duc 0.6000000 0.6000000 0.6000000 1.0000000 0.5000000 0.4000000 eag 0.8333333 0.8333333 0.5000000 0.8333333 0.5000000 0.6666667 0.3333333 ele 0.6000000 0.8333333 0.6000000 1.0000000 0.2000000 0.4000000 0.3333333 fly 0.4000000 0.4000000 0.8000000 0.4000000 1.0000000 1.0000000 0.6000000 fro 0.5000000 0.8000000 0.7500000 0.7500000 0.5000000 0.7500000 0.6000000 her 0.2500000 0.6000000 0.7500000 0.7500000 0.6000000 0.5000000 0.4000000 lio 0.7500000 0.6000000 0.2500000 0.7500000 0.0000000 0.0000000 0.4000000 liz 0.5000000 0.8000000 0.5000000 0.5000000 0.8000000 0.7500000 0.6000000 lob 0.0000000 0.5000000 1.0000000 0.3333333 1.0000000 1.0000000 0.7500000 man 0.8000000 0.6666667 0.4000000 0.8000000 0.0000000 0.2000000 0.5000000 rab 0.7500000 0.6000000 0.2500000 0.7500000 0.2000000 0.0000000 0.4000000 sal 0.3333333 0.7500000 0.6666667 0.6666667 0.7500000 0.6666667 0.5000000 spi 0.5000000 0.4000000 0.5000000 0.0000000 0.7500000 0.7500000 1.0000000 wha 0.6000000 0.8333333 0.6000000 1.0000000 0.2000000 0.4000000 0.3333333 eag ele fly fro her lio liz bee cat cpl chi cow duc eag ele 0.3333333 fly 0.5000000 0.8333333 fro 0.4000000 0.2500000 0.6000000 her 0.6666667 
0.4000000 0.6000000 0.2500000 lio 0.6000000 0.2500000 1.0000000 0.6666667 0.5000000 liz 0.5000000 0.6000000 0.4000000 0.2500000 0.2500000 0.7500000 lob 0.8000000 0.7500000 0.2500000 0.5000000 0.3333333 1.0000000 0.3333333 man 0.5000000 0.2000000 1.0000000 0.5000000 0.6000000 0.0000000 0.8000000 rab 0.6666667 0.4000000 1.0000000 0.7500000 0.5000000 0.0000000 0.7500000 sal 0.6000000 0.5000000 0.5000000 0.2500000 0.0000000 0.6666667 0.0000000 spi 0.8000000 1.0000000 0.4000000 0.6666667 0.7500000 0.7500000 0.5000000 wha 0.3333333 0.0000000 0.8333333 0.2500000 0.4000000 0.2500000 0.6000000 lob man rab sal spi bee cat cpl chi cow duc eag ele fly fro her lio liz lob man 1.0000000 rab 1.0000000 0.2000000 sal 0.3333333 0.7500000 0.6666667 spi 0.3333333 0.7500000 0.7500000 0.6666667 wha 0.7500000 0.2000000 0.4000000 0.5000000 1.0000000 Metric : mixed ; Types = S, A, S, A, S, S Number of objects : 20 > stopifnot(c(d1) == c(d2)) > > d3 <- daisy(2 - animals, type=list(asymm=c(2,4))) Warning message: In daisy(2 - animals, type = list(asymm = c(2, 4))) : binary variable(s) 1, 3, 5, 6 treated as interval scaled > (d4 <- daisy(2 - animals, type=list(symm = c(1,3,5,6), asymm=c(2,4)))) Dissimilarities : ant bee cat cpl chi cow duc bee 0.3333333 cat 0.6666667 0.6666667 cpl 0.3333333 0.3333333 0.3333333 chi 0.6666667 0.6666667 0.3333333 0.6666667 cow 0.5000000 0.5000000 0.1666667 0.5000000 0.1666667 duc 0.5000000 0.6000000 0.5000000 0.8333333 0.5000000 0.3333333 eag 0.8333333 1.0000000 0.5000000 0.8333333 0.6000000 0.6666667 0.4000000 ele 0.5000000 0.8333333 0.5000000 0.8333333 0.2000000 0.3333333 0.3333333 fly 0.3333333 0.4000000 0.6666667 0.3333333 1.0000000 0.8333333 0.6000000 fro 0.4000000 0.8000000 0.6000000 0.6000000 0.5000000 0.6000000 0.6000000 her 0.1666667 0.5000000 0.5000000 0.5000000 0.5000000 0.3333333 0.3333333 lio 0.6000000 0.6000000 0.2000000 0.6000000 0.0000000 0.0000000 0.4000000 liz 0.3333333 0.6666667 0.3333333 0.3333333 0.6666667 0.5000000 0.5000000 lob 0.0000000 
0.4000000 0.6000000 0.2000000 0.8000000 0.6000000 0.6000000 man 0.6666667 0.6666667 0.3333333 0.6666667 0.0000000 0.1666667 0.5000000 rab 0.5000000 0.5000000 0.1666667 0.5000000 0.1666667 0.0000000 0.3333333 sal 0.2000000 0.6000000 0.4000000 0.4000000 0.6000000 0.4000000 0.4000000 spi 0.4000000 0.4000000 0.4000000 0.0000000 0.6000000 0.6000000 1.0000000 wha 0.5000000 0.8333333 0.5000000 0.8333333 0.2000000 0.3333333 0.3333333 eag ele fly fro her lio liz bee cat cpl chi cow duc eag ele 0.4000000 fly 0.6000000 0.8333333 fro 0.5000000 0.2500000 0.6000000 her 0.6666667 0.3333333 0.5000000 0.2000000 lio 0.6000000 0.2000000 1.0000000 0.5000000 0.4000000 liz 0.5000000 0.5000000 0.3333333 0.2000000 0.1666667 0.6000000 lob 0.8000000 0.6000000 0.2000000 0.4000000 0.2000000 0.7500000 0.2000000 man 0.6000000 0.2000000 1.0000000 0.5000000 0.5000000 0.0000000 0.6666667 rab 0.6666667 0.3333333 0.8333333 0.6000000 0.3333333 0.0000000 0.5000000 sal 0.6000000 0.4000000 0.4000000 0.2000000 0.0000000 0.5000000 0.0000000 spi 0.8000000 0.8000000 0.4000000 0.5000000 0.6000000 0.6000000 0.4000000 wha 0.4000000 0.0000000 0.8333333 0.2500000 0.3333333 0.2000000 0.5000000 lob man rab sal spi bee cat cpl chi cow duc eag ele fly fro her lio liz lob man 0.8000000 rab 0.6000000 0.1666667 sal 0.2000000 0.6000000 0.4000000 spi 0.2500000 0.6000000 0.6000000 0.5000000 wha 0.6000000 0.2000000 0.3333333 0.4000000 0.8000000 Metric : mixed ; Types = S, A, S, A, S, S Number of objects : 20 > stopifnot(c(d3) == c(d4)) > > pairs(cbind(d0,d2,d4), + main = "Animals -- symmetric and asymm. 
dissimilarities") > cluster/tests/daisy-ex.R0000644000176000001440000000732311646600064015003 0ustar ripleyusers## For different cluster versions require(cluster) if(interactive()) { (pkgPath <- .find.package("cluster", verbose = TRUE)) (verC <- readLines(Dfile <- file.path(pkgPath, "DESCRIPTION"), n = 2)[2]) } ## trivial cases should 'work': daisy(cbind(1)) (d10 <- daisy(matrix(0., 1,0))); str(d10) d01 <- daisy(matrix(0., 0,1)) if(paste(R.version$major, R.version$minor, sep=".") >= "2.1.0") print(d01) str(d01) d32 <- data.frame(eins=c("A"=1,"B"=1,"C"=1), zwei=c(2,2,2)) daisy(d32) daisy(d32, stand = TRUE) daisy(d32, type = list(ordratio="zwei")) str(d5 <- data.frame(a= c(0, 0, 0,1,0,0, 0,0,1, 0,NA), b= c(NA,0, 1,1,0,1, 0,1,0, 1,0), c= c(0, 1, 1,0,1,NA,1,0,1, 0,NA), d= c(1, 1, 0,1,0,0, 0,0,0, 1,0), e= c(1, NA,0,1,0,0, 0,0,NA,1,1))) (d0 <- daisy(d5)) (d1 <- daisy(d5, type = list(asymm = 1:5))) (d2 <- daisy(d5, type = list(symm = 1:2, asymm= 3:5))) (d2.<- daisy(d5, type = list( asymm= 3:5))) stopifnot(identical(c(d2), c(d2.))) (dS <- daisy(d5, stand = TRUE))# gave error in some versions stopifnot(all.equal(as.vector(summary(c(dS), digits=9)), c(0, 2.6142638, 3.4938562, 3.2933687, 4.0591077, 5.5580177), tol = 1e-7))# 7.88e-9 d5[,4] <- 1 # binary with only one instead of two values (d0 <- daisy(d5)) (d1 <- daisy(d5, type = list(asymm = 1:5)))# 2 NAs (d2 <- daisy(d5, type = list(symm = 1:2, asymm= 3:5))) (d2.<- daisy(d5, type = list( asymm= 3:5))) ## better leave away the constant variable: it has no effect: stopifnot(identical(c(d1), c(daisy(d5[,-4], type = list(asymm = 1:4))))) ###---- Trivial "binary only" matrices (not data frames) did fail: x <- matrix(0, 2, 2) dimnames(x)[[2]] <- c("A", "B")## colnames<- is missing in S+ daisy(x, type = list(symm= "B", asymm="A")) daisy(x, type = list(symm= "B"))# 0 too x2 <- x; x2[2,2] <- 1 daisy(x2, type= list(symm = "B"))# |-> 0.5 (gives 1 in S+) daisy(x2, type= list(symm = "B", asymm="A"))# 1 x3 <- x; x3[] <- diag(2) daisy(x3) 
# warning: both as interval scaled -> sqrt(2) daisy(x3, type= list(symm="B", asymm="A"))# 1 daisy(x3, type= list(symm =c("B","A"))) # 1, S+: sqrt(2) daisy(x3, type= list(asymm=c("B","A"))) # 1, S+ : sqrt(2) x4 <- rbind(x3, 1) daisy(x4, type= list(symm="B", asymm="A"))# 1 0.5 0.5 daisy(x4, type= list(symm=c("B","A"))) # dito; S+ : 1.41 1 1 daisy(x4, type= list(asymm=c("A","B"))) # dito, dito ## ----------- example(daisy) ----------------------- data(flower) data(agriculture) ## Example 1 in ref: ## Dissimilarities using Euclidean metric and without standardization (d.agr <- daisy(agriculture, metric = "euclidean", stand = FALSE)) (d.agr2 <- daisy(agriculture, metric = "manhattan")) ## Example 2 in ref (dfl0 <- daisy(flower)) stopifnot(identical(c(dfl0), c(daisy(flower, type = list(symm = 1)))) && identical(c(dfl0), c(daisy(flower, type = list(symm = 2)))) && identical(c(dfl0), c(daisy(flower, type = list(symm = 3)))) && identical(c(dfl0), c(daisy(flower, type = list(symm = c(1,3))))) ) (dfl1 <- daisy(flower, type = list(asymm = 3))) (dfl2 <- daisy(flower, type = list(asymm = c(1, 3), ordratio = 7))) (dfl3 <- daisy(flower, type = list(asymm = 1:3))) ## --- animals data(animals) d0 <- daisy(animals) d1 <- daisy(animals - 1, type=list(asymm=c(2,4))) (d2 <- daisy(animals - 1, type=list(symm = c(1,3,5,6), asymm=c(2,4)))) stopifnot(c(d1) == c(d2)) d3 <- daisy(2 - animals, type=list(asymm=c(2,4))) (d4 <- daisy(2 - animals, type=list(symm = c(1,3,5,6), asymm=c(2,4)))) stopifnot(c(d3) == c(d4)) pairs(cbind(d0,d2,d4), main = "Animals -- symmetric and asymm. dissimilarities") cluster/tests/clusplot-out.Rout.save0000644000176000001440000000616111460054255017415 0ustar ripleyusers R version 2.12.0 Patched (2010-10-19 r53372) Copyright (C) 2010 The R Foundation for Statistical Computing ISBN 3-900051-07-0 Platform: i686-pc-linux-gnu (32-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. 
Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(cluster) > > ### clusplot() & pam() RESULT checking ... > > ## plotting votes.diss(dissimilarity) in a bivariate plot and > ## partitioning into 2 clusters > data(votes.repub) > votes.diss <- daisy(votes.repub) > for(k in 2:4) { + votes.clus <- pam(votes.diss, k, diss = TRUE)$clustering + print(clusplot(votes.diss, votes.clus, diss = TRUE, shade = TRUE)) + } $Distances [,1] [,2] [1,] 0.0000 154.8601 [2,] 154.8601 0.0000 $Shading [1] 22.12103 20.87897 $Distances [,1] [,2] [,3] [1,] 0.0000 NA 140.8008 [2,] NA 0 NA [3,] 140.8008 NA 0.0000 $Shading [1] 25.602967 5.292663 15.104370 $Distances [,1] [,2] [,3] [,4] [1,] 0.0000 NA 117.2287 280.7259 [2,] NA 0 NA NA [3,] 117.2287 NA 0.0000 NA [4,] 280.7259 NA NA 0.0000 $Shading [1] 15.431339 3.980743 10.145454 19.442464 > > ## plotting iris (dataframe) in a 2-dimensional plot and partitioning > ## into 3 clusters. 
> data(iris) > iris.x <- iris[, 1:4] > > for(k in 2:5) + print(clusplot(iris.x, pam(iris.x, k)$clustering, diss = FALSE)) $Distances [,1] [,2] [1,] 0.0000000 0.5452161 [2,] 0.5452161 0.0000000 $Shading [1] 18.93861 24.06139 $Distances [,1] [,2] [,3] [1,] 0.000000 1.433071 2.851715 [2,] 1.433071 0.000000 NA [3,] 2.851715 NA 0.000000 $Shading [1] 18.987588 17.166940 9.845472 $Distances [,1] [,2] [,3] [,4] [1,] 0.000000 2.24157 1.6340329 3.0945887 [2,] 2.241570 0.00000 NA NA [3,] 1.634033 NA 0.0000000 0.9461858 [4,] 3.094589 NA 0.9461858 0.0000000 $Shading [1] 12.881766 14.912379 13.652381 7.553474 $Distances [,1] [,2] [,3] [,4] [,5] [1,] 0.000000 1.9899160 1.552387 3.11516148 3.96536391 [2,] 1.989916 0.0000000 NA NA 0.94713417 [3,] 1.552387 NA 0.000000 1.17348334 2.22769309 [4,] 3.115161 NA 1.173483 0.00000000 0.04539385 [5,] 3.965364 0.9471342 2.227693 0.04539385 0.00000000 $Shading [1] 10.369738 11.560147 10.088431 14.857590 5.124093 > > > .Random.seed <- c(0L,rep(7654L,3)) > ## generate 25 objects, divided into 2 clusters. > x <- rbind(cbind(rnorm(10,0,0.5), rnorm(10,0,0.5)), + cbind(rnorm(15,5,0.5), rnorm(15,5,0.5))) > print.default(clusplot(px2 <- pam(x, 2))) $Distances [,1] [,2] [1,] 0.000000 5.516876 [2,] 5.516876 0.000000 $Shading [1] 20.18314 22.81686 > > clusplot(px2, labels = 2, col.p = 1 + px2$clustering) > cluster/tests/clusplot-out.R0000644000176000001440000000152410761764276015744 0ustar ripleyuserslibrary(cluster) ### clusplot() & pam() RESULT checking ... ## plotting votes.diss(dissimilarity) in a bivariate plot and ## partitioning into 2 clusters data(votes.repub) votes.diss <- daisy(votes.repub) for(k in 2:4) { votes.clus <- pam(votes.diss, k, diss = TRUE)$clustering print(clusplot(votes.diss, votes.clus, diss = TRUE, shade = TRUE)) } ## plotting iris (dataframe) in a 2-dimensional plot and partitioning ## into 3 clusters. 
data(iris) iris.x <- iris[, 1:4] for(k in 2:5) print(clusplot(iris.x, pam(iris.x, k)$clustering, diss = FALSE)) .Random.seed <- c(0L,rep(7654L,3)) ## generate 25 objects, divided into 2 clusters. x <- rbind(cbind(rnorm(10,0,0.5), rnorm(10,0,0.5)), cbind(rnorm(15,5,0.5), rnorm(15,5,0.5))) print.default(clusplot(px2 <- pam(x, 2))) clusplot(px2, labels = 2, col.p = 1 + px2$clustering) cluster/tests/clara.Rout.save0000644000176000001440000011742111552355523016033 0ustar ripleyusers R version 2.13.0 Patched (2011-04-16 r55459) Copyright (C) 2011 The R Foundation for Statistical Computing ISBN 3-900051-07-0 Platform: x86_64-unknown-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(cluster) > > ## generate 1500 objects, divided into 2 clusters. 
> set.seed(144) > x <- rbind(cbind(rnorm(700, 0,8), rnorm(700, 0,8)), + cbind(rnorm(800,50,8), rnorm(800,10,8))) > > isEq <- function(x,y, epsF = 100) + is.logical(r <- all.equal(x,y, tol = epsF * .Machine$double.eps)) && r > > .proctime00 <- proc.time() > > ## full size sample {should be = pam()}: > n0 <- length(iSml <- c(1:70, 701:720)) > summary(clara0 <- clara(x[iSml,], k = 2, sampsize = n0)) Object of class 'clara' from call: clara(x = x[iSml, ], k = 2, sampsize = n0) Medoids: [,1] [,2] [1,] -1.499522 -1.944452 [2,] 48.629631 12.998515 Objective function: 10.23588 Numerical information per cluster: size max_diss av_diss isolation [1,] 70 24.81995 10.25745 0.4744879 [2,] 20 19.07782 10.16040 0.3647145 Average silhouette width per cluster: [1] 0.7144587 0.7090915 Average silhouette width of best sample: 0.713266 Best sample: [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 [26] 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 [51] 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 [76] 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 Clustering vector: [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [39] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 [77] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 Silhouette plot information for best sample: cluster neighbor sil_width 45 1 2 0.8033727 60 1 2 0.8021017 55 1 2 0.8005931 66 1 2 0.8002776 58 1 2 0.7991899 11 1 2 0.7991773 41 1 2 0.7973302 26 1 2 0.7962397 63 1 2 0.7962229 13 1 2 0.7949705 67 1 2 0.7942590 54 1 2 0.7936184 17 1 2 0.7916087 16 1 2 0.7913570 39 1 2 0.7912755 6 1 2 0.7840455 34 1 2 0.7833568 49 1 2 0.7819733 9 1 2 0.7789087 23 1 2 0.7785009 32 1 2 0.7757325 22 1 2 0.7655369 61 1 2 0.7639754 12 1 2 0.7639644 5 1 2 0.7606436 18 1 2 0.7579145 56 1 2 0.7566307 3 1 2 0.7537894 24 1 2 0.7531180 50 1 2 0.7517817 48 1 2 0.7501998 25 1 2 0.7499655 59 1 2 0.7472022 19 1 2 0.7445038 65 1 2 0.7398395 28 
1 2 0.7377377 38 1 2 0.7370935 7 1 2 0.7335940 40 1 2 0.7310012 14 1 2 0.7294895 62 1 2 0.7254478 70 1 2 0.7163214 4 1 2 0.7157257 21 1 2 0.7148663 64 1 2 0.7108496 2 1 2 0.7062831 15 1 2 0.7015120 52 1 2 0.6978313 37 1 2 0.6954023 31 1 2 0.6932905 33 1 2 0.6888478 10 1 2 0.6805028 20 1 2 0.6766854 43 1 2 0.6761461 8 1 2 0.6749706 27 1 2 0.6671817 35 1 2 0.6632888 68 1 2 0.6587599 30 1 2 0.6554989 36 1 2 0.6228481 53 1 2 0.6203313 57 1 2 0.6191666 42 1 2 0.6142020 47 1 2 0.6024151 1 1 2 0.5814464 69 1 2 0.5091186 46 1 2 0.4961302 44 1 2 0.4849961 29 1 2 0.4569316 51 1 2 0.4230181 81 2 1 0.7965942 71 2 1 0.7961971 85 2 1 0.7919593 74 2 1 0.7869047 82 2 1 0.7795304 78 2 1 0.7788873 79 2 1 0.7729041 72 2 1 0.7492980 88 2 1 0.7447973 87 2 1 0.7404399 76 2 1 0.7352351 77 2 1 0.7216838 86 2 1 0.7165677 84 2 1 0.6952406 73 2 1 0.6942882 83 2 1 0.6621568 80 2 1 0.6368446 90 2 1 0.5743228 75 2 1 0.5597232 89 2 1 0.4482549 4005 dissimilarities, summarized : Min. 1st Qu. Median Mean 3rd Qu. Max. 0.18647 11.58500 20.05800 27.81500 45.57800 85.23200 Metric : euclidean Number of objects : 90 Available components: [1] "sample" "medoids" "i.med" "clustering" "objective" [6] "clusinfo" "diss" "call" "silinfo" "data" > pam0 <- pam (x[iSml,], k = 2) > stopifnot(identical(clara0$clustering, pam0$clustering) + , isEq(clara0$objective, unname(pam0$objective[2])) + ) > > summary(clara2 <- clara(x, 2)) Object of class 'clara' from call: clara(x = x, k = 2) Medoids: [,1] [,2] [1,] 2.012828 -1.896095 [2,] 51.494628 10.274769 Objective function: 10.23445 Numerical information per cluster: size max_diss av_diss isolation [1,] 700 36.84408 10.49814 0.7230478 [2,] 800 30.89896 10.00373 0.6063775 Average silhouette width per cluster: [1] 0.7562366 0.7203254 Average silhouette width of best sample: 0.733384 Best sample: [1] 21 23 50 97 142 168 191 192 197 224 325 328 433 458 471 [16] 651 712 714 722 797 805 837 909 919 926 999 1006 1018 1019 1049 [31] 1081 1084 1132 1144 1150 1201 1207 1250 1291 
1307 1330 1374 1426 1428 Clustering vector: [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [38] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [75] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [112] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [149] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [186] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [223] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [260] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [297] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [334] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [371] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [408] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [445] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [482] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [519] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [556] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [593] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [630] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [667] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 [704] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [741] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [778] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [815] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [852] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [889] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 
2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [926] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [963] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [1000] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [1037] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [1074] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [1111] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [1148] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [1185] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [1222] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [1259] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [1296] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [1333] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [1370] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [1407] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [1444] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [1481] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 Silhouette plot information for best sample: cluster neighbor sil_width 325 1 2 0.8261589 191 1 2 0.8206687 23 1 2 0.8149640 97 1 2 0.8048084 433 1 2 0.8017745 458 1 2 0.8008324 471 1 2 0.7958547 328 1 2 0.7689099 142 1 2 0.7619508 21 1 2 0.7607528 197 1 2 0.7606641 50 1 2 0.7509131 192 1 2 0.7098473 651 1 2 0.7035969 224 1 2 0.6843886 168 1 2 0.5337006 1084 2 1 0.8180447 1081 2 1 0.8171686 1201 2 1 0.8170847 1291 2 1 0.8167148 1307 2 1 0.8166841 1144 2 1 0.8159947 999 2 1 0.8135303 1426 2 1 0.8023538 1049 2 1 0.8022891 1250 2 1 0.8014300 712 2 1 0.7859324 837 2 1 0.7792784 1018 2 1 0.7764837 919 2 1 0.7651939 1374 2 1 0.7648534 1428 2 1 0.7516819 1330 2 1 0.7505861 1006 2 1 0.7368113 714 2 
1 0.7237565 1150 2 1 0.7046060 1132 2 1 0.6940608 909 2 1 0.6859682 926 2 1 0.6725631 722 2 1 0.6572791 797 2 1 0.6395698 1019 2 1 0.6083662 805 2 1 0.2814164 1207 2 1 0.2694097 946 dissimilarities, summarized : Min. 1st Qu. Median Mean 3rd Qu. Max. 0.48462 12.32300 26.49900 32.21300 52.39100 77.17500 Metric : euclidean Number of objects : 44 Available components: [1] "sample" "medoids" "i.med" "clustering" "objective" [6] "clusinfo" "diss" "call" "silinfo" "data" > > clInd <- c("objective", "i.med", "medoids", "clusinfo") > clInS <- c(clInd, "sample") > ## clara() {as original code} always draws the *same* random samples !!!! > clara(x, 2, samples = 50)[clInd] $objective [1] 10.06735 $i.med [1] 177 1115 $medoids [,1] [,2] [1,] -0.2538744 -1.209148 [2,] 50.0372683 9.501125 $clusinfo size max_diss av_diss isolation [1,] 700 34.67208 10.193945 0.6743054 [2,] 800 29.51964 9.956571 0.5741003 > for(i in 1:20) + print(clara(x[sample(nrow(x)),], 2, samples = 50)[clInd]) $objective [1] 10.05727 $i.med [1] 936 192 $medoids [,1] [,2] [1,] 50.03726827 9.501124850 [2,] -0.03900399 -0.009078886 $clusinfo size max_diss av_diss isolation [1,] 800 29.51964 9.956571 0.5791419 [2,] 700 34.06055 10.172348 0.6682295 $objective [1] 10.05296 $i.med [1] 468 1394 $medoids [,1] [,2] [1,] -0.3292826 -0.2398794 [2,] 50.0372683 9.5011249 $clusinfo size max_diss av_diss isolation [1,] 700 33.98451 10.163128 0.6624677 [2,] 800 29.51964 9.956571 0.5754330 $objective [1] 10.05852 $i.med [1] 1171 379 $medoids [,1] [,2] [1,] 50.9444060 9.6723175 [2,] -0.3292826 -0.2398794 $clusinfo size max_diss av_diss isolation [1,] 800 30.10388 9.966988 0.5764486 [2,] 700 33.98451 10.163128 0.6507574 $objective [1] 10.07051 $i.med [1] 75 1254 $medoids [,1] [,2] [1,] -0.9493373 0.3552542 [2,] 50.5455985 9.3904972 $clusinfo size max_diss av_diss isolation [1,] 700 33.12704 10.191999 0.6336273 [2,] 800 29.66384 9.964205 0.5673860 $objective [1] 10.0613 $i.med [1] 199 134 $medoids [,1] [,2] [1,] -0.03900399 
-0.009078886 [2,] 49.59384120 9.792964832 $clusinfo size max_diss av_diss isolation [1,] 700 34.06055 10.172348 0.6732466 [2,] 800 29.57491 9.964138 0.5845827 $objective [1] 10.06101 $i.med [1] 1453 1122 $medoids [,1] [,2] [1,] 50.0372683 9.50112485 [2,] -0.9691441 0.03342515 $clusinfo size max_diss av_diss isolation [1,] 800 29.51964 9.956571 0.5690241 [2,] 700 33.31923 10.180359 0.6422655 $objective [1] 10.08603 $i.med [1] 613 318 $medoids [,1] [,2] [1,] 50.0627056 9.478225 [2,] -0.2902194 1.026496 $clusinfo size max_diss av_diss isolation [1,] 800 29.51131 9.957225 0.5780037 [2,] 700 33.21560 10.233240 0.6505552 $objective [1] 10.07293 $i.med [1] 618 406 $medoids [,1] [,2] [1,] 50.3621263 9.0207185 [2,] -0.2092816 -0.5916053 $clusinfo size max_diss av_diss isolation [1,] 800 29.25143 9.990206 0.5682446 [2,] 700 34.30301 10.167473 0.6663777 $objective [1] 10.0592 $i.med [1] 1279 1349 $medoids [,1] [,2] [1,] 50.1502433 10.60358224 [2,] -0.9691441 0.03342515 $clusinfo size max_diss av_diss isolation [1,] 800 30.54975 9.953191 0.5852356 [2,] 700 33.31923 10.180359 0.6382900 $objective [1] 10.06241 $i.med [1] 1293 21 $medoids [,1] [,2] [1,] 50.5809098 9.7418386 [2,] -0.9493373 0.3552542 $clusinfo size max_diss av_diss isolation [1,] 800 29.98892 9.949013 0.5725461 [2,] 700 33.12704 10.191999 0.6324587 $objective [1] 10.0592 $i.med [1] 337 675 $medoids [,1] [,2] [1,] -0.9691441 0.03342515 [2,] 50.1502433 10.60358224 $clusinfo size max_diss av_diss isolation [1,] 700 33.31923 10.180359 0.6382900 [2,] 800 30.54975 9.953191 0.5852356 $objective [1] 10.05697 $i.med [1] 22 574 $medoids [,1] [,2] [1,] 50.5809098 9.74183863 [2,] -0.9691441 0.03342515 $clusinfo size max_diss av_diss isolation [1,] 800 29.98892 9.949013 0.5716937 [2,] 700 33.31923 10.180359 0.6351809 $objective [1] 10.05096 $i.med [1] 739 808 $medoids [,1] [,2] [1,] 50.5809098 9.7418386 [2,] -0.2092816 -0.5916053 $clusinfo size max_diss av_diss isolation [1,] 800 29.98892 9.949013 0.5785936 [2,] 700 34.30301 
10.167473 0.6618278 $objective [1] 10.06135 $i.med [1] 1431 485 $medoids [,1] [,2] [1,] 50.0627056 9.47822525 [2,] -0.9691441 0.03342515 $clusinfo size max_diss av_diss isolation [1,] 800 29.51131 9.957225 0.5686352 [2,] 700 33.31923 10.180359 0.6420076 $objective [1] 10.05324 $i.med [1] 10 1221 $medoids [,1] [,2] [1,] 50.58090982 9.741838628 [2,] -0.03900399 -0.009078886 $clusinfo size max_diss av_diss isolation [1,] 800 29.98892 9.949013 0.5817385 [2,] 700 34.06055 10.172348 0.6607218 $objective [1] 10.06101 $i.med [1] 1249 1411 $medoids [,1] [,2] [1,] -0.9691441 0.03342515 [2,] 50.0372683 9.50112485 $clusinfo size max_diss av_diss isolation [1,] 700 33.31923 10.180359 0.6422655 [2,] 800 29.51964 9.956571 0.5690241 $objective [1] 10.05296 $i.med [1] 610 21 $medoids [,1] [,2] [1,] -0.3292826 -0.2398794 [2,] 50.0372683 9.5011249 $clusinfo size max_diss av_diss isolation [1,] 700 33.98451 10.163128 0.6624677 [2,] 800 29.51964 9.956571 0.5754330 $objective [1] 10.06486 $i.med [1] 1101 397 $medoids [,1] [,2] [1,] -0.9691441 0.03342515 [2,] 50.1066826 9.35514422 $clusinfo size max_diss av_diss isolation [1,] 700 33.31923 10.180359 0.6417479 [2,] 800 29.42336 9.963794 0.5667111 $objective [1] 10.07521 $i.med [1] 838 356 $medoids [,1] [,2] [1,] 50.36212634 9.020718482 [2,] -0.03900399 -0.009078886 $clusinfo size max_diss av_diss isolation [1,] 800 29.25143 9.990206 0.5712766 [2,] 700 34.06055 10.172348 0.6651980 $objective [1] 10.05906 $i.med [1] 1270 1024 $medoids [,1] [,2] [1,] 50.5455985 9.3904972 [2,] -0.2092816 -0.5916053 $clusinfo size max_diss av_diss isolation [1,] 800 29.66384 9.964205 0.5734673 [2,] 700 34.30301 10.167473 0.6631526 > > clara(x, 2, samples = 101)[clInd] $objective [1] 10.05727 $i.med [1] 286 1115 $medoids [,1] [,2] [1,] -0.03900399 -0.009078886 [2,] 50.03726827 9.501124850 $clusinfo size max_diss av_diss isolation [1,] 700 34.06055 10.172348 0.6682295 [2,] 800 29.51964 9.956571 0.5791419 > clara(x, 2, samples = 149)[clInd] $objective [1] 
10.05319 $i.med [1] 238 1272 $medoids [,1] [,2] [1,] -0.2092816 -0.5916053 [2,] 50.1502433 10.6035822 $clusinfo size max_diss av_diss isolation [1,] 700 34.30301 10.167473 0.6649301 [2,] 800 30.54975 9.953191 0.5921768 > clara(x, 2, samples = 200)[clInd] $objective [1] 10.05319 $i.med [1] 238 1272 $medoids [,1] [,2] [1,] -0.2092816 -0.5916053 [2,] 50.1502433 10.6035822 $clusinfo size max_diss av_diss isolation [1,] 700 34.30301 10.167473 0.6649301 [2,] 800 30.54975 9.953191 0.5921768 > ## Note that this last one is practically identical to the slower pam() one > > (ii <- sample(length(x), 20)) [1] 249 452 2663 2537 2235 2421 1004 1834 2602 397 717 2805 1575 1281 283 [16] 1657 1749 820 269 519 > ## This was bogous (and lead to seg.faults); now properly gives error. > ## but for these, now see ./clara-NAs.R > if(FALSE) { ## ~~~~~~~~~~~~~ + x[ii] <- NA + try( clara(x, 2, samples = 50) ) + } > > ###-- Larger example: 2000 objects, divided into 5 clusters. > x5 <- rbind(cbind(rnorm(400, 0,4), rnorm(400, 0,4)), + cbind(rnorm(400,10,8), rnorm(400,40,6)), + cbind(rnorm(400,30,4), rnorm(400, 0,4)), + cbind(rnorm(400,40,4), rnorm(400,20,2)), + cbind(rnorm(400,50,4), rnorm(400,50,4))) > ## plus 1 random dimension > x5 <- cbind(x5, rnorm(nrow(x5))) > > clara(x5, 5) Call: clara(x = x5, k = 5) Medoids: [,1] [,2] [,3] [1,] 0.5850466 -2.222194 -0.63631241 [2,] 8.0131143 42.708122 -0.31693240 [3,] 42.6657812 21.123133 -0.62411426 [4,] 50.6470292 48.480686 -0.09146223 [5,] 28.6470950 -2.544131 -0.22186047 Objective function: 6.100721 Clustering vector: int [1:2000] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ... 
Cluster sizes: 400 396 408 401 395 Best sample: [1] 23 130 178 202 267 297 338 357 376 387 439 441 638 647 662 [16] 719 723 802 874 880 994 1038 1056 1097 1184 1215 1225 1268 1271 1282 [31] 1346 1442 1446 1474 1496 1515 1585 1590 1605 1641 1680 1687 1696 1728 1742 [46] 1761 1857 1909 1951 1956 Available components: [1] "sample" "medoids" "i.med" "clustering" "objective" [6] "clusinfo" "diss" "call" "silinfo" "data" > summary(clara(x5, 5, samples = 50)) Object of class 'clara' from call: clara(x = x5, k = 5, samples = 50) Medoids: [,1] [,2] [,3] [1,] -0.8427864 0.1606105 -0.70362181 [2,] 12.0389703 39.0303445 0.19158023 [3,] 39.6341676 20.7182868 0.43978514 [4,] 50.6470292 48.4806864 -0.09146223 [5,] 30.6814242 -0.1072177 -0.25861548 Objective function: 5.743812 Numerical information per cluster: size max_diss av_diss isolation [1,] 400 15.20728 5.207177 0.4823345 [2,] 397 24.25898 8.677062 0.7324727 [3,] 406 18.39064 4.369617 0.8109074 [4,] 401 18.28050 5.260543 0.6119680 [5,] 396 12.69653 5.243478 0.5598344 Average silhouette width per cluster: [1] 0.7433532 0.6956424 0.7315944 0.7336104 0.7079360 Average silhouette width of best sample: 0.7188531 Best sample: [1] 106 130 145 213 275 316 434 444 486 501 630 693 713 739 773 [16] 804 808 821 823 899 914 948 961 972 980 987 1076 1114 1126 1127 [31] 1169 1175 1203 1225 1228 1242 1269 1397 1405 1421 1595 1606 1658 1703 1777 [46] 1834 1857 1881 1937 1999 Clustering vector: [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [38] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [75] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [112] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [149] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [186] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [223] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
[260] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [297] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [334] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [371] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 [408] 2 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [445] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [482] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [519] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [556] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [593] 2 2 2 2 3 2 2 2 2 2 2 2 2 2 2 4 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [630] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [667] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [704] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [741] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [778] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 5 5 5 5 5 5 5 5 5 5 5 5 5 5 [815] 5 3 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 [852] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 3 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 [889] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 [926] 5 5 5 5 5 5 5 5 5 3 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 [963] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 [1000] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 [1037] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 [1074] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 3 5 5 5 5 5 5 5 5 5 5 5 5 5 5 [1111] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 [1148] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 
5 5 5 [1185] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 [1222] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 [1259] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 [1296] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 [1333] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 [1370] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 [1407] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 [1444] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 [1481] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 [1518] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 [1555] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 [1592] 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 [1629] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 [1666] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 [1703] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 [1740] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 [1777] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 [1814] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 [1851] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 [1888] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 [1925] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 [1962] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 [1999] 4 4 Silhouette plot information for best sample: cluster neighbor sil_width 130 1 5 0.8123353 275 1 5 0.7945197 316 1 5 0.7561799 213 1 5 0.7459412 106 1 5 0.6869957 145 1 5 0.6641473 630 2 3 0.7819320 
739 2 3 0.7774128 486 2 3 0.7559683 713 2 3 0.7316982 444 2 3 0.7204625 501 2 3 0.7091146 773 2 1 0.6886472 693 2 3 0.5855803 434 2 3 0.5099654 1225 3 5 0.8105776 1203 3 5 0.7965773 1595 3 5 0.7842711 1269 3 5 0.7799931 1242 3 5 0.7625442 1397 3 5 0.7315512 1228 3 5 0.7262025 1421 3 5 0.6011616 1405 3 5 0.5914707 1999 4 3 0.8050046 1857 4 3 0.8030709 1658 4 3 0.7941141 1777 4 3 0.7865209 1937 4 3 0.7831996 1881 4 3 0.7504779 1834 4 3 0.6614223 1606 4 3 0.6373808 1703 4 3 0.5813025 804 5 3 0.8021043 987 5 3 0.7999064 1076 5 3 0.7907769 948 5 3 0.7905304 961 5 3 0.7716289 823 5 3 0.7657693 808 5 3 0.7510670 914 5 3 0.7358231 1175 5 3 0.7337485 1169 5 3 0.7254812 972 5 3 0.7118795 821 5 3 0.7101558 899 5 1 0.6580927 1114 5 3 0.6552887 1127 5 3 0.6292428 1126 5 3 0.5362475 980 5 1 0.4671695 1225 dissimilarities, summarized : Min. 1st Qu. Median Mean 3rd Qu. Max. 0.69682 19.31600 34.09200 33.07000 46.25400 92.25300 Metric : euclidean Number of objects : 50 Available components: [1] "sample" "medoids" "i.med" "clustering" "objective" [6] "clusinfo" "diss" "call" "silinfo" "data" > ## 3 "half" samples: > clara(x5, 5, samples = 999) Call: clara(x = x5, k = 5, samples = 999) Medoids: [,1] [,2] [,3] [1,] 0.2143499 0.3891695 0.45577894 [2,] 10.9779485 39.6788652 -0.23487762 [3,] 40.2944064 20.2221253 0.21417849 [4,] 50.7170411 49.7645642 -0.43318939 [5,] 29.7257398 -0.5981739 -0.05616701 Objective function: 5.659041 Clustering vector: int [1:2000] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ... 
Cluster sizes: 400 397 407 401 395 Best sample: [1] 1 2 103 147 155 176 179 247 262 288 365 369 372 470 486 [16] 573 759 779 785 791 797 822 875 883 913 954 1107 1114 1154 1156 [31] 1171 1175 1206 1213 1218 1233 1243 1394 1439 1444 1512 1741 1777 1798 1800 [46] 1818 1845 1946 1948 1973 Available components: [1] "sample" "medoids" "i.med" "clustering" "objective" [6] "clusinfo" "diss" "call" "silinfo" "data" > clara(x5, 5, samples = 1000) Call: clara(x = x5, k = 5, samples = 1000) Medoids: [,1] [,2] [,3] [1,] 0.2143499 0.3891695 0.45577894 [2,] 10.9779485 39.6788652 -0.23487762 [3,] 40.2944064 20.2221253 0.21417849 [4,] 50.7170411 49.7645642 -0.43318939 [5,] 29.7257398 -0.5981739 -0.05616701 Objective function: 5.659041 Clustering vector: int [1:2000] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ... Cluster sizes: 400 397 407 401 395 Best sample: [1] 1 2 103 147 155 176 179 247 262 288 365 369 372 470 486 [16] 573 759 779 785 791 797 822 875 883 913 954 1107 1114 1154 1156 [31] 1171 1175 1206 1213 1218 1233 1243 1394 1439 1444 1512 1741 1777 1798 1800 [46] 1818 1845 1946 1948 1973 Available components: [1] "sample" "medoids" "i.med" "clustering" "objective" [6] "clusinfo" "diss" "call" "silinfo" "data" > clara(x5, 5, samples = 1001) Call: clara(x = x5, k = 5, samples = 1001) Medoids: [,1] [,2] [,3] [1,] 0.2143499 0.3891695 0.45577894 [2,] 10.9779485 39.6788652 -0.23487762 [3,] 40.2944064 20.2221253 0.21417849 [4,] 50.7170411 49.7645642 -0.43318939 [5,] 29.7257398 -0.5981739 -0.05616701 Objective function: 5.659041 Clustering vector: int [1:2000] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ... 
Cluster sizes: 400 397 407 401 395 Best sample: [1] 1 2 103 147 155 176 179 247 262 288 365 369 372 470 486 [16] 573 759 779 785 791 797 822 875 883 913 954 1107 1114 1154 1156 [31] 1171 1175 1206 1213 1218 1233 1243 1394 1439 1444 1512 1741 1777 1798 1800 [46] 1818 1845 1946 1948 1973 Available components: [1] "sample" "medoids" "i.med" "clustering" "objective" [6] "clusinfo" "diss" "call" "silinfo" "data" > > clara(x5, 5, samples = 2000)#full sample Call: clara(x = x5, k = 5, samples = 2000) Medoids: [,1] [,2] [,3] [1,] 0.2143499 0.3891695 0.45577894 [2,] 10.5993345 39.8970536 -0.39199265 [3,] 40.3370139 20.3148331 -0.06033818 [4,] 50.7170411 49.7645642 -0.43318939 [5,] 29.7257398 -0.5981739 -0.05616701 Objective function: 5.65785 Clustering vector: int [1:2000] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ... Cluster sizes: 400 397 407 401 395 Best sample: [1] 84 106 164 226 284 288 329 423 430 450 469 593 603 654 742 [16] 887 929 970 974 1035 1043 1096 1171 1187 1192 1302 1307 1327 1371 1431 [31] 1433 1439 1440 1452 1513 1522 1525 1548 1565 1593 1620 1639 1654 1688 1740 [46] 1761 1832 1845 1895 1899 Available components: [1] "sample" "medoids" "i.med" "clustering" "objective" [6] "clusinfo" "diss" "call" "silinfo" "data" > > ###--- Start a version of example(clara) ------- > > ## xclara : artificial data with 3 clusters of 1000 bivariate objects each. > data(xclara) > (clx3 <- clara(xclara, 3)) Call: clara(x = xclara, k = 3) Medoids: V1 V2 [1,] 5.553391 13.306260 [2,] 43.198760 60.360720 [3,] 74.591890 -6.969018 Objective function: 13.225 Clustering vector: int [1:3000] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ... 
Cluster sizes: 900 1148 952 Best sample: [1] 20 30 46 91 92 169 179 187 209 223 382 450 555 971 1004 [16] 1025 1058 1277 1281 1302 1319 1361 1362 1513 1591 1623 1628 1729 1752 1791 [31] 1907 1917 1946 2064 2089 2498 2527 2537 2545 2591 2672 2722 2729 2790 2797 [46] 2852 Available components: [1] "sample" "medoids" "i.med" "clustering" "objective" [6] "clusinfo" "diss" "call" "silinfo" "data" > ## Plot similar to Figure 5 in Struyf et al (1996) > plot(clx3) > > ## The rngR = TRUE case is currently in the non-strict tests > ## ./clara-ex.R > ## ~~~~~~~~~~~~ > > ###--- End version of example(clara) ------- > > ## small example(s): > data(ruspini) > > clara(ruspini,4) Call: clara(x = ruspini, k = 4) Medoids: x y 10 19 65 32 44 149 52 99 119 67 66 18 Objective function: 11.51066 Clustering vector: Named int [1:75] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ... - attr(*, "names")= chr [1:75] "1" "2" "3" "4" "5" "6" "7" ... Cluster sizes: 20 23 17 15 Best sample: [1] 2 3 4 5 6 7 8 9 10 16 18 19 20 21 22 23 25 29 30 32 34 35 36 37 41 [26] 42 43 44 46 47 49 50 52 53 54 58 59 60 61 63 65 66 67 69 71 72 73 75 Available components: [1] "sample" "medoids" "i.med" "clustering" "objective" [6] "clusinfo" "diss" "call" "silinfo" "data" > > rus <- data.matrix(ruspini); storage.mode(rus) <- "double" > ru2 <- rus[c(1:7,21:28, 45:51, 61:69),] > ru3 <- rus[c(1:4,21:25, 45:48, 61:63),] > ru4 <- rus[c(1:2,21:22, 45:47),] > ru5 <- rus[c(1:2,21, 45),] > daisy(ru5, "manhattan") Dissimilarities : 1 2 21 2 11 21 118 107 45 143 132 89 Metric : manhattan Number of objects : 4 > ## Dissimilarities : 11 118 143 107 132 89 > > ## no problem anymore, since 2002-12-28: > ## sampsize >= k+1 is now enforced: > ## clara(ru5, k=3, met="manhattan", sampsize=3,trace=2)[clInS] > clara(ru5, k=3, met="manhattan", sampsize=4,trace=1)[clInS] calling .C(cl_clara, ..., DUP = FALSE): C clara(): (nsam,nran,n) = (4,5,4); 'full_sample', C clara(): sample 1 C clara(): best sample _found_ --> dysta2(nbest), resul(), end 
$objective [1] 2.75 $i.med [1] 2 3 4 $medoids x y 2 5 63 21 28 147 45 85 115 $clusinfo size max_diss av_diss isolation [1,] 2 11 5.5 0.1028037 [2,] 1 0 0.0 0.0000000 [3,] 1 0 0.0 0.0000000 $sample [1] "1" "2" "21" "45" > > daisy(ru4, "manhattan") Dissimilarities : 1 2 21 22 45 46 2 11 21 118 107 22 124 113 6 45 143 132 89 87 46 124 113 108 106 19 47 115 104 103 101 28 9 Metric : manhattan Number of objects : 7 > ## this one (k=3) gave problems, from ss = 6 on ___ still after 2002-12-28 ___ : > for(ss in 4:nrow(ru4)){ + cat("---\n\nsample size = ",ss,"\n") + print(clara(ru4,k=3,met="manhattan",sampsize=ss)[clInS]) + } --- sample size = 4 $objective [1] 7.714286 $i.med [1] 1 4 7 $medoids x y 1 4 53 22 32 149 47 78 94 $clusinfo size max_diss av_diss isolation [1,] 2 11 5.50000 0.09565217 [2,] 2 6 3.00000 0.05940594 [3,] 3 28 12.33333 0.27722772 $sample [1] "1" "22" "45" "47" --- sample size = 5 $objective [1] 7.714286 $i.med [1] 2 3 7 $medoids x y 2 5 63 21 28 147 47 78 94 $clusinfo size max_diss av_diss isolation [1,] 2 11 5.50000 0.10576923 [2,] 2 6 3.00000 0.05825243 [3,] 3 28 12.33333 0.27184466 $sample [1] "2" "21" "22" "45" "47" --- sample size = 6 $objective [1] 6.428571 $i.med [1] 2 4 6 $medoids x y 2 5 63 22 32 149 46 85 96 $clusinfo size max_diss av_diss isolation [1,] 2 11 5.500000 0.09734513 [2,] 2 6 3.000000 0.05660377 [3,] 3 19 9.333333 0.17924528 $sample [1] "2" "21" "22" "45" "46" "47" --- sample size = 7 $objective [1] 6.428571 $i.med [1] 2 4 6 $medoids x y 2 5 63 22 32 149 46 85 96 $clusinfo size max_diss av_diss isolation [1,] 2 11 5.500000 0.09734513 [2,] 2 6 3.000000 0.05660377 [3,] 3 19 9.333333 0.17924528 $sample [1] "1" "2" "21" "22" "45" "46" "47" > for(ss in 5:nrow(ru3)){ + cat("---\n\nsample size = ",ss,"\n") + print(clara(ru3,k=4,met="manhattan",sampsize=ss)[clInS]) + } --- sample size = 5 $objective [1] 13.625 $i.med [1] 4 5 10 15 $medoids x y 4 9 77 21 28 147 45 85 115 62 77 12 $clusinfo size max_diss av_diss isolation [1,] 4 29 16.50 
0.3258427 [2,] 5 14 9.00 0.1573034 [3,] 4 30 19.25 0.3370787 [4,] 3 15 10.00 0.1351351 $sample [1] "3" "4" "21" "45" "62" --- sample size = 6 $objective [1] 9.0625 $i.med [1] 3 7 13 15 $medoids x y 3 10 59 23 35 153 48 74 96 62 77 12 $clusinfo size max_diss av_diss isolation [1,] 4 19 10.00 0.1881188 [2,] 5 13 5.60 0.1354167 [3,] 4 30 11.75 0.3448276 [4,] 3 15 10.00 0.1724138 $sample [1] "3" "21" "23" "45" "48" "62" --- sample size = 7 $objective [1] 9.0625 $i.med [1] 3 7 13 15 $medoids x y 3 10 59 23 35 153 48 74 96 62 77 12 $clusinfo size max_diss av_diss isolation [1,] 4 19 10.00 0.1881188 [2,] 5 13 5.60 0.1354167 [3,] 4 30 11.75 0.3448276 [4,] 3 15 10.00 0.1724138 $sample [1] "2" "3" "21" "23" "45" "48" "62" --- sample size = 8 $objective [1] 8.8125 $i.med [1] 3 7 12 15 $medoids x y 3 10 59 23 35 153 47 78 94 62 77 12 $clusinfo size max_diss av_diss isolation [1,] 4 19 10.00 0.1844660 [2,] 5 13 5.60 0.1274510 [3,] 4 28 10.75 0.3373494 [4,] 3 15 10.00 0.1807229 $sample [1] "3" "21" "23" "46" "47" "48" "61" "62" --- sample size = 9 $objective [1] 9.3125 $i.med [1] 2 6 11 16 $medoids x y 2 5 63 22 32 149 46 85 96 63 83 21 $clusinfo size max_diss av_diss isolation [1,] 4 18 9.50 0.1592920 [2,] 5 8 5.40 0.0754717 [3,] 4 19 9.75 0.2467532 [4,] 3 30 15.00 0.3896104 $sample [1] "2" "21" "22" "23" "45" "46" "47" "61" "63" --- sample size = 10 $objective [1] 8.5625 $i.med [1] 3 7 11 15 $medoids x y 3 10 59 23 35 153 46 85 96 62 77 12 $clusinfo size max_diss av_diss isolation [1,] 4 19 10.00 0.1696429 [2,] 5 13 5.60 0.1214953 [3,] 4 19 9.75 0.2065217 [4,] 3 15 10.00 0.1630435 $sample [1] "2" "3" "22" "23" "45" "46" "47" "61" "62" "63" --- sample size = 11 $objective [1] 8.6875 $i.med [1] 2 7 12 15 $medoids x y 2 5 63 23 35 153 47 78 94 62 77 12 $clusinfo size max_diss av_diss isolation [1,] 4 18 9.50 0.1730769 [2,] 5 13 5.60 0.1274510 [3,] 4 28 10.75 0.3373494 [4,] 3 15 10.00 0.1807229 $sample [1] "1" "2" "3" "4" "23" "24" "25" "45" "47" "48" "62" --- sample size = 12 
$objective [1] 8.8125 $i.med [1] 3 7 12 15 $medoids x y 3 10 59 23 35 153 47 78 94 62 77 12 $clusinfo size max_diss av_diss isolation [1,] 4 19 10.00 0.1844660 [2,] 5 13 5.60 0.1274510 [3,] 4 28 10.75 0.3373494 [4,] 3 15 10.00 0.1807229 $sample [1] "2" "3" "22" "23" "24" "25" "46" "47" "48" "61" "62" "63" --- sample size = 13 $objective [1] 8.4375 $i.med [1] 2 7 11 15 $medoids x y 2 5 63 23 35 153 46 85 96 62 77 12 $clusinfo size max_diss av_diss isolation [1,] 4 18 9.50 0.1592920 [2,] 5 13 5.60 0.1214953 [3,] 4 19 9.75 0.2065217 [4,] 3 15 10.00 0.1630435 $sample [1] "1" "2" "4" "22" "23" "24" "25" "45" "46" "47" "61" "62" "63" --- sample size = 14 $objective [1] 8.4375 $i.med [1] 2 7 11 15 $medoids x y 2 5 63 23 35 153 46 85 96 62 77 12 $clusinfo size max_diss av_diss isolation [1,] 4 18 9.50 0.1592920 [2,] 5 13 5.60 0.1214953 [3,] 4 19 9.75 0.2065217 [4,] 3 15 10.00 0.1630435 $sample [1] "2" "3" "4" "22" "23" "24" "25" "45" "46" "47" "48" "61" "62" "63" --- sample size = 15 $objective [1] 8.375 $i.med [1] 2 6 11 15 $medoids x y 2 5 63 22 32 149 46 85 96 62 77 12 $clusinfo size max_diss av_diss isolation [1,] 4 18 9.50 0.1592920 [2,] 5 8 5.40 0.0754717 [3,] 4 19 9.75 0.2065217 [4,] 3 15 10.00 0.1630435 $sample [1] "2" "3" "4" "21" "22" "23" "24" "25" "45" "46" "47" "48" "61" "62" "63" --- sample size = 16 $objective [1] 8.375 $i.med [1] 2 6 11 15 $medoids x y 2 5 63 22 32 149 46 85 96 62 77 12 $clusinfo size max_diss av_diss isolation [1,] 4 18 9.50 0.1592920 [2,] 5 8 5.40 0.0754717 [3,] 4 19 9.75 0.2065217 [4,] 3 15 10.00 0.1630435 $sample [1] "1" "2" "3" "4" "21" "22" "23" "24" "25" "45" "46" "47" "48" "61" "62" [16] "63" > > ## Last Line: > cat('Time elapsed: ', proc.time() - .proctime00,'\n') Time elapsed: 2.83 0 2.82 0 0 > ## Lynne (P IV, 1.6 GHz): 18.81; then (no NA; R 1.9.0-alpha): 15.07 > ## nb-mm (P III,700 MHz): 27.97 > cluster/tests/clara.R0000644000176000001440000000644011466450711014343 0ustar ripleyuserslibrary(cluster) ## generate 1500 objects, 
divided into 2 clusters. set.seed(144) x <- rbind(cbind(rnorm(700, 0,8), rnorm(700, 0,8)), cbind(rnorm(800,50,8), rnorm(800,10,8))) isEq <- function(x,y, epsF = 100) is.logical(r <- all.equal(x,y, tol = epsF * .Machine$double.eps)) && r .proctime00 <- proc.time() ## full size sample {should be = pam()}: n0 <- length(iSml <- c(1:70, 701:720)) summary(clara0 <- clara(x[iSml,], k = 2, sampsize = n0)) pam0 <- pam (x[iSml,], k = 2) stopifnot(identical(clara0$clustering, pam0$clustering) , isEq(clara0$objective, unname(pam0$objective[2])) ) summary(clara2 <- clara(x, 2)) clInd <- c("objective", "i.med", "medoids", "clusinfo") clInS <- c(clInd, "sample") ## clara() {as original code} always draws the *same* random samples !!!! clara(x, 2, samples = 50)[clInd] for(i in 1:20) print(clara(x[sample(nrow(x)),], 2, samples = 50)[clInd]) clara(x, 2, samples = 101)[clInd] clara(x, 2, samples = 149)[clInd] clara(x, 2, samples = 200)[clInd] ## Note that this last one is practically identical to the slower pam() one (ii <- sample(length(x), 20)) ## This was bogous (and lead to seg.faults); now properly gives error. ## but for these, now see ./clara-NAs.R if(FALSE) { ## ~~~~~~~~~~~~~ x[ii] <- NA try( clara(x, 2, samples = 50) ) } ###-- Larger example: 2000 objects, divided into 5 clusters. x5 <- rbind(cbind(rnorm(400, 0,4), rnorm(400, 0,4)), cbind(rnorm(400,10,8), rnorm(400,40,6)), cbind(rnorm(400,30,4), rnorm(400, 0,4)), cbind(rnorm(400,40,4), rnorm(400,20,2)), cbind(rnorm(400,50,4), rnorm(400,50,4))) ## plus 1 random dimension x5 <- cbind(x5, rnorm(nrow(x5))) clara(x5, 5) summary(clara(x5, 5, samples = 50)) ## 3 "half" samples: clara(x5, 5, samples = 999) clara(x5, 5, samples = 1000) clara(x5, 5, samples = 1001) clara(x5, 5, samples = 2000)#full sample ###--- Start a version of example(clara) ------- ## xclara : artificial data with 3 clusters of 1000 bivariate objects each. 
data(xclara) (clx3 <- clara(xclara, 3)) ## Plot similar to Figure 5 in Struyf et al (1996) plot(clx3) ## The rngR = TRUE case is currently in the non-strict tests ## ./clara-ex.R ## ~~~~~~~~~~~~ ###--- End version of example(clara) ------- ## small example(s): data(ruspini) clara(ruspini,4) rus <- data.matrix(ruspini); storage.mode(rus) <- "double" ru2 <- rus[c(1:7,21:28, 45:51, 61:69),] ru3 <- rus[c(1:4,21:25, 45:48, 61:63),] ru4 <- rus[c(1:2,21:22, 45:47),] ru5 <- rus[c(1:2,21, 45),] daisy(ru5, "manhattan") ## Dissimilarities : 11 118 143 107 132 89 ## no problem anymore, since 2002-12-28: ## sampsize >= k+1 is now enforced: ## clara(ru5, k=3, met="manhattan", sampsize=3,trace=2)[clInS] clara(ru5, k=3, met="manhattan", sampsize=4,trace=1)[clInS] daisy(ru4, "manhattan") ## this one (k=3) gave problems, from ss = 6 on ___ still after 2002-12-28 ___ : for(ss in 4:nrow(ru4)){ cat("---\n\nsample size = ",ss,"\n") print(clara(ru4,k=3,met="manhattan",sampsize=ss)[clInS]) } for(ss in 5:nrow(ru3)){ cat("---\n\nsample size = ",ss,"\n") print(clara(ru3,k=4,met="manhattan",sampsize=ss)[clInS]) } ## Last Line: cat('Time elapsed: ', proc.time() - .proctime00,'\n') ## Lynne (P IV, 1.6 GHz): 18.81; then (no NA; R 1.9.0-alpha): 15.07 ## nb-mm (P III,700 MHz): 27.97 cluster/tests/clara-ex.R0000644000176000001440000000156711646600064014760 0ustar ripleyusers#### These are *NOT* compared with output in the released version of ### 'cluster' currently library(cluster) data(xclara) ## Try 100 times *different* random samples -- for reliability: if(R.version$major != "1" || as.numeric(R.version$minor) >= 7) RNGversion("1.6") nSim <- 100 nCl <- 3 # = no.classes set.seed(421)# (reproducibility) ## unknown problem: this is still platform dependent to some extent: cl <- matrix(NA,nrow(xclara), nSim) for(i in 1:nSim) cl[,i] <- clara(xclara, nCl, rngR = TRUE)$cluster tcl <- apply(cl,1, tabulate, nbins = nCl) ## those that are not always in same cluster (5 out of 3000 for this seed): (iDoubt 
<- which(apply(tcl,2, function(n) all(n < nSim)))) if(length(iDoubt)) { # (not for all seeds) tabD <- tcl[,iDoubt, drop=FALSE] dimnames(tabD) <- list(cluster = paste(1:nCl), obs = format(iDoubt)) t(tabD) # how many times in which clusters } cluster/tests/clara-NAs.Rout.save0000644000176000001440000007157412036114347016515 0ustar ripleyusers R version 2.15.1 Patched (2012-10-12 r60924) -- "Roasted Marshmallows" Copyright (C) 2012 The R Foundation for Statistical Computing ISBN 3-900051-07-0 Platform: x86_64-unknown-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. 
> library(cluster) > > x <- cbind(c(0, -4, -22, -14, 0, NA, -28, 1, 10, -1, + 100 + c(13, 0, 2, 4, 7, 8, 1)), + c(-5, -14, NA, -35, -30, NA, 7, 2, -18, 13, + 47, 64, 48, NA, NA, 44, 65)) > x [,1] [,2] [1,] 0 -5 [2,] -4 -14 [3,] -22 NA [4,] -14 -35 [5,] 0 -30 [6,] NA NA [7,] -28 7 [8,] 1 2 [9,] 10 -18 [10,] -1 13 [11,] 113 47 [12,] 100 64 [13,] 102 48 [14,] 104 NA [15,] 107 NA [16,] 108 44 [17,] 101 65 > (d <- dist(x,'manhattan')) 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 2 13 3 44 36 4 44 31 16 5 25 20 44 19 6 NA NA NA NA NA 7 40 45 12 56 65 NA 8 8 21 46 52 33 NA 34 9 23 18 64 41 22 NA 63 29 10 19 30 42 61 44 NA 33 13 42 11 165 178 270 209 190 NA 181 157 168 148 12 169 182 244 213 194 NA 185 161 172 152 30 13 155 168 248 199 180 NA 171 147 158 138 12 18 14 208 216 252 236 208 NA 264 206 188 210 18 8 4 15 214 222 258 242 214 NA 270 212 194 216 12 14 10 6 16 157 170 260 201 182 NA 173 149 160 140 8 28 10 8 2 17 171 184 246 215 196 NA 187 163 174 154 30 2 18 6 12 28 > summary(d, na.rm = TRUE) # max = 270 Min. 1st Qu. Median Mean 3rd Qu. Max. NA's 2.00 27.25 147.50 114.60 188.50 270.00 16 > ## First call with "trace" (seg.fault typically later ...): > try( clara(x, k=2, metric="manhattan", sampsize=10, trace = 3) ) calling .C(cl_clara, ..., DUP = FALSE): C clara(): (nsam,nran,n) = (10,5,17); 'large_sample', C clara(): sample 1 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 1 {295} [ntt=7, nunfs=0] .. nsel[1:10]= 6 7 8 9 10 12 13 14 16 17 -> dysta2() dysta2() gave dyst_toomany_NA --> new sample C clara(): sample 2 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 3 {295} [ntt=7, nunfs=1] .. nsel[1:10]= 1 4 7 9 11 12 13 15 16 17 -> dysta2() . clara(): s:= max dys[1..45] = 270; clara()'s bswap2(*, s=270): new repr. 7 new repr. 1 after build: medoids are 1 7 and min.dist dysma[1:n] are 0 44 40 23 12 18 0 10 10 18 --> sky = sum_j D_j= 175 swp new 8 <-> 7 old; decreasing diss. 
by -18 Last swap: new 8 <-> 7 old; decreasing diss. by 1 end{bswap}: sky = 157 selec() -> 'NAfs' obj= 5.47059 C clara(): sample 3 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 6 {295} [ntt=7, nunfs=2] .. nsel[1:10]= 1 2 3 5 8 9 10 11 13 17 -> dysta2() . clara(): s:= max dys[1..45] = 270; clara()'s bswap2(*, s=270): new repr. 5 new repr. 9 after build: medoids are 5 9 and min.dist dysma[1:n] are 8 21 46 33 0 29 13 12 0 18 --> sky = sum_j D_j= 180 swp new 1 <-> 5 old; decreasing diss. by -18 Last swap: new 1 <-> 5 old; decreasing diss. by 1 end{bswap}: sky = 162 selec() -> 'NAfs' obj= 5.47059 C clara(): sample 4 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 5 {295} [ntt=7, nunfs=3] .. nsel[1:10]= 1 2 3 4 7 8 9 10 13 14 -> dysta2() . clara(): s:= max dys[1..45] = 264; clara()'s bswap2(*, s=264): new repr. 1 new repr. 10 after build: medoids are 1 10 and min.dist dysma[1:n] are 0 13 44 44 40 8 23 19 4 0 --> sky = sum_j D_j= 195 Last swap: new 9 <-> 10 old; decreasing diss. by 0 end{bswap}: sky = 195 selec() -> 'NAfs' obj= 5.47059 C clara(): sample 5 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 16 {295} [ntt=7, nunfs=4] .. nsel[1:10]= 2 3 4 6 7 8 9 10 11 17 -> dysta2() dysta2() gave dyst_toomany_NA --> new sample Error in clara(x, k = 2, metric = "manhattan", sampsize = 10, trace = 3) : Observation 6 has *only* NAs --> omit it for clustering > ## Originally:already shows the problem: nbest[] = c(0,0,...,0) must be WRONG!! > ## Now: gives the proper error message. > > ## S-plus 6.1.2 (rel.2 for Linux, 2002) gives > ##> cc <- clara(x, k=2, metric="manhattan", samples=2, sampsize=10) > ## Problem in .Fortran("clara",: Internal error: data for decrementing > ## ref.count didn't point to a valid arena (0x0), while calling subroutine clara > > ## The large example from clara.R -- made small enough to still provoke > ## the "** dysta2() ... 
OUT" problem {no longer!} > x <- matrix(c(0, 3, -4, 62, 1, 3, -7, 45, 36, 46, 45, 54, -10, + 51, 49, -5, 13, -6, 49, 52, 57, 39, -1, 55, 68, -3, 51, 11, NA, + 9, -3, 50, NA, 58, 9, 52, 12, NA, 47, -12, -6, -9, 5, 30, 38, + 54, -5, 39, 50, 50, 54, 43, 7, 64, 55, 4, 0, 72, 54, 37, 59, + -1, 8, 43, 50, -2, 56, -8, 43, 6, 4, 48, -2, 14, 45, 49, 56, + 51, 45, 11, 10, 42, 50, 2, -12, 3, 1, 2, 2, -14, -4, 8, 0, 3, + -11, 8, 5, 14, -1, 9, 0, 19, 10, -2, -9, 9, 2, 16, 10, 4, 1, + 12, 7, -4, 27, -8, -9, -9, 2, 8, NA, 13, -23, -3, -5, 1, 15, + -3, 5, -9, -5, 14, 8, 7, -4, 26, 20, 10, 8, 17, 4, 14, 23, -2, + 23, 2, 16, 5, 5, -3, 12, 5, 14, -2, 4, 2, -2, 7, 9, 1, -15, -1, + 9, 23, 1, 7, 13, 2, -11, 16, 12, -11, -14, 2, 6, -8), + ncol = 2) > str(x) # 88 x 2 num [1:88, 1:2] 0 3 -4 62 1 3 -7 45 36 46 ... > try(clara(x, 2, samples = 20, trace = 3))# 2nd sample did show dysta2() problem calling .C(cl_clara, ..., DUP = FALSE): C clara(): (nsam,nran,n) = (44,20,88); C clara(): sample 1 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 2 {295} [ntt=44, nunfs=0] .. nsel[1:44]= 2 3 4 6 9 10 12 14 15 18 19 20 24 25 26 28 31 35 38 42 47 48 51 53 54 57 60 61 64 66 68 70 71 73 74 75 76 77 78 79 80 81 82 88 -> dysta2() . clara(): s:= max dys[1..946] = 78.6448; clara()'s bswap2(*, s=78.6448): new repr. 19 new repr. 9 after build: medoids are 9 19 and min.dist dysma[1:n] are 21.2 7.07 9.9 2.83 5.66 5 5.1 9.22 0 11.3 1.41 6.71 6.32 8.49 7.07 12.7 1.41 33.9 0 14.1 7.07 18.9 5.39 4.24 15.5 31.1 5.66 5.66 5.66 4.24 1.41 8.49 11.3 22.6 2.83 4.12 13 0 3.61 5 1.41 17 9.22 12.7 --> sky = sum_j D_j= 385.677 Last swap: new 6 <-> 9 old; decreasing diss. by 0.939294 end{bswap}: sky = 385.677 selec() -> 'NAfs' obj= 1.64279 C clara(): sample 2 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 12 {295} [ntt=44, nunfs=1] .. 
nsel[1:44]= 1 2 3 5 6 7 9 12 17 19 26 27 28 29 30 38 39 42 43 45 47 50 52 54 55 56 58 59 60 61 62 64 67 68 71 74 75 76 77 79 80 81 83 84 -> dysta2() . clara(): s:= max dys[1..946] = 81.7435; clara()'s bswap2(*, s=81.7435): new repr. 16 new repr. 43 after build: medoids are 16 43 and min.dist dysma[1:n] are 1.41 21.2 7.07 1.41 2.83 17 5.66 5 14.1 1.41 7.07 15 12.7 14.1 14.1 0 4.24 14.1 8.49 9.9 7.07 2 8.6 14.1 12.1 4.24 1.41 5.66 5.66 5.66 5.66 5.66 4.24 1.41 11.3 2.83 5.83 11 0 5.1 1.41 17 0 17 --> sky = sum_j D_j= 331.98 Last swap: new 17 <-> 43 old; decreasing diss. by 0.492109 end{bswap}: sky = 331.98 selec() -> 'NAfs' obj= 1.59799 C clara(): sample 3 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 14 {295} [ntt=44, nunfs=2] .. nsel[1:44]= 1 3 4 6 7 9 12 14 15 16 18 19 20 29 30 31 32 36 38 39 40 44 46 47 48 49 51 53 54 56 57 60 62 64 65 66 67 73 75 77 79 81 82 87 -> dysta2() . clara(): s:= max dys[1..946] = 77.8781; clara()'s bswap2(*, s=77.8781): new repr. 19 new repr. 35 after build: medoids are 19 35 and min.dist dysma[1:n] are 1.41 7.07 9.9 2.83 17 5.66 6.4 5.1 4.12 4.24 11.3 1.41 2.83 14.1 14.1 1.41 6 5.66 0 3.16 5.66 18.4 8.06 7.07 16.3 6 7.21 4.24 14 4.24 31.1 5.66 5.66 5.66 0 4.24 4.24 22.6 7.07 0 5.1 17 8.25 7.07 --> sky = sum_j D_j= 338.587 end{bswap}: sky = 338.587 selec() -> 'NAfs' obj= 1.57294 C clara(): sample 4 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 70 {295} [ntt=44, nunfs=3] .. nsel[1:44]= 1 3 8 9 14 15 16 17 19 20 22 23 28 30 31 32 34 35 36 37 38 39 40 41 45 46 47 49 54 56 57 65 66 67 69 70 71 74 76 77 78 84 86 88 -> dysta2() . clara(): s:= max dys[1..946] = 77.8781; clara()'s bswap2(*, s=77.8781): new repr. 21 new repr. 
32 after build: medoids are 21 32 and min.dist dysma[1:n] are 1.41 7.07 7.81 5.66 5.1 4.12 4.24 14.1 1.41 2.83 4.24 0 12.7 14.1 1.41 6 8.06 33.9 5.66 8.49 0 3.16 5.66 5.66 9.9 8.06 7.07 6 14 4.24 31.1 0 4.24 4.24 4.24 8.49 11.3 2.83 9.06 0 7.07 17 1.41 12.7 --> sky = sum_j D_j= 325.933 end{bswap}: sky = 325.933 selec() -> 'NAfs' obj= 1.57294 C clara(): sample 5 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 80 {295} [ntt=44, nunfs=4] .. nsel[1:44]= 1 2 3 5 7 8 11 13 14 20 22 23 26 28 30 31 33 34 37 38 39 41 45 46 47 50 51 52 57 59 61 64 67 71 76 77 79 80 81 82 85 86 87 88 -> dysta2() dysta2() gave dyst_toomany_NA --> new sample C clara(): sample 6 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 5 {295} [ntt=44, nunfs=5] .. nsel[1:44]= 2 3 4 5 6 8 10 12 19 20 21 23 24 25 29 30 31 32 33 37 39 41 42 45 46 48 50 53 54 59 61 66 68 69 71 72 73 79 80 82 84 85 86 87 -> dysta2() dysta2() gave dyst_toomany_NA --> new sample C clara(): sample 7 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 17 {295} [ntt=44, nunfs=6] .. nsel[1:44]= 2 3 6 7 8 12 14 16 17 18 20 22 26 27 29 30 31 32 33 35 36 37 42 44 45 46 49 52 54 58 59 61 62 63 65 67 70 74 75 77 78 79 87 88 -> dysta2() dysta2() gave dyst_toomany_NA --> new sample C clara(): sample 8 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 67 {295} [ntt=44, nunfs=7] .. nsel[1:44]= 2 3 6 7 8 9 11 13 14 18 30 31 32 34 35 37 38 40 43 44 47 48 49 52 54 55 56 58 59 60 66 67 68 70 71 72 73 75 80 83 84 85 87 88 -> dysta2() . clara(): s:= max dys[1..946] = 85.5102; clara()'s bswap2(*, s=85.5102): new repr. 17 new repr. 
9 after build: medoids are 9 17 and min.dist dysma[1:n] are 21.2 7.07 2.83 17 9.9 5.66 2.83 1.41 0 11.3 14.1 1.41 9.9 9.22 33.9 8.49 0 5.66 8.49 18.4 7.07 13.9 1.41 8.25 13.9 5.66 4.24 1.41 4.24 5.66 4.24 4.24 1.41 8.49 11.3 0 22.6 11.3 1.41 7.07 17 21.2 7.07 12.7 --> sky = sum_j D_j= 384.698 end{bswap}: sky = 384.698 selec() -> 'NAfs' obj= 1.54909 C clara(): sample 9 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 67 {295} [ntt=44, nunfs=8] .. nsel[1:44]= 2 4 6 7 8 11 12 13 14 15 17 19 20 21 24 27 29 30 31 33 34 35 36 39 42 45 46 48 50 51 52 54 55 58 60 61 62 65 67 78 80 84 86 88 -> dysta2() dysta2() gave dyst_toomany_NA --> new sample C clara(): sample 10 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 5 {295} [ntt=44, nunfs=9] .. nsel[1:44]= 1 3 5 6 7 9 10 14 15 16 17 18 19 20 21 23 28 29 30 32 33 36 37 39 40 44 46 47 51 53 54 55 56 57 65 69 70 74 76 81 82 84 86 87 -> dysta2() dysta2() gave dyst_toomany_NA --> new sample C clara(): sample 11 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 66 {295} [ntt=44, nunfs=10] .. nsel[1:44]= 1 3 4 5 6 11 13 14 15 18 19 21 28 30 31 32 33 34 39 40 41 42 43 46 47 57 58 59 63 65 66 67 71 72 73 74 75 78 79 80 83 84 87 88 -> dysta2() dysta2() gave dyst_toomany_NA --> new sample C clara(): sample 12 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 21 {295} [ntt=44, nunfs=11] .. nsel[1:44]= 4 5 6 8 9 10 13 14 15 16 17 21 23 25 27 28 30 35 36 41 44 46 47 49 50 54 55 56 57 59 61 62 64 65 66 68 71 72 74 75 76 81 83 84 -> dysta2() . clara(): s:= max dys[1..946] = 78.3135; clara()'s bswap2(*, s=78.3135): new repr. 5 new repr. 
2 after build: medoids are 2 5 and min.dist dysma[1:n] are 26.2 0 3.61 9.49 0 13.5 11 20.5 13.9 6.32 15 21.6 2.24 32.1 26.6 12.8 12 24.4 17.9 8.6 10.8 18.1 7.21 20.5 14.9 29.4 26.2 3.61 23 21.1 23 3.61 7 16.6 3.61 9.22 9.49 12.6 13 9.85 22.2 14.2 15.7 11 --> sky = sum_j D_j= 623.732 swp new 43 <-> 5 old; decreasing diss. by -205.358 Last swap: new 43 <-> 5 old; decreasing diss. by 1 end{bswap}: sky = 418.375 selec() -> 'NAfs' obj= 3.10025 C clara(): sample 13 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 4 {295} [ntt=44, nunfs=12] .. nsel[1:44]= 3 4 5 14 15 16 17 19 20 21 24 25 26 29 30 31 34 35 38 40 41 43 47 49 50 52 55 57 58 60 61 63 64 65 66 68 72 73 74 79 81 83 86 88 -> dysta2() . clara(): s:= max dys[1..946] = 84.1487; clara()'s bswap2(*, s=84.1487): new repr. 19 new repr. 34 after build: medoids are 19 34 and min.dist dysma[1:n] are 7.07 9.9 1.41 5.1 4.12 4.24 14.1 1.41 2.83 8.06 5.39 8.49 7.07 14.1 14.1 1.41 8.06 33.9 0 5.66 5.66 8.49 7.07 6 4 7.62 10.3 31.1 1.41 5.66 5.66 15.6 5.66 0 4.24 1.41 0 22.6 2.83 5.1 17 2 1.41 12.7 --> sky = sum_j D_j= 340.1 end{bswap}: sky = 340.1 selec() -> 'NAfs' obj= 1.57294 C clara(): sample 14 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 78 {295} [ntt=44, nunfs=13] .. nsel[1:44]= 5 7 8 9 10 11 12 18 19 21 23 27 28 29 31 32 33 35 36 38 39 46 50 51 52 56 57 58 59 60 64 65 66 68 72 73 75 77 78 80 84 86 87 88 -> dysta2() dysta2() gave dyst_toomany_NA --> new sample C clara(): sample 15 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 73 {295} [ntt=44, nunfs=14] .. nsel[1:44]= 2 3 8 10 18 25 26 27 29 31 33 34 35 41 42 43 44 46 47 48 49 53 54 56 57 58 59 60 63 69 70 71 72 73 75 76 77 79 81 84 85 86 87 88 -> dysta2() dysta2() gave dyst_toomany_NA --> new sample C clara(): sample 16 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 43 {295} [ntt=44, nunfs=15] .. 
nsel[1:44]= 5 6 7 8 9 12 14 16 18 22 23 24 27 28 29 30 34 35 36 39 40 41 43 45 56 57 59 60 62 64 65 67 69 70 71 73 74 75 79 81 83 85 86 87 -> dysta2() . clara(): s:= max dys[1..946] = 75.1665; clara()'s bswap2(*, s=75.1665): new repr. 15 new repr. 41 after build: medoids are 15 41 and min.dist dysma[1:n] are 12.7 17 2.83 6.4 15.7 5 7.07 9.9 25.5 13.6 14.1 5 15 1.41 0 0 8.06 19.8 8.49 4.24 8.49 19.8 5.66 12.6 9.9 45.3 5.66 14.8 8.49 9.9 2 10 9.9 22.6 25.5 8.49 11.3 5.83 5.1 2.83 0 7.07 15.6 21.2 --> sky = sum_j D_j= 479.721 end{bswap}: sky = 479.721 selec() -> 'NAfs' obj= 2.17131 C clara(): sample 17 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 22 {295} [ntt=44, nunfs=16] .. nsel[1:44]= 4 6 9 10 11 12 13 16 19 22 26 27 29 30 33 34 37 38 39 42 43 48 51 54 55 57 60 61 62 63 64 66 69 72 73 75 76 77 78 81 82 85 86 87 -> dysta2() dysta2() gave dyst_toomany_NA --> new sample C clara(): sample 18 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 39 {295} [ntt=44, nunfs=17] .. nsel[1:44]= 1 4 8 13 15 19 20 23 25 26 27 28 30 31 33 34 36 37 39 41 42 43 44 45 46 47 50 54 55 57 59 60 62 64 65 66 67 72 73 78 79 81 82 85 -> dysta2() dysta2() gave dyst_toomany_NA --> new sample C clara(): sample 19 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 72 {295} [ntt=44, nunfs=18] .. nsel[1:44]= 1 4 5 6 10 12 13 14 17 18 19 22 23 25 27 30 31 32 33 38 39 40 41 42 44 45 46 48 55 57 58 59 60 61 66 67 69 70 72 74 83 84 85 86 -> dysta2() dysta2() gave dyst_toomany_NA --> new sample C clara(): sample 20 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 48 {295} [ntt=44, nunfs=19] .. 
nsel[1:44]= 1 3 4 5 7 10 11 13 14 20 22 23 31 32 33 34 35 36 37 40 41 42 43 44 48 50 52 53 55 56 62 63 68 71 72 73 74 75 81 82 83 84 86 88 -> dysta2() dysta2() gave dyst_toomany_NA --> new sample Error in clara(x, 2, samples = 20, trace = 3) : Observation 33 has *only* NAs --> omit it for clustering > ## To see error message for > 1 missing: > try(clara(rbind(NA,x), 2)) Error in clara(rbind(NA, x), 2) : Observations 1,34 have *only* NAs --> na.omit() them for clustering! > > x <- x[-33,] > ## still had the ** dysta2() .. OUT" problem {no longer!} > clara(x, 2, samples = 12, trace = 3) calling .C(cl_clara, ..., DUP = FALSE): C clara(): (nsam,nran,n) = (44,12,87); 'large_sample', C clara(): sample 1 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 2 {295} [ntt=43, nunfs=0] .. nsel[1:44]= 1 7 8 11 14 16 17 21 22 26 29 30 32 33 34 36 37 39 40 41 43 44 45 46 48 49 51 52 54 55 56 58 62 64 66 68 69 71 74 82 83 84 85 86 -> dysta2() . clara(): s:= max dys[1..946] = 76.5376; clara()'s bswap2(*, s=76.5376): new repr. 17 new repr. 40 after build: medoids are 17 40 and min.dist dysma[1:n] are 1.41 17 6.4 2.83 7.07 4.24 14.1 7.28 4.24 7.07 14.1 14.1 4 8.06 33.9 8.49 0 5.66 5.66 14.1 18.4 9.9 6.4 7.07 8 2 8.6 4.24 12.1 4.24 31.1 5.66 15.6 2 4.24 4.24 8.49 0 5.83 0 17 21.2 1.41 7.07 --> sky = sum_j D_j= 384.62 end{bswap}: sky = 384.62 1st proper or new best: obj= 4.93331 C clara(): sample 2 finding 1st... new k{ran}: .. kall: T, ... nrx [0:1]= 37 82 ... nsel[0:0]= 70 {295} [ntt=43, nunfs=0] .. nsel[1:44]= 4 8 10 11 13 15 16 18 20 21 22 23 24 25 31 32 33 34 35 36 37 40 41 43 45 46 48 50 52 55 62 64 65 68 69 71 72 77 81 82 84 85 86 87 -> dysta2() . clara(): s:= max dys[1..946] = 82.7103; clara()'s bswap2(*, s=82.7103): new repr. 21 new repr. 
40 after build: medoids are 21 40 and min.dist dysma[1:n] are 9.9 6.4 4.47 2.83 1.41 2.24 4.24 11.3 4.47 7.28 4.24 0 5 8.49 1.41 4 8.06 33.9 5.66 8.49 0 5.66 14.1 18.4 6.4 7.07 8 5.66 4.24 4.24 15.6 2 4.24 4.24 8.49 0 22.6 5.1 8.94 0 21.2 1.41 7.07 12.7 --> sky = sum_j D_j= 321.274 end{bswap}: sky = 321.274 obj= 4.93331 C clara(): sample 3 finding 1st... new k{ran}: .. kall: T, ... nrx [0:1]= 37 82 ... nsel[0:0]= 38 {295} [ntt=43, nunfs=0] .. nsel[1:44]= 2 5 10 11 14 18 21 22 23 24 25 26 27 29 33 34 36 37 39 41 42 43 45 47 49 50 55 57 58 60 62 64 67 68 71 73 75 77 79 82 83 84 85 87 -> dysta2() . clara(): s:= max dys[1..946] = 85.5102; clara()'s bswap2(*, s=85.5102): new repr. 18 new repr. 32 after build: medoids are 18 32 and min.dist dysma[1:n] are 21.2 1.41 4 2.83 5.1 11.3 8.06 4.24 0 5.39 8.49 7.07 13 14.1 8.06 33.9 8.49 0 5.66 14.1 8.49 18.4 8.06 16.3 4 7.21 4.24 1.41 4.47 5.66 15.6 0 1.41 4.24 0 2.83 9.06 7.07 1.41 2 17 21.2 1.41 12.7 --> sky = sum_j D_j= 350.699 Last swap: new 40 <-> 32 old; decreasing diss. by 0.7867 end{bswap}: sky = 350.699 1st proper or new best: obj= 4.80308 C clara(): sample 4 finding 1st... new k{ran}: .. kall: T, ... nrx [0:1]= 37 64 ... nsel[0:0]= 85 {295} [ntt=43, nunfs=0] .. nsel[1:44]= 2 4 5 6 8 10 11 12 15 18 21 24 25 26 28 31 33 37 40 42 43 47 49 50 52 54 55 57 58 59 60 61 62 63 64 71 72 73 74 78 79 81 82 86 -> dysta2() . clara(): s:= max dys[1..946] = 78.3135; clara()'s bswap2(*, s=78.3135): new repr. 18 new repr. 43 after build: medoids are 18 43 and min.dist dysma[1:n] are 21.2 9.9 1.41 2.83 6.4 4.47 2.83 5 2.24 11.3 7.28 5 8.49 7.07 12.7 1.41 8.06 0 5.66 8.49 18.4 17.8 2 5.66 4.24 12.1 4.24 1.41 5.66 5.66 5.66 5.66 15.6 5.66 2 0 22.6 2.83 5.83 5.1 1.41 8.94 0 7.07 --> sky = sum_j D_j= 297.276 end{bswap}: sky = 297.276 obj= 4.93331 C clara(): sample 5 finding 1st... new k{ran}: .. kall: T, ... nrx [0:1]= 37 64 ... nsel[0:0]= 84 {295} [ntt=43, nunfs=0] .. 
nsel[1:44]= 4 6 9 10 12 13 15 16 17 18 20 21 24 26 27 31 34 35 36 37 40 41 42 43 45 51 52 53 54 55 56 62 64 65 67 68 69 71 72 73 74 77 79 82 -> dysta2() . clara(): s:= max dys[1..946] = 76.5376; clara()'s bswap2(*, s=76.5376): new repr. 20 new repr. 44 after build: medoids are 20 44 and min.dist dysma[1:n] are 9.9 2.83 5.66 4.47 5 1.41 2.24 4.24 14.1 11.3 4.47 7.28 5 7.07 15 1.41 33.9 5.66 8.49 0 5.66 14.1 8.49 18.4 6.4 8.6 4.24 14.1 12.1 4.24 31.1 15.6 2 4.24 1.41 4.24 8.49 0 22.6 2.83 5.83 5.1 1.41 0 --> sky = sum_j D_j= 350.799 end{bswap}: sky = 350.799 obj= 4.93331 C clara(): sample 6 finding 1st... new k{ran}: .. kall: T, ... nrx [0:1]= 37 64 ... nsel[0:0]= 33 {295} [ntt=43, nunfs=0] .. nsel[1:44]= 1 5 7 9 11 13 14 15 17 18 22 26 27 28 34 35 36 37 38 43 46 47 48 50 51 54 55 56 57 61 63 64 66 69 71 73 74 75 76 77 78 80 81 82 -> dysta2() . clara(): s:= max dys[1..946] = 82.0244; clara()'s bswap2(*, s=82.0244): new repr. 18 new repr. 19 after build: medoids are 18 19 and min.dist dysma[1:n] are 1.41 1.41 17 5.66 2.83 1.41 5.66 5.39 14.1 11.3 4.24 7.07 12.6 12.7 33.9 5.66 8.49 0 0 17 7.07 13.6 5.83 9.9 4.47 11.3 4.24 31.1 1.41 5.66 5.66 3.16 4.24 8.49 0 2.83 6.32 8.25 0 8.49 2.83 17 5.1 4.24 --> sky = sum_j D_j= 339.187 end{bswap}: sky = 339.187 1st proper or new best: obj= 4.79624 C clara(): sample 7 finding 1st... new k{ran}: .. kall: T, ... nrx [0:1]= 37 38 ... nsel[0:0]= 26 {295} [ntt=43, nunfs=0] .. nsel[1:44]= 1 3 4 5 9 10 11 18 19 21 23 24 25 30 37 38 39 40 46 47 50 54 55 56 59 62 65 66 67 68 70 71 72 74 75 76 79 80 81 82 83 84 85 86 -> dysta2() . clara(): s:= max dys[1..946] = 82.7103; clara()'s bswap2(*, s=82.7103): new repr. 15 new repr. 
16 after build: medoids are 15 16 and min.dist dysma[1:n] are 1.41 7.07 9.9 1.41 5.66 1.41 2.83 11.3 1.41 11.2 0 8.54 8.49 14.1 0 0 5.66 5.66 7.07 13.6 9.9 11.3 4.24 31.1 5.66 15.6 4.24 4.24 1.41 4.24 11.3 0 22.6 6.32 8.25 0 1.41 17 5.1 4.24 17 21.2 1.41 7.07 --> sky = sum_j D_j= 331.596 end{bswap}: sky = 331.596 obj= 4.79624 C clara(): sample 8 finding 1st... new k{ran}: .. kall: T, ... nrx [0:1]= 37 38 ... nsel[0:0]= 74 {295} [ntt=43, nunfs=0] .. nsel[1:44]= 4 5 10 12 15 16 17 19 20 21 22 23 24 25 26 27 28 29 31 37 38 39 41 42 44 45 46 49 50 51 56 57 61 62 63 64 68 73 75 77 78 80 81 85 -> dysta2() . clara(): s:= max dys[1..946] = 84.1487; clara()'s bswap2(*, s=84.1487): new repr. 20 new repr. 36 after build: medoids are 20 36 and min.dist dysma[1:n] are 9.9 1.41 4 6.4 4.12 4.24 14.1 1.41 2.83 8.06 4.24 0 5.39 8.49 7.07 13 12.7 14.1 1.41 0 3.16 5.66 14.1 8.49 9.9 8.06 7.07 4 7.21 7.62 31.1 1.41 5.66 15.6 5.66 0 4.24 2.83 9.06 7.07 5.1 17 8.25 1.41 --> sky = sum_j D_j= 312.667 end{bswap}: sky = 312.667 obj= 4.80308 C clara(): sample 9 finding 1st... new k{ran}: .. kall: T, ... nrx [0:1]= 37 38 ... nsel[0:0]= 67 {295} [ntt=43, nunfs=0] .. nsel[1:44]= 1 3 5 9 11 13 17 22 23 25 29 31 32 34 37 38 40 42 43 44 47 49 53 55 56 58 62 63 66 68 69 70 71 72 73 74 75 76 78 81 82 84 85 86 -> dysta2() . clara(): s:= max dys[1..946] = 82.7103; clara()'s bswap2(*, s=82.7103): new repr. 15 new repr. 16 after build: medoids are 15 16 and min.dist dysma[1:n] are 1.41 7.07 1.41 5.66 2.83 1.41 14.1 4.24 0 8.49 14.1 1.41 7.62 33.9 0 0 5.66 8.49 17 9.9 13.6 5.83 17 4.24 31.1 7.07 15.6 5.66 4.24 4.24 8.49 11.3 0 22.6 2.83 6.32 8.25 0 2.83 5.1 4.24 21.2 1.41 7.07 --> sky = sum_j D_j= 355.134 end{bswap}: sky = 355.134 obj= 4.79624 C clara(): sample 10 finding 1st... new k{ran}: .. kall: T, ... nrx [0:1]= 37 38 ... nsel[0:0]= 85 {295} [ntt=43, nunfs=0] .. 
nsel[1:44]= 2 4 8 11 13 22 24 25 26 27 29 31 34 36 37 38 39 41 42 44 47 48 49 50 53 57 58 59 60 61 62 63 64 66 67 70 71 72 75 77 79 82 84 87 -> dysta2() . clara(): s:= max dys[1..946] = 85.5102; clara()'s bswap2(*, s=85.5102): new repr. 15 new repr. 33 after build: medoids are 15 33 and min.dist dysma[1:n] are 21.2 9.9 7.81 2.83 1.41 4.24 5.39 8.49 7.07 13 14.1 1.41 33.9 8.49 0 3.16 5.66 14.1 8.49 9.9 16.3 6 4 7.21 14 1.41 4.47 5.66 5.66 5.66 15.6 5.66 0 4.24 1.41 11.3 0 22.6 9.06 7.07 1.41 2 21.2 12.7 --> sky = sum_j D_j= 365.357 end{bswap}: sky = 365.357 obj= 4.80308 C clara(): sample 11 finding 1st... new k{ran}: .. kall: T, ... nrx [0:1]= 37 38 ... nsel[0:0]= 87 {295} [ntt=43, nunfs=0] .. nsel[1:44]= 2 7 8 9 10 12 16 17 20 22 23 24 26 27 29 34 35 36 37 38 41 44 47 48 49 50 51 52 53 54 59 60 61 63 67 68 69 75 76 78 80 81 84 85 -> dysta2() . clara(): s:= max dys[1..946] = 80.9938; clara()'s bswap2(*, s=80.9938): new repr. 19 new repr. 20 after build: medoids are 19 20 and min.dist dysma[1:n] are 21.2 17 7.28 5.66 1.41 9.22 4.24 14.1 5.1 4.24 0 8.54 7.07 12.6 14.1 33.9 5.66 8.49 0 0 14.1 9.9 13.6 5.83 5.83 9.9 4.47 4.24 17 11.3 5.66 5.66 5.66 5.66 1.41 4.24 8.49 8.25 0 2.83 17 5.1 21.2 1.41 --> sky = sum_j D_j= 368.774 end{bswap}: sky = 368.774 obj= 4.79624 C clara(): sample 12 finding 1st... new k{ran}: .. kall: T, ... nrx [0:1]= 37 38 ... nsel[0:0]= 44 {295} [ntt=43, nunfs=0] .. nsel[1:44]= 1 2 3 7 11 12 14 18 19 20 21 24 28 30 31 32 33 34 37 38 39 40 42 49 51 52 53 55 59 63 68 69 70 72 74 76 77 78 79 81 84 85 86 87 -> dysta2() . clara(): s:= max dys[1..946] = 80.9938; clara()'s bswap2(*, s=80.9938): new repr. 19 new repr. 
24 after build: medoids are 19 24 and min.dist dysma[1:n] are 1.41 21.2 7.07 17 2.83 4.12 9.06 11.3 1.41 6.32 7 5.39 12.7 14.1 1.41 2 8.54 33.9 0 5.83 5.66 5.66 8.49 0 9.9 4.24 14.6 4.24 5.66 5.66 4.24 8.49 11.3 22.6 5.1 0 3.16 5.83 1.41 10 21.2 1.41 7.07 12.7 --> sky = sum_j D_j= 351.374 end{bswap}: sky = 351.374 obj= 5.11334 C clara(): best sample _found_ ; nbest[1:44] = c(1,5,7,9,11,13,14,15,17,18,22,26,27,28,34,35,36,37,38,43,46,47,48,50,51,54,55,56,57,61,63,64,66,69,71,73,74,75,76,77,78,80,81,82) --> dysta2(nbest), resul(), end Call: clara(x = x, k = 2, samples = 12, trace = 3) Medoids: [,1] [,2] [1,] NA 1 [2,] 47 15 Objective function: 4.79624 Clustering vector: int [1:87] 1 1 1 1 1 1 1 1 1 2 1 1 1 2 1 1 1 1 ... Cluster sizes: 74 13 Best sample: [1] 1 5 7 9 11 13 14 15 17 18 22 26 27 28 34 35 36 37 38 43 46 47 48 50 51 [26] 54 55 56 57 61 63 64 66 69 71 73 74 75 76 77 78 80 81 82 Available components: [1] "sample" "medoids" "i.med" "clustering" "objective" [6] "clusinfo" "diss" "call" "silinfo" "data" > > data(xclara) > set.seed(123) > xclara[sample(nrow(xclara), 50),] <- NA > try( clara(xclara, k = 3) ) #-> "nice" error message depicting first 12 missing obs Error in clara(xclara, k = 3) : 50 observations (74,126,137,308,411,423,438,451,642,686,689,735 ...) have *only* NAs --> na.omit() them for clustering! > > proc.time() user system elapsed 0.228 0.024 0.240 cluster/tests/clara-NAs.R0000644000176000001440000000421110537315624015015 0ustar ripleyuserslibrary(cluster) x <- cbind(c(0, -4, -22, -14, 0, NA, -28, 1, 10, -1, 100 + c(13, 0, 2, 4, 7, 8, 1)), c(-5, -14, NA, -35, -30, NA, 7, 2, -18, 13, 47, 64, 48, NA, NA, 44, 65)) x (d <- dist(x,'manhattan')) summary(d, na.rm = TRUE) # max = 270 ## First call with "trace" (seg.fault typically later ...): try( clara(x, k=2, metric="manhattan", sampsize=10, trace = 3) ) ## Originally:already shows the problem: nbest[] = c(0,0,...,0) must be WRONG!! ## Now: gives the proper error message. 
## S-plus 6.1.2 (rel.2 for Linux, 2002) gives ##> cc <- clara(x, k=2, metric="manhattan", samples=2, sampsize=10) ## Problem in .Fortran("clara",: Internal error: data for decrementing ## ref.count didn't point to a valid arena (0x0), while calling subroutine clara ## The large example from clara.R -- made small enough to still provoke ## the "** dysta2() ... OUT" problem {no longer!} x <- matrix(c(0, 3, -4, 62, 1, 3, -7, 45, 36, 46, 45, 54, -10, 51, 49, -5, 13, -6, 49, 52, 57, 39, -1, 55, 68, -3, 51, 11, NA, 9, -3, 50, NA, 58, 9, 52, 12, NA, 47, -12, -6, -9, 5, 30, 38, 54, -5, 39, 50, 50, 54, 43, 7, 64, 55, 4, 0, 72, 54, 37, 59, -1, 8, 43, 50, -2, 56, -8, 43, 6, 4, 48, -2, 14, 45, 49, 56, 51, 45, 11, 10, 42, 50, 2, -12, 3, 1, 2, 2, -14, -4, 8, 0, 3, -11, 8, 5, 14, -1, 9, 0, 19, 10, -2, -9, 9, 2, 16, 10, 4, 1, 12, 7, -4, 27, -8, -9, -9, 2, 8, NA, 13, -23, -3, -5, 1, 15, -3, 5, -9, -5, 14, 8, 7, -4, 26, 20, 10, 8, 17, 4, 14, 23, -2, 23, 2, 16, 5, 5, -3, 12, 5, 14, -2, 4, 2, -2, 7, 9, 1, -15, -1, 9, 23, 1, 7, 13, 2, -11, 16, 12, -11, -14, 2, 6, -8), ncol = 2) str(x) # 88 x 2 try(clara(x, 2, samples = 20, trace = 3))# 2nd sample did show dysta2() problem ## To see error message for > 1 missing: try(clara(rbind(NA,x), 2)) x <- x[-33,] ## still had the ** dysta2() .. OUT" problem {no longer!} clara(x, 2, samples = 12, trace = 3) data(xclara) set.seed(123) xclara[sample(nrow(xclara), 50),] <- NA try( clara(xclara, k = 3) ) #-> "nice" error message depicting first 12 missing obs cluster/tests/agnes-ex.Rout.save0000644000176000001440000004341511135624110016445 0ustar ripleyusers R version 2.8.1 Patched (2009-01-19 r47658) Copyright (C) 2009 The R Foundation for Statistical Computing ISBN 3-900051-07-0 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. 
Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(cluster) > options(digits = 6) > data(votes.repub) > > .proctime00 <- proc.time() > > agn1 <- agnes(votes.repub, metric = "manhattan", stand = TRUE) > summary(agn1) Object of class 'agnes' from call: agnes(x = votes.repub, metric = "manhattan", stand = TRUE) Agglomerative coefficient: 0.797756 Order of objects: [1] Alabama Georgia Arkansas Louisiana Mississippi [6] South Carolina Alaska Vermont Arizona Montana [11] Nevada Colorado Idaho Wyoming Utah [16] California Oregon Washington Minnesota Connecticut [21] New York New Jersey Illinois Ohio Indiana [26] Michigan Pennsylvania New Hampshire Wisconsin Delaware [31] Kentucky Maryland Missouri New Mexico West Virginia [36] Iowa South Dakota North Dakota Kansas Nebraska [41] Maine Massachusetts Rhode Island Florida North Carolina [46] Tennessee Virginia Oklahoma Hawaii Texas Merge: [,1] [,2] [1,] -7 -32 [2,] -13 -35 [3,] -12 -50 [4,] 1 -30 [5,] 2 -14 [6,] -26 -28 [7,] -5 -37 [8,] -15 -41 [9,] -22 -38 [10,] -25 -31 [11,] 7 -47 [12,] -21 -39 [13,] -16 -27 [14,] 4 5 [15,] -42 -46 [16,] -20 10 [17,] 14 9 [18,] -3 6 [19,] -6 3 [20,] -33 15 [21,] 17 -29 [22,] -17 16 [23,] 8 -34 [24,] 21 -49 [25,] 22 -48 [26,] -8 25 [27,] 19 -44 [28,] 11 -23 [29,] 28 24 [30,] -11 -43 [31,] 18 27 [32,] 23 13 [33,] 29 26 [34,] 20 -36 [35,] -1 -10 [36,] 32 -19 [37,] 31 33 [38,] -9 34 [39,] 37 36 [40,] 35 -4 [41,] -2 -45 [42,] 40 -18 [43,] -24 -40 [44,] 39 12 [45,] 44 38 [46,] 41 45 [47,] 42 43 [48,] 46 30 [49,] 47 48 Height: [1] 27.36345 31.15453 35.61832 51.44421 35.69152 87.45523 31.58222 47.53682 [9] 16.34184 11.49397 22.11426 16.35662 10.46294 19.03961 28.41137 11.70132 [17] 12.72838 19.07671 20.90246 8.38200 11.10094 12.92659 9.23004 11.37867 [25] 15.97442 12.70819 16.91515 17.74499 24.83533 
18.78225 17.03525 15.77893 [33] 12.71848 18.52818 30.74557 12.55524 17.14634 22.33846 12.80419 27.38835 [41] 37.23685 12.79160 38.76377 29.38432 16.63215 14.75762 25.59605 53.03627 [49] 21.07684 1225 dissimilarities, summarized : Min. 1st Qu. Median Mean 3rd Qu. Max. 8.382 25.540 34.510 45.060 56.020 167.600 Metric : manhattan Number of objects : 50 Available components: [1] "order" "height" "ac" "merge" "diss" "call" [7] "method" "order.lab" "data" > Dvr <- daisy(votes.repub) > agn2 <- agnes(Dvr, method = "complete") > summary(agn2) Object of class 'agnes' from call: agnes(x = Dvr, method = "complete") Agglomerative coefficient: 0.88084 Order of objects: [1] Alabama Georgia Louisiana Arkansas Florida [6] Texas Mississippi South Carolina Alaska Michigan [11] Connecticut New York New Hampshire Indiana Ohio [16] Illinois New Jersey Pennsylvania Minnesota North Dakota [21] Wisconsin Iowa South Dakota Kansas Nebraska [26] Arizona Nevada Montana Oklahoma Colorado [31] Idaho Wyoming Utah California Oregon [36] Washington Missouri New Mexico West Virginia Delaware [41] Kentucky Maryland North Carolina Tennessee Virginia [46] Hawaii Maine Massachusetts Rhode Island Vermont Merge: [,1] [,2] [1,] -12 -50 [2,] -7 -32 [3,] -14 -35 [4,] -13 -30 [5,] -25 -31 [6,] -37 -47 [7,] -21 -39 [8,] -3 -28 [9,] 4 -38 [10,] -16 -27 [11,] -15 -41 [12,] 8 -26 [13,] -2 -22 [14,] -33 -42 [15,] 14 -46 [16,] 1 -44 [17,] -11 -19 [18,] 2 -29 [19,] -5 6 [20,] -17 -20 [21,] -34 -49 [22,] 5 -48 [23,] 18 3 [24,] 11 10 [25,] 23 9 [26,] -23 21 [27,] -8 20 [28,] 12 -36 [29,] -6 16 [30,] 13 25 [31,] 28 29 [32,] -1 -10 [33,] 19 22 [34,] 17 7 [35,] -4 -9 [36,] 30 26 [37,] 35 -43 [38,] 32 -18 [39,] -24 -40 [40,] 36 24 [41,] 27 15 [42,] 31 33 [43,] 38 37 [44,] 40 42 [45,] 34 -45 [46,] 43 39 [47,] 44 41 [48,] 47 45 [49,] 46 48 Height: [1] 48.2397 60.8984 72.9221 56.1363 58.8227 116.7048 63.0951 281.9508 [9] 28.1437 47.1690 19.4218 32.9438 36.7643 20.2258 39.1728 20.8792 [17] 25.3229 56.3813 42.2230 33.6978 
64.5254 26.1547 37.4564 25.9221 [25] 80.4894 23.4206 27.8273 43.4492 48.0483 43.7055 17.1992 31.1988 [33] 70.4868 33.2328 22.1831 54.3057 21.1413 35.1129 121.4022 43.3829 [41] 33.4744 66.7591 29.5099 30.1541 178.4119 32.7611 55.3633 22.6334 [49] 83.1040 1225 dissimilarities, summarized : Min. 1st Qu. Median Mean 3rd Qu. Max. 17.20 48.34 64.68 82.23 105.50 282.00 Metric : euclidean Number of objects : 50 Available components: [1] "order" "height" "ac" "merge" "diss" "call" [7] "method" "order.lab" > ## almost same: > (ag2. <- agnes(Dvr, method= "complete", keep.diss=FALSE)) Call: agnes(x = Dvr, method = "complete", keep.diss = FALSE) Agglomerative coefficient: 0.88084 Order of objects: [1] Alabama Georgia Louisiana Arkansas Florida [6] Texas Mississippi South Carolina Alaska Michigan [11] Connecticut New York New Hampshire Indiana Ohio [16] Illinois New Jersey Pennsylvania Minnesota North Dakota [21] Wisconsin Iowa South Dakota Kansas Nebraska [26] Arizona Nevada Montana Oklahoma Colorado [31] Idaho Wyoming Utah California Oregon [36] Washington Missouri New Mexico West Virginia Delaware [41] Kentucky Maryland North Carolina Tennessee Virginia [46] Hawaii Maine Massachusetts Rhode Island Vermont Height (summary): Min. 1st Qu. Median Mean 3rd Qu. Max. 17.2 28.1 39.2 52.3 58.8 282.0 Available components: [1] "order" "height" "ac" "merge" "diss" "call" [7] "method" "order.lab" > ag22 <- agnes(votes.repub, method= "complete", keep.diss=FALSE,keep.data=FALSE) > stopifnot(identical(agn2[-5:-6], ag2.[-5:-6]), + identical(Dvr, daisy(votes.repub)), # DUP=FALSE (!) 
+ identical(ag2.[-6], ag22[-6]) + ) > > data(agriculture) > summary(agnes(agriculture)) Object of class 'agnes' from call: agnes(x = agriculture) Agglomerative coefficient: 0.781893 Order of objects: [1] B NL D F UK DK L I GR P E IRL Merge: [,1] [,2] [1,] -1 -10 [2,] -2 -9 [3,] 1 -3 [4,] 3 -6 [5,] -5 -7 [6,] 4 -12 [7,] 6 2 [8,] -4 -11 [9,] 7 -8 [10,] 8 5 [11,] 9 10 Height: [1] 1.64924 2.24836 2.76918 4.02677 4.78835 2.22036 5.29409 14.77963 [9] 5.16236 8.55075 3.14006 66 dissimilarities, summarized : Min. 1st Qu. Median Mean 3rd Qu. Max. 1.649 4.357 7.987 9.594 13.250 24.040 Metric : euclidean Number of objects : 12 Available components: [1] "order" "height" "ac" "merge" "diss" "call" [7] "method" "order.lab" "data" > > data(ruspini) > summary(ar0 <- agnes(ruspini, keep.diss=FALSE, keep.data=FALSE)) Object of class 'agnes' from call: agnes(x = ruspini, keep.diss = FALSE, keep.data = FALSE) Agglomerative coefficient: 0.947954 Order of objects: [1] 1 2 3 5 4 6 8 7 9 10 14 15 17 16 18 19 11 12 13 20 61 62 66 63 64 [26] 68 65 67 69 70 71 72 75 73 74 21 22 23 24 27 28 29 30 25 26 32 35 31 36 39 [51] 40 33 34 37 38 41 42 43 44 45 49 51 53 50 54 52 55 56 57 59 60 58 46 47 48 Merge: [,1] [,2] [1,] -18 -19 [2,] -55 -56 [3,] -27 -28 [4,] -49 -51 [5,] -33 -34 [6,] -23 -24 [7,] -67 -69 [8,] -59 -60 [9,] -29 -30 [10,] -36 -39 [11,] -32 -35 [12,] -50 -54 [13,] -25 -26 [14,] -16 1 [15,] -70 -71 [16,] -64 -68 [17,] -37 -38 [18,] 12 -52 [19,] -62 -66 [20,] -12 -13 [21,] -9 -10 [22,] -42 -43 [23,] -15 -17 [24,] -47 -48 [25,] -21 -22 [26,] 7 15 [27,] 2 -57 [28,] 4 -53 [29,] 10 -40 [30,] 3 9 [31,] -73 -74 [32,] -72 -75 [33,] -11 20 [34,] 13 11 [35,] -6 -8 [36,] -14 23 [37,] -2 -3 [38,] -65 26 [39,] 5 17 [40,] 25 6 [41,] 36 14 [42,] 34 -31 [43,] -4 35 [44,] 28 18 [45,] 27 8 [46,] 19 -63 [47,] -46 24 [48,] -1 37 [49,] 16 38 [50,] 40 30 [51,] 42 29 [52,] 33 -20 [53,] 49 32 [54,] 51 39 [55,] 21 41 [56,] 48 -5 [57,] 45 -58 [58,] 53 31 [59,] 43 -7 [60,] 50 54 [61,] 44 57 [62,] -41 22 [63,] 
-61 46 [64,] 55 52 [65,] 63 58 [66,] -44 -45 [67,] 59 64 [68,] 56 67 [69,] 66 61 [70,] 60 62 [71,] 69 47 [72,] 70 71 [73,] 68 65 [74,] 73 72 Height: [1] 9.26758 6.40312 12.13789 22.37868 7.63441 6.32456 14.58991 [8] 21.63544 4.12311 12.07902 6.36396 4.24264 7.23741 3.56155 [15] 1.41421 16.38921 5.85486 4.12311 10.69547 67.75052 15.48443 [22] 4.12311 8.94386 17.00500 3.60555 9.53375 6.46443 2.82843 [29] 4.48680 3.60555 10.91541 5.83095 14.34411 5.65685 101.14200 [36] 4.47214 6.98022 2.23607 9.56136 2.00000 5.61339 2.82843 [43] 14.95692 3.16228 6.19728 3.00000 7.28356 9.97147 3.00000 [50] 5.47542 11.07404 2.23607 6.60456 3.60555 24.90532 15.45463 [57] 4.24264 64.42555 17.02939 22.56493 2.23607 5.22383 8.28122 [64] 3.16228 3.81721 15.20808 2.00000 5.19258 8.51123 2.82843 [71] 12.62990 34.72475 9.14005 4.47214 Available components: [1] "order" "height" "ac" "merge" "diss" "call" [7] "method" "order.lab" > summary(ar1 <- agnes(ruspini, metric = "manhattan")) Object of class 'agnes' from call: agnes(x = ruspini, metric = "manhattan") Agglomerative coefficient: 0.946667 Order of objects: [1] 1 2 3 5 4 6 8 7 9 10 14 16 18 19 15 17 11 12 13 20 61 62 66 63 73 [26] 74 64 68 65 67 69 70 71 72 75 21 22 23 24 27 28 29 30 25 26 32 35 31 36 39 [51] 40 33 34 37 38 41 42 43 44 45 49 51 53 50 54 52 55 56 57 59 60 58 46 47 48 Merge: [,1] [,2] [1,] -55 -56 [2,] -27 -28 [3,] -18 -19 [4,] -49 -51 [5,] -36 -39 [6,] -33 -34 [7,] -32 -35 [8,] -23 -24 [9,] -67 -69 [10,] -59 -60 [11,] -50 -54 [12,] -29 -30 [13,] -25 -26 [14,] -16 3 [15,] -70 -71 [16,] -64 -68 [17,] -62 -66 [18,] 11 -52 [19,] -37 -38 [20,] -12 -13 [21,] -9 -10 [22,] 9 15 [23,] 1 -57 [24,] -47 -48 [25,] -42 -43 [26,] -21 -22 [27,] -15 -17 [28,] 4 -53 [29,] 2 12 [30,] 5 -40 [31,] 6 19 [32,] 13 7 [33,] -11 20 [34,] -73 -74 [35,] -72 -75 [36,] -6 -8 [37,] -65 22 [38,] -14 14 [39,] -2 -3 [40,] 32 -31 [41,] 38 27 [42,] 26 8 [43,] 28 18 [44,] -46 24 [45,] -4 36 [46,] 23 10 [47,] -1 39 [48,] 42 29 [49,] 33 -20 [50,] 37 35 [51,] 40 30 
[52,] 17 -63 [53,] 16 50 [54,] 51 31 [55,] 46 -58 [56,] 21 41 [57,] 47 -5 [58,] 45 -7 [59,] 52 34 [60,] -44 -45 [61,] 48 54 [62,] 43 55 [63,] 59 53 [64,] 56 49 [65,] -41 25 [66,] -61 63 [67,] 57 58 [68,] 67 64 [69,] 60 62 [70,] 61 65 [71,] 69 44 [72,] 70 71 [73,] 68 66 [74,] 73 72 Height: [1] 11.50000 9.00000 16.00000 26.25000 10.00000 8.00000 16.66667 [8] 28.70833 5.00000 15.50000 8.66667 4.00000 2.00000 9.25000 [15] 6.00000 20.50000 7.50000 5.00000 12.33333 94.33333 22.78571 [22] 5.00000 12.50000 18.00000 8.00000 20.20000 5.00000 13.78571 [29] 8.25000 4.00000 5.50000 5.00000 12.40000 8.00000 125.71357 [36] 6.00000 9.50000 3.00000 11.87500 2.00000 7.00000 4.00000 [43] 18.72917 4.00000 7.50000 3.00000 9.25000 12.40000 3.00000 [50] 7.50000 14.43750 3.00000 7.50000 5.00000 32.38333 21.00000 [57] 6.00000 85.49616 18.00000 28.75000 3.00000 6.50000 9.55556 [64] 4.00000 5.00000 19.61111 2.00000 6.00000 11.00000 4.00000 [71] 15.40000 47.02381 10.00000 6.00000 2775 dissimilarities, summarized : Min. 1st Qu. Median Mean 3rd Qu. Max. 2.00 52.50 97.00 90.95 128.00 187.00 Metric : manhattan Number of objects : 75 Available components: [1] "order" "height" "ac" "merge" "diss" "call" [7] "method" "order.lab" "data" > str(ar1) List of 9 $ order : int [1:75] 1 2 3 5 4 6 8 7 9 10 ... $ height : num [1:74] 11.5 9 16 26.2 10 ... $ ac : num 0.947 $ merge : int [1:74, 1:2] -55 -27 -18 -49 -36 -33 -32 -23 -67 -59 ... $ diss :Classes 'dissimilarity', 'dist' atomic [1:2775] 11 12 29 13 25 43 33 22 27 39 ... .. ..- attr(*, "Size")= int 75 .. ..- attr(*, "Metric")= chr "manhattan" .. ..- attr(*, "Labels")= chr [1:75] "1" "2" "3" "4" ... $ call : language agnes(x = ruspini, metric = "manhattan") $ method : chr "average" $ order.lab: chr [1:75] "1" "2" "3" "5" ... $ data : num [1:75, 1:2] 4 5 10 9 13 13 12 15 18 19 ... ..- attr(*, "dimnames")=List of 2 .. ..$ : chr [1:75] "1" "2" "3" "4" ... .. 
..$ : chr [1:2] "x" "y" - attr(*, "class")= chr [1:2] "agnes" "twins" > > cat('Time elapsed: ', proc.time() - .proctime00,'\n') Time elapsed: 0.11 0.02 0.136 0 0 > > summary(ar2 <- agnes(ruspini, metric="manhattan", method = "weighted")) Object of class 'agnes' from call: agnes(x = ruspini, metric = "manhattan", method = "weighted") Agglomerative coefficient: 0.942387 Order of objects: [1] 1 2 3 5 9 10 14 16 18 19 15 17 4 6 8 7 11 12 13 20 61 64 68 65 67 [26] 69 70 71 62 66 63 72 75 73 74 21 22 23 24 27 28 29 30 25 26 32 35 31 36 39 [51] 40 33 34 37 38 41 42 43 44 45 49 51 53 50 54 52 55 56 57 59 60 58 46 47 48 Merge: [,1] [,2] [1,] -55 -56 [2,] -27 -28 [3,] -18 -19 [4,] -49 -51 [5,] -36 -39 [6,] -33 -34 [7,] -32 -35 [8,] -23 -24 [9,] -67 -69 [10,] -59 -60 [11,] -50 -54 [12,] -29 -30 [13,] -25 -26 [14,] -16 3 [15,] -70 -71 [16,] -64 -68 [17,] -62 -66 [18,] 11 -52 [19,] -37 -38 [20,] -12 -13 [21,] -9 -10 [22,] 9 15 [23,] 1 -57 [24,] -47 -48 [25,] -42 -43 [26,] -21 -22 [27,] -15 -17 [28,] 4 -53 [29,] 2 12 [30,] 5 -40 [31,] 6 19 [32,] 13 7 [33,] -11 20 [34,] -73 -74 [35,] -72 -75 [36,] -14 14 [37,] -6 -8 [38,] -65 22 [39,] 36 27 [40,] -2 -3 [41,] 32 -31 [42,] 28 18 [43,] 26 8 [44,] -46 24 [45,] -4 37 [46,] 23 10 [47,] -1 40 [48,] 16 38 [49,] 43 29 [50,] 17 -63 [51,] 41 30 [52,] 33 -20 [53,] 35 34 [54,] 46 -58 [55,] 47 -5 [56,] 51 31 [57,] 21 39 [58,] 45 -7 [59,] -44 -45 [60,] -61 48 [61,] 42 54 [62,] 49 56 [63,] -41 25 [64,] 55 57 [65,] 60 50 [66,] 65 53 [67,] 58 52 [68,] 59 61 [69,] 64 67 [70,] 62 63 [71,] 68 44 [72,] 70 71 [73,] 69 66 [74,] 73 72 Height: [1] 11.5000 9.0000 15.2500 21.7734 5.0000 15.8750 8.0000 4.0000 [9] 2.0000 9.0000 6.0000 32.1172 10.0000 8.0000 16.0000 27.9062 [17] 7.5000 5.0000 13.2500 97.9766 18.3125 5.0000 11.8750 8.2500 [25] 4.0000 5.5000 5.0000 22.3438 5.0000 12.5000 23.7812 8.0000 [33] 14.5000 8.0000 114.9764 6.0000 9.5000 3.0000 11.8750 2.0000 [41] 7.0000 4.0000 20.2031 4.0000 7.5000 3.0000 9.2500 12.6250 [49] 3.0000 7.5000 15.6875 3.0000 
7.5000 5.0000 33.5469 21.0000 [57] 6.0000 69.4453 18.0000 28.4375 3.0000 6.5000 9.5000 4.0000 [65] 5.0000 19.1250 2.0000 6.0000 10.5000 4.0000 15.0000 41.5312 [73] 10.0000 6.0000 2775 dissimilarities, summarized : Min. 1st Qu. Median Mean 3rd Qu. Max. 2.00 52.50 97.00 90.95 128.00 187.00 Metric : manhattan Number of objects : 75 Available components: [1] "order" "height" "ac" "merge" "diss" "call" [7] "method" "order.lab" "data" > print (ar3 <- agnes(ruspini, metric="manhattan", method = "flexible", + par.meth = 0.5)) Call: agnes(x = ruspini, metric = "manhattan", method = "flexible", par.method = 0.5) Agglomerative coefficient: 0.942387 Order of objects: [1] 1 2 3 5 9 10 14 16 18 19 15 17 4 6 8 7 11 12 13 20 61 64 68 65 67 [26] 69 70 71 62 66 63 72 75 73 74 21 22 23 24 27 28 29 30 25 26 32 35 31 36 39 [51] 40 33 34 37 38 41 42 43 44 45 49 51 53 50 54 52 55 56 57 59 60 58 46 47 48 Height (summary): Min. 1st Qu. Median Mean 3rd Qu. Max. 2.00 5.00 8.12 14.20 15.60 115.00 Available components: [1] "order" "height" "ac" "merge" "diss" "call" [7] "method" "order.lab" "data" > stopifnot(all.equal(ar2[1:4], ar3[1:4], tol=1e-12)) > > cat('Time elapsed: ', proc.time() - .proctime00,'\n') Time elapsed: 0.123 0.028 0.157 0 0 > cluster/tests/agnes-ex.R0000644000176000001440000000210110057071051014745 0ustar ripleyuserslibrary(cluster) options(digits = 6) data(votes.repub) .proctime00 <- proc.time() agn1 <- agnes(votes.repub, metric = "manhattan", stand = TRUE) summary(agn1) Dvr <- daisy(votes.repub) agn2 <- agnes(Dvr, method = "complete") summary(agn2) ## almost same: (ag2. <- agnes(Dvr, method= "complete", keep.diss=FALSE)) ag22 <- agnes(votes.repub, method= "complete", keep.diss=FALSE,keep.data=FALSE) stopifnot(identical(agn2[-5:-6], ag2.[-5:-6]), identical(Dvr, daisy(votes.repub)), # DUP=FALSE (!) 
identical(ag2.[-6], ag22[-6]) ) data(agriculture) summary(agnes(agriculture)) data(ruspini) summary(ar0 <- agnes(ruspini, keep.diss=FALSE, keep.data=FALSE)) summary(ar1 <- agnes(ruspini, metric = "manhattan")) str(ar1) cat('Time elapsed: ', proc.time() - .proctime00,'\n') summary(ar2 <- agnes(ruspini, metric="manhattan", method = "weighted")) print (ar3 <- agnes(ruspini, metric="manhattan", method = "flexible", par.meth = 0.5)) stopifnot(all.equal(ar2[1:4], ar3[1:4], tol=1e-12)) cat('Time elapsed: ', proc.time() - .proctime00,'\n') cluster/src/0000755000176000001440000000000012124335263012553 5ustar ripleyuserscluster/src/twins.c0000644000176000001440000002721112124335266014071 0ustar ripleyusers/* Produced by * $Id: f2c-clean,v 1.10 2002/03/28 16:37:27 maechler Exp $ * * twins.f -- translated by f2c (version 20031025). */ #include #include #include "cluster.h" #include "ind_2.h" // the auxiliary routines static void averl_(int nn, int *kwan, int *ner, double *ban, double *dys, int method, double *alpha, int *merge); static void splyt_(int nn, int *kwan, int *ner, double *ban, double *dys, int *merge); static double min_dis(double dys[], int ka, int kb, int ner[]); /* This program performs agglomerative nesting (AGNES) using the */ /* group average method (_or others_) of Sokal and Michener (1958), */ /* as well as divisive analysis (DIANA) using the method of */ /* Mcnaughton-Smith, Williams, Dale, and Mockett (1964). */ /* Extended by Martin Maechler to allow the (flexible) */ /* Lance-Williams clustering method (with parameters alpha[1:4]) */ void twins(int *nn, // = maximal number of objects int *jpp,// = maximal number of variables used in the analysis double *x, double *dys, double *dys2,// dys2(.) 
can have length 1, if(!keep.diss) int *jdyss, /* jdyss (in/out): initially, jdyss mod 10 = 1 : <==> diss = TRUE * jdyss < 10 : don't save dissimilarities */ double *valmd, int *jtmd, int *ndyst, int *jalg, int *method, int *kwan, int *ner, double *ban, double *coef, double *alpha, int *merge) { if (*jdyss % 10 == 1) { *jpp = 1; } else { // compute distances int jhalt = 0; F77_CALL(dysta)(nn, jpp, x, dys, ndyst, jtmd, valmd, &jhalt); /* ------ in ./dysta.f */ if (jhalt != 0) { *jdyss = -1; return; } } if (*jdyss >= 10) { /* save distances for S */ Memcpy(dys2, dys, (*nn * (*nn - 1) / 2 + 1)); } if (*jalg != 2) { // AGNES averl_(*nn, kwan, ner, ban, dys, *method, alpha, merge); } else { // DIANA splyt_(*nn, kwan, ner, ban, dys, merge); } // Compute agglomerative/divisive coefficient from banner: *coef = bncoef(*nn, ban); return; } /* twins */ /* ----------------------------------------------------------- */ /* AGNES agglomeration */ static void averl_(int nn, int *kwan, int *ner, double *ban, double *dys, int method, double *alpha, int *merge) { /* VARs */ double akb, smald; int j, k, l1, l2, lq, nab, lka, nlj, nclu, lnum, lput, lenda, lendb, lnext, n_1 = nn - 1, la = -1, lb = -1, llast = -1, lfyrs = -1, // <- against (unnecessary) warnings [-Wall] nmerge; /* System generated locals */ int merge_dim1 = n_1; int merge_offset = 1 + merge_dim1; /* Parameter adjustments */ merge -= merge_offset; --ban; --ner; --kwan; --alpha; /* initialization: */ /* Starting with nn clusters, kwan(j) = #{obj} in cluster j */ for (j = 1; j <= nn; ++j) { kwan[j] = 1; ner[j] = j; } /* find closest clusters */ nmerge = 1; for (nclu = n_1; nclu >= 1; --nclu) { j = 1; L80: ++j; if (kwan[j] == 0) goto L80; smald = dys[ind_2(1, j)] * 1.1f + 1.; for (k = 1; k <= n_1; ++k) if (kwan[k] > 0) { for (j = k + 1; j <= nn; ++j) if (kwan[j] > 0) { nlj = ind_2(k, j); if (smald >= dys[nlj]) { // Note: also when "==" ! 
smald = dys[nlj]; la = k; lb = j; } } } /* merge-structure for plotting tree in S */ l1 = -la; l2 = -lb; for (j = 1; j <= (nmerge - 1); ++j) { if (merge[j + merge_dim1] == l1 || merge[j + (merge_dim1 << 1)] == l1) l1 = j; if (merge[j + merge_dim1] == l2 || merge[j + (merge_dim1 << 1)] == l2) l2 = j; } merge[nmerge + merge_dim1] = l1; merge[nmerge + (merge_dim1 << 1)] = l2; ++nmerge; /* determine lfyrs and llast */ for (k = 1; k <= nn; ++k) { if (ner[k] == la) lfyrs = k; if (ner[k] == lb) llast = k; } ban[llast] = smald; /* if the two clusters are next to each other, ner must not be changed */ lnext = lfyrs + kwan[la]; if (lnext != llast) { /* updating ner and ban */ lput = lfyrs + kwan[la]; lnum = llast - lput; for (k = 1; k <= lnum; ++k) { lka = ner[lput]; akb = ban[lput]; lenda = llast + kwan[lb] - 2; lendb = lenda + 1; for (j = lput; j <= lenda; ++j) { ner[j] = ner[j + 1]; ban[j] = ban[j + 1]; } ner[lendb] = lka; ban[lendb] = akb; /* L220: */ } } /* We will merge A & B into A_{new} */ // Calculate new dissimilarities d(q, A_{new}) for (lq = 1; lq <= nn; ++lq) { // for each other cluster 'q' double dnew, ta, tb, tq, fa, fb, fc; if (lq == la || lq == lb || kwan[lq] == 0) continue; int naq = ind_2(la, lq); int nbq = ind_2(lb, lq); switch(method) { case 1: /* 1: group average method */ ta = (double) kwan[la]; tb = (double) kwan[lb]; fa = ta / (ta + tb); fb = tb / (ta + tb); dys[naq] = fa * dys[naq] + fb * dys[nbq]; break; case 2: /* 2: single linkage */ dnew = dys[naq]; if (dys[nbq] < dnew) dnew = dys[nbq]; dys[naq] = dnew; break; case 3: /* 3: complete linkage */ dnew = dys[naq]; if (dnew < dys[nbq]) dnew = dys[nbq]; dys[naq] = dnew; break; case 4: /* 4: ward's method */ ta = (double) kwan[la]; tb = (double) kwan[lb]; tq = (double) kwan[lq]; fa = (ta + tq) / (ta + tb + tq); fb = (tb + tq) / (ta + tb + tq); fc = -tq / (ta + tb + tq); nab = ind_2(la, lb); dys[naq] = sqrt(fa * dys[naq] * dys[naq] + fb * dys[nbq] * dys[nbq] + fc * dys[nab] * dys[nab]); break; case 5: 
/* 5: weighted average linkage */ dys[naq] = (dys[naq] + dys[nbq]) / 2.; break; case 6: /* 6: "Flexible Strategy" (K+R p.236 f) extended to 'Lance-Williams' */ dys[naq] = alpha[1] * dys[naq] + alpha[2] * dys[nbq] + alpha[3] * dys[ind_2(la, lb)] + alpha[4] * fabs(dys[naq] - dys[nbq]); /* Lance-Williams would allow alpha(1:2) to *depend* on |cluster| * could also include the extensions of Jambu(1978) -- * See Gordon A.D. (1999) "Classification" (2nd ed.) p.78 ff */ break; default: error(_("invalid method (code %d"), method); } } kwan[la] += kwan[lb]; kwan[lb] = 0; } return; } /* averl_ */ /* ----------------------------------------------------------- */ /* cf = ac := "Agglomerative Coefficient" from AGNES banner */ /* or cf = dc := "Divisive Coefficient" from DIANA banner */ void R_bncoef(int *nn, double *ban, double *cf) { *cf = bncoef(*nn, ban); } double bncoef(int nn, double *ban) { /* VARs */ int k, kafte, kearl; double sup, syze, cf; /* Parameter adjustments */ --ban; // sup := max_k ban[k] for(sup = 0., k = 2; k <= nn; ++k) { if (sup < ban[k]) sup = ban[k]; } cf = 0.; for (k = 1; k <= nn; ++k) { kearl = (k > 1 ) ? k : 2; kafte = (k < nn) ? (k + 1) : nn; syze = fmin2(ban[kearl], ban[kafte]); cf += (1. 
- syze / sup); } return cf / nn; } /* bncoef */ /* ----------------------------------------------------------- */ /* DIANA "splitting" */ static void splyt_(int nn, int *kwan, int *ner, double *ban, double *dys, int *merge) { /* Local variables */ int j, ja, jb, k, l; int jma, jmb, lmm, llq, lmz, lxx, lxy, lmma, lmmb, lner, nclu; int lchan, nhalf, nmerge, n_1 = nn - 1, splyn; /* against (unnecessary) warnings [-Wall]: */ int jaway = -1, lndsd = -1; double da, db, cs, sd, dyff; /* System generated locals */ int merge_dim1 = n_1; int merge_offset = 1 + merge_dim1; /* Parameter adjustments */ merge -= merge_offset; --ban; --ner; --kwan; /* initialization */ nclu = 1; nhalf = nn * n_1 / 2 + 1; for (l = 1; l <= nn; ++l) { kwan[l] = 0; ban[l] = 0.; ner[l] = l; } kwan[1] = nn; ja = 1; /* cs := diameter of data set */ cs = 0.f; k = 0; L20: if (cs < dys[k]) cs = dys[k]; ++k; if (k < nhalf) { goto L20; } /* prepare for splitting */ //____________ Big Loop _________________________________________________ L30: jb = ja + kwan[ja] - 1; jma = jb; /* special case of a pair of objects */ if (kwan[ja] == 2) { kwan[ja] = 1; kwan[jb] = 1; ban [jb] = dys[ind_2(ner[ja], ner[jb])]; } else { /* finding first object to be shifted */ double bygsd = -1.; for (l = ja; l <= jb; ++l) { lner = ner[l]; sd = 0.; for (j = ja; j <= jb; ++j) sd += dys[ind_2(lner, ner[j])]; if (bygsd < sd) { bygsd = sd; lndsd = l; } } /* shifting the first object */ --kwan[ja]; kwan[jb] = 1; if (jb != lndsd) { lchan = ner[lndsd]; lmm = jb - 1; for (lmma = lndsd; lmma <= lmm; ++lmma) { lmmb = lmma + 1; ner[lmma] = ner[lmmb]; } ner[jb] = lchan; } splyn = 0; jma = jb - 1; /* finding the next object to be shifted */ do { splyn++; int rest = (jma - ja); double bdyff = -1.; for (l = ja; l <= jma; ++l) { lner = ner[l]; da = 0.; for (j = ja; j <= jma; ++j) da += dys[ind_2(lner, ner[j])]; da /= rest; db = 0.; for (j = jma + 1; j <= jb; ++j) db += dys[ind_2(lner, ner[j])]; db /= splyn; dyff = da - db; if (bdyff < dyff) { bdyff 
= dyff; jaway = l; } } /* end for(l ..) */ jmb = jma + 1; /* shifting the next object when necessary */ if (bdyff <= 0.) break; // out of "object shifting" while(.) loop if (jma != jaway) { lchan = ner[jaway]; lmz = jma - 1; for (lxx = jaway; lxx <= lmz; ++lxx) ner[lxx] = ner[lxx + 1]; ner[jma] = lchan; } for (lxx = jmb; lxx <= jb; ++lxx) { lxy = lxx - 1; if (ner[lxy] < ner[lxx]) break; lchan = ner[lxy]; ner[lxy] = ner[lxx]; ner[lxx] = lchan; } --kwan[ja]; kwan[jma] = kwan[jmb] + 1; kwan[jmb] = 0; --jma; jmb = jma + 1; } while (jma != ja); // 200: switch the two parts when necessary if (ner[ja] >= ner[jmb]) { int lxxa = ja; for (int lgrb = jmb; lgrb <= jb; ++lgrb) { ++lxxa; lchan = ner[lgrb]; int lxg = -1; for (lxy = lxxa; lxy <= lgrb; ++lxy) { int lxf = lgrb - lxy + lxxa; lxg = lxf - 1; ner[lxf] = ner[lxg]; } ner[lxg] = lchan; } llq = kwan[jmb]; kwan[jmb] = 0; jma = ja + jb - jma - 1; jmb = jma + 1; kwan[jmb] = kwan[ja]; kwan[ja] = llq; } /* 300 : compute level for banner */ if (nclu == 1) { ban[jmb] = cs; } else { ban[jmb] = min_dis(dys, ja, jb, &ner[1]); } } if (++nclu < nn) { /* continue splitting until all objects are separated */ if (jb != nn) { L420: ja += kwan[ja]; if (ja <= nn) { if (kwan[ja] <= 1) goto L420; else goto L30; } } ja = 1; if (kwan[ja] == 1) goto L420; else goto L30; } //____________ End Big Loop _________________________________________________ /* 500 : merge-structure for plotting tree in S */ for (nmerge = 1; nmerge <= n_1; ++nmerge) { int nj = -1, l1, l2; double dmin = cs; for (j = 2; j <= nn; ++j) { if (kwan[j] >= 0 && dmin >= ban[j]) { dmin = ban[j]; nj = j; } } kwan[nj] = -1; l1 = -ner[nj - 1]; l2 = -ner[nj]; for (j = 1; j <= (nmerge - 1); ++j) { if (merge[j + merge_dim1] == l1 || merge[j + (merge_dim1 << 1)] == l1) l1 = j; if (merge[j + merge_dim1] == l2 || merge[j + (merge_dim1 << 1)] == l2) l2 = j; } merge[nmerge + merge_dim1] = l1; merge[nmerge + (merge_dim1 << 1)] = l2; } return; } /* splyt_ */ /* 
----------------------------------------------------------- */ /* used in splyt() above */ static double min_dis(double dys[], int ka, int kb, int ner[]) { double dm = 0.; for(int k = ka -1; k < kb -1; ++k) { int ner_k = ner[k]; for (int j = k+1; j < kb; ++j) { int k_j = ind_2(ner_k, ner[j]); if (dm < dys[k_j]) dm = dys[k_j]; } } return dm; } /* min_dis */ cluster/src/spannel.c0000644000176000001440000001005612124335266014364 0ustar ripleyusers/* Compute the SPANNing ELlipsoid * ------------------------------ for clusplot.default(*, span = TRUE) * Original spannel.f -- translated by f2c (version 20010821). * and f2c-clean,v 1.10 2002/03/28 16:37:27 maechler */ #include #include "cluster.h" #ifdef DEBUG_spannel # include #endif void spannel(int *ncas, /* = number of objects */ int *ndep, /* = number of variables */ double *dat,/* [ncas, 0:ndep] */ double *dstopt, /* = squared distances [1:ncas] */ double *cov,/* matrix [0:ndep, 0:ndep] */ double *varsum, /* [1:ndep] */ double *varss, /* [1:ndep] */ double *prob, /* [1:ncas] */ double *work, /* [0:ndep] */ double *eps, int *maxit, /* = maximal # iterations (and returns #{iter.})*/ int *ierr) { static int c__0 = 0; int it, i, j, k; double dmax, p, deter; int dat_dim1 = *ncas; int cov_dim1 = *ndep + 1; #define COV(i,j) cov[i + j * cov_dim1] #define X(i,j) dat[i + j * dat_dim1] /* X(i,j) : i = 0..(n-1), j = 0,..p often 1..p */ /* Parameter adjustments */ --varss; --varsum; /* When spannel() is called, dat[i,0] are all == 1 -- left unchanged: * Scale Data dat[i,j] to mean = 0 and var{1/n} = 1 -- for j= 1:ndep (not j=0!) 
*/ for (j = 1; j <= *ndep; ++j) { varsum[j] = 0.; varss[j] = 0.; } for (i = 0; i < *ncas; ++i) { for (j = 1; j <= *ndep; ++j) { p = X(i,j); varsum[j] += p; varss [j] += p * p; } } for (j = 1; j <= *ndep; ++j) { double aver = varsum[j] / *ncas, scal = sqrt(varss[j] / *ncas - aver * aver); #ifdef DEBUG_spannel Rprintf("j= %d, scal = %g\n", j, scal); #endif for (i = 0; i < *ncas; ++i) X(i,j) = (X(i,j) - aver) / scal; } p = 1. / (double) (*ncas); for (i = 0; i < *ncas; ++i) prob[i] = p; *ierr = 0; p = (double) (*ndep); /* ---- Repeat { ... up to `maxit' times ] */ for(it = 0; it < *maxit; it++) { /* Cov[,] = weighted covariance of dat[,] {weights = prob[]} */ for (j = 0; j <= *ndep; ++j) for (k = 0; k <= j; ++k) COV(k,j) = 0.; for (i = 0; i < *ncas; ++i) { for (j = 0; j <= *ndep; ++j) { work[j] = X(i,j); double tempo = prob[i] * work[j]; for (k = 0; k <= j; ++k) COV(k,j) += tempo * work[k]; } } for (j = 0; j <= *ndep; ++j) for (k = 0; k <= j; ++k) COV(j,k) = COV(k,j); deter = 1.; for (i = 0; i <= *ndep; ++i) { cl_sweep(cov, ndep, &c__0, &i, &deter); if (deter <= 0.) 
{ *ierr = 2; return; } } #ifdef DEBUG_spannel Rprintf(" it= %d; after all sweep()s : deter = %g\n", it, deter); #endif dmax = 0.; for (i = 0; i < *ncas; ++i) { double dist = -1.; for (j = 0; j <= *ndep; ++j) { /* work(j) = - sum_{k=0}^p dat(i,k) * cov(k,j) { = cov(j,k) }, * i.e., work_j = - X[i,] %*% COV[,j] */ double w_j = 0.; for (k = 0; k <= *ndep; ++k) w_j -= COV(j,k) * X(i,k); dist += w_j * X(i,j); } dstopt[i] = dist;/* Dist{opt}_i = -1 - t(X[i,]) %*% COV %*% X[i,] */ if (dmax < dist) dmax = dist; }/* for() : now dmax == max{ dstopt[i] } */ if (dmax <= p + *eps) { /* _converged_ */ *maxit = it; return; } /* else not yet converged */ for (i = 0; i < *ncas; ++i) prob[i] *= (dstopt[i] / p); } return;/* with it == *maxit and no convergence */ } /* spannel */ #undef X /* This is currently also called from R : ../tests/sweep-ex.R * ==> keep pointer args !*/ void cl_sweep(double *cov, int *nord, int *ixlo, int *nel, double *deter) { int i, j, cov_dim1 = *nord + 1; double temp = COV(*nel,*nel); *deter *= temp; if (*deter <= 0.) return; /* singular case -- signaled via *deter */ if (*nord <= 1) { COV(1,1) = 1. 
/ temp; } else { /* nord > 1 */ for (i = *ixlo; i <= *nord; ++i) if (i != *nel) { for (j = *ixlo; j <= i; ++j) if (j != *nel) { COV(j,i) = COV(i,j) - COV(i,*nel) * COV(*nel,j) / temp; COV(i,j) = COV(j,i); } } COV(*nel,*nel) = 1.; for (i = *ixlo; i <= *nord; ++i) { COV(*nel,i) = -COV(i,*nel) / temp; COV(i,*nel) = COV(*nel,i); } } return; } /* cl_sweep */ #undef COV cluster/src/sildist.c0000644000176000001440000000326312124335266014401 0ustar ripleyusers/* Donated by Francois Romain */ #include /* fmax2() */ #include #include "cluster.h" void sildist(double *d, /* distance : in matrix or dist format; i.e., of length n^2 or n*(n-1)/2; see 'ismat' */ int *n, /* number of Subjects (attr(d,'Size')) */ int *clustering,/* clustering vector, values from {1..k} */ int *k, /* number of clusters */ double *diC, /* diC */ int *counts, /* counts[k] := #{cluster k} */ double *si, /* (a_i - b_i) / max(ai,bi) */ int *neighbor, /* neighbor */ int *ismat) /* boolean : is 'd' a matrix (1) or a dist vector (0) */ { int i,j,l, ci; Rboolean computeSi ; /* do we compute si[i] */ double ai, bi ; for(i = 0, l = 0; i < *n; i++) { ci = clustering[i] - 1; counts[ci]++; if(*ismat) l = (*n)*i + i+1; for(j = i+1; j < *n; j++, l++) { int cj = clustering[j]-1; diC[(*k)*i + cj] += d[l]; diC[(*k)*j + ci] += d[l]; } } for(i = 0; i < *n; i++) { int ki = (*k)*i; ci = clustering[i] - 1; computeSi = TRUE; for(j=0; j < *k; j++) { if(j == ci) { if(counts[j] == 1) /* only one subject in the cluster */ computeSi = FALSE; else diC[ki + j] /= (counts[j]-1); } else { diC[ki + j] /= counts[j]; } } ai = diC[ki+ci]; /* bi = min_C diC : */ if(ci == 0) { /* cluster #1 */ bi = diC[ki+1]; neighbor[i] = 2; } else { bi = diC[ki]; neighbor[i] = 1; } for(j = 1; j < *k; j++) if(j != ci) { if(bi > diC[ki + j]) { bi = diC[ki + j]; neighbor[i] = j+1; } } si[i] = (computeSi && (bi != ai)) ? 
(bi - ai) / fmax2(ai, bi) : 0.; } } cluster/src/pam.c0000644000176000001440000004111212124335266013476 0ustar ripleyusers/* * PAM := Partitioning Around Medoids * * original Id: pam.f,v 1.16 2003/06/03 13:40:56 maechler translated by * f2c (version 20031025) and run through f2c-clean,v 1.10 2002/03/28 */ #include #include /* for diagnostics */ #include /* for interrupting */ #include "cluster.h" #include "ind_2.h" /* carries out a clustering using the k-medoid approach. */ void cl_pam(int *nn, int *p, int *kk, double *x, double *dys, int *jdyss, /* jdyss = 0 : compute distances from x * = 1 : distances provided in x */ double *valmd, int *jtmd, int *ndyst, int *nsend, int/*logical*/ *nrepr, int *nelem, double *radus, double *damer, double *avsyl, double *separ, double *ttsyl, double *obj, int *med, int *ncluv, double *clusinf, double *sylinf, int *nisol, int* pamonce) { int clusinf_dim1 = *kk; /* Local variables */ Rboolean all_stats = (obj[0] == 0.),/* if false, only return 'ncluv[]' */ med_given = (med[0] != 0),/* if true, med[] contain initial medoids */ do_swap = (nisol[0] != 0); int k, i, nhalf, jhalt, trace_lev = (int) obj[1]; double s; /* Function Body */ nhalf = *nn * (*nn - 1) / 2 + 1; /* nhalf := #{distances}+1 = length(dys) */ if (*jdyss != 1) { jhalt = 0; if(trace_lev) Rprintf("C pam(): computing %d dissimilarities: ", nhalf); F77_CALL(dysta)(nn, p, x, dys, ndyst, jtmd, valmd, &jhalt); if(trace_lev) Rprintf("[Ok]\n"); if (jhalt != 0) { *jdyss = -1; return; } } /* s := max( dys[.] ), the largest distance */ for (i = 1, s = 0.; i < nhalf; ++i) /* dys[0] == 0. not used here */ if (s < dys[i]) s = dys[i]; /* FIXME: work with med[] = (i_1, i_2, ..., i_k) * ----- instead nrepr[] = (b_1, ... 
b_n) b_i in {0,1} */ for (i = 0; i < *nn; ++i) nrepr[i] = 0; if(med_given) { /* if true, med[] contain initial medoids */ /* for the moment, translate these to nrepr[] 0/1 : * not assuming that the med[] indices are sorted */ for (k = 0; k < *kk; k++) nrepr[med[k] - 1] = 1; } /* Build + Swap [but no build if(med_given); swap only if(do_swap) : */ bswap(*kk, *nn, nrepr, med_given, do_swap, trace_lev, radus, damer, avsyl, dys, s, obj, pamonce); if(trace_lev) Rprintf("end{bswap()}, "); /* Compute Clustering & STATs if(all_stats): */ cstat(kk, nn, nsend, nrepr, all_stats, radus, damer, avsyl, separ, &s, dys, ncluv, nelem, med, nisol); if(trace_lev) Rprintf("end{cstat()}\n"); if(all_stats) { for (k = 0; k < *kk; ++k) { clusinf[k]= (double) nrepr[k]; clusinf[k + clusinf_dim1] = radus[k]; clusinf[k + (clusinf_dim1 << 1)] = avsyl [k]; clusinf[k + clusinf_dim1 * 3] = damer[k]; clusinf[k + (clusinf_dim1 << 2)] = separ[k]; } if (1 < *kk && *kk < *nn) { /* Compute Silhouette info : */ dark(*kk, *nn, ncluv, nsend, nelem, nrepr, radus, damer, avsyl, ttsyl, dys, &s, sylinf); } } } /* pam */ /* ----------------------------------------------------------- bswap(): the clustering algorithm in 2 parts: I. build, II. swap */ void bswap(int kk, int n, int *nrepr, Rboolean med_given, Rboolean do_swap, int trace_lev, /* nrepr[]: here is boolean (0/1): 1 = "is representative object" */ double *dysma, double *dysmb, double *beter, double *dys, double s, double *obj, int *pamonce) { int i, j, ij, k,h, dig_n; double sky; /* Parameter adjustments */ --nrepr; --beter; --dysma; --dysmb; if(trace_lev) Rprintf("pam()'s bswap(*, s=%g, pamonce=%d): ", s, *pamonce); s = s * 1.1 + 1.;// larger than all dys[] (but DBL_MAX is too large) /* IDEA: when n is large compared to k (= kk), * ---- rather use a "sparse" representation: * instead of boolean vector nrepr[] , use ind_repr <- which(nrepr) !! 
*/ for (i = 1; i <= n; ++i) dysma[i] = s; if(med_given) { if(trace_lev) Rprintf("medoids given\n"); /* compute dysma[] : dysma[j] = D(j, nearest_representative) */ for (i = 1; i <= n; ++i) { if (nrepr[i] == 1) for (j = 1; j <= n; ++j) { ij = ind_2(i, j); if (dysma[j] > dys[ij]) dysma[j] = dys[ij]; } } } else { /* ====== first algorithm: BUILD. ====== */ if(trace_lev) Rprintf("build %d medoids:\n", kk); /* find kk representatives aka medoids : */ for (k = 1; k <= kk; ++k) { R_CheckUserInterrupt(); /* compute beter[i] for all non-representatives: * also find ammax := max_{..} and nmax := argmax_i{beter[i]} ... */ int nmax = -1; /* -Wall */ double ammax, cmd; ammax = 0.; for (i = 1; i <= n; ++i) { if (nrepr[i] == 0) { beter[i] = 0.; for (j = 1; j <= n; ++j) { cmd = dysma[j] - dys[ind_2(i, j)]; if (cmd > 0.) beter[i] += cmd; } if (ammax <= beter[i]) { /* does < (instead of <= ) work too? -- NO! */ ammax = beter[i]; nmax = i; } } } nrepr[nmax] = 1;/* = .true. : found new representative */ if (trace_lev >= 2) Rprintf(" new repr. %d\n", nmax); /* update dysma[] : dysma[j] = D(j, nearest_representative) */ for (j = 1; j <= n; ++j) { ij = ind_2(nmax, j); if (dysma[j] > dys[ij]) dysma[j] = dys[ij]; } } /* output of the above loop: nrepr[], dysma[], ... */ } if(trace_lev) /* >= 2 (?) 
*/ { dig_n = 1+floor(log10(n)); Rprintf(" after build: medoids are"); for (i = 1; i <= n; ++i) if(nrepr[i] == 1) Rprintf(" %*d", dig_n, i); if(trace_lev >= 3) { Rprintf("\n and min.dist dysma[1:n] are\n"); for (i = 1; i <= n; ++i) { Rprintf(" %6.3g", dysma[i]); if(i % 10 == 0) Rprintf("\n"); } if(n % 10 != 0) Rprintf("\n"); } else Rprintf("\n"); } else dig_n = 1;// -Wall sky = 0.; for (j = 1; j <= n; ++j) sky += dysma[j]; obj[0] = sky / n; if (do_swap && (kk > 1 || med_given)) { double dzsky; int hbest = -1, nbest = -1, kbest= -1; // -Wall int *medoids, *clustmembership; double *fvect; if(*pamonce) { // add one to use R indices medoids = (int*) R_alloc(kk+1, sizeof(int)); clustmembership = (int*) R_alloc(n+1, sizeof(int)); fvect = (double*) R_alloc(n+1, sizeof(double)); for (int k = 1, i = 1; i <= n; ++i) { if (nrepr[i]) { medoids[k] = i; k++; } } } else { // -Wall : clustmembership = medoids = (int*) NULL; fvect = (double*) NULL; } /* ====== second algorithm: SWAP. ====== */ /* Hmm: In the following, we RE-compute dysma[]; * don't need it first time; then only need *update* after swap */ /*-- Loop : */ L60: if(*pamonce == 0) { // original algorihtm for (j = 1; j <= n; ++j) { /* dysma[j] := D_j d(j, ) [KR p.102, 104] * dysmb[j] := E_j d(j, <2-nd cl.medi>) [p.103] */ dysma[j] = s; dysmb[j] = s; for (i = 1; i <= n; ++i) { if (nrepr[i]) { ij = ind_2(i, j); if (dysma[j] > dys[ij]) { dysmb[j] = dysma[j]; dysma[j] = dys[ij]; } else if (dysmb[j] > dys[ij]) { dysmb[j] = dys[ij]; } } } } } else { // *pamonce == 1 or == 2 : for (j = 1; j <= n; ++j) { /* dysma[j] := D_j d(j, ) [KR p.102, 104] * dysmb[j] := E_j d(j, <2-nd cl.medi>) [p.103] */ dysma[j] = s; dysmb[j] = s; for(k = 1; k <= kk; k++) { i = medoids[k]; ij = ind_2(i, j); if (dysma[j] > dys[ij]) { //store cluster membership clustmembership[j] = i; dysmb[j] = dysma[j]; dysma[j] = dys[ij]; } else if (dysmb[j] > dys[ij]) { dysmb[j] = dys[ij]; } } } } dzsky = 1.; /* 1 is arbitrary > 0; only dzsky < 0 matters in the end */ 
if(*pamonce == 0) { // original algorihtm for (h = 1; h <= n; ++h) if (!nrepr[h]) { R_CheckUserInterrupt(); for (i = 1; i <= n; ++i) if (nrepr[i]) { double dz = 0.; /* dz := T_{ih} := sum_j C_{jih} [p.104] : */ for (j = 1; j <= n; ++j) { /* if (!nrepr[j]) { */ int hj = ind_2(h, j); ij = ind_2(i, j); if (dys[ij] == dysma[j]) { double small = dysmb[j] > dys[hj] ? dys[hj] : dysmb[j]; dz += (- dysma[j] + small); } else if (dys[hj] < dysma[j]) /* 1c. */ dz += (- dysma[j] + dys[hj]); } if (dzsky > dz) { dzsky = dz; /* dzsky := min_{i,h} T_{i,h} */ hbest = h; nbest = i; } } } } else { // *pamonce == 1 or == 2 : for(k = 1; k <= kk; k++) { R_CheckUserInterrupt(); i=medoids[k]; double removeCost = 0.; //Compute cost for removing the medoid for (j = 1; j <= n; ++j) { if(clustmembership[j] == i) { removeCost+=(dysmb[j]-dysma[j]); fvect[j]=dysmb[j]; } else{ fvect[j]=dysma[j]; } } if (*pamonce == 1) { // Now check possible new medoids h for (h = 1; h <= n; ++h) if (!nrepr[h]) { double addGain = removeCost; // Compute gain of adding h as a medoid: for (j = 1; j <= n; ++j) { int hj = ind_2(h, j); if(dys[hj] < fvect[j]) addGain += (dys[hj]-fvect[j]); } if (dzsky > addGain) { dzsky = addGain; /* dzsky := min_{i,h} T_{i,h} */ hbest = h; nbest = i; kbest = k; } } } else { // *pamonce == 2 : // Now check possible new medoids h for (h = 1; h <= n; ++h) if (!nrepr[h]) { double addGain = removeCost - fvect[h]; // - fvect[h] since dys[h,h]=0; // Compute gain of adding h as a medoid: int ijbase = (h-2)*(h-1)/2; for (j = 1; j < h; ++j) { int hj = ijbase+j; if(dys[hj] < fvect[j]) addGain += (dys[hj]-fvect[j]); } ijbase += h;// = (h-2)*(h-1)/2 + h for (j = h+1; j <= n; ++j) { ijbase += j-2; if(dys[ijbase] < fvect[j]) addGain += (dys[ijbase]-fvect[j]); } if (dzsky > addGain) { dzsky = addGain; /* dzsky := min_{i,h} T_{i,h} */ hbest = h; nbest = i; kbest = k; } } } } } if (dzsky < - 16*DBL_EPSILON * fabs(sky)) { // basically " < 0 ", // but ' < 0 ' gave infinite loop, swapping the identical 
objects // found an improving swap if(trace_lev >= 2) Rprintf( " swp new %*d <-> %*d old; decreasing diss. %7g by %g\n", dig_n, hbest, dig_n, nbest, sky, dzsky); nrepr[hbest] = 1; nrepr[nbest] = 0; if(*pamonce) medoids[kbest]=hbest; sky += dzsky; goto L60; } } obj[1] = sky / n; } /* bswap */ /* ----------------------------------------------------------- cstat(): Compute STATistics (numerical output) concerning each partition */ void cstat(int *kk, int *nn, int *nsend, int *nrepr, Rboolean all_stats, double *radus, double *damer, double *avsyl, double *separ, double *s, double *dys, int *ncluv, int *nelem, int *med, int *nisol) { int j, k, ja, jk, nplac, ksmal = -1/* -Wall */; double ss = *s * 1.1 + 1.; /* Parameter adjustments */ --nisol; --med; --nelem; --ncluv; --separ; --avsyl; --damer; --radus; --nrepr; --nsend; /* nsend[j] := i, where x[i,] is the medoid to which x[j,] belongs */ for (j = 1; j <= *nn; ++j) { if (nrepr[j] == 0) { double dsmal = ss; for (k = 1; k <= *nn; ++k) { if (nrepr[k] == 1) { int kj_ = ind_2(k, j); if (dsmal > dys[kj_]) { dsmal = dys[kj_]; ksmal = k; } } } nsend[j] = ksmal; } else { nsend[j] = j; } } /* ncluv[j] := k , the cluster number (k = 1..*kk) */ jk = 1; nplac = nsend[1]; for (j = 1; j <= *nn; ++j) { ncluv[j] = 0; if (nsend[j] == nplac) ncluv[j] = 1; } for (ja = 2; ja <= *nn; ++ja) { nplac = nsend[ja]; if (ncluv[nplac] == 0) { ++jk; for (j = 2; j <= *nn; ++j) { if (nsend[j] == nplac) ncluv[j] = jk; } if (jk == *kk) break; } } if(all_stats) { /* analysis of the clustering. 
*/ int numl; for (k = 1; k <= *kk; ++k) { int ntt = 0, m = -1/* -Wall */; double ttt = 0.; radus[k] = -1.; R_CheckUserInterrupt(); for (j = 1; j <= *nn; ++j) { if (ncluv[j] == k) { double djm; ++ntt; m = nsend[j]; nelem[ntt] = j; djm = dys[ind_2(j, m)]; ttt += djm; if (radus[k] < djm) radus[k] = djm; } } if(ntt == 0) REprintf("bug in C cstat(): ntt=0 !!!\n"); avsyl[k] = ttt / ntt; med[k] = m; } if (*kk == 1) { damer[1] = *s; nrepr[1] = *nn; return; } /* ELSE kk > 1 : */ /* numl = number of L-clusters. */ numl = 0; for (k = 1; k <= *kk; ++k) { /* identification of cluster k: nelem= vector of object indices, nel = number of objects */ int nel = 0; R_CheckUserInterrupt(); for (j = 1; j <= *nn; ++j) { if (ncluv[j] == k) { ++nel; nelem[nel] = j; } } nrepr[k] = nel; if (nel == 1) { int nvn = nelem[1]; damer[k] = 0.; separ[k] = ss; for (j = 1; j <= *nn; ++j) { if (j != nvn) { int mevj = ind_2(nvn, j); if (separ[k] > dys[mevj]) separ[k] = dys[mevj]; } } /* Is cluster k 1) an L-cluster or 2) an L*-cluster ? */ if (separ[k] == 0.) 
++numl; } else { /* nel != 1 : */ double dam = -1., sep = ss; Rboolean kand = TRUE; for (ja = 1; ja <= nel; ++ja) { int jb, nvna = nelem[ja]; double aja = -1., ajb = ss; for (jb = 1; jb <= *nn; ++jb) { int jndz = ind_2(nvna, jb); if (ncluv[jb] == k) { if (aja < dys[jndz]) aja = dys[jndz]; } else { if (ajb > dys[jndz]) ajb = dys[jndz]; } } if (kand && aja >= ajb) kand = FALSE; if (dam < aja) dam = aja; if (sep > ajb) sep = ajb; } separ[k] = sep; damer[k] = dam; if (kand) { ++numl; if (dam >= sep) /* L-cluster */ nisol[k] = 1; else/* L*-cluster */ nisol[k] = 2; continue /* k */; } } /* nel = 1 or (!kand) : */ nisol[k] = 0; }/* for(k) */ } /* all_stats */ } /* cstat */ /* ----------------------------------------------------------- Compute Silhouette Information : */ void dark(int kk, int nn, int *ncluv, int *nsend, int *nelem, int *negbr, double *syl, double *srank, double *avsyl, double *ttsyl, double *dys, double *s, double *sylinf) { int k, nsylr; /* pointers to sylinf[] columns -- sylinf[nn, 4] : */ double *sylinf_2, *sylinf_3, *sylinf_4; sylinf_2 = sylinf + nn; sylinf_3 = sylinf_2 + nn; sylinf_4 = sylinf_3 + nn; /* Parameter adjustments */ --avsyl; --ncluv; nsylr = 0; *ttsyl = 0.; for (k = 1; k <= kk; ++k) { /* nelem[0:(ntt-1)] := indices (1-based) of obs. in cluster k : */ int j,l, ntt = 0; for (j = 1; j <= nn; ++j) { if (ncluv[j] == k) { nelem[ntt] = j; ++ntt; } } for (j = 0; j < ntt; ++j) {/* (j+1)-th obs. 
in cluster k */ int k_, nj = nelem[j]; double dysb = *s * 1.1 + 1.; negbr[j] = -1; /* for all clusters k_ != k : */ for (k_ = 1; k_ <= kk; ++k_) if (k_ != k) { double db = 0.; int nbb = 0; for (l = 1; l <= nn; ++l) if (ncluv[l] == k_) { ++nbb; if (l != nj) db += dys[ind_2(nj, l)]; } db /= nbb; /* now db(k_) := mean( d[j, l]; l in C_{k_} ) */ if (dysb > db) { dysb = db; negbr[j] = k_; } }/* negbr[j] := arg max_{k_} db(k_) */ if (ntt > 1) { double dysa = 0.; for (l = 0; l < ntt; ++l) { int nl = nelem[l]; if (nj != nl) dysa += dys[ind_2(nj, nl)]; } dysa /= ntt - 1; if (dysa > 0.) { if (dysb > 0.) { if (dysb > dysa) syl[j] = 1. - dysa / dysb; else if (dysb < dysa) syl[j] = dysb / dysa - 1.; else /* dysb == dysa: */ syl[j] = 0.; if (syl[j] < -1.) syl[j] = -1.; else if (syl[j] > 1.) syl[j] = 1.; } else { syl[j] = -1.; } } else /* dysa == 0 */ if (dysb > 0.) syl[j] = 1.; else syl[j] = 0.; } else { /* ntt == 1: */ syl[j] = 0.; } } /* for( j ) */ avsyl[k] = 0.; if (ntt == 0) /* this can happen when medoids are user-specified !*/ continue; /* next k */ for (j = 0; j < ntt; ++j) { int lang=-1 /*Wall*/; double symax = -2.; for (l = 0; l < ntt; ++l) { if (symax < syl[l]) { symax = syl[l]; lang = l; } } nsend[j] = lang; srank[j] = symax; /* = syl[lang] */ avsyl[k] += srank[j]; syl[lang] = -3.; } *ttsyl += avsyl[k]; avsyl[k] /= ntt; if (ntt == 1) { sylinf [nsylr] = (double) k; sylinf_2[nsylr] = (double) negbr[0]; sylinf_3[nsylr] = 0.; sylinf_4[nsylr] = (double) nelem[0]; ++nsylr; } else { for (j = 0; j < ntt; ++j) { int lplac = nsend[j]; sylinf [nsylr] = (double) k; sylinf_2[nsylr] = (double) negbr[lplac]; sylinf_3[nsylr] = srank[j]; sylinf_4[nsylr] = (double) nelem[lplac]; ++nsylr; } } } /* for (k) */ *ttsyl /= nn; } /* dark */ cluster/src/mona.f0000644000176000001440000001345412124335266013666 0ustar ripleyusers subroutine clmona(nn,pp, x,jerr, nban,ner,kwan,lava, jlack) cc cc MONothetic Analysis cc cc Program for divisive hierarchical clustering of binary data, cc using 
association analysis. cc cc list of functions and subroutines: cc function kab c Args integer nn, pp, jerr c nn = number of objects c pp = number of variables c jerr : error return code in {1,2,3,4} integer x(nn,pp), jlack(pp), nban(nn),ner(nn),kwan(nn),lava(nn) c x[i,j]: binary (0/1/NA) data (obs. i, var.j) c where "NA", missing values, are all values > 1 c Function called: integer kab c VARs logical syn integer j, ja, jb, jnat, jma, jtel, jtelz, jtel2, jres integer j0,j1, jptwe integer a, b, c, d, k, ka, kb, kal, kalf, km integer l, lbb, laa, lcc, ldd, lee, lack,lama, lams integer nel, nelbb, nzf, nhalf, npass, nclu, myst, mysca c--begin{-Wall} a=0 b=0 c=0 d=0 jma=0 jtel=0 jtelz=0 lcc=0 nelbb=0 c-- end{-Wall} nhalf=(nn+1)/2 jptwe=(pp+4)/5 myst=0 do 70 l=1,nn mysca=0 do 60 j=1,pp if(x(l,j) .gt. 1) mysca=mysca+1 60 continue if(mysca .eq. pp) then c all variables missing for this object jerr=1 return endif myst=myst+mysca 70 continue if(myst.eq.0)go to 290 lack=0 do 100 j=1,pp j0=0 j1=0 do 80 l=1,nn if(x(l,j).eq.0) j0=j0+1 if(x(l,j).eq.1) j1=j1+1 80 continue jlack(j)=nn-j0-j1 if(jlack(j).ne.0) lack=lack+1 if(jlack(j).ge.nhalf) then c at least 50% of the objects have missing values for this variable jerr=2 return endif if(j0.eq.0 .or. j1.eq.0) then c all non missing values are identical for this variable jerr=3 return endif 100 continue if(lack .eq. pp) then c all variables have missing values jerr=4 return endif cc cc filling in missing values cc do 260 j=1,pp if(jlack(j) .ne. 0) then lama=-1 syn=.true. do 240 ja=1,pp if(jlack(ja) .eq. 0) then c no missing in x[, ja] a=0 b=0 c=0 d=0 do 230 k=1,nn if(x(k,j).eq.1)go to 220 if(x(k,ja).eq.0)a=a+1 if(x(k,ja).eq.1)b=b+1 go to 230 220 if(x(k,ja).eq.0)c=c+1 if(x(k,ja).eq.1)d=d+1 230 continue kal=a*d - b*c kalf=kab(kal) if(kalf.ge.lama)then lama=kalf jma=ja if(kal.lt.0) syn=.false. endif endif 240 continue do 250 l=1,nn if(x(l,j).gt. 
1) then c missing if(syn) then x(l,j)=x(l,jma) else if(x(l,jma).eq.1) x(l,j)=0 if(x(l,jma).eq.0) x(l,j)=1 endif endif 250 continue endif 260 continue c--- end of treating missing values ---- cc cc initialization cc 290 do 300 k=1,nn kwan(k)=0 ner(k)=k lava(k)=0 300 continue npass=1 kwan(1)=nn cc cc algorithm cc nclu=1 ka=1 C --- Loop --- 310 kb=ka+kwan(ka)-1 lama=-1 jnat=pp do 370 j=1,pp if(nclu.eq.1)go to 330 j0=0 j1=0 do 325 l=ka,kb nel=ner(l) if(x(nel,j).eq.0) j0=j0+1 if(x(nel,j).eq.1) j1=j1+1 325 continue if(j1.eq.0)go to 370 if(j0.eq.0)go to 370 330 jnat=jnat-1 lams=0 do 360 jb=1,pp if(jb .ne. j) then a=0 b=0 c=0 d=0 do 350 l=ka,kb nel=ner(l) if(x(nel,j).eq. 0) then if(x(nel,jb).eq.0) a=a+1 if(x(nel,jb).eq.1) b=b+1 else if(x(nel,jb).eq.0) c=c+1 if(x(nel,jb).eq.1) d=d+1 endif 350 continue lams=lams+kab(a*d - b*c) endif 360 continue if(lama .lt. lams) then lama=lams jtel =c+d jtelz=a+b jma=j endif 370 continue if(jnat.lt.pp)go to 375 kwan(ka)=-kwan(ka) go to 400 cc cc splitting cc 375 nel=ner(ka) if(x(nel,jma).eq.1)then nzf=0 jtel2=jtel else nzf=1 jtel2=jtelz endif jres=kb-ka+1-jtel2 km=ka+jtel2 l=ka c -- inner loop -- 378 nel=ner(l) if(x(nel,jma).eq.nzf)go to 380 l=l+1 if(l.lt.km)go to 378 go to 390 380 do 381 lbb=l,kb nelbb=ner(lbb) if(x(nelbb,jma) .ne. 
nzf) then lcc=lbb-1 go to 382 endif 381 continue 382 do 383 laa=l,lcc ldd=lcc+l-laa lee=ldd+1 ner(lee)=ner(ldd) 383 continue ner(l)=nelbb go to 378 390 nclu=nclu+1 nban(km)=npass kwan(ka)=jtel2 kwan(km)=jres lava(km)=jma ka=ka+kwan(ka) 400 if(kb.eq.nn)go to 500 410 ka=ka+kab(kwan(ka)) if(ka.gt.nn)go to 500 if(kwan(ka).lt.2)go to 410 go to 310 500 npass=npass+1 do 510 ka=1,nn if(kwan(ka).ge.2) go to 310 510 continue end cc cc kab(j) = |j| cc integer function kab(j) integer j kab=j if(j.lt.0) kab=-j return end cluster/src/init.c0000644000176000001440000000545712124335266013700 0ustar ripleyusers#include #include #include "cluster.h" #include #define CDEF(name) {#name, (DL_FUNC) &name, sizeof(name ## _t)/sizeof(name ## _t[0]), name ##_t} static R_NativePrimitiveArgType R_bncoef_t[3] = { INTSXP, REALSXP, REALSXP }; static R_NativePrimitiveArgType cl_clara_t[33] = { /*n:*/ INTSXP, INTSXP, INTSXP, REALSXP, INTSXP, INTSXP, REALSXP, INTSXP, /*valmd:*/ REALSXP, INTSXP, INTSXP, /* rng_R: */ LGLSXP, /* pam_like:*/ LGLSXP, /*nrepr: */ INTSXP, INTSXP, INTSXP, INTSXP, INTSXP, /*radus:*/ REALSXP, REALSXP, REALSXP, REALSXP, REALSXP, REALSXP, INTSXP, /*obj: */ REALSXP, REALSXP, REALSXP, REALSXP, INTSXP, INTSXP, /*tmp: */ REALSXP,INTSXP }; static R_NativePrimitiveArgType cl_fanny_t[27] = { INTSXP, INTSXP, INTSXP, REALSXP, REALSXP, /*jdyss: */ INTSXP, REALSXP, INTSXP, INTSXP, INTSXP, INTSXP, /*negbr: */ INTSXP, /*syl: */ REALSXP, REALSXP, REALSXP, REALSXP, /*nfuzz: */ INTSXP, REALSXP, REALSXP, REALSXP, REALSXP, /*obj: */ REALSXP, INTSXP, REALSXP, REALSXP, REALSXP, INTSXP }; static R_NativePrimitiveArgType cl_pam_t[24] = { INTSXP, INTSXP, INTSXP, REALSXP, REALSXP, /*jdyss: */ INTSXP, REALSXP, INTSXP, INTSXP, INTSXP, /*nrepr: */ LGLSXP, INTSXP, /*radus: */ REALSXP, REALSXP, REALSXP, REALSXP, /*ttsyl: */ REALSXP, REALSXP, INTSXP, INTSXP, REALSXP, REALSXP, INTSXP, /*optim: */ INTSXP }; static R_NativePrimitiveArgType spannel_t[12] = { // ./spannel.c : INTSXP, INTSXP, REALSXP, REALSXP, 
REALSXP, REALSXP, /*varss: */ REALSXP, REALSXP, REALSXP, REALSXP, INTSXP, INTSXP }; static R_NativePrimitiveArgType sildist_t[] = { REALSXP, INTSXP, INTSXP, INTSXP, REALSXP, INTSXP, /* si: */ REALSXP, INTSXP, LGLSXP }; static R_NativePrimitiveArgType twins_t[17] = { INTSXP, INTSXP, REALSXP, REALSXP, REALSXP, /* jdiss: */ INTSXP, REALSXP, INTSXP, INTSXP, INTSXP, INTSXP, /* kwan: */ INTSXP, INTSXP, REALSXP, REALSXP, REALSXP, INTSXP }; /* is only .C()-called from ../tests/sweep-ex.R : */ static R_NativePrimitiveArgType cl_sweep_t[5] = { REALSXP, INTSXP, INTSXP, INTSXP, REALSXP }; static const R_CMethodDef CEntries[] = { CDEF(R_bncoef), CDEF(cl_clara), {"dysta3", (DL_FUNC) &dysta3, 8},/* ./fanny.c */ CDEF(cl_fanny), CDEF(cl_pam), CDEF(spannel), CDEF(cl_sweep), CDEF(sildist), CDEF(twins), {NULL, NULL, 0} }; /* static R_CallMethodDef CallEntries[] = { * {NULL, NULL, 0} * }; */ static R_FortranMethodDef FortEntries[] = { {"cl_daisy", (DL_FUNC) &F77_SUB(cldaisy), 11}, {"cl_mona", (DL_FUNC) &F77_SUB(clmona), 9}, {"dysta", (DL_FUNC) &F77_SUB(dysta), 8}, {NULL, NULL, 0} }; void R_init_cluster(DllInfo *dll) { R_registerRoutines(dll, CEntries, NULL/*CallEntries*/, FortEntries, NULL); R_useDynamicSymbols(dll, FALSE); } cluster/src/ind_2.h0000644000176000001440000000205212124335266013721 0ustar ripleyusers/* inlined, to be included in pam.c and clara.c */ static #ifdef __GNUC__ __inline__ #endif int ind_2(int l, int j) { /* Utility, originally FORTRAN, called "meet"; called from CLARA, PAM & TWINS. * Original code had MEET(), MEET2(), and MEET3() in the 3 source files. * ind_2(l,j) returns the *index* of dys() where diss. d(l,j) is stored: * d(l,j) == dys[ind_2(l,j)] * * MM: changed to work with 0-origin matrices dys[], but l,j are >= 1 */ #ifdef was_orig if(l > j) return (l-2)*(l-1)/2 + j; else if(l == j) return 0;/* and the first element, dys[0] is := 0. permanently! 
*/ else /* l < j */ return (j-2)*(j-1)/2 + l; #else /* from Li Long -- optimizes particularly well on Itanium2 */ /* max_m check by MMächler: max_m is the largest integer m * ----- such that (m-2)*(m-1) < MAX_INT : */ #define max_m 46342 int result = 0; if (l != j) { int m = l>j ? l : j; int n = l>j ? j : l; result = (m <= max_m) ? (m-2)*(m-1)/2 + n : (int) (((double) m-2)*(m-1)/2 + n); } return result; #endif } cluster/src/fanny.c0000644000176000001440000003076412124335266014047 0ustar ripleyusers/* FANNY : program for Fuzzy cluster ANalysis */ /* was $Id: fanny.c 5592 2010-06-19 16:37:03Z maechler $ * fanny.f -- translated by f2c (version 20020621). * and treated by f2c-clean v 1.10, and manually by Martin Maechler */ #include #include /* for diagnostics */ #include "cluster.h" /* dysta3() is in cluster.h ! */ static void fuzzy(int nn, int k, double *p, double *dp, double *pt, double *dss, double *esp, double *ef, double *obj, double r, double tol, int *nit, int trace_lev); static void caddy(int nn, int k, double *p, int *ktrue, int *nfuzz, int *ncluv, double *rdraw, int trace_lev); static void fygur(int kk, int nn, int *ncluv, int *nsend, int *nelem, int *negbr, double *syl, double *srank, double *avsyl, double *ttsyl, double *dss, double *s, double *sylinf); void cl_fanny(int *nn, /* = number of objects */ int *jpp, /* = number of variables for clustering */ int *kk, /* = number of clusters */ double *x, double *dss, int *jdyss, double *valmd, int *jtmd, int *ndyst, int *nsend, int *nelem, int *negbr, double *syl, double *p, double *dp, double *pt, int *nfuzz, double *esp, double *ef, double *dvec, double *ttsyl, double *obj, /* input/output; see fuzzy() below */ int *ncluv, double *sylinf, double *r, double *tol, int *maxit) { int ktrue, trace_lev = (int) obj[1]; Rboolean all_stats = (obj[0] == 0.);/* TODO: consider *not* doing caddy() */ if (*jdyss != 1) { /* compute dissimilarities from data */ int jhalt = 0; dysta3(nn, jpp, x, dss, ndyst, jtmd, valmd, 
&jhalt); if (jhalt) { *jdyss = -1; return; } } fuzzy(*nn, *kk, p, dp, pt, dss, esp, ef, obj, *r, *tol, maxit, trace_lev); caddy(*nn, *kk, p, /* -> */ &ktrue, nfuzz, ncluv, pt, trace_lev); obj[0] = (double) ktrue; /* Compute "silhouette": */ if (all_stats && 2 <= ktrue && ktrue < *nn) { int i, nhalf = *nn * (*nn - 1) / 2; double s = 0.; /* s := max( dss[i,j] ) */ for(i = 0; i < nhalf; i++) if (s < dss[i]) s = dss[i]; fygur(ktrue, *nn, ncluv, nsend, nelem, negbr, syl, dvec, pt, ttsyl, dss, &s, sylinf); } return; } /* cl_fanny */ void dysta3(int *nn, int *p, double *x, double *dys, int *ndyst, int *jtmd, double *valmd, int *jhalt) { int k, l, nlk, x_d = *nn; nlk = 0; for (l = 0; l < (*nn - 1); ++l) { for (k = l + 1; k < *nn; ++k, ++nlk) { double clk = 0.; int j, jj, npres = 0; for (j = 0, jj = 0; j < *p; j++, jj+=x_d) { double d; if (jtmd[j] < 0) { if (x[l + jj] == valmd[j] || x[k + jj] == valmd[j]) continue; /* next j */ } ++npres; d = x[l + jj] - x[k + jj]; if (*ndyst != 2) /* 1 or 3 */ clk += d * d; else /* if (*ndyst == 2) */ clk += fabs(d); } if (npres == 0) { dys[nlk] = -1.; *jhalt = 1; } else { clk *= (*p) / (double) npres; dys[nlk] = (*ndyst == 1) ? sqrt(clk) : /*ndyst = 2 or 3 */ clk; } } } } /* dysta3 */ static void fuzzy(int nn, int k, double *p, double *dp, double *pt, double *dss, double *esp, double *ef, double *obj,/* of length 4; * in : (cluster_only, trace_lev, compute_p, 0) * out: (ktrue , cryt, PC ("dunn"), normalized_PC) */ double r, /* the exponent, > 1. -- was fixed to 2 originally */ double tol,/* the precision for the iterations */ int *nit, /* the maximal number of iterations -- originally fixed to 500 */ int trace_lev) { double dt, xx, ddd, crt, reen, cryt; int p_d = nn, dp_d = nn; int i, j, m, mi, it; Rboolean converged = FALSE, compute_p = (int)obj[2]; if(trace_lev) Rprintf("fanny()'s fuzzy(n = %d, k = %d):\n", nn, k); if(compute_p) { /* Compute initial fuzzy clustering, i.e. 
membership matrix p[,] */ int nd, ndk; double p0 = 0.1 / (k - 1); for (m = 0; m < nn; ++m) for (j = 0; j < k; ++j) p[m + j * p_d] = p0; ndk = nn / k; nd = ndk; j = 0; for (m = 0; m < nn; ++m) { int jj; p[m + j * p_d] = 0.9; if (m+1 >= nd) { ++j; if (j+1 == k) /* reset */ nd = nn; else nd += ndk; } for (jj = 0; jj < k; ++jj) p[m + jj * p_d] = pow(p[m + jj * p_d], r); } } else { /* p[,] already contains memberships */ for (m = 0; m < nn; ++m) for (j = 0; j < k; ++j) p[m + j * p_d] = pow(p[m + j * p_d], r); } /* initial criterion value */ cryt = 0.; for (j = 0; j < k; ++j) { esp[j] = 0.; ef[j] = 0.; for (m = 0; m < nn; ++m) { esp[j] += p[m + j * p_d]; for (i = 0; i < nn; ++i) { if (i != m) { mi = imin2(m,i); mi = mi * nn - (mi + 1) * (mi + 2) / 2 + imax2(m,i); dp[m + j * dp_d] += p[i + j * p_d] * dss[mi]; ef[j] += p[i + j * p_d] * p[m + j * p_d] * dss[mi]; } } } cryt += ef[j] / (esp[j] * 2.); } crt = cryt; if(trace_lev) { Rprintf("fuzzy(): initial obj = %g\n", cryt); if(trace_lev >= 2) { Rprintf(" ef[]= ("); for(j=0; j < k; j++) Rprintf(" %g%s", ef[j], ((j < k-1)? "," : ")\n")); Rprintf(" esp[]= ("); for(j=0; j < k; j++) Rprintf(" %g%s",esp[j], ((j < k-1)? "," : ")\n")); } } reen = 1. / (r - 1.); it = 0; while(++it <= *nit) { /* . . . . . iterations . . . . . . . . . . . . . */ for(m = 0; m < nn; m++) { /* the new membership coefficients of the objects are calculated, and the resulting value of the criterion is computed. */ dt = 0.; for (j = 0; j < k; ++j) { pt[j] = pow(esp[j] / (dp[m + j * dp_d] - ef[j] / (2 * esp[j])), reen); dt += pt[j]; } xx = 0.; for (j = 0; j < k; ++j) { pt[j] /= dt; if (pt[j] < 0.) xx += pt[j]; } /* now: sum_j (pt[j]) == 1; xx := sum_{pt[j] < 0} pt[j] */ for (j = 0; j < k; ++j) { double d_mj; pt[j] = (pt[j] > 0.) ? 
pow(pt[j] / (1 - xx), r) : 0.; d_mj = pt[j] - p[m + j * p_d]; esp[j] += d_mj; for (i = 0; i < nn; ++i) { if (i != m) { mi = imin2(m,i); mi = mi * nn - (mi + 1) * (mi + 2) / 2 + imax2(m,i); ddd = d_mj * dss[mi]; dp[i + j * dp_d] += ddd; ef[j] += p[i + j * p_d] * 2. * ddd; } } p[m + j * p_d] = pt[j]; } if(trace_lev >= 3) { Rprintf(" pt[m= %d, *]: ",m); for (j = 0; j < k; ++j) Rprintf(" %g%s", pt[j], ((j < k-1)? "," : "\n")); } } /* m == nn */ cryt = 0.; for (j = 0; j < k; ++j) cryt += ef[j] / (esp[j] * 2.); if(trace_lev >= 2) Rprintf(" m == n: obj = %#20.14g", cryt); /* Convergence check */ if((converged = (fabs(cryt - crt) <= tol * cryt))) break; if(trace_lev >= 2) Rprintf(" not converged: it = %d\n", it); crt = cryt; } /* while */ *nit = (converged)? it : -1; if(trace_lev) { Rprintf("%s%sonverged after %d iterations, obj = %#20.*g\n", trace_lev >=2 ? "\n" : "", (converged) ? "C" : "NOT c", it, (int)((trace_lev >= 2)? 20 : 7), cryt); } /* obj[0] = (double) it; << no longer; return it via *nit ! */ obj[1] = cryt; /* PC (partition coefficient), "non-fuzzyness index" of libert is computed * C = 1/n sum_{i,j} u_{i,j} ^ r fulfills * 1 >= C >= sum_j (1/k)^r = k * k^-r = k^(1-r) * ==> normalization (C - k^(1-r)) / (1 - k^(1-r)) = (k^(r-1) * C - 1) / (k^(r-1) - 1) */ for (j = 0, crt = 0.; j < k; ++j) crt += esp[j]; crt /= nn; obj[2] = crt; /* the PC */ xx = pow((double)k, r - 1.); obj[3] = (xx * crt - 1.) / (xx - 1.); /* Note however, that for r != 2, MM rather prefers to use * the "original definition" C = 1/n sum_{i,j} u_{i,j} ^ 2, and its normalization */ /* p[m,j] := (u_{m,j} ^ r) ^{1/r} == u_{m,j} : */ xx = 1. 
/ r; for (m = 0; m < nn; ++m) for (j = 0; j < k; ++j) p[m + j * p_d] = pow(p[m + j * p_d], xx); } /* fuzzy */ static void caddy(int nn, int k, double *p, int *ktrue, int *nfuzz, int *ncluv, double *rdraw, int trace_lev) { Rboolean stay; int i, m, ktry, nbest; double pbest; if(trace_lev) Rprintf("fanny()'s caddy(*, k = %d):\n", k); pbest = p[0]; nbest = 1; for (i = 1; i < k; ++i) { if (pbest < p[i * nn]) { pbest = p[i * nn]; nbest = i+1; } } nfuzz[0] = nbest; ncluv[0] = 1; *ktrue = 1; for (m = 1; m < nn; ++m) { pbest = p[m]; nbest = 1; for (i = 1; i < k; ++i) { if (pbest < p[m + i * nn]) { pbest = p[m + i * nn]; nbest = i+1; } } stay = FALSE; for (ktry = 0; ktry < *ktrue; ++ktry) { if (nfuzz[ktry] == nbest) { stay = TRUE; ncluv[m] = ktry+1; break; } } if (! stay) { nfuzz[*ktrue] = nbest; (*ktrue)++; ncluv[m] = *ktrue; } } if(trace_lev) Rprintf(" -> k_true (crisp) = %d", *ktrue); if (*ktrue < k) { int kwalk, kleft; if(trace_lev) Rprintf(" < k (= %d) !!\n", k); for (kwalk = *ktrue; kwalk < k; ++kwalk) { for (kleft = 1; kleft <= k; ++kleft) { stay = FALSE; for (ktry = 0; ktry < kwalk; ++ktry) { if (nfuzz[ktry] == kleft) { stay = TRUE; break; } } if (! 
stay) { nfuzz[kwalk] = kleft; break; } } } } else if(trace_lev) Rprintf("\n"); for (m = 0; m < nn; ++m) { for (i = 0; i < k; ++i) rdraw[i] = p[m + (nfuzz[i]-1) * nn]; for (i = 0; i < k; ++i) p[m + i * nn] = rdraw[i]; } return; } /* caddy */ /* ----------------------------------------------------------- Compute Silhouette Information : TODO cleanup: this is almost identical to dark() in ./pam.c -- difference : different dys[] / dss[] indexing, but that -- dss[] indexing change needs to be "synchronized" in all functions here */ static void fygur(int kk, int nn, int *ncluv, int *nsend, int *nelem, int *negbr, double *syl, double *srank, double *avsyl, double *ttsyl, double *dss, double *s, double *sylinf) { int sylinf_d = nn; /* sylinf[nn, 4] */ int j, l, k, k_, nj, ntt, nsylr; double dysa, dysb; /* pointers to sylinf[] columns:*/ double *sylinf_2, *sylinf_3, *sylinf_4; sylinf_2 = sylinf + sylinf_d; sylinf_3 = sylinf_2 + sylinf_d; sylinf_4 = sylinf_3 + sylinf_d; /* Parameter adjustments */ --avsyl; --ncluv; --dss; nsylr = 0; *ttsyl = 0.; for (k = 1; k <= kk; ++k) { /* nelem[0:(ntt-1)] := indices (1-based) of obs. in cluster k : */ ntt = 0; for (j = 1; j <= nn; ++j) { if (ncluv[j] == k) { nelem[ntt] = j; ++ntt; } } for (j = 0; j < ntt; ++j) {/* (j+1)-th obs. 
in cluster k */ nj = nelem[j]; dysb = *s * 1.1 + 1.; negbr[j] = -1; /* for all clusters k_ != k : */ for (k_ = 1; k_ <= kk; ++k_) if (k_ != k) { int nbb = 0; double db = 0.; for (l = 1; l <= nn; ++l) { if (ncluv[l] == k_) { ++nbb; if (l < nj) { db += dss[nn * (l - 1) + nj - l * (l + 1) / 2]; } else if (l > nj) { db += dss[nn * (nj - 1) + l - nj * (nj + 1) / 2]; } /* else dss(.)=0 ; nothing to add */ } } db /= nbb; /* now db(k_) := mean( d[j, l]; l in C_{k_} ) */ if (dysb > db) { dysb = db; negbr[j] = k_; } }/* negbr[j] := arg min_{k_} db(k_) */ if (ntt > 1) { dysa = 0.; for (l = 0; l < ntt; ++l) { int nl = nelem[l]; if (nj < nl) { dysa += dss[nn * (nj - 1) + nl - nj * (nj + 1) / 2]; } else if (nj > nl) { dysa += dss[nn * (nl - 1) + nj - nl * (nl + 1) / 2]; }/* else dss(.)=0 ; nothing to add */ } dysa /= ntt - 1; if (dysa > 0.) { if (dysb > 0.) { if (dysb > dysa) syl[j] = 1. - dysa / dysb; else if (dysb < dysa) syl[j] = dysb / dysa - 1.; else /* dysb == dysa: */ syl[j] = 0.; if (syl[j] < -1.) syl[j] = -1.; else if (syl[j] > 1.) syl[j] = 1.; } else { syl[j] = -1.; } } else /* dysa == 0 */ if (dysb > 0.) 
syl[j] = 1.; else syl[j] = 0.; } else { /* ntt == 1: */ syl[j] = 0.; } } /* for( j ) */ avsyl[k] = 0.; for (j = 0; j < ntt; ++j) { int lang = 0 /* -Wall */; double symax = -2.; for (l = 0; l < ntt; ++l) { if (symax < syl[l]) { symax = syl[l]; lang = l; } } nsend[j] = lang; srank[j] = symax; /* = syl[lang] */ avsyl[k] += srank[j]; syl[lang] = -3.; } *ttsyl += avsyl[k]; avsyl[k] /= (double) ntt; if (ntt < 2) { sylinf [nsylr] = (double) k; sylinf_2[nsylr] = (double) negbr[0]; sylinf_3[nsylr] = 0.; sylinf_4[nsylr] = (double) nelem[0]; ++nsylr; } else { for (j = 0; j < ntt; ++j) { nj = nsend[j]; sylinf [nsylr] = (double) k; sylinf_2[nsylr] = (double) negbr[nj]; sylinf_3[nsylr] = srank[j]; sylinf_4[nsylr] = (double) nelem[nj]; ++nsylr; } } } /* for (k) */ *ttsyl /= nn; } /* fygur */ cluster/src/dysta.f0000644000176000001440000000304012124335266014046 0ustar ripleyusers c Dysta() : c c Compute Distances from X matrix {also for agnes() and diana()}: c ----------------------------------------------------------- c c was part of pam.f --- now called both from Fortran & C c "keep in sync" with daisy.f {move both to C or replace by R's dist!} c subroutine dysta(nn,p,x,dys,ndyst,jtmd,valmd,jhalt) integer nn, p, ndyst, jtmd(p), jhalt double precision x(nn,p), dys(1+nn*(nn-1)/2), valmd(p) c ndyst = 1 : euclidean c "else" : manhattan c VARs integer nlk,j,l,k, lsubt, npres double precision pp, clk, rpres nlk=1 dys(1)=0.0 c ---------- is used potentially for d[i,i] == dys[1] == 0 pp=p do 100 l=2,nn lsubt=l-1 do 20 k=1,lsubt clk=0.0 nlk=nlk+1 npres=0 do 30 j=1,p if(jtmd(j).lt.0) then if(x(l,j).eq.valmd(j))goto 30 if(x(k,j).eq.valmd(j))goto 30 endif npres=npres+1 if(ndyst.eq.1) then clk=clk+ (x(l,j)-x(k,j))*(x(l,j)-x(k,j)) else clk=clk+ dabs(x(l,j)-x(k,j)) endif 30 continue rpres=npres if(npres.eq.0) then jhalt=1 dys(nlk)=-1.0 else if(ndyst.eq.1) then dys(nlk)= dsqrt(clk*(pp/rpres)) else dys(nlk)= clk*(pp/rpres) endif endif 20 continue 100 continue end 
cluster/src/daisy.f0000644000176000001440000001004612124335266014037 0ustar ripleyusers subroutine cldaisy(nn,jpp,x,valmd,weights, + jtmd,jdat,vtype,ndyst,mdata,disv) c c c c Calculating dissimilarities between objects or variables c c integer nn, jpp c c nn = number of objects c c jpp = number of variables used for the calculations c c The following vectors and matrices must be dimensioned in the c c main program : double precision x(nn,jpp), valmd(jpp), weights(jpp) double precision disv(1+nn*(nn-1)/2) integer jtmd(jpp), jdat, vtype(jpp), ndyst, mdata c vtype was character originally c vtype(j) is the type of variable j: c = 1 (A) for an Asymmetric binary variable c = 2 (S) for a Symmetric binary variable c = 3 (N) for a Nominal variable c = 4 (O) for an Ordinal variable c = 5 (I) for an Interval variable (additive) c = 6 (T) for a raTio variable (log transformed) c vector jtmd is only read if there are missing values : if(mdata) c jtmd(j) = 0 if variable j is binary c = -1 if variable j is not binary and has missing values c = +1 if variable j is not binary and has no missing values c VAR double precision clk,dlk, pp,ppa, rpres integer j,k,l,la, lsubt, nlk, nbad, npres logical hasNA hasNA = (mdata .ne. 0) c calculation of the dissimilarities nlk=0 if(jdat .eq. 1) then c Case I: `mixed' type variables nbad=0 do 450 l=2,nn la=l-1 do 440 k=1,la nlk=nlk+1 ppa=0. dlk=0. c Dissimilarity between obs. l & k do 420 j=1,jpp if(vtype(j) .ge. 3) then if (hasNA) then if(jtmd(j).lt.0) then if(x(l,j).eq.valmd(j)) goto 420 if(x(k,j).eq.valmd(j)) goto 420 endif endif ppa=ppa + weights(j) if(vtype(j).eq.3) then if(x(l,j).ne.x(k,j)) dlk=dlk+ weights(j) else dlk=dlk+ weights(j)*dabs(x(l,j)-x(k,j)) endif else c binary variable x(*,j) if(x(l,j).ne.0..and.x(l,j).ne.1.) goto 420 if(x(k,j).ne.0..and.x(k,j).ne.1.) 
goto 420 if(vtype(j).eq.2.or.x(l,j).ne.0.or.x(k,j).ne.0) * ppa=ppa+weights(j) if(x(l,j).ne.x(k,j)) dlk=dlk+ weights(j) endif 420 continue if(ppa.le.0.5) then nbad=nbad+1 disv(nlk)=-1 else disv(nlk)=dlk/ppa endif 440 continue 450 continue else c Case II : jdat != 1: all variables are interval scaled c ------- ~~~~~~~~~ { basically === dysta() in ./dysta.f c FIXME: common code! } pp=jpp do 600 l=2,nn lsubt=l-1 do 520 k=1,lsubt clk=0.0 nlk=nlk+1 npres=0 do 530 j=1,jpp if (hasNA) then if(jtmd(j).lt.0) then if(x(l,j).eq.valmd(j)) goto 530 if(x(k,j).eq.valmd(j)) goto 530 endif endif npres=npres+1 if(ndyst.eq.1) then clk=clk+ (x(l,j)-x(k,j))*(x(l,j)-x(k,j)) else clk=clk+ dabs(x(l,j)-x(k,j)) endif 530 continue rpres=npres if(npres.eq.0)then disv(nlk)=-1.0 else if(ndyst.eq.1) then disv(nlk)=dsqrt(clk*(pp/rpres)) else disv(nlk)=clk*(pp/rpres) endif 520 continue 600 continue endif end cluster/src/cluster.h0000644000176000001440000001546212124335266014420 0ustar ripleyusers/* Declare everything, Fortran & C -- so we can register them */ #include /* -> Rconfig.h, but also Boolean.h RS.h */ #ifdef ENABLE_NLS #include #define _(String) dgettext ("cluster", String) #else #define _(String) (String) #endif /* --------- ./clara.c ------------------*/ double randm(int *nrun); void cl_clara(int *n, /* = number of objects */ int *jpp,/* = number of variables */ int *kk, /* = number of clusters, 1 <= kk <= n-1 */ double *x, /* Input: the data x[n, jpp] _rowwise_ (transposed) * Output: the first `n' values are the `clustering' * (integers in 1,2,..,kk) */ int *nran, /* = #{random samples} drawn (= `samples' in R)*/ int *nsam, /* = #{objects} drawn from data set (`sampsize' in R) */ double *dys,/* [1:(1 + (nsam * (nsam - 1))/2)] * Output: to contain the distances */ int *mdata, /*= {0,1}; 1: min(x) is missing value (NA); 0: no NA */ double *valmd,/*[j]= missing value code (instead of NA) for x[,j]*/ int *jtmd, /* [j]= {-1,1}; -1: x[,j] has NA; 1: no NAs in x[,j] */ int *diss_kind,/* = 
{1,2}; 1 : euclidean; 2 : manhattan*/ int/*logical*/ *rng_R,/*= {0,1}; 0 : use clara's internal weak RNG; * 1 : use R's RNG (and seed) */ int/*logical*/ *pam_like,/* if (1), we do "swap()" as in pam();*/ // otherwise use the code as it was in clara() "forever" upto 2011-04 int *nrepr, int *nsel, int *nbest,/* x[nbest[j]] will be the j-th obs in the final sample */ int *nr, int *nrx, double *radus, double *ttd, double *ratt, double *ttbes, double *rdbes, double *rabes, int *mtt, double *obj, double *avsyl, double *ttsyl, double *sylinf, int *jstop, int *trace_lev, double *tmp, /* = double [ 3 * nsam ] */ int *itmp /* = integer[ 6 * nsam ] */ ); void dysta2(int nsam, int jpp, int *nsel, double *x, int n, double *dys, int diss_kind, int *jtmd, double *valmd, Rboolean has_NA, Rboolean *toomany_NA); void bswap2(int kk, int nsam, double s, const double dys[], Rboolean pam_like, int trace_lev, // result: double *sky, int *nrepr, double *dysma, double *dysmb, double *beter); void selec(int kk, int n, int jpp, int diss_kind, double *zb, int nsam, Rboolean has_NA, int *jtmd, double *valmd, int trace_lev, int *nrepr, int *nsel, double *dys, double *x, int *nr, Rboolean *nafs, double *ttd, double *radus, double *ratt, int *nrnew, int *nsnew, int *npnew, int *ns, int *np, int *new, double *ttnew, double *rdnew); void resul(int kk, int n, int jpp, int diss_kind, Rboolean has_NA, int *jtmd, double *valmd, double *x, int *nrx, int *mtt); void black(int kk, int jpp, int nsam, int *nbest, double *dys, double s, double *x, /* --> Output : */ double *avsyl, double *ttsyl, double *sylinf, int *ncluv, int *nsend, int *nelem, int *negbr, double *syl, double *srank); /* -------- ./dysta.f --- (was in pam.f) -------------------- */ int F77_NAME(dysta)(int *nn, int *jpp, double *x, double *dys, int *ndyst, int *jtmd, double *valmd, int *jhalt); /* --------- ./pam.c ------------------*/ void cl_pam(int *nn, int *jpp, int *kk, double *x, double *dys, int *jdyss, /* jdyss = 0 : compute 
distances from x * = 1 : distances provided in x */ double *valmd, int *jtmd, int *ndyst, int *nsend, int *nrepr, int *nelem, double *radus, double *damer, double *ttd, double *separ, double *ttsyl, double *obj, int *med, int *ncluv, double *clusinf, double *sylinf, int *nisol, int* optim); void bswap(int kk, int nsam, int *nrepr, /* nrepr[]: here is boolean (0/1): 1 = "is representative object" */ Rboolean med_given, Rboolean do_swap, int trace_lev, double *dysma, double *dysmb, double *beter, double *dys, double s, double *obj, int *pamonce); void cstat(int *kk, int *nn, int *nsend, int *nrepr, Rboolean all_stats, double *radus, double *damer, double *ttd, double *separ, double *s, double *dys, int *ncluv, int *nelem, int *med, int *nisol); void dark(int kk, int nn, int *ncluv, int *nsend, int *nelem, int *negbr, double *syl, double *srank, double *avsyl, double *ttsyl, double *dys, double *s, double *sylinf); /* --------- ./spannel.c ------------------*/ void cl_sweep(double *, int *, int *, int *, double *); void spannel(int *ncas, /* = number of objects */ int *ndep, /* = number of variables */ double *dat,/* [ncas, 0:ndep] */ double *dstopt, /* = squared distances [1:ncas] */ double *cov,/* matrix [0:ndep, 0:ndep] */ double *varsum, /* [1:ndep] */ double *varss, /* [1:ndep] */ double *prob, /* [1:ncas] */ double *work, /* [0:ndep] */ double *eps, int *maxit, /* = maximal # iterations (and returns #{iter.})*/ int *ierr); void sildist(double *d, /* distance : in matrix or dist format; i.e., of length n^2 or n*(n-1)/2; see 'ismat' */ int *n, /* number of Subjects (attr(d,'Size')) */ int *clustering,/* clustering vector, values from {1..k} */ int *k, /* number of clusters */ double *diC, /* diC */ int *counts, /* counts[k] := #{cluster k} */ double *si, /* (a_i - b_i) / max(ai,bi) */ int *neighbor, /* neighbor */ int *ismat); /* boolean : is 'd' a matrix or 'dist' ? 
*/ void cl_fanny(int *nn, int *jpp, int *kk, double *x, double *dss, int *jdyss, double *valmd, int *jtmd, int *ndyst, int *nsend, int *nelem, int *negbr, double *syl, double *p, double *dp, double *pt, int *nfuzz, double *esp, double *ef, double *dvec, double *ttsyl, double *obj, int *ncluv, double *sylinf, double *r, double *tol, int *maxit); /* ================= Fortran things (remainder) ======================== */ /* -------- ./daisy.f ---------------------------------- */ int F77_NAME(cldaisy)(int *nn, int *jpp, double *x, double *valmd, int *jtmd, int *jdat, int *vtype, int *ndyst, int *mdata, double *disv); /* -------- ./fanny.c ---------------------------------- */ /* R-level: called only from ../tests/dysta-ex.R (now via .C()): */ void dysta3(int *nn, int *p, double *x, double *dys, int *ndyst, int *jtmd, double *valmd, int *jhalt); /* -------- ./mona.f ---------------------------------- */ int F77_NAME(clmona)(int *nn, int *pp, int *x, int *jerr, int *nban, int *ner, int *kwan, int *lava, int *jlack); /* -------- ./twins.c ---------------------------------- */ void R_bncoef(int *nn, double *ban, double *cf); double bncoef(int nn, double *ban); void twins(int *nn, int *jpp, double *x, double *dys, double *dys2, int *jdyss, double *valmd, int *jtmd, int *ndyst, int *jalg, int *method, int *kwan, int *ner, double *ban, double *coef, double *alpha, int *merge); cluster/src/clara.c0000644000176000001440000006272712124335266014022 0ustar ripleyusers/* Clustering LARge Applications ~ ~~~ ~ Clustering program based upon the k-medoid approach, and suitable for data sets of at least 100 objects. (for smaller data sets, please use program pam.) 
*/ /* original Id: clara.f,v 1.10 2002/08/27 15:43:58 maechler translated by * f2c (version 20010821) and run through f2c-clean,v 1.10 2002/03/28 */ #include #include /* for diagnostics */ #include /* when R's RNG is used */ #include /* for interrupting */ #include "cluster.h" #include "ind_2.h" void cl_clara(int *n, /* = number of objects */ int *jpp,/* = number of variables */ int *kk, /* = number of clusters, 1 <= kk <= n-1 */ double *x, /* Input: the data x[n, jpp] _rowwise_ (transposed) * Output: the first `n' values are the `clustering' * (integers in 1,2,..,kk) */ int *nran, /* = #{random samples} drawn (= `samples' in R)*/ int *nsam, /* = #{objects} drawn from data set (`sampsize' in R) */ double *dys,/* [1:(1 + (nsam * (nsam - 1))/2)] * Output: to contain the distances */ int *mdata, /*= {0,1}; 1: min(x) is missing value (NA); 0: no NA */ double *valmd,/*[j]= missing value code (instead of NA) for x[,j]*/ int *jtmd, /* [j]= {-1,1}; -1: x[,j] has NA; 1: no NAs in x[,j] */ int *diss_kind,/*= {1,2}; 1 : euclidean; 2 : manhattan*/ int/*logical*/ *rng_R,/*= {0,1}; 0 : use clara's internal weak RNG; * 1 : use R's RNG (and seed) */ int/*logical*/ *pam_like,/* if (1), we do "swap()" as in pam();*/ // otherwise use the code as it was in clara() "forever" upto 2011-04 int *nrepr, /* logical (0/1): 1 = "is representative object" */ int *nsel, int *nbest,/* x[nbest[j],] : the j-th obs in the final sample */ int *nr, int *nrx,/* prov. 
and final "medoids" aka representatives */ double *radus, double *ttd, double *ratt, double *ttbes, double *rdbes, double *rabes, int *mtt, double *obj, double *avsyl, double *ttsyl, double *sylinf, int *jstop, int *trace_lev, double *tmp, /* = double [ 3 * nsam ] */ int *itmp /* = integer[ 6 * nsam ] */ ) { #define tmp1 tmp #define tmp2 &tmp[*nsam] #define ntmp1 itmp #define ntmp2 &itmp[*nsam] #define ntmp3 &itmp[nsamb] #define ntmp4 &itmp[nsamb+ *nsam] #define ntmp5 &itmp[2*nsamb] #define ntmp6 &itmp[2*nsamb+ *nsam] /* Local variables */ Rboolean nafs, kall, full_sample, lrg_sam, dyst_toomany_NA, has_NA = *mdata; int j, jk, jkk, js, jsm, jran, l, n_sam; int nsm, ntt, rand_k, nrun, n_dys, nsamb, nunfs; double rnn, sky, zb, s, sx = -1., zba = -1.;/* Wall */ *jstop = 0; rnn = (double) (*n); /* n_dys := size of distance array dys[] */ n_dys = *nsam * (*nsam - 1) / 2 + 1;/* >= 1 */ full_sample = (*n == *nsam);/* only one sub sample == full data */ nsamb = *nsam * 2; lrg_sam = (*n < nsamb);/* sample more than *n/2 */ if (lrg_sam)/* generate indices for the other, smaller half */ n_sam = *n - *nsam; else n_sam = *nsam; if(*trace_lev) Rprintf("C clara(): (nsam,nran,n) = (%d,%d,%d);%s\n", *nsam, *nran, *n, full_sample ? " 'full_sample',": (lrg_sam ? " 'large_sample',": "")); if(*rng_R && !full_sample) GetRNGstate(); else /* << initialize `random seed' of the very simple randm() below */ nrun = 0; #define NEW_rand_k_trace_print(_nr_) \ rand_k= 1+ (int)(rnn* ((*rng_R)? unif_rand(): randm(&nrun))); \ if (rand_k > *n) {/* should never happen */ \ REprintf("** C clara(): random k=%d > n **\n", rand_k); \ rand_k = *n; \ } \ if(*trace_lev >= 4) { \ Rprintf("... {" #_nr_ "}"); \ if(*rng_R) Rprintf("R unif_rand()"); \ else Rprintf("nrun=%5d", nrun); \ Rprintf(" -> k{ran}=%d\n", rand_k); \ } /* __LOOP__ : random subsamples are drawn and partitioned into kk clusters */ kall = FALSE; /* kall becomes TRUE iff we've found a "valid sample", i.e. 
one for which all d(j,k) can be computed */ nunfs = 0; dyst_toomany_NA = FALSE; for (jran = 1; jran <= *nran; ++jran) { if(*trace_lev) Rprintf("C clara(): sample %d ", jran); if (!full_sample) {/* `real' case: sample size < n */ ntt = 0; if (kall && nunfs+1 != jran && !lrg_sam) { /* Have had (at least) one valid sample; use its representatives * nrx[] : nsel[] := sort(nrx[]) for the first j=1:k */ if(*trace_lev >= 2) Rprintf(" if (kall && nunfs...): \n"); for (jk = 0; jk < *kk; ++jk) nsel[jk] = nrx[jk]; for (jk = 0; jk < *kk-1; ++jk) { /* sort(nsel[0:(kk-1)] */ /* FIXME: nsel[] is 0-indexed, but *contains* 1-indices*/ nsm = nsel[jk]; jsm = jk; for (jkk = jk + 1; jkk < *kk; ++jkk) { if (nsm > nsel[jkk]) { nsm = nsel[jkk]; jsm = jkk; } } nsel[jsm] = nsel[jk]; nsel[jk] = nsm; } ntt = *kk; } else { /* no valid sample _OR_ lrg_sam */ if(*trace_lev >= 2) Rprintf(" finding 1st... new k{ran}:\n"); /* Loop finding random index `rand_k' not yet in nrx[0:(*kk-1)] : */ L180: NEW_rand_k_trace_print(180) if (kall) { for (jk = 0; jk < *kk; ++jk) if (rand_k == nrx[jk]) goto L180; } /* end Loop */ nsel[ntt] = rand_k; if (++ntt == n_sam) goto L295; } if(*trace_lev >= 2) { Rprintf(".. kall: %s, ", (kall) ? "T" : "FALSE"); if(*trace_lev == 2) { Rprintf("nsel[ntt=%d] = %d\n", ntt, nsel[ntt]); } else { /* trace_lev >= 3 */ Rprintf("\n... nrx [0:%d]= ",*kk-1); for (jk = 0; jk < *kk; jk++) Rprintf("%d ",nrx[jk]); Rprintf("\n... 
nsel[0:%d]= ",ntt-1); for (jk = 0; jk < ntt; jk++) Rprintf("%d ",nsel[jk]); Rprintf("\n"); } } do { /* Loop finding random index 'rand_k' in {1:n}, * not in nrx[0:(k-1)] nor nsel[1:ntt] : */ L210: NEW_rand_k_trace_print(210) if (kall && lrg_sam) { for (jk = 0; jk < *kk; ++jk) { if (rand_k == nrx[jk]) goto L210; } } /* insert rand_k into nsel[1:ntt] or after and increase ntt : */ for (int ka = 0; ka < ntt; ++ka) if (nsel[ka] >= rand_k) { if (nsel[ka] == rand_k) goto L210; else {// nsel[ka] > rand_k : for (int na = ntt; na > ka; --na) nsel[na] = nsel[na-1]; nsel[ka] = rand_k; /* continue _outer_ loop */ goto L290; } } // else: rand_k > nsel[ka] for all ka = 0:(ntt-1) : nsel[ntt] = rand_k; L290: ++ntt; } while (ntt < n_sam); L295: if(*trace_lev) Rprintf(" {295} [ntt=%d, nunfs=%d] ", ntt, nunfs); if (lrg_sam) { /* have indices for smaller _nonsampled_ half; revert this: */ for (j = 1, jk = 0, js = 0; j <= *n; j++) { if (jk < n_sam && nsel[jk] == j) ++jk; else nrepr[js++] = j; } for (j = 0; j < *nsam; ++j) nsel[j] = nrepr[j]; } if(*trace_lev >= 3) { Rprintf(".. nsel[1:%d]= ", *nsam); for (jk = 0; jk < *nsam; jk++) Rprintf("%d ",nsel[jk]); } if(*trace_lev) Rprintf(" -> dysta2()\n"); } else { /* full_sample : *n = *nsam -- one sample is enough ! */ for (j = 0; j < *nsam; ++j) nsel[j] = j+1;/* <- uses 1-indices for its *values*! */ } dysta2(*nsam, *jpp, nsel, x, *n, dys, *diss_kind, jtmd, valmd, has_NA, &dyst_toomany_NA); if(dyst_toomany_NA) { if(*trace_lev) Rprintf(" dysta2() gave dyst_toomany_NA --> new sample\n"); dyst_toomany_NA = FALSE; ++nunfs; continue;/* random sample*/ } s = 0.; for(l = 1; l < n_dys; l++) /* dys[0] is not used here */ if (s < dys[l]) s = dys[l]; if(*trace_lev >= 2) Rprintf(". 
clara(): s:= max dys[1..%d] = %g;", l-1,s); bswap2(*kk, *nsam, s, dys, *pam_like, *trace_lev, /* --> */ &sky, nrepr, /* dysma */tmp1, /*dysmb*/tmp2, /* beter[], only used here */&tmp[nsamb]); if(*trace_lev >= 2) Rprintf("end{bswap}: sky = %g\n", sky); selec(*kk, *n, *jpp, *diss_kind, &zb, *nsam, has_NA, jtmd, valmd, *trace_lev, nrepr, nsel, dys, x, nr, &nafs, ttd, radus, ratt, ntmp1, ntmp2, ntmp3, ntmp4, ntmp5, ntmp6, tmp1, tmp2); if (nafs) { /* couldn't assign some observation (to a cluster) * because of too many NA s */ ++nunfs; if(*trace_lev >= 2) Rprintf(" selec() -> 'NAfs'"); } else if(!kall || zba > zb) { /* 1st proper sample or new best */ kall = TRUE; if(*trace_lev >= 2) Rprintf(" 1st proper or new best:"); zba = zb; for (jk = 0; jk < *kk; ++jk) { ttbes[jk] = ttd [jk]; rdbes[jk] = radus[jk]; rabes[jk] = ratt [jk]; nrx [jk] = nr [jk]; } for (js = 0; js < *nsam; ++js) nbest[js] = nsel[js]; sx = s; } if(*trace_lev >= 2) Rprintf(" obj= %g\n", zb/rnn); if(full_sample) break; /* out of resampling */ } /* --- end random sampling loop */ if(*rng_R && !full_sample) PutRNGstate(); if (nunfs >= *nran) { *jstop = 1; return; } /* else */ if (!kall) { *jstop = 2; return; } if(*trace_lev) { Rprintf("C clara(): best sample _found_ "); if(*trace_lev >= 2) { Rprintf("; nbest[1:%d] =\n c(", *nsam); for (js = 0; js < *nsam; ++js) { Rprintf("%d", nbest[js]); if(js+1 < *nsam) Rprintf(","); } Rprintf(")\n"); } Rprintf(" --> dysta2(nbest), resul(), end\n"); } /* for the best subsample, the objects of the entire data set are assigned to their clusters */ *obj = zba / rnn; dysta2(*nsam, *jpp, nbest, x, *n, dys, *diss_kind, jtmd, valmd, has_NA, &dyst_toomany_NA); if(dyst_toomany_NA) { REprintf(" *** SHOULD NOT HAPPEN: clara() -> dysta2(nbest) gave toomany_NA\n"); return; } resul(*kk, *n, *jpp, *diss_kind, has_NA, jtmd, valmd, x, nrx, mtt); if (*kk > 1) black(*kk, *jpp, *nsam, nbest, dys, sx, x, /* compute --> */ avsyl, ttsyl, sylinf, ntmp1, ntmp2, ntmp3, ntmp4, /* syl[] */ tmp1, 
tmp2); return; } /* End clara() ---------------------------------------------------*/ #undef tmp1 #undef tmp2 #undef ntmp1 #undef ntmp2 #undef ntmp3 #undef ntmp4 #undef ntmp5 #undef ntmp6 void dysta2(int nsam, int jpp, int *nsel, double *x, int n, double *dys, int diss_kind, int *jtmd, double *valmd, Rboolean has_NA, Rboolean *toomany_NA) { /* Compute Dissimilarities for the selected sub-sample ---> dys[,] */ int nlk = 0; dys[0] = 0.;/* very first index; *is* used because ind_2(i,i) |-> 0 ! */ for (int l = 1; l < nsam; ++l) { int lsel = nsel[l]; if(lsel <= 0 || lsel > n) REprintf(" ** dysta2(): nsel[l= %d] = %d is OUT\n", l, lsel); for (int k = 0; k < l; ++k) { /* compute d(nsel[l], nsel[k]) {if possible}*/ int ksel = nsel[k]; if(ksel <= 0 || ksel > n) REprintf(" ** dysta2(): nsel[k= %d] = %d is OUT\n", k, ksel); ++nlk; int npres = 0, j, lj, kj; double clk = 0.; for (j = 0, lj = lsel-1, kj = ksel-1; j < jpp; ++j, lj += n, kj += n) { if (has_NA && jtmd[j] < 0) { /* x[,j] has some Missing (NA) */ /* in the following line (Fortran!), x[-2] ==> seg.fault {BDR to R-core, Sat, 3 Aug 2002} */ if (x[lj] == valmd[j] || x[kj] == valmd[j]) { continue /* next j */; } } ++npres; if (diss_kind == 1) clk += (x[lj] - x[kj]) * (x[lj] - x[kj]); else clk += fabs(x[lj] - x[kj]); } if (npres == 0) {/* cannot compute d(.,.) because of too many NA */ *toomany_NA = TRUE; dys[nlk] = -1.; } else { double d1 = clk * (jpp / (double) npres); dys[nlk] = (diss_kind == 1) ? sqrt(d1) : d1 ; } } /* for( k ) */ } /* for( l ) */ return; } /* End dysta2() -----------------------------------------------------------*/ double randm(int *nrun) { /* we programmed this generator ourselves because we wanted it to be machine independent. it should run on most computers because the largest int used is less than 2^30 . the period is 2^16=65536, which is good enough for our purposes. 
*/ /* MM: improved the original speed-wise only: */ *nrun = (*nrun * 5761 + 999) & 0177777; /* Masking off all but the last 16 bits is equivalent to % 65536 */ return ((double) (*nrun) / 65536.); } /* randm() */ /* bswap2() : called once [per random sample] from clara() : */ void bswap2(int kk, int n, /* == nsam == 'sampsize', here in clara */ double s, const double dys[], Rboolean pam_like, int trace_lev, // result: double *sky, int *nrepr, double *dysma, double *dysmb, double *beter) { int i, j, ij, k,h, hbest = -1, nbest = -1;/* init for -Wall */ double dzsky; /* Parameter adjustments */ --nrepr; --beter; --dysma; --dysmb; if(trace_lev >= 2) { if(trace_lev == 2) Rprintf("\n bswap2():"); else Rprintf("\nclara()'s bswap2(*, s=%g): ", s); } s = s * 1.1 + 1.;/* value larger than all dissimilarities */ /* ====== first algorithm: BUILD. ====== */ for (i = 1; i <= n; ++i) { nrepr[i] = 0; dysma[i] = s; } for(k = 0; k < kk; k++) { int nmax = -1; /* -Wall */ double ammax = 0.; for (i = 1; i <= n; ++i) { if (nrepr[i] == 0) { beter[i] = 0.; for (j = 1; j <= n; ++j) { double cmd = dysma[j] - dys[ ind_2(i, j)]; if (cmd > 0.) beter[i] += cmd; } if (ammax <= beter[i]) { /* does < (instead of <= ) work too? -- NO! */ ammax = beter[i]; nmax = i; } } } nrepr[nmax] = 1;/* = .true. : found new representative */ if(trace_lev >= 2) { if(trace_lev == 2) Rprintf(" %d", nmax); else Rprintf(" new repr. %d\n", nmax); } /* update dysma[] : dysma[j] = D(j, nearest_representative) */ for (j = 1; j <= n; ++j) { ij = ind_2(nmax, j); if (dysma[j] > dys[ij]) dysma[j] = dys[ij]; } } // output of the above loop: nrepr[], dysma[], ... *sky = 0.; for (j = 1; j <= n; ++j) *sky += dysma[j]; if(trace_lev >= 2) /* >= 2 (?) 
*/ { Rprintf(" after build: medoids are"); for (i = 1; i <= n; ++i) if(nrepr[i] == 1) Rprintf(" %2d", i); if(trace_lev >= 3) { Rprintf("\n and min.dist dysma[1:n] are\n"); for (i = 1; i <= n; ++i) { Rprintf(" %6.3g", dysma[i]); if(i % 10 == 0) Rprintf("\n"); } if(n % 10 != 0) Rprintf("\n"); } else Rprintf("\n"); Rprintf(" --> sky = sum_j D_j= %g\n", *sky); } if (kk == 1) return; // asky = *sky / ((double) n); /* ====== second algorithm: SWAP. ====== */ /* Big LOOP : */ L60: for (j = 1; j <= n; ++j) { /* dysma[j] := D_j d(j, ) [KR p.102, 104] * dysmb[j] := E_j d(j, <2-nd cl.medi>) [p.103] */ dysma[j] = s; dysmb[j] = s; for (i = 1; i <= n; ++i) { if (nrepr[i]) { ij = ind_2(i, j); if (dysma[j] > dys[ij]) { dysmb[j] = dysma[j]; dysma[j] = dys[ij]; } else if (dysmb[j] > dys[ij]) { dysmb[j] = dys[ij]; } } } } dzsky = 1.; /* 1 is arbitrary > 0; only dzsky < 0 matters in the end */ for (h = 1; h <= n; ++h) if (!nrepr[h]) { for (i = 1; i <= n; ++i) if (nrepr[i]) { double dz = 0.; /* dz := T_{ih} := sum_j C_{jih} [p.104] : */ for (j = 1; j <= n; ++j) { int ij = ind_2(i, j), hj = ind_2(h, j); if (dys[ij] == dysma[j]) { double small; if(pam_like) small = dysmb[j] > dys[hj] ? dys[hj] : dysmb[j]; else // old clara code which differs from pam()'s // and seems a bit illogical: small = dysmb[j] > dys[ij] ? dys[hj] : dysmb[j]; dz += (- dysma[j] + small); } else if (dys[hj] < dysma[j]) dz += (- dysma[j] + dys[hj]); } if (dzsky > dz) { dzsky = dz; // dzsky := min_{i,h} T_{i,h} hbest = h; nbest = i; } } } /* once had some 64-bit compiler / data configuration that looped forever*/ R_CheckUserInterrupt(); if (dzsky < 0.) { /* found an improving swap */ if(trace_lev >= 3) Rprintf( " swp new %d <-> %d old; decreasing diss. by %g\n", hbest, nbest, dzsky); nrepr[hbest] = 1; nrepr[nbest] = 0; *sky += dzsky; goto L60; } if(trace_lev >= 2 && hbest != -1) // in my examples hbest == -1 and it does not print: Rprintf( " Last swap: new %d <-> %d old; decreasing diss. 
by %g\n", hbest, nbest, dzsky); } /* End of bswap2() -------------------------------------------------- */ /* selec() : called once [per random sample] from clara() */ void selec(int kk, int n, int jpp, int diss_kind, double *zb, int nsam, Rboolean has_NA, int *jtmd, double *valmd, int trace_lev, int *nrepr, int *nsel, double *dys, double *x, int *nr, Rboolean *nafs, /* := TRUE if a distance cannot be calculated */ double *ttd, double *radus, double *ratt, int *nrnew, int *nsnew, int *npnew, int *ns, int *np, int *new, double *ttnew, double *rdnew) { /* Local variables */ int j, jk, jj, jp, jnew, ka, kb, jkabc = -1/* -Wall */; int newf, nrjk, npab, nstrt, na, nb, npa, npb, njk, nobs; double pp = (double) (jpp), tra; /* Parameter adjustments */ --nsel; --nrepr; --ratt; --radus; --ttd; --np; --nr; --ns; --rdnew; --ttnew; --npnew; --nrnew; --nsnew; --new; /* nafs := TRUE if a distance cannot be calculated (because of NA s)*/ *nafs = FALSE; /* identification of representative objects, and initializations */ jk = 0; for (j = 1; j <= nsam; ++j) { if (nrepr[j] != 0) { ++jk; nr [jk] = nsel[j]; ns [jk] = 0; ttd [jk] = 0.; radus[jk] = -1.; np [jk] = j; } } /* - assignment of the objects of the entire data set to a cluster, * - computation of some statistics, * - determination of the new ordering of the clusters */ *zb = 0.; newf = 0; for(jj = 1; jj <= n; jj++) { double dsum, dnull = -9./* -Wall */; if (!has_NA) { for (jk = 1; jk <= kk; ++jk) { dsum = 0.; nrjk = nr[jk]; if (nrjk != jj) { for (jp = 0; jp < jpp; ++jp) { na = (nrjk - 1) + jp * n; nb = (jj - 1) + jp * n; tra = fabs(x[na] - x[nb]); if (diss_kind == 1) tra *= tra; dsum += tra; } if (jk != 1 && dsum >= dnull) continue /* next jk */; } dnull = dsum; jkabc = jk; } } else { /* _has_ missing data */ Rboolean pres = FALSE; for (jk = 1; jk <= kk; ++jk) { dsum = 0.; nrjk = nr[jk]; if (nrjk != jj) { nobs = 0; for (jp = 0; jp < jpp; ++jp) { na = (nrjk - 1) + jp * n; nb = (jj - 1) + jp * n; if (jtmd[jp] < 0) { if (x[na] == 
valmd[jp] || x[nb] == valmd[jp]) continue /* next jp */; } nobs++; tra = fabs(x[na] - x[nb]); if (diss_kind == 1) tra *= tra; dsum += tra; } if (nobs == 0) /* all pairs partially missing */ continue /* next jk */; dsum *= (nobs / pp); /* MM: ^^^^^^^^^ fishy; rather * (pp/nobs) as in dysta2*/ } if (!pres) pres = TRUE; else if (dnull <= dsum) continue /* next jk */; /* here : pres was FALSE {i.e. 1st time} or * dnull > dsum {i.e. new best} */ dnull = dsum; jkabc = jk; }/* for(jk ..) */ if (!pres) { /* found nothing */ *nafs = TRUE; return; } } /* else: has_NA */ if (diss_kind == 1) dnull = sqrt(dnull); *zb += dnull; ttd[jkabc] += dnull; if (radus[jkabc] < dnull) radus[jkabc] = dnull; ++ns[jkabc]; if (newf < kk) { if (newf != 0) { for (jnew = 1; jnew <= newf; ++jnew) { if (jkabc == new[jnew]) goto L90;/* next jj */ } } ++newf; new[newf] = jkabc; } L90: ; } /* for( jj = 1..n ) */ /* a permutation is carried out on vectors nr,ns,np,ttd,radus using the information in vector new. */ for (jk = 1; jk <= kk; ++jk) { njk = new[jk]; nrnew[jk] = nr[njk]; nsnew[jk] = ns[njk]; npnew[jk] = np[njk]; ttnew[jk] = ttd[njk]; rdnew[jk] = radus[njk]; } for (jk = 1; jk <= kk; ++jk) { nr[jk] = nrnew[jk]; ns[jk] = nsnew[jk]; np[jk] = npnew[jk]; ttd[jk] = ttnew[jk]; radus[jk] = rdnew[jk]; } for (j = 1; j <= kk; ++j) { double rns = (double) ns[j]; ttd[j] /= rns; } if (kk > 1) { /* computation of minimal distance of medoid ka to any other medoid for comparison with the radius of cluster ka. */ for (ka = 1; ka <= kk; ++ka) { nstrt = 0; npa = np[ka]; for (kb = 1; kb <= kk; ++kb) { if (kb == ka) continue /* next kb */; npb = np[kb]; npab = ind_2(npa, npb); if (nstrt == 0) nstrt = 1; else if (dys[npab] >= ratt[ka]) continue /* next kb */; ratt[ka] = dys[npab]; if (ratt[ka] == 0.) 
ratt[ka] = -1.; } if (ratt[ka] > -0.5) ratt[ka] = radus[ka] / ratt[ka]; } } return; } /* End selec() -----------------------------------------------------------*/ void resul(int kk, int n, int jpp, int diss_kind, Rboolean has_NA, int *jtmd, double *valmd, double *x, int *nrx, int *mtt) { /* Local variables */ int j, jk, jj, ka, na, nb, njnb, nrjk, nobs, jksky = -1/* Wall */; double pp = (double) (jpp), dsum, tra, dnull = -9./* Wall */; /* clustering vector is incorporated into x, and ``printed''. */ for(jj = 0; jj < n; jj++) { for (jk = 0; jk < kk; ++jk) { if (nrx[jk] == jj + 1)/* 1-indexing */ goto L220; /* continue next jj (i.e., outer loop) */ } njnb = jj; if (!has_NA) { for (jk = 0; jk < kk; ++jk) { dsum = 0.; nrjk = (nrx[jk] - 1); for (j = 0; j < jpp; ++j) { tra = fabs(x[nrjk + j * n] - x[njnb + j * n]); if (diss_kind == 1) tra *= tra; dsum += tra; } if (diss_kind == 1) dsum = sqrt(dsum); if (jk == 0 || dnull > dsum) { dnull = dsum; jksky = jk; } } } else { /* _has_ missing data */ for (jk = 0; jk < kk; ++jk) { dsum = 0.; nrjk = (nrx[jk] - 1); nobs = 0; for (j = 0; j < jpp; ++j) { na = nrjk + j * n; nb = njnb + j * n; if (jtmd[j] < 0) { if (x[na] == valmd[j] || x[nb] == valmd[j]) continue /* next j */; } nobs++; tra = fabs(x[na] - x[nb]); if (diss_kind == 1) tra *= tra; dsum += tra; } if (diss_kind == 1) dsum = sqrt(dsum); dsum *= (nobs / pp); /* MM: ^^^^^^^^ fishy; rather * (pp/nobs) as in dysta2 */ if (jk == 0 || dnull > dsum) { dnull = dsum; jksky = jk; } } } x[njnb] = (double) jksky + 1;/* 1-indexing */ L220: ; } /* for(jj = 0; jj < n ..)*/ for (jk = 0; jk < kk; ++jk) x[nrx[jk] - 1] = (double) jk + 1;/* 1-indexing */ /* mtt[k] := size(k-th cluster) : */ for (ka = 0; ka < kk; ++ka) { mtt[ka] = 0; for(j = 0; j < n; j++) { if (((int) x[j]) == ka + 1)/* 1-indexing */ ++mtt[ka]; } } return; } /* end resul() -----------------------------------------------------------*/ void black(int kk, int jpp, int nsam, int *nbest, double *dys, double s, double *x, /* --> 
Output : */ double *avsyl, double *ttsyl, double *sylinf, /* but the following output vectors are never by clara() : */ int *ncluv, int *nsend, int *nelem, int *negbr, double *syl, double *srank) { /* Silhouettes computation and "drawing" --> syl[] and sylinf[] */ /* System generated locals */ int sylinf_dim1, sylinf_offset; /* Local variables */ double att, btt, db, dysa, dysb, symax; int lang = -1/* -Wall */; int j, l, lplac, nj, nl, nbb, ncase, nclu, numcl, nsylr, ntt; /* Parameter adjustments */ --avsyl; --srank; --syl; --negbr; --nelem; --nsend; --ncluv; --nbest; sylinf_dim1 = nsam; sylinf_offset = 1 + sylinf_dim1 * 1; sylinf -= sylinf_offset; /* construction of clustering vector (ncluv) of selected sample (nbest). */ /* Function Body */ for (l = 1; l <= nsam; ++l) { ncase = nbest[l]; ncluv[l] = (int) x[ncase - 1]; } /* drawing of the silhouettes */ nsylr = 0; *ttsyl = 0.; for (numcl = 1; numcl <= kk; ++numcl) { ntt = 0; for (j = 1; j <= nsam; ++j) { if (ncluv[j] == numcl) { ++ntt; nelem[ntt] = j; } } for (j = 1; j <= ntt; ++j) { nj = nelem[j]; dysb = s * 1.1 + 1.; negbr[j] = -1; for (nclu = 1; nclu <= kk; ++nclu) { if (nclu != numcl) { nbb = 0; db = 0.; for (l = 1; l <= nsam; ++l) { if (ncluv[l] == nclu) { ++nbb; db += dys[ind_2(nj, l)]; } } btt = (double) nbb; db /= btt; if (db < dysb) { dysb = db; negbr[j] = nclu; } } } if (ntt == 1) { syl[j] = 0.; continue /* j */; } dysa = 0.; for (l = 1; l <= ntt; ++l) { nl = nelem[l]; dysa += dys[ind_2(nj, nl)]; } att = (double) (ntt - 1); dysa /= att; if (dysa <= 0.) { if (dysb > 0.) syl[j] = 1.; else syl[j] = 0.; continue /* j */; } if (dysb > 0.) { if (dysb > dysa) syl[j] = 1. - dysa / dysb; else if (dysb < dysa) syl[j] = dysb / dysa - 1.; else /* (dysb == dysa) */ syl[j] = 0.; if (syl[j] < -1.) syl[j] = -1.; else if (syl[j] > 1.) syl[j] = 1.; } else { syl[j] = -1.; } } /* for(j ..) 
*/ avsyl[numcl] = 0.; for (j = 1; j <= ntt; ++j) { symax = -2.; for (l = 1; l <= ntt; ++l) { if (syl[l] > symax) { symax = syl[l]; lang = l; } } nsend[j] = lang; srank[j] = syl[lang]; avsyl[numcl] += srank[j]; syl[lang] = -3.; } *ttsyl += avsyl[numcl]; avsyl[numcl] /= ntt; if (ntt >= 2) { for (l = 1; l <= ntt; ++l) { lplac = nsend[l]; ncase = nelem[lplac]; ++nsylr; sylinf[nsylr + sylinf_dim1] = (double) numcl; sylinf[nsylr + (sylinf_dim1 << 1)] = (double) negbr[lplac]; sylinf[nsylr + sylinf_dim1 * 3] = srank[l]; sylinf[nsylr + (sylinf_dim1 << 2)] = (double) nbest[ncase]; } } else { ncase = nelem[1]; ++nsylr; sylinf[nsylr + sylinf_dim1] = (double) numcl; sylinf[nsylr + (sylinf_dim1 << 1)] = (double) negbr[1]; sylinf[nsylr + sylinf_dim1 * 3] = 0.; sylinf[nsylr + (sylinf_dim1 << 2)] = (double) nbest[ncase]; } } *ttsyl /= (double) (nsam); return; } /* black */ cluster/po/0000755000176000001440000000000012124335263012402 5ustar ripleyuserscluster/po/update-me.sh0000755000176000001440000000137012014772531014624 0ustar ripleyusers#!/bin/sh # ## Script for updating package-specific *.pot files ## written such that it should work for any package # thisdir=`dirname $0` ; cd $thisdir; thisdir=`pwd` echo 'preliminary thisdir='$thisdir pkgDIR=`dirname $thisdir` pkg=`basename $pkgDIR` echo ' --> pkgDIR='$pkgDIR' ; pkg='$pkg echo ''; echo '## FIXME ## use new Scheme from R 2.16.x on' cd `R-patched RHOME`/po make pkg-update PKG=$pkg PKGDIR=$pkgDIR echo 'end{make pkg-update}' ; echo '' echo 'Test with (e.g.)' echo ' LANGUAGE=de R --no-environ --no-save' ; echo '' echo 'and then something like' echo ' ellipsoidPoints(diag(3)) ; ellipsoidPoints(cbind(1,1:3))'; echo '' echo 'Commit with something like' echo " svn ci -m'translation updates' po inst/po"; echo '' cluster/po/R-pl.po0000644000176000001440000006103512014772531013562 0ustar ripleyusersmsgid "" msgstr "" "Project-Id-Version: cluster 1.14.2\n" "Report-Msgid-Bugs-To: bugs.r-project.org\n" "POT-Creation-Date: 2012-08-21 22:49\n" 
"PO-Revision-Date: 2012-08-21 11:03+0100\n" "Last-Translator: Åukasz Daniel \n" "Language-Team: Åukasz Daniel \n" "Language: pl_PL\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" "Plural-Forms: nplurals=3; plural=(n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 " "|| n%100>=20) ? 1 : 2)\n" "X-Poedit-SourceCharset: iso-8859-1\n" # Recommended/cluster/R/agnes.q: 9 # stop("invalid clustering method") msgid "invalid clustering method" msgstr "niepoprawna metoda grupowania" # Recommended/cluster/R/agnes.q: 10 # stop("ambiguous clustering method") msgid "ambiguous clustering method" msgstr "niejednoznaczna metoda grupowania" # Recommended/cluster/R/agnes.q: 21 # stop("'par.method' must be of length 1, 3, or 4") msgid "'par.method' must be of length 1, 3, or 4" msgstr "'par.method' musi być dÅ‚ugoÅ›ci 1, 3, lub 4" # Recommended/cluster/R/pam.q: 12 # stop(..msg$error["NAdiss"]) # Recommended/cluster/R/diana.q: 10 # stop(..msg$error["NAdiss"]) # Recommended/cluster/R/agnes.q: 27 # stop(..msg$error["NAdiss"]) # Recommended/cluster/R/fanny.q: 11 # stop(..msg$error["NAdiss"]) msgid "NAdiss" msgstr "wartość NA odmiennoÅ›ci" # Recommended/cluster/R/pam.q: 19 # stop(..msg$error["non.diss"]) # Recommended/cluster/R/diana.q: 17 # stop(..msg$error["non.diss"]) # Recommended/cluster/R/agnes.q: 34 # stop(..msg$error["non.diss"]) # Recommended/cluster/R/fanny.q: 18 # stop(..msg$error["non.diss"]) msgid "non.diss" msgstr "nieodmienne" # Recommended/cluster/R/pam.q: 39 # stop("x is not a numeric dataframe or matrix.") # Recommended/cluster/R/diana.q: 35 # stop("x is not a numeric dataframe or matrix.") # Recommended/cluster/R/clara.q: 14 # stop("x is not a numeric dataframe or matrix.") # Recommended/cluster/R/agnes.q: 52 # stop("x is not a numeric dataframe or matrix.") # Recommended/cluster/R/fanny.q: 36 # stop("x is not a numeric dataframe or matrix.") msgid "x is not a numeric dataframe or matrix." 
msgstr "'x' nie jest ramkÄ… liczbowÄ… ani też macierzÄ…" # Recommended/cluster/R/agnes.q: 67 # stop("need at least 2 objects to cluster") msgid "need at least 2 objects to cluster" msgstr "potrzeba co najmniej 2 obiektów do grupowania" # Recommended/cluster/R/agnes.q: 91 # stop("No clustering performed, NA-values in the dissimilarity matrix.\n # ) # Recommended/cluster/R/fanny.q: 119 # stop("No clustering performed, NA-values in the dissimilarity matrix.") msgid "No clustering performed, NA-values in the dissimilarity matrix." msgstr "Nie wykonano grupowania, wartoÅ›ci NA w macierzy różnic." # Recommended/cluster/R/clara.q: 12 # stop("'x' is a \"dist\" object, but should be a data matrix or frame") msgid "'x' is a \"dist\" object, but should be a data matrix or frame" msgstr "'x' jest obiektem klasy 'dist', ale powinien być macierzÄ… lub ramkÄ…" # Recommended/cluster/R/clara.q: 17 # stop("The number of cluster should be at least 1 and at most n-1." ) msgid "The number of cluster should be at least 1 and at most n-1." msgstr "Liczba grup powinna wynosić conajmniej 1 oraz co najwyżej n-1." 
# Recommended/cluster/R/clara.q: 19 # stop(gettextf("'sampsize' should be at least %d = max(2, 1+ number of clusters)", # max(2,k+1)), domain=NA) msgid "'sampsize' should be at least %d = max(2, 1+ number of clusters)" msgstr "'sampsize' powinien być co najmniej %d = max(2, 1+ liczba grup)" # Recommended/cluster/R/clara.q: 22 # stop(gettextf("'sampsize' = %d should not be larger than the number of objects, %d", # sampsize, n), domain=NA) msgid "'sampsize' = %d should not be larger than the number of objects, %d" msgstr "'sampsize' = %d nie powinien być wiÄ™kszy niż liczba obiektów, %d" # Recommended/cluster/R/clara.q: 25 # stop("'samples' should be at least 1") msgid "'samples' should be at least 1" msgstr "'samples' powinno wynosić przynajmniej 1" # Recommended/cluster/R/clara.q: 33 # stop("when 'medoids.x' is FALSE, 'keep.data' must be too") msgid "when 'medoids.x' is FALSE, 'keep.data' must be too" msgstr "kiedy 'medoids.x' jest FALSE, 'keep.data' musi być również FALSE" # Recommended/cluster/R/clara.q: 101 # stop("Each of the random samples contains objects between which\n # " no distance can be computed.") msgid "Each of the random samples contains objects between which" msgstr "Każda z losowych próbek zawiera obiekty pomiÄ™dzy którymi" # Recommended/cluster/R/clara.q: 101 # stop("Each of the random samples contains objects between which\n # " no distance can be computed.") msgid "no distance can be computed." msgstr "odlegÅ‚ość nie może zostać obliczona." 
# Recommended/cluster/R/clara.q: 104 # stop("For each of the ", samples, # " samples, at least one object was found which\n # ould not", # " be assigned to a cluster (because of missing values).") msgid "For each of the" msgstr "Dla każdej z" # Recommended/cluster/R/clara.q: 104 # stop("For each of the ", samples, # " samples, at least one object was found which\n # ould not", # " be assigned to a cluster (because of missing values).") msgid "" "samples, at least one object was found which\n" " could not" msgstr "" "próbek, co najmniej jeden obiekt zostaÅ‚ znaleziony, który\n" "nie mógÅ‚" # Recommended/cluster/R/clara.q: 104 # stop("For each of the ", samples, # " samples, at least one object was found which\n # ould not", # " be assigned to a cluster (because of missing values).") msgid "be assigned to a cluster (because of missing values)." msgstr "zostać przypisany do grupy (z powodu brakujÄ…cych wartoÅ›ci)." # Recommended/cluster/R/clara.q: 108 # stop("invalid 'jstop' from .C(cl_clara,.): ", res$jstop) msgid "invalid 'jstop' from .C(cl_clara,.):" msgstr "niepoprawny 'jstop' z '.C(cl_clara,.)':" # Recommended/cluster/R/clusGap.R: 20 # stop("'B' has to be a positive integer") msgid "'B' has to be a positive integer" msgstr "'B' musi być dodatniÄ… liczbÄ… caÅ‚kowitÄ…" # Recommended/cluster/R/coef.R: 10 # stop("invalid 'twins' object") msgid "invalid 'twins' object" msgstr "niepoprawny obiekt 'twins'" # Recommended/cluster/R/daisy.q: 7 # stop("x is not a dataframe or a numeric matrix.") msgid "x is not a dataframe or a numeric matrix." 
msgstr "'x' nie jest ramkÄ… danych ani też macierzÄ… liczbowÄ…" # Recommended/cluster/R/daisy.q: 14 # stop("invalid ", sQuote("type"),"; must be named list") msgid "invalid" msgstr "niepoprawny" # Recommended/cluster/R/daisy.q: 14 # stop("invalid ", sQuote("type"),"; must be named list") msgid "type" msgstr "type" # Recommended/cluster/R/daisy.q: 14 # stop("invalid ", sQuote("type"),"; must be named list") msgid "; must be named list" msgstr "; musi być nazwanÄ… listÄ…" # Recommended/cluster/R/daisy.q: 20 # stop("type$", nt, " has invalid column names") # Recommended/cluster/R/daisy.q: 24 # stop("type$", nt, " must be in 1:ncol(x)") # Recommended/cluster/R/daisy.q: 26 # stop("type$", nt, " must contain column names or numbers") msgid "type$" msgstr "type$" # Recommended/cluster/R/daisy.q: 20 # stop("type$", nt, " has invalid column names") msgid "has invalid column names" msgstr "posiada niepoprawne nazwy kolumn" # Recommended/cluster/R/daisy.q: 24 # stop("type$", nt, " must be in 1:ncol(x)") msgid "must be in 1:ncol(x)" msgstr "musi być w przedziale 1:ncol(x)" # Recommended/cluster/R/daisy.q: 26 # stop("type$", nt, " must contain column names or numbers") msgid "must contain column names or numbers" msgstr "musi zawierać nazwy kolumn lub liczby" # Recommended/cluster/R/daisy.q: 37 # stop("at least one binary variable has more than 2 levels.") msgid "at least one binary variable has more than 2 levels." msgstr "przynajmniej jedna zmienna binarna posiada wiÄ™cej niż 2 poziomy." # Recommended/cluster/R/daisy.q: 39 # warning("at least one binary variable has not 2 different levels.") msgid "at least one binary variable has not 2 different levels." msgstr "przynajmniej jedna zmienna binarna nie posiada 2 różnych poziomów." 
# Recommended/cluster/R/daisy.q: 47 # stop("at least one binary variable has values not in {0,1,NA}") msgid "at least one binary variable has values not in {0,1,NA}" msgstr "przynajmniej jedna zmienna binarna posiada wartoÅ›ci poza {0, 1, NA}" # Recommended/cluster/R/daisy.q: 71 # warning("binary variable(s) ", pColl(which(tI)[iBin]), # " treated as interval scaled") msgid "binary variable(s)" msgstr "zmienne binarne" # Recommended/cluster/R/daisy.q: 71 # warning("binary variable(s) ", pColl(which(tI)[iBin]), # " treated as interval scaled") msgid "treated as interval scaled" msgstr "traktowane jako interwaÅ‚ zostaÅ‚y przeskalowane" # Recommended/cluster/R/daisy.q: 91 # warning(sQuote("x"), " has constant columns ", # pColl(which(sx == 0)), "; these are standardized to 0") msgid "x" msgstr "x" # Recommended/cluster/R/daisy.q: 91 # warning(sQuote("x"), " has constant columns ", # pColl(which(sx == 0)), "; these are standardized to 0") msgid "has constant columns" msgstr "posiada staÅ‚e kolumny" # Recommended/cluster/R/daisy.q: 91 # warning(sQuote("x"), " has constant columns ", # pColl(which(sx == 0)), "; these are standardized to 0") msgid "; these are standardized to 0" msgstr "; zostaÅ‚y one ustandaryzowane do zera" # Recommended/cluster/R/daisy.q: 102 # warning("with mixed variables, metric \"gower\" is used automatically") msgid "with mixed variables, metric \"gower\" is used automatically" msgstr "z mieszanymi zmiennymi, metryka 'gower' jest używana automatycznie" # Recommended/cluster/R/daisy.q: 117 # stop("'weights' must be of length p (or 1)") msgid "'weights' must be of length p (or 1)" msgstr "'weights' musi być o dÅ‚ugoÅ›ci 'p' (lub 1)" # Recommended/cluster/R/daisy.q: 125 # stop("invalid type ", type2[ina], # " for column numbers ", pColl(which(is.na))) msgid "invalid type" msgstr "niepoprawny typ" # Recommended/cluster/R/daisy.q: 125 # stop("invalid type ", type2[ina], # " for column numbers ", pColl(which(is.na))) msgid "for column numbers" msgstr 
"dla liczb kolumn" # Recommended/cluster/R/diana.q: 75 # stop("No clustering performed, NA's in dissimilarity matrix.\n msgid "No clustering performed, NA's in dissimilarity matrix." msgstr "Nie wykonano grupowania, wartoÅ›ci NA w macierzy różnic" # Recommended/cluster/R/ellipsoidhull.R: 14 # stop("'x' must be numeric n x p matrix") msgid "'x' must be numeric n x p matrix" msgstr "'x' musi być liczbowÄ… macierzÄ… n x p" # Recommended/cluster/R/ellipsoidhull.R: 16 # warning("omitting NAs") msgid "omitting NAs" msgstr "pomijanie wartoÅ›ci NA" # Recommended/cluster/R/ellipsoidhull.R: 20 # stop("no points without missing values") msgid "no points without missing values" msgstr "brak punktów bez brakujÄ…cych wartoÅ›ci" # Recommended/cluster/R/ellipsoidhull.R: 40 # stop("computed some negative or all 0 'prob'abilities") msgid "computed some negative or all 0 'prob'abilities" msgstr "" "niektóre wyliczone prawdopodobieÅ„stwa sÄ… ujemne lub wszystkie sÄ… zerami" # Recommended/cluster/R/ellipsoidhull.R: 43 # warning("possibly not converged in ", maxit, " iterations") msgid "possibly not converged in" msgstr "prawdopodobnie nie uzbieżniÅ‚ siÄ™ w" # Recommended/cluster/R/ellipsoidhull.R: 43 # warning("possibly not converged in ", maxit, " iterations") msgid "iterations" msgstr "iteracjach" # Recommended/cluster/R/ellipsoidhull.R: 95 # stop("'A' must be p x p cov-matrix defining an ellipsoid") msgid "'A' must be p x p cov-matrix defining an ellipsoid" msgstr "'A' musi być macierzÄ… kowariancji p x p okreÅ›lajÄ…cÄ… elipsoidÄ™" # Recommended/cluster/R/ellipsoidhull.R: 109 # stop("ellipsoidPoints() not yet implemented for p >= 3 dim.") msgid "ellipsoidPoints() not yet implemented for p >= 3 dim." msgstr "" "'ellipsoidPoints()' nie zostaÅ‚a jeszcze zaimplementowana dla p >= 3 wymiary." 
# Recommended/cluster/R/fanny.q: 54 # stop("'k' (number of clusters) must be in {1,2, .., n/2 -1}") msgid "'k' (number of clusters) must be in {1,2, .., n/2 -1}" msgstr "'k' (liczba grup) musi mieÅ›cić siÄ™ w przedziale {1,2, .., n/2 -1}" # Recommended/cluster/R/fanny.q: 57 # stop("'memb.exp' must be a finite number > 1") msgid "'memb.exp' must be a finite number > 1" msgstr "'memb.exp' musi być skoÅ„czonÄ… liczbÄ… > 1" # Recommended/cluster/R/fanny.q: 59 # stop("'maxit' must be non-negative integer") msgid "'maxit' must be non-negative integer" msgstr "'maxit' musi być nieujemnÄ… liczbÄ… caÅ‚kowitÄ…" # Recommended/cluster/R/fanny.q: 68 # stop("'iniMem.p' must be a nonnegative n * k matrix with rowSums == 1") msgid "'iniMem.p' must be a nonnegative n * k matrix with rowSums == 1" msgstr "'iniMem.p' musi być nieujemnÄ… maceirzÄ… n x k z rowSums == 1" # Recommended/cluster/R/fanny.q: 106 # warning(sprintf( # "FANNY algorithm has not converged in 'maxit' = %d iterations", # maxit)) msgid "FANNY algorithm has not converged in 'maxit' = %d iterations" msgstr "algorytm FANNY nie uzbieżniÅ‚ siÄ™ w 'maxit' = %d iteracjach" # Recommended/cluster/R/fanny.q: 143 # warning("the memberships are all very close to 1/k. Maybe decrease 'memb.exp' ?") msgid "the memberships are all very close to 1/k. Maybe decrease 'memb.exp' ?" msgstr "przynależnoÅ›ci sÄ… bardzo bliskie 1/k. Może zmniejszyć 'memb.exp'?" 
# Recommended/cluster/R/fanny.q: 240 # stop("'m', a membership matrix, must be nonnegative with rowSums == 1") msgid "'m', a membership matrix, must be nonnegative with rowSums == 1" msgstr "macierz przynależnoÅ›ci 'm' musi być nieujemna z rowSums == 1" # Recommended/cluster/R/internal.R: 18 # stop("'n' must be >= 2") # Recommended/cluster/R/internal.R: 26 # stop("'n' must be >= 2") msgid "'n' must be >= 2" msgstr "'n' musi być >= 2" # Recommended/cluster/PORTING: 8 # stop("x must be a matrix or data frame.") # Recommended/cluster/R/mona.q: 6 # stop("x must be a matrix or data frame.") msgid "x must be a matrix or data frame." msgstr "'x' musi być macierzÄ… lub ramkÄ… danych." # Recommended/cluster/R/mona.q: 10 # stop("All variables must be binary (factor with 2 levels).") msgid "All variables must be binary (factor with 2 levels)." msgstr "Wszystkie zmienne muszÄ… być binarne (czynnik z dwoma poziomami)" # Recommended/cluster/R/agnes.q: 10 # stop("ambiguous clustering method") #, fuzzy msgid "No clustering performed," msgstr "niejednoznaczna metoda grupowania" # Recommended/cluster/R/mona.q: 41 # stop(ch,"an object was found with all values missing.") msgid "an object was found with all values missing." msgstr "znaleziono obiekt któremu brakowaÅ‚o wszystkich wartoÅ›ci." # Recommended/cluster/R/mona.q: 43 # stop(ch,"a variable was found with at least 50% missing values.") msgid "a variable was found with at least 50% missing values." msgstr "znaleziono zmiennÄ… z co najmniej 50% brakujÄ…cych wartoÅ›ci." # Recommended/cluster/R/mona.q: 45 # stop(ch,"a variable was found with all non missing values identical.") msgid "a variable was found with all non missing values identical." msgstr "znaleziono zmiennÄ… z identycznymi niebrakujÄ…cymi wartoÅ›ciami." # Recommended/cluster/R/mona.q: 47 # stop(ch,"all variables have at least one missing value.") msgid "all variables have at least one missing value." 
msgstr "wszystkie zmienne majÄ… co najmniej jednÄ… brakujÄ…cÄ… wartość." # Recommended/cluster/R/pam.q: 55 # stop("Number of clusters 'k' must be in {1,2, .., n-1}; hence n >= 2") msgid "Number of clusters 'k' must be in {1,2, .., n-1}; hence n >= 2" msgstr "" "Liczba grup 'k' musi zawierać siÄ™ w zbiorze {1,2, .., n-1}; tak wiÄ™c n >= 2" # Recommended/cluster/R/pam.q: 63 # stop("'medoids' must be NULL or vector of ", # k, " distinct indices in {1,2, .., n}, n=", n) msgid "'medoids' must be NULL or vector of" msgstr "'medoids' musi być wartoÅ›ciÄ… NULL lub wektorem" # Recommended/cluster/R/pam.q: 63 # stop("'medoids' must be NULL or vector of ", # k, " distinct indices in {1,2, .., n}, n=", n) msgid "distinct indices in {1,2, .., n}, n=" msgstr "różnych indeksów w {1,2, .., n}, n=" # Recommended/cluster/R/pam.q: 109 # stop("No clustering performed, NAs in the computed dissimilarity matrix.") msgid "No clustering performed, NAs in the computed dissimilarity matrix." msgstr "Nie wykonano grupowania, wyliczono wartoÅ›ci NA w macierzy różnic." # Recommended/cluster/R/pam.q: 116 # stop("error from .C(cl_pam, *): invalid medID's") msgid "error from .C(cl_pam, *): invalid medID's" msgstr "błąd w '.C(cl_pam, *)': niepoprawne 'medID'" # Recommended/cluster/R/plotpart.q: 70 # stop("NA-values are not allowed in dist-like 'x'.") msgid "NA-values are not allowed in dist-like 'x'." msgstr "wartoÅ›ci NA nie sÄ… dozwolone w 'x' typu odlegÅ‚oÅ›ci." # Recommended/cluster/R/plotpart.q: 79 # stop("Distances must be result of dist or a square matrix.") msgid "Distances must be result of dist or a square matrix." msgstr "OdlegÅ‚oÅ›ci muszÄ… być wynikiem 'dist' lub macierzy kwadratowej." # Recommended/cluster/R/plotpart.q: 81 # stop("the square matrix is not symmetric.") msgid "the square matrix is not symmetric." msgstr "macierz kwadratowa nie jest symetryczna." 
# Recommended/cluster/R/plotpart.q: 94 # warning(">>>>> funny case in clusplot.default() -- please report!\n msgid ">>>>> funny case in clusplot.default() -- please report!" msgstr "" ">>>>> zabawny przypadek w 'clusplot.default()' -- proszÄ™ zgÅ‚osić raport!" # Recommended/cluster/R/plotpart.q: 116 # stop("x is not a data matrix") msgid "x is not a data matrix" msgstr "'x' nie jest macierzÄ… danych" # Recommended/cluster/R/plotpart.q: 120 # stop("one or more objects contain only missing values") msgid "one or more objects contain only missing values" msgstr "jeden lub wiÄ™cej obiektów zawierajÄ… jedynie wartoÅ›ci brakujÄ…ce" # Recommended/cluster/R/plotpart.q: 122 # stop("one or more variables contain only missing values") msgid "one or more variables contain only missing values" msgstr "jeden lub wiÄ™cej zmiennych zawiera jedynie wartoÅ›ci brakujÄ…ce" # Recommended/cluster/R/plotpart.q: 125 # message("Missing values were displaced by the median of the corresponding variable(s)") msgid "" "Missing values were displaced by the median of the corresponding variable(s)" msgstr "" "BrakujÄ…ce wartoÅ›ci zostaÅ‚y zastÄ…pione przez medianÄ™ odpowiednich zmiennych" # Recommended/cluster/R/plotpart.q: 165 # stop("x is not numeric") msgid "x is not numeric" msgstr "'x' nie jest liczbÄ…" # Recommended/cluster/R/plotpart.q: 175 # stop("The clustering vector is of incorrect length") msgid "The clustering vector is of incorrect length" msgstr "Wektor grupujÄ…cy posiada niepoprawnÄ… dÅ‚ugość" # Recommended/cluster/R/plotpart.q: 178 # stop("NA-values are not allowed in clustering vector") msgid "NA-values are not allowed in clustering vector" msgstr "wartoÅ›ci NA sÄ… niedozwolone w wektorze grupujÄ…cym" # Recommended/cluster/R/plotpart.q: 304 # warning("Error in Fortran routine for the spanning ellipsoid,", # "\n # ank problem??") msgid "Error in Fortran routine for the spanning ellipsoid," msgstr "Błąd w procedurze Fortran dla elipsoidy obejmujÄ…cej," # 
Recommended/cluster/R/plotpart.q: 304 # warning("Error in Fortran routine for the spanning ellipsoid,", # "\n # ank problem??") msgid "rank problem??" msgstr "problem rang??" # Recommended/cluster/R/plotpart.q: 357 # stop("'col.clus' should have length 4 when color is TRUE") msgid "'col.clus' should have length 4 when color is TRUE" msgstr "'col.clus' powinien mieć dÅ‚ugość 4, gdy 'color' ma wartość TRUE" # Recommended/cluster/R/plotpart.q: 512 # stop("no diss nor data found, nor the original argument of ", # deparse(x$call)) msgid "no diss nor data found, nor the original argument of" msgstr "nie znaleziono różnic ani danych, ani oryginalnego argumentu" # Recommended/cluster/R/plotpart.q: 519 # stop("no diss nor data found for clusplot()'") msgid "no diss nor data found for clusplot()'" msgstr "nie znaleziono różnic ani danych dla 'clusplot()'" # Recommended/cluster/R/silhouette.R: 7 # stop("invalid partition object") msgid "invalid partition object" msgstr "niepoprawny obiekt podziaÅ‚u" # Recommended/cluster/R/silhouette.R: 21 # stop("full silhouette is only available for results of", # " 'clara(*, keep.data = TRUE)'") msgid "full silhouette is only available for results of" msgstr "peÅ‚na sylwetka jest dostÄ™pna jedynie dla wyników" # Recommended/cluster/R/silhouette.R: 21 # stop("full silhouette is only available for results of", # " 'clara(*, keep.data = TRUE)'") msgid "'clara(*, keep.data = TRUE)'" msgstr "'clara(*, keep.data = TRUE)'" # Recommended/cluster/R/silhouette.R: 36 # stop("'x' must only have integer codes") # Recommended/cluster/R/silhouette.R: 83 # stop("'x' must only have integer codes") msgid "'x' must only have integer codes" msgstr "'x' musi posiadać tylko kody bÄ™dÄ…ce liczbami caÅ‚kowitymi" # Recommended/cluster/R/silhouette.R: 43 # stop("Need either a dissimilarity 'dist' or diss.matrix 'dmatrix'") # Recommended/cluster/R/silhouette.R: 95 # stop("Need either a dissimilarity 'dist' or diss.matrix 'dmatrix'") msgid "Need either a 
dissimilarity 'dist' or diss.matrix 'dmatrix'" msgstr "Potrzeba albo różnic 'dist' lub diss.matrix 'dmatrix'" # Recommended/cluster/R/silhouette.R: 45 # stop("'dmatrix' is not a dissimilarity matrix compatible to 'x'") # Recommended/cluster/R/silhouette.R: 97 # stop("'dmatrix' is not a dissimilarity matrix compatible to 'x'") msgid "'dmatrix' is not a dissimilarity matrix compatible to 'x'" msgstr "'dmatrix' nie jest macierzÄ… różnic kompatybilnÄ… z 'x'" # Recommended/cluster/R/silhouette.R: 49 # stop("clustering 'x' and dissimilarity 'dist' are incompatible") # Recommended/cluster/R/silhouette.R: 101 # stop("clustering 'x' and dissimilarity 'dist' are incompatible") msgid "clustering 'x' and dissimilarity 'dist' are incompatible" msgstr "grupowane 'x' oraz różnice 'dist' nie sÄ… kompatybilne" # Recommended/cluster/R/silhouette.R: 135 # stop("invalid silhouette structure") msgid "invalid silhouette structure" msgstr "niepoprana struktura 'silhouette'" # Recommended/cluster/R/silhouette.R: 159 # stop("invalid 'silhouette' object") msgid "invalid 'silhouette' object" msgstr "niepoprawny obiekt 'silhouette'" # Recommended/cluster/R/silhouette.R: 202 # stop("No valid silhouette information (#{clusters} =? 1)") msgid "No valid silhouette information (#{clusters} =? 1)" msgstr "Brak poprawnej informacji o sylwetce (czy liczba grup =? 
1)" # Recommended/cluster/R/daisy.q: 77 # warning("setting 'logical' variable",if(sum(ilog)>1)"s " else " ", # pColl(which(ilog)), " to type 'asymm'") #, fuzzy msgid "setting 'logical' variable %s to type 'asymm'" msgid_plural "setting 'logical' variables %s to type 'asymm'" msgstr[0] "ustawianie zmiennej 'logical'" msgstr[1] "ustawianie zmiennej 'logical'" msgstr[2] "ustawianie zmiennej 'logical'" # Recommended/cluster/R/clara.q: 91 # stop(ngettext(nNA, # sprintf("Observation %d has *only* NAs --> omit it for clustering", # i[1]), # ## nNA > 1 : # paste(if(nNA < 13) sprintf("Observations %s", pasteC(i)) # else sprintf("%d observations (%s ...)", nNA, pasteC(i[1:12])), # "\thave *only* NAs --> na.omit() them for clustering!", # sep = "\n # )) #~ msgid "Observation %d has *only* NAs --> omit it for clustering" #~ msgstr "" #~ "Obserwacja %d posiada *tylko* wartoÅ›ci NA --> pomijanie jej w grupowaniu" # Recommended/cluster/R/clara.q: 91 # stop(ngettext(nNA, # sprintf("Observation %d has *only* NAs --> omit it for clustering", # i[1]), # ## nNA > 1 : # paste(if(nNA < 13) sprintf("Observations %s", pasteC(i)) # else sprintf("%d observations (%s ...)", nNA, pasteC(i[1:12])), # "\thave *only* NAs --> na.omit() them for clustering!", # sep = "\n # )) #~ msgid "Observations %s" #~ msgstr "Obserwacje %s" # Recommended/cluster/R/clara.q: 91 # stop(ngettext(nNA, # sprintf("Observation %d has *only* NAs --> omit it for clustering", # i[1]), # ## nNA > 1 : # paste(if(nNA < 13) sprintf("Observations %s", pasteC(i)) # else sprintf("%d observations (%s ...)", nNA, pasteC(i[1:12])), # "\thave *only* NAs --> na.omit() them for clustering!", # sep = "\n # )) #~ msgid "%d observations (%s ...)" #~ msgstr "Obserwacje (liczba: %d; %s ...)" #~ msgid "have *only* NAs --> na.omit() them for clustering!" 
#~ msgstr "majÄ… *tylko* wartoÅ›ci NA --> pomijanie ich w grupowaniu" # Recommended/cluster/R/daisy.q: 77 # warning("setting 'logical' variable",if(sum(ilog)>1)"s " else " ", # pColl(which(ilog)), " to type 'asymm'") #~ msgid "s" #~ msgstr " " # Recommended/cluster/R/daisy.q: 77 # warning("setting 'logical' variable",if(sum(ilog)>1)"s " else " ", # pColl(which(ilog)), " to type 'asymm'") #~ msgid "to type 'asymm'" #~ msgstr "na typ 'asymm'" cluster/po/R-en@quot.po0000644000176000001440000002637412014772531014571 0ustar ripleyusers# All this catalog "translates" are quotation characters. # The msgids must be ASCII and therefore cannot contain real quotation # characters, only substitutes like grave accent (0x60), apostrophe (0x27) # and double quote (0x22). These substitutes look strange; see # http://www.cl.cam.ac.uk/~mgk25/ucs/quotes.html # # This catalog translates grave accent (0x60) and apostrophe (0x27) to # left single quotation mark (U+2018) and right single quotation mark (U+2019). # It also translates pairs of apostrophe (0x27) to # left single quotation mark (U+2018) and right single quotation mark (U+2019) # and pairs of quotation mark (0x22) to # left double quotation mark (U+201C) and right double quotation mark (U+201D). # # When output to an UTF-8 terminal, the quotation characters appear perfectly. # When output to an ISO-8859-1 terminal, the single quotation marks are # transliterated to apostrophes (by iconv in glibc 2.2 or newer) or to # grave/acute accent (by libiconv), and the double quotation marks are # transliterated to 0x22. # When output to an ASCII terminal, the single quotation marks are # transliterated to apostrophes, and the double quotation marks are # transliterated to 0x22. 
# msgid "" msgstr "" "Project-Id-Version: R 2.15.1\n" "Report-Msgid-Bugs-To: bugs.r-project.org\n" "POT-Creation-Date: 2012-08-21 22:49\n" "PO-Revision-Date: 2012-08-21 22:49\n" "Last-Translator: Automatically generated\n" "Language-Team: none\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" "Language: en\n" "Plural-Forms: nplurals=2; plural=(n != 1);\n" msgid "invalid clustering method" msgstr "invalid clustering method" msgid "ambiguous clustering method" msgstr "ambiguous clustering method" msgid "'par.method' must be of length 1, 3, or 4" msgstr "‘par.method’ must be of length 1, 3, or 4" msgid "NAdiss" msgstr "NAdiss" msgid "non.diss" msgstr "non.diss" msgid "x is not a numeric dataframe or matrix." msgstr "x is not a numeric dataframe or matrix." msgid "need at least 2 objects to cluster" msgstr "need at least 2 objects to cluster" msgid "No clustering performed, NA-values in the dissimilarity matrix." msgstr "No clustering performed, NA-values in the dissimilarity matrix." msgid "'x' is a \"dist\" object, but should be a data matrix or frame" msgstr "‘x’ is a \"dist\" object, but should be a data matrix or frame" msgid "The number of cluster should be at least 1 and at most n-1." msgstr "The number of cluster should be at least 1 and at most n-1." msgid "'sampsize' should be at least %d = max(2, 1+ number of clusters)" msgstr "‘sampsize’ should be at least %d = max(2, 1+ number of clusters)" msgid "'sampsize' = %d should not be larger than the number of objects, %d" msgstr "‘sampsize’ = %d should not be larger than the number of objects, %d" msgid "'samples' should be at least 1" msgstr "‘samples’ should be at least 1" msgid "when 'medoids.x' is FALSE, 'keep.data' must be too" msgstr "when ‘medoids.x’ is FALSE, ‘keep.data’ must be too" msgid "Each of the random samples contains objects between which" msgstr "Each of the random samples contains objects between which" msgid "no distance can be computed." 
msgstr "no distance can be computed." msgid "For each of the" msgstr "For each of the" msgid "" "samples, at least one object was found which\n" " could not" msgstr "" "samples, at least one object was found which\n" " could not" msgid "be assigned to a cluster (because of missing values)." msgstr "be assigned to a cluster (because of missing values)." msgid "invalid 'jstop' from .C(cl_clara,.):" msgstr "invalid ‘jstop’ from .C(cl_clara,.):" msgid "'B' has to be a positive integer" msgstr "‘B’ has to be a positive integer" msgid "invalid 'twins' object" msgstr "invalid ‘twins’ object" msgid "x is not a dataframe or a numeric matrix." msgstr "x is not a dataframe or a numeric matrix." msgid "invalid" msgstr "invalid" msgid "type" msgstr "type" msgid "; must be named list" msgstr "; must be named list" msgid "type$" msgstr "type$" msgid "has invalid column names" msgstr "has invalid column names" msgid "must be in 1:ncol(x)" msgstr "must be in 1:ncol(x)" msgid "must contain column names or numbers" msgstr "must contain column names or numbers" msgid "at least one binary variable has more than 2 levels." msgstr "at least one binary variable has more than 2 levels." msgid "at least one binary variable has not 2 different levels." msgstr "at least one binary variable has not 2 different levels." 
msgid "at least one binary variable has values not in {0,1,NA}" msgstr "at least one binary variable has values not in {0,1,NA}" msgid "binary variable(s)" msgstr "binary variable(s)" msgid "treated as interval scaled" msgstr "treated as interval scaled" msgid "x" msgstr "x" msgid "has constant columns" msgstr "has constant columns" msgid "; these are standardized to 0" msgstr "; these are standardized to 0" msgid "with mixed variables, metric \"gower\" is used automatically" msgstr "with mixed variables, metric \"gower\" is used automatically" msgid "'weights' must be of length p (or 1)" msgstr "‘weights’ must be of length p (or 1)" msgid "invalid type" msgstr "invalid type" msgid "for column numbers" msgstr "for column numbers" msgid "No clustering performed, NA's in dissimilarity matrix." msgstr "No clustering performed, NA's in dissimilarity matrix." msgid "'x' must be numeric n x p matrix" msgstr "‘x’ must be numeric n x p matrix" msgid "omitting NAs" msgstr "omitting NAs" msgid "no points without missing values" msgstr "no points without missing values" msgid "computed some negative or all 0 'prob'abilities" msgstr "computed some negative or all 0 'prob'abilities" msgid "possibly not converged in" msgstr "possibly not converged in" msgid "iterations" msgstr "iterations" msgid "'A' must be p x p cov-matrix defining an ellipsoid" msgstr "‘A’ must be p x p cov-matrix defining an ellipsoid" msgid "ellipsoidPoints() not yet implemented for p >= 3 dim." msgstr "ellipsoidPoints() not yet implemented for p >= 3 dim." 
msgid "'k' (number of clusters) must be in {1,2, .., n/2 -1}" msgstr "‘k’ (number of clusters) must be in {1,2, .., n/2 -1}" msgid "'memb.exp' must be a finite number > 1" msgstr "‘memb.exp’ must be a finite number > 1" msgid "'maxit' must be non-negative integer" msgstr "‘maxit’ must be non-negative integer" msgid "'iniMem.p' must be a nonnegative n * k matrix with rowSums == 1" msgstr "‘iniMem.p’ must be a nonnegative n * k matrix with rowSums == 1" msgid "FANNY algorithm has not converged in 'maxit' = %d iterations" msgstr "FANNY algorithm has not converged in ‘maxit’ = %d iterations" msgid "the memberships are all very close to 1/k. Maybe decrease 'memb.exp' ?" msgstr "the memberships are all very close to 1/k. Maybe decrease ‘memb.exp’ ?" msgid "'m', a membership matrix, must be nonnegative with rowSums == 1" msgstr "'m', a membership matrix, must be nonnegative with rowSums == 1" msgid "'n' must be >= 2" msgstr "‘n’ must be >= 2" msgid "x must be a matrix or data frame." msgstr "x must be a matrix or data frame." msgid "All variables must be binary (factor with 2 levels)." msgstr "All variables must be binary (factor with 2 levels)." msgid "No clustering performed," msgstr "No clustering performed," msgid "an object was found with all values missing." msgstr "an object was found with all values missing." msgid "a variable was found with at least 50% missing values." msgstr "a variable was found with at least 50% missing values." msgid "a variable was found with all non missing values identical." msgstr "a variable was found with all non missing values identical." msgid "all variables have at least one missing value." msgstr "all variables have at least one missing value." 
msgid "Number of clusters 'k' must be in {1,2, .., n-1}; hence n >= 2" msgstr "Number of clusters ‘k’ must be in {1,2, .., n-1}; hence n >= 2" msgid "'medoids' must be NULL or vector of" msgstr "‘medoids’ must be NULL or vector of" msgid "distinct indices in {1,2, .., n}, n=" msgstr "distinct indices in {1,2, .., n}, n=" msgid "No clustering performed, NAs in the computed dissimilarity matrix." msgstr "No clustering performed, NAs in the computed dissimilarity matrix." msgid "error from .C(cl_pam, *): invalid medID's" msgstr "error from .C(cl_pam, *): invalid medID's" msgid "NA-values are not allowed in dist-like 'x'." msgstr "NA-values are not allowed in dist-like ‘x’." msgid "Distances must be result of dist or a square matrix." msgstr "Distances must be result of dist or a square matrix." msgid "the square matrix is not symmetric." msgstr "the square matrix is not symmetric." msgid ">>>>> funny case in clusplot.default() -- please report!" msgstr ">>>>> funny case in clusplot.default() -- please report!" msgid "x is not a data matrix" msgstr "x is not a data matrix" msgid "one or more objects contain only missing values" msgstr "one or more objects contain only missing values" msgid "one or more variables contain only missing values" msgstr "one or more variables contain only missing values" msgid "" "Missing values were displaced by the median of the corresponding variable(s)" msgstr "" "Missing values were displaced by the median of the corresponding variable(s)" msgid "x is not numeric" msgstr "x is not numeric" msgid "The clustering vector is of incorrect length" msgstr "The clustering vector is of incorrect length" msgid "NA-values are not allowed in clustering vector" msgstr "NA-values are not allowed in clustering vector" msgid "Error in Fortran routine for the spanning ellipsoid," msgstr "Error in Fortran routine for the spanning ellipsoid," msgid "rank problem??" msgstr "rank problem??" 
msgid "'col.clus' should have length 4 when color is TRUE" msgstr "‘col.clus’ should have length 4 when color is TRUE" msgid "no diss nor data found, nor the original argument of" msgstr "no diss nor data found, nor the original argument of" msgid "no diss nor data found for clusplot()'" msgstr "no diss nor data found for clusplot()'" msgid "invalid partition object" msgstr "invalid partition object" msgid "full silhouette is only available for results of" msgstr "full silhouette is only available for results of" msgid "'clara(*, keep.data = TRUE)'" msgstr "'clara(*, keep.data = TRUE)'" msgid "'x' must only have integer codes" msgstr "‘x’ must only have integer codes" msgid "Need either a dissimilarity 'dist' or diss.matrix 'dmatrix'" msgstr "Need either a dissimilarity ‘dist’ or diss.matrix ‘dmatrix’" msgid "'dmatrix' is not a dissimilarity matrix compatible to 'x'" msgstr "‘dmatrix’ is not a dissimilarity matrix compatible to ‘x’" msgid "clustering 'x' and dissimilarity 'dist' are incompatible" msgstr "clustering ‘x’ and dissimilarity ‘dist’ are incompatible" msgid "invalid silhouette structure" msgstr "invalid silhouette structure" msgid "invalid 'silhouette' object" msgstr "invalid ‘silhouette’ object" msgid "No valid silhouette information (#{clusters} =? 1)" msgstr "No valid silhouette information (#{clusters} =? 1)" msgid "setting 'logical' variable %s to type 'asymm'" msgid_plural "setting 'logical' variables %s to type 'asymm'" msgstr[0] "setting ‘logical’ variable %s to type ‘asymm’" msgstr[1] "setting ‘logical’ variables %s to type ‘asymm’" cluster/po/R-de.po0000644000176000001440000002636412122115474013542 0ustar ripleyusers# Translation of src/library/Recommended/cluster/po/R-cluster.pot to German # Copyright (C) 2013 The R Foundation # This file is distributed under the same license as the R package. # Detlef Steuer , 2013. 
msgid "" msgstr "" "Project-Id-Version: R 3.0.0\n" "Report-Msgid-Bugs-To: bugs.r-project.org\n" "POT-Creation-Date: 2012-08-21 22:49\n" "PO-Revision-Date: 2013-03-18 10:39+0100\n" "Last-Translator: Detlef Steuer \n" "Language-Team: R Core Team = 3 dim." msgstr "ellipsoidPoints() noch nicht für Dimensionen p>=3 implementiert" msgid "'k' (number of clusters) must be in {1,2, .., n/2 -1}" msgstr "'k' (Anzahl Cluster) muss aus {1, 2, ..., n/2 -1} sein" msgid "'memb.exp' must be a finite number > 1" msgstr "'memb.exp' muss endliche Zahl > 1 sein" msgid "'maxit' must be non-negative integer" msgstr "'maxit' muss nicht-negative Zahl sein" msgid "'iniMem.p' must be a nonnegative n * k matrix with rowSums == 1" msgstr "" "'iniMem.p' muss eine nicht-negative n x k Matrix mit Zeilensummen == 1 sein" msgid "FANNY algorithm has not converged in 'maxit' = %d iterations" msgstr "FANNY Algorithmus ist in 'maxit' = %d Iterationen nicht konvergiert" msgid "the memberships are all very close to 1/k. Maybe decrease 'memb.exp' ?" msgstr "" "die Mitgliedswerte sind alle sehr nah an 1/k. Evtl. 'memb.exp' reduzieren?" msgid "'m', a membership matrix, must be nonnegative with rowSums == 1" msgstr "" "'m' ist eine Mitgleidswertmatrix, muss nichtnegative sein mit Zeilensummen " "== 1" msgid "'n' must be >= 2" msgstr "'n' muss >= 2 sein" msgid "x must be a matrix or data frame." msgstr "x muss eine Matrix oder Datafram sein" msgid "All variables must be binary (factor with 2 levels)." msgstr "Alle Variablen müssen binär sein (Faktor mit 2 Stufen)" msgid "No clustering performed," msgstr "Clustering nicht durchgeführt," msgid "an object was found with all values missing." msgstr "Objekt gefunden bei dem alle Werte fehlend sind" msgid "a variable was found with at least 50% missing values." msgstr "Variable mit mindestens 50% fehlenden Werten gefunden." msgid "a variable was found with all non missing values identical." 
msgstr "Variable gefunden, bei der alle nicht fehlenden Werten identisch sind." msgid "all variables have at least one missing value." msgstr "Alle Variablen haben mindestens einen fehlenden Wert." msgid "Number of clusters 'k' must be in {1,2, .., n-1}; hence n >= 2" msgstr "Anzahl der Cluster 'k' muss auch {1, 2, ..., n-1} sein; deshalb n >= 2" msgid "'medoids' must be NULL or vector of" msgstr "'medoids' muss NULL sein oder ein Vektor von" msgid "distinct indices in {1,2, .., n}, n=" msgstr "verschiedenen Indizes aus {1, 2,..., n}, n=" msgid "No clustering performed, NAs in the computed dissimilarity matrix." msgstr "" "Keine Clusterung durchgeführt, NAs in der berechnenten Unähnlichkeitsmatrix." msgid "error from .C(cl_pam, *): invalid medID's" msgstr "Fehler aus .C(cl_pam, *): unzulässige medID's" msgid "NA-values are not allowed in dist-like 'x'." msgstr "NAs nicht erlaubt in dist-ähnlichem 'x'." msgid "Distances must be result of dist or a square matrix." msgstr "" "Distanzen müssen ein Ergebnis von dist oder eine quadratische Matrix sein." msgid "the square matrix is not symmetric." msgstr "Die quadratische Matrix ist nicht symmetrisch." msgid ">>>>> funny case in clusplot.default() -- please report!" msgstr "" ">>>>> lustige Sache in clusplot.default() -- bitte an den Entwickler senden!" 
msgid "x is not a data matrix" msgstr "x ist keine Datenmatrix" msgid "one or more objects contain only missing values" msgstr "eins oder mehrere Objekte enthalten nur fehlende Werte" msgid "one or more variables contain only missing values" msgstr "eine oder mehrere Variablen enthalten nur fehlende Werte" msgid "" "Missing values were displaced by the median of the corresponding variable(s)" msgstr "" "Fehlende Werte wurden durch den Median der korrespondierenden Variable(n) " "ersetzt" msgid "x is not numeric" msgstr "x ist nicht numerisch" msgid "The clustering vector is of incorrect length" msgstr "Der Clustervektor hat eine falsche Länge" msgid "NA-values are not allowed in clustering vector" msgstr "NAs im Clustervektor nicht erlaubt" msgid "Error in Fortran routine for the spanning ellipsoid," msgstr "Fehler im Fortran-Kode für den aufspannenden Ellipsoiden," msgid "rank problem??" msgstr "evtl. Probleme mit dem Rang?" msgid "'col.clus' should have length 4 when color is TRUE" msgstr "'col.clus' sollte Länge 4 haben, wenn color auf TRUE gesetzt ist" msgid "no diss nor data found, nor the original argument of" msgstr "" "weder diss noch data gefunden, ebensowenig das ursprüngliche Argument von" msgid "no diss nor data found for clusplot()'" msgstr "weder diss noch data für 'clusplot()' gefunden" msgid "invalid partition object" msgstr "unzulässiges Partitionsobjekt" msgid "full silhouette is only available for results of" msgstr "die volle Silhoutte ist nur verfügbar für Resultate von" msgid "'clara(*, keep.data = TRUE)'" msgstr "'clara(*, keep.data = TRUE)'" msgid "'x' must only have integer codes" msgstr "'x' darf nur ganzahlige Kodes enthalten" msgid "Need either a dissimilarity 'dist' or diss.matrix 'dmatrix'" msgstr "" "Benötige entweder Unähnlichkeitsmatrix 'dist' oder diss.matrix 'dmatrix'" msgid "'dmatrix' is not a dissimilarity matrix compatible to 'x'" msgstr "'dmatrix' ist keine zu 'x' kompatible Unähnlichkeitsmatrix " msgid "clustering 'x' and 
dissimilarity 'dist' are incompatible" msgstr "Clusterung 'x' und Unähnlichkeitsmatrix 'dist' sind inkompatibel" msgid "invalid silhouette structure" msgstr "unzulässige Silhouttenstruktur" msgid "invalid 'silhouette' object" msgstr "unzulässiges 'silhouette' Objekt" msgid "No valid silhouette information (#{clusters} =? 1)" msgstr "keine gültige Silhouetteninformation (#{clusters} =? 1)" msgid "setting 'logical' variable %s to type 'asymm'" msgid_plural "setting 'logical' variables %s to type 'asymm'" msgstr[0] "setze 'logical' Variable %s auf Typ 'asymm'" msgstr[1] "setze 'logical' Variablen %s auf Typ 'asymm'" #~ msgid "Observation %d has *only* NAs --> omit it for clustering" #~ msgstr "Beobachtung %d hat *nur* NAs --> ausgelassen für Clustering" #~ msgid "Observations %s" #~ msgstr "Beobachtungen %s" #~ msgid "%d observations (%s ...)" #~ msgstr "%d Beobachtungen (%s ...)" #~ msgid "have *only* NAs --> na.omit() them for clustering!" #~ msgstr "haben *nur* NAs --> na.omit() diese für das Clustern" #~ msgid "s" #~ msgstr "n" #~ msgid "to type 'asymm'" #~ msgstr "auf Typ 'asymm'" cluster/po/R-cluster.pot0000644000176000001440000001415412014772531015014 0ustar ripleyusersmsgid "" msgstr "" "Project-Id-Version: R 2.15.1\n" "Report-Msgid-Bugs-To: bugs.r-project.org\n" "POT-Creation-Date: 2012-08-21 22:49\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=CHARSET\n" "Content-Transfer-Encoding: 8bit\n" msgid "invalid clustering method" msgstr "" msgid "ambiguous clustering method" msgstr "" msgid "'par.method' must be of length 1, 3, or 4" msgstr "" msgid "NAdiss" msgstr "" msgid "non.diss" msgstr "" msgid "x is not a numeric dataframe or matrix." msgstr "" msgid "need at least 2 objects to cluster" msgstr "" msgid "No clustering performed, NA-values in the dissimilarity matrix." 
msgstr "" msgid "'x' is a \"dist\" object, but should be a data matrix or frame" msgstr "" msgid "The number of cluster should be at least 1 and at most n-1." msgstr "" msgid "'sampsize' should be at least %d = max(2, 1+ number of clusters)" msgstr "" msgid "'sampsize' = %d should not be larger than the number of objects, %d" msgstr "" msgid "'samples' should be at least 1" msgstr "" msgid "when 'medoids.x' is FALSE, 'keep.data' must be too" msgstr "" msgid "Each of the random samples contains objects between which" msgstr "" msgid "no distance can be computed." msgstr "" msgid "For each of the" msgstr "" msgid "samples, at least one object was found which\n could not" msgstr "" msgid "be assigned to a cluster (because of missing values)." msgstr "" msgid "invalid 'jstop' from .C(cl_clara,.):" msgstr "" msgid "'B' has to be a positive integer" msgstr "" msgid "invalid 'twins' object" msgstr "" msgid "x is not a dataframe or a numeric matrix." msgstr "" msgid "invalid" msgstr "" msgid "type" msgstr "" msgid "; must be named list" msgstr "" msgid "type$" msgstr "" msgid "has invalid column names" msgstr "" msgid "must be in 1:ncol(x)" msgstr "" msgid "must contain column names or numbers" msgstr "" msgid "at least one binary variable has more than 2 levels." msgstr "" msgid "at least one binary variable has not 2 different levels." msgstr "" msgid "at least one binary variable has values not in {0,1,NA}" msgstr "" msgid "binary variable(s)" msgstr "" msgid "treated as interval scaled" msgstr "" msgid "x" msgstr "" msgid "has constant columns" msgstr "" msgid "; these are standardized to 0" msgstr "" msgid "with mixed variables, metric \"gower\" is used automatically" msgstr "" msgid "'weights' must be of length p (or 1)" msgstr "" msgid "invalid type" msgstr "" msgid "for column numbers" msgstr "" msgid "No clustering performed, NA's in dissimilarity matrix." 
msgstr "" msgid "'x' must be numeric n x p matrix" msgstr "" msgid "omitting NAs" msgstr "" msgid "no points without missing values" msgstr "" msgid "computed some negative or all 0 'prob'abilities" msgstr "" msgid "possibly not converged in" msgstr "" msgid "iterations" msgstr "" msgid "'A' must be p x p cov-matrix defining an ellipsoid" msgstr "" msgid "ellipsoidPoints() not yet implemented for p >= 3 dim." msgstr "" msgid "'k' (number of clusters) must be in {1,2, .., n/2 -1}" msgstr "" msgid "'memb.exp' must be a finite number > 1" msgstr "" msgid "'maxit' must be non-negative integer" msgstr "" msgid "'iniMem.p' must be a nonnegative n * k matrix with rowSums == 1" msgstr "" msgid "FANNY algorithm has not converged in 'maxit' = %d iterations" msgstr "" msgid "the memberships are all very close to 1/k. Maybe decrease 'memb.exp' ?" msgstr "" msgid "'m', a membership matrix, must be nonnegative with rowSums == 1" msgstr "" msgid "'n' must be >= 2" msgstr "" msgid "x must be a matrix or data frame." msgstr "" msgid "All variables must be binary (factor with 2 levels)." msgstr "" msgid "No clustering performed," msgstr "" msgid "an object was found with all values missing." msgstr "" msgid "a variable was found with at least 50% missing values." msgstr "" msgid "a variable was found with all non missing values identical." msgstr "" msgid "all variables have at least one missing value." msgstr "" msgid "Number of clusters 'k' must be in {1,2, .., n-1}; hence n >= 2" msgstr "" msgid "'medoids' must be NULL or vector of" msgstr "" msgid "distinct indices in {1,2, .., n}, n=" msgstr "" msgid "No clustering performed, NAs in the computed dissimilarity matrix." msgstr "" msgid "error from .C(cl_pam, *): invalid medID's" msgstr "" msgid "NA-values are not allowed in dist-like 'x'." msgstr "" msgid "Distances must be result of dist or a square matrix." msgstr "" msgid "the square matrix is not symmetric." 
msgstr "" msgid ">>>>> funny case in clusplot.default() -- please report!" msgstr "" msgid "x is not a data matrix" msgstr "" msgid "one or more objects contain only missing values" msgstr "" msgid "one or more variables contain only missing values" msgstr "" msgid "Missing values were displaced by the median of the corresponding variable(s)" msgstr "" msgid "x is not numeric" msgstr "" msgid "The clustering vector is of incorrect length" msgstr "" msgid "NA-values are not allowed in clustering vector" msgstr "" msgid "Error in Fortran routine for the spanning ellipsoid," msgstr "" msgid "rank problem??" msgstr "" msgid "'col.clus' should have length 4 when color is TRUE" msgstr "" msgid "no diss nor data found, nor the original argument of" msgstr "" msgid "no diss nor data found for clusplot()'" msgstr "" msgid "invalid partition object" msgstr "" msgid "full silhouette is only available for results of" msgstr "" msgid "'clara(*, keep.data = TRUE)'" msgstr "" msgid "'x' must only have integer codes" msgstr "" msgid "Need either a dissimilarity 'dist' or diss.matrix 'dmatrix'" msgstr "" msgid "'dmatrix' is not a dissimilarity matrix compatible to 'x'" msgstr "" msgid "clustering 'x' and dissimilarity 'dist' are incompatible" msgstr "" msgid "invalid silhouette structure" msgstr "" msgid "invalid 'silhouette' object" msgstr "" msgid "No valid silhouette information (#{clusters} =? 1)" msgstr "" msgid "setting 'logical' variable %s to type 'asymm'" msgid_plural "setting 'logical' variables %s to type 'asymm'" msgstr[0] "" msgstr[1] "" cluster/man/0000755000176000001440000000000012124335263012537 5ustar ripleyuserscluster/man/xclara.Rd0000644000176000001440000000120711711740461014301 0ustar ripleyusers\name{xclara} \alias{xclara} \title{Bivariate Data Set with 3 Clusters} \description{ An artificial data set consisting of 3000 points in 3 well-separated clusters of size 1000 each. 
} \usage{data(xclara)} \format{ A data frame with 3000 observations on 2 numeric variables giving the \eqn{x} and \eqn{y} coordinates of the points, respectively. } \source{ Sample data set accompanying the reference below. } \references{ Anja Struyf, Mia Hubert & Peter J. Rousseeuw (1996) Clustering in an Object-Oriented Environment. \emph{Journal of Statistical Software} \bold{1}. \url{http://www.jstatsoft.org/v01/i04} } \keyword{datasets} cluster/man/votes.repub.Rd0000644000176000001440000000113407351402102015272 0ustar ripleyusers\name{votes.repub} \alias{votes.repub} \title{Votes for Republican Candidate in Presidential Elections} \usage{data(votes.repub)} \description{ A data frame with the percents of votes given to the republican candidate in presidential elections from 1856 to 1976. Rows represent the 50 states, and columns the 31 elections. } \source{ S. Peterson (1973): \emph{A Statistical History of the American Presidential Elections}. New York: Frederick Ungar Publishing Co. Data from 1964 to 1976 is from R. M. Scammon, \emph{American Votes 12}, Congressional Quarterly. } \keyword{datasets} cluster/man/volume.ellipsoid.Rd0000644000176000001440000000160710371143174016324 0ustar ripleyusers\name{volume.ellipsoid} \alias{volume} \alias{volume.ellipsoid} \title{Compute the Volume of Planar Object} \description{ Compute the volume of a planar object. This is a generic function and a method for \code{ellipsoid} objects. } \usage{ \method{volume}{ellipsoid}(object) } \arguments{ \item{object}{an \R object the volume of which is wanted; for the \code{ellipsoid} method, an object of that class (see \code{\link{ellipsoidhull}} or the example below).} } \value{ a number, the volume of the given \code{object}. 
} \seealso{\code{\link{ellipsoidhull}} for spanning ellipsoid computation.} \examples{ ## example(ellipsoidhull) # which defines `ellipsoid' object myEl <- structure(list(cov = rbind(c(3,1),1:2), loc = c(0,0), d2 = 10), class = "ellipsoid") volume(myEl)# i.e. "area" here (d = 2) myEl # also mentions the "volume" } \keyword{utilities} cluster/man/twins.object.Rd0000644000176000001440000000134310370161217015435 0ustar ripleyusers\name{twins.object} \alias{twins.object} \alias{twins}% == class \title{Hierarchical Clustering Object} \description{ The objects of class \code{"twins"} represent an agglomerative or divisive (polythetic) hierarchical clustering of a dataset. } \section{GENERATION}{ This class of objects is returned from \code{agnes} or \code{diana}. } \section{METHODS}{ The \code{"twins"} class has a method for the following generic function: \code{pltree}. } \section{INHERITANCE}{ The following classes inherit from class \code{"twins"} : \code{"agnes"} and \code{"diana"}. } \value{ See \code{\link{agnes.object}} and \code{\link{diana.object}} for details. } \seealso{\code{\link{agnes}},\code{\link{diana}}. } \keyword{cluster} cluster/man/summary.pam.Rd0000644000176000001440000000104710370161217015276 0ustar ripleyusers\name{summary.pam} \alias{summary.pam} \alias{print.summary.pam} \title{Summary Method for PAM Objects} \description{Summarize a \code{\link{pam}} object and return an object of class \code{summary.pam}. There's a \code{\link{print}} method for the latter. } \usage{ \method{summary}{pam}(object, \dots) \method{print}{summary.pam}(x, \dots) } \arguments{ \item{x, object}{a \code{\link{pam}} object.} \item{\dots}{potential further arguments (require by generic).} } \seealso{\code{\link{pam}}, \code{\link{pam.object}}. 
} \keyword{cluster} cluster/man/summary.mona.Rd0000644000176000001440000000075510370161217015460 0ustar ripleyusers\name{summary.mona} \alias{summary.mona} \alias{print.summary.mona} \title{Summary Method for `mona' Objects} \description{Returns (and prints) a summary list for a \code{mona} object.} \usage{ \method{summary}{mona}(object, \dots) \method{print}{summary.mona}(x, \dots) } \arguments{ \item{x, object}{a \code{\link{mona}} object.} \item{\dots}{potential further arguments (require by generic).} } \seealso{\code{\link{mona}}, \code{\link{mona.object}}.} \keyword{cluster} \keyword{print} cluster/man/summary.diana.Rd0000644000176000001440000000077010370161217015577 0ustar ripleyusers\name{summary.diana} \alias{summary.diana} \alias{print.summary.diana} \title{Summary Method for `diana' Objects} \description{Returns (and prints) a summary list for a \code{diana} object.} \usage{ \method{summary}{diana}(object, \dots) \method{print}{summary.diana}(x, \dots) } \arguments{ \item{x, object}{a \code{\link{diana}} object.} \item{\dots}{potential further arguments (require by generic).} } \seealso{\code{\link{diana}}, \code{\link{diana.object}}.} \keyword{cluster} \keyword{print} cluster/man/summary.clara.Rd0000644000176000001440000000241010370161217015576 0ustar ripleyusers\name{summary.clara} \alias{summary.clara} \alias{print.summary.clara} \title{Summary Method for `clara' Objects} \description{ Returns (and prints) a summary list for a \code{clara} object. Printing gives more output than the corresponding \code{\link{print.clara}} method. } \usage{ \method{summary}{clara}(object, \dots) \method{print}{summary.clara}(x, \dots) } \arguments{ \item{x, object}{a \code{\link{clara}} object.} \item{\dots}{potential further arguments (require by generic).} } \seealso{\code{\link{clara.object}}} \examples{ ## generate 2000 objects, divided into 5 clusters. 
set.seed(47) x <- rbind(cbind(rnorm(400, 0,4), rnorm(400, 0,4)), cbind(rnorm(400,10,8), rnorm(400,40,6)), cbind(rnorm(400,30,4), rnorm(400, 0,4)), cbind(rnorm(400,40,4), rnorm(400,20,2)), cbind(rnorm(400,50,4), rnorm(400,50,4)) ) clx5 <- clara(x, 5) ## Mis`classification' table: % R version >= 1.5 : % table(rep(1:5, each = 400), clx5$clust) # -> 1 "error" table(rep(1:5, rep(400,5)), clx5$clust) # -> 1 "error" summary(clx5) ## Graphically: par(mfrow = c(3,1), mgp = c(1.5, 0.6, 0), mar = par("mar") - c(0,0,2,0)) %>1.5: plot(x, col = rep(2:6, each = 400)) plot(x, col = rep(2:6, rep(400,5))) plot(clx5) } \keyword{cluster} \keyword{print} cluster/man/summary.agnes.Rd0000644000176000001440000000122010370161217015607 0ustar ripleyusers\name{summary.agnes} \alias{summary.agnes} \alias{print.summary.agnes} \title{Summary Method for `agnes' Objects} \description{ Returns (and prints) a summary list for an \code{agnes} object. Printing gives more output than the corresponding \code{\link{print.agnes}} method. } \usage{ \method{summary}{agnes}(object, \dots) \method{print}{summary.agnes}(x, \dots) } \arguments{ \item{x, object}{a \code{\link{agnes}} object.} \item{\dots}{potential further arguments (require by generic).} } \seealso{\code{\link{agnes}}, \code{\link{agnes.object}}.} \examples{ data(agriculture) summary(agnes(agriculture)) } \keyword{cluster} \keyword{print} cluster/man/sizeDiss.Rd0000644000176000001440000000171010224226467014626 0ustar ripleyusers\name{sizeDiss} \alias{sizeDiss} \title{Sample Size of Dissimilarity Like Object} \description{ Returns the number of observations (\emph{sample size}) corresponding to a dissimilarity like object, or equivalently, the number of rows or columns of a matrix when only the lower or upper triangular part (without diagonal) is given. It is nothing else but the inverse function of \eqn{f(n) = n(n-1)/2}. 
} \usage{ sizeDiss(d) } \arguments{ \item{d}{any \R object with length (typically) \eqn{n(n-1)/2}.} } \value{ a number; \eqn{n} if \code{length(d) == n(n-1)/2}, \code{NA} otherwise. } \seealso{\code{\link{dissimilarity.object}} and also \code{\link{as.dist}} for class \code{dissimilarity} and \code{dist} objects which have a \code{Size} attribute.} \examples{ sizeDiss(1:10)# 5, since 10 == 5 * (5 - 1) / 2 sizeDiss(1:9) # NA n <- 1:100 stopifnot(n == sapply( n*(n-1)/2, function(n) sizeDiss(logical(n)))) } \keyword{utilities} \keyword{arith} cluster/man/silhouette.Rd0000644000176000001440000002110711626747420015224 0ustar ripleyusers\name{silhouette} \alias{silhouette} \alias{silhouette.clara} \alias{silhouette.default} \alias{silhouette.partition} \alias{sortSilhouette} \alias{summary.silhouette} \alias{print.summary.silhouette} \alias{plot.silhouette} \title{Compute or Extract Silhouette Information from Clustering} \description{ Compute silhouette information according to a given clustering in \eqn{k} clusters. } \usage{ silhouette(x, \dots) \method{silhouette}{default} (x, dist, dmatrix, \dots) \method{silhouette}{partition}(x, \dots) \method{silhouette}{clara}(x, full = FALSE, \dots) sortSilhouette(object, \dots) \method{summary}{silhouette}(object, FUN = mean, \dots) \method{plot}{silhouette}(x, nmax.lab = 40, max.strlen = 5, main = NULL, sub = NULL, xlab = expression("Silhouette width "* s[i]), col = "gray", do.col.sort = length(col) > 1, border = 0, cex.names = par("cex.axis"), do.n.k = TRUE, do.clus.stat = TRUE, \dots) } \arguments{ \item{x}{an object of appropriate class; for the \code{default} method an integer vector with \eqn{k} different integer cluster codes or a list with such an \code{x$clustering} component. Note that silhouette statistics are only defined if \eqn{2 \le k \le n-1}{2 <= k <= n-1}.} \item{dist}{a dissimilarity object inheriting from class \code{\link{dist}} or coercible to one. 
If not specified, \code{dmatrix} must be.} \item{dmatrix}{a symmetric dissimilarity matrix (\eqn{n \times n}{n * n}), specified instead of \code{dist}, which can be more efficient.} \item{full}{logical specifying if a \emph{full} silhouette should be computed for \code{\link{clara}} object. Note that this requires \eqn{O(n^2)} memory, since the full dissimilarity (see \code{\link{daisy}}) is needed internally.} \item{object}{an object of class \code{silhouette}.} \item{\dots}{further arguments passed to and from methods.} \item{FUN}{function used to summarize silhouette widths.} \item{nmax.lab}{integer indicating the number of labels which is considered too large for single-name labeling the silhouette plot.} \item{max.strlen}{positive integer giving the length to which strings are truncated in silhouette plot labeling.} \item{main, sub, xlab}{arguments to \code{\link{title}}; have a sensible non-NULL default here.} \item{col, border, cex.names}{arguments passed \code{\link{barplot}()}; note that the default used to be \code{col = heat.colors(n), border = par("fg")} instead.\cr \code{col} can also be a color vector of length \eqn{k} for clusterwise coloring, see also \code{do.col.sort}: } \item{do.col.sort}{logical indicating if the colors \code{col} should be sorted \dQuote{along} the silhouette; this is useful for casewise or clusterwise coloring.} \item{do.n.k}{logical indicating if \eqn{n} and \eqn{k} \dQuote{title text} should be written.} \item{do.clus.stat}{logical indicating if cluster size and averages should be written right to the silhouettes.} } \details{ For each observation i, the \emph{silhouette width} \eqn{s(i)} is defined as follows: \cr Put a(i) = average dissimilarity between i and all other points of the cluster to which i belongs (if i is the \emph{only} observation in its cluster, \eqn{s(i) := 0} without further calculations). For all \emph{other} clusters C, put \eqn{d(i,C)} = average dissimilarity of i to all observations of C. 
The smallest of these \eqn{d(i,C)} is \eqn{b(i) := \min_C d(i,C)}, and can be seen as the dissimilarity between i and its \dQuote{neighbor} cluster, i.e., the nearest one to which it does \emph{not} belong. Finally, \deqn{s(i) := \frac{b(i) - a(i) }{max(a(i), b(i))}.}{% s(i) := ( b(i) - a(i) ) / max( a(i), b(i) ).} \code{silhouette.default()} is now based on C code donated by Romain Francois (the R version being still available as \code{cluster:::silhouette.default.R}). Observations with a large \eqn{s(i)} (almost 1) are very well clustered, a small \eqn{s(i)} (around 0) means that the observation lies between two clusters, and observations with a negative \eqn{s(i)} are probably placed in the wrong cluster. } \note{ While \code{silhouette()} is \emph{intrinsic} to the \code{\link{partition}} clusterings, and hence has a (trivial) method for these, it is straightforward to get silhouettes from hierarchical clusterings from \code{silhouette.default()} with \code{\link{cutree}()} and distance as input. By default, for \code{\link{clara}()} partitions, the silhouette is just for the best random \emph{subset} used. Use \code{full = TRUE} to compute (and later possibly plot) the full silhouette. } \value{ \code{silhouette()} returns an object, \code{sil}, of class \code{silhouette} which is an [n x 3] matrix with attributes. For each observation i, \code{sil[i,]} contains the cluster to which i belongs as well as the neighbor cluster of i (the cluster, not containing i, for which the average dissimilarity between its observations and i is minimal), and the silhouette width \eqn{s(i)} of the observation. The \code{\link{colnames}} correspondingly are \code{c("cluster", "neighbor", "sil_width")}. 
\code{summary(sil)} returns an object of class \code{summary.silhouette}, a list with components %%Rd bug: fails inside \value{}!:\describe{ \item{si.summary}{numerical \code{\link{summary}} of the individual silhouette widths \eqn{s(i)}.} \item{clus.avg.widths}{numeric (rank 1) array of clusterwise \emph{means} of silhouette widths where \code{mean = FUN} is used.} \item{avg.width}{the total mean \code{FUN(s)} where \code{s} are the individual silhouette widths.} \item{clus.sizes}{\code{\link{table}} of the \eqn{k} cluster sizes.} \item{call}{if available, the call creating \code{sil}.} \item{Ordered}{logical identical to \code{attr(sil, "Ordered")}, see below.} %%Rd bug: } \code{sortSilhouette(sil)} orders the rows of \code{sil} as in the silhouette plot, by cluster (increasingly) and decreasing silhouette width \eqn{s(i)}. \cr \code{attr(sil, "Ordered")} is a logical indicating if \code{sil} \emph{is} ordered as by \code{sortSilhouette()}. In that case, \code{rownames(sil)} will contain case labels or numbers, and \cr \code{attr(sil, "iOrd")} the ordering index vector. } \references{ Rousseeuw, P.J. (1987) Silhouettes: A graphical aid to the interpretation and validation of cluster analysis. \emph{J. Comput. Appl. Math.}, \bold{20}, 53--65. chapter 2 of Kaufman, L. and Rousseeuw, P.J. (1990), see the references in \code{\link{plot.agnes}}. } \seealso{\code{\link{partition.object}}, \code{\link{plot.partition}}. 
} \examples{ data(ruspini) pr4 <- pam(ruspini, 4) str(si <- silhouette(pr4)) (ssi <- summary(si)) plot(si) # silhouette plot plot(si, col = c("red", "green", "blue", "purple"))# with cluster-wise coloring si2 <- silhouette(pr4$clustering, dist(ruspini, "canberra")) summary(si2) # has small values: "canberra"'s fault plot(si2, nmax= 80, cex.names=0.6) op <- par(mfrow= c(3,2), oma= c(0,0, 3, 0), mgp= c(1.6,.8,0), mar= .1+c(4,2,2,2)) for(k in 2:6) plot(silhouette(pam(ruspini, k=k)), main = paste("k = ",k), do.n.k=FALSE) mtext("PAM(Ruspini) as in Kaufman & Rousseeuw, p.101", outer = TRUE, font = par("font.main"), cex = par("cex.main")); frame() ## the same with cluster-wise colours: c6 <- c("tomato", "forest green", "dark blue", "purple2", "goldenrod4", "gray20") for(k in 2:6) plot(silhouette(pam(ruspini, k=k)), main = paste("k = ",k), do.n.k=FALSE, col = c6[1:k]) par(op) ## clara(): standard silhouette is just for the best random subset data(xclara) set.seed(7) str(xc1k <- xclara[sample(nrow(xclara), size = 1000) ,]) cl3 <- clara(xc1k, 3) plot(silhouette(cl3))# only of the "best" subset of 46 ## The full silhouette: internally needs large (36 MB) dist object: sf <- silhouette(cl3, full = TRUE) ## this is the same as s.full <- silhouette(cl3$clustering, daisy(xc1k)) if(paste(R.version$major, R.version$minor, sep=".") >= "2.3.0") stopifnot(all.equal(sf, s.full, check.attributes = FALSE, tol = 0)) ## color dependent on original "3 groups of each 1000": plot(sf, col = 2+ as.integer(names(cl3$clustering) ) \%/\% 1000, main ="plot(silhouette(clara(.), full = TRUE))") ## Silhouette for a hierarchical clustering: ar <- agnes(ruspini) si3 <- silhouette(cutree(ar, k = 5), # k = 4 gave the same as pam() above daisy(ruspini)) plot(si3, nmax = 80, cex.names = 0.5) ## 2 groups: Agnes() wasn't too good: si4 <- silhouette(cutree(ar, k = 2), daisy(ruspini)) plot(si4, nmax = 80, cex.names = 0.5) } \keyword{cluster} cluster/man/ruspini.Rd0000644000176000001440000000136311711740461014523 
0ustar ripleyusers\name{ruspini} \alias{ruspini} \title{Ruspini Data} \usage{data(ruspini)} \description{ The Ruspini data set, consisting of 75 points in four groups that is popular for illustrating clustering techniques. } \format{ A data frame with 75 observations on 2 variables giving the x and y coordinates of the points, respectively. } \source{ E. H. Ruspini (1970) Numerical methods for fuzzy clustering. \emph{Inform. Sci.} \bold{2}, 319--350. } \references{ see those in \code{\link{agnes}}. } \examples{ data(ruspini) ## Plot similar to Figure 4 in Struyf et al (1996) \dontrun{plot(pam(ruspini, 4), ask = TRUE)} \dontshow{plot(pam(ruspini, 4))} ## Plot similar to Figure 6 in Struyf et al (1996) plot(fanny(ruspini, 5)) } \keyword{datasets} cluster/man/print.pam.Rd0000644000176000001440000000110410073061404014734 0ustar ripleyusers\name{print.pam} \alias{print.pam} \title{Print Method for PAM Objects} \description{ Prints the medoids, clustering vector and objective function of a \code{pam} object. This is a method for the function \code{\link{print}()} for objects inheriting from class \code{\link{pam}}. } \usage{ \method{print}{pam}(x, \dots) } \arguments{ \item{x}{a pam object.} \item{\dots}{potential further arguments (required by generic).} } \seealso{ \code{\link{pam}}, \code{\link{pam.object}}, \code{\link{print}}, \code{\link{print.default}}. } \keyword{cluster} \keyword{print} cluster/man/print.mona.Rd0000644000176000001440000000112707351402102015105 0ustar ripleyusers\name{print.mona} \alias{print.mona} \title{Print Method for MONA Objects} \description{ Prints the ordering of objects, separation steps, and used variables of a \code{mona} object. This is a method for the function \code{\link{print}()} for objects inheriting from class \code{\link{mona}}.
} \usage{ \method{print}{mona}(x, \dots) } \arguments{ \item{x}{a mona object.} \item{\dots}{potential further arguments (require by generic).} } \seealso{ \code{\link{mona}}, \code{\link{mona.object}}, \code{\link{print}}, \code{\link{print.default}}. } \keyword{cluster} \keyword{print} cluster/man/print.fanny.Rd0000644000176000001440000000165010247645540015304 0ustar ripleyusers\name{print.fanny} \alias{print.fanny} \alias{summary.fanny} \alias{print.summary.fanny} \title{Print and Summary Methods for FANNY Objects} \description{ Prints the objective function, membership coefficients and clustering vector of \code{fanny} object. This is a method for the function \code{\link{print}()} for objects inheriting from class \code{\link{fanny}}. } \usage{ \method{print}{fanny}(x, digits = getOption("digits"), \dots) \method{summary}{fanny}(object, \dots) \method{print}{summary.fanny}(x, digits = getOption("digits"), \dots) } \arguments{ \item{x, object}{a \code{\link{fanny}} object.} \item{digits}{number of significant digits for printing, see \code{\link{print.default}}.} \item{\dots}{potential further arguments (required by generic).} } \seealso{ \code{\link{fanny}}, \code{\link{fanny.object}}, \code{\link{print}}, \code{\link{print.default}}. } \keyword{cluster} \keyword{print} cluster/man/print.dissimilarity.Rd0000644000176000001440000000303210224260017017036 0ustar ripleyusers\title{Print and Summary Methods for Dissimilarity Objects} \name{print.dissimilarity} \alias{print.dissimilarity} \alias{summary.dissimilarity} \alias{print.summary.dissimilarity} \description{ Print or summarize the distances and the attributes of a \code{dissimilarity} object. These are methods for the functions \code{print()} and \code{summary()} for \code{dissimilarity} objects. See \code{print}, \code{print.default}, or \code{summary} for the general behavior of these. 
} \usage{ \method{print}{dissimilarity}(x, diag = NULL, upper = NULL, digits = getOption("digits"), justify = "none", right = TRUE, \dots) \method{summary}{dissimilarity}(object, digits = max(3, getOption("digits") - 2), \dots) \method{print}{summary.dissimilarity}(x, \dots) } \arguments{ \item{x, object}{a \code{dissimilarity} object or a \code{summary.dissimilarity} one for \code{print.summary.dissimilarity()}.} \item{digits}{the number of digits to use, see \code{\link{print.default}}.} \item{diag, upper, justify, right}{optional arguments specifying how the triangular dissimilarity matrix is printed; see \code{\link[stats]{print.dist}}.} \item{\dots}{potential further arguments (require by generic).} } \seealso{ \code{\link{daisy}}, \code{\link{dissimilarity.object}}, \code{\link{print}}, \code{\link{print.default}}, \code{\link{print.dist}}. } \examples{ ## See example(daisy) sd <- summary(daisy(matrix(rnorm(100), 20,5))) sd # -> print.summary.dissimilarity(.) str(sd) } \keyword{cluster} \keyword{print} cluster/man/print.diana.Rd0000644000176000001440000000116507351402102015231 0ustar ripleyusers\name{print.diana} \alias{print.diana} \title{Print Method for DIANA Objects} \description{ Prints the ordering of objects, diameters of splitted clusters, and divisive coefficient of a \code{diana} object. This is a method for the function \code{\link{print}()} for objects inheriting from class \code{\link{diana}}. } \usage{ \method{print}{diana}(x, \dots) } \arguments{ \item{x}{a diana object.} \item{\dots}{potential further arguments (require by generic).} } \seealso{ \code{\link{diana}}, \code{\link{diana.object}}, \code{\link{print}}, \code{\link{print.default}}. 
} \keyword{cluster} \keyword{print} cluster/man/print.clara.Rd0000644000176000001440000000127407520565344015260 0ustar ripleyusers\name{print.clara} \alias{print.clara} \title{Print Method for CLARA Objects} \description{ Prints the best sample, medoids, clustering vector and objective function of \code{clara} object. This is a method for the function \code{\link{print}()} for objects inheriting from class \code{\link{clara}}. } \usage{ \method{print}{clara}(x, \dots) } \arguments{ \item{x}{a clara object.} \item{\dots}{potential further arguments (require by generic).} } \seealso{ \code{\link{summary.clara}} producing more output; \code{\link{clara}}, \code{\link{clara.object}}, \code{\link{print}}, \code{\link{print.default}}. } \keyword{cluster} \keyword{print} % Converted by Sd2Rd version 0.3-2. cluster/man/print.agnes.Rd0000644000176000001440000000134607520566217015273 0ustar ripleyusers\name{print.agnes} \alias{print.agnes} \title{Print Method for AGNES Objects} \description{ Prints the call, agglomerative coefficient, ordering of objects and distances between merging clusters (`Height') of an \code{agnes} object. This is a method for the generic \code{\link{print}()} function for objects inheriting from class \code{agnes}, see \code{\link{agnes.object}}. } \usage{ \method{print}{agnes}(x, \dots) } \arguments{ \item{x}{an agnes object.} \item{\dots}{potential further arguments (required by generic).} } \seealso{ \code{\link{summary.agnes}} producing more output; \code{\link{agnes}}, \code{\link{agnes.object}}, \code{\link{print}}, \code{\link{print.default}}. } \keyword{cluster} \keyword{print} cluster/man/predict.ellipsoid.Rd0000644000176000001440000000416411626750550016456 0ustar ripleyusers\name{predict.ellipsoid} \alias{predict.ellipsoid} \alias{ellipsoidPoints} \title{Predict Method for Ellipsoid Objects} \description{ Compute points on the ellipsoid boundary, mostly for drawing. 
} \usage{ % method *and* stand alone on purpose : predict.ellipsoid(object, n.out=201, \dots) \method{predict}{ellipsoid}(object, n.out=201, \dots) ellipsoidPoints(A, d2, loc, n.half = 201) } \arguments{ \item{object}{an object of class \code{ellipsoid}, typically from \code{\link{ellipsoidhull}()}; alternatively any list-like object with proper components, see details below.} \item{n.out, n.half}{half the number of points to create.} \item{A, d2, loc}{arguments of the auxiliary \code{ellipsoidPoints}, see below.} \item{\dots}{passed to and from methods.} } \details{ Note \code{ellipsoidPoints} is the workhorse function of \code{predict.ellipsoid}, a standalone function and method for \code{ellipsoid} objects, see \code{\link{ellipsoidhull}}. The class of \code{object} is not checked; it must solely have valid components \code{loc} (length \eqn{p}), the \eqn{p \times p}{p x p} matrix \code{cov} (corresponding to \code{A}) and \code{d2} for the center, the shape (``covariance'') matrix and the squared average radius (or distance) or \code{\link{qchisq}(*, p)} quantile. Unfortunately, this is only implemented for \eqn{p = 2}, currently; contributions for \eqn{p \ge 3}{p >= 3} are \emph{very welcome}. } \value{ a numeric matrix of dimension \code{2*n.out} times \eqn{p}. } \seealso{\code{\link{ellipsoidhull}}, \code{\link{volume.ellipsoid}}. } \examples{ ## see also example(ellipsoidhull) ## Robust vs. L.S.
covariance matrix set.seed(143) x <- rt(200, df=3) y <- 3*x + rt(200, df=2) plot(x,y, main="non-normal data (N=200)") mtext("with classical and robust cov.matrix ellipsoids") X <- cbind(x,y) C.ls <- cov(X) ; m.ls <- colMeans(X) d2.99 <- qchisq(0.99, df = 2) lines(ellipsoidPoints(C.ls, d2.99, loc=m.ls), col="green") if(require(MASS)) { Cxy <- cov.rob(cbind(x,y)) lines(ellipsoidPoints(Cxy$cov, d2 = d2.99, loc=Cxy$center), col="red") }# MASS } \keyword{dplot} \keyword{utilities} cluster/man/pluton.Rd0000644000176000001440000000311310307303715014343 0ustar ripleyusers\name{pluton} \alias{pluton} \title{Isotopic Composition Plutonium Batches} \usage{data(pluton)} \description{ The \code{pluton} data frame has 45 rows and 4 columns, containing percentages of isotopic composition of 45 Plutonium batches. } \format{ This data frame contains the following columns: \describe{ \item{Pu238}{the percentages of \eqn{\ ^{238}Pu}{(238)Pu}, always less than 2 percent.} \item{Pu239}{the percentages of \eqn{\ ^{239}Pu}{(239)Pu}, typically between 60 and 80 percent (from neutron capture of Uranium, \eqn{\ ^{238}U}{(238)U}).} \item{Pu240}{percentage of the plutonium 240 isotope.} \item{Pu241}{percentage of the plutonium 241 isotope.} } } \details{ Note that the percentage of plutonium~242 can be computed from the other four percentages, see the examples. In the reference below it is explained why it is very desirable to combine these plutonium batches into three groups of similar size. } \source{ Available as \file{pluton.dat} from the archive %% originally at %% \url{http://win-www.uia.ac.be/u/statis/datasets/clusplot-examples.tar.gz}, currently \url{http://www.agoras.ua.ac.be/datasets/clusplot-examples.tar.gz}. } \references{ Rousseeuw, P.J. and Kaufman, L. and Trauwaert, E. (1996) Fuzzy clustering using scatter matrices, \emph{Computational Statistics and Data Analysis} \bold{23}(1), 135--151.
} \examples{ data(pluton) hist(apply(pluton,1,sum), col = "gray") # between 94\% and 100\% pu5 <- pluton pu5$Pu242 <- 100 - apply(pluton,1,sum) # the remaining isotope. pairs(pu5) } \keyword{datasets} cluster/man/pltree.twins.Rd0000644000176000001440000000423410631261142015462 0ustar ripleyusers\name{pltree.twins} \alias{pltree.twins} \title{Clustering Tree of a Hierarchical Clustering} \description{ Draws a clustering tree (dendrogram) of a \code{twins} object, i.e., hierarchical clustering, typically resulting from \code{\link{agnes}()} or \code{\link{diana}()}. } \usage{ \method{pltree}{twins}(x, main = paste("Dendrogram of ", deparse(x$call)), labels = NULL, ylab = "Height", \dots) } \arguments{ \item{x}{an object of class \code{"twins"}, typically created by either \code{\link{agnes}()} or \code{\link{diana}()}.} \item{main}{main title with a sensible default.} \item{labels}{labels to use; the default is constructed from \code{x}.} \item{ylab}{label for y-axis.} \item{\dots}{graphical parameters (see \code{\link{par}}) may also be supplied as arguments to this function.} } \value{ a NULL value is returned. } \details{ Creates a plot of a clustering tree given a \code{twins} object. The leaves of the tree are the original observations. In case of an agglomerative clustering, two branches come together at the distance between the two clusters being merged. For a divisive clustering, a branch splits up at the diameter of the cluster being splitted. Note that currently the method function simply calls \code{plot(\link[stats]{as.hclust}(x), ...)}, which dispatches to \code{\link{plot.hclust}(..)}. If more flexible plots are needed, consider \code{xx <- \link{as.dendrogram}(\link{as.hclust}(x))} and plotting \code{xx}, see \code{\link{plot.dendrogram}}. } \seealso{ \code{\link{agnes}}, \code{\link{agnes.object}}, \code{\link{diana}}, \code{\link{diana.object}}, \code{\link{hclust}}, \code{\link{par}}, \code{\link{plot.agnes}}, \code{\link{plot.diana}}. 
} \examples{ data(votes.repub) agn <- agnes(votes.repub) pltree(agn) dagn <- as.dendrogram(as.hclust(agn)) dagn2 <- as.dendrogram(as.hclust(agn), hang = 0.2) op <- par(mar = par("mar") + c(0,0,0, 2)) # more space to the right plot(dagn2, horiz = TRUE) plot(dagn, horiz = TRUE, center = TRUE, nodePar = list(lab.cex = 0.6, lab.col = "forest green", pch = NA), main = deparse(agn$call)) par(op) } \keyword{cluster} \keyword{hplot} cluster/man/pltree.Rd0000644000176000001440000000121110366455265014327 0ustar ripleyusers\name{pltree} \alias{pltree} \title{Clustering Trees - Generic Function} \description{ Generic function drawing a clustering tree (``dendrogram'') on the current graphics device. There is a \code{twins} method, see \code{\link{pltree.twins}} for usage and examples. } \usage{ pltree(x, \dots) } \arguments{ \item{x}{an \R object (for which a \code{pltree} method is defined).} \item{...}{methods may have additional arguments. Graphical parameters (see \code{\link{par}}) may also be supplied as arguments to this function.} } \seealso{\code{\link{pltree.twins}}, \code{\link{twins.object}}. 
} \keyword{cluster} \keyword{hplot} cluster/man/plot.partition.Rd0000644000176000001440000001110511674154360016020 0ustar ripleyusers\name{plot.partition} \alias{plot.partition} \title{Plot of a Partition of the Data Set} \description{Creates plots for visualizing a \code{partition} object.} \usage{ \method{plot}{partition}(x, ask = FALSE, which.plots = NULL, nmax.lab = 40, max.strlen = 5, data = x$data, dist = NULL, stand = FALSE, lines = 2, shade = FALSE, color = FALSE, labels = 0, plotchar = TRUE, span = TRUE, xlim = NULL, ylim = NULL, main = NULL, \dots) } \arguments{ \item{x}{an object of class \code{"partition"}, typically created by the functions \code{\link{pam}}, \code{\link{clara}}, or \code{\link{fanny}}.} \item{ask}{logical; if true and \code{which.plots} is \code{NULL}, \code{plot.partition} operates in interactive mode, via \code{\link{menu}}.} \item{which.plots}{integer vector or NULL (default), the latter producing both plots. Otherwise, \code{which.plots} must contain integers of \code{1} for a \emph{clusplot} or \code{2} for \emph{silhouette}.} \item{nmax.lab}{integer indicating the number of labels which is considered too large for single-name labeling the silhouette plot.} \item{max.strlen}{positive integer giving the length to which strings are truncated in silhouette plot labeling.} \item{data}{numeric matrix with the scaled data; per default taken from the partition object \code{x}, but can be specified explicitly.} \item{dist}{when \code{x} does not have a \code{diss} component as for \code{\link{pam}(*, keep.diss=FALSE)}, \code{dist} must be the dissimilarity if a clusplot is desired.} \item{stand,lines,shade,color,labels,plotchar,span,xlim,ylim,main, \dots}{ All optional arguments available for the \code{\link{clusplot.default}} function (except for the \code{diss} one) and graphical parameters (see \code{\link{par}}) may also be supplied as arguments to this function.} } \section{Side Effects}{ An appropriate plot is produced on the 
current graphics device. This can be one or both of the following choices: \cr Clusplot \cr Silhouette plot } \details{ When \code{ask= TRUE}, rather than producing each plot sequentially, \code{plot.partition} displays a menu listing all the plots that can be produced. If the menu is not desired but a pause between plots is still wanted, call \code{par(ask= TRUE)} before invoking the plot command. The \emph{clusplot} of a cluster partition consists of a two-dimensional representation of the observations, in which the clusters are indicated by ellipses (see \code{\link{clusplot.partition}} for more details). The \emph{silhouette plot} of a nonhierarchical clustering is fully described in Rousseeuw (1987) and in chapter 2 of Kaufman and Rousseeuw (1990). For each observation i, a bar is drawn, representing its silhouette width s(i), see \code{\link{silhouette}} for details. Observations are grouped per cluster, starting with cluster 1 at the top. Observations with a large s(i) (almost 1) are very well clustered, a small s(i) (around 0) means that the observation lies between two clusters, and observations with a negative s(i) are probably placed in the wrong cluster. A clustering can be performed for several values of \code{k} (the number of clusters). Finally, choose the value of \code{k} with the largest overall average silhouette width. } \note{ In the silhouette plot, observation labels are only printed when the number of observations is less than \code{nmax.lab} (40, by default), for readability. Moreover, observation labels are truncated to maximally \code{max.strlen} (5) characters. \cr For more flexibility, use \code{plot(silhouette(x), ...)}, see \code{\link{plot.silhouette}}. } \references{ Rousseeuw, P.J. (1987) Silhouettes: A graphical aid to the interpretation and validation of cluster analysis. \emph{J. Comput. Appl. Math.}, \bold{20}, 53--65. Further, the references in \code{\link{plot.agnes}}. 
} \seealso{ \code{\link{partition.object}}, \code{\link{clusplot.partition}}, \code{\link{clusplot.default}}, \code{\link{pam}}, \code{\link{pam.object}}, \code{\link{clara}}, \code{\link{clara.object}}, \code{\link{fanny}}, \code{\link{fanny.object}}, \code{\link{par}}. } \examples{ ## generate 25 objects, divided into 2 clusters. x <- rbind(cbind(rnorm(10,0,0.5), rnorm(10,0,0.5)), cbind(rnorm(15,5,0.5), rnorm(15,5,0.5))) plot(pam(x, 2)) ## Save space not keeping data in clus.object, and still clusplot() it: data(xclara) cx <- clara(xclara, 3, keep.data = FALSE) cx$data # is NULL plot(cx, data = xclara) } \keyword{cluster} \keyword{hplot} % Converted by Sd2Rd version 0.3-2. cluster/man/plot.mona.Rd0000644000176000001440000000406010370161217014732 0ustar ripleyusers\name{plot.mona} \alias{plot.mona} \title{Banner of Monothetic Divisive Hierarchical Clusterings} \description{ Creates the banner of a \code{mona} object. } \usage{ \method{plot}{mona}(x, main = paste("Banner of ", deparse(x$call)), sub = NULL, xlab = "Separation step", col = c(2,0), axes = TRUE, adj = 0, nmax.lab = 35, max.strlen = 5, \dots) } \arguments{ \item{x}{an object of class \code{"mona"}, typically created by \code{\link{mona}(.)}.} \item{main,sub}{main and sub titles for the plot, with convenient defaults. See documentation in \code{\link{plot.default}}.} \item{xlab}{x axis label, see \code{\link{title}}.} \item{col,adj}{graphical parameters passed to \code{\link{bannerplot}()}.} \item{axes}{logical, indicating if (labeled) axes should be drawn.} \item{nmax.lab}{integer indicating the number of labels which is considered too large for labeling.} \item{max.strlen}{positive integer giving the length to which strings are truncated in labeling.} \item{\dots}{further graphical arguments are passed to \code{\link{bannerplot}()} and \code{\link{text}}.} } \section{Side Effects}{ A banner is plotted on the current graphics device. } \details{ Plots the separation step at which clusters are splitted. 
The observations are given in the order found by the \code{mona} algorithm, the numbers in the \code{step} vector are represented as bars between the observations. When a long bar is drawn between two observations, those observations have the same value for each variable. See chapter 7 of Kaufman and Rousseeuw (1990). } \note{ In the banner plot, observation labels are only printed when the number of observations is limited less than \code{nmax.lab} (35, by default), for readability. Moreover, observation labels are truncated to maximally \code{max.strlen} (5) characters. } \references{see those in \code{\link{plot.agnes}}.} \seealso{ \code{\link{mona}}, \code{\link{mona.object}}, \code{\link{par}}. } \keyword{cluster} \keyword{hplot} % Converted by Sd2Rd version 0.3-2. cluster/man/plot.diana.Rd0000644000176000001440000000662410631261142015062 0ustar ripleyusers\name{plot.diana} %% almost identical to ./plot.agnes.Rd and quite similar to ./plot.mona.Rd \alias{plot.diana} \title{Plots of a Divisive Hierarchical Clustering} \description{ Creates plots for visualizing a \code{diana} object. } \usage{ \method{plot}{diana}(x, ask = FALSE, which.plots = NULL, main = NULL, sub = paste("Divisive Coefficient = ", round(x$dc, digits = 2)), adj = 0, nmax.lab = 35, max.strlen = 5, xax.pretty = TRUE, \dots) } \arguments{ \item{x}{an object of class \code{"diana"}, typically created by \code{\link{diana}(.)}.} \item{ask}{logical; if true and \code{which.plots} is \code{NULL}, \code{plot.diana} operates in interactive mode, via \code{\link{menu}}.} \item{which.plots}{integer vector or NULL (default), the latter producing both plots. Otherwise, \code{which.plots} must contain integers of \code{1} for a \emph{banner} plot or \code{2} for a dendrogram or ``clustering tree''.} \item{main, sub}{main and sub title for the plot, each with a convenient default. 
See documentation for these arguments in \code{\link{plot.default}}.} \item{adj}{for label adjustment in \code{\link{bannerplot}()}.} \item{nmax.lab}{integer indicating the number of labels which is considered too large for single-name labelling the banner plot.} \item{max.strlen}{positive integer giving the length to which strings are truncated in banner plot labeling.} \item{xax.pretty}{logical or integer indicating if \code{\link{pretty}(*, n = xax.pretty)} should be used for the x axis. \code{xax.pretty = FALSE} is for back compatibility.} \item{\dots}{graphical parameters (see \code{\link{par}}) may also be supplied and are passed to \code{\link{bannerplot}()} or \code{\link{pltree}()}, respectively.} } \section{Side Effects}{ An appropriate plot is produced on the current graphics device. This can be one or both of the following choices: \cr Banner \cr Clustering tree } \details{ When \code{ask = TRUE}, rather than producing each plot sequentially, \code{plot.diana} displays a menu listing all the plots that can be produced. If the menu is not desired but a pause between plots is still wanted one must set \code{par(ask= TRUE)} before invoking the plot command. The banner displays the hierarchy of clusters, and is equivalent to a tree. See Rousseeuw (1986) or chapter 6 of Kaufman and Rousseeuw (1990). The banner plots the diameter of each cluster being splitted. The observations are listed in the order found by the \code{diana} algorithm, and the numbers in the \code{height} vector are represented as bars between the observations. The leaves of the clustering tree are the original observations. A branch splits up at the diameter of the cluster being splitted. } \note{ In the banner plot, observation labels are only printed when the number of observations is limited less than \code{nmax.lab} (35, by default), for readability. Moreover, observation labels are truncated to maximally \code{max.strlen} (5) characters. 
} \references{see those in \code{\link{plot.agnes}}.} \seealso{ \code{\link{diana}}, \code{\link{diana.object}}, \code{\link{twins.object}}, \code{\link{par}}. } \examples{ example(diana)# -> dv <- diana(....) plot(dv, which = 1, nmax.lab = 100) ## wider labels : op <- par(mar = par("mar") + c(0, 2, 0,0)) plot(dv, which = 1, nmax.lab = 100, max.strlen = 12) par(op) } \keyword{cluster} \keyword{hplot} cluster/man/plot.agnes.Rd0000644000176000001440000001100711705523412015076 0ustar ripleyusers\name{plot.agnes} %% almost identical to ./plot.diana.Rd and quite similar to ./plot.mona.Rd \alias{plot.agnes} \title{Plots of an Agglomerative Hierarchical Clustering} \description{ Creates plots for visualizing an \code{agnes} object. } \usage{ \method{plot}{agnes}(x, ask = FALSE, which.plots = NULL, main = NULL, sub = paste("Agglomerative Coefficient = ",round(x$ac, digits = 2)), adj = 0, nmax.lab = 35, max.strlen = 5, xax.pretty = TRUE, \dots) } \arguments{ \item{x}{an object of class \code{"agnes"}, typically created by \code{\link{agnes}(.)}.} \item{ask}{logical; if true and \code{which.plots} is \code{NULL}, \code{plot.agnes} operates in interactive mode, via \code{\link{menu}}.} \item{which.plots}{integer vector or NULL (default), the latter producing both plots. Otherwise, \code{which.plots} must contain integers of \code{1} for a \emph{banner} plot or \code{2} for a dendrogram or ``clustering tree''.} \item{main, sub}{main and sub title for the plot, with convenient defaults. See documentation for these arguments in \code{\link{plot.default}}.} \item{adj}{for label adjustment in \code{\link{bannerplot}()}.} \item{nmax.lab}{integer indicating the number of labels which is considered too large for single-name labelling the banner plot.} \item{max.strlen}{positive integer giving the length to which strings are truncated in banner plot labeling.} \item{xax.pretty}{logical or integer indicating if \code{\link{pretty}(*, n = xax.pretty)} should be used for the x axis. 
\code{xax.pretty = FALSE} is for back compatibility.} \item{\dots}{graphical parameters (see \code{\link{par}}) may also be supplied and are passed to \code{\link{bannerplot}()} or \code{pltree()} (see \code{\link{pltree.twins}}), respectively.} } \section{Side Effects}{ Appropriate plots are produced on the current graphics device. This can be one or both of the following choices: \cr Banner \cr Clustering tree } \details{ When \code{ask = TRUE}, rather than producing each plot sequentially, \code{plot.agnes} displays a menu listing all the plots that can be produced. If the menu is not desired but a pause between plots is still wanted, one must set \code{par(ask= TRUE)} before invoking the plot command. The banner displays the hierarchy of clusters, and is equivalent to a tree. See Rousseeuw (1986) or chapter 5 of Kaufman and Rousseeuw (1990). The banner plots distances at which observations and clusters are merged. The observations are listed in the order found by the \code{agnes} algorithm, and the numbers in the \code{height} vector are represented as bars between the observations. The leaves of the clustering tree are the original observations. Two branches come together at the distance between the two clusters being merged. For more customization of the plots, rather call \code{\link{bannerplot}} and \code{pltree()}, i.e., its method \code{\link{pltree.twins}}, respectively, directly with corresponding arguments, e.g., \code{xlab} or \code{ylab}. } \note{ In the banner plot, observation labels are only printed when the number of observations is less than \code{nmax.lab} (35, by default), for readability. Moreover, observation labels are truncated to maximally \code{max.strlen} (5) characters. For the dendrogram, more flexibility than via \code{pltree()} is provided by \code{dg <- \link{as.dendrogram}(x)} and plotting \code{dg} via \code{\link[stats]{plot.dendrogram}}. } \references{ Kaufman, L. and Rousseeuw, P.J.
(1990) \emph{Finding Groups in Data: An Introduction to Cluster Analysis}. Wiley, New York. Rousseeuw, P.J. (1986). A visual display for hierarchical classification, in \emph{Data Analysis and Informatics 4}; edited by E. Diday, Y. Escoufier, L. Lebart, J. Pages, Y. Schektman, and R. Tomassone. North-Holland, Amsterdam, 743--748. Struyf, A., Hubert, M. and Rousseeuw, P.J. (1997) Integrating Robust Clustering Techniques in S-PLUS, \emph{Computational Statistics and Data Analysis}, \bold{26}, 17--37. } \seealso{ \code{\link{agnes}} and \code{\link{agnes.object}}; \code{\link{bannerplot}}, \code{\link{pltree.twins}}, and \code{\link{par}}. } \examples{ ## Can also pass `labels' to pltree() and bannerplot(): data(iris) cS <- as.character(Sp <- iris$Species) cS[Sp == "setosa"] <- "S" cS[Sp == "versicolor"] <- "V" cS[Sp == "virginica"] <- "g" ai <- agnes(iris[, 1:4]) plot(ai, labels = cS, nmax = 150)# bannerplot labels are mess } \keyword{cluster} \keyword{hplot} cluster/man/plantTraits.Rd0000644000176000001440000001075610631261142015337 0ustar ripleyusers\name{plantTraits} \alias{plantTraits} \title{Plant Species Traits Data} \docType{data} \encoding{latin1} \description{ This dataset constitutes a description of 136 plant species according to biological attributes (morphological or reproductive) } \usage{data(plantTraits) } \format{ A data frame with 136 observations on the following 31 variables. 
\describe{ \item{\code{pdias}}{Diaspore mass (mg)} \item{\code{longindex}}{Seed bank longevity} \item{\code{durflow}}{Flowering duration} \item{\code{height}}{Plant height, an ordered factor with levels \code{1} < \code{2} < \dots < \code{8}.} % Plant height}{an ordered factor with levels \code{1} <10cm < \code{2} 10-30cm< \code{3} 30-60cm< \code{4}60-100cm < \code{5}1-3m < \code{6}3-6m < \code{7}:6-15m < \code{8}>15m} \item{\code{begflow}}{Time of first flowering, an ordered factor with levels \code{1} < \code{2} < \code{3} < \code{4} < \code{5} < \code{6} < \code{7} < \code{8} < \code{9}} % {\code{begflow}}{an ordered factor with levels \code{1} january< \code{2} february< \code{3} march< \code{4}april < \code{5} may< \code{6} june< \code{7} july< \code{8}august < \code{9}september} \item{\code{mycor}}{Mycorrhizas, an ordered factor with levels \code{0}never < \code{1} sometimes< \code{2}always} \item{\code{vegaer}}{aerial vegetative propagation, an ordered factor with levels \code{0}never < \code{1} present but limited< \code{2}important.} \item{\code{vegsout}}{underground vegetative propagation, an ordered factor with 3 levels identical to \code{vegaer} above.} \item{\code{autopoll}}{selfing pollination, an ordered factor with levels \code{0}never < \code{1}rare < \code{2} often< the rule\code{3}} \item{\code{insects}}{insect pollination, an ordered factor with 5 levels \code{0} < \dots < \code{4}.} \item{\code{wind}}{wind pollination, an ordered factor with 5 levels \code{0} < \dots < \code{4}.} \item{\code{lign}}{a binary factor with levels \code{0:1}, indicating if plant is woody.} \item{\code{piq}}{a binary factor indicating if plant is thorny.} \item{\code{ros}}{a binary factor indicating if plant is rosette.} \item{\code{semiros}}{semi-rosette plant, a binary factor (\code{0}: no; \code{1}: yes).} \item{\code{leafy}}{leafy plant, a binary factor.} \item{\code{suman}}{summer annual, a binary factor.} \item{\code{winan}}{winter annual, a binary factor.} 
\item{\code{monocarp}}{monocarpic perennial, a binary factor.} \item{\code{polycarp}}{polycarpic perennial, a binary factor.} \item{\code{seasaes}}{seasonal aestival leaves, a binary factor.} \item{\code{seashiv}}{seasonal hibernal leaves, a binary factor.} \item{\code{seasver}}{seasonal vernal leaves, a binary factor.} \item{\code{everalw}}{leaves always evergreen, a binary factor.} \item{\code{everparti}}{leaves partially evergreen, a binary factor.} \item{\code{elaio}}{fruits with an elaiosome (dispersed by ants), a binary factor.} \item{\code{endozoo}}{endozoochorous fruits, a binary factor.} \item{\code{epizoo}}{epizoochorous fruits, a binary factor.} \item{\code{aquat}}{aquatic dispersal fruits, a binary factor.} \item{\code{windgl}}{wind dispersed fruits, a binary factor.} \item{\code{unsp}}{unspecialized mechanism of seed dispersal, a binary factor.} } } \details{ Most of factor attributes are not disjunctive. For example, a plant can be usually pollinated by insects but sometimes self-pollination can occured. } \source{ Vallet, Jeanne (2005) \emph{Structuration de communautés végétales et analyse comparative de traits biologiques le long d'un gradient d'urbanisation}. 
Mémoire de Master 2 'Ecologie-Biodiversité-Evolution'; Université Paris Sud XI, 30p.+ annexes (in french) } % \references{ % ~~ possibly secondary sources and usages ~~ % } \examples{ data(plantTraits) ## Calculation of a dissimilarity matrix library(cluster) dai.b <- daisy(plantTraits, type = list(ordratio = 4:11, symm = 12:13, asymm = 14:31)) ## Hierarchical classification agn.trts <- agnes(dai.b, method="ward") plot(agn.trts, which.plots = 2, cex= 0.6) plot(agn.trts, which.plots = 1) cutree6 <- cutree(agn.trts, k=6) cutree6 ## Principal Coordinate Analysis cmdsdai.b <- cmdscale(dai.b, k=6) plot(cmdsdai.b[, 1:2], asp = 1, col = cutree6) } \keyword{datasets} % plant attribute database, mixed type variables, dissimilarity matrix (DAISY), Hierarchical Classification (AGNES) % Principal Coordinates Analysis (CMDSCALE) cluster/man/partition.object.Rd0000644000176000001440000000544410370161217016310 0ustar ripleyusers\name{partition.object} \alias{partition}% == class \alias{partition.object} \title{Partitioning Object} \description{ The objects of class \code{"partition"} represent a partitioning of a dataset into clusters. } \section{GENERATION}{ These objects are returned from \code{pam}, \code{clara} or \code{fanny}. } \section{METHODS}{ The \code{"partition"} class has a method for the following generic functions: \code{plot}, \code{clusplot}. } \section{INHERITANCE}{ The following classes inherit from class \code{"partition"} : \code{"pam"}, \code{"clara"} and \code{"fanny"}. See \code{\link{pam.object}}, \code{\link{clara.object}} and \code{\link{fanny.object}} for details. } \value{a \code{"partition"} object is a list with the following (and typically more) components: \item{clustering}{ the clustering vector. 
An integer vector of length \eqn{n}, the number of observations, giving for each observation the number (`id') of the cluster to which it belongs.} \item{call}{the matched \code{\link{call}} generating the object.} \item{silinfo}{ a list with all \emph{silhouette} information, only available when the number of clusters is non-trivial, i.e., \eqn{1 < k < n} and then has the following components, see \code{\link{silhouette}} \describe{ \item{widths}{an (n x 3) matrix, as returned by \code{\link{silhouette}()}, with for each observation i the cluster to which i belongs, as well as the neighbor cluster of i (the cluster, not containing i, for which the average dissimilarity between its observations and i is minimal), and the silhouette width \eqn{s(i)} of the observation. } \item{clus.avg.widths}{the average silhouette width per cluster.} \item{avg.width}{the average silhouette width for the dataset, i.e., simply the average of \eqn{s(i)} over all observations \eqn{i}.} }% describe This information is also needed to construct a \emph{silhouette plot} of the clustering, see \code{\link{plot.partition}}. Note that \code{avg.width} can be maximized over different clusterings (e.g. with varying number of clusters) to choose an \emph{optimal} clustering.%% see an example or a demo << FIXME >> } \item{objective}{value of criterion maximized during the partitioning algorithm, may more than one entry for different stages.} \item{diss}{ an object of class \code{"dissimilarity"}, representing the total dissimilarity matrix of the dataset (or relevant subset, e.g. for \code{clara}). } \item{data}{ a matrix containing the original or standardized data. This might be missing to save memory or when a dissimilarity matrix was given as input structure to the clustering method. } } \seealso{\code{\link{pam}}, \code{\link{clara}}, \code{\link{fanny}}. 
} \keyword{cluster} cluster/man/pam.object.Rd0000644000176000001440000000657710370161217015064 0ustar ripleyusers\name{pam.object} \alias{pam.object} \title{Partitioning Around Medoids (PAM) Object} \description{ The objects of class \code{"pam"} represent a partitioning of a dataset into clusters. } \section{GENERATION}{ These objects are returned from \code{\link{pam}}.} \section{METHODS}{ The \code{"pam"} class has methods for the following generic functions: \code{print}, \code{summary}. } \section{INHERITANCE}{ The class \code{"pam"} inherits from \code{"partition"}. Therefore, the generic functions \code{plot} and \code{clusplot} can be used on a \code{pam} object. } \value{ A legitimate \code{pam} object is a list with the following components: \item{medoids}{ the medoids or representative objects of the clusters. If a dissimilarity matrix was given as input to \code{pam}, then a vector of numbers or labels of observations is given, else \code{medoids} is a matrix with in each row the coordinates of one medoid.} \item{id.med}{integer vector of \emph{indices} giving the medoid observation numbers.} \item{clustering}{the clustering vector, see \code{\link{partition.object}}.} \item{objective}{the objective function after the first and second step of the \code{pam} algorithm.} \item{isolation}{ vector with length equal to the number of clusters, specifying which clusters are isolated clusters (L- or L*-clusters) and which clusters are not isolated.\cr A cluster is an L*-cluster iff its diameter is smaller than its separation. A cluster is an L-cluster iff for each observation i the maximal dissimilarity between i and any other observation of the cluster is smaller than the minimal dissimilarity between i and any observation of another cluster. Clearly each L*-cluster is also an L-cluster. } \item{clusinfo}{ matrix, each row gives numerical information for one cluster. 
These are the cardinality of the cluster (number of observations), the maximal and average dissimilarity between the observations in the cluster and the cluster's medoid, %% FIXME: Now differs from clara.object.Rd: the diameter of the cluster (maximal dissimilarity between two observations of the cluster), and the separation of the cluster (minimal dissimilarity between an observation of the cluster and an observation of another cluster). } \item{silinfo}{list with silhouette width information, see \code{\link{partition.object}}.} \item{diss}{dissimilarity (maybe NULL), see \code{\link{partition.object}}.} \item{call}{generating call, see \code{\link{partition.object}}.} \item{data}{(possibibly standardized) see \code{\link{partition.object}}.} } \seealso{ \code{\link{pam}}, \code{\link{dissimilarity.object}}, \code{\link{partition.object}}, \code{\link{plot.partition}}. } \examples{ ## Use the silhouette widths for assessing the best number of clusters, ## following a one-dimensional example from Christian Hennig : ## x <- c(rnorm(50), rnorm(50,mean=5), rnorm(30,mean=15)) asw <- numeric(20) ## Note that "k=1" won't work! for (k in 2:20) asw[k] <- pam(x, k) $ silinfo $ avg.width k.best <- which.max(asw) cat("silhouette-optimal number of clusters:", k.best, "\n") plot(1:20, asw, type= "h", main = "pam() clustering assessment", xlab= "k (# clusters)", ylab = "average silhouette width") axis(1, k.best, paste("best",k.best,sep="\n"), col = "red", col.axis = "red") } \keyword{cluster} cluster/man/pam.Rd0000644000176000001440000001770711712175353013623 0ustar ripleyusers\name{pam} \alias{pam} \title{Partitioning Around Medoids} \description{ Partitioning (clustering) of the data into \code{k} clusters ``around medoids'', a more robust version of K-means. 
} \usage{ pam(x, k, diss = inherits(x, "dist"), metric = "euclidean", medoids = NULL, stand = FALSE, cluster.only = FALSE, do.swap = TRUE, keep.diss = !diss && !cluster.only && n < 100, keep.data = !diss && !cluster.only, pamonce = FALSE, trace.lev = 0) } \arguments{ \item{x}{ data matrix or data frame, or dissimilarity matrix or object, depending on the value of the \code{diss} argument. In case of a matrix or data frame, each row corresponds to an observation, and each column corresponds to a variable. All variables must be numeric. Missing values (\code{\link{NA}}s) \emph{are} allowed---as long as every pair of observations has at least one case not missing. In case of a dissimilarity matrix, \code{x} is typically the output of \code{\link{daisy}} or \code{\link{dist}}. Also a vector of length n*(n-1)/2 is allowed (where n is the number of observations), and will be interpreted in the same way as the output of the above-mentioned functions. Missing values (NAs) are \emph{not} allowed. } \item{k}{positive integer specifying the number of clusters, less than the number of observations.} \item{diss}{ logical flag: if TRUE (default for \code{dist} or \code{dissimilarity} objects), then \code{x} will be considered as a dissimilarity matrix. If FALSE, then \code{x} will be considered as a matrix of observations by variables. } \item{metric}{ character string specifying the metric to be used for calculating dissimilarities between observations.\cr The currently available options are "euclidean" and "manhattan". Euclidean distances are root sum-of-squares of differences, and manhattan distances are the sum of absolute differences. If \code{x} is already a dissimilarity matrix, then this argument will be ignored. 
} \item{medoids}{NULL (default) or length-\code{k} vector of integer indices (in \code{1:n}) specifying initial medoids instead of using the \sQuote{\emph{build}} algorithm.} \item{stand}{logical; if true, the measurements in \code{x} are standardized before calculating the dissimilarities. Measurements are standardized for each variable (column), by subtracting the variable's mean value and dividing by the variable's mean absolute deviation. If \code{x} is already a dissimilarity matrix, then this argument will be ignored.} \item{cluster.only}{logical; if true, only the clustering will be computed and returned, see details.} \item{do.swap}{logical indicating if the \bold{swap} phase should happen. The default, \code{TRUE}, correspond to the original algorithm. On the other hand, the \bold{swap} phase is much more computer intensive than the \bold{build} one for large \eqn{n}, so can be skipped by \code{do.swap = FALSE}.} \item{keep.diss, keep.data}{logicals indicating if the dissimilarities and/or input data \code{x} should be kept in the result. Setting these to \code{FALSE} can give much smaller results and hence even save memory allocation \emph{time}.} \item{pamonce}{logical or integer in \code{0:2} specifying algorithmic short cuts as proposed by Reynolds et al. (2006), see below.} \item{trace.lev}{integer specifying a trace level for printing diagnostics during the build and swap phase of the algorithm. Default \code{0} does not print anything; higher values print increasingly more.} } \value{ an object of class \code{"pam"} representing the clustering. See \code{?\link{pam.object}} for details. } \details{ \code{pam} is fully described in chapter 2 of Kaufman and Rousseeuw (1990). 
Compared to the k-means approach in \code{kmeans}, the function \code{pam} has the following features: (a) it also accepts a dissimilarity matrix; (b) it is more robust because it minimizes a sum of dissimilarities instead of a sum of squared euclidean distances; (c) it provides a novel graphical display, the silhouette plot (see \code{plot.partition}) (d) it allows to select the number of clusters using \code{mean(\link{silhouette}(pr))} on the result \code{pr <- pam(..)}, or directly its component \code{pr$silinfo$avg.width}, see also \code{\link{pam.object}}. When \code{cluster.only} is true, the result is simply a (possibly named) integer vector specifying the clustering, i.e.,\cr \code{pam(x,k, cluster.only=TRUE)} is the same as \cr \code{pam(x,k)$clustering} but computed more efficiently. The \code{pam}-algorithm is based on the search for \code{k} representative objects or medoids among the observations of the dataset. These observations should represent the structure of the data. After finding a set of \code{k} medoids, \code{k} clusters are constructed by assigning each observation to the nearest medoid. The goal is to find \code{k} representative objects which minimize the sum of the dissimilarities of the observations to their closest representative object. \cr By default, when \code{medoids} are not specified, the algorithm first looks for a good initial set of medoids (this is called the \bold{build} phase). Then it finds a local minimum for the objective function, that is, a solution such that there is no single switch of an observation with a medoid that will decrease the objective (this is called the \bold{swap} phase). When the \code{medoids} are specified, their order does \emph{not} matter; in general, the algorithms have been designed to not depend on the order of the observations. The \code{pamonce} option, new in cluster 1.14.2 (Jan. 2012), has been proposed by Matthias Studer, University of Geneva, based on the findings by Reynolds et al. 
(2006). The default \code{FALSE} (or integer \code{0}) corresponds to the original \dQuote{swap} algorithm, whereas \code{pamonce = 1} (or \code{TRUE}), corresponds to the first proposal .... %% FIXME and \code{pamonce = 2} additionally implements the second proposal as well. % FIXME more details } \note{ For large datasets, \code{pam} may need too much memory or too much computation time since both are \eqn{O(n^2)}. Then, \code{\link{clara}()} is preferable, see its documentation. } \author{Kaufman and Rousseeuw's orginal Fortran code was translated to C and augmented in several ways, e.g. to allow \code{cluster.only=TRUE} or \code{do.swap=FALSE}, by Martin Maechler. \cr Matthias Studer, Univ.Geneva provided the \code{pamonce} implementation. } \references{ %% the pamonce options : Reynolds, A., Richards, G., de la Iglesia, B. and Rayward-Smith, V. (1992) Clustering rules: A comparison of partitioning and hierarchical clustering algorithms; \emph{Journal of Mathematical Modelling and Algorithms} \bold{5}, 475--504 (\url{http://dx.doi.org/10.1007/s10852-005-9022-1}). } \seealso{ \code{\link{agnes}} for background and references; \code{\link{pam.object}}, \code{\link{clara}}, \code{\link{daisy}}, \code{\link{partition.object}}, \code{\link{plot.partition}}, \code{\link{dist}}. } \examples{ ## generate 25 objects, divided into 2 clusters. x <- rbind(cbind(rnorm(10,0,0.5), rnorm(10,0,0.5)), cbind(rnorm(15,5,0.5), rnorm(15,5,0.5))) pamx <- pam(x, 2) pamx summary(pamx) plot(pamx) ## use obs. 1 & 16 as starting medoids -- same result (typically) (p2m <- pam(x, 2, medoids = c(1,16))) p3m <- pam(x, 3, trace = 2) ## rather stupid initial medoids: (p3m. 
<- pam(x, 3, medoids = 3:1, trace = 1)) \dontshow{ ii <- pmatch(c("obj","call"), names(pamx)) stopifnot(all.equal(pamx [-ii], p2m [-ii], tol=1e-14), all.equal(pamx$objective[2], p2m$objective[2], tol=1e-14)) } pam(daisy(x, metric = "manhattan"), 2, diss = TRUE) data(ruspini) ## Plot similar to Figure 4 in Stryuf et al (1996) \dontrun{plot(pam(ruspini, 4), ask = TRUE)} \dontshow{plot(pam(ruspini, 4))} } \keyword{cluster} cluster/man/mona.object.Rd0000644000176000001440000000276107554320503015235 0ustar ripleyusers\name{mona.object} \alias{mona.object} \title{Monothetic Analysis (MONA) Object} \description{ The objects of class \code{"mona"} represent the divisive hierarchical clustering of a dataset with only binary variables (measurements). This class of objects is returned from \code{\link{mona}}. } \section{METHODS}{ The \code{"mona"} class has methods for the following generic functions: \code{print}, \code{summary}, \code{plot}. } \value{ A legitimate \code{mona} object is a list with the following components: \item{data}{ matrix with the same dimensions as the original data matrix, but with factors coded as 0 and 1, and all missing values replaced. } \item{order}{ a vector giving a permutation of the original observations to allow for plotting, in the sense that the branches of a clustering tree will not cross. } \item{order.lab}{ a vector similar to \code{order}, but containing observation labels instead of observation numbers. This component is only available if the original observations were labelled. } \item{variable}{ vector of length n-1 where n is the number of observations, specifying the variables used to separate the observations of \code{order}. } \item{step}{ vector of length n-1 where n is the number of observations, specifying the separation steps at which the observations of \code{order} are separated. } } \seealso{\code{\link{mona}} for examples etc, \code{\link{plot.mona}}. 
} \keyword{cluster} cluster/man/mona.Rd0000644000176000001440000000611110370161217013754 0ustar ripleyusers\name{mona} \alias{mona} \title{MONothetic Analysis Clustering of Binary Variables} \description{ Returns a list representing a divisive hierarchical clustering of a dataset with binary variables only. } \usage{ mona(x) } \arguments{ \item{x}{ data matrix or data frame in which each row corresponds to an observation, and each column corresponds to a variable. All variables must be binary. A limited number of missing values (NAs) is allowed. Every observation must have at least one value different from NA. No variable should have half of its values missing. There must be at least one variable which has no missing values. A variable with all its non-missing values identical, is not allowed. } } \value{ an object of class \code{"mona"} representing the clustering. See \code{mona.object} for details. } \details{ \code{mona} is fully described in chapter 7 of Kaufman and Rousseeuw (1990). It is "monothetic" in the sense that each division is based on a single (well-chosen) variable, whereas most other hierarchical methods (including \code{agnes} and \code{diana}) are "polythetic", i.e. they use all variables together. The \code{mona}-algorithm constructs a hierarchy of clusterings, starting with one large cluster. Clusters are divided until all observations in the same cluster have identical values for all variables.\cr At each stage, all clusters are divided according to the values of one variable. A cluster is divided into one cluster with all observations having value 1 for that variable, and another cluster with all observations having value 0 for that variable. The variable used for splitting a cluster is the variable with the maximal total association to the other variables, according to the observations in the cluster to be splitted. 
The association between variables f and g is given by a(f,g)*d(f,g) - b(f,g)*c(f,g), where a(f,g), b(f,g), c(f,g), and d(f,g) are the numbers in the contingency table of f and g. [That is, a(f,g) (resp. d(f,g)) is the number of observations for which f and g both have value 0 (resp. value 1); b(f,g) (resp. c(f,g)) is the number of observations for which f has value 0 (resp. 1) and g has value 1 (resp. 0).] The total association of a variable f is the sum of its associations to all variables. This algorithm does not work with missing values, therefore the data are revised, e.g. all missing values are filled in. To do this, the same measure of association between variables is used as in the algorithm. When variable f has missing values, the variable g with the largest absolute association to f is looked up. When the association between f and g is positive, any missing value of f is replaced by the value of g for the same observation. If the association between f and g is negative, then any missing value of f is replaced by the value of 1-g for the same observation. } \seealso{ \code{\link{agnes}} for background and references; \code{\link{mona.object}}, \code{\link{plot.mona}}. } \examples{ data(animals) ma <- mona(animals) ma ## Plot similar to Figure 10 in Struyf et al (1996) plot(ma) } \keyword{cluster} cluster/man/lower.to.upper.tri.inds.Rd0000644000176000001440000000216207420053713017463 0ustar ripleyusers\name{lower.to.upper.tri.inds} \alias{lower.to.upper.tri.inds} \alias{upper.to.lower.tri.inds} \title{Permute Indices for Triangular Matrices} \description{ Compute index vectors for extracting or reordering of lower or upper triangular matrices that are stored as contiguous vectors. } \usage{ lower.to.upper.tri.inds(n) upper.to.lower.tri.inds(n) } \arguments{ \item{n}{integer larger than 1.} } \value{ integer vector containing a permutation of \code{1:N} where \eqn{N = n(n-1)/2}. 
} \note{ these functions are mainly for internal use in the cluster package, and may not remain available (unless we see a good reason). } \seealso{\code{\link{upper.tri}}, \code{\link{lower.tri}} with a related purpose.} \examples{ m5 <- matrix(NA,5,5) m <- m5; m[lower.tri(m)] <- upper.to.lower.tri.inds(5); m m <- m5; m[upper.tri(m)] <- lower.to.upper.tri.inds(5); m stopifnot(lower.to.upper.tri.inds(2) == 1, lower.to.upper.tri.inds(3) == 1:3, upper.to.lower.tri.inds(3) == 1:3, sort(upper.to.lower.tri.inds(5)) == 1:10, sort(lower.to.upper.tri.inds(6)) == 1:15) } \keyword{array} \keyword{utilities} cluster/man/flower.Rd0000644000176000001440000000326711711740461014335 0ustar ripleyusers\name{flower} \alias{flower} \title{Flower Characteristics} \usage{data(flower)} \description{8 characteristics for 18 popular flowers.} \format{ A data frame with 18 observations on 8 variables: \tabular{rll}{ [ , "V1"] \tab factor \tab winters \cr [ , "V2"] \tab factor \tab shadow \cr [ , "V3"] \tab factor \tab tubers \cr [ , "V4"] \tab factor \tab color \cr [ , "V5"] \tab ordered \tab soil \cr [ , "V6"] \tab ordered \tab preference \cr [ , "V7"] \tab numeric \tab height \cr [ , "V8"] \tab numeric \tab distance } \describe{ \item{V1}{winters, is binary and indicates whether the plant may be left in the garden when it freezes.} \item{V2}{shadow, is binary and shows whether the plant needs to stand in the shadow.} \item{V3}{tubers, is asymmetric binary and distinguishes between plants with tubers and plants that grow in any other way.} \item{V4}{color, is nominal and specifies the flower's color (1 = white, 2 = yellow, 3 = pink, 4 = red, 5 = blue).} \item{V5}{soil, is ordinal and indicates whether the plant grows in dry (1), normal (2), or wet (3) soil.} \item{V6}{preference, is ordinal and gives someone's preference ranking going from 1 to 18.} \item{V7}{height, is interval scaled, the plant's height in centimeters.} \item{V8}{distance, is interval scaled, the distance in centimeters 
that should be left between the plants.} } } \references{ Struyf, Hubert and Rousseeuw (1996), see \code{\link{agnes}}. } \examples{ data(flower) ## Example 2 in ref daisy(flower, type = list(asymm = 3)) daisy(flower, type = list(asymm = c(1, 3), ordratio = 7)) } \keyword{datasets} cluster/man/fanny.object.Rd0000644000176000001440000000533310500041124015374 0ustar ripleyusers\name{fanny.object} \alias{fanny.object} \title{Fuzzy Analysis (FANNY) Object} \description{ The objects of class \code{"fanny"} represent a fuzzy clustering of a dataset. } \section{GENERATION}{ These objects are returned from \code{\link{fanny}}. } \section{METHODS}{ The \code{"fanny"} class has methods for the following generic functions: \code{print}, \code{summary}. } \section{INHERITANCE}{ The class \code{"fanny"} inherits from \code{"partition"}. Therefore, the generic functions \code{plot} and \code{clusplot} can be used on a \code{fanny} object. } \value{ A legitimate \code{fanny} object is a list with the following components: \item{membership}{ matrix containing the memberships for each pair consisting of an observation and a cluster. } \item{memb.exp}{the membership exponent used in the fitting criterion.} \item{coeff}{ Dunn's partition coefficient \eqn{F(k)} of the clustering, where \eqn{k} is the number of clusters. \eqn{F(k)} is the sum of all \emph{squared} membership coefficients, divided by the number of observations. Its value is between \eqn{1/k} and 1. The normalized form of the coefficient is also given. It is defined as \eqn{(F(k) - 1/k) / (1 - 1/k)}, and ranges between 0 and 1. A low value of Dunn's coefficient indicates a very fuzzy clustering, whereas a value close to 1 indicates a near-crisp clustering. 
} \item{clustering}{ the clustering vector of the nearest crisp clustering, see \code{\link{partition.object}}.} \item{k.crisp}{integer (\eqn{\le k}{<= k}) giving the number of \emph{crisp} clusters; can be less than \eqn{k}, where it's recommended to decrease \code{memb.exp}.} \item{objective}{ named vector containing the minimal value of the objective function reached by the FANNY algorithm and the relative convergence tolerance \code{tol} used.% + still has 'iterations' for back-compatibility } \item{convergence}{ named vector with \code{iterations}, the number of iterations needed and \code{converged} indicating if the algorithm converged (in \code{maxit} iterations within convergence tolerance \code{tol}). } \item{diss}{ an object of class \code{"dissimilarity"}, see \code{\link{partition.object}}.} \item{call}{generating call, see \code{\link{partition.object}}.} \item{silinfo}{ list with silhouette information of the nearest crisp clustering, see \code{\link{partition.object}}.} \item{data}{matrix, possibibly standardized, or NULL, see \code{\link{partition.object}}.} } \seealso{ \code{\link{fanny}}, \code{\link{print.fanny}}, \code{\link{dissimilarity.object}}, \code{\link{partition.object}}, \code{\link{plot.partition}}. } \keyword{cluster} cluster/man/fanny.Rd0000644000176000001440000001521710421445365014152 0ustar ripleyusers\name{fanny} \alias{fanny} \title{Fuzzy Analysis Clustering} \description{ Computes a fuzzy clustering of the data into \code{k} clusters. } \usage{ fanny(x, k, diss = inherits(x, "dist"), memb.exp = 2, metric = c("euclidean", "manhattan", "SqEuclidean"), stand = FALSE, iniMem.p = NULL, cluster.only = FALSE, keep.diss = !diss && !cluster.only && n < 100, keep.data = !diss && !cluster.only, maxit = 500, tol = 1e-15, trace.lev = 0) } \arguments{ \item{x}{ data matrix or data frame, or dissimilarity matrix, depending on the value of the \code{diss} argument. 
In case of a matrix or data frame, each row corresponds to an observation, and each column corresponds to a variable. All variables must be numeric. Missing values (NAs) are allowed. In case of a dissimilarity matrix, \code{x} is typically the output of \code{\link{daisy}} or \code{\link{dist}}. Also a vector of length n*(n-1)/2 is allowed (where n is the number of observations), and will be interpreted in the same way as the output of the above-mentioned functions. Missing values (NAs) are not allowed. } \item{k}{integer giving the desired number of clusters. It is required that \eqn{0 < k < n/2} where \eqn{n} is the number of observations.} \item{diss}{ logical flag: if TRUE (default for \code{dist} or \code{dissimilarity} objects), then \code{x} is assumed to be a dissimilarity matrix. If FALSE, then \code{x} is treated as a matrix of observations by variables. } \item{memb.exp}{number \eqn{r} strictly larger than 1 specifying the \emph{membership exponent} used in the fit criterion; see the \sQuote{Details} below. Default: \code{2} which used to be hardwired inside FANNY.} \item{metric}{character string specifying the metric to be used for calculating dissimilarities between observations. Options are \code{"euclidean"} (default), \code{"manhattan"}, and \code{"SqEuclidean"}. Euclidean distances are root sum-of-squares of differences, and manhattan distances are the sum of absolute differences, and \code{"SqEuclidean"}, the \emph{squared} euclidean distances are sum-of-squares of differences. Using this last option is equivalent (but somewhat slower) to computing so called \dQuote{fuzzy C-means}. \cr If \code{x} is already a dissimilarity matrix, then this argument will be ignored. } \item{stand}{logical; if true, the measurements in \code{x} are standardized before calculating the dissimilarities. Measurements are standardized for each variable (column), by subtracting the variable's mean value and dividing by the variable's mean absolute deviation. 
If \code{x} is already a dissimilarity matrix, then this argument will be ignored.} \item{iniMem.p}{numeric \eqn{n \times k}{n * k} matrix or \code{NULL} (by default); can be used to specify a starting \code{membership} matrix, i.e., a matrix of non-negative numbers, each row summing to one. } %% FIXME: add example \item{cluster.only}{logical; if true, no silhouette information will be computed and returned, see details.}%% FIXME: add example \item{keep.diss, keep.data}{logicals indicating if the dissimilarities and/or input data \code{x} should be kept in the result. Setting these to \code{FALSE} can give smaller results and hence also save memory allocation \emph{time}.} \item{maxit, tol}{maximal number of iterations and default tolerance for convergence (relative convergence of the fit criterion) for the FANNY algorithm. The defaults \code{maxit = 500} and \code{tol = 1e-15} used to be hardwired inside the algorithm.} \item{trace.lev}{integer specifying a trace level for printing diagnostics during the C-internal algorithm. Default \code{0} does not print anything; higher values print increasingly more.} } \value{ an object of class \code{"fanny"} representing the clustering. See \code{\link{fanny.object}} for details. } \details{ In a fuzzy clustering, each observation is \dQuote{spread out} over the various clusters. Denote by \eqn{u_{iv}}{u(i,v)} the membership of observation \eqn{i} to cluster \eqn{v}. The memberships are nonnegative, and for a fixed observation i they sum to 1. The particular method \code{fanny} stems from chapter 4 of Kaufman and Rousseeuw (1990) (see the references in \code{\link{daisy}}) and has been extended by Martin Maechler to allow user specified \code{memb.exp}, \code{iniMem.p}, \code{maxit}, \code{tol}, etc. 
Fanny aims to minimize the objective function \deqn{\sum_{v=1}^k \frac{\sum_{i=1}^n\sum_{j=1}^n u_{iv}^r u_{jv}^r d(i,j)}{ 2 \sum_{j=1}^n u_{jv}^r}}{% SUM_[v=1..k] (SUM_(i,j) u(i,v)^r u(j,v)^r d(i,j)) / (2 SUM_j u(j,v)^r)} where \eqn{n} is the number of observations, \eqn{k} is the number of clusters, \eqn{r} is the membership exponent \code{memb.exp} and \eqn{d(i,j)} is the dissimilarity between observations \eqn{i} and \eqn{j}. \cr Note that \eqn{r \to 1}{r -> 1} gives increasingly crisper clusterings whereas \eqn{r \to \infty}{r -> Inf} leads to complete fuzzyness. K\&R(1990), p.191 note that values too close to 1 can lead to slow convergence. Further note that even the default, \eqn{r = 2} can lead to complete fuzzyness, i.e., memberships \eqn{u_{iv} \equiv 1/k}{u(i,v) == 1/k}. In that case a warning is signalled and the user is advised to chose a smaller \code{memb.exp} (\eqn{=r}). Compared to other fuzzy clustering methods, \code{fanny} has the following features: (a) it also accepts a dissimilarity matrix; (b) it is more robust to the \code{spherical cluster} assumption; (c) it provides a novel graphical display, the silhouette plot (see \code{\link{plot.partition}}). } \seealso{ \code{\link{agnes}} for background and references; \code{\link{fanny.object}}, \code{\link{partition.object}}, \code{\link{plot.partition}}, \code{\link{daisy}}, \code{\link{dist}}. } \examples{ ## generate 10+15 objects in two clusters, plus 3 objects lying ## between those clusters. x <- rbind(cbind(rnorm(10, 0, 0.5), rnorm(10, 0, 0.5)), cbind(rnorm(15, 5, 0.5), rnorm(15, 5, 0.5)), cbind(rnorm( 3,3.2,0.5), rnorm( 3,3.2,0.5))) fannyx <- fanny(x, 2) ## Note that observations 26:28 are "fuzzy" (closer to # 2): fannyx summary(fannyx) plot(fannyx) (fan.x.15 <- fanny(x, 2, memb.exp = 1.5)) # 'crispier' for obs. 
26:28 (fanny(x, 2, memb.exp = 3)) # more fuzzy in general data(ruspini) f4 <- fanny(ruspini, 4) stopifnot(rle(f4$clustering)$lengths == c(20,23,17,15)) plot(f4, which = 1) ## Plot similar to Figure 6 in Struyf et al (1996) plot(fanny(ruspini, 5)) } \keyword{cluster} cluster/man/ellipsoidhull.Rd0000644000176000001440000001115311262365102015675 0ustar ripleyusers\name{ellipsoidhull} \alias{ellipsoidhull} \alias{print.ellipsoid} \title{Compute the Ellipsoid Hull or Spanning Ellipsoid of a Point Set} \description{ Compute the \dQuote{ellipsoid hull} or \dQuote{spanning ellipsoid}, i.e. the ellipsoid of minimal volume (\sQuote{area} in 2D) such that all given points lie just inside or on the boundary of the ellipsoid. } \usage{ ellipsoidhull(x, tol=0.01, maxit=5000, ret.wt = FALSE, ret.sqdist = FALSE, ret.pr = FALSE) \method{print}{ellipsoid}(x, digits = max(1, getOption("digits") - 2), \dots) } \arguments{ \item{x}{the \eqn{n} \eqn{p}-dimensional points as numeric \eqn{n\times p}{n x p} matrix.} \item{tol}{convergence tolerance for Titterington's algorithm. 
Setting this to much smaller values may drastically increase the number of iterations needed, and you may want to increase \code{maxit} as well.} \item{maxit}{integer giving the maximal number of iteration steps for the algorithm.} \item{ret.wt, ret.sqdist, ret.pr}{logicals indicating if additional information should be returned, \code{ret.wt} specifying the \emph{weights}, \code{ret.sqdist} the \emph{\bold{sq}uared \bold{dist}ances} and \code{ret.pr} the final \bold{pr}obabilities in the algorithms.} \item{digits,\dots}{the usual arguments to \code{\link{print}} methods.} } \details{ The \dQuote{spanning ellipsoid} algorithm is said to stem from Titterington(1976), in Pison et al(1999) who use it for \code{\link{clusplot.default}}.\cr The problem can be seen as a special case of the \dQuote{Min.Vol.} ellipsoid of which a more flexible and general implementation is \code{\link[MASS]{cov.mve}} in the \code{MASS} package. } \value{ an object of class \code{"ellipsoid"}, basically a \code{\link{list}} with several components, comprising at least \item{cov}{\eqn{p\times p}{p x p} \emph{covariance} matrix describing the ellipsoid.} \item{loc}{\eqn{p}-dimensional location of the ellipsoid center.} \item{d2}{average squared radius. 
Further, \eqn{d2 = t^2}, where \eqn{t} is \dQuote{the value of a t-statistic on the ellipse boundary} (from \code{\link[ellipse]{ellipse}} in the \pkg{ellipse} package), and hence, more usefully, \code{d2 = qchisq(alpha, df = p)}, where \code{alpha} is the confidence level for p-variate normally distributed data with location and covariance \code{loc} and \code{cov} to lie inside the ellipsoid.} \item{wt}{the vector of weights iff \code{ret.wt} was true.} \item{sqdist}{the vector of squared distances iff \code{ret.sqdist} was true.} \item{prob}{the vector of algorithm probabilities iff \code{ret.pr} was true.} \item{it}{number of iterations used.} \item{tol, maxit}{just the input arguments, see above.} \item{eps}{the achieved tolerance which is the maximal squared radius minus \eqn{p}.} \item{ierr}{error code as from the algorithm; \code{0} means \emph{ok}.} \item{conv}{logical indicating if the algorithm converged. This is defined as \code{it < maxit && ierr == 0}.} } \references{ Pison, G., Struyf, A. and Rousseeuw, P.J. (1999) Displaying a Clustering with CLUSPLOT, \emph{Computational Statistics and Data Analysis}, \bold{30}, 381--392.\cr A version of this is available as technical report from \url{http://www.agoras.ua.ac.be/abstract/Disclu99.htm} D.M. Titterington (1976) Algorithms for computing D-optimal design on finite design spaces. In \emph{Proc.\ of the 1976 Conf.\ on Information Science and Systems}, 213--216; Johns Hopkins University. } \author{Martin Maechler did the present class implementation; Rousseeuw et al did the underlying code.} \seealso{\code{\link{predict.ellipsoid}} which is also the \code{\link{predict}} method for \code{ellipsoid} objects. \code{\link{volume.ellipsoid}} for an example of \sQuote{manual} \code{ellipsoid} object construction;\cr further \code{\link[ellipse]{ellipse}} from package \pkg{ellipse} and \code{\link[sfsmisc]{ellipsePoints}} from package \pkg{sfsmisc}. 
\code{\link[grDevices]{chull}} for the convex hull, \code{\link{clusplot}} which makes use of this; \code{\link[MASS]{cov.mve}}. } \examples{ x <- rnorm(100) xy <- unname(cbind(x, rnorm(100) + 2*x + 10)) exy <- ellipsoidhull(xy) exy # >> calling print.ellipsoid() plot(xy) lines(predict(exy)) points(rbind(exy$loc), col = "red", cex = 3, pch = 13) exy <- ellipsoidhull(xy, tol = 1e-7, ret.wt = TRUE, ret.sq = TRUE) str(exy) # had small `tol', hence many iterations (ii <- which(zapsmall(exy $ wt) > 1e-6)) # only about 4 to 6 points round(exy$wt[ii],3); sum(exy$wt[ii]) # sum to 1 } \keyword{dplot} \keyword{hplot}% << ? chull has "hplot" as well. cluster/man/dissimilarity.object.Rd0000644000176000001440000000463210370161217017163 0ustar ripleyusers\name{dissimilarity.object} \alias{dissimilarity.object} \title{Dissimilarity Matrix Object} \description{ Objects of class \code{"dissimilarity"} representing the dissimilarity matrix of a dataset. } \section{GENERATION}{ \code{\link{daisy}} returns this class of objects. Also the functions \code{pam}, \code{clara}, \code{fanny}, \code{agnes}, and \code{diana} return a \code{dissimilarity} object, as one component of their return objects. } \section{METHODS}{ The \code{"dissimilarity"} class has methods for the following generic functions: \code{print}, \code{summary}. } \value{ The dissimilarity matrix is symmetric, and hence its lower triangle (column wise) is represented as a vector to save storage space. If the object, is called \code{do}, and \code{n} the number of observations, i.e., \code{n <- attr(do, "Size")}, then for \eqn{i < j <= n}, the dissimilarity between (row) i and j is \code{do[n*(i-1) - i*(i-1)/2 + j-i]}. The length of the vector is \eqn{n*(n-1)/2}, i.e., of order \eqn{n^2}. \code{"dissimilarity"} objects also inherit from class \code{\link{dist}} and can use \code{dist} methods, in particular, \code{\link{as.matrix}}, such that \eqn{d_{ij}}{d(i,j)} from above is just \code{as.matrix(do)[i,j]}. 
The object has the following attributes: \item{Size}{the number of observations in the dataset.} \item{Metric}{the metric used for calculating the dissimilarities. Possible values are "euclidean", "manhattan", "mixed" (if variables of different types were present in the dataset), and "unspecified".} \item{Labels}{optionally, contains the labels, if any, of the observations of the dataset.} \item{NA.message}{optionally, if a dissimilarity could not be computed, because of too many missing values for some observations of the dataset.} \item{Types}{when a mixed metric was used, the types for each variable as one-letter codes (as in the book, e.g. p.54): \describe{ \item{A}{Asymmetric binary} \item{S}{Symmetric binary} \item{N}{Nominal (factor)} \item{O}{Ordinal (ordered factor)} \item{I}{Interval scaled (numeric)} \item{T}{raTio to be log transformed (positive numeric)} }.} } \seealso{ \code{\link{daisy}}, \code{\link{dist}}, \code{\link{pam}}, \code{\link{clara}}, \code{\link{fanny}}, \code{\link{agnes}}, \code{\link{diana}}. } %\examples{} --> ./daisy.Rd \keyword{cluster} cluster/man/diana.Rd0000644000176000001440000001571511711740461014114 0ustar ripleyusers\name{diana} \title{DIvisive ANAlysis Clustering} \alias{diana} \alias{diana.object} \description{ Computes a divisive hierarchical clustering of the dataset returning an object of class \code{diana}. } \usage{ diana(x, diss = inherits(x, "dist"), metric = "euclidean", stand = FALSE, keep.diss = n < 100, keep.data = !diss, trace.lev = 0) } \arguments{ \item{x}{ data matrix or data frame, or dissimilarity matrix or object, depending on the value of the \code{diss} argument. In case of a matrix or data frame, each row corresponds to an observation, and each column corresponds to a variable. All variables must be numeric. Missing values (\code{\link{NA}}s) \emph{are} allowed. In case of a dissimilarity matrix, \code{x} is typically the output of \code{\link{daisy}} or \code{\link{dist}}. 
Also a vector of length n*(n-1)/2 is allowed (where n is the number of observations), and will be interpreted in the same way as the output of the above-mentioned functions. Missing values (NAs) are \emph{not} allowed. } \item{diss}{ logical flag: if TRUE (default for \code{dist} or \code{dissimilarity} objects), then \code{x} will be considered as a dissimilarity matrix. If FALSE, then \code{x} will be considered as a matrix of observations by variables. } \item{metric}{ character string specifying the metric to be used for calculating dissimilarities between observations.\cr The currently available options are "euclidean" and "manhattan". Euclidean distances are root sum-of-squares of differences, and manhattan distances are the sum of absolute differences. If \code{x} is already a dissimilarity matrix, then this argument will be ignored. } \item{stand}{logical; if true, the measurements in \code{x} are standardized before calculating the dissimilarities. Measurements are standardized for each variable (column), by subtracting the variable's mean value and dividing by the variable's mean absolute deviation. If \code{x} is already a dissimilarity matrix, then this argument will be ignored.} \item{keep.diss, keep.data}{logicals indicating if the dissimilarities and/or input data \code{x} should be kept in the result. Setting these to \code{FALSE} can give much smaller results and hence even save memory allocation \emph{time}.} \item{trace.lev}{integer specifying a trace level for printing diagnostics during the algorithm. Default \code{0} does not print anything; higher values print increasingly more.} } \value{ an object of class \code{"diana"} representing the clustering; this class has methods for the following generic functions: \code{print}, \code{summary}, \code{plot}. Further, the class \code{"diana"} inherits from \code{"twins"}. 
Therefore, the generic function \code{\link{pltree}} can be used on a \code{diana} object, and \code{\link{as.hclust}} and \code{\link{as.dendrogram}} methods are available. A legitimate \code{diana} object is a list with the following components: \item{order}{ a vector giving a permutation of the original observations to allow for plotting, in the sense that the branches of a clustering tree will not cross. } \item{order.lab}{ a vector similar to \code{order}, but containing observation labels instead of observation numbers. This component is only available if the original observations were labelled. } \item{height}{a vector with the diameters of the clusters prior to splitting. } \item{dc}{ the divisive coefficient, measuring the clustering structure of the dataset. For each observation i, denote by \eqn{d(i)} the diameter of the last cluster to which it belongs (before being split off as a single observation), divided by the diameter of the whole dataset. The \code{dc} is the average of all \eqn{1 - d(i)}. It can also be seen as the average width (or the percentage filled) of the banner plot. Because \code{dc} grows with the number of observations, this measure should not be used to compare datasets of very different sizes. } \item{merge}{ an (n-1) by 2 matrix, where n is the number of observations. Row i of \code{merge} describes the split at step n-i of the clustering. If a number \eqn{j} in row r is negative, then the single observation \eqn{|j|} is split off at stage n-r. If j is positive, then the cluster that will be split at stage n-j (described by row j), is split off at stage n-r. } \item{diss}{ an object of class \code{"dissimilarity"}, representing the total dissimilarity matrix of the dataset. } \item{data}{ a matrix containing the original or standardized measurements, depending on the \code{stand} option of the function \code{diana}. If a dissimilarity matrix was given as input structure, then this component is not available. 
} } \details{ \code{diana} is fully described in chapter 6 of Kaufman and Rousseeuw (1990). It is probably unique in computing a divisive hierarchy, whereas most other software for hierarchical clustering is agglomerative. Moreover, \code{diana} provides (a) the divisive coefficient (see \code{diana.object}) which measures the amount of clustering structure found; and (b) the banner, a novel graphical display (see \code{plot.diana}). The \code{diana}-algorithm constructs a hierarchy of clusterings, starting with one large cluster containing all n observations. Clusters are divided until each cluster contains only a single observation.\cr At each stage, the cluster with the largest diameter is selected. (The diameter of a cluster is the largest dissimilarity between any two of its observations.)\cr To divide the selected cluster, the algorithm first looks for its most disparate observation (i.e., which has the largest average dissimilarity to the other observations of the selected cluster). This observation initiates the "splinter group". In subsequent steps, the algorithm reassigns observations that are closer to the "splinter group" than to the "old party". The result is a division of the selected cluster into two new clusters. } \seealso{ \code{\link{agnes}} also for background and references; \code{\link{cutree}} (and \code{\link{as.hclust}}) for grouping extraction; \code{\link{daisy}}, \code{\link{dist}}, \code{\link{plot.diana}}, \code{\link{twins.object}}. } \examples{ data(votes.repub) dv <- diana(votes.repub, metric = "manhattan", stand = TRUE) print(dv) plot(dv) ## Cut into 2 groups: dv2 <- cutree(as.hclust(dv), k = 2) table(dv2) # 8 and 42 group members rownames(votes.repub)[dv2 == 1] ## For two groups, does the metric matter ? 
dv0 <- diana(votes.repub, stand = TRUE) # default: Euclidean dv.2 <- cutree(as.hclust(dv0), k = 2) table(dv2 == dv.2)## identical group assignments str(as.dendrogram(dv0)) # {via as.dendrogram.twins() method} data(agriculture) ## Plot similar to Figure 8 in ref \dontrun{plot(diana(agriculture), ask = TRUE)} \dontshow{plot(diana(agriculture))} } \keyword{cluster} cluster/man/daisy.Rd0000644000176000001440000002310311410630600014124 0ustar ripleyusers\name{daisy} \alias{daisy} \title{Dissimilarity Matrix Calculation} \concept{Gower's formula} \concept{Gower's distance} \concept{Gower's coefficient}% FIXME: see ../TODO-MM \description{ Compute all the pairwise dissimilarities (distances) between observations in the data set. The original variables may be of mixed types. In that case, or whenever \code{metric = "gower"} is set, a generalization of Gower's formula is used, see \sQuote{Details} below. } \usage{ daisy(x, metric = c("euclidean", "manhattan", "gower"), stand = FALSE, type = list(), weights = rep.int(1, p)) } \arguments{ \item{x}{ numeric matrix or data frame, of dimension \eqn{n\times p}{n x p}, say. Dissimilarities will be computed between the rows of \code{x}. Columns of mode \code{numeric} (i.e. all columns when \code{x} is a matrix) will be recognized as interval scaled variables, columns of class \code{factor} will be recognized as nominal variables, and columns of class \code{ordered} will be recognized as ordinal variables. Other variable types should be specified with the \code{type} argument. Missing values (\code{\link{NA}}s) are allowed. } \item{metric}{ character string specifying the metric to be used. The currently available options are \code{"euclidean"} (the default), \code{"manhattan"} and \code{"gower"}.\cr Euclidean distances are root sum-of-squares of differences, and manhattan distances are the sum of absolute differences. 
\dQuote{Gower's distance} is chosen by metric \code{"gower"} or automatically if some columns of \code{x} are not numeric. Also known as Gower's coefficient (1971), expressed as a dissimilarity, this implies that a particular standardisation will be applied to each variable, and the \dQuote{distance} between two units is the sum of all the variable-specific distances, see the details section. } \item{stand}{logical flag: if TRUE, then the measurements in \code{x} are standardized before calculating the dissimilarities. Measurements are standardized for each variable (column), by subtracting the variable's mean value and dividing by the variable's mean absolute deviation. If not all columns of \code{x} are numeric, \code{stand} will be ignored and Gower's standardization (based on the \code{\link{range}}) will be applied in any case, see argument \code{metric}, above, and the details section. } \item{type}{list for specifying some (or all) of the types of the variables (columns) in \code{x}. The list may contain the following components: \code{"ordratio"} (ratio scaled variables to be treated as ordinal variables), \code{"logratio"} (ratio scaled variables that must be logarithmically transformed), \code{"asymm"} (asymmetric binary) and \code{"symm"} (symmetric binary variables). Each component's value is a vector, containing the names or the numbers of the corresponding columns of \code{x}. Variables not mentioned in the \code{type} list are interpreted as usual (see argument \code{x}). } \item{weights}{an optional numeric vector of length \eqn{p}(=\code{ncol(x)}); to be used in \dQuote{case 2} (mixed variables, or \code{metric = "gower"}), specifying a weight for each variable (\code{x[,k]}) instead of \eqn{1} in Gower's original formula.} }% end{arg..} \value{ an object of class \code{"dissimilarity"} containing the dissimilarities among the rows of \code{x}. This is typically the input for the functions \code{pam}, \code{fanny}, \code{agnes} or \code{diana}. 
For more details, see \code{\link{dissimilarity.object}}. } \details{ The original version of \code{daisy} is fully described in chapter 1 of Kaufman and Rousseeuw (1990). Compared to \code{\link{dist}} whose input must be numeric variables, the main feature of \code{daisy} is its ability to handle other variable types as well (e.g. nominal, ordinal, (a)symmetric binary) even when different types occur in the same data set. The handling of nominal, ordinal, and (a)symmetric binary data is achieved by using the general dissimilarity coefficient of Gower (1971). If \code{x} contains any columns of these data-types, both arguments \code{metric} and \code{stand} will be ignored and Gower's coefficient will be used as the metric. This can also be activated for purely numeric data by \code{metric = "gower"}. With that, each variable (column) is first standardized by dividing each entry by the range of the corresponding variable, after subtracting the minimum value; consequently the rescaled variable has range \eqn{[0,1]}, exactly. %% FIXME: Use something like "gowerRob" which uses *robust* rescaling Note that setting the type to \code{symm} (symmetric binary) gives the same dissimilarities as using \emph{nominal} (which is chosen for non-ordered factors) only when no missing values are present, and more efficiently. Note that \code{daisy} now gives a warning when 2-valued numerical variables do not have an explicit \code{type} specified, because the reference authors recommend to consider using \code{"asymm"}. In the \code{daisy} algorithm, missing values in a row of x are not included in the dissimilarities involving that row. 
There are two main cases, \enumerate{ \item If all variables are interval scaled (and \code{metric} is \emph{not} \code{"gower"}), the metric is "euclidean", and \eqn{n_g} is the number of columns in which neither row i nor j have NAs, then the dissimilarity d(i,j) returned is \eqn{\sqrt{p/n_g}}{sqrt(p/n_g)} (\eqn{p=}ncol(x)) times the Euclidean distance between the two vectors of length \eqn{n_g} shortened to exclude NAs. The rule is similar for the "manhattan" metric, except that the coefficient is \eqn{p/n_g}. If \eqn{n_g = 0}, the dissimilarity is NA. \item When some variables have a type other than interval scaled, or if \code{metric = "gower"} is specified, the dissimilarity between two rows is the weighted mean of the contributions of each variable. Specifically, \deqn{d_{ij} = d(i,j) = \frac{\sum_{k=1}^p w_k \delta_{ij}^{(k)} d_{ij}^{(k)}}{ \sum_{k=1}^p w_k \delta_{ij}^{(k)}}. }{d_ij = d(i,j) = sum(k=1:p; w_k delta(ij;k) d(ij,k)) / sum(k=1:p; w_k delta(ij;k)).} In other words, \eqn{d_{ij}}{d_ij} is a weighted mean of \eqn{d_{ij}^{(k)}}{d(ij,k)} with weights \eqn{w_k \delta_{ij}^{(k)}}{w_k delta(ij;k)}, where \eqn{w_k}\code{= weights[k]}, \eqn{\delta_{ij}^{(k)}}{delta(ij;k)} is 0 or 1, and \eqn{d_{ij}^{(k)}}{d(ij,k)}, the k-th variable contribution to the total distance, is a distance between \code{x[i,k]} and \code{x[j,k]}, see below. The 0-1 weight \eqn{\delta_{ij}^{(k)}}{delta(ij;k)} becomes zero when the variable \code{x[,k]} is missing in either or both rows (i and j), or when the variable is asymmetric binary and both values are zero. In all other situations it is 1. The contribution \eqn{d_{ij}^{(k)}}{d(ij,k)} of a nominal or binary variable to the total dissimilarity is 0 if both values are equal, 1 otherwise. The contribution of other variables is the absolute difference of both values, divided by the total range of that variable. 
Note that \dQuote{standard scoring} is applied to ordinal variables, i.e., they are replaced by their integer codes \code{1:K}. Note that this is not the same as using their ranks (since there typically are ties). % contrary to what Kaufman & Rousseeuw write in their book, and % the original help page. As the individual contributions \eqn{d_{ij}^{(k)}}{d(ij,k)} are in \eqn{[0,1]}, the dissimilarity \eqn{d_{ij}}{d_ij} will remain in this range. If all weights \eqn{w_k \delta_{ij}^{(k)}}{w_k delta(ij;k)} are zero, the dissimilarity is set to \code{\link{NA}}. } } \section{Background}{ Dissimilarities are used as inputs to cluster analysis and multidimensional scaling. The choice of metric may have a large impact. } \references{ Gower, J. C. (1971) A general coefficient of similarity and some of its properties, \emph{Biometrics} \bold{27}, 857--874. Kaufman, L. and Rousseeuw, P.J. (1990) \emph{Finding Groups in Data: An Introduction to Cluster Analysis}. Wiley, New York. Struyf, A., Hubert, M. and Rousseeuw, P.J. (1997) Integrating Robust Clustering Techniques in S-PLUS, \emph{Computational Statistics and Data Analysis} \bold{26}, 17--37. } \author{ Anja Struyf, Mia Hubert, and Peter J. Rousseeuw, for the original version. \cr Martin Maechler improved the \code{\link{NA}} handling and \code{type} specification checking, and extended functionality to \code{metric = "gower"} and the optional \code{weights} argument. } \seealso{ \code{\link{dissimilarity.object}}, \code{\link{dist}}, \code{\link{pam}}, \code{\link{fanny}}, \code{\link{clara}}, \code{\link{agnes}}, \code{\link{diana}}. } \examples{ data(agriculture) ## Example 1 in ref: ## Dissimilarities using Euclidean metric and without standardization d.agr <- daisy(agriculture, metric = "euclidean", stand = FALSE) d.agr as.matrix(d.agr)[,"DK"] # via as.matrix.dist(.) 
## compare with as.matrix(daisy(agriculture, metric = "gower")) data(flower) ## Example 2 in ref summary(dfl1 <- daisy(flower, type = list(asymm = 3))) summary(dfl2 <- daisy(flower, type = list(asymm = c(1, 3), ordratio = 7))) ## this failed earlier: summary(dfl3 <- daisy(flower, type = list(asymm = c("V1", "V3"), symm= 2, ordratio= 7, logratio= 8))) } \keyword{cluster} cluster/man/coef.hclust.Rd0000644000176000001440000000442511674345261015260 0ustar ripleyusers\name{coef.hclust} \alias{coefHier} \alias{coef.hclust} \alias{coef.twins} \title{Agglomerative / Divisive Coefficient for 'hclust' Objects} \description{ Computes the \dQuote{agglomerative coefficient} (aka \dQuote{divisive coefficient} for \code{\link{diana}}), measuring the clustering structure of the dataset. For each observation i, denote by \eqn{m(i)} its dissimilarity to the first cluster it is merged with, divided by the dissimilarity of the merger in the final step of the algorithm. The agglomerative coefficient is the average of all \eqn{1 - m(i)}. It can also be seen as the average width (or the percentage filled) of the banner plot. \code{coefHier()} directly interfaces to the underlying C code, and \dQuote{proves} that \emph{only} \code{object$heights} is needed to compute the coefficient. Because it grows with the number of observations, this measure should not be used to compare datasets of very different sizes. } \usage{ coefHier(object) coef.hclust(object, \dots)%-- we export this, on purpose \method{coef}{hclust}(object, \dots) \method{coef}{twins}(object, \dots) } \arguments{ \item{object}{an object of class \code{"hclust"} or \code{"twins"}, i.e., typically the result of \code{\link{hclust}(.)},\code{\link{agnes}(.)}, or \code{\link{diana}(.)}. Since \code{coef.hclust} only uses \code{object$heights}, and \code{object$merge}, \code{object} can be any list-like object with appropriate \code{merge} and \code{heights} components. 
For \code{coefHier}, even only \code{object$heights} is needed. } \item{\dots}{currently unused potential further arguments} } \value{ a number specifying the \emph{agglomerative} (or \emph{divisive} for \code{diana} objects) coefficient as defined by Kaufman and Rousseeuw, see \code{\link{agnes.object} $ ac} or \code{\link{diana.object} $ dc}. } \examples{ data(agriculture) aa <- agnes(agriculture) coef(aa) # really just extracts aa$ac coef(as.hclust(aa))# recomputes coefHier(aa) # ditto \dontshow{ stopifnot(all.equal(coef(aa), coefHier(aa))) d.a <- dist(agriculture, "manhattan") for (m in c("average", "single", "complete")) stopifnot(all.equal(coef(hclust(d.a, method=m)), coef(agnes (d.a, method=m)), tol=1e-13)) } } \keyword{cluster} cluster/man/cluster-internal.Rd0000644000176000001440000000033507420040427016320 0ustar ripleyusers\name{cluster-internal} \alias{meanabsdev} \title{Internal cluster functions} \description{ Internal cluster functions. } \usage{ meanabsdev(y) } \details{ These are not to be called by the user. } \keyword{internal} cluster/man/clusplot.partition.Rd0000644000176000001440000000542211573377221016715 0ustar ripleyusers\name{clusplot} \alias{clusplot} \alias{clusplot.partition} \title{Bivariate Cluster Plot (of a Partitioning Object)} \description{ Draws a 2-dimensional \dQuote{clusplot} (clustering plot) on the current graphics device. The generic function has a default and a \code{partition} method. } \usage{ clusplot(x, \dots) \method{clusplot}{partition}(x, main = NULL, dist = NULL, \dots) } \arguments{ \item{x}{an \R object, here, specifically an object of class \code{"partition"}, e.g. 
created by one of the functions \code{\link{pam}}, \code{\link{clara}}, or \code{\link{fanny}}.} \item{main}{title for the plot; when \code{NULL} (by default), a title is constructed, using \code{x$call}.} \item{dist}{when \code{x} does not have a \code{diss} nor a \code{data} component, e.g., for \code{\link{pam}(dist(*), keep.diss=FALSE)}, \code{dist} must specify the dissimilarity for the clusplot.} \item{\dots}{optional arguments passed to methods, notably the \code{\link{clusplot.default}} method (except for the \code{diss} one) may also be supplied to this function. Many graphical parameters (see \code{\link{par}}) may also be supplied as arguments here.} } \section{Side Effects}{ a 2-dimensional clusplot is created on the current graphics device. } \value{ For the \code{partition} (and \code{default}) method: An invisible list with components \code{Distances} and \code{Shading}, as for \code{\link{clusplot.default}}, see there. } \details{ The \code{clusplot.partition()} method relies on \code{\link{clusplot.default}}. If the clustering algorithms \code{pam}, \code{fanny} and \code{clara} are applied to a data matrix of observations-by-variables then a clusplot of the resulting clustering can always be drawn. When the data matrix contains missing values and the clustering is performed with \code{\link{pam}} or \code{\link{fanny}}, the dissimilarity matrix will be given as input to \code{clusplot}. When the clustering algorithm \code{\link{clara}} was applied to a data matrix with NAs then clusplot will replace the missing values as described in \code{\link{clusplot.default}}, because a dissimilarity matrix is not available. } \seealso{\code{\link{clusplot.default}} for references; \code{\link{partition.object}}, \code{\link{pam}}, \code{\link{pam.object}}, \code{\link{clara}}, \code{\link{clara.object}}, \code{\link{fanny}}, \code{\link{fanny.object}}, \code{\link{par}}. 
} \examples{ ## For more, see ?clusplot.default ## generate 25 objects, divided into 2 clusters. x <- rbind(cbind(rnorm(10,0,0.5), rnorm(10,0,0.5)), cbind(rnorm(15,5,0.5), rnorm(15,5,0.5))) clusplot(pam(x, 2)) ## add noise, and try again : x4 <- cbind(x, rnorm(25), rnorm(25)) clusplot(pam(x4, 2)) } \keyword{cluster} \keyword{hplot} cluster/man/clusplot.default.Rd0000644000176000001440000002650611666514653016343 0ustar ripleyusers\name{clusplot.default} \alias{clusplot.default} \title{Bivariate Cluster Plot (clusplot) Default Method} \description{ Creates a bivariate plot visualizing a partition (clustering) of the data. All observation are represented by points in the plot, using principal components or multidimensional scaling. Around each cluster an ellipse is drawn. } \usage{ \method{clusplot}{default}(x, clus, diss = FALSE, s.x.2d = mkCheckX(x, diss), stand = FALSE, lines = 2, shade = FALSE, color = FALSE, labels= 0, plotchar = TRUE, col.p = "dark green", col.txt = col.p, col.clus = if(color) c(2, 4, 6, 3) else 5, cex = 1, cex.txt = cex, span = TRUE, add = FALSE, xlim = NULL, ylim = NULL, main = paste("CLUSPLOT(", deparse(substitute(x)),")"), sub = paste("These two components explain", round(100 * var.dec, digits = 2), "\% of the point variability."), xlab = "Component 1", ylab = "Component 2", verbose = getOption("verbose"), \dots) } \arguments{ \item{x}{matrix or data frame, or dissimilarity matrix, depending on the value of the \code{diss} argument. In case of a matrix (alike), each row corresponds to an observation, and each column corresponds to a variable. All variables must be numeric. Missing values (\code{\link{NA}}s) are allowed. They are replaced by the median of the corresponding variable. When some variables or some observations contain only missing values, the function stops with a warning message. In case of a dissimilarity matrix, \code{x} is the output of \code{\link{daisy}} or \code{\link{dist}} or a symmetric matrix. 
Also, a vector of length \eqn{n*(n-1)/2} is allowed (where \eqn{n} is the number of observations), and will be interpreted in the same way as the output of the above-mentioned functions. Missing values (NAs) are not allowed. } \item{clus}{ a vector of length n representing a clustering of \code{x}. For each observation the vector lists the number or name of the cluster to which it has been assigned. \code{clus} is often the clustering component of the output of \code{\link{pam}}, \code{\link{fanny}} or \code{\link{clara}}.} \item{diss}{ logical indicating if \code{x} will be considered as a dissimilarity matrix or a matrix of observations by variables (see \code{x} argument above).} \item{s.x.2d}{a \code{\link{list}} with components \code{x} (a \eqn{n \times 2} matrix; typically something like principal components of original data), \code{labs} and \code{var.dec}.} \item{stand}{ logical flag: if true, then the representations of the n observations in the 2-dimensional plot are standardized. } \item{lines}{ integer out of \code{0, 1, 2}, used to obtain an idea of the distances between ellipses. The distance between two ellipses E1 and E2 is measured along the line connecting the centers \eqn{m1} and \eqn{m2} of the two ellipses. In case E1 and E2 overlap on the line through \eqn{m1} and \eqn{m2}, no line is drawn. Otherwise, the result depends on the value of \code{lines}: If \describe{ \item{lines = 0,}{no distance lines will appear on the plot;} \item{lines = 1,}{the line segment between \eqn{m1} and \eqn{m2} is drawn;} \item{lines = 2,}{a line segment between the boundaries of E1 and E2 is drawn (along the line connecting \eqn{m1} and \eqn{m2}).} } } \item{shade}{ logical flag: if TRUE, then the ellipses are shaded in relation to their density. The density is the number of points in the cluster divided by the area of the ellipse. } \item{color}{ logical flag: if TRUE, then the ellipses are colored with respect to their density. 
With increasing density, the colors are light blue, light green, red and purple. To see these colors on the graphics device, an appropriate color scheme should be selected (we recommend a white background).} \item{labels}{ integer code, currently one of 0,1,2,3,4 and 5. If \describe{ \item{labels= 0,}{no labels are placed in the plot;} \item{labels= 1,}{points and ellipses can be identified in the plot (see \code{\link{identify}});} \item{labels= 2,}{all points and ellipses are labelled in the plot;} \item{labels= 3,}{only the points are labelled in the plot;} \item{labels= 4,}{only the ellipses are labelled in the plot.} \item{labels= 5,}{the ellipses are labelled in the plot, and points can be identified.} } The levels of the vector \code{clus} are taken as labels for the clusters. The labels of the points are the rownames of \code{x} if \code{x} is matrix like. Otherwise (\code{diss = TRUE}), \code{x} is a vector, point labels can be attached to \code{x} as a "Labels" attribute (\code{attr(x,"Labels")}), as is done for the output of \code{\link{daisy}}. A possible \code{\link{names}} attribute of \code{clus} will not be taken into account. } \item{plotchar}{ logical flag: if TRUE, then the plotting symbols differ for points belonging to different clusters. } \item{span}{ logical flag: if TRUE, then each cluster is represented by the ellipse with smallest area containing all its points. (This is a special case of the minimum volume ellipsoid.)\cr If FALSE, the ellipse is based on the mean and covariance matrix of the same points. While this is faster to compute, it often yields a much larger ellipse. There are also some special cases: When a cluster consists of only one point, a tiny circle is drawn around it. When the points of a cluster fall on a straight line, \code{span=FALSE} draws a narrow ellipse around it and \code{span=TRUE} gives the exact line segment. 
} \item{add}{logical indicating if ellipses (and labels if \code{labels} is true) should be \emph{added} to an already existing plot. If false, neither a \code{\link{title}} nor sub title, see \code{sub}, is written.} \item{col.p}{color code(s) used for the observation points.} \item{col.txt}{color code(s) used for the labels (if \code{labels >= 2}).} \item{col.clus}{color code for the ellipses (and their labels); only one if color is false (as per default).} \item{cex, cex.txt}{character \bold{ex}pansion (size), for the point symbols and point labels, respectively.} \item{xlim, ylim}{numeric vectors of length 2, giving the x- and y- ranges as in \code{\link{plot.default}}.} \item{main}{main title for the plot; by default, one is constructed.} \item{sub}{sub title for the plot; by default, one is constructed.} \item{xlab, ylab}{x- and y- axis labels for the plot, with defaults.} \item{verbose}{a logical indicating, if there should be extra diagnostic output; mainly for \sQuote{debugging}.} \item{\dots}{Further graphical parameters may also be supplied, see \code{\link{par}}.} }% End Arguments \value{ An invisible list with components: \item{Distances}{ When \code{lines} is 1 or 2 we obtain a k by k matrix (k is the number of clusters). The element in \code{[i,j]} is the distance between ellipse i and ellipse j.\cr If \code{lines = 0}, then the value of this component is \code{NA}. } \item{Shading}{ A vector of length k (where k is the number of clusters), containing the amount of shading per cluster. Let y be a vector where element i is the ratio between the number of points in cluster i and the area of ellipse i. When the cluster i is a line segment, y[i] and the density of the cluster are set to \code{NA}. Let z be the sum of all the elements of y without the NAs. Then we put shading = y/z *37 + 3 . } } \section{Side Effects}{ a visual display of the clustering is plotted on the current graphics device. 
} \details{ \code{clusplot} uses the functions \code{\link{princomp}} and \code{\link{cmdscale}}. These functions are data reduction techniques. They will represent the data in a bivariate plot. Ellipses are then drawn to indicate the clusters. The further layout of the plot is determined by the optional arguments. } \note{ When we have 4 or fewer clusters, then the \code{color=TRUE} gives every cluster a different color. When there are more than 4 clusters, clusplot uses the function \code{\link{pam}} to cluster the densities into 4 groups such that ellipses with nearly the same density get the same color. \code{col.clus} specifies the colors used. The \code{col.p} and \code{col.txt} arguments, added for \R, are recycled to have length the number of observations. If \code{col.p} has more than one value, using \code{color = TRUE} can be confusing because of a mix of point and ellipse colors. } \references{ Pison, G., Struyf, A. and Rousseeuw, P.J. (1999) Displaying a Clustering with CLUSPLOT, \emph{Computational Statistics and Data Analysis}, \bold{30}, 381--392.\cr A version of this is available as technical report from \url{http://www.agoras.ua.ac.be/abstract/Disclu99.htm} Kaufman, L. and Rousseeuw, P.J. (1990). \emph{Finding Groups in Data: An Introduction to Cluster Analysis.} Wiley, New York. Struyf, A., Hubert, M. and Rousseeuw, P.J. (1997). Integrating Robust Clustering Techniques in S-PLUS, \emph{Computational Statistics and Data Analysis}, \bold{26}, 17-37. } \seealso{ \code{\link{princomp}}, \code{\link{cmdscale}}, \code{\link{pam}}, \code{\link{clara}}, \code{\link{daisy}}, \code{\link{par}}, \code{\link{identify}}, \code{\link[MASS]{cov.mve}}, \code{\link{clusplot.partition}}. 
} \examples{ ## plotting votes.diss(dissimilarity) in a bivariate plot and ## partitioning into 2 clusters data(votes.repub) votes.diss <- daisy(votes.repub) pamv <- pam(votes.diss, 2, diss = TRUE) clusplot(pamv, shade = TRUE) ## is the same as votes.clus <- pamv$clustering clusplot(votes.diss, votes.clus, diss = TRUE, shade = TRUE) clusplot(pamv, col.p = votes.clus, labels = 4)# color points and label ellipses # "simple" cheap ellipses: larger than minimum volume: # here they are *added* to the previous plot: clusplot(pamv, span = FALSE, add = TRUE, col.clus = "midnightblue") ## a work-around for setting a small label size: clusplot(votes.diss, votes.clus, diss = TRUE) op <- par(new=TRUE, cex = 0.6) clusplot(votes.diss, votes.clus, diss = TRUE, axes=FALSE,ann=FALSE, sub="", col.p=NA, col.txt="dark green", labels=3) par(op) ## MM: This should now be as simple as clusplot(votes.diss, votes.clus, diss = TRUE, labels = 3, cex.txt = 0.6) if(interactive()) { # uses identify() *interactively* : clusplot(votes.diss, votes.clus, diss = TRUE, shade = TRUE, labels = 1) clusplot(votes.diss, votes.clus, diss = TRUE, labels = 5)# ident. only points } ## plotting iris (data frame) in a 2-dimensional plot and partitioning ## into 3 clusters. 
data(iris) iris.x <- iris[, 1:4] cl3 <- pam(iris.x, 3)$clustering op <- par(mfrow= c(2,2)) clusplot(iris.x, cl3, color = TRUE) U <- par("usr") ## zoom in : rect(0,-1, 2,1, border = "orange", lwd=2) clusplot(iris.x, cl3, color = TRUE, xlim = c(0,2), ylim = c(-1,1)) box(col="orange",lwd=2); mtext("sub region", font = 4, cex = 2) ## or zoom out : clusplot(iris.x, cl3, color = TRUE, xlim = c(-4,4), ylim = c(-4,4)) mtext("`super' region", font = 4, cex = 2) rect(U[1],U[3], U[2],U[4], lwd=2, lty = 3) # reset graphics par(op) } \keyword{cluster} \keyword{hplot} cluster/man/clusGap.Rd0000644000176000001440000002046112036113605014423 0ustar ripleyusers\name{clusGap} \title{Gap Statistic for Estimating the Number of Clusters} \alias{clusGap} \alias{maxSE} \alias{print.clusGap} \alias{plot.clusGap} \description{ \code{clusGap()} calculates a goodness of clustering measure, the \dQuote{gap} statistic. For each number of clusters \eqn{k}, it compares \eqn{\log(W(k))} with \eqn{E^*[\log(W(k))]} where the latter is defined via bootstrapping, i.e. simulating from a reference distribution. \code{maxSE(f, SE.f)} determines the location of the \bold{maximum} of \code{f}, taking a \dQuote{1-SE rule} into account for the \code{*SE*} methods. The default method \code{"firstSEmax"} looks for the smallest \eqn{k} such that its value \eqn{f(k)} is not more than 1 standard error away from the first local maximum. This is similar but not the same as \code{"Tibs2001SEmax"}, Tibshirani et al's recommendation of determining the number of clusters from the gap statistics and their standard deviations. 
} \usage{ clusGap(x, FUNcluster, K.max, B = 100, verbose = interactive(), \dots) maxSE(f, SE.f, method = c("firstSEmax", "Tibs2001SEmax", "globalSEmax", "firstmax", "globalmax"), SE.factor = 1) \S3method{print}{clusGap}(x, method = "firstSEmax", SE.factor = 1, \dots) } \arguments{ \item{x}{numeric matrix or \code{\link{data.frame}}.} \item{FUNcluster}{a \code{\link{function}} which accepts as first argument a (data) matrix like \code{x}, second argument, say \eqn{k, k\geq 2}{k, k >= 2}, the number of clusters desired, and returns a \code{\link{list}} with a component named (or shortened to) \code{cluster} which is a vector of length \code{n = nrow(x)} of integers in \code{1:k} determining the clustering or grouping of the \code{n} observations.} \item{K.max}{the maximum number of clusters to consider, must be at least two.} \item{B}{integer, number of Monte Carlo (\dQuote{bootstrap}) samples.} \item{verbose}{integer or logical, determining if \dQuote{progress} output should be printed. The default prints one bit per bootstrap sample.} \item{\dots}{optionally further arguments for \code{FUNcluster()}, see \code{kmeans} example below.} \item{f}{numeric vector of \sQuote{function values}, of length \eqn{K}, whose (\dQuote{1 SE respected}) maximum we want.} \item{SE.f}{numeric vector of length \eqn{K} of standard errors of \code{f}.} \item{method}{character string indicating how the \dQuote{optimal} number of clusters, \eqn{\hat k}{k^}, is computed from the gap statistics (and their standard deviations), or more generally how the location \eqn{\hat k}{k^} of the maximum of \eqn{f_k}{f[k]} should be determined. 
%% -> ../R/clusGap.R \describe{ \item{\code{"globalmax"}:}{simply corresponds to the global maximum, i.e., is \code{which.max(f)}} \item{\code{"firstmax"}:}{gives the location of the first \emph{local} maximum.} \item{\code{"Tibs2001SEmax"}:}{uses the criterion, Tibshirani et al. (2001) proposed: \dQuote{the smallest \eqn{k} such that \eqn{f(k) \ge f(k+1) - s_{k+1}}}. Note that this chooses \eqn{k = 1} when all standard deviations are larger than the differences \eqn{f(k+1) - f(k)}.} \item{\code{"firstSEmax"}:}{location of the first \eqn{f()} value which is not larger than the first \emph{local} maximum minus \code{SE.factor * SE.f[]}, i.e., within an \dQuote{f S.E.} range of that maximum (see also \code{SE.factor}).} \item{\code{"globalSEmax"}:}{(used in Dudoit and Fridlyand (2002), supposedly following Tibshirani's proposition): location of the first \eqn{f()} value which is not larger than the \emph{global} maximum minus \code{SE.factor * SE.f[]}, i.e., within an \dQuote{f S.E.} range of that maximum (see also \code{SE.factor}).} } See the examples for a comparison in a simple case. } \item{SE.factor}{[When \code{method} contains \code{"SE"}] Determining the optimal number of clusters, Tibshirani et al. proposed the \dQuote{1 S.E.}-rule. Using an \code{SE.factor} \eqn{f}, the \dQuote{f S.E.}-rule is used, more generally.} } \details{ The main result \code{$Tab[,"gap"]} of course is from bootstrapping aka Monte Carlo simulation and hence random, or equivalently, depending on the initial random seed (see \code{\link{set.seed}()}). On the other hand, in our experience, using \code{B = 500} gives quite precise results such that the gap plot is basically unchanged after another run. 
} \value{ an object of S3 class \code{"clusGap"}, basically a list with components \item{Tab}{a matrix with \code{K.max} rows and 4 columns, named "logW", "E.logW", "gap", and "SE.sim", where \code{gap = E.logW - logW}, and \code{SE.sim} corresponds to the standard error of \code{gap}, \code{SE.sim[k]=}\eqn{s_k}{s[k]}, where \eqn{s_k := \sqrt{1 + 1/B} sd^*(gap_j)}{s[k] := sqrt(1 + 1/B) sd^*(gap[])}, and \eqn{sd^*()} is the standard deviation of the simulated (\dQuote{bootstrapped}) gap values. } \item{n}{number of observations, i.e., \code{nrow(x)}.} \item{B}{input \code{B}} \item{FUNcluster}{input function \code{FUNcluster}} } \references{ Tibshirani, R., Walther, G. and Hastie, T. (2001). Estimating the number of data clusters via the Gap statistic. \emph{Journal of the Royal Statistical Society B}, \bold{63}, 411--423. Tibshirani, R., Walther, G. and Hastie, T. (2000). Estimating the number of clusters in a dataset via the Gap statistic. Technical Report. Stanford. Per Broberg (2006). SAGx: Statistical Analysis of the GeneChip. R package version 1.9.7.% moved to Bioconductor sometime after 2006 \url{http://home.swipnet.se/pibroberg/expression_hemsida1.html} } \author{ This function is originally based on the functions \code{gap} of (Bioconductor) package \pkg{SAGx} by Per Broberg, \code{gapStat()} from former package \pkg{SLmisc} by Matthias Kohl and ideas from \code{gap()} and its methods of package \pkg{lga} by Justin Harrington. The current implementation is by Martin Maechler. } \seealso{ \code{\link{silhouette}} for a much simpler less sophisticated goodness of clustering measure. \code{\link[fpc]{cluster.stats}()} in package \pkg{fpc} for alternative measures. %\code{\link[SGAx]{gap}} in Bioconductor package \pkg{SGAx}. 
} \examples{ ### --- maxSE() methods ------------------------------------------- (mets <- eval(formals(maxSE)$method)) fk <- c(2,3,5,4,7,8,5,4) sk <- c(1,1,2,1,1,3,1,1)/2 ## use plot.clusGap(): plot(structure(class="clusGap", list(Tab = cbind(gap=fk, SE.sim=sk)))) ## Note that 'firstmax' and 'globalmax' are always at 3 and 6 : sapply(c(1/4, 1,2,4), function(SEf) sapply(mets, function(M) maxSE(fk, sk, method = M, SE.factor = SEf))) ### --- clusGap() ------------------------------------------------- ## ridiculously nicely separated clusters in 3 D : x <- rbind(matrix(rnorm(150, sd = 0.1), ncol = 3), matrix(rnorm(150, mean = 1, sd = 0.1), ncol = 3), matrix(rnorm(150, mean = 2, sd = 0.1), ncol = 3), matrix(rnorm(150, mean = 3, sd = 0.1), ncol = 3)) ## Slightly faster way to use pam (see below) pam1 <- function(x,k) list(cluster = pam(x,k, cluster.only=TRUE)) doExtras <- cluster:::doExtras() ## or set it explicitly to TRUE for the following if(doExtras) { ## Note we use B = 60 in the following examples to keep them "speedy". ## ---- rather keep the default B = 500 for your analysis! ## note we can pass 'nstart = 20' to kmeans() : gskmn <- clusGap(x, FUN = kmeans, nstart = 20, K.max = 8, B = 60) gskmn #-> its print() method plot(gskmn, main = "clusGap(., FUN = kmeans, n.start=20, B= 60)") set.seed(12); system.time( gsPam0 <- clusGap(x, FUN = pam, K.max = 8, B = 60) ) set.seed(12); system.time( gsPam1 <- clusGap(x, FUN = pam1, K.max = 8, B = 60) ) ## and show that it gives the same: stopifnot(identical(gsPam1[-4], gsPam0[-4])) gsPam1 print(gsPam1, method="globalSEmax") print(gsPam1, method="globalmax") }% end {doExtras} gs.pam.RU <- clusGap(ruspini, FUN = pam1, K.max = 8, B = 60) gs.pam.RU plot(gs.pam.RU, main = "Gap statistic for the 'ruspini' data") mtext("k = 4 is best .. and k = 5 pretty close") \donttest{## This takes a minute.. 
## No clustering ==> k = 1 ("one cluster") should be optimal: Z <- matrix(rnorm(256*3), 256,3) gsP.Z <- clusGap(Z, FUN = pam1, K.max = 8, B = 200) plot(gsP.Z) gsP.Z }%end{dont..} } \keyword{cluster} cluster/man/clara.object.Rd0000644000176000001440000000505210500041124015341 0ustar ripleyusers\name{clara.object} \alias{clara.object} \title{Clustering Large Applications (CLARA) Object} \description{ The objects of class \code{"clara"} represent a partitioning of a large dataset into clusters and are typically returned from \code{\link{clara}}. } \section{Methods, Inheritance}{ The \code{"clara"} class has methods for the following generic functions: \code{print}, \code{summary}. The class \code{"clara"} inherits from \code{"partition"}. Therefore, the generic functions \code{plot} and \code{clusplot} can be used on a \code{clara} object. } \value{ A legitimate \code{clara} object is a list with the following components: \item{sample}{ labels or case numbers of the observations in the best sample, that is, the sample used by the \code{clara} algorithm for the final partition.} \item{medoids}{the medoids or representative objects of the clusters. It is a matrix with in each row the coordinates of one medoid. Possibly \code{NULL}, namely when the object resulted from \code{clara(*, medoids.x=FALSE)}. Use the following \code{i.med} in that case.} \item{i.med}{ the \emph{indices} of the \code{medoids} above: \code{medoids <- x[i.med,]} where \code{x} is the original data matrix in \code{clara(x,*)}.} \item{clustering}{the clustering vector, see \code{\link{partition.object}}.} \item{objective}{the objective function for the final clustering of the entire dataset.} \item{clusinfo}{ matrix, each row gives numerical information for one cluster. These are the cardinality of the cluster (number of observations), the maximal and average dissimilarity between the observations in the cluster and the cluster's medoid. 
%% FIXME: Now differs from pam.object.Rd: The last column is the maximal dissimilarity between the observations in the cluster and the cluster's medoid, divided by the minimal dissimilarity between the cluster's medoid and the medoid of any other cluster. If this ratio is small, the cluster is well-separated from the other clusters. } \item{diss}{dissimilarity (maybe NULL), see \code{\link{partition.object}}.} \item{silinfo}{list with silhouette width information for the best sample, see \code{\link{partition.object}}.} \item{call}{generating call, see \code{\link{partition.object}}.} \item{data}{matrix, possibly standardized, or NULL, see \code{\link{partition.object}}.} } \seealso{ \code{\link{clara}}, \code{\link{dissimilarity.object}}, \code{\link{partition.object}}, \code{\link{plot.partition}}. } \keyword{cluster} cluster/man/clara.Rd0000644000176000001440000001740211573376173014120 0ustar ripleyusers\name{clara} \alias{clara} \title{Clustering Large Applications} \description{ Computes a \code{"clara"} object, a list representing a clustering of the data into \code{k} clusters. } \usage{ clara(x, k, metric = "euclidean", stand = FALSE, samples = 5, sampsize = min(n, 40 + 2 * k), trace = 0, medoids.x = TRUE, keep.data = medoids.x, rngR = FALSE, pamLike = FALSE) } \arguments{ \item{x}{ data matrix or data frame, each row corresponds to an observation, and each column corresponds to a variable. All variables must be numeric. Missing values (NAs) are allowed.} \item{k}{integer, the number of clusters. It is required that \eqn{0 < k < n} where \eqn{n} is the number of observations (i.e., n = \code{nrow(x)}).} \item{metric}{ character string specifying the metric to be used for calculating dissimilarities between observations. The currently available options are "euclidean" and "manhattan". Euclidean distances are root sum-of-squares of differences, and manhattan distances are the sum of absolute differences. 
} \item{stand}{logical, indicating if the measurements in \code{x} are standardized before calculating the dissimilarities. Measurements are standardized for each variable (column), by subtracting the variable's mean value and dividing by the variable's mean absolute deviation. } \item{samples}{integer, number of samples to be drawn from the dataset. The default, \code{5}, is rather small for historical (and now back compatibility) reasons and we recommend to set \code{samples} an order of magnitude larger. } \item{sampsize}{integer, number of observations in each sample. \code{sampsize} should be higher than the number of clusters (\code{k}) and at most the number of observations (n = \code{nrow(x)}).} \item{trace}{integer indicating a \emph{trace level} for diagnostic output during the algorithm.} \item{medoids.x}{logical indicating if the medoids should be returned, identically to some rows of the input data \code{x}. If \code{FALSE}, \code{keep.data} must be false as well, and the medoid indices, i.e., row numbers of the medoids will still be returned (\code{i.med} component), and the algorithm saves space by needing one copy less of \code{x}.} \item{keep.data}{logical indicating if the (\emph{scaled} if \code{stand} is true) data should be kept in the result. % (\code{keepdata} is equivalent to \code{keep.data} where the former % is deprecated.) Setting this to \code{FALSE} saves memory (and hence time), but disables \code{\link{clusplot}()}ing of the result. Use \code{medoids.x = FALSE} to save even more memory.} \item{rngR}{logical indicating if \R's random number generator should be used instead of the primitive clara()-builtin one. If true, this also means that each call to \code{clara()} returns a different result -- though only slightly different in good situations.} \item{pamLike}{logical indicating if the \dQuote{swap} phase (see \code{\link{pam}}, in C code) should use the same algorithm as \code{\link{pam}()}. 
Note that from Kaufman and Rousseeuw's description this \emph{should} have been true always, but as the original Fortran code and the subsequent port to C has always contained a small one-letter change (a typo according to Martin Maechler) with respect to PAM, the default, \code{pamLike = FALSE} has been chosen to remain back compatible rather than \dQuote{PAM compatible}.} } \value{ an object of class \code{"clara"} representing the clustering. See \code{\link{clara.object}} for details. } \details{ \code{clara} is fully described in chapter 3 of Kaufman and Rousseeuw (1990). Compared to other partitioning methods such as \code{pam}, it can deal with much larger datasets. Internally, this is achieved by considering sub-datasets of fixed size (\code{sampsize}) such that the time and storage requirements become linear in \eqn{n} rather than quadratic. Each sub-dataset is partitioned into \code{k} clusters using the same algorithm as in \code{\link{pam}}.\cr Once \code{k} representative objects have been selected from the sub-dataset, each observation of the entire dataset is assigned to the nearest medoid. The mean (equivalent to the sum) of the dissimilarities of the observations to their closest medoid is used as a measure of the quality of the clustering. The sub-dataset for which the mean (or sum) is minimal, is retained. A further analysis is carried out on the final partition. Each sub-dataset is forced to contain the medoids obtained from the best sub-dataset until then. Randomly drawn observations are added to this set until \code{sampsize} has been reached. } \note{ %% mostly by Martin Maechler : By default, the random sampling is implemented with a \emph{very} simple scheme (with period \eqn{2^{16} = 65536}) inside the Fortran code, independently of \R's random number generation, and as a matter of fact, deterministically. Alternatively, we recommend setting \code{rngR = TRUE} which uses \R's random number generators. 
Then, \code{clara()} results are made reproducible typically by using \code{\link{set.seed}()} before calling \code{clara}. The storage requirement of \code{clara} computation (for small \code{k}) is about \eqn{O(n \times p) + O(j^2)}{O(n * p) + O(j^2)} where \eqn{j = \code{sampsize}}, and \eqn{(n,p) = \code{dim(x)}}. The CPU computing time (again assuming small \code{k}) is about \eqn{O(n \times p \times j^2 \times N)}{O(n * p * j^2 * N)}, where \eqn{N = \code{samples}}. For \dQuote{small} datasets, the function \code{\link{pam}} can be used directly. What can be considered \emph{small}, is really a function of available computing power, both memory (RAM) and speed. Originally (1990), \dQuote{small} meant less than 100 observations; in 1997, the authors said \emph{\dQuote{small (say with fewer than 200 observations)}}; as of 2006, you can use \code{\link{pam}} with several thousand observations. } \author{ Kaufman and Rousseeuw (see \code{\link{agnes}}), originally. All arguments from \code{trace} on, and most \R documentation and all tests by Martin Maechler. } \seealso{ \code{\link{agnes}} for background and references; \code{\link{clara.object}}, \code{\link{pam}}, \code{\link{partition.object}}, \code{\link{plot.partition}}. } \examples{ ## generate 500 objects, divided into 2 clusters. x <- rbind(cbind(rnorm(200,0,8), rnorm(200,0,8)), cbind(rnorm(300,50,8), rnorm(300,50,8))) clarax <- clara(x, 2, samples=50) clarax clarax$clusinfo ## using pamLike=TRUE gives the same (apart from the 'call'): all.equal(clarax[-8], clara(x, 2, samples=50, pamLike = TRUE)[-8]) plot(clarax) ## `xclara' is an artificial data set with 3 clusters of 1000 bivariate ## objects each. 
data(xclara) (clx3 <- clara(xclara, 3)) ## "better" number of samples cl.3 <- clara(xclara, 3, samples=100) ## but that did not change the result here: stopifnot(cl.3$clustering == clx3$clustering) ## Plot similar to Figure 5 in Struyf et al (1996) \dontrun{plot(clx3, ask = TRUE)} \dontshow{plot(clx3)} ## Try 100 times *different* random samples -- for reliability: nSim <- 100 nCl <- 3 # = no.classes set.seed(421)# (reproducibility) cl <- matrix(NA,nrow(xclara), nSim) for(i in 1:nSim) cl[,i] <- clara(xclara, nCl, medoids.x = FALSE, rngR = TRUE)$cluster tcl <- apply(cl,1, tabulate, nbins = nCl) ## those that are not always in same cluster (5 out of 3000 for this seed): (iDoubt <- which(apply(tcl,2, function(n) all(n < nSim)))) if(length(iDoubt)) { # (not for all seeds) tabD <- tcl[,iDoubt, drop=FALSE] dimnames(tabD) <- list(cluster = paste(1:nCl), obs = format(iDoubt)) t(tabD) # how many times in which clusters } } \keyword{cluster} cluster/man/chorSub.Rd0000644000176000001440000000165610417224251014440 0ustar ripleyusers\name{chorSub} \alias{chorSub} \docType{data} \title{Subset of C-horizon of Kola Data} \description{ This is a small rounded subset of the C-horizon data \code{\link[mvoutlier]{chorizon}} from package \pkg{mvoutlier}. } \usage{data(chorSub)} \format{ A data frame with 61 observations on 10 variables. The variables contain scaled concentrations of chemical elements. } \details{ This data set was produced from \code{chorizon} via these statements: \preformatted{ data(chorizon, package = "mvoutlier") chorSub <- round(100*scale(chorizon[,101:110]))[190:250,] storage.mode(chorSub) <- "integer" colnames(chorSub) <- gsub("_.*", '', colnames(chorSub)) } } \source{Kola Project (1993-1998) } \seealso{ \code{\link[mvoutlier]{chorizon}} in package \pkg{mvoutlier} and other Kola data in the same package. 
} \examples{ data(chorSub) summary(chorSub) pairs(chorSub, gap= .1)# some outliers } \keyword{datasets} cluster/man/bannerplot.Rd0000644000176000001440000000575210367701315015205 0ustar ripleyusers\name{bannerplot} \alias{bannerplot} \title{Plot Banner (of Hierarchical Clustering)} \description{ Draws a \dQuote{banner}, i.e. basically a horizontal \code{\link{barplot}} visualizing the (agglomerative or divisive) hierarchical clustering or an other binary dendrogram structure. } \usage{ bannerplot(x, w = rev(x$height), fromLeft = TRUE, main=NULL, sub=NULL, xlab = "Height", adj = 0, col = c(2, 0), border = 0, axes = TRUE, frame.plot = axes, rev.xax = !fromLeft, xax.pretty = TRUE, labels = NULL, nmax.lab = 35, max.strlen = 5, yax.do = axes && length(x$order) <= nmax.lab, yaxRight = fromLeft, y.mar = 2.4 + max.strlen/2.5, \dots) } \arguments{ \item{x}{a list with components \code{order}, \code{order.lab} and \code{height} when \code{w}, the next argument is not specified.} \item{w}{non-negative numeric vector of bar widths.} \item{fromLeft}{logical, indicating if the banner is from the left or not.} \item{main,sub}{main and sub titles, see \code{\link{title}}.} \item{xlab}{x axis label (with \sQuote{correct} default e.g. for \code{plot.agnes}).} \item{adj}{passed to \code{\link{title}(main,sub)} for string adjustment.} \item{col}{vector of length 2, for two horizontal segments.} \item{border}{color for bar border; now defaults to background (no border).} \item{axes}{logical indicating if axes (and labels) should be drawn at all.} \item{frame.plot}{logical indicating the banner should be framed; mainly used when \code{border = 0} (as per default).} \item{rev.xax}{logical indicating if the x axis should be reversed (as in \code{plot.diana}).} \item{xax.pretty}{logical or integer indicating if \code{\link{pretty}()} should be used for the x axis. 
\code{xax.pretty = FALSE} is mainly for back compatibility.} \item{labels}{labels to use on y-axis; the default is constructed from \code{x}.} \item{nmax.lab}{integer indicating the number of labels which is considered too large for single-name labelling the banner plot.} \item{max.strlen}{positive integer giving the length to which strings are truncated in banner plot labeling.} \item{yax.do}{logical indicating if a y axis and banner labels should be drawn.} \item{yaxRight}{logical indicating if the y axis is on the right or left.} \item{y.mar}{positive number specifying the margin width to use when banners are labeled (along a y-axis). The default adapts to the string width and optimally would also depend on the font.} \item{\dots}{graphical parameters (see \code{\link{par}}) may also be supplied as arguments to this function.} } \author{Martin Maechler (from original code of Kaufman and Rousseeuw).} \note{This is mainly a utility called from \code{\link{plot.agnes}}, \code{\link{plot.diana}} and \code{\link{plot.mona}}. }% also serves as \seealso{*} \examples{ data(agriculture) bannerplot(agnes(agriculture), main = "Bannerplot") } \keyword{hplot} \keyword{cluster} \keyword{utilities} cluster/man/animals.Rd0000644000176000001440000000204211711740461014451 0ustar ripleyusers\name{animals} \alias{animals} \title{Attributes of Animals} \usage{data(animals)} \description{ This data set considers 6 binary attributes for 20 animals. } \format{ A data frame with 20 observations on 6 variables: \tabular{rll}{ [ , 1] \tab war \tab warm-blooded \cr [ , 2] \tab fly \tab can fly \cr [ , 3] \tab ver \tab vertebrate \cr [ , 4] \tab end \tab endangered \cr [ , 5] \tab gro \tab live in groups \cr [ , 6] \tab hai \tab have hair \cr } All variables are encoded as 1 = `no', 2 = `yes'. } \source{ Leonard Kaufman and Peter J. Rousseeuw (1990): \emph{Finding Groups in Data} (pp 297ff). New York: Wiley. 
} \details{ This dataset is useful for illustrating monothetic (only a single variable is used for each split) hierarchical clustering. } \references{ see Struyf, Hubert & Rousseeuw (1996), in \code{\link{agnes}}. } \examples{ data(animals) apply(animals,2, table) # simple overview ma <- mona(animals) ma ## Plot similar to Figure 10 in Struyf et al (1996) plot(ma) } \keyword{datasets} cluster/man/agriculture.Rd0000644000176000001440000000274211711740461015362 0ustar ripleyusers\name{agriculture} \alias{agriculture} \title{European Union Agricultural Workforces} \usage{data(agriculture)} \description{ Gross National Product (GNP) per capita and percentage of the population working in agriculture for each country belonging to the European Union in 1993. } \format{ A data frame with 12 observations on 2 variables: \tabular{rlll}{ [ , 1] \tab \code{x} \tab numeric \tab per capita GNP \cr [ , 2] \tab \code{y} \tab numeric \tab percentage in agriculture } The row names of the data frame indicate the countries. } \source{ Eurostat (European Statistical Agency, 1994): \emph{Cijfers en feiten: Een statistisch portret van de Europese Unie}. } \details{ The data seem to show two clusters, the \dQuote{more agricultural} one consisting of Greece, Portugal, Spain, and Ireland. } \seealso{\code{\link{agnes}}, \code{\link{daisy}}, \code{\link{diana}}. } \references{ see those in \code{\link{agnes}}. 
} \examples{ data(agriculture) ## Compute the dissimilarities using Euclidean metric and without ## standardization daisy(agriculture, metric = "euclidean", stand = FALSE) ## 2nd plot is similar to Figure 3 in Struyf et al (1996) plot(pam(agriculture, 2)) ## Plot similar to Figure 7 in Struyf et al (1996) \dontrun{plot(agnes(agriculture), ask = TRUE)} \dontshow{plot(agnes(agriculture))} ## Plot similar to Figure 8 in Struyf et al (1996) \dontrun{plot(diana(agriculture), ask = TRUE)} \dontshow{plot(diana(agriculture))} } \keyword{datasets} cluster/man/agnes.object.Rd0000644000176000001440000000554711707042531015402 0ustar ripleyusers\name{agnes.object} \alias{agnes.object} \title{Agglomerative Nesting (AGNES) Object} \description{ The objects of class \code{"agnes"} represent an agglomerative hierarchical clustering of a dataset. } \section{GENERATION}{ This class of objects is returned from \code{\link{agnes}}. } \section{METHODS}{ The \code{"agnes"} class has methods for the following generic functions: \code{print}, \code{summary}, \code{plot}, and \code{\link{as.dendrogram}}. } \section{INHERITANCE}{ The class \code{"agnes"} inherits from \code{"twins"}. Therefore, the generic functions \code{\link{pltree}} and \code{\link{as.hclust}} are available for \code{agnes} objects. After applying \code{as.hclust()}, all \emph{its} methods are available, of course. } \value{ A legitimate \code{agnes} object is a list with the following components: \item{order}{ a vector giving a permutation of the original observations to allow for plotting, in the sense that the branches of a clustering tree will not cross.} \item{order.lab}{ a vector similar to \code{order}, but containing observation labels instead of observation numbers. This component is only available if the original observations were labelled. } \item{height}{ a vector with the distances between merging clusters at the successive stages. 
} \item{ac}{ the agglomerative coefficient, measuring the clustering structure of the dataset. For each observation i, denote by m(i) its dissimilarity to the first cluster it is merged with, divided by the dissimilarity of the merger in the final step of the algorithm. The \code{ac} is the average of all 1 - m(i). It can also be seen as the average width (or the percentage filled) of the banner plot. Because \code{ac} grows with the number of observations, this measure should not be used to compare datasets of very different sizes. } \item{merge}{ an (n-1) by 2 matrix, where n is the number of observations. Row i of \code{merge} describes the merging of clusters at step i of the clustering. If a number j in the row is negative, then the single observation |j| is merged at this stage. If j is positive, then the merger is with the cluster formed at stage j of the algorithm. } \item{diss}{ an object of class \code{"dissimilarity"} (see \code{\link{dissimilarity.object}}), representing the total dissimilarity matrix of the dataset. } \item{data}{ a matrix containing the original or standardized measurements, depending on the \code{stand} option of the function \code{agnes}. If a dissimilarity matrix was given as input structure, then this component is not available. } } \seealso{ \code{\link{agnes}}, \code{\link{diana}}, \code{\link{as.hclust}}, \code{\link{hclust}}, \code{\link{plot.agnes}}, \code{\link{twins.object}}. } \keyword{cluster} cluster/man/agnes.Rd0000644000176000001440000001736111711740461014134 0ustar ripleyusers\name{agnes} \alias{agnes} \title{Agglomerative Nesting (Hierarchical Clustering)} \concept{UPGMA clustering} \description{ Computes agglomerative hierarchical clustering of the dataset. 
} \usage{ agnes(x, diss = inherits(x, "dist"), metric = "euclidean", stand = FALSE, method = "average", par.method, keep.diss = n < 100, keep.data = !diss) } \arguments{ \item{x}{ data matrix or data frame, or dissimilarity matrix, depending on the value of the \code{diss} argument. In case of a matrix or data frame, each row corresponds to an observation, and each column corresponds to a variable. All variables must be numeric. Missing values (NAs) are allowed. In case of a dissimilarity matrix, \code{x} is typically the output of \code{\link{daisy}} or \code{\link{dist}}. Also a vector with length n*(n-1)/2 is allowed (where n is the number of observations), and will be interpreted in the same way as the output of the above-mentioned functions. Missing values (NAs) are not allowed. } \item{diss}{ logical flag: if TRUE (default for \code{dist} or \code{dissimilarity} objects), then \code{x} is assumed to be a dissimilarity matrix. If FALSE, then \code{x} is treated as a matrix of observations by variables. } \item{metric}{ character string specifying the metric to be used for calculating dissimilarities between observations. The currently available options are "euclidean" and "manhattan". Euclidean distances are root sum-of-squares of differences, and manhattan distances are the sum of absolute differences. If \code{x} is already a dissimilarity matrix, then this argument will be ignored. } \item{stand}{ logical flag: if TRUE, then the measurements in \code{x} are standardized before calculating the dissimilarities. Measurements are standardized for each variable (column), by subtracting the variable's mean value and dividing by the variable's mean absolute deviation. If \code{x} is already a dissimilarity matrix, then this argument will be ignored. } \item{method}{ character string defining the clustering method. 
The six methods implemented are "average" ([unweighted pair-]group average method, UPGMA), "single" (single linkage), "complete" (complete linkage), "ward" (Ward's method), "weighted" (weighted average linkage) and its generalization \code{"flexible"} which uses (a constant version of) the Lance-Williams formula and the \code{par.method} argument. Default is "average". } \item{par.method}{if \code{method == "flexible"}, numeric vector of length 1, 3, or 4, see in the details section. } \item{keep.diss, keep.data}{logicals indicating if the dissimilarities and/or input data \code{x} should be kept in the result. Setting these to \code{FALSE} can give much smaller results and hence even save memory allocation \emph{time}.} } \value{ an object of class \code{"agnes"} (which extends \code{"twins"}) representing the clustering. See \code{\link{agnes.object}} for details, and methods applicable. } \details{ \code{agnes} is fully described in chapter 5 of Kaufman and Rousseeuw (1990). Compared to other agglomerative clustering methods such as \code{hclust}, \code{agnes} has the following features: (a) it yields the agglomerative coefficient (see \code{\link{agnes.object}}) which measures the amount of clustering structure found; and (b) apart from the usual tree it also provides the banner, a novel graphical display (see \code{\link{plot.agnes}}). The \code{agnes}-algorithm constructs a hierarchy of clusterings.\cr At first, each observation is a small cluster by itself. Clusters are merged until only one large cluster remains which contains all the observations. At each stage the two \emph{nearest} clusters are combined to form one larger cluster. For \code{method="average"}, the distance between two clusters is the average of the dissimilarities between the points in one cluster and the points in the other cluster. 
\cr In \code{method="single"}, we use the smallest dissimilarity between a point in the first cluster and a point in the second cluster (nearest neighbor method). \cr When \code{method="complete"}, we use the largest dissimilarity between a point in the first cluster and a point in the second cluster (furthest neighbor method). The \code{method = "flexible"} allows (and requires) more details: The Lance-Williams formula specifies how dissimilarities are computed when clusters are agglomerated (equation (32) in K.\&R., p.237). If clusters \eqn{C_1} and \eqn{C_2} are agglomerated into a new cluster, the dissimilarity between their union and another cluster \eqn{Q} is given by \deqn{ D(C_1 \cup C_2, Q) = \alpha_1 * D(C_1, Q) + \alpha_2 * D(C_2, Q) + \beta * D(C_1,C_2) + \gamma * |D(C_1, Q) - D(C_2, Q)|, } where the four coefficients \eqn{(\alpha_1, \alpha_2, \beta, \gamma)} are specified by the vector \code{par.method}: If \code{par.method} is of length 1, say \eqn{= \alpha}, \code{par.method} is extended to give the \dQuote{Flexible Strategy} (K. \& R., p.236 f) with Lance-Williams coefficients \eqn{(\alpha_1 = \alpha_2 = \alpha, \beta = 1 - 2\alpha, \gamma=0)}.\cr If of length 3, \eqn{\gamma = 0} is used. \bold{Care} and expertise is probably needed when using \code{method = "flexible"} particularly for the case when \code{par.method} is specified of longer length than one. The \emph{weighted average} (\code{method="weighted"}) is the same as \code{method="flexible", par.method = 0.5}. } \section{BACKGROUND}{ Cluster analysis divides a dataset into groups (clusters) of observations that are similar to each other. 
\describe{ \item{Hierarchical methods}{like \code{agnes}, \code{\link{diana}}, and \code{\link{mona}} construct a hierarchy of clusterings, with the number of clusters ranging from one to the number of observations.} \item{Partitioning methods}{like \code{\link{pam}}, \code{\link{clara}}, and \code{\link{fanny}} require that the number of clusters be given by the user.} } } \references{ Kaufman, L. and Rousseeuw, P.J. (1990). \emph{Finding Groups in Data: An Introduction to Cluster Analysis}. Wiley, New York. Anja Struyf, Mia Hubert & Peter J. Rousseeuw (1996) Clustering in an Object-Oriented Environment. \emph{Journal of Statistical Software} \bold{1}. \url{http://www.jstatsoft.org/v01/i04} Struyf, A., Hubert, M. and Rousseeuw, P.J. (1997). Integrating Robust Clustering Techniques in S-PLUS, \emph{Computational Statistics and Data Analysis}, \bold{26}, 17--37. Lance, G.N., and W.T. Williams (1966). A General Theory of Classifactory Sorting Strategies, I. Hierarchical Systems. \emph{Computer J.} \bold{9}, 373--380. } \seealso{ \code{\link{agnes.object}}, \code{\link{daisy}}, \code{\link{diana}}, \code{\link{dist}}, \code{\link{hclust}}, \code{\link{plot.agnes}}, \code{\link{twins.object}}. } \examples{ data(votes.repub) agn1 <- agnes(votes.repub, metric = "manhattan", stand = TRUE) agn1 plot(agn1) op <- par(mfrow=c(2,2)) agn2 <- agnes(daisy(votes.repub), diss = TRUE, method = "complete") plot(agn2) agnS <- agnes(votes.repub, method = "flexible", par.meth = 0.6) plot(agnS) par(op) ## Exploring the dendrogram structure (d2 <- as.dendrogram(agn2)) # two main branches d2[[1]] # the first branch d2[[2]] # the 2nd one { 8 + 42 = 50 } d2[[1]][[1]]# first sub-branch of branch 1 .. 
and shorter form identical(d2[[c(1,1)]], d2[[1]][[1]]) ## a "textual picture" of the dendrogram : str(d2) data(agriculture) ## Plot similar to Figure 7 in ref \dontrun{plot(agnes(agriculture), ask = TRUE)} \dontshow{plot(agnes(agriculture))} } \keyword{cluster} cluster/inst/0000755000176000001440000000000012124335263012741 5ustar ripleyuserscluster/inst/po/0000755000176000001440000000000012124335263013357 5ustar ripleyuserscluster/inst/po/pl/0000755000176000001440000000000012124335263013772 5ustar ripleyuserscluster/inst/po/pl/LC_MESSAGES/0000755000176000001440000000000012124335263015557 5ustar ripleyuserscluster/inst/po/pl/LC_MESSAGES/R-cluster.mo0000644000176000001440000002545512014773330020006 0ustar ripleyusersÞ•a$ƒ,839 mŽ2«9Þ? 5X ?Ž $Î #ó & > )O y C˜ @Ü $ <B ! ¡  × 8õ 4. 4c 9˜ 4Ò < D LT .¡ +Ð ü ;6??vB¶2ù>,,k;˜;Ô6.Gv,’4¿8ô7-5e›8®/ç$5<)rœ0¯àõ$;Wnˆ¡ ¾ ËÖ$ë"&34Z ¬Í Ö/ã1E_7nF¦#í,127:j¥§)¾'è!!ûCC?+ƒ¯EÌ9>LD‹?Ð01A,s -³)áD @P,‘F¾'<-j&†K­AùB;<~7»=ó 1O?47Äü7 9N :ˆ Dà ;!ND!1“!@Å!@"=G"A…"!Ç";é"B%#Eh#D®#>ó#2$8B$K{$&Ç$Mî$-<%j%3{%¯% Æ% ç%'ó%&;&V&t&!‘&³& Ã& Î&&ï&.'3E'>y')¸'(â' ((C.(@r(%³(Ù(Gè(F0)(w)0 )Ñ)Ö)CÜ)C *d*f*7„*0¼*í**+MZ,-U*I2@67DF3 ?'%>C:H1]#KJ/()9_a.$"PWN R< V!;GYB OTQS 0[4=5LAX&\`E8 +^'A' must be p x p cov-matrix defining an ellipsoid'B' has to be a positive integer'clara(*, keep.data = TRUE)''col.clus' should have length 4 when color is TRUE'dmatrix' is not a dissimilarity matrix compatible to 'x''iniMem.p' must be a nonnegative n * k matrix with rowSums == 1'k' (number of clusters) must be in {1,2, .., n/2 -1}'m', a membership matrix, must be nonnegative with rowSums == 1'maxit' must be non-negative integer'medoids' must be NULL or vector of'memb.exp' must be a finite number > 1'n' must be >= 2'par.method' must be of length 1, 3, or 4'samples' should be at least 1'sampsize' = %d should not be larger than the number of objects, %d'sampsize' should be at least %d = max(2, 1+ number of clusters)'weights' must be of length p (or 1)'x' is a "dist" object, but should be 
a data matrix or frame'x' must be numeric n x p matrix'x' must only have integer codes; must be named list; these are standardized to 0>>>>> funny case in clusplot.default() -- please report!All variables must be binary (factor with 2 levels).Distances must be result of dist or a square matrix.Each of the random samples contains objects between whichError in Fortran routine for the spanning ellipsoid,FANNY algorithm has not converged in 'maxit' = %d iterationsFor each of theMissing values were displaced by the median of the corresponding variable(s)NA-values are not allowed in clustering vectorNA-values are not allowed in dist-like 'x'.NAdissNeed either a dissimilarity 'dist' or diss.matrix 'dmatrix'No clustering performed, NA's in dissimilarity matrix.No clustering performed, NA-values in the dissimilarity matrix.No clustering performed, NAs in the computed dissimilarity matrix.No valid silhouette information (#{clusters} =? 1)Number of clusters 'k' must be in {1,2, .., n-1}; hence n >= 2The clustering vector is of incorrect lengthThe number of cluster should be at least 1 and at most n-1.a variable was found with all non missing values identical.a variable was found with at least 50% missing values.all variables have at least one missing value.ambiguous clustering methodan object was found with all values missing.at least one binary variable has more than 2 levels.at least one binary variable has not 2 different levels.at least one binary variable has values not in {0,1,NA}be assigned to a cluster (because of missing values).binary variable(s)clustering 'x' and dissimilarity 'dist' are incompatiblecomputed some negative or all 0 'prob'abilitiesdistinct indices in {1,2, .., n}, n=ellipsoidPoints() not yet implemented for p >= 3 dim.error from .C(cl_pam, *): invalid medID'sfor column numbersfull silhouette is only available for results ofhas constant columnshas invalid column namesinvalidinvalid 'jstop' from .C(cl_clara,.):invalid 'silhouette' objectinvalid 'twins' 
objectinvalid clustering methodinvalid partition objectinvalid silhouette structureinvalid typeiterationsmust be in 1:ncol(x)must contain column names or numbersneed at least 2 objects to clusterno diss nor data found for clusplot()'no diss nor data found, nor the original argument ofno distance can be computed.no points without missing valuesnon.dissomitting NAsone or more objects contain only missing valuesone or more variables contain only missing valuespossibly not converged inrank problem??samples, at least one object was found which could notthe memberships are all very close to 1/k. Maybe decrease 'memb.exp' ?the square matrix is not symmetric.treated as interval scaledtypetype$when 'medoids.x' is FALSE, 'keep.data' must be toowith mixed variables, metric "gower" is used automaticallyxx is not a data matrixx is not a dataframe or a numeric matrix.x is not a numeric dataframe or matrix.x is not numericx must be a matrix or data frame.Project-Id-Version: cluster 1.14.2 Report-Msgid-Bugs-To: bugs.r-project.org POT-Creation-Date: 2012-08-21 22:49 PO-Revision-Date: 2012-08-21 11:03+0100 Last-Translator: Åukasz Daniel Language-Team: Åukasz Daniel Language: pl_PL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Plural-Forms: nplurals=3; plural=(n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 
1 : 2) X-Poedit-SourceCharset: iso-8859-1 'A' musi być macierzÄ… kowariancji p x p okreÅ›lajÄ…cÄ… elipsoidÄ™'B' musi być dodatniÄ… liczbÄ… caÅ‚kowitÄ…'clara(*, keep.data = TRUE)''col.clus' powinien mieć dÅ‚ugość 4, gdy 'color' ma wartość TRUE'dmatrix' nie jest macierzÄ… różnic kompatybilnÄ… z 'x''iniMem.p' musi być nieujemnÄ… maceirzÄ… n x k z rowSums == 1'k' (liczba grup) musi mieÅ›cić siÄ™ w przedziale {1,2, .., n/2 -1}macierz przynależnoÅ›ci 'm' musi być nieujemna z rowSums == 1'maxit' musi być nieujemnÄ… liczbÄ… caÅ‚kowitÄ…'medoids' musi być wartoÅ›ciÄ… NULL lub wektorem'memb.exp' musi być skoÅ„czonÄ… liczbÄ… > 1'n' musi być >= 2'par.method' musi być dÅ‚ugoÅ›ci 1, 3, lub 4'samples' powinno wynosić przynajmniej 1'sampsize' = %d nie powinien być wiÄ™kszy niż liczba obiektów, %d'sampsize' powinien być co najmniej %d = max(2, 1+ liczba grup)'weights' musi być o dÅ‚ugoÅ›ci 'p' (lub 1)'x' jest obiektem klasy 'dist', ale powinien być macierzÄ… lub ramkÄ…'x' musi być liczbowÄ… macierzÄ… n x p'x' musi posiadać tylko kody bÄ™dÄ…ce liczbami caÅ‚kowitymi; musi być nazwanÄ… listÄ…; zostaÅ‚y one ustandaryzowane do zera>>>>> zabawny przypadek w 'clusplot.default()' -- proszÄ™ zgÅ‚osić raport!Wszystkie zmienne muszÄ… być binarne (czynnik z dwoma poziomami)OdlegÅ‚oÅ›ci muszÄ… być wynikiem 'dist' lub macierzy kwadratowej.Każda z losowych próbek zawiera obiekty pomiÄ™dzy którymiBłąd w procedurze Fortran dla elipsoidy obejmujÄ…cej,algorytm FANNY nie uzbieżniÅ‚ siÄ™ w 'maxit' = %d iteracjachDla każdej zBrakujÄ…ce wartoÅ›ci zostaÅ‚y zastÄ…pione przez medianÄ™ odpowiednich zmiennychwartoÅ›ci NA sÄ… niedozwolone w wektorze grupujÄ…cymwartoÅ›ci NA nie sÄ… dozwolone w 'x' typu odlegÅ‚oÅ›ci.wartość NA odmiennoÅ›ciPotrzeba albo różnic 'dist' lub diss.matrix 'dmatrix'Nie wykonano grupowania, wartoÅ›ci NA w macierzy różnicNie wykonano grupowania, wartoÅ›ci NA w macierzy różnic.Nie wykonano grupowania, wyliczono wartoÅ›ci NA w macierzy różnic.Brak poprawnej informacji o sylwetce (czy liczba 
grup =? 1)Liczba grup 'k' musi zawierać siÄ™ w zbiorze {1,2, .., n-1}; tak wiÄ™c n >= 2Wektor grupujÄ…cy posiada niepoprawnÄ… dÅ‚ugośćLiczba grup powinna wynosić conajmniej 1 oraz co najwyżej n-1.znaleziono zmiennÄ… z identycznymi niebrakujÄ…cymi wartoÅ›ciami.znaleziono zmiennÄ… z co najmniej 50% brakujÄ…cych wartoÅ›ci.wszystkie zmienne majÄ… co najmniej jednÄ… brakujÄ…cÄ… wartość.niejednoznaczna metoda grupowaniaznaleziono obiekt któremu brakowaÅ‚o wszystkich wartoÅ›ci.przynajmniej jedna zmienna binarna posiada wiÄ™cej niż 2 poziomy.przynajmniej jedna zmienna binarna nie posiada 2 różnych poziomów.przynajmniej jedna zmienna binarna posiada wartoÅ›ci poza {0, 1, NA}zostać przypisany do grupy (z powodu brakujÄ…cych wartoÅ›ci).zmienne binarnegrupowane 'x' oraz różnice 'dist' nie sÄ… kompatybilneniektóre wyliczone prawdopodobieÅ„stwa sÄ… ujemne lub wszystkie sÄ… zeramiróżnych indeksów w {1,2, .., n}, n='ellipsoidPoints()' nie zostaÅ‚a jeszcze zaimplementowana dla p >= 3 wymiary.błąd w '.C(cl_pam, *)': niepoprawne 'medID'dla liczb kolumnpeÅ‚na sylwetka jest dostÄ™pna jedynie dla wynikówposiada staÅ‚e kolumnyposiada niepoprawne nazwy kolumnniepoprawnyniepoprawny 'jstop' z '.C(cl_clara,.)':niepoprawny obiekt 'silhouette'niepoprawny obiekt 'twins'niepoprawna metoda grupowanianiepoprawny obiekt podziaÅ‚uniepoprana struktura 'silhouette'niepoprawny typiteracjachmusi być w przedziale 1:ncol(x)musi zawierać nazwy kolumn lub liczbypotrzeba co najmniej 2 obiektów do grupowanianie znaleziono różnic ani danych dla 'clusplot()'nie znaleziono różnic ani danych, ani oryginalnego argumentuodlegÅ‚ość nie może zostać obliczona.brak punktów bez brakujÄ…cych wartoÅ›cinieodmiennepomijanie wartoÅ›ci NAjeden lub wiÄ™cej obiektów zawierajÄ… jedynie wartoÅ›ci brakujÄ…cejeden lub wiÄ™cej zmiennych zawiera jedynie wartoÅ›ci brakujÄ…ceprawdopodobnie nie uzbieżniÅ‚ siÄ™ wproblem rang??próbek, co najmniej jeden obiekt zostaÅ‚ znaleziony, który nie mógÅ‚przynależnoÅ›ci sÄ… bardzo bliskie 1/k. 
Może zmniejszyć 'memb.exp'?macierz kwadratowa nie jest symetryczna.traktowane jako interwaÅ‚ zostaÅ‚y przeskalowanetypetype$kiedy 'medoids.x' jest FALSE, 'keep.data' musi być również FALSEz mieszanymi zmiennymi, metryka 'gower' jest używana automatyczniex'x' nie jest macierzÄ… danych'x' nie jest ramkÄ… danych ani też macierzÄ… liczbowÄ…'x' nie jest ramkÄ… liczbowÄ… ani też macierzÄ…'x' nie jest liczbÄ…'x' musi być macierzÄ… lub ramkÄ… danych.cluster/inst/po/en@quot/0000755000176000001440000000000012124335263014772 5ustar ripleyuserscluster/inst/po/en@quot/LC_MESSAGES/0000755000176000001440000000000012124335263016557 5ustar ripleyuserscluster/inst/po/en@quot/LC_MESSAGES/R-cluster.mo0000644000176000001440000002453612014772531021007 0ustar ripleyusersÞ•c4‰Lp3q ¥Æ2ã9 ?P 5 ?Æ $ #+ &O v )‡ ± CÐ @ $U <z !· Ù ú  8- 4f 4› 9Ð 4 <? | LŒ .Ù +4;;w6?ÇB2J>},¼;é;%6a.˜Ç,ã48E7~5¶ì8ÿ/8$h5)Ãí01F_$gŒ¨¿Ùò  '$<"a&„4«à ý '/41d–°7¿\÷FT#›¿Úß2å:SU)l'–¾!Ï\ñ7N$†«6ÈAÿCA9…?¿(ÿ'(*P{-"¾GáD)(n@—%Ø$þ#88V44Ä9ù43@h©L¹./5eCl°6É? B@ 2ƒ B¶ ,ù ;&!;b!6ž!.Õ!", "4M"8‚"7»"5ó")#@<#/}#$­#5Ò#)$2$0E$v$‹$¤$(¬$Õ$õ$%*%C% `% m%x%$%"²%&Õ%4ü%1& N&o& x&/…&1µ&ç&'7'lH'Jµ'#($(?(D(:J(:…(À(Â()Ù(')+)!<)2C`!B]Z_WNXK>ROG.#-D3[J%,"IQ:1? 
'@ LA06$*Y)b /^79S48a<P&HM;5VE +(\F Uc =T'A' must be p x p cov-matrix defining an ellipsoid'B' has to be a positive integer'clara(*, keep.data = TRUE)''col.clus' should have length 4 when color is TRUE'dmatrix' is not a dissimilarity matrix compatible to 'x''iniMem.p' must be a nonnegative n * k matrix with rowSums == 1'k' (number of clusters) must be in {1,2, .., n/2 -1}'m', a membership matrix, must be nonnegative with rowSums == 1'maxit' must be non-negative integer'medoids' must be NULL or vector of'memb.exp' must be a finite number > 1'n' must be >= 2'par.method' must be of length 1, 3, or 4'samples' should be at least 1'sampsize' = %d should not be larger than the number of objects, %d'sampsize' should be at least %d = max(2, 1+ number of clusters)'weights' must be of length p (or 1)'x' is a "dist" object, but should be a data matrix or frame'x' must be numeric n x p matrix'x' must only have integer codes; must be named list; these are standardized to 0>>>>> funny case in clusplot.default() -- please report!All variables must be binary (factor with 2 levels).Distances must be result of dist or a square matrix.Each of the random samples contains objects between whichError in Fortran routine for the spanning ellipsoid,FANNY algorithm has not converged in 'maxit' = %d iterationsFor each of theMissing values were displaced by the median of the corresponding variable(s)NA-values are not allowed in clustering vectorNA-values are not allowed in dist-like 'x'.NAdissNeed either a dissimilarity 'dist' or diss.matrix 'dmatrix'No clustering performed,No clustering performed, NA's in dissimilarity matrix.No clustering performed, NA-values in the dissimilarity matrix.No clustering performed, NAs in the computed dissimilarity matrix.No valid silhouette information (#{clusters} =? 
1)Number of clusters 'k' must be in {1,2, .., n-1}; hence n >= 2The clustering vector is of incorrect lengthThe number of cluster should be at least 1 and at most n-1.a variable was found with all non missing values identical.a variable was found with at least 50% missing values.all variables have at least one missing value.ambiguous clustering methodan object was found with all values missing.at least one binary variable has more than 2 levels.at least one binary variable has not 2 different levels.at least one binary variable has values not in {0,1,NA}be assigned to a cluster (because of missing values).binary variable(s)clustering 'x' and dissimilarity 'dist' are incompatiblecomputed some negative or all 0 'prob'abilitiesdistinct indices in {1,2, .., n}, n=ellipsoidPoints() not yet implemented for p >= 3 dim.error from .C(cl_pam, *): invalid medID'sfor column numbersfull silhouette is only available for results ofhas constant columnshas invalid column namesinvalidinvalid 'jstop' from .C(cl_clara,.):invalid 'silhouette' objectinvalid 'twins' objectinvalid clustering methodinvalid partition objectinvalid silhouette structureinvalid typeiterationsmust be in 1:ncol(x)must contain column names or numbersneed at least 2 objects to clusterno diss nor data found for clusplot()'no diss nor data found, nor the original argument ofno distance can be computed.no points without missing valuesnon.dissomitting NAsone or more objects contain only missing valuesone or more variables contain only missing valuespossibly not converged inrank problem??samples, at least one object was found which could notsetting 'logical' variable %s to type 'asymm'setting 'logical' variables %s to type 'asymm'the memberships are all very close to 1/k. 
Maybe decrease 'memb.exp' ?the square matrix is not symmetric.treated as interval scaledtypetype$when 'medoids.x' is FALSE, 'keep.data' must be toowith mixed variables, metric "gower" is used automaticallyxx is not a data matrixx is not a dataframe or a numeric matrix.x is not a numeric dataframe or matrix.x is not numericx must be a matrix or data frame.Project-Id-Version: R 2.15.1 Report-Msgid-Bugs-To: bugs.r-project.org POT-Creation-Date: 2012-08-21 22:49 PO-Revision-Date: 2012-08-21 22:49 Last-Translator: Automatically generated Language-Team: none MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Language: en Plural-Forms: nplurals=2; plural=(n != 1); ‘A’ must be p x p cov-matrix defining an ellipsoid‘B’ has to be a positive integer'clara(*, keep.data = TRUE)'‘col.clus’ should have length 4 when color is TRUE‘dmatrix’ is not a dissimilarity matrix compatible to ‘x’‘iniMem.p’ must be a nonnegative n * k matrix with rowSums == 1‘k’ (number of clusters) must be in {1,2, .., n/2 -1}'m', a membership matrix, must be nonnegative with rowSums == 1‘maxit’ must be non-negative integer‘medoids’ must be NULL or vector of‘memb.exp’ must be a finite number > 1‘n’ must be >= 2‘par.method’ must be of length 1, 3, or 4‘samples’ should be at least 1‘sampsize’ = %d should not be larger than the number of objects, %d‘sampsize’ should be at least %d = max(2, 1+ number of clusters)‘weights’ must be of length p (or 1)‘x’ is a "dist" object, but should be a data matrix or frame‘x’ must be numeric n x p matrix‘x’ must only have integer codes; must be named list; these are standardized to 0>>>>> funny case in clusplot.default() -- please report!All variables must be binary (factor with 2 levels).Distances must be result of dist or a square matrix.Each of the random samples contains objects between whichError in Fortran routine for the spanning ellipsoid,FANNY algorithm has not converged in ‘maxit’ = %d iterationsFor each of theMissing values 
were displaced by the median of the corresponding variable(s)NA-values are not allowed in clustering vectorNA-values are not allowed in dist-like ‘x’.NAdissNeed either a dissimilarity ‘dist’ or diss.matrix ‘dmatrix’No clustering performed,No clustering performed, NA's in dissimilarity matrix.No clustering performed, NA-values in the dissimilarity matrix.No clustering performed, NAs in the computed dissimilarity matrix.No valid silhouette information (#{clusters} =? 1)Number of clusters ‘k’ must be in {1,2, .., n-1}; hence n >= 2The clustering vector is of incorrect lengthThe number of cluster should be at least 1 and at most n-1.a variable was found with all non missing values identical.a variable was found with at least 50% missing values.all variables have at least one missing value.ambiguous clustering methodan object was found with all values missing.at least one binary variable has more than 2 levels.at least one binary variable has not 2 different levels.at least one binary variable has values not in {0,1,NA}be assigned to a cluster (because of missing values).binary variable(s)clustering ‘x’ and dissimilarity ‘dist’ are incompatiblecomputed some negative or all 0 'prob'abilitiesdistinct indices in {1,2, .., n}, n=ellipsoidPoints() not yet implemented for p >= 3 dim.error from .C(cl_pam, *): invalid medID'sfor column numbersfull silhouette is only available for results ofhas constant columnshas invalid column namesinvalidinvalid ‘jstop’ from .C(cl_clara,.):invalid ‘silhouette’ objectinvalid ‘twins’ objectinvalid clustering methodinvalid partition objectinvalid silhouette structureinvalid typeiterationsmust be in 1:ncol(x)must contain column names or numbersneed at least 2 objects to clusterno diss nor data found for clusplot()'no diss nor data found, nor the original argument ofno distance can be computed.no points without missing valuesnon.dissomitting NAsone or more objects contain only missing valuesone or more variables contain only missing valuespossibly 
not converged inrank problem??samples, at least one object was found which could notsetting ‘logical’ variable %s to type ‘asymm’setting ‘logical’ variables %s to type ‘asymm’the memberships are all very close to 1/k. Maybe decrease ‘memb.exp’ ?the square matrix is not symmetric.treated as interval scaledtypetype$when ‘medoids.x’ is FALSE, ‘keep.data’ must be toowith mixed variables, metric "gower" is used automaticallyxx is not a data matrixx is not a dataframe or a numeric matrix.x is not a numeric dataframe or matrix.x is not numericx must be a matrix or data frame.cluster/inst/po/de/0000755000176000001440000000000012124335263013747 5ustar ripleyuserscluster/inst/po/de/LC_MESSAGES/0000755000176000001440000000000012124335263015534 5ustar ripleyuserscluster/inst/po/de/LC_MESSAGES/R-cluster.mo0000644000176000001440000002477012014772531017764 0ustar ripleyusersÞ•a$ƒ,839 mŽ2«9Þ? 5X ?Ž $Î #ó & > )O y C˜ @Ü $ <B ! ¡  × 8õ 4. 4c 9˜ 4Ò < D LT .¡ +Ð ü ;6??vB¶2ù>,,k;˜;Ô6.Gv,’4¿8ô7-5e›8®/ç$5<)rœ0¯àõ$;Wnˆ¡ ¾ ËÖ$ë"&34Z ¬Í Ö/ã1E_7nF¦#í,127:j¥§)¾'è!!‹CGÏ$<AY<›KØ6$O[%«,Ñ&þ%*8"cG†@Î)C9'}'¥Í!íL8\K•<á:CY Q«"ý) JJQ=œAÚN 8k F¤ )ë @!FV!6!5Ô! 
"0%"7V"BŽ"?Ñ">#P#Ad#G¦#+î#@$.[$Š$9$×$í$ %)%!C%e%‚%ž%½%Ý% ï%û%)&+=&/i&J™&'ä& '-'6'6F'8}'¶'Ñ';î'J*(.u(¤(Ä(È(>Î(9 )G)I),a)-Ž)¼)%Ò)MZ,-U*I2@67DF3 ?'%>C:H1]#KJ/()9_a.$"PWN R< V!;GYB OTQS 0[4=5LAX&\`E8 +^'A' must be p x p cov-matrix defining an ellipsoid'B' has to be a positive integer'clara(*, keep.data = TRUE)''col.clus' should have length 4 when color is TRUE'dmatrix' is not a dissimilarity matrix compatible to 'x''iniMem.p' must be a nonnegative n * k matrix with rowSums == 1'k' (number of clusters) must be in {1,2, .., n/2 -1}'m', a membership matrix, must be nonnegative with rowSums == 1'maxit' must be non-negative integer'medoids' must be NULL or vector of'memb.exp' must be a finite number > 1'n' must be >= 2'par.method' must be of length 1, 3, or 4'samples' should be at least 1'sampsize' = %d should not be larger than the number of objects, %d'sampsize' should be at least %d = max(2, 1+ number of clusters)'weights' must be of length p (or 1)'x' is a "dist" object, but should be a data matrix or frame'x' must be numeric n x p matrix'x' must only have integer codes; must be named list; these are standardized to 0>>>>> funny case in clusplot.default() -- please report!All variables must be binary (factor with 2 levels).Distances must be result of dist or a square matrix.Each of the random samples contains objects between whichError in Fortran routine for the spanning ellipsoid,FANNY algorithm has not converged in 'maxit' = %d iterationsFor each of theMissing values were displaced by the median of the corresponding variable(s)NA-values are not allowed in clustering vectorNA-values are not allowed in dist-like 'x'.NAdissNeed either a dissimilarity 'dist' or diss.matrix 'dmatrix'No clustering performed, NA's in dissimilarity matrix.No clustering performed, NA-values in the dissimilarity matrix.No clustering performed, NAs in the computed dissimilarity matrix.No valid silhouette information (#{clusters} =? 
1)Number of clusters 'k' must be in {1,2, .., n-1}; hence n >= 2The clustering vector is of incorrect lengthThe number of cluster should be at least 1 and at most n-1.a variable was found with all non missing values identical.a variable was found with at least 50% missing values.all variables have at least one missing value.ambiguous clustering methodan object was found with all values missing.at least one binary variable has more than 2 levels.at least one binary variable has not 2 different levels.at least one binary variable has values not in {0,1,NA}be assigned to a cluster (because of missing values).binary variable(s)clustering 'x' and dissimilarity 'dist' are incompatiblecomputed some negative or all 0 'prob'abilitiesdistinct indices in {1,2, .., n}, n=ellipsoidPoints() not yet implemented for p >= 3 dim.error from .C(cl_pam, *): invalid medID'sfor column numbersfull silhouette is only available for results ofhas constant columnshas invalid column namesinvalidinvalid 'jstop' from .C(cl_clara,.):invalid 'silhouette' objectinvalid 'twins' objectinvalid clustering methodinvalid partition objectinvalid silhouette structureinvalid typeiterationsmust be in 1:ncol(x)must contain column names or numbersneed at least 2 objects to clusterno diss nor data found for clusplot()'no diss nor data found, nor the original argument ofno distance can be computed.no points without missing valuesnon.dissomitting NAsone or more objects contain only missing valuesone or more variables contain only missing valuespossibly not converged inrank problem??samples, at least one object was found which could notthe memberships are all very close to 1/k. 
Maybe decrease 'memb.exp' ?the square matrix is not symmetric.treated as interval scaledtypetype$when 'medoids.x' is FALSE, 'keep.data' must be toowith mixed variables, metric "gower" is used automaticallyxx is not a data matrixx is not a dataframe or a numeric matrix.x is not a numeric dataframe or matrix.x is not numericx must be a matrix or data frame.Project-Id-Version: R 2.14.0 Report-Msgid-Bugs-To: bugs.r-project.org POT-Creation-Date: 2012-08-21 22:49 PO-Revision-Date: 2011-10-16 19:32+0200 Last-Translator: Detlef Steuer Language-Team: R Core Team 1 sein'n' muss >= 2 sein'par.method' muss Länge 1, 3 oder 4 haben'samples' sollte mindestens 1 sein'sampsize' = %d; sollte nicht größer sein als die Zahl der Objekte %d'sampsize' sollte mindestens %d = max(2, 1+ Anzahl Cluster sein)'weights' muss von Länge p (oder 1) sein'x' ist ein "dist"-Objekt, sollte aber Datenmatrix oder -frame sein'x' muss numerische n x p - Matrix sein'x' darf nur ganzahlige Kodes enthalten; muss eine benannte Liste sein; diese sind standardisiert auf 0>>>>> lustige Sache in clusplot.default() -- bitte an den Entwickler senden!Alle Variablen müssen binär sein (Faktor mit 2 Stufen)Distanzen müssen ein Ergebnis von dist oder eine quadratische Matrix sein.Jede der Zufallsstichproben enthält Objekte, zwischen denenFehler im Fortran-Kode für den aufspannenden Ellipsoiden,FANNY Algorithmus ist in 'maxit' = %d Iterationen nicht konvergiertFür jede derFehlende Werte wurden durch den Median der korrespondierenden Variable(n) ersetztNAs im Clustervektor nicht erlaubtNAs nicht erlaubt in dist-ähnlichem 'x'.NAdissBenötige entweder Unähnlichkeitsmatrix 'dist' oder diss.matrix 'dmatrix'Keine Clusterung durchgeführt, NAs in Unähnlichkeitsmatrix.Keine Clusterung durchgeführt. NAs in der Unähnlichkeitsmatrix.Keine Clusterung durchgeführt, NAs in der berechnenten Unähnlichkeitsmatrix.keine gültige Silhouetteninformation (#{clusters} =? 
1)Anzahl der Cluster 'k' muss auch {1, 2, ..., n-1} sein; deshalb n >= 2Der Clustervektor hat eine falsche LängeDie Anzhal der Cluster sollte mindestens 1, hächstens n-1 sein.Variable gefunden, bei der alle nicht fehlenden Werten identisch sind.Variable mit mindestens 50% fehlenden Werten gefunden.Alle Variablen haben mindestens einen fehlenden Wert.zweideutige ClustermethodeObjekt gefunden bei dem alle Werte fehlend sindmindestens eine binäre Variable hat mehr als 2 Stufen.mindestens eine binäre Variable hat keine 2 verschiedenen Stufen.mindestens eine binäre Variable hat Werte nicht aus {0, 1, NA}einem Cluster zugeordnet werden konnte (wegen fehlender Werte)binäre Variable(n)Clusterung 'x' und Unähnlichkeitsmatrix 'dist' sind inkompatibeleinige negative Wahrscheinlichkeiten oder alle zu 0 berechnet in 'prob'verschiedenen Indizes aus {1, 2,..., n}, n=ellipsoidPoints() noch nicht für Dimensionen p>=3 implementiertFehler aus .C(cl_pam, *): unzulässige medID'sfür Spaltenzahlendie volle Silhoutte ist nur verfügbar für Resultate vonhat konstante Spaltenhat unzulässige Spaltennamenunzulässigerunzulässiger 'jstop' aus .C(cl_clara,.):unzulässiges 'silhouette' Objektunzulässiges 'twins'-Objektunzulässige Clustermethodeunzulässiges Partitionsobjektunzulässige Silhouttenstrukturunzulässiger TypIterationenmuss aus 1:ncol(x) seinmuss Spaltennummern oder Zahlen enthaltenbenötige zum Clustern mindestens 2 Objekteweder diss noch data für 'clusplot()' gefundenweder diss noch data gefunden, ebensowenig das ursprüngliche Argument vonkeine Entfernung berechnent werden kannkeine Punkte ohne fehlende Wertenon.dissNAs ausgelasseneins oder mehrere Objekte enthalten nur fehlende Werteeine oder mehrere Variablen enthalten nur fehlende Werteevtl nicht konvergiert in evtl. Probleme mit dem Rang?Stichproben wurde mindestens ein Objekt gefunden, das nichtdie Mitgliedswerte sind alle sehr nah an 1/k. Evtl. 
'memb.exp' reduzieren?Die quadratische Matrix ist nicht symmetrisch.als intervallskaliert behandeltTyptype$wenn 'medoids.x' FALSE ist, dann muss es auch 'keep.data' seinmit gemischten Variablen wird automatisch "gower" genutztxx ist keine Datenmatrixx ist weder Dataframe noch numerische Matrixx ist weder numerischer Dataframe noch Matrixx ist nicht numerischx muss eine Matrix oder Datafram seincluster/inst/CITATION0000644000176000001440000000251511634214313014076 0ustar ripleyusers## -*- R -*- citHeader("To cite the R package 'cluster' in publications use:") if(!exists("meta") || is.null(meta)) meta <- packageDescription("cluster") year <- sub(".*(2[[:digit:]]{3})-.*", "\\1", meta$Date) vers <- paste("R package version", meta$Version) citEntry(entry = "Manual", title = "cluster: Cluster Analysis Basics and Extensions", author = c( person("Martin", "Maechler", email="maechler@stat.math.ethz.ch", role = c("aut", "cre"), comment = "enhancements, speed improvements, bug fixes, since 2000"), person("Peter", "Rousseeuw", email="rousse@uia.ua.ac.be", role="aut"), person("Anja", "Struyf", email="Anja.Struyf@uia.ua.ac.be", role="aut"), person("Mia", "Hubert", email="Mia.Hubert@uia.ua.ac.be", role="aut"), person("Kurt", "Hornik", role=c("trl","ctb"), comment = "R port; and much initial help file fixing, 1998--2000") ), year = year, note = paste(vers,"---", "For new features, see the 'Changelog' file (in the package source)"), ## FIXME: rather give the URL to the manual on CRAN ??? ## url = "http://stat.ethz.ch/CRAN/src/contrib/........", textVersion = paste( "Maechler, M., Rousseeuw, P., Struyf, A., Hubert, M., Hornik, K.(", year, "). cluster: Cluster Analysis Basics and Extensions. ", vers, ".", sep="")) cluster/data/0000755000176000001440000000000012124335263012675 5ustar ripleyuserscluster/data/xclara.rda0000644000176000001440000010772411573402372014655 0ustar ripleyusers‹,\X”KD@¤”éînölïÛ€](v vØ-6v÷®Š]OEÄP°AP1Ð7³£ßçûÞ{ðýûïÌÜ{Ï9÷ÜQ :F˜u4300hfЬùgsò¯FÍÈ? 
Éß–ä¯ÉØÞƒ{ìe`ÐÜVÿ[–æÇaPä¿nVb1Â?߈ÿ[ÞšS—¥¹Z²ß>ÉWò±£{ÔÏË|lsÄN¥\óÑ»†/x–q6—2cMKÃ9õà`sX½¿^x\ûÚ½Î#»Â—/ÃMáÂëÛ”ãÜJgžÿpè­%‹w°8¸Ö;yòNþýàû²zùĽÛ9Õ a“FËúÀ­û'ñÎJ½ƒoØ¥Qú£›|á~¸6³š0VÔÞq>w¿bƒ ¬û·wÝ‚Ý2£üaàðÇ{Èï˜ÎwÀýªâi¾p1qu¸ú«Ì=‹r—ﺋ¸ôK;XõäUœß’ ß5CËÇ Ï9¶18|ñÈéHÊŸóÐÄÕˆí_hhµ)Þn’¶µ& å•üi¬´Mµ‡/éãŒdYÿö7S7!b{¯~ž^Wt-.›MWÐ^[G¤­=:aÕ—‹Wº¹À¹ùù¯vG öË&Ëfî<8Ï:¶é|·Úú§]¼Þ–f ªÀå×ÒÙGt–­~W¶iµáïªï÷½ÍƒqDûžÉà=ÿøÉËV×ç7ÅóÆüJNÙû ã\<”ˆÞZÆÎ 牧kCˆÔ´]Hd-|ÜÚÝÿ±–óÚOÒ³ÒL˜AV¡Ÿ q57ƾý¶SgÓ¾®éÙ™Eh©ˆ®´z\Ëùþ!~ÜzÕB´ëý³ÁwúNXغ{´nüÿê¿çkËýït$gÊÑU3`¹MÙ‹Û{,8ý–{ÊOêÚº.ë%½¬s.JÔˆX‹—.§w@ü×/}OJ“ázs]TÇ›îºV]O®ü¸Þæ«í‡<º„´œ;Œ–Ãïq‰{~gSΙ¯bN§ƒžÚÚâÏ~Ü~;ᣑÖœ¶G`ÂænóÇÝ›e—w曾’õ²ˆ:·c7Ü7—-z3ø€¶,¥ 0¤›9Ú?×x´Ÿö™¯xð6çYÛ¾ôéŠoð6=¿%ãÙ„›ë¸¥Mƒö«ÅÐ!kªnÂ.7¯Ý«倻m²Vç²p¿ùõ7ˆ^ó*¤ÛËnðÙg%Ë$ydíÖñc{ Ôø®´J±!gÇ:p¿eÜ`þ-ï#\¼íÙïñ :³ܬW}^#tÔ" ˆ/Xp¸·tÂöÞØü¸×)M­fÃI¡9âÛN«òÞûí|%Uy=þÀ¦Í ×áÏÂñÎÀ[»ê»£Mª$ÌiÜ_NUÌT¯Ió¯éÚî «Í.3‡çþ,²”¹2GüáÏNÃf×}Wö›"Ü)wÒÔî=îþsÅˈ#ðì'k) BÄ̓eeÍÒàõßâIƒúꯚ:á¬êå‘NT ìnAͤ ‰:[[qͧEœŠg}¶\„¿÷Æ ÑÓÖYÿþœóT§A`§J»Æ£h~§8ÐæÁ¿åsWŒEò±¢Ý6BøDÏÖ·§>AÀ»%ÅÆîè8.ùáĶòZ/øi ô i)¶¡nÉ«—ñÚÁÿ®ìë¢ÎSUjÞ0û7:Œö>Þ©3EíÖÏo#àÇ¥#‘„Èú÷]‚e@@³1[ ~ÃëeýNyÁÖ^²ÆðÒ¶\¶ëû"„7¯Ÿ\†h+ç™Cù»à îRšÖpAFú÷ÒþQn>r§¦¥BîüQ×lóÛÙk¾q~÷ayÆ»OÂö5'Ÿ ¢Ý\úœ[_f·o´¹¡}ôÚs¸xÑq„”9,Í==n;ĈOî‚_듳|v-Dľúy1'à8>.btîD—ImVÎjDûÑ!-ÿÌœû´fKòÇôALçöÅ¢ÃñŒùϖŸœ—×gV·§/º^Ó§di¿IúíYÀ÷‚홢ԭ¢:¿™ñxôa¸sÊ:÷¸\0•ØD~k5E«é°ÿrt郎!ðoòøù.‹|òІ×Ù™Ó’z6×E-ÓÖ Ç”Óî× i6¸Í_<»;ë–ξÇñ À_ÁßëöŸýyÚ+.WS6ßýƒpIÿÇsw€ûƒ”’˜ñV:ûÉ3FßxýáâI„_S' ³ÿRà»}æyŸ|X®}Ñfl¾³ËRFïuÝSô­½bt‡Hy?qÕÞ% Xy[ü8 ŽW®F´Z¥³ŒÍ<}í»"Æ'Ó )<7¯³šÓ4ˆßZ*uEЈŒ«3j‡!ñÈNƒ”wäýßΙùÀ5¾¹=wõ9ßä^­-Ö^÷JúPä§ýu÷yyÑ4ëCàCÿ'ˆiV×,&ÑÖŽ—Êm.#²÷} v·á¾.­vUç}Úšü¢9}]káßë3 ¬&ø&Q2î4Ü.]4=ô±C2]ú_Z «P}}EûÓÕÍò¼³fIœm<ñåÅ‚Ìýˆš~kŸw;„åÑ…L†ÿ€{Âyw͵ߗ­¼ßt Ü™Ö?¸ôF‰ç|¶uý†¤¿tÃáí—ùÈׯ7Ú¶»×yFaü¾>˜e}à!YŸ’ú¸½àÓa/)ŒýØoŒ)cc|‚NŽÒ~1ÕlbçR]pV/¿x£$¡oUÂöµ0. 
aÕû5<Žo½Õ…‡V´Š÷Ý ‹5æä ñÀUÚìÉßó¼ÖÞ¹ÀßµÕt™a"xgªßõ˜~ógË·N ¬n#“”•†ƒÚúá ß•†¬Bûe_®÷±7Ô™œ›ÿ™;è1‚¹á9÷V ×>h0m½òg,¼”ôxGdžCÀª°³:‡/.š›O‡rîö@El'´sükqòG_8ÜÍÜ}F¿³•‘íun††ƒ«gWÃó<…Íà¿ËáL_߉ðÌIi>ü¼ÂÉvÝ*Eø¹OÙTWÓË_çÏLA«C{^yÿì« ‹gçÃ};á­Fñ>žê¬ ÎöýÏoÓ ¸^²¬0³?€ fóÎØ¶i*òþ¨´5}c–å]Bô\² íÞsêO¤¼þôä<çÇVÓ ¯‹uNǶ68}®s^üÍpÜÄRøm3!â!¢MTCö?Ç‹çÓ‘: 1}:å¾éòVUVGÃvÂMäª[¸}$gJßCèЙ'{<ÌrŽp/IÓÙ†È*oZ‡ä6‡½zt~»)ï[¤ŽqAxë…â0$hïÿg»3 ,!+¢BYÇUó&ÄŽ`ó u5oL‘ôX½ƒ?u¼ç²üàrôÓ±f=ƒuNó²“Í"¼íBS’Ââ{Œ›* ÔÅøÔ˶m»œ³OºêŒ‚6INNû )Æ}†ƒ$ÏÂyE4Éw­l[ý8 ÏbNï .ßáûôû–ê’<˜Üx=d£=ìþûÙñ»3ÝòÏW¦ü…±ÃN6CŒÞ8ÜBèêæ¥ûׯB÷½UÃv„êZ; ¨ªqÀ„ülDÛw|嘄æ»Ùç;µ*ˆòMDœ|ÀÞ…k¬¢Ë1Pg¬RUkzþ·˜œ!;{!Üæ”àµE8D¬_4®G?ÄÖŽïh•­sY];áÏd7´ð‡&^IøÕï¡£ßèLÏ]KHË€P ‹Ó˜î‹þ®ðÊÕ~ždZvòA’6Ï(\út%¢_*U¾¹¨³h¬ðäyõ‡¿™>êb&ÿˆìæµá™ýí¬sžü#v!¼³úî—vwÔý“µþá›èÕv>FÌ%ŸWrUl¢@ˆÙ£_;F`ùßs a« $ÞÌ^Ç(Ûë|­OgΨ·œ”³C§unf/]â°Vçgøª£"]gw½ç ñ›»#:Äl[zè48Õn.œ\õ ^}-jþi3üìžMÞÚ(Cü3 ^ŽØ kÓ¡Í¥„WðÜfÓóµ ‚¦”P‚¶1»Ý»¬îQž^]]€°“~;  ÏçY'ÆÌ@@ƒÝ–»upôH‹|Ôø!iÚ·÷ç]:¤” èÊ×ó »ú#潞WÁ¿©Å•´n7×»Ë}›ƒuŸ ÷˜š÷ÓÙ7ìq¢¦Tû[`#yðÂG×â§ä¨:ÖH×¢"y\ õcD†žË_–ˆõúÚL[›€û$1þA² HDØÝ5ûj‚°ùÂìjõßµÁ¤2ØÚߘä|x1Âç,°X·. 
>£¯ÐG䫬¿¬ÔY8ÄßÌ÷=„Èò)_§mMBtÑùÆØÃ›á?ßœ&L]+ׂß  &¼xhÈRĆUÿ혠3Ù²_á­åˆqz®ìÖz|Lžîþô¢‘ž 3µO^ÙÀ¶¹U|sû¹:ÿgí:zwi®kyãÕºÛ§‘8=¨ªÏª}ûqâ°3¾°·L%@'<¿ÑãÔ‡ø‰fŸß§BçÔhé18‰ó«_§’!‡a•×öq«Š0Ä-58}¶×ð !Ë:î gÄnØ9Ù!?jRx?Á¢¨¾iiJ º([ÝŒàÅ´ÌÄhÆ|äëœÇ)tËßur2öµýŒ¯, ÷c˵×f\GdÏ^ÛZ {ÃñxJG–|69¤3}¾|}ÒòÙð¶kª?*Ü ›´6GF½;„˜ßke; 8:ï…þf$ÂÐv¡Ÿ¿ÀÞ=kƒÔ¤9ÃŽMr‰à’¼ã¶†9ÚßS=WÇýÖV‹÷$ŸJ‰Ø¦_§If8%› "Ä4®<øöÆq¸ÿ¾Ó;ª@ ·)¯z¼ÂA›q³;><òñ¹Ó<¹ÄA˜ã/aˆ¯ÓãV~~1çóÁNmCÙ¬‚Ú—ðÉyÖæÚàðj9:ºš·ZgÿØW¨õù‚àìÝ‹Án[û%KRº"Ì£ËÙ:·GÏ—Žýä‡ð<}Þm—:Bd§ îòóÄ>]`&Q®ùq)€óÕ6”XØì\3¬ë>]‹¸ ûK;ÎÿqZvD¯ž¶ŒÛ^ ûí¶Ñú”Î*ÇmACGmM¿ ¢CS¯#4pÞ¯ß9\´Yû|@á4{mÝ‚Þ81p˜0aïÍ3à +Ù¹&YçvC÷¼£n.xm+—¼üœ Õþ&³g6Á}ÿ½y†ŽÖu= ê>Û\u‹CêmÍ&÷OýTÃ=¿ÒËf~°¿¾A #bçà¼ãº-öw0’ò¹n]tN:7}úàñ×hÁ„Óç QÞ‹çÀ[Úó½ò.Zž ªýqW ïI¦/UÙ¡ Õ©MÔΧ ]çU¼5qAÚ,øÊ.xø»!þÊM* ÁïÇ®â=wáâ´wðÂ< 3 Êþ;ÏÃÞ æÔ;ÕoJÿÀC@vØïÆVà™û"âqû |z£w× $è’»”<ÌΟã±NÞÑü}ðÛeõÂÃÎá&ÖFßM¿®>;ûúvD5 N‘G’ï[x÷òñÅ ¹Úå¯Qøäò¦?¿ïBB ꦢµ4ÏÌ•ðœÓ¼Ý»/jøëéc„¦é:¹ÁK°š$žœ†.<á×=+TKhdÎ.w³MG.ߜùqdq÷ª%ˆfu=|--Sr¸L/¡ ^‘¤j8´E„¯ú·OÖnD—³çFÖ>§ ¤ý¾š,Ã×U:׋;߻徇ÏZî[ ê…„ÿ}>Ÿ3ðLŠõ|\çSCa´®‹ ž‡6!ú<§Ë§ŽzÙ>F[o=OÓþ.²|1di>¼b?S¢¿“«§JûN@ï\@…Ós] ùŠ1ˆ”›Ë­_×µ›-kuûFW8mú ©y„@#ÝëšžÃ#ƒœŠQ[ºœhØí8—qu&ü¢>Ú…Ãr±ª—sñ5x52¾zUNÖOæŒç¾ž §ÌŽ}„žßt­x4-'Àó唥äqìéÞ_pé÷ú]‰‘7â}’relÔ9ö~šñ(½nµsO˜ˆ!´‚é š“Þ—•p©ð˜Ôl‚öýó¹ì?7¹ÜÞK×â£Å…®/ àÕ ç­ðvþÍC“Àqê6ÜÛ´w‹šM­Ô!tŽžèl¶>šÕÿ<¼—å­üëà1á /¢]‹îÛ&Oˆ6é=&’ŽøC¾´#ê¸êýŒöÇap|SÛœu¶öêõ£Öy&BÙ9Žï4p ¡÷jQØ’æÅû´·UÝt[Ǹè,Ó“KºÆŸÕòŸÃ¶T¢UË›¥üÅ $¶QR½™Žê·oÏà#цpMœwüÅ̳ðã‘@°„ý×”C«¾¾€µYØãí Ö[‡h+r\1õä6Äü'ß²ã\&‚~Ï)ƒiì˜ÿê¾N@Äé·›/ 0F’eÏ©£:¼Ñ>ìn",ê~Y[^bšõf§Z:¬SF(íGŽˆÙb÷ç3T­ân!"ñB/[(´ÇÊ/Åì_ŽP5…‹Ñ³»” Ø«N?>kØVÈvmX¿ìˆôi}U°°íˆyk?´ë|ußËG8sÇF}þv%a6$ÊÌ&‚³º!²¾ºà×Kn|; ¾ñ÷åê3ŽÊðø+’Ýöm®í<ÆÏ+|G½R¢Í·s•}Î^B|)ù˜¢íÂêgbÞæ‘3C;ÃéÉ—ÛËÊÞÂú6«7Ñ—G¤?ÞÔÌBz²;X“ƒ¹Eç60ûc´á‘ea„Ø„"¨qòD“—k­×yµ¿ëŸøpço†:÷Ãýâ\&iaU>¢cõ3ÂóvwµmjÅiº?áò÷ñßà}P¯Ûhk¶­üVŒ 3z]”ó½ìT•8é>F9dÌYóŸÎÚ:ÜãØÉpoð|èÙ.Q—º.¾#OBb±óš¾?"–Få»î{ñ`Y¿l„Ý£ËÓv;U/ŽXéD=¯Öj}vk¸•«s³óIîs_[³ge†k’1\S›§Œ}¹TgÔaØÑÞnð+;”¼o~#¼ª^[t?d/KÿQ$ÒjÉpŠçäó>S×IùK:!×n+"º l3Å›A©öåÏùA§RáR­žÎ(Ãþš»Û+­õÉ*y´>ã½ëp®ç¦íqþŽÉðõžd_po:šÍ#«(p~5ö>û!~<¯h^•{.ÐYû‡mõÑa…_’®< DûÐðêú éëçÅ­ÏÒÆ«Ý…Êšƒ´/^ò'f-9 ÿ*ñ…VaµðŸî½ÑpÉöXkx_œUÙ¿v:§¶Ô0ÿg8Â%=O™·‘õSÎ6 
âúçºeÍ#ЬKß‘¿6º’<ó"ÖâI!|?ÿÙŸ»}$"¼™þã˜,Uì\ ¿¿ÆÛI‚£QÎÖÑ¢½ˆ*Ù5ùÓXíëNÝ–êŒn6âPZ¨DXÓýŒnä•îHt£<¶ š{#ðºrÿ— ‡tm~dlo4 Aô×Ò™{‚¯Ã%¦aB¿)ƒÓe%p0(ÎYî`Œ„P>U¬uëü-‘t¼=õ:¸ÎuÆAùZ¡NžÓN \Ç}éª3Ù0JíûøAàu„· °”öyû¼MƒoäÔ7ŸÝ˜ø¦§®5WѦt|—Öu­±ãx•óE”¬ÛÁŸÔk7ë§ÂÿGº`bØ=ÛO'(—ßZÔv¯©@xí è,Z7Jû ø‚¶…õ`øß·®½ÎùØv[gGí·y=¯9˜ ³}~÷ñÎ xM3ñìîlø=_VVóËPûåd˜MÛ‚:‡÷ífJ¦5ÁSsùpû³ðpB듳|9,›÷îuÛÖ ·…¶¾pø[Û‘œ·¢aJÇú˽Àú¡ò‹Cºþ܃v&áG¦(ADkÖˆœÚƒ6´šÍÐóá”5ÃÆÓ!’Sí2pôr¸Y—ߞѪ16§>ì:¾ nõz£¶êãÍ`Á“@}ñJ·våuyÒ~\?²?óC+ë;ý…]T®†ììÞGZoåÇ^ËQ=¥dIÛZ¹0Ýãýñ?õR‡y67ùXÜzFíyø¬JÎW?ƒðIG/Œ6'u£Î½$mäE¹r@è›M§¼˜‹È [Ã>Ó-êÊôßvGgÙÉ-ƒn͈< s]!jÆûOg7îüäeá™pX;4`ªIÜæ„xú£¥mX«ª‹ÖÚ/Å}lR ‰C¶ìnÝ.H¬æqRšƒ3¢Ù3ƒ;ÇÀ»ÙØ0ÊÈ©ô ’®”¶™€óˆ–÷ H»t¯¢Ó•«H¶¢p#Òøß Î.NÒJ¦Ü€hAØÎ6±!|Z@…¤ÕONnwÎÜs¹)Ë“/‚—I²”å[ð×ÎÚ2ùö;Ä_GV$„~„æ ª„83hÂìV‘þr‘Cu‡¥H½ó·Ü²èÒ®¿=¦m¿\ÕþÙ}ÕPEÙQ!|—æwfx@ÍôQ Óo·ðaS,„C `:‹Ô1ç­ «u·XîwýŽD±Ãè‹!uç‹FùÃ6f—wŃu0•/”³s‰³ªèC;#1éØq«»?5ö†hÅœ®¯{{]øâðËöHr7ïm¶Xþ{É”ëK 0ù^yç†Ò2M‡[›A6ù„¬¨Ålˆw"z¼G¯SÆ??@ºâ~ßÛÜ/Np ¿µœVí醪¬Á ®î­¥2ßH2ïÑJÌ}«×!Ù3šÖ—H¯±6³^K«¯NW÷u—ñ›~ÏlHoAä³ùÀpE„‡éC‡¬ÏÍq¡KÊ}pËf7˜ù…ȶŇ´ÿždÙSÚn³€ Òá·RbþÙP D ¶¿¶|ñönÀH×/U æ€Ó’|lÜEˆ7 :AVÂÿNxn^;ÂÕ”¶%›0ζÆNžÙË5‹Þ­GÚwMñ‡fõHiÆð´ m†Åå ஬»÷Q Ñ ÛeÚ–Ë!Oe-$»óŠÒž .äÝ•žY\Ír´–üobÿƒSëÈ:¼è8Þü£§k4‰VÀÇÙÏ­G~âZŽž îÂ;^©Ù^àMþJ€DƤ\m¹ñ}}âë(£wiPUÞ챪I©,–ñ*vn%¶3G¤­ƒ$^žx3z¸+Æ©ïF}@ª;)óÈWª:’Ê !m+`Qä-ƈ!ް»UêròzMDÓÞ”ÁéÀ½ø¢'ɨ†ÑI Ò„ŒÿЬçLÙ´ ¢K´œYAôÀÁ`ûÅÝH›ÁâžWXôã€+LwŠêõUð¶änquø•ä8˜ý÷ë ¢)·@ÂÑíèC'—½CrÙ}Uù¼²¾tç%ã˜Î,­`:FÜwO;Y˜CÑçXjÅfD·µ½ç¼¦bŒ9?.îA>‹_Áéï6N‡A¸ô9*‘l}¼oUÂà÷”1ûú˜€{­bè¹É§À™1¹Ÿe-øÜðò?”í²†´:¹Å•´î}Ó%?¬-xÚ/ ˆ‚~MŸY}ˆ¤f{{ˆý3Üu?ˆ*f˜!Ù^Î図ç†"åÈPJ˜ ý­ïßCš$ºÜg¯RÅ]ÿ;5´üf;—š¿xq m€5BôáÛ@î¶²øûx»¶ m Ñ-}='Ÿõm’¦?¬Iáu´M¯à—xyû/$Wôý"¤‘U¿ë1ÜË.KŽýi á„ëý‡ ìý±Ý‰ëý¥@^ž@ÜH ô&çÐmCÒm•Ñ}Õðßñ³–¿žþ§Â^,ŽbƒmÛy{ ÜÆŸx[XÝãy»µx*°E¢†¶Ã )H6!)H51{ éÅ¥‡¶ÍuBZÒÅ÷ቯ î@~ê™^䈫ÿŸú¦=Þ¸ÕŠ‚ˆ}“ò2Ñà׎‚Aàeéû ˆ¨[ÿàÒ¯»žÔ>N?¯ï ¥S“ z£n3zö5ð+H¹¸³™¬Ë˜#×øA²‚é½ñå1 MEnH©¦BrN‰ÞÃ~[C<Ù´‹CÆ\ð÷‘ê2Dç÷[ÛB7g © à•Q8Gê‚¥çüâó6x œüã±'$ÛZ}ô›W˜ ±î÷“HœŽbü•¿ìÍê² dˆÛoÝ2àAøÚ‚Dâ5çzÉÙ£î7Ï@S å&’(ªW–yº=xAìèjun'ÄcÃÇßÿ@J«W©{[@y—ÔQJÃç!ùý–¥W6pÁ»KNÙЕà~gq(~¸5gÊÑÕˆ¿Èx•àÛÏ—‹ÉÏ¢Vã’ý¸´2š¼¼.®çh!^SGNÒªÞ­ú~8„Ûx?õvƒ`ئ· !©c82j!•‡Ú#¾õŽÂ»W`Ô†ûàmœEa¤îÓ÷1À¼‹6LÁ?Ïô^;]ïzðòÑóxpáØÔ¨¡ÅÝ3!ˆ 
iý7Ôô¼Q-û|~él.œQßïEQ+‘üî3mô ¾ç«sάÙ,)*•@zæw¢¢W?È>¶- lfé#ݦŸ þz>‘ RO~r~žGê}£ Rk’Ó^Öó6ˆR&‘ÁGâ‡û?ÖÆíOj¹óͧcàn F¤Úo«ïC2㇄œ|ðG¯?‹àT%ôHÜÎðª`]*ІîC$o!æãb= ñn×òWÖ•SÀö²ýn¥kIõ:”¼oR§Ùýåó öby/º†éæ|;R½õýE¤,ŸD,„Ã#/ø×A°l;%Œ'±z‘ø³)ï•Y÷Oü]ãNí…È”å3A±QIÏAœßƒ¾(„]nI«£áGâÿØö}†µhM± YtÑc Úëg AÿâfÉ#ÀYAMà`¾=~ ‹G¡Û^MŽá+ˆB®XŒ*¹ Nëq7²ó*íÝ=—d"ˆUŒoH»<4\7’ÙõG…›®#aËû’Z.Ôˆ‰у Qµ­ÙAQGí'FÀU½o ¢!w S€°†ÕaرsÀÍ&»êt‰Æîú¯Ý ÞàÃóÞ̯ ÓAÒ‚.l ‘ñ‚/ä üBúQZ.“jo{Öãõ3H?~ÛÉñ'ø:ë㋎1þŒ¢IôÀ#ɆÞÁØ9û¯Ìà1µS "yŠ=GÒ}$èvë÷-R¡°’[¡Aÿ®W‘ê¬ÞÁ[Ò†8Ãþ »ù” /Ä7.œI*"¸+™ÿ/å„÷ª‡Cðוòj±fÜÏ•H»Ãx,Oß®4CbðÅ‚$ö<°éð oÄï¢éÏáÍF5¾ácƒ'ƒßOÑsÉw>µ‡/ ûXÅô(EýÉ'-§„d8ó­É"©i<øôýSˆ¯“rÔ· ¢Á¬/-Ni>ü椧¯ê¼oì_’wõ}$HŠô|™äI½ÞÁ§äÒ™{H}š¿çÝìÑßc~,îr‘tÑàkgøS±´%÷Šû`²Nõ_ £„ذÖyæP—­)m#ëXp&x$íyO’³~fì½K“–Leæáˆ/ñðw=ô“Ô#ëž´Þ+3õ€N=!âèõ’'¢ ÎÎ=‡ ZJÚ!Áù’ÄÊÕA4–‰ÖHm}‚ ¸„x^Èjð@TÆR·‡E] sÍüìúô’¼˜)AáíEóè/@ðàïnÿ'ˆì˜S´Oa þ‚VÂì‘ÒšÂz¤Ð(ëµ 7™?Jp‚ñdþû½ý»DpXz§Ü7]!:Ñ+ dÑ"³méaà%µr‘·âƒTâ#­Éà;¯_Ãpzº?Ë˲3m#%oÁ3!p4¡\E§h’!²´î b-©}¢$wûÒ?ˆyÖtµôÑ"öÙó· iÅz½ ¢#„U÷J†0mü7ÂüÉ{±ó)޲ÿiAWÝÌ]Bâ{ë3Š-˜NÃÉxi‚rã¢à­Rµ§Ãé)„¿®g8C4¶|S¯ªï­eyÎUŽ+â K`¶n¥¹—R™/DP>qïvqÒ>L¬XyðxÆwÈ&ç¥ï Aô”KˆS ø…,oF ¯ºØUžAbÃ;bëäðG‡C4ÑoÓóñ‡ÿ¡@â9³icÅ$oRÛÁ[—ŸQ£8Y—ö´#IÜ‘hƒ%7!èÉò¼4øæ¡‰«öA$U…\:]ƒ¸™¶4Cš#…#Fl{Üë”ñ/ðº{¦1žÐå4!üdi굿C žz‚#ð[ëý„/2~È_õ÷\›çH aýÑtï½oÂ<Åt3‰Ù¢wÆ#mØo*L n …GfàNe¾sáqæ_]Hwšs \×¶-³t ¼FíŽfàO#ßðþÑ=*ö솈ÇpaZa¿Ajˆ>3_îÆ­>Ë–#ñÆš­•šF`–ôd÷hbçêõhO1‡{ô.w8~[{)Þ›æ<:ñéiÛ#ž+»A¼ŸÚÿZÏù·þ\ ‹ ŸPƒ;ÁÑ“g¬^t"müAêGa„'É¿LçJ~Qb1k É‹›©| îŒÄi…g!vq·ØY‚dã‡Òý)q‹¿äu†40Èùgú çìtÚ0OÆ|oâ*¦oÄ&uÞßÒý›æ–g“ßëù¹nĽÙùÙØÔ@øéñöãú]{‡ÔÊRR –¯&üsq¸oÈò–:›TóÐa»ÐtÚ üò7”x‚×ÿqìÕÕoI>Ó÷‹ h¹«x#ÄïÜVïJDj)óY äTNɃTAX30xÉþ³HÄ#5œÀÿ)ßbÄø&?Ÿñ¥˜ãôäëéñ$|`³¯ïëØ­÷¬œe™´è1쮤OxRqÃb·ì¯+×Õ!5òM»¿ï[@àõgÍ„º·uŸZä» Â¥´½”Eê'/g:€ð•ù;"u;ÛǤçTÞ Bz”Ý ëËHÇò>¶~±y0Êünô5;Þ¬®!ÚwqJ¾¢—>’<ô–¼öô£ˆÙÍüI}NÎgú›è‘5 Cø ;—3Hžeø]etÜ4JoºCúÁgׂý!ŽMé )þñ>æSH*$Û=hRŸ3œ‘ünäÌÐNsbsqÛõ¾HˆfßêÂ~×ü,u AòåOáœA¤`L/!¼qã"’ßR¸Mâd(Ó8Ûñ¦¿@Òã| ‹sþ.*Yœõ2âH÷/ˆöbñªnXûuèðRÀ Þh¦#†5Ýû+`/É+Œ¿‹WÏ ÊI~mL'⑲)–÷* ÆÎ@0ÈG[8¿ØÃ›#ׂßÂÓëJLb‚izèŒô±z¸=™®&ÖäNuͳ’OXÞ“,¹I ‘¡DIí;Ó?¢†2_½t ë‹Ä¤þ;¿ç4>öçÎBnÉæ|äkX¿YÈp2üžÜ‘ÅZø$*ˆƒÛ…ç?!ìî&ºœE{½ï[½oü.çÎlµ^þ¢¸ú!…߯÷UAÜx×Â|>”ìù óþÒFÄÞL÷àöÔ’ ˆô*_ Á7öúâûŸ?xsOœü 
iÅ´“Q$=aºà6ãU±Úw=¦_»ñÒˆkv€w•¾¦é±ÎŸ¥·; vpçÒ‡ÇBäIÐG—åŒ`¼Š»”ù·ã‚™+xÄê /€ÚRI©YâNòoÿÚ°)ž¤®µdñ•RmD_¼~Ç“¾Ü^át6ߦ‡òÎü«{[Õw£Þ¯wƒ]ã‘ÙàG0Ÿ7‡á&I,ë#Èò™Ë-õ8õa× F±9 A)…7à^èJáo˜/OxÓóÛÙYÄ¥³¾jJv }R©ºÑÞÄyßš<À«e>RIO²Œ~›‘>”†IÒ¯±¹¡ki‰ô®TV¶$qÃælÄÏ+GšvAìJGˆuûL¤×©Üï…¨ÙŒ_Jk˜/AGíÆ"(â˜4\Dmý$oW²¹ ÞœKm<¶5ºCPUÎp×QûÍd$ ®­ügˈ‚l¼ !ŠKsó\Ʊõ—=`|/¥hy¿#uZHzmÍziFóÕÚÈBb˜T´…í37h8 ˜bðÓ˜_KЕùÚa…õcºB¼‰é2\Ñžàëv}!´bu"½ÿ©ïbßäu"C´ùLSY”ßfŒ·H¦3]1vb€PVýÒÉTÆîˆÄ¥lNF\Æxœh1›·U-¦‰âoæT„ÄÈ)æÕûvÞcy+i.Û)²3 H9ÝjXFÚAÊX¡2„#Òüú=¯ 0¯‰Ú“! Ðûd \5üû‰½Ô®ž ëàå÷yPû1˜x¬ÌžOê*Mgª•à|f¸/¥nlpœx„ZJ#ñ6›oƒíLÚà‡tk5oUÅtðŽKS˜bQ$ã±(œÉÞ÷Ã}š]EøªµôÀ!åëûˆFQô½l_%ólŒO#agòÃ)Ù„wufzYÒ|cÿ¨ÝûÁs¨pz¶’Éìûc¥c1@óæ-¾$]E¢ŒœJ%žXÜä~wÈjX•.!¨Bp ò‡¬ÿÅ“™UÙ¿5Ó£EŸæ“s!ÛpoØädpëÏNÀ欤ÿYeËî~!üÁ˜$Äó7[ööåþ9'°9¾¯Þ¯ ÉàxËå1¾€é±Ì$m¾s²m Á+èr÷PÇ›v¢²îß.\LNÛ¨rHÜ}r@Ôƒ¼Ç½êÉj  *aûšSÌ!áQÿÒ@j£Ì‚d³Þ?ˆz/ù.7ß$æWˆ å²Û7¤þÜs‘nRÌ÷ÊOlÁü0‚lî†w—åiIÓE¤•ö¸Ö#Ñ™ù øØüŒ(…¤çy¿»š­¿3)û¿!œÊò‡ô ó-Æ ª©¡„å¾ùašàö·øtæLòýèkA¼v,Mþbz£ ‡ù_Ŭ_"Êd~cþR¦{&Òt4œK^6Gê©Y” B¥Ÿï‚PÎx%ÿêjàCX¼§eü^tçèÊMé¸O!ÁøšÐ‚†KWg pÊ»iñ’iI=›m€èö?qõZ:ÈBø¨ÞÇá3ý¼ ¸.̇UˆÉÿøXÈBäC$f8_BíJH2]Ì»z|ÜÑ sÕödz8âíY7 YZj‹´Ì?ÃmÁú¼ÌÏ)qa>¡´œ/'×¹ö„쿾Œ«‹bæ§—LfxC4ÊL„g³þ×À9bîB$)·»¼Y]†Ô™ ½W}D¼3Ãéb3Ö’*§^N8]qGÖGÎþ×§ïT¥Ø¨Àç‹~nB³NôEÀ#æ?j’ä×5Ê–Á­ÐÑDáÁ/ªsKƒ÷àwnŒ#ñò£å ó ôfç‹O‘=ÙÕð*ˆw3]‰Ë'pñÈSÄܳ#š5Ïõôƒ *T9émDmÿ‰lÉ>^ˆu¿ŸŒä%m6Nˆ&ñuùUŽ#!‹í[à¿yPÞÁq§öèV"ÞáHÁWíÚìMGIÞL=jrl„öϪó§ý€øÙ5R0߀ŸÙ›0pØúˆ´L‡‘¨Ù¼„ì Ó‹%6©`†´elÎDªûÒþ;„t—{ù!µ;ã‡"«¾L?•Êè¸K(xçV‘?«!îÎú)g­%M Ïs=ôsH-"¬˜ŽÊkG–¿´ Ï__u±#ëÏâžw‰Ík'1?©pô;:àž¡Ùó®Ì!ºõOÿÜÌüè\1üv^ïXß0¾œá›Ä7E¢QùLÝ2ùöÛãàZ”"›do½O ‚ƒ?{ü÷iVŒ7óÚQû¶’ë;PÀq?6W$øÅöÕaå9$D¤ÓBЏ1tÌ'Éë™þ}Ö60ó³’ÿÍñ%Æo¼–¿ Ò7Ìo+R0}‡kº€6@ ¼Íp0šéP’[•oJ/®ž2}<íÉöW‡7"ñþ6:É6ï%Oí”ÚP[t;6‡#ÝNÓ?ÁSk¯Oâæ"5¾rû£-MU0½Hô‘¶?B‘V¡ol€gL¾­ù $ezÊOlÁkj/0_¤ŸÛƒ óWõà¹r~>tj:¸/.ßùù¾Ôô!Dáÿt¿ÙLŸÉ]B*#&$ý…>€`+;G<Á´¾ª!;ƒù„“iY™ˆø5wnXYGÆô¢´¾lî;ÿñWÆG’óM¼ž†:ƒ3„õï¤eg[}ô›~æ#i>}ý0õ2Tá»WÿqÙôv$ÏD¦ŸŒ¾:ü¤ãèþPžb~.•‡btнÐx0}OÓšü–Û¤µ9ð0pÊK¨½¥[s¦CFoö=•™¯J¼š,¨ö3ßU†9ö!—¡ôd:·:Üý~ÒÓÐüi €š+/6Ôô‚È‹Ž·ÇCiús÷ÚÈ–1‘½ýÊ'¡.¼#O¼U/¦[e„}k;2B†´I窗OÜÅGÚF ÕÂSÆä ´£m×®Èܱ‡ñ‘™Äò|¦e›†îær(mÐÓõ÷v+ZOX¬"›i‹x¡èúš¦CýêOé·Fm0òúk&-–ì<Ý”9ŸùØ3P"å6g œÃü¯Êe톀¢¾c‚æøkdcñ”)c}X囚_§ÏB¹¼Š 
PfeІ0:üÒω"mÓ©X½“™hŽWŸõ×@jh…|û×i„C3”åÌÉöŽß¯CeÅðFtèu^šy,Ÿ(E»÷}»ÍËëTЀúú¾>&ñåPË™\ÙHm.¡|ÎúªP¦W«"-\cÛªÌϤ.¤¶'¤ŽcxM~”ñiõ[ 4PÍãí½ÏGzóÏ)z0¾¥Íúmê9zÿ,TÃ Ìøõé3Ùüfã Š'LŸVZ%ÐFäÜõÙÙ7 Œ¦´¹3”Eí Ú^B¦=ã}ÊqQoºí‡òl¯ɋ iÌô3UkòmÆ¥@meþõÁ,’'Öë}íPÿ¢rƒª@ÖŸI7¤6ochz±¹Ú¬¤‡ú@þÀbÖ–Éÿ¡ÃîÛÛ/A¹»ÖdþñSÐä^¢†#dþ¤pd"ä_Έó°óù3ñdYï¿&¿ßD'+Ù}š(Û¦V&ã ô¥Â$:”±<”þ¤+ôHÛÇê ª#,Ø eÓ_4)Ì¥Q-£‘ŽŒ´-åù<¦ÿ*sþVŸÑ"(d 7KŸ6¡,„úÓë5[îÔ’xª;C ~%2ÿ’ú•‹åå±¼¢ òXü‹³òéL¿ Òv #?/!E Õëš?‚¦ŽÚ œÈóoU› ©q1Œ·¤ÿÇòL†ûÜòì1¥Pèí yȘ{t̪ @ò>¬¿¨ÚÇ~OoWyú^(O2¦1au-cį+S-‘aââÿddW¨3Ùþ(Ò˜RYÎú{™EoöÝxy ª zß)2¾Ð¯ŸõÑWÞ‘x‰}¶tý^¨RØ|¶lù®„;ó} åбSd´dy]Aåã8Èn±þ„*n îÈ ˜vϤi.)ùBË?PÝgþCM냩m›ïÑòátô>²–éç?¡.bßC=ˆÍ(?±>¶êms»A6’¦éDÈ ö_™#þEë§ÈÝŸ’D7 ª¥kh#êtý`:dŒ'©sŸ†:m8 MÓ'T6’ö’áüL?6‡¥>ÃòOzãÛÊætŒ(ª1´Q€ŒÙ—¨a Š’]@ƒz»'BÁ>G®d~QÕ:Æ U÷§’iÍ\J£úC©fýCM)ãDz7L¿ÕLa}@Íuš~m _VH uo*÷÷‡êæU±‰ü6ÔA¨`Gò'mW’ü¹•õ5Ù„>ç‡*žÍqª¯Q»ò1¬_¤:°Þ¤uÕxȯ²ù õ*¦¿hLôþq(¦ÖLê×ÖSüÓ T½z?‘ ¼u+ÆS3WÒÆ$Òg‘×´~e'–ÎÌ·£,Ž­Ÿ— Åc~o|Ãã[}H^Ó1™Æƒõ+¤¯<^~™c™ßM>ïNkÕî5}$å«”Ä_Ó‰ä |ªBú¦Ûh:Ü­Ûù«éyú¹|’¿úþˆìæ •–õ·Òê™þ©ªŸ+DæIþŸ®‹ï@î;(ñxR=9?äØ_,€Ò™ÅÊê#\…¼/ãqê…ÌÇ®rbBÆ ?ùºAUV¼ÉãÒÁü.Ê y Ü—k á²{ 2~²óªuÝ.§f d/X=é˜Ëü‰Y¿/Rþb¸N^³‰n,TW§F ¹XNλ·BÕ¾µÍùñ½I½¥×+ç?£ãxVP|cúòÃáÊ_Ñw¥UJR_¿Pí^úeQ ÙÏž*¾väd…­¢‚%”3Y¿OÒìBF=4c˜oZ½‹é …])‚ºC"%BP=üF¾ÈÈÌÿ¦Ôè硎¥òŨ*˜þ”iJÒšp>T;Riƒê=Œª†±ûlÔ>séPX1}NÞº9¸¡ =F'±¡|DÇ"¡3ß–¦¬ŒNС³Ó§”ǦP≬LÖä>{àãLâ‹á,UÆD:Ð ¥ ]þ dð™_B6ùud¯¶m´é´ª¡lß•YœKîéïÕ!ëÅæVÔÌ/©œÄp‡zÞmî›PGÒ×ëŠôZ†Ç$£h˜EB^NÛ fäçúyFd,,þÐìËO¨bf†všýªEÌß/ÝJ¯÷é Uã¿s¿Ø¶(|ü ¨Ç2ºóÕ«bHµ3håSæ÷U¬g8_yžùÞåKXÿT6‹éŠŠ¹ì<*-XK%e8DÕŠõõ2"Ø}Z [æÇÑX|Ž*ß” åGB/ Î@yœõ{3R)ý&ù£»ïA-añ­Íú¿Ò×Ãï*c¨R©•…Ç烻^­€b˜ŠzÂO_0½+­‘ÝâØÏüZY6';Ëâ_1›Í³©ç2œ¯æ0 .fýuÄ?râ¡òfü;ãy4Ef_Æ%»XžÊ°šÞëGm;¨§0Ÿ•ê9•o„PW1}UšÍæ*;¨-{ *æS»RxÞi×é¶( O!QÁ'ù絯"kxТüó•'¶'ÀO‰ ¦ËÉß.|-ø(Ô]¨ýs<áäôº å½QïõÛDâ–íWV…Cu‡% H}JkÿŠßàij[l…ÌÙÔþ `rø–3'Í©f~«Ì *kº=‡ùÛ˜oF:!f|ëE+ Ù&!…ƒÔýǬÿ ÎbøOqŽ}žÒžÊ h~§Æ¨¶³¾»¦ÃÍ«.Ì…"–ÍÑ)7ÕQÃ9R9Q£ 25¯ÜþÊQéÅ`Èü¤ŸG„ÊùP4.‡Ô;øùϰ}Îþ/Ÿ=Iëv£wÔ[(€Ê…Ýc¤Y«¿jÑ5@P ú¼Ò·ŒÏªV1ü¦q­¤ ÈÏÐëÂ!Ãü¾CÖg”«ÿ}ÎGuÀ0ÉwÑKH¢Ûͪ¿g ¯q0MÕ RSzÍEd~ÐÏÁ"SûïçÌ׬2fºÒˆééêÓì~õ¬‡Çò¡|ÈîoQ¤6ý~H[@Ç{u‘ÜÛÛ¿;á5l~B¹8¡ÿÕ\ÈVe s Š86$YÅ|ê¤í´± Y-ë «»3]<³Ã¿sÑ‘á:…í¿óÐ:T°Ø6Ô L§ÒtZД÷ šLæ Vf}mU 
Ó2ÿùªÓO1|¡¨e|´CÓÑ3W/¦Ä”ä †+ÔQ Â+R'1^YÀöUñÖ‰6´‘Êø¹òp- ’WYþÈ*d}ja]¥æû­‡ÈŠ+Œ_×Cá­¿ß2mÙžBáw4ÞLOȘ÷ž>j…Q®Põa~Tcæ'Qg­. JÚEÃHÓ.™¤6á\’g™ßJ½ïNˆi …„àj†#Õ}éøÊæP¥} HÏbú˜âÃÁê^LOÑŒ`÷ý(·Ò62É+%„~DW#c-ÇÎÆø4R}YܪԬϤt¼MéùuååW\¨Þ-Üf„LÉæ:ÕÅåq3;¥‘<Íæ†¥nT6±ƒÂœÝw•YÍî‡ÓAùvÖ‘ÍÒßã†ÌÚõª5ó¹*]˜^­ÌÖßëŶÿ-Øý+©Ç™þ¯ú5œ&'ÈoôÌÊÿpÊSE]Çö‚4ˆù$ÔOºõžÕ nT`ƒfñóœÌ‹!PìÓϯAYE¢Öã44Kéu„‘Pš±ú.ËbzI&‡éÁÊB^;#Y§MÔ’ ÕNϱ%³‘Õ—ÕKM3vÔlÎ=ëM«¨›þS/M½Žôþ¿v Ü ¥’ñXµÑÂIìþC™%û~j%ÓÕ4ÿÕ…ÛíøA[Ç#ËÝg—åÄîÛP'¹Ñ É“ú{5Ù‘¦£<È;é 2&5›ãòš£ý)a…º5«šOuzJž{˜ùU4÷™¯<ãç«!¤âAÉ¡²j<á=´<æ:M¯]A™iETƒ˜nŸqˆõû•³ÚR#áYtYÄç3ƒrϳäá[šMÂxÜ’¦Ç™Î@Æ)ý=;àù;ý¤ãoöМd8OUFމâTÇ™ß"ó#ó™Ë2£3#‡r" ÇЕH½Åæ2¢ònåvß›j]Kûò£]*ߨ7)—àÅ ÿt­6LÏTæ²¹Y&‹sÅ%æÇP‡³¹Õ´]ß~úE9m÷€üˆ…ÙÉ„G|e8!M8ƒ QPmÝ@Ž‘ªc>~uÃ[ý½6že÷íe”0_µ2”Ú ì‘5!N<, 32–‘¬dCâæ‹I‘1•õu”˜J6²ÉýoˆR/z½ Á{6ÔŽ-Cf«ŽFÃÕÊ@ÿîë[BÂæãÔ›Œ'Ù”¾ÁîaPßcx!õý‚ —Ö“úÆüqʇúyi¤e:•Jü˜š!}›CÓ4g>tY •Ó8PÚ.Èç÷²GÌ[ûžBVu-$nèX"Á½>Ý(±€Ê¦ŸfÈ8ª¿O’sÔnIøæ¸!h“|lî0y¢ Éÿ'þñøÝ[}–-û ù«¥ë÷Ü_ M>}|á©úyV¤=³¡ƒpPŽc÷/¥²¼WÑé*3–ýyÔ×–äqæ V:0Ÿ†â»ßRDÛO#šÊô.ÅMiµ«HÞý;ãÒ¤?‘P;2¿‡æ2ëKJ¿wxCTk.{_xFêÜ(RµÃo##ůœÇîÏÒXßœ=7dÔÎ{hƒ ª°_“ý]y7:6Ó *[¦Ï©ªÉÇDl„j•ç¬Ižg¼OµŸÛCøâäëYŸKÕ–õäçÝßE/%xùñòŸ5“ hÎx‘¼’áØŽ»ÈöFí况§YÝÍpïO ÛO¤^`÷«©J˜6½?ãIj†Ï$ïØyPýó7ª­lÞW •€õKÓß3¡þÃ|¼ª6 ȳèØS?ÈWÔ8ÈÊ.t,Õ›à­×$îB9–ñUàù²"Sd\ÿ§oôYân]Gò:¥›g—õb>¥Ì÷¬ßšþ¯ÂúvªkL—ÎÇÓ… ù†ÍûhrY?S½õåÌ•Ñm‘yÕ òÖ}!3…Žóô†¦¢2寣j¡þþdÜd|,MÍú/êlÎK}…°¡n`:FF¯åéE'¦³)ÇÆÿ-·\e^D¯‰Î >£Ûæ‘3¡ú¿«³‹+˾6!D° Á] )ÜS§üVqw7’Ž»»{ˆw÷Ô&îîîJ<„8ß=g3ß¿yžž™t'éPÜ{ΖµÞÚ‹ ÞA¨ÇìßÝAH¤ù‘°C¬öô›!÷4ÓzGšÃ+ÿ’¯ÃàFç]ÞŒyÌàÆKÄ~úlRk'›‹åÁå_ \¥½—Q{J|aç€Q¡f‚ÐF|xüõ\=±%ºqð›”·ƒñ;¶õbÝç1éD)€êšxÛw=Z)éºôjšê“ŽTX8Ÿ‰AÛ£j>7Štú…Å4×ÿï¼L¥=aÊflC¯]l€"ö[4¯5ôd¶¤žÐ°Ó(&ÄCuñúSNãIºï•…¤‹lp¹=’€¾³Ù¹Aþ]ΟÕHfÏuáñ=…fToÔ¢s·Q;Úû)—1{xOŸ}t1F¸ºÚKh®4[s¶ú 0® ½ªÞb=[0ƒàùØÖ³7¾ ÷³ðýÀ!Ç–ýcHU_Ùæ‚ùÙ ÎÄ1¤pþè+jõí4^ì›LôýÑ›‘ŽBÿöùõ3‰O!Œ#ýxý^´§3\s<ÿ˜-è³éþ¨o{Õ÷Ëóé oJŸ“¾Aa« íŽðˆôq*ÙÜn¶= Aµuñû–bß1¼ÿú½ÕP¸†ôž…5©ïÖ%3µø¹Ü ?¡n&éš Ý¨P^¢zPobã`)4”’oTPÖxƒ¸¿úLúõõ¹¼> lj+aØŸ@qƒî-c;&ßÿ ÿ‘~M©§½¸G¾íâ§l½W¼ç>¬´ÜP†ˆ KG?Uwªc´ib¹?a†Ø‘ÿBNº;ãXº‡´ h_ntÊ`JÉÇ\øŒöÐ*wšS _'îö«õêÿ<À>hÐÕjÉ 6î‘.@±(Pø”tHº‰Ä3Ñ©IW¬w«ªÓ_n|(?x;F,QèGƒªVc6Ø•gOvÁˆu]nòÇø[âýIu€î³íˆõΧfî׊÷>é,5çÓ*ØÄÎpð®Þýb-Ðl$žñVÄÏæÚP8ž­kš‹u&ù[ 
kÙÅóóÁNÉžÛâ÷ß—a“@oÌbƒ)P7á‚ Ð?¥zÄhÛ-LÄ:ŸÎ_açñA!{Ìo]Ò›ªÂ߯}û=Þ’>²Ð\¼>nÝm&é“õrņg-讋իÓzP×$ÿ»`A\!Ò›b=ÉÚ¼–P-Í!ŒQœË z{ñÚìµ_ü\éûW_B{ZÍ]š? k 5ïOW3‹ü–ºþ4'5:’n®°ˆ|®W粋42âÚ6Ò>´9ùÃsîÓûªýÆÊPñÙA:pí?zï Gf 4«|÷õ[Sýd  ÷©@Jú}7†¥S‚±"™ |@߇öA…oöä®8[ õ;Ÿ¸f·5n'?»àM¾æ[ho&$kþ<«[çw´üðçpýVc!z&çJƒwÞMÇ“›ü!âr.{QÁ¢_ê²d¹ @uòÕ8Í*`H¶òzÜôù³÷À©“ Ãñ€ê­j^¤Åô|G-pùTsê~°8`æÓºq(±áû´Üñäc< ‰—I_áWoÏ’=-ÎËÿ¼>h±~¤ûs°üS{â $K¹¾­¶Ø}·r0¤÷f¶Šžèh*~}§è„´Lk×ÄkÄ ¢:Ì/˜úÅÈ<Χ“`©mÞ4r™Ì*ÂkòýÆ$‘Ä}ð«°ží@’Æy]¦ò-YK­2W‚¤ù/Ãw’o­ÔeݾÏZ ã>}ÓÏlf[t•ÿóŽ£Yµ3»œ´…Ôä³ ?M¾W׫7ËžÈU°zÿM… ›”~úaø1V9»qRs?D~ ;ì·ª¦^ñ|ù²ÂíˆGàïü÷ª¤A+HXðßw±#€Ø0ΡyÅ,f„º½T:CF·…ì ¯Õ'ÿ[´ì&sž58úuéûlÚg’Ž<úJÃNÅ+¦``JÇœ{@bâÕú7:ЦÃßöeÁÖ³aèÓH¿®3$@x^«¬…wÀë¦i|ç²HH ¥9zÆZÖ·ƒ¨GôÞç`cÿêã õ‚Ø^–¾†([ÚŸ 'ž¼ô|Yæ°:ÿrhÿ_ÏÀ÷Þ4ÕøÃ—²X»§„ÀœûÕ¼‹ z0ŸO }u~þË?mLžõaÌf ›ø’ é äÙ×ÇI6À³Þ“Aæ¿›£ƒ~ñõšC¾@RSꃂ-åaVs!IBuoòZ÷ŠgGÉ9=ÿh>òX)§ˆ–¼,»Ðq͹í¹Žöv‰6Twž’{îP$tüÄ^tÈç?F¿9íÿNºgú¡ïïwúB;tv²R?×-|a Øm·Ðy¸Ç;—xF5ÞQÙ—Yb°šnr£iòO£Ü~s9µ§¦ÜuiÝüŸÞ¿z]sb#Ÿˆ'½„o$ ôÛ~±3Aê:š_»+ï °J€(}]6À€Øò…¹{Žõƒd7âhűÓ<÷ ×çsEô˜4ÕÿcIß—Øf›ßñÅk îÕ“!kúŠ…ïht<-{va‘"”TÇÅ’ßÁíú©m‘o€GÏ;ý*S‚dãH™KA&Z*Ò«Ò‚céþŸ“{ï‡ÔbòÛyg›÷ú:!ür¨žJ_ÇsÀéŒyÃ3ÎÛ!èz ñ¦ÏáJÇ¢3*°ØwsÆ¢÷ 1ì?†Ñˆ¿[ä‹2R])¶C-ƒu®Cû) _ÖÔôãQ‚ûaèÉíIG2“t2ÉŸÙÇ›þ• #Ÿ×»ƒ§õ1£eàì])·{\ñi´ÏŠŠ?Ä ]B:ý´ª¹wÊIÖfuIoÒ±{öñúôÆ›àpuEÑÆµÀæþ›–&†€5³vžWž‚Pk±íÔ—Ad÷¡÷x³I}çóÔnÏ»wæ¯~Ælié~Ï´cí¥Ò¬ŸÞ œZöÝZô¬x´"gŸ;ÉfüŸý« ¦3Õµžçî5ŸXà.²”¡]Á%³xI·Aèìö½à”{4Äv¤{Üß¶Ãæ]æW AÚ×¹äêNtêwùð¯1´?ñ~#•4I~H>öÔß±“»+ƒà†ØB hq›Ù$†Cêz fü…dâ|Èr‰‡Y§W×ÃGÒíÁ®¶Œ¶ä'ŸVÖKñÛ ­W·]}OÖ¹ß!Må¼&ÓM›söÜ; >àÓiù5%x_ÚÖ<ò6Dì\ÒB™ ‰Åü4}ó´?y±AþüêééB—!àhøx=ùÊ´=˜ö»ÑHç2;«ÌôµlYô½éMp½˜üÇǹ™†ís‘‹ÀýÚö˧-ì!¹ÆÃÃ3Ü^A‡̰–;[7h)ˆa¤ß¥÷(Ü•üjÞ9GF7ÿܪÕÎ8q†1@Ľ¡9ß&âò;ÏSh„w!°±Sëû³ßBÂõÛ v¼¹M’.œ_k ~§}í; Ÿ‰ÞkÖßT½ìÇ-¾£ã)ùåÏW:;™.Cxêÿݶ.)/(»Éï©>Œ;L(Éãúbãéþ†Òív·žCÌlª;­Ú‘"0ØõÄÇ6¿äŸo=SÍ1Z‚ÿ~«‹] á Õ9áMZ$ȇ|ÙwânÛ²1sðÞE¸¹# "Ìz1CDw4¨,u‘—«½ŸvÙŸ€N–×çš®žZmg‹Ž¦û‹N8}²Mä{q0i«9_ýá¹í-ÞG)Ï«:ìÒw¾¯ø½F)ïÇѦˆêèÄ.bíb‰ ͨÿ”-šÞLR~ bÓ\1^Mó«$9å'8ÉH§VŸøyNfÁCfª\Àï=×KÉK¯kWhŽU«-X¤êoú{‰-ø ¹j®*áò$wSi·»ÞBg@Pâ†JÏÌÝP1ý#´òÝ!½ ²§´‡ÉšGß…ëçg,ïß|Þ¿ËÿžøÔ³sNGˆØMúPï¾n¶é•í!líï‚‹·Zÿ­WKÙßk>DÝ?Ð'²¼&Ĥ“Ž¥ºôÆ}¨Õ|toG-XnyÿzOw ÒÚÑž:¼˜ášbe‘é# ú“MØAÄW®‡@ûa·¯uU‡P®§A¿>—zÕM»·Nk¦Ý 
–ËÝ·mŒr…ø TgGí ÝBlö¬Ô°SƒäÏç'•§\k‰Éo;ŠtöQûI'Y—ü&/±m^»ÝÛ3Øl?¶iÖ¢•yÌþœXù‚g:ï3²øû(?ë_§ûH›{Zõ^'¥=fÚ7U¬þNÀ­·—óÚ®úùâꆌ»vÇ1èŸü<6Ç÷H3ŒíP° Û¯?q¢âìin´šô¼)::c~Þ.q-Ã50}ýDù7ö 3'™-kfãcÔëƒ+!°Ï¹z %ò«³£¿Ú\í±£‰»•t€á{˜¾>¬ö0df;>E__d &3lI=ÎGü÷$p?š cmamˆY•s°}|R¬½Üfnʾ ?àþ½¤3‹!©å½¿']†¿kŸ…Gˆç×!òÊv^õ~½š1ÑŠK Þž³_ÏZM­= âFÑsçVQtãqo±Þ~'­ÿÑ÷!ZK”Ln¡3HïÌÖËŽN|‘XâNJJˆŸ.–±ÇvC½†ÄþÉç'è;Fè\ò¨døžO^^y 7;BtO±Üoýjõ;>jp/¬Ñ+ªCÎíþ–@zÏÄd&_n!'š^E®€ºó<»g_N¤‹ÙÍ„ê÷¿Ö¿0ó#¤5føÕD¥}QÄRÚJŸpÚÍÿøø¢µøó?\!m1Š€_7Ï!i|‰üÅo0ËÔì‹¢øÏVE¯O­À+»FVO…ð]›+GIM 5'yµ2Ò…†ýG|¬ÐûäfCr*Ÿß sí)«^]I€Ø9Äw ™L{¶^÷㊼ A^@çmPâ&ÉÈ{忞þœ‰‘¤ ÛF—„åõ˜1]þ÷Ó0׿N· TM~?¿;äÃKªÒ"ÝdÄ?òÙËiÄã 2ÁarÁŒYï÷·`É–¾_!Ö—Éˤ ;N}lÜLòG¤™U0ãÔ¼4çôÀå Þóªï—õŽë¹å¿Ÿ‘žªÖöOÇÔ“!VB¾êˆpÒ)ÇÖ#}`Œ嫸Ì麩"§D­%®n¸’öûu×ÅlöO¸1®¤/zÏyö ­Éõ\¦u.¶2²)¸µÊ¦¨‰´‡wyOìdÀýÅè× »]ÄêUÜz9Ž“¿Yækþ¶÷´9ñHœ•Äpòï„\á¾yå<îD—Ëû?fÿ—}¤ï<}þÁìã= ÕUü”j†È[g½ ú~3X Y%Ö)QÜ+ )~ÍÜ¢Î~·ªd›=§ÁÛ}û¯ý=ä¿J‰7ëS1xÈ¿Uè×z×ýwÿJ!Ù‹æNñùáÒÜ«ò¾ªr=%9'ÆôøYîÛ÷ÝÐi%Ïg@³ÍôþGãØä_n“Ž86±3É|Öìž;`„”= éZ2¢»0Ùa$ê¹nFþ·Í¶¶…AÐmšk†[WZ4:Ò›2™Y°øyKÙ“nãF­2î°ä˘<¯DJªx.í*Yp—ümÑ‘‹ rÁ§Ã|« ®çôåe#!¹¸ÅN§#Pó@ç#–»Ê+^u)j·¢ ¸\}˜ý~î" „»âƒy‚¶“^#3®jNý†ø°2;âö$vb2ˆp¨µšôqÇ)OD20œÁ/~îÒ6 ÁE8¹kÌ­®à:üyå™MÑáiÔsw÷M`Ó/¾pq›S;Žü§1=ÉÐh•힯M;G’¿.qÿœY.íkCF“®CþöZÉïˆ_ЩÓþ¿‡e {Kïab†AhA·³`8ˆîKüݤÄßNQ1ùu0„…û2A&Ä;“C¶”xAG&õ~Øy3$Ö%ŸopG¾Ï‘ÿÝ^Ôéš$FþiZŸV⦻ #œ§6€Dnsnz²·Y€üÄH.'}“¬/éæ×ü:iÑt+­"¸ÌƒöþÞ›XìhM°Šý»ïïÓ õä9 }ˆôì² t¯Æ¶£\ÛÀïÄ ¬Þ„'[@½&”ãg3am‰OûBz» 4â>fô´X´<‚6…™\¤PK±'³—-ºÚç8ÎtrÒWEî!°Sçü˜>Ÿav¹0ðž²I¥©Ñ "úPÎBòGÞï¡ÇøÉIfùâ½xŸÏ¡AmFy`Æš”¦«ßŽ]Añ¼#Ð?%îwáUÒ‰h6S½®^ÇÖ¶P°é,[ôƒq[‚bå–æGŽN8ÍÖ,ý!/žÞÝ(ÊÉÒš‘.%· ù ´çFÑþA“J¹Š´_V. 
œDC0“©ÕÝ—xR»èýPM⹫`èMþ0eͶ¾“,«ƒ2r# F÷OqŒü¦ú)ä‡Ö &.Œ`¢\e%ål‚Ö…ò„•ÓˆS¬u!¾iþDÒÕhRب3蛓~Ì ¢½a^m«Ë‹æBAª“Çx^ø9UéT 'ÞŤCþKÊ×Ó¥ü=m_X׋žígÒUç.#¸á,ùõ û·V('߸á*õ ªK¤OV¬$ßgAWâèS—±†”Û~¾-è ÄÑÊ÷å (Èq&ý¿a2“¸€>©Êñ†êEÃ&º¯4!¤S/-Þ®·@Q—êrƒ=“w¥€p†8 ºqîûë¬êsôïWu¦þ_oF~.ÝD±:nàÂWÒ¯+¼‰S®Cz>ÍÜÏbÃt„¼Lö…‚²€ööº#Ä(H ”I<¡‚b#3ò-ó›Ä”Mò“æ¥<1c û²@w™=¶ Rò§$ü=h^’þ@søIÊrÖþÕƒüp3• 9;VÿPon…•ä—Q»%µ25½ ¯v…žtðºhò‘j¤óP.&ŽƒÇÆP Aמx#ªöâ[Òr+.äÃÐW§½¢vqå”»H÷¤_×@,XÿƒœÊ΂‚4âÚçú’ŸË8žt¡Â9šóe½`v×¥êyšMsHA Ý‹VK¹“êb⽫6SNœ6fœØMűÝù±”éÄûW@ 3~€j7‹-ˆ†KÚKå\g1¯ãÈ[D9IwH?¦J:Xy{âži$äûÊ—ŸZ»Ÿô¡ªþ¤·Ê›«cFg(èGïGÎÊóš°-=h¢Ï±ämcñ¬ÉPp‚•µjÐö£nœJ{[ý\ÊA)ŒdØÎ4’MÛ›6”ƒ°‡ÎÃâ:hŒÄ¾¯HoOõšðüœºEÄ©ºÓþÎЇüCÊ3b×^û¸ø¼U/%_Tnû',@4Ù¤OÌúCç›êùúò“‰c«Up(¶<ü3(eþ´Q½¤¼uÕXvÝØ@^Ž7+üAXÜ©xÅä› îÁ}AÅó Çœú^ÝZfçHãkº/´AyÇo÷AhGõº¢ÌuþùT3Ð'’oE+ž-ß‚Îaý†ßª ôMHÊáÔ;’OW¼‰{@õŒ•sù žÌp¥]A·øbŒp÷9ä>$>†ÂüѪÕÄ×w&ݼV|Šå—/ƒîu•N|Y%tƒðˆôЇˆ×¥)'}¾n?ñý :âgç5$^µj9éÞµ1gØ@´Ó¨®V#»qt“çÜó@›\óÉâï@XМ- >ŸVíFº@ÅH†óìúÊÓ4¬¢çV‘t™©@}+ûxRñ=qê…¤[Sͦ9IÁò9«õ »×òôb£¶V¡l±™[ÙºÏ 4=Éϯ]J\}Õ+Z3ÒÕ©]±‡û~;Pl >Xk £J¼UaåÖ fOßyeG‚ÁH¾NE#â,h³º-OWYáý¶hý#ò…ä÷!ß‘Pfö¹«£ø\°ȶ îЕ³!w%åìsÅw¿d=hý §] ·ºÌ€`,xñ°îYÐM#߇ Iÿ¡ùF<áqØr§h¶c$’v9qØUÖ¤ÛÒêˆÏ™3‰î]UEf¤å-ò³¨G±ñÒзÊO(øE:uõb66ŠáÕ¿Â`ò·€üLZ q·Ô;(ï>ûùUçøAÊ5HØokŠ?´ÿ4„?£à!“¿÷…¬ó”;©´ N²Ñ¸EªRâ&ª—RN£â&ñûBöâ—Ô¿Bý€få‚+*¨oÒR¾Ÿþù4m‰ç§8º&)ñ"S³Þlüò§†0á(gR=­Ì/,Pjɘ,¶A›N€f Õ…ùÏhþ§y@ÏE^Ñ©}3n¬œ#”«›C>%Ãê ˜šBò·hó†lY[ Þ«¿J¯.9õkTån<žòªúÖ} ~©`(ðþ„óÄ™3XšNÚœÝÖ5âA'¾OÙœ»hNÑ|CÙšòPôÑb5û d÷bC8³H‡¬+Ç{ƾä7|Ú-¬y~èR‰G¡~°/‰  £½ˆRIu°:‰Š‚âéßÕ©¿Ro•Üó&Ð÷R¨®RšÌ„°×šògUOª|FÒ3(O±õD}P!}¡ö:=·úV ëj_ÊEÓÈ«ü´‰§ÝEý¡nùï„,â(Êk¬œ$>G”ÿ¨ªi;±Ž=AœÕLn‡uýDš{¨å4*pcsÅ>æòZ9éPÕÉG«üÀÆIi`Ëp÷b=ÂÆMAÑ|£êÁT÷ª†—±)¯UcNºÞü1¬=Ï¿^ò(kÓž_ý˜rÙóNx² Pû3P(†ÐüKw…ž!šþœªP×Ó´¯Éǯ3Ò×]ð‰øûªuYAÊÙ w!ÖÙÓÈGšõŠ|ýÊ ”£™AœÁf¹c㥷![G:WC>é…•c«rK†_SWFÜa Õ±ê‡4Ð4})âgsP!=ж.q ó«Q^•¾:+ãÌAñƒ¸[FuÍ”ÏKBéÜòhŸ-<±l»£!h¥¤»Ö<ëË Rb]M:m2éfµ5hž©êIûNCz® NÐ}¨ž@:UâæçØQÿ¦lE~E„ölŠð*ŸÇÀgfÎ?öˆNò9æ¿%_¶ÒôÊm]÷"Pô¬ò¡|.ÅfëËU½D&܃–Ùû²!ó [c8€&‡çÓ‚Þæ]Ú±TW*÷ÞLµ–êõâíªÃH¿(|£ûÆx…>w!vôŒ¬Ûâsp’æJ)ñE¤Gþ#εzíSóíH׬ž@sGÝ.âÝ ›È®±æAãbœ.þƒP?±3:ƒ~ÍÛT^‹Ø‚42âÊ kd,Ø„$v¼´áéÓÔ æ0;(o/^ïCu§ZGÏ›îù 
Rœ²‚ôïÂ}´]åxhBÎÔöŠŠï+>w„¾l|¨¡¨ŠK”)ž&ñ¹ûIœé•U©T¯-È÷¢õ¤ù‹b=qÍÔ-©_.x=R|¡öæåuiÎ_°À‘}\=Ä:Æ•H nKú¢‚¿¤cЮà9¥ Äc h”¤êSÎ’ªh¨‰õ ñŠ{Iß¡:Kü4U3ªŸ3½hÿc¸@yÚ¶ÄgÏ}Gù££Ëܸ š-ÄS½äºP†±µ‰ ôÆíc Ö†‚@K¶(ÕPÒW+›Ñ~_=†ž3uªóò‹éüÖ?È„šPPÙ› Àx‘ü­ù-È¿«ÐSÎFa0WÙ‡ižjH"^„® ëöÑ’ú ¤sYxAþ°‚ÝÄÏÒÚßEaÉÆfA5€ôqŒj6t(¶R} ºMŸ¿0¼·ÿFU±^&߸փö“ÚLñš?þ ´›Éß ^CyFú¹ÄÙËóüô>cìÐ_§ÜVÕC>Çåú*þQ?âˆè£×1Á;(²g°à(|Kõ°â;‹Go ùfÔeä0tªÊïXC\1E)ñ´ki m7@,<¼@ó^¬ö$b½°›¸9ŠoÄ“Ð×$}»JAïŸq;ñkôÁÄ+Õ]¢º@ØKu¶þ7å˨¶²qµr—×BytIꤿ/ cx’& ó¤½žf8ßkCþmÊëÍîFï­Þ;ržôùús”Ÿ¤SÅß–0ã¯xßÇP×áo V×\øÅìêÉ QÅ稻ڡøq?ÈZ;¹Ñ´ÄéPÿ:í5„íU¹,-)/À¸”rÌU Ûß³p,‘/ TV OÒô/¨ßÔ{±²ÎM¬÷©P÷¤Ü9mÝSÂ1âã«?oDy™æHê!|n ÚTo)Ëȯ­Ò¹è,Kì›RÎ@î"6–ÊÅCš·)ÐûªoÄ®GsÐà‹BÐneØ’>â=E\ uWú|•»Hï¯>Lº èyÒ}`åbˆØ¿0¿Ê'«ª³`ÂÖk@u’Îã ñõ8» TG‰iÈ .ZfÎäù†ƒ®'åI(cˆkYÐï+ô@7þº’*ß,òÕ*<¾¿=ä¤PÝ`œËÊñÜ ?‰áºFlœþˆ÷å| •]˜„4ϺOF]Aþ"}¯k²9^£@»>ŸÂÛUÏǘÜa ñQ•%¤[Э®ò­w¡|Gcâ ?‰×ªÉ%Ÿºpæ…ÃNK@{„úma Ãhi@3ºŠ«·“8Eú„;,˜ò?Uù[s@6’¿Oy…¸]ŠAX!ªÄ…×®cx±n¼N~°\Ú£BÎÛªûÐX\çȣХWqÈRˆ' ØO~;Õ?Ââàf %î´â+ùtjò9{•ë>³Q[ƒZMý¥ò'tBA2Õw‚Û².³¿ÌÍ\âE©¼È®u =²ªäŠZB}Žöq~„§¸T”˨ý#)?:½¨¶Q£;@¹zyU´ˆ|¶º ÄÑ× y™víß…Ú¤#ÒüaeL2¯)SUõ>(ú‘Ï\aM9ÔÚ½¬Œˆ¡ åÙè§9oñâ´£Är¸ÇhPnañ}!ûc û7ºß³ËIw)Ó9$¤Ó{¡ò!š¶Ý‹º!üÛ!öçÄ»È]=5øýe#h°5ƒ¨jº·—^;y+Æ¿µx~’ß«þªNGv‹÷Ú}â-(Û%3ðjÒã©WUñ‡¬––™¶‹ß‹K‹¼ ¤WTùÒ “É·gxKs:Må#h†GOßg[XCÎBʹúR¯yÏpý Ðu63°úÕÉê%”ûª8[Ã9­"ÔÊ—6\X&ö TjõÔéVÑû™@~ðÜÉ4Ç×¥ÏMÑ8Æù;hþ©s'n’n;;¶ûp8ÜŠ×4ÇÕvaòo7PÓÜOc{qk°—»øž‹×­¿xÎúR^ ®Ç•'µ‰ï¥7q6ur†)(ÞŸôüàZ“±ãç€^Cý¥êÝšS4wÏÚ æ¯61#™Ý”åÝÙ Œ÷~ˆðFP«šoœøÅ@VbŸ@9$ºý¤5FѯSUp‹øuѼBu›ëò ßžxBOâÓè¦Q}l˜Cû\‹ª=Ì{§`ZëÕÏŒâÏsgëÐе#ÿœ²OKÂh@œ`½é§ôŽ”{¡ÜI{$‰òÏ…ïb™µ£T)Ä5З±x= ¨ƒˆ·­ôæüÞë® Š"î¢x_ž >ötÍl˜X¼o‰û¥´<Í ( ˜Oœã¼ĽЦ‘K(¢}²îç|mû#+@u‘ø4†Ub;ÜQì;'²Ø@±oó'ß›æí1õmÄjVJWf›¯9Ù,0t“H¥_¾E¿bݹŽîCE âp*ŽSÿ•Žr‹iNdOó[E4q;õc)WJ-· =]Â|ÊÅÈB{a7õg†=4O6Óç«¸Ž²ƒ®5Ã2«Àð|Kúwâ•{@þT|Á_Šýå<çªø‡FÚ; @¹RùÏÉ_®Î`ë­DÐñ`÷ õˤºbß»›þ×Pòñ µÚ©ãêЉÄõNR}¥[ÎÚÎZ ŠpÜÕk(·Nß›Íf‚v ùöu½ÈŸ®?LûÍ;Ê5H)_)7‹êGã7öbƒª)[Ã4/q‹ô‰Ú’u¥bÝûƒaOÚŠç­xÛÛæ‚^A\ • ÍËt/èýÕd’ÏEß‹êk½3å¬*÷×U²-(Äï#õéÂ8º'ôýÙõÓ 4®ëØ@T‡)×Xšö–ùåäcÖºSž?½ŽøŒ}®«aÝ_z§2ó/?Ç‹ý6é\ ¶§H[—ô]¦~^7~.—x_êÆ”_©iBú©‚«äŒÌÞÞt7> p“Ïu›*ÞAÔˆ> _öÝ_z~µ³IdÈ=Ú¾Ëáçû›îáÜôŸ- £œ'E_eCÊ%Ò[ÿZ¢FãKú_½åäM¥Üí!ÊÖÚÓžDïM{(½[‹÷ÝPâFé’éŸ ñ«+¾¯åôu 
Aÿ˜ÀT¿XYš®,>Ä N‡VCõ]îÁãÛ–.xÚ);æ¾y²]üù,þZ º+¤›Ò}"Ÿ¬ð–¸ÇBñóJ)·@µ…]G A¥³‹ xL¹™š’*NëWòêU”۪ʧ é{Å M¬Wöq-èÚÒçc°$¿½få¢êI?T°™ötêTŸ(gRZ°†½>ƒÅç­'P€ÁŠt¬Ê#t~¨­ÈOfìË®ÅFbÝËð$RPNf¸ö @:J!‡æ´†é¤72–Ñ{¤™Hü@eø—Š÷åΟÉw¯¶sðµ‰?*žCËŽô;º‰Ôê=œK®î@“¯×ðŠ¸¼ê*.Œá§sAW¸¥šÄåV(iÞ.ü¥º_•Cœbã^óVb•Lû¬¼dškh&Ó¾;+„aÄïÃMâ ç6~ÃBÐþ%þsÞBâåï¾uO¼×¿gbßEß'}(׋çsqÕç›VÈ $Я¤|M;ñ¾t›¹/T¨Î"˜Ì¨™Ø·ÏD{­jNLõ ö0銄ãÄ9T¢ç\ù‡µïù ¬A9,‚ ùV´j¶öïÑrÖŽ‡Î’ö‰†FÔ/æÄ½ÕºÐ}¥šDóѼմ‡U^¤½¿v#ík´C)?@Õ8»º´¯Ì÷$˜¢1åu+oRνPå6úÐ^]¨âùêoМQXÁÆûvgA¹=Š3–¦â6bßЕµ­bßk%cF ÚÓý­ÒÒ|Jw¹ŠÇûwßó}ƒ@Á Úÿë[·@ù“üš;U\‡Lñ»¥ù úQÔ¿ ÄqS¿#½ƒö,åß©ÿRž °ÉnƼƒ A?¡Š[6”ëeAwüzƒ(ËJÐt ùœ^Bùu†»”_­˜NúPeùÿúºGujâN¨ãɤŸ˜/ƒñž¿Ò´zÓN  »›Ôà9K9=º³Ä¿Ñ‚úS}³ªü¿cÔ·ô"?¿²«èA½ž¸6Ê”×£Åù˜ôz=kø°nä¡õ=ú¡4‹Q/À˜ºôcË»•á®á&Læ@‡0Îü‘Xð¾GÙMÓ×ôÛ+¦q½–{1f5ÏÙÁÈý¼ÅðK<çØôÏ0¦x‡á¢ékõlPfú·lZ*žŒÂøU”j×ÿÝÏ¢|ùŸYœ‹)ßsæ6̓ðªóÔx™ÓþÏLÜÈ9âö”çl¡×·E ûÜ)CW]æ–ï?>™~ØÄ7©—ЉƒyN0JJ¹Îã9Æ?CW1LZ¬ü\cG<ÿ\‰>ÿ•>ÆÈõ<7Cs–K?D,x!ÇØQôï±ßÒƒÎ0Xòþ…ªà:ÆOå}-ÆNfqsÁ`_½V·Ö·18/=úîÝ1±pJnò~Öôq×'£¿Ÿï¡¯ggGG=&ï =˜ÇæÅ-æîŠÉ=xŸ…²PÎB©IüTrSÐR8Þh’ÅL´çüs¬ù”çŽa(³eT¬Æ8g^§c-‹ ¿Î&z`Ì¥-â:ãB¹㪳qgWô·XÖ΢õŽóÄ´•<¿=Üy½Š6—ãÃ×Wì->é6~h 7^ßÈ´ÇÈ\%?k_ ^öî˜éóÙÆØ~Oz÷™ßàLjoLõ">œ}w>ŸÂ4ò`Ô]ž3Œé­yŸƒá­¸Ïãã8G"+ ãÁvÓíù‡v‡ÎYˆ²!¼Ž@ßмٳÓ7zÆšîS-Z>qÀ í9ê]¶kÖ@‡•œ¯ˆNñwFÏé;cîð\!ŒîÀýYWt^ûcò|?7Ÿ¡ '¢û¤<×´±¯1ã ÷ûc/{]½±Þw®×@ÿµ¼ß5½|×Ó}¶ö/&µ¥¼[§ÄswÆŸŒÅøâÓL@Ž©¯j¬­ýþ8LÎý<øö”ýæ:iŒNàu¾üxÖ³ -žİß}ú®ÊÞ[§¼Ê ¬ ﵤúíË`Ý’Ó‡Š¯=1ýÒX.­Ó§&zñùºž¥s5þ³gµB‡™|¯±§9¥×øygú–tÍêzÏF^üõž£÷˜“¿Þ.7oü< mÀÖ'Žè¤|Ø~ð˜û( ã{¬}åþ­š6ès9¥Ó­Æ¡Æ¤ÉÿÍ2bÚ*ž¯#û‰ò Boò<\LzÃu.èYÂó.M ²÷ÏŽ:‡ ‹7õ¶Ù1S |~Š~þ~Cµ–0%çA`Ì[aøîCÇÌ5|ŠöÕw,ò\¶#{p=.ÆŒãœ|Œ^ÄõvòcŒ h÷oñúcžÐÜÑÖ´tðˆõÐçÇí-Ìßa°ÍÛ/úÒ9à¯ýÓhã,DßGœ_Þ5æeŠ7ƒéËY®/B‡Äþ‘-jyaÜrî/Ǩ¶\w‡qwxî*†ié½ñ_öío÷aÁñÂ<¿Nõ Qº¼ [Ä¢ï¤3ÝNüþ!E¼î”ÿ˜¿Ôåæ}Œ«Íu?è?b÷¹Êü>¦Ÿ#:zx¸øçÝÍ@;˜Úˆá=±Z¤þ‡G£Þ‚|þ/~ÿ¨/ðQx7¬\ÜcJÚ”/X Vݸ~L¼èûæR°1x§Æ»ò¹†/åŒìÅy٦ʟ%ÅžèÒ£Ã9ûx)ÊNñ\DŒOú~M6Ç“»ùvòå] îÛÃj¦¢Š­kaB ‹IïŠRÝfÌŸG†ûÍ4}Û|1eWƒ](ëÄû=LzÅs®MåËwœ®wz6Æ<ÓïËê”ÍÏMþ˜€!=[1!úe Ýo.‘aòlîsä_|Žn—fz}­ƒ_ó=&FVgñ(®ò·âC¢=™5Γ®X6°ˆý†àpÇíYðõ5Ö‰ë…ÑsíŒ}Š –Àì–òß´—FûŒäI“k?Àˆ<ï CZs½†¹ÞEþóôŸ5Rþ O6ç¥cr"×›^ô_Ú±ÅηúÀ0ÌP·?z5HmæÕj¢©üçAbX&ç4É?­rñðÞ7e3Z²Á’éK‡Òås0¦6ågTÒséŸ/3zü#„4®ÁzÊ=Îl}Š68/ëçsa 9Î÷°éÊ¡u&8|ê"™°Cmyž 
Fi8{Ÿ:ttö4 âvĺ˜zrÁý{Ig1qHµU=¢v kµÌ5³‡ú¡ÿM®2ýü=îÛàŽÃ12çã C3ªk‚ZoªšüxZ䵺–÷…02 ftô9ð‡i‹“oZaF ŸcLX»ÈÑï¤õÌÈtØ ?¿t} &ž'¾sèI¾?Á˜æ\ÇŒz4»þ>“Ü÷ïé9;£;Þd)8tü2üù¡-Gœp [Ìu]˜lÆóa16‰ç}£k-MOÝ‰Ê Ë½É¯0!†cЪçM¡‡JV4mÖ Lt;‘-Vrô™çÔšþ¾ev†Œ>KçŠ9§eø¼#Ïñœ(¬Ý‚ûQÑÞ¨82ý«~“îÞ~Á+ç~['¡K„òä…­*ôïÎ÷ΦµrIÜtQo›`çWͦç ky%·…w’_°<üáÙ+´Ÿz$Ý^?ý;29Q{tx‘²oBQ8†Zr$:.rJ÷z…þÃoþì7°:Æû³õ¦'f©97 kò=ÚvnïÏ[èÙ—Ï¥PZÖÎx‹ Åœƒ.íø|×tsy7϶Q˜ð-®ÈÛx¥ãÅöü‹xß÷ãz?”ÝåúŒp úUúr‡ÝŽ(z<¼[†æ®Û¾oî>Ú)ù+†g¢ÿùK ˜±k¸®ëªÏasƒX7ÃïwKk^ýø¶¥ 1á&ùÅì׳˜½aqfxQŠŸã?~NÊ_:s¾!†ßæû-ù#µ[ÍñæGÐvg¸bøêkh~ªÏSÝ»+(ëCßÿ˜WÇX­ë÷·¯vãBCŽ•ÃÌ©Üß….»Ì¯t]¶3¨~_ÈðPÖ¨hæ7+Ï„IZ¾/ÄŒ ãª2ó¯Xw{a»ûè7¿üµÎÒµîó{CÄñM†;«¶oM­‹’:âWÙªºüg?GèÛ®&®ã{ôÌÿqvä‚/´†Ï§ÐÌ·«G¸f;V“ó\Eô˜½Ú2¥=Æÿà\”år† ãºGLœÄ÷|è5yê ÁìŒéé)ÙÆ…ýZ`r ¾/ÇèÅœ‹'ÿþ¢ÓêµÖbðižÏ‚Ù 9ïe/™Ì, ¥*®×ÆŒí¤³t´çsVL:ÄõCèÿòë§gÏÐZ~;´ÏSCÚpÞ­éÏâ+ï—üÝ„!ݸ.AþÍÜ:Cù<ÜT¹²²öÄ´aS.6ÂCÑ-lmé ‰qè§ûÑþÖ¹(ô>Ðæ}B€?J£i>ëráV‡ÑG`âžSг˜í,]Nþy¿a6:Œ^Ö?,B2=çË¿•Ö¿QÞe0z½{ß×&³!øtmµeÞ±¾Û÷†:1Ö—ï)1ÖûÏ1éëEfÂøxÞŽbÈ}±MztUœwñ6œ7€énœ³‰Áÿ<Ÿ6©¥OTÅ{ÆLAŸã5‡|>‚‰³¹>FþΆ7õnŽ¡]xð:†'PnoSMÁ„è:hže†Ÿ|÷aÈ:®Ãz!œSŠ¡½¶;ý¶ µz¿ð<ºü˜÷ do:ø¾tpÎ<” àŸ»éß(>7CçD;ÁðÄ+xS?ø|V[&ùSž£%ñ0Àÿ^û²cÏÐfç±cr{š{8Fª6)ÿ}—ê™râdH~0;×ùóõŽŒµîŽ1§øý+ÿzgmZÖùHŒÊâüjô¯kצϬe˜pŠÏÙ1vßˡ׀–S£ôj”ä9=(]Ź'(yÊumâ×ÁyF˜4“ÙW­Á¿ÔãÅéÃè¶}Úïm/^¢çý~W\wK0rÕaa\_…‹¾’²Û÷.žÆ°Î?1UÔÜè\æ§CYon8”׬ ÇàëÖLPŒN­øÍ;ŽdŒ9ÇûGSùOÿãÿ½Ä8zÎÁ+ÿSǧç£C2Ÿÿcü/žóˆ²ÑNGÄ7 ¥³˜==-ç‡'ïÛ‹q”“†~…3»•ô°„P'òa»Ê6ç•¿æÇwìíù‹ü3/‹0ò9{¬b0á2×9bà¼à5£öH0‘cŲäemo¿ï6|,X¯’éÍVÍâE ³MÛÐ÷c®ÉŸÅvåkÀš fcÈß›cý&>ÃÔ+. 
…pO>'BG3îKDÙG®¿ÅàQܯ€Râba`ÊGCìÕƒ(mÌß;L>Èu¦sî×ìî—ÝDçégO¿œ&—KXi{¿õ\4{ôê€Z1ðÝþÏ¡å¯Ñ’Ü{—bЉÙ7¤è´mtýIýìMŸV2¬Öpôûݱù©üa@ÐûuvN*>ò †\ྌÃâÂäèò›øZ±!œ'oú2"/3âÓ/ñ}æ9½è¿ÏPñmÁqŒ%ž†þä¹…(¹!½v¬µú¯j™14ż‚™]°;†§q=#úOùoä”f‹Ð#Ï0ȯá4´X86ÔóÂ!p²¬-¬X]//u\íƒ!?8'­Î´fAèõâvEG?ŽÙnªcLõnBº®ÆØŸ<‡ í_Þ¾ÕiŽ»âfo̶ÃH³­+?5éñs˜ÀW‹þ“ÄvÞâ+VÛ:¦ÚÈÇrL^Çûù£“Eò}Cva\~þÊ‚¹¦oã†u¬Õ*#n{ë5ÚòœvŒâùǘ0Ìœ ì0å»>Ú ¤³¯tÇ=¤ŽºDujÔJ®wE§9é©YŸß›Êö”ÍØÛã?‹¿*ð)ÖkÉóÀ±ÚKÍÑðM1b'÷—¢ë£¬ÇØÄ­ªÆ9†èsºî³ùN›Ñc Ï•A8ÁOü Ñâ?GÐã;¯g1n6Ï+Ã0_>WÃ+ÎÓïüMÂÀ¯ç~pꯆֿ2bè1¾wÂôd®ßG·÷iÝ/‚òf|ý%~|ßñý¸Nƒ»\ü`—¾ ÝŒßv.">¯ Çï þaÞO´¨@«…±Û#ã1ñ“õ×ûKÿ¾Î[ŽÒXÎwÁÀo,î;#¾{²£é3ÿ¶ö’¿íYËçvl3 èÒ¥¬VÐ^¬Ym]HPƒ ´¿Ò¼—÷2 J®p}Œéæ&sý›[˨_@§!:í usV? Hy…Îÿ°']òc®>~fö ¹Þ Ór9oeY\O…’ä¦bAXŒ²Vó.8õLþ¦Å¡Ñ뻫埘©†71¢ä7ß+™7ýo­Úò8fÖ=:½™ä;:Žõ3'È “Õ™<üßBÀ3ª7ã¶µh){U öUßW™;Ã~Ű¡”Ó#­ê7¥ké _Á‚¶1õ1“ ›¡kë+‡{vÆàµ”$ã:L轿Îú—ÁoÓ¹¦;ÇüEÉÃnÌ(n]}Sê´Õ˜û$‹ó`yåð±zÛä‹XMð¿bíûÑô{:ç›>xFî[öÑûÜg ´ÖqNÛK;M¯÷ {Ú"jJ;qý¦Žã91è¿ß:sÈJGŒýA{˜ÒÔø?9 Ö,SŪ’Ý ¦ÂÄ;üs—¿4fœmÿÐÿ5ÏÁÈ»cÇÏ~°ý®Ú®œ‘vzîÄäU­ÅÂk5¦a²ºÚàHÒÃÙËL_m¹¿%1¼.CÇUŽÒÁ[.` çXÈËŒÄÈ:ÒcÆÜˆéïˆáô„ïÇä}Ë~Þ"Öw<ÞÄ3 ø=Óùé#WKõ‡Û€ãN®cÃø×ö\ ±ÌîT ^°¦p6fná; žÇsãQvÉrX{oÊW¸~S"y.5&Dp †Ì»áðžÛ„ç†aWä [é’õgfÃèªóTvšë 0`Ö#›u1fEÈ›×ÿa„„óèLm ?/X[c¬Ï{Æ ï—Á-ßÖwU\?ô °?2'è%ÆŸãÆ´4ÎÅ}äòÓ‹¢ùÊnrýzÍä¼HL¬ÍÆ‚bŸ¢ã~_ôr幓ò+/4²­¥Ãø'|ŒV!½†ˆëîvÖ¬YžþÆßÄèñÜ÷€±»Â¨£âùÞÝxn/†œæs Ðð\,ôÊ…‰Í­û¢Ì‚|0N8ƒ Ð1öpÆØâ)³Ñý6}ýæưÁ›ÑÜY¶óÝ0ù1÷É™™Y˜›™™Uÿ²ÿªÑ£u÷ö}Äÿc/þe^õ7ÍÄüÿÿû~eQÏþQÿûE¶ì§Œÿ«²ò[ÁÿýÛvkÝç¿óÿþ¦U»Ö}[Gu(½ø£ì¯ÿ†ð™àR¼cluster/data/votes.repub.tab0000644000176000001440000002156610400542206015643 0ustar ripleyusers"1856" "1860" "1864" "1868" "1872" "1876" "1880" "1884" "1888" "1892" "1896" "1900" "1904" "1908" "1912" "1916" "1920" "1924" "1928" "1932" "1936" "1940" "1944" "1948" "1952" "1956" "1960" "1964" "1968" "1972" "1976" "Alabama" NA NA NA 51.44 53.19 40.02 36.98 38.44 32.28 3.95 28.13 34.67 20.65 24.38 8.26 21.97 30.98 27.01 48.49 14.15 12.82 14.34 18.2 19.04 35.02 39.39 41.75 69.5 14 72.4 43.48 "Alaska" NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 50.94 34.1 45.3 58.1 62.91 "Arizona" NA NA NA NA NA NA NA NA NA NA NA 
NA NA NA 12.74 35.37 55.41 41.26 57.57 30.53 26.93 36.01 40.9 43.82 58.35 60.99 55.52 50.4 54.8 64.7 58.62 "Arkansas" NA NA NA 53.73 52.17 39.88 39.55 40.5 38.07 32.01 25.11 35.04 40.25 37.31 19.73 28.01 38.73 29.28 39.33 12.91 17.86 20.87 29.84 21.02 43.76 45.82 43.06 43.9 30.8 68.9 34.97 "California" 18.77 32.96 58.63 50.24 56.38 50.88 48.92 52.08 49.95 43.76 49.13 54.48 61.9 55.46 0.58 46.26 66.24 57.21 64.7 37.4 31.7 41.35 42.99 47.14 56.39 55.4 50.1 40.9 47.8 55 50.89 "Colorado" NA NA NA NA NA NA 51.28 54.39 55.31 41.13 13.84 42.04 55.27 46.88 21.88 34.75 59.32 57.02 64.72 41.43 37.09 50.92 53.21 46.52 60.27 59.49 54.63 38.7 50.5 62.6 55.89 "Connecticut" 53.18 53.86 51.38 51.54 52.25 48.34 50.52 48.01 48.44 46.8 63.24 56.94 58.13 59.43 35.88 49.8 62.72 61.54 53.63 48.54 40.35 46.3 46.94 49.55 55.7 63.73 46.27 32.2 44.3 58.6 52.64 "Delaware" 2.11 23.71 48.2 40.98 50.99 44.55 47.86 42.75 43.55 48.55 52.94 53.65 54.04 52.09 32.85 50.2 55.71 57.71 65.03 50.55 43.43 45.05 45.27 50.04 51.75 55.09 49 39.1 45.1 59.6 47.27 "Florida" NA NA NA NA 53.52 50.99 45.83 46.82 39.94 NA 24.3 19.03 21.15 21.58 8.25 18.08 30.79 28.06 56.83 25.04 23.85 25.99 29.68 33.63 55.01 57.2 51.51 48.9 40.5 71.9 46.83 "Georgia" NA NA NA 35.72 43.77 27.94 34.33 33.84 28.33 21.8 36.82 28.56 18.32 31.4 4.27 7.07 28.57 18.19 43.37 7.77 12.6 14.84 18.25 18.31 30.34 33.22 37.44 54.1 30.4 75 33.02 "Hawaii" NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 49.97 21.2 38.7 62.5 48.72 "Idaho" NA NA NA NA NA NA NA NA NA 44.87 21.3 47.14 65.84 54.09 31.02 41.13 66.02 47.12 64.74 38.23 33.19 45.31 48.07 47.26 65.42 61.18 53.78 49.1 56.8 64.2 61.77 "Illinois" 40.25 50.68 54.41 55.69 56.27 50.09 51.11 50.16 49.54 45.7 55.65 52.83 58.77 54.52 22.13 52.56 67.81 58.84 56.93 42.04 39.69 48.54 48.05 49.24 54.84 59.52 49.8 40.5 47.1 59 51.11 "Indiana" 40.03 51.09 53.6 51.39 53 48.27 49.33 48.16 49.05 46.17 50.81 50.6 53.99 48.4 23.11 47.44 55.14 55.25 59.68 42.94 41.89 50.45 52.38 
49.58 58.11 59.9 55.03 44 50.3 66.1 53.77 "Iowa" 49.13 54.87 64.23 61.92 64.18 58.58 56.85 52.42 52.36 49.6 55.46 57.99 63.37 55.62 24.3 54.06 70.91 55.06 61.8 39.98 42.7 52.03 51.99 47.58 63.76 59.06 56.71 38.1 53 57.6 50.51 "Kansas" NA NA 78.61 68.89 66.64 63.1 60.4 58.18 55.39 48.4 47.46 52.56 64.81 52.46 20.48 44.09 64.76 61.54 72.02 44.13 45.95 56.86 60.25 53.63 68.77 65.44 60.45 45.9 54.8 67.7 53.91 "Kentucky" 0.26 0.93 30.17 25.45 46.45 37.61 39.8 42.81 44.99 39.74 48.92 48.5 47.11 48.04 25.46 46.52 49.25 48.93 59.33 40.15 39.92 42.3 45.22 41.48 49.84 54.3 53.59 36 43.8 63.4 46.24 "Louisiana" NA NA NA 29.31 55.69 51.57 37.1 42.39 26.48 23.59 21.79 20.96 9.65 11.92 4.83 6.95 30.57 20.23 23.71 7.03 11.16 14.09 19.4 17.45 47.08 53.28 28.59 56.8 23.5 65.3 47 "Maine" 61.37 64.15 60.22 62.42 67.86 56.73 51.45 55.35 57.49 54.06 67.9 61.89 67.1 63 20.48 50.95 68.92 72.03 68.63 55.83 55.49 51.1 52.44 56.74 66.05 70.87 57.05 31.2 43.1 61.5 50.34 "Maryland" 0.32 3.11 55.1 32.8 49.65 43.94 45.37 46.09 47.4 43.48 54.6 51.5 48.83 48.85 23.69 44.78 55.11 45.29 57.06 36.04 37.04 40.83 48.15 49.4 55.36 60.04 46.39 34.5 41.9 61.3 46.87 "Massachusetts" 64.72 62.75 72.22 69.67 69.25 57.74 58.5 48.32 53.38 51.85 69.36 57.67 57.92 58.21 31.89 50.54 68.55 62.26 49.15 46.64 41.76 46.36 47 43.17 54.22 59.33 39.55 23.8 32.9 45.2 41.93 "Michigan" 56.98 57.18 55.89 56.98 62.67 52.45 52.54 47.89 49.73 47.81 53.85 58.1 69.5 61.93 27.63 52.09 72.8 75.37 70.36 44.45 38.76 49.85 49.18 49.23 55.44 55.63 48.84 33.3 41.5 56.2 52.68 "Minnesota" NA 63.42 59.06 60.8 61.55 58.77 62.28 58.8 54.12 45.96 56.62 60.21 73.95 59.11 19.25 46.35 70.78 51.18 57.77 36.29 31.01 47.66 46.86 39.89 55.33 53.68 49.16 36.2 41.5 51.6 44.3 "Mississippi" NA NA NA NA 63.47 31.92 29.94 36.25 26.63 2.64 7.27 9.73 5.46 6.56 2.47 4.91 14.03 7.6 17.9 3.55 2.74 4.19 6.44 2.62 39.56 24.46 24.67 87.1 13.5 78.2 49.21 "Missouri" NA 10.29 70.17 58.9 43.65 41.23 38.65 46.01 45.31 41.97 45.24 45.94 49.92 48.5 29.75 46.94 54.57 
49.58 55.58 35.08 38.16 47.5 48.43 41.5 50.71 49.87 49.74 36 44.9 62.2 48.22 "Montana" NA NA NA NA NA NA NA NA NA 42.54 19.72 39.84 54.21 46.98 23.19 37.57 61.13 42.5 58.37 36.07 27.59 40.17 44.93 43.15 59.39 57.13 51.1 41.1 50.6 57.9 53.65 "Nebraska" NA NA NA 64.14 70.12 59.65 62.87 57.33 53.51 43.57 45.98 50.46 61.38 47.6 21.73 41.08 64.68 47.09 63.19 35.29 40.74 57.19 58.58 54.15 69.15 65.51 62.07 47.4 59.8 70.5 60.31 "Nevada" NA NA 59.84 55.39 57.43 52.73 47.6 56.21 57.23 25.84 18.79 37.75 56.66 43.93 15.89 36.4 56.92 41.76 56.54 30.59 27.19 39.92 45.38 47.26 61.45 57.97 48.84 41.4 47.5 63.7 52.27 "New Hampshire" 53.59 56.89 52.56 55.02 53.95 51.84 51.94 51.15 50.35 51.11 68.74 59.34 60.14 59.32 37.43 49.06 59.84 59.83 58.66 50.42 47.98 46.78 47.87 52.41 60.92 66.11 53.42 36.1 52.1 64 55.68 "New Jersey" 28.52 48.13 47.16 49.12 54.22 47 49.02 47.31 47.52 46.24 59.66 55.27 56.68 56.79 20.54 54.35 67.6 62.16 59.77 47.59 39.57 47.93 48.95 50.33 56.81 64.68 49.16 34.4 46.1 61.6 50.99 "New Mexico" NA NA NA NA NA NA NA NA NA NA NA NA NA NA 35.91 46.53 54.68 48.52 59.01 35.76 36.5 43.28 46.44 42.93 55.39 57.81 49.41 41 51.8 61 51.04 "New York" 46.14 53.71 50.46 49.41 53.12 48.07 50.32 48.15 49.2 45.59 57.55 53.1 53.13 53.11 28.68 51.53 64.56 55.76 49.79 41.33 38.97 48.04 47.3 45.99 55.45 61.2 47.27 31.4 44.3 58.5 47.84 "North Carolina" NA NA NA 53.37 57.48 46.36 48.04 46.58 47.14 35.79 46.87 45.47 39.7 45.55 11.94 41.73 43.22 39.72 54.87 29.28 26.6 25.97 33.29 32.68 46.09 49.34 47.89 43.8 39.5 69.5 44.43 "North Dakota" NA NA NA NA NA NA NA NA NA 48.5 55.58 62.11 74.83 60.87 26.67 49.2 77.79 47.68 54.8 28 26.58 55.06 53.84 52.17 70.97 61.72 55.42 42 55.9 62.1 52.93 "Ohio" 48.49 52.33 56.33 53.97 53.24 50.21 51.73 50.99 49.51 47.66 51.86 52.3 59.73 51.03 26.82 44.18 58.47 58.33 64.89 47.04 37.43 47.8 50.18 49.24 56.76 61.11 53.28 37.1 45.2 59.6 49.9 "Oklahoma" NA NA NA NA NA NA NA NA NA NA NA NA NA 43.03 35.69 33.21 50.11 42.82 63.72 26.7 32.69 42.23 44.2 37.25 54.59 
55.13 59.02 44.3 47.7 73.7 50.52 "Oregon" NA 34.48 53.9 49.63 58.74 50.91 50.51 50.99 53.82 44.59 50.07 55.25 67.06 56.39 25.3 48.47 60.2 51.01 64.18 36.89 29.64 45.63 46.94 49.78 60.54 55.25 52.62 36.3 49.8 52.4 50.01 "Pennsylvania" 33.95 56.25 51.75 52.2 62.18 50.61 50.84 52.68 52.73 51.45 60.98 60.74 67.99 58.84 22.4 54.25 65.8 65.35 65.24 50.84 40.85 46.34 48.36 50.93 52.74 56.49 48.74 34.5 44 59.1 48.57 "Rhode Island" 57.85 61.22 62.2 66.49 71.94 59.29 62.25 58.07 53.89 50.71 68.33 59.74 60.6 60.76 35.57 51.08 63.97 59.63 49.55 43.31 40.18 43.24 41.26 41.44 50.89 58.31 36.37 19.1 31.8 53 44.24 "South Carolina" NA NA NA 57.93 75.95 50.26 33.97 23.72 17.27 18.99 13.51 7.04 4.63 5.97 1.06 2.43 3.9 2.21 8.54 1.89 1.43 4.37 4.46 3.78 49.28 25.18 48.76 58.9 38.1 70.8 43.54 "South Dakota" NA NA NA NA NA NA NA NA NA 49.48 49.48 56.73 71.09 58.84 NA 49.8 60.74 49.69 60.18 34.4 42.49 57.41 58.33 51.84 69.27 58.39 58.21 44.4 53.3 54.2 50.92 "Tennessee" NA NA NA 68.33 47.57 40.21 44.53 47.83 45.85 37.51 46.23 44.93 43.4 45.95 23.84 42.7 51.28 43.59 53.76 32.46 30.81 32.36 39.22 36.87 49.98 49.21 52.92 44.5 37.8 67.7 43.21 "Texas" NA NA NA NA 40.71 29.96 23.95 28.63 24.73 19.28 30.75 30.83 21.9 22.35 8.77 17.45 23.54 19.78 51.77 11.35 12.31 19.13 16.64 24.6 53.13 55.27 48.52 36.7 39.9 66.2 48.01 "Utah" NA NA NA NA NA NA NA NA NA NA 17.27 50.59 61.45 56.19 37.46 37.82 55.93 49.26 53.58 41.05 29.79 37.59 39.42 45.52 58.93 64.56 54.81 45.3 56.5 67.6 64.94 "Vermont" 78.23 75.79 76.1 78.57 78.26 68.58 69.88 66.54 71.24 68.1 80.1 75.79 77.98 75.12 37.13 62.44 75.87 78.22 66.88 57.66 56.44 54.79 57.06 61.55 71.46 72.18 58.65 33.7 52.8 62.7 56.01 "Virginia" 0.19 1.15 NA NA 50.48 40.62 39.52 48.9 49.47 38.75 45.9 43.81 36.67 38.36 17 32.05 37.85 32.79 53.91 30.09 29.39 31.55 37.39 41.04 56.32 55.37 52.44 46.5 41.4 67.8 50.73 "Washington" NA NA NA NA NA NA NA NA NA 41.45 41.84 53.44 69.95 57.47 21.82 43.89 55.96 52.24 67.06 33.94 29.88 40.58 42.24 42.68 54.33 53.91 50.68 38 45.1 
56.9 51.37 "West Virginia" NA NA 68.95 58.84 51.82 42.47 41.03 47.74 49.03 46.94 52.23 54.27 55.26 53.41 21.1 49.38 55.3 49.45 58.43 44.47 39.2 42.9 45.11 42.24 48.08 54.08 47.27 32.1 40.8 63.6 41.95 "Wisconsin" 55.29 56.58 55.88 56.24 54.6 50.9 54.04 50.38 49.77 46.02 59.93 60.04 63.24 54.52 32.68 49.25 70.65 37.06 53.52 31.19 30.26 48.32 50.37 46.28 60.95 61.58 51.77 37.9 47.9 53.4 49.16 "Wyoming" NA NA NA NA NA NA NA NA NA NA NA NA NA NA 34.42 41.86 62.38 52.39 63.68 40.82 37.47 46.89 51.23 47.27 62.71 60.04 55.05 43.4 55.8 69 59.85 cluster/data/ruspini.tab0000644000176000001440000000150410400542206015046 0ustar ripleyusers x y 1 4 53 2 5 63 3 10 59 4 9 77 5 13 49 6 13 69 7 12 88 8 15 75 9 18 61 10 19 65 11 22 74 12 27 72 13 28 76 14 24 58 15 27 55 16 28 60 17 30 52 18 31 60 19 32 61 20 36 72 21 28 147 22 32 149 23 35 153 24 33 154 25 38 151 26 41 150 27 38 145 28 38 143 29 32 143 30 34 141 31 44 156 32 44 149 33 44 143 34 46 142 35 47 149 36 49 152 37 50 142 38 53 144 39 52 152 40 55 155 41 54 124 42 60 136 43 63 139 44 86 132 45 85 115 46 85 96 47 78 94 48 74 96 49 97 122 50 98 116 51 98 124 52 99 119 53 99 128 54 101 115 55 108 111 56 110 111 57 108 116 58 111 126 59 115 117 60 117 115 61 70 4 62 77 12 63 83 21 64 61 15 65 69 15 66 78 16 67 66 18 68 58 13 69 64 20 70 69 21 71 66 23 72 61 25 73 76 27 74 72 31 75 64 30 cluster/data/pluton.tab0000644000176000001440000000224210400542206014676 0ustar ripleyusersPu238 Pu239 Pu240 Pu241 0.126 75.804 21.204 2.18 0.133 75.515 21.408 2.24 0.127 75.175 21.668 2.305 0.156 78.872 18.428 1.906 0.503 73.317 20.223 4.128 0.113 79.116 18.548 1.69 0.129 75.751 21.162 2.26 0.124 75.326 21.557 2.282 1.022 63.287 24.493 6.99 1.412 59.553 25.576 8.027 1.533 58.688 25.719 8.279 1.534 58.758 25.692 8.261 1.437 59.728 25.146 8.377 1.439 59.544 25.126 8.569 1.375 59.877 25.128 8.428 1.153 61.182 25.1 7.802 0.201 78.244 18.488 2.351 0.176 78.166 18.629 2.365 0.239 74.254 21.515 2.901 0.102 79.84 17.872 1.674 1.07 62.455 24.656 7.512 
0.851 73.189 18.285 5.597 0.125 75.968 20.794 2.407 0.142 75.957 20.867 2.341 0.352 72.885 21.718 3.618 0.351 72.907 21.721 3.601 0.346 72.919 21.713 3.6 0.217 76.089 20.225 2.556 1.068 70.129 18.573 7.689 1.171 69.273 18.633 8.3 1.213 69.147 18.64 8.363 1.226 68.294 18.869 8.826 1.111 71.076 18.122 7.248 0.183 75.714 20.75 2.488 0.162 76.15 20.345 2.524 0.113 77.845 19.108 2.275 1.309 62.382 22.754 9.311 1.638 60.112 23.32 9.972 1.589 60.519 23.128 9.97 1.411 61.585 23.133 9.339 1.457 61.332 23.239 9.321 0.397 72.291 21.761 3.836 0.328 73.451 21.429 3.419 0.242 74.888 20.939 2.875 1.367 60.507 23.603 9.839 cluster/data/plantTraits.rda0000644000176000001440000000656010432451454015702 0ustar ripleyusers‹Õ\{ŒUŸ;÷µ-EÚ‚(P£’@P”"„sa[ZÌ®l·Ý A§÷Îî̽3;÷Îv·EY"HU|A"Q1Ò˜È`ƒ òH°E(ÒŠ¤å‘µP±íR¤(48sç<¾9÷Ìë¾Zor:óó½¿ßùιw“.¾üìÙ—Ï–$I–ä´TRFvþIIÒÜYÎóSW*µ•–¢Õª’”>Þ™ZàŒO8ãæÂå‹6ßzÕ©‡ WÞsºW—Þƒîypùß=ùAô{—:a=zÏ›/Èwí˜}¡¹í¬/2ÃõYG»¼õÂlo=†isËB÷ƒþø3÷sGa.ž_€õ~¥¾¼°€–ºËÐFÏná²yú÷yr×p¼•ò¿F3Ž·ïn¾ÂÏkŽÉ‚ü®Gëûá2W@"rõË[ϳ…©Ñú|alÊ¿¾ ®fz ûuÅ‹u»è é:<†^wÕ¿ûg´û¹É sùþÂ\Ï>Õ3ßË ¥Çê )|ÛÓWXq~®¾ÐÕLù>OòèÅã|¥½'±_0eÏϘïÏoªçÓõ´\Eó­xq¢ŸâzÞˆŸÿÄÏ^=h~I}‡õ:™z¢]ñ»vðó…c½ùBFòìÄyÛçå =Rwó4´ ãêy¯nè/8^B?áÅÝ j}}ú±ë} ãâm]äïXî“oˆëÏô¡?¿è}åßÇqòöS!q„iêǪ¯{õüÃlЋ~Ü&±¾÷·z8îyµžP4í×GåwûãG{½º2œ`||ë} ïÛ­^] ½ØïEž{<þÂ\‰|ê}Dâ>èõ¡¡¡ïœb£\úΣ×MOîùå’ïÍA»ðúþ·Ÿ»ð„›'Цyóæ_ÿããÐÓÿ¹éÑOêŸE;~óÎô™½¯¡§×ß>)7’ýOôÒx¶ŽmùÜ{??©aþÕ-'Z™³¢™õ·Ï»ÿà—ÖßÄë°»î¾ûîë^Ú@ýÞ¹ûçËŸ9Lü¤óøýªŸÄ÷ò3çÞ6œÎï9qìÉ÷äÑã‹^?vÛ<º¯H|T¡Usnû{”=ôˆ?}¾´zõêu›òèùW~÷ý{où"z¦üÀKÛ6ž‚v,Ù^ûÓ1{ù}Eõìî?çùE‹nâã ë3œÝÝ›.8þº«…Þ¸ï¶ô¿§OGûñü›¼\>¸ÙbyxèŒ O•ÖêÝÅåmÏ£o­?pßoy>Ê¿÷·ìüÖß¾‰¶azúä“OzvI½ñðÃïÛð2Ú‡ç÷m9ñ†9×Ñx_»ÿôÁ™ü“hçgFìµó Î'Á/W~Ÿͳ}|nÿOìþ‹öœzíÓWÍyŽè‹²Oå§ßYæÊÅÇÞ‰‰‰¥‡l´—“#ëû½þ@é}½W<´ñp£_þ}…öÝ{êµÛWmn¨Óôy»¿ÿÁ_kÀ÷î…¾þÉôLœöÌîÜ@ô£W°?pq½04´òðýŸûìXÉë#îÕ%|ç‡{²dø2xŽ —Î Ö3Þ4÷ÌÚ}OágÈdñ¼,5ú—‘üñÈ//åcž“Íq:\›sâò$ÒÉûãâk‘•Ä9ùœðÌtxÏù¬@'i‚“<‡þò1e¾.O$–Éô#âÈb»ÄW¢W„· XËaYüì‘ã$þöHþðù˜"ò9ð„þò9 ò¾â#Ájˆ='ˆ…ðÀ¼>¨›ä&ÏÑDÌ…¨v)`; ø`)î+PNWÇUÝùö#ÍÅ’îlê,òr6yYH^Î!/‹È˹äå«äå<ÎJ¶¨+UbDÆ\yÃ*©–ZÂdnX)Ö Ëyû`æ+ÏѰö9ðó›“ÆF_žÓ›rYÀ 
sãÑñëœèL“ˆÁÏGtç9»YÎï#”'Ï4Gç±}‚¸'øýÓÃÙ釾Á¹”Ô˜/ÈClå=B?e©1^°7\”ÉK°fImC¯÷r¾g M $B.ß½ÈåOkÑIžš’êŸ:í¾cš?1dž‡ÈñòÜ»´i—ã÷ùÊñÆò!À‰W”'™¾Üˆì=|<‚øˆ?)p}0ºó1ÀH¿Û´Þ"ytòÑóYäyáA--HŠ“||K€ Ó³O”l>¨—GFÔ.NbO”?ò%=®Þ(;¹8B¨]J¢FÐ÷r%süA‡| ^>®a¾‡=[¢CS? Ç‰3(‡aµÈNˆÿ>T`ùPéƒ^JJrIlç«Y3o§OíÂR»òÓ,Ö:UŸ£k-Å7%Õ?ÂNås}hÖ\3ý³xi6W1ÏŽb-¬·ÖOƒ°W‡ÔXt¾6s¶Gʹ>`?÷ à¿(O­ö 8÷‚0;Àm[±äPO[‹º;À?N„É…å7Î}'w2ñûfC8üÂL¨þ‡Å…½$¹Šâm7ÖšÙ_Qû(joÆÙSIöh3=0®œhÿ­5ã{3#IÞ[ô¥+÷µV°ÖíÑŠq±&¢Ãúñÿ#Ö¼]ékíÄS3ñ6Ó«:Í4Œ.ûÜÑûZœ{CÔ™ÏÏ󨉣3Žý¤þ¶"´’ä¥ÓCd7i¹¹Ž`-É>Õ‰¯EoPÍ阒ê‘Ý0¼´»o4äø–·0¿‚êêãÁv¢j¶§©Ð§¾ÝÕ34n]Ú:@M¨-úuTØI KÇ®ý–ÕÓ’ŒDrm®iœþÐ ¿š¶ÙÎø[ÓÕ¬…iqæâž£Qx€gUè9`Gt¦٠âPøæ˜Ý¨œÊÀN`~bä%%Ð%:k#òxD~óh[oˆa+Éß‚’è[ÿ@[~ŪyÎN+w™Ó'/`tüoTÁO@­’`­ã÷2`WˆËÿ[ÊÉa5êf_‹ë_’üýªAW@MCebúvàÿg$­eÐ9vÖ‡ÝmDw—°þY/Œ]ÿnåì|Ǩß?âœOQ~†É$Â’`½½ª½;ɹR£®öµ¶öý¨Z&åk÷ˆ²Û¿Úq÷kcºú[nœxƒú|Cߎ[+ÌÖ÷Ûåw+X ³÷ÌJt逭ˆtô7°:Æ=3ejÙ À7qî0B^]Ÿ,^ÝÃîVI÷E’{CÓµŠÁŸƒv`ÍÿŸUT”²Jþ³Š˜=k–4¥Š‰YºQÑ*%uOäK¶5¬k‰îQU­‘Å5êXÌ–'‹u»uÎquDQ •w¨ªaÁÅ®¦¡ëdY«TÕbx‘Yë¸@Þum¤‚ßÓ¦6F^-ƒpç«jYcdVW•áIBTí²BijŽZJô”ŠQT,“ÐŽ;“€v´*UE­BrT‡ä8 OuÞ$b–KšŠUÓˆeUW4ƒ2WJÆ:ƒ9ÕÔ•UÆl…¤)çæa„$)cWª&WÑY–±ö XÕ›‰ž‹ŠjQ„IósшVF¬jר–J ]7QQ‹j¬ŒT'Q3J”°,UgDmÜf„SB\¬Öª&%,…i»Ø)j“%šÎ^E¯ª€`ª{k X±€Ì¨ªè€`ôj Î!t@µQ)_ÃK°cXU¦ÚA$ʴνvq [™¬U‹„X¬iÍÂd +6«Übµ:ÌØ4PŸÅÚˆI½^⢚¦f2Â6•2%Æ •Ê\¢€š^b),o.Q«œCŒÓÄ/u¢6ÁÂ^ªZ l›z½TWG©‚ej N£PÁ´-›TF5@TÀŠÉÐ{©ÓHãÈ~Í® @¨Ã„èSŠ U}ŠÉØú›©îSmŠ>m¶úŒ ó Ï^W¦lýzizûÕÃ[¿ªÀ0-@ØFX¬ ýšÎBè7tVíÅ´F)¡+ ñ(£C°íį̀CÔ¡3Ä;„è,¸Ä25 ¨®±í<`i,o– T[vQ+½CTiµj ¦“Câr[Õ(`•ŠRsgÐXÃzâ QeûÇ!Xí5>bØ+ð ÓÑŠ€`Øq S®mU&³B)Whp+ŠÃÛ C/ÙŒ¨:#¬`³jŒ¨>º¢¦Ž‚µL½^1Yfy[©¯W*\™`½j¥j³^µÒÒXF‡tç fP=dÕXƒ\¥ZE ËUZ‘%Ä%TFT˜¶Uš1 רðò3»¤Ô”3†Në*úÈÿ¾PK.Ycluster/data/flower.R0000644000176000001440000000133010400542206014303 0ustar ripleyusersflower <- data.frame(V1 = factor(c(1, 2, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1), labels = 0:1), V2 = factor(c(2, 1, 2, 1, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1), labels = 0:1), V3 = factor(c(2, 1, 1, 2, 1, 1, 1, 2, 1, 1, 2, 2, 1, 2, 1, 1, 1, 2), labels = 0:1), V4 = factor(c(4, 2, 3, 4, 5, 4, 4, 2, 3, 5, 5, 1, 1, 4, 3, 4, 2, 2)), V5 = ordered(c(3, 1, 3, 2, 2, 3, 3, 2, 1, 2, 3, 2, 2, 2, 2, 2, 2, 1)), V6 = 
ordered(c(15, 3,1,16, 2,12,13, 7, 4,14, 8, 9, 6,11,10,18,17, 5)), V7 = c(25, 150, 150, 125, 20, 50, 40, 100, 25, 100, 45, 90, 20, 80, 40, 200, 150, 25), V8 = c(15, 50, 50, 50, 15, 40, 20, 15, 15, 60, 10, 25, 10, 30, 20, 60, 60, 10) ) cluster/data/chorSub.rda0000644000176000001440000000255110417224251014772 0ustar ripleyusers‹]–{—SÇÏî¢m©ã.Y¦ÙhßË^J¹dÌήØ-+&íÍZS»è2CÆ$#†©$Ö¦Ò¦-ƒ¡±R£þ°.[#5­m—¢–©×÷û{¿oÏO|~Ï÷œ÷œç9·çœßÍ×Vù9U9ιL—™åR¸2ñ“á\nØ~µw5ßW1³ßXW|pbE;ÁŸàs”‡£×`öGù Õ-‡ýöFØßÀ\ð)ʃSÀ(¯‚-C¡ÝЃa÷2<ì¯ ‚žA_àôØ—ao€]ξ`8åu°a{aOƒÝCWÂþ >†^e`2ê~À7(€Š¡=Ø«`OVû±l§q-¿`_…½¶T€yZ—§@ xEcüŠcƒP÷mjÙ»Lz£r/Xn]¨Ë‚ÝöC7s~`*¸\Ä5kmC­Ù_УÁЫAôl0z-ì…Ü+°Pk;Wkù­?AŸE jÀk(ážÃ> ÛÛÈ6`)Ê[Áõå|J9ØVîX ý¢Æ”òð¡Ö%x:Wc¼åEÚ‹gÀwú~6ì÷<ž°%àz°¼¥¸\ƒNŽÌA»ùàjè]ÚžŸÍÐu€~·íаOꌲox,oãû,Ø¿çY>Ð *À(à4×Í)4Ð×l°„畹zPÏ|:T¡|X{rúˆÎj7øŒ¾´ëÁKhÓ{þ£89ç(/i^۴׫éö Àï£8'ðÎáåÓ^ÐÊñÀ/h¿é÷i­st×Ìçž‚¢8çØgóú~î ìtÝô±Usáz¶jì´^ô·uÃÀåZÞA=xPûÑ-¿C·Ss:ø¬ƒ=Sõ¼è³z•ÎóP'æTžî—iå<èßu¶¹§w§—µ÷¨_R×!ýϨQûI»´º.­?ëºÒ¾u¨mÒn¦ÆÕžÖ®+cc–í—|ã˜5îŽ4¿íºw®ä^ƒBßeï€eÚ¯#ÊýÀ©à#íÏÈ ùnÖ¼îQÿyÌ#åÖ>Ý_Ü‹jp+`ŽÔ¾ð®½{jï×›Àxi2ب±MtñÝÛ«6•à|”‹hÁð(Îoö+Õ¸yV+w¸ÿûÁn¿ ôÅ{bê¢8SeðKòÝÅ÷Ûd'–ó‚îSÛ>ùÛ­óž*ƒ3ŽóE½g¢%¶®M¶A¾ÙæùÄGš¿dL}‰/´—óH‹ŸfSý\üP3Ï~äš(V¾‹ß×Ôç)?è3ˆâ÷ŽwíØK¸?Qün ƒ½ÍÅï<ï¦7Á.~Gë>yÝ¥Þ|wýDñýñ‡ÖŠïÈtØM.¾[9×:;|·g$9.ík¯ÇèŒqožsñ»÷¥Î ý̉âÿ <3µ:s¼×ztŸìSÀCQüö×8xwÍŠsÔ•©=ß°%°ãt›rs™òf öî­Ó!åP1ìÚ(¾¹ÆuZ3¾3Œ»(Šÿ<«ñl×¹à4AßÚõçy3×^k9Bñ9î|Åäù#ÌÇż4®ÿßß´¬ºÆi0ø‹–úKÁ4Ï9®I6š4UO«gJŸ®v¹j›rá,0é™ôM&C“…&‹L›,192‘~AIϤo20š,4Yd²Ød‰I‹æY4Ï¢yͳhžEó,šgÑ<‹æY4Ï¢ùÍ·h¾Eó-šoÑ|‹æ[4ߢùÍ·hE ,Z`Ñ‹X´À¢-°hE ,ZhÑB‹Z´Ð¢…-´h¡E -ZhÑB‹VX cš£ªÌ±SuMu¢ÆÕKe”&Ue ÇTS¢Ê“ã“ªŠÆDUR%ÿã! 
cluster/data/animals.tab0000644000176000001440000000111410400542206014776 0ustar ripleyusers war fly ver end gro hai ant 1 1 1 1 2 1 bee 1 2 1 1 2 2 cat 2 1 2 1 1 2 cpl 1 1 1 1 1 2 chi 2 1 2 2 2 2 cow 2 1 2 1 2 2 duc 2 2 2 1 2 1 eag 2 2 2 2 1 1 ele 2 1 2 2 2 1 fly 1 2 1 1 1 1 fro 1 1 2 2 NA 1 her 1 1 2 1 2 1 lio 2 1 2 NA 2 2 liz 1 1 2 1 1 1 lob 1 1 1 1 NA 1 man 2 1 2 2 2 2 rab 2 1 2 1 2 2 sal 1 1 2 1 NA 1 spi 1 1 1 NA 1 2 wha 2 1 2 2 2 1 cluster/data/agriculture.tab0000644000176000001440000000032610400542206015704 0ustar ripleyusers x y B 16.8 2.7 DK 21.3 5.7 D 18.7 3.5 GR 5.9 22.2 E 11.4 10.9 F 17.8 6.0 IRL 10.9 14.0 I 16.6 8.5 L 21.0 3.5 NL 16.4 4.3 P 7.8 17.4 UK 14.0 2.3 cluster/README0000644000176000001440000000264210307303426012645 0ustar ripleyusersORIGINAL README : This directory contains code, help and examples for CLUS, an S-PLUS package for clustering, as described in ``Clustering in an Object-Oriented Environment'' by Anja Struyf, Mia Hubert, and Peter J. Rousseeuw (Journal of Statistical Software, volume 1). ------------------------------------------------------------------------ See http://www.stat.ucla.edu/journals/jss/ for the original version. The current port is based on material now on http://www.agoras.ua.ac.be/ KH 1998/05/21 --------- For historical reasons, we keep R/README-Splus which has no relevance to the R package. --------------------- TODO {see ./DONE-MM for things done} 3) daisy() for the case of mixed variables should allow a weight vector (of length p = #vars) for up- or downweighing variables. daisy() really should accept the other methods mva's dist() does _and_ it should use dist's C API -- but we have no C API for package code, ARRGH! 4) Eliminate the many Fortran (g77 -Wall) warnings of the form >> mona.f:101: warning: `jma' might be used uninitialized in this function 6) Mona objects describe a hierarchical clustering; they could also inherit from twins, and hence have a pltree() method for plotting the hierarchical tree. 
8b) Think about "merging" the plot.agnes and plot.diana methods. ------------------ Martin , since 1999 cluster/R/0000755000176000001440000000000012124335266012170 5ustar ripleyuserscluster/R/zzz.R0000644000176000001440000000021211407171057013142 0ustar ripleyusers.onUnload <- function(libpath) { library.dynam.unload("cluster", libpath) } ## no S4 methodology here; speedup : .noGenerics <- TRUE cluster/R/silhouette.R0000644000176000001440000002145011626656307014512 0ustar ripleyuserssilhouette <- function(x, ...) UseMethod("silhouette") ## Accessor and more: silhouette.partition <- function(x, ...) { r <- x$silinfo$widths if(is.null(r)) stop("invalid partition object") attr(r, "Ordered") <- TRUE # (cluster , s.i ) attr(r, "call") <- x$call class(r) <- "silhouette" r } silhouette.clara <- function(x, full = FALSE, ...) { if(!full) return(NextMethod()) ##-> silh*.partition() ## else : full = TRUE if(is.null(x$data)) stop("full silhouette is only available for results of", " 'clara(*, keep.data = TRUE)'") ## Compute "full" silhouette -- from clustering + full distances: r <- silhouette(x$clustering, daisy(x$data, metric = attr(x, "Metric"))) attr(r, "call") <- substitute(silhouette(CL, full = TRUE), list(CL = x$call)) r } ## R-only implementation -- no longer used nor exported: silhouette.default.R <- function(x, dist, dmatrix, ...) 
{ cll <- match.call() if(is.list(x) && !is.null(cl <- x$clustering)) x <- cl n <- length(x) if(!all(x == round(x))) stop("'x' must only have integer codes") k <- length(clid <- sort(unique(x))) if(k <= 1 || k >= n) return(NA) ## check dist/dmatrix if(missing(dist)) { if(missing(dmatrix)) stop("Need either a dissimilarity 'dist' or diss.matrix 'dmatrix'") if(is.null(dm <- dim(dmatrix)) || length(dm) != 2 || !all(n == dm)) stop("'dmatrix' is not a dissimilarity matrix compatible to 'x'") } else { # 'dist' dist <- as.dist(dist) # hopefully if(n != attr(dist, "Size")) stop("clustering 'x' and dissimilarity 'dist' are incompatible") dmatrix <- as.matrix(dist)# so we can apply(.) below } wds <- matrix(NA, n,3, dimnames = list(names(x), c("cluster","neighbor","sil_width"))) for(j in 1:k) { # j-th cluster: Nj <- sum(iC <- x == clid[j]) wds[iC, "cluster"] <- clid[j] ## minimal distances to points in all other clusters: diC <- rbind(apply(dmatrix[!iC, iC, drop = FALSE], 2, function(r) tapply(r, x[!iC], mean)))# (k-1) x Nj ## max.col() breaks ties at random; rather do not want random ## behavior of silhouette, (but rather "pam" compatible one): minC <- apply(diC, 2, which.min) ## FIXME minC <- max.col(-t(diC)) ## FIXME : extend max.col(*, ties.method = "min") {or similar} ! wds[iC,"neighbor"] <- clid[-j][minC] s.i <- if(Nj > 1) { a.i <- colSums(dmatrix[iC, iC])/(Nj - 1) # length(a.i)= Nj b.i <- diC[cbind(minC, seq(along = minC))] ifelse(a.i != b.i, (b.i - a.i) / pmax(b.i, a.i), 0) } else 0 wds[iC,"sil_width"] <- s.i } attr(wds, "Ordered") <- FALSE attr(wds, "call") <- cll class(wds) <- "silhouette" wds } ## silhouette.default.R silhouette.default <- function(x, dist, dmatrix, ...) 
{ cll <- match.call() if(is.list(x) && !is.null(cl <- x$clustering)) x <- cl n <- length(x) if(!all(x == round(x))) stop("'x' must only have integer codes") k <- length(ux <- unique(x <- as.integer(x))) if(k <= 1 || k >= n) # silhouette undefined for trivial clusterings return(NA) doRecode <- (any(ux < 1) || any(ux > k)) ## need to recode if(doRecode) x <- as.integer(fx <- factor(x)) # now *has* values in 1:k ## check dist/dmatrix has.dmatrix <- missing(dist) if(has.dmatrix) { if(missing(dmatrix)) stop("Need either a dissimilarity 'dist' or diss.matrix 'dmatrix'") if(is.null(dm <- dim(dmatrix)) || length(dm) != 2 || !all(n == dm)) stop("'dmatrix' is not a dissimilarity matrix compatible to 'x'") } else { # 'dist' dist <- as.dist(dist) # hopefully if(n != attr(dist, "Size")) stop("clustering 'x' and dissimilarity 'dist' are incompatible") } out <- .C(sildist, d = as.numeric(if(has.dmatrix) dmatrix else dist), as.integer(n), x, as.integer(k), diC = numeric(n*k), counts = integer(k), si = numeric(n), neighbor = integer(n), ismat = has.dmatrix, DUP = FALSE)[c("si", "neighbor")] if(doRecode) { code.x <- as.integer(levels(fx)) x <- code.x[x] } wds <- cbind(cluster = x, neighbor = if(doRecode) code.x[out$neighbor] else out$neighbor, "sil_width" = out$si) if(doRecode) attr(wds, "codes") <- code.x attr(wds, "Ordered") <- FALSE attr(wds, "call") <- cll class(wds) <- "silhouette" wds } sortSilhouette <- function(object, ...) 
{ if(is.null(n <- nrow(object)) || n < 1) stop("invalid silhouette structure") if(attr(object,"Ordered")) { if(is.null(attr(object, "iOrd"))) attr(object, "iOrd") <- 1:n return(object) } ## Else : if(is.null(rownames(object))) rownames(object) <- as.character(1:n) ## k <- length(clid <- sort(unique(cl <- object[,"cluster"])))# cluster ID s cl <- object[,"cluster"] r <- object[iOrd <- order(cl, - object[,"sil_width"]) , , drop = FALSE] ## r has lost attributes of object; restore them, but do *not* ## change dimnames: nms <- names(at <- attributes(object)) for(n in nms[!(nms %in% c("dim","dimnames","iOrd","Ordered"))]) attr(r, n) <- at[[n]] attr(r,"iOrd") <- iOrd # the ordering attr(r,"Ordered") <- TRUE r } summary.silhouette <- function(object, FUN = mean, ...) { if(ncol(object) != 3) stop("invalid 'silhouette' object") cl <- object[, "cluster"] si <- object[, "sil_width"] r <- list(si.summary = summary(si, ...), clus.avg.widths = tapply(si, cl, FUN), clus.sizes = table(cl), avg.width = FUN(si), call = attr(object,"call"), codes = attr(object,"codes"), Ordered = attr(object,"Ordered")) class(r) <- "summary.silhouette" r } print.summary.silhouette <- function(x, ...) { k <- length(csiz <- x$clus.sizes) cls <- paste("Cluster sizes", if(!is.null(x$codes)) paste(", ids = (",paste(x$codes, collapse=", "),"),", sep=""), sep="") cat("Silhouette of", sum(csiz), "units in", k, "clusters", if(!is.null(x$call)) paste("from", deparse(x$call)), ":\n", cls, "and average silhouette widths:\n") cwid <- x$clus.avg.widths names(cwid) <- csiz print(cwid, ...) cat("Individual silhouette widths:\n") print(x$si.summary, ...) invisible(x) } ## This was the internal function silhouPlot() in plot.partition() : plot.silhouette <- function(x, nmax.lab = 40, max.strlen = 5, main = NULL, sub = NULL, xlab = expression("Silhouette width " * s[i]), col = "gray", do.col.sort = length(col) > 1, border = 0, cex.names = par("cex.axis"), do.n.k = TRUE, do.clus.stat = TRUE, ...) 
{ if(!is.matrix(x) || ncol(x) != 3) stop("No valid silhouette information (#{clusters} =? 1)") n <- nrow(x) x <- sortSilhouette(x) s <- rev(x[, "sil_width"]) space <- c(0, rev(diff(cli <- x[, "cluster"]))) space[space != 0] <- 0.5 # gap between clusters axisnames <- (n < nmax.lab) if(axisnames) names <- substring(rev(rownames(x)), 1, max.strlen) if(is.null(main)) { main <- "Silhouette plot" if(!is.null(cll <- attr(x,"call"))) { # drop initial "silhouette": if(!is.na(charmatch("silhouette", deparse(cll[[1]])))) cll[[1]] <- as.name("FF") main <- paste(main, "of", sub("^FF","", deparse(cll))) } } smry <- summary(x) k <- length(nj <- smry$clus.sizes) # k clusters if(is.null(sub)) sub <- paste("Average silhouette width : ", round(smry$avg.width, digits = 2)) if(do.col.sort && (lc <- length(col)) > 1) { if(lc == k)# cluster wise coloring col <- col[cli] else ## unit wise coloring if(lc != n) col <- rep(col, length = n) col <- rev(col) # was rev(col[attr(x, "iOrd")]) } y <- barplot(s, space = space, names = names, xlab = xlab, xlim = c(min(0, min(s)), 1), horiz = TRUE, las = 1, mgp = c(2.5, 1, 0), col = col, border = border, cex.names = cex.names, axisnames = axisnames, ...) title(main = main, sub = sub, adj = 0) if(do.n.k) { mtext(paste("n =", n), adj = 0) mtext(substitute(k ~~ "clusters" ~~ C[j], list(k=k)), adj= 1) } if(do.clus.stat) { mtext(expression(paste(j," : ", n[j]," | ", ave[i %in% Cj] ~~ s[i])), adj = 1.04, line = -1.2) y <- rev(y) hasCodes <- !is.null(cx <- attr(x,"codes")) for(j in 1:k) { j. 
<- if(hasCodes) cx[j] else j yj <- mean(y[cli == j.]) text(1, yj, paste(j.,": ", nj[j]," | ", format(smry$clus.avg.widths[j], digits = 1, nsmall = 2)), xpd = NA, adj = 0.8) } } } cluster/R/plotpart.q0000644000176000001440000003721211674145034014225 0ustar ripleyusers### $Id: plotpart.q 5974 2011-12-20 17:47:08Z maechler $ plot.partition <- function(x, ask = FALSE, which.plots = NULL, nmax.lab = 40, max.strlen = 5, data = x$data, dist = NULL, stand = FALSE, lines = 2, shade = FALSE, color = FALSE, labels = 0, plotchar = TRUE, span = TRUE, xlim = NULL, ylim = NULL, main = NULL, ...) { if(is.null(x$data))# data not kept x$data <- data if(is.null(x$data) && !is.null(dist)) x$diss <- dist if(is.null(which.plots) && !ask) which.plots <- { if(is.null(x$data) && (is.null(x$diss) || inherits(x, "clara"))) 2 ## no clusplot else 1:2 } if(ask && is.null(which.plots)) { ## Use 'menu' .. tmenu <- paste("plot ", ## choices : c("All", "Clusplot", "Silhouette Plot")) do.all <- FALSE repeat { if(!do.all) pick <- menu(tmenu, title = "\nMake a plot selection (or 0 to exit):\n") + 1 switch(pick, return(invisible())# 0 -> exit loop , do.all <- TRUE# 1 : All , clusplot(x, stand = stand, lines = lines, shade = shade, color = color, labels = labels, plotchar = plotchar, span = span, xlim = xlim, ylim = ylim, main = main, ...) , plot(silhouette(x), nmax.lab, max.strlen, main = main) ) if(do.all) { pick <- pick + 1; do.all <- pick <= length(tmenu) + 1} } invisible() } else { ask <- prod(par("mfcol")) < length(which.plots) && dev.interactive() if(ask) { op <- par(ask = TRUE); on.exit(par(op)) } for(i in which.plots) switch(i, clusplot(x, stand = stand, lines = lines, shade = shade, color = color, labels = labels, plotchar = plotchar, span = span, xlim = xlim, ylim = ylim, main = main, ...) , plot(silhouette(x), nmax.lab, max.strlen, main = main) ) ## and return() whatever *plot(..) returns } } clusplot <- function(x, ...) 
UseMethod("clusplot") ##' @title Make/Check the (n x 2) matrix needed for clusplot.default(): ##' @param x numeric matrix or dissimilarity matrix (-> clusplot.default()) ##' @param diss logical indicating if 'x' is dissimilarity matrix ##' @return x1 : (n x 2) numeric matrix; ##' var.dec: the "variance explained" ##' @author Martin Maechler mkCheckX <- function(x, diss) { if(diss) { if(any(is.na(x))) stop("NA-values are not allowed in dist-like 'x'.") if(inherits(x, "dist")) { n <- attr(x, "Size") labs <- attr(x, "Labels") } else { # x (num.vector or square matrix) must be transformed into diss. siz <- sizeDiss(x) if(is.na(siz)) { if((n <- nrow(x)) != ncol(x)) stop("Distances must be result of dist or a square matrix.") if(all.equal(x, t(x)) != TRUE) stop("the square matrix is not symmetric.") labs <- dimnames(x)[[1]] } else { if(!is.vector(x)) { labs <- attr(x, "Labels") # possibly NULL x <- as.matrix(x) if((n <- nrow(x)) == ncol(x) && all.equal(x, t(x)) == TRUE) { labs <- dimnames(x)[[1]] } else { ## Hmm, when does this ever happen : ## numeric, not-dist, non-vector, not symmetric matrix ? warning(">>>>> funny case in clusplot.default() -- please report!\n") ## if(n != sizeDiss(x)) ... 
if(is.null(labs)) labs <- 1:sizeDiss(x) attr(x, "Size") <- sizeDiss(x) } } else { attr(x, "Size") <- n <- siz labs <- 1:n } } } if(is.null(labs)) labs <- 1:n x1 <- cmdscale(x, k = 2, eig = TRUE, add = TRUE) if(x1$ac < 0) x1 <- cmdscale(x, k = 2, eig = TRUE) var.dec <- x1$GOF[2] # always in [0,1] x1 <- x1$points } else { ## Not (diss) if(!is.matrix(x)) stop("x is not a data matrix") if(any(is.na(x))) { y <- is.na(x) if(any(apply(y, 1, all))) stop("one or more objects contain only missing values") if(any(apply(y, 2, all))) stop("one or more variables contain only missing values") x <- apply(x, 2, function(x) { x[is.na(x)] <- median(x, na.rm = TRUE); x } ) message("Missing values were displaced by the median of the corresponding variable(s)") } n <- nrow(x) labs <- dimnames(x)[[1]] if(is.null(labs)) labs <- 1:n x1 <- if(ncol(x) == 1) { hulp <- rep(0, length(x)) var.dec <- 1 matrix(c(t(x), hulp), ncol = 2) } else { prim.pr <- princomp(x, scores = TRUE, cor = ncol(x) != 2) var.dec <- cumsum(prim.pr$sdev^2/sum(prim.pr$ sdev^2))[2] prim.pr$scores[, 1:2] } } list(x = x1, var.dec = var.dec, labs = labs) } clusplot.default <- function(x, clus, diss = FALSE, s.x.2d = mkCheckX(x, diss), stand = FALSE, lines = 2, shade = FALSE, color = FALSE, labels = 0, plotchar = TRUE, col.p = "dark green", # was 5 (= shaded col) col.txt = col.p, col.clus = if(color) c(2, 4, 6, 3) else 5, cex = 1, cex.txt = cex, span = TRUE, add = FALSE, xlim = NULL, ylim = NULL, main = paste("CLUSPLOT(", deparse(substitute(x)),")"), sub = paste("These two components explain", round(100 * var.dec, digits = 2), "% of the point variability."), xlab = "Component 1", ylab = "Component 2", verbose = getOption("verbose"), ...) 
{ force(main) if(is.data.frame(x)) x <- data.matrix(x) if(!is.numeric(x)) stop("x is not numeric") stopifnot(is.list(s.x.2d), c("x","labs","var.dec") %in% names(s.x.2d), (n <- nrow(x1 <- s.x.2d[["x"]])) > 0) labels1 <- s.x.2d[["labs"]] var.dec <- s.x.2d[["var.dec"]] ## --- The 2D space is setup and points are in x1[,] (n x 2) --- clus <- as.vector(clus) if(length(clus) != n) stop("The clustering vector is of incorrect length") clus <- as.factor(clus) if(any(is.na(clus))) stop("NA-values are not allowed in clustering vector") if(stand) x1 <- scale(x1) levclus <- levels(clus) nC <- length(levclus) # the number of clusters d.x <- diff(range(x1[, 1])) d.y <- diff(range(x1[, 2])) z <- A <- vector("list", nC) loc <- matrix(0, nrow = nC, ncol = 2) d2 <- verhoud <- numeric(nC) ## num1 .. num6 : all used only once -- there are more constants anyway num3 <- 90 num6 <- 70 for(i in 1:nC) { ##------------- i-th cluster -------------- x <- x1[clus == levclus[i],, drop = FALSE ] aantal <- nrow(x) # number of observations in cluster [i] cov <- var(if(aantal == 1) { if(verbose) cat("cluster",i," has only one observation ..\n") rbind(x, c(0, 0)) } else x) x.1 <- range(x[, 1]) y.1 <- range(x[, 2]) notrank2 <- qr(cov, tol = 0.001)$rank != 2 if(!span && notrank2) { d2[i] <- 1 if((abs(diff(x.1)) > d.x/70) || (abs(diff(y.1)) > d.y/50)) { loc[i, ] <- c(x.1[1] + diff(x.1)/2, y.1[1] + diff(y.1)/2) a <- sqrt((loc[i, 1] - x.1[1])^2 + (loc[i, 2] - y.1[1])^2) a <- a + 0.05 * a num2 <- 40 if(abs(diff(x.1)) > d.x/70 ) { ind1 <- which.max(x[,1]) ind2 <- which.min(x[,1]) q <- atan((x[ind1, 2] - x[ind2, 2])/ (x[ind1, 1] - x[ind2, 1])) b <- if(d.y == 0) 1 else if(abs(diff(y.1)) > d.y/50) diff(y.1)/10 ## num1 <- 10 else d.y/num2 } else { b <- if(d.x == 0) 1 else d.x/num2 q <- pi/2 } D <- diag(c(a^2, b^2)) R <- rbind(c(cos(q), -sin(q)), c(sin(q), cos(q))) A[[i]] <- (R %*% D) %*% t(R) } else { a <- d.x/num3 b <- d.y/num6 if(a == 0) a <- 1 if(b == 0) b <- 1 A[[i]] <- diag(c(a^2, b^2)) loc[i, ] <- x[1, ] 
} oppervlak <- pi * a * b } else if(span && notrank2) { d2[i] <- 1 if(sum(x[, 1] != x[1, 1]) != 0 || sum(x[, 2] != x[1, 2]) != 0) { loc[i, ] <- c(x.1[1] + diff(x.1)/2, y.1[1] + diff(y.1)/2) a <- sqrt((loc[i, 1] - x.1[1])^2 + (loc[i, 2] - y.1[1])^2) if(any(x[, 1] != x[1, 1])) { ind1 <- which.max(x[,1]) ind2 <- which.min(x[,1]) q <- atan((x[ind1, 2] - x[ind2, 2])/ (x[ind1, 1] - x[ind2, 1])) } else { q <- pi/2 } b <- 1e-7 D <- diag(c(a^2, b^2)) R <- rbind(c(cos(q), -sin(q)), c(sin(q), cos(q))) A[[i]] <- (R %*% D) %*% t(R) } else { a <- d.x/num3 b <- d.y/num6 if(a == 0) a <- 1 if(b == 0) b <- 1 A[[i]] <- diag(c(a^2, b^2)) loc[i, ] <- x[1, ] } oppervlak <- pi * a * b } else { ## rank2 if(!span) { loc[i, ] <- colMeans(x) d2[i] <- max(mahalanobis(x, loc[i, ], cov)) ## * (1+ 0.01)^2 --- dropped factor for back-compatibility } else { ## span and rank2 if(verbose) cat("span & rank2 : calling \"spannel\" ..\n") k <- 2L res <- .C(spannel, aantal, ndep= k, dat = cbind(1., x), sqdist = double(aantal), l1 = double((k+1) ^ 2), double(k), double(k), prob = double(aantal), double(k+1), eps = (0.01),## convergence tol. maxit = 5000L, ierr = integer(1)) if(res$ierr != 0) ## MM : exactmve not available here ! warning("Error in Fortran routine for the spanning ellipsoid,", "\n rank problem??") cov <- cov.wt(x, res$prob) loc[i, ] <- cov$center ## NB: cov.wt() in R has extra wt[] scaling; revert here: cov <- cov$cov * (1 - sum(cov$wt^2)) d2[i] <- weighted.mean(res$sqdist, res$prob) if(verbose) cat("ellipse( A= (", format(cov[1,]),"*", format(cov[2,2]), "),\n\td2=", format(d2[i]), ", loc[]=", format(loc[i, ]), ")\n") } A[[i]] <- cov ## oppervlak (flam.) = area (Engl.) 
oppervlak <- pi * d2[i] * sqrt(cov[1, 1] * cov[2, 2] - cov[1, 2]^2) } z[[i]] <- ellipsoidPoints(A[[i]], d2[i], loc[i, ], n.half= 201) verhoud[i] <- aantal/oppervlak } ## end for( i-th cluster ) x.range <- do.call(range, lapply(z, `[`, i=TRUE, j = 1)) y.range <- do.call(range, lapply(z, `[`, i=TRUE, j = 2)) verhouding <- sum(verhoud[verhoud < 1e7]) if(verhouding == 0) verhouding <- 1 ## num4 <- 37 ; num5 <- 3 --- but '41' is another constant density <- 3 + (verhoud * 37)/verhouding density[density > 41] <- 41 if (span) { if (d.x == 0) ## diff(range(x[,1]) == 0 : x-coords all the same x.range <- x1[1, 1] + c(-1,1) if (d.y == 0) ## diff(range(x[,2]) == 0 : y-coords all the same y.range <- x1[1, 2] + c(-1,1) } if(is.null(xlim)) xlim <- x.range if(is.null(ylim)) ylim <- y.range if(length(col.p) < n) col.p <- rep(col.p, length= n) ## --- Now plotting starts --- ## "Main plot" -- if(!add) { plot(x1, xlim = xlim, ylim = ylim, xlab = xlab, ylab = ylab, main = main, type = if(plotchar) "n" else "p", # if(plotchar) add points later col = col.p, cex = cex, ...) if(!is.null(sub) && !is.na(sub) && nchar(sub) > 0) title(sub = sub, adj = 0) } if(color) { if(length(col.clus) < min(4,nC)) stop("'col.clus' should have length 4 when color is TRUE") i.verh <- order(verhoud) jInd <- if(nC > 4) pam(verhoud[i.verh], 4)$clustering else 1:nC for(i in 1:nC) { k <- i.verh[i] polygon(z[[k]], density = if(shade) density[k] else 0, col = col.clus[jInd[i]], ...) } col.clus <- col.clus[jInd][order(i.verh)] } else { for(i in 1:nC) polygon(z[[i]], density = if(shade) density[i] else 0, col = col.clus, ...) } ## points after polygon in order to write ON TOP: if(plotchar) { karakter <- 1:19 for(i in 1:nC) { iC <- clus == levclus[i] points(x1[iC, , drop = FALSE], cex = cex, pch = karakter[1+(i-1) %% 19], col = col.p[iC], ...) 
} } if(nC > 1 && (lines == 1 || lines == 2)) { ## Draw lines between all pairs of the nC cluster (centers) ## utilities for computing ellipse intersections: clas.snijpunt <- function(x, loc, m, n, p) { if ( !is.na(xm <- x[1,m]) && loc[n, m] <= xm && xm <= loc[p, m]) x[1, ] else if(!is.na(xm <- x[2,m]) && loc[n, m] <= xm && xm <= loc[p, m]) x[2, ] else NA } coord.snijp1 <- function(x, gemid) x[2, 2] - 2 * x[1, 2] * gemid + x[1, 1] * gemid^2 coord.snijp2 <- function(x, d2, y) ((x[1, 1] * x[2, 2] - x[1, 2]^2) * d2)/y coord.snijp3 <- function(xx, y, gemid) { sy <- sqrt(y) sy <- c(sy, -sy) cbind(xx[1] + sy, xx[2] + gemid*sy) } afstand <- matrix(0, ncol = nC, nrow = nC) for(i in 1:(nC - 1)) { for(j in (i + 1):nC) { gemid <- (loc[j, 2] - loc[i, 2])/(loc[j, 1] - loc[i, 1]) s0 <- coord.snijp1(A[[i]], gemid) b0 <- coord.snijp2(A[[i]], d2[i], s0) snijp.1 <- coord.snijp3(loc[i,], y=b0, gemid) s1 <- coord.snijp1(A[[j]], gemid) b1 <- coord.snijp2(A[[j]], d2[j], s1) snijp.2 <- coord.snijp3(loc[j,], y=b1, gemid) if(loc[i, 1] != loc[j, 1]) { if(loc[i, 1] < loc[j, 1]) { punt.1 <- clas.snijpunt(snijp.1, loc, 1, i, j) punt.2 <- clas.snijpunt(snijp.2, loc, 1, i, j) } else { punt.1 <- clas.snijpunt(snijp.1, loc, 1, j, i) punt.2 <- clas.snijpunt(snijp.2, loc, 1, j, i) } } else { if(loc[i, 2] < loc[j, 2]) { punt.1 <- clas.snijpunt(snijp.1, loc, 2, i, j) punt.2 <- clas.snijpunt(snijp.2, loc, 2, i, j) } else { punt.1 <- clas.snijpunt(snijp.1, loc, 2, j, i) punt.2 <- clas.snijpunt(snijp.2, loc, 2, j, i) } } if(is.na(punt.1[1]) || is.na(punt.2[1]) || (sqrt((punt.1[1] - loc[i, 1])^2 + (punt.1[2] - loc[i, 2])^2) + sqrt((punt.2[1] - loc[j, 1])^2 + (punt.2[2] - loc[j, 2])^2)) > sqrt((loc[j, 1] - loc[i, 1])^2 + (loc[j, 2] - loc[i, 2])^2)) { afstand[i, j] <- NA } else if(lines == 1) { afstand[i, j] <- sqrt((loc[i, 1] - loc[j, 1])^2 + (loc[i, 2] - loc[j, 2])^2) segments(loc[i, 1], loc[i, 2], loc[j, 1], loc[j, 2], col = 6, ...) 
} else { ## lines == 2 afstand[i, j] <- sqrt((punt.1[1] - punt.2[1])^2 + (punt.1[2] - punt.2[2])^2) segments(punt.1[1], punt.1[2], punt.2[1], punt.2[2], col = 6, ...) } } } afstand <- t(afstand) + afstand } else afstand <- NULL if(labels) { if(labels == 1) { for(i in 1:nC) { ## add cluster border points m <- nrow(z[[i]]) ni <- length(ii <- seq(1, m, by = max(1, m %/% 40))) x1 <- rbind(x1, z[[i]][ii, ]) labels1 <- c(labels1, rep(levclus[i], ni)) ## identify() only allows one color: ##col.txt <- c(col.txt, rep(col.clus[if(color) i else 1], ni)) } identify(x1, labels = labels1, col = col.txt[1]) } else { ### FIXME --- 'cex.txt' but also allow to specify 'cex' (for the points) ??? Stext <- function(xy, labs, ...) { ## FIXME: these displacements are not quite ok! xy[, 1] <- xy[, 1] + diff(x.range)/130 xy[, 2] <- xy[, 2] + diff(y.range)/50 text(xy, labels = labs, ...) } if(labels == 3 || labels == 2) Stext(x1, labels1, col = col.txt, cex = cex.txt, ...) if(labels %in% c(2,4,5)) { maxima <- t(sapply(z, `[`, i=201, j=1:2)) Stext(maxima, levclus, font = 4, col = col.clus, cex = cex, ...) } if(labels == 5) identify(x1, labels = labels1, col = col.txt[1]) } } density[density == 41] <- NA invisible(list(Distances = afstand, Shading = density)) } clusplot.partition <- function(x, main = NULL, dist = NULL, ...) { if(is.null(main) && !is.null(x$call)) main <- paste("clusplot(",format(x$call),")", sep="") if(length(x$data) != 0 && (!any(is.na(x$data)) || data.class(x) == "clara")) clusplot.default(x$data, x$clustering, diss = FALSE, main = main, ...) else if(!is.null(dist)) clusplot.default(dist, x$clustering, diss = TRUE, main = main, ...) else if(!is.null(x$diss)) clusplot.default(x$diss, x$clustering, diss = TRUE, main = main, ...) 
else { ## try to find "x$diss" by looking at the pam() call: if(!is.null(x$call)) { xD <- try(eval(x$call[[2]], envir = parent.frame())) if(inherits(xD, "try-error") || !inherits(xD, "dist")) stop("no diss nor data found, nor the original argument of ", deparse(x$call)) ## else ## warning("both 'x$diss' and 'dist' are empty; ", ## "trying to find the first argument of ", deparse(x$call)) clusplot.default(xD, x$clustering, diss = TRUE, main = main, ...) } else stop("no diss nor data found for clusplot()'") } } cluster/R/plothier.q0000644000176000001440000001526411464226176014215 0ustar ripleyusers### $Id: plothier.q 5642 2010-11-03 09:33:50Z maechler $ pltree <- function(x, ...) UseMethod("pltree") ## note: pltree() can have an 'xlab' in "..." (plot.hclust has an explicit one) pltree.twins <- function(x, main = paste("Dendrogram of ", deparse(x$call)), labels = NULL, ylab = "Height", ...) { plot(as.hclust(x), labels = labels, ##- if(is.null(labels) && length(x$order.lab) != 0) ##- labels <- x$order.lab[sort.list(x$order)] ##- ##- ## calling plot.hclust() via generic : ##- plot(structure(list(merge = x$merge, order = x$order, ##- height = sort(x$height), labels = labels, ##- call = x$call, method = x$method), ##- class = "hclust"), main = main, ylab = ylab, ...) } bannerplot <- function(x, w = rev(x$height), fromLeft = TRUE, main=NULL, sub=NULL, xlab = "Height", adj = 0, col = c(2, 0), border = 0, axes = TRUE, frame.plot = axes, rev.xax = !fromLeft, xax.pretty = TRUE, labels = NULL, nmax.lab = 35, max.strlen = 5, yax.do = axes && length(x$order) <= nmax.lab, yaxRight = fromLeft, y.mar = 2.4 + max.strlen / 2.5, ...) 
{ m <- max(w) if(axes) { if(xax.pretty) { at.vals <- if(!is.logical(xax.pretty)) pretty(c(0,w), n = xax.pretty) else pretty(c(0,w)) n <- length(at.vals <- at.vals[at.vals <= m]) if(at.vals[n] * 1.01 < m) { lab.vals <- c(at.vals, signif(m, 3)) at.vals <- c(at.vals, m) } else lab.vals <- at.vals } else { # old default for plot.agnes() and plot.diana() ss <- seq(0, floor(m), length = 11)# => intervals = 1/10 {total} at.vals <- c(ss, m) lab.vals <- round(at.vals, 2) } } if(fromLeft) { w <- rbind(w, m - w) if(missing(col)) col <- rev(col) } else { ## from Right w <- rbind(m - w, w) if(axes && rev.xax) { at.vals <- m - rev(at.vals)## == c(0, ss + m - floor(m)) lab.vals <- rev(lab.vals) } } if(yax.do) { ax <- if(yaxRight) list(side = 4, pos = m) else list(side = 2, pos = 0) if((pm <- par("mar"))[ax$side] < y.mar) { ## need space besides y axis for labeling pm[ax$side] <- y.mar op <- par(mar = pm) on.exit(par(op)) } } barplot(w, xlab = xlab, horiz = TRUE, space = 0, axes = FALSE, col = col, border = border, mgp = c(2.5, 1, 0), ...) if(frame.plot && (border == 0 || border == par("bg"))) rect(0, 0, m, ncol(w)) title(main = main, sub = sub, adj = adj) if(axes) { axis(1, at = at.vals, labels = lab.vals, ...) if(yax.do) { if(is.null(labels)) labels <- rev(if (length(x$order.lab) != 0) substring(x$order.lab, 1,max.strlen) else x$order) axis(ax$side, at = 0:(length(x$order) - 1), las = 1, labels = labels, pos = ax$pos, mgp = c(3, 1.25, 0), ...) } } invisible() } ## plot.diana() [further down] & plot.agnes() are almost identical; ## -- made bannerplot() a stand-alone function ## --> maybe *merge* these two into one plot.twins() plot.agnes <- function(x, ask = FALSE, which.plots = NULL, main = NULL, sub = paste("Agglomerative Coefficient = ", round(x$ac, digits = 2)), adj = 0, nmax.lab = 35, max.strlen = 5, xax.pretty = TRUE, ...) 
{ if(is.null(main)) { ## Different default for banner & pltree: cl <- deparse(x$call) main1 <- paste("Banner of ", cl) main2 <- paste("Dendrogram of ", cl) } else { # same title for both main1 <- main2 <- main } if(is.null(which.plots) && !ask) which.plots <- 1:2 if(ask && is.null(which.plots)) { ## Use 'menu' .. tmenu <- paste("plot ", ## choices : c("All", "Banner", "Clustering Tree")) do.all <- FALSE repeat { if(!do.all) pick <- menu(tmenu, title = "\nMake a plot selection (or 0 to exit):\n") + 1 switch(pick, return(invisible()), # 0 -> exit loop do.all <- TRUE,# 1 : All bannerplot(x, fromLeft = TRUE, main = main1, sub = sub, adj = adj, xax.pretty = 10, nmax.lab= nmax.lab, max.strlen= max.strlen, ...), pltree (x, main = main2, sub = sub, ...) # 3 ) if(do.all) { pick <- pick + 1; do.all <- pick <= length(tmenu) + 1} } } else { ask <- prod(par("mfcol")) < length(which.plots) && dev.interactive() if(ask) { op <- par(ask = TRUE) on.exit(par(op)) } for(i in which.plots) switch(i, bannerplot(x, fromLeft = TRUE, main = main1, sub = sub, adj = adj, xax.pretty = 10, nmax.lab = nmax.lab, max.strlen = max.strlen, ...), pltree (x, main = main2, sub = sub, ...) ) } invisible() } plot.diana <- function(x, ask = FALSE, which.plots = NULL, main = NULL, sub = paste("Divisive Coefficient = ", round(x$dc, digits = 2)), adj = 0, nmax.lab = 35, max.strlen = 5, xax.pretty = TRUE, ...) { if(is.null(main)) { ## Different default for banner & pltree: cl <- deparse(x$call) main1 <- paste("Banner of ", cl) main2 <- paste("Dendrogram of ", cl) } else { # same title for both main1 <- main2 <- main } if(is.null(which.plots) && !ask) which.plots <- 1:2 if(ask && is.null(which.plots)) { ## Use 'menu' .. 
tmenu <- paste("plot ", ## choices : c("All", "Banner", "Clustering Tree")) do.all <- FALSE repeat { if(!do.all) pick <- menu(tmenu, title = "\nMake a plot selection (or 0 to exit):\n") + 1 switch(pick, return(invisible()), # 0 -> exit loop do.all <- TRUE,# 1 : All bannerplot(x, fromLeft = FALSE, main = main1, sub = sub, adj = adj, xax.pretty = 10, nmax.lab= nmax.lab, max.strlen= max.strlen, ...), pltree (x, main = main2, sub = sub, ...) ) if(do.all) { pick <- pick + 1; do.all <- pick <= length(tmenu) + 1} } } else { ask <- prod(par("mfcol")) < length(which.plots) && dev.interactive() if(ask) { op <- par(ask = TRUE) on.exit(par(op)) } for(i in which.plots) switch(i, bannerplot(x, fromLeft = FALSE, main = main1, sub = sub, adj = adj, xax.pretty = 10, nmax.lab = nmax.lab, max.strlen = max.strlen, ...),# 1 pltree (x, main = main2, sub = sub, ...) # i = 2 ) } invisible() } plot.mona <- function(x, main = paste("Banner of ", deparse(x$call)), sub = NULL, xlab = "Separation step", col = c(2,0), axes = TRUE, adj = 0, nmax.lab = 35, max.strlen = 5, ...) { w <- rev(x$step) m <- max(w) if(any(i0 <- w == 0)) w[i0] <- m <- m+1 bannerplot(x[c("order","order.lab")], w = w, fromLeft = TRUE, yaxRight = FALSE, col = col, main = main, sub = sub, xlab = xlab, adj= adj, axes= axes, nmax.lab= nmax.lab, max.strlen= max.strlen, xax.pretty = m+1, ...) names <- paste(" ", rev(x$variable)) is.na(names) <- i0 text(w, 1:length(names) - 0.5, names, adj = 0, col = col[1], ...) 
} cluster/R/pam.q0000644000176000001440000001607011712175353013134 0ustar ripleyusers#### PAM : Partitioning Around Medoids #### --- $Id: pam.q 6057 2012-02-01 08:42:19Z maechler $ pam <- function(x, k, diss = inherits(x, "dist"), metric = "euclidean", medoids = NULL, stand = FALSE, cluster.only = FALSE, do.swap = TRUE, keep.diss = !diss && !cluster.only && n < 100, keep.data = !diss && !cluster.only, pamonce = FALSE, trace.lev = 0) { if((diss <- as.logical(diss))) { ## check type of input vector if(any(is.na(x))) stop(..msg$error["NAdiss"]) if(data.class(x) != "dissimilarity") { # try to convert to if(!is.null(dim(x))) { x <- as.dist(x) # or give an error } else { ## possibly convert input *vector* if(!is.numeric(x) || is.na(n <- sizeDiss(x))) stop(..msg$error["non.diss"]) attr(x, "Size") <- n } class(x) <- dissiCl if(is.null(attr(x,"Metric"))) attr(x, "Metric") <- "unspecified" } ## adapt S dissimilarities to Fortran: ## convert upper matrix, read by rows, to lower matrix, read by rows. n <- attr(x, "Size") dv <- x[lower.to.upper.tri.inds(n)] ## prepare arguments for the Fortran call dv <- c(0, dv) ## <- internally needed {FIXME! 
memory hog!} jp <- 1 mdata <- FALSE ndyst <- 0 x2 <- double()# unused in this case } else { ## check input matrix and standardize, if necessary x <- data.matrix(x) if(!is.numeric(x)) stop("x is not a numeric dataframe or matrix.") x2 <- if(stand) scale(x, scale = apply(x, 2, meanabsdev)) else x ## put info about metric, size and NAs in arguments for the Fortran call ndyst <- if(metric == "manhattan") 2 else 1 n <- nrow(x2) jp <- ncol(x2) if((mdata <- any(inax <- is.na(x2)))) { # TRUE if x[] has any NAs jtmd <- as.integer(ifelse(apply(inax, 2, any), -1, 1)) ## VALue for MISsing DATa valmisdat <- 1.1* max(abs(range(x2, na.rm=TRUE))) x2[inax] <- valmisdat valmd <- rep(valmisdat, jp) } dv <- double(1 + (n * (n - 1))/2) } if((k <- as.integer(k)) < 1 || k >= n) stop("Number of clusters 'k' must be in {1,2, .., n-1}; hence n >= 2") if(is.null(medoids))# default: using "build & swap" to determine medoids" medID <- integer(k)# all 0 -> will be used as 'code' in C else { ## 'fixme': consider sort(medoids) {and rely on it in ../src/pam.c } ## "0L+..." ensure a "DUPLICATE(.)" (a real copy on the C level; as.integer(.) is not enough! 
if(length(medID <- if(is.integer(medoids))0L+medoids else as.integer(medoids)) != k || any(medID < 1) || any(medID > n) || any(duplicated(medID))) stop("'medoids' must be NULL or vector of ", k, " distinct indices in {1,2, .., n}, n=", n) ## use observation numbers 'medID' as starting medoids for 'swap' only } nisol <- integer(if(cluster.only) 1 else k) if(do.swap) nisol[1] <- 1L stopifnot(length(cluster.only) == 1, length(trace.lev) == 1) ## call C routine --- FIXME -- using .Call() would simplify logic *much* storage.mode(dv) <- "double" storage.mode(x2) <- "double" res <- .C(cl_pam, as.integer(n), as.integer(jp), k, x = x2, dys = dv, jdyss = as.integer(diss), if(mdata)valmd else double(1), if(mdata) jtmd else integer(jp), as.integer(ndyst), integer(n), # nsend[] logical(n), # nrepr[] integer(if(cluster.only) 1 else n), # nelem[] double(n), # radus[] double(n), # damer[] avsil = double(n), # 'ttd' double(n), # separ[] ttsil = as.double(0), obj = as.double(c(cluster.only, trace.lev)),# in & out! med = medID,# in & out(if !cluster.only) clu = integer(n), clusinf = if(cluster.only) 0. else matrix(0., k, 5), silinf = if(cluster.only) 0. else matrix(0., n, 4), isol = nisol, as.integer(pamonce), DUP = FALSE) # care!! xLab <- if(diss) attr(x, "Labels") else dimnames(x)[[1]] r.clu <- res$clu if(length(xLab) > 0) names(r.clu) <- xLab ## Error if have NA's in diss: if(!diss && res$jdyss == -1) stop("No clustering performed, NAs in the computed dissimilarity matrix.") if(cluster.only) return(r.clu) ## Else, usually medID <- res$med if(any(medID <= 0)) stop("error from .C(cl_pam, *): invalid medID's") sildim <- res$silinf[, 4] if(diss) { if(keep.diss) disv <- x ## add labels to Fortran output r.med <- if(length(xLab) > 0) { sildim <- xLab[sildim] xLab[medID] } else medID } else { if(keep.diss) { ## adapt Fortran output to S: ## convert lower matrix, read by rows, to upper matrix, read by rows. 
disv <- res$dys[-1] disv[disv == -1] <- NA disv <- disv[upper.to.lower.tri.inds(n)] class(disv) <- dissiCl attr(disv, "Size") <- nrow(x) attr(disv, "Metric") <- metric attr(disv, "Labels") <- dimnames(x)[[1]] } ## add labels to Fortran output r.med <- x[medID, , drop =FALSE] if(length(xLab) > 0) sildim <- xLab[sildim] } ## add names & dimnames to Fortran output r.obj <- structure(res$obj, .Names = c("build", "swap")) r.isol <- factor(res$isol, levels = 0:2, labels = c("no", "L", "L*")) names(r.isol) <- 1:k r.clusinf <- res$clusinf dimnames(r.clusinf) <- list(NULL, c("size", "max_diss", "av_diss", "diameter", "separation")) ## construct S object r <- list(medoids = r.med, id.med = medID, clustering = r.clu, objective = r.obj, isolation = r.isol, clusinfo = r.clusinf, silinfo = if(k != 1) { silinf <- res$silinf[, -4, drop=FALSE] dimnames(silinf) <- list(sildim, c("cluster", "neighbor", "sil_width")) list(widths = silinf, clus.avg.widths = res$avsil[1:k], avg.width = res$ttsil) }, diss = if(keep.diss)disv, call = match.call()) if(keep.data && !diss) { if(mdata) x2[x2 == valmisdat] <- NA r$data <- x2 } class(r) <- c("pam", "partition") r } ## non-exported: .print.pam <- function(x, ...) { cat("Medoids:\n"); print(cbind(ID = x$id.med, x$medoids), ...) cat("Clustering vector:\n"); print(x$clustering, ...) cat("Objective function:\n"); print(x$objective, ...) } print.pam <- function(x, ...) { .print.pam(x, ...) cat("\nAvailable components:\n") print(names(x), ...) invisible(x) } summary.pam <- function(object, ...) { class(object) <- "summary.pam" object } print.summary.pam <- function(x, ...) { .print.pam(x, ...) cat("\nNumerical information per cluster:\n"); print(x$clusinfo, ...) cat("\nIsolated clusters:\n L-clusters: ") print(names(x$isolation[x$isolation == "L"]), quote = FALSE, ...) cat(" L*-clusters: ") print(names(x$isolation[x$isolation == "L*"]), quote = FALSE, ...) if(length(x$silinfo) != 0) { cat("\nSilhouette plot information:\n") print(x$silinfo[[1]], ...) 
cat("Average silhouette width per cluster:\n") print(x$silinfo[[2]], ...) cat("Average silhouette width of total data set:\n") print(x$silinfo[[3]], ...) } if(!is.null(x$diss)) { ## Dissimilarities: cat("\n"); print(summary(x$diss, ...)) } cat("\nAvailable components:\n"); print(names(x), ...) invisible(x) } cluster/R/mona.q0000644000176000001440000000607412014446721013310 0ustar ripleyusers mona <- function(x) { ## check type of input matrix if(!is.matrix(x) && !is.data.frame(x)) stop("x must be a matrix or data frame.") if(!all(sapply(lapply(as.data.frame(x), function(y) levels(as.factor(y))), length) == 2)) stop("All variables must be binary (factor with 2 levels).") n <- nrow(x) jp <- ncol(x) ## change levels of input matrix x2 <- apply(as.matrix(x), 2, function(x) as.integer(factor(x))) - 1L x2[is.na(x2)] <- 2:2 ## was ## x2 <- apply(as.matrix(x), 2, factor) ## x2[x2 == "1"] <- "0" ## x2[x2 == "2"] <- "1" ## x2[is.na(x2)] <- "2" ## storage.mode(x2) <- "integer" ## call Fortran routine res <- .Fortran(cl_mona, as.integer(n), as.integer(jp), x2 = x2,# x[,] error = 0L, nban = integer(n), ner = integer(n), integer(n), lava = integer(n), integer(jp)) ## stop with a message when two many missing values: if(res$error != 0) { ch <- gettext("No clustering performed, ") switch(res$error, ## 1 : stop(ch,"an object was found with all values missing."), ## 2 : stop(ch,"a variable was found with at least 50% missing values."), ## 3 : stop(ch,"a variable was found with all non missing values identical."), ## 4 : stop(ch,"all variables have at least one missing value.") ) } ##O res$x2 <- matrix(as.numeric(substring(res$x2, ##O 1:nchar(res$x2), 1:nchar(res$x2))), ##O n, jp) storage.mode(res$x2) <- "integer" # keeping dim() dimnames(res$x2) <- dnx <- dimnames(x) ## add labels to Fortran output if(length(dnx[[2]]) != 0) { lava <- as.character(res$lava) lava[lava != "0"] <- dnx[[2]][res$lava] lava[lava == "0"] <- "NULL" res$lava <- lava } ## construct "mona" object clustering <- 
list(data = res$x2, order = res$ner, variable = res$lava[ -1 ], step = res$nban[-1], call = match.call()) if(length(dnx[[1]]) != 0) clustering$order.lab <- dnx[[1]][res$ner] class(clustering) <- "mona" clustering } print.mona <- function(x, ...) { cat("Revised data:\n") print(x$data, quote = FALSE, ...) cat("Order of objects:\n") print(if (length(x$order.lab) != 0) x$order.lab else x$order, quote = FALSE, ...) cat("Variable used:\n") print(x$variable, quote = FALSE, ...) cat("Separation step:\n") print(x$step, ...) cat("\nAvailable components:\n") print(names(x), ...) invisible(x) } ## FIXME: These should differ from print() summary.mona <- function(object, ...) { class(object) <- "summary.mona" object } print.summary.mona <- function(x, ...) { print.mona(x, ...) invisible(x) } cluster/R/internal.R0000644000176000001440000000177211466075310014135 0ustar ripleyusers#### Cluster - Internal Utilities #### ============================ (new by Martin Mächler) ## This was size(); seems slightly useful in general sizeDiss <- function(d) { ## find 'n' for d == dissimilarity-like(), i.e. 
length(d)= n(n-1)/2 discr <- 1 + 8 * length(d) sqrtdiscr <- round(sqrt(discr)) if(sqrtdiscr^2 == discr) (1 + sqrtdiscr)/2 else NA } ## used in ./agnes.q, ./clara.q, ./diana.q und ./pam.q : lower.to.upper.tri.inds <- function(n) { n1 <- as.integer(n - 1) if(n1 < 1) stop("'n' must be >= 2") else if(n1 == 1) 1L else rep(1:n1, 1:n1) + c(0, unlist(lapply(2:n1, function(k) cumsum(c(0, (n - 2):(n - k)))))) } upper.to.lower.tri.inds <- function(n) { if((n2 <- as.integer(n - 2)) < 0) stop("'n' must be >= 2") rep(1 + cumsum(0:n2), (n - 1):1) + unlist(lapply(0:n2, function(k) cumsum(k:n2))) } #### consider to *not* export these when I will use a name space : meanabsdev <- function(y) mean(abs(y - mean(y, na.rm = TRUE)), na.rm = TRUE) cluster/R/fanny.q0000644000176000001440000002103411674325037013471 0ustar ripleyusers#### $Id: fanny.q 5977 2011-12-21 09:42:55Z maechler $ fanny <- function(x, k, diss = inherits(x, "dist"), memb.exp = 2, metric = c("euclidean", "manhattan", "SqEuclidean"), stand = FALSE, iniMem.p = NULL, cluster.only = FALSE, keep.diss = !diss && !cluster.only && n < 100, keep.data = !diss && !cluster.only, maxit = 500, tol = 1e-15, trace.lev = 0) { if((diss <- as.logical(diss))) { ## check type of input vector if(any(is.na(x))) stop(..msg$error["NAdiss"]) if(data.class(x) != "dissimilarity") { # try to convert to if(!is.null(dim(x))) { x <- as.dist(x) # or give an error } else { ## possibly convert input *vector* if(!is.numeric(x) || is.na(n <- sizeDiss(x))) stop(..msg$error["non.diss"]) attr(x, "Size") <- n } class(x) <- dissiCl if(is.null(attr(x,"Metric"))) attr(x, "Metric") <- "unspecified" } ## prepare arguments for the Fortran call n <- attr(x, "Size") dv <- as.double(c(x, 0))# add extra one jp <- 1 mdata <- FALSE ndyst <- 0L x2 <- double(n) jdyss <- 1 } else { ## check input matrix and standardize, if necessary x <- data.matrix(x) if(!is.numeric(x)) stop("x is not a numeric dataframe or matrix.") x2 <- if(stand) scale(x, scale = apply(x, 2, meanabsdev)) 
else x metric <- match.arg(metric) ## put info about metric, size and NAs in arguments for the Fortran call ndyst <- which(metric == eval(formals()$metric))# 1, 2, or 3 n <- nrow(x2) jp <- ncol(x2) if((mdata <- any(inax <- is.na(x2)))) { # TRUE if x[] has any NAs jtmd <- as.integer(ifelse(apply(inax, 2, any), -1, 1)) ## VALue for MISsing DATa valmisdat <- 1.1* max(abs(range(x2, na.rm=TRUE))) x2[inax] <- valmisdat valmd <- rep(valmisdat, jp) } dv <- double(1 + (n * (n - 1))/2) jdyss <- 0 } if((k <- as.integer(k)) < 1 || k > n%/%2 - 1) stop("'k' (number of clusters) must be in {1,2, .., n/2 -1}") if(length(memb.exp) != 1 || (memb.exp <- as.double(memb.exp)) < 1 || memb.exp == Inf) stop("'memb.exp' must be a finite number > 1") if((maxit <- as.integer(maxit)[1]) < 0) stop("'maxit' must be non-negative integer") computeP <- is.null(iniMem.p) # default: determine initial membership in C if(computeP)# default: determine initial membership in C iniMem.p <- matrix(0., n, k)# all 0 -> will be used as 'code' else { dm <- dim(iniMem.p) if(length(dm) !=2 || !all(dm == c(n,k)) || !is.numeric(iniMem.p) || any(iniMem.p < 0) || !isTRUE(all.equal(unname(rowSums(iniMem.p)), rep(1, n)))) stop("'iniMem.p' must be a nonnegative n * k matrix with rowSums == 1") if(!is.double(iniMem.p)) storage.mode(iniMem.p) <- "double" } stopifnot(length(cluster.only) == 1) stopifnot(length(trace.lev) == 1) ## call Fortran routine storage.mode(x2) <- "double" res <- .C(cl_fanny, as.integer(n), as.integer(jp), k, x2, dis = dv, ok = as.integer(jdyss), if(mdata)valmd else double(1), if(mdata) jtmd else integer(jp), ndyst, integer(n), # nsend integer(n), # nelem integer(n), # negbr double(n), # syl p = iniMem.p, dp = matrix(0., n, k),# < must all be 0 on entry! avsil = double(k),# 'pt' integer(k), # nfuzz double(k), # esp double(k), # ef double(n), # dvec ttsil = as.double(0), obj = as.double(c(cluster.only, trace.lev, computeP, 0)),# in & out! clu = integer(n), silinf = if(cluster.only) 0. 
else matrix(0., n, 4), memb.exp = memb.exp,# = 'r' tol = as.double(tol), maxit = maxit) if(!(converged <- res$maxit > 0)) { warning(sprintf( "FANNY algorithm has not converged in 'maxit' = %d iterations", maxit)) } if(!cluster.only) sildim <- res$silinf[, 4] if(diss) { if(keep.diss) disv <- x labs <- attr(x, "Labels") } else { ## give warning if some dissimilarities are missing. if(res$ok == -1) stop("No clustering performed, NA-values in the dissimilarity matrix.") labs <- dimnames(x)[[1]] if(keep.diss) { disv <- res$dis[ - (1 + (n * (n - 1))/2)] # drop the extra one disv[disv == -1] <- NA class(disv) <- dissiCl attr(disv, "Size") <- nrow(x) attr(disv, "Metric") <- metric attr(disv, "Labels") <- labs } } ## add labels, dimnames, etc to Fortran output: if(length(labs) != 0) { if(!cluster.only) sildim <- labs[sildim] dimnames(res$p) <- list(labs, NULL) names(res$clu) <- labs } coeff <- if(memb.exp == 2) res$obj[3:4] else { ## usual partition coefficient with " ^ 2 " : cf <- sum(res$p ^ 2) / n c(cf, (k * cf - 1)/(k - 1)) } names(coeff) <- c("dunn_coeff", "normalized") if(abs(coeff["normalized"]) < 1e-7) warning("the memberships are all very close to 1/k. 
Maybe decrease 'memb.exp' ?") k.crisp <- res$obj[1] res$obj <- c("objective" = res$obj[2]) r <- list(membership = res$p, coeff = coeff, memb.exp = memb.exp, clustering = res$clu, k.crisp = k.crisp, # 'obj*': also containing iterations for back compatibility: objective = c(res$obj, "tolerance" = res$tol), convergence = c(iterations = res$maxit, converged = converged, maxit = maxit), diss = if(keep.diss) disv, call = match.call()) if(k != 1 && !cluster.only) { dimnames(res$silinf) <- list(sildim, c("cluster", "neighbor", "sil_width", "")) r$silinfo <- list(widths = res$silinf[, -4], clus.avg.widths = res$avsil[1:k], avg.width = res$ttsil) } if(keep.data && !diss) { if(mdata) x2[x2 == valmisdat] <- NA r$data <- x2 } class(r) <- c("fanny", "partition") r } ## non-exported: .print.fanny <- function(x, digits = getOption("digits"), ...) { cat("Fuzzy Clustering object of class 'fanny' :") print(formatC(cbind(" " = c("m.ship.expon." = x$memb.exp, x$objective[c("objective", "tolerance")], x$convergence, "n" = nrow(x$membership))), digits = digits), quote = FALSE, ...) k <- ncol(x$membership) cat("Membership coefficients (in %, rounded):\n"); print(round(100 * x$membership), ...) cat("Fuzzyness coefficients:\n"); print(x$coeff, digits = digits, ...) cat("Closest hard clustering:\n"); print(x$clustering, ...) if(x$k.crisp < k) cat(sprintf("k_crisp (= %d) < k !!\n", x$k.crisp)) } print.fanny <- function(x, digits = getOption("digits"), ...) { .print.fanny(x, digits = digits, ...) cat("\nAvailable components:\n") print(names(x), ...) invisible(x) } summary.fanny <- function(object, ...) { class(object) <- "summary.fanny" object } print.summary.fanny <- function(x, digits = getOption("digits"), ...) { .print.fanny(x, digits = digits, ...) if(length(x$silinfo) != 0) { cat("\nSilhouette plot information:\n") print(x$silinfo[[1]], ...) cat("Average silhouette width per cluster:\n") print(x$silinfo[[2]], ...) 
cat("Average silhouette width of total data set:\n") print(x$silinfo[[3]], ...) } if(!is.null(x$diss)) { ## Dissimilarities: cat("\n"); print(summary(x$diss, ...)) } cat("\nAvailable components:\n"); print(names(x), ...) invisible(x) } ## FIXME: Export and document these! ----------------------- ## Convert crisp clustering vector to fuzzy membership matrix as.membership <- function(clustering, keep.names = TRUE) { stopifnot(is.numeric(clustering), clustering == round(clustering)) n <- length(clustering) k <- length(u <- sort(unique(clustering))) r <- matrix(0L, n, k) if(k == 0 || n == 0) return(r) if(keep.names) dimnames(r) <- list(names(clustering), NULL) if(any(u != 1:k)) clustering <- match(clustering, u) r[cbind(1:n, clustering)] <- 1L r } ## "Generalized Inverse" transformation: ## Convert fuzzy membership matrix to closest crisp clustering vector toCrisp <- function(m) { dm <- dim(m) if(length(dm) != 2 || !is.numeric(m) || any(m < 0) || !isTRUE(all.equal(unname(rowSums(m)), rep(1, dm[1])))) stop("'m', a membership matrix, must be nonnegative with rowSums == 1") apply(m, 1, which.max) } cluster/R/ellipsoidhull.R0000644000176000001440000000771211501236356015171 0ustar ripleyusers#### ellipsoidhull : Find (and optionally draw) #### ----------- the smallest ellipsoid containining a set of points #### #### Just making the algorithms in clusplot() available more generally #### ( --> ./plotpart.q ) ### Author: Martin Maechler, Date: 21 Jan 2002, 15:41 ellipsoidhull <- function(x, tol = 0.01, maxit = 5000, ret.wt = FALSE, ret.sqdist = FALSE, ret.pr = FALSE) { if(!is.matrix(x) || !is.numeric(x)) stop("'x' must be numeric n x p matrix") if(any(is.na(x))) { warning("omitting NAs") x <- na.omit(x) } n <- nrow(x) if(n == 0) stop("no points without missing values") p <- ncol(x) res <- .C(spannel, n, ndep= p, dat = cbind(1., x), sqdist = double(n), l1 = double((p+1) ^ 2), double(p), double(p), prob = double(n), double(p+1), eps = as.double(tol), maxit = as.integer(maxit), 
ierr = integer(1))# 0 or non-zero if(res$ierr != 0) cat("Error in Fortran routine computing the spanning ellipsoid,", "\n probably collinear data\n", sep="") if(any(res$prob < 0) || all(res$prob == 0)) stop("computed some negative or all 0 'prob'abilities") conv <- res$maxit < maxit if(!conv) warning("possibly not converged in ", maxit, " iterations") conv <- conv && res$ierr == 0 cov <- cov.wt(x, res$prob) ## cov.wt() in R has extra wt[] scaling; revert here res <- list(loc = cov$center, cov = cov$cov * (1 - sum(cov$wt^2)), d2 = weighted.mean(res$sqdist, res$prob), wt = if(ret.wt) cov$wt, sqdist = if(ret.sqdist) res$sqdist, prob= if(ret.pr) res$prob, tol = tol, eps = max(res$sqdist) - p, it = res$maxit, maxit= maxit, ierr = res$ierr, conv = conv) class(res) <- "ellipsoid" res } print.ellipsoid <- function(x, digits = max(1, getOption("digits") - 2), ...) { d <- length(x$loc) cat("'ellipsoid' in", d, "dimensions:\n center = (", format(x$loc, digits=digits), "); squared ave.radius d^2 = ", format(x$d2, digits=digits), "\n and shape matrix =\n") print(x$cov, digits = digits, ...) 
cat(" hence,",if(d==2)"area" else "volume"," = ", format(volume(x), digits=digits),"\n") if(!is.null(x$conv) && !x$conv) { cat("\n** Warning: ** the algorithm did not terminate reliably!\n ", if(x$ierr) "most probably because of collinear data" else "(in the available number of iterations)", "\n") } invisible(x) } volume <- function(object) UseMethod("volume") volume.ellipsoid <- function(object) { A <- object$cov pi * object$d2 * sqrt(det(A)) } ## For p = 2 : ## Return (x[i],y[i]) points, i = 1:n, on boundary of ellipse, given ## by 2 x 2 matrix A[], origin 'loc' and d(xy, loc) ^2 = 'd2' ellipsoidPoints <- function(A, d2, loc, n.half = 201) { if(length(d <- dim(A)) != 2 || (p <- d[1]) != d[2]) stop("'A' must be p x p cov-matrix defining an ellipsoid") if(p == 2) { detA <- A[1, 1] * A[2, 2] - A[1, 2]^2 yl2 <- A[2, 2] * d2 # = (y_max - y_loc)^2 y <- seq( - sqrt(yl2), sqrt(yl2), length = n.half) sqrt.discr <- sqrt(detA * pmax(0, yl2 - y^2))/A[2, 2] sqrt.discr[c(1, n.half)] <- 0 b <- loc[1] + A[1, 2]/A[2, 2] * y y <- loc[2] + y return(rbind(cbind( b - sqrt.discr, y), cbind(rev(b + sqrt.discr), rev(y)))) } else { ## p >= 3 detA <- det(A) ##-- need something like polar coordinates stop("ellipsoidPoints() not yet implemented for p >= 3 dim.") } } predict.ellipsoid <- function(object, n.out = 201, ...) 
ellipsoidPoints(object$cov, d2 = object$d2, loc= object$loc, n.half = n.out) cluster/R/diana.q0000644000176000001440000001103511705523412013422 0ustar ripleyusers### $Id: diana.q 6015 2012-01-18 11:03:38Z maechler $ diana <- function(x, diss = inherits(x, "dist"), metric = "euclidean", stand = FALSE, ##_not_yet stop.at.k = FALSE, keep.diss = n < 100, keep.data = !diss, trace.lev = 0) { if((diss <- as.logical(diss))) { ## check type of input vector if(any(is.na(x))) stop(..msg$error["NAdiss"]) if(data.class(x) != "dissimilarity") { # try to convert to if(!is.null(dim(x))) { x <- as.dist(x) # or give an error } else { ## possibly convert input *vector* if(!is.numeric(x) || is.na(n <- sizeDiss(x))) stop(..msg$error["non.diss"]) attr(x, "Size") <- n } class(x) <- dissiCl if(is.null(attr(x,"Metric"))) attr(x, "Metric") <- "unspecified" } n <- as.integer(attr(x, "Size")) dv <- x[lower.to.upper.tri.inds(n)] ## prepare arguments for the Fortran call dv <- c(0., dv)# double jp <- 1L mdata <- FALSE ndyst <- 0 x2 <- double(1) } else { ## check input matrix and standardize, if necessary x <- data.matrix(x) if(!is.numeric(x)) stop("x is not a numeric dataframe or matrix.") x2 <- if(stand) scale(x, scale = apply(x, 2, meanabsdev)) else x ndyst <- if(metric == "manhattan") 2 else 1 n <- nrow(x2) jp <- ncol(x2) if((mdata <- any(inax <- is.na(x2)))) { # TRUE if x[] has any NAs jtmd <- as.integer(ifelse(apply(inax, 2, any), -1, 1)) ## VALue for MISsing DATa valmisdat <- 1.1* max(abs(range(x2, na.rm=TRUE))) x2[inax] <- valmisdat valmd <- rep(valmisdat, jp) } dv <- double(1 + (n * (n - 1))/2) } stopifnot(length(trace.lev <- as.integer(trace.lev)) == 1) ##_not_yet stopifnot(is.logical(stop.at.k) || ##_not_yet (is.numeric(stop.at.k) && 1 <= stop.at.k && stop.at.k <= n)) C.keep.diss <- keep.diss && !diss res <- .C(twins, n, jp, as.double(x2), dv, dis = double(if(C.keep.diss) length(dv) else 1), jdyss = if(C.keep.diss) diss + 10L else as.integer(diss), if(mdata)valmd else double(1), 
if(mdata) jtmd else integer(jp), as.integer(ndyst), 2L,# jalg = 2 <==> DIANA 0L, ##_not_yet as.integer(stop.at.k),# default: 0 do *not* stop early, integer(n), ner = integer(n), ban = double(n), dc = as.double(trace.lev),# in / out double(1), merge = matrix(0L, n - 1, 2), # integer DUP = FALSE) if(!diss) { ## give warning if some dissimilarities are missing. if(res$jdyss == -1) stop("No clustering performed, NA's in dissimilarity matrix.\n") if(keep.diss) { ## adapt Fortran output to S: ## convert lower matrix, read by rows, to upper matrix, read by rows. disv <- res$dis[-1] disv[disv == -1] <- NA disv <- disv[upper.to.lower.tri.inds(n)] class(disv) <- dissiCl attr(disv, "Size") <- nrow(x) attr(disv, "Metric") <- metric attr(disv, "Labels") <- dimnames(x)[[1]] } ## add labels to Fortran output if(length(dimnames(x)[[1]]) != 0) order.lab <- dimnames(x)[[1]][res$ner] } else { if(keep.diss) disv <- x ## add labels to Fortran output if(length(attr(x, "Labels")) != 0) order.lab <- attr(x, "Labels")[res$ner] } clustering <- list(order = res$ner, height = res$ban[-1], dc = res$dc, merge = res$merge, diss = if(keep.diss)disv, call = match.call()) if(exists("order.lab")) clustering$order.lab <- order.lab if(keep.data && !diss) { if(mdata) x2[x2 == valmisdat] <- NA clustering$data <- x2 } class(clustering) <- c("diana", "twins") clustering } print.diana <- function(x, ...) { cat("Merge:\n") print(x$merge, ...) cat("Order of objects:\n") print(if (length(x$order.lab) != 0) x$order.lab else x$order, quote = FALSE, ...) cat("Height:\n") print(x$height, ...) cat("Divisive coefficient:\n") print(x$dc, ...) cat("\nAvailable components:\n") print(names(x), ...) invisible(x) } summary.diana <- function(object, ...) { class(object) <- "summary.diana" object } print.summary.diana <- function(x, ...) { cat("Merge:\n"); print(x$merge, ...) cat("Order of objects:\n") print(if(length(x$order.lab)) x$order.lab else x$order, quote = FALSE, ...) cat("Height:\n"); print(x$height, ...) 
cat("Divisive coefficient:\n"); print(x$dc, ...) if(!is.null(x$diss)) { ## Dissimilarities: cat("\n"); print(summary(x$diss, ...)) } cat("\nAvailable components:\n"); print(names(x), ...) invisible(x) } cluster/R/daisy.q0000644000176000001440000001651312014772531013467 0ustar ripleyusers daisy <- function(x, metric = c("euclidean", "manhattan", "gower"), stand = FALSE, type = list(), weights = rep.int(1, p)) { ## check type of input matrix if(length(dx <- dim(x)) != 2 || !(is.data.frame(x) || is.numeric(x))) stop("x is not a dataframe or a numeric matrix.") n <- dx[1]# nrow p <- dx[2]# ncol varnms <- dimnames(x)[[2]] pColl <- function(n) paste(n, collapse = ", ") if(length(type)) { if(!is.list(type) || is.null(ntyp <- names(type)) || any(ntyp == "")) stop("invalid ", sQuote("type"),"; must be named list") ## check each component to be valid column names or numbers: for(nt in ntyp) { cvec <- type[[nt]] if(is.character(cvec)) { if(!is.null(varnms) && !all(cvec %in% varnms)) stop("type$", nt, " has invalid column names") } else if(is.numeric(cvec)) { if(!all(1 <= cvec & cvec <= p)) stop("type$", nt, " must be in 1:ncol(x)") } else stop("type$", nt, " must contain column names or numbers") } tA <- type$asymm tS <- type$symm if(!is.null(tA) || !is.null(tS)) { ## tA and tS might be character and integer! 
d.bin <- cbind(as.data.frame(x[, tA, drop= FALSE]), x[, tS, drop= FALSE]) lenB <- sapply(lapply(d.bin, function(y) levels(as.factor(y))), length) if(any(lenB > 2)) stop("at least one binary variable has more than 2 levels.") if(any(lenB < 2)) warning("at least one binary variable has not 2 different levels.") ## Convert factors to integer, such that ("0","1") --> (0,1): if(any(is.f <- sapply(d.bin, is.factor))) d.bin[is.f] <- lapply(d.bin[is.f], function(f) as.integer(as.character(f))) if(!all(sapply(d.bin, function(y) is.logical(y) || all(sort(unique(as.numeric(y[!is.na(y)])))%in% 0:1)))) stop("at least one binary variable has values not in {0,1,NA}") } } ## transform variables and construct 'type' vector if(is.data.frame(x)) { type2 <- sapply(x, data.class) x <- data.matrix(x) } else { ## matrix type2 <- rep("numeric", p) names(type2) <- colnames(x) } if(length(type)) { tT <- type$ ordratio tL <- type$ logratio x[, names(type2[tT])] <- unclass(as.ordered(x[, names(type2[tT])])) x[, names(type2[tL])] <- log10( x[, names(type2[tL])]) type2[tA] <- "A" type2[tS] <- "S" type2[tT] <- "T" # was "O" (till 2000-12-14) accidentally ! 
} type2[tI <- type2 %in% c("numeric", "integer") ] <- "I" if(n > 9 && any(tI) && any(iBin <- apply(x[, tI, drop = FALSE], 2, function(v) length(table(v)) == 2))) warning("binary variable(s) ", pColl(which(tI)[iBin]), " treated as interval scaled") type2[type2 == "ordered"] <- "O" type2[type2 == "factor"] <- "N" if(any(ilog <- type2 == "logical")) { warning(sprintf(ngettext(sum(ilog), "setting 'logical' variable %s to type 'asymm'", "setting 'logical' variables %s to type 'asymm'"), pColl(which(ilog))), domain = NA) type2[ilog] <- "A" } ## Note: We have 2 status codings: ndyst = (0,1,2) and jdat = (1,2); ## the latter is superfluous in principle ## standardize, if necessary all.I <- all(type2 == "I") if(all.I && { metric <- match.arg(metric); metric != "gower" }) { if(stand) { x <- scale(x, center = TRUE, scale = FALSE) #-> 0-means sx <- colMeans(abs(x), na.rm = TRUE)# can still have NA's if(0 %in% sx) { warning(sQuote("x"), " has constant columns ", pColl(which(sx == 0)), "; these are standardized to 0") sx[sx == 0] <- 1 } x <- scale(x, center = FALSE, scale = sx) } jdat <- 2L ndyst <- if(metric == "manhattan") 2L else 1L } else { ## mixed case or explicit "gower" if(!missing(metric) && metric != "gower" && !all.I) warning("with mixed variables, metric \"gower\" is used automatically") ## FIXME: think of a robust alternative scaling to ## Gower's (x - min(x)) / (max(x) - min(x)) colR <- apply(x, 2, range, na.rm = TRUE) colmin <- colR[1,] sx <- colR[2,] - colmin if(any(sx == 0)) sx[sx == 0] <- 1 x <- scale(x, center = colmin, scale = sx) jdat <- 1L ndyst <- 0L ## weights only used in this "gower" case if(length(weights) == 1) weights <- rep.int(weights, p) else if(length(weights) != p) stop("'weights' must be of length p (or 1)") } ## type2 <- paste(type2, collapse = "") typeCodes <- c('A','S','N','O','I','T') ## 1 2 3 4 5 6 --> passed to Fortran below type3 <- match(type2, typeCodes)# integer if(any(ina <- is.na(type3))) stop("invalid type ", type2[ina], " for 
column numbers ", pColl(which(is.na))) if((mdata <- any(inax <- is.na(x)))) { # TRUE if x[] has any NAs jtmd <- as.integer(ifelse(apply(inax, 2, any), -1, 1)) ## VALue for MISsing DATa valmisdat <- 1.1* max(abs(range(x, na.rm=TRUE))) x[inax] <- valmisdat valmd <- rep(valmisdat, p) } ## call Fortran routine storage.mode(x) <- "double" disv <- .Fortran(cl_daisy, n, p, x, if(mdata)valmd else double(1), as.double(weights), if(mdata) jtmd else integer(1), jdat, type3, # vtype ndyst, as.integer(mdata), dis = double((n * (n - 1))/2), NAOK = TRUE,# only to allow "+- Inf" DUP = FALSE)$dis ## adapt Fortran output to S: ## convert lower matrix, read by rows, to upper matrix, read by rows. disv[disv == -1] <- NA full <- matrix(0, n, n) full[!lower.tri(full, diag = TRUE)] <- disv disv <- t(full)[lower.tri(full)] ## give warning if some dissimilarities are missimg if(any(is.na(disv))) attr(disv, "NA.message") <- "NA-values in the dissimilarity matrix !" ## construct S object -- "dist" methods are *there* ! class(disv) <- dissiCl # see ./0aaa.R attr(disv, "Labels") <- dimnames(x)[[1]] attr(disv, "Size") <- n attr(disv, "Metric") <- if(!ndyst) "mixed" else metric if(!ndyst) attr(disv, "Types") <- typeCodes[type3] disv } print.dissimilarity <- function(x, diag = NULL, upper = NULL, digits = getOption("digits"), justify = "none", right = TRUE, ...) { cat("Dissimilarities :\n") ##orig {Rousseeuw..}: print(as.vector(x), ...) stats:::print.dist(x, diag=diag, upper=upper, digits=digits, justify=justify, right=right, ...) ## cat("\n") if(!is.null(attr(x, "na.message"))) cat("Warning : ", attr(x, "NA.message"), "\n") cat("Metric : ", attr(x, "Metric"), if(!is.null(aT <- attr(x,"Types"))) paste("; Types =", paste(aT, collapse=", ")), "\n") cat("Number of objects : ", attr(x, "Size"), "\n", sep="") invisible(x) } summary.dissimilarity <- function(object, digits = max(3, getOption("digits") - 2), ...) 
## 'digits': want a bit higher precision { sx <- summary(as.vector(object), digits = digits, ...) at <- attributes(object) r <- c(list(summ = sx, n = length(object)), at[names(at) != "class"]) class(r) <- "summary.dissimilarity" r } print.summary.dissimilarity <- function(x, ...) { cat(x$n, "dissimilarities, summarized :\n") print(x$summ, ...) cat("Metric : ", x $ Metric, if(!is.null(aT <- x $ Types)) paste("; Types =", paste(aT, collapse=", ")), "\n") cat("Number of objects : ", x $ Size, "\n", sep="") if(!is.null(x $ na.message)) cat("Warning : ", x $ NA.message, "\n") invisible(x) } cluster/R/coef.R0000644000176000001440000000253011674345261013234 0ustar ripleyusers#### R-interface to Agglomerative / Divisive coefficient #### coef.twins <- function(object, ...) { if(inherits(object, "agnes")) object$ac else if(inherits(object, "diana")) object$dc else stop("invalid 'twins' object") } coef.hclust <- function(object, ...) { ## Author: Martin Maechler, Date: 27 Nov 2004 ## Now "really" using $merge _and_ $height -- assuming they match! ht <- object$height mrg <- object$merge nh <- length(ht) stopifnot(nh > 0, is.matrix(mrg), dim(mrg) == c(nh,2), is.numeric(ht), is.numeric(mrg), !is.unsorted(ht))# then they match with merge ## stopifnot(all.equal(1:n, sort(-mrg[mrg < 0]))) 1 - sum(rowSums(mrg < 0) * ht) / max(ht) / (nh+1) } ## Note this is (the only!) direct interface to bncoef(), ## ---- which is used internally both in agnes() and diana() : coefHier <- function(object) { ## Purpose: Compute agglomerative *or* divisive coefficient from hclust/agnes/diana ## ---------------------------------------------------------------------- ## Author: Martin Maechler, Date: 27 Nov 2004 nh <- length(ht <- object$height) stopifnot(nh > 0, is.numeric(ht)) .C(R_bncoef, n = as.integer(nh + 1L), ban= as.double(c(0., ht)),# <-- is this really tbe ban[]nner, as in ../src/twins.c ? 
cf = double(1))$cf } cluster/R/clusGap.R0000644000176000001440000001376312033660155013720 0ustar ripleyusers#### Originally from orphaned package SLmisc #### (Version: 1.4.1, 2007-04-12, Maintainer: Matthias Kohl ) #### License: GPL (version 2 or later) #### #### which said #### "function corresponds to function gap in package SAGx" ## MM: SAGx is now in Bioconductor --- 1.10.1{devel} or 1.11.1{release} ## had gap() *corrected* to re-cluster using FUNcluster --> see ./gap-SAGx.R.~orig~ ## ## MM: Package 'lga' -- has gap() and lga and robust lga [-> UBC] ## - it uses boot() nicely [2012-01: ORPHANED because Justin Harrington is amiss] ## MM: renamed arguments, and changed almost everything clusGap <- function (x, FUNcluster, K.max, B = 100, verbose = interactive(), ...) { stopifnot(is.function(FUNcluster), length(dim(x)) == 2, K.max >= 2, (n <- nrow(x)) >= 1, (p <- ncol(x)) >= 1) if(B != (B. <- as.integer(B)) || (B <- B.) <= 0) stop("'B' has to be a positive integer") if(is.data.frame(x)) x <- as.matrix(x) ii <- seq_len(n) W.k <- function(X, kk) { clus <- if(kk > 1) FUNcluster(X, kk, ...)$cluster else rep.int(1L, nrow(X)) ## ---------- = = -------- kmeans() has 'cluster'; pam() 'clustering' 0.5* sum(vapply(split(ii, clus), function(I) { xs <- X[I,, drop=FALSE] sum(dist(xs)/nrow(xs)) }, 0.)) } logW <- E.logW <- SE.sim <- numeric(K.max) if(verbose) cat("Clustering k = 1,2,..., K.max (= ",K.max,"): .. 
", sep='') for(k in 1:K.max) logW[k] <- log(W.k(x, k)) if(verbose) cat("done\n") ## Scale 'x' into "hypercube" -- we later fill with H0-generated data xs <- scale(x, center=TRUE, scale=FALSE) m.x <- rep(attr(xs,"scaled:center"), each = n)# for back transforming V.sx <- svd(xs)$v rng.x1 <- apply(xs %*% V.sx, # = transformed(x) 2, range) logWks <- matrix(0., B, K.max) if(verbose) cat("Bootstrapping, b = 1,2,..., B (= ", B, ") [one \".\" per sample]:\n", sep="") for (b in 1:B) { ## Generate "H0"-data as "parametric bootstrap sample" : z1 <- apply(rng.x1, 2, function(M, nn) runif(nn, min=M[1], max=M[2]), nn=n) z <- tcrossprod(z1, V.sx) + m.x # back transformed for(k in 1:K.max) { logWks[b,k] <- log(W.k(z, k)) } if(verbose) cat(".", if(b %% 50 == 0) paste(b,"\n")) } if(verbose && (B %% 50 != 0)) cat("",B,"\n") E.logW <- colMeans(logWks) SE.sim <- sqrt((1 + 1/B) * apply(logWks, 2, var)) structure(class = "clusGap", list(Tab = cbind(logW, E.logW, gap = E.logW - logW, SE.sim), ## K.max == nrow(T) n = n, B = B, FUNcluster=FUNcluster)) } ## lga/R/gap.R --- has for Tibshirani et al (2001): ## ElogWks[k,] <- c(mean(BootOutput), sqrt(var(BootOutput)*(1+1/B))) ## GAP[k] <- ElogWks[k,1] - logWks[k] ## if (k > 1) ## if(GAP[k-1] >= GAP[k]-ElogWks[k,2] & !doall) ## finished <- TRUE ## so they effectively only look for the *first* (local) maximum which .. ## MM: <==> diff(GAP) = GAP[k] - GAP[k-1] <= +SE.sim[k] ## criteria.DandF() -- Dudoit and Fridlyand (2002) ## ---------------- looks at the *global* maximum and then to the left.. 
## y <- x$data ## crit <- diff(y[which.max(y[,"Gap"]), c("Sks", "Gap")]) ## nclust <- min(which(y[,"Gap"] > crit)) ## return(ifelse(nclust == nrow(y), NA, nclust)) maxSE <- function(f, SE.f, method = c("firstSEmax", "Tibs2001SEmax", "globalSEmax", "firstmax", "globalmax"), SE.factor = 1) { method <- match.arg(method) stopifnot((K <- length(f)) >= 1, K == length(SE.f), SE.f >= 0, SE.factor >= 0) fSE <- SE.factor * SE.f switch(method, "firstmax" = { ## the first local maximum (== firstSEmax with SE.factor == 0) decr <- (dg <- diff(f)) <= 0 # length K-1 if(any(decr)) which.max(decr) else K # the first TRUE, or K }, "globalmax" = { which.max(f) }, "Tibs2001SEmax" = { ## The one Tibshirani et al (2001) proposed: ## "the smallest k such that f(k) >= f(k+1) - s_{k+1}" g.s <- f - fSE if(any(mp <- f[-K] >= g.s[-1])) which.max(mp) else K }, "firstSEmax" = { ## M.Maechler(2012): rather .. ## look at the first *local* maximum and then to the left ..: decr <- (dg <- diff(f)) <= 0 # length K-1 nc <- if(any(decr)) which.max(decr) else K # the first TRUE, or K if(any(mp <- f[seq_len(nc - 1)] >= f[nc] - fSE[nc])) which(mp)[1] else nc }, "globalSEmax" = { ## Dudoit and Fridlyand (2002) *thought* Tibshirani proposed.. ## in 'lga', see criteria.DandF(): ## looks at the *global* maximum and then to the left.. nc <- which.max(f) if(any(mp <- f[seq_len(nc - 1)] >= f[nc] - fSE[nc])) which(mp)[1] else nc }) } print.clusGap <- function(x, method="firstSEmax", SE.factor = 1, ...) { method <- match.arg(method, choices = eval(formals(maxSE)$method)) stopifnot((K <- nrow(T <- x$Tab)) >= 1, SE.factor >= 0) cat("Clustering Gap statistic [\"clusGap\"].\n", sprintf("B=%d simulated reference sets, k = 1..%d\n",x$B, K), sep="") nc <- maxSE(f = T[,"gap"], SE.f = T[,"SE.sim"], method=method, SE.factor=SE.factor) cat(sprintf(" --> Number of clusters (method '%s'%s): %d\n", method, if(grepl("SE", method)) sprintf(", SE.factor=%g",SE.factor) else "", nc)) print(T, ...) 
invisible(x) } plot.clusGap <- function(x, type="b", xlab = "k", ylab = expression(Gap[k]), do.arrows=TRUE, arrowArgs = list(col="red3", length=1/16, angle=90, code=3), ...) { stopifnot(is.matrix(Tab <- x$Tab), is.numeric(Tab)) K <- nrow(Tab) k <- seq_len(K) # == 1,2,... k gap <- Tab[, "gap"] plot(k, gap, type=type, xlab=xlab, ylab=ylab, ...) if(do.arrows) do.call(arrows, c(list(k, gap+ Tab[, "SE.sim"], k, gap- Tab[, "SE.sim"]), arrowArgs)) invisible() } cluster/R/clara.q0000644000176000001440000001532712014772531013442 0ustar ripleyusers#### CLARA := Clustering LARge Applications #### #### Note that the algorithm is O(n), but O(ns^2) where ns == sampsize clara <- function(x, k, metric = "euclidean", stand = FALSE, samples = 5, sampsize = min(n, 40 + 2 * k), trace = 0, medoids.x = TRUE, keep.data = medoids.x, rngR = FALSE, pamLike = FALSE) { ## check type of input matrix and values of input numbers if(inherits(x, "dist"))# catch user error stop("'x' is a \"dist\" object, but should be a data matrix or frame") x <- data.matrix(x) if(!is.numeric(x)) stop("x is not a numeric dataframe or matrix.") n <- nrow(x) if((k <- as.integer(k)) < 1 || k > n - 1) stop("The number of cluster should be at least 1 and at most n-1." 
) if((sampsize <- as.integer(sampsize)) < max(2,k+1)) stop(gettextf("'sampsize' should be at least %d = max(2, 1+ number of clusters)", max(2,k+1)), domain=NA) if(n < sampsize) stop(gettextf("'sampsize' = %d should not be larger than the number of objects, %d", sampsize, n), domain=NA) if((samples <- as.integer(samples)) < 1) stop("'samples' should be at least 1") jp <- ncol(x) namx <- dimnames(x)[[1]] ## standardize, if necessary {careful not to copy unnecessarily}: if(medoids.x) ## need to save original 'x' ox <- x else if(keep.data) stop("when 'medoids.x' is FALSE, 'keep.data' must be too") if(stand) x <- scale(x, scale = apply(x, 2, meanabsdev)) if(keep.data) data <- x ## put info about metric, size and NAs in arguments for the .C call if((mdata <- any(inax <- is.na(x)))) { # TRUE if x[] has any NAs jtmd <- as.integer(ifelse(apply(inax, 2, any), -1, 1)) ## VALue for MISsing DATa valmisdat <- 1.1* max(abs(range(x, na.rm=TRUE))) x[inax] <- valmisdat } else rm(inax) # save space doDUP <- nzchar(dup <- Sys.getenv("R_cluster_clara_dup")) && isTRUE(as.logical(dup)) if((trace <- as.integer(trace))) cat(sprintf("calling .C(cl_clara, ..., DUP = %s):\n", doDUP)) res <- .C(cl_clara, n, jp, k, clu = as.double(x), nran = samples, nsam = sampsize, dis = double(1 + (sampsize * (sampsize - 1))/2), mdata = as.integer(mdata), valmd = if(mdata) rep(valmisdat, jp) else -1., jtmd = if(mdata) jtmd else integer(1), ndyst = as.integer(if(metric == "manhattan") 2 else 1), as.logical(rngR[1]), as.logical(pamLike[1]), integer(sampsize),# = nrepr integer(sampsize),# = nsel sample= integer(sampsize),# = nbest integer(k), # = nr imed = integer(k), # = nrx double(k), # = radus double(k), # = ttd double(k), # = ratt avdis = double(k), # = ttbes maxdis = double(k), # = rdbes ratdis = double(k), # = rabes size = integer(k), # = mtt obj = double(1), avsil = double(k), ttsil = double(1), silinf = matrix(0, sampsize, 4), jstop = integer(1), trace = trace, tmp = double (3 * sampsize), itmp = 
integer(6 * sampsize), DUP = doDUP) ## give a warning when errors occured if(res$jstop) { if(mdata && any(aNA <- apply(inax,1, all))) { i <- which(aNA) nNA <- length(i) pasteC <- function(...) paste(..., collapse= ",") stop(ngettext(nNA, sprintf("Observation %d has *only* NAs --> omit it for clustering", i[1]), ## nNA > 1 : paste(if(nNA < 13) sprintf("Observations %s", pasteC(i)) else sprintf("%d observations (%s ...)", nNA, pasteC(i[1:12])), "\thave *only* NAs --> na.omit() them for clustering!", sep = "\n")), domain = NA) } ## else if(res$jstop == 1) stop("Each of the random samples contains objects between which\n", " no distance can be computed.") if(res$jstop == 2) stop("For each of the ", samples, " samples, at least one object was found which\n could not", " be assigned to a cluster (because of missing values).") ## else {cannot happen} stop("invalid 'jstop' from .C(cl_clara,.): ", res$jstop) } ## 'res$clu' is still large; cut down ASAP res$clu <- as.integer(res$clu[1:n]) sildim <- res$silinf[, 4] ## adapt C output to S: ## convert lower matrix, read by rows, to upper matrix, read by rows. 
disv <- res$dis[-1] disv[disv == -1] <- NA disv <- disv[upper.to.lower.tri.inds(sampsize)] class(disv) <- dissiCl attr(disv, "Size") <- sampsize attr(disv, "Metric") <- metric attr(disv, "Labels") <- namx[res$sample] res$med <- if(medoids.x) ox[res$imed, , drop = FALSE] ## add labels to C output if(!is.null(namx)) { sildim <- namx[sildim] res$sample <- namx[res$sample] names(res$clu) <- namx } r <- list(sample = res$sample, medoids = res$med, i.med = res$imed, clustering = res$clu, objective = res$obj, clusinfo = cbind(size = res$size, "max_diss" = res$maxdis, "av_diss" = res$avdis, isolation = res$ratdis), diss = disv, call = match.call()) ## add dimnames to C output if(k > 1) { dimnames(res$silinf) <- list(sildim, c("cluster", "neighbor", "sil_width", "")) r$silinfo <- list(widths = res$silinf[, -4], clus.avg.widths = res$avsil, avg.width = res$ttsil) } if(keep.data) r$data <- data class(r) <- c("clara", "partition") r } print.clara <- function(x, ...) { cat("Call: ", deparse(x$call), "\nMedoids:\n"); print(x$medoids, ...) cat("Objective function:\t ", format(x$objective, ...),"\n", "Clustering vector: \t", sep=""); str(x$clustering, vec.len = 7) cat("Cluster sizes: \t", x$clusinfo[,1], "\nBest sample:\n"); print(x$sample, quote = FALSE, ...) cat("\nAvailable components:\n"); print(names(x), ...) invisible(x) } summary.clara <- function(object, ...) { class(object) <- "summary.clara" object } print.summary.clara <- function(x, ...) { cat("Object of class 'clara' from call:\n", deparse(x$call), "\nMedoids:\n"); print(x$medoids, ...) cat("Objective function:\t ", format(x$objective, ...), "\nNumerical information per cluster:\n") print(x$clusinfo, ...) if(has.sil <- !is.null(x$silinfo)) { cat("Average silhouette width per cluster:\n") print(x$silinfo[[2]], ...) cat("Average silhouette width of best sample:", format(x$silinfo[[3]], ...), "\n") } cat("\nBest sample:\n"); print(x$sample, quote = FALSE, ...) cat("Clustering vector:\n"); print(x$clustering, ...) 
if(has.sil) { cat("\nSilhouette plot information for best sample:\n") print(x$silinfo[[1]], ...) } if(!is.null(x$diss)) { ## Dissimilarities: cat("\n"); print(summary(x$diss, ...)) } cat("\nAvailable components:\n"); print(names(x), ...) invisible(x) } cluster/R/agnes.q0000644000176000001440000001304611705523412013447 0ustar ripleyusers#### $Id: agnes.q 6015 2012-01-18 11:03:38Z maechler $ agnes <- function(x, diss = inherits(x, "dist"), metric = "euclidean", stand = FALSE, method = "average", par.method, keep.diss = n < 100, keep.data = !diss) { METHODS <- c("average", "single","complete", "ward","weighted", "flexible") ## hclust has more; 1 2 3 4 5 6 meth <- pmatch(method, METHODS) if(is.na(meth)) stop("invalid clustering method") if(meth == -1) stop("ambiguous clustering method") method <- METHODS[meth] if(method == "flexible") { ## Lance-Williams formula (but *constant* coefficients): par.method <- as.numeric(par.method) # or barf stopifnot((np <- length(par.method)) >= 1) if(np == 1)## default (a1= a, a2= a, b= 1-2a, c = 0) par.method <- c(par.method, par.method, 1-2*par.method, 0) else if(np == 3) par.method <- c(par.method, 0) else if(np != 4) stop("'par.method' must be of length 1, 3, or 4") attr(method,"par") <- par.method } else par.method <- double(1) if((diss <- as.logical(diss))) { ## check type of input vector if(any(is.na(x))) stop(..msg$error["NAdiss"]) if(data.class(x) != "dissimilarity") { # try to convert to if(!is.null(dim(x))) { x <- as.dist(x) # or give an error } else { ## possibly convert input *vector* if(!is.numeric(x) || is.na(n <- sizeDiss(x))) stop(..msg$error["non.diss"]) attr(x, "Size") <- n } class(x) <- dissiCl if(is.null(attr(x,"Metric"))) attr(x, "Metric") <- "unspecified" } n <- attr(x, "Size") dv <- x[lower.to.upper.tri.inds(n)] ## prepare arguments for the Fortran call dv <- c(0., dv)# "double", 1st elem. "only for Fortran" (?) 
jp <- 1 mdata <- FALSE ndyst <- 0 x2 <- double(1) } else { ## check input matrix and standardize, if necessary x <- data.matrix(x) if(!is.numeric(x)) stop("x is not a numeric dataframe or matrix.") x2 <- if(stand) scale(x, scale = apply(x, 2, meanabsdev)) else x storage.mode(x2) <- "double" ndyst <- if(metric == "manhattan") 2 else 1 n <- nrow(x2) jp <- ncol(x2) if((mdata <- any(inax <- is.na(x2)))) { # TRUE if x[] has any NAs jtmd <- as.integer(ifelse(apply(inax, 2, any), -1, 1)) ## VALue for MISsing DATa valmisdat <- 1.1* max(abs(range(x2, na.rm=TRUE))) x2[inax] <- valmisdat valmd <- rep(valmisdat, jp) } dv <- double(1 + (n * (n - 1))/2) } if(n <= 1) stop("need at least 2 objects to cluster") C.keep.diss <- keep.diss && !diss res <- .C(twins, as.integer(n), as.integer(jp), x2, dv, dis = double(if(C.keep.diss) length(dv) else 1), jdyss = if(C.keep.diss) diss + 10L else as.integer(diss), if(mdata) valmd else double(1), if(mdata) jtmd else integer(jp), as.integer(ndyst), 1L,# jalg = 1 <==> AGNES meth,# integer integer(n), ner = integer(n), ban = double(n), ac = as.double(0), ## as.double(trace.lev),# in / out par.method, merge = matrix(0L, n - 1, 2), # integer DUP = FALSE) if(!diss) { ##give warning if some dissimilarities are missing. if(res$jdyss == -1) stop("No clustering performed, NA-values in the dissimilarity matrix.\n" ) if(keep.diss) { ## adapt Fortran output to S: ## convert lower matrix,read by rows, to upper matrix, read by rows. 
disv <- res$dis[-1] disv[disv == -1] <- NA disv <- disv[upper.to.lower.tri.inds(n)] class(disv) <- dissiCl attr(disv, "Size") <- nrow(x) attr(disv, "Metric") <- metric attr(disv, "Labels") <- dimnames(x)[[1]] } ##add labels to Fortran output if(length(dimnames(x)[[1]]) != 0) order.lab <- dimnames(x)[[1]][res$ner] } else { if(keep.diss) disv <- x ##add labels to Fortran output if(length(attr(x, "Labels")) != 0) order.lab <- attr(x, "Labels")[res$ner] } clustering <- list(order = res$ner, height = res$ban[-1], ac = res$ac, merge = res$merge, diss = if(keep.diss)disv, call = match.call(), method = METHODS[meth]) if(exists("order.lab")) clustering$order.lab <- order.lab if(keep.data && !diss) { if(mdata) x2[x2 == valmisdat] <- NA clustering$data <- x2 } class(clustering) <- c("agnes", "twins") clustering } summary.agnes <- function(object, ...) { class(object) <- "summary.agnes" object } print.agnes <- function(x, ...) { cat("Call: ", deparse(x$call), "\nAgglomerative coefficient: ", format(x$ac, ...), "\nOrder of objects:\n") print(if(length(x$order.lab) != 0) x$order.lab else x$order, quote = FALSE, ...) cat("Height (summary):\n"); print(summary(x$height), ...) cat("\nAvailable components:\n"); print(names(x), ...) invisible(x) } print.summary.agnes <- function(x, ...) { ## a bit more than print.agnes() .. cat("Object of class 'agnes' from call:\n", deparse(x$call), "\nAgglomerative coefficient: ", format(x$ac, ...), "\nOrder of objects:\n") print(if(length(x$order.lab) != 0) x$order.lab else x$order, quote = FALSE, ...) cat("Merge:\n"); print(x$merge, ...) cat("Height:\n"); print(x$height, ...) if(!is.null(x$diss)) { ## Dissimilarities: cat("\n"); print(summary(x$diss, ...)) } cat("\nAvailable components:\n"); print(names(x), ...) invisible(x) } as.dendrogram.twins <- function(object, ...) ## ... : really only 'hang' as.dendrogram(as.hclust(object), ...) cluster/R/0aaa.R0000644000176000001440000000120512036113605013104 0ustar ripleyusers## Ensure consistent "diss.." 
class --- make "namespace-private-global ! dissiCl <- c("dissimilarity", "dist") ## consistent error / warning messages; could use for internationalization ..msg <- list(error = c(NAdiss = "NA-values in the dissimilarity matrix not allowed.", non.diss="x is not and cannot be converted to class dissimilarity" ), warn = c() ) ## Not exported, and only used because CRAN checks must be faster doExtras <- function() { interactive() || nzchar(Sys.getenv("R_CLUSTER_CHECK_EXTRA")) || identical("true", unname(Sys.getenv("R_PKG_CHECKING_doExtras"))) } cluster/PORTING0000644000176000001440000000503107302543047013033 0ustar ripleyusers* R/mona.q: Replace if(!is.matrix(x)) stop(message = "x is not a matrix.") by if(!is.matrix(x) && !is.data.frame(x)) stop("x must be a matrix or data frame.") Comment x2 <- paste(x2, collapse = "") and replace storage.mode(x2) <- "character" by storage.mode(x2) <- "integer" * R/plothier.q: Replace `pick <- 2' by `pick <- 3'. (Undo when plclust is available.) -- undone for version 1.3-2 Replace invisible(return(x)) by return(invisible(x)) * R/plotpart.q: Replace invisible(return(x)) by return(invisible(x)) Replace `pick <- 2' by `pick <- 4'. (Undo when clusplots really work.) In code for clusplot.default(), remove cmd() and replace x1 <- cmd(x, k = 2, eig = T, add = T) if(x1$ac < 0) x1 <- cmd(x, k = 2, eig = T) by x1 <- cmdscale(x, k = 2, eig = T) (Unfix when we have the `add' argument to cmdscale().) Replace `rep.int' by `rep'. Replace `text.default' by `text'. * R/zzz.R: Add .First.lib <- function(lib, pkg) { require(mva) library.dynam("cluster", pkg, lib) assign("plclust", .Alias(plot.hclust), pos = "package:cluster") } * src/mona.f: Replace CHARACTER KX(NN,JPP),NZF by INTEGER KX(NN,JPP),NZF Change all '0' to 0. Change all '1' to 1. * R/daisy.q: * src/daisy.f: (BDR) Rewrite to pass integers rather than C character strings to Fortran. 
************************************************************************ The directory `man' contains R documentation sources converted via `Sd2Rd -x' from the S documentation sources. (In earlier versions, it was helpful to run `.CONV/FIXME.pl' before converting.) * man/fanny.Rd: Replace the displayed equation by \deqn{\sum_{v=1}^k \frac{\sum_{i=1}^n\sum_{j=1}^n u_{iv}^2 u_{jv}^2 d(i,j)}{ 2 \sum_{j=1}^n u_{jv}^2}}{ SUM_v (SUM_(i,j) u(i,v)^2 u(j,v)^2 d(i,j)) / (2 SUM_j u(j,v)^2)} All examples hand-edited! ************************************************************************ ============== Martin Maechler (many things are in Changelog!) =========== src/clara.f : ~~~~~~~~~~~ *) to lowercase only : tr A-Z a-z < clara.f.~1~ > clara.f.~2~ 1) to lowercase and change initial comments to 'c' (because of Emacs' indentation): tr A-Z a-z < clara.f.~1~ | sed '/^cc/s//c/'> clara.f.~2~ 2) Inside Emacs of clara.f.~2~ C-x C-w (write-file "clara.f.~3~") Repeat about 6 times M-C-q (fortran-indent-subprogram) M-C-e (end-of-fortran-subprogram) M-> (end-of-buffer) C-x C-o (delete-blank-lines) C-x h (mark-whole-buffer) M-x tabify (tabify (point-min) (point-max)) cluster/NAMESPACE0000644000176000001440000000404511713745006013211 0ustar ripleyusersuseDynLib(cluster, .registration=TRUE) ## S3 Generics: export(clusplot, pltree, silhouette, volume) ## Normal functions (non-generics, non-methods): ## The original constructors: export(agnes, clara, daisy, diana, fanny, mona, pam) ## and the rest export(bannerplot, ellipsoidhull, ellipsoidPoints, clusGap, maxSE, lower.to.upper.tri.inds, upper.to.lower.tri.inds, meanabsdev, sizeDiss, sortSilhouette) ## Methods also useful as 'standalone functions': export(predict.ellipsoid) export(coef.hclust, coefHier) ## things from (ex-)mva -- not really needed.. 
#importFrom(stats, as.hclust, as.dist, as.dendrogram, cutree) importFrom(stats, as.hclust, as.dendrogram) importFrom(graphics, plot) importFrom(grDevices, dev.interactive) ###---- Methods ---- all documented but not exported ## of own generics S3method(clusplot, default) S3method(clusplot, partition) S3method(pltree, twins) S3method(silhouette, default) S3method(silhouette, clara) S3method(silhouette, partition) S3method(volume, ellipsoid) ## register all the method for generics elsewhere in case namespace is ## loaded but not currently attached. S3method(as.dendrogram, twins) S3method(coef, hclust) S3method(coef, twins) S3method(plot, agnes) S3method(plot, diana) S3method(plot, mona) S3method(plot, partition) S3method(plot, silhouette) S3method(plot, clusGap) #exported: S3method(predict, ellipsoid) S3method(print, agnes) S3method(print, clara) S3method(print, diana) S3method(print, dissimilarity) S3method(print, ellipsoid) S3method(print, fanny) S3method(print, mona) S3method(print, pam) S3method(print, clusGap) S3method(print, summary.agnes) S3method(print, summary.clara) S3method(print, summary.diana) S3method(print, summary.dissimilarity) S3method(print, summary.fanny) S3method(print, summary.mona) S3method(print, summary.pam) S3method(print, summary.silhouette) S3method(summary, agnes) S3method(summary, clara) S3method(summary, diana) S3method(summary, dissimilarity) S3method(summary, fanny) S3method(summary, mona) S3method(summary, pam) S3method(summary, silhouette) cluster/INDEX0000644000176000001440000000526710366141463012573 0ustar ripleyusersagnes Agglomerative Nesting clara Clustering Large Applications daisy Dissimilarity Matrix Calculation diana DIvisive ANAlysis Clustering fanny Fuzzy Analysis Clustering mona MONothetic Analysis Clustering of Binary Variables pam Partitioning Around Medoids dissimilarity.object Dissimilarity Matrix Object partition.object Partitioning Object twins.object Hierarchical Clustering Object agnes.object Agglomerative 
Nesting (AGNES) Object clara.object Clustering Large Applications (CLARA) Object diana.object Divisive Analysis (DIANA) Object fanny.object Fuzzy Analysis (FANNY) Object mona.object Monothetic Analysis (MONA) Object pam.object Partitioning Around Medoids (PAM) Object sizeDiss Sample Size of Dissimilarity Like Object clusplot Cluster Plot - Generic Function clusplot.default Bivariate Cluster Plot (Clusplot) Default Method clusplot.partition Bivariate Clusplot of a Partitioning Object coef.hclust Agglomerative Coefficient for 'hclust' Objects pltree Clustering Trees - Generic Function pltree.twins Clustering Tree of a Hierarchical Clustering bannerplot Plot Banner (of Hierarchical Clustering) silhouette Compute or Extract Silhouette Information from Clustering ellipsoidhull Compute the Ellipsoid Hull or Spanning Ellipsoid of a Point Set predict.ellipsoid Predict Method for Ellipsoid Objects volume.ellipsoid Compute the Volume of Planar Object lower.to.upper.tri.inds Permute Indices for Triangular Matrices plot.agnes Plots of an Agglomerative Hierarchical Clustering plot.diana Plots of a Divisive Hierarchical Clustering plot.mona Banner of Monothetic Divisive Hierarchical Clusterings plot.partition Plot of a Partition of the Data Set print.dissimilarity Print and Summary Methods for Dissimilarity Objects print.agnes Print Method for AGNES Objects print.clara Print Method for CLARA Objects print.diana Print Method for DIANA Objects print.fanny Print Method for FANNY Objects print.mona Print Method for MONA Objects print.pam Print Method for PAM Objects summary.agnes Summary Method for 'agnes' Objects summary.clara Summary Method for 'clara' Objects summary.diana Summary Method for 'diana' Objects summary.fanny Summary Method for 'fanny' Objects summary.mona Summary Method for 'mona' Objects summary.pam Summary Method for PAM Objects cluster-internal Internal cluster functions DATASETS agriculture European Union Agricultural Workforces animals Attributes of Animals 
flower Flower Characteristics pluton Isotopic Composition Plutonium Batches ruspini Ruspini Data votes.repub Votes for Republican Candidate in Presidential Elections xclara Bivariate Data Set with 3 Clusters cluster/DESCRIPTION0000644000176000001440000000150412124341736013474 0ustar ripleyusersPackage: cluster Version: 1.14.4 Date: 2013-03-26 Priority: recommended Author: Martin Maechler, based on S original by Peter Rousseeuw , Anja.Struyf@uia.ua.ac.be and Mia.Hubert@uia.ua.ac.be, and initial R port by Kurt.Hornik@R-project.org Maintainer: Martin Maechler Title: Cluster Analysis Extended Rousseeuw et al. Description: Cluster Analysis, extended original from Peter Rousseeuw, Anja Struyf and Mia Hubert. Depends: R (>= 2.10.0), stats, graphics, utils Suggests: MASS SuggestsNote: two small examples using MASS' cov.rob() and mvrnorm() LazyLoad: yes LazyData: yes ByteCompile: yes BuildResaveData: no License: GPL (>= 2) Packaged: 2013-03-26 15:11:50 UTC; maechler NeedsCompilation: yes Repository: CRAN Date/Publication: 2013-03-26 16:50:54 cluster/ChangeLog0000644000176000001440000014350611713745006013552 0ustar ripleyusers2012-02-06 Martin Maechler * R/clusGap.R (maxSE): with 5 methods to compute "maximal gap", used also in print() method. 2012-01-21 Martin Maechler * R/clusGap.R (print.clusGap, plot.clusGap): added 2012-01-17 Martin Maechler * R/clusGap.R: new clusGap() for computing the cluster gap statistic. 2011-12-21 Martin Maechler * R/coef.R (coefHier): newly export coefHier(), interfacing to C's bncoef(). * tests/diana-boots.R (dianaBoot): add a "bootstrap", for considerably more tests. * tests/diana-ex.R: reorganized * src/twins.c: newly translated from Fortran, for more future flexibility * src/twins.f: no longer there * R/agnes.q (as.dendrogram.twins): define for 'twins' rather than just 'agnes'. 2011-12-03 Martin Maechler * R/plotpart.q (clusplot.default): xlab, and ylab arguments; new 's.x.2d' argument which allows to specify the plot setup in details. 
(clas.snijpunt): also check for NAs in x[1:2,m]. Suggestions originally from Peter Ruckdeschel 2011-10-16 Martin Maechler * po/: setup of for translation 2011-09-14 Martin Maechler * inst/CITATION: update according to state-of-the-art. 2011-08-29 Martin Maechler * DESCRIPTION (Version): 1.14.1, released to CRAN, 2011-10-16 * R/silhouette.R (plot.silhouette): col = had ordering bug. * man/silhouette.Rd: added example 2011-06-07 Martin Maechler * DESCRIPTION (Version): 1.14.0, released to CRAN * R/plotpart.q (clusplot.default): use message() and warning() appropriately instead of just cat(). Further, add a 'add = FALSE' argument which allows *adding* ellipses to an existing plot. 2011-06-06 Martin Maechler * R/plotpart.q (clusplot.default): new 'cex' and 'cex.txt', the latter for the "point" labels. 2011-05-20 Martin Maechler * src/pam.c (cl_pam): trace_lev: also trace the computation of dissimilarities (a very expensive part, for large n). * R/pam.q (pam): cluster.only=TRUE: if the *computed* dissimilarities have NA's the necessary error was not signalled. 2011-03-07 Martin Maechler * tests/*.Rout.save: update after R-devel's "scientific()" patch. * DESCRIPTION (Version): 1.13.4 2011-02-21 Martin Maechler * DESCRIPTION (Version): 1.13.3, released to CRAN 2010-12-12 Martin Maechler * R/ellipsoidhull.R, *: use "full argument names" 2010-11-09 Martin Maechler * R/clara.q (clara): enforce sampsize > k {as the help file always said}; allow to disable 'DUP=FALSE' via env variable. 2010-11-08 Martin Maechler * src/clara.c (cl_clara): inserting rand_k into nsel[]: simplified and avoiding (typically inconsequential) out_of_bound array access. 2010-10-21 Martin Maechler * DESCRIPTION (Version): 1.13.2 ; Enhances: MASS * src/cluster.h, src/daisy.f, src/mona.f: * src/init.c (FortEntries): s/cl_/cl/ for now, as some f77/g77 setups cannot deal with it correctly. 
2010-06-24 Martin Maechler * DESCRIPTION (Version): 1.13.1 2010-06-23 Martin Maechler * man/daisy.Rd: add our *generalized* Gower formula (LaTeX), and update the detailled description accordingly. 2010-06-02 Martin Maechler * DESCRIPTION (Version): 1.13.0 * src/daisy.f (cl_daisy): renamed from daisy * src/init.c, src/*.[chf]: renamed others, so we can use * NAMESPACE (useDynLib): .registration = TRUE 2010-05-28 Martin Maechler * R/daisy.q (daisy): using variable weights (for gower). * src/daisy.f: ditto 2010-03-29 Martin Maechler * DESCRIPTION (Version): 1.12.3 * R/plothier.q (bannerplot): do not use formals(pretty) 2009-12-05 Martin Maechler * man/daisy.Rd: fix page number for Gower (1971) reference. 2009-10-05 Martin Maechler * DESCRIPTION (Version): 1.12.1 * man/ellipsoidhull.Rd: fix missing \link ((parts former 'graphics' have long been moved to 'grDevices'. 2009-05-13 Martin Maechler * DESCRIPTION (Version): 1.12.0 -- released to CRAN * R/silhouette.R (silhouette.default, print.summary.*, plot.*): silhouette(x, *) and its methods now also work when x uses integer codes different than 1:k. Previously, this could seg.fault. * tests/silhouette-default.R: test it. 2009-03-12 Martin Maechler * man/daisy.Rd: fix typo/thinko (thanks to Felix Mueller). 2009-01-21 Martin Maechler * R/agnes.q (as.dendrogram.agnes): new utility * NAMESPACE: exported 2009-01-17 Martin Maechler * man/plot.agnes.Rd: link to pltree.twins() directly * R/agnes.q (print.summary.agnes): output 'agnes' instead of `agnes' 2009-01-07 Martin Maechler * DESCRIPTION (Version, Date): 1.11.12 and CRAN-released 2009-01-05 Martin Maechler * man/ellipsoidhull.Rd: fix for more stringent Rd parsing 2008-09-16 Martin Maechler * R/ellipsoidhull.R (print.ellipsoid): and others: replace backquotes ( ` ) by regular quotes. 
2008-06-15 Martin Maechler * R/silhouette.R (sortSilhouette): do *not* overwrite the correctly reordered rownames: Fixes very long standing bug which lead to wrong observation labels in silhouette plots. 2008-04-10 Martin Maechler * R/zzz.R (.onUnload): add ..dynam.unload()ing 2008-02-29 Martin Maechler * DESCRIPTION (Date, Version): 1.11.10 and release 2007-12-19 Martin Maechler * src/pam.c (cstat, dark): move variable declarations to local context. (bswap): add a few R_CheckUserInterrupt() calls * R/pam.q (pam): new argument 'do.swap = TRUE'. Setting it to FALSE allows to skip the more expensive "SWAP" phase. * man/pam.Rd: (ditto) 2007-12-18 Martin Maechler * src/ind_2.h (ind_2): for m := max(i,j) > 46342, (m-2)*(m-1) give integer overflow which leads to a seg.fault. Now fixed. * src/pam.c (bswap): slightly improve trace output. * tests/pam.R, tests/pam.Rout.save: corresponding slight adaptions. * man/pam.Rd: "large" and * man/clara.Rd: "small" datasets; pam() works with thousands ... * R/clara.q (clara): error out when users try clara(, ...). 2007-09-28 Martin Maechler * DESCRIPTION (Date, Version): 1.11.9 * man/pam.Rd: in invisible example, do not use partially matched [[. 2007-09-04 Martin Maechler * src/mona.f (mona): dummy initializations for "-Wall" * src/fanny.c (fygur): dummy init 'lang' 2007-08-23 Martin Maechler * src/daisy.f et al: de-tabify the fortran sources 2007-06-05 Martin Maechler * DESCRIPTION (Version): 1.11.7 (mainly for R-devel) 2007-06-02 Martin Maechler * man/plot.diana.Rd, etc: s/dendrogramm/dendrogram/ 2007-05-31 Martin Maechler * man/plantTraits.Rd: add \encoding{latin1} 2007-04-09 Martin Maechler * DESCRIPTION (Version): 1.11.6 - needed for R version >= 2.6.0. * R/daisy.q (daisy): for 2-valued variables only warn about non-specified type when n >= 10. 2007-04-07 Martin Maechler * R/diana.q (diana): don't use as.double(.) and DUP=FALSE ! 
2007-03-30 Martin Maechler * man/diana.Rd: merge man/diana.object.Rd into *one* help file * DESCRIPTION (Version): 1.11.5 - for R 2.5.0 2007-03-29 Martin Maechler * R/silhouette.R (silhouette.default): use x$clustering only when x is a list. 2006-12-11 Martin Maechler * DESCRIPTION (Version): 1.11.4 -- ready * tests/to-25x, tests/to-24x: different outputs for R 2.5.0 (unstable) * tests/clara.Rout.save-R25x,..: and R-2.4.x * tests/silhouette-default.R: use [as.integer(rownames(.)), ] since we cannot guarantee rownames there. 2006-12-05 Martin Maechler * src/fanny.c (fuzzy): 14 instead of 15 sign.digits for trace_lev >= 2 printing. * R/clara.q (stop): message for NA-observations: only show first dozen of observation numbers. * tests/clara-NAs.R: test the above * tests/clara-NAs.Rout.save: ditto 2006-12-02 Martin Maechler * DESCRIPTION (Version): 1.11.3 -- released to CRAN * R/daisy.q (daisy): finish metric "gower", and test an example 2006-12-01 Martin Maechler * man/daisy.Rd: more content about Gower; thanks to Gavin Simpson 2006-11-23 Martin Maechler * R/daisy.q (daisy): metric = "gower" -- not yet finished 2006-09-07 Martin Maechler * DESCRIPTION (Version): 1.11.2 released * src/init.c: register dysta3 as C, not Fortran; drop trailing "_" ==> need to adapt these: * src/fanny.c: * src/cluster.h: * tests/dysta-ex.R: * tests/dysta-ex.Rout.save: 2006-08-24 Martin Maechler * DESCRIPTION (Version): 1.11.1 released to CRAN * R/silhouette.R (sortSilhouette): make sure "iOrd" is there also if "Ordered" is TRUE. (fixes plotting w/ col = 'clusterwise'.) 2006-08-23 Martin Maechler * tests/mona.R: update (slightly extend) regression test * tests/mona.Rout.save: consequence * R/mona.q (mona): replace apply(, 2, factor) with safer code, to work correctly in R 2.4.0 and later (thanks to Brian). 2006-06-12 Martin Maechler * R/plotpart.q (clusplot.default): new argument 'sub'; default is unchanged, but you can suppress the subtitle. 
2006-05-17 Martin Maechler * src/fanny.c (fygur): a little more cleanup -- for compatibility with dark() in src/pam.c * DESCRIPTION (Version): 1.11.0, to be released to CRAN 2006-05-03 Martin Maechler * DESCRIPTION (Depends): require R >= 2.2.1 -- which should have happened earlier, since registration of Fortran symbols failed before. 2006-04-18 Martin Maechler * man/plantTraits.Rd: new dataset with many variables, including all kinds of factors, provided by Jeanne Vallet. * data/plantTraits.rda: useful for daisy() illustrations 2006-04-12 Martin Maechler * R/fanny.q (fanny): several new options: metric = "SqEuclidean", iniMem.p: initial membership matrix can be specified; cluster.only, keep.diss, keep.data: for saving memory/speed. warning when k.crisp < k. printing of $membership is now in rounded percent * man/fanny.Rd: document the above * tests/fanny-ex.R: much extended 2006-04-11 Martin Maechler * man/chorSub.Rd: new dataset; * data/chorSub.Rd: useful for fanny() illustrations * R/fanny.q (fanny): Dunn's partition coefficient (and its normalization) now always use sum.. u.. ^ 2 (and not "^ memb.exp"). option 'trace.lev' for iteration monitoring * src/fanny.c (fuzzy): (dito) 2006-04-08 Martin Maechler * src/fanny.c: Translated from Fortran (fanny.f); potentially with more options, etc. 2006-03-20 Martin Maechler * DESCRIPTION (Version): 1.10.5 for CRAN * .rsync-exclude: new file useful since I moved my RCS archive to SVN (using 'cvs2svn' and a perl script on 'svn-stat'). 2006-02-27 Martin Maechler * src/sildist.c (sildist): 2nd round (by Romain and me) 2006-02-25 Martin Maechler * R/silhouette.R (silhouette.default): new C version in file * src/sildist.c (sildist): provided by Romain Francois 2006-02-04 Martin Maechler * R/ellipsoidhull.R (print.ellipsoid): also work without 'conv' slot * man/ellipsoidhull.Rd: finally explain 'd2'; and \link to other ellipse implementations. 
* man/volume.ellipsoid.Rd: + example of 'ellipsoid' construction 2006-01-31 Martin Maechler * man/bannerplot.Rd: (et al.): use \dQuote{} and \sQuote{} 2006-01-27 Martin Maechler * man/pltree.twins.Rd: explain the as.hclust() dispatch; add example, particularly for plot(as.dendrogram(.)). 2006-01-26 Martin Maechler * DESCRIPTION (Version): 1.10.4 -- released to CRAN 2006-01-25 Martin Maechler * R/silhouette.R (silhouette.clara): added (for "full = TRUE" option) 2006-01-07 Martin Maechler * DESCRIPTION (Version): 1.10.3 * src/init.c: registering all (R-callable) C and Fortran routines * src/cluster.h: declare the remaining callable routines 2005-12-30 Martin Maechler * inst/CITATION: first version * R/silhouette.R (plot.silhouette): 'nmax.lab' was not obeyed properly (labels were drawn for large n when the silhouette had rownames). * man/silhouette.Rd: explain clara's silhouette and "full" one. 2005-09-06 Martin Maechler * man/pluton.Rd: \source: new URL @ www.agoras.ua.ac.be * man/clusplot.default.Rd: dito * man/ellipsoidhull.Rd: "" * README: "" 2005-08-31 Martin Maechler * DESCRIPTION (Version): 1.10.2 released to CRAN * src/clara.c (bswap2): minor cosmetic 2005-08-30 Martin Maechler * R/daisy.q (daisy): the case of a *binary*-only matrix (not data frame), used to fail, now works fine, tested in * tests/daisy-ex.R: * src/twins.f: get rid of gfortran warnings * src/mona.f: dito * src/fanny.f: " * src/meet.f: " 2005-08-06 Martin Maechler * tests/clara*: adapt to slightly changed output * src/clara.c (clara), (bswap2): better trace_lev printing; code more in line with 'pam'. 2005-08-05 Martin Maechler * DESCRIPTION (Version): 1.10.2 {not released} * R/clara.q (clara): no longer support deprecated 'keepdata'; also return 'i.med'; new argument 'medoids.x' allowing to save even more memory; do not assign 'data' when keep.data=FALSE. 
* man/clara.Rd: + 'i.med' + 'medoids.x' - 'keepdata' * man/clara.object.Rd: dito 2005-07-19 Martin Maechler * tests/silhouette-default.R: if(FALSE) library(*, lib="..MM..") 2005-06-30 Martin Maechler * tests/silhouette-default.R (isTRUE): need for R < 2.1.x 2005-06-29 Martin Maechler * DESCRIPTION (Date): * DESCRIPTION (Version): 1.10.1 * R/silhouette.R (plot.silhouette): format() clus.avg.widths more nicely * R/silhouette.R (silhouette.default): don't use max.col() which returns random. * tests/silhouette-default.R: test only 3rd column difference * tests/silhouette-default.Rout: update {for non-random sil*.default()} 2005-06-20 Martin Maechler * tests/silhouette-default.R: added test for equivalence of silhouette.default() and silhouette.partition() * tests/silhouette-default.Rout.save: new output --> The silhouette() results are *still* not always the same (-> fixed) * R/silhouette.R (silhouette.default): if Nj := #{obs. in C_j} = 1 the silhouette width is 0, not 1, as in pam's original code. * man/silhouette.Rd: document the case of 1-obs-clusters. 2005-06-09 Martin Maechler * DESCRIPTION (Version): 1.10.0 2005-06-08 Martin Maechler * R/pam.q (pam): new argument 'trace.lev = 0' * man/pam.object.Rd: pam() also return $medID * src/pam.c (dark): take special care of empty clusters (ntt == 0) * src/pam.c (pam): also work when medoids are not increasing. * R/pam.q (pam): user-supplied 'medoids': even more checks 2005-06-02 Martin Maechler * tests/fanny-ex.R: use new RNG (hence change example); add samples of memb.exp -> 1 * DESCRIPTION (Author): new formulation for better auto-citation() * src/fanny.f (fanny): 'r' ("membership exponent") becomes user-specifiable argument; dito for 'maxit' * R/fanny.q (fanny): new arguments 'memb.exp' ("membership exponent"); 'tol' and 'maxit' * R/fanny.q (.print.fanny): utility to be used in both print.*() methods; nicer, using formatC() and 'digits' explicitly. 
* man/fanny.Rd: new arguments, see above * man/fanny.object.Rd: mention new '$ objective' result component * man/print.fanny.Rd: now also for [print.]summary.fanny(); mention new explicit 'digits' argument to print methods. 2005-04-04 Martin Maechler * DESCRIPTION (Version): 1.9.8 * R/daisy.q (print.dissimilarity): rewritten, now based on print.hclust(). * R/zzz.R (gettextf): ..etc: define substitutes such as to allow running in R < 2.1.0 * man/ellipsoidhull.Rd: fixed outdated "link[pkg]" * man/clusplot.default.Rd: dito * man/*.Rd : etc 2005-03-22 Martin Maechler * R/clara.q (clara): start to 'rationalize' messages for future translation. * src/cluster.h: define the _(String) macro (for i18n), not yet used. 2005-01-21 Martin Maechler * man/clara.Rd: get rid of iso-latin1 character 2004-12-14 Martin Maechler * man/agnes.Rd: concept{UPGMA ..} 2004-11-27 Martin Maechler * DESCRIPTION (Version): 1.9.7 (not yet CRAN) * R/plothier.q (bannerplot): ouse default 'main' and 'sub' * man/bannerplot.Rd: + example * R/coef.R (coef.hclust): Implement R function for computing the agglomerative/divisive coefficient. * man/coef.hclust.Rd: new; including example * man/predict.ellipsoid.Rd: wrong link 2004-11-26 Martin Maechler * src/twins.f (bncoef): drop unused argument 'ner' 2004-11-20 Martin Maechler * man/clusplot.default.Rd: fix typo 2004-10-28 Martin Maechler * man/predict.ellipsoid.Rd: add nice example 2004-08-19 Martin Maechler * DESCRIPTION (LazyData): yes 2004-08-12 Martin Maechler * DESCRIPTION (Version): 1.9.6 (Depends): added standard packages * R/daisy.q (daisy): scale=TRUE didn't collaborate with NAs (since 2003-11-29). 
* tests/daisy-ex.R: add an example, testing the above 2004-08-03 Martin Maechler * DESCRIPTION (Version): 1.9.5 --> to CRAN 2004-07-21 Martin Maechler * man/mona.Rd: drop an extraneous "}\n" that Kurt's new tools found * man/twins.object.Rd: dito 2004-07-06 Martin Maechler * DESCRIPTION (Version): 1.9.5 (not yet released) * man/pam.Rd: note on 'medoids' 2004-07-02 Martin Maechler * R/pam.q: new argument 'medoids' * man/pam.Rd: ex.with 'medoids' * src/pam.c (bswap): new argument 'med_given' 2004-06-29 Martin Maechler * src/pam.c (bswap): moved ammax/nmax computation into same loop as beter[]. added many comments. GOAL: Allow initial medoids user-specification. >>> TODO: do the analogous thing in src/clara.c 's bswap2() 2004-06-24 Martin Maechler * DESCRIPTION (Version): 1.9.4 -- fix dysta() using jtmd[] problems (thanks to valgrind and BDR). * R/agnes.q (agnes): initialize 'jtmd' to length p in any case * R/diana.q (diana): ditto * R/fanny.q (fanny): " * R/pam.q (pam): " * R/daisy.q (daisy): pass 'mdata' to Fortran * src/daisy.f (daisy): new "boolean" argument 'mdata' * src/clara.c (clara): pass has_NA to dysta2() and don't use jtmd[] and valmd[] if(!has_NA) * src/cluster.h (dysta2): new arg 'has_NA' 2004-06-18 Martin Maechler * DESCRIPTION (Version): 1.9.3 {only change = below} * src/ind_2.h (ind_2): use __inline__ only for gcc (__GNUC__) 2004-06-11 Martin Maechler * man/clara.Rd: finish \note{} about 'rngR' (thanks to Christian H). 2004-06-01 Martin Maechler * src/ind_2.h: new file for __inline__ definition of ind_2(), #included from these two files: * src/clara.c: * src/pam.c: * tests/agnes-ex.R: test for "weighted" == "flexible"(0.5) 2004-05-28 Martin Maechler * DESCRIPTION (Version): 1.9.2 (not yet released) * R/agnes.q (agnes): New method = "flexible" (and 'par.method = ..') * src/twins.f (averl): implementing "flexible" Lance-Williams formula. 
* man/agnes.Rd: 2004-05-25 Martin Maechler * R/silhouette.R (silhouette.default): give silhouette 0 (not NaN) when a_i = b_i {thanks to example and suggestions by Witek Wolsky}. 2004-04-26 Martin Maechler * src/pam.c: cosmetic efficiency improvement from Li Long 2004-03-11 Martin Maechler * DESCRIPTION (Version): 1.8.1 for R versions <= 1.8.1 1.9.1 for R versions >= 1.9.0 -- released to CRAN * src/clara.c (clara): fixed second NA-handling bug: the test should be "nunfs+1 != jran" Fixed "rngR = TRUE" bug: accidentally still used internal RNG. Fixed 3rd bug if (lrg_sam) : need jk < n_sam for nsel[jk]. More & nicer printing for 'trace > 0'. * R/clara.q (clara): more useful error msg for too many NAs (jstop=1) 2004-03-10 Martin Maechler * src/clara.c (clara): finally fixed longstanding bug which could cause wrong results and segmentation faults, instead of ending with the following error: * tests/clara-NAs.R: new file w/ former seg.fault * src/cluster.h (ind_2): new function instead of F77_CALL(meet) * src/clara.c (clara): defined here (and used). * src/pam.c (pam): used here 2004-03-09 Martin Maechler * R/clara.q (clara): sampsize = min(n, ) 2004-03-08 Martin Maechler * tests/agnes-ex.R: do not use object.size(), since * tests/diana-ex.R: it's different on 64 bit, e.g. * R/agnes.q (agnes): \ when diss = TRUE, * R/diana.q (diana): \ but 'x' is not a "dissimilarity", * R/fanny.q (fanny): / try as.dist(x) * R/pam.q (pam): / and be consistent in all 4 functions 2004-01-23 Martin Maechler * R/silhouette.R (silhouette.default): do not require cluster codes in 1:k (Murad Nayal ). * man/silhouette.Rd: accordingly; mention 2 <= k <= n-1 2003-12-22 Martin Maechler * DESCRIPTION (Version): 1.8.0 for R versions <= 1.8.1 1.9.0 for R versions >= 1.9.0 2003-12-19 Martin Maechler * NAMESPACE: Finally make a namespaced package 2003-12-01 Martin Maechler * R/pam.q (print.summary.pam): readable indication *no* L/L* clusters. (print.pam): improve as well. 
* man/pam.Rd: documenting `cluster.only' 2003-11-29 Martin Maechler * R/daisy.q (daisy): better checking & handling if(stand) * R/pam.q (pam): new argument `cluster.only = FALSE'. If true, only return the clustering vector, no other info. * src/pam.c: translated from src/pam.f and "cleaned"; `all_stats' := NOT(cluster_only). * src/cluster.h: new pam.c related headers * src/dysta.f (dysta): new file: part of former src/pam.f * R/pam.q (pam): use .C 2003-11-28 Martin Maechler * R/daisy.q (daisy): fix the new scale 0 code (from 11-17) 2003-11-17 Martin Maechler * DESCRIPTION (Version): 1.7.7 -- (not released) * tests/daisy-ex.R: new test for this: * R/daisy.q (daisy): scale 0 does not give NaN dissimilarities anymore. Use pColl() in messages * R/zzz.R (sQuote): & dQuote() for R version < 1.8 2003-10-25 Martin Maechler * man/fanny.Rd: typo in the text-part of \deqn{} 2003-09-24 Martin Maechler * DESCRIPTION (Date): Sept.24: 20th anniversary w/ Lilo! -- release * man/plot.mona.Rd: yet another codoc 2003-09-23 Martin Maechler * DESCRIPTION (Version): 1.7.6 (for R 1.8.0) * man/daisy.Rd: codoc difference for argument *defaults* * man/ellipsoidhull.Rd: " " " " * man/plot.mona.Rd: " " " " * man/plot.diana.Rd: " " " " use \method{..}{..} (more!) also for these: * man/predict.ellipsoid.Rd * man/silhouette.Rd: * man/plot.agnes.Rd: * man/plot.partition.Rd: * man/ellipsoidhull.Rd: 2003-09-03 Martin Maechler * DESCRIPTION (Date): -> release 2003-08-27 Martin Maechler * man/flower.Rd: use V1-V8 in doc (since can't change the data). 2003-08-19 Martin Maechler * DESCRIPTION (Version): 1.7.5 -- not yet released 2003-08-13 Martin Maechler __ All these are thanks to Luke Tierney's checkUsagePackage() !! 
__ * R/silhouette.R (plot.silhouette): `clw' (sortSilhouette): `clid' & `k' (summary.silhouette): `n' * R/ellipsoidhull.R (ellipsoidhull): `ina' unneeded * R/plotpart.q (clusplot.default): extraneous if() in "funny case" 2003-07-18 Martin Maechler * DESCRIPTION (Date): updated --> release to CRAN * R/daisy.q (daisy): checking "> 2 levels" for binary vars (gave wrong error when only 1 level; thanks to Duncan.Mackay@flinders.edu.au). Now allow "only 1 level" for binary vars 2003-07-10 Martin Maechler * DESCRIPTION (Date): update; not released, but put up on FTP * R/silhouette.R (sortSilhouette): keep ordering; use it to * R/silhouette.R (plot.silhouette): order colors, allow cluster colors 2003-07-09 Martin Maechler * DESCRIPTION (Version): 1.7.4 -- not yet released * R/daisy.q (daisy): better error message for invalid type components; now also works for * man/daisy.Rd: new example `dfl3' 2003-06-10 Martin Maechler * DESCRIPTION (Version): 1.7.3 * R/silhouette.R (silhouette.default): fix bugs for case "Nj == 1" * tests/silhouette-default.Rout.save: * tests/silhouette-default.R: new test for the above 2003-06-04 Martin Maechler * tests/clara-ex.R and man/clara.Rd: add "drop = FALSE" 2003-06-03 Martin Maechler * man/clara.Rd: the "simulation" example is now correct for any seed. * tests/clara-ex.R: using "correct" ex. above 2003-06-02 Martin Maechler * R/daisy.q (daisy): s/codes/unclass/ * tests/ellipsoid-ex.R: no need for try() anymore; use new RNG * tests/clara.R: * tests/clara-ex.R: better if(...) RNGdefault("1.6") * tests/fanny-ex.R: * tests/mona.R: 2003-05-28 Martin Maechler * DESCRIPTION (Version): 1.7.2 * R/plotpart.q: clusplot.partition(): try to find `x$diss' by looking up x$call. 2003-04-30 Martin Maechler * DESCRIPTION (Version): 1.7.1 * man/pam.object.Rd: add example about assessing #{clusters} via silhouette widths from Ch.Hennig. 
* R/plotpart.q (plot.partition): new argument `dist' It doesn't try a clusplot if `keep.diss' was FALSE and no `dist' is specified. * R/plotpart.q (clusplot.partition): new `dist' argument * R/pam.q (pam): keep.diss passed wrong `jdyss' to Fortran! 2003-04-08 Martin Maechler * R/plothier.q (pltree.twins): simplified label construction; call plot( ) instead of plot.hclust(). 2003-03-26 Martin Maechler * tests/clara-ex.R: new, because of unclear non-reproducibility * tests/clara.R: not example(clara) and smaller, moved some to above * DESCRIPTION (Version): 1.7.0 (for R 1.7.0) * DESCRIPTION (Depends): at least R 1.4 * R/zzz.R: requiring 1.4 needs much less * R/zzz.R: define colSums() substitute for old R versions * tests/ : updated *.Rout.save files * .Rbuildignore: R/README-Splus is way outdated and irrelevant 2003-03-17 Martin Maechler * R/0aaa.R: Make sure assigning class c("dissimilarity", "dist") (had many cases where only "dissim.." was used!). * R/*.q: assign (dissim.., dist) class 2003-03-11 Martin Maechler * R/clara.q (clara): new argument `rngR' allowing to use R's RNG instead of the primitive builtin randm(). * man/clara.Rd: example showing its use. * src/clara.c (clara): * R/pam.q (pam): new `keep.data' and DUP=FALSE in .Fortran 2003-03-05 Martin Maechler * R/agnes.q (agnes): use DUP = FALSE * R/diana.q (diana): for both "twins()" routines --> 3x less memory! 2003-02-22 Martin Maechler * R/clara.q (clara): do *not* transpose the x[,] matrix anymore * src/clara.c (clara): C code accesses un-transposed x[,] \ --> + 10% speed 2003-02-07 Martin Maechler * R/silhouette.R (silhouette.default): fixed k=2 case; * man/silhouette.Rd: new argument `dmatrix'. 2003-02-06 Martin Maechler * man/plot.partition.Rd: new `main' argument (instead of "...") * R/plotpart.q: passed to plot.silhouette(). 2003-01-27 Martin Maechler * R/agnes.q (agnes): store `method' with object for as.hclust.twins()! 
\ new logical args `keep.diss' and `keep.data' * R/diana.q (diana): > if (!keep.diss), do not need "dis2" * src/twins.f (twins):/ and don't copy the dist()s here. * man/pltree.twins.Rd: mention new `ylab' argument. * R/plothier.q: __unfinished__ : xlab/ylab depend on "which.plot"! 2002-12-30 Martin Maechler * man/bannerplot.Rd: and * R/plothier.q (bannerplot): new `labels' e.g. passed from plot.agnes(). * man/plot.agnes.Rd: finally added `labels = ' example * tests/clara.R: for() loops were silly; + other ex + comments * src/clara.c: cosmetic * src/fanny.f: and * src/pam.f: cosmetic "syncing" of almost identical parts 2002-12-28 Martin Maechler * tests/clara.Rout.save: * tests/clara.R: add ruspini examples * src/clara.c: finally found & fixed "clara(ruspini, 4)" problem 2002-12-21 Martin Maechler * DESCRIPTION (Version): 1.6-4 {for R 1.6.2} * R/diana.q (diana): since 1.6-1, integer x was not coerced * tests/agnes-ex.R, * tests/diana-ex.R, and *.Rout.save: new test files * src/twins.f: some comments added; gotos removed banag() and bandy() were identical --> renamed to bncoef() 2002-12-17 Martin Maechler * R/agnes.q: agnes(*, method = "no") no longer segfaults * R/silhouette.R (plot.silhouette): adj=1.04 (not "1.05") * R/plothier.q (pltree.twins): new `labels' arg. such that it can be given to plot.agnes() e.g. 2002-12-05 Martin Maechler * DESCRIPTION (Date, Version): 1.6-3 * src/cluster.h: F77_NAME(meet) * src/clara.c: F77_CALL(meet) 2002-10-28 Martin Maechler * src/pam.f (pam): comments, (bswap): variable renaming 2002-10-26 Martin Maechler * DESCRIPTION (Version): 1.6-003 := pre-1.6-3 2002-10-23 Martin Maechler * R/agnes.q (print.summary.agnes): oops: had `clara' there! 2002-10-23 Martin Maechler * Version 1.6-2 ==> released to CRAN 2002-10-21 Martin Maechler * R/daisy.q (daisy): allow "+- Inf" in `x' (gave error from Fortran) 2002-10-19 Martin Maechler * R/silhouette.R (plot.silhouette): also use border = 0 and new default col = "gray". 
* R/plothier.q (bannerplot): arguments inside = . , border = 0 work with R 1.6.1's barplot() {give "not yet .." otherwise}. * tests/mona.R and mona.Rout.save: new tests * R/mona.q (mona): get rid of 1:nchar() warnings; other cosmetic * src/mona.f (mona): variable renaming; logical cleanup in NA dealing * R/zzz.R (stop): multi-argument version for R versions < 1.6 2002-10-18 Martin Maechler * R CMD check now okay. * man/plot.mona.Rd: update (for bannerplot() usage), same for * man/plot.agnes.Rd and * man/plot.diana.Rd. * R/plothier.q (bannerplot): finally working for all three; argument `xax.pretty = TRUE' (set to FALSE for old behavior). (plot.mona): using bannerplot(); {and is.na(.) <- .} 2002-10-16 Martin Maechler * R/plothier.q (bannerplot): newly standalone function, called from plot.diana(), plot.agnes() -- not yet plot.mona() 2002-10-15 Martin Maechler * DESCRIPTION (Version): 1.6-2 (not yet released) * R/plothier.q (plot.diana, bannerplot): rev(.) x-axis labels --> fixing bug introduced at/before version 1.2-5 * R/plothier.q (plot.agnes, bannerplot) and plot.diana: add "las = 2" to vertical axis(), and add space to right for plot.diana. 2002-09-12 Martin Maechler * DESCRIPTION (Date): -- released locally only 2002-09-11 Martin Maechler * R/clara.q (clara): for the case of k = 1 (and diss=FALSE), * R/pam.q (pam): medoids should stay matrix * src/pam.f (pam): put under RCS; replaced goto by if..else.. * man/pam.Rd: mention silhouette() directly * man/silhouette.Rd: document summary(); + 1ex 2002-09-09 Martin Maechler * DESCRIPTION (Version): 1.6-1 ==> for CRAN and R 1.6.0 2002-09-07 Martin Maechler * R/silhouette.R (silhouette): New class, generic generator, methods; particularly, plot.silhouette() was plot.partition internal. 
improve documentation about silhouette info; update * R/clara.q: * R/plotpart.q (plot.partition): 2002-09-06 Martin Maechler * man/partition.object.Rd, * man/pam.object.Rd, man/clara.object.Rd, man/fanny.object.Rd, * man/plot.partition.Rd: reorganize documentation on silhouette information, thanks to thoughts from Christian Hennig. 2002-09-04 Martin Maechler * man/agnes.object.Rd: mention as.hclust() properly * src/spannel.c (spannel): bail out also when *last* deter became <= 0 * tests/ellipsoid-ex.R (and *.Rout.save): updated * DESCRIPTION (Version): 1.6-0 * R/zzz.R: dev.interactive(), warnings(),... for old R versions * man/summary.clara.Rd: make example run for R versions < 1.5.0 2002-09-03 Martin Maechler * R/plotpart.q (plot.partition): new `data' argument and smart `which.plots' default for clara(*, keep = FALSE) * R/daisy.q (summary.dissimilarity): fix ``number of dissim'' * R/*.q: 1) use apply(inax,2, any)--- really want colNAs() 2) use better "valmisdat" (missing value). * tests/dysta-ex.R (dysta): new file, explore "dysta" & "dysta3" in Fortran * src/fanny.f: get rid of many gotos 2002-09-02 Martin Maechler * tests/fanny-ex.R: and ...Rout.save : new files * src/twins.f (twins): drop dysta4(), use dysta() from pam.f 2002-08-31 Martin Maechler * DESCRIPTION (Version): 1.5-4 -- have achieved something {want more} * src/clara.c (clara): finally found and fixed the rare segfault bug: jran == 1 was wrong test (when 1st sample was unusable) (randm): faster implementation of primitive RNG * src/spannel.c: new translated from src/spannel.f in order to debug the compiler-dependent outputs in * tests/ellipsoid-ex.Rout.save etc * tests/sweep-ex.R (.C) 2002-08-30 Martin Maechler * src/clara.c (clara): * man/clara.object.Rd: * R/clara.q: new argument `keepdata' and `trace' * R/plotpart.q (clusplot.default): xlim, ylim: believe user 2002-08-27 Martin Maechler * src/clara.c (clara): new file, translated from Fortran; easier to debug the (rare!) seg.fault. 
"make check fine" * DESCRIPTION (Depends): R >= 1.2 * R/plothier.q (pltree.twins): (drop code for R version < 1.2.0) 2002-08-23 Martin Maechler * DESCRIPTION (Version): 1.5-3 {released only locally} * R/plotpart.q (clusplot.default): xlim, ylim had "<" and ">" reversed * man/clusplot.default.Rd: fix interactive(), add x- and ylim example 2002-08-02 Martin Maechler * src/twins.f (twins) et al: * src/fanny.f (fanny) et al: * src/mona.f (mona): * src/daisy.f (daisy): explicit instead of "implicit" var.declaration * src/spannel.f (spannel) et al: no "implicit none" 2002-07-29 Martin Maechler * DESCRIPTION (Version): 1.5-2 * R/daisy.q (print.summary.dissimilarity): new function and summary.dissimilarity() now returns a classed object. * R/agnes.q (print.summary.agnes), * R/clara.q (print.summary.clara), * R/diana.q (print.summary.diana), * R/fanny.q (print.summary.fanny), * R/pam.q (print.summary.pam): print.summary.*() now only *summarizes* dissimilarities (if at all) 2002-07-27 Martin Maechler * tests/pam.R and tests/pam.Rout.save : new files * man/summary.agnes.Rd: + example * man/summary.clara.Rd: + example * R/clara.q (print.clara): improvements: call, no long clus. vector * R/agnes.q (print.agnes): similar * man/daisy.Rd : added "[mva]" to \link{}s. The same in: * man/clusplot.default.Rd: new `col.clus' argument, new option `labels = 5'. * R/plotpart.q (clusplot.default): cosmetical cleaning; `col.p' is now vectorized for point coloring. The cluster ellipse is now labeled with font=4 and proper color 2002-06-17 Martin Maechler * man/sizeDiss.Rd: fix \value{} * tests/daisy-ex.R: new file (and *.Rout.save) 2002-06-17 Martin Maechler * submit to CRAN -- won't be in R's 1.5.1-recommended * R/daisy.q (daisy): make sure "0","1" factors are valid binary vars several extra checks {incl apply() `bug' * man/diana.object.Rd: show how to use cutree(). 2002-05-22 Martin Maechler * R/daisy.q (daisy): warn if binary variables have non-{0,1} values. 
* src/pam.f (cstat) et al: eliminated many gotos; +comments * src/meet.f (meet): + comment 2002-05-21 Martin Maechler * DESCRIPTION (Version): 1.5-1 new daisy() behavior for binary variables * src/daisy.f (daisy): add comments; use if..else.. instead of goto * man/dissimilarity.object.Rd: new "Types" attribute in mixed case. * man/daisy.Rd: * R/daisy.q (daisy): fix data.class "integer", allow type = "symm"; return types used in mixed case; correctly modify jtmd[] for binary variables (!) 2002-03-30 Martin Maechler * R/plotpart.q: replace `` == "NA" '' by is.na(.) * R/mona.q (mona): 2002-03-04 Martin Maechler * DESCRIPTION (Version): 1.4-1 * R/zzz.R (.First.lib), * R/plothier.q: replace plclust() by plot[.hclust]() everywhere. 2002-01-29 Martin Maechler * R/pam.q (pam): comment on "valmisdat"; same in * R/fanny.q, R/agnes.q, R/clara.q, R/diana.q * src/pam.f (dysta): comment + white space * src/fanny.f (fanny): lowercase and indent + comments 2002-01-24 Martin Maechler * man/agnes.Rd, diana.Rd, pam.Rd, clara.Rd, mona.Rd, fanny.Rd : Reference and BACKGROUND section only in agnes.Rd; the others refer to agnes. * man/fanny.Rd: clean * R/agnes.q (agnes): \ ``diss = inherits(x, "dist")'' * R/diana.q (diana): > instead of "diss = FALSE" * R/fanny.q (fanny): / as we have changed pam() already in 1.4-0 2002-01-23 Martin Maechler * DESCRIPTION (Version): 1.4-0 * man/ellipsoidhull.Rd: example * tests/ellipsoid-ex.R and *.Rout: finalized * man/pluton.Rd: work around Rdconv \eqn{.}{.} bug. 2002-01-22 Martin Maechler * R/ellipsehull.R (ellipsehull) et al: generalized from 2 to p dimensions. -- points generation: not yet! * tests/ellipsehull.R: new test file * man/clusplot.partition.Rd: clean up * man/clusplot.default.Rd: proper reference to Pison et al * man/clusplot.Rd: clean 2002-01-21 Martin Maechler * R/ellipsehull.R (ellipsehull) and others: new functions * R/plotpart.q (clusplot.default) use new ellipsePoints(); simplification by using "d2" (= dist^2) instead of "dist". 
2002-01-19 Martin Maechler * R/plotpart.q (clusplot.default) re-scale cov.wt() result: Finally return the smallest possible ellipses. NOTA BENE ===> (numerical) results are *different* ! 2002-01-18 Martin Maechler * R/plotpart.q (clusplot.default) {spannel}: Finally found why our ellipses are not quite ok : R's cov.wt() differs from S-plus' ! * src/spannel.f (spannel): new argument `maxit' (was 5000). * R/plotpart.q (clusplot.default): cleanup, eliminating internal kleur() & plotje(); and "spannel" arguments; new maxit; lower eps use which.min() and which.max(); ... * R/pam.q (pam): diss has new default = inherits(x, "dist") which is TRUE therefore for dissimilarity or dist objects. 2002-01-12 Martin Maechler | 2002-01-18 Martin Maechler * R/agnes.q, R/diana.q : a bit of cleanup in the two twins calling functions. * man/lower.to.upper.tri.inds.Rd, * man/cluster-internal.Rd: new for formerly local functions, now in * R/internal.R (sizeDiss), * R/internal.R (lower.to.upper.tri.inds), and upper.to...: new functions instead of local functions in several places, e.g., * R/diana.q, R/fanny.q, ... * R/plotpart.q (clusplot.default): fix bug PR#1249: cmd() != cmdscale(); use new cmdscale(*, add=TRUE) ---> (slightly) different result sometimes fix long-standing typo in NA case + more cleanup * R/plotpart.q (clusplot.partition): explicit `main' argument with better default. 2001-12-06 Martin Maechler * DESCRIPTION (Version): 1.3-6 * R/plotpart.q: enable `density =' for polygon shading. 2001-11-27 Martin Maechler * R/zzz.R: get rid of .Alias 2001-11-06 Martin Maechler * DESCRIPTION (Version): 1.3-5 * R/plothier.q: Fix menu() bug thanks to Richard Rowe. * R/plotpart.q: ditto * R/agnes.q: properly allow integer matrix input: don't use all(sapply(x, data.class) == "numeric") anymore. * R/clara.q, R/diana.q, R/fanny.q, R/pam.q: ditto 2001-11-05 Martin Maechler * R/pam.q: `call' via match.call() instead of sys.call, and as list component instead of attribute. 
[R philosophy compatibility] * R/mona.q: ditto * R/fanny.q, R/diana.q, R/clara.q, R/agnes.q, * R/plothier.q, R/plotpart.q: ditto 2001-10-09 Martin Maechler * DESCRIPTION (Version): 1.3-5b (beta) for sending to Robert G * R/plothier.q: plot.diana() must have main=NULL * R/diana.q: minor code cleanup 2001-08-27 Martin Maechler * README.orig: renamed from R/README-splus 2001-08-22 Martin Maechler * DESCRIPTION (Version): New version is 1.3-4 * man/flower.Rd: nicer using \describe{} * man/plot.partition.Rd (and R/plotpart.q): new argument `which.plots' (as in the other plot.* functions). * R/plothier.q: All plot.* methods which produce more than one plot now call par(ask = TRUE) automagically when `needed' (as e.g., in plot.lm()). * man/*.Rd: document all arguments; a bit more cleanup. R (1.4.0) CMD check is now okay. 2001-08-18 Martin Maechler * R/*.q and man/*.Rd: generic/method argument fixes 2001-05-26 Martin Maechler * man/*.Rd: indenting in all dataset examples * man/votes.repub.Rd: usage fix 2001-05-23 Martin Maechler * INDEX: structured logically, rather than alphabetically * tests/clara.R: new test * src/clara.f (clara): added comments * R/clara.q (clara) : comments and cleanup 2001-05-22 Martin Maechler * DESCRIPTION (Version): New version is 1.3-3. * R/agnes.q and all others: `components' not `arguments' in print.*() * src/meet.f (meet): use [if then else] instead of goto * src/clara.f (clara): all declarations explicit; some cleanup 2001-05-21 Martin Maechler * DESCRIPTION (Package): licence changed to GPL (Rousseeuw's e-mail) * R/pam.q: minor code cleanup for Fortran interface * src/pam.f (pam): all declarations explicit * README: integrated former ./README_MM * src/twins.f, many R/*.q and * man/pltree.Rd: replace s/S-plus/S/ in many places 2001-03-21 Martin Maechler * man/print.summary.FOO.Rd: drop these files, move some to FOO.Rd * man/print*.Rd: cleanup, use \method{} 2001-01-04 Martin Maechler * DESCRIPTION (Version): New version is 1.3-2. 
* man/print*.Rd: Better \title{.}, hence * INDEX * man/*.Rd: Remove \keyword{libcluster}; we have {cluster}. 2001-01-03 Martin Maechler * DESCRIPTION (Version): New version is 1.3-1. 2001-01-02 Martin Maechler * man/*.Rd: fixes for codoc() * src/spannel.f (spannel): improve readability, indent properly, add a few comments * src/clara.f: * src/pam.f: * src/twins.f: * R/*.q : Added PACKAGE = .. to all .Fortran() calls ===== Many codoc() fixes; particularly summary.*(*, ...) * R/plotpart.q: (clusplot.partition): simplified * R/agnes.q: T/F -> TRUE/FALSE and more * R/clara.q: * R/diana.q: * R/fanny.q: * R/mona.q: * R/pam.q: 2000-12-30 Martin Maechler * DESCRIPTION (Version): New version is 1.2-5. 2000-12-14 Martin Maechler * src/daisy.f: indented do loops; one if / else. * R/daisy.q: __ daisy() __ - "ordratio" |-> "T", was "O" erroneously! - `metric' and `list' argument checking * man/clusplot.default.Rd: updated and cleaned. 2000-12-02 Martin Maechler * R/plothier.q: plot.agnes() & plot.diana() : main=NULL defaults to two different titles for both plots 2000-11-30 Martin Maechler * man/...Rd: - \section{NOTE} becomes \note - fix most T/F to TRUE/FALSE, .. * R/plothier.q: - cleanup (T/F); indenting - plot.mona(): las = 1 for axis; allow main= - plot.diana(): `which.plot' and main= and sub= - plot.agnes(): `which.plot' and main= and sub= - pltree.twins(): allow main= ; rm"plot = TRUE" (warn) --> now depends on R 1.2's plot.hclust() * R/plotpart.q: clusplot.default() -- now works! - *much* clean up - color choice such as to see points - got rid of NaN warning - eliminated "polygon(*,density.) warnings by '##no yet'" 2000-11-29 Martin Maechler * R/daisy.q: add "dist" class (and fix T/F to TRUE/FALSE etc) * R/daisy.q and * man/print.dissimilarity.Rd: add summary.dissimilarity() * man/dissimilarity.object.Rd: cleanup, use \value{.}, doc. "dist" * man/daisy.Rd: cleanup, use \value{.} * man/agnes.Rd: cleanup. 
* man/*.object.Rd: cleanup, use \value{.} 2000-12-24 Kurt Hornik * DESCRIPTION (Version): New version is 1.2-4. (Maintainer): New entry. Thu Feb 17 22:56:58 2000 Kurt Hornik * DESCRIPTION (Version): New version is 1.2-3. * src/Makefile: Removed. Tue Dec 28 18:41:09 1999 Kurt Hornik * DESCRIPTION (Version): New version is 1.2-2. * data/00Index: Added entry for `xclara'. * man/xclara.Rd: New file. * data/figure2.R: * data/table4.R: Removed as unused and undocumented. Sun Dec 5 20:14:45 1999 Kurt Hornik * DESCRIPTION (Version): New version is 1.2-1. * R/daisy.q: * src/daisy.f: * PORTING: Rewrite to pass integers rather than character strings to Fortran (changes provided by BDR). Sun Apr 11 04:21:03 1999 Kurt Hornik * DESCRIPTION (Version): New version is 1.2-0. * R/plotpart.q: Replace rep.int() by rep(). * R/zzz.R: Make .First.lib() use plot.hclust() for plclust() which seems to do the job, sort of. * data/animals.R: Replaced by `animals.tab'. * data/ruspini.R: Replaced by `ruspini.tab'. * data/votes.repub.tab: New file. * man/agriculture.Rd: New file. * man/animals.Rd: New file. * man/flower.Rd: New file. * man/ruspini.Rd: New file. * man/votes.repub.Rd: New file. * man/*: Hand-edit all examples to make executable. Fri Nov 27 23:53:11 1998 Kurt Hornik * DESCRIPTION (Version): New version is 1.1-3. * R/mona.q: Barf only if neither a matrix nor a data frame (remember that in S, is.matrix() is TRUE for data frames). * man/*: Converted anew via `Sd2Rd -x' using Sd2Rd 0.3-2. * TODO: Removed. Tue Jun 16 09:23:15 1998 Kurt Hornik * DESCRIPTION (Version): New version is 1.1-2. * DESCRIPTION: * PORTING: * TITLE: * R/zzz.R: * src/Makefile: Change old `clus' to new name `cluster'. Mon Jun 15 11:01:52 1998 Kurt Hornik * ChangeLog: Finally started, current version is 1.1-1.